OpenXM_contrib/gmp/mpn/arm/submul_1.asm - annotate

Return to submul_1.asm CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / arm
Annotation of OpenXM_contrib/gmp/mpn/arm/submul_1.asm, Revision 1.1.1.1

1.1       ohara       1: dnl  ARM mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
                      2: dnl  result from a second limb vector.
                      3: dnl  Based on mpn_addmul_1, which was contributed by Robert Harley.
                      4:
                      5: dnl  Copyright 1998, 2000, 2001 Free Software Foundation, Inc.
                      6:
                      7: dnl  This file is part of the GNU MP Library.
                      8:
                      9: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
                     10: dnl  it under the terms of the GNU Lesser General Public License as published
                     11: dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
                     12: dnl  your option) any later version.
                     13:
                     14: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
                     15: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     16: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     17: dnl  License for more details.
                     18:
                     19: dnl  You should have received a copy of the GNU Lesser General Public License
                     20: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     21: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     22: dnl  MA 02111-1307, USA.
                     23:
                     24: include(`../config.m4')
                     25:
                     26: C This runs at 9.75 cycles/limb in the StrongARM.
                     27:
                     28: C Could use some register cleanup.  Fewer registers might suffice, or r11
                     29: C could be put to use for better speed.  Could avoid saving all registers
                     30: C for small (n <= 3) operands.
                     31:
                     32: define(`rp',`r0')      C limb vector that is read, reduced and written back
                     33: define(`up',`r1')      C limb vector that is read and multiplied
                     34: define(`n',`r2')       C limb count; shifted right twice, then the loop counter
                     35: define(`v',`r3')       C single-limb multiplier fed to umull/umlal
                     36:
                     37: ASM_START()
                     38: PROLOGUE(mpn_submul_1)
                         C Multiply the n-limb vector at up by the limb v, subtract the
                         C product from the n-limb vector at rp, and store the difference
                         C back to rp.  The value returned in r0 is the carry-out limb: the
                         C high word of the last product plus the borrow out of the last
                         C subtraction (0 when n is 0).
                         C
                         C The low two bits of n select a one-limb and a two-limb step up
                         C front; the remaining n/4 passes handle four limbs each.
                         C
                         C r4 holds the limb still to be subtracted from the next rp limb.
                         C After each subtract chain, sbc r4, r0, r0 yields 0 or -1 from the
                         C carry flag alone (the two r0 operands cancel), and the following
                         C sub r4, hi, r4 adds that borrow to the high product word.
                     39:        stmfd   sp!, { r4-r10, lr }         C save r4-r10 and the return address
                     40:        mov     r4, #0                      C no carry-in limb yet
                     41:        movs    n, n, lsr #1                C n >>= 1; old bit 0 goes to the carry flag
                     42:        bcc     L(skip1)                    C bit 0 clear: no one-limb step
                     43:        ldr     lr, [up], #4                C lr = next up limb, advance up
                     44:        umull   r4, r12, v, lr              C r12:r4 = v * lr
                     45:        ldr     r6, [rp]
                     46:        subs    r6, r6, r4                  C subtract low word; carry clear means borrow
                     47:        sbc     r4, r0, r0                  C r4 = 0 if no borrow, -1 if borrow
                     48:        sub     r4, r12, r4                 C r4 = high word + borrow
                     49:        str     r6, [rp], #4                C store difference, advance rp
                     50: L(skip1):
                     51:        movs    n, n, lsr #1                C n >>= 1; bit 1 of the original count to carry
                     52:        bcc     L(skip2)                    C bit clear: no two-limb step
                     53:        ldmia   up!, { r9, r10 }            C load two limbs, advance up
                     54:        mov     r5, #0
                     55:        umlal   r4, r5, v, r9               C r5:r4 = r4 + v * r9
                     56:        mov     r9, #0
                     57:        umlal   r5, r9, v, r10              C r9:r5 = r5 + v * r10
                     58:        ldmia   rp, { r6, r7 }
                     59:        subs    r6, r6, r4
                     60:        sbcs    r7, r7, r5                  C borrow ripples through both limbs
                     61:        sbc     r4, r0, r0                  C r4 = 0 if no borrow, -1 if borrow
                     62:        sub     r4, r9, r4                  C r4 = top product word + borrow
                     63:        stmia   rp!, { r6, r7 }             C store two limbs, advance rp
                     64: L(skip2):
                     65:        teq     n, #0                       C n is now the number of four-limb passes
                     66:        beq     L(return)
                     67:
                     68: L(submul_loop):
                     69:        ldmia   up!, { r9, r10, r12, lr }   C load four limbs, advance up
                     70:        mov     r5, #0
                     71:        umlal   r4, r5, v, r9               C r5:r4 = r4 + v * r9
                     72:        mov     r9, #0
                     73:        umlal   r5, r9, v, r10              C r9:r5 = r5 + v * r10
                     74:        mov     r10, #0
                     75:        umlal   r9, r10, v, r12             C r10:r9 = r9 + v * r12
                     76:        mov     r12, #0
                     77:        umlal   r10, r12, v, lr             C r12:r10 = r10 + v * lr
                     78:        ldmia   rp, { r6, r7, r8, lr }      C lr is reused for the fourth rp limb
                     79:        subs    r6, r6, r4
                     80:        sbcs    r7, r7, r5
                     81:        sbcs    r8, r8, r9
                     82:        sbcs    lr, lr, r10                 C borrow ripples through all four limbs
                     83:        sbc     r4, r0, r0                  C capture the borrow before subs below resets the flags
                     84:        sub     r4, r12, r4                 C r4 = top product word + borrow
                     85:        subs    n, n, #1                    C one pass done; sets Z for the bne below
                     86:        stmia   rp!, { r6, r7, r8, lr }     C store four limbs, advance rp
                     87:        bne     L(submul_loop)
                     88: L(return):
                     89:        mov     r0, r4                      C return the carry-out limb
                     90:        ldmfd   sp!, { r4-r10, pc }         C restore registers; loading pc returns
                     91: EPILOGUE(mpn_submul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>