[BACK]Return to addmul_1.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / arm

Annotation of OpenXM_contrib/gmp/mpn/arm/addmul_1.asm, Revision 1.1.1.1

1.1       ohara       1: dnl  ARM mpn_addmul_1 -- Multiply a limb vector with a limb and add the result
                      2: dnl  to a second limb vector.
                      3: dnl  Contributed by Robert Harley.
                      4:
                      5: dnl  Copyright 1998, 2000, 2001 Free Software Foundation, Inc.
                      6:
                      7: dnl  This file is part of the GNU MP Library.
                      8:
                      9: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
                     10: dnl  it under the terms of the GNU Lesser General Public License as published
                     11: dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
                     12: dnl  your option) any later version.
                     13:
                     14: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
                     15: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     16: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     17: dnl  License for more details.
                     18:
                     19: dnl  You should have received a copy of the GNU Lesser General Public License
                     20: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     21: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     22: dnl  MA 02111-1307, USA.
                     23:
                     24: include(`../config.m4')
                     25:
                     26: C This runs at 10 cycles/limb on the StrongARM.  We should make this look
                     27: C more like the submul code, since that uses fewer registers and runs
                     28: C slightly faster.
                     29:
                     30: define(`rp',`r0')    C r0: limb vector that is loaded, accumulated into, and stored back
                     31: define(`up',`r1')    C r1: limb vector that is only loaded (the multiplicand limbs)
                     32: define(`n',`r2')     C r2: limb count; consumed by shifts and a down-counter
                     33: define(`v',`r3')     C r3: the single limb every up limb is multiplied by
                     34:
                     35:
                     36: ASM_START()
                         C mpn_addmul_1 -- for each of the n 32-bit limbs, compute
                         C rp[i] += up[i] * v, passing the carry limb on to the next position.
                         C
                         C In:   r0 = rp, r1 = up, r2 = n, r3 = v  (aliases defined above)
                         C Out:  r0 = carry limb left over after the last position
                         C Work: ip = running carry limb, r11 = constant 0 (operand for adc),
                         C       lr and r4-r10 = temporaries.  r8-r11 and lr are saved on entry;
                         C       r4-r7 are saved only when the 4-limb loop is entered.
                         C
                         C n is consumed from its low bits: bit 0 selects a 1-limb step, bit 1 a
                         C 2-limb step, and the remaining n/4 counts iterations of a 4-limb loop.
                         C With n = 0 nothing is stored and 0 is returned.
                     37: PROLOGUE(mpn_addmul_1)
                     38:        stmfd   sp!, { r8-r11, lr }        C save the registers used as scratch below
                     39:        mov     r11, #0                    C r11 stays 0: adc rX, r11, #0 yields just the carry flag
                     40:        mov     ip, #0                     C carry limb starts at 0
                     41:        movs    n, n, lsr #1               C bit 0 of n -> C flag; n = n/2
                     42:        bcc     L(skip1)                   C bit 0 clear: no single-limb step
                         C --- 1-limb step ---
                     43:        ldr     lr, [up], #4               C lr = up[0]; up advances one limb
                     44:        ldr     r9, [rp]                   C r9 = rp[0]
                     45:        umlal   r9, ip, v, lr              C ip:r9 = 0:rp[0] + v*up[0]  (ip was 0)
                     46:        str     r9, [rp], #4               C store low word; rp advances one limb
                     47: L(skip1):
                     48:        movs    n, n, lsr #1               C original bit 1 -> C flag; n = original n/4
                     49:        bcc     L(skip2)                   C bit 1 clear: no two-limb step
                         C --- 2-limb step ---
                     50:        ldmia   rp, { r9, r10 }            C r9 = rp[0], r10 = rp[1]; rp not advanced yet
                     51:        adds    r8, ip, r9                 C r8 = carry limb + rp[0]
                     52:        adc     r9, r11, #0                C r9 = carry out of that add (0 or 1)
                     53:        ldmia   up!, { ip, lr }            C ip = up[0], lr = up[1]; old carry already consumed
                     54:        umlal   r8, r9, v, ip              C r9:r8 += v*up[0]; r8 is result limb 0
                     55:        adds    r9, r9, r10                C fold the high word into rp[1]
                     56:        adc     ip, r11, #0                C ip = carry out of that add
                     57:        umlal   r9, ip, v, lr              C ip:r9 += v*up[1]; ip is the new carry limb
                     58:        stmia   rp!, { r8, r9 }            C store two result limbs; rp advances
                     59: L(skip2):
                     60:        teq     n, #0                      C any groups of four limbs left?
                     61:        beq     L(return)                  C none: skip the loop and its r4-r7 save
                     62:        stmfd   sp!, { r4-r7 }             C extra scratch needed only by the loop
                         C --- 4-limb loop: n iterations, ip carries between iterations ---
                     63: L(addmul_loop):
                     64:        ldmia   rp, { r5, r6, r7, r8 }     C r5..r8 = rp[0..3]; rp not advanced yet
                     65:        adds    r4, ip, r5                 C r4 = incoming carry limb + rp[0]
                     66:        adc     r5, r11, #0                C r5 = carry out
                     67:        ldmia   up!, { r9, r10, ip, lr }   C r9, r10, ip, lr = up[0..3]; up advances
                     68:        umlal   r4, r5, v, r9              C r5:r4 += v*up[0]; r4 is result limb 0
                     69:        adds    r5, r5, r6                 C high word + rp[1]
                     70:        adc     r6, r11, #0                C r6 = carry out
                     71:        umlal   r5, r6, v, r10             C r6:r5 += v*up[1]; r5 is result limb 1
                     72:        adds    r6, r6, r7                 C high word + rp[2]
                     73:        adc     r7, r11, #0                C r7 = carry out
                     74:        umlal   r6, r7, v, ip              C r7:r6 += v*up[2]; r6 is result limb 2
                     75:        adds    r7, r7, r8                 C high word + rp[3]
                     76:        adc     ip, r11, #0                C ip = carry out (up[2] in ip is no longer needed)
                     77:        umlal   r7, ip, v, lr              C ip:r7 += v*up[3]; ip = carry limb for next round
                     78:        subs    n, n, #1                   C sets Z for the bne below; stmia leaves flags alone
                     79:        stmia   rp!, { r4, r5, r6, r7 }    C store four result limbs; rp advances
                     80:        bne     L(addmul_loop)
                     81:        ldmfd   sp!, { r4-r7 }             C undo the loop-only save
                     82: L(return):
                     83:        mov     r0, ip                     C r0 = final carry limb (function result)
                     84:        ldmfd   sp!, { r8-r11, pc }        C restore r8-r11; saved lr is popped into pc to return
                     85: EPILOGUE(mpn_addmul_1)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>