[BACK]Return to addmul_2.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / alpha / ev6 / nails

Annotation of OpenXM_contrib/gmp/mpn/alpha/ev6/nails/addmul_2.asm, Revision 1.1

1.1     ! ohara       1: dnl  Alpha ev6 nails mpn_addmul_2.
        !             2:
        !             3: dnl  Copyright 2002 Free Software Foundation, Inc.
        !             4: dnl
        !             5: dnl  This file is part of the GNU MP Library.
        !             6: dnl
        !             7: dnl  The GNU MP Library is free software; you can redistribute it and/or
        !             8: dnl  modify it under the terms of the GNU Lesser General Public License as
        !             9: dnl  published by the Free Software Foundation; either version 2.1 of the
        !            10: dnl  License, or (at your option) any later version.
        !            11: dnl
        !            12: dnl  The GNU MP Library is distributed in the hope that it will be useful,
        !            13: dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            14: dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            15: dnl  Lesser General Public License for more details.
        !            16: dnl
        !            17: dnl  You should have received a copy of the GNU Lesser General Public
        !            18: dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
        !            19: dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
        !            20: dnl  Suite 330, Boston, MA 02111-1307, USA.
        !            21:
        !            22: include(`../config.m4')
        !            23:
        !            24: dnl  INPUT PARAMETERS
        !            25: define(`rp',`r16')
        !            26: define(`up',`r17')
        !            27: define(`n',`r18')
        !            28: define(`vp',`r19')
        !            29:
        !            30: dnl  Useful register aliases
        !            31: define(`numb_mask',`r24')
        !            32: define(`ulimb',`r25')
        !            33: define(`rlimb',`r27')
        !            34:
        !            35: define(`m0a',`r0')
        !            36: define(`m0b',`r1')
        !            37: define(`m1a',`r2')
        !            38: define(`m1b',`r3')
        !            39:
        !            40: define(`acc0',`r4')
        !            41: define(`acc1',`r5')
        !            42:
        !            43: define(`v0',`r6')
        !            44: define(`v1',`r7')
        !            45:
        !            46: dnl Used for temps: r8 r19 r28  (r19 is also vp; it is reused once v0 and v1 have been loaded)
        !            47:
        !            48: define(`NAIL_BITS',`GMP_NAIL_BITS')
        !            49: define(`NUMB_BITS',`GMP_NUMB_BITS')
        !            50:
        !            51: dnl  This declaration is munged by configure
        !            52: NAILS_SUPPORT(3-63)
        !            53:
        !            54: dnl  Runs at 4.0 cycles/limb.  With unrolling, the ulimb load and the 3
        !            55: dnl  bookkeeping increments and the `bis' that copies from r3 to r5 could be
        !            56: dnl  removed and the instruction count reduced from 21 to 16.  We could
        !            57: dnl  thereby reach about 2.3 cycles/limb.
        !            58:
        !            59: dnl If this is going to be a Karatsuba basecase building block, we need some
        !            60: dnl of the combinations below.  That way, we won't ever hit the
        !            61: dnl slower mpn_addmul_1 for any huge multiplication.
        !            62: dnl
        !            63: dnl    Alt 3           Alt 4           Alt 5           Alt 6
        !            64: dnl    addmul_2        addmul_2        addmul_3        addmul_3
        !            65: dnl    addmul_3        addmul_3        addmul_4        addmul_4
        !            66: dnl                    addmul_4        addmul_5        addmul_5
        !            67: dnl                                                    addmul_6
        !            68:
        !            69: dnl Register usage:
        !            70: dnl callee-saves:      r9 r10 r11 r12 r13 r14 r15
        !            71: dnl scratch: r0 r1 r2 r3 r4 r5 r6 r7 r8
        !            72: dnl         r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r27 r28
        !            73: dnl return address: 26
        !            74: dnl global pointer: 29
        !            75: dnl stack pointer: 30
        !            76:
        !            77: ASM_START()
        !            78: PROLOGUE(mpn_addmul_2)
        !            79:        lda     numb_mask,-1(r31)               C numb_mask = all ones (r31 reads as zero)
        !            80:        srl     numb_mask,NAIL_BITS,numb_mask   C clear the top NAIL_BITS bits of the mask
        !            81:
        !            82:        ldq     v0,     0(vp)                   C v0 = vp[0]
        !            83:        ldq     v1,     8(vp)                   C v1 = vp[1]
        !            84:
        !            85:        bis     r31,    r31,    acc0            C       zero acc0
        !            86:        sll     v0,NAIL_BITS,   v0              C pre-shift v0 left by NAIL_BITS
        !            87:        bis     r31,    r31,    acc1            C       zero acc1
        !            88:        sll     v1,NAIL_BITS,   v1              C pre-shift v1 left by NAIL_BITS
        !            89:        bis     r31,    r31,    r19             C zero r19; vp is no longer needed, r19 now carries the nail part
        !            90:
        !            91: C MAIN LOOP
        !            92:        ldq     ulimb,  0(up)                   C ulimb = up[0]
        !            93:        lda     up,     8(up)                   C advance up by one limb
        !            94:        mulq    v0,     ulimb,  m0a             C U1  m0a = low 64 bits of v0*ulimb
        !            95:        umulh   v0,     ulimb,  m0b             C U1  m0b = high 64 bits of v0*ulimb
        !            96:        mulq    v1,     ulimb,  m1a             C U1  m1a = low 64 bits of v1*ulimb
        !            97:        umulh   v1,     ulimb,  m1b             C U1  m1b = high 64 bits of v1*ulimb
        !            98:        lda     n,      -1(n)                   C n = n - 1
        !            99:        beq     n,      Lend                    C U0  only one limb: skip the loop, go to the wind-down code
        !           100:        ALIGN(16)
        !           101: Loop:
        !           102:        bis     r31,    r31,    r31             C       nop
        !           103:        ldq     rlimb,  0(rp)                   C rlimb = current rp limb
        !           104:        ldq     ulimb,  0(up)                   C ulimb = next up limb
        !           105:        addq    r19,    acc0,   acc0            C       propagate nail
        !           106:
        !           107:        lda     rp,     8(rp)                   C advance rp by one limb
        !           108:        srl     m0a,NAIL_BITS,r8                C U0  r8 = m0a >> NAIL_BITS (from the previous ulimb)
        !           109:        lda     up,     8(up)                   C advance up by one limb
        !           110:        mulq    v0,     ulimb,  m0a             C U1  start low product for the new ulimb
        !           111:
        !           112:        addq    r8,     acc0,   r19             C r19 = r8 + acc0
        !           113:        addq    m0b,    acc1,   acc0            C acc0 = m0b + acc1
        !           114:        umulh   v0,     ulimb,  m0b             C U1  start high product for the new ulimb
        !           115:        bis     r31,    r31,    r31             C       nop
        !           116:
        !           117:        addq    rlimb,  r19,    r19             C add in the limb loaded from rp
        !           118:        srl     m1a,NAIL_BITS,r8                C U0  r8 = m1a >> NAIL_BITS (from the previous ulimb)
        !           119:        bis     r31,    r31,    r31             C       nop
        !           120:        mulq    v1,     ulimb,  m1a             C U1  start low product for the new ulimb
        !           121:
        !           122:        addq    r8,     acc0,   acc0            C acc0 += r8
        !           123:        bis     r31,    m1b,    acc1            C acc1 = m1b
        !           124:        umulh   v1,     ulimb,  m1b             C U1  start high product for the new ulimb
        !           125:        and     r19,numb_mask,  r28             C       extract numb part
        !           126:
        !           127:        lda     n,      -1(n)                   C n = n - 1
        !           128:        srl     r19,NUMB_BITS,  r19             C       extract nail part
        !           129:        stq     r28,    -8(rp)                  C store numb part to the limb that was loaded (rp already advanced)
        !           130:        bne     n,      Loop                    C U0  repeat while limbs remain
        !           131: C END LOOP
        !           132: Lend:
        !           133:        ldq     rlimb,  0(rp)                   C rlimb = current rp limb
        !           134:        addq    r19,    acc0,   acc0            C       propagate nail
        !           135:        lda     rp,     8(rp)                   C advance rp by one limb
        !           136:        srl     m0a,NAIL_BITS,r8                C U0  r8 = m0a >> NAIL_BITS
        !           137:        addq    r8,     acc0,   r19             C r19 = r8 + acc0
        !           138:        addq    m0b,    acc1,   acc0            C acc0 = m0b + acc1
        !           139:        addq    rlimb,  r19,    r19             C add in the limb loaded from rp
        !           140:        srl     m1a,NAIL_BITS,r8                C U0  r8 = m1a >> NAIL_BITS
        !           141:        addq    r8,     acc0,   acc0            C acc0 += r8
        !           142:        bis     r31,    m1b,    acc1            C acc1 = m1b
        !           143:        and     r19,numb_mask,  r28             C extract limb
        !           144:
        !           145:        srl     r19,NUMB_BITS,  r19             C extract nail
        !           146:        stq     r28,    -8(rp)                  C store numb part to the limb that was loaded (rp already advanced)
        !           147:
        !           148:        addq    r19,    acc0,   acc0            C propagate nail
        !           149:        and     acc0,numb_mask, r28             C numb part of acc0
        !           150:        stq     r28,    0(rp)                   C store it to the following rp limb
        !           151:        srl     acc0,NUMB_BITS, r19             C nail part of acc0
        !           152:        addq    r19,    acc1,   r0              C r0 = r19 + acc1; presumably the returned carry limb (r0 as return register - confirm against ABI)
        !           153:
        !           154:        ret     r31,    (r26),  1               C return through the return address in r26
        !           155: EPILOGUE(mpn_addmul_2)
        !           156: ASM_END()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>