[BACK]Return to addmul_3.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / alpha / ev6 / nails

Annotation of OpenXM_contrib/gmp/mpn/alpha/ev6/nails/addmul_3.asm, Revision 1.1

1.1     ! ohara       1: dnl  Alpha ev6 nails mpn_addmul_3.
        !             2:
        !             3: dnl  Copyright 2002 Free Software Foundation, Inc.
        !             4: dnl
        !             5: dnl  This file is part of the GNU MP Library.
        !             6: dnl
        !             7: dnl  The GNU MP Library is free software; you can redistribute it and/or
        !             8: dnl  modify it under the terms of the GNU Lesser General Public License as
        !             9: dnl  published by the Free Software Foundation; either version 2.1 of the
        !            10: dnl  License, or (at your option) any later version.
        !            11: dnl
        !            12: dnl  The GNU MP Library is distributed in the hope that it will be useful,
        !            13: dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            14: dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
        !            15: dnl  Lesser General Public License for more details.
        !            16: dnl
        !            17: dnl  You should have received a copy of the GNU Lesser General Public
        !            18: dnl  License along with the GNU MP Library; see the file COPYING.LIB.  If
        !            19: dnl  not, write to the Free Software Foundation, Inc., 59 Temple Place -
        !            20: dnl  Suite 330, Boston, MA 02111-1307, USA.
        !            21:
        !            22: include(`../config.m4')
        !            23:
        !            24: dnl  INPUT PARAMETERS
        !            25: define(`rp',`r16')        C limb array that is read, added to and written back
        !            26: define(`up',`r17')        C source limb array, one limb is loaded per pass
        !            27: define(`n',`r18')         C count of limbs at up, counted down to zero
        !            28: define(`vp',`r19')        C three multiplier limbs are loaded from offsets 0, 8 and 16
        !            29:
        !            30: dnl  Useful register aliases
        !            31: define(`numb_mask',`r24') C all ones shifted right by NAIL_BITS
        !            32: define(`ulimb',`r25')     C limb most recently loaded from up
        !            33: define(`rlimb',`r27')     C limb most recently loaded from rp
        !            34:
        !            35: define(`m0a',`r0')        C mulq result of v0 and ulimb; also holds the final carry at return
        !            36: define(`m0b',`r1')        C umulh result of v0 and ulimb
        !            37: define(`m1a',`r2')        C mulq result of v1 and ulimb
        !            38: define(`m1b',`r3')        C umulh result of v1 and ulimb
        !            39: define(`m2a',`r20')       C mulq result of v2 and ulimb
        !            40: define(`m2b',`r21')       C umulh result of v2 and ulimb
        !            41:
        !            42: define(`acc0',`r4')       C accumulator feeding the limb currently being finished
        !            43: define(`acc1',`r5')       C accumulator one limb above acc0
        !            44: define(`acc2',`r22')      C accumulator two limbs above acc0
        !            45:
        !            46: define(`v0',`r6')         C limb at 0(vp), held shifted left by NAIL_BITS
        !            47: define(`v1',`r7')         C limb at 8(vp), held shifted left by NAIL_BITS
        !            48: define(`v2',`r23')        C limb at 16(vp), held shifted left by NAIL_BITS
        !            49:
        !            50: dnl Used for temps: r8 r19 r28  (r8: low product shifted right by NAIL_BITS; r19: carry between limbs, reusing the vp register once v0-v2 are loaded; r28: masked limb about to be stored)
        !            51:
        !            52: define(`NAIL_BITS',`GMP_NAIL_BITS')   C shift count applied to the multipliers and to the mulq results
        !            53: define(`NUMB_BITS',`GMP_NUMB_BITS')   C shift count used to split off the nail part of a sum
        !            54:
        !            55: dnl  This declaration is munged by configure
        !            56: NAILS_SUPPORT(3-63)
        !            57:
        !            58: dnl  Runs at 3.0 cycles/limb.  With unrolling, the ulimb load and the 3
        !            59: dnl  bookkeeping increments and the `bis' that copies from r21 to r22 could be
        !            60: dnl  removed and the instruction count reduced from 26 to 21.  We could
        !            61: dnl  thereby probably reach 2 cycles/limb, the IMUL bandwidth.
        !            62:
        !            63: dnl If this is going to be a Karatsuba basecase building block, we need some
        !            64: dnl of the combinations below.  That way, we won't ever hit the
        !            65: dnl slower mpn_addmul_1 for any huge multiplication.
        !            66: dnl
        !            67: dnl    Alt 3           Alt 4           Alt 5           Alt 6
        !            68: dnl    addmul_2        addmul_2        addmul_3        addmul_3
        !            69: dnl    addmul_3        addmul_3        addmul_4        addmul_4
        !            70: dnl                    addmul_4        addmul_5        addmul_5
        !            71: dnl                                                    addmul_6
        !            72:
        !            73: dnl Register usage:
        !            74: dnl callee-saves:      r9 r10 r11 r12 r13 r14 r15
        !            75: dnl scratch: r0 r1 r2 r3 r4 r5 r6 r7 r8
        !            76: dnl         r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r27 r28
        !            77: dnl return address: 26
        !            78: dnl global pointer: 29
        !            79: dnl stack pointer: 30
        !            80: dnl  mpn_addmul_3: multiplies each of the n limbs at up by the three limbs at vp, adds the products into the limbs at rp, stores n+2 limbs at rp and leaves the final carry in r0.
        !            81: ASM_START()
        !            82: PROLOGUE(mpn_addmul_3)
        !            83:        lda     numb_mask,-1(r31)               C numb_mask = -1, all ones (r31 reads as zero)
        !            84:        srl     numb_mask,NAIL_BITS,numb_mask   C clear the top NAIL_BITS bits of the mask
        !            85: C Fetch the three multiplier limbs
        !            86:        ldq     v0,     0(vp)
        !            87:        ldq     v1,     8(vp)
        !            88:        ldq     v2,     16(vp)
        !            89: C Clear the accumulators and pre-shift the multipliers, so that each mulq result shifted right by NAIL_BITS is the low part and each umulh result is the high part
        !            90:        bis     r31,    r31,    acc0            C       zero acc0
        !            91:        sll     v0,NAIL_BITS,   v0
        !            92:        bis     r31,    r31,    acc1            C       zero acc1
        !            93:        sll     v1,NAIL_BITS,   v1
        !            94:        bis     r31,    r31,    acc2            C       zero acc2
        !            95:        sll     v2,NAIL_BITS,   v2
        !            96:        bis     r31,    r31,    r19             C zero r19: vp is no longer needed, r19 now carries the nail part between limbs
        !            97:
        !            98: C MAIN LOOP -- software pipelined: the products for the first limb are started here, and each pass of the loop consumes the products started one pass earlier
        !            99:        ldq     ulimb,  0(up)                   C first source limb
        !           100:        lda     up,     8(up)                   C advance up by one limb (8 bytes)
        !           101:        mulq    v0,     ulimb,  m0a             C U1
        !           102:        umulh   v0,     ulimb,  m0b             C U1
        !           103:        mulq    v1,     ulimb,  m1a             C U1
        !           104:        umulh   v1,     ulimb,  m1b             C U1
        !           105:        lda     n,      -1(n)                   C n = n - 1
        !           106:        mulq    v2,     ulimb,  m2a             C U1
        !           107:        umulh   v2,     ulimb,  m2b             C U1
        !           108:        beq     n,      Lend                    C U0  n was 1: nothing more to load, go fold in the products
        !           109:        ALIGN(16)
        !           110: Loop:
        !           111:        bis     r31,    r31,    r31             C       nop
        !           112:        ldq     rlimb,  0(rp)                   C rp limb to be updated in this pass
        !           113:        ldq     ulimb,  0(up)                   C next source limb, for the products started in this pass
        !           114:        addq    r19,    acc0,   acc0            C       propagate nail
        !           115:
        !           116:        lda     rp,     8(rp)                   C advance rp now; the store below therefore uses -8(rp)
        !           117:        srl     m0a,NAIL_BITS,  r8              C U0  r8 = low part of the previous v0 product; must read m0a before the mulq below overwrites it
        !           118:        lda     up,     8(up)                   C advance up
        !           119:        mulq    v0,     ulimb,  m0a             C U1
        !           120:
        !           121:        addq    r8,     acc0,   r19             C r19 = low part of v0 product + acc0
        !           122:        addq    m0b,    acc1,   acc0            C acc0 = high part of v0 product + acc1 (accumulators move down one limb)
        !           123:        umulh   v0,     ulimb,  m0b             C U1
        !           124:        bis     r31,    r31,    r31             C       nop
        !           125:
        !           126:        addq    rlimb,  r19,    r19             C add in the limb loaded from rp
        !           127:        srl     m1a,NAIL_BITS,  r8              C U0  r8 = low part of the previous v1 product
        !           128:        bis     r31,    r31,    r31             C       nop
        !           129:        mulq    v1,     ulimb,  m1a             C U1
        !           130:
        !           131:        addq    r8,     acc0,   acc0            C acc0 += low part of v1 product
        !           132:        addq    m1b,    acc2,   acc1            C acc1 = high part of v1 product + acc2
        !           133:        umulh   v1,     ulimb,  m1b             C U1
        !           134:        and     r19,numb_mask,  r28             C       extract numb part
        !           135:
        !           136:        bis     r31,    r31,    r31             C       nop
        !           137:        srl     m2a,NAIL_BITS,  r8              C U0  r8 = low part of the previous v2 product
        !           138:        lda     n,      -1(n)                   C n = n - 1
        !           139:        mulq    v2,     ulimb,  m2a             C U1
        !           140:
        !           141:        addq    r8,     acc1,   acc1            C acc1 += low part of v2 product
        !           142:        bis     r31,    m2b,    acc2            C acc2 = high part of v2 product
        !           143:        umulh   v2,     ulimb,  m2b             C U1
        !           144:        srl     r19,NUMB_BITS,  r19             C       extract nail part
        !           145:
        !           146:        bis     r31,    r31,    r31             C       nop
        !           147:        stq     r28,    -8(rp)                  C store the finished limb (rp was already advanced)
        !           148:
        !           149:        bne     n,      Loop                    C U0
        !           150: C END LOOP -- the products still outstanding are folded in below with the same steps as the loop body, but no new multiplies are started
        !           151: Lend:
        !           152:        ldq     rlimb,  0(rp)                   C last rp limb to be updated
        !           153:        addq    r19,    acc0,   acc0            C       propagate nail
        !           154:        lda     rp,     8(rp)                   C advance rp; stores below are relative to the new rp
        !           155:        srl     m0a,NAIL_BITS,  r8              C U0
        !           156:        addq    r8,     acc0,   r19             C r19 = low part of v0 product + acc0
        !           157:        addq    m0b,    acc1,   acc0            C acc0 = high part of v0 product + acc1
        !           158:        addq    rlimb,  r19,    r19             C add in the limb loaded from rp
        !           159:        srl     m1a,NAIL_BITS,  r8              C U0
        !           160:        addq    r8,     acc0,   acc0            C acc0 += low part of v1 product
        !           161:        addq    m1b,    acc2,   acc1            C acc1 = high part of v1 product + acc2
        !           162:        and     r19,numb_mask,  r28             C extract limb
        !           163:        srl     m2a,NAIL_BITS,  r8              C U0
        !           164:        addq    r8,     acc1,   acc1            C acc1 += low part of v2 product
        !           165:        bis     r31,    m2b,    acc2            C acc2 = high part of v2 product
        !           166:        srl     r19,NUMB_BITS,  r19             C extract nail
        !           167:        stq     r28,    -8(rp)                  C store the last limb that had an rp limb added in
        !           168: C Flush acc0 and acc1 as two further limbs, carrying each nail part upward
        !           169:        addq    r19,    acc0,   acc0            C propagate nail
        !           170:        and     acc0,numb_mask, r28             C numb part of acc0
        !           171:        stq     r28,    0(rp)                   C first extra limb; the old contents at this address are not read
        !           172:        srl     acc0,NUMB_BITS, r19             C nail part of acc0
        !           173:        addq    r19,    acc1,   acc1            C carry it into acc1
        !           174:
        !           175:        and     acc1,numb_mask, r28             C numb part of acc1
        !           176:        stq     r28,    8(rp)                   C second extra limb
        !           177:        srl     acc1,NUMB_BITS, r19             C nail part of acc1
        !           178:        addq    r19,    acc2,   m0a             C m0a is r0: carry + acc2 is left there, unmasked -- presumably the return value, confirm against the caller
        !           179:
        !           180:        ret     r31,    (r26),  1               C return through r26
        !           181: EPILOGUE(mpn_addmul_3)
        !           182: ASM_END()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>