[BACK]Return to add_n.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / alpha / ev5

Annotation of OpenXM_contrib/gmp/mpn/alpha/ev5/add_n.asm, Revision 1.1.1.1

1.1       maekawa     1: dnl  Alpha EV5 __gmpn_add_n -- Add two limb vectors of the same length > 0 and
                      2: dnl  store sum in a third limb vector.
                      3:
                      4: dnl  Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
                      5:
                      6: dnl  This file is part of the GNU MP Library.
                      7:
                      8: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
                      9: dnl  it under the terms of the GNU Lesser General Public License as published by
                     10: dnl  the Free Software Foundation; either version 2.1 of the License, or (at your
                     11: dnl  option) any later version.
                     12:
                     13: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
                     14: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     15: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     16: dnl  License for more details.
                     17:
                     18: dnl  You should have received a copy of the GNU Lesser General Public License
                     19: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: dnl  MA 02111-1307, USA.
                     22:
                     23: include(`../config.m4')
                     24:
                     25: dnl  INPUT PARAMETERS
                     26: dnl  res_ptr   r16
                     27: dnl  s1_ptr    r17
                     28: dnl  s2_ptr    r18
                     29: dnl  size      r19
                     30:
                     31: ASM_START()
                     32: PROLOGUE(mpn_add_n)             C res[] = s1[] + s2[] over size limbs; carry out is returned in r0
                     33:        bis     r31,r31,r25             C clear cy
                     34:        subq    r19,4,r19               C decr loop cnt
                     35:        blt     r19,$Lend2              C if less than 4 limbs, goto 2nd loop
                     36: C Start software pipeline for 1st loop: load the first 4 limbs of each source, do the 1st and 2nd adds
                     37:        ldq     r0,0(r18)               C r0 = s2 limb 0
                     38:        ldq     r4,0(r17)               C r4 = s1 limb 0
                     39:        ldq     r1,8(r18)               C r1 = s2 limb 1
                     40:        ldq     r5,8(r17)               C r5 = s1 limb 1
                     41:        addq    r17,32,r17              C update s1_ptr
                     42:        ldq     r2,16(r18)              C r2 = s2 limb 2
                     43:        addq    r0,r4,r20               C 1st main add
                     44:        ldq     r3,24(r18)              C r3 = s2 limb 3
                     45:        subq    r19,4,r19               C decr loop cnt
                     46:        ldq     r6,-16(r17)             C r6 = s1 limb 2 (s1_ptr already advanced by 32)
                     47:        cmpult  r20,r0,r25              C compute cy from last add
                     48:        ldq     r7,-8(r17)              C r7 = s1 limb 3 (s1_ptr already advanced by 32)
                     49:        addq    r1,r5,r28               C 2nd main add
                     50:        addq    r18,32,r18              C update s2_ptr
                     51:        addq    r28,r25,r21             C 2nd carry add
                     52:        cmpult  r28,r5,r8               C compute cy from last add
                     53:        blt     r19,$Lend1              C if less than 4 limbs remain, jump
                     54: C 1st loop handles groups of 4 limbs in a software pipeline
                     55:        ALIGN(16)                       C align the entry of the 1st loop
                     56: $Loop: cmpult  r21,r28,r25             C compute cy from last add
                     57:        ldq     r0,0(r18)               C load s2 limb for the next 1st add
                     58:        bis     r8,r25,r25              C combine cy from the two adds
                     59:        ldq     r1,8(r18)               C load s2 limb for the next 2nd add
                     60:        addq    r2,r6,r28               C 3rd main add
                     61:        ldq     r4,0(r17)               C load s1 limb for the next 1st add
                     62:        addq    r28,r25,r22             C 3rd carry add
                     63:        ldq     r5,8(r17)               C load s1 limb for the next 2nd add
                     64:        cmpult  r28,r6,r8               C compute cy from last add
                     65:        cmpult  r22,r28,r25             C compute cy from last add
                     66:        stq     r20,0(r16)              C store 1st result limb
                     67:        bis     r8,r25,r25              C combine cy from the two adds
                     68:        stq     r21,8(r16)              C store 2nd result limb
                     69:        addq    r3,r7,r28               C 4th main add
                     70:        addq    r28,r25,r23             C 4th carry add
                     71:        cmpult  r28,r7,r8               C compute cy from last add
                     72:        cmpult  r23,r28,r25             C compute cy from last add
                     73:                addq    r17,32,r17              C update s1_ptr
                     74:        bis     r8,r25,r25              C combine cy from the two adds
                     75:                addq    r16,32,r16              C update res_ptr
                     76:        addq    r0,r4,r28               C 1st main add
                     77:        ldq     r2,16(r18)              C load s2 limb for the next 3rd add
                     78:        addq    r25,r28,r20             C 1st carry add
                     79:        ldq     r3,24(r18)              C load s2 limb for the next 4th add
                     80:        cmpult  r28,r4,r8               C compute cy from last add
                     81:        ldq     r6,-16(r17)             C load s1 limb for the next 3rd add (s1_ptr already advanced)
                     82:        cmpult  r20,r28,r25             C compute cy from last add
                     83:        ldq     r7,-8(r17)              C load s1 limb for the next 4th add (s1_ptr already advanced)
                     84:        bis     r8,r25,r25              C combine cy from the two adds
                     85:        subq    r19,4,r19               C decr loop cnt
                     86:        stq     r22,-16(r16)            C store 3rd result limb (res_ptr already advanced)
                     87:        addq    r1,r5,r28               C 2nd main add
                     88:        stq     r23,-8(r16)             C store 4th result limb (res_ptr already advanced)
                     89:        addq    r25,r28,r21             C 2nd carry add
                     90:                addq    r18,32,r18              C update s2_ptr
                     91:        cmpult  r28,r5,r8               C compute cy from last add
                     92:        bge     r19,$Loop               C repeat while at least 4 more limbs remain
                     93: C Finish software pipeline for 1st loop: 3rd and 4th adds of the last group, then store all 4 results
                     94: $Lend1:        cmpult  r21,r28,r25             C compute cy from last add
                     95:        bis     r8,r25,r25              C combine cy from the two adds
                     96:        addq    r2,r6,r28               C 3rd main add
                     97:        addq    r28,r25,r22             C 3rd carry add
                     98:        cmpult  r28,r6,r8               C compute cy from last add
                     99:        cmpult  r22,r28,r25             C compute cy from last add
                    100:        stq     r20,0(r16)              C store 1st result limb
                    101:        bis     r8,r25,r25              C combine cy from the two adds
                    102:        stq     r21,8(r16)              C store 2nd result limb
                    103:        addq    r3,r7,r28               C 4th main add
                    104:        addq    r28,r25,r23             C 4th carry add
                    105:        cmpult  r28,r7,r8               C compute cy from last add
                    106:        cmpult  r23,r28,r25             C compute cy from last add
                    107:        bis     r8,r25,r25              C combine cy from the two adds
                    108:        addq    r16,32,r16              C update res_ptr
                    109:        stq     r22,-16(r16)            C store 3rd result limb (res_ptr already advanced)
                    110:        stq     r23,-8(r16)             C store 4th result limb (res_ptr already advanced)
                    111: $Lend2:        addq    r19,4,r19               C restore loop cnt
                    112:        beq     r19,$Lret               C no limbs left, return cy
                    113: C Start software pipeline for 2nd loop: load one limb from each source
                    114:        ldq     r0,0(r18)               C r0 = next s2 limb
                    115:        ldq     r4,0(r17)               C r4 = next s1 limb
                    116:        subq    r19,1,r19               C decr loop cnt
                    117:        beq     r19,$Lend0              C only one limb left, skip 2nd loop
                    118: C 2nd loop handles remaining 1-3 limbs
                    119:        ALIGN(16)                       C align the entry of the 2nd loop
                    120: $Loop0:        addq    r0,r4,r28               C main add
                    121:        ldq     r0,8(r18)               C preload following s2 limb
                    122:        cmpult  r28,r4,r8               C compute cy from last add
                    123:        ldq     r4,8(r17)               C preload following s1 limb
                    124:        addq    r28,r25,r20             C carry add
                    125:        addq    r18,8,r18               C update s2_ptr
                    126:        addq    r17,8,r17               C update s1_ptr
                    127:        stq     r20,0(r16)              C store result limb
                    128:        cmpult  r20,r28,r25             C compute cy from last add
                    129:        subq    r19,1,r19               C decr loop cnt
                    130:        bis     r8,r25,r25              C combine cy from the two adds
                    131:        addq    r16,8,r16               C update res_ptr
                    132:        bne     r19,$Loop0              C loop until only the preloaded limb pair is left
                    133: $Lend0:        addq    r0,r4,r28               C main add
                    134:        addq    r28,r25,r20             C carry add
                    135:        cmpult  r28,r4,r8               C compute cy from last add
                    136:        cmpult  r20,r28,r25             C compute cy from last add
                    137:        stq     r20,0(r16)              C store last result limb
                    138:        bis     r8,r25,r25              C combine cy from the two adds
                    139:
                    140: $Lret: bis     r25,r31,r0              C return cy
                    141:        ret     r31,(r26),1             C return through the address in r26
                    142: EPILOGUE(mpn_add_n)
                    143: ASM_END()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>