[BACK]Return to sub_n.asm CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / alpha / ev5

Annotation of OpenXM_contrib/gmp/mpn/alpha/ev5/sub_n.asm, Revision 1.1.1.1

1.1       maekawa     1: dnl  Alpha EV5 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0
                      2: dnl  and store difference in a third limb vector.
                      3:
                      4: dnl  Copyright (C) 1995, 1999, 2000 Free Software Foundation, Inc.
                      5:
                      6: dnl  This file is part of the GNU MP Library.
                      7:
                      8: dnl  The GNU MP Library is free software; you can redistribute it and/or modify
                      9: dnl  it under the terms of the GNU Lesser General Public License as published by
                     10: dnl  the Free Software Foundation; either version 2.1 of the License, or (at your
                     11: dnl  option) any later version.
                     12:
                     13: dnl  The GNU MP Library is distributed in the hope that it will be useful, but
                     14: dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     15: dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     16: dnl  License for more details.
                     17:
                     18: dnl  You should have received a copy of the GNU Lesser General Public License
                     19: dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: dnl  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: dnl  MA 02111-1307, USA.
                     22:
                     23: include(`../config.m4')
                     24:
                     25: dnl  INPUT PARAMETERS
                     26: dnl  res_ptr   r16   destination; the difference limbs are stored here
                     27: dnl  s1_ptr    r17   minuend limbs (each s2 limb is subtracted from the s1 limb)
                     28: dnl  s2_ptr    r18   subtrahend limbs
                     29: dnl  size      r19   number of 8-byte limbs in each vector
                     30: dnl  RETURN VALUE: r0 = borrow out of the last limb processed (0 or 1)
                     31: ASM_START()
                     32: PROLOGUE(mpn_sub_n)
                     33:        bis     r31,r31,r25             C clear cy (r25 is the running borrow)
                     34:        subq    r19,4,r19               C decr loop cnt; negative means fewer than 4 limbs
                     35:        blt     r19,$Lend2              C if less than 4 limbs, goto 2nd loop
                     36: C Start software pipeline for 1st loop: load limbs 0-3, compute limbs 0 and 1
                     37:        ldq     r0,0(r18)               C s2 limb 0
                     38:        ldq     r4,0(r17)               C s1 limb 0
                     39:        ldq     r1,8(r18)               C s2 limb 1
                     40:        ldq     r5,8(r17)               C s1 limb 1
                     41:        addq    r17,32,r17              C update s1_ptr (now past this group of 4)
                     42:        ldq     r2,16(r18)              C s2 limb 2
                     43:        subq    r4,r0,r20               C 1st main subtract (no incoming cy yet)
                     44:        ldq     r3,24(r18)              C s2 limb 3
                     45:        subq    r19,4,r19               C decr loop cnt
                     46:        ldq     r6,-16(r17)             C s1 limb 2 (s1_ptr already advanced)
                     47:        cmpult  r4,r0,r25               C compute cy from last subtract
                     48:        ldq     r7,-8(r17)              C s1 limb 3
                     49:        subq    r5,r1,r28               C 2nd main subtract
                     50:        addq    r18,32,r18              C update s2_ptr
                     51:        subq    r28,r25,r21             C 2nd carry subtract
                     52:        cmpult  r5,r1,r8                C compute cy from last subtract
                     53:        blt     r19,$Lend1              C if less than 4 limbs remain, jump
                     54: C 1st loop handles groups of 4 limbs in a software pipeline.
                     55:        ALIGN(16)
$Loop: cmpult  r28,r25,r25             C compute cy from last subtract
                     57:        ldq     r0,0(r18)               C next group: s2 limb 0
                     58:        bis     r8,r25,r25              C combine cy from the two subtracts
                     59:        ldq     r1,8(r18)               C next group: s2 limb 1
                     60:        subq    r6,r2,r28               C 3rd main subtract
                     61:        ldq     r4,0(r17)               C next group: s1 limb 0
                     62:        subq    r28,r25,r22             C 3rd carry subtract
                     63:        ldq     r5,8(r17)               C next group: s1 limb 1
                     64:        cmpult  r6,r2,r8                C compute cy from last subtract
                     65:        cmpult  r28,r25,r25             C compute cy from last subtract
                     66:        stq     r20,0(r16)              C store limb 0 of the current group
                     67:        bis     r8,r25,r25              C combine cy from the two subtracts
                     68:        stq     r21,8(r16)              C store limb 1 of the current group
                     69:        subq    r7,r3,r28               C 4th main subtract
                     70:        subq    r28,r25,r23             C 4th carry subtract
                     71:        cmpult  r7,r3,r8                C compute cy from last subtract
                     72:        cmpult  r28,r25,r25             C compute cy from last subtract
                     73:                addq    r17,32,r17              C update s1_ptr
                     74:        bis     r8,r25,r25              C combine cy from the two subtracts
                     75:                addq    r16,32,r16              C update res_ptr
                     76:        subq    r4,r0,r28               C 1st main subtract (next group)
                     77:        ldq     r2,16(r18)              C next group: s2 limb 2
                     78:        subq    r28,r25,r20             C 1st carry subtract
                     79:        ldq     r3,24(r18)              C next group: s2 limb 3
                     80:        cmpult  r4,r0,r8                C compute cy from last subtract
                     81:        ldq     r6,-16(r17)             C next group: s1 limb 2 (s1_ptr already advanced)
                     82:        cmpult  r28,r25,r25             C compute cy from last subtract
                     83:        ldq     r7,-8(r17)              C next group: s1 limb 3
                     84:        bis     r8,r25,r25              C combine cy from the two subtracts
                     85:        subq    r19,4,r19               C decr loop cnt
                     86:        stq     r22,-16(r16)            C store limb 2 of the finished group (res_ptr already advanced)
                     87:        subq    r5,r1,r28               C 2nd main subtract
                     88:        stq     r23,-8(r16)             C store limb 3 of the finished group
                     89:        subq    r28,r25,r21             C 2nd carry subtract
                     90:                addq    r18,32,r18              C update s2_ptr
                     91:        cmpult  r5,r1,r8                C compute cy from last subtract
                     92:        bge     r19,$Loop               C repeat while another full group of 4 remains
                     93: C Finish software pipeline for 1st loop: complete limbs 2 and 3, store all 4
$Lend1:        cmpult  r28,r25,r25             C compute cy from last subtract
                     95:        bis     r8,r25,r25              C combine cy from the two subtracts
                     96:        subq    r6,r2,r28               C 3rd main subtract
                     97:        subq    r28,r25,r22             C 3rd carry subtract
                     98:        cmpult  r6,r2,r8                C compute cy from last subtract
                     99:        cmpult  r28,r25,r25             C compute cy from last subtract
                    100:        stq     r20,0(r16)              C store limb 0 of the last group
                    101:        bis     r8,r25,r25              C combine cy from the two subtracts
                    102:        stq     r21,8(r16)              C store limb 1 of the last group
                    103:        subq    r7,r3,r28               C 4th main subtract
                    104:        subq    r28,r25,r23             C 4th carry subtract
                    105:        cmpult  r7,r3,r8                C compute cy from last subtract
                    106:        cmpult  r28,r25,r25             C compute cy from last subtract
                    107:        bis     r8,r25,r25              C combine cy from the two subtracts
                    108:        addq    r16,32,r16              C update res_ptr
                    109:        stq     r22,-16(r16)            C store limb 2 of the last group
                    110:        stq     r23,-8(r16)             C store limb 3 of the last group
$Lend2:        addq    r19,4,r19               C restore loop cnt: r19 = limbs still to do (0-3)
                    112:        beq     r19,$Lret               C nothing left, just return cy
                    113: C Start software pipeline for 2nd loop: pre-load one limb pair
                    114:        ldq     r0,0(r18)               C s2 limb
                    115:        ldq     r4,0(r17)               C s1 limb
                    116:        subq    r19,1,r19               C count the limb pair just loaded
                    117:        beq     r19,$Lend0              C it was the only one: skip the loop
                    118: C 2nd loop handles remaining 1-3 limbs, one limb per iteration
                    119:        ALIGN(16)
$Loop0:        subq    r4,r0,r28               C main subtract
                    121:        cmpult  r4,r0,r8                C compute cy from last subtract
                    122:        ldq     r0,8(r18)               C pre-load next s2 limb
                    123:        ldq     r4,8(r17)               C pre-load next s1 limb
                    124:        subq    r28,r25,r20             C carry subtract
                    125:        addq    r18,8,r18               C update s2_ptr
                    126:        addq    r17,8,r17               C update s1_ptr
                    127:        stq     r20,0(r16)              C store difference limb
                    128:        cmpult  r28,r25,r25             C compute cy from last subtract
                    129:        subq    r19,1,r19               C decr loop cnt
                    130:        bis     r8,r25,r25              C combine cy from the two subtracts
                    131:        addq    r16,8,r16               C update res_ptr
                    132:        bne     r19,$Loop0              C loop until only the pre-loaded pair remains
$Lend0:        subq    r4,r0,r28               C main subtract
                    134:        subq    r28,r25,r20             C carry subtract
                    135:        cmpult  r4,r0,r8                C compute cy from last subtract
                    136:        cmpult  r28,r25,r25             C compute cy from last subtract
                    137:        stq     r20,0(r16)              C store last difference limb
                    138:        bis     r8,r25,r25              C combine cy from the two subtracts
                    139:
$Lret: bis     r25,r31,r0              C return cy (final borrow, 0 or 1) in r0
                    141:        ret     r31,(r26),1             C return through the address in r26
                    142: EPILOGUE(mpn_sub_n)
                    143: ASM_END()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>