Annotation of OpenXM/src/kan96xx/gmp-2.0.2-ssh-2/mpn/alpha/ev5/add_n.s, Revision 1.1.1.1
1.1 takayama 1: # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
2: # store sum in a third limb vector.
3:
4: # Copyright (C) 1995 Free Software Foundation, Inc.
5:
6: # This file is part of the GNU MP Library.
7:
8: # The GNU MP Library is free software; you can redistribute it and/or modify
9: # it under the terms of the GNU Library General Public License as published by
10: # the Free Software Foundation; either version 2 of the License, or (at your
11: # option) any later version.
12:
13: # The GNU MP Library is distributed in the hope that it will be useful, but
14: # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
16: # License for more details.
17:
18: # You should have received a copy of the GNU Library General Public License
19: # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: # MA 02111-1307, USA.
22:
23: # __mpn_add_n: res_ptr[i] = s1_ptr[i] + s2_ptr[i] + carry for each of the size 8-byte limbs; the final carry is returned.
24: # INPUT PARAMETERS
25: # res_ptr $16 -- destination limb vector (written with stq, advanced as limbs are stored)
26: # s1_ptr $17 -- first source limb vector (read with ldq)
27: # s2_ptr $18 -- second source limb vector (read with ldq)
28: # size $19 -- number of limbs (> 0 per the file header); reused as the loop counter
29: # RETURN VALUE: $0 = carry out of the last limb. Registers written: $0-$8, $16-$23, $25, $28; frame size is 0.
30: .set noreorder # keep the hand-scheduled instruction order below
31: .set noat # $28 is used explicitly as a temporary in this routine
32: .text
33: .align 3
34: .globl __mpn_add_n
35: .ent __mpn_add_n
36: __mpn_add_n:
37: .frame $30,0,$26,0 # frame size 0; return address in $26
38: # Register roles: $0-$3 s2 limbs, $4-$7 s1 limbs, $20-$23 sums, $25 running carry (cy), $28 = s2 limb + cy, $8 = carry out of that step.
39: or $31,$31,$25 # cy = 0
40: subq $19,4,$19 # $19 = size - 4
41: blt $19,.Lend2 # fewer than 4 limbs: skip the 4-limb loop, go to the 2nd loop
42: # Pipeline prologue for 1st loop: load the first 4 limb pairs and start adding limbs 0 and 1
43: ldq $0,0($18) # s2 limb 0
44: ldq $1,8($18) # s2 limb 1
45: ldq $4,0($17) # s1 limb 0
46: ldq $5,8($17) # s1 limb 1
47: addq $17,32,$17 # s1_ptr += 4 limbs (limbs 2,3 are read at negative offsets below)
48: ldq $2,16($18) # s2 limb 2
49: addq $0,$4,$20 # 1st main add (cy is still 0, so no cy add is needed)
50: ldq $3,24($18) # s2 limb 3
51: subq $19,4,$19 # account for the 4 limbs now in flight
52: ldq $6,-16($17) # s1 limb 2
53: cmpult $20,$0,$25 # cy = 1 if the sum wrapped (sum < s2 limb)
54: ldq $7,-8($17) # s1 limb 3
55: addq $1,$25,$28 # cy add: s2 limb 1 + cy
56: addq $18,32,$18 # s2_ptr += 4 limbs
57: addq $5,$28,$21 # 2nd main add
58: cmpult $28,$25,$8 # $8 = carry out of the cy add
59: blt $19,.Lend1 # no further full group of 4 limbs: just drain the pipeline
60: # 1st loop handles groups of 4 limbs in a software pipeline; at .Loop, sums 0,1 are in $20,$21 and limbs 2,3 are loaded in $2,$3,$6,$7
61: .align 4
62: .Loop: cmpult $21,$28,$25 # cy out of the 2nd main add
63: ldq $0,0($18) # next group: s2 limb 0
64: or $8,$25,$25 # cy = carry of cy add | carry of main add
65: ldq $1,8($18) # next group: s2 limb 1
66: addq $2,$25,$28 # cy add: s2 limb 2 + cy
67: ldq $4,0($17) # next group: s1 limb 0
68: addq $28,$6,$22 # 3rd main add
69: ldq $5,8($17) # next group: s1 limb 1
70: cmpult $28,$25,$8 # $8 = carry out of the cy add
71: cmpult $22,$28,$25 # cy out of the 3rd main add
72: stq $20,0($16) # store sum limb 0
73: or $8,$25,$25 # cy = carry of cy add | carry of main add
74: stq $21,8($16) # store sum limb 1
75: addq $3,$25,$28 # cy add: s2 limb 3 + cy
76: addq $28,$7,$23 # 4th main add
77: cmpult $28,$25,$8 # $8 = carry out of the cy add
78: cmpult $23,$28,$25 # cy out of the 4th main add
79: addq $17,32,$17 # s1_ptr += 4 limbs
80: or $8,$25,$25 # cy = carry of cy add | carry of main add
81: addq $16,32,$16 # res_ptr += 4 limbs (sum limbs 2,3 are stored at negative offsets below)
82: addq $0,$25,$28 # cy add: next group's s2 limb 0 + cy
83: ldq $2,16($18) # next group: s2 limb 2
84: addq $4,$28,$20 # 1st main add of the next group
85: ldq $3,24($18) # next group: s2 limb 3
86: cmpult $28,$25,$8 # $8 = carry out of the cy add
87: ldq $6,-16($17) # next group: s1 limb 2
88: cmpult $20,$28,$25 # cy out of the 1st main add
89: ldq $7,-8($17) # next group: s1 limb 3
90: or $8,$25,$25 # cy = carry of cy add | carry of main add
91: subq $19,4,$19 # account for the group just loaded
92: stq $22,-16($16) # store sum limb 2
93: addq $1,$25,$28 # cy add: next group's s2 limb 1 + cy
94: stq $23,-8($16) # store sum limb 3
95: addq $5,$28,$21 # 2nd main add of the next group
96: addq $18,32,$18 # s2_ptr += 4 limbs
97: cmpult $28,$25,$8 # $8 = carry out of the cy add
98: bge $19,.Loop # repeat while another full group of 4 limbs follows the one in flight
99: # Pipeline epilogue for 1st loop: finish and store the group in flight without loading more limbs
100: .Lend1: cmpult $21,$28,$25 # cy out of the 2nd main add
101: or $8,$25,$25 # cy = carry of cy add | carry of main add
102: addq $2,$25,$28 # cy add: s2 limb 2 + cy
103: addq $28,$6,$22 # 3rd main add
104: cmpult $28,$25,$8 # $8 = carry out of the cy add
105: cmpult $22,$28,$25 # cy out of the 3rd main add
106: stq $20,0($16) # store sum limb 0
107: or $8,$25,$25 # cy = carry of cy add | carry of main add
108: stq $21,8($16) # store sum limb 1
109: addq $3,$25,$28 # cy add: s2 limb 3 + cy
110: addq $28,$7,$23 # 4th main add
111: cmpult $28,$25,$8 # $8 = carry out of the cy add
112: cmpult $23,$28,$25 # cy out of the 4th main add
113: or $8,$25,$25 # cy = carry of cy add | carry of main add
114: addq $16,32,$16 # res_ptr += 4 limbs
115: stq $22,-16($16) # store sum limb 2
116: stq $23,-8($16) # store sum limb 3
117: .Lend2: addq $19,4,$19 # undo the last decrement: $19 = limbs still to process (fewer than 4)
118: beq $19,.Lret # nothing left: return cy
119: # Start software pipeline for 2nd loop: load the first remaining limb pair
120: ldq $0,0($18) # s2 limb
121: ldq $4,0($17) # s1 limb
122: subq $19,1,$19 # account for the limb pair just loaded
123: beq $19,.Lend0 # it was the last one: finish it without loading more
124: # 2nd loop handles remaining 1-3 limbs, one per iteration, preloading the next limb pair
125: .align 4
126: .Loop0: addq $0,$25,$28 # cy add: s2 limb + cy
127: ldq $0,8($18) # preload next s2 limb
128: addq $4,$28,$20 # main add
129: ldq $4,8($17) # preload next s1 limb
130: addq $18,8,$18 # s2_ptr += 1 limb
131: cmpult $28,$25,$8 # $8 = carry out of the cy add
132: addq $17,8,$17 # s1_ptr += 1 limb
133: stq $20,0($16) # store sum limb
134: cmpult $20,$28,$25 # cy out of the main add
135: subq $19,1,$19 # account for the preloaded limb pair
136: or $8,$25,$25 # cy = carry of cy add | carry of main add
137: addq $16,8,$16 # res_ptr += 1 limb
138: bne $19,.Loop0 # repeat until the preloaded pair is the last one
139: .Lend0: addq $0,$25,$28 # cy add for the last limb: s2 limb + cy
140: addq $4,$28,$20 # main add
141: cmpult $28,$25,$8 # $8 = carry out of the cy add
142: cmpult $20,$28,$25 # cy out of the main add
143: stq $20,0($16) # store the last sum limb
144: or $8,$25,$25 # cy = carry of cy add | carry of main add
145:
146: .Lret: or $25,$31,$0 # return value $0 = cy
147: ret $31,($26),1 # return to the address in $26
148: .end __mpn_add_n
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>