Annotation of OpenXM/src/kan96xx/gmp-2.0.2-ssh-2/mpn/sparc32/add_n.S, Revision 1.1.1.1
1.1 takayama 1: ! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
2: ! sum in a third limb vector.
3:
4: ! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
5:
6: ! This file is part of the GNU MP Library.
7:
8: ! The GNU MP Library is free software; you can redistribute it and/or modify
9: ! it under the terms of the GNU Library General Public License as published by
10: ! the Free Software Foundation; either version 2 of the License, or (at your
11: ! option) any later version.
12:
13: ! The GNU MP Library is distributed in the hope that it will be useful, but
14: ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
16: ! License for more details.
17:
18: ! You should have received a copy of the GNU Library General Public License
19: ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: ! MA 02111-1307, USA.
22:
23:
24: ! INPUT PARAMETERS
! The four names below are preprocessor aliases for the registers that hold
! the incoming arguments.  Comments are kept on separate lines so that they
! cannot end up inside a macro expansion.
!   res_ptr  address the sum limbs are stored to
!   s1_ptr   address of the first source limb vector
!   s2_ptr   address of the second source limb vector
!   size     number of limbs to add (the file header states size > 0)
25: #define res_ptr %o0
26: #define s1_ptr %o1
27: #define s2_ptr %o2
28: #define size %o3
29:
! NOTE(review): sysdep.h is presumably where C_SYMBOL_NAME comes from; confirm.
30: #include "sysdep.h"
31:
! Code section; the entry symbol is aligned and exported for external callers.
32: .text
33: .align 4
34: .global C_SYMBOL_NAME(__mpn_add_n)
! __mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
!
! Adds the limb vectors at s1_ptr and s2_ptr, least significant limb first,
! stores the sum limbs at res_ptr and returns the carry out of the most
! significant limb (0 or 1) in %o0.
!
! Three variants are selected on address bit 2 of the pointers, so that
! doubleword accesses (ldd/std) are used wherever two pointers line up:
!   V1a  s2_ptr and res_ptr agree: ldd from s2_ptr, std to res_ptr,
!        single-word ld from s1_ptr.
!   V1b  s1_ptr and res_ptr agree: swap s1_ptr/s2_ptr and run V1a.
!   V2   only s1_ptr and s2_ptr agree: ldd from both, single-word st to res_ptr.
!
! Carry handling: addcc/andcc on the counter overwrite the carry flag, so the
! running carry is parked in %o4 with "addx %g0,%g0,%o4" and re-created with
! "subcc %g0,%o4,%g0" (0 - %o4 borrows exactly when %o4 is 1).
!
! Registers written: %g1-%g4, %o4, %o5, the four argument registers and the
! integer condition codes.  No stack frame is set up (return is via retl).
35: C_SYMBOL_NAME(__mpn_add_n):
36: xor s2_ptr,res_ptr,%g1 ! %g1 = bits in which s2_ptr and res_ptr differ
37: andcc %g1,4,%g0 ! look at address bit 2 only
38: bne L1 ! branch if alignment differs
39: nop ! branch delay slot
40: ! ** V1a **
41: L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
42: be L_v1 ! if no, branch
43: nop ! branch delay slot
44: /* Add least significant limb separately to align res_ptr and s2_ptr */
45: ld [s1_ptr],%g4
46: add s1_ptr,4,s1_ptr
47: ld [s2_ptr],%g2
48: add s2_ptr,4,s2_ptr
49: add size,-1,size
50: addcc %g4,%g2,%o4 ! first limb has no carry-in; sets cy for the next one
51: st %o4,[res_ptr]
52: add res_ptr,4,res_ptr
53: L_v1: addx %g0,%g0,%o4 ! save cy in register
54: cmp size,2 ! if size < 2 ...
55: bl Lend2 ! ... branch to tail code
56: subcc %g0,%o4,%g0 ! restore cy
57:
58: ld [s1_ptr+0],%g4 ! preload the first pair: s1 limbs go to %g4 and %g1
59: addcc size,-10,size ! Loop1 needs 10 limbs: 8 to add plus the pair it preloads
60: ld [s1_ptr+4],%g1
61: ldd [s2_ptr+0],%g2 ! s2 limbs go to %g2 and %g3
62: blt Lfin1 ! fewer than 10 limbs remain: skip the unrolled loop
63: subcc %g0,%o4,%g0 ! restore cy
64: /* Add blocks of 8 limbs until less than 8 limbs remain */
65: Loop1: addxcc %g4,%g2,%o4 ! add the pair loaded earlier, low limb first
66: ld [s1_ptr+8],%g4 ! loads leave cy alone, so the next pair is fetched meanwhile
67: addxcc %g1,%g3,%o5
68: ld [s1_ptr+12],%g1
69: ldd [s2_ptr+8],%g2
70: std %o4,[res_ptr+0] ! stores %o4 and %o5 as one doubleword
71: addxcc %g4,%g2,%o4
72: ld [s1_ptr+16],%g4
73: addxcc %g1,%g3,%o5
74: ld [s1_ptr+20],%g1
75: ldd [s2_ptr+16],%g2
76: std %o4,[res_ptr+8]
77: addxcc %g4,%g2,%o4
78: ld [s1_ptr+24],%g4
79: addxcc %g1,%g3,%o5
80: ld [s1_ptr+28],%g1
81: ldd [s2_ptr+24],%g2
82: std %o4,[res_ptr+16]
83: addxcc %g4,%g2,%o4
84: ld [s1_ptr+32],%g4 ! limbs 8 and 9: the pair carried into the next pass or the tail
85: addxcc %g1,%g3,%o5
86: ld [s1_ptr+36],%g1
87: ldd [s2_ptr+32],%g2
88: std %o4,[res_ptr+24]
89: addx %g0,%g0,%o4 ! save cy in register
90: addcc size,-8,size ! 8 limbs finished; this overwrites cy, hence the save
91: add s1_ptr,32,s1_ptr
92: add s2_ptr,32,s2_ptr
93: add res_ptr,32,res_ptr
94: bge Loop1 ! at least 10 limbs still remain
95: subcc %g0,%o4,%g0 ! restore cy
96:
97: Lfin1: addcc size,8-2,size ! rebias: size = limbs remaining - 4 (preloaded pair included)
98: blt Lend1 ! fewer than 4 remain: there is no further pair to preload
99: subcc %g0,%o4,%g0 ! restore cy
100: /* Add blocks of 2 limbs until less than 2 limbs remain */
101: Loope1: addxcc %g4,%g2,%o4 ! same pipeline as Loop1, one pair per pass
102: ld [s1_ptr+8],%g4
103: addxcc %g1,%g3,%o5
104: ld [s1_ptr+12],%g1
105: ldd [s2_ptr+8],%g2
106: std %o4,[res_ptr+0]
107: addx %g0,%g0,%o4 ! save cy in register
108: addcc size,-2,size
109: add s1_ptr,8,s1_ptr
110: add s2_ptr,8,s2_ptr
111: add res_ptr,8,res_ptr
112: bge Loope1
113: subcc %g0,%o4,%g0 ! restore cy
114: Lend1: addxcc %g4,%g2,%o4 ! add the last preloaded pair
115: addxcc %g1,%g3,%o5
116: std %o4,[res_ptr+0]
117: addx %g0,%g0,%o4 ! save cy in register
118:
119: andcc size,1,%g0 ! all counter adjustments were even, so bit 0 flags an odd limb
120: be Lret1
121: subcc %g0,%o4,%g0 ! restore cy
122: /* Add last limb */
123: ld [s1_ptr+8],%g4 ! offset 8: the pointers still address the pair just stored
124: ld [s2_ptr+8],%g2
125: addxcc %g4,%g2,%o4
126: st %o4,[res_ptr+8]
127:
128: Lret1: retl
129: addx %g0,%g0,%o0 ! return carry-out from most sign. limb
130:
131: L1: xor s1_ptr,res_ptr,%g1 ! repeat the bit 2 test for s1_ptr against res_ptr
132: andcc %g1,4,%g0
133: bne L2 ! neither source lines up with res_ptr
134: nop ! branch delay slot
135: ! ** V1b **
136: mov s2_ptr,%g1 ! swap the source pointers (addition is commutative) ...
137: mov s1_ptr,s2_ptr
138: b L0 ! ... and reuse the V1a code
139: mov %g1,s1_ptr ! delay slot: finishes the swap before L0 executes
140:
141: ! ** V2 **
142: /* If we come here, the alignment of s1_ptr and res_ptr as well as the
143: alignment of s2_ptr and res_ptr differ. Since there are only two ways
144: things can be aligned (that we care about) we now know that the alignment
145: of s1_ptr and s2_ptr are the same. */
146:
147: L2: cmp size,1 ! single limb? equal operands give no borrow, so cy=0 at Ljone
148: be Ljone
149: nop ! branch delay slot
150: andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
151: be L_v2 ! if no, branch
152: nop ! branch delay slot
153: /* Add least significant limb separately to align s1_ptr and s2_ptr */
154: ld [s1_ptr],%g4
155: add s1_ptr,4,s1_ptr
156: ld [s2_ptr],%g2
157: add s2_ptr,4,s2_ptr
158: add size,-1,size
159: addcc %g4,%g2,%o4 ! first limb has no carry-in; sets cy for the next one
160: st %o4,[res_ptr]
161: add res_ptr,4,res_ptr
162:
163: L_v2: addx %g0,%g0,%o4 ! save cy in register
164: addcc size,-8,size ! nothing is preloaded in V2, so the bias is one block of 8
165: blt Lfin2
166: subcc %g0,%o4,%g0 ! restore cy
167: /* Add blocks of 8 limbs until less than 8 limbs remain */
168: Loop2: ldd [s1_ptr+0],%g2 ! s1 pair goes to %g2 and %g3
169: ldd [s2_ptr+0],%o4 ! s2 pair goes to %o4 and %o5
170: addxcc %g2,%o4,%g2
171: st %g2,[res_ptr+0] ! word stores: res_ptr has the other alignment
172: addxcc %g3,%o5,%g3
173: st %g3,[res_ptr+4]
174: ldd [s1_ptr+8],%g2
175: ldd [s2_ptr+8],%o4
176: addxcc %g2,%o4,%g2
177: st %g2,[res_ptr+8]
178: addxcc %g3,%o5,%g3
179: st %g3,[res_ptr+12]
180: ldd [s1_ptr+16],%g2
181: ldd [s2_ptr+16],%o4
182: addxcc %g2,%o4,%g2
183: st %g2,[res_ptr+16]
184: addxcc %g3,%o5,%g3
185: st %g3,[res_ptr+20]
186: ldd [s1_ptr+24],%g2
187: ldd [s2_ptr+24],%o4
188: addxcc %g2,%o4,%g2
189: st %g2,[res_ptr+24]
190: addxcc %g3,%o5,%g3
191: st %g3,[res_ptr+28]
192: addx %g0,%g0,%o4 ! save cy in register
193: addcc size,-8,size
194: add s1_ptr,32,s1_ptr
195: add s2_ptr,32,s2_ptr
196: add res_ptr,32,res_ptr
197: bge Loop2
198: subcc %g0,%o4,%g0 ! restore cy
199:
200: Lfin2: addcc size,8-2,size ! rebias: size = limbs remaining - 2
201: blt Lend2
202: subcc %g0,%o4,%g0 ! restore cy
! Add blocks of 2 limbs until fewer than 2 limbs remain
203: Loope2: ldd [s1_ptr+0],%g2
204: ldd [s2_ptr+0],%o4
205: addxcc %g2,%o4,%g2
206: st %g2,[res_ptr+0]
207: addxcc %g3,%o5,%g3
208: st %g3,[res_ptr+4]
209: addx %g0,%g0,%o4 ! save cy in register
210: addcc size,-2,size
211: add s1_ptr,8,s1_ptr
212: add s2_ptr,8,s2_ptr
213: add res_ptr,8,res_ptr
214: bge Loope2
215: subcc %g0,%o4,%g0 ! restore cy
! Shared tail: also reached from L_v1 when fewer than 2 limbs remain there.
216: Lend2: andcc size,1,%g0 ! bit 0 of the counter flags one leftover limb
217: be Lret2
218: subcc %g0,%o4,%g0 ! restore cy
219: /* Add last limb */
220: Ljone: ld [s1_ptr],%g4
221: ld [s2_ptr],%g2
222: addxcc %g4,%g2,%o4
223: st %o4,[res_ptr]
224:
225: Lret2: retl
226: addx %g0,%g0,%o0 ! return carry-out from most sign. limb
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>