Annotation of OpenXM_contrib/gmp/mpn/sparc32/sub_n.S, Revision 1.1.1.1
1.1 maekawa 1: ! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
2: ! store difference in a third limb vector.
3:
4: ! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
5:
6: ! This file is part of the GNU MP Library.
7:
8: ! The GNU MP Library is free software; you can redistribute it and/or modify
9: ! it under the terms of the GNU Library General Public License as published by
10: ! the Free Software Foundation; either version 2 of the License, or (at your
11: ! option) any later version.
12:
13: ! The GNU MP Library is distributed in the hope that it will be useful, but
14: ! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: ! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
16: ! License for more details.
17:
18: ! You should have received a copy of the GNU Library General Public License
19: ! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: ! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: ! MA 02111-1307, USA.
22:
23:
24: ! INPUT PARAMETERS (preprocessor aliases for the registers the routine reads its arguments from)
25: #define res_ptr %o0 /* destination: difference limbs are stored here; %o0 is overwritten with the return value at exit */
26: #define s1_ptr %o1 /* first source vector (minuend): the routine computes s1 - s2 */
27: #define s2_ptr %o2 /* second source vector (subtrahend) */
28: #define size %o3 /* limb count; used as a biased down-counter by the loops */
29:
30: #include "sysdep.h"
31: ! __mpn_sub_n: store s1 - s2 (size limbs of 4 bytes each) at res_ptr; the borrow out of the top limb is returned in %o0.
32: .text
33: .align 4
34: .global C_SYMBOL_NAME(__mpn_sub_n)
35: C_SYMBOL_NAME(__mpn_sub_n):
36: xor s2_ptr,res_ptr,%g1 ! bit 2 of %g1 is set if s2_ptr and res_ptr differ in 8-byte alignment
37: andcc %g1,4,%g0
38: bne L1 ! branch if alignment differs
39: nop ! branch delay slot
40: ! ** V1a ** s2_ptr and res_ptr share alignment: ldd from s2, std to res, single-word ld from s1
41: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
42: be L_v1 ! if no, branch
43: nop
44: /* Subtract least significant limb separately to align res_ptr and s2_ptr */
45: ld [s1_ptr],%g4
46: add s1_ptr,4,s1_ptr
47: ld [s2_ptr],%g2
48: add s2_ptr,4,s2_ptr
49: add size,-1,size
50: subcc %g4,%g2,%o4 ! s1 limb - s2 limb; cy now holds the borrow for the next limb
51: st %o4,[res_ptr]
52: add res_ptr,4,res_ptr
53: L_v1: addx %g0,%g0,%o4 ! save cy in register (%o4 = 0 + 0 + cy) because cmp/addcc on size clobber it
54: cmp size,2 ! if size < 2 ...
55: bl Lend2 ! ... branch to tail code
56: subcc %g0,%o4,%g0 ! restore cy (delay slot, runs on both paths; 0 - %o4 borrows iff %o4 is 1)
57: ! Preload the first s1 pair (%g4,%g1) and s2 pair (%g2,%g3); the loops below always load one pair ahead
58: ld [s1_ptr+0],%g4
59: addcc size,-10,size ! bias: one Loop1 pass needs 10 limbs (8 to process + the pair it preloads for the next pass)
60: ld [s1_ptr+4],%g1
61: ldd [s2_ptr+0],%g2 ! ldd fills the register pair %g2,%g3
62: blt Lfin1
63: subcc %g0,%o4,%g0 ! restore cy
64: /* Subtract blocks of 8 limbs while at least 10 limbs (8 plus the pair preloaded for the next pass) remain */
65: Loop1: subxcc %g4,%g2,%o4
66: ld [s1_ptr+8],%g4
67: subxcc %g1,%g3,%o5
68: ld [s1_ptr+12],%g1
69: ldd [s2_ptr+8],%g2
70: std %o4,[res_ptr+0] ! std stores the register pair %o4,%o5
71: subxcc %g4,%g2,%o4
72: ld [s1_ptr+16],%g4
73: subxcc %g1,%g3,%o5
74: ld [s1_ptr+20],%g1
75: ldd [s2_ptr+16],%g2
76: std %o4,[res_ptr+8]
77: subxcc %g4,%g2,%o4
78: ld [s1_ptr+24],%g4
79: subxcc %g1,%g3,%o5
80: ld [s1_ptr+28],%g1
81: ldd [s2_ptr+24],%g2
82: std %o4,[res_ptr+16]
83: subxcc %g4,%g2,%o4
84: ld [s1_ptr+32],%g4
85: subxcc %g1,%g3,%o5
86: ld [s1_ptr+36],%g1
87: ldd [s2_ptr+32],%g2
88: std %o4,[res_ptr+24]
89: addx %g0,%g0,%o4 ! save cy in register
90: addcc size,-8,size
91: add s1_ptr,32,s1_ptr
92: add s2_ptr,32,s2_ptr
93: add res_ptr,32,res_ptr
94: bge Loop1
95: subcc %g0,%o4,%g0 ! restore cy
96:
97: Lfin1: addcc size,8-2,size ! partly undo the bias; size >= 0 now means 4 or more limbs remain
98: blt Lend1
99: subcc %g0,%o4,%g0 ! restore cy
100: /* Subtract blocks of 2 limbs while at least 4 limbs (2 plus the pair preloaded for the next pass) remain */
101: Loope1: subxcc %g4,%g2,%o4
102: ld [s1_ptr+8],%g4
103: subxcc %g1,%g3,%o5
104: ld [s1_ptr+12],%g1
105: ldd [s2_ptr+8],%g2
106: std %o4,[res_ptr+0]
107: addx %g0,%g0,%o4 ! save cy in register
108: addcc size,-2,size
109: add s1_ptr,8,s1_ptr
110: add s2_ptr,8,s2_ptr
111: add res_ptr,8,res_ptr
112: bge Loope1
113: subcc %g0,%o4,%g0 ! restore cy
114: Lend1: subxcc %g4,%g2,%o4 ! finish the pair that is already loaded
115: subxcc %g1,%g3,%o5
116: std %o4,[res_ptr+0]
117: addx %g0,%g0,%o4 ! save cy in register
118:
119: andcc size,1,%g0 ! one odd limb left over?
120: be Lret1
121: subcc %g0,%o4,%g0 ! restore cy
122: /* Subtract last limb (it sits just past the pair stored above, hence the +8 offsets) */
123: ld [s1_ptr+8],%g4
124: ld [s2_ptr+8],%g2
125: subxcc %g4,%g2,%o4
126: st %o4,[res_ptr+8]
127:
128: Lret1: retl
129: addx %g0,%g0,%o0 ! return borrow (cy) out of the most significant limb
130:
131: L1: xor s1_ptr,res_ptr,%g1 ! bit 2 of %g1 is set if s1_ptr and res_ptr differ in 8-byte alignment
132: andcc %g1,4,%g0
133: bne L2
134: nop
135: ! ** V1b ** s1_ptr and res_ptr share alignment: ldd from s1, std to res, single-word ld from s2
136: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
137: be L_v1b ! if no, branch
138: nop
139: /* Subtract least significant limb separately to align res_ptr and s1_ptr */
140: ld [s2_ptr],%g4
141: add s2_ptr,4,s2_ptr
142: ld [s1_ptr],%g2
143: add s1_ptr,4,s1_ptr
144: add size,-1,size
145: subcc %g2,%g4,%o4 ! still s1 limb - s2 limb; the operand registers are swapped relative to V1a
146: st %o4,[res_ptr]
147: add res_ptr,4,res_ptr
148: L_v1b: addx %g0,%g0,%o4 ! save cy in register
149: cmp size,2 ! if size < 2 ...
150: bl Lend2 ! ... branch to tail code
151: subcc %g0,%o4,%g0 ! restore cy
152: ! Preload the first s2 pair (%g4,%g1) and s1 pair (%g2,%g3); same scheme as V1a with the sources swapped
153: ld [s2_ptr+0],%g4
154: addcc size,-10,size ! bias: one Loop1b pass needs 10 limbs (8 to process + the pair it preloads for the next pass)
155: ld [s2_ptr+4],%g1
156: ldd [s1_ptr+0],%g2
157: blt Lfin1b
158: subcc %g0,%o4,%g0 ! restore cy
159: /* Subtract blocks of 8 limbs while at least 10 limbs (8 plus the pair preloaded for the next pass) remain */
160: Loop1b: subxcc %g2,%g4,%o4
161: ld [s2_ptr+8],%g4
162: subxcc %g3,%g1,%o5
163: ld [s2_ptr+12],%g1
164: ldd [s1_ptr+8],%g2
165: std %o4,[res_ptr+0]
166: subxcc %g2,%g4,%o4
167: ld [s2_ptr+16],%g4
168: subxcc %g3,%g1,%o5
169: ld [s2_ptr+20],%g1
170: ldd [s1_ptr+16],%g2
171: std %o4,[res_ptr+8]
172: subxcc %g2,%g4,%o4
173: ld [s2_ptr+24],%g4
174: subxcc %g3,%g1,%o5
175: ld [s2_ptr+28],%g1
176: ldd [s1_ptr+24],%g2
177: std %o4,[res_ptr+16]
178: subxcc %g2,%g4,%o4
179: ld [s2_ptr+32],%g4
180: subxcc %g3,%g1,%o5
181: ld [s2_ptr+36],%g1
182: ldd [s1_ptr+32],%g2
183: std %o4,[res_ptr+24]
184: addx %g0,%g0,%o4 ! save cy in register
185: addcc size,-8,size
186: add s1_ptr,32,s1_ptr
187: add s2_ptr,32,s2_ptr
188: add res_ptr,32,res_ptr
189: bge Loop1b
190: subcc %g0,%o4,%g0 ! restore cy
191:
192: Lfin1b: addcc size,8-2,size ! partly undo the bias; size >= 0 now means 4 or more limbs remain
193: blt Lend1b
194: subcc %g0,%o4,%g0 ! restore cy
195: /* Subtract blocks of 2 limbs while at least 4 limbs (2 plus the pair preloaded for the next pass) remain */
196: Loope1b:subxcc %g2,%g4,%o4
197: ld [s2_ptr+8],%g4
198: subxcc %g3,%g1,%o5
199: ld [s2_ptr+12],%g1
200: ldd [s1_ptr+8],%g2
201: std %o4,[res_ptr+0]
202: addx %g0,%g0,%o4 ! save cy in register
203: addcc size,-2,size
204: add s1_ptr,8,s1_ptr
205: add s2_ptr,8,s2_ptr
206: add res_ptr,8,res_ptr
207: bge Loope1b
208: subcc %g0,%o4,%g0 ! restore cy
209: Lend1b: subxcc %g2,%g4,%o4 ! finish the pair that is already loaded
210: subxcc %g3,%g1,%o5
211: std %o4,[res_ptr+0]
212: addx %g0,%g0,%o4 ! save cy in register
213:
214: andcc size,1,%g0 ! one odd limb left over?
215: be Lret1b
216: subcc %g0,%o4,%g0 ! restore cy
217: /* Subtract last limb (it sits just past the pair stored above, hence the +8 offsets) */
218: ld [s2_ptr+8],%g4
219: ld [s1_ptr+8],%g2
220: subxcc %g2,%g4,%o4
221: st %o4,[res_ptr+8]
222:
223: Lret1b: retl
224: addx %g0,%g0,%o0 ! return borrow (cy) out of the most significant limb
225:
226: ! ** V2 **
227: /* If we come here, the alignment of s1_ptr and res_ptr as well as the
228: alignment of s2_ptr and res_ptr differ. Since there are only two ways
229: things can be aligned (that we care about) we now know that the alignment
230: of s1_ptr and s2_ptr are the same. */
231: ! Both sources are read with ldd; the result is written with single-word st.
232: L2: cmp size,1 ! single limb: go straight to the last-limb code (an equal compare leaves cy=0)
233: be Ljone
234: nop
235: andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
236: be L_v2 ! if no, branch
237: nop
238: /* Subtract least significant limb separately to align s1_ptr and s2_ptr */
239: ld [s1_ptr],%g4
240: add s1_ptr,4,s1_ptr
241: ld [s2_ptr],%g2
242: add s2_ptr,4,s2_ptr
243: add size,-1,size
244: subcc %g4,%g2,%o4
245: st %o4,[res_ptr]
246: add res_ptr,4,res_ptr
247:
248: L_v2: addx %g0,%g0,%o4 ! save cy in register
249: addcc size,-8,size ! no preloading in V2, so the bias is just the 8 limbs of one Loop2 pass
250: blt Lfin2
251: subcc %g0,%o4,%g0 ! restore cy
252: /* Subtract blocks of 8 limbs until less than 8 limbs remain */
253: Loop2: ldd [s1_ptr+0],%g2
254: ldd [s2_ptr+0],%o4
255: subxcc %g2,%o4,%g2
256: st %g2,[res_ptr+0] ! word stores: res_ptr does not share the 8-byte alignment of the sources
257: subxcc %g3,%o5,%g3
258: st %g3,[res_ptr+4]
259: ldd [s1_ptr+8],%g2
260: ldd [s2_ptr+8],%o4
261: subxcc %g2,%o4,%g2
262: st %g2,[res_ptr+8]
263: subxcc %g3,%o5,%g3
264: st %g3,[res_ptr+12]
265: ldd [s1_ptr+16],%g2
266: ldd [s2_ptr+16],%o4
267: subxcc %g2,%o4,%g2
268: st %g2,[res_ptr+16]
269: subxcc %g3,%o5,%g3
270: st %g3,[res_ptr+20]
271: ldd [s1_ptr+24],%g2
272: ldd [s2_ptr+24],%o4
273: subxcc %g2,%o4,%g2
274: st %g2,[res_ptr+24]
275: subxcc %g3,%o5,%g3
276: st %g3,[res_ptr+28]
277: addx %g0,%g0,%o4 ! save cy in register
278: addcc size,-8,size
279: add s1_ptr,32,s1_ptr
280: add s2_ptr,32,s2_ptr
281: add res_ptr,32,res_ptr
282: bge Loop2
283: subcc %g0,%o4,%g0 ! restore cy
284:
285: Lfin2: addcc size,8-2,size ! partly undo the bias; size >= 0 now means 2 or more limbs remain
286: blt Lend2
287: subcc %g0,%o4,%g0 ! restore cy
288: Loope2: ldd [s1_ptr+0],%g2 ! subtract blocks of 2 limbs until less than 2 limbs remain
289: ldd [s2_ptr+0],%o4
290: subxcc %g2,%o4,%g2
291: st %g2,[res_ptr+0]
292: subxcc %g3,%o5,%g3
293: st %g3,[res_ptr+4]
294: addx %g0,%g0,%o4 ! save cy in register
295: addcc size,-2,size
296: add s1_ptr,8,s1_ptr
297: add s2_ptr,8,s2_ptr
298: add res_ptr,8,res_ptr
299: bge Loope2
300: subcc %g0,%o4,%g0 ! restore cy
301: Lend2: andcc size,1,%g0 ! one odd limb left over? (V1a and V1b also land here when fewer than 2 limbs remain)
302: be Lret2
303: subcc %g0,%o4,%g0 ! restore cy
304: /* Subtract last limb */
305: Ljone: ld [s1_ptr],%g4
306: ld [s2_ptr],%g2
307: subxcc %g4,%g2,%o4
308: st %o4,[res_ptr]
309:
310: Lret2: retl
311: addx %g0,%g0,%o0 ! return borrow (cy) out of the most significant limb
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>