Annotation of OpenXM_contrib/gmp/mpn/sparc32/sub_n.asm, Revision 1.1.1.2
1.1 maekawa 1: dnl SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
2: dnl store difference in a third limb vector.
3:
1.1.1.2 ! ohara 4: dnl Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
1.1 maekawa 5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
9: dnl it under the terms of the GNU Lesser General Public License as published
10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
11: dnl your option) any later version.
12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23:
24: include(`../config.m4')
25:
26: C INPUT PARAMETERS
27: define(res_ptr,%o0)
28: define(s1_ptr,%o1)
29: define(s2_ptr,%o2)
30: define(n,%o3)
31:
32: ASM_START()
33: PROLOGUE(mpn_sub_n)
34: xor s2_ptr,res_ptr,%g1 C bit 2 of xor set iff s2_ptr/res_ptr word alignment differs
35: andcc %g1,4,%g0
36: bne L(1) C branch if alignment differs
37: nop
38: C ** V1a ** s2_ptr and res_ptr can be made doubleword-aligned together
39: andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
40: be L(v1) C if no, branch
41: nop
42: C Subtract least significant limb separately to align res_ptr and s2_ptr
43: ld [s1_ptr],%g4
44: add s1_ptr,4,s1_ptr
45: ld [s2_ptr],%g2
46: add s2_ptr,4,s2_ptr
47: add n,-1,n
48: subcc %g4,%g2,%o4
49: st %o4,[res_ptr]
50: add res_ptr,4,res_ptr
51: L(v1): addx %g0,%g0,%o4 C save cy in register
52: cmp n,2 C if n < 2 ...
53: bl L(end2) C ... branch to tail code
54: subcc %g0,%o4,%g0 C restore cy
55:
56: ld [s1_ptr+0],%g4 C prime the software pipeline: first two s1 limbs ...
57: addcc n,-10,n
58: ld [s1_ptr+4],%g1
59: ldd [s2_ptr+0],%g2 C ... and an s2 limb pair into %g2/%g3
60: blt L(fin1)
61: subcc %g0,%o4,%g0 C restore cy
62: C Subtract blocks of 8 limbs until less than 8 limbs remain
63: L(loop1):
64: subxcc %g4,%g2,%o4 C %o4 = s1 limb - s2 limb - borrow
65: ld [s1_ptr+8],%g4
66: subxcc %g1,%g3,%o5
67: ld [s1_ptr+12],%g1
68: ldd [s2_ptr+8],%g2
69: std %o4,[res_ptr+0]
70: subxcc %g4,%g2,%o4
71: ld [s1_ptr+16],%g4
72: subxcc %g1,%g3,%o5
73: ld [s1_ptr+20],%g1
74: ldd [s2_ptr+16],%g2
75: std %o4,[res_ptr+8]
76: subxcc %g4,%g2,%o4
77: ld [s1_ptr+24],%g4
78: subxcc %g1,%g3,%o5
79: ld [s1_ptr+28],%g1
80: ldd [s2_ptr+24],%g2
81: std %o4,[res_ptr+16]
82: subxcc %g4,%g2,%o4
83: ld [s1_ptr+32],%g4
84: subxcc %g1,%g3,%o5
85: ld [s1_ptr+36],%g1
86: ldd [s2_ptr+32],%g2
87: std %o4,[res_ptr+24]
88: addx %g0,%g0,%o4 C save cy in register
89: addcc n,-8,n
90: add s1_ptr,32,s1_ptr
91: add s2_ptr,32,s2_ptr
92: add res_ptr,32,res_ptr
93: bge L(loop1)
94: subcc %g0,%o4,%g0 C restore cy
95:
96: L(fin1):
97: addcc n,8-2,n C undo bias; test for >= 2 remaining limbs
98: blt L(end1)
99: subcc %g0,%o4,%g0 C restore cy
100: C Subtract blocks of 2 limbs until less than 2 limbs remain
101: L(loope1):
102: subxcc %g4,%g2,%o4
103: ld [s1_ptr+8],%g4
104: subxcc %g1,%g3,%o5
105: ld [s1_ptr+12],%g1
106: ldd [s2_ptr+8],%g2
107: std %o4,[res_ptr+0]
108: addx %g0,%g0,%o4 C save cy in register
109: addcc n,-2,n
110: add s1_ptr,8,s1_ptr
111: add s2_ptr,8,s2_ptr
112: add res_ptr,8,res_ptr
113: bge L(loope1)
114: subcc %g0,%o4,%g0 C restore cy
115: L(end1):
116: subxcc %g4,%g2,%o4 C drain the pipeline: last prefetched limb pair
117: subxcc %g1,%g3,%o5
118: std %o4,[res_ptr+0]
119: addx %g0,%g0,%o4 C save cy in register
120:
121: andcc n,1,%g0 C odd total count leaves one final limb
122: be L(ret1)
123: subcc %g0,%o4,%g0 C restore cy
124: C Subtract last limb
125: ld [s1_ptr+8],%g4
126: ld [s2_ptr+8],%g2
127: subxcc %g4,%g2,%o4
128: st %o4,[res_ptr+8]
129:
130: L(ret1):
131: retl
132: addx %g0,%g0,%o0 C return carry-out from most sign. limb
133:
134: L(1): xor s1_ptr,res_ptr,%g1 C does s1_ptr/res_ptr word alignment differ?
135: andcc %g1,4,%g0
136: bne L(2)
137: nop
138: C ** V1b ** same as V1a but with the roles of s1_ptr and s2_ptr swapped
139: andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
140: be L(v1b) C if no, branch
141: nop
142: C Subtract least significant limb separately to align res_ptr and s1_ptr
143: ld [s2_ptr],%g4
144: add s2_ptr,4,s2_ptr
145: ld [s1_ptr],%g2
146: add s1_ptr,4,s1_ptr
147: add n,-1,n
148: subcc %g2,%g4,%o4
149: st %o4,[res_ptr]
150: add res_ptr,4,res_ptr
151: L(v1b): addx %g0,%g0,%o4 C save cy in register
152: cmp n,2 C if n < 2 ...
153: bl L(end2) C ... branch to tail code
154: subcc %g0,%o4,%g0 C restore cy
155:
156: ld [s2_ptr+0],%g4 C prime the software pipeline: first two s2 limbs ...
157: addcc n,-10,n
158: ld [s2_ptr+4],%g1
159: ldd [s1_ptr+0],%g2 C ... and an s1 limb pair into %g2/%g3
160: blt L(fin1b)
161: subcc %g0,%o4,%g0 C restore cy
162: C Subtract blocks of 8 limbs until less than 8 limbs remain
163: L(loop1b):
164: subxcc %g2,%g4,%o4 C %o4 = s1 limb - s2 limb - borrow
165: ld [s2_ptr+8],%g4
166: subxcc %g3,%g1,%o5
167: ld [s2_ptr+12],%g1
168: ldd [s1_ptr+8],%g2
169: std %o4,[res_ptr+0]
170: subxcc %g2,%g4,%o4
171: ld [s2_ptr+16],%g4
172: subxcc %g3,%g1,%o5
173: ld [s2_ptr+20],%g1
174: ldd [s1_ptr+16],%g2
175: std %o4,[res_ptr+8]
176: subxcc %g2,%g4,%o4
177: ld [s2_ptr+24],%g4
178: subxcc %g3,%g1,%o5
179: ld [s2_ptr+28],%g1
180: ldd [s1_ptr+24],%g2
181: std %o4,[res_ptr+16]
182: subxcc %g2,%g4,%o4
183: ld [s2_ptr+32],%g4
184: subxcc %g3,%g1,%o5
185: ld [s2_ptr+36],%g1
186: ldd [s1_ptr+32],%g2
187: std %o4,[res_ptr+24]
188: addx %g0,%g0,%o4 C save cy in register
189: addcc n,-8,n
190: add s1_ptr,32,s1_ptr
191: add s2_ptr,32,s2_ptr
192: add res_ptr,32,res_ptr
193: bge L(loop1b)
194: subcc %g0,%o4,%g0 C restore cy
195:
196: L(fin1b):
197: addcc n,8-2,n C undo bias; test for >= 2 remaining limbs
198: blt L(end1b)
199: subcc %g0,%o4,%g0 C restore cy
200: C Subtract blocks of 2 limbs until less than 2 limbs remain
201: L(loope1b):
202: subxcc %g2,%g4,%o4
203: ld [s2_ptr+8],%g4
204: subxcc %g3,%g1,%o5
205: ld [s2_ptr+12],%g1
206: ldd [s1_ptr+8],%g2
207: std %o4,[res_ptr+0]
208: addx %g0,%g0,%o4 C save cy in register
209: addcc n,-2,n
210: add s1_ptr,8,s1_ptr
211: add s2_ptr,8,s2_ptr
212: add res_ptr,8,res_ptr
213: bge L(loope1b)
214: subcc %g0,%o4,%g0 C restore cy
215: L(end1b):
216: subxcc %g2,%g4,%o4 C drain the pipeline: last prefetched limb pair
217: subxcc %g3,%g1,%o5
218: std %o4,[res_ptr+0]
219: addx %g0,%g0,%o4 C save cy in register
220:
221: andcc n,1,%g0 C odd total count leaves one final limb
222: be L(ret1b)
223: subcc %g0,%o4,%g0 C restore cy
224: C Subtract last limb
225: ld [s2_ptr+8],%g4
226: ld [s1_ptr+8],%g2
227: subxcc %g2,%g4,%o4
228: st %o4,[res_ptr+8]
229:
230: L(ret1b):
231: retl
232: addx %g0,%g0,%o0 C return carry-out from most sign. limb
233:
234: C ** V2 **
235: C If we come here, the alignment of s1_ptr and res_ptr as well as the
236: C alignment of s2_ptr and res_ptr differ. Since there are only two ways
237: C things can be aligned (that we care about) we now know that the alignment
238: C of s1_ptr and s2_ptr are the same.
239:
240: L(2): cmp n,1 C single-limb case goes straight to the shared tail
241: be L(jone)
242: nop
243: andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0
244: be L(v2) C if no, branch
245: nop
246: C Subtract least significant limb separately to align s1_ptr and s2_ptr
247: ld [s1_ptr],%g4
248: add s1_ptr,4,s1_ptr
249: ld [s2_ptr],%g2
250: add s2_ptr,4,s2_ptr
251: add n,-1,n
252: subcc %g4,%g2,%o4
253: st %o4,[res_ptr]
254: add res_ptr,4,res_ptr
255:
256: L(v2): addx %g0,%g0,%o4 C save cy in register
257: addcc n,-8,n
258: blt L(fin2)
259: subcc %g0,%o4,%g0 C restore cy
260: C Subtract blocks of 8 limbs until less than 8 limbs remain
261: L(loop2):
262: ldd [s1_ptr+0],%g2 C ldd loads an aligned limb pair into %g2/%g3
263: ldd [s2_ptr+0],%o4 C ... and into %o4/%o5
264: subxcc %g2,%o4,%g2
265: st %g2,[res_ptr+0] C res_ptr may be word-aligned only, so use st not std
266: subxcc %g3,%o5,%g3
267: st %g3,[res_ptr+4]
268: ldd [s1_ptr+8],%g2
269: ldd [s2_ptr+8],%o4
270: subxcc %g2,%o4,%g2
271: st %g2,[res_ptr+8]
272: subxcc %g3,%o5,%g3
273: st %g3,[res_ptr+12]
274: ldd [s1_ptr+16],%g2
275: ldd [s2_ptr+16],%o4
276: subxcc %g2,%o4,%g2
277: st %g2,[res_ptr+16]
278: subxcc %g3,%o5,%g3
279: st %g3,[res_ptr+20]
280: ldd [s1_ptr+24],%g2
281: ldd [s2_ptr+24],%o4
282: subxcc %g2,%o4,%g2
283: st %g2,[res_ptr+24]
284: subxcc %g3,%o5,%g3
285: st %g3,[res_ptr+28]
286: addx %g0,%g0,%o4 C save cy in register
287: addcc n,-8,n
288: add s1_ptr,32,s1_ptr
289: add s2_ptr,32,s2_ptr
290: add res_ptr,32,res_ptr
291: bge L(loop2)
292: subcc %g0,%o4,%g0 C restore cy
293:
294: L(fin2):
295: addcc n,8-2,n C undo bias; test for >= 2 remaining limbs
296: blt L(end2)
297: subcc %g0,%o4,%g0 C restore cy
298: L(loope2):
299: ldd [s1_ptr+0],%g2
300: ldd [s2_ptr+0],%o4
301: subxcc %g2,%o4,%g2
302: st %g2,[res_ptr+0]
303: subxcc %g3,%o5,%g3
304: st %g3,[res_ptr+4]
305: addx %g0,%g0,%o4 C save cy in register
306: addcc n,-2,n
307: add s1_ptr,8,s1_ptr
308: add s2_ptr,8,s2_ptr
309: add res_ptr,8,res_ptr
310: bge L(loope2)
311: subcc %g0,%o4,%g0 C restore cy
312: L(end2):
313: andcc n,1,%g0 C odd total count leaves one final limb
314: be L(ret2)
315: subcc %g0,%o4,%g0 C restore cy
316: C Subtract last limb
317: L(jone):
318: ld [s1_ptr],%g4
319: ld [s2_ptr],%g2
320: subxcc %g4,%g2,%o4
321: st %o4,[res_ptr]
322:
323: L(ret2):
324: retl
325: addx %g0,%g0,%o0 C return carry-out from most sign. limb
326: EPILOGUE(mpn_sub_n)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>