Annotation of OpenXM_contrib/gmp/mpn/sparc32/add_n.asm, Revision 1.1.1.2
1.1 maekawa 1: dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
2: dnl sum in a third limb vector.
3:
1.1.1.2 ! ohara 4: dnl Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
1.1 maekawa 5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
9: dnl it under the terms of the GNU Lesser General Public License as published
10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
11: dnl your option) any later version.
12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23:
24: include(`../config.m4')
25:
26: C INPUT PARAMETERS
27: define(res_ptr,%o0) C destination vector: the sum limbs are stored here
28: define(s1_ptr,%o1) C first source vector
29: define(s2_ptr,%o2) C second source vector
30: define(n,%o3) C limb count; the file header requires it to be > 0
31:
32: ASM_START()
C ----------------------------------------------------------------------
C mpn_add_n: add the n-limb vectors at s1_ptr and s2_ptr, store the n-limb
C sum at res_ptr, and return the carry out of the top limb (0 or 1) in %o0.
C
C Limbs are 32-bit words (every pointer advances 4 bytes per limb).
C Besides the four argument registers, which are all modified, the routine
C uses %g1-%g4, %o4 and %o5 as scratch.
C
C Carry handling: the carry chain lives in the condition codes (addxcc).
C Whenever an instruction that rewrites the condition codes must run in
C between (counter updates, alignment tests), the carry is first parked as
C 0 or 1 in %o4 with addx %g0,%g0,%o4 and afterwards regenerated with
C subcc %g0,%o4,%g0, whose borrow is set exactly when %o4 is nonzero.
C
C Alignment dispatch: ldd/std move two limbs at once but need a
C doubleword-aligned address, so the code looks at bit 2 of the pointers:
C   V1a  res_ptr and s2_ptr agree: ldd from s2_ptr, std to res_ptr,
C        single-word ld from s1_ptr.
C   V1b  res_ptr and s1_ptr agree: swap the two sources, then run V1a.
C   V2   neither source agrees with res_ptr, hence the sources agree with
C        each other: ldd from both sources, single-word st to res_ptr.
C
C Counter bias: inside the loops n does not hold the plain limb count but
C the count minus an even bias, so that a sign test decides whether another
C pass is possible.  Because every bias is even, bit 0 of n always equals
C the parity of the limbs still to be added; the tails rely on that.
C
C Every branch below is followed by a delay-slot instruction that is
C executed before control reaches the branch target.
C ----------------------------------------------------------------------
33: PROLOGUE(mpn_add_n)
34: xor s2_ptr,res_ptr,%g1 C bit 2 of %g1 is set iff s2_ptr and res_ptr differ in bit 2
35: andcc %g1,4,%g0 C test that bit
36: bne L(1) C branch if alignment differs
37: nop C delay slot, nothing to do
38: C ** V1a ** res_ptr and s2_ptr share their doubleword alignment
39: L(0): andcc res_ptr,4,%g0 C res_ptr unaligned? Side effect: cy=0
40: be L(v1) C if no, branch
41: nop C delay slot, nothing to do
42: C Add least significant limb separately to align res_ptr and s2_ptr
43: ld [s1_ptr],%g4 C first limb of s1
44: add s1_ptr,4,s1_ptr
45: ld [s2_ptr],%g2 C first limb of s2
46: add s2_ptr,4,s2_ptr
47: add n,-1,n C one limb consumed (add leaves the condition codes alone)
48: addcc %g4,%g2,%o4 C plain add, no carry in; sets cy for the following limbs
49: st %o4,[res_ptr]
50: add res_ptr,4,res_ptr
51: L(v1): addx %g0,%g0,%o4 C save cy in register
52: cmp n,2 C if n < 2 ...
53: bl L(end2) C ... branch to tail code
54: subcc %g0,%o4,%g0 C restore cy
55:
56: ld [s1_ptr+0],%g4 C preload s1 limb 0
57: addcc n,-10,n C bias n by 8 (one loop1 pass) + 2 (the pair preloaded here)
58: ld [s1_ptr+4],%g1 C preload s1 limb 1
59: ldd [s2_ptr+0],%g2 C preload s2 limbs 0,1 into %g2,%g3
60: blt L(fin1) C fewer than 10 limbs in total: skip the unrolled loop
61: subcc %g0,%o4,%g0 C restore cy
62: C Add blocks of 8 limbs until less than 8 limbs remain
63: L(loop1): C entry state: %g4,%g1 = s1 limbs 0,1; %g2,%g3 = s2 limbs 0,1; cy = carry in
64: addxcc %g4,%g2,%o4 C sum limb 0
65: ld [s1_ptr+8],%g4 C fetch s1 limb 2 (loads and stores leave cy untouched)
66: addxcc %g1,%g3,%o5 C sum limb 1
67: ld [s1_ptr+12],%g1 C fetch s1 limb 3
68: ldd [s2_ptr+8],%g2 C fetch s2 limbs 2,3
69: std %o4,[res_ptr+0] C store limbs 0,1 from %o4,%o5
70: addxcc %g4,%g2,%o4 C sum limb 2
71: ld [s1_ptr+16],%g4 C fetch s1 limb 4
72: addxcc %g1,%g3,%o5 C sum limb 3
73: ld [s1_ptr+20],%g1 C fetch s1 limb 5
74: ldd [s2_ptr+16],%g2 C fetch s2 limbs 4,5
75: std %o4,[res_ptr+8] C store limbs 2,3
76: addxcc %g4,%g2,%o4 C sum limb 4
77: ld [s1_ptr+24],%g4 C fetch s1 limb 6
78: addxcc %g1,%g3,%o5 C sum limb 5
79: ld [s1_ptr+28],%g1 C fetch s1 limb 7
80: ldd [s2_ptr+24],%g2 C fetch s2 limbs 6,7
81: std %o4,[res_ptr+16] C store limbs 4,5
82: addxcc %g4,%g2,%o4 C sum limb 6
83: ld [s1_ptr+32],%g4 C fetch s1 limb 8, i.e. limb 0 of the next pass
84: addxcc %g1,%g3,%o5 C sum limb 7
85: ld [s1_ptr+36],%g1 C fetch s1 limb 9, i.e. limb 1 of the next pass
86: ldd [s2_ptr+32],%g2 C fetch s2 limbs 8,9 for the next pass
87: std %o4,[res_ptr+24] C store limbs 6,7
88: addx %g0,%g0,%o4 C save cy in register
89: addcc n,-8,n C eight limbs done; this clobbers cy, hence the save above
90: add s1_ptr,32,s1_ptr
91: add s2_ptr,32,s2_ptr
92: add res_ptr,32,res_ptr
93: bge L(loop1) C another full pass while the biased n stays non-negative
94: subcc %g0,%o4,%g0 C restore cy
95:
96: L(fin1):
97: addcc n,8-2,n C re-bias n from 8-limb passes to 2-limb passes
98: blt L(end1) C only the preloaded pair (plus at most one limb) is left
99: subcc %g0,%o4,%g0 C restore cy
100: C Add blocks of 2 limbs until less than 2 limbs remain
101: L(loope1): C same entry state as loop1
102: addxcc %g4,%g2,%o4 C sum limb 0
103: ld [s1_ptr+8],%g4 C fetch s1 limb 2 for the next pass
104: addxcc %g1,%g3,%o5 C sum limb 1
105: ld [s1_ptr+12],%g1 C fetch s1 limb 3 for the next pass
106: ldd [s2_ptr+8],%g2 C fetch s2 limbs 2,3 for the next pass
107: std %o4,[res_ptr+0] C store limbs 0,1
108: addx %g0,%g0,%o4 C save cy in register
109: addcc n,-2,n C two limbs done
110: add s1_ptr,8,s1_ptr
111: add s2_ptr,8,s2_ptr
112: add res_ptr,8,res_ptr
113: bge L(loope1) C another pair pass while the biased n stays non-negative
114: subcc %g0,%o4,%g0 C restore cy
115: L(end1): C add the pair that is still sitting preloaded in registers
116: addxcc %g4,%g2,%o4
117: addxcc %g1,%g3,%o5
118: std %o4,[res_ptr+0]
119: addx %g0,%g0,%o4 C save cy in register
120:
121: andcc n,1,%g0 C n is -2 or -1 here; bit 0 set means one odd limb is left
122: be L(ret1) C even count: done
123: subcc %g0,%o4,%g0 C restore cy
124: C Add last limb
125: ld [s1_ptr+8],%g4 C pointers were not advanced past the pair just stored, hence +8
126: ld [s2_ptr+8],%g2
127: addxcc %g4,%g2,%o4
128: st %o4,[res_ptr+8]
129:
130: L(ret1):
131: retl
132: addx %g0,%g0,%o0 C return carry-out from most sign. limb
133:
134: L(1): xor s1_ptr,res_ptr,%g1 C does s1_ptr share its alignment with res_ptr instead?
135: andcc %g1,4,%g0
136: bne L(2) C no: neither source matches res_ptr, use V2
137: nop C delay slot, nothing to do
138: C ** V1b ** res_ptr and s1_ptr share their doubleword alignment
139: mov s2_ptr,%g1 C swap the two source pointers; the sum does not depend on operand order
140: mov s1_ptr,s2_ptr
141: b L(0) C continue with the V1a code
142: mov %g1,s1_ptr C delay slot: completes the swap
143:
144: C ** V2 **
145: C If we come here, the alignment of s1_ptr and res_ptr as well as the
146: C alignment of s2_ptr and res_ptr differ. Since there are only two ways
147: C things can be aligned (that we care about) we now know that the alignment
148: C of s1_ptr and s2_ptr are the same.
149:
150: L(2): cmp n,1 C single limb? (the compare leaves cy clear when n equals 1)
151: be L(jone) C yes: go straight to the one-limb tail
152: nop C delay slot, nothing to do
153: andcc s1_ptr,4,%g0 C s1_ptr unaligned? Side effect: cy=0
154: be L(v2) C if no, branch
155: nop C delay slot, nothing to do
156: C Add least significant limb separately to align s1_ptr and s2_ptr
157: ld [s1_ptr],%g4 C first limb of s1
158: add s1_ptr,4,s1_ptr
159: ld [s2_ptr],%g2 C first limb of s2
160: add s2_ptr,4,s2_ptr
161: add n,-1,n C one limb consumed (add leaves the condition codes alone)
162: addcc %g4,%g2,%o4 C plain add, no carry in; sets cy for the following limbs
163: st %o4,[res_ptr]
164: add res_ptr,4,res_ptr
165:
166: L(v2): addx %g0,%g0,%o4 C save cy in register
167: addcc n,-8,n C bias n by 8 (one loop2 pass); nothing is preloaded in V2
168: blt L(fin2) C fewer than 8 limbs left: skip the unrolled loop
169: subcc %g0,%o4,%g0 C restore cy
170: C Add blocks of 8 limbs until less than 8 limbs remain
171: L(loop2):
172: ldd [s1_ptr+0],%g2 C s1 limbs 0,1 into %g2,%g3
173: ldd [s2_ptr+0],%o4 C s2 limbs 0,1 into %o4,%o5 (the parked carry copy is no longer needed)
174: addxcc %g2,%o4,%g2 C sum limb 0
175: st %g2,[res_ptr+0] C res_ptr is not doubleword aligned here, so store word by word
176: addxcc %g3,%o5,%g3 C sum limb 1
177: st %g3,[res_ptr+4]
178: ldd [s1_ptr+8],%g2 C s1 limbs 2,3
179: ldd [s2_ptr+8],%o4 C s2 limbs 2,3
180: addxcc %g2,%o4,%g2 C sum limb 2
181: st %g2,[res_ptr+8]
182: addxcc %g3,%o5,%g3 C sum limb 3
183: st %g3,[res_ptr+12]
184: ldd [s1_ptr+16],%g2 C s1 limbs 4,5
185: ldd [s2_ptr+16],%o4 C s2 limbs 4,5
186: addxcc %g2,%o4,%g2 C sum limb 4
187: st %g2,[res_ptr+16]
188: addxcc %g3,%o5,%g3 C sum limb 5
189: st %g3,[res_ptr+20]
190: ldd [s1_ptr+24],%g2 C s1 limbs 6,7
191: ldd [s2_ptr+24],%o4 C s2 limbs 6,7
192: addxcc %g2,%o4,%g2 C sum limb 6
193: st %g2,[res_ptr+24]
194: addxcc %g3,%o5,%g3 C sum limb 7
195: st %g3,[res_ptr+28]
196: addx %g0,%g0,%o4 C save cy in register
197: addcc n,-8,n C eight limbs done; this clobbers cy, hence the save above
198: add s1_ptr,32,s1_ptr
199: add s2_ptr,32,s2_ptr
200: add res_ptr,32,res_ptr
201: bge L(loop2) C another full pass while the biased n stays non-negative
202: subcc %g0,%o4,%g0 C restore cy
203:
204: L(fin2):
205: addcc n,8-2,n C re-bias n from 8-limb passes to 2-limb passes
206: blt L(end2) C fewer than 2 limbs left
207: subcc %g0,%o4,%g0 C restore cy
208: L(loope2): C add pairs of limbs while at least 2 remain
209: ldd [s1_ptr+0],%g2 C s1 limbs 0,1 into %g2,%g3
210: ldd [s2_ptr+0],%o4 C s2 limbs 0,1 into %o4,%o5
211: addxcc %g2,%o4,%g2 C sum limb 0
212: st %g2,[res_ptr+0]
213: addxcc %g3,%o5,%g3 C sum limb 1
214: st %g3,[res_ptr+4]
215: addx %g0,%g0,%o4 C save cy in register
216: addcc n,-2,n C two limbs done
217: add s1_ptr,8,s1_ptr
218: add s2_ptr,8,s2_ptr
219: add res_ptr,8,res_ptr
220: bge L(loope2) C another pair pass while the biased n stays non-negative
221: subcc %g0,%o4,%g0 C restore cy
222: L(end2): C shared tail: also reached from L(v1) when fewer than 2 limbs remain
223: andcc n,1,%g0 C n is 0 or 1 coming from V1, -2 or -1 coming from V2; only bit 0 matters
224: be L(ret2) C no odd limb left: done
225: subcc %g0,%o4,%g0 C restore cy
226: C Add last limb
227: L(jone): C also the direct entry for n equal to 1 in V2, with cy clear
228: ld [s1_ptr],%g4
229: ld [s2_ptr],%g2
230: addxcc %g4,%g2,%o4
231: st %o4,[res_ptr]
232:
233: L(ret2):
234: retl
235: addx %g0,%g0,%o0 C return carry-out from most sign. limb
236: EPILOGUE(mpn_add_n)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>