Annotation of OpenXM_contrib/gmp/mpn/sparc32/v9/submul_1.asm, Revision 1.1.1.1
1.1 maekawa 1: dnl SPARC v9 32-bit mpn_submul_1 -- Multiply a limb vector with a limb and
2: dnl subtract the result from a second limb vector.
3:
4: dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
9: dnl it under the terms of the GNU Lesser General Public License as published
10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
11: dnl your option) any later version.
12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23:
24: include(`../config.m4')
25:
26: C INPUT PARAMETERS
27: C res_ptr i0
28: C s1_ptr i1
29: C size i2
30: C s2_limb i3
31:
32: ASM_START()
33:
34: 	TEXT
35: 	ALIGN(4)
C L(noll) ("null"): 32 zero bits.  This word is loaded into %f10 once in
C the prologue and stays there for the whole routine, so that writing a
C 32-bit limb into %f11 makes the register pair %f10:%f11 a non-negative
C 64-bit integer that fxtod can convert exactly to double.
36: L(noll):
37: 	.word 0
38:
39: PROLOGUE(mpn_submul_1)
C ---------------------------------------------------------------------
C mpn_submul_1(res_ptr, s1_ptr, size, s2_limb)
C   {res_ptr, size} -= {s1_ptr, size} * s2_limb;  returns the borrow
C   limb in %o0.  Assumes size >= 1 (the first limb is loaded before
C   the count is tested).
C
C Register roles (after save):
C   %i0 res_ptr    %i1 s1_ptr    %i2 size countdown    %i3 s2_limb
C   %f6 / %f8      low / high 16-bit half of s2_limb, as doubles
C   %f10           permanent 32-bit zero (high word for fxtod inputs)
C   %g1 / %g2      p0 / p16 partial products read back from scratch
C   %g4            p = p0 + (p16 << 16) + cy
C   %g3            running carry/borrow limb (cy)
C   %g5            prefetched *res_ptr;  %l2 = limb stored back
C
C Method: each 32x32 multiply is done as two FP multiplies in double
C precision, limb*low16 and limb*high16.  Both products fit in 48 bits,
C so they are exact in a 53-bit mantissa.  fdtox converts them back to
C 64-bit integers (p0, p16) via stack scratch slots; integer code then
C recombines them and subtracts from *res_ptr, propagating the borrow.
C The loop is software-pipelined, ping-ponging between two scratch
C pairs: [%fp-16]/[%fp-24] and [%fp-32]/[%fp-40].
C ---------------------------------------------------------------------
40: 	save %sp,-256,%sp
41:
C Load the zero word into %f10 (PIC and non-PIC variants).
42: ifdef(`PIC',
43: `L(pc):	rd	%pc,%o7
44: 	ld	[%o7+L(noll)-L(pc)],%f10',
45: `	sethi	%hi(L(noll)),%g1
46: 	ld	[%g1+%lo(L(noll))],%f10')
47:
C Split s2_limb into 16-bit halves and convert each to double:
C %f6 = (double)(s2_limb & 0xffff),  %f8 = (double)(s2_limb >> 16).
C The store/load through [%fp-16] moves the value into %f11, where
C %f10:%f11 forms the 64-bit integer input of fxtod.
48: 	sethi	%hi(0xffff0000),%o0
49: 	andn	%i3,%o0,%o0
50: 	st	%o0,[%fp-16]
51: 	ld	[%fp-16],%f11
52: 	fxtod	%f10,%f6
53:
54: 	srl	%i3,16,%o0
55: 	st	%o0,[%fp-16]
56: 	ld	[%fp-16],%f11
57: 	fxtod	%f10,%f8
58:
59: 	mov	0,%g3			C cy = 0
60:
C Pipeline fill: depending on size mod 4, peel 1..4 iterations before
C entering the steady-state loop, branching to the matching drain code.
61: 	ld	[%i1],%f11
62: 	subcc	%i2,1,%i2
63: 	be,pn	%icc,L(end1)
64: 	add	%i1,4,%i1		C s1_ptr++
65:
66: 	fxtod	%f10,%f2
67: 	ld	[%i1],%f11
68: 	add	%i1,4,%i1		C s1_ptr++
69: 	fmuld	%f2,%f8,%f16
70: 	fmuld	%f2,%f6,%f4
71: 	fdtox	%f16,%f14
72: 	std	%f14,[%fp-24]
73: 	fdtox	%f4,%f12
74: 	subcc	%i2,1,%i2
75: 	be,pn	%icc,L(end2)
76: 	std	%f12,[%fp-16]
77:
78: 	fxtod	%f10,%f2
79: 	ld	[%i1],%f11
80: 	add	%i1,4,%i1		C s1_ptr++
81: 	fmuld	%f2,%f8,%f16
82: 	fmuld	%f2,%f6,%f4
83: 	fdtox	%f16,%f14
84: 	std	%f14,[%fp-40]
85: 	fdtox	%f4,%f12
86: 	subcc	%i2,1,%i2
87: 	be,pn	%icc,L(end3)
88: 	std	%f12,[%fp-32]
89:
90: 	fxtod	%f10,%f2
91: 	ld	[%i1],%f11
92: 	add	%i1,4,%i1		C s1_ptr++
93: 	ld	[%i0],%g5
94: 	ldx	[%fp-24],%g2		C p16
95: 	fmuld	%f2,%f8,%f16
96: 	ldx	[%fp-16],%g1		C p0
97: 	fmuld	%f2,%f6,%f4
98: 	sllx	%g2,16,%g2		C align p16
99: 	fdtox	%f16,%f14
100: 	add	%g2,%g1,%g1		C add p16 to p0 (ADD1)
101: 	std	%f14,[%fp-24]
102: 	fdtox	%f4,%f12
103: 	add	%i0,4,%i0		C res_ptr++
104: 	subcc	%i2,1,%i2
105: 	be,pn	%icc,L(end4)
106: 	std	%f12,[%fp-16]
107:
108: 	b,a	L(loopm)
109:
110: 	.align 16
C Steady state: two mirror-image stages ping-pong between the scratch
C pairs.  NOTE: the "(ADD2)" comments below are inherited from the
C addmul_1 sibling; here the operation is a subtract,
C   %l2 = *res_ptr - low32(p),
C with the resulting borrow folded into cy by the later addx.
111: C BEGIN LOOP
112: L(loop):
113: 	fxtod	%f10,%f2
114: 	ld	[%i1],%f11
115: 	add	%i1,4,%i1		C s1_ptr++
116: 	add	%g3,%g1,%g4		C p += cy
117: 	subcc	%g5,%g4,%l2		C add *res_ptr to p0 (ADD2)
118: 	ld	[%i0],%g5
119: 	srlx	%g4,32,%g3
120: 	ldx	[%fp-24],%g2		C p16
121: 	fmuld	%f2,%f8,%f16
122: 	ldx	[%fp-16],%g1		C p0
123: 	fmuld	%f2,%f6,%f4
124: 	sllx	%g2,16,%g2		C align p16
125: 	st	%l2,[%i0-4]
126: 	addx	%g3,0,%g3
127: 	fdtox	%f16,%f14
128: 	add	%g2,%g1,%g1		C add p16 to p0 (ADD1)
129: 	std	%f14,[%fp-24]
130: 	fdtox	%f4,%f12
131: 	std	%f12,[%fp-16]
132: 	subcc	%i2,1,%i2
133: 	be,pn	%icc,L(loope)
134: 	add	%i0,4,%i0		C res_ptr++
135: L(loopm):
136: 	fxtod	%f10,%f2
137: 	ld	[%i1],%f11
138: 	add	%i1,4,%i1		C s1_ptr++
139: 	add	%g3,%g1,%g4		C p += cy
140: 	subcc	%g5,%g4,%l2		C add *res_ptr to p0 (ADD2)
141: 	ld	[%i0],%g5
142: 	srlx	%g4,32,%g3
143: 	ldx	[%fp-40],%g2		C p16
144: 	fmuld	%f2,%f8,%f16
145: 	ldx	[%fp-32],%g1		C p0
146: 	fmuld	%f2,%f6,%f4
147: 	sllx	%g2,16,%g2		C align p16
148: 	st	%l2,[%i0-4]
149: 	addx	%g3,0,%g3
150: 	fdtox	%f16,%f14
151: 	add	%g2,%g1,%g1		C add p16 to p0 (ADD1)
152: 	std	%f14,[%fp-40]
153: 	fdtox	%f4,%f12
154: 	std	%f12,[%fp-32]
155: 	subcc	%i2,1,%i2
156: 	bne,pt	%icc,L(loop)
157: 	add	%i0,4,%i0		C res_ptr++
158: C END LOOP
159:
C Pipeline drain.  In the wind-down paths the borrow from each subcc is
C consumed directly by the following subxcc instead of being folded
C into cy with addx.
160: 	fxtod	%f10,%f2
161: 	add	%g3,%g1,%g4		C p += cy
162: 	subcc	%g5,%g4,%l2		C add *res_ptr to p0 (ADD2)
163: 	ld	[%i0],%g5
164: 	srlx	%g4,32,%g3
165: 	ldx	[%fp-24],%g2		C p16
166: 	fmuld	%f2,%f8,%f16
167: 	ldx	[%fp-16],%g1		C p0
168: 	fmuld	%f2,%f6,%f4
169: 	sllx	%g2,16,%g2		C align p16
170: 	st	%l2,[%i0-4]
171: 	b,a	L(xxx)
172: L(loope):
173: L(end4):
174: 	fxtod	%f10,%f2
175: 	add	%g3,%g1,%g4		C p += cy
176: 	subcc	%g5,%g4,%l2		C add *res_ptr to p0 (ADD2)
177: 	ld	[%i0],%g5
178: 	srlx	%g4,32,%g3
179: 	ldx	[%fp-40],%g2		C p16
180: 	fmuld	%f2,%f8,%f16
181: 	ldx	[%fp-32],%g1		C p0
182: 	fmuld	%f2,%f6,%f4
183: 	sllx	%g2,16,%g2		C align p16
184: 	st	%l2,[%i0-4]
185: 	fdtox	%f16,%f14
186: 	add	%g2,%g1,%g1		C add p16 to p0 (ADD1)
187: 	std	%f14,[%fp-40]
188: 	fdtox	%f4,%f12
189: 	std	%f12,[%fp-32]
190: 	add	%i0,4,%i0		C res_ptr++
191:
192: 	add	%g3,%g1,%g4		C p += cy
193: 	subxcc	%g5,%g4,%l2		C add *res_ptr to p0 (ADD2)
194: 	ld	[%i0],%g5
195: 	srlx	%g4,32,%g3
196: 	ldx	[%fp-24],%g2		C p16
197: 	ldx	[%fp-16],%g1		C p0
198: 	sllx	%g2,16,%g2		C align p16
199: 	st	%l2,[%i0-4]
200: 	b,a	L(yyy)
201:
202: L(end3):
203: 	fxtod	%f10,%f2
204: 	ld	[%i0],%g5
205: 	ldx	[%fp-24],%g2		C p16
206: 	fmuld	%f2,%f8,%f16
207: 	ldx	[%fp-16],%g1		C p0
208: 	fmuld	%f2,%f6,%f4
209: 	sllx	%g2,16,%g2		C align p16
210: L(xxx):	fdtox	%f16,%f14
211: 	add	%g2,%g1,%g1		C add p16 to p0 (ADD1)
212: 	std	%f14,[%fp-24]
213: 	fdtox	%f4,%f12
214: 	std	%f12,[%fp-16]
215: 	add	%i0,4,%i0		C res_ptr++
216:
217: 	add	%g3,%g1,%g4		C p += cy
218: 	subxcc	%g5,%g4,%l2		C add *res_ptr to p0 (ADD2)
219: 	ld	[%i0],%g5
220: 	srlx	%g4,32,%g3
221: 	ldx	[%fp-40],%g2		C p16
222: 	ldx	[%fp-32],%g1		C p0
223: 	sllx	%g2,16,%g2		C align p16
224: 	st	%l2,[%i0-4]
225: 	add	%g2,%g1,%g1		C add p16 to p0 (ADD1)
226: 	add	%i0,4,%i0		C res_ptr++
227:
228: 	add	%g3,%g1,%g4		C p += cy
229: 	subxcc	%g5,%g4,%l2		C add *res_ptr to p0 (ADD2)
230: 	ld	[%i0],%g5
231: 	srlx	%g4,32,%g3
232: 	ldx	[%fp-24],%g2		C p16
233: 	ldx	[%fp-16],%g1		C p0
234: 	sllx	%g2,16,%g2		C align p16
235: 	st	%l2,[%i0-4]
236: 	add	%g2,%g1,%g1		C add p16 to p0 (ADD1)
237: 	add	%i0,4,%i0		C res_ptr++
238: 	b,a	L(ret)
239:
240: L(end2):
241: 	fxtod	%f10,%f2
242: 	fmuld	%f2,%f8,%f16
243: 	fmuld	%f2,%f6,%f4
244: 	fdtox	%f16,%f14
245: 	std	%f14,[%fp-40]
246: 	fdtox	%f4,%f12
247: 	std	%f12,[%fp-32]
248: 	ld	[%i0],%g5
249: 	ldx	[%fp-24],%g2		C p16
250: 	ldx	[%fp-16],%g1		C p0
251: 	sllx	%g2,16,%g2		C align p16
252: L(yyy):	add	%g2,%g1,%g1	C add p16 to p0 (ADD1)
253: 	add	%i0,4,%i0		C res_ptr++
254:
255: 	add	%g3,%g1,%g4		C p += cy
256: 	subxcc	%g5,%g4,%l2		C add *res_ptr to p0 (ADD2)
257: 	ld	[%i0],%g5
258: 	srlx	%g4,32,%g3
259: 	ldx	[%fp-40],%g2		C p16
260: 	ldx	[%fp-32],%g1		C p0
261: 	sllx	%g2,16,%g2		C align p16
262: 	st	%l2,[%i0-4]
263: 	add	%g2,%g1,%g1		C add p16 to p0 (ADD1)
264: 	add	%i0,4,%i0		C res_ptr++
265: 	b,a	L(ret)
266:
267: L(end1):
268: 	fxtod	%f10,%f2
269: 	fmuld	%f2,%f8,%f16
270: 	fmuld	%f2,%f6,%f4
271: 	fdtox	%f16,%f14
272: 	std	%f14,[%fp-24]
273: 	fdtox	%f4,%f12
274: 	std	%f12,[%fp-16]
275:
276: 	ld	[%i0],%g5
277: 	ldx	[%fp-24],%g2		C p16
278: 	ldx	[%fp-16],%g1		C p0
279: 	sllx	%g2,16,%g2		C align p16
280: 	add	%g2,%g1,%g1		C add p16 to p0 (ADD1)
281: 	add	%i0,4,%i0		C res_ptr++
282:
C Last limb: subtract (consuming any pending borrow), fold the final
C borrow into cy, and return cy through the register window restore.
283: L(ret):	add	%g3,%g1,%g4	C p += cy
284: 	subxcc	%g5,%g4,%l2		C add *res_ptr to p0 (ADD2)
285: 	srlx	%g4,32,%g3
286: 	st	%l2,[%i0-4]
287:
288: 	addx	%g3,%g0,%g3
289: 	ret
290: 	restore %g0,%g3,%o0		C sideeffect: put cy in retreg
291: EPILOGUE(mpn_submul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>