Annotation of OpenXM_contrib/gmp/mpn/sparc32/v9/mul_1.asm, Revision 1.1.1.1
1.1 maekawa 1: dnl SPARC v9 32-bit mpn_mul_1 -- Multiply a limb vector with a limb and
2: dnl store the result in a second limb vector.
3:
4: dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
9: dnl it under the terms of the GNU Lesser General Public License as published
10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
11: dnl your option) any later version.
12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23:
24: include(`../config.m4')
25:
26: C INPUT PARAMETERS
27: C res_ptr i0
28: C s1_ptr i1
29: C size i2
30: C s2_limb i3
31:
32: ASM_START()
33: C The constant below sits in the text section, next to the code that loads it.
34: TEXT
35: ALIGN(4)
36: L(noll): C one 32-bit zero word; mpn_mul_1 loads it into f10 (absolute address, or pc-relative when PIC is defined)
37: .word 0
38:
39: PROLOGUE(mpn_mul_1) C multiply size limbs at s1_ptr by s2_limb, store the low words at res_ptr, return the final carry
40: save %sp,-256,%sp C new register window; 8-byte scratch slots are used at fp-16, fp-24, fp-32 and fp-40
41: C Load the zero word at noll into f10, so the f10/f11 pair read by fxtod is whatever 32-bit word sits in f11, zero-extended.
42: ifdef(`PIC',
43: `L(pc): rd %pc,%o7
44: ld [%o7+L(noll)-L(pc)],%f10',
45: ` sethi %hi(L(noll)),%g1
46: ld [%g1+%lo(L(noll))],%f10')
47: C f6 = low 16 bits of s2_limb, converted to a double
48: sethi %hi(0xffff0000),%o0 C o0 = 0xffff0000
49: andn %i3,%o0,%o0 C o0 = s2_limb with its upper 16 bits cleared
50: st %o0,[%fp-16] C integer to FP register transfer goes through the stack
51: ld [%fp-16],%f11
52: fxtod %f10,%f6 C f6 = low half as a double
53: C f8 = high 16 bits of s2_limb, converted to a double
54: srl %i3,16,%o0 C o0 = s2_limb >> 16
55: st %o0,[%fp-16]
56: ld [%fp-16],%f11
57: fxtod %f10,%f8 C f8 = high half as a double
58:
59: mov 0,%g3 C cy = 0
60: C Pipeline fill. Sizes 1, 2, 3 and 4 leave through end1, end2, end3 and end4 respectively.
61: ld [%i1],%f11 C f11 = first source limb
62: subcc %i2,1,%i2 C size--
63: be,pn %icc,L(end1) C that was the only limb
64: add %i1,4,%i1 C s1_ptr++ (delay slot, executed on both paths)
65: C Limb 0: form p16 = limb * high half and p0 = limb * low half, spill them to scratch pair A (fp-24, fp-16).
66: fxtod %f10,%f2 C f2 = current limb as a double
67: ld [%i1],%f11 C fetch the next limb
68: add %i1,4,%i1 C s1_ptr++
69: fmuld %f2,%f8,%f16 C f16 = p16, a 32-bit by 16-bit product, so below 2^48
70: fmuld %f2,%f6,%f4 C f4 = p0, likewise below 2^48
71: fdtox %f16,%f14 C p16 as a 64-bit integer
72: std %f14,[%fp-24] C spill p16 to pair A
73: fdtox %f4,%f12 C p0 as a 64-bit integer
74: subcc %i2,1,%i2 C size--
75: be,pn %icc,L(end2) C the limb now in f11 is the last one
76: std %f12,[%fp-16] C spill p0 to pair A (delay slot, executed on both paths)
77: C Limb 1: same steps, spilled to scratch pair B (fp-40, fp-32).
78: fxtod %f10,%f2
79: ld [%i1],%f11
80: add %i1,4,%i1 C s1_ptr++
81: fmuld %f2,%f8,%f16
82: fmuld %f2,%f6,%f4
83: fdtox %f16,%f14
84: std %f14,[%fp-40] C spill p16 to pair B
85: fdtox %f4,%f12
86: subcc %i2,1,%i2
87: be,pn %icc,L(end3) C the limb now in f11 is the last one
88: std %f12,[%fp-32] C spill p0 to pair B (delay slot)
89: C Limb 2: multiply into pair A while the integer unit reloads and combines the limb 0 products from pair A.
90: fxtod %f10,%f2
91: ld [%i1],%f11
92: add %i1,4,%i1 C s1_ptr++
93: ldx [%fp-24],%g2 C p16
94: fmuld %f2,%f8,%f16
95: ldx [%fp-16],%g1 C p0
96: fmuld %f2,%f6,%f4
97: sllx %g2,16,%g2 C align p16
98: fdtox %f16,%f14
99: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
100: std %f14,[%fp-24] C pair A is refilled only after the ldx above has read it
101: fdtox %f4,%f12
102: add %i0,4,%i0 C res_ptr++
103: subcc %i2,1,%i2
104: be,pn %icc,L(end4) C the limb now in f11 is the last one
105: std %f12,[%fp-16] C delay slot
106: C Enter the loop at its second half, the one that consumes pair B.
107: b,a L(loopm) C annulled branch, the following instruction is not executed
108:
109: .align 16
110: C BEGIN LOOP
111: L(loop): C first half: consumes and refills pair A
112: fxtod %f10,%f2
113: ld [%i1],%f11
114: add %i1,4,%i1 C s1_ptr++
115: add %g3,%g1,%g4 C p += cy
116: srlx %g4,32,%g3 C cy = upper 32 bits of the sum
117: ldx [%fp-24],%g2 C p16
118: fmuld %f2,%f8,%f16
119: ldx [%fp-16],%g1 C p0
120: fmuld %f2,%f6,%f4
121: sllx %g2,16,%g2 C align p16
122: st %g4,[%i0-4] C store the low 32 bits of the sum as a result limb
123: fdtox %f16,%f14
124: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
125: std %f14,[%fp-24]
126: fdtox %f4,%f12
127: std %f12,[%fp-16]
128: subcc %i2,1,%i2
129: be,pn %icc,L(loope) C the limb now in f11 is the last one
130: add %i0,4,%i0 C res_ptr++
131: L(loopm): C second half: consumes and refills pair B
132: fxtod %f10,%f2
133: ld [%i1],%f11
134: add %i1,4,%i1 C s1_ptr++
135: add %g3,%g1,%g4 C p += cy
136: srlx %g4,32,%g3 C cy = upper 32 bits of the sum
137: ldx [%fp-40],%g2 C p16
138: fmuld %f2,%f8,%f16
139: ldx [%fp-32],%g1 C p0
140: fmuld %f2,%f6,%f4
141: sllx %g2,16,%g2 C align p16
142: st %g4,[%i0-4] C store the low 32 bits of the sum as a result limb
143: fdtox %f16,%f14
144: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
145: std %f14,[%fp-40]
146: fdtox %f4,%f12
147: std %f12,[%fp-32]
148: subcc %i2,1,%i2
149: bne,pt %icc,L(loop) C more limbs remain after the one now in f11
150: add %i0,4,%i0 C res_ptr++
151: C END LOOP
152: C Loop exit by fall-through: the last limb is in f11 and pair A is next to be consumed. Drain through xxx.
153: fxtod %f10,%f2
154: add %g3,%g1,%g4 C p += cy
155: srlx %g4,32,%g3
156: ldx [%fp-24],%g2 C p16
157: fmuld %f2,%f8,%f16
158: ldx [%fp-16],%g1 C p0
159: fmuld %f2,%f6,%f4
160: sllx %g2,16,%g2 C align p16
161: st %g4,[%i0-4]
162: b,a L(xxx) C annulled branch into the shared tail
163: L(loope): C reached from the first loop half: last limb is in f11, pair B is next to be consumed
164: L(end4): C reached for size 4 with the same pipeline state
165: fxtod %f10,%f2
166: add %g3,%g1,%g4 C p += cy
167: srlx %g4,32,%g3
168: ldx [%fp-40],%g2 C p16
169: fmuld %f2,%f8,%f16
170: ldx [%fp-32],%g1 C p0
171: fmuld %f2,%f6,%f4
172: sllx %g2,16,%g2 C align p16
173: st %g4,[%i0-4]
174: fdtox %f16,%f14
175: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
176: std %f14,[%fp-40] C products of the last limb go to pair B
177: fdtox %f4,%f12
178: std %f12,[%fp-32]
179: add %i0,4,%i0 C res_ptr++
180: C No limbs left to multiply; from here on only the integer combining remains.
181: add %g3,%g1,%g4 C p += cy
182: srlx %g4,32,%g3
183: ldx [%fp-24],%g2 C p16
184: ldx [%fp-16],%g1 C p0
185: sllx %g2,16,%g2 C align p16
186: st %g4,[%i0-4]
187: b,a L(yyy) C annulled branch into the shared tail
188:
189: L(end3): C size 3: limb 0 is in pair A, limb 1 in pair B, limb 2 in f11
190: fxtod %f10,%f2
191: ldx [%fp-24],%g2 C p16
192: fmuld %f2,%f8,%f16
193: ldx [%fp-16],%g1 C p0
194: fmuld %f2,%f6,%f4
195: sllx %g2,16,%g2 C align p16
196: L(xxx): fdtox %f16,%f14
197: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
198: std %f14,[%fp-24] C products of the last limb go to pair A
199: fdtox %f4,%f12
200: std %f12,[%fp-16]
201: add %i0,4,%i0 C res_ptr++
202: C Emit one result limb and combine pair B.
203: add %g3,%g1,%g4 C p += cy
204: srlx %g4,32,%g3
205: ldx [%fp-40],%g2 C p16
206: ldx [%fp-32],%g1 C p0
207: sllx %g2,16,%g2 C align p16
208: st %g4,[%i0-4]
209: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
210: add %i0,4,%i0 C res_ptr++
211: C Emit one result limb and combine pair A, which holds the last limb.
212: add %g3,%g1,%g4 C p += cy
213: srlx %g4,32,%g3
214: ldx [%fp-24],%g2 C p16
215: ldx [%fp-16],%g1 C p0
216: sllx %g2,16,%g2 C align p16
217: st %g4,[%i0-4]
218: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
219: add %i0,4,%i0 C res_ptr++
220: b,a L(ret)
221:
222: L(end2): C size 2: limb 0 is in pair A, limb 1 in f11
223: fxtod %f10,%f2
224: fmuld %f2,%f8,%f16
225: fmuld %f2,%f6,%f4
226: fdtox %f16,%f14
227: std %f14,[%fp-40] C products of the last limb go to pair B
228: fdtox %f4,%f12
229: std %f12,[%fp-32]
230: ldx [%fp-24],%g2 C p16
231: ldx [%fp-16],%g1 C p0
232: sllx %g2,16,%g2 C align p16
233: L(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
234: add %i0,4,%i0 C res_ptr++
235: C Emit one result limb and combine pair B, which holds the last limb.
236: add %g3,%g1,%g4 C p += cy
237: srlx %g4,32,%g3
238: ldx [%fp-40],%g2 C p16
239: ldx [%fp-32],%g1 C p0
240: sllx %g2,16,%g2 C align p16
241: st %g4,[%i0-4]
242: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
243: add %i0,4,%i0 C res_ptr++
244: b,a L(ret)
245:
246: L(end1): C size 1: the only limb is in f11
247: fxtod %f10,%f2
248: fmuld %f2,%f8,%f16
249: fmuld %f2,%f6,%f4
250: fdtox %f16,%f14
251: std %f14,[%fp-24]
252: fdtox %f4,%f12
253: std %f12,[%fp-16]
254: C Reload the two products just spilled and combine them.
255: ldx [%fp-24],%g2 C p16
256: ldx [%fp-16],%g1 C p0
257: sllx %g2,16,%g2 C align p16
258: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
259: add %i0,4,%i0 C res_ptr++
260: C Common exit: g1 holds the last combined product, g3 the carry into it.
261: L(ret): add %g3,%g1,%g4 C p += cy
262: srlx %g4,32,%g3 C cy = upper 32 bits, the value returned
263: st %g4,[%i0-4] C store the final result limb
264:
265: ret
266: restore %g0,%g3,%o0 C sideeffect: put cy in retreg
267: EPILOGUE(mpn_mul_1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>