Annotation of OpenXM_contrib/gmp/mpn/sparc64/submul1h.asm, Revision 1.1.1.1
1.1 maekawa 1: dnl SPARC 64-bit submull/submulu -- Helper for mpn_submul_1 and mpn_mul_1.
2:
3: dnl Copyright (C) 1998, 2000 Free Software Foundation, Inc.
4:
5: dnl This file is part of the GNU MP Library.
6:
7: dnl The GNU MP Library is free software; you can redistribute it and/or modify
8: dnl it under the terms of the GNU Lesser General Public License as published
9: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
10: dnl your option) any later version.
11:
12: dnl The GNU MP Library is distributed in the hope that it will be useful, but
13: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15: dnl License for more details.
16:
17: dnl You should have received a copy of the GNU Lesser General Public License
18: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: dnl MA 02111-1307, USA.
21: C submull (LOWPART defined) / submulu: multiply 32-bit source words by a 32-bit multiplier and subtract the products from the result words. Register roles as used below: %i0 = res_ptr, %i1 = s1_ptr, %i2 = count of 32-bit words, %i3 = multiplier.
22: ifdef(`LOWPART',
23: `submull:',
24: `submulu:')
25: save %sp,-256,%sp C new register window; the scratch slots at %fp-17 .. %fp-41 live in this frame
26: C Split the low 32 bits of %i3 into 16-bit halves and convert each to double: %f6 = bits 0-15, %f8 = bits 16-31.
27: sethi %hi(0xffff0000),%o0 C %o0 = 0xffff0000
28: andn %i3,%o0,%o0 C clear bits 16-31 (the st below writes only the low 32 bits)
29: st %o0,[%fp-17] C bounce through the stack to reach the FP registers
30: ld [%fp-17],%f11 C low word of the %f10/%f11 pair; %f10 is never written in this code, presumably zeroed by the caller (TODO confirm)
31: fxtod %f10,%f6 C %f6 = (double) bits 0-15 of multiplier
32:
33: srl %i3,16,%o0 C bits 16-31 of %i3
34: st %o0,[%fp-17]
35: ld [%fp-17],%f11
36: fxtod %f10,%f8 C %f8 = (double) bits 16-31 of multiplier
37:
38: mov 0,%g3 C cy = 0
39: C Pipeline fill. Source words are fetched at offsets +4, +0, +12, +8, ... from the incoming s1_ptr.
40: ld [%i1+4],%f11 C 1st source word
41: subcc %i2,1,%i2 C count--
42: dnl be,pn %icc,E(end1)
43: add %i1,4,%i1 C s1_ptr++
44:
45: fxtod %f10,%f2 C 1st word as double
46: ld [%i1-4],%f11 C 2nd source word
47: add %i1,4,%i1 C s1_ptr++
48: fmuld %f2,%f8,%f16 C word * multiplier bits 16-31 (p16)
49: fmuld %f2,%f6,%f4 C word * multiplier bits 0-15 (p0)
50: fdtox %f16,%f14 C back to a 64-bit integer
51: std %f14,[%fp-25] C p16 -> first slot pair
52: fdtox %f4,%f12
53: subcc %i2,1,%i2 C count--
54: be,pn %icc,E(end2) C count hit zero: only two words in total
55: std %f12,[%fp-17] C p0 -> first slot pair (delay slot, always executed)
56:
57: fxtod %f10,%f2 C 2nd word as double
58: ld [%i1+4],%f11 C 3rd source word
59: add %i1,4,%i1 C s1_ptr++
60: fmuld %f2,%f8,%f16
61: fmuld %f2,%f6,%f4
62: fdtox %f16,%f14
63: std %f14,[%fp-41] C p16 -> second slot pair
64: fdtox %f4,%f12
65: subcc %i2,1,%i2 C count--
66: dnl be,pn %icc,E(end3)
67: std %f12,[%fp-33] C p0 -> second slot pair
68: C From here the FP work for a later word is interleaved with the integer work for an earlier one.
69: fxtod %f10,%f2 C 3rd word as double
70: ld [%i1-4],%f11 C 4th source word
71: add %i1,4,%i1 C s1_ptr++
72: ld [%i0+DLO],%g5 C result word to subtract from (DLO/DHI are defined outside this code)
73: ldx [%fp-25],%g2 C p16
74: fmuld %f2,%f8,%f16
75: ldx [%fp-17],%g1 C p0
76: fmuld %f2,%f6,%f4
77: sllx %g2,16,%g2 C align p16
78: fdtox %f16,%f14
79: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
80: std %f14,[%fp-25] C first slot pair was read above, so it can be refilled
81: fdtox %f4,%f12
82: add %i0,4,%i0 C res_ptr++
83: subcc %i2,1,%i2 C count--; also the last write to icc before the first subxcc
84: be,pn %icc,E(end4) C count hit zero: exactly four words, skip the loop
85: std %f12,[%fp-17] C delay slot, always executed
86:
87: b,a E(loop) C annulled branch: the nop below is not executed
88: nop C nop is cheap to nullify
89:
90: ALIGN(16)
91: C BEGIN LOOP (two words per pass, alternating between the two slot pairs)
92: E(loop):
93: fxtod %f10,%f2
94: ld [%i1+4],%f11
95: add %i1,4,%i1 C s1_ptr++
96: add %g3,%g1,%g4 C p += cy
97: subxcc %g5,%g4,%l2 C subtract p and the incoming icc borrow from the result word (ADD2)
98: ld [%i0+DHI],%g5 C next result word
99: srlx %g4,32,%g3 C cy = bits 32-63 of p
100: ldx [%fp-41],%g2 C p16
101: fmuld %f2,%f8,%f16
102: ldx [%fp-33],%g1 C p0
103: fmuld %f2,%f6,%f4
104: sllx %g2,16,%g2 C align p16
105: st %l2,[%i0-4+DLO] C store the 32-bit difference
106: fdtox %f16,%f14
107: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
108: std %f14,[%fp-41]
109: fdtox %f4,%f12
110: std %f12,[%fp-33]
111: sub %i2,2,%i2 C count -= 2; plain sub keeps the borrow in icc. Zero is only tested on even counts, so callers presumably pass an even count (TODO confirm)
112: add %i0,4,%i0 C res_ptr++
113:
114: fxtod %f10,%f2
115: ld [%i1-4],%f11
116: add %i1,4,%i1 C s1_ptr++
117: add %g3,%g1,%g4 C p += cy
118: subxcc %g5,%g4,%l2 C subtract p and the incoming icc borrow from the result word (ADD2)
119: ld [%i0+DLO],%g5 C next result word
120: srlx %g4,32,%g3 C cy = bits 32-63 of p
121: ldx [%fp-25],%g2 C p16
122: fmuld %f2,%f8,%f16
123: ldx [%fp-17],%g1 C p0
124: fmuld %f2,%f6,%f4
125: sllx %g2,16,%g2 C align p16
126: st %l2,[%i0-4+DHI] C store the 32-bit difference
127: fdtox %f16,%f14
128: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
129: std %f14,[%fp-25]
130: fdtox %f4,%f12
131: std %f12,[%fp-17]
132: brnz,pt %i2,E(loop) C tests the register value, so icc still holds the borrow
133: add %i0,4,%i0 C res_ptr++
134: C END LOOP
135: E(loope):
136: E(end4):
137: fxtod %f10,%f2 C last source word loaded; no further s1 loads from here on
138: add %g3,%g1,%g4 C p += cy
139: subxcc %g5,%g4,%l2 C subtract p and the incoming icc borrow from the result word (ADD2)
140: ld [%i0+DHI],%g5
141: srlx %g4,32,%g3 C cy = bits 32-63 of p
142: ldx [%fp-41],%g2 C p16
143: fmuld %f2,%f8,%f16
144: ldx [%fp-33],%g1 C p0
145: fmuld %f2,%f6,%f4
146: sllx %g2,16,%g2 C align p16
147: st %l2,[%i0-4+DLO]
148: fdtox %f16,%f14
149: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
150: std %f14,[%fp-41]
151: fdtox %f4,%f12
152: std %f12,[%fp-33]
153: add %i0,4,%i0 C res_ptr++
154:
155: add %g3,%g1,%g4 C p += cy
156: subxcc %g5,%g4,%l2 C subtract p and the incoming icc borrow from the result word (ADD2)
157: ld [%i0+DLO],%g5
158: srlx %g4,32,%g3 C cy = bits 32-63 of p
159: ldx [%fp-25],%g2 C p16
160: ldx [%fp-17],%g1 C p0
161: sllx %g2,16,%g2 C align p16
162: st %l2,[%i0-4+DHI]
163: b,a E(yyy) C annulled branch: joins the shared tail without executing the instruction after it
164:
165: E(end2):
166: fxtod %f10,%f2 C two-word case: finish the second product, then fall into the shared tail
167: fmuld %f2,%f8,%f16
168: fmuld %f2,%f6,%f4
169: fdtox %f16,%f14
170: std %f14,[%fp-41]
171: fdtox %f4,%f12
172: std %f12,[%fp-33]
173: ld [%i0+DLO],%g5
174: ldx [%fp-25],%g2 C p16
175: ldx [%fp-17],%g1 C p0
176: sllx %g2,16,%g2 C align p16
177: E(yyy): add %g2,%g1,%g1 C add p16 to p0 (ADD1)
178: add %i0,4,%i0 C res_ptr++
179:
180: add %g3,%g1,%g4 C p += cy
181: subxcc %g5,%g4,%l2 C subtract p and the incoming icc borrow from the result word (ADD2)
182: ifdef(`LOWPART',
183: ` ld [%i0+DHI],%g5')
184: srlx %g4,32,%g3 C cy = bits 32-63 of p
185: ldx [%fp-41],%g2 C p16
186: ldx [%fp-33],%g1 C p0
187: sllx %g2,16,%g2 C align p16
188: st %l2,[%i0-4+DLO]
189: add %g2,%g1,%g1 C add p16 to p0 (ADD1)
190: add %i0,4,%i0 C res_ptr++
191:
192: add %g3,%g1,%g4 C p += cy
193: ifdef(`LOWPART',
194: ` subxcc %g5,%g4,%l2') C subtract p and the incoming icc borrow from the result word (ADD2)
195: ifdef(`LOWPART',
196: ` st %l2,[%i0-4+DHI]
197: srlx %g4,32,%g4')
198: C With LOWPART the last word is stored and %g4 reduced to bits 32-63 of p; without it %g4 keeps the whole last p + cy and nothing is stored.
199: addx %g4,0,%g4 C add the borrow left in icc by the last subxcc
200: ret
201: restore %g0,%g4,%o0 C sideeffect: put cy in retreg
202: ifdef(`LOWPART',
203: `EPILOGUE(submull)',
204: `EPILOGUE(submulu)')
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>