Annotation of OpenXM_contrib/gmp/mpn/pa32/udiv_qrnnd.asm, Revision 1.1.1.1
1.1 ohara 1: dnl HP-PA __udiv_qrnnd division support, used from longlong.h.
2: dnl This version runs fast on pre-PA7000 CPUs.
3:
4: dnl Copyright 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
5:
6: dnl This file is part of the GNU MP Library.
7:
8: dnl The GNU MP Library is free software; you can redistribute it and/or modify
9: dnl it under the terms of the GNU Lesser General Public License as published
10: dnl by the Free Software Foundation; either version 2.1 of the License, or (at
11: dnl your option) any later version.
12:
13: dnl The GNU MP Library is distributed in the hope that it will be useful, but
14: dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: dnl License for more details.
17:
18: dnl You should have received a copy of the GNU Lesser General Public License
19: dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: dnl the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: dnl MA 02111-1307, USA.
22:
23: include(`../config.m4')
24:
25: C INPUT PARAMETERS
26: C rem_ptr gr26
27: C n1 gr25
28: C n0 gr24
29: C d gr23
30:
31: C The code size is a bit excessive. We could merge the last two ds;addc
32: C sequences by simply moving the "bb,< Odd" instruction down. The only
33: C trouble is the FFFFFFFF code that would need some hacking.
34:
35: ASM_START()
36: PROLOGUE(mpn_udiv_qrnnd) C Divide the two-word value n1:n0 (r25:r24) by d (r23); the remainder is stored at *rem_ptr (r26) and the quotient is left in r28. Presumably requires n1 < d, as for udiv_qrnnd in longlong.h -- TODO confirm.
37: comb,< %r23,0,L(largedivisor) C d is negative as a signed word (top bit set): take the large-divisor path
38: sub %r0,%r23,%r1 C delay slot: r1 = -d (read again by the L(odd) fix-up); clear cy as side-effect
39: ds %r0,%r1,%r0 C prime the PSW V bit for the divide steps that follow
40: addc %r24,%r24,%r24 C shift n0 left one bit: its top bit goes out to cy, the old cy comes in at the bottom
41: ds %r25,%r23,%r25 C divide step 1 of 32: partial remainder in r25, quotient bit comes out in cy
42: addc %r24,%r24,%r24 C collect the quotient bit in r24 and hand the next dividend bit to cy
43: ds %r25,%r23,%r25
44: addc %r24,%r24,%r24
45: ds %r25,%r23,%r25
46: addc %r24,%r24,%r24
47: ds %r25,%r23,%r25
48: addc %r24,%r24,%r24
49: ds %r25,%r23,%r25
50: addc %r24,%r24,%r24
51: ds %r25,%r23,%r25
52: addc %r24,%r24,%r24
53: ds %r25,%r23,%r25
54: addc %r24,%r24,%r24
55: ds %r25,%r23,%r25
56: addc %r24,%r24,%r24
57: ds %r25,%r23,%r25
58: addc %r24,%r24,%r24
59: ds %r25,%r23,%r25
60: addc %r24,%r24,%r24
61: ds %r25,%r23,%r25
62: addc %r24,%r24,%r24
63: ds %r25,%r23,%r25
64: addc %r24,%r24,%r24
65: ds %r25,%r23,%r25
66: addc %r24,%r24,%r24
67: ds %r25,%r23,%r25
68: addc %r24,%r24,%r24
69: ds %r25,%r23,%r25
70: addc %r24,%r24,%r24
71: ds %r25,%r23,%r25
72: addc %r24,%r24,%r24
73: ds %r25,%r23,%r25
74: addc %r24,%r24,%r24
75: ds %r25,%r23,%r25
76: addc %r24,%r24,%r24
77: ds %r25,%r23,%r25
78: addc %r24,%r24,%r24
79: ds %r25,%r23,%r25
80: addc %r24,%r24,%r24
81: ds %r25,%r23,%r25
82: addc %r24,%r24,%r24
83: ds %r25,%r23,%r25
84: addc %r24,%r24,%r24
85: ds %r25,%r23,%r25
86: addc %r24,%r24,%r24
87: ds %r25,%r23,%r25
88: addc %r24,%r24,%r24
89: ds %r25,%r23,%r25
90: addc %r24,%r24,%r24
91: ds %r25,%r23,%r25
92: addc %r24,%r24,%r24
93: ds %r25,%r23,%r25
94: addc %r24,%r24,%r24
95: ds %r25,%r23,%r25
96: addc %r24,%r24,%r24
97: ds %r25,%r23,%r25
98: addc %r24,%r24,%r24
99: ds %r25,%r23,%r25
100: addc %r24,%r24,%r24
101: ds %r25,%r23,%r25
102: addc %r24,%r24,%r28 C from here on the quotient bits live in r28
103: ds %r25,%r23,%r25 C divide step 32 of 32
104: comclr,>= %r25,%r0,%r0 C remainder >= 0 (signed): skip the correction below
105: addl %r25,%r23,%r25 C remainder went negative: add d back (addl does not disturb cy)
106: stws %r25,0(0,%r26) C *rem_ptr = remainder
107: bv 0(%r2) C return through r2
108: addc %r28,%r28,%r28 C delay slot: shift the last quotient bit into r28
109:
110: .label L(largedivisor) C d has its top bit set: divide (n1:n0) >> 1 by d >> 1 (even d) or by d / 2 + 1 (odd d), then fix up
111: extru %r24,31,1,%r19 C r19 = n0 & 1
112: bb,< %r23,31,L(odd) C low bit of d set: odd divisor
113: extru %r23,30,31,%r22 C delay slot: r22 = d >> 1
114: shd %r25,%r24,1,%r24 C r24 = new n0 = low word of (n1:n0) >> 1
115: extru %r25,30,31,%r25 C r25 = new n1 = n1 >> 1
116: sub %r0,%r22,%r21 C r21 = -(d >> 1); same clear-cy idiom as at function entry
117: ds %r0,%r21,%r0 C prime the PSW V bit for the divide steps that follow
118: addc %r24,%r24,%r24 C shift the first dividend bit out to cy
119: ds %r25,%r22,%r25 C divide step 1 of 32, divisor is d >> 1
120: addc %r24,%r24,%r24 C collect the quotient bit in r24 and hand the next dividend bit to cy
121: ds %r25,%r22,%r25
122: addc %r24,%r24,%r24
123: ds %r25,%r22,%r25
124: addc %r24,%r24,%r24
125: ds %r25,%r22,%r25
126: addc %r24,%r24,%r24
127: ds %r25,%r22,%r25
128: addc %r24,%r24,%r24
129: ds %r25,%r22,%r25
130: addc %r24,%r24,%r24
131: ds %r25,%r22,%r25
132: addc %r24,%r24,%r24
133: ds %r25,%r22,%r25
134: addc %r24,%r24,%r24
135: ds %r25,%r22,%r25
136: addc %r24,%r24,%r24
137: ds %r25,%r22,%r25
138: addc %r24,%r24,%r24
139: ds %r25,%r22,%r25
140: addc %r24,%r24,%r24
141: ds %r25,%r22,%r25
142: addc %r24,%r24,%r24
143: ds %r25,%r22,%r25
144: addc %r24,%r24,%r24
145: ds %r25,%r22,%r25
146: addc %r24,%r24,%r24
147: ds %r25,%r22,%r25
148: addc %r24,%r24,%r24
149: ds %r25,%r22,%r25
150: addc %r24,%r24,%r24
151: ds %r25,%r22,%r25
152: addc %r24,%r24,%r24
153: ds %r25,%r22,%r25
154: addc %r24,%r24,%r24
155: ds %r25,%r22,%r25
156: addc %r24,%r24,%r24
157: ds %r25,%r22,%r25
158: addc %r24,%r24,%r24
159: ds %r25,%r22,%r25
160: addc %r24,%r24,%r24
161: ds %r25,%r22,%r25
162: addc %r24,%r24,%r24
163: ds %r25,%r22,%r25
164: addc %r24,%r24,%r24
165: ds %r25,%r22,%r25
166: addc %r24,%r24,%r24
167: ds %r25,%r22,%r25
168: addc %r24,%r24,%r24
169: ds %r25,%r22,%r25
170: addc %r24,%r24,%r24
171: ds %r25,%r22,%r25
172: addc %r24,%r24,%r24
173: ds %r25,%r22,%r25
174: addc %r24,%r24,%r24
175: ds %r25,%r22,%r25
176: addc %r24,%r24,%r24
177: ds %r25,%r22,%r25
178: addc %r24,%r24,%r24
179: ds %r25,%r22,%r25
180: addc %r24,%r24,%r24
181: ds %r25,%r22,%r25 C divide step 32 of 32
182: comclr,>= %r25,%r0,%r0 C remainder >= 0 (signed): skip the correction below
183: addl %r25,%r22,%r25 C remainder went negative: add d >> 1 back (addl does not disturb cy)
184: sh1addl %r25,%r19,%r25 C undo the halving: remainder = 2 * r25 + (n0 & 1)
185: stws %r25,0(0,%r26) C *rem_ptr = remainder
186: bv 0(%r2) C return through r2
187: addc %r24,%r24,%r28 C delay slot: shift in the last quotient bit, result goes to r28
188:
189: .label L(odd) C large odd divisor: divide by d + 1 via the halved operands, then correct quotient and remainder for d
190: addib,sv,n 1,%r22,L(FFFFFFFF) C r22 = (d / 2 + 1); signed overflow here means d == 0xFFFFFFFF, handled separately
191: shd %r25,%r24,1,%r24 C r24 = new n0 = low word of (n1:n0) >> 1
192: extru %r25,30,31,%r25 C r25 = new n1 = n1 >> 1
193: sub %r0,%r22,%r21 C r21 = -(d / 2 + 1); same clear-cy idiom as at function entry
194: ds %r0,%r21,%r0 C prime the PSW V bit for the divide steps that follow
195: addc %r24,%r24,%r24 C shift the first dividend bit out to cy
196: ds %r25,%r22,%r25 C divide step 1 of 32, divisor is d / 2 + 1
197: addc %r24,%r24,%r24 C collect the quotient bit in r24 and hand the next dividend bit to cy
198: ds %r25,%r22,%r25
199: addc %r24,%r24,%r24
200: ds %r25,%r22,%r25
201: addc %r24,%r24,%r24
202: ds %r25,%r22,%r25
203: addc %r24,%r24,%r24
204: ds %r25,%r22,%r25
205: addc %r24,%r24,%r24
206: ds %r25,%r22,%r25
207: addc %r24,%r24,%r24
208: ds %r25,%r22,%r25
209: addc %r24,%r24,%r24
210: ds %r25,%r22,%r25
211: addc %r24,%r24,%r24
212: ds %r25,%r22,%r25
213: addc %r24,%r24,%r24
214: ds %r25,%r22,%r25
215: addc %r24,%r24,%r24
216: ds %r25,%r22,%r25
217: addc %r24,%r24,%r24
218: ds %r25,%r22,%r25
219: addc %r24,%r24,%r24
220: ds %r25,%r22,%r25
221: addc %r24,%r24,%r24
222: ds %r25,%r22,%r25
223: addc %r24,%r24,%r24
224: ds %r25,%r22,%r25
225: addc %r24,%r24,%r24
226: ds %r25,%r22,%r25
227: addc %r24,%r24,%r24
228: ds %r25,%r22,%r25
229: addc %r24,%r24,%r24
230: ds %r25,%r22,%r25
231: addc %r24,%r24,%r24
232: ds %r25,%r22,%r25
233: addc %r24,%r24,%r24
234: ds %r25,%r22,%r25
235: addc %r24,%r24,%r24
236: ds %r25,%r22,%r25
237: addc %r24,%r24,%r24
238: ds %r25,%r22,%r25
239: addc %r24,%r24,%r24
240: ds %r25,%r22,%r25
241: addc %r24,%r24,%r24
242: ds %r25,%r22,%r25
243: addc %r24,%r24,%r24
244: ds %r25,%r22,%r25
245: addc %r24,%r24,%r24
246: ds %r25,%r22,%r25
247: addc %r24,%r24,%r24
248: ds %r25,%r22,%r25
249: addc %r24,%r24,%r24
250: ds %r25,%r22,%r25
251: addc %r24,%r24,%r24
252: ds %r25,%r22,%r25
253: addc %r24,%r24,%r24
254: ds %r25,%r22,%r25
255: addc %r24,%r24,%r24
256: ds %r25,%r22,%r25
257: addc %r24,%r24,%r24
258: ds %r25,%r22,%r25 C divide step 32 of 32
259: addc %r24,%r24,%r28 C shift in the last quotient bit now (cy is reused by the fix-up below); q' = r28
260: comclr,>= %r25,%r0,%r0 C remainder >= 0 (signed): skip the correction below
261: addl %r25,%r22,%r25 C remainder went negative: add d / 2 + 1 back
262: sh1addl %r25,%r19,%r25 C undo the halving: r' = 2 * r25 + (n0 & 1)
263: C We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
264: add,nuv %r28,%r25,%r25 C n = q'*(d+1) + r' = q'*d + (q' + r'), so r = q' + r'; skip the next insn unless the add overflowed
265: addl %r25,%r1,%r25 C sum overflowed 32 bits: r -= d (r1 = -d from function entry)
266: addc %r0,%r28,%r28 C q' += cy left by the add,nuv above (addl does not disturb cy)
267: sub,<< %r25,%r23,%r0 C r < d (unsigned): skip the next insn; cy is set when r >= d
268: addl %r25,%r1,%r25 C r >= d: r -= d
269: stws %r25,0(0,%r26) C *rem_ptr = remainder
270: bv 0(%r2) C return through r2
271: addc %r0,%r28,%r28 C delay slot: quotient += cy from the compare above
272:
273: C This is just a special case of the code above.
274: C We come here when d == 0xFFFFFFFF
275: .label L(FFFFFFFF) C d + 1 == 2^32, so q' = n1 and r' = n0 without any division
276: add,uv %r25,%r24,%r24 C r = n1 + n0; on unsigned overflow skip the compare so cy stays set
277: sub,<< %r24,%r23,%r0 C r < d (unsigned): skip the adjustment; cy is set when r >= d
278: ldo 1(%r24),%r24 C r -= d, which for d == 0xFFFFFFFF is r += 1 modulo 2^32 (ldo does not disturb cy)
279: stws %r24,0(0,%r26) C *rem_ptr = remainder
280: bv 0(%r2) C return through r2
281: addc %r0,%r25,%r28 C delay slot: quotient = n1 + cy
282: EPILOGUE()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>