Annotation of OpenXM_contrib/gmp/mpn/generic/tdiv_qr.c, Revision 1.1.1.1
1.1 maekawa 1: /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2: write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
3: qxn is non-zero, generate that many fraction limbs and append them after the
4: other quotient limbs, and update the remainder accordningly. The input
5: operands are unaffected.
6:
7: Preconditions:
8: 1. The most significant limb of of the divisor must be non-zero.
9: 2. No argument overlap is permitted. (??? relax this ???)
10: 3. nn >= dn, even if qxn is non-zero. (??? relax this ???)
11:
12: The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
13: complexity of multiplication.
14:
15: Copyright (C) 1997, 2000 Free Software Foundation, Inc.
16:
17: This file is part of the GNU MP Library.
18:
19: The GNU MP Library is free software; you can redistribute it and/or modify
20: it under the terms of the GNU Lesser General Public License as published by
21: the Free Software Foundation; either version 2.1 of the License, or (at your
22: option) any later version.
23:
24: The GNU MP Library is distributed in the hope that it will be useful, but
25: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
27: License for more details.
28:
29: You should have received a copy of the GNU Lesser General Public License
30: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
31: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
32: MA 02111-1307, USA. */
33:
34: #include "gmp.h"
35: #include "gmp-impl.h"
36: #include "longlong.h"
37:
38: #ifndef BZ_THRESHOLD
39: #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
40: #endif
41:
42: /* Extract the middle limb from ((h,,l) << cnt) */
43: #define SHL(h,l,cnt) \
44: ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1))))
45:
46: void
47: #if __STDC__
48: mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
49: mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
50: #else
51: mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
52: mp_ptr qp;
53: mp_ptr rp;
54: mp_size_t qxn;
55: mp_srcptr np;
56: mp_size_t nn;
57: mp_srcptr dp;
58: mp_size_t dn;
59: #endif
60: {
61: /* FIXME:
62: 1. qxn
63: 2. pass allocated storage in additional parameter?
64: */
65: if (qxn != 0)
66: abort ();
67:
68: switch (dn)
69: {
70: case 0:
71: DIVIDE_BY_ZERO;
72:
73: case 1:
74: {
75: rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
76: return;
77: }
78:
79: case 2:
80: {
81: int cnt;
82: mp_ptr n2p, d2p;
83: mp_limb_t qhl, cy;
84: TMP_DECL (marker);
85: TMP_MARK (marker);
86: count_leading_zeros (cnt, dp[dn - 1]);
87: if (cnt != 0)
88: {
89: d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
90: mpn_lshift (d2p, dp, dn, cnt);
91: n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
92: cy = mpn_lshift (n2p, np, nn, cnt);
93: n2p[nn] = cy;
94: qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
95: if (cy == 0)
96: qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
97: }
98: else
99: {
100: d2p = (mp_ptr) dp;
101: n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
102: MPN_COPY (n2p, np, nn);
103: qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
104: qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
105: }
106:
107: if (cnt != 0)
108: mpn_rshift (rp, n2p, dn, cnt);
109: else
110: MPN_COPY (rp, n2p, dn);
111: TMP_FREE (marker);
112: return;
113: }
114:
115: default:
116: {
117: int adjust;
118: TMP_DECL (marker);
119: TMP_MARK (marker);
120: adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
121: if (nn + adjust >= 2 * dn)
122: {
123: mp_ptr n2p, d2p;
124: mp_limb_t cy;
125: int cnt;
126: count_leading_zeros (cnt, dp[dn - 1]);
127:
128: qp[nn - dn] = 0; /* zero high quotient limb */
129: if (cnt != 0) /* normalize divisor if needed */
130: {
131: d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
132: mpn_lshift (d2p, dp, dn, cnt);
133: n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
134: cy = mpn_lshift (n2p, np, nn, cnt);
135: n2p[nn] = cy;
136: nn += adjust;
137: }
138: else
139: {
140: d2p = (mp_ptr) dp;
141: n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
142: MPN_COPY (n2p, np, nn);
143: n2p[nn] = 0;
144: nn += adjust;
145: }
146:
147: if (dn == 2)
148: mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
149: else if (dn < BZ_THRESHOLD)
150: mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
151: else
152: {
153: /* Perform 2*dn / dn limb divisions as long as the limbs
154: in np last. */
155: mp_ptr q2p = qp + nn - 2 * dn;
156: n2p += nn - 2 * dn;
157: mpn_bz_divrem_n (q2p, n2p, d2p, dn);
158: nn -= dn;
159: while (nn >= 2 * dn)
160: {
161: mp_limb_t c;
162: q2p -= dn; n2p -= dn;
163: c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
164: ASSERT_ALWAYS (c == 0);
165: nn -= dn;
166: }
167:
168: if (nn != dn)
169: {
170: n2p -= nn - dn;
171: /* In theory, we could fall out to the cute code below
172: since we now have exactly the situation that code
173: is designed to handle. We botch this badly and call
174: the basic mpn_sb_divrem_mn! */
175: if (dn == 2)
176: mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
177: else
178: mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
179: }
180: }
181:
182:
183: if (cnt != 0)
184: mpn_rshift (rp, n2p, dn, cnt);
185: else
186: MPN_COPY (rp, n2p, dn);
187: TMP_FREE (marker);
188: return;
189: }
190:
191: /* When we come here, the numerator/partial remainder is less
192: than twice the size of the denominator. */
193:
194: {
195: /* Problem:
196:
197: Divide a numerator N with nn limbs by a denominator D with dn
198: limbs forming a quotient of nn-dn+1 limbs. When qn is small
199: compared to dn, conventional division algorithms perform poorly.
200: We want an algorithm that has an expected running time that is
201: dependent only on qn. It is assumed that the most significant
202: limb of the numerator is smaller than the most significant limb
203: of the denominator.
204:
205: Algorithm (very informally stated):
206:
207: 1) Divide the 2 x qn most significant limbs from the numerator
208: by the qn most significant limbs from the denominator. Call
209: the result qest. This is either the correct quotient, but
210: might be 1 or 2 too large. Compute the remainder from the
211: division. (This step is implemented by a mpn_divrem call.)
212:
213: 2) Is the most significant limb from the remainder < p, where p
214: is the product of the most significant limb from the quotient
215: and the next(d). (Next(d) denotes the next ignored limb from
216: the denominator.) If it is, decrement qest, and adjust the
217: remainder accordingly.
218:
219: 3) Is the remainder >= qest? If it is, qest is the desired
220: quotient. The algorithm terminates.
221:
222: 4) Subtract qest x next(d) from the remainder. If there is
223: borrow out, decrement qest, and adjust the remainder
224: accordingly.
225:
226: 5) Skip one word from the denominator (i.e., let next(d) denote
227: the next less significant limb. */
228:
229: mp_size_t qn;
230: mp_ptr n2p, d2p;
231: mp_ptr tp;
232: mp_limb_t cy;
233: mp_size_t in, rn;
234: mp_limb_t quotient_too_large;
235: int cnt;
236:
237: qn = nn - dn;
238: qp[qn] = 0; /* zero high quotient limb */
239: qn += adjust; /* qn cannot become bigger */
240:
241: if (qn == 0)
242: {
243: MPN_COPY (rp, np, dn);
244: TMP_FREE (marker);
245: return;
246: }
247:
248: in = dn - qn; /* (at least partially) ignored # of limbs in ops */
249: /* Normalize denominator by shifting it to the left such that its
250: most significant bit is set. Then shift the numerator the same
251: amount, to mathematically preserve quotient. */
252: count_leading_zeros (cnt, dp[dn - 1]);
253: if (cnt != 0)
254: {
255: d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
256:
257: mpn_lshift (d2p, dp + in, qn, cnt);
258: d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt);
259:
260: n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
261: cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
262: if (adjust)
263: {
264: n2p[2 * qn] = cy;
265: n2p++;
266: }
267: else
268: {
269: n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt);
270: }
271: }
272: else
273: {
274: d2p = (mp_ptr) dp + in;
275:
276: n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
277: MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
278: if (adjust)
279: {
280: n2p[2 * qn] = 0;
281: n2p++;
282: }
283: }
284:
285: /* Get an approximate quotient using the extracted operands. */
286: if (qn == 1)
287: {
288: mp_limb_t q0, r0;
289: mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
290: /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
291: temps here. This doesn't hurt code quality on any machines
292: so we do it unconditionally. */
293: gcc272bug_n1 = n2p[1];
294: gcc272bug_n0 = n2p[0];
295: gcc272bug_d0 = d2p[0];
296: udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0);
297: n2p[0] = r0;
298: qp[0] = q0;
299: }
300: else if (qn == 2)
301: mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
302: else if (qn < BZ_THRESHOLD)
303: mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
304: else
305: mpn_bz_divrem_n (qp, n2p, d2p, qn);
306:
307: rn = qn;
308: /* Multiply the first ignored divisor limb by the most significant
309: quotient limb. If that product is > the partial remainder's
310: most significant limb, we know the quotient is too large. This
311: test quickly catches most cases where the quotient is too large;
312: it catches all cases where the quotient is 2 too large. */
313: {
314: mp_limb_t dl, x;
315: mp_limb_t h, l;
316:
317: if (in - 2 < 0)
318: dl = 0;
319: else
320: dl = dp[in - 2];
321:
322: x = SHL (dp[in - 1], dl, cnt);
323: umul_ppmm (h, l, x, qp[qn - 1]);
324:
325: if (n2p[qn - 1] < h)
326: {
327: mp_limb_t cy;
328:
329: mpn_decr_u (qp, (mp_limb_t) 1);
330: cy = mpn_add_n (n2p, n2p, d2p, qn);
331: if (cy)
332: {
333: /* The partial remainder is safely large. */
334: n2p[qn] = cy;
335: ++rn;
336: }
337: }
338: }
339:
340: quotient_too_large = 0;
341: if (cnt != 0)
342: {
343: mp_limb_t cy1, cy2;
344:
345: /* Append partially used numerator limb to partial remainder. */
346: cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
347: n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);
348:
349: /* Update partial remainder with partially used divisor limb. */
350: cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
351: if (qn != rn)
352: {
353: if (n2p[qn] < cy2)
354: abort ();
355: n2p[qn] -= cy2;
356: }
357: else
358: {
359: n2p[qn] = cy1 - cy2;
360:
361: quotient_too_large = (cy1 < cy2);
362: ++rn;
363: }
364: --in;
365: }
366: /* True: partial remainder now is neutral, i.e., it is not shifted up. */
367:
368: tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
369:
370: if (in < qn)
371: {
372: if (in == 0)
373: {
374: MPN_COPY (rp, n2p, rn);
375: if (rn != dn)
376: abort ();
377: goto foo;
378: }
379: mpn_mul (tp, qp, qn, dp, in);
380: }
381: else
382: mpn_mul (tp, dp, in, qp, qn);
383:
384: cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
385: MPN_COPY (rp + in, n2p, dn - in);
386: quotient_too_large |= cy;
387: cy = mpn_sub_n (rp, np, tp, in);
388: cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
389: quotient_too_large |= cy;
390: foo:
391: if (quotient_too_large)
392: {
393: mpn_decr_u (qp, (mp_limb_t) 1);
394: mpn_add_n (rp, rp, dp, dn);
395: }
396: }
397: TMP_FREE (marker);
398: return;
399: }
400: }
401: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>