Annotation of OpenXM_contrib/gmp/mpn/generic/tdiv_qr.c, Revision 1.1.1.2
1.1 maekawa 1: /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2: write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
3: qxn is non-zero, generate that many fraction limbs and append them after the
4: other quotient limbs, and update the remainder accordningly. The input
5: operands are unaffected.
6:
7: Preconditions:
8: 1. The most significant limb of of the divisor must be non-zero.
9: 2. No argument overlap is permitted. (??? relax this ???)
10: 3. nn >= dn, even if qxn is non-zero. (??? relax this ???)
11:
12: The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
13: complexity of multiplication.
14:
1.1.1.2 ! ohara 15: Copyright 1997, 2000, 2001, 2002 Free Software Foundation, Inc.
1.1 maekawa 16:
17: This file is part of the GNU MP Library.
18:
19: The GNU MP Library is free software; you can redistribute it and/or modify
20: it under the terms of the GNU Lesser General Public License as published by
21: the Free Software Foundation; either version 2.1 of the License, or (at your
22: option) any later version.
23:
24: The GNU MP Library is distributed in the hope that it will be useful, but
25: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
27: License for more details.
28:
29: You should have received a copy of the GNU Lesser General Public License
30: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
31: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
32: MA 02111-1307, USA. */
33:
1.1.1.2 ! ohara 34: #include <stdlib.h>
1.1 maekawa 35: #include "gmp.h"
36: #include "gmp-impl.h"
37: #include "longlong.h"
38:
39:
40: void
41: mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
42: mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
43: {
44: /* FIXME:
45: 1. qxn
46: 2. pass allocated storage in additional parameter?
47: */
48: if (qxn != 0)
49: abort ();
50:
1.1.1.2 ! ohara 51: ASSERT (qxn >= 0);
! 52: ASSERT (nn >= 0);
! 53: ASSERT (dn >= 0);
! 54: ASSERT (dn == 0 || dp[dn - 1] != 0);
! 55: ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
! 56: ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
! 57:
1.1 maekawa 58: switch (dn)
59: {
60: case 0:
61: DIVIDE_BY_ZERO;
62:
63: case 1:
64: {
65: rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
66: return;
67: }
68:
69: case 2:
70: {
71: mp_ptr n2p, d2p;
72: mp_limb_t qhl, cy;
73: TMP_DECL (marker);
74: TMP_MARK (marker);
1.1.1.2 ! ohara 75: if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
1.1 maekawa 76: {
1.1.1.2 ! ohara 77: int cnt;
! 78: mp_limb_t dtmp[2];
! 79: count_leading_zeros (cnt, dp[1]);
! 80: cnt -= GMP_NAIL_BITS;
! 81: d2p = dtmp;
! 82: d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
! 83: d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
1.1 maekawa 84: n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
85: cy = mpn_lshift (n2p, np, nn, cnt);
86: n2p[nn] = cy;
87: qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
88: if (cy == 0)
1.1.1.2 ! ohara 89: qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
! 90: mpn_rshift (rp, n2p, (mp_size_t) 2, cnt);
1.1 maekawa 91: }
92: else
93: {
94: d2p = (mp_ptr) dp;
95: n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
96: MPN_COPY (n2p, np, nn);
97: qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
1.1.1.2 ! ohara 98: qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
! 99: MPN_COPY (rp, n2p, 2);
1.1 maekawa 100: }
101: TMP_FREE (marker);
102: return;
103: }
104:
105: default:
106: {
107: int adjust;
108: TMP_DECL (marker);
109: TMP_MARK (marker);
110: adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
111: if (nn + adjust >= 2 * dn)
112: {
113: mp_ptr n2p, d2p;
114: mp_limb_t cy;
115: int cnt;
116:
1.1.1.2 ! ohara 117: qp[nn - dn] = 0; /* zero high quotient limb */
! 118: if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
1.1 maekawa 119: {
1.1.1.2 ! ohara 120: count_leading_zeros (cnt, dp[dn - 1]);
! 121: cnt -= GMP_NAIL_BITS;
1.1 maekawa 122: d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
123: mpn_lshift (d2p, dp, dn, cnt);
124: n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
125: cy = mpn_lshift (n2p, np, nn, cnt);
126: n2p[nn] = cy;
127: nn += adjust;
128: }
129: else
130: {
1.1.1.2 ! ohara 131: cnt = 0;
1.1 maekawa 132: d2p = (mp_ptr) dp;
133: n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
134: MPN_COPY (n2p, np, nn);
135: n2p[nn] = 0;
136: nn += adjust;
137: }
138:
139: if (dn == 2)
140: mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
1.1.1.2 ! ohara 141: else if (dn < DIV_DC_THRESHOLD)
1.1 maekawa 142: mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
143: else
144: {
145: /* Perform 2*dn / dn limb divisions as long as the limbs
146: in np last. */
147: mp_ptr q2p = qp + nn - 2 * dn;
148: n2p += nn - 2 * dn;
1.1.1.2 ! ohara 149: mpn_dc_divrem_n (q2p, n2p, d2p, dn);
1.1 maekawa 150: nn -= dn;
151: while (nn >= 2 * dn)
152: {
153: mp_limb_t c;
154: q2p -= dn; n2p -= dn;
1.1.1.2 ! ohara 155: c = mpn_dc_divrem_n (q2p, n2p, d2p, dn);
1.1 maekawa 156: ASSERT_ALWAYS (c == 0);
157: nn -= dn;
158: }
159:
160: if (nn != dn)
161: {
162: n2p -= nn - dn;
163: /* In theory, we could fall out to the cute code below
164: since we now have exactly the situation that code
165: is designed to handle. We botch this badly and call
166: the basic mpn_sb_divrem_mn! */
167: if (dn == 2)
168: mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
169: else
170: mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
171: }
172: }
173:
174:
175: if (cnt != 0)
176: mpn_rshift (rp, n2p, dn, cnt);
177: else
178: MPN_COPY (rp, n2p, dn);
179: TMP_FREE (marker);
180: return;
181: }
182:
183: /* When we come here, the numerator/partial remainder is less
184: than twice the size of the denominator. */
185:
186: {
187: /* Problem:
188:
189: Divide a numerator N with nn limbs by a denominator D with dn
190: limbs forming a quotient of nn-dn+1 limbs. When qn is small
191: compared to dn, conventional division algorithms perform poorly.
192: We want an algorithm that has an expected running time that is
193: dependent only on qn. It is assumed that the most significant
194: limb of the numerator is smaller than the most significant limb
195: of the denominator.
196:
197: Algorithm (very informally stated):
198:
199: 1) Divide the 2 x qn most significant limbs from the numerator
200: by the qn most significant limbs from the denominator. Call
201: the result qest. This is either the correct quotient, but
202: might be 1 or 2 too large. Compute the remainder from the
203: division. (This step is implemented by a mpn_divrem call.)
204:
205: 2) Is the most significant limb from the remainder < p, where p
206: is the product of the most significant limb from the quotient
207: and the next(d). (Next(d) denotes the next ignored limb from
208: the denominator.) If it is, decrement qest, and adjust the
209: remainder accordingly.
210:
211: 3) Is the remainder >= qest? If it is, qest is the desired
212: quotient. The algorithm terminates.
213:
214: 4) Subtract qest x next(d) from the remainder. If there is
215: borrow out, decrement qest, and adjust the remainder
216: accordingly.
217:
218: 5) Skip one word from the denominator (i.e., let next(d) denote
219: the next less significant limb. */
220:
221: mp_size_t qn;
222: mp_ptr n2p, d2p;
223: mp_ptr tp;
224: mp_limb_t cy;
225: mp_size_t in, rn;
226: mp_limb_t quotient_too_large;
1.1.1.2 ! ohara 227: unsigned int cnt;
1.1 maekawa 228:
229: qn = nn - dn;
230: qp[qn] = 0; /* zero high quotient limb */
231: qn += adjust; /* qn cannot become bigger */
232:
233: if (qn == 0)
234: {
235: MPN_COPY (rp, np, dn);
236: TMP_FREE (marker);
237: return;
238: }
239:
240: in = dn - qn; /* (at least partially) ignored # of limbs in ops */
241: /* Normalize denominator by shifting it to the left such that its
242: most significant bit is set. Then shift the numerator the same
243: amount, to mathematically preserve quotient. */
1.1.1.2 ! ohara 244: if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
1.1 maekawa 245: {
1.1.1.2 ! ohara 246: count_leading_zeros (cnt, dp[dn - 1]);
! 247: cnt -= GMP_NAIL_BITS;
1.1 maekawa 248:
1.1.1.2 ! ohara 249: d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
1.1 maekawa 250: mpn_lshift (d2p, dp + in, qn, cnt);
1.1.1.2 ! ohara 251: d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
1.1 maekawa 252:
253: n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
254: cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
255: if (adjust)
256: {
257: n2p[2 * qn] = cy;
258: n2p++;
259: }
260: else
261: {
1.1.1.2 ! ohara 262: n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
1.1 maekawa 263: }
264: }
265: else
266: {
1.1.1.2 ! ohara 267: cnt = 0;
1.1 maekawa 268: d2p = (mp_ptr) dp + in;
269:
270: n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
271: MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
272: if (adjust)
273: {
274: n2p[2 * qn] = 0;
275: n2p++;
276: }
277: }
278:
279: /* Get an approximate quotient using the extracted operands. */
280: if (qn == 1)
281: {
282: mp_limb_t q0, r0;
283: mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
284: /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
285: temps here. This doesn't hurt code quality on any machines
286: so we do it unconditionally. */
287: gcc272bug_n1 = n2p[1];
288: gcc272bug_n0 = n2p[0];
289: gcc272bug_d0 = d2p[0];
1.1.1.2 ! ohara 290: udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0 << GMP_NAIL_BITS,
! 291: gcc272bug_d0 << GMP_NAIL_BITS);
! 292: r0 >>= GMP_NAIL_BITS;
1.1 maekawa 293: n2p[0] = r0;
294: qp[0] = q0;
295: }
296: else if (qn == 2)
297: mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
1.1.1.2 ! ohara 298: else if (qn < DIV_DC_THRESHOLD)
! 299: mpn_sb_divrem_mn (qp, n2p, 2 * qn, d2p, qn);
1.1 maekawa 300: else
1.1.1.2 ! ohara 301: mpn_dc_divrem_n (qp, n2p, d2p, qn);
1.1 maekawa 302:
303: rn = qn;
304: /* Multiply the first ignored divisor limb by the most significant
305: quotient limb. If that product is > the partial remainder's
306: most significant limb, we know the quotient is too large. This
307: test quickly catches most cases where the quotient is too large;
308: it catches all cases where the quotient is 2 too large. */
309: {
310: mp_limb_t dl, x;
311: mp_limb_t h, l;
312:
313: if (in - 2 < 0)
314: dl = 0;
315: else
316: dl = dp[in - 2];
317:
1.1.1.2 ! ohara 318: #if GMP_NAIL_BITS == 0
! 319: x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % BITS_PER_MP_LIMB));
! 320: #else
! 321: x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
! 322: if (cnt != 0)
! 323: x |= dl >> (GMP_NUMB_BITS - cnt);
! 324: #endif
! 325: umul_ppmm (h, l, x, qp[qn - 1] << GMP_NAIL_BITS);
! 326: l >>= GMP_NAIL_BITS;
1.1 maekawa 327:
328: if (n2p[qn - 1] < h)
329: {
330: mp_limb_t cy;
331:
332: mpn_decr_u (qp, (mp_limb_t) 1);
333: cy = mpn_add_n (n2p, n2p, d2p, qn);
334: if (cy)
335: {
336: /* The partial remainder is safely large. */
337: n2p[qn] = cy;
338: ++rn;
339: }
340: }
341: }
342:
343: quotient_too_large = 0;
344: if (cnt != 0)
345: {
346: mp_limb_t cy1, cy2;
347:
348: /* Append partially used numerator limb to partial remainder. */
1.1.1.2 ! ohara 349: cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
! 350: n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
1.1 maekawa 351:
352: /* Update partial remainder with partially used divisor limb. */
1.1.1.2 ! ohara 353: cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
1.1 maekawa 354: if (qn != rn)
355: {
356: if (n2p[qn] < cy2)
357: abort ();
358: n2p[qn] -= cy2;
359: }
360: else
361: {
1.1.1.2 ! ohara 362: n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
1.1 maekawa 363:
364: quotient_too_large = (cy1 < cy2);
365: ++rn;
366: }
367: --in;
368: }
369: /* True: partial remainder now is neutral, i.e., it is not shifted up. */
370:
371: tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
372:
373: if (in < qn)
374: {
375: if (in == 0)
376: {
377: MPN_COPY (rp, n2p, rn);
378: if (rn != dn)
379: abort ();
380: goto foo;
381: }
382: mpn_mul (tp, qp, qn, dp, in);
383: }
384: else
385: mpn_mul (tp, dp, in, qp, qn);
386:
387: cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
388: MPN_COPY (rp + in, n2p, dn - in);
389: quotient_too_large |= cy;
390: cy = mpn_sub_n (rp, np, tp, in);
391: cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
392: quotient_too_large |= cy;
393: foo:
394: if (quotient_too_large)
395: {
396: mpn_decr_u (qp, (mp_limb_t) 1);
397: mpn_add_n (rp, rp, dp, dn);
398: }
399: }
400: TMP_FREE (marker);
401: return;
402: }
403: }
404: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>