Annotation of OpenXM_contrib/gmp/mpn/generic/tdiv_qr.c, Revision 1.1
1.1 ! maekawa 1: /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
! 2: write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
! 3: qxn is non-zero, generate that many fraction limbs and append them after the
! 4: other quotient limbs, and update the remainder accordningly. The input
! 5: operands are unaffected.
! 6:
! 7: Preconditions:
! 8: 1. The most significant limb of of the divisor must be non-zero.
! 9: 2. No argument overlap is permitted. (??? relax this ???)
! 10: 3. nn >= dn, even if qxn is non-zero. (??? relax this ???)
! 11:
! 12: The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
! 13: complexity of multiplication.
! 14:
! 15: Copyright (C) 1997, 2000 Free Software Foundation, Inc.
! 16:
! 17: This file is part of the GNU MP Library.
! 18:
! 19: The GNU MP Library is free software; you can redistribute it and/or modify
! 20: it under the terms of the GNU Lesser General Public License as published by
! 21: the Free Software Foundation; either version 2.1 of the License, or (at your
! 22: option) any later version.
! 23:
! 24: The GNU MP Library is distributed in the hope that it will be useful, but
! 25: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 26: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 27: License for more details.
! 28:
! 29: You should have received a copy of the GNU Lesser General Public License
! 30: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 31: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 32: MA 02111-1307, USA. */
! 33:
! 34: #include "gmp.h"
! 35: #include "gmp-impl.h"
! 36: #include "longlong.h"
! 37:
! 38: #ifndef BZ_THRESHOLD
! 39: #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
! 40: #endif
! 41:
! 42: /* Extract the middle limb from ((h,,l) << cnt) */
! 43: #define SHL(h,l,cnt) \
! 44: ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1))))
! 45:
! 46: void
! 47: #if __STDC__
! 48: mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
! 49: mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
! 50: #else
! 51: mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
! 52: mp_ptr qp;
! 53: mp_ptr rp;
! 54: mp_size_t qxn;
! 55: mp_srcptr np;
! 56: mp_size_t nn;
! 57: mp_srcptr dp;
! 58: mp_size_t dn;
! 59: #endif
! 60: {
! 61: /* FIXME:
! 62: 1. qxn
! 63: 2. pass allocated storage in additional parameter?
! 64: */
! 65: if (qxn != 0)
! 66: abort ();
! 67:
! 68: switch (dn)
! 69: {
! 70: case 0:
! 71: DIVIDE_BY_ZERO;
! 72:
! 73: case 1:
! 74: {
! 75: rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
! 76: return;
! 77: }
! 78:
! 79: case 2:
! 80: {
! 81: int cnt;
! 82: mp_ptr n2p, d2p;
! 83: mp_limb_t qhl, cy;
! 84: TMP_DECL (marker);
! 85: TMP_MARK (marker);
! 86: count_leading_zeros (cnt, dp[dn - 1]);
! 87: if (cnt != 0)
! 88: {
! 89: d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
! 90: mpn_lshift (d2p, dp, dn, cnt);
! 91: n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
! 92: cy = mpn_lshift (n2p, np, nn, cnt);
! 93: n2p[nn] = cy;
! 94: qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
! 95: if (cy == 0)
! 96: qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
! 97: }
! 98: else
! 99: {
! 100: d2p = (mp_ptr) dp;
! 101: n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
! 102: MPN_COPY (n2p, np, nn);
! 103: qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
! 104: qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
! 105: }
! 106:
! 107: if (cnt != 0)
! 108: mpn_rshift (rp, n2p, dn, cnt);
! 109: else
! 110: MPN_COPY (rp, n2p, dn);
! 111: TMP_FREE (marker);
! 112: return;
! 113: }
! 114:
! 115: default:
! 116: {
! 117: int adjust;
! 118: TMP_DECL (marker);
! 119: TMP_MARK (marker);
! 120: adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
! 121: if (nn + adjust >= 2 * dn)
! 122: {
! 123: mp_ptr n2p, d2p;
! 124: mp_limb_t cy;
! 125: int cnt;
! 126: count_leading_zeros (cnt, dp[dn - 1]);
! 127:
! 128: qp[nn - dn] = 0; /* zero high quotient limb */
! 129: if (cnt != 0) /* normalize divisor if needed */
! 130: {
! 131: d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
! 132: mpn_lshift (d2p, dp, dn, cnt);
! 133: n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
! 134: cy = mpn_lshift (n2p, np, nn, cnt);
! 135: n2p[nn] = cy;
! 136: nn += adjust;
! 137: }
! 138: else
! 139: {
! 140: d2p = (mp_ptr) dp;
! 141: n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
! 142: MPN_COPY (n2p, np, nn);
! 143: n2p[nn] = 0;
! 144: nn += adjust;
! 145: }
! 146:
! 147: if (dn == 2)
! 148: mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
! 149: else if (dn < BZ_THRESHOLD)
! 150: mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
! 151: else
! 152: {
! 153: /* Perform 2*dn / dn limb divisions as long as the limbs
! 154: in np last. */
! 155: mp_ptr q2p = qp + nn - 2 * dn;
! 156: n2p += nn - 2 * dn;
! 157: mpn_bz_divrem_n (q2p, n2p, d2p, dn);
! 158: nn -= dn;
! 159: while (nn >= 2 * dn)
! 160: {
! 161: mp_limb_t c;
! 162: q2p -= dn; n2p -= dn;
! 163: c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
! 164: ASSERT_ALWAYS (c == 0);
! 165: nn -= dn;
! 166: }
! 167:
! 168: if (nn != dn)
! 169: {
! 170: n2p -= nn - dn;
! 171: /* In theory, we could fall out to the cute code below
! 172: since we now have exactly the situation that code
! 173: is designed to handle. We botch this badly and call
! 174: the basic mpn_sb_divrem_mn! */
! 175: if (dn == 2)
! 176: mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
! 177: else
! 178: mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
! 179: }
! 180: }
! 181:
! 182:
! 183: if (cnt != 0)
! 184: mpn_rshift (rp, n2p, dn, cnt);
! 185: else
! 186: MPN_COPY (rp, n2p, dn);
! 187: TMP_FREE (marker);
! 188: return;
! 189: }
! 190:
! 191: /* When we come here, the numerator/partial remainder is less
! 192: than twice the size of the denominator. */
! 193:
! 194: {
! 195: /* Problem:
! 196:
! 197: Divide a numerator N with nn limbs by a denominator D with dn
! 198: limbs forming a quotient of nn-dn+1 limbs. When qn is small
! 199: compared to dn, conventional division algorithms perform poorly.
! 200: We want an algorithm that has an expected running time that is
! 201: dependent only on qn. It is assumed that the most significant
! 202: limb of the numerator is smaller than the most significant limb
! 203: of the denominator.
! 204:
! 205: Algorithm (very informally stated):
! 206:
! 207: 1) Divide the 2 x qn most significant limbs from the numerator
! 208: by the qn most significant limbs from the denominator. Call
! 209: the result qest. This is either the correct quotient, but
! 210: might be 1 or 2 too large. Compute the remainder from the
! 211: division. (This step is implemented by a mpn_divrem call.)
! 212:
! 213: 2) Is the most significant limb from the remainder < p, where p
! 214: is the product of the most significant limb from the quotient
! 215: and the next(d). (Next(d) denotes the next ignored limb from
! 216: the denominator.) If it is, decrement qest, and adjust the
! 217: remainder accordingly.
! 218:
! 219: 3) Is the remainder >= qest? If it is, qest is the desired
! 220: quotient. The algorithm terminates.
! 221:
! 222: 4) Subtract qest x next(d) from the remainder. If there is
! 223: borrow out, decrement qest, and adjust the remainder
! 224: accordingly.
! 225:
! 226: 5) Skip one word from the denominator (i.e., let next(d) denote
! 227: the next less significant limb. */
! 228:
! 229: mp_size_t qn;
! 230: mp_ptr n2p, d2p;
! 231: mp_ptr tp;
! 232: mp_limb_t cy;
! 233: mp_size_t in, rn;
! 234: mp_limb_t quotient_too_large;
! 235: int cnt;
! 236:
! 237: qn = nn - dn;
! 238: qp[qn] = 0; /* zero high quotient limb */
! 239: qn += adjust; /* qn cannot become bigger */
! 240:
! 241: if (qn == 0)
! 242: {
! 243: MPN_COPY (rp, np, dn);
! 244: TMP_FREE (marker);
! 245: return;
! 246: }
! 247:
! 248: in = dn - qn; /* (at least partially) ignored # of limbs in ops */
! 249: /* Normalize denominator by shifting it to the left such that its
! 250: most significant bit is set. Then shift the numerator the same
! 251: amount, to mathematically preserve quotient. */
! 252: count_leading_zeros (cnt, dp[dn - 1]);
! 253: if (cnt != 0)
! 254: {
! 255: d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
! 256:
! 257: mpn_lshift (d2p, dp + in, qn, cnt);
! 258: d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt);
! 259:
! 260: n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
! 261: cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
! 262: if (adjust)
! 263: {
! 264: n2p[2 * qn] = cy;
! 265: n2p++;
! 266: }
! 267: else
! 268: {
! 269: n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt);
! 270: }
! 271: }
! 272: else
! 273: {
! 274: d2p = (mp_ptr) dp + in;
! 275:
! 276: n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
! 277: MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
! 278: if (adjust)
! 279: {
! 280: n2p[2 * qn] = 0;
! 281: n2p++;
! 282: }
! 283: }
! 284:
! 285: /* Get an approximate quotient using the extracted operands. */
! 286: if (qn == 1)
! 287: {
! 288: mp_limb_t q0, r0;
! 289: mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
! 290: /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
! 291: temps here. This doesn't hurt code quality on any machines
! 292: so we do it unconditionally. */
! 293: gcc272bug_n1 = n2p[1];
! 294: gcc272bug_n0 = n2p[0];
! 295: gcc272bug_d0 = d2p[0];
! 296: udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0);
! 297: n2p[0] = r0;
! 298: qp[0] = q0;
! 299: }
! 300: else if (qn == 2)
! 301: mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
! 302: else if (qn < BZ_THRESHOLD)
! 303: mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
! 304: else
! 305: mpn_bz_divrem_n (qp, n2p, d2p, qn);
! 306:
! 307: rn = qn;
! 308: /* Multiply the first ignored divisor limb by the most significant
! 309: quotient limb. If that product is > the partial remainder's
! 310: most significant limb, we know the quotient is too large. This
! 311: test quickly catches most cases where the quotient is too large;
! 312: it catches all cases where the quotient is 2 too large. */
! 313: {
! 314: mp_limb_t dl, x;
! 315: mp_limb_t h, l;
! 316:
! 317: if (in - 2 < 0)
! 318: dl = 0;
! 319: else
! 320: dl = dp[in - 2];
! 321:
! 322: x = SHL (dp[in - 1], dl, cnt);
! 323: umul_ppmm (h, l, x, qp[qn - 1]);
! 324:
! 325: if (n2p[qn - 1] < h)
! 326: {
! 327: mp_limb_t cy;
! 328:
! 329: mpn_decr_u (qp, (mp_limb_t) 1);
! 330: cy = mpn_add_n (n2p, n2p, d2p, qn);
! 331: if (cy)
! 332: {
! 333: /* The partial remainder is safely large. */
! 334: n2p[qn] = cy;
! 335: ++rn;
! 336: }
! 337: }
! 338: }
! 339:
! 340: quotient_too_large = 0;
! 341: if (cnt != 0)
! 342: {
! 343: mp_limb_t cy1, cy2;
! 344:
! 345: /* Append partially used numerator limb to partial remainder. */
! 346: cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
! 347: n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);
! 348:
! 349: /* Update partial remainder with partially used divisor limb. */
! 350: cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
! 351: if (qn != rn)
! 352: {
! 353: if (n2p[qn] < cy2)
! 354: abort ();
! 355: n2p[qn] -= cy2;
! 356: }
! 357: else
! 358: {
! 359: n2p[qn] = cy1 - cy2;
! 360:
! 361: quotient_too_large = (cy1 < cy2);
! 362: ++rn;
! 363: }
! 364: --in;
! 365: }
! 366: /* True: partial remainder now is neutral, i.e., it is not shifted up. */
! 367:
! 368: tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
! 369:
! 370: if (in < qn)
! 371: {
! 372: if (in == 0)
! 373: {
! 374: MPN_COPY (rp, n2p, rn);
! 375: if (rn != dn)
! 376: abort ();
! 377: goto foo;
! 378: }
! 379: mpn_mul (tp, qp, qn, dp, in);
! 380: }
! 381: else
! 382: mpn_mul (tp, dp, in, qp, qn);
! 383:
! 384: cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
! 385: MPN_COPY (rp + in, n2p, dn - in);
! 386: quotient_too_large |= cy;
! 387: cy = mpn_sub_n (rp, np, tp, in);
! 388: cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
! 389: quotient_too_large |= cy;
! 390: foo:
! 391: if (quotient_too_large)
! 392: {
! 393: mpn_decr_u (qp, (mp_limb_t) 1);
! 394: mpn_add_n (rp, rp, dp, dn);
! 395: }
! 396: }
! 397: TMP_FREE (marker);
! 398: return;
! 399: }
! 400: }
! 401: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>