version 1.1.1.1, 2000/09/09 14:12:28 |
version 1.1.1.2, 2003/08/25 16:06:21 |
|
|
The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time |
The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time |
complexity of multiplication. |
complexity of multiplication. |
|
|
Copyright (C) 1997, 2000 Free Software Foundation, Inc. |
Copyright 1997, 2000, 2001, 2002 Free Software Foundation, Inc. |
|
|
This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
|
|
Line 31 along with the GNU MP Library; see the file COPYING.LI |
|
Line 31 along with the GNU MP Library; see the file COPYING.LI |
|
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
MA 02111-1307, USA. */ |
MA 02111-1307, USA. */ |
|
|
|
#include <stdlib.h> |
#include "gmp.h" |
#include "gmp.h" |
#include "gmp-impl.h" |
#include "gmp-impl.h" |
#include "longlong.h" |
#include "longlong.h" |
|
|
#ifndef BZ_THRESHOLD |
|
#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD) |
|
#endif |
|
|
|
/* Extract the middle limb from ((h,,l) << cnt) */ |
|
#define SHL(h,l,cnt) \ |
|
((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1)))) |
|
|
|
void |
void |
#if __STDC__ |
|
mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, |
mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, |
mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) |
mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) |
#else |
|
mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
mp_ptr qp; |
|
mp_ptr rp; |
|
mp_size_t qxn; |
|
mp_srcptr np; |
|
mp_size_t nn; |
|
mp_srcptr dp; |
|
mp_size_t dn; |
|
#endif |
|
{ |
{ |
/* FIXME: |
/* FIXME: |
1. qxn |
1. qxn |
Line 65 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 48 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
if (qxn != 0) |
if (qxn != 0) |
abort (); |
abort (); |
|
|
|
ASSERT (qxn >= 0); |
|
ASSERT (nn >= 0); |
|
ASSERT (dn >= 0); |
|
ASSERT (dn == 0 || dp[dn - 1] != 0); |
|
ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn)); |
|
ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn)); |
|
|
switch (dn) |
switch (dn) |
{ |
{ |
case 0: |
case 0: |
Line 78 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 68 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
|
|
case 2: |
case 2: |
{ |
{ |
int cnt; |
|
mp_ptr n2p, d2p; |
mp_ptr n2p, d2p; |
mp_limb_t qhl, cy; |
mp_limb_t qhl, cy; |
TMP_DECL (marker); |
TMP_DECL (marker); |
TMP_MARK (marker); |
TMP_MARK (marker); |
count_leading_zeros (cnt, dp[dn - 1]); |
if ((dp[1] & GMP_NUMB_HIGHBIT) == 0) |
if (cnt != 0) |
|
{ |
{ |
d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); |
int cnt; |
mpn_lshift (d2p, dp, dn, cnt); |
mp_limb_t dtmp[2]; |
|
count_leading_zeros (cnt, dp[1]); |
|
cnt -= GMP_NAIL_BITS; |
|
d2p = dtmp; |
|
d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt)); |
|
d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK; |
n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); |
n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); |
cy = mpn_lshift (n2p, np, nn, cnt); |
cy = mpn_lshift (n2p, np, nn, cnt); |
n2p[nn] = cy; |
n2p[nn] = cy; |
qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p); |
qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p); |
if (cy == 0) |
if (cy == 0) |
qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */ |
qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */ |
|
mpn_rshift (rp, n2p, (mp_size_t) 2, cnt); |
} |
} |
else |
else |
{ |
{ |
Line 101 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 95 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB); |
n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB); |
MPN_COPY (n2p, np, nn); |
MPN_COPY (n2p, np, nn); |
qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p); |
qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p); |
qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */ |
qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */ |
|
MPN_COPY (rp, n2p, 2); |
} |
} |
|
|
if (cnt != 0) |
|
mpn_rshift (rp, n2p, dn, cnt); |
|
else |
|
MPN_COPY (rp, n2p, dn); |
|
TMP_FREE (marker); |
TMP_FREE (marker); |
return; |
return; |
} |
} |
Line 123 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 113 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
mp_ptr n2p, d2p; |
mp_ptr n2p, d2p; |
mp_limb_t cy; |
mp_limb_t cy; |
int cnt; |
int cnt; |
count_leading_zeros (cnt, dp[dn - 1]); |
|
|
|
qp[nn - dn] = 0; /* zero high quotient limb */ |
qp[nn - dn] = 0; /* zero high quotient limb */ |
if (cnt != 0) /* normalize divisor if needed */ |
if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */ |
{ |
{ |
|
count_leading_zeros (cnt, dp[dn - 1]); |
|
cnt -= GMP_NAIL_BITS; |
d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); |
d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); |
mpn_lshift (d2p, dp, dn, cnt); |
mpn_lshift (d2p, dp, dn, cnt); |
n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); |
n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); |
Line 137 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 128 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
} |
} |
else |
else |
{ |
{ |
|
cnt = 0; |
d2p = (mp_ptr) dp; |
d2p = (mp_ptr) dp; |
n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); |
n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); |
MPN_COPY (n2p, np, nn); |
MPN_COPY (n2p, np, nn); |
Line 146 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 138 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
|
|
if (dn == 2) |
if (dn == 2) |
mpn_divrem_2 (qp, 0L, n2p, nn, d2p); |
mpn_divrem_2 (qp, 0L, n2p, nn, d2p); |
else if (dn < BZ_THRESHOLD) |
else if (dn < DIV_DC_THRESHOLD) |
mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn); |
mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn); |
else |
else |
{ |
{ |
Line 154 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 146 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
in np last. */ |
in np last. */ |
mp_ptr q2p = qp + nn - 2 * dn; |
mp_ptr q2p = qp + nn - 2 * dn; |
n2p += nn - 2 * dn; |
n2p += nn - 2 * dn; |
mpn_bz_divrem_n (q2p, n2p, d2p, dn); |
mpn_dc_divrem_n (q2p, n2p, d2p, dn); |
nn -= dn; |
nn -= dn; |
while (nn >= 2 * dn) |
while (nn >= 2 * dn) |
{ |
{ |
mp_limb_t c; |
mp_limb_t c; |
q2p -= dn; n2p -= dn; |
q2p -= dn; n2p -= dn; |
c = mpn_bz_divrem_n (q2p, n2p, d2p, dn); |
c = mpn_dc_divrem_n (q2p, n2p, d2p, dn); |
ASSERT_ALWAYS (c == 0); |
ASSERT_ALWAYS (c == 0); |
nn -= dn; |
nn -= dn; |
} |
} |
Line 232 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 224 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
mp_limb_t cy; |
mp_limb_t cy; |
mp_size_t in, rn; |
mp_size_t in, rn; |
mp_limb_t quotient_too_large; |
mp_limb_t quotient_too_large; |
int cnt; |
unsigned int cnt; |
|
|
qn = nn - dn; |
qn = nn - dn; |
qp[qn] = 0; /* zero high quotient limb */ |
qp[qn] = 0; /* zero high quotient limb */ |
Line 249 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 241 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
/* Normalize denominator by shifting it to the left such that its |
/* Normalize denominator by shifting it to the left such that its |
most significant bit is set. Then shift the numerator the same |
most significant bit is set. Then shift the numerator the same |
amount, to mathematically preserve quotient. */ |
amount, to mathematically preserve quotient. */ |
count_leading_zeros (cnt, dp[dn - 1]); |
if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) |
if (cnt != 0) |
|
{ |
{ |
d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB); |
count_leading_zeros (cnt, dp[dn - 1]); |
|
cnt -= GMP_NAIL_BITS; |
|
|
|
d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB); |
mpn_lshift (d2p, dp + in, qn, cnt); |
mpn_lshift (d2p, dp + in, qn, cnt); |
d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt); |
d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt); |
|
|
n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB); |
n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB); |
cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt); |
cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt); |
Line 266 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 259 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
} |
} |
else |
else |
{ |
{ |
n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt); |
n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt); |
} |
} |
} |
} |
else |
else |
{ |
{ |
|
cnt = 0; |
d2p = (mp_ptr) dp + in; |
d2p = (mp_ptr) dp + in; |
|
|
n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB); |
n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB); |
Line 293 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 287 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
gcc272bug_n1 = n2p[1]; |
gcc272bug_n1 = n2p[1]; |
gcc272bug_n0 = n2p[0]; |
gcc272bug_n0 = n2p[0]; |
gcc272bug_d0 = d2p[0]; |
gcc272bug_d0 = d2p[0]; |
udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0); |
udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0 << GMP_NAIL_BITS, |
|
gcc272bug_d0 << GMP_NAIL_BITS); |
|
r0 >>= GMP_NAIL_BITS; |
n2p[0] = r0; |
n2p[0] = r0; |
qp[0] = q0; |
qp[0] = q0; |
} |
} |
else if (qn == 2) |
else if (qn == 2) |
mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); |
mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); |
else if (qn < BZ_THRESHOLD) |
else if (qn < DIV_DC_THRESHOLD) |
mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn); |
mpn_sb_divrem_mn (qp, n2p, 2 * qn, d2p, qn); |
else |
else |
mpn_bz_divrem_n (qp, n2p, d2p, qn); |
mpn_dc_divrem_n (qp, n2p, d2p, qn); |
|
|
rn = qn; |
rn = qn; |
/* Multiply the first ignored divisor limb by the most significant |
/* Multiply the first ignored divisor limb by the most significant |
Line 319 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 315 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
else |
else |
dl = dp[in - 2]; |
dl = dp[in - 2]; |
|
|
x = SHL (dp[in - 1], dl, cnt); |
#if GMP_NAIL_BITS == 0 |
umul_ppmm (h, l, x, qp[qn - 1]); |
x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % BITS_PER_MP_LIMB)); |
|
#else |
|
x = (dp[in - 1] << cnt) & GMP_NUMB_MASK; |
|
if (cnt != 0) |
|
x |= dl >> (GMP_NUMB_BITS - cnt); |
|
#endif |
|
umul_ppmm (h, l, x, qp[qn - 1] << GMP_NAIL_BITS); |
|
l >>= GMP_NAIL_BITS; |
|
|
if (n2p[qn - 1] < h) |
if (n2p[qn - 1] < h) |
{ |
{ |
Line 343 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 346 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
mp_limb_t cy1, cy2; |
mp_limb_t cy1, cy2; |
|
|
/* Append partially used numerator limb to partial remainder. */ |
/* Append partially used numerator limb to partial remainder. */ |
cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt); |
cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt); |
n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt); |
n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt); |
|
|
/* Update partial remainder with partially used divisor limb. */ |
/* Update partial remainder with partially used divisor limb. */ |
cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt)); |
cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt)); |
if (qn != rn) |
if (qn != rn) |
{ |
{ |
if (n2p[qn] < cy2) |
if (n2p[qn] < cy2) |
Line 356 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
Line 359 mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) |
|
} |
} |
else |
else |
{ |
{ |
n2p[qn] = cy1 - cy2; |
n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */ |
|
|
quotient_too_large = (cy1 < cy2); |
quotient_too_large = (cy1 < cy2); |
++rn; |
++rn; |