version 1.1.1.1, 2000/09/09 14:12:27 |
version 1.1.1.2, 2003/08/25 16:06:20 |
|
|
FUTURE GNU MP RELEASE. |
FUTURE GNU MP RELEASE. |
|
|
|
|
Copyright (C) 1993, 1994, 1995, 1996, 2000 Free Software Foundation, Inc. |
Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002 Free Software Foundation, |
|
Inc. |
|
|
This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
|
|
Line 30 MA 02111-1307, USA. */ |
|
Line 31 MA 02111-1307, USA. */ |
|
#include "gmp-impl.h" |
#include "gmp-impl.h" |
#include "longlong.h" |
#include "longlong.h" |
|
|
|
|
|
/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd, |
|
meaning the quotient size where that should happen, the quotient size |
|
being how many udiv divisions will be done. |
|
|
|
The default is to use preinv always, CPUs where this doesn't suit have |
|
tuned thresholds. Note in particular that preinv should certainly be |
|
used if that's the only division available (USE_PREINV_ALWAYS). */ |
|
|
|
#ifndef DIV_SB_PREINV_THRESHOLD |
|
#define DIV_SB_PREINV_THRESHOLD 0 |
|
#endif |
|
|
|
|
/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write |
/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write |
the NSIZE-DSIZE least significant quotient limbs at QP |
the NSIZE-DSIZE least significant quotient limbs at QP |
and the DSIZE long remainder at NP. If QEXTRA_LIMBS is |
and the DSIZE long remainder at NP. |
non-zero, generate that many fraction bits and append them after the |
|
other quotient limbs. |
|
Return the most significant limb of the quotient, this is always 0 or 1. |
Return the most significant limb of the quotient, this is always 0 or 1. |
|
|
Preconditions: |
Preconditions: |
Line 44 MA 02111-1307, USA. */ |
|
Line 57 MA 02111-1307, USA. */ |
|
QP + DSIZE >= NP must hold true. (This means that it's |
QP + DSIZE >= NP must hold true. (This means that it's |
possible to put the quotient in the high part of NUM, right after the |
possible to put the quotient in the high part of NUM, right after the |
remainder in NUM. |
remainder in NUM. |
3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. |
3. NSIZE >= DSIZE. |
4. DSIZE >= 2. */ |
4. DSIZE >= 2. */ |
|
|
|
|
#define PREINVERT_VIABLE \ |
|
(UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! TARGET_REGISTER_STARVED */) |
|
|
|
mp_limb_t |
mp_limb_t |
#if __STDC__ |
|
mpn_sb_divrem_mn (mp_ptr qp, |
mpn_sb_divrem_mn (mp_ptr qp, |
mp_ptr np, mp_size_t nsize, |
mp_ptr np, mp_size_t nn, |
mp_srcptr dp, mp_size_t dsize) |
mp_srcptr dp, mp_size_t dn) |
#else |
|
mpn_sb_divrem_mn (qp, np, nsize, dp, dsize) |
|
mp_ptr qp; |
|
mp_ptr np; |
|
mp_size_t nsize; |
|
mp_srcptr dp; |
|
mp_size_t dsize; |
|
#endif |
|
{ |
{ |
mp_limb_t most_significant_q_limb = 0; |
mp_limb_t most_significant_q_limb = 0; |
|
mp_size_t qn = nn - dn; |
mp_size_t i; |
mp_size_t i; |
mp_limb_t dx, d1, n0; |
mp_limb_t dx, d1, n0; |
mp_limb_t dxinv; |
mp_limb_t dxinv; |
int have_preinv; |
int use_preinv; |
|
|
ASSERT_ALWAYS (dsize > 2); |
ASSERT (dn > 2); |
|
ASSERT (nn >= dn); |
|
ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT); |
|
ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn)); |
|
ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn)); |
|
ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np); |
|
ASSERT_MPN (np, nn); |
|
ASSERT_MPN (dp, dn); |
|
|
np += nsize - dsize; |
np += qn; |
dx = dp[dsize - 1]; |
dx = dp[dn - 1]; |
d1 = dp[dsize - 2]; |
d1 = dp[dn - 2]; |
n0 = np[dsize - 1]; |
n0 = np[dn - 1]; |
|
|
if (n0 >= dx) |
if (n0 >= dx) |
{ |
{ |
if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0) |
if (n0 > dx || mpn_cmp (np, dp, dn - 1) >= 0) |
{ |
{ |
mpn_sub_n (np, np, dp, dsize); |
mpn_sub_n (np, np, dp, dn); |
most_significant_q_limb = 1; |
most_significant_q_limb = 1; |
} |
} |
} |
} |
|
|
/* If multiplication is much faster than division, preinvert the |
/* use_preinv is possibly a constant, but it's left to the compiler to |
most significant divisor limb before entering the loop. */ |
optimize away the unused code in that case. */ |
if (PREINVERT_VIABLE) |
use_preinv = ABOVE_THRESHOLD (qn, DIV_SB_PREINV_THRESHOLD); |
{ |
if (use_preinv) |
have_preinv = 0; |
invert_limb (dxinv, dx); |
if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME) |
|
{ |
|
invert_limb (dxinv, dx); |
|
have_preinv = 1; |
|
} |
|
} |
|
|
|
for (i = nsize - dsize - 1; i >= 0; i--) |
for (i = qn - 1; i >= 0; i--) |
{ |
{ |
mp_limb_t q; |
mp_limb_t q; |
mp_limb_t nx; |
mp_limb_t nx; |
mp_limb_t cy_limb; |
mp_limb_t cy_limb; |
|
|
nx = np[dsize - 1]; |
nx = np[dn - 1]; /* FIXME: could get value from r1 */ |
np--; |
np--; |
|
|
if (nx == dx) |
if (nx == dx) |
{ |
{ |
/* This might over-estimate q, but it's probably not worth |
/* This might over-estimate q, but it's probably not worth |
the extra code here to find out. */ |
the extra code here to find out. */ |
q = ~(mp_limb_t) 0; |
q = GMP_NUMB_MASK; |
|
|
#if 1 |
#if 1 |
cy_limb = mpn_submul_1 (np, dp, dsize, q); |
cy_limb = mpn_submul_1 (np, dp, dn, q); |
#else |
#else |
/* This should be faster on many machines */ |
/* This should be faster on many machines */ |
cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize); |
cy_limb = mpn_sub_n (np + 1, np + 1, dp, dn); |
cy = mpn_add_n (np, np, dp, dsize); |
cy = mpn_add_n (np, np, dp, dn); |
np[dsize] += cy; |
np[dn] += cy; |
#endif |
#endif |
|
|
if (nx != cy_limb) |
if (nx != cy_limb) |
{ |
{ |
mpn_add_n (np, np, dp, dsize); |
mpn_add_n (np, np, dp, dn); |
q--; |
q--; |
} |
} |
|
|
Line 135 mpn_sb_divrem_mn (qp, np, nsize, dp, dsize) |
|
Line 138 mpn_sb_divrem_mn (qp, np, nsize, dp, dsize) |
|
{ |
{ |
mp_limb_t rx, r1, r0, p1, p0; |
mp_limb_t rx, r1, r0, p1, p0; |
|
|
/* "workaround" avoids a problem with gcc 2.7.2.3 i386 register |
/* "workaround" avoids a problem with gcc 2.7.2.3 i386 register usage |
usage when np[dsize-1] is used in an asm statement like |
when np[dn-1] is used in an asm statement like umul_ppmm in |
umul_ppmm in udiv_qrnnd_preinv. The symptom is seg faults due |
udiv_qrnnd_preinv. The symptom is seg faults due to registers |
to registers being clobbered. gcc 2.95 i386 doesn't have the |
being clobbered. gcc 2.95 i386 doesn't have the problem. */ |
problem. */ |
{ |
{ |
mp_limb_t workaround = np[dn - 1]; |
mp_limb_t workaround = np[dsize - 1]; |
if (use_preinv) |
if (PREINVERT_VIABLE && have_preinv) |
udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv); |
udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv); |
else |
else |
{ |
udiv_qrnnd (q, r1, nx, workaround, dx); |
udiv_qrnnd (q, r1, nx, workaround << GMP_NAIL_BITS, |
} |
dx << GMP_NAIL_BITS); |
umul_ppmm (p1, p0, d1, q); |
r1 >>= GMP_NAIL_BITS; |
|
} |
|
} |
|
umul_ppmm (p1, p0, d1, q << GMP_NAIL_BITS); |
|
p0 >>= GMP_NAIL_BITS; |
|
|
r0 = np[dsize - 2]; |
r0 = np[dn - 2]; |
rx = 0; |
rx = 0; |
if (r1 < p1 || (r1 == p1 && r0 < p0)) |
if (r1 < p1 || (r1 == p1 && r0 < p0)) |
{ |
{ |
p1 -= p0 < d1; |
p1 -= p0 < d1; |
p0 -= d1; |
p0 = (p0 - d1) & GMP_NUMB_MASK; |
q--; |
q--; |
r1 += dx; |
r1 = (r1 + dx) & GMP_NUMB_MASK; |
rx = r1 < dx; |
rx = r1 < dx; |
} |
} |
|
|
p1 += r0 < p0; /* cannot carry! */ |
p1 += r0 < p0; /* cannot carry! */ |
rx -= r1 < p1; /* may become 11..1 if q is still too large */ |
rx -= r1 < p1; /* may become 11..1 if q is still too large */ |
r1 -= p1; |
r1 = (r1 - p1) & GMP_NUMB_MASK; |
r0 -= p0; |
r0 = (r0 - p0) & GMP_NUMB_MASK; |
|
|
cy_limb = mpn_submul_1 (np, dp, dsize - 2, q); |
cy_limb = mpn_submul_1 (np, dp, dn - 2, q); |
|
|
|
/* Check if we've over-estimated q, and adjust as needed. */ |
{ |
{ |
mp_limb_t cy1, cy2; |
mp_limb_t cy1, cy2; |
cy1 = r0 < cy_limb; |
cy1 = r0 < cy_limb; |
r0 -= cy_limb; |
r0 = (r0 - cy_limb) & GMP_NUMB_MASK; |
cy2 = r1 < cy1; |
cy2 = r1 < cy1; |
r1 -= cy1; |
r1 -= cy1; |
np[dsize - 1] = r1; |
np[dn - 1] = r1; |
np[dsize - 2] = r0; |
np[dn - 2] = r0; |
if (cy2 != rx) |
if (cy2 != rx) |
{ |
{ |
mpn_add_n (np, np, dp, dsize); |
mpn_add_n (np, np, dp, dn); |
q--; |
q--; |
} |
} |
} |
} |
Line 190 mpn_sb_divrem_mn (qp, np, nsize, dp, dsize) |
|
Line 198 mpn_sb_divrem_mn (qp, np, nsize, dp, dsize) |
|
______ ______ |
______ ______ |
- |__p1__|__p0__| partial product to subtract |
- |__p1__|__p0__| partial product to subtract |
______ ______ |
______ ______ |
- |______|cylimb| |
- |______|cylimb| |
|
|
rx is -1, 0 or 1. If rx=1, then q is correct (it should match |
rx is -1, 0 or 1. If rx=1, then q is correct (it should match |
carry out). If rx=-1 then q is too large. If rx=0, then q might |
carry out). If rx=-1 then q is too large. If rx=0, then q might |