===================================================================
RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/generic/Attic/sb_divrem_mn.c,v
retrieving revision 1.1
retrieving revision 1.1.1.2
diff -u -p -r1.1 -r1.1.1.2
--- OpenXM_contrib/gmp/mpn/generic/Attic/sb_divrem_mn.c 2000/09/09 14:12:27 1.1
+++ OpenXM_contrib/gmp/mpn/generic/Attic/sb_divrem_mn.c 2003/08/25 16:06:20 1.1.1.2
@@ -7,7 +7,8 @@
    FUTURE GNU MP RELEASE.
 
-Copyright (C) 1993, 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
+Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
@@ -30,11 +31,23 @@ MA 02111-1307, USA. */
 #include "gmp-impl.h"
 #include "longlong.h"
 
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+   meaning the quotient size where that should happen, the quotient size
+   being how many udiv divisions will be done.
+
+   The default is to use preinv always, CPUs where this doesn't suit have
+   tuned thresholds.  Note in particular that preinv should certainly be
+   used if that's the only division available (USE_PREINV_ALWAYS).  */
+
+#ifndef DIV_SB_PREINV_THRESHOLD
+#define DIV_SB_PREINV_THRESHOLD  0
+#endif
+
+
 /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
    the NSIZE-DSIZE least significant quotient limbs at QP
-   and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is
-   non-zero, generate that many fraction bits and append them after the
-   other quotient limbs.
+   and the DSIZE long remainder at NP.
    Return the most significant limb of the quotient, this is always 0 or 1.
 
    Preconditions:
@@ -44,88 +57,78 @@ MA 02111-1307, USA. */
       QP + DSIZE >= NP must hold true.  (This means that it's
       possible to put the quotient in the high part of NUM, right after the
       remainder in NUM.
-   3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
+   3. NSIZE >= DSIZE.
    4. DSIZE >= 2.  */
 
-#define PREINVERT_VIABLE \
-  (UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! TARGET_REGISTER_STARVED */)
-
 mp_limb_t
-#if __STDC__
 mpn_sb_divrem_mn (mp_ptr qp,
-                  mp_ptr np, mp_size_t nsize,
-                  mp_srcptr dp, mp_size_t dsize)
-#else
-mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
-     mp_ptr qp;
-     mp_ptr np;
-     mp_size_t nsize;
-     mp_srcptr dp;
-     mp_size_t dsize;
-#endif
+                  mp_ptr np, mp_size_t nn,
+                  mp_srcptr dp, mp_size_t dn)
 {
   mp_limb_t most_significant_q_limb = 0;
+  mp_size_t qn = nn - dn;
   mp_size_t i;
   mp_limb_t dx, d1, n0;
   mp_limb_t dxinv;
-  int have_preinv;
+  int use_preinv;
 
-  ASSERT_ALWAYS (dsize > 2);
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, dn);
 
-  np += nsize - dsize;
-  dx = dp[dsize - 1];
-  d1 = dp[dsize - 2];
-  n0 = np[dsize - 1];
+  np += qn;
+  dx = dp[dn - 1];
+  d1 = dp[dn - 2];
+  n0 = np[dn - 1];
 
   if (n0 >= dx)
     {
-      if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0)
+      if (n0 > dx || mpn_cmp (np, dp, dn - 1) >= 0)
         {
-          mpn_sub_n (np, np, dp, dsize);
+          mpn_sub_n (np, np, dp, dn);
           most_significant_q_limb = 1;
         }
     }
 
-  /* If multiplication is much faster than division, preinvert the
-     most significant divisor limb before entering the loop.  */
-  if (PREINVERT_VIABLE)
-    {
-      have_preinv = 0;
-      if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME)
-        {
-          invert_limb (dxinv, dx);
-          have_preinv = 1;
-        }
-    }
+  /* use_preinv is possibly a constant, but it's left to the compiler to
+     optimize away the unused code in that case.  */
+  use_preinv = ABOVE_THRESHOLD (qn, DIV_SB_PREINV_THRESHOLD);
+  if (use_preinv)
+    invert_limb (dxinv, dx);
 
-  for (i = nsize - dsize - 1; i >= 0; i--)
+  for (i = qn - 1; i >= 0; i--)
     {
       mp_limb_t q;
       mp_limb_t nx;
      mp_limb_t cy_limb;
 
-      nx = np[dsize - 1];
+      nx = np[dn - 1];          /* FIXME: could get value from r1 */
       np--;
 
       if (nx == dx)
         {
           /* This might over-estimate q, but it's probably not worth
              the extra code here to find out.  */
-          q = ~(mp_limb_t) 0;
+          q = GMP_NUMB_MASK;
 
 #if 1
-          cy_limb = mpn_submul_1 (np, dp, dsize, q);
+          cy_limb = mpn_submul_1 (np, dp, dn, q);
 #else
           /* This should be faster on many machines */
-          cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize);
-          cy = mpn_add_n (np, np, dp, dsize);
-          np[dsize] += cy;
+          cy_limb = mpn_sub_n (np + 1, np + 1, dp, dn);
+          cy = mpn_add_n (np, np, dp, dn);
+          np[dn] += cy;
 #endif
 
           if (nx != cy_limb)
             {
-              mpn_add_n (np, np, dp, dsize);
+              mpn_add_n (np, np, dp, dn);
               q--;
             }
 
@@ -135,49 +138,54 @@ mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
         {
           mp_limb_t rx, r1, r0, p1, p0;
 
-          /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register
-             usage when np[dsize-1] is used in an asm statement like
-             umul_ppmm in udiv_qrnnd_preinv.  The symptom is seg faults due
-             to registers being clobbered.  gcc 2.95 i386 doesn't have the
-             problem. */
-          {
-            mp_limb_t workaround = np[dsize - 1];
-            if (PREINVERT_VIABLE && have_preinv)
-              udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
-            else
-              udiv_qrnnd (q, r1, nx, workaround, dx);
-          }
-          umul_ppmm (p1, p0, d1, q);
+          /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register usage
+             when np[dn-1] is used in an asm statement like umul_ppmm in
+             udiv_qrnnd_preinv.  The symptom is seg faults due to registers
+             being clobbered.  gcc 2.95 i386 doesn't have the problem. */
+          {
+            mp_limb_t workaround = np[dn - 1];
+            if (use_preinv)
+              udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
+            else
+              {
+                udiv_qrnnd (q, r1, nx, workaround << GMP_NAIL_BITS,
+                            dx << GMP_NAIL_BITS);
+                r1 >>= GMP_NAIL_BITS;
+              }
+          }
+          umul_ppmm (p1, p0, d1, q << GMP_NAIL_BITS);
+          p0 >>= GMP_NAIL_BITS;
 
-          r0 = np[dsize - 2];
+          r0 = np[dn - 2];
           rx = 0;
           if (r1 < p1 || (r1 == p1 && r0 < p0))
            {
               p1 -= p0 < d1;
-              p0 -= d1;
+              p0 = (p0 - d1) & GMP_NUMB_MASK;
               q--;
-              r1 += dx;
+              r1 = (r1 + dx) & GMP_NUMB_MASK;
               rx = r1 < dx;
             }
 
           p1 += r0 < p0;        /* cannot carry! */
           rx -= r1 < p1;        /* may become 11..1 if q is still too large */
-          r1 -= p1;
-          r0 -= p0;
+          r1 = (r1 - p1) & GMP_NUMB_MASK;
+          r0 = (r0 - p0) & GMP_NUMB_MASK;
 
-          cy_limb = mpn_submul_1 (np, dp, dsize - 2, q);
+          cy_limb = mpn_submul_1 (np, dp, dn - 2, q);
 
+          /* Check if we've over-estimated q, and adjust as needed.  */
          {
            mp_limb_t cy1, cy2;
            cy1 = r0 < cy_limb;
-            r0 -= cy_limb;
+            r0 = (r0 - cy_limb) & GMP_NUMB_MASK;
            cy2 = r1 < cy1;
            r1 -= cy1;
-            np[dsize - 1] = r1;
-            np[dsize - 2] = r0;
+            np[dn - 1] = r1;
+            np[dn - 2] = r0;
            if (cy2 != rx)
              {
-                mpn_add_n (np, np, dp, dsize);
+                mpn_add_n (np, np, dp, dn);
                q--;
              }
          }
@@ -190,7 +198,7 @@ mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
          ______ ______
       - |__p1__|__p0__|  partial product to subtract
          ______  ______
-      - |______|cylimb|
+      - |______|cylimb|
 
    rx is -1, 0 or 1.  If rx=1, then q is correct (it should match
   carry out).  If rx=-1 then q is too large.  If rx=0, then q might
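
A note on the DIV_SB_PREINV_THRESHOLD / udiv_qrnnd_preinv machinery the diff
introduces: once the high divisor limb dx has been inverted by invert_limb,
each quotient limb can be estimated with two multiplications and a few cheap
adjustments instead of a hardware divide.  The standalone sketch below is not
the GMP source: limbs are 32 bits so double-limb arithmetic fits in plain
uint64_t, and the *_ref names are invented for this illustration.  It computes
the same inverse floor((B^2-1)/d) - B that invert_limb produces, and applies
the division-by-invariant-integers adjustment steps described by Moller and
Granlund ("Improved division by invariant integers"); GMP's real macros are
built on umul_ppmm and, on many CPUs, hand-written assembly.

/* Sketch only: 2/1 limb division by a normalized d via a precomputed
   inverse, in the style of invert_limb/udiv_qrnnd_preinv.  */

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t limb_t;

/* dinv = floor((B^2 - 1) / d) - B for B = 2^32, the quantity invert_limb
   computes; done with a plain 64-bit division since this is a sketch.  */
static limb_t
invert_limb_ref (limb_t d)
{
  assert (d & 0x80000000u);             /* high bit set, i.e. normalized */
  return (limb_t) (UINT64_MAX / d - ((uint64_t) 1 << 32));
}

/* Divide (u1,u0) by d with u1 < d, so the quotient fits one limb.  Two
   multiplies plus adjustments replace the hardware divide.  */
static limb_t
udiv_qrnnd_preinv_ref (limb_t *rp, limb_t u1, limb_t u0, limb_t d, limb_t dinv)
{
  assert (u1 < d);
  /* Two-limb estimate q = u1*dinv + (u1,u0); no uint64_t overflow occurs
     because u1 <= d-1.  */
  uint64_t q = (uint64_t) u1 * dinv + (((uint64_t) u1 << 32) | u0);
  limb_t q1 = (limb_t) (q >> 32) + 1;   /* mod B, wrap is intended */
  limb_t q0 = (limb_t) q;
  limb_t r = u0 - q1 * d;               /* mod B */

  if (r > q0)                           /* q1 one too large */
    {
      q1--;
      r += d;
    }
  if (r >= d)                           /* rare: q1 one too small */
    {
      q1++;
      r -= d;
    }
  *rp = r;
  return q1;
}

int
main (void)
{
  limb_t d = 0x87654321;                        /* normalized divisor */
  limb_t dinv = invert_limb_ref (d);
  limb_t u1 = 0x12345678, u0 = 0x9abcdef0;      /* u1 < d */
  limb_t r, q = udiv_qrnnd_preinv_ref (&r, u1, u0, d, dinv);
  uint64_t u = ((uint64_t) u1 << 32) | u0;

  printf ("preinv: q=%08lx r=%08lx   plain: q=%08lx r=%08lx\n",
          (unsigned long) q, (unsigned long) r,
          (unsigned long) (u / d), (unsigned long) (u % d));
  return 0;
}

Compiled with any C99 compiler this prints identical quotient/remainder pairs
for the preinv and plain divisions.  The threshold exists because on CPUs with
a fast hardware divider the two multiplies are not automatically a win for
short quotients, hence the per-CPU tuned DIV_SB_PREINV_THRESHOLD values.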
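
Similarly, the r1/p1/rx juggling in the main loop is easier to see in small
types.  The sketch below is again not GMP code (estimate_q, the 16-bit limb
size and B are inventions of this example): it shows the estimate-and-correct
idea, guessing a quotient limb from the top two limbs of the partial remainder
divided by dx, then pulling the guess down by comparing against the top two
divisor limbs, which is what the umul_ppmm (p1, p0, d1, ...) comparison in the
loop accomplishes.  For a normalized divisor, Knuth (TAOCP vol. 2, section
4.3.1) bounds the initial guess at 2 too large, which the assert checks.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint16_t limb_t;
#define B 65536u                        /* limb base */

/* Quotient-limb estimate for (nx,n1,n0) divided by (dx,d1), requiring dx
   normalized and nx < dx (the nx == dx case gets special treatment in the
   real code, q = all ones).  16-bit limbs keep everything exact in 64-bit
   arithmetic.  */
static limb_t
estimate_q (limb_t nx, limb_t n1, limb_t n0, limb_t dx, limb_t d1)
{
  assert (dx & 0x8000);                 /* normalized */
  assert (nx < dx);                     /* quotient fits in one limb */

  uint32_t q = ((uint32_t) nx * B + n1) / dx;           /* 2-by-1 guess */
  uint64_t top3 = ((uint64_t) nx * B + n1) * B + n0;
  uint32_t d2 = (uint32_t) dx * B + d1;
  int steps = 0;

  /* Same effect as the r1/p1 comparison in the loop: decrement while q
     times the top two divisor limbs overshoots the top three remainder
     limbs.  */
  while ((uint64_t) q * d2 > top3)
    {
      q--;
      steps++;
    }
  assert (steps <= 2);                  /* Knuth's bound for normalized dx */
  return (limb_t) q;
}

int
main (void)
{
  /* A case where the 2-by-1 guess overshoots and must be corrected.  */
  limb_t nx = 0x7fff, n1 = 0xffff, n0 = 0x0000;
  limb_t dx = 0x8000, d1 = 0xffff;
  limb_t q = estimate_q (nx, n1, n0, dx, d1);
  uint64_t top3 = ((uint64_t) nx * B + n1) * B + n0;
  uint32_t d2 = (uint32_t) dx * B + d1;

  printf ("q = 0x%04x, exact = 0x%04x\n",
          (unsigned) q, (unsigned) (top3 / d2));
  return 0;
}

In the real loop a remaining possible off-by-one (the rx = 0 case in the
diagram above) is only discovered after mpn_submul_1 has subtracted q times
the rest of the divisor, hence the final cy2 != rx add-back with q--.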