===================================================================
RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/generic/Attic/divrem_1.c,v
retrieving revision 1.1.1.2
retrieving revision 1.1.1.3
diff -u -p -r1.1.1.2 -r1.1.1.3
--- OpenXM_contrib/gmp/mpn/generic/Attic/divrem_1.c	2000/09/09 14:12:24	1.1.1.2
+++ OpenXM_contrib/gmp/mpn/generic/Attic/divrem_1.c	2003/08/25 16:06:20	1.1.1.3
@@ -1,12 +1,6 @@
-/* mpn_divrem_1(quot_ptr, qsize, dividend_ptr, dividend_size, divisor_limb) --
-   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
-   Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
-   Return the single-limb remainder.
-   There are no constraints on the value of the divisor.
+/* mpn_divrem_1 -- mpn by limb division.
 
-   QUOT_PTR and DIVIDEND_PTR might point to the same limb.
-
-Copyright (C) 1991, 1993, 1994, 1996, 1998, 1999, 2000 Free Software
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002 Free Software
 Foundation, Inc.
 
 This file is part of the GNU MP Library.
@@ -31,218 +25,223 @@ MA 02111-1307, USA. */
 #include "longlong.h"
 
 
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+   meaning the quotient size where that should happen, the quotient size
+   being how many udiv divisions will be done.
 
-/* __gmpn_divmod_1_internal(quot_ptr,dividend_ptr,dividend_size,divisor_limb)
-   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
-   Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
-   Return the single-limb remainder.
-   There are no constraints on the value of the divisor.
+   The default is to use preinv always, CPUs where this doesn't suit have
+   tuned thresholds.  Note in particular that preinv should certainly be
+   used if that's the only division available (USE_PREINV_ALWAYS).  */
 
-   QUOT_PTR and DIVIDEND_PTR might point to the same limb. */
-
-#ifndef UMUL_TIME
-#define UMUL_TIME 1
+#ifndef DIVREM_1_NORM_THRESHOLD
+#define DIVREM_1_NORM_THRESHOLD  0
 #endif
-
-#ifndef UDIV_TIME
-#define UDIV_TIME UMUL_TIME
+#ifndef DIVREM_1_UNNORM_THRESHOLD
+#define DIVREM_1_UNNORM_THRESHOLD  0
 #endif
 
-static mp_limb_t
-#if __STDC__
-__gmpn_divmod_1_internal (mp_ptr quot_ptr,
-                          mp_srcptr dividend_ptr, mp_size_t dividend_size,
-                          mp_limb_t divisor_limb)
-#else
-__gmpn_divmod_1_internal (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
-     mp_ptr quot_ptr;
-     mp_srcptr dividend_ptr;
-     mp_size_t dividend_size;
-     mp_limb_t divisor_limb;
-#endif
-{
-  mp_size_t i;
-  mp_limb_t n1, n0, r;
-  int dummy;
 
-  /* ??? Should this be handled at all?  Rely on callers?  */
-  if (dividend_size == 0)
-    return 0;
 
-  /* If multiplication is much faster than division, and the
-     dividend is large, pre-invert the divisor, and use
-     only multiplications in the inner loop.  */
+/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
+   and UNNORM thresholds are 0 and only the inversion code is included.
 
-  /* This test should be read:
-       Does it ever help to use udiv_qrnnd_preinv?
-         && Does what we save compensate for the inversion overhead?  */
-  if (UDIV_TIME > (2 * UMUL_TIME + 6)
-      && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME)
-    {
-      int normalization_steps;
+   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
+   will be MP_SIZE_T_MAX and only the plain division code is included.
 
-      count_leading_zeros (normalization_steps, divisor_limb);
-      if (normalization_steps != 0)
-        {
-          mp_limb_t divisor_limb_inverted;
+   Otherwise mul-by-inverse is better than plain division above some
+   threshold, and best results are obtained by having code for both present.
 
-          divisor_limb <<= normalization_steps;
-          invert_limb (divisor_limb_inverted, divisor_limb);
+   The main reason for separating the norm and unnorm cases is that not all
+   CPUs give zero for "n0 >> BITS_PER_MP_LIMB" which would arise in the
+   unnorm code used on an already normalized divisor.
 
-          n1 = dividend_ptr[dividend_size - 1];
-          r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
+   non-shifting code for both the norm and unnorm cases, though with
+   different criteria for skipping a division, and with different thresholds
+   of course.  And in fact if inversion is never viable, then that simple
+   non-shifting division would be all that's left.
 
-          /* Possible optimization:
-             if (r == 0
-             && divisor_limb > ((n1 << normalization_steps)
-                             | (dividend_ptr[dividend_size - 2] >> ...)))
-             ...one division less... */
+   The NORM and UNNORM thresholds might not differ much, but if there's
+   going to be separate code for norm and unnorm then it makes sense to have
+   separate thresholds.  One thing that's possible is that the
+   mul-by-inverse might be better only for normalized divisors, due to that
+   case not needing variable bit shifts.
 
-          for (i = dividend_size - 2; i >= 0; i--)
+   Notice that the thresholds are tested after the decision to possibly skip
+   one divide step, so they're based on the actual number of divisions done.
+
+   For the unnorm case, it would be possible to call mpn_lshift to adjust
+   the dividend all in one go (into the quotient space say), rather than
+   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
+   than what the compiler can generate for EXTRACT.  But this is left to CPU
+   specific implementations to consider, especially since EXTRACT isn't on
+   the dependent chain.  */
+
+mp_limb_t
+mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
+              mp_srcptr up, mp_size_t un, mp_limb_t d)
+{
+  mp_size_t n;
+  mp_size_t i;
+  mp_limb_t n1, n0;
+  mp_limb_t r = 0;
+
+  ASSERT (qxn >= 0);
+  ASSERT (un >= 0);
+  ASSERT (d != 0);
+  /* FIXME: What's the correct overlap rule when qxn!=0? */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
+
+  n = un + qxn;
+  if (n == 0)
+    return 0;
+
+  d <<= GMP_NAIL_BITS;
+
+  qp += (n - 1);   /* Make qp point at most significant quotient limb */
+
+  if ((d & GMP_LIMB_HIGHBIT) != 0)
+    {
+      if (un != 0)
+        {
+          /* High quotient limb is 0 or 1, skip a divide step. */
+          mp_limb_t q;
+          r = up[un - 1] << GMP_NAIL_BITS;
+          q = (r >= d);
+          *qp-- = q;
+          r -= (d & -q);
+          r >>= GMP_NAIL_BITS;
+          n--;
+          un--;
+        }
+
+      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
+        {
+        plain:
+          for (i = un - 1; i >= 0; i--)
             {
-              n0 = dividend_ptr[i];
-              udiv_qrnnd_preinv (quot_ptr[i + 1], r, r,
-                                 ((n1 << normalization_steps)
-                                  | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
-                                 divisor_limb, divisor_limb_inverted);
-              n1 = n0;
+              n0 = up[i] << GMP_NAIL_BITS;
+              udiv_qrnnd (*qp, r, r, n0, d);
+              r >>= GMP_NAIL_BITS;
+              qp--;
             }
-          udiv_qrnnd_preinv (quot_ptr[0], r, r,
-                             n1 << normalization_steps,
-                             divisor_limb, divisor_limb_inverted);
-          return r >> normalization_steps;
+          for (i = qxn - 1; i >= 0; i--)
+            {
+              udiv_qrnnd (*qp, r, r, 0, d);
+              r >>= GMP_NAIL_BITS;
+              qp--;
+            }
+          return r;
         }
       else
         {
-          mp_limb_t divisor_limb_inverted;
+          /* Multiply-by-inverse, divisor already normalized. */
+          mp_limb_t dinv;
+          invert_limb (dinv, d);
 
-          invert_limb (divisor_limb_inverted, divisor_limb);
-
-          i = dividend_size - 1;
-          r = dividend_ptr[i];
-
-          if (r >= divisor_limb)
-            r = 0;
-          else
+          for (i = un - 1; i >= 0; i--)
             {
-              quot_ptr[i] = 0;
-              i--;
+              n0 = up[i] << GMP_NAIL_BITS;
+              udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
+              r >>= GMP_NAIL_BITS;
+              qp--;
             }
-
-          for (; i >= 0; i--)
+          for (i = qxn - 1; i >= 0; i--)
             {
-              n0 = dividend_ptr[i];
-              udiv_qrnnd_preinv (quot_ptr[i], r, r,
-                                 n0, divisor_limb, divisor_limb_inverted);
+              udiv_qrnnd_preinv (*qp, r, r, 0, d, dinv);
+              r >>= GMP_NAIL_BITS;
+              qp--;
             }
           return r;
         }
     }
   else
     {
-      if (UDIV_NEEDS_NORMALIZATION)
-        {
-          int normalization_steps;
+      /* Most significant bit of divisor == 0.  */
+      int norm;
 
-          count_leading_zeros (normalization_steps, divisor_limb);
-          if (normalization_steps != 0)
+      /* Skip a division if high < divisor (high quotient 0).  Testing here
+         before normalizing will still skip as often as possible.  */
+      if (un != 0)
+        {
+          n1 = up[un - 1] << GMP_NAIL_BITS;
+          if (n1 < d)
             {
-              divisor_limb <<= normalization_steps;
+              r = n1 >> GMP_NAIL_BITS;
+              *qp-- = 0;
+              n--;
+              if (n == 0)
+                return r;
+              un--;
+            }
+        }
 
-              n1 = dividend_ptr[dividend_size - 1];
-              r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+      if (! UDIV_NEEDS_NORMALIZATION
+          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
+        goto plain;
 
-              /* Possible optimization:
-                 if (r == 0
-                 && divisor_limb > ((n1 << normalization_steps)
-                                 | (dividend_ptr[dividend_size - 2] >> ...)))
-                 ...one division less... */
+      count_leading_zeros (norm, d);
+      d <<= norm;
+      r <<= norm;
 
-              for (i = dividend_size - 2; i >= 0; i--)
+      if (UDIV_NEEDS_NORMALIZATION
+          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
+        {
+          if (un != 0)
+            {
+              n1 = up[un - 1] << GMP_NAIL_BITS;
+              r |= (n1 >> (GMP_LIMB_BITS - norm));
+              for (i = un - 2; i >= 0; i--)
                 {
-                  n0 = dividend_ptr[i];
-                  udiv_qrnnd (quot_ptr[i + 1], r, r,
-                              ((n1 << normalization_steps)
-                               | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
-                              divisor_limb);
+                  n0 = up[i] << GMP_NAIL_BITS;
+                  udiv_qrnnd (*qp, r, r,
+                              (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
+                              d);
+                  r >>= GMP_NAIL_BITS;
+                  qp--;
                   n1 = n0;
                 }
-              udiv_qrnnd (quot_ptr[0], r, r,
-                          n1 << normalization_steps,
-                          divisor_limb);
-              return r >> normalization_steps;
+              udiv_qrnnd (*qp, r, r, n1 << norm, d);
+              r >>= GMP_NAIL_BITS;
+              qp--;
             }
+          for (i = qxn - 1; i >= 0; i--)
+            {
+              udiv_qrnnd (*qp, r, r, 0, d);
+              r >>= GMP_NAIL_BITS;
+              qp--;
+            }
+          return r >> norm;
         }
-      /* No normalization needed, either because udiv_qrnnd doesn't require
-         it, or because DIVISOR_LIMB is already normalized.  */
-
-      i = dividend_size - 1;
-      r = dividend_ptr[i];
-
-      if (r >= divisor_limb)
-        r = 0;
       else
         {
-          quot_ptr[i] = 0;
-          i--;
-        }
-
-      for (; i >= 0; i--)
-        {
-          n0 = dividend_ptr[i];
-          udiv_qrnnd (quot_ptr[i], r, r, n0, divisor_limb);
-        }
-      return r;
-    }
-}
-
-
-
-mp_limb_t
-#if __STDC__
-mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
-              mp_srcptr np, mp_size_t nn,
-              mp_limb_t d)
-#else
-mpn_divrem_1 (qp, qxn, np, nn, d)
-     mp_ptr qp;
-     mp_size_t qxn;
-     mp_srcptr np;
-     mp_size_t nn;
-     mp_limb_t d;
-#endif
-{
-  mp_limb_t rlimb;
-  mp_size_t i;
-
-  /* Develop integer part of quotient. */
-  rlimb = __gmpn_divmod_1_internal (qp + qxn, np, nn, d);
-
-  /* Develop fraction part of quotient.  This is not as fast as it should;
-     the preinvert stuff from __gmpn_divmod_1_internal ought to be used here
-     too.  */
-  if (UDIV_NEEDS_NORMALIZATION)
-    {
-      int normalization_steps;
-
-      count_leading_zeros (normalization_steps, d);
-      if (normalization_steps != 0)
-        {
-          d <<= normalization_steps;
-          rlimb <<= normalization_steps;
-
+          mp_limb_t dinv;
+          invert_limb (dinv, d);
+          if (un != 0)
+            {
+              n1 = up[un - 1] << GMP_NAIL_BITS;
+              r |= (n1 >> (GMP_LIMB_BITS - norm));
+              for (i = un - 2; i >= 0; i--)
+                {
+                  n0 = up[i] << GMP_NAIL_BITS;
+                  udiv_qrnnd_preinv (*qp, r, r,
+                                     ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
+                                     d, dinv);
+                  r >>= GMP_NAIL_BITS;
+                  qp--;
+                  n1 = n0;
+                }
+              udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
+              r >>= GMP_NAIL_BITS;
+              qp--;
+            }
           for (i = qxn - 1; i >= 0; i--)
-            udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);
-
-          return rlimb >> normalization_steps;
+            {
+              udiv_qrnnd_preinv (*qp, r, r, 0, d, dinv);
+              r >>= GMP_NAIL_BITS;
+              qp--;
+            }
+          return r >> norm;
         }
-      else
-        /* fall out */
-        ;
     }
-
-  for (i = qxn - 1; i >= 0; i--)
-    udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);
-
-  return rlimb;
 }
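
The interface of mpn_divrem_1 is unchanged by this revision: it divides the un-limb number {up, un} by the single limb d, writes un integer quotient limbs plus qxn extra "fraction" limbs at qp, and returns the remainder (that of dividing u*B^qxn by d, B being the limb base).  Below is a small usage sketch, not taken from the GMP sources; the values are arbitrary and it assumes a normal GMP installation (compile and link with -lgmp).

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mp_limb_t u[2] = { 1, 2 };   /* u = 2*B + 1, least significant limb first */
  mp_size_t qxn = 1;           /* ask for one extra fraction limb */
  mp_limb_t q[3];              /* un + qxn = 3 quotient limbs */
  mp_limb_t r;

  r = mpn_divrem_1 (q, qxn, u, 2, 10);

  /* q[2]:q[1] is the integer part of u/10, q[0] the fraction limb,
     and r the remainder of u*B^qxn divided by 10. */
  gmp_printf ("integer quotient  %Mu*B + %Mu\n", q[2], q[1]);
  gmp_printf ("fraction limb     %Mu\n", q[0]);
  gmp_printf ("remainder         %Mu\n", r);
  return 0;
}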
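
The heart of the new code is the invert_limb / udiv_qrnnd_preinv pair: one real division up front computes a limb-sized inverse of the normalized divisor, after which each quotient limb needs only multiplications plus a few conditional corrections.  The following is a minimal, self-contained sketch of that idea, not GMP's actual macros: it assumes 32-bit limbs with no nail bits, and invert_limb_sketch / udiv_preinv_sketch are hypothetical names standing in for invert_limb and udiv_qrnnd_preinv.

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

typedef uint32_t limb;        /* stand-in for a 32-bit mp_limb_t */
#define B 0x100000000ULL      /* limb base, 2^32 */

/* dinv = floor((B^2 - 1) / d) - B, for a normalized d (high bit set). */
static limb
invert_limb_sketch (limb d)
{
  return (limb) (0xFFFFFFFFFFFFFFFFULL / d - B);
}

/* Divide the two-limb value nh*B + nl by the normalized d (with nh < d),
   using dinv: estimate the quotient with a multiply, then correct it with
   a few subtractions of d.  Returns the quotient, stores the remainder
   in *r. */
static limb
udiv_preinv_sketch (limb *r, limb nh, limb nl, limb d, limb dinv)
{
  limb q = (limb) (((uint64_t) nh * dinv) >> 32) + nh;  /* never too big */
  uint64_t rem = (((uint64_t) nh << 32) | nl) - (uint64_t) q * d;
  while (rem >= d)             /* only a few corrections are ever needed */
    {
      q++;
      rem -= d;
    }
  *r = (limb) rem;
  return q;
}

int
main (void)
{
  limb d = 0xB504F333;                    /* arbitrary normalized divisor */
  limb nh = 0x12345678, nl = 0x9ABCDEF0;  /* arbitrary dividend, nh < d */
  limb dinv = invert_limb_sketch (d);
  limb r;
  limb q = udiv_preinv_sketch (&r, nh, nl, d, dinv);

  uint64_t n = ((uint64_t) nh << 32) | nl;
  assert (q == (limb) (n / d) && r == (limb) (n % d));
  printf ("q = 0x%08lx  r = 0x%08lx\n", (unsigned long) q, (unsigned long) r);
  return 0;
}

The same shape is visible in the diff above: invert_limb is called once per mpn_divrem_1 invocation, and udiv_qrnnd_preinv replaces udiv_qrnnd in the per-limb loops whenever the quotient size is above the tuned threshold.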