/* mpn_divrem_1 -- mpn by limb division.

   Divide {UP,UN} by D.  Write UN+QXN quotient limbs at QP, the low QXN of
   them being "fraction" limbs developed below the radix point.  Return the
   single-limb remainder.  There are no constraints on the value of the
   divisor.  QP+QXN and UP might point to the same limbs.

Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
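
/* Example use (an illustrative sketch, not from the library sources):
   dividing the two-limb number u[1]*B + u[0], B = 2^GMP_NUMB_BITS, by 7,
   with no fraction limbs requested (qxn = 0):

       mp_limb_t u[2] = { 123, 456 };
       mp_limb_t q[2];
       mp_limb_t r;

       r = mpn_divrem_1 (q, 0, u, 2, 7);

   Afterwards {q,2} is the quotient and r the remainder, satisfying
   456*B + 123 == (q[1]*B + q[0]) * 7 + r, with 0 <= r < 7.  */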


/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always, CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif
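
/* For reference, the mul-by-inverse idea (a sketch under the usual GMP
   definitions, not a substitute for the gmp-impl.h macros): for a
   normalized divisor d (high bit set), invert_limb computes the limb
   inverse

       dinv = floor ((B*B - 1) / d) - B,    B = 2^GMP_LIMB_BITS

   and udiv_qrnnd_preinv obtains each quotient limb from a umul_ppmm by
   dinv plus a few additions and conditional corrections, keeping the slow
   hardware divide instruction off the dependent chain.  */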


/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
   and UNNORM thresholds are 0 and only the inversion code is included.

   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
   will be MP_SIZE_T_MAX and only the plain division code is included.

   Otherwise mul-by-inverse is better than plain division above some
   threshold, and best results are obtained by having code for both present.

   The main reason for separating the norm and unnorm cases is that not all
   CPUs give zero for "n0 >> BITS_PER_MP_LIMB" which would arise in the
   unnorm code used on an already normalized divisor.

   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
   non-shifting code for both the norm and unnorm cases, though with
   different criteria for skipping a division, and with different thresholds
   of course.  And in fact if inversion is never viable, then that simple
   non-shifting division would be all that's left.

   The NORM and UNNORM thresholds might not differ much, but if there's
   going to be separate code for norm and unnorm then it makes sense to have
   separate thresholds.  One thing that's possible is that the
   mul-by-inverse might be better only for normalized divisors, due to that
   case not needing variable bit shifts.

   Notice that the thresholds are tested after the decision to possibly skip
   one divide step, so they're based on the actual number of divisions done.

   For the unnorm case, it would be possible to call mpn_lshift to adjust
   the dividend all in one go (into the quotient space say), rather than
   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
   than what the compiler can generate for EXTRACT.  But this is left to CPU
   specific implementations to consider, especially since EXTRACT isn't on
   the dependent chain.  */
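
/* The qxn argument asks for that many extra "fraction" quotient limbs below
   the radix point: after the integer limbs, the remainder is repeatedly
   extended with zero low limbs, so {qp,un+qxn} ends up holding
   floor (U * B^qxn / d), B = 2^GMP_NUMB_BITS.  For instance (an
   illustrative sketch, not from the sources), with {up,1} = {1}, d = 3 and
   qxn = 1, the integer limb q[1] is 0 and the fraction limb q[0] is
   floor (B/3), the first limb of the binary expansion of 1/3.  */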

mp_limb_t
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
              mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t n;
  mp_size_t i;
  mp_limb_t n1, n0;
  mp_limb_t r = 0;

  ASSERT (qxn >= 0);
  ASSERT (un >= 0);
  ASSERT (d != 0);
  /* FIXME: What's the correct overlap rule when qxn!=0? */
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));

  n = un + qxn;
  if (n == 0)
    return 0;

  d <<= GMP_NAIL_BITS;

  qp += (n - 1);   /* Make qp point at most significant quotient limb */
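
  /* Nails note: each limb holds GMP_NUMB_BITS significant bits with the
     high GMP_NAIL_BITS bits zero.  Shifting d and each dividend limb up by
     GMP_NAIL_BITS lets udiv_qrnnd and udiv_qrnnd_preinv work on full
     GMP_LIMB_BITS values; each partial remainder is shifted back down
     afterwards.  In the default non-nails build GMP_NAIL_BITS is 0 and all
     these shifts are no-ops.  */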

  if ((d & GMP_LIMB_HIGHBIT) != 0)
    {
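      /* Here d is normalized (high bit set), so up[un-1] < B <= 2*d and
         the high quotient limb can only be 0 or 1, letting a compare
         replace a full divide step.  In "r -= (d & -q)" below, -q is an
         all-ones mask when q is 1 and zero when q is 0, so d is subtracted
         exactly when the quotient bit is set, branch-free.  */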
      if (un != 0)
        {
          /* High quotient limb is 0 or 1, skip a divide step. */
          mp_limb_t q;
          r = up[un - 1] << GMP_NAIL_BITS;
          q = (r >= d);
          *qp-- = q;
          r -= (d & -q);
          r >>= GMP_NAIL_BITS;
          n--;
          un--;
        }

      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
        {
        plain:
          for (i = un - 1; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd (*qp, r, r, n0, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd (*qp, r, r, 0, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r;
        }
      else
        {
          /* Multiply-by-inverse, divisor already normalized. */
          mp_limb_t dinv;
          invert_limb (dinv, d);

          for (i = un - 1; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd_preinv (*qp, r, r, 0, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r;
        }
    }
  else
    {
      /* Most significant bit of divisor == 0.  */
      int norm;
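
      /* udiv_qrnnd and udiv_qrnnd_preinv want a normalized divisor, so d
         is shifted up by "norm" bits and the dividend limbs are shifted to
         match, each division taking its numerator low limb from two
         adjacent source limbs.  The final remainder is scaled back with
         "r >> norm".  */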

      /* Skip a division if high < divisor (high quotient 0).  Testing here
         before normalizing will still skip as often as possible.  */
      if (un != 0)
        {
          n1 = up[un - 1] << GMP_NAIL_BITS;
          if (n1 < d)
            {
              r = n1 >> GMP_NAIL_BITS;
              *qp-- = 0;
              n--;
              if (n == 0)
                return r;
              un--;
            }
        }

      if (! UDIV_NEEDS_NORMALIZATION
          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
        goto plain;

      count_leading_zeros (norm, d);
      d <<= norm;
      r <<= norm;

      if (UDIV_NEEDS_NORMALIZATION
          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
        {
          if (un != 0)
            {
              n1 = up[un - 1] << GMP_NAIL_BITS;
              r |= (n1 >> (GMP_LIMB_BITS - norm));
              for (i = un - 2; i >= 0; i--)
                {
                  n0 = up[i] << GMP_NAIL_BITS;
                  udiv_qrnnd (*qp, r, r,
                              (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
                              d);
                  r >>= GMP_NAIL_BITS;
                  qp--;
                  n1 = n0;
                }
              udiv_qrnnd (*qp, r, r, n1 << norm, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd (*qp, r, r, 0, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r >> norm;
        }
      else
        {
          mp_limb_t dinv;
          invert_limb (dinv, d);
          if (un != 0)
            {
              n1 = up[un - 1] << GMP_NAIL_BITS;
              r |= (n1 >> (GMP_LIMB_BITS - norm));
              for (i = un - 2; i >= 0; i--)
                {
                  n0 = up[i] << GMP_NAIL_BITS;
                  udiv_qrnnd_preinv (*qp, r, r,
                                     ((n1 << norm)
                                      | (n0 >> (GMP_NUMB_BITS - norm))),
                                     d, dinv);
                  r >>= GMP_NAIL_BITS;
                  qp--;
                  n1 = n0;
                }
              udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd_preinv (*qp, r, r, 0, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r >> norm;
        }
    }
}
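
/* Note: the older mpn_divmod_1 interface corresponds to qxn == 0 here; a
   sketch of the relationship (the exact definition lives in the GMP
   headers):

       #define mpn_divmod_1(qp,np,nn,d)  mpn_divrem_1 (qp, 0, np, nn, d)
*/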