/* mpn_mod_1 -- mpn by limb mod.

   Return the single-limb remainder.
   There are no constraints on the value of the divisor.

Copyright 1991, 1993, 1994, 1999, 2000, 2002 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include "gmp-impl.h"
#include "longlong.h"
|
|
/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always, CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

/* Threshold for the normalized case (divisor high bit set).  */
#ifndef MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD    0
#endif

/* Threshold for the unnormalized case (divisor high bit clear).  */
#ifndef MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD  0
#endif
|
|
|
|
|
/* The comments in mpn/generic/divrem_1.c apply here too.

   As noted in the algorithms section of the manual, the shifts in the loop
   for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
   by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can
   skip a division where (a*2^n)%(d*2^n) can't then there's the same number
   of divide steps, though how often that happens depends on the assumed
   distributions of dividend and divisor.  In any case this idea is left to
   CPU specific implementations to consider.  */
|
mp_limb_t |
mp_limb_t |
#if __STDC__ |
mpn_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d) |
mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size, |
|
mp_limb_t divisor_limb) |
|
#else |
|
mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb) |
|
mp_srcptr dividend_ptr; |
|
mp_size_t dividend_size; |
|
mp_limb_t divisor_limb; |
|
#endif |
|
{ |
{ |
mp_size_t i; |
mp_size_t i; |
mp_limb_t n1, n0, r; |
mp_limb_t n1, n0, r; |
int dummy; |
mp_limb_t dummy; |
|
|
/* Botch: Should this be handled at all? Rely on callers? */ |
ASSERT (un >= 0); |
if (dividend_size == 0) |
ASSERT (d != 0); |
|
|
|
/* Botch: Should this be handled at all? Rely on callers? |
|
But note un==0 is currently required by mpz/fdiv_r_ui.c and possibly |
|
other places. */ |
|
if (un == 0) |
return 0; |
return 0; |
|
|
/* If multiplication is much faster than division, and the |
d <<= GMP_NAIL_BITS; |
dividend is large, pre-invert the divisor, and use |
|
only multiplications in the inner loop. */ |
|
|
|
/* This test should be read: |
if ((d & GMP_LIMB_HIGHBIT) != 0) |
Does it ever help to use udiv_qrnnd_preinv? |
|
&& Does what we save compensate for the inversion overhead? */ |
|
if (UDIV_TIME > (2 * UMUL_TIME + 6) |
|
&& (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) |
|
{ |
{ |
int normalization_steps; |
/* High limb is initial remainder, possibly with one subtract of |
|
d to get r<d. */ |
|
r = up[un - 1] << GMP_NAIL_BITS; |
|
if (r >= d) |
|
r -= d; |
|
r >>= GMP_NAIL_BITS; |
|
un--; |
|
if (un == 0) |
|
return r; |
|
|
count_leading_zeros (normalization_steps, divisor_limb); |
if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD)) |
if (normalization_steps != 0) |
|
{ |
{ |
mp_limb_t divisor_limb_inverted; |
plain: |
|
for (i = un - 1; i >= 0; i--) |
divisor_limb <<= normalization_steps; |
|
invert_limb (divisor_limb_inverted, divisor_limb); |
|
|
|
n1 = dividend_ptr[dividend_size - 1]; |
|
r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); |
|
|
|
/* Possible optimization: |
|
if (r == 0 |
|
&& divisor_limb > ((n1 << normalization_steps) |
|
| (dividend_ptr[dividend_size - 2] >> ...))) |
|
...one division less... */ |
|
|
|
for (i = dividend_size - 2; i >= 0; i--) |
|
{ |
{ |
n0 = dividend_ptr[i]; |
n0 = up[i] << GMP_NAIL_BITS; |
udiv_qrnnd_preinv (dummy, r, r, |
udiv_qrnnd (dummy, r, r, n0, d); |
((n1 << normalization_steps) |
r >>= GMP_NAIL_BITS; |
| (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), |
|
divisor_limb, divisor_limb_inverted); |
|
n1 = n0; |
|
} |
} |
udiv_qrnnd_preinv (dummy, r, r, |
return r; |
n1 << normalization_steps, |
|
divisor_limb, divisor_limb_inverted); |
|
return r >> normalization_steps; |
|
} |
} |
else |
else |
{ |
{ |
mp_limb_t divisor_limb_inverted; |
mp_limb_t inv; |
|
invert_limb (inv, d); |
invert_limb (divisor_limb_inverted, divisor_limb); |
for (i = un - 1; i >= 0; i--) |
|
|
i = dividend_size - 1; |
|
r = dividend_ptr[i]; |
|
|
|
if (r >= divisor_limb) |
|
r = 0; |
|
else |
|
i--; |
|
|
|
for (; i >= 0; i--) |
|
{ |
{ |
n0 = dividend_ptr[i]; |
n0 = up[i] << GMP_NAIL_BITS; |
udiv_qrnnd_preinv (dummy, r, r, |
udiv_qrnnd_preinv (dummy, r, r, n0, d, inv); |
n0, divisor_limb, divisor_limb_inverted); |
r >>= GMP_NAIL_BITS; |
} |
} |
return r; |
return r; |
} |
} |
} |
} |
else |
else |
{ |
{ |
if (UDIV_NEEDS_NORMALIZATION) |
int norm; |
|
|
|
/* Skip a division if high < divisor. Having the test here before |
|
normalizing will still skip as often as possible. */ |
|
r = up[un - 1] << GMP_NAIL_BITS; |
|
if (r < d) |
{ |
{ |
int normalization_steps; |
r >>= GMP_NAIL_BITS; |
|
un--; |
|
if (un == 0) |
|
return r; |
|
} |
|
else |
|
r = 0; |
|
|
count_leading_zeros (normalization_steps, divisor_limb); |
/* If udiv_qrnnd doesn't need a normalized divisor, can use the simple |
if (normalization_steps != 0) |
code above. */ |
{ |
if (! UDIV_NEEDS_NORMALIZATION |
divisor_limb <<= normalization_steps; |
&& BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD)) |
|
goto plain; |
|
|
n1 = dividend_ptr[dividend_size - 1]; |
count_leading_zeros (norm, d); |
r = n1 >> (BITS_PER_MP_LIMB - normalization_steps); |
d <<= norm; |
|
|
/* Possible optimization: |
n1 = up[un - 1] << GMP_NAIL_BITS; |
if (r == 0 |
r = (r << norm) | (n1 >> (GMP_LIMB_BITS - norm)); |
&& divisor_limb > ((n1 << normalization_steps) |
|
| (dividend_ptr[dividend_size - 2] >> ...))) |
|
...one division less... */ |
|
|
|
for (i = dividend_size - 2; i >= 0; i--) |
if (UDIV_NEEDS_NORMALIZATION |
{ |
&& BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD)) |
n0 = dividend_ptr[i]; |
{ |
udiv_qrnnd (dummy, r, r, |
for (i = un - 2; i >= 0; i--) |
((n1 << normalization_steps) |
{ |
| (n0 >> (BITS_PER_MP_LIMB - normalization_steps))), |
n0 = up[i] << GMP_NAIL_BITS; |
divisor_limb); |
|
n1 = n0; |
|
} |
|
udiv_qrnnd (dummy, r, r, |
udiv_qrnnd (dummy, r, r, |
n1 << normalization_steps, |
(n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)), |
divisor_limb); |
d); |
return r >> normalization_steps; |
r >>= GMP_NAIL_BITS; |
|
n1 = n0; |
} |
} |
|
udiv_qrnnd (dummy, r, r, n1 << norm, d); |
|
r >>= GMP_NAIL_BITS; |
|
return r >> norm; |
} |
} |
/* No normalization needed, either because udiv_qrnnd doesn't require |
|
it, or because DIVISOR_LIMB is already normalized. */ |
|
|
|
i = dividend_size - 1; |
|
r = dividend_ptr[i]; |
|
|
|
if (r >= divisor_limb) |
|
r = 0; |
|
else |
else |
i--; |
|
|
|
for (; i >= 0; i--) |
|
{ |
{ |
n0 = dividend_ptr[i]; |
mp_limb_t inv; |
udiv_qrnnd (dummy, r, r, n0, divisor_limb); |
invert_limb (inv, d); |
|
|
|
for (i = un - 2; i >= 0; i--) |
|
{ |
|
n0 = up[i] << GMP_NAIL_BITS; |
|
udiv_qrnnd_preinv (dummy, r, r, |
|
(n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)), |
|
d, inv); |
|
r >>= GMP_NAIL_BITS; |
|
n1 = n0; |
|
} |
|
udiv_qrnnd_preinv (dummy, r, r, n1 << norm, d, inv); |
|
r >>= GMP_NAIL_BITS; |
|
return r >> norm; |
} |
} |
return r; |
|
} |
} |
} |
} |