/* mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb) --
   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
   Return the single-limb remainder.
   There are no constraints on the value of the divisor.

Copyright 1991, 1993, 1994, 1999, 2000, 2002 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include "gmp-impl.h"
#include "longlong.h"


/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always, CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

#ifndef MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD  0
#endif
#ifndef MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD  0
#endif


/* The comments in mpn/generic/divrem_1.c apply here too.

   As noted in the algorithms section of the manual, the shifts in the loop
   for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
   by a final (r*2^n)%(d*2^n).  In fact if it happens that a%(d*2^n) can
   skip a division where (a*2^n)%(d*2^n) can't then there's the same number
   of divide steps, though how often that happens depends on the assumed
   distributions of dividend and divisor.  In any case this idea is left to
   CPU specific implementations to consider.  */

/* Return {up,un} mod d, for any d != 0 and any un >= 0 (un == 0 gives 0).
   The high-bit-set ("norm") and high-bit-clear ("unnorm") divisor cases are
   handled separately; each uses either plain udiv_qrnnd or the multiply-by-
   inverse udiv_qrnnd_preinv according to the tuned thresholds above.  */
mp_limb_t
mpn_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  i;
  mp_limb_t  n1, n0, r;
  mp_limb_t  dummy;   /* quotient limb, discarded */

  ASSERT (un >= 0);
  ASSERT (d != 0);

  /* Botch: Should this be handled at all?  Rely on callers?
     But note un==0 is currently required by mpz/fdiv_r_ui.c and possibly
     other places.  */
  if (un == 0)
    return 0;

  d <<= GMP_NAIL_BITS;

  if ((d & GMP_LIMB_HIGHBIT) != 0)
    {
      /* High limb is initial remainder, possibly with one subtract of
	 d to get r<d.  */
      r = up[un - 1] << GMP_NAIL_BITS;
      if (r >= d)
	r -= d;
      r >>= GMP_NAIL_BITS;
      un--;
      if (un == 0)
	return r;

      if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
	{
	plain:
	  for (i = un - 1; i >= 0; i--)
	    {
	      n0 = up[i] << GMP_NAIL_BITS;
	      udiv_qrnnd (dummy, r, r, n0, d);
	      r >>= GMP_NAIL_BITS;
	    }
	  return r;
	}
      else
	{
	  /* Multiply-by-inverse loop; one invert_limb then only
	     multiplications in the inner loop.  */
	  mp_limb_t  inv;
	  invert_limb (inv, d);
	  for (i = un - 1; i >= 0; i--)
	    {
	      n0 = up[i] << GMP_NAIL_BITS;
	      udiv_qrnnd_preinv (dummy, r, r, n0, d, inv);
	      r >>= GMP_NAIL_BITS;
	    }
	  return r;
	}
    }
  else
    {
      int norm;

      /* Skip a division if high < divisor.  Having the test here before
	 normalizing will still skip as often as possible.  */
      r = up[un - 1] << GMP_NAIL_BITS;
      if (r < d)
	{
	  r >>= GMP_NAIL_BITS;
	  un--;
	  if (un == 0)
	    return r;
	}
      else
	r = 0;

      /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
	 code above. */
      if (! UDIV_NEEDS_NORMALIZATION
	  && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
	goto plain;

      count_leading_zeros (norm, d);
      d <<= norm;

      n1 = up[un - 1] << GMP_NAIL_BITS;
      r = (r << norm) | (n1 >> (GMP_LIMB_BITS - norm));

      if (UDIV_NEEDS_NORMALIZATION
	  && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
	{
	  /* Plain division, shifting limb pairs into normalized position.  */
	  for (i = un - 2; i >= 0; i--)
	    {
	      n0 = up[i] << GMP_NAIL_BITS;
	      udiv_qrnnd (dummy, r, r,
			  (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
			  d);
	      r >>= GMP_NAIL_BITS;
	      n1 = n0;
	    }
	  udiv_qrnnd (dummy, r, r, n1 << norm, d);
	  r >>= GMP_NAIL_BITS;
	  return r >> norm;
	}
      else
	{
	  /* Multiply-by-inverse on the normalized divisor.  */
	  mp_limb_t inv;
	  invert_limb (inv, d);

	  for (i = un - 2; i >= 0; i--)
	    {
	      n0 = up[i] << GMP_NAIL_BITS;
	      udiv_qrnnd_preinv (dummy, r, r,
				 (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
				 d, inv);
	      r >>= GMP_NAIL_BITS;
	      n1 = n0;
	    }
	  udiv_qrnnd_preinv (dummy, r, r, n1 << norm, d, inv);
	  r >>= GMP_NAIL_BITS;
	  return r >> norm;
	}
    }
}