version 1.1.1.1, 2000/01/10 15:35:27 |
version 1.1.1.2, 2000/09/09 14:12:56 |
|
|
/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD. |
/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD. |
|
|
Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. |
Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, Inc. |
|
Contributed by Paul Zimmermann. |
|
|
This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
|
|
The GNU MP Library is free software; you can redistribute it and/or modify |
The GNU MP Library is free software; you can redistribute it and/or modify |
it under the terms of the GNU Library General Public License as published by |
it under the terms of the GNU Lesser General Public License as published by |
the Free Software Foundation; either version 2 of the License, or (at your |
the Free Software Foundation; either version 2.1 of the License, or (at your |
option) any later version. |
option) any later version. |
|
|
The GNU MP Library is distributed in the hope that it will be useful, but |
The GNU MP Library is distributed in the hope that it will be useful, but |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
License for more details. |
License for more details. |
|
|
You should have received a copy of the GNU Library General Public License |
You should have received a copy of the GNU Lesser General Public License |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
MA 02111-1307, USA. */ |
MA 02111-1307, USA. */ |
Line 22 MA 02111-1307, USA. */ |
|
Line 23 MA 02111-1307, USA. */ |
|
#include "gmp.h" |
#include "gmp.h" |
#include "gmp-impl.h" |
#include "gmp-impl.h" |
#include "longlong.h" |
#include "longlong.h" |
|
#ifdef BERKELEY_MP |
|
#include "mp.h" |
|
#endif |
|
|
|
|
|
/* set c <- (a*b)/R^n mod m c has to have at least (2n) allocated limbs */ |
|
static void |
|
#if __STDC__ |
|
mpz_redc (mpz_ptr c, mpz_srcptr a, mpz_srcptr b, mpz_srcptr m, mp_limb_t Nprim) |
|
#else |
|
mpz_redc (c, a, b, m, Nprim) |
|
mpz_ptr c; |
|
mpz_srcptr a; |
|
mpz_srcptr b; |
|
mpz_srcptr m; |
|
mp_limb_t Nprim; |
|
#endif |
|
{ |
|
mp_ptr cp, mp = PTR (m); |
|
mp_limb_t cy, cout = 0; |
|
mp_limb_t q; |
|
size_t j, n = ABSIZ (m); |
|
|
|
ASSERT (ALLOC (c) >= 2 * n); |
|
|
|
mpz_mul (c, a, b); |
|
cp = PTR (c); |
|
j = ABSIZ (c); |
|
MPN_ZERO (cp + j, 2 * n - j); |
|
for (j = 0; j < n; j++) |
|
{ |
|
q = cp[0] * Nprim; |
|
cy = mpn_addmul_1 (cp, mp, n, q); |
|
cout += mpn_add_1 (cp + n, cp + n, n - j, cy); |
|
cp++; |
|
} |
|
cp -= n; |
|
if (cout) |
|
{ |
|
cy = cout - mpn_sub_n (cp, cp + n, mp, n); |
|
while (cy) |
|
cy -= mpn_sub_n (cp, cp, mp, n); |
|
} |
|
else |
|
MPN_COPY (cp, cp + n, n); |
|
MPN_NORMALIZE (cp, n); |
|
SIZ (c) = SIZ (c) < 0 ? -n : n; |
|
} |
|
|
|
/* average number of calls to redc for an exponent of n bits |
|
with the sliding window algorithm of base 2^k: the optimal is |
|
obtained for the value of k which minimizes 2^(k-1)+n/(k+1): |
|
|
|
n\k 4 5 6 7 8 |
|
128 156* 159 171 200 261 |
|
256 309 307* 316 343 403 |
|
512 617 607* 610 632 688 |
|
1024 1231 1204 1195* 1207 1256 |
|
2048 2461 2399 2366 2360* 2396 |
|
4096 4918 4787 4707 4665* 4670 |
|
*/ |
|
|
#ifndef BERKELEY_MP |
#ifndef BERKELEY_MP |
void |
void |
#if __STDC__ |
#if __STDC__ |
mpz_powm (mpz_ptr res, mpz_srcptr base, mpz_srcptr exp, mpz_srcptr mod) |
mpz_powm (mpz_ptr res, mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod) |
#else |
#else |
mpz_powm (res, base, exp, mod) |
mpz_powm (res, base, e, mod) |
mpz_ptr res; |
mpz_ptr res; |
mpz_srcptr base; |
mpz_srcptr base; |
mpz_srcptr exp; |
mpz_srcptr e; |
mpz_srcptr mod; |
mpz_srcptr mod; |
#endif |
#endif |
#else /* BERKELEY_MP */ |
#else /* BERKELEY_MP */ |
void |
void |
#if __STDC__ |
#if __STDC__ |
pow (mpz_srcptr base, mpz_srcptr exp, mpz_srcptr mod, mpz_ptr res) |
pow (mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod, mpz_ptr res) |
#else |
#else |
pow (base, exp, mod, res) |
pow (base, e, mod, res) |
mpz_srcptr base; |
mpz_srcptr base; |
mpz_srcptr exp; |
mpz_srcptr e; |
mpz_srcptr mod; |
mpz_srcptr mod; |
mpz_ptr res; |
mpz_ptr res; |
#endif |
#endif |
#endif /* BERKELEY_MP */ |
#endif /* BERKELEY_MP */ |
{ |
{ |
mp_ptr rp, ep, mp, bp; |
mp_limb_t invm, *ep, c, mask; |
mp_size_t esize, msize, bsize, rsize; |
mpz_t xx, *g; |
mp_size_t size; |
mp_size_t n, i, K, j, l, k; |
int mod_shift_cnt; |
int sh; |
int negative_result; |
int use_redc; |
mp_limb_t *free_me = NULL; |
|
size_t free_me_size; |
|
TMP_DECL (marker); |
|
|
|
esize = ABS (exp->_mp_size); |
#ifdef POWM_DEBUG |
msize = ABS (mod->_mp_size); |
mpz_t exp; |
size = 2 * msize; |
mpz_init (exp); |
|
#endif |
|
|
rp = res->_mp_d; |
n = ABSIZ (mod); |
ep = exp->_mp_d; |
|
|
|
if (msize == 0) |
if (n == 0) |
msize = 1 / msize; /* provoke a signal */ |
DIVIDE_BY_ZERO; |
|
|
if (esize == 0) |
if (SIZ (e) == 0) |
{ |
{ |
/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 |
/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 |
depending on if MOD equals 1. */ |
depending on if MOD equals 1. */ |
rp[0] = 1; |
SIZ(res) = (ABSIZ (mod) == 1 && (PTR(mod))[0] == 1) ? 0 : 1; |
res->_mp_size = (msize == 1 && (mod->_mp_d)[0] == 1) ? 0 : 1; |
PTR(res)[0] = 1; |
return; |
return; |
} |
} |
|
|
TMP_MARK (marker); |
/* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD. |
|
In REDC each modular multiplication costs about 2*n^2 limbs operations, |
|
whereas using usual reduction it costs 3*K(n), where K(n) is the cost of a |
|
multiplication using Karatsuba, and a division is assumed to cost 2*K(n), |
|
for example using Burnikel-Ziegler's algorithm. This gives a theoretical |
|
threshold of a*KARATSUBA_SQR_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~ |
|
2.66. */ |
|
/* For now, also disable REDC when MOD is even, as the inverse can't |
|
handle that. */ |
|
|
/* Normalize MOD (i.e. make its most significant bit set) as required by |
#ifndef POWM_THRESHOLD |
mpn_divmod. This will make the intermediate values in the calculation |
#define POWM_THRESHOLD ((8 * KARATSUBA_SQR_THRESHOLD) / 3) |
slightly larger, but the correct result is obtained after a final |
#endif |
reduction using the original MOD value. */ |
|
|
|
mp = (mp_ptr) TMP_ALLOC (msize * BYTES_PER_MP_LIMB); |
use_redc = (n < POWM_THRESHOLD && PTR(mod)[0] % 2 != 0); |
count_leading_zeros (mod_shift_cnt, mod->_mp_d[msize - 1]); |
if (use_redc) |
if (mod_shift_cnt != 0) |
{ |
mpn_lshift (mp, mod->_mp_d, msize, mod_shift_cnt); |
/* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */ |
else |
modlimb_invert (invm, PTR(mod)[0]); |
MPN_COPY (mp, mod->_mp_d, msize); |
invm = -invm; |
|
} |
|
|
bsize = ABS (base->_mp_size); |
/* determines optimal value of k */ |
if (bsize > msize) |
l = ABSIZ (e) * BITS_PER_MP_LIMB; /* number of bits of exponent */ |
|
k = 1; |
|
K = 2; |
|
while (2 * l > K * (2 + k * (3 + k))) |
{ |
{ |
/* The base is larger than the module. Reduce it. */ |
k++; |
|
K *= 2; |
|
} |
|
|
/* Allocate (BSIZE + 1) with space for remainder and quotient. |
g = (mpz_t *) (*_mp_allocate_func) (K / 2 * sizeof (mpz_t)); |
(The quotient is (bsize - msize + 1) limbs.) */ |
/* compute x*R^n where R=2^BITS_PER_MP_LIMB */ |
bp = (mp_ptr) TMP_ALLOC ((bsize + 1) * BYTES_PER_MP_LIMB); |
mpz_init (g[0]); |
MPN_COPY (bp, base->_mp_d, bsize); |
if (use_redc) |
/* We don't care about the quotient, store it above the remainder, |
{ |
at BP + MSIZE. */ |
mpz_mul_2exp (g[0], base, n * BITS_PER_MP_LIMB); |
mpn_divmod (bp + msize, bp, bsize, mp, msize); |
mpz_mod (g[0], g[0], mod); |
bsize = msize; |
|
/* Canonicalize the base, since we are going to multiply with it |
|
quite a few times. */ |
|
MPN_NORMALIZE (bp, bsize); |
|
} |
} |
else |
else |
bp = base->_mp_d; |
mpz_mod (g[0], base, mod); |
|
|
if (bsize == 0) |
/* compute xx^g for odd g < 2^k */ |
|
mpz_init (xx); |
|
if (use_redc) |
{ |
{ |
res->_mp_size = 0; |
_mpz_realloc (xx, 2 * n); |
TMP_FREE (marker); |
mpz_redc (xx, g[0], g[0], mod, invm); /* xx = x^2*R^n */ |
return; |
|
} |
} |
|
else |
if (res->_mp_alloc < size) |
|
{ |
{ |
/* We have to allocate more space for RES. If any of the input |
mpz_mul (xx, g[0], g[0]); |
parameters are identical to RES, defer deallocation of the old |
mpz_mod (xx, xx, mod); |
space. */ |
} |
|
for (i = 1; i < K / 2; i++) |
if (rp == ep || rp == mp || rp == bp) |
{ |
|
mpz_init (g[i]); |
|
if (use_redc) |
{ |
{ |
free_me = rp; |
_mpz_realloc (g[i], 2 * n); |
free_me_size = res->_mp_alloc; |
mpz_redc (g[i], g[i - 1], xx, mod, invm); /* g[i] = x^(2i+1)*R^n */ |
} |
} |
else |
else |
(*_mp_free_func) (rp, res->_mp_alloc * BYTES_PER_MP_LIMB); |
{ |
|
mpz_mul (g[i], g[i - 1], xx); |
|
mpz_mod (g[i], g[i], mod); |
|
} |
|
} |
|
|
rp = (mp_ptr) (*_mp_allocate_func) (size * BYTES_PER_MP_LIMB); |
/* now starts the real stuff */ |
res->_mp_alloc = size; |
mask = (mp_limb_t) ((1<<k) - 1); |
res->_mp_d = rp; |
ep = PTR (e); |
|
i = ABSIZ (e) - 1; /* current index */ |
|
c = ep[i]; /* current limb */ |
|
count_leading_zeros (sh, c); |
|
sh = BITS_PER_MP_LIMB - sh; /* significant bits in ep[i] */ |
|
sh -= k; /* index of lower bit of ep[i] to take into account */ |
|
if (sh < 0) |
|
{ /* k-sh extra bits are needed */ |
|
if (i > 0) |
|
{ |
|
i--; |
|
c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh)); |
|
sh += BITS_PER_MP_LIMB; |
|
} |
} |
} |
else |
else |
|
c = c >> sh; |
|
#ifdef POWM_DEBUG |
|
printf ("-1/m mod 2^%u = %lu\n", BITS_PER_MP_LIMB, invm); |
|
mpz_set_ui (exp, c); |
|
#endif |
|
j=0; |
|
while (c % 2 == 0) |
{ |
{ |
/* Make BASE, EXP and MOD not overlap with RES. */ |
j++; |
if (rp == bp) |
c = (c >> 1); |
|
} |
|
mpz_set (xx, g[c >> 1]); |
|
while (j--) |
|
{ |
|
if (use_redc) |
|
mpz_redc (xx, xx, xx, mod, invm); |
|
else |
{ |
{ |
/* RES and BASE are identical. Allocate temp. space for BASE. */ |
mpz_mul (xx, xx, xx); |
bp = (mp_ptr) TMP_ALLOC (bsize * BYTES_PER_MP_LIMB); |
mpz_mod (xx, xx, mod); |
MPN_COPY (bp, rp, bsize); |
|
} |
} |
if (rp == ep) |
} |
|
|
|
#ifdef POWM_DEBUG |
|
printf ("x^"); mpz_out_str (0, 10, exp); |
|
printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx); |
|
putchar ('\n'); |
|
#endif |
|
|
|
while (i > 0 || sh > 0) |
|
{ |
|
c = ep[i]; |
|
sh -= k; |
|
l = k; /* number of bits treated */ |
|
if (sh < 0) |
{ |
{ |
/* RES and EXP are identical. Allocate temp. space for EXP. */ |
if (i > 0) |
ep = (mp_ptr) TMP_ALLOC (esize * BYTES_PER_MP_LIMB); |
{ |
MPN_COPY (ep, rp, esize); |
i--; |
|
c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh)); |
|
sh += BITS_PER_MP_LIMB; |
|
} |
|
else |
|
{ |
|
l += sh; /* may be less bits than k here */ |
|
c = c & ((1<<l) - 1); |
|
} |
} |
} |
if (rp == mp) |
else |
|
c = c >> sh; |
|
c = c & mask; |
|
|
|
/* this while loop implements the sliding window improvement */ |
|
while ((c & (1 << (k - 1))) == 0 && (i > 0 || sh > 0)) |
{ |
{ |
/* RES and MOD are identical. Allocate temporary space for MOD. */ |
if (use_redc) mpz_redc (xx, xx, xx, mod, invm); |
mp = (mp_ptr) TMP_ALLOC (msize * BYTES_PER_MP_LIMB); |
else |
MPN_COPY (mp, rp, msize); |
{ |
|
mpz_mul (xx, xx, xx); |
|
mpz_mod (xx, xx, mod); |
|
} |
|
if (sh) |
|
{ |
|
sh--; |
|
c = (c<<1) + ((ep[i]>>sh) & 1); |
|
} |
|
else |
|
{ |
|
i--; |
|
sh = BITS_PER_MP_LIMB - 1; |
|
c = (c<<1) + (ep[i]>>sh); |
|
} |
} |
} |
} |
|
|
|
MPN_COPY (rp, bp, bsize); |
#ifdef POWM_DEBUG |
rsize = bsize; |
printf ("l=%u c=%lu\n", l, c); |
|
mpz_mul_2exp (exp, exp, k); |
|
mpz_add_ui (exp, exp, c); |
|
#endif |
|
|
{ |
/* now replace xx by xx^(2^k)*x^c */ |
mp_size_t i; |
if (c != 0) |
mp_ptr xp = (mp_ptr) TMP_ALLOC (2 * (msize + 1) * BYTES_PER_MP_LIMB); |
{ |
int c; |
j = 0; |
mp_limb_t e; |
while (c % 2 == 0) |
mp_limb_t carry_limb; |
{ |
|
j++; |
negative_result = (ep[0] & 1) && base->_mp_size < 0; |
c = c >> 1; |
|
} |
i = esize - 1; |
/* c0 = c * 2^j, i.e. xx^(2^k)*x^c = (A^(2^(k - j))*c)^(2^j) */ |
e = ep[i]; |
l -= j; |
count_leading_zeros (c, e); |
while (l--) |
e = (e << c) << 1; /* shift the exp bits to the left, lose msb */ |
if (use_redc) mpz_redc (xx, xx, xx, mod, invm); |
c = BITS_PER_MP_LIMB - 1 - c; |
else |
|
|
/* Main loop. |
|
|
|
Make the result be pointed to alternately by XP and RP. This |
|
helps us avoid block copying, which would otherwise be necessary |
|
with the overlap restrictions of mpn_divmod. With 50% probability |
|
the result after this loop will be in the area originally pointed |
|
by RP (==RES->_mp_d), and with 50% probability in the area originally |
|
pointed to by XP. */ |
|
|
|
for (;;) |
|
{ |
|
while (c != 0) |
|
{ |
|
mp_ptr tp; |
|
mp_size_t xsize; |
|
|
|
mpn_mul_n (xp, rp, rp, rsize); |
|
xsize = 2 * rsize; |
|
if (xsize > msize) |
|
{ |
{ |
mpn_divmod (xp + msize, xp, xsize, mp, msize); |
mpz_mul (xx, xx, xx); |
xsize = msize; |
mpz_mod (xx, xx, mod); |
} |
} |
|
if (use_redc) |
|
mpz_redc (xx, xx, g[c >> 1], mod, invm); |
|
else |
|
{ |
|
mpz_mul (xx, xx, g[c >> 1]); |
|
mpz_mod (xx, xx, mod); |
|
} |
|
} |
|
else |
|
j = l; /* case c=0 */ |
|
while (j--) |
|
{ |
|
if (use_redc) |
|
mpz_redc (xx, xx, xx, mod, invm); |
|
else |
|
{ |
|
mpz_mul (xx, xx, xx); |
|
mpz_mod (xx, xx, mod); |
|
} |
|
} |
|
#ifdef POWM_DEBUG |
|
printf ("x^"); mpz_out_str (0, 10, exp); |
|
printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx); |
|
putchar ('\n'); |
|
#endif |
|
} |
|
|
tp = rp; rp = xp; xp = tp; |
/* now convert back xx to xx/R^n */ |
rsize = xsize; |
if (use_redc) |
|
|
if ((mp_limb_signed_t) e < 0) |
|
{ |
|
mpn_mul (xp, rp, rsize, bp, bsize); |
|
xsize = rsize + bsize; |
|
if (xsize > msize) |
|
{ |
|
mpn_divmod (xp + msize, xp, xsize, mp, msize); |
|
xsize = msize; |
|
} |
|
|
|
tp = rp; rp = xp; xp = tp; |
|
rsize = xsize; |
|
} |
|
e <<= 1; |
|
c--; |
|
} |
|
|
|
i--; |
|
if (i < 0) |
|
break; |
|
e = ep[i]; |
|
c = BITS_PER_MP_LIMB; |
|
} |
|
|
|
/* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT |
|
steps. Adjust the result by reducing it with the original MOD. |
|
|
|
Also make sure the result is put in RES->_mp_d (where it already |
|
might be, see above). */ |
|
|
|
if (mod_shift_cnt != 0) |
|
{ |
|
carry_limb = mpn_lshift (res->_mp_d, rp, rsize, mod_shift_cnt); |
|
rp = res->_mp_d; |
|
if (carry_limb != 0) |
|
{ |
|
rp[rsize] = carry_limb; |
|
rsize++; |
|
} |
|
} |
|
else |
|
{ |
|
MPN_COPY (res->_mp_d, rp, rsize); |
|
rp = res->_mp_d; |
|
} |
|
|
|
if (rsize >= msize) |
|
{ |
|
mpn_divmod (rp + msize, rp, rsize, mp, msize); |
|
rsize = msize; |
|
} |
|
|
|
/* Remove any leading zero words from the result. */ |
|
if (mod_shift_cnt != 0) |
|
mpn_rshift (rp, rp, rsize, mod_shift_cnt); |
|
MPN_NORMALIZE (rp, rsize); |
|
} |
|
|
|
if (negative_result && rsize != 0) |
|
{ |
{ |
if (mod_shift_cnt != 0) |
mpz_set_ui (g[0], 1); |
mpn_rshift (mp, mp, msize, mod_shift_cnt); |
mpz_redc (xx, xx, g[0], mod, invm); |
mpn_sub (rp, mp, msize, rp, rsize); |
if (mpz_cmp (xx, mod) >= 0) |
rsize = msize; |
mpz_sub (xx, xx, mod); |
MPN_NORMALIZE (rp, rsize); |
|
} |
} |
res->_mp_size = rsize; |
mpz_set (res, xx); |
|
|
if (free_me != NULL) |
mpz_clear (xx); |
(*_mp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB); |
for (i = 0; i < K / 2; i++) |
TMP_FREE (marker); |
mpz_clear (g[i]); |
|
(*_mp_free_func) (g, K / 2 * sizeof (mpz_t)); |
} |
} |