version 1.1.1.2, 2000/09/09 14:12:56 |
version 1.1.1.3, 2003/08/25 16:06:33 |
|
|
/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD. |
/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD. |
|
|
Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, Inc. |
Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software |
Contributed by Paul Zimmermann. |
Foundation, Inc. Contributed by Paul Zimmermann. |
|
|
This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
|
|
Line 20 along with the GNU MP Library; see the file COPYING.LI |
|
Line 20 along with the GNU MP Library; see the file COPYING.LI |
|
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
MA 02111-1307, USA. */ |
MA 02111-1307, USA. */ |
|
|
|
|
#include "gmp.h" |
#include "gmp.h" |
#include "gmp-impl.h" |
#include "gmp-impl.h" |
#include "longlong.h" |
#include "longlong.h" |
Line 28 MA 02111-1307, USA. */ |
|
Line 29 MA 02111-1307, USA. */ |
|
#endif |
#endif |
|
|
|
|
/* set c <- (a*b)/R^n mod m c has to have at least (2n) allocated limbs */ |
/* Set c <- tp/R^n mod m. |
static void |
tp should have space for 2*n+1 limbs; clobber its most significant limb. */ |
#if __STDC__ |
#if ! WANT_REDC_GLOBAL |
mpz_redc (mpz_ptr c, mpz_srcptr a, mpz_srcptr b, mpz_srcptr m, mp_limb_t Nprim) |
static |
#else |
|
mpz_redc (c, a, b, m, Nprim) |
|
mpz_ptr c; |
|
mpz_srcptr a; |
|
mpz_srcptr b; |
|
mpz_srcptr m; |
|
mp_limb_t Nprim; |
|
#endif |
#endif |
|
void |
|
redc (mp_ptr cp, mp_srcptr mp, mp_size_t n, mp_limb_t Nprim, mp_ptr tp) |
{ |
{ |
mp_ptr cp, mp = PTR (m); |
mp_limb_t cy; |
mp_limb_t cy, cout = 0; |
|
mp_limb_t q; |
mp_limb_t q; |
size_t j, n = ABSIZ (m); |
mp_size_t j; |
|
|
ASSERT (ALLOC (c) >= 2 * n); |
tp[2 * n] = 0; /* carry guard */ |
|
|
mpz_mul (c, a, b); |
|
cp = PTR (c); |
|
j = ABSIZ (c); |
|
MPN_ZERO (cp + j, 2 * n - j); |
|
for (j = 0; j < n; j++) |
for (j = 0; j < n; j++) |
{ |
{ |
q = cp[0] * Nprim; |
q = tp[0] * Nprim; |
cy = mpn_addmul_1 (cp, mp, n, q); |
cy = mpn_addmul_1 (tp, mp, n, q); |
cout += mpn_add_1 (cp + n, cp + n, n - j, cy); |
mpn_incr_u (tp + n, cy); |
cp++; |
tp++; |
} |
} |
cp -= n; |
|
if (cout) |
if (tp[n] != 0) |
|
mpn_sub_n (cp, tp, mp, n); |
|
else |
|
MPN_COPY (cp, tp, n); |
|
} |
|
|
|
/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and |
|
t is defined by (tp,mn). */ |
|
static void |
|
reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn) |
|
{ |
|
mp_ptr qp; |
|
TMP_DECL (marker); |
|
|
|
TMP_MARK (marker); |
|
qp = TMP_ALLOC_LIMBS (an - mn + 1); |
|
|
|
mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn); |
|
|
|
TMP_FREE (marker); |
|
} |
|
|
|
#if REDUCE_EXPONENT |
|
/* Return the group order of the ring mod m. */ |
|
static mp_limb_t |
|
phi (mp_limb_t t) |
|
{ |
|
mp_limb_t d, m, go; |
|
|
|
go = 1; |
|
|
|
if (t % 2 == 0) |
{ |
{ |
cy = cout - mpn_sub_n (cp, cp + n, mp, n); |
t = t / 2; |
while (cy) |
while (t % 2 == 0) |
cy -= mpn_sub_n (cp, cp, mp, n); |
{ |
|
go *= 2; |
|
t = t / 2; |
|
} |
} |
} |
else |
for (d = 3;; d += 2) |
MPN_COPY (cp, cp + n, n); |
{ |
MPN_NORMALIZE (cp, n); |
m = d - 1; |
SIZ (c) = SIZ (c) < 0 ? -n : n; |
for (;;) |
|
{ |
|
unsigned long int q = t / d; |
|
if (q < d) |
|
{ |
|
if (t <= 1) |
|
return go; |
|
if (t == d) |
|
return go * m; |
|
return go * (t - 1); |
|
} |
|
if (t != q * d) |
|
break; |
|
go *= m; |
|
m = d; |
|
t = q; |
|
} |
|
} |
} |
} |
|
#endif |
|
|
/* average number of calls to redc for an exponent of n bits |
/* average number of calls to redc for an exponent of n bits |
with the sliding window algorithm of base 2^k: the optimal is |
with the sliding window algorithm of base 2^k: the optimal is |
Line 85 mpz_redc (c, a, b, m, Nprim) |
|
Line 128 mpz_redc (c, a, b, m, Nprim) |
|
4096 4918 4787 4707 4665* 4670 |
4096 4918 4787 4707 4665* 4670 |
*/ |
*/ |
|
|
#ifndef BERKELEY_MP |
|
void |
/* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD. In REDC |
#if __STDC__ |
each modular multiplication costs about 2*n^2 limbs operations, whereas |
mpz_powm (mpz_ptr res, mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod) |
using usual reduction it costs 3*K(n), where K(n) is the cost of a |
#else |
multiplication using Karatsuba, and a division is assumed to cost 2*K(n), |
mpz_powm (res, base, e, mod) |
for example using Burnikel-Ziegler's algorithm. This gives a theoretical |
mpz_ptr res; |
threshold of a*SQR_KARATSUBA_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~ |
mpz_srcptr base; |
2.66. */ |
mpz_srcptr e; |
/* For now, also disable REDC when MOD is even, as the inverse can't handle |
mpz_srcptr mod; |
that. At some point, we might want to make the code faster for that case, |
|
perhaps using CRR. */ |
|
|
|
#ifndef POWM_THRESHOLD |
|
#define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3) |
#endif |
#endif |
#else /* BERKELEY_MP */ |
|
|
#define HANDLE_NEGATIVE_EXPONENT 1 |
|
#undef REDUCE_EXPONENT |
|
|
void |
void |
#if __STDC__ |
#ifndef BERKELEY_MP |
pow (mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod, mpz_ptr res) |
mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m) |
#else |
#else /* BERKELEY_MP */ |
pow (base, e, mod, res) |
pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r) |
mpz_srcptr base; |
|
mpz_srcptr e; |
|
mpz_srcptr mod; |
|
mpz_ptr res; |
|
#endif |
|
#endif /* BERKELEY_MP */ |
#endif /* BERKELEY_MP */ |
{ |
{ |
mp_limb_t invm, *ep, c, mask; |
mp_ptr xp, tp, qp, gp, this_gp; |
mpz_t xx, *g; |
mp_srcptr bp, ep, mp; |
mp_size_t n, i, K, j, l, k; |
mp_size_t bn, es, en, mn, xn; |
|
mp_limb_t invm, c; |
|
unsigned long int enb; |
|
mp_size_t i, K, j, l, k; |
|
int m_zero_cnt, e_zero_cnt; |
int sh; |
int sh; |
int use_redc; |
int use_redc; |
|
#if HANDLE_NEGATIVE_EXPONENT |
#ifdef POWM_DEBUG |
mpz_t new_b; |
mpz_t exp; |
|
mpz_init (exp); |
|
#endif |
#endif |
|
#if REDUCE_EXPONENT |
|
mpz_t new_e; |
|
#endif |
|
TMP_DECL (marker); |
|
|
n = ABSIZ (mod); |
mp = PTR(m); |
|
mn = ABSIZ (m); |
if (n == 0) |
if (mn == 0) |
DIVIDE_BY_ZERO; |
DIVIDE_BY_ZERO; |
|
|
if (SIZ (e) == 0) |
TMP_MARK (marker); |
|
|
|
es = SIZ (e); |
|
if (es <= 0) |
{ |
{ |
/* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 |
if (es == 0) |
depending on if MOD equals 1. */ |
{ |
SIZ(res) = (ABSIZ (mod) == 1 && (PTR(mod))[0] == 1) ? 0 : 1; |
/* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if |
PTR(res)[0] = 1; |
m equals 1. */ |
return; |
SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1; |
|
PTR(r)[0] = 1; |
|
TMP_FREE (marker); /* we haven't really allocated anything here */ |
|
return; |
|
} |
|
#if HANDLE_NEGATIVE_EXPONENT |
|
MPZ_TMP_INIT (new_b, mn + 1); |
|
|
|
if (! mpz_invert (new_b, b, m)) |
|
DIVIDE_BY_ZERO; |
|
b = new_b; |
|
es = -es; |
|
#else |
|
DIVIDE_BY_ZERO; |
|
#endif |
} |
} |
|
en = es; |
|
|
/* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD. |
#if REDUCE_EXPONENT |
In REDC each modular multiplication costs about 2*n^2 limbs operations, |
/* Reduce exponent by dividing it by phi(m) when m small. */ |
whereas using usual reduction it costs 3*K(n), where K(n) is the cost of a |
if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150) |
multiplication using Karatsuba, and a division is assumed to cost 2*K(n), |
{ |
for example using Burnikel-Ziegler's algorithm. This gives a theoretical |
MPZ_TMP_INIT (new_e, 2); |
threshold of a*KARATSUBA_SQR_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~ |
mpz_mod_ui (new_e, e, phi (mp[0])); |
2.66. */ |
e = new_e; |
/* For now, also disable REDC when MOD is even, as the inverse can't |
} |
handle that. */ |
|
|
|
#ifndef POWM_THRESHOLD |
|
#define POWM_THRESHOLD ((8 * KARATSUBA_SQR_THRESHOLD) / 3) |
|
#endif |
#endif |
|
|
use_redc = (n < POWM_THRESHOLD && PTR(mod)[0] % 2 != 0); |
use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0; |
if (use_redc) |
if (use_redc) |
{ |
{ |
/* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */ |
/* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */ |
modlimb_invert (invm, PTR(mod)[0]); |
modlimb_invert (invm, mp[0]); |
invm = -invm; |
invm = -invm; |
} |
} |
|
else |
|
{ |
|
/* Normalize m (i.e. make its most significant bit set) as required by |
|
division functions below. */ |
|
count_leading_zeros (m_zero_cnt, mp[mn - 1]); |
|
m_zero_cnt -= GMP_NAIL_BITS; |
|
if (m_zero_cnt != 0) |
|
{ |
|
mp_ptr new_mp; |
|
new_mp = TMP_ALLOC_LIMBS (mn); |
|
mpn_lshift (new_mp, mp, mn, m_zero_cnt); |
|
mp = new_mp; |
|
} |
|
} |
|
|
/* determines optimal value of k */ |
/* Determine optimal value of k, the number of exponent bits we look at |
l = ABSIZ (e) * BITS_PER_MP_LIMB; /* number of bits of exponent */ |
at a time. */ |
|
count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]); |
|
e_zero_cnt -= GMP_NAIL_BITS; |
|
enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */ |
k = 1; |
k = 1; |
K = 2; |
K = 2; |
while (2 * l > K * (2 + k * (3 + k))) |
while (2 * enb > K * (2 + k * (3 + k))) |
{ |
{ |
k++; |
k++; |
K *= 2; |
K *= 2; |
} |
} |
|
|
g = (mpz_t *) (*_mp_allocate_func) (K / 2 * sizeof (mpz_t)); |
tp = TMP_ALLOC_LIMBS (2 * mn + 1); |
/* compute x*R^n where R=2^BITS_PER_MP_LIMB */ |
qp = TMP_ALLOC_LIMBS (mn + 1); |
mpz_init (g[0]); |
|
if (use_redc) |
|
{ |
|
mpz_mul_2exp (g[0], base, n * BITS_PER_MP_LIMB); |
|
mpz_mod (g[0], g[0], mod); |
|
} |
|
else |
|
mpz_mod (g[0], base, mod); |
|
|
|
/* compute xx^g for odd g < 2^k */ |
gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn); |
mpz_init (xx); |
|
if (use_redc) |
/* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */ |
|
bn = ABSIZ (b); |
|
bp = PTR(b); |
|
/* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary |
|
for speed or correctness to do this when b and m have the same number of |
|
limbs, perhaps remove mpn_cmp call. */ |
|
if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0)) |
{ |
{ |
_mpz_realloc (xx, 2 * n); |
/* Reduce possibly huge base while moving it to gp[0]. Use a function |
mpz_redc (xx, g[0], g[0], mod, invm); /* xx = x^2*R^n */ |
call to reduce, since we don't want the quotient allocation to |
|
live until function return. */ |
|
if (use_redc) |
|
{ |
|
reduce (tp + mn, bp, bn, mp, mn); /* b mod m */ |
|
MPN_ZERO (tp, mn); |
|
mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */ |
|
} |
|
else |
|
{ |
|
reduce (gp, bp, bn, mp, mn); |
|
} |
} |
} |
else |
else |
{ |
{ |
mpz_mul (xx, g[0], g[0]); |
/* |b| < m. We pad out operands to become mn limbs, which simplifies |
mpz_mod (xx, xx, mod); |
the rest of the function, but slows things down when the |b| << m. */ |
} |
|
for (i = 1; i < K / 2; i++) |
|
{ |
|
mpz_init (g[i]); |
|
if (use_redc) |
if (use_redc) |
{ |
{ |
_mpz_realloc (g[i], 2 * n); |
MPN_ZERO (tp, mn); |
mpz_redc (g[i], g[i - 1], xx, mod, invm); /* g[i] = x^(2i+1)*R^n */ |
MPN_COPY (tp + mn, bp, bn); |
|
MPN_ZERO (tp + mn + bn, mn - bn); |
|
mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); |
} |
} |
else |
else |
{ |
{ |
mpz_mul (g[i], g[i - 1], xx); |
MPN_COPY (gp, bp, bn); |
mpz_mod (g[i], g[i], mod); |
MPN_ZERO (gp + bn, mn - bn); |
} |
} |
} |
} |
|
|
/* now starts the real stuff */ |
/* Compute xx^i for odd g < 2^i. */ |
mask = (mp_limb_t) ((1<<k) - 1); |
|
|
xp = TMP_ALLOC_LIMBS (mn); |
|
mpn_sqr_n (tp, gp, mn); |
|
if (use_redc) |
|
redc (xp, mp, mn, invm, tp); /* xx = x^2*R^n */ |
|
else |
|
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); |
|
this_gp = gp; |
|
for (i = 1; i < K / 2; i++) |
|
{ |
|
mpn_mul_n (tp, this_gp, xp, mn); |
|
this_gp += mn; |
|
if (use_redc) |
|
redc (this_gp, mp, mn, invm, tp); /* g[i] = x^(2i+1)*R^n */ |
|
else |
|
mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn); |
|
} |
|
|
|
/* Start the real stuff. */ |
ep = PTR (e); |
ep = PTR (e); |
i = ABSIZ (e) - 1; /* current index */ |
i = en - 1; /* current index */ |
c = ep[i]; /* current limb */ |
c = ep[i]; /* current limb */ |
count_leading_zeros (sh, c); |
sh = GMP_NUMB_BITS - e_zero_cnt; /* significant bits in ep[i] */ |
sh = BITS_PER_MP_LIMB - sh; /* significant bits in ep[i] */ |
|
sh -= k; /* index of lower bit of ep[i] to take into account */ |
sh -= k; /* index of lower bit of ep[i] to take into account */ |
if (sh < 0) |
if (sh < 0) |
{ /* k-sh extra bits are needed */ |
{ /* k-sh extra bits are needed */ |
if (i > 0) |
if (i > 0) |
{ |
{ |
i--; |
i--; |
c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh)); |
c <<= (-sh); |
sh += BITS_PER_MP_LIMB; |
sh += GMP_NUMB_BITS; |
|
c |= ep[i] >> sh; |
} |
} |
} |
} |
else |
else |
c = c >> sh; |
c >>= sh; |
#ifdef POWM_DEBUG |
|
printf ("-1/m mod 2^%u = %lu\n", BITS_PER_MP_LIMB, invm); |
for (j = 0; c % 2 == 0; j++) |
mpz_set_ui (exp, c); |
c >>= 1; |
#endif |
|
j=0; |
MPN_COPY (xp, gp + mn * (c >> 1), mn); |
while (c % 2 == 0) |
while (--j >= 0) |
{ |
{ |
j++; |
mpn_sqr_n (tp, xp, mn); |
c = (c >> 1); |
|
} |
|
mpz_set (xx, g[c >> 1]); |
|
while (j--) |
|
{ |
|
if (use_redc) |
if (use_redc) |
mpz_redc (xx, xx, xx, mod, invm); |
redc (xp, mp, mn, invm, tp); |
else |
else |
{ |
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); |
mpz_mul (xx, xx, xx); |
|
mpz_mod (xx, xx, mod); |
|
} |
|
} |
} |
|
|
#ifdef POWM_DEBUG |
|
printf ("x^"); mpz_out_str (0, 10, exp); |
|
printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx); |
|
putchar ('\n'); |
|
#endif |
|
|
|
while (i > 0 || sh > 0) |
while (i > 0 || sh > 0) |
{ |
{ |
c = ep[i]; |
c = ep[i]; |
sh -= k; |
|
l = k; /* number of bits treated */ |
l = k; /* number of bits treated */ |
|
sh -= l; |
if (sh < 0) |
if (sh < 0) |
{ |
{ |
if (i > 0) |
if (i > 0) |
{ |
{ |
i--; |
i--; |
c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh)); |
c <<= (-sh); |
sh += BITS_PER_MP_LIMB; |
sh += GMP_NUMB_BITS; |
|
c |= ep[i] >> sh; |
} |
} |
else |
else |
{ |
{ |
l += sh; /* may be less bits than k here */ |
l += sh; /* last chunk of bits from e; l < k */ |
c = c & ((1<<l) - 1); |
|
} |
} |
} |
} |
else |
else |
c = c >> sh; |
c >>= sh; |
c = c & mask; |
c &= ((mp_limb_t) 1 << l) - 1; |
|
|
/* this while loop implements the sliding window improvement */ |
/* This while loop implements the sliding window improvement--loop while |
while ((c & (1 << (k - 1))) == 0 && (i > 0 || sh > 0)) |
the most significant bit of c is zero, squaring xx as we go. */ |
|
while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0)) |
{ |
{ |
if (use_redc) mpz_redc (xx, xx, xx, mod, invm); |
mpn_sqr_n (tp, xp, mn); |
|
if (use_redc) |
|
redc (xp, mp, mn, invm, tp); |
else |
else |
|
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); |
|
if (sh != 0) |
{ |
{ |
mpz_mul (xx, xx, xx); |
|
mpz_mod (xx, xx, mod); |
|
} |
|
if (sh) |
|
{ |
|
sh--; |
sh--; |
c = (c<<1) + ((ep[i]>>sh) & 1); |
c = (c << 1) + ((ep[i] >> sh) & 1); |
} |
} |
else |
else |
{ |
{ |
i--; |
i--; |
sh = BITS_PER_MP_LIMB - 1; |
sh = GMP_NUMB_BITS - 1; |
c = (c<<1) + (ep[i]>>sh); |
c = (c << 1) + (ep[i] >> sh); |
} |
} |
} |
} |
|
|
#ifdef POWM_DEBUG |
/* Replace xx by xx^(2^l)*x^c. */ |
printf ("l=%u c=%lu\n", l, c); |
|
mpz_mul_2exp (exp, exp, k); |
|
mpz_add_ui (exp, exp, c); |
|
#endif |
|
|
|
/* now replace xx by xx^(2^k)*x^c */ |
|
if (c != 0) |
if (c != 0) |
{ |
{ |
j = 0; |
for (j = 0; c % 2 == 0; j++) |
while (c % 2 == 0) |
c >>= 1; |
|
|
|
/* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */ |
|
l -= j; |
|
while (--l >= 0) |
{ |
{ |
j++; |
mpn_sqr_n (tp, xp, mn); |
c = c >> 1; |
if (use_redc) |
|
redc (xp, mp, mn, invm, tp); |
|
else |
|
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); |
} |
} |
/* c0 = c * 2^j, i.e. xx^(2^k)*x^c = (A^(2^(k - j))*c)^(2^j) */ |
mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn); |
l -= j; |
|
while (l--) |
|
if (use_redc) mpz_redc (xx, xx, xx, mod, invm); |
|
else |
|
{ |
|
mpz_mul (xx, xx, xx); |
|
mpz_mod (xx, xx, mod); |
|
} |
|
if (use_redc) |
if (use_redc) |
mpz_redc (xx, xx, g[c >> 1], mod, invm); |
redc (xp, mp, mn, invm, tp); |
else |
else |
{ |
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); |
mpz_mul (xx, xx, g[c >> 1]); |
|
mpz_mod (xx, xx, mod); |
|
} |
|
} |
} |
else |
else |
j = l; /* case c=0 */ |
j = l; /* case c=0 */ |
while (j--) |
while (--j >= 0) |
{ |
{ |
|
mpn_sqr_n (tp, xp, mn); |
if (use_redc) |
if (use_redc) |
mpz_redc (xx, xx, xx, mod, invm); |
redc (xp, mp, mn, invm, tp); |
else |
else |
{ |
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); |
mpz_mul (xx, xx, xx); |
|
mpz_mod (xx, xx, mod); |
|
} |
|
} |
} |
#ifdef POWM_DEBUG |
|
printf ("x^"); mpz_out_str (0, 10, exp); |
|
printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx); |
|
putchar ('\n'); |
|
#endif |
|
} |
} |
|
|
/* now convert back xx to xx/R^n */ |
|
if (use_redc) |
if (use_redc) |
{ |
{ |
mpz_set_ui (g[0], 1); |
/* Convert back xx to xx/R^n. */ |
mpz_redc (xx, xx, g[0], mod, invm); |
MPN_COPY (tp, xp, mn); |
if (mpz_cmp (xx, mod) >= 0) |
MPN_ZERO (tp + mn, mn); |
mpz_sub (xx, xx, mod); |
redc (xp, mp, mn, invm, tp); |
|
if (mpn_cmp (xp, mp, mn) >= 0) |
|
mpn_sub_n (xp, xp, mp, mn); |
} |
} |
mpz_set (res, xx); |
else |
|
{ |
|
if (m_zero_cnt != 0) |
|
{ |
|
mp_limb_t cy; |
|
cy = mpn_lshift (tp, xp, mn, m_zero_cnt); |
|
tp[mn] = cy; |
|
mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn); |
|
mpn_rshift (xp, xp, mn, m_zero_cnt); |
|
} |
|
} |
|
xn = mn; |
|
MPN_NORMALIZE (xp, xn); |
|
|
mpz_clear (xx); |
if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0) |
for (i = 0; i < K / 2; i++) |
{ |
mpz_clear (g[i]); |
mp = PTR(m); /* want original, unnormalized m */ |
(*_mp_free_func) (g, K / 2 * sizeof (mpz_t)); |
mpn_sub (xp, mp, mn, xp, xn); |
|
xn = mn; |
|
MPN_NORMALIZE (xp, xn); |
|
} |
|
MPZ_REALLOC (r, xn); |
|
SIZ (r) = xn; |
|
MPN_COPY (PTR(r), xp, xn); |
|
|
|
__GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn); |
|
TMP_FREE (marker); |
} |
} |