Annotation of OpenXM_contrib/gmp/mpz/powm.c, Revision 1.1.1.2
1.1 maekawa 1: /* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
2:
1.1.1.2 ! maekawa 3: Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, Inc.
! 4: Contributed by Paul Zimmermann.
1.1 maekawa 5:
6: This file is part of the GNU MP Library.
7:
8: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 ! maekawa 9: it under the terms of the GNU Lesser General Public License as published by
! 10: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 11: option) any later version.
12:
13: The GNU MP Library is distributed in the hope that it will be useful, but
14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 ! maekawa 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 16: License for more details.
17:
1.1.1.2 ! maekawa 18: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: MA 02111-1307, USA. */
22:
23: #include "gmp.h"
24: #include "gmp-impl.h"
25: #include "longlong.h"
1.1.1.2 ! maekawa 26: #ifdef BERKELEY_MP
! 27: #include "mp.h"
! 28: #endif
! 29:
1.1 maekawa 30:
1.1.1.2 ! maekawa 31: /* set c <- (a*b)/R^n mod m c has to have at least (2n) allocated limbs */
! 32: static void
! 33: #if __STDC__
! 34: mpz_redc (mpz_ptr c, mpz_srcptr a, mpz_srcptr b, mpz_srcptr m, mp_limb_t Nprim)
! 35: #else
! 36: mpz_redc (c, a, b, m, Nprim)
! 37: mpz_ptr c;
! 38: mpz_srcptr a;
! 39: mpz_srcptr b;
! 40: mpz_srcptr m;
! 41: mp_limb_t Nprim;
! 42: #endif
! 43: {
! 44: mp_ptr cp, mp = PTR (m);
! 45: mp_limb_t cy, cout = 0;
! 46: mp_limb_t q;
! 47: size_t j, n = ABSIZ (m);
! 48:
! 49: ASSERT (ALLOC (c) >= 2 * n);
! 50:
! 51: mpz_mul (c, a, b);
! 52: cp = PTR (c);
! 53: j = ABSIZ (c);
! 54: MPN_ZERO (cp + j, 2 * n - j);
! 55: for (j = 0; j < n; j++)
! 56: {
! 57: q = cp[0] * Nprim;
! 58: cy = mpn_addmul_1 (cp, mp, n, q);
! 59: cout += mpn_add_1 (cp + n, cp + n, n - j, cy);
! 60: cp++;
! 61: }
! 62: cp -= n;
! 63: if (cout)
! 64: {
! 65: cy = cout - mpn_sub_n (cp, cp + n, mp, n);
! 66: while (cy)
! 67: cy -= mpn_sub_n (cp, cp, mp, n);
! 68: }
! 69: else
! 70: MPN_COPY (cp, cp + n, n);
! 71: MPN_NORMALIZE (cp, n);
! 72: SIZ (c) = SIZ (c) < 0 ? -n : n;
! 73: }
! 74:
! 75: /* average number of calls to redc for an exponent of n bits
! 76: with the sliding window algorithm of base 2^k: the optimal is
! 77: obtained for the value of k which minimizes 2^(k-1)+n/(k+1):
! 78:
! 79: n\k 4 5 6 7 8
! 80: 128 156* 159 171 200 261
! 81: 256 309 307* 316 343 403
! 82: 512 617 607* 610 632 688
! 83: 1024 1231 1204 1195* 1207 1256
! 84: 2048 2461 2399 2366 2360* 2396
! 85: 4096 4918 4787 4707 4665* 4670
! 86: */
! 87:
1.1 maekawa 88: #ifndef BERKELEY_MP
89: void
90: #if __STDC__
1.1.1.2 ! maekawa 91: mpz_powm (mpz_ptr res, mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod)
1.1 maekawa 92: #else
1.1.1.2 ! maekawa 93: mpz_powm (res, base, e, mod)
1.1 maekawa 94: mpz_ptr res;
95: mpz_srcptr base;
1.1.1.2 ! maekawa 96: mpz_srcptr e;
1.1 maekawa 97: mpz_srcptr mod;
98: #endif
99: #else /* BERKELEY_MP */
100: void
101: #if __STDC__
1.1.1.2 ! maekawa 102: pow (mpz_srcptr base, mpz_srcptr e, mpz_srcptr mod, mpz_ptr res)
1.1 maekawa 103: #else
1.1.1.2 ! maekawa 104: pow (base, e, mod, res)
1.1 maekawa 105: mpz_srcptr base;
1.1.1.2 ! maekawa 106: mpz_srcptr e;
1.1 maekawa 107: mpz_srcptr mod;
108: mpz_ptr res;
109: #endif
110: #endif /* BERKELEY_MP */
111: {
1.1.1.2 ! maekawa 112: mp_limb_t invm, *ep, c, mask;
! 113: mpz_t xx, *g;
! 114: mp_size_t n, i, K, j, l, k;
! 115: int sh;
! 116: int use_redc;
! 117:
! 118: #ifdef POWM_DEBUG
! 119: mpz_t exp;
! 120: mpz_init (exp);
! 121: #endif
1.1 maekawa 122:
1.1.1.2 ! maekawa 123: n = ABSIZ (mod);
1.1 maekawa 124:
1.1.1.2 ! maekawa 125: if (n == 0)
! 126: DIVIDE_BY_ZERO;
1.1 maekawa 127:
1.1.1.2 ! maekawa 128: if (SIZ (e) == 0)
1.1 maekawa 129: {
130: /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
1.1.1.2 ! maekawa 131: depending on if MOD equals 1. */
! 132: SIZ(res) = (ABSIZ (mod) == 1 && (PTR(mod))[0] == 1) ? 0 : 1;
! 133: PTR(res)[0] = 1;
1.1 maekawa 134: return;
135: }
136:
1.1.1.2 ! maekawa 137: /* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD.
! 138: In REDC each modular multiplication costs about 2*n^2 limbs operations,
! 139: whereas using usual reduction it costs 3*K(n), where K(n) is the cost of a
! 140: multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
! 141: for example using Burnikel-Ziegler's algorithm. This gives a theoretical
! 142: threshold of a*KARATSUBA_SQR_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
! 143: 2.66. */
! 144: /* For now, also disable REDC when MOD is even, as the inverse can't
! 145: handle that. */
1.1 maekawa 146:
1.1.1.2 ! maekawa 147: #ifndef POWM_THRESHOLD
! 148: #define POWM_THRESHOLD ((8 * KARATSUBA_SQR_THRESHOLD) / 3)
! 149: #endif
1.1 maekawa 150:
1.1.1.2 ! maekawa 151: use_redc = (n < POWM_THRESHOLD && PTR(mod)[0] % 2 != 0);
! 152: if (use_redc)
1.1 maekawa 153: {
1.1.1.2 ! maekawa 154: /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
! 155: modlimb_invert (invm, PTR(mod)[0]);
! 156: invm = -invm;
1.1 maekawa 157: }
158:
1.1.1.2 ! maekawa 159: /* determines optimal value of k */
! 160: l = ABSIZ (e) * BITS_PER_MP_LIMB; /* number of bits of exponent */
! 161: k = 1;
! 162: K = 2;
! 163: while (2 * l > K * (2 + k * (3 + k)))
1.1 maekawa 164: {
1.1.1.2 ! maekawa 165: k++;
! 166: K *= 2;
1.1 maekawa 167: }
168:
1.1.1.2 ! maekawa 169: g = (mpz_t *) (*_mp_allocate_func) (K / 2 * sizeof (mpz_t));
! 170: /* compute x*R^n where R=2^BITS_PER_MP_LIMB */
! 171: mpz_init (g[0]);
! 172: if (use_redc)
1.1 maekawa 173: {
1.1.1.2 ! maekawa 174: mpz_mul_2exp (g[0], base, n * BITS_PER_MP_LIMB);
! 175: mpz_mod (g[0], g[0], mod);
! 176: }
! 177: else
! 178: mpz_mod (g[0], base, mod);
1.1 maekawa 179:
1.1.1.2 ! maekawa 180: /* compute xx^g for odd g < 2^k */
! 181: mpz_init (xx);
! 182: if (use_redc)
! 183: {
! 184: _mpz_realloc (xx, 2 * n);
! 185: mpz_redc (xx, g[0], g[0], mod, invm); /* xx = x^2*R^n */
! 186: }
! 187: else
! 188: {
! 189: mpz_mul (xx, g[0], g[0]);
! 190: mpz_mod (xx, xx, mod);
! 191: }
! 192: for (i = 1; i < K / 2; i++)
! 193: {
! 194: mpz_init (g[i]);
! 195: if (use_redc)
1.1 maekawa 196: {
1.1.1.2 ! maekawa 197: _mpz_realloc (g[i], 2 * n);
! 198: mpz_redc (g[i], g[i - 1], xx, mod, invm); /* g[i] = x^(2i+1)*R^n */
1.1 maekawa 199: }
200: else
1.1.1.2 ! maekawa 201: {
! 202: mpz_mul (g[i], g[i - 1], xx);
! 203: mpz_mod (g[i], g[i], mod);
! 204: }
! 205: }
1.1 maekawa 206:
1.1.1.2 ! maekawa 207: /* now starts the real stuff */
! 208: mask = (mp_limb_t) ((1<<k) - 1);
! 209: ep = PTR (e);
! 210: i = ABSIZ (e) - 1; /* current index */
! 211: c = ep[i]; /* current limb */
! 212: count_leading_zeros (sh, c);
! 213: sh = BITS_PER_MP_LIMB - sh; /* significant bits in ep[i] */
! 214: sh -= k; /* index of lower bit of ep[i] to take into account */
! 215: if (sh < 0)
! 216: { /* k-sh extra bits are needed */
! 217: if (i > 0)
! 218: {
! 219: i--;
! 220: c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh));
! 221: sh += BITS_PER_MP_LIMB;
! 222: }
1.1 maekawa 223: }
224: else
1.1.1.2 ! maekawa 225: c = c >> sh;
! 226: #ifdef POWM_DEBUG
! 227: printf ("-1/m mod 2^%u = %lu\n", BITS_PER_MP_LIMB, invm);
! 228: mpz_set_ui (exp, c);
! 229: #endif
! 230: j=0;
! 231: while (c % 2 == 0)
1.1 maekawa 232: {
1.1.1.2 ! maekawa 233: j++;
! 234: c = (c >> 1);
! 235: }
! 236: mpz_set (xx, g[c >> 1]);
! 237: while (j--)
! 238: {
! 239: if (use_redc)
! 240: mpz_redc (xx, xx, xx, mod, invm);
! 241: else
1.1 maekawa 242: {
1.1.1.2 ! maekawa 243: mpz_mul (xx, xx, xx);
! 244: mpz_mod (xx, xx, mod);
1.1 maekawa 245: }
1.1.1.2 ! maekawa 246: }
! 247:
! 248: #ifdef POWM_DEBUG
! 249: printf ("x^"); mpz_out_str (0, 10, exp);
! 250: printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx);
! 251: putchar ('\n');
! 252: #endif
! 253:
! 254: while (i > 0 || sh > 0)
! 255: {
! 256: c = ep[i];
! 257: sh -= k;
! 258: l = k; /* number of bits treated */
! 259: if (sh < 0)
1.1 maekawa 260: {
1.1.1.2 ! maekawa 261: if (i > 0)
! 262: {
! 263: i--;
! 264: c = (c << (-sh)) | (ep[i] >> (BITS_PER_MP_LIMB + sh));
! 265: sh += BITS_PER_MP_LIMB;
! 266: }
! 267: else
! 268: {
! 269: l += sh; /* may be less bits than k here */
! 270: c = c & ((1<<l) - 1);
! 271: }
1.1 maekawa 272: }
1.1.1.2 ! maekawa 273: else
! 274: c = c >> sh;
! 275: c = c & mask;
! 276:
! 277: /* this while loop implements the sliding window improvement */
! 278: while ((c & (1 << (k - 1))) == 0 && (i > 0 || sh > 0))
1.1 maekawa 279: {
1.1.1.2 ! maekawa 280: if (use_redc) mpz_redc (xx, xx, xx, mod, invm);
! 281: else
! 282: {
! 283: mpz_mul (xx, xx, xx);
! 284: mpz_mod (xx, xx, mod);
! 285: }
! 286: if (sh)
! 287: {
! 288: sh--;
! 289: c = (c<<1) + ((ep[i]>>sh) & 1);
! 290: }
! 291: else
! 292: {
! 293: i--;
! 294: sh = BITS_PER_MP_LIMB - 1;
! 295: c = (c<<1) + (ep[i]>>sh);
! 296: }
1.1 maekawa 297: }
298:
1.1.1.2 ! maekawa 299: #ifdef POWM_DEBUG
! 300: printf ("l=%u c=%lu\n", l, c);
! 301: mpz_mul_2exp (exp, exp, k);
! 302: mpz_add_ui (exp, exp, c);
! 303: #endif
1.1 maekawa 304:
1.1.1.2 ! maekawa 305: /* now replace xx by xx^(2^k)*x^c */
! 306: if (c != 0)
! 307: {
! 308: j = 0;
! 309: while (c % 2 == 0)
! 310: {
! 311: j++;
! 312: c = c >> 1;
! 313: }
! 314: /* c0 = c * 2^j, i.e. xx^(2^k)*x^c = (A^(2^(k - j))*c)^(2^j) */
! 315: l -= j;
! 316: while (l--)
! 317: if (use_redc) mpz_redc (xx, xx, xx, mod, invm);
! 318: else
1.1 maekawa 319: {
1.1.1.2 ! maekawa 320: mpz_mul (xx, xx, xx);
! 321: mpz_mod (xx, xx, mod);
1.1 maekawa 322: }
1.1.1.2 ! maekawa 323: if (use_redc)
! 324: mpz_redc (xx, xx, g[c >> 1], mod, invm);
! 325: else
! 326: {
! 327: mpz_mul (xx, xx, g[c >> 1]);
! 328: mpz_mod (xx, xx, mod);
! 329: }
! 330: }
! 331: else
! 332: j = l; /* case c=0 */
! 333: while (j--)
! 334: {
! 335: if (use_redc)
! 336: mpz_redc (xx, xx, xx, mod, invm);
! 337: else
! 338: {
! 339: mpz_mul (xx, xx, xx);
! 340: mpz_mod (xx, xx, mod);
! 341: }
! 342: }
! 343: #ifdef POWM_DEBUG
! 344: printf ("x^"); mpz_out_str (0, 10, exp);
! 345: printf ("*2^%u mod m = ", n * BITS_PER_MP_LIMB); mpz_out_str (0, 10, xx);
! 346: putchar ('\n');
! 347: #endif
! 348: }
1.1 maekawa 349:
1.1.1.2 ! maekawa 350: /* now convert back xx to xx/R^n */
! 351: if (use_redc)
! 352: {
! 353: mpz_set_ui (g[0], 1);
! 354: mpz_redc (xx, xx, g[0], mod, invm);
! 355: if (mpz_cmp (xx, mod) >= 0)
! 356: mpz_sub (xx, xx, mod);
! 357: }
! 358: mpz_set (res, xx);
1.1 maekawa 359:
1.1.1.2 ! maekawa 360: mpz_clear (xx);
! 361: for (i = 0; i < K / 2; i++)
! 362: mpz_clear (g[i]);
! 363: (*_mp_free_func) (g, K / 2 * sizeof (mpz_t));
1.1 maekawa 364: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>