Annotation of OpenXM_contrib/gmp/mpz/powm.c, Revision 1.1.1.3
1.1 maekawa 1: /* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
2:
1.1.1.3 ! ohara 3: Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
! 4: Foundation, Inc. Contributed by Paul Zimmermann.
1.1 maekawa 5:
6: This file is part of the GNU MP Library.
7:
8: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 maekawa 9: it under the terms of the GNU Lesser General Public License as published by
10: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 11: option) any later version.
12:
13: The GNU MP Library is distributed in the hope that it will be useful, but
14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 maekawa 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 16: License for more details.
17:
1.1.1.2 maekawa 18: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: MA 02111-1307, USA. */
22:
1.1.1.3 ! ohara 23:
1.1 maekawa 24: #include "gmp.h"
25: #include "gmp-impl.h"
26: #include "longlong.h"
1.1.1.2 maekawa 27: #ifdef BERKELEY_MP
28: #include "mp.h"
29: #endif
30:
1.1 maekawa 31:
1.1.1.3 ! ohara 32: /* Set c <- tp/R^n mod m.
! 33: tp should have space for 2*n+1 limbs; clobber its most significant limb. */
! 34: #if ! WANT_REDC_GLOBAL
! 35: static
1.1.1.2 maekawa 36: #endif
1.1.1.3 ! ohara 37: void
! 38: redc (mp_ptr cp, mp_srcptr mp, mp_size_t n, mp_limb_t Nprim, mp_ptr tp)
1.1.1.2 maekawa 39: {
1.1.1.3 ! ohara 40: mp_limb_t cy;
1.1.1.2 maekawa 41: mp_limb_t q;
1.1.1.3 ! ohara 42: mp_size_t j;
1.1.1.2 maekawa 43:
1.1.1.3 ! ohara 44: tp[2 * n] = 0; /* carry guard */
1.1.1.2 maekawa 45:
46: for (j = 0; j < n; j++)
47: {
1.1.1.3 ! ohara 48: q = tp[0] * Nprim;
! 49: cy = mpn_addmul_1 (tp, mp, n, q);
! 50: mpn_incr_u (tp + n, cy);
! 51: tp++;
1.1.1.2 maekawa 52: }
1.1.1.3 ! ohara 53:
! 54: if (tp[n] != 0)
! 55: mpn_sub_n (cp, tp, mp, n);
1.1.1.2 maekawa 56: else
1.1.1.3 ! ohara 57: MPN_COPY (cp, tp, n);
1.1.1.2 maekawa 58: }
59:
1.1.1.3 ! ohara 60: /* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
! 61: t is defined by (tp,mn). */
! 62: static void
! 63: reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
! 64: {
! 65: mp_ptr qp;
! 66: TMP_DECL (marker);
! 67:
! 68: TMP_MARK (marker);
! 69: qp = TMP_ALLOC_LIMBS (an - mn + 1);
! 70:
! 71: mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn);
! 72:
! 73: TMP_FREE (marker);
! 74: }
! 75:
! 76: #if REDUCE_EXPONENT
! 77: /* Return the group order of the ring mod m. */
! 78: static mp_limb_t
! 79: phi (mp_limb_t t)
! 80: {
! 81: mp_limb_t d, m, go;
! 82:
! 83: go = 1;
! 84:
! 85: if (t % 2 == 0)
! 86: {
! 87: t = t / 2;
! 88: while (t % 2 == 0)
! 89: {
! 90: go *= 2;
! 91: t = t / 2;
! 92: }
! 93: }
! 94: for (d = 3;; d += 2)
! 95: {
! 96: m = d - 1;
! 97: for (;;)
! 98: {
! 99: unsigned long int q = t / d;
! 100: if (q < d)
! 101: {
! 102: if (t <= 1)
! 103: return go;
! 104: if (t == d)
! 105: return go * m;
! 106: return go * (t - 1);
! 107: }
! 108: if (t != q * d)
! 109: break;
! 110: go *= m;
! 111: m = d;
! 112: t = q;
! 113: }
! 114: }
! 115: }
! 116: #endif
! 117:
1.1.1.2 maekawa 118: /* average number of calls to redc for an exponent of n bits
119: with the sliding window algorithm of base 2^k: the optimal is
120: obtained for the value of k which minimizes 2^(k-1)+n/(k+1):
121:
122: n\k 4 5 6 7 8
123: 128 156* 159 171 200 261
124: 256 309 307* 316 343 403
125: 512 617 607* 610 632 688
126: 1024 1231 1204 1195* 1207 1256
127: 2048 2461 2399 2366 2360* 2396
128: 4096 4918 4787 4707 4665* 4670
129: */
130:
1.1.1.3 ! ohara 131:
! 132: /* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD. In REDC
! 133: each modular multiplication costs about 2*n^2 limbs operations, whereas
! 134: using usual reduction it costs 3*K(n), where K(n) is the cost of a
! 135: multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
! 136: for example using Burnikel-Ziegler's algorithm. This gives a theoretical
! 137: threshold of a*SQR_KARATSUBA_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
! 138: 2.66. */
! 139: /* For now, also disable REDC when MOD is even, as the inverse can't handle
! 140: that. At some point, we might want to make the code faster for that case,
! 141: perhaps using CRR. */
! 142:
! 143: #ifndef POWM_THRESHOLD
! 144: #define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3)
1.1 maekawa 145: #endif
1.1.1.3 ! ohara 146:
! 147: #define HANDLE_NEGATIVE_EXPONENT 1
! 148: #undef REDUCE_EXPONENT
! 149:
1.1 maekawa 150: void
1.1.1.3 ! ohara 151: #ifndef BERKELEY_MP
! 152: mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
! 153: #else /* BERKELEY_MP */
! 154: pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
1.1 maekawa 155: #endif /* BERKELEY_MP */
156: {
1.1.1.3 ! ohara 157: mp_ptr xp, tp, qp, gp, this_gp;
! 158: mp_srcptr bp, ep, mp;
! 159: mp_size_t bn, es, en, mn, xn;
! 160: mp_limb_t invm, c;
! 161: unsigned long int enb;
! 162: mp_size_t i, K, j, l, k;
! 163: int m_zero_cnt, e_zero_cnt;
1.1.1.2 maekawa 164: int sh;
165: int use_redc;
1.1.1.3 ! ohara 166: #if HANDLE_NEGATIVE_EXPONENT
! 167: mpz_t new_b;
1.1.1.2 maekawa 168: #endif
1.1.1.3 ! ohara 169: #if REDUCE_EXPONENT
! 170: mpz_t new_e;
! 171: #endif
! 172: TMP_DECL (marker);
1.1 maekawa 173:
1.1.1.3 ! ohara 174: mp = PTR(m);
! 175: mn = ABSIZ (m);
! 176: if (mn == 0)
1.1.1.2 maekawa 177: DIVIDE_BY_ZERO;
1.1 maekawa 178:
1.1.1.3 ! ohara 179: TMP_MARK (marker);
! 180:
! 181: es = SIZ (e);
! 182: if (es <= 0)
1.1 maekawa 183: {
1.1.1.3 ! ohara 184: if (es == 0)
! 185: {
! 186: /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if
! 187: m equals 1. */
! 188: SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
! 189: PTR(r)[0] = 1;
! 190: TMP_FREE (marker); /* we haven't really allocated anything here */
! 191: return;
! 192: }
! 193: #if HANDLE_NEGATIVE_EXPONENT
! 194: MPZ_TMP_INIT (new_b, mn + 1);
1.1 maekawa 195:
1.1.1.3 ! ohara 196: if (! mpz_invert (new_b, b, m))
! 197: DIVIDE_BY_ZERO;
! 198: b = new_b;
! 199: es = -es;
! 200: #else
! 201: DIVIDE_BY_ZERO;
! 202: #endif
! 203: }
! 204: en = es;
! 205:
! 206: #if REDUCE_EXPONENT
! 207: /* Reduce exponent by dividing it by phi(m) when m small. */
! 208: if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
! 209: {
! 210: MPZ_TMP_INIT (new_e, 2);
! 211: mpz_mod_ui (new_e, e, phi (mp[0]));
! 212: e = new_e;
! 213: }
1.1.1.2 maekawa 214: #endif
1.1 maekawa 215:
1.1.1.3 ! ohara 216: use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
1.1.1.2 maekawa 217: if (use_redc)
1.1 maekawa 218: {
1.1.1.2 maekawa 219: /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
1.1.1.3 ! ohara 220: modlimb_invert (invm, mp[0]);
1.1.1.2 maekawa 221: invm = -invm;
1.1 maekawa 222: }
1.1.1.3 ! ohara 223: else
! 224: {
! 225: /* Normalize m (i.e. make its most significant bit set) as required by
! 226: division functions below. */
! 227: count_leading_zeros (m_zero_cnt, mp[mn - 1]);
! 228: m_zero_cnt -= GMP_NAIL_BITS;
! 229: if (m_zero_cnt != 0)
! 230: {
! 231: mp_ptr new_mp;
! 232: new_mp = TMP_ALLOC_LIMBS (mn);
! 233: mpn_lshift (new_mp, mp, mn, m_zero_cnt);
! 234: mp = new_mp;
! 235: }
! 236: }
1.1 maekawa 237:
1.1.1.3 ! ohara 238: /* Determine optimal value of k, the number of exponent bits we look at
! 239: at a time. */
! 240: count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
! 241: e_zero_cnt -= GMP_NAIL_BITS;
! 242: enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
1.1.1.2 maekawa 243: k = 1;
244: K = 2;
1.1.1.3 ! ohara 245: while (2 * enb > K * (2 + k * (3 + k)))
1.1 maekawa 246: {
1.1.1.2 maekawa 247: k++;
248: K *= 2;
1.1 maekawa 249: }
250:
1.1.1.3 ! ohara 251: tp = TMP_ALLOC_LIMBS (2 * mn + 1);
! 252: qp = TMP_ALLOC_LIMBS (mn + 1);
1.1 maekawa 253:
1.1.1.3 ! ohara 254: gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);
! 255:
! 256: /* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */
! 257: bn = ABSIZ (b);
! 258: bp = PTR(b);
! 259: /* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary
! 260: for speed or correctness to do this when b and m have the same number of
! 261: limbs, perhaps remove mpn_cmp call. */
! 262: if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
! 263: {
! 264: /* Reduce possibly huge base while moving it to gp[0]. Use a function
! 265: call to reduce, since we don't want the quotient allocation to
! 266: live until function return. */
! 267: if (use_redc)
! 268: {
! 269: reduce (tp + mn, bp, bn, mp, mn); /* b mod m */
! 270: MPN_ZERO (tp, mn);
! 271: mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */
! 272: }
! 273: else
! 274: {
! 275: reduce (gp, bp, bn, mp, mn);
! 276: }
1.1.1.2 maekawa 277: }
278: else
279: {
1.1.1.3 ! ohara 280: /* |b| < m. We pad out operands to become mn limbs, which simplifies
! 281: the rest of the function, but slows things down when the |b| << m. */
1.1.1.2 maekawa 282: if (use_redc)
1.1 maekawa 283: {
1.1.1.3 ! ohara 284: MPN_ZERO (tp, mn);
! 285: MPN_COPY (tp + mn, bp, bn);
! 286: MPN_ZERO (tp + mn + bn, mn - bn);
! 287: mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
1.1 maekawa 288: }
289: else
1.1.1.2 maekawa 290: {
1.1.1.3 ! ohara 291: MPN_COPY (gp, bp, bn);
! 292: MPN_ZERO (gp + bn, mn - bn);
1.1.1.2 maekawa 293: }
294: }
1.1 maekawa 295:
1.1.1.3 ! ohara 296: /* Compute xx^i for odd g < 2^i. */
! 297:
! 298: xp = TMP_ALLOC_LIMBS (mn);
! 299: mpn_sqr_n (tp, gp, mn);
! 300: if (use_redc)
! 301: redc (xp, mp, mn, invm, tp); /* xx = x^2*R^n */
! 302: else
! 303: mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
! 304: this_gp = gp;
! 305: for (i = 1; i < K / 2; i++)
! 306: {
! 307: mpn_mul_n (tp, this_gp, xp, mn);
! 308: this_gp += mn;
! 309: if (use_redc)
! 310: redc (this_gp, mp, mn, invm, tp); /* g[i] = x^(2i+1)*R^n */
! 311: else
! 312: mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
! 313: }
! 314:
! 315: /* Start the real stuff. */
1.1.1.2 maekawa 316: ep = PTR (e);
1.1.1.3 ! ohara 317: i = en - 1; /* current index */
1.1.1.2 maekawa 318: c = ep[i]; /* current limb */
1.1.1.3 ! ohara 319: sh = GMP_NUMB_BITS - e_zero_cnt; /* significant bits in ep[i] */
1.1.1.2 maekawa 320: sh -= k; /* index of lower bit of ep[i] to take into account */
321: if (sh < 0)
322: { /* k-sh extra bits are needed */
323: if (i > 0)
324: {
325: i--;
1.1.1.3 ! ohara 326: c <<= (-sh);
! 327: sh += GMP_NUMB_BITS;
! 328: c |= ep[i] >> sh;
1.1.1.2 maekawa 329: }
1.1 maekawa 330: }
331: else
1.1.1.3 ! ohara 332: c >>= sh;
! 333:
! 334: for (j = 0; c % 2 == 0; j++)
! 335: c >>= 1;
! 336:
! 337: MPN_COPY (xp, gp + mn * (c >> 1), mn);
! 338: while (--j >= 0)
1.1.1.2 maekawa 339: {
1.1.1.3 ! ohara 340: mpn_sqr_n (tp, xp, mn);
1.1.1.2 maekawa 341: if (use_redc)
1.1.1.3 ! ohara 342: redc (xp, mp, mn, invm, tp);
1.1.1.2 maekawa 343: else
1.1.1.3 ! ohara 344: mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
1.1.1.2 maekawa 345: }
346:
347: while (i > 0 || sh > 0)
348: {
349: c = ep[i];
350: l = k; /* number of bits treated */
1.1.1.3 ! ohara 351: sh -= l;
1.1.1.2 maekawa 352: if (sh < 0)
1.1 maekawa 353: {
1.1.1.2 maekawa 354: if (i > 0)
355: {
356: i--;
1.1.1.3 ! ohara 357: c <<= (-sh);
! 358: sh += GMP_NUMB_BITS;
! 359: c |= ep[i] >> sh;
1.1.1.2 maekawa 360: }
361: else
362: {
1.1.1.3 ! ohara 363: l += sh; /* last chunk of bits from e; l < k */
1.1.1.2 maekawa 364: }
1.1 maekawa 365: }
1.1.1.2 maekawa 366: else
1.1.1.3 ! ohara 367: c >>= sh;
! 368: c &= ((mp_limb_t) 1 << l) - 1;
1.1.1.2 maekawa 369:
1.1.1.3 ! ohara 370: /* This while loop implements the sliding window improvement--loop while
! 371: the most significant bit of c is zero, squaring xx as we go. */
! 372: while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0))
1.1 maekawa 373: {
1.1.1.3 ! ohara 374: mpn_sqr_n (tp, xp, mn);
! 375: if (use_redc)
! 376: redc (xp, mp, mn, invm, tp);
1.1.1.2 maekawa 377: else
1.1.1.3 ! ohara 378: mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
! 379: if (sh != 0)
1.1.1.2 maekawa 380: {
381: sh--;
1.1.1.3 ! ohara 382: c = (c << 1) + ((ep[i] >> sh) & 1);
1.1.1.2 maekawa 383: }
384: else
385: {
386: i--;
1.1.1.3 ! ohara 387: sh = GMP_NUMB_BITS - 1;
! 388: c = (c << 1) + (ep[i] >> sh);
1.1.1.2 maekawa 389: }
1.1 maekawa 390: }
391:
1.1.1.3 ! ohara 392: /* Replace xx by xx^(2^l)*x^c. */
1.1.1.2 maekawa 393: if (c != 0)
394: {
1.1.1.3 ! ohara 395: for (j = 0; c % 2 == 0; j++)
! 396: c >>= 1;
! 397:
! 398: /* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */
! 399: l -= j;
! 400: while (--l >= 0)
1.1.1.2 maekawa 401: {
1.1.1.3 ! ohara 402: mpn_sqr_n (tp, xp, mn);
! 403: if (use_redc)
! 404: redc (xp, mp, mn, invm, tp);
! 405: else
! 406: mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
1.1.1.2 maekawa 407: }
1.1.1.3 ! ohara 408: mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn);
1.1.1.2 maekawa 409: if (use_redc)
1.1.1.3 ! ohara 410: redc (xp, mp, mn, invm, tp);
1.1.1.2 maekawa 411: else
1.1.1.3 ! ohara 412: mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
1.1.1.2 maekawa 413: }
414: else
415: j = l; /* case c=0 */
1.1.1.3 ! ohara 416: while (--j >= 0)
1.1.1.2 maekawa 417: {
1.1.1.3 ! ohara 418: mpn_sqr_n (tp, xp, mn);
1.1.1.2 maekawa 419: if (use_redc)
1.1.1.3 ! ohara 420: redc (xp, mp, mn, invm, tp);
1.1.1.2 maekawa 421: else
1.1.1.3 ! ohara 422: mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
1.1.1.2 maekawa 423: }
424: }
1.1 maekawa 425:
1.1.1.2 maekawa 426: if (use_redc)
427: {
1.1.1.3 ! ohara 428: /* Convert back xx to xx/R^n. */
! 429: MPN_COPY (tp, xp, mn);
! 430: MPN_ZERO (tp + mn, mn);
! 431: redc (xp, mp, mn, invm, tp);
! 432: if (mpn_cmp (xp, mp, mn) >= 0)
! 433: mpn_sub_n (xp, xp, mp, mn);
! 434: }
! 435: else
! 436: {
! 437: if (m_zero_cnt != 0)
! 438: {
! 439: mp_limb_t cy;
! 440: cy = mpn_lshift (tp, xp, mn, m_zero_cnt);
! 441: tp[mn] = cy;
! 442: mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn);
! 443: mpn_rshift (xp, xp, mn, m_zero_cnt);
! 444: }
! 445: }
! 446: xn = mn;
! 447: MPN_NORMALIZE (xp, xn);
! 448:
! 449: if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0)
! 450: {
! 451: mp = PTR(m); /* want original, unnormalized m */
! 452: mpn_sub (xp, mp, mn, xp, xn);
! 453: xn = mn;
! 454: MPN_NORMALIZE (xp, xn);
! 455: }
! 456: MPZ_REALLOC (r, xn);
! 457: SIZ (r) = xn;
! 458: MPN_COPY (PTR(r), xp, xn);
! 459:
! 460: __GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn);
! 461: TMP_FREE (marker);
1.1 maekawa 462: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>