OpenXM_contrib/gmp/mpz/powm.c - annotate

Return to powm.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpz
Annotation of OpenXM_contrib/gmp/mpz/powm.c, Revision 1.1.1.3

1.1       maekawa     1: /* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
                      2:
1.1.1.3 ! ohara       3: Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
        !             4: Foundation, Inc.  Contributed by Paul Zimmermann.
1.1       maekawa     5:
                      6: This file is part of the GNU MP Library.
                      7:
                      8: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2   maekawa     9: it under the terms of the GNU Lesser General Public License as published by
                     10: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1       maekawa    11: option) any later version.
                     12:
                     13: The GNU MP Library is distributed in the hope that it will be useful, but
                     14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2   maekawa    15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1.1       maekawa    16: License for more details.
                     17:
1.1.1.2   maekawa    18: You should have received a copy of the GNU Lesser General Public License
1.1       maekawa    19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: MA 02111-1307, USA. */
                     22:
1.1.1.3 ! ohara      23:
1.1       maekawa    24: #include "gmp.h"
                     25: #include "gmp-impl.h"
                     26: #include "longlong.h"
1.1.1.2   maekawa    27: #ifdef BERKELEY_MP
                     28: #include "mp.h"
                     29: #endif
                     30:
1.1       maekawa    31:
1.1.1.3 ! ohara      32: /* Set c <- tp/R^n mod m.
        !            33:    tp should have space for 2*n+1 limbs; clobber its most significant limb. */
        !            34: #if ! WANT_REDC_GLOBAL
        !            35: static
1.1.1.2   maekawa    36: #endif
1.1.1.3 ! ohara      37: void
        !            38: redc (mp_ptr cp, mp_srcptr mp, mp_size_t n, mp_limb_t Nprim, mp_ptr tp)
1.1.1.2   maekawa    39: {
1.1.1.3 ! ohara      40:   mp_limb_t cy;
1.1.1.2   maekawa    41:   mp_limb_t q;
1.1.1.3 ! ohara      42:   mp_size_t j;
1.1.1.2   maekawa    43:
1.1.1.3 ! ohara      44:   tp[2 * n] = 0;               /* carry guard */
1.1.1.2   maekawa    45:
                     46:   for (j = 0; j < n; j++)
                     47:     {
1.1.1.3 ! ohara      48:       q = tp[0] * Nprim;
        !            49:       cy = mpn_addmul_1 (tp, mp, n, q);
        !            50:       mpn_incr_u (tp + n, cy);
        !            51:       tp++;
1.1.1.2   maekawa    52:     }
1.1.1.3 ! ohara      53:
        !            54:   if (tp[n] != 0)
        !            55:     mpn_sub_n (cp, tp, mp, n);
1.1.1.2   maekawa    56:   else
1.1.1.3 ! ohara      57:     MPN_COPY (cp, tp, n);
1.1.1.2   maekawa    58: }
                     59:
1.1.1.3 ! ohara      60: /* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
        !            61:    t is defined by (tp,mn).  */
        !            62: static void
        !            63: reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
        !            64: {
        !            65:   mp_ptr qp;
        !            66:   TMP_DECL (marker);
        !            67:
        !            68:   TMP_MARK (marker);
        !            69:   qp = TMP_ALLOC_LIMBS (an - mn + 1);
        !            70:
        !            71:   mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn);
        !            72:
        !            73:   TMP_FREE (marker);
        !            74: }
        !            75:
        !            76: #if REDUCE_EXPONENT
        !            77: /* Return the group order of the ring mod m.  */
        !            78: static mp_limb_t
        !            79: phi (mp_limb_t t)
        !            80: {
        !            81:   mp_limb_t d, m, go;
        !            82:
        !            83:   go = 1;
        !            84:
        !            85:   if (t % 2 == 0)
        !            86:     {
        !            87:       t = t / 2;
        !            88:       while (t % 2 == 0)
        !            89:        {
        !            90:          go *= 2;
        !            91:          t = t / 2;
        !            92:        }
        !            93:     }
        !            94:   for (d = 3;; d += 2)
        !            95:     {
        !            96:       m = d - 1;
        !            97:       for (;;)
        !            98:        {
        !            99:          unsigned long int q = t / d;
        !           100:          if (q < d)
        !           101:            {
        !           102:              if (t <= 1)
        !           103:                return go;
        !           104:              if (t == d)
        !           105:                return go * m;
        !           106:              return go * (t - 1);
        !           107:            }
        !           108:          if (t != q * d)
        !           109:            break;
        !           110:          go *= m;
        !           111:          m = d;
        !           112:          t = q;
        !           113:        }
        !           114:     }
        !           115: }
        !           116: #endif
        !           117:
1.1.1.2   maekawa   118: /* average number of calls to redc for an exponent of n bits
                    119:    with the sliding window algorithm of base 2^k: the optimal is
                    120:    obtained for the value of k which minimizes 2^(k-1)+n/(k+1):
                    121:
                    122:    n\k    4     5     6     7     8
                    123:    128    156*  159   171   200   261
                    124:    256    309   307*  316   343   403
                    125:    512    617   607*  610   632   688
                    126:    1024   1231  1204  1195* 1207  1256
                    127:    2048   2461  2399  2366  2360* 2396
                    128:    4096   4918  4787  4707  4665* 4670
                    129: */
                    130: 
1.1.1.3 ! ohara     131:
        !           132: /* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD.  In REDC
        !           133:    each modular multiplication costs about 2*n^2 limbs operations, whereas
        !           134:    using usual reduction it costs 3*K(n), where K(n) is the cost of a
        !           135:    multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
        !           136:    for example using Burnikel-Ziegler's algorithm. This gives a theoretical
        !           137:    threshold of a*SQR_KARATSUBA_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
        !           138:    2.66.  */
        !           139: /* For now, also disable REDC when MOD is even, as the inverse can't handle
        !           140:    that.  At some point, we might want to make the code faster for that case,
        !           141:    perhaps using CRR.  */
        !           142:
        !           143: #ifndef POWM_THRESHOLD
        !           144: #define POWM_THRESHOLD  ((8 * SQR_KARATSUBA_THRESHOLD) / 3)
1.1       maekawa   145: #endif
1.1.1.3 ! ohara     146:
        !           147: #define HANDLE_NEGATIVE_EXPONENT 1
        !           148: #undef REDUCE_EXPONENT
        !           149:
1.1       maekawa   150: void
1.1.1.3 ! ohara     151: #ifndef BERKELEY_MP
        !           152: mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
        !           153: #else /* BERKELEY_MP */
        !           154: pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
1.1       maekawa   155: #endif /* BERKELEY_MP */
                    156: {
1.1.1.3 ! ohara     157:   mp_ptr xp, tp, qp, gp, this_gp;
        !           158:   mp_srcptr bp, ep, mp;
        !           159:   mp_size_t bn, es, en, mn, xn;
        !           160:   mp_limb_t invm, c;
        !           161:   unsigned long int enb;
        !           162:   mp_size_t i, K, j, l, k;
        !           163:   int m_zero_cnt, e_zero_cnt;
1.1.1.2   maekawa   164:   int sh;
                    165:   int use_redc;
1.1.1.3 ! ohara     166: #if HANDLE_NEGATIVE_EXPONENT
        !           167:   mpz_t new_b;
1.1.1.2   maekawa   168: #endif
1.1.1.3 ! ohara     169: #if REDUCE_EXPONENT
        !           170:   mpz_t new_e;
        !           171: #endif
        !           172:   TMP_DECL (marker);
1.1       maekawa   173:
1.1.1.3 ! ohara     174:   mp = PTR(m);
        !           175:   mn = ABSIZ (m);
        !           176:   if (mn == 0)
1.1.1.2   maekawa   177:     DIVIDE_BY_ZERO;
1.1       maekawa   178:
1.1.1.3 ! ohara     179:   TMP_MARK (marker);
        !           180:
        !           181:   es = SIZ (e);
        !           182:   if (es <= 0)
1.1       maekawa   183:     {
1.1.1.3 ! ohara     184:       if (es == 0)
        !           185:        {
        !           186:          /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if
        !           187:             m equals 1.  */
        !           188:          SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
        !           189:          PTR(r)[0] = 1;
        !           190:          TMP_FREE (marker);    /* we haven't really allocated anything here */
        !           191:          return;
        !           192:        }
        !           193: #if HANDLE_NEGATIVE_EXPONENT
        !           194:       MPZ_TMP_INIT (new_b, mn + 1);
1.1       maekawa   195:
1.1.1.3 ! ohara     196:       if (! mpz_invert (new_b, b, m))
        !           197:        DIVIDE_BY_ZERO;
        !           198:       b = new_b;
        !           199:       es = -es;
        !           200: #else
        !           201:       DIVIDE_BY_ZERO;
        !           202: #endif
        !           203:     }
        !           204:   en = es;
        !           205:
        !           206: #if REDUCE_EXPONENT
        !           207:   /* Reduce exponent by dividing it by phi(m) when m small.  */
        !           208:   if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
        !           209:     {
        !           210:       MPZ_TMP_INIT (new_e, 2);
        !           211:       mpz_mod_ui (new_e, e, phi (mp[0]));
        !           212:       e = new_e;
        !           213:     }
1.1.1.2   maekawa   214: #endif
1.1       maekawa   215:
1.1.1.3 ! ohara     216:   use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
1.1.1.2   maekawa   217:   if (use_redc)
1.1       maekawa   218:     {
1.1.1.2   maekawa   219:       /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
1.1.1.3 ! ohara     220:       modlimb_invert (invm, mp[0]);
1.1.1.2   maekawa   221:       invm = -invm;
1.1       maekawa   222:     }
1.1.1.3 ! ohara     223:   else
        !           224:     {
        !           225:       /* Normalize m (i.e. make its most significant bit set) as required by
        !           226:         division functions below.  */
        !           227:       count_leading_zeros (m_zero_cnt, mp[mn - 1]);
        !           228:       m_zero_cnt -= GMP_NAIL_BITS;
        !           229:       if (m_zero_cnt != 0)
        !           230:        {
        !           231:          mp_ptr new_mp;
        !           232:          new_mp = TMP_ALLOC_LIMBS (mn);
        !           233:          mpn_lshift (new_mp, mp, mn, m_zero_cnt);
        !           234:          mp = new_mp;
        !           235:        }
        !           236:     }
1.1       maekawa   237:
1.1.1.3 ! ohara     238:   /* Determine optimal value of k, the number of exponent bits we look at
        !           239:      at a time.  */
        !           240:   count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
        !           241:   e_zero_cnt -= GMP_NAIL_BITS;
        !           242:   enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
1.1.1.2   maekawa   243:   k = 1;
                    244:   K = 2;
1.1.1.3 ! ohara     245:   while (2 * enb > K * (2 + k * (3 + k)))
1.1       maekawa   246:     {
1.1.1.2   maekawa   247:       k++;
                    248:       K *= 2;
1.1       maekawa   249:     }
                    250:
1.1.1.3 ! ohara     251:   tp = TMP_ALLOC_LIMBS (2 * mn + 1);
        !           252:   qp = TMP_ALLOC_LIMBS (mn + 1);
1.1       maekawa   253:
1.1.1.3 ! ohara     254:   gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);
        !           255:
        !           256:   /* Compute x*R^n where R=2^BITS_PER_MP_LIMB.  */
        !           257:   bn = ABSIZ (b);
        !           258:   bp = PTR(b);
        !           259:   /* Handle |b| >= m by computing b mod m.  FIXME: It is not strictly necessary
        !           260:      for speed or correctness to do this when b and m have the same number of
        !           261:      limbs, perhaps remove mpn_cmp call.  */
        !           262:   if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
        !           263:     {
        !           264:       /* Reduce possibly huge base while moving it to gp[0].  Use a function
        !           265:         call to reduce, since we don't want the quotient allocation to
        !           266:         live until function return.  */
        !           267:       if (use_redc)
        !           268:        {
        !           269:          reduce (tp + mn, bp, bn, mp, mn);     /* b mod m */
        !           270:          MPN_ZERO (tp, mn);
        !           271:          mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */
        !           272:        }
        !           273:       else
        !           274:        {
        !           275:          reduce (gp, bp, bn, mp, mn);
        !           276:        }
1.1.1.2   maekawa   277:     }
                    278:   else
                    279:     {
1.1.1.3 ! ohara     280:       /* |b| < m.  We pad out operands to become mn limbs,  which simplifies
        !           281:         the rest of the function, but slows things down when the |b| << m.  */
1.1.1.2   maekawa   282:       if (use_redc)
1.1       maekawa   283:        {
1.1.1.3 ! ohara     284:          MPN_ZERO (tp, mn);
        !           285:          MPN_COPY (tp + mn, bp, bn);
        !           286:          MPN_ZERO (tp + mn + bn, mn - bn);
        !           287:          mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
1.1       maekawa   288:        }
                    289:       else
1.1.1.2   maekawa   290:        {
1.1.1.3 ! ohara     291:          MPN_COPY (gp, bp, bn);
        !           292:          MPN_ZERO (gp + bn, mn - bn);
1.1.1.2   maekawa   293:        }
                    294:     }
1.1       maekawa   295:
1.1.1.3 ! ohara     296:   /* Compute xx^i for odd g < 2^i.  */
        !           297:
        !           298:   xp = TMP_ALLOC_LIMBS (mn);
        !           299:   mpn_sqr_n (tp, gp, mn);
        !           300:   if (use_redc)
        !           301:     redc (xp, mp, mn, invm, tp);               /* xx = x^2*R^n */
        !           302:   else
        !           303:     mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
        !           304:   this_gp = gp;
        !           305:   for (i = 1; i < K / 2; i++)
        !           306:     {
        !           307:       mpn_mul_n (tp, this_gp, xp, mn);
        !           308:       this_gp += mn;
        !           309:       if (use_redc)
        !           310:        redc (this_gp, mp, mn, invm, tp);       /* g[i] = x^(2i+1)*R^n */
        !           311:       else
        !           312:        mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
        !           313:     }
        !           314:
        !           315:   /* Start the real stuff.  */
1.1.1.2   maekawa   316:   ep = PTR (e);
1.1.1.3 ! ohara     317:   i = en - 1;                          /* current index */
1.1.1.2   maekawa   318:   c = ep[i];                           /* current limb */
1.1.1.3 ! ohara     319:   sh = GMP_NUMB_BITS - e_zero_cnt;     /* significant bits in ep[i] */
1.1.1.2   maekawa   320:   sh -= k;                             /* index of lower bit of ep[i] to take into account */
                    321:   if (sh < 0)
                    322:     {                                  /* k-sh extra bits are needed */
                    323:       if (i > 0)
                    324:        {
                    325:          i--;
1.1.1.3 ! ohara     326:          c <<= (-sh);
        !           327:          sh += GMP_NUMB_BITS;
        !           328:          c |= ep[i] >> sh;
1.1.1.2   maekawa   329:        }
1.1       maekawa   330:     }
                    331:   else
1.1.1.3 ! ohara     332:     c >>= sh;
        !           333:
        !           334:   for (j = 0; c % 2 == 0; j++)
        !           335:     c >>= 1;
        !           336:
        !           337:   MPN_COPY (xp, gp + mn * (c >> 1), mn);
        !           338:   while (--j >= 0)
1.1.1.2   maekawa   339:     {
1.1.1.3 ! ohara     340:       mpn_sqr_n (tp, xp, mn);
1.1.1.2   maekawa   341:       if (use_redc)
1.1.1.3 ! ohara     342:        redc (xp, mp, mn, invm, tp);
1.1.1.2   maekawa   343:       else
1.1.1.3 ! ohara     344:        mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
1.1.1.2   maekawa   345:     }
                    346:
                    347:   while (i > 0 || sh > 0)
                    348:     {
                    349:       c = ep[i];
                    350:       l = k;                           /* number of bits treated */
1.1.1.3 ! ohara     351:       sh -= l;
1.1.1.2   maekawa   352:       if (sh < 0)
1.1       maekawa   353:        {
1.1.1.2   maekawa   354:          if (i > 0)
                    355:            {
                    356:              i--;
1.1.1.3 ! ohara     357:              c <<= (-sh);
        !           358:              sh += GMP_NUMB_BITS;
        !           359:              c |= ep[i] >> sh;
1.1.1.2   maekawa   360:            }
                    361:          else
                    362:            {
1.1.1.3 ! ohara     363:              l += sh;                  /* last chunk of bits from e; l < k */
1.1.1.2   maekawa   364:            }
1.1       maekawa   365:        }
1.1.1.2   maekawa   366:       else
1.1.1.3 ! ohara     367:        c >>= sh;
        !           368:       c &= ((mp_limb_t) 1 << l) - 1;
1.1.1.2   maekawa   369:
1.1.1.3 ! ohara     370:       /* This while loop implements the sliding window improvement--loop while
        !           371:         the most significant bit of c is zero, squaring xx as we go. */
        !           372:       while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0))
1.1       maekawa   373:        {
1.1.1.3 ! ohara     374:          mpn_sqr_n (tp, xp, mn);
        !           375:          if (use_redc)
        !           376:            redc (xp, mp, mn, invm, tp);
1.1.1.2   maekawa   377:          else
1.1.1.3 ! ohara     378:            mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
        !           379:          if (sh != 0)
1.1.1.2   maekawa   380:            {
                    381:              sh--;
1.1.1.3 ! ohara     382:              c = (c << 1) + ((ep[i] >> sh) & 1);
1.1.1.2   maekawa   383:            }
                    384:          else
                    385:            {
                    386:              i--;
1.1.1.3 ! ohara     387:              sh = GMP_NUMB_BITS - 1;
        !           388:              c = (c << 1) + (ep[i] >> sh);
1.1.1.2   maekawa   389:            }
1.1       maekawa   390:        }
                    391:
1.1.1.3 ! ohara     392:       /* Replace xx by xx^(2^l)*x^c.  */
1.1.1.2   maekawa   393:       if (c != 0)
                    394:        {
1.1.1.3 ! ohara     395:          for (j = 0; c % 2 == 0; j++)
        !           396:            c >>= 1;
        !           397:
        !           398:          /* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */
        !           399:          l -= j;
        !           400:          while (--l >= 0)
1.1.1.2   maekawa   401:            {
1.1.1.3 ! ohara     402:              mpn_sqr_n (tp, xp, mn);
        !           403:              if (use_redc)
        !           404:                redc (xp, mp, mn, invm, tp);
        !           405:              else
        !           406:                mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
1.1.1.2   maekawa   407:            }
1.1.1.3 ! ohara     408:          mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn);
1.1.1.2   maekawa   409:          if (use_redc)
1.1.1.3 ! ohara     410:            redc (xp, mp, mn, invm, tp);
1.1.1.2   maekawa   411:          else
1.1.1.3 ! ohara     412:            mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
1.1.1.2   maekawa   413:        }
                    414:       else
                    415:        j = l;                          /* case c=0 */
1.1.1.3 ! ohara     416:       while (--j >= 0)
1.1.1.2   maekawa   417:        {
1.1.1.3 ! ohara     418:          mpn_sqr_n (tp, xp, mn);
1.1.1.2   maekawa   419:          if (use_redc)
1.1.1.3 ! ohara     420:            redc (xp, mp, mn, invm, tp);
1.1.1.2   maekawa   421:          else
1.1.1.3 ! ohara     422:            mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
1.1.1.2   maekawa   423:        }
                    424:     }
1.1       maekawa   425:
1.1.1.2   maekawa   426:   if (use_redc)
                    427:     {
1.1.1.3 ! ohara     428:       /* Convert back xx to xx/R^n.  */
        !           429:       MPN_COPY (tp, xp, mn);
        !           430:       MPN_ZERO (tp + mn, mn);
        !           431:       redc (xp, mp, mn, invm, tp);
        !           432:       if (mpn_cmp (xp, mp, mn) >= 0)
        !           433:        mpn_sub_n (xp, xp, mp, mn);
        !           434:     }
        !           435:   else
        !           436:     {
        !           437:       if (m_zero_cnt != 0)
        !           438:        {
        !           439:          mp_limb_t cy;
        !           440:          cy = mpn_lshift (tp, xp, mn, m_zero_cnt);
        !           441:          tp[mn] = cy;
        !           442:          mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn);
        !           443:          mpn_rshift (xp, xp, mn, m_zero_cnt);
        !           444:        }
        !           445:     }
        !           446:   xn = mn;
        !           447:   MPN_NORMALIZE (xp, xn);
        !           448:
        !           449:   if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0)
        !           450:     {
        !           451:       mp = PTR(m);                    /* want original, unnormalized m */
        !           452:       mpn_sub (xp, mp, mn, xp, xn);
        !           453:       xn = mn;
        !           454:       MPN_NORMALIZE (xp, xn);
        !           455:     }
        !           456:   MPZ_REALLOC (r, xn);
        !           457:   SIZ (r) = xn;
        !           458:   MPN_COPY (PTR(r), xp, xn);
        !           459:
        !           460:   __GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn);
        !           461:   TMP_FREE (marker);
1.1       maekawa   462: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>