Annotation of OpenXM_contrib/gmp/mpn/generic/gcd.c, Revision 1.1.1.2
/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.

Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

/* Integer greatest common divisor of two unsigned integers, using
   the accelerated algorithm (see reference below).

   mp_size_t mpn_gcd (gp, up, usize, vp, vsize).

   Preconditions [U = (up, usize) and V = (vp, vsize)]:

   1. V is odd.
   2. numbits(U) >= numbits(V).

   Both U and V are destroyed by the operation.  The result is stored at gp,
   and its size is returned.

   Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)

   Funding for this work has been partially provided by Conselho Nacional
   de Desenvolvimento Científico e Tecnológico (CNPq) of Brazil, Grant
   301314194-2, and was done while I was a visiting researcher in the
   Instituto de Matemática at Universidade Federal do Rio Grande do Sul
   (UFRGS).

   Refer to
     K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
     Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */
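
/* Editorial usage sketch (not part of the library): a call that satisfies
   the preconditions above.  The operand values are assumptions chosen for
   illustration only.

     mp_limb_t ul[1] = {1000};      U = 1000
     mp_limb_t vl[1] = {15};        V = 15 is odd, numbits(U) >= numbits(V)
     mp_limb_t gl[1];               result needs at most vsize limbs
     mp_size_t gsize;

     gsize = mpn_gcd (gl, ul, 1, vl, 1);

   Afterwards gsize == 1 and gl[0] == 5; ul and vl have been clobbered.  */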

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* If MIN (usize, vsize) >= GCD_ACCEL_THRESHOLD, then the accelerated
   algorithm is used, otherwise the binary algorithm is used.  This may be
   adjusted for different architectures.  */
#ifndef GCD_ACCEL_THRESHOLD
#define GCD_ACCEL_THRESHOLD 5
#endif
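
/* Editorial note: because of the #ifndef guard above, the threshold can be
   overridden when compiling, e.g. (an illustrative command line; the best
   value is architecture dependent):

     cc -DGCD_ACCEL_THRESHOLD=4 -c gcd.c  */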

/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
   algorithm reduces using the bmod operation.  Otherwise, the k-ary reduction
   is used.  0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB.  */
enum
  {
    BMOD_THRESHOLD = BITS_PER_MP_LIMB/2
  };


/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
   Both U and V must be odd.  */
static __gmp_inline mp_size_t
#if __STDC__
gcd_2 (mp_ptr vp, mp_srcptr up)
#else
gcd_2 (vp, up)
     mp_ptr vp;
     mp_srcptr up;
#endif
{
  mp_limb_t u0, u1, v0, v1;
  mp_size_t vsize;

  u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1];

  while (u1 != v1 && u0 != v0)
    {
      unsigned long int r;
      if (u1 > v1)
        {
          u1 -= v1 + (u0 < v0), u0 -= v0;
          count_trailing_zeros (r, u0);
          u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r;
          u1 >>= r;
        }
      else  /* u1 < v1.  */
        {
          v1 -= u1 + (v0 < u0), v0 -= u0;
          count_trailing_zeros (r, v0);
          v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r;
          v1 >>= r;
        }
    }

  vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);

  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
  if (u1 == v1 && u0 == v0)
    return vsize;

  v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
  vp[0] = mpn_gcd_1 (vp, vsize, v0);

  return 1;
}

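/* Editorial sketch (never compiled; not part of the library): the
   subtract-and-shift idea gcd_2 uses, shown on single-word operands.  The
   function name is made up for the example; both arguments must be odd.  */
#if 0
static unsigned long
binary_gcd_odd (unsigned long u, unsigned long v)
{
  while (u != v)
    {
      if (u > v)
        {
          u -= v;                              /* u, v odd, so the difference */
          do u >>= 1; while ((u & 1) == 0);    /* is even; strip the twos.  */
        }
      else
        {
          v -= u;
          do v >>= 1; while ((v & 1) == 0);
        }
    }
  return u;                                    /* GCD of the original operands.  */
}
#endif
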
/* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists
   0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB).
   In the reference article, D was computed along with N, but it is better to
   compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating
   the result as a two's complement signed integer.

   Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB).  According to the reference
   article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use
   2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double
   precision.  If N2 > N1 initially, the first iteration of the while loop
   will swap them.  In all other situations, N1 >= N2 is maintained.  */
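
/* Editorial worked example (illustrative): with 4-bit limbs the modulus
   2^(2*BITS_PER_MP_LIMB) is 256.  For C == 77, N == 2 qualifies, since
   77 * 10 == 770 == 3*256 + 2, i.e. N == D * C mod 256 with D == 10 and
   both N and |D| below 2^BITS_PER_MP_LIMB == 16.  */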

static
#if ! defined (__i386__)
__gmp_inline  /* don't inline this for the x86 */
#endif
mp_limb_t
#if __STDC__
find_a (mp_srcptr cp)
#else
find_a (cp)
     mp_srcptr cp;
#endif
{
  unsigned long int leading_zero_bits = 0;

  mp_limb_t n1_l = cp[0];  /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l.  */
  mp_limb_t n1_h = cp[1];

  mp_limb_t n2_l = -n1_l;  /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l.  */
  mp_limb_t n2_h = ~n1_h;

  /* Main loop.  */
  while (n2_h)  /* While N2 >= 2^BITS_PER_MP_LIMB.  */
    {
      /* N1 <-- N1 % N2.  */
      if ((MP_LIMB_T_HIGHBIT >> leading_zero_bits & n2_h) == 0)
        {
          unsigned long int i;
          count_leading_zeros (i, n2_h);
          i -= leading_zero_bits, leading_zero_bits += i;
          n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
          do
            {
              if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
                n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
              n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
              i -= 1;
            }
          while (i);
        }
      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
        n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;

      MP_LIMB_T_SWAP (n1_h, n2_h);
      MP_LIMB_T_SWAP (n1_l, n2_l);
    }

  return n2_l;
}

mp_size_t
#if __STDC__
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize)
#else
mpn_gcd (gp, up, usize, vp, vsize)
     mp_ptr gp;
     mp_ptr up;
     mp_size_t usize;
     mp_ptr vp;
     mp_size_t vsize;
#endif
{
  mp_ptr orig_vp = vp;
  mp_size_t orig_vsize = vsize;
  int binary_gcd_ctr;  /* Number of times binary gcd will execute.  */
  TMP_DECL (marker);

  TMP_MARK (marker);

  /* Use the accelerated algorithm if vsize is at least GCD_ACCEL_THRESHOLD.
     Two EXTRA limbs for U and V are required for k-ary reduction.  */
  if (vsize >= GCD_ACCEL_THRESHOLD)
    {
      unsigned long int vbitsize, d;
      mp_ptr orig_up = up;
      mp_size_t orig_usize = usize;
      mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);

      MPN_COPY (anchor_up, orig_up, usize);
      up = anchor_up;

      count_leading_zeros (d, up[usize-1]);
      d = usize * BITS_PER_MP_LIMB - d;
      count_leading_zeros (vbitsize, vp[vsize-1]);
      vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
      d = d - vbitsize + 1;

      /* Use bmod reduction to quickly discover whether V divides U.  */
      up[usize++] = 0;  /* Insert leading zero.  */
      mpn_bdivmod (up, up, usize, vp, vsize, d);

      /* Now skip U/V mod 2^d and any low zero limbs.  */
      d /= BITS_PER_MP_LIMB, up += d, usize -= d;
      while (usize != 0 && up[0] == 0)
        up++, usize--;

      if (usize == 0)  /* GCD == ORIG_V.  */
        goto done;

      vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
      MPN_COPY (vp, orig_vp, vsize);

      do  /* Main loop.  */
        {
          /* mpn_com_n can't be used here because anchor_up and up may
             partially overlap.  */
          if (up[usize-1] & MP_LIMB_T_HIGHBIT)  /* U < 0; take two's compl.  */
            {
              mp_size_t i;
              anchor_up[0] = -up[0];
              for (i = 1; i < usize; i++)
                anchor_up[i] = ~up[i];
              up = anchor_up;
            }

          MPN_NORMALIZE_NOT_ZERO (up, usize);

          if ((up[0] & 1) == 0)  /* Result even; remove twos.  */
            {
              unsigned int r;
              count_trailing_zeros (r, up[0]);
              mpn_rshift (anchor_up, up, usize, r);
              usize -= (anchor_up[usize-1] == 0);
            }
          else if (anchor_up != up)
            MPN_COPY_INCR (anchor_up, up, usize);

          MPN_PTR_SWAP (anchor_up, usize, vp, vsize);
          up = anchor_up;

          if (vsize <= 2)  /* K-ary can't handle < 2 limbs and  */
            break;         /* isn't efficient for == 2 limbs.  */

          d = vbitsize;
          count_leading_zeros (vbitsize, vp[vsize-1]);
          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
          d = d - vbitsize + 1;

          if (d > BMOD_THRESHOLD)  /* Bmod reduction.  */
            {
              up[usize++] = 0;
              mpn_bdivmod (up, up, usize, vp, vsize, d);
              d /= BITS_PER_MP_LIMB, up += d, usize -= d;
            }
          else  /* K-ary reduction.  */
            {
              mp_limb_t bp[2], cp[2];

              /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB).  */
              {
                mp_limb_t u_inv, hi, lo;
                modlimb_invert (u_inv, up[0]);
                cp[0] = vp[0] * u_inv;
                umul_ppmm (hi, lo, cp[0], up[0]);
                cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv;
              }

              /* U <-- find_a (C) * U.  */
              up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
              usize++;

              /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1).
                 bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and
                 bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2

                 Like V/U above, but simplified because only the low bit of
                 bp[1] is wanted.  */
              {
                mp_limb_t v_inv, hi, lo;
                modlimb_invert (v_inv, vp[0]);
                bp[0] = up[0] * v_inv;
                umul_ppmm (hi, lo, bp[0], vp[0]);
                bp[1] = (up[1] + hi + (bp[0]&vp[1])) & 1;
              }

              up[usize++] = 0;
              if (bp[1])  /* B < 0: U <-- U + (-B) * V.  */
                {
                  mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]);
                  mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
                }
              else  /* B >= 0: U <-- U - B * V.  */
                {
                  mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
                  mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
                }

              up += 2, usize -= 2;  /* At least two low limbs are zero.  */
            }

          /* Must remove low zero limbs before complementing.  */
          while (usize != 0 && up[0] == 0)
            up++, usize--;
        }
      while (usize);

      /* Compute GCD (ORIG_V, GCD (ORIG_U, V)).  Binary will execute twice.  */
      up = orig_up, usize = orig_usize;
      binary_gcd_ctr = 2;
    }
  else
    binary_gcd_ctr = 1;

  /* Finish up with the binary algorithm.  Executes once or twice.  */
  for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
    {
      if (usize > 2)  /* First make U close to V in size.  */
        {
          unsigned long int vbitsize, d;
          count_leading_zeros (d, up[usize-1]);
          d = usize * BITS_PER_MP_LIMB - d;
          count_leading_zeros (vbitsize, vp[vsize-1]);
          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
          d = d - vbitsize - 1;
          if (d != -(unsigned long int)1 && d > 2)
            {
              mpn_bdivmod (up, up, usize, vp, vsize, d);  /* Result > 0.  */
              d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d;
            }
        }

      /* Start binary GCD.  */
      do
        {
          mp_size_t zeros;

          /* Make sure U is odd.  */
          MPN_NORMALIZE (up, usize);
          while (up[0] == 0)
            up += 1, usize -= 1;
          if ((up[0] & 1) == 0)
            {
              unsigned int r;
              count_trailing_zeros (r, up[0]);
              mpn_rshift (up, up, usize, r);
              usize -= (up[usize-1] == 0);
            }

          /* Keep usize >= vsize.  */
          if (usize < vsize)
            MPN_PTR_SWAP (up, usize, vp, vsize);

          if (usize <= 2)  /* Double precision.  */
            {
              if (vsize == 1)
                vp[0] = mpn_gcd_1 (up, usize, vp[0]);
              else
                vsize = gcd_2 (vp, up);
              break;  /* Binary GCD done.  */
            }

          /* Count number of low zero limbs of U - V.  */
          for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
            continue;

          /* If U < V, swap U and V; in any case, subtract V from U.  */
          if (zeros == vsize)  /* Subtract done.  */
            up += zeros, usize -= zeros;
          else if (usize == vsize)
            {
              mp_size_t size = vsize;
              do
                size--;
              while (up[size] == vp[size]);
              if (up[size] < vp[size])  /* usize == vsize.  */
                MP_PTR_SWAP (up, vp);
              up += zeros, usize = size + 1 - zeros;
              mpn_sub_n (up, up, vp + zeros, usize);
            }
          else
            {
              mp_size_t size = vsize - zeros;
              up += zeros, usize -= zeros;
              if (mpn_sub_n (up, up, vp + zeros, size))
                {
                  while (up[size] == 0)  /* Propagate borrow.  */
                    up[size++] = -(mp_limb_t)1;
                  up[size] -= 1;
                }
            }
        }
      while (usize);  /* End binary GCD.  */
    }

 done:
  if (vp != gp)
    MPN_COPY (gp, vp, vsize);
  TMP_FREE (marker);
  return vsize;
}