/* mpn/gcd.c: mpn_gcd for the gcd of two unsigned integers (V must be odd).

Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
License for more details.

You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

/* Integer greatest common divisor of two unsigned integers, using
   the accelerated algorithm (see reference below).

   mp_size_t mpn_gcd (gp, vp, vsize, up, usize).

   Preconditions [U = (up, usize) and V = (vp, vsize)]:

   1.  V is odd.
   2.  numbits(U) >= numbits(V).

   Both U and V are destroyed by the operation.  The result is stored at gp,
   and its size is returned.

   Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)

   Funding for this work has been partially provided by Conselho Nacional
   de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
   301314194-2, and was done while I was a visiting researcher in the Instituto
   de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).

   Refer to
     K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
     Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

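/* A minimal usage sketch (illustrative only; this helper is not part of the
   library and is never called).  It assumes the mpn_gcd prototype from gmp.h
   and exercises the preconditions stated above: V odd, numbits(U) >=
   numbits(V), and both operands writable, since mpn_gcd clobbers them.  */
static void
example_mpn_gcd_usage (void)
{
  mp_limb_t up[1], vp[1], gp[1];
  mp_size_t gsize;

  up[0] = 12;                   /* U = 12 */
  vp[0] = 9;                    /* V = 9, odd, numbits(V) <= numbits(U) */
  gsize = mpn_gcd (gp, vp, 1, up, 1);
  /* Now gsize == 1 and gp[0] == 3; up[] and vp[] have been overwritten.  */
}
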
/* If MIN (usize, vsize) > ACCEL_THRESHOLD, then the accelerated algorithm is
   used, otherwise the binary algorithm is used.  This may be adjusted for
   different architectures.  */
#ifndef ACCEL_THRESHOLD
#define ACCEL_THRESHOLD 4
#endif

/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
   algorithm reduces using the bmod operation.  Otherwise, the k-ary reduction
   is used.  0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB.  */
enum
  {
    BMOD_THRESHOLD = BITS_PER_MP_LIMB/2
  };

#define SIGN_BIT  (~(~(mp_limb_t)0 >> 1))


#define SWAP_LIMB(UL, VL) do{mp_limb_t __l=(UL);(UL)=(VL);(VL)=__l;}while(0)
#define SWAP_PTR(UP, VP) do{mp_ptr __p=(UP);(UP)=(VP);(VP)=__p;}while(0)
#define SWAP_SZ(US, VS) do{mp_size_t __s=(US);(US)=(VS);(VS)=__s;}while(0)
#define SWAP_MPN(UP, US, VP, VS) do{SWAP_PTR(UP,VP);SWAP_SZ(US,VS);}while(0)

/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
   Both U and V must be odd.  */
static __gmp_inline mp_size_t
#if __STDC__
gcd_2 (mp_ptr vp, mp_srcptr up)
#else
gcd_2 (vp, up)
     mp_ptr vp;
     mp_srcptr up;
#endif
{
  mp_limb_t u0, u1, v0, v1;
  mp_size_t vsize;

  u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1];

  while (u1 != v1 && u0 != v0)
    {
      unsigned long int r;
      if (u1 > v1)
        {
          u1 -= v1 + (u0 < v0), u0 -= v0;
          count_trailing_zeros (r, u0);
          u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r;
          u1 >>= r;
        }
      else  /* u1 < v1.  */
        {
          v1 -= u1 + (v0 < u0), v0 -= u0;
          count_trailing_zeros (r, v0);
          v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r;
          v1 >>= r;
        }
    }

  vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);

  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
  if (u1 == v1 && u0 == v0)
    return vsize;

  v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
  vp[0] = mpn_gcd_1 (vp, vsize, v0);

  return 1;
}

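/* Illustrative check of gcd_2 (a sketch, not part of the library and never
   called).  With B = 2^BITS_PER_MP_LIMB, the two-limb operands 5*(B+1) and
   3*(B+1) are both odd and their gcd is B+1, i.e. the limb vector {1, 1}.  */
static void
example_gcd_2 (void)
{
  mp_limb_t up[2], vp[2];
  mp_size_t gsize;

  up[0] = 5, up[1] = 5;         /* U = 5*B + 5 */
  vp[0] = 3, vp[1] = 3;         /* V = 3*B + 3 */
  gsize = gcd_2 (vp, up);
  /* Now gsize == 2 and vp[0] == 1, vp[1] == 1, i.e. GCD == B + 1.  */
}
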
/* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists
   0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB).
   In the reference article, D was computed along with N, but it is better to
   compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating
   the result as a two's complement signed integer.

   Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB).  According to the reference
   article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use
   2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double
   precision.  If N2 > N1 initially, the first iteration of the while loop
   will swap them.  In all other situations, N1 >= N2 is maintained.  */

static __gmp_inline mp_limb_t
#if __STDC__
find_a (mp_srcptr cp)
#else
find_a (cp)
     mp_srcptr cp;
#endif
{
  unsigned long int leading_zero_bits = 0;

  mp_limb_t n1_l = cp[0];       /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l.  */
  mp_limb_t n1_h = cp[1];

  mp_limb_t n2_l = -n1_l;       /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l.  */
  mp_limb_t n2_h = ~n1_h;

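  /* Loop sketch (informal, following the reference article): N1 and N2 start
     out as C and -C mod 2^(2*BITS_PER_MP_LIMB), and every update below either
     subtracts a (temporarily shifted) copy of one from the other or swaps
     them, so both remain congruent to multiples of C.  The loop stops once N2
     fits in a single limb; the reference article shows the matching cofactor
     D also stays below 2^BITS_PER_MP_LIMB in absolute value.  */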
  /* Main loop.  */
  while (n2_h)                  /* While N2 >= 2^BITS_PER_MP_LIMB.  */
    {
      /* N1 <-- N1 % N2.  */
      if ((SIGN_BIT >> leading_zero_bits & n2_h) == 0)
        {
          unsigned long int i;
          count_leading_zeros (i, n2_h);
          i -= leading_zero_bits, leading_zero_bits += i;
          n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
          do
            {
              if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
                n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
              n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
              i -= 1;
            }
          while (i);
        }
      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
        n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;

      SWAP_LIMB (n1_h, n2_h);
      SWAP_LIMB (n1_l, n2_l);
    }

  return n2_l;
}

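/* Illustrative check of find_a (a sketch, not part of the library and never
   called).  When C already fits in a single limb, D == 1 works and find_a
   returns C itself, which makes the defining congruence easy to verify.  */
static void
example_find_a (void)
{
  mp_limb_t cp[2], n;

  cp[0] = 12345;                /* C = 12345: odd and single-limb */
  cp[1] = 0;
  n = find_a (cp);
  /* n == 12345 here: N == C together with D == 1 satisfies
     N == D * C mod 2^(2*BITS_PER_MP_LIMB).  */
}
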
mp_size_t
#if __STDC__
mpn_gcd (mp_ptr gp, mp_ptr vp, mp_size_t vsize, mp_ptr up, mp_size_t usize)
#else
mpn_gcd (gp, vp, vsize, up, usize)
     mp_ptr gp;
     mp_ptr vp;
     mp_size_t vsize;
     mp_ptr up;
     mp_size_t usize;
#endif
{
  mp_ptr orig_vp = vp;
  mp_size_t orig_vsize = vsize;
  int binary_gcd_ctr;           /* Number of times binary gcd will execute.  */
  TMP_DECL (marker);

  TMP_MARK (marker);

  /* Use accelerated algorithm if vsize is over ACCEL_THRESHOLD.
     Two EXTRA limbs for U and V are required for k-ary reduction.  */
  if (vsize > ACCEL_THRESHOLD)
    {
      unsigned long int vbitsize, d;
      mp_ptr orig_up = up;
      mp_size_t orig_usize = usize;
      mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);

      MPN_COPY (anchor_up, orig_up, usize);
      up = anchor_up;

      count_leading_zeros (d, up[usize-1]);
      d = usize * BITS_PER_MP_LIMB - d;
      count_leading_zeros (vbitsize, vp[vsize-1]);
      vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
      d = d - vbitsize + 1;

      /* Use bmod reduction to quickly discover whether V divides U.  */
      up[usize++] = 0;                          /* Insert leading zero.  */
      mpn_bdivmod (up, up, usize, vp, vsize, d);

      /* Now skip U/V mod 2^d and any low zero limbs.  */
      d /= BITS_PER_MP_LIMB, up += d, usize -= d;
      while (usize != 0 && up[0] == 0)
        up++, usize--;

      if (usize == 0)                           /* GCD == ORIG_V.  */
        goto done;

      vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
      MPN_COPY (vp, orig_vp, vsize);

      do                                        /* Main loop.  */
        {
          if (up[usize-1] & SIGN_BIT)           /* U < 0; take two's compl.  */
            {
              mp_size_t i;
              anchor_up[0] = -up[0];
              for (i = 1; i < usize; i++)
                anchor_up[i] = ~up[i];
              up = anchor_up;
            }

          MPN_NORMALIZE_NOT_ZERO (up, usize);

          if ((up[0] & 1) == 0)                 /* Result even; remove twos.  */
            {
              unsigned long int r;
              count_trailing_zeros (r, up[0]);
              mpn_rshift (anchor_up, up, usize, r);
              usize -= (anchor_up[usize-1] == 0);
            }
          else if (anchor_up != up)
            MPN_COPY (anchor_up, up, usize);

          SWAP_MPN (anchor_up, usize, vp, vsize);
          up = anchor_up;

          if (vsize <= 2)               /* K-ary can't handle < 2 limbs and  */
            break;                      /* isn't efficient for == 2 limbs.  */

          d = vbitsize;
          count_leading_zeros (vbitsize, vp[vsize-1]);
          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
          d = d - vbitsize + 1;

          if (d > BMOD_THRESHOLD)               /* Bmod reduction.  */
            {
              up[usize++] = 0;
              mpn_bdivmod (up, up, usize, vp, vsize, d);
              d /= BITS_PER_MP_LIMB, up += d, usize -= d;
            }
          else                                  /* K-ary reduction.  */
            {
              mp_limb_t bp[2], cp[2];

              /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB).  */
              cp[0] = vp[0], cp[1] = vp[1];
              mpn_bdivmod (cp, cp, 2, up, 2, 2*BITS_PER_MP_LIMB);

              /* U <-- find_a (C) * U.  */
              up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
              usize++;

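              /* Why this helps (informal sketch, following the reference
                 article): C == V/U mod 2^(2*BITS_PER_MP_LIMB) and
                 find_a (C) == D*C mod 2^(2*BITS_PER_MP_LIMB) for some D with
                 0 < |D| < 2^BITS_PER_MP_LIMB, so the new U is congruent to
                 D*V mod 2^(2*BITS_PER_MP_LIMB).  Subtracting D*V (D is
                 recovered as B below) therefore clears at least the two low
                 limbs of U, while GCD (U, V) changes only by spurious
                 factors that the final binary passes remove.  */
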
              /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1).
                 bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and
                 bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2  */
              bp[0] = up[0], bp[1] = up[1];
              mpn_bdivmod (bp, bp, 2, vp, 2, BITS_PER_MP_LIMB);
              bp[1] &= 1;       /* Since V is odd, division is unnecessary.  */

              up[usize++] = 0;
              if (bp[1])        /* B < 0: U <-- U + (-B) * V.  */
                {
                  mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]);
                  mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
                }
              else              /* B >= 0: U <-- U - B * V.  */
                {
                  mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
                  mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
                }

              up += 2, usize -= 2;      /* At least two low limbs are zero.  */
            }

          /* Must remove low zero limbs before complementing.  */
          while (usize != 0 && up[0] == 0)
            up++, usize--;
        }
      while (usize);

      /* Compute GCD (ORIG_V, GCD (ORIG_U, V)); this removes any spurious
         factors the k-ary reduction may have introduced, so the binary
         algorithm below will execute twice.  */
      up = orig_up, usize = orig_usize;
      binary_gcd_ctr = 2;
    }
  else
    binary_gcd_ctr = 1;

  /* Finish up with the binary algorithm.  Executes once or twice.  */
  for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
    {
      if (usize > 2)            /* First make U close to V in size.  */
        {
          unsigned long int vbitsize, d;
          count_leading_zeros (d, up[usize-1]);
          d = usize * BITS_PER_MP_LIMB - d;
          count_leading_zeros (vbitsize, vp[vsize-1]);
          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
          d = d - vbitsize - 1;
          if (d != -(unsigned long int)1 && d > 2)
            {
              mpn_bdivmod (up, up, usize, vp, vsize, d);  /* Result > 0.  */
              d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d;
            }
        }

      /* Start binary GCD.  */
      do
        {
          mp_size_t zeros;

          /* Make sure U is odd.  */
          MPN_NORMALIZE (up, usize);
          while (up[0] == 0)
            up += 1, usize -= 1;
          if ((up[0] & 1) == 0)
            {
              unsigned long int r;
              count_trailing_zeros (r, up[0]);
              mpn_rshift (up, up, usize, r);
              usize -= (up[usize-1] == 0);
            }

          /* Keep usize >= vsize.  */
          if (usize < vsize)
            SWAP_MPN (up, usize, vp, vsize);

          if (usize <= 2)                       /* Double precision.  */
            {
              if (vsize == 1)
                vp[0] = mpn_gcd_1 (up, usize, vp[0]);
              else
                vsize = gcd_2 (vp, up);
              break;                            /* Binary GCD done.  */
            }

          /* Count number of low zero limbs of U - V.  */
          for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
            continue;

          /* If U < V, swap U and V; in any case, subtract V from U.  */
          if (zeros == vsize)                   /* Subtract done.  */
            up += zeros, usize -= zeros;
          else if (usize == vsize)
            {
              mp_size_t size = vsize;
              do
                size--;
              while (up[size] == vp[size]);
              if (up[size] < vp[size])          /* usize == vsize.  */
                SWAP_PTR (up, vp);
              up += zeros, usize = size + 1 - zeros;
              mpn_sub_n (up, up, vp + zeros, usize);
            }
          else
            {
              mp_size_t size = vsize - zeros;
              up += zeros, usize -= zeros;
              if (mpn_sub_n (up, up, vp + zeros, size))
                {
                  while (up[size] == 0)         /* Propagate borrow.  */
                    up[size++] = -(mp_limb_t)1;
                  up[size] -= 1;
                }
            }
        }
      while (usize);                            /* End binary GCD.  */
    }

done:
  if (vp != gp)
    MPN_COPY (gp, vp, vsize);
  TMP_FREE (marker);
  return vsize;
}