OpenXM/src/kan96xx/gmp-2.0.2-ssh-2/mpn/generic/gcd.c - annotate

Return to gcd.c CVS log
Up to [local] / OpenXM / src / kan96xx / gmp-2.0.2-ssh-2 / mpn / generic
Annotation of OpenXM/src/kan96xx/gmp-2.0.2-ssh-2/mpn/generic/gcd.c, Revision 1.1

1.1     ! takayama    1: /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
        !             2:
        !             3: Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
        !             4:
        !             5: This file is part of the GNU MP Library.
        !             6:
        !             7: The GNU MP Library is free software; you can redistribute it and/or modify
        !             8: it under the terms of the GNU Library General Public License as published by
        !             9: the Free Software Foundation; either version 2 of the License, or (at your
        !            10: option) any later version.
        !            11:
        !            12: The GNU MP Library is distributed in the hope that it will be useful, but
        !            13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
        !            15: License for more details.
        !            16:
        !            17: You should have received a copy of the GNU Library General Public License
        !            18: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            20: MA 02111-1307, USA. */
        !            21:
        !            22: /* Integer greatest common divisor of two unsigned integers, using
        !            23:    the accelerated algorithm (see reference below).
        !            24:
        !            25:    mp_size_t mpn_gcd (gp, vp, vsize, up, usize).
        !            26:
        !            27:    Preconditions [U = (up, usize) and V = (vp, vsize)]:
        !            28:
        !            29:    1.  V is odd.
        !            30:    2.  numbits(U) >= numbits(V).
        !            31:
        !            32:    Both U and V are destroyed by the operation.  The result is stored at gp,
        !            33:    and its size is returned.
        !            34:
        !            35:    Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
        !            36:
        !            37:    Funding for this work has been partially provided by Conselho Nacional
        !            38:    de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
        !            39:    301314194-2, and was done while I was a visiting researcher in the Instituto
        !            40:    de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
        !            41:
        !            42:    Refer to
        !            43:        K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
        !            44:        Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */
        !            45:
        !            46: #include "gmp.h"
        !            47: #include "gmp-impl.h"
        !            48: #include "longlong.h"
        !            49:
        !            50: /* If MIN (usize, vsize) > ACCEL_THRESHOLD, then the accelerated algorithm is
        !            51:    used, otherwise the binary algorithm is used.  This may be adjusted for
        !            52:    different architectures.  mpn_gcd compares only vsize against this value,
        !              :    relying on precondition 2 (U has at least as many bits as V).  The
        !              :    #ifndef guard lets a definition made before this point take priority.  */
        !            53: #ifndef ACCEL_THRESHOLD
        !            54: #define ACCEL_THRESHOLD 4
        !            55: #endif
        !            56:
        !            57: /* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
        !            58:    algorithm reduces using the bmod operation.  Otherwise, the k-ary reduction
        !            59:    is used.  0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB.  The quantity compared
        !              :    against it in mpn_gcd is a bit count, d = (bit size of U) -
        !              :    (bit size of V) + 1.  */
        !            60: enum
        !            61:   {
        !            62:     BMOD_THRESHOLD = BITS_PER_MP_LIMB/2
        !            63:   };
        !            64:
        !            65: #define SIGN_BIT  (~(~(mp_limb_t)0 >> 1))  /* Only the top bit of a limb set.  */
        !            66:
        !            67: /* Exchange helpers.  Each swaps its two arguments through a temporary of the
        !              :    matching type (limb, limb pointer, limb count); SWAP_MPN exchanges a
        !              :    pointer and its size together.  Arguments are evaluated more than
        !              :    once, so they must be plain lvalues without side effects.  */
        !            68: #define SWAP_LIMB(UL, VL) do{mp_limb_t __l=(UL);(UL)=(VL);(VL)=__l;}while(0)
        !            69: #define SWAP_PTR(UP, VP) do{mp_ptr __p=(UP);(UP)=(VP);(VP)=__p;}while(0)
        !            70: #define SWAP_SZ(US, VS) do{mp_size_t __s=(US);(US)=(VS);(VS)=__s;}while(0)
        !            71: #define SWAP_MPN(UP, US, VP, VS) do{SWAP_PTR(UP,VP);SWAP_SZ(US,VS);}while(0)
        !            72:
        !            73: /* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
        !            74:    Both U and V must be odd.
        !              :
        !              :    vp  points to the two limbs of V (low limb first); receives the result.
        !              :    up  points to the two limbs of U (low limb first); only read here.
        !              :
        !              :    Returns the limb count of the value left at vp: 1 or 2.  */
        !            75: static __gmp_inline mp_size_t
        !            76: #if __STDC__  /* ANSI prototype form.  */
        !            77: gcd_2 (mp_ptr vp, mp_srcptr up)
        !            78: #else  /* Old-style (K&R) definition.  */
        !            79: gcd_2 (vp, up)
        !            80:      mp_ptr vp;
        !            81:      mp_srcptr up;
        !            82: #endif
        !            83: {
        !            84:   mp_limb_t u0, u1, v0, v1;  /* Low (x0) and high (x1) limbs of U and V.  */
        !            85:   mp_size_t vsize;
        !            86:
        !            87:   u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1];
        !            88:   /* Subtract and shift until the high limbs or the low limbs coincide.  */
        !            89:   while (u1 != v1 && u0 != v0)
        !            90:     {
        !            91:       unsigned long int r;  /* Low zero bits of the fresh difference.  */
        !            92:       if (u1 > v1)
        !            93:        {
        !            94:          u1 -= v1 + (u0 < v0), u0 -= v0;  /* U -= V; (u0 < v0) is the borrow.  */
        !            95:          count_trailing_zeros (r, u0);  /* u0 is nonzero and even: odd - odd.  */
        !            96:          u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r;  /* U >>= r, low limb.  */
        !            97:          u1 >>= r;  /* U >>= r, high limb; U is odd again.  */
        !            98:        }
        !            99:       else  /* u1 < v1.  */
        !           100:        {
        !           101:          v1 -= u1 + (v0 < u0), v0 -= u0;  /* V -= U; (v0 < u0) is the borrow.  */
        !           102:          count_trailing_zeros (r, v0);  /* v0 is nonzero and even: odd - odd.  */
        !           103:          v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r;  /* V >>= r, low limb.  */
        !           104:          v1 >>= r;  /* V >>= r, high limb; V is odd again.  */
        !           105:        }
        !           106:     }
        !           107:   /* Write V back; it occupies one limb when its high limb is zero.  */
        !           108:   vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);
        !           109:
        !           110:   /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
        !           111:   if (u1 == v1 && u0 == v0)
        !           112:     return vsize;
        !           113:   /* Exactly one limb pair differs; v0 <-- absolute difference of that pair.  */
        !           114:   v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
        !           115:   vp[0] = mpn_gcd_1 (vp, vsize, v0);  /* Single-limb GCD; defined elsewhere.  */
        !           116:
        !           117:   return 1;  /* The result was stored in vp[0] only.  */
        !           118: }
        !           119:
        !           120: /* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists
        !           121:    0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB).
        !           122:    In the reference article, D was computed along with N, but it is better to
        !           123:    compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating
        !           124:    the result as a twos' complement signed integer.
        !           125:
        !           126:    Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB).  According to the reference
        !           127:    article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use
        !           128:    2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double
        !           129:    precision.  If N2 > N1 initially, the first iteration of the while loop
        !           130:    will swap them.  In all other situations, N1 >= N2 is maintained.
        !              :
        !              :    cp  points to the two limbs of C: cp[0] is the low limb, cp[1] the high.
        !              :
        !              :    Returns the low limb of N2 once the high limb of N2 has become zero.  */
        !           131:
        !           132: static __gmp_inline mp_limb_t
        !           133: #if __STDC__  /* ANSI prototype form.  */
        !           134: find_a (mp_srcptr cp)
        !           135: #else  /* Old-style (K&R) definition.  */
        !           136: find_a (cp)
        !           137:      mp_srcptr cp;
        !           138: #endif
        !           139: {
        !           140:   unsigned long int leading_zero_bits = 0;  /* clz (n2_h) at last alignment.  */
        !           141:
        !           142:   mp_limb_t n1_l = cp[0];      /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l.  */
        !           143:   mp_limb_t n1_h = cp[1];
        !           144:
        !           145:   mp_limb_t n2_l = -n1_l;      /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l.  */
        !           146:   mp_limb_t n2_h = ~n1_h;      /* Two-limb negation of N1; needs n1_l != 0.  */
        !           147:
        !           148:   /* Main loop.  */
        !           149:   while (n2_h)                 /* While N2 >= 2^BITS_PER_MP_LIMB.  */
        !           150:     {
        !           151:       /* N1 <-- N1 % N2.  */
        !           152:       if ((SIGN_BIT >> leading_zero_bits & n2_h) == 0)
        !           153:        {  /* n2_h has more leading zeros than were recorded last time.  */
        !           154:          unsigned long int i;
        !           155:          count_leading_zeros (i, n2_h);
        !           156:          i -= leading_zero_bits, leading_zero_bits += i;  /* i <-- new zeros.  */
        !           157:          n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
        !           158:          do  /* N2 now sits i bits higher; step it back one bit per round.  */
        !           159:            {
        !           160:              if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
        !           161:                n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;  /* N1 -= N2.  */
        !           162:              n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
        !           163:              i -= 1;
        !           164:            }
        !           165:          while (i);
        !           166:        }  /* N2 is back at its unshifted value here.  */
        !           167:       if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
        !           168:        n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;  /* Last step: N1 -= N2.  */
        !           169:       /* Exchange N1 and N2 so the remainder becomes the next divisor.  */
        !           170:       SWAP_LIMB (n1_h, n2_h);
        !           171:       SWAP_LIMB (n1_l, n2_l);
        !           172:     }
        !           173:
        !           174:   return n2_l;  /* n2_h == 0, so N2 fits in this single limb.  */
        !           175: }
        !           176: /* Store GCD (U, V) at gp and return its size in limbs, where
        !              :    U = (up, usize) and V = (vp, vsize).
        !              :
        !              :    gp         destination; the result is copied here unless it already
        !              :               resides at gp.
        !              :    vp, vsize  operand V; its limbs may be overwritten.
        !              :    up, usize  operand U; its limbs may be overwritten.
        !              :
        !              :    When vsize > ACCEL_THRESHOLD the accelerated algorithm first runs on
        !              :    temporary copies of U and V, and the binary algorithm is then applied
        !              :    twice: against the original U and then against the original V.
        !              :    Otherwise the binary algorithm runs once, directly on U and V.  */
        !           177: mp_size_t
        !           178: #if __STDC__  /* ANSI prototype form.  */
        !           179: mpn_gcd (mp_ptr gp, mp_ptr vp, mp_size_t vsize, mp_ptr up, mp_size_t usize)
        !           180: #else  /* Old-style (K&R) definition.  */
        !           181: mpn_gcd (gp, vp, vsize, up, usize)
        !           182:      mp_ptr gp;
        !           183:      mp_ptr vp;
        !           184:      mp_size_t vsize;
        !           185:      mp_ptr up;
        !           186:      mp_size_t usize;
        !           187: #endif
        !           188: {
        !           189:   mp_ptr orig_vp = vp;  /* Original V, used as U by the second binary pass.  */
        !           190:   mp_size_t orig_vsize = vsize;
        !           191:   int binary_gcd_ctr;          /* Number of times binary gcd will execute.  */
        !           192:   TMP_DECL (marker);
        !           193:   /* NOTE(review): TMP_* come from gmp-impl.h; presumably scratch bookkeeping.  */
        !           194:   TMP_MARK (marker);
        !           195:
        !           196:   /* Use accelerated algorithm if vsize is over ACCEL_THRESHOLD.
        !           197:      Two EXTRA limbs for U and V are required for kary reduction.  */
        !           198:   if (vsize > ACCEL_THRESHOLD)
        !           199:     {
        !           200:       unsigned long int vbitsize, d;
        !           201:       mp_ptr orig_up = up;  /* Original U, restored for the first binary pass.  */
        !           202:       mp_size_t orig_usize = usize;
        !           203:       mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);
        !           204:       /* Reduce a scratch copy of U; anchor_up marks the start of that buffer.  */
        !           205:       MPN_COPY (anchor_up, orig_up, usize);
        !           206:       up = anchor_up;
        !           207:       /* d <-- (bit size of U) - (bit size of V) + 1, via the top limbs.  */
        !           208:       count_leading_zeros (d, up[usize-1]);
        !           209:       d = usize * BITS_PER_MP_LIMB - d;
        !           210:       count_leading_zeros (vbitsize, vp[vsize-1]);
        !           211:       vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
        !           212:       d = d - vbitsize + 1;
        !           213:
        !           214:       /* Use bmod reduction to quickly discover whether V divides U.  */
        !           215:       up[usize++] = 0;                         /* Insert leading zero.  */
        !           216:       mpn_bdivmod (up, up, usize, vp, vsize, d);
        !           217:
        !           218:       /* Now skip U/V mod 2^d and any low zero limbs.  */
        !           219:       d /= BITS_PER_MP_LIMB, up += d, usize -= d;
        !           220:       while (usize != 0 && up[0] == 0)
        !           221:        up++, usize--;
        !           222:
        !           223:       if (usize == 0)                          /* GCD == ORIG_V.  */
        !           224:        goto done;
        !           225:       /* From here on V is reduced too, so move it to scratch space as well.  */
        !           226:       vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
        !           227:       MPN_COPY (vp, orig_vp, vsize);
        !           228:
        !           229:       do                                       /* Main loop.  */
        !           230:        {
        !           231:          if (up[usize-1] & SIGN_BIT)           /* U < 0; take twos' compl. */
        !           232:            {
        !           233:              mp_size_t i;
        !           234:              anchor_up[0] = -up[0];  /* up[0] != 0: low zero limbs were removed.  */
        !           235:              for (i = 1; i < usize; i++)
        !           236:                anchor_up[i] = ~up[i];
        !           237:              up = anchor_up;  /* The negated value starts at the buffer base.  */
        !           238:            }
        !           239:
        !           240:          MPN_NORMALIZE_NOT_ZERO (up, usize);  /* Presumably drops high zero limbs.  */
        !           241:
        !           242:          if ((up[0] & 1) == 0)                 /* Result even; remove twos. */
        !           243:            {
        !           244:              unsigned long int r;
        !           245:              count_trailing_zeros (r, up[0]);
        !           246:              mpn_rshift (anchor_up, up, usize, r);  /* Shifted result lands at base.  */
        !           247:              usize -= (anchor_up[usize-1] == 0);  /* Top limb may now be zero.  */
        !           248:            }
        !           249:          else if (anchor_up != up)
        !           250:            MPN_COPY (anchor_up, up, usize);  /* Move U down to the buffer base.  */
        !           251:          /* Exchange roles: the reduced U becomes V, the previous V becomes U.  */
        !           252:          SWAP_MPN (anchor_up, usize, vp, vsize);
        !           253:          up = anchor_up;
        !           254:
        !           255:          if (vsize <= 2)               /* Kary can't handle < 2 limbs and  */
        !           256:            break;                      /* isn't efficient for == 2 limbs.  */
        !           257:          /* d <-- (bit size of previous V, now U) - (bit size of new V) + 1.  */
        !           258:          d = vbitsize;
        !           259:          count_leading_zeros (vbitsize, vp[vsize-1]);
        !           260:          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
        !           261:          d = d - vbitsize + 1;
        !           262:
        !           263:          if (d > BMOD_THRESHOLD)       /* Bmod reduction.  */
        !           264:            {
        !           265:              up[usize++] = 0;  /* Insert leading zero, as before the first bmod.  */
        !           266:              mpn_bdivmod (up, up, usize, vp, vsize, d);
        !           267:              d /= BITS_PER_MP_LIMB, up += d, usize -= d;  /* Skip whole low limbs.  */
        !           268:            }
        !           269:          else                          /* Kary reduction.  */
        !           270:            {
        !           271:              mp_limb_t bp[2], cp[2];
        !           272:
        !           273:              /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB).  */
        !           274:              cp[0] = vp[0], cp[1] = vp[1];
        !           275:              mpn_bdivmod (cp, cp, 2, up, 2, 2*BITS_PER_MP_LIMB);
        !           276:
        !           277:              /* U <-- find_a (C)  *  U.  */
        !           278:              up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
        !           279:              usize++;  /* First of the two extra limbs: the product's high limb.  */
        !           280:
        !           281:              /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1).
        !           282:                  bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and
        !           283:                  bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2 */
        !           284:              bp[0] = up[0], bp[1] = up[1];
        !           285:              mpn_bdivmod (bp, bp, 2, vp, 2, BITS_PER_MP_LIMB);
        !           286:              bp[1] &= 1;       /* Since V is odd, division is unnecessary.  */
        !           287:
        !           288:              up[usize++] = 0;  /* Second extra limb: room for the update below.  */
        !           289:              if (bp[1])        /* B < 0: U <-- U + (-B)  * V.  */
        !           290:                {
        !           291:                   mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]);
        !           292:                   mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
        !           293:                }
        !           294:              else              /* B >= 0:  U <-- U - B * V.  */
        !           295:                {
        !           296:                  mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
        !           297:                  mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
        !           298:                }
        !           299:
        !           300:              up += 2, usize -= 2;  /* At least two low limbs are zero.  */
        !           301:            }
        !           302:
        !           303:          /* Must remove low zero limbs before complementing.  */
        !           304:          while (usize != 0 && up[0] == 0)
        !           305:            up++, usize--;
        !           306:        }
        !           307:       while (usize);
        !           308:
        !           309:       /* Compute GCD (ORIG_V, GCD (ORIG_U, V)).  Binary will execute twice.  */
        !           310:       up = orig_up, usize = orig_usize;
        !           311:       binary_gcd_ctr = 2;
        !           312:     }
        !           313:   else
        !           314:     binary_gcd_ctr = 1;
        !           315:
        !           316:   /* Finish up with the binary algorithm.  Executes once or twice.  */
        !           317:   for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
        !           318:     {  /* After a pass, any further pass takes the original V as its U.  */
        !           319:       if (usize > 2)           /* First make U close to V in size.  */
        !           320:        {
        !           321:          unsigned long int vbitsize, d;
        !           322:          count_leading_zeros (d, up[usize-1]);
        !           323:          d = usize * BITS_PER_MP_LIMB - d;
        !           324:          count_leading_zeros (vbitsize, vp[vsize-1]);
        !           325:          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
        !           326:          d = d - vbitsize - 1;  /* (Bit size of U) - (bit size of V) - 1.  */
        !           327:          if (d != -(unsigned long int)1 && d > 2)  /* d is unsigned: skip wrapped -1.  */
        !           328:            {
        !           329:              mpn_bdivmod (up, up, usize, vp, vsize, d);  /* Result > 0.  */
        !           330:              d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d;
        !           331:            }
        !           332:        }
        !           333:
        !           334:       /* Start binary GCD.  */
        !           335:       do
        !           336:        {
        !           337:          mp_size_t zeros;
        !           338:
        !           339:          /* Make sure U is odd.  */
        !           340:          MPN_NORMALIZE (up, usize);  /* Presumably drops high zero limbs.  */
        !           341:          while (up[0] == 0)  /* Skip low zero limbs.  */
        !           342:            up += 1, usize -= 1;
        !           343:          if ((up[0] & 1) == 0)  /* Shift out the remaining low zero bits.  */
        !           344:            {
        !           345:              unsigned long int r;
        !           346:              count_trailing_zeros (r, up[0]);
        !           347:              mpn_rshift (up, up, usize, r);
        !           348:              usize -= (up[usize-1] == 0);  /* Top limb may now be zero.  */
        !           349:            }
        !           350:
        !           351:          /* Keep usize >= vsize.  */
        !           352:          if (usize < vsize)
        !           353:            SWAP_MPN (up, usize, vp, vsize);
        !           354:
        !           355:          if (usize <= 2)                               /* Double precision. */
        !           356:            {
        !           357:              if (vsize == 1)
        !           358:                vp[0] = mpn_gcd_1 (up, usize, vp[0]);  /* V is one limb.  */
        !           359:              else
        !           360:                vsize = gcd_2 (vp, up);  /* usize == vsize == 2.  */
        !           361:              break;                                    /* Binary GCD done.  */
        !           362:            }
        !           363:
        !           364:          /* Count number of low zero limbs of U - V.  */
        !           365:          for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
        !           366:            continue;
        !           367:
        !           368:          /* If U < V, swap U and V; in any case, subtract V from U.  */
        !           369:          if (zeros == vsize)                           /* Subtract done.  */
        !           370:            up += zeros, usize -= zeros;
        !           371:          else if (usize == vsize)  /* Equal limb counts: must order U and V.  */
        !           372:            {
        !           373:              mp_size_t size = vsize;
        !           374:              do  /* Find the highest limb at which U and V differ.  */
        !           375:                size--;
        !           376:              while (up[size] == vp[size]);
        !           377:              if (up[size] < vp[size])                  /* usize == vsize.  */
        !           378:                SWAP_PTR (up, vp);
        !           379:              up += zeros, usize = size + 1 - zeros;  /* Only differing limbs remain.  */
        !           380:              mpn_sub_n (up, up, vp + zeros, usize);
        !           381:            }
        !           382:          else  /* usize > vsize, so U has more limbs than V.  */
        !           383:            {
        !           384:              mp_size_t size = vsize - zeros;
        !           385:              up += zeros, usize -= zeros;
        !           386:              if (mpn_sub_n (up, up, vp + zeros, size))
        !           387:                {
        !           388:                  while (up[size] == 0)                 /* Propagate borrow. */
        !           389:                    up[size++] = -(mp_limb_t)1;
        !           390:                  up[size] -= 1;
        !           391:                }
        !           392:            }
        !           393:        }
        !           394:       while (usize);                                   /* End binary GCD.  */
        !           395:     }
        !           396:
        !           397: done:  /* (vp, vsize) holds the result at this point.  */
        !           398:   if (vp != gp)
        !           399:     MPN_COPY (gp, vp, vsize);  /* Deliver the result to the caller's buffer.  */
        !           400:   TMP_FREE (marker);
        !           401:   return vsize;
        !           402: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>