Annotation of OpenXM_contrib/gmp/mpn/generic/gcd.c, Revision 1.1
1.1 ! maekawa 1: /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
! 2:
! 3: Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
! 4:
! 5: This file is part of the GNU MP Library.
! 6:
! 7: The GNU MP Library is free software; you can redistribute it and/or modify
! 8: it under the terms of the GNU Library General Public License as published by
! 9: the Free Software Foundation; either version 2 of the License, or (at your
! 10: option) any later version.
! 11:
! 12: The GNU MP Library is distributed in the hope that it will be useful, but
! 13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! 15: License for more details.
! 16:
! 17: You should have received a copy of the GNU Library General Public License
! 18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: MA 02111-1307, USA. */
! 21:
! 22: /* Integer greatest common divisor of two unsigned integers, using
! 23: the accelerated algorithm (see reference below).
! 24:
! 25: mp_size_t mpn_gcd (vp, vsize, up, usize).
! 26:
! 27: Preconditions [U = (up, usize) and V = (vp, vsize)]:
! 28:
! 29: 1. V is odd.
! 30: 2. numbits(U) >= numbits(V).
! 31:
! 32: Both U and V are destroyed by the operation. The result is left at vp,
! 33: and its size is returned.
! 34:
! 35: Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
! 36:
! 37: Funding for this work has been partially provided by Conselho Nacional
! 38: de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
! 39: 301314194-2, and was done while I was a visiting reseacher in the Instituto
! 40: de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
! 41:
! 42: Refer to
! 43: K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
! 44: Mathematical Software, v. 21 (March), 1995, pp. 111-122. */
! 45:
! 46: #include "gmp.h"
! 47: #include "gmp-impl.h"
! 48: #include "longlong.h"
! 49:
! 50: /* If MIN (usize, vsize) > ACCEL_THRESHOLD, then the accelerated algorithm is
! 51: used, otherwise the binary algorithm is used. This may be adjusted for
! 52: different architectures. */
! 53: #ifndef ACCEL_THRESHOLD
! 54: #define ACCEL_THRESHOLD 4
! 55: #endif
! 56:
! 57: /* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
! 58: algorithm reduces using the bmod operation. Otherwise, the k-ary reduction
! 59: is used. 0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB. */
! 60: enum
! 61: {
! 62: BMOD_THRESHOLD = BITS_PER_MP_LIMB/2
! 63: };
! 64:
! 65: #define SIGN_BIT (~(~(mp_limb_t)0 >> 1))
! 66:
! 67:
! 68: #define SWAP_LIMB(UL, VL) do{mp_limb_t __l=(UL);(UL)=(VL);(VL)=__l;}while(0)
! 69: #define SWAP_PTR(UP, VP) do{mp_ptr __p=(UP);(UP)=(VP);(VP)=__p;}while(0)
! 70: #define SWAP_SZ(US, VS) do{mp_size_t __s=(US);(US)=(VS);(VS)=__s;}while(0)
! 71: #define SWAP_MPN(UP, US, VP, VS) do{SWAP_PTR(UP,VP);SWAP_SZ(US,VS);}while(0)
! 72:
! 73: /* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
! 74: Both U and V must be odd. */
! 75: static __gmp_inline mp_size_t
! 76: #if __STDC__
! 77: gcd_2 (mp_ptr vp, mp_srcptr up)
! 78: #else
! 79: gcd_2 (vp, up)
! 80: mp_ptr vp;
! 81: mp_srcptr up;
! 82: #endif
! 83: {
! 84: mp_limb_t u0, u1, v0, v1;
! 85: mp_size_t vsize;
! 86:
! 87: u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1];
! 88:
! 89: while (u1 != v1 && u0 != v0)
! 90: {
! 91: unsigned long int r;
! 92: if (u1 > v1)
! 93: {
! 94: u1 -= v1 + (u0 < v0), u0 -= v0;
! 95: count_trailing_zeros (r, u0);
! 96: u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r;
! 97: u1 >>= r;
! 98: }
! 99: else /* u1 < v1. */
! 100: {
! 101: v1 -= u1 + (v0 < u0), v0 -= u0;
! 102: count_trailing_zeros (r, v0);
! 103: v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r;
! 104: v1 >>= r;
! 105: }
! 106: }
! 107:
! 108: vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);
! 109:
! 110: /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
! 111: if (u1 == v1 && u0 == v0)
! 112: return vsize;
! 113:
! 114: v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
! 115: vp[0] = mpn_gcd_1 (vp, vsize, v0);
! 116:
! 117: return 1;
! 118: }
! 119:
! 120: /* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists
! 121: 0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB).
! 122: In the reference article, D was computed along with N, but it is better to
! 123: compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating
! 124: the result as a twos' complement signed integer.
! 125:
! 126: Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB). According to the reference
! 127: article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use
! 128: 2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double
! 129: precision. If N2 > N1 initially, the first iteration of the while loop
! 130: will swap them. In all other situations, N1 >= N2 is maintained. */
! 131:
! 132: static __gmp_inline mp_limb_t
! 133: #if __STDC__
! 134: find_a (mp_srcptr cp)
! 135: #else
! 136: find_a (cp)
! 137: mp_srcptr cp;
! 138: #endif
! 139: {
! 140: unsigned long int leading_zero_bits = 0;
! 141:
! 142: mp_limb_t n1_l = cp[0]; /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l. */
! 143: mp_limb_t n1_h = cp[1];
! 144:
! 145: mp_limb_t n2_l = -n1_l; /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l. */
! 146: mp_limb_t n2_h = ~n1_h;
! 147:
! 148: /* Main loop. */
! 149: while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */
! 150: {
! 151: /* N1 <-- N1 % N2. */
! 152: if ((SIGN_BIT >> leading_zero_bits & n2_h) == 0)
! 153: {
! 154: unsigned long int i;
! 155: count_leading_zeros (i, n2_h);
! 156: i -= leading_zero_bits, leading_zero_bits += i;
! 157: n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
! 158: do
! 159: {
! 160: if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
! 161: n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
! 162: n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
! 163: i -= 1;
! 164: }
! 165: while (i);
! 166: }
! 167: if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
! 168: n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
! 169:
! 170: SWAP_LIMB (n1_h, n2_h);
! 171: SWAP_LIMB (n1_l, n2_l);
! 172: }
! 173:
! 174: return n2_l;
! 175: }
! 176:
! 177: mp_size_t
! 178: #if __STDC__
! 179: mpn_gcd (mp_ptr gp, mp_ptr vp, mp_size_t vsize, mp_ptr up, mp_size_t usize)
! 180: #else
! 181: mpn_gcd (gp, vp, vsize, up, usize)
! 182: mp_ptr gp;
! 183: mp_ptr vp;
! 184: mp_size_t vsize;
! 185: mp_ptr up;
! 186: mp_size_t usize;
! 187: #endif
! 188: {
! 189: mp_ptr orig_vp = vp;
! 190: mp_size_t orig_vsize = vsize;
! 191: int binary_gcd_ctr; /* Number of times binary gcd will execute. */
! 192: TMP_DECL (marker);
! 193:
! 194: TMP_MARK (marker);
! 195:
! 196: /* Use accelerated algorithm if vsize is over ACCEL_THRESHOLD.
! 197: Two EXTRA limbs for U and V are required for kary reduction. */
! 198: if (vsize > ACCEL_THRESHOLD)
! 199: {
! 200: unsigned long int vbitsize, d;
! 201: mp_ptr orig_up = up;
! 202: mp_size_t orig_usize = usize;
! 203: mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);
! 204:
! 205: MPN_COPY (anchor_up, orig_up, usize);
! 206: up = anchor_up;
! 207:
! 208: count_leading_zeros (d, up[usize-1]);
! 209: d = usize * BITS_PER_MP_LIMB - d;
! 210: count_leading_zeros (vbitsize, vp[vsize-1]);
! 211: vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
! 212: d = d - vbitsize + 1;
! 213:
! 214: /* Use bmod reduction to quickly discover whether V divides U. */
! 215: up[usize++] = 0; /* Insert leading zero. */
! 216: mpn_bdivmod (up, up, usize, vp, vsize, d);
! 217:
! 218: /* Now skip U/V mod 2^d and any low zero limbs. */
! 219: d /= BITS_PER_MP_LIMB, up += d, usize -= d;
! 220: while (usize != 0 && up[0] == 0)
! 221: up++, usize--;
! 222:
! 223: if (usize == 0) /* GCD == ORIG_V. */
! 224: goto done;
! 225:
! 226: vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
! 227: MPN_COPY (vp, orig_vp, vsize);
! 228:
! 229: do /* Main loop. */
! 230: {
! 231: if (up[usize-1] & SIGN_BIT) /* U < 0; take twos' compl. */
! 232: {
! 233: mp_size_t i;
! 234: anchor_up[0] = -up[0];
! 235: for (i = 1; i < usize; i++)
! 236: anchor_up[i] = ~up[i];
! 237: up = anchor_up;
! 238: }
! 239:
! 240: MPN_NORMALIZE_NOT_ZERO (up, usize);
! 241:
! 242: if ((up[0] & 1) == 0) /* Result even; remove twos. */
! 243: {
! 244: unsigned long int r;
! 245: count_trailing_zeros (r, up[0]);
! 246: mpn_rshift (anchor_up, up, usize, r);
! 247: usize -= (anchor_up[usize-1] == 0);
! 248: }
! 249: else if (anchor_up != up)
! 250: MPN_COPY (anchor_up, up, usize);
! 251:
! 252: SWAP_MPN (anchor_up, usize, vp, vsize);
! 253: up = anchor_up;
! 254:
! 255: if (vsize <= 2) /* Kary can't handle < 2 limbs and */
! 256: break; /* isn't efficient for == 2 limbs. */
! 257:
! 258: d = vbitsize;
! 259: count_leading_zeros (vbitsize, vp[vsize-1]);
! 260: vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
! 261: d = d - vbitsize + 1;
! 262:
! 263: if (d > BMOD_THRESHOLD) /* Bmod reduction. */
! 264: {
! 265: up[usize++] = 0;
! 266: mpn_bdivmod (up, up, usize, vp, vsize, d);
! 267: d /= BITS_PER_MP_LIMB, up += d, usize -= d;
! 268: }
! 269: else /* Kary reduction. */
! 270: {
! 271: mp_limb_t bp[2], cp[2];
! 272:
! 273: /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB). */
! 274: cp[0] = vp[0], cp[1] = vp[1];
! 275: mpn_bdivmod (cp, cp, 2, up, 2, 2*BITS_PER_MP_LIMB);
! 276:
! 277: /* U <-- find_a (C) * U. */
! 278: up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
! 279: usize++;
! 280:
! 281: /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1).
! 282: bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and
! 283: bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2 */
! 284: bp[0] = up[0], bp[1] = up[1];
! 285: mpn_bdivmod (bp, bp, 2, vp, 2, BITS_PER_MP_LIMB);
! 286: bp[1] &= 1; /* Since V is odd, division is unnecessary. */
! 287:
! 288: up[usize++] = 0;
! 289: if (bp[1]) /* B < 0: U <-- U + (-B) * V. */
! 290: {
! 291: mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]);
! 292: mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
! 293: }
! 294: else /* B >= 0: U <-- U - B * V. */
! 295: {
! 296: mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
! 297: mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
! 298: }
! 299:
! 300: up += 2, usize -= 2; /* At least two low limbs are zero. */
! 301: }
! 302:
! 303: /* Must remove low zero limbs before complementing. */
! 304: while (usize != 0 && up[0] == 0)
! 305: up++, usize--;
! 306: }
! 307: while (usize);
! 308:
! 309: /* Compute GCD (ORIG_V, GCD (ORIG_U, V)). Binary will execute twice. */
! 310: up = orig_up, usize = orig_usize;
! 311: binary_gcd_ctr = 2;
! 312: }
! 313: else
! 314: binary_gcd_ctr = 1;
! 315:
! 316: /* Finish up with the binary algorithm. Executes once or twice. */
! 317: for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
! 318: {
! 319: if (usize > 2) /* First make U close to V in size. */
! 320: {
! 321: unsigned long int vbitsize, d;
! 322: count_leading_zeros (d, up[usize-1]);
! 323: d = usize * BITS_PER_MP_LIMB - d;
! 324: count_leading_zeros (vbitsize, vp[vsize-1]);
! 325: vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
! 326: d = d - vbitsize - 1;
! 327: if (d != -(unsigned long int)1 && d > 2)
! 328: {
! 329: mpn_bdivmod (up, up, usize, vp, vsize, d); /* Result > 0. */
! 330: d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d;
! 331: }
! 332: }
! 333:
! 334: /* Start binary GCD. */
! 335: do
! 336: {
! 337: mp_size_t zeros;
! 338:
! 339: /* Make sure U is odd. */
! 340: MPN_NORMALIZE (up, usize);
! 341: while (up[0] == 0)
! 342: up += 1, usize -= 1;
! 343: if ((up[0] & 1) == 0)
! 344: {
! 345: unsigned long int r;
! 346: count_trailing_zeros (r, up[0]);
! 347: mpn_rshift (up, up, usize, r);
! 348: usize -= (up[usize-1] == 0);
! 349: }
! 350:
! 351: /* Keep usize >= vsize. */
! 352: if (usize < vsize)
! 353: SWAP_MPN (up, usize, vp, vsize);
! 354:
! 355: if (usize <= 2) /* Double precision. */
! 356: {
! 357: if (vsize == 1)
! 358: vp[0] = mpn_gcd_1 (up, usize, vp[0]);
! 359: else
! 360: vsize = gcd_2 (vp, up);
! 361: break; /* Binary GCD done. */
! 362: }
! 363:
! 364: /* Count number of low zero limbs of U - V. */
! 365: for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
! 366: continue;
! 367:
! 368: /* If U < V, swap U and V; in any case, subtract V from U. */
! 369: if (zeros == vsize) /* Subtract done. */
! 370: up += zeros, usize -= zeros;
! 371: else if (usize == vsize)
! 372: {
! 373: mp_size_t size = vsize;
! 374: do
! 375: size--;
! 376: while (up[size] == vp[size]);
! 377: if (up[size] < vp[size]) /* usize == vsize. */
! 378: SWAP_PTR (up, vp);
! 379: up += zeros, usize = size + 1 - zeros;
! 380: mpn_sub_n (up, up, vp + zeros, usize);
! 381: }
! 382: else
! 383: {
! 384: mp_size_t size = vsize - zeros;
! 385: up += zeros, usize -= zeros;
! 386: if (mpn_sub_n (up, up, vp + zeros, size))
! 387: {
! 388: while (up[size] == 0) /* Propagate borrow. */
! 389: up[size++] = -(mp_limb_t)1;
! 390: up[size] -= 1;
! 391: }
! 392: }
! 393: }
! 394: while (usize); /* End binary GCD. */
! 395: }
! 396:
! 397: done:
! 398: if (vp != gp)
! 399: MPN_COPY (gp, vp, vsize);
! 400: TMP_FREE (marker);
! 401: return vsize;
! 402: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>