Annotation of OpenXM_contrib/gmp/mpn/generic/gcd.c, Revision 1.1.1.3
1.1 maekawa 1: /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
2:
1.1.1.3 ! ohara 3: Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002 Free
! 4: Software Foundation, Inc.
1.1 maekawa 5:
6: This file is part of the GNU MP Library.
7:
8: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 maekawa 9: it under the terms of the GNU Lesser General Public License as published by
10: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 11: option) any later version.
12:
13: The GNU MP Library is distributed in the hope that it will be useful, but
14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 maekawa 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 16: License for more details.
17:
1.1.1.2 maekawa 18: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: MA 02111-1307, USA. */
22:
23: /* Integer greatest common divisor of two unsigned integers, using
24: the accelerated algorithm (see reference below).
25:
1.1.1.2 maekawa 26: mp_size_t mpn_gcd (up, usize, vp, vsize).
1.1 maekawa 27:
28: Preconditions [U = (up, usize) and V = (vp, vsize)]:
29:
30: 1. V is odd.
31: 2. numbits(U) >= numbits(V).
32:
33: Both U and V are destroyed by the operation. The result is left at vp,
34: and its size is returned.
35:
36: Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
37:
38: Funding for this work has been partially provided by Conselho Nacional
39: de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
  40:    301314194-2, and was done while I was a visiting researcher in the Instituto
41: de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
42:
43: Refer to
44: K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
45: Mathematical Software, v. 21 (March), 1995, pp. 111-122. */
46:
47: #include "gmp.h"
48: #include "gmp-impl.h"
49: #include "longlong.h"
50:
/* If MIN (usize, vsize) >= GCD_ACCEL_THRESHOLD, then the accelerated
   algorithm is used, otherwise the binary algorithm is used.  This may be
   adjusted for different architectures.  (#ifndef so a tuned value can be
   supplied from the build system.)  */
#ifndef GCD_ACCEL_THRESHOLD
#define GCD_ACCEL_THRESHOLD 5
#endif

/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
   algorithm reduces using the bmod operation.  Otherwise, the k-ary reduction
   is used.  0 <= BMOD_THRESHOLD < GMP_NUMB_BITS.  */
enum
{
  BMOD_THRESHOLD = GMP_NUMB_BITS/2
};
65:
66:
/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
   Both U and V must be odd.  Returns the limb size (1 or 2) of the result,
   which is left at vp.  */
static inline mp_size_t
gcd_2 (mp_ptr vp, mp_srcptr up)
{
  mp_limb_t u0, u1, v0, v1;
  mp_size_t vsize;

  u0 = up[0];
  u1 = up[1];
  v0 = vp[0];
  v1 = vp[1];

  /* Classic binary GCD on 2-limb operands: repeatedly subtract the smaller
     from the larger and strip trailing zeros.  Loop while the operands
     differ in BOTH limbs; the remaining cases are handled below.  */
  while (u1 != v1 && u0 != v0)
    {
      unsigned long int r;
      if (u1 > v1)
	{
	  /* U <-- U - V, propagating the borrow from the low limb.  */
	  u1 -= v1 + (u0 < v0);
	  u0 = (u0 - v0) & GMP_NUMB_MASK;
	  /* U and V are odd so the difference is even and (since u0 != v0
	     here) its low limb is nonzero: r >= 1.  Shift out the zeros.  */
	  count_trailing_zeros (r, u0);
	  u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
	  u1 >>= r;
	}
      else  /* u1 < v1.  */
	{
	  /* Same as above with the roles of U and V exchanged.  */
	  v1 -= u1 + (v0 < u0);
	  v0 = (v0 - u0) & GMP_NUMB_MASK;
	  count_trailing_zeros (r, v0);
	  v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
	  v1 >>= r;
	}
    }

  vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);

  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
  if (u1 == v1 && u0 == v0)
    return vsize;

  /* Exactly one pair of limbs is equal here (loop exit condition), so
     |U - V| fits in a single limb.  Finish with the 1-limb gcd.  */
  v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
  vp[0] = mpn_gcd_1 (vp, vsize, v0);

  return 1;
}
112:
/* The function find_a finds 0 < N < 2^GMP_NUMB_BITS such that there exists
   0 < |D| < 2^GMP_NUMB_BITS, and N == D * C mod 2^(2*GMP_NUMB_BITS).
   In the reference article, D was computed along with N, but it is better to
   compute D separately as D <-- N / C mod 2^(GMP_NUMB_BITS + 1), treating
   the result as a twos' complement signed integer.

   Initialize N1 to C mod 2^(2*GMP_NUMB_BITS).  According to the reference
   article, N2 should be initialized to 2^(2*GMP_NUMB_BITS), but we use
   2^(2*GMP_NUMB_BITS) - N1 to start the calculations within double
   precision.  If N2 > N1 initially, the first iteration of the while loop
   will swap them.  In all other situations, N1 >= N2 is maintained.  */

#if HAVE_NATIVE_mpn_gcd_finda
#define find_a(cp)  mpn_gcd_finda (cp)

#else
static
#if ! defined (__i386__)
inline				/* don't inline this for the x86 */
#endif
mp_limb_t
find_a (mp_srcptr cp)
{
  /* Total bits already shifted out of N2 across iterations; used to decide
     whether N2 still needs alignment against N1.  */
  unsigned long int leading_zero_bits = 0;

  mp_limb_t n1_l = cp[0];	/* N1 == n1_h * 2^GMP_NUMB_BITS + n1_l.  */
  mp_limb_t n1_h = cp[1];

  mp_limb_t n2_l = (-n1_l & GMP_NUMB_MASK);	/* N2 == n2_h * 2^GMP_NUMB_BITS + n2_l.  */
  mp_limb_t n2_h = (~n1_h & GMP_NUMB_MASK);

  /* Main loop.  */
  while (n2_h != 0)		/* While N2 >= 2^GMP_NUMB_BITS.  */
    {
      /* N1 <-- N1 % N2, by shift-and-subtract (binary long division).  */
      if (((GMP_NUMB_HIGHBIT >> leading_zero_bits) & n2_h) == 0)
	{
	  unsigned long int i;
	  /* Left-align N2 with N1 (i = number of extra leading zeros in
	     N2's high limb), then do i subtract-and-shift steps to bring
	     it back down.  */
	  count_leading_zeros (i, n2_h);
	  i -= GMP_NAIL_BITS;
	  i -= leading_zero_bits;
	  leading_zero_bits += i;
	  n2_h = ((n2_h << i) & GMP_NUMB_MASK) | (n2_l >> (GMP_NUMB_BITS - i));
	  n2_l = (n2_l << i) & GMP_NUMB_MASK;
	  do
	    {
	      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
		{
		  /* N1 <-- N1 - N2, with borrow from the low limb.  */
		  n1_h -= n2_h + (n1_l < n2_l);
		  n1_l = (n1_l - n2_l) & GMP_NUMB_MASK;
		}
	      /* N2 <-- N2 / 2.  */
	      n2_l = (n2_l >> 1) | ((n2_h << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK);
	      n2_h >>= 1;
	      i -= 1;
	    }
	  while (i != 0);
	}
      /* One final conditional subtract now that N1 and N2 are aligned.  */
      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
	{
	  n1_h -= n2_h + (n1_l < n2_l);
	  n1_l = (n1_l - n2_l) & GMP_NUMB_MASK;
	}

      /* Swap so that N1 >= N2 holds for the next iteration.  */
      MP_LIMB_T_SWAP (n1_h, n2_h);
      MP_LIMB_T_SWAP (n1_l, n2_l);
    }

  return n2_l;
}
#endif
1.1 maekawa 184:
/* mpn_gcd -- GP <-- GCD (U, V), U = (up,usize), V = (vp,vsize).
   Returns the limb size of the result, which is written to gp.
   Preconditions (enforced by the ASSERTs below): usize >= vsize >= 1,
   V odd, both operands normalized, U and V non-overlapping, and
   numbits(U) >= numbits(V).  Both U and V are destroyed.  */
mp_size_t
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize)
{
  mp_ptr orig_vp = vp;
  mp_size_t orig_vsize = vsize;
  int binary_gcd_ctr;		/* Number of times binary gcd will execute.  */
  TMP_DECL (marker);

  ASSERT (usize >= 1);
  ASSERT (vsize >= 1);
  ASSERT (usize >= vsize);
  ASSERT (vp[0] & 1);
  ASSERT (up[usize - 1] != 0);
  ASSERT (vp[vsize - 1] != 0);
#if WANT_ASSERT
  if (usize == vsize)
    {
      /* Equal sizes: verify numbits(U) >= numbits(V) via leading zeros.  */
      int uzeros, vzeros;
      count_leading_zeros (uzeros, up[usize - 1]);
      count_leading_zeros (vzeros, vp[vsize - 1]);
      ASSERT (uzeros <= vzeros);
    }
#endif
  ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
  ASSERT (MPN_SAME_OR_SEPARATE2_P (gp, vsize, up, usize));
  ASSERT (MPN_SAME_OR_SEPARATE2_P (gp, vsize, vp, vsize));

  TMP_MARK (marker);

  /* Use accelerated algorithm if vsize is over GCD_ACCEL_THRESHOLD.
     Two EXTRA limbs for U and V are required for kary reduction.  */
  if (vsize >= GCD_ACCEL_THRESHOLD)
    {
      unsigned long int vbitsize, d;
      mp_ptr orig_up = up;
      mp_size_t orig_usize = usize;
      mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);

      MPN_COPY (anchor_up, orig_up, usize);
      up = anchor_up;

      /* d <-- numbits(U), vbitsize <-- numbits(V).  */
      count_leading_zeros (d, up[usize - 1]);
      d -= GMP_NAIL_BITS;
      d = usize * GMP_NUMB_BITS - d;
      count_leading_zeros (vbitsize, vp[vsize - 1]);
      vbitsize -= GMP_NAIL_BITS;
      vbitsize = vsize * GMP_NUMB_BITS - vbitsize;
      ASSERT (d >= vbitsize);
      d = d - vbitsize + 1;	/* Bit-size difference, plus one.  */

      /* Use bmod reduction to quickly discover whether V divides U.  */
      up[usize++] = 0;				/* Insert leading zero.  */
      mpn_bdivmod (up, up, usize, vp, vsize, d);

      /* Now skip U/V mod 2^d and any low zero limbs.  */
      d /= GMP_NUMB_BITS, up += d, usize -= d;
      while (usize != 0 && up[0] == 0)
	up++, usize--;

      if (usize == 0)		/* GCD == ORIG_V.  */
	goto done;

      /* Working copy of V with the two extra limbs needed by kary.  */
      vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
      MPN_COPY (vp, orig_vp, vsize);

      do					/* Main loop.  */
	{
	  /* mpn_com_n can't be used here because anchor_up and up may
	     partially overlap */
	  if ((up[usize - 1] & GMP_NUMB_HIGHBIT) != 0)  /* U < 0; take twos' compl.  */
	    {
	      mp_size_t i;
	      anchor_up[0] = -up[0] & GMP_NUMB_MASK;
	      for (i = 1; i < usize; i++)
		anchor_up[i] = (~up[i] & GMP_NUMB_MASK);
	      up = anchor_up;
	    }

	  MPN_NORMALIZE_NOT_ZERO (up, usize);

	  if ((up[0] & 1) == 0)			/* Result even; remove twos.  */
	    {
	      unsigned int r;
	      count_trailing_zeros (r, up[0]);
	      mpn_rshift (anchor_up, up, usize, r);
	      usize -= (anchor_up[usize - 1] == 0);
	    }
	  else if (anchor_up != up)
	    MPN_COPY_INCR (anchor_up, up, usize);

	  /* Swap roles: the reduced U becomes the new V and vice versa.  */
	  MPN_PTR_SWAP (anchor_up,usize, vp,vsize);
	  up = anchor_up;

	  if (vsize <= 2)		/* Kary can't handle < 2 limbs and  */
	    break;			/* isn't efficient for == 2 limbs.  */

	  /* d <-- numbits(old V) - numbits(new V) + 1; used to choose
	     between bmod and k-ary reduction.  */
	  d = vbitsize;
	  count_leading_zeros (vbitsize, vp[vsize - 1]);
	  vbitsize -= GMP_NAIL_BITS;
	  vbitsize = vsize * GMP_NUMB_BITS - vbitsize;
	  d = d - vbitsize + 1;

	  if (d > BMOD_THRESHOLD)	/* Bmod reduction.  */
	    {
	      up[usize++] = 0;
	      mpn_bdivmod (up, up, usize, vp, vsize, d);
	      d /= GMP_NUMB_BITS, up += d, usize -= d;
	    }
	  else				/* Kary reduction.  */
	    {
	      mp_limb_t bp[2], cp[2];

	      /* C <-- V/U mod 2^(2*GMP_NUMB_BITS), via the modular
		 inverse of up[0] (U is odd here).  */
	      {
		mp_limb_t u_inv, hi, lo;
		modlimb_invert (u_inv, up[0]);
		cp[0] = (vp[0] * u_inv) & GMP_NUMB_MASK;
		umul_ppmm (hi, lo, cp[0], up[0] << GMP_NAIL_BITS);
		lo >>= GMP_NAIL_BITS;
		cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv & GMP_NUMB_MASK;
	      }

	      /* U <-- find_a (C) * U.  */
	      up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
	      usize++;

	      /* B <-- A/C == U/V mod 2^(GMP_NUMB_BITS + 1).
		 bp[0] <-- U/V mod 2^GMP_NUMB_BITS and
		 bp[1] <-- ( (U - bp[0] * V)/2^GMP_NUMB_BITS ) / V mod 2

		 Like V/U above, but simplified because only the low bit of
		 bp[1] is wanted.  */
	      {
		mp_limb_t v_inv, hi, lo;
		modlimb_invert (v_inv, vp[0]);
		bp[0] = (up[0] * v_inv) & GMP_NUMB_MASK;
		umul_ppmm (hi, lo, bp[0], vp[0] << GMP_NAIL_BITS);
		lo >>= GMP_NAIL_BITS;
		bp[1] = (up[1] + hi + (bp[0] & vp[1])) & 1;
	      }

	      up[usize++] = 0;
	      if (bp[1] != 0)	/* B < 0: U <-- U + (-B) * V.  */
		{
		  mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0] & GMP_NUMB_MASK);
		  mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
		}
	      else		/* B >= 0: U <-- U - B * V.  */
		{
		  mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
		  mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
		}

	      up += 2, usize -= 2;	/* At least two low limbs are zero.  */
	    }

	  /* Must remove low zero limbs before complementing.  */
	  while (usize != 0 && up[0] == 0)
	    up++, usize--;
	}
      while (usize != 0);

      /* Compute GCD (ORIG_V, GCD (ORIG_U, V)).  Binary will execute twice.  */
      up = orig_up, usize = orig_usize;
      binary_gcd_ctr = 2;
    }
  else
    binary_gcd_ctr = 1;

  /* Finish up with the binary algorithm.  Executes once or twice; the
     second pass (accelerated case) restarts with U = ORIG_V.  */
  for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
    {
      if (usize > 2)		/* First make U close to V in size.  */
	{
	  unsigned long int vbitsize, d;
	  count_leading_zeros (d, up[usize - 1]);
	  d -= GMP_NAIL_BITS;
	  d = usize * GMP_NUMB_BITS - d;
	  count_leading_zeros (vbitsize, vp[vsize - 1]);
	  vbitsize -= GMP_NAIL_BITS;
	  vbitsize = vsize * GMP_NUMB_BITS - vbitsize;
	  d = d - vbitsize - 1;
	  /* d wraps to ULONG_MAX when numbits(U) == numbits(V); only
	     reduce when the gap is worth a bmod step.  */
	  if (d != -(unsigned long int)1 && d > 2)
	    {
	      mpn_bdivmod (up, up, usize, vp, vsize, d);  /* Result > 0.  */
	      d /= (unsigned long int)GMP_NUMB_BITS, up += d, usize -= d;
	    }
	}

      /* Start binary GCD.  */
      do
	{
	  mp_size_t zeros;

	  /* Make sure U is odd.  */
	  MPN_NORMALIZE (up, usize);
	  while (up[0] == 0)
	    up += 1, usize -= 1;
	  if ((up[0] & 1) == 0)
	    {
	      unsigned int r;
	      count_trailing_zeros (r, up[0]);
	      mpn_rshift (up, up, usize, r);
	      usize -= (up[usize - 1] == 0);
	    }

	  /* Keep usize >= vsize.  */
	  if (usize < vsize)
	    MPN_PTR_SWAP (up, usize, vp, vsize);

	  if (usize <= 2)			/* Double precision.  */
	    {
	      if (vsize == 1)
		vp[0] = mpn_gcd_1 (up, usize, vp[0]);
	      else
		vsize = gcd_2 (vp, up);
	      break;				/* Binary GCD done.  */
	    }

	  /* Count number of low zero limbs of U - V.  */
	  for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
	    continue;

	  /* If U < V, swap U and V; in any case, subtract V from U.  */
	  if (zeros == vsize)			/* Subtract done.  */
	    up += zeros, usize -= zeros;
	  else if (usize == vsize)
	    {
	      /* Same size: find the most significant differing limb to
		 decide which operand is larger.  */
	      mp_size_t size = vsize;
	      do
		size--;
	      while (up[size] == vp[size]);
	      if (up[size] < vp[size])		/* usize == vsize.  */
		MP_PTR_SWAP (up, vp);
	      up += zeros, usize = size + 1 - zeros;
	      mpn_sub_n (up, up, vp + zeros, usize);
	    }
	  else
	    {
	      mp_size_t size = vsize - zeros;
	      up += zeros, usize -= zeros;
	      if (mpn_sub_n (up, up, vp + zeros, size))
		{
		  while (up[size] == 0)		/* Propagate borrow.  */
		    up[size++] = -(mp_limb_t)1;
		  up[size] -= 1;
		}
	    }
	}
      while (usize);				/* End binary GCD.  */
    }

done:
  if (vp != gp)
    MPN_COPY_INCR (gp, vp, vsize);
  TMP_FREE (marker);
  return vsize;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>