Annotation of OpenXM_contrib/gmp/mpn/generic/gcd.c, Revision 1.1.1.2
/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.

Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

/* Integer greatest common divisor of two unsigned integers, using
   the accelerated algorithm (see reference below).

   mp_size_t mpn_gcd (gp, up, usize, vp, vsize).

   Preconditions [U = (up, usize) and V = (vp, vsize)]:

   1. V is odd.
   2. numbits(U) >= numbits(V).

   Both U and V are destroyed by the operation.  The result is stored at gp,
   and its size is returned.

   Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)

   Funding for this work has been partially provided by Conselho Nacional
   de Desenvolvimento Científico e Tecnológico (CNPq) of Brazil, Grant
   301314194-2, and was done while I was a visiting researcher in the
   Instituto de Matemática at Universidade Federal do Rio Grande do Sul
   (UFRGS).

   Refer to
     K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
     Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */
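
/* Editorial usage sketch (not part of the library): a call that satisfies
   the preconditions above.  The operand values are assumptions chosen for
   illustration only.

     mp_limb_t ul[1] = {1000};      U = 1000
     mp_limb_t vl[1] = {15};        V = 15 is odd, numbits(U) >= numbits(V)
     mp_limb_t gl[1];               result needs at most vsize limbs
     mp_size_t gsize;

     gsize = mpn_gcd (gl, ul, 1, vl, 1);

   Afterwards gsize == 1 and gl[0] == 5; ul and vl have been clobbered.  */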

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* If MIN (usize, vsize) >= GCD_ACCEL_THRESHOLD, then the accelerated
   algorithm is used, otherwise the binary algorithm is used.  This may be
   adjusted for different architectures.  */
#ifndef GCD_ACCEL_THRESHOLD
#define GCD_ACCEL_THRESHOLD 5
#endif
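
/* Editorial note: because of the #ifndef guard above, the threshold can be
   overridden when compiling, e.g. (an illustrative command line; the best
   value is architecture dependent):

     cc -DGCD_ACCEL_THRESHOLD=4 -c gcd.c  */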

/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
   algorithm reduces using the bmod operation.  Otherwise, the k-ary reduction
   is used.  0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB.  */
enum
  {
    BMOD_THRESHOLD = BITS_PER_MP_LIMB/2
  };


/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
   Both U and V must be odd.  */
static __gmp_inline mp_size_t
#if __STDC__
gcd_2 (mp_ptr vp, mp_srcptr up)
#else
gcd_2 (vp, up)
     mp_ptr vp;
     mp_srcptr up;
#endif
{
  mp_limb_t u0, u1, v0, v1;
  mp_size_t vsize;

  u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1];

  while (u1 != v1 && u0 != v0)
    {
      unsigned long int r;
      if (u1 > v1)
        {
          u1 -= v1 + (u0 < v0), u0 -= v0;
          count_trailing_zeros (r, u0);
          u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r;
          u1 >>= r;
        }
      else  /* u1 < v1.  */
        {
          v1 -= u1 + (v0 < u0), v0 -= u0;
          count_trailing_zeros (r, v0);
          v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r;
          v1 >>= r;
        }
    }

  vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);

  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
  if (u1 == v1 && u0 == v0)
    return vsize;

  v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
  vp[0] = mpn_gcd_1 (vp, vsize, v0);

  return 1;
}

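/* Editorial sketch (never compiled; not part of the library): the
   subtract-and-shift idea gcd_2 uses, shown on single-word operands.  The
   function name is made up for the example; both arguments must be odd.  */
#if 0
static unsigned long
binary_gcd_odd (unsigned long u, unsigned long v)
{
  while (u != v)
    {
      if (u > v)
        {
          u -= v;                              /* u, v odd, so the difference */
          do u >>= 1; while ((u & 1) == 0);    /* is even; strip the twos.  */
        }
      else
        {
          v -= u;
          do v >>= 1; while ((v & 1) == 0);
        }
    }
  return u;                                    /* GCD of the original operands.  */
}
#endif
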
/* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists
   0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB).
   In the reference article, D was computed along with N, but it is better to
   compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating
   the result as a two's complement signed integer.

   Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB).  According to the reference
   article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use
   2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double
   precision.  If N2 > N1 initially, the first iteration of the while loop
   will swap them.  In all other situations, N1 >= N2 is maintained.  */
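
/* Editorial worked example (illustrative): with 4-bit limbs the modulus
   2^(2*BITS_PER_MP_LIMB) is 256.  For C == 77, N == 2 qualifies, since
   77 * 10 == 770 == 3*256 + 2, i.e. N == D * C mod 256 with D == 10 and
   both N and |D| below 2^BITS_PER_MP_LIMB == 16.  */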

static
#if ! defined (__i386__)
__gmp_inline  /* don't inline this for the x86 */
#endif
mp_limb_t
#if __STDC__
find_a (mp_srcptr cp)
#else
find_a (cp)
     mp_srcptr cp;
#endif
{
  unsigned long int leading_zero_bits = 0;

  mp_limb_t n1_l = cp[0];  /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l.  */
  mp_limb_t n1_h = cp[1];

  mp_limb_t n2_l = -n1_l;  /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l.  */
  mp_limb_t n2_h = ~n1_h;

  /* Main loop.  */
  while (n2_h)  /* While N2 >= 2^BITS_PER_MP_LIMB.  */
    {
      /* N1 <-- N1 % N2.  */
      if ((MP_LIMB_T_HIGHBIT >> leading_zero_bits & n2_h) == 0)
        {
          unsigned long int i;
          count_leading_zeros (i, n2_h);
          i -= leading_zero_bits, leading_zero_bits += i;
          n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
          do
            {
              if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
                n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
              n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
              i -= 1;
            }
          while (i);
        }
      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
        n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;

      MP_LIMB_T_SWAP (n1_h, n2_h);
      MP_LIMB_T_SWAP (n1_l, n2_l);
    }

  return n2_l;
}

mp_size_t
#if __STDC__
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize)
#else
mpn_gcd (gp, up, usize, vp, vsize)
     mp_ptr gp;
     mp_ptr up;
     mp_size_t usize;
     mp_ptr vp;
     mp_size_t vsize;
#endif
{
  mp_ptr orig_vp = vp;
  mp_size_t orig_vsize = vsize;
  int binary_gcd_ctr;  /* Number of times binary gcd will execute.  */
  TMP_DECL (marker);

  TMP_MARK (marker);

  /* Use the accelerated algorithm if vsize is at least GCD_ACCEL_THRESHOLD.
     Two EXTRA limbs for U and V are required for k-ary reduction.  */
  if (vsize >= GCD_ACCEL_THRESHOLD)
    {
      unsigned long int vbitsize, d;
      mp_ptr orig_up = up;
      mp_size_t orig_usize = usize;
      mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);

      MPN_COPY (anchor_up, orig_up, usize);
      up = anchor_up;

      count_leading_zeros (d, up[usize-1]);
      d = usize * BITS_PER_MP_LIMB - d;
      count_leading_zeros (vbitsize, vp[vsize-1]);
      vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
      d = d - vbitsize + 1;

      /* Use bmod reduction to quickly discover whether V divides U.  */
      up[usize++] = 0;  /* Insert leading zero.  */
      mpn_bdivmod (up, up, usize, vp, vsize, d);

      /* Now skip U/V mod 2^d and any low zero limbs.  */
      d /= BITS_PER_MP_LIMB, up += d, usize -= d;
      while (usize != 0 && up[0] == 0)
        up++, usize--;

      if (usize == 0)  /* GCD == ORIG_V.  */
        goto done;

      vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
      MPN_COPY (vp, orig_vp, vsize);

      do  /* Main loop.  */
        {
          /* mpn_com_n can't be used here because anchor_up and up may
             partially overlap.  */
          if (up[usize-1] & MP_LIMB_T_HIGHBIT)  /* U < 0; take two's compl.  */
            {
              mp_size_t i;
              anchor_up[0] = -up[0];
              for (i = 1; i < usize; i++)
                anchor_up[i] = ~up[i];
              up = anchor_up;
            }

          MPN_NORMALIZE_NOT_ZERO (up, usize);

          if ((up[0] & 1) == 0)  /* Result even; remove twos.  */
            {
              unsigned int r;
              count_trailing_zeros (r, up[0]);
              mpn_rshift (anchor_up, up, usize, r);
              usize -= (anchor_up[usize-1] == 0);
            }
          else if (anchor_up != up)
            MPN_COPY_INCR (anchor_up, up, usize);

          MPN_PTR_SWAP (anchor_up, usize, vp, vsize);
          up = anchor_up;

          if (vsize <= 2)  /* K-ary can't handle < 2 limbs and  */
            break;         /* isn't efficient for == 2 limbs.  */

          d = vbitsize;
          count_leading_zeros (vbitsize, vp[vsize-1]);
          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
          d = d - vbitsize + 1;

          if (d > BMOD_THRESHOLD)  /* Bmod reduction.  */
            {
              up[usize++] = 0;
              mpn_bdivmod (up, up, usize, vp, vsize, d);
              d /= BITS_PER_MP_LIMB, up += d, usize -= d;
            }
          else  /* K-ary reduction.  */
            {
              mp_limb_t bp[2], cp[2];

              /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB).  */
              {
                mp_limb_t u_inv, hi, lo;
                modlimb_invert (u_inv, up[0]);
                cp[0] = vp[0] * u_inv;
                umul_ppmm (hi, lo, cp[0], up[0]);
                cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv;
              }

              /* U <-- find_a (C) * U.  */
              up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
              usize++;

              /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1).
                 bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and
                 bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2

                 Like V/U above, but simplified because only the low bit of
                 bp[1] is wanted.  */
              {
                mp_limb_t v_inv, hi, lo;
                modlimb_invert (v_inv, vp[0]);
                bp[0] = up[0] * v_inv;
                umul_ppmm (hi, lo, bp[0], vp[0]);
                bp[1] = (up[1] + hi + (bp[0]&vp[1])) & 1;
              }

              up[usize++] = 0;
              if (bp[1])  /* B < 0: U <-- U + (-B) * V.  */
                {
                  mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]);
                  mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
                }
              else  /* B >= 0: U <-- U - B * V.  */
                {
                  mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
                  mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
                }

              up += 2, usize -= 2;  /* At least two low limbs are zero.  */
            }

          /* Must remove low zero limbs before complementing.  */
          while (usize != 0 && up[0] == 0)
            up++, usize--;
        }
      while (usize);

      /* Compute GCD (ORIG_V, GCD (ORIG_U, V)).  Binary will execute twice.  */
      up = orig_up, usize = orig_usize;
      binary_gcd_ctr = 2;
    }
  else
    binary_gcd_ctr = 1;

  /* Finish up with the binary algorithm.  Executes once or twice.  */
  for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
    {
      if (usize > 2)  /* First make U close to V in size.  */
        {
          unsigned long int vbitsize, d;
          count_leading_zeros (d, up[usize-1]);
          d = usize * BITS_PER_MP_LIMB - d;
          count_leading_zeros (vbitsize, vp[vsize-1]);
          vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
          d = d - vbitsize - 1;
          if (d != -(unsigned long int)1 && d > 2)
            {
              mpn_bdivmod (up, up, usize, vp, vsize, d);  /* Result > 0.  */
              d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d;
            }
        }

      /* Start binary GCD.  */
      do
        {
          mp_size_t zeros;

          /* Make sure U is odd.  */
          MPN_NORMALIZE (up, usize);
          while (up[0] == 0)
            up += 1, usize -= 1;
          if ((up[0] & 1) == 0)
            {
              unsigned int r;
              count_trailing_zeros (r, up[0]);
              mpn_rshift (up, up, usize, r);
              usize -= (up[usize-1] == 0);
            }

          /* Keep usize >= vsize.  */
          if (usize < vsize)
            MPN_PTR_SWAP (up, usize, vp, vsize);

          if (usize <= 2)  /* Double precision.  */
            {
              if (vsize == 1)
                vp[0] = mpn_gcd_1 (up, usize, vp[0]);
              else
                vsize = gcd_2 (vp, up);
              break;  /* Binary GCD done.  */
            }

          /* Count number of low zero limbs of U - V.  */
          for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
            continue;

          /* If U < V, swap U and V; in any case, subtract V from U.  */
          if (zeros == vsize)  /* Subtract done.  */
            up += zeros, usize -= zeros;
          else if (usize == vsize)
            {
              mp_size_t size = vsize;
              do
                size--;
              while (up[size] == vp[size]);
              if (up[size] < vp[size])  /* usize == vsize.  */
                MP_PTR_SWAP (up, vp);
              up += zeros, usize = size + 1 - zeros;
              mpn_sub_n (up, up, vp + zeros, usize);
            }
          else
            {
              mp_size_t size = vsize - zeros;
              up += zeros, usize -= zeros;
              if (mpn_sub_n (up, up, vp + zeros, size))
                {
                  while (up[size] == 0)  /* Propagate borrow.  */
                    up[size++] = -(mp_limb_t)1;
                  up[size] -= 1;
                }
            }
        }
      while (usize);  /* End binary GCD.  */
    }

 done:
  if (vp != gp)
    MPN_COPY (gp, vp, vsize);
  TMP_FREE (marker);
  return vsize;
}