Annotation of OpenXM_contrib/gmp/mpn/generic/gcd.c, Revision 1.1.1.1
1.1 maekawa 1: /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
2:
3: Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
4:
5: This file is part of the GNU MP Library.
6:
7: The GNU MP Library is free software; you can redistribute it and/or modify
8: it under the terms of the GNU Library General Public License as published by
9: the Free Software Foundation; either version 2 of the License, or (at your
10: option) any later version.
11:
12: The GNU MP Library is distributed in the hope that it will be useful, but
13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15: License for more details.
16:
17: You should have received a copy of the GNU Library General Public License
18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: MA 02111-1307, USA. */
21:
22: /* Integer greatest common divisor of two unsigned integers, using
23: the accelerated algorithm (see reference below).
24:
25: mp_size_t mpn_gcd (vp, vsize, up, usize).
26:
27: Preconditions [U = (up, usize) and V = (vp, vsize)]:
28:
29: 1. V is odd.
30: 2. numbits(U) >= numbits(V).
31:
32: Both U and V are destroyed by the operation. The result is left at vp,
33: and its size is returned.
34:
35: Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
36:
37: Funding for this work has been partially provided by Conselho Nacional
38: de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
39: 301314194-2, and was done while I was a visiting reseacher in the Instituto
40: de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
41:
42: Refer to
43: K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
44: Mathematical Software, v. 21 (March), 1995, pp. 111-122. */
45:
46: #include "gmp.h"
47: #include "gmp-impl.h"
48: #include "longlong.h"
49:
50: /* If MIN (usize, vsize) > ACCEL_THRESHOLD, then the accelerated algorithm is
51: used, otherwise the binary algorithm is used. This may be adjusted for
52: different architectures. */
53: #ifndef ACCEL_THRESHOLD
54: #define ACCEL_THRESHOLD 4
55: #endif
56:
57: /* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
58: algorithm reduces using the bmod operation. Otherwise, the k-ary reduction
59: is used. 0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB. */
60: enum
61: {
62: BMOD_THRESHOLD = BITS_PER_MP_LIMB/2
63: };
64:
65: #define SIGN_BIT (~(~(mp_limb_t)0 >> 1))
66:
67:
68: #define SWAP_LIMB(UL, VL) do{mp_limb_t __l=(UL);(UL)=(VL);(VL)=__l;}while(0)
69: #define SWAP_PTR(UP, VP) do{mp_ptr __p=(UP);(UP)=(VP);(VP)=__p;}while(0)
70: #define SWAP_SZ(US, VS) do{mp_size_t __s=(US);(US)=(VS);(VS)=__s;}while(0)
71: #define SWAP_MPN(UP, US, VP, VS) do{SWAP_PTR(UP,VP);SWAP_SZ(US,VS);}while(0)
72:
73: /* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
74: Both U and V must be odd. */
75: static __gmp_inline mp_size_t
76: #if __STDC__
77: gcd_2 (mp_ptr vp, mp_srcptr up)
78: #else
79: gcd_2 (vp, up)
80: mp_ptr vp;
81: mp_srcptr up;
82: #endif
83: {
84: mp_limb_t u0, u1, v0, v1;
85: mp_size_t vsize;
86:
87: u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1];
88:
89: while (u1 != v1 && u0 != v0)
90: {
91: unsigned long int r;
92: if (u1 > v1)
93: {
94: u1 -= v1 + (u0 < v0), u0 -= v0;
95: count_trailing_zeros (r, u0);
96: u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r;
97: u1 >>= r;
98: }
99: else /* u1 < v1. */
100: {
101: v1 -= u1 + (v0 < u0), v0 -= u0;
102: count_trailing_zeros (r, v0);
103: v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r;
104: v1 >>= r;
105: }
106: }
107:
108: vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);
109:
110: /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
111: if (u1 == v1 && u0 == v0)
112: return vsize;
113:
114: v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
115: vp[0] = mpn_gcd_1 (vp, vsize, v0);
116:
117: return 1;
118: }
119:
120: /* The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists
121: 0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB).
122: In the reference article, D was computed along with N, but it is better to
123: compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating
124: the result as a twos' complement signed integer.
125:
126: Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB). According to the reference
127: article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use
128: 2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double
129: precision. If N2 > N1 initially, the first iteration of the while loop
130: will swap them. In all other situations, N1 >= N2 is maintained. */
131:
132: static __gmp_inline mp_limb_t
133: #if __STDC__
134: find_a (mp_srcptr cp)
135: #else
136: find_a (cp)
137: mp_srcptr cp;
138: #endif
139: {
140: unsigned long int leading_zero_bits = 0;
141:
142: mp_limb_t n1_l = cp[0]; /* N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l. */
143: mp_limb_t n1_h = cp[1];
144:
145: mp_limb_t n2_l = -n1_l; /* N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l. */
146: mp_limb_t n2_h = ~n1_h;
147:
148: /* Main loop. */
149: while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */
150: {
151: /* N1 <-- N1 % N2. */
152: if ((SIGN_BIT >> leading_zero_bits & n2_h) == 0)
153: {
154: unsigned long int i;
155: count_leading_zeros (i, n2_h);
156: i -= leading_zero_bits, leading_zero_bits += i;
157: n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
158: do
159: {
160: if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
161: n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
162: n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
163: i -= 1;
164: }
165: while (i);
166: }
167: if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
168: n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
169:
170: SWAP_LIMB (n1_h, n2_h);
171: SWAP_LIMB (n1_l, n2_l);
172: }
173:
174: return n2_l;
175: }
176:
177: mp_size_t
178: #if __STDC__
179: mpn_gcd (mp_ptr gp, mp_ptr vp, mp_size_t vsize, mp_ptr up, mp_size_t usize)
180: #else
181: mpn_gcd (gp, vp, vsize, up, usize)
182: mp_ptr gp;
183: mp_ptr vp;
184: mp_size_t vsize;
185: mp_ptr up;
186: mp_size_t usize;
187: #endif
188: {
189: mp_ptr orig_vp = vp;
190: mp_size_t orig_vsize = vsize;
191: int binary_gcd_ctr; /* Number of times binary gcd will execute. */
192: TMP_DECL (marker);
193:
194: TMP_MARK (marker);
195:
196: /* Use accelerated algorithm if vsize is over ACCEL_THRESHOLD.
197: Two EXTRA limbs for U and V are required for kary reduction. */
198: if (vsize > ACCEL_THRESHOLD)
199: {
200: unsigned long int vbitsize, d;
201: mp_ptr orig_up = up;
202: mp_size_t orig_usize = usize;
203: mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);
204:
205: MPN_COPY (anchor_up, orig_up, usize);
206: up = anchor_up;
207:
208: count_leading_zeros (d, up[usize-1]);
209: d = usize * BITS_PER_MP_LIMB - d;
210: count_leading_zeros (vbitsize, vp[vsize-1]);
211: vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
212: d = d - vbitsize + 1;
213:
214: /* Use bmod reduction to quickly discover whether V divides U. */
215: up[usize++] = 0; /* Insert leading zero. */
216: mpn_bdivmod (up, up, usize, vp, vsize, d);
217:
218: /* Now skip U/V mod 2^d and any low zero limbs. */
219: d /= BITS_PER_MP_LIMB, up += d, usize -= d;
220: while (usize != 0 && up[0] == 0)
221: up++, usize--;
222:
223: if (usize == 0) /* GCD == ORIG_V. */
224: goto done;
225:
226: vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
227: MPN_COPY (vp, orig_vp, vsize);
228:
229: do /* Main loop. */
230: {
231: if (up[usize-1] & SIGN_BIT) /* U < 0; take twos' compl. */
232: {
233: mp_size_t i;
234: anchor_up[0] = -up[0];
235: for (i = 1; i < usize; i++)
236: anchor_up[i] = ~up[i];
237: up = anchor_up;
238: }
239:
240: MPN_NORMALIZE_NOT_ZERO (up, usize);
241:
242: if ((up[0] & 1) == 0) /* Result even; remove twos. */
243: {
244: unsigned long int r;
245: count_trailing_zeros (r, up[0]);
246: mpn_rshift (anchor_up, up, usize, r);
247: usize -= (anchor_up[usize-1] == 0);
248: }
249: else if (anchor_up != up)
250: MPN_COPY (anchor_up, up, usize);
251:
252: SWAP_MPN (anchor_up, usize, vp, vsize);
253: up = anchor_up;
254:
255: if (vsize <= 2) /* Kary can't handle < 2 limbs and */
256: break; /* isn't efficient for == 2 limbs. */
257:
258: d = vbitsize;
259: count_leading_zeros (vbitsize, vp[vsize-1]);
260: vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
261: d = d - vbitsize + 1;
262:
263: if (d > BMOD_THRESHOLD) /* Bmod reduction. */
264: {
265: up[usize++] = 0;
266: mpn_bdivmod (up, up, usize, vp, vsize, d);
267: d /= BITS_PER_MP_LIMB, up += d, usize -= d;
268: }
269: else /* Kary reduction. */
270: {
271: mp_limb_t bp[2], cp[2];
272:
273: /* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB). */
274: cp[0] = vp[0], cp[1] = vp[1];
275: mpn_bdivmod (cp, cp, 2, up, 2, 2*BITS_PER_MP_LIMB);
276:
277: /* U <-- find_a (C) * U. */
278: up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
279: usize++;
280:
281: /* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1).
282: bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and
283: bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2 */
284: bp[0] = up[0], bp[1] = up[1];
285: mpn_bdivmod (bp, bp, 2, vp, 2, BITS_PER_MP_LIMB);
286: bp[1] &= 1; /* Since V is odd, division is unnecessary. */
287:
288: up[usize++] = 0;
289: if (bp[1]) /* B < 0: U <-- U + (-B) * V. */
290: {
291: mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]);
292: mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
293: }
294: else /* B >= 0: U <-- U - B * V. */
295: {
296: mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
297: mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
298: }
299:
300: up += 2, usize -= 2; /* At least two low limbs are zero. */
301: }
302:
303: /* Must remove low zero limbs before complementing. */
304: while (usize != 0 && up[0] == 0)
305: up++, usize--;
306: }
307: while (usize);
308:
309: /* Compute GCD (ORIG_V, GCD (ORIG_U, V)). Binary will execute twice. */
310: up = orig_up, usize = orig_usize;
311: binary_gcd_ctr = 2;
312: }
313: else
314: binary_gcd_ctr = 1;
315:
316: /* Finish up with the binary algorithm. Executes once or twice. */
317: for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
318: {
319: if (usize > 2) /* First make U close to V in size. */
320: {
321: unsigned long int vbitsize, d;
322: count_leading_zeros (d, up[usize-1]);
323: d = usize * BITS_PER_MP_LIMB - d;
324: count_leading_zeros (vbitsize, vp[vsize-1]);
325: vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
326: d = d - vbitsize - 1;
327: if (d != -(unsigned long int)1 && d > 2)
328: {
329: mpn_bdivmod (up, up, usize, vp, vsize, d); /* Result > 0. */
330: d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d;
331: }
332: }
333:
334: /* Start binary GCD. */
335: do
336: {
337: mp_size_t zeros;
338:
339: /* Make sure U is odd. */
340: MPN_NORMALIZE (up, usize);
341: while (up[0] == 0)
342: up += 1, usize -= 1;
343: if ((up[0] & 1) == 0)
344: {
345: unsigned long int r;
346: count_trailing_zeros (r, up[0]);
347: mpn_rshift (up, up, usize, r);
348: usize -= (up[usize-1] == 0);
349: }
350:
351: /* Keep usize >= vsize. */
352: if (usize < vsize)
353: SWAP_MPN (up, usize, vp, vsize);
354:
355: if (usize <= 2) /* Double precision. */
356: {
357: if (vsize == 1)
358: vp[0] = mpn_gcd_1 (up, usize, vp[0]);
359: else
360: vsize = gcd_2 (vp, up);
361: break; /* Binary GCD done. */
362: }
363:
364: /* Count number of low zero limbs of U - V. */
365: for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
366: continue;
367:
368: /* If U < V, swap U and V; in any case, subtract V from U. */
369: if (zeros == vsize) /* Subtract done. */
370: up += zeros, usize -= zeros;
371: else if (usize == vsize)
372: {
373: mp_size_t size = vsize;
374: do
375: size--;
376: while (up[size] == vp[size]);
377: if (up[size] < vp[size]) /* usize == vsize. */
378: SWAP_PTR (up, vp);
379: up += zeros, usize = size + 1 - zeros;
380: mpn_sub_n (up, up, vp + zeros, usize);
381: }
382: else
383: {
384: mp_size_t size = vsize - zeros;
385: up += zeros, usize -= zeros;
386: if (mpn_sub_n (up, up, vp + zeros, size))
387: {
388: while (up[size] == 0) /* Propagate borrow. */
389: up[size++] = -(mp_limb_t)1;
390: up[size] -= 1;
391: }
392: }
393: }
394: while (usize); /* End binary GCD. */
395: }
396:
397: done:
398: if (vp != gp)
399: MPN_COPY (gp, vp, vsize);
400: TMP_FREE (marker);
401: return vsize;
402: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>