Annotation of OpenXM_contrib/gmp/mpn/generic/gcdext.c, Revision 1.1.1.1
1.1 maekawa 1: /* mpn_gcdext -- Extended Greatest Common Divisor.
2:
3: Copyright (C) 1996 Free Software Foundation, Inc.
4:
5: This file is part of the GNU MP Library.
6:
7: The GNU MP Library is free software; you can redistribute it and/or modify
8: it under the terms of the GNU Library General Public License as published by
9: the Free Software Foundation; either version 2 of the License, or (at your
10: option) any later version.
11:
12: The GNU MP Library is distributed in the hope that it will be useful, but
13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15: License for more details.
16:
17: You should have received a copy of the GNU Library General Public License
18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: MA 02111-1307, USA. */
21:
22: #include "gmp.h"
23: #include "gmp-impl.h"
24: #include "longlong.h"
25:
26: #ifndef EXTEND
27: #define EXTEND 1
28: #endif
29:
30: #if STAT
31: int arr[BITS_PER_MP_LIMB];
32: #endif
33:
34: #define SGN(A) (((A) < 0) ? -1 : ((A) > 0))
35:
36: /* Idea 1: After we have performed a full division, don't shift operands back,
37: but instead account for the extra factors-of-2 thus introduced.
38: Idea 2: Simple generalization to use divide-and-conquer would give us an
39: algorithm that runs faster than O(n^2).
40: Idea 3: The input numbers need less space as the computation progresses,
41: while the s0 and s1 variables need more space. To save space, we
42: could make them share space, and have the latter variables grow
43: into the former. */
44:
45: /* Precondition: U >= V. */
46:
/* NOTE(review): this file is a CVSweb "annotate" dump -- every code line below
   carries CVS revision/line-number residue (e.g. "75:") that must be preserved
   verbatim.  Only review comments (these un-prefixed lines) have been added.

   mpn_gcdext (when EXTEND) / mpn_gcd (when !EXTEND):
     Computes gcd({up,size}, {vp,vsize}) into gp and returns its limb count.
     Under EXTEND it also accumulates a cofactor series into s0p (the caller's
     buffer; copied back at the end if buffer rotation moved it).
     Precondition (stated in the file comment above): U >= V.
     Destroys the contents of up and vp.

   Structure:
     1. If U is longer than V: one full division to reduce U below V, then swap.
     2. Main loop (while V has > 1 limb): a single-limb inner loop builds a
        2x2 signed-limb matrix (A,B;C,D) from the top limbs of U and V
        (Lehmer-style quotient agreement test -- see the q comparison below),
        which is then applied to the full operands (and, under EXTEND, to the
        cofactor pair s0/s1).  If the inner loop made no progress (B == 0),
        fall back to one full multi-limb division.
     3. Finish with a plain single-limb Euclid loop once V fits in one limb. */
47: mp_size_t
48: #if EXTEND
49: #if __STDC__
50: mpn_gcdext (mp_ptr gp, mp_ptr s0p,
51: mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
52: #else
53: mpn_gcdext (gp, s0p, up, size, vp, vsize)
54: mp_ptr gp;
55: mp_ptr s0p;
56: mp_ptr up;
57: mp_size_t size;
58: mp_ptr vp;
59: mp_size_t vsize;
60: #endif
61: #else
62: #if __STDC__
63: mpn_gcd (mp_ptr gp,
64: mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
65: #else
66: mpn_gcd (gp, up, size, vp, vsize)
67: mp_ptr gp;
68: mp_ptr up;
69: mp_size_t size;
70: mp_ptr vp;
71: mp_size_t vsize;
72: #endif
73: #endif
74: {
/* uh/vh: the leading limbs of U and V used by the single-limb inner loop.
   A,B,C,D: the signed single-limb cofactor matrix accumulated there.
   tp/wp: scratch limb vectors (size+1 limbs each, from TMP_ALLOC below),
   also rotated into the u/v and s0/s1 roles by the pointer swaps. */
75: mp_limb_t uh, vh;
76: mp_limb_signed_t A, B, C, D;
77: int cnt;
78: mp_ptr tp, wp;
79: #if RECORD
80: mp_limb_signed_t min = 0, max = 0;
81: #endif
82: #if EXTEND
/* s0p/s1p hold consecutive cofactors; ssize is their current limb count.
   orig_s0p remembers the caller's buffer because the swap blocks below may
   leave the live cofactor in tp or s1p instead. */
83: mp_ptr s1p;
84: mp_ptr orig_s0p = s0p;
85: mp_size_t ssize, orig_size = size;
86: TMP_DECL (mark);
87:
88: TMP_MARK (mark);
89:
90: tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
91: wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
92: s1p = (mp_ptr) TMP_ALLOC (size * BYTES_PER_MP_LIMB);
93:
94: MPN_ZERO (s0p, size);
95: MPN_ZERO (s1p, size);
96:
/* Initial cofactors: s0 = 1, s1 = 0 (one limb each). */
97: s0p[0] = 1;
98: s1p[0] = 0;
99: ssize = 1;
100: #endif
101:
/* If U has more limbs than V, reduce U modulo V once up front so both
   operands have the same limb count for the main loop.  V is shifted left
   to normalize its top limb (required by mpn_divmod), U shifted by the
   same amount, and both shifted back afterwards.  The u/v swap at the end
   restores the U >= V invariant. */
102: if (size > vsize)
103: {
104: /* Normalize V (and shift up U the same amount). */
105: count_leading_zeros (cnt, vp[vsize - 1]);
106: if (cnt != 0)
107: {
108: mp_limb_t cy;
109: mpn_lshift (vp, vp, vsize, cnt);
110: cy = mpn_lshift (up, up, size, cnt);
111: up[size] = cy;
112: size += cy != 0;
113: }
114:
115: mpn_divmod (up + vsize, up, size, vp, vsize);
116: #if EXTEND
117: /* This is really what it boils down to in this case... */
118: s0p[0] = 0;
119: s1p[0] = 1;
120: #endif
121: size = vsize;
122: if (cnt != 0)
123: {
124: mpn_rshift (up, up, size, cnt);
125: mpn_rshift (vp, vp, size, cnt);
126: }
127: {
128: mp_ptr xp;
129: xp = up; up = vp; vp = xp;
130: }
131: }
132:
/* Main reduction loop; exits when V has shrunk to at most one limb. */
133: for (;;)
134: {
135: /* Figure out exact size of V. */
136: vsize = size;
137: MPN_NORMALIZE (vp, vsize);
138: if (vsize <= 1)
139: break;
140:
141: /* Make UH be the most significant limb of U, and make VH be
142: corresponding bits from V. */
143: uh = up[size - 1];
144: vh = vp[size - 1];
145: count_leading_zeros (cnt, uh);
146: if (cnt != 0)
147: {
148: uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
149: vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
150: }
151:
152: #if 0
153: /* For now, only handle BITS_PER_MP_LIMB-1 bits. This makes
154: room for sign bit. */
155: uh >>= 1;
156: vh >>= 1;
157: #endif
/* Start with the identity matrix. */
158: A = 1;
159: B = 0;
160: C = 0;
161: D = 1;
162:
/* Single-limb inner loop: take quotients of the approximations uh/vh
   only, folding each quotient into (A,B;C,D).  The loop stops as soon
   as the quotient computed with the two bracketing corrections
   (A,C vs. B,D) would disagree -- i.e. when a quotient based on the top
   limbs alone can no longer be guaranteed equal to the true
   multi-precision quotient (the Lehmer-style exactness test). */
163: for (;;)
164: {
165: mp_limb_signed_t q, T;
/* Guard against division by zero in the bracketed quotients. */
166: if (vh + C == 0 || vh + D == 0)
167: break;
168:
169: q = (uh + A) / (vh + C);
170: if (q != (uh + B) / (vh + D))
171: break;
172:
/* One Euclid step on the matrix and on (uh, vh). */
173: T = A - q * C;
174: A = C;
175: C = T;
176: T = B - q * D;
177: B = D;
178: D = T;
179: T = uh - q * vh;
180: uh = vh;
181: vh = T;
182: }
183:
184: #if RECORD
185: min = MIN (A, min); min = MIN (B, min);
186: min = MIN (C, min); min = MIN (D, min);
187: max = MAX (A, max); max = MAX (B, max);
188: max = MAX (C, max); max = MAX (D, max);
189: #endif
190:
/* B == 0 means the inner loop never completed a single step, so the
   matrix is still the identity: no cheap progress was possible.  Fall
   back to a full multi-limb division, like the pre-loop reduction.
   Under EXTEND the quotient limbs (left in up[vsize..]) are folded into
   the cofactor series: new s = old s0 + q * s1. */
191: if (B == 0)
192: {
193: mp_limb_t qh;
194: mp_size_t i;
195:
196: /* This is quite rare. I.e., optimize something else! */
197:
198: /* Normalize V (and shift up U the same amount). */
199: count_leading_zeros (cnt, vp[vsize - 1]);
200: if (cnt != 0)
201: {
202: mp_limb_t cy;
203: mpn_lshift (vp, vp, vsize, cnt);
204: cy = mpn_lshift (up, up, size, cnt);
205: up[size] = cy;
206: size += cy != 0;
207: }
208:
209: qh = mpn_divmod (up + vsize, up, size, vp, vsize);
210: #if EXTEND
211: MPN_COPY (tp, s0p, ssize);
212: for (i = 0; i < size - vsize; i++)
213: {
214: mp_limb_t cy;
215: cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
216: if (cy != 0)
217: tp[ssize++] = cy;
218: }
/* NOTE(review): qh != 0 aborts before the addmul -- this path is
   evidently believed unreachable here; the code after abort() is
   dead.  TODO confirm against later GMP revisions. */
219: if (qh != 0)
220: {
221: mp_limb_t cy;
222: abort ();
223: /* XXX since qh == 1, mpn_addmul_1 is overkill */
224: cy = mpn_addmul_1 (tp + size - vsize, s1p, ssize, qh);
225: if (cy != 0)
226: tp[ssize++] = cy;
227: }
228: #if 0
229: MPN_COPY (s0p, s1p, ssize); /* should be old ssize, kind of */
230: MPN_COPY (s1p, tp, ssize);
231: #else
/* Rotate buffers instead of copying: (s0, s1, t) -> (s1, t, s0). */
232: {
233: mp_ptr xp;
234: xp = s0p; s0p = s1p; s1p = xp;
235: xp = s1p; s1p = tp; tp = xp;
236: }
237: #endif
238: #endif
239: size = vsize;
240: if (cnt != 0)
241: {
242: mpn_rshift (up, up, size, cnt);
243: mpn_rshift (vp, vp, size, cnt);
244: }
245:
/* Swap U and V to restore the U >= V invariant. */
246: {
247: mp_ptr xp;
248: xp = up; up = vp; vp = xp;
249: }
250: MPN_NORMALIZE (up, size);
251: }
252: else
253: {
254: /* T = U*A + V*B
255: W = U*C + V*D
256: U = T
257: V = W */
258:
/* Within each row the two entries have opposite signs (a property of
   consecutive Euclid steps), so each row product can be computed as
   one mpn_mul_1 plus one mpn_submul_1 with the sign folded into the
   single-limb multiplier.  The aborts assert that property. */
259: if (SGN(A) == SGN(B)) /* should be different sign */
260: abort ();
261: if (SGN(C) == SGN(D)) /* should be different sign */
262: abort ();
263: #if STAT
264: { mp_limb_t x;
265: x = ABS (A) | ABS (B) | ABS (C) | ABS (D);
266: count_leading_zeros (cnt, x);
267: arr[BITS_PER_MP_LIMB - cnt]++; }
268: #endif
/* A == 0 forces B == 1 (first row still (0,1)), so T = V exactly. */
269: if (A == 0)
270: {
271: if (B != 1) abort ();
272: MPN_COPY (tp, vp, size);
273: }
274: else
275: {
276: if (A < 0)
277: {
278: mpn_mul_1 (tp, vp, size, B);
279: mpn_submul_1 (tp, up, size, -A);
280: }
281: else
282: {
283: mpn_mul_1 (tp, up, size, A);
284: mpn_submul_1 (tp, vp, size, -B);
285: }
286: }
287: if (C < 0)
288: {
289: mpn_mul_1 (wp, vp, size, D);
290: mpn_submul_1 (wp, up, size, -C);
291: }
292: else
293: {
294: mpn_mul_1 (wp, up, size, C);
295: mpn_submul_1 (wp, vp, size, -D);
296: }
297:
/* Adopt the results by pointer swap: (tp, wp) become the new (U, V),
   the old operand buffers become the new scratch. */
298: {
299: mp_ptr xp;
300: xp = tp; tp = up; up = xp;
301: xp = wp; wp = vp; vp = xp;
302: }
303:
304: #if EXTEND
/* Apply the same matrix to the cofactor pair, with all-positive
   arithmetic: signs are folded into which of s0/s1 gets mul'd vs.
   addmul'd, mirroring the U/V update above.  Carries extend ssize. */
305: { mp_limb_t cy;
306: MPN_ZERO (tp, orig_size);
307: if (A == 0)
308: {
309: if (B != 1) abort ();
310: MPN_COPY (tp, s1p, ssize);
311: }
312: else
313: {
314: if (A < 0)
315: {
316: cy = mpn_mul_1 (tp, s1p, ssize, B);
317: cy += mpn_addmul_1 (tp, s0p, ssize, -A);
318: }
319: else
320: {
321: cy = mpn_mul_1 (tp, s0p, ssize, A);
322: cy += mpn_addmul_1 (tp, s1p, ssize, -B);
323: }
324: if (cy != 0)
325: tp[ssize++] = cy;
326: }
327: MPN_ZERO (wp, orig_size);
328: if (C < 0)
329: {
330: cy = mpn_mul_1 (wp, s1p, ssize, D);
331: cy += mpn_addmul_1 (wp, s0p, ssize, -C);
332: }
333: else
334: {
335: cy = mpn_mul_1 (wp, s0p, ssize, C);
336: cy += mpn_addmul_1 (wp, s1p, ssize, -D);
337: }
338: if (cy != 0)
339: wp[ssize++] = cy;
340: }
/* Adopt the new cofactors by pointer swap, as for U/V above. */
341: {
342: mp_ptr xp;
343: xp = tp; tp = s0p; s0p = xp;
344: xp = wp; wp = s1p; s1p = xp;
345: }
346: #endif
347: #if 0 /* Is it a win to remove multiple zeros here? */
348: MPN_NORMALIZE (up, size);
349: #else
/* The matrix step shrinks U; drop at most one top zero limb here. */
350: if (up[size - 1] == 0)
351: size--;
352: #endif
353: }
354: }
355:
356: #if RECORD
357: printf ("min: %ld\n", min);
358: printf ("max: %ld\n", max);
359: #endif
360:
/* Loop exit: V is at most one limb.  If V is exactly zero, U is the gcd. */
361: if (vsize == 0)
362: {
363: if (gp != up)
364: MPN_COPY (gp, up, size);
365: #if EXTEND
/* Buffer rotation may have left the live cofactor outside the caller's
   buffer; copy it back if so. */
366: if (orig_s0p != s0p)
367: MPN_COPY (orig_s0p, s0p, ssize);
368: #endif
369: TMP_FREE (mark);
370: return size;
371: }
372: else
373: {
/* V fits in one limb: reduce U modulo it, then finish with a
   single-limb Euclid loop.  Under EXTEND the quotient limbs (in wp)
   keep updating the cofactor series at every step. */
374: mp_limb_t vl, ul, t;
375: #if EXTEND
376: mp_limb_t cy;
377: mp_size_t i;
378: #endif
379: vl = vp[0];
380: #if EXTEND
381: t = mpn_divmod_1 (wp, up, size, vl);
382: MPN_COPY (tp, s0p, ssize);
383: for (i = 0; i < size; i++)
384: {
385: cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
386: if (cy != 0)
387: tp[ssize++] = cy;
388: }
389: #if 0
390: MPN_COPY (s0p, s1p, ssize);
391: MPN_COPY (s1p, tp, ssize);
392: #else
393: {
394: mp_ptr xp;
395: xp = s0p; s0p = s1p; s1p = xp;
396: xp = s1p; s1p = tp; tp = xp;
397: }
398: #endif
399: #else
400: t = mpn_mod_1 (up, size, vl);
401: #endif
402: ul = vl;
403: vl = t;
404: while (vl != 0)
405: {
/* NOTE(review): this inner 't' shadows the outer 't' declared above;
   harmless here but worth tidying in a writable copy. */
406: mp_limb_t t;
407: #if EXTEND
408: mp_limb_t q, cy;
409: q = ul / vl;
410: t = ul - q*vl;
411:
412: MPN_COPY (tp, s0p, ssize);
413: cy = mpn_addmul_1 (tp, s1p, ssize, q);
414: if (cy != 0)
415: tp[ssize++] = cy;
416: #if 0
417: MPN_COPY (s0p, s1p, ssize);
418: MPN_COPY (s1p, tp, ssize);
419: #else
420: {
421: mp_ptr xp;
422: xp = s0p; s0p = s1p; s1p = xp;
423: xp = s1p; s1p = tp; tp = xp;
424: }
425: #endif
426:
427: #else
428: t = ul % vl;
429: #endif
430: ul = vl;
431: vl = t;
432: }
/* Single-limb gcd result. */
433: gp[0] = ul;
434: #if EXTEND
435: if (orig_s0p != s0p)
436: MPN_COPY (orig_s0p, s0p, ssize);
437: #endif
438: TMP_FREE (mark);
439: return 1;
440: }
441: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>