Annotation of OpenXM_contrib/gmp/mpn/generic/gcdext.c, Revision 1.1
1.1 ! maekawa 1: /* mpn_gcdext -- Extended Greatest Common Divisor.
! 2:
! 3: Copyright (C) 1996 Free Software Foundation, Inc.
! 4:
! 5: This file is part of the GNU MP Library.
! 6:
! 7: The GNU MP Library is free software; you can redistribute it and/or modify
! 8: it under the terms of the GNU Library General Public License as published by
! 9: the Free Software Foundation; either version 2 of the License, or (at your
! 10: option) any later version.
! 11:
! 12: The GNU MP Library is distributed in the hope that it will be useful, but
! 13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! 15: License for more details.
! 16:
! 17: You should have received a copy of the GNU Library General Public License
! 18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: MA 02111-1307, USA. */
! 21:
! 22: #include "gmp.h"
! 23: #include "gmp-impl.h"
! 24: #include "longlong.h"
! 25:
! 26: #ifndef EXTEND
! 27: #define EXTEND 1
! 28: #endif
! 29:
! 30: #if STAT
! 31: int arr[BITS_PER_MP_LIMB];
! 32: #endif
! 33:
! 34: #define SGN(A) (((A) < 0) ? -1 : ((A) > 0))
! 35:
! 36: /* Idea 1: After we have performed a full division, don't shift operands back,
! 37: but instead account for the extra factors-of-2 thus introduced.
! 38: Idea 2: Simple generalization to use divide-and-conquer would give us an
! 39: algorithm that runs faster than O(n^2).
! 40: Idea 3: The input numbers need less space as the computation progresses,
! 41: while the s0 and s1 variables need more space. To save space, we
! 42: could make them share space, and have the latter variables grow
! 43: into the former. */
! 44:
! 45: /* Precondition: U >= V. */
! 46:
! 47: mp_size_t
! 48: #if EXTEND
! 49: #if __STDC__
! 50: mpn_gcdext (mp_ptr gp, mp_ptr s0p,
! 51: mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
! 52: #else
! 53: mpn_gcdext (gp, s0p, up, size, vp, vsize)
! 54: mp_ptr gp;
! 55: mp_ptr s0p;
! 56: mp_ptr up;
! 57: mp_size_t size;
! 58: mp_ptr vp;
! 59: mp_size_t vsize;
! 60: #endif
! 61: #else
! 62: #if __STDC__
! 63: mpn_gcd (mp_ptr gp,
! 64: mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
! 65: #else
! 66: mpn_gcd (gp, up, size, vp, vsize)
! 67: mp_ptr gp;
! 68: mp_ptr up;
! 69: mp_size_t size;
! 70: mp_ptr vp;
! 71: mp_size_t vsize;
! 72: #endif
! 73: #endif
! 74: {
! 75: mp_limb_t uh, vh;
! 76: mp_limb_signed_t A, B, C, D;
! 77: int cnt;
! 78: mp_ptr tp, wp;
! 79: #if RECORD
! 80: mp_limb_signed_t min = 0, max = 0;
! 81: #endif
! 82: #if EXTEND
! 83: mp_ptr s1p;
! 84: mp_ptr orig_s0p = s0p;
! 85: mp_size_t ssize, orig_size = size;
! 86: TMP_DECL (mark);
! 87:
! 88: TMP_MARK (mark);
! 89:
! 90: tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
! 91: wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
! 92: s1p = (mp_ptr) TMP_ALLOC (size * BYTES_PER_MP_LIMB);
! 93:
! 94: MPN_ZERO (s0p, size);
! 95: MPN_ZERO (s1p, size);
! 96:
! 97: s0p[0] = 1;
! 98: s1p[0] = 0;
! 99: ssize = 1;
! 100: #endif
! 101:
! 102: if (size > vsize)
! 103: {
! 104: /* Normalize V (and shift up U the same amount). */
! 105: count_leading_zeros (cnt, vp[vsize - 1]);
! 106: if (cnt != 0)
! 107: {
! 108: mp_limb_t cy;
! 109: mpn_lshift (vp, vp, vsize, cnt);
! 110: cy = mpn_lshift (up, up, size, cnt);
! 111: up[size] = cy;
! 112: size += cy != 0;
! 113: }
! 114:
! 115: mpn_divmod (up + vsize, up, size, vp, vsize);
! 116: #if EXTEND
! 117: /* This is really what it boils down to in this case... */
! 118: s0p[0] = 0;
! 119: s1p[0] = 1;
! 120: #endif
! 121: size = vsize;
! 122: if (cnt != 0)
! 123: {
! 124: mpn_rshift (up, up, size, cnt);
! 125: mpn_rshift (vp, vp, size, cnt);
! 126: }
! 127: {
! 128: mp_ptr xp;
! 129: xp = up; up = vp; vp = xp;
! 130: }
! 131: }
! 132:
! 133: for (;;)
! 134: {
! 135: /* Figure out exact size of V. */
! 136: vsize = size;
! 137: MPN_NORMALIZE (vp, vsize);
! 138: if (vsize <= 1)
! 139: break;
! 140:
! 141: /* Make UH be the most significant limb of U, and make VH be
! 142: corresponding bits from V. */
! 143: uh = up[size - 1];
! 144: vh = vp[size - 1];
! 145: count_leading_zeros (cnt, uh);
! 146: if (cnt != 0)
! 147: {
! 148: uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
! 149: vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
! 150: }
! 151:
! 152: #if 0
! 153: /* For now, only handle BITS_PER_MP_LIMB-1 bits. This makes
! 154: room for sign bit. */
! 155: uh >>= 1;
! 156: vh >>= 1;
! 157: #endif
! 158: A = 1;
! 159: B = 0;
! 160: C = 0;
! 161: D = 1;
! 162:
! 163: for (;;)
! 164: {
! 165: mp_limb_signed_t q, T;
! 166: if (vh + C == 0 || vh + D == 0)
! 167: break;
! 168:
! 169: q = (uh + A) / (vh + C);
! 170: if (q != (uh + B) / (vh + D))
! 171: break;
! 172:
! 173: T = A - q * C;
! 174: A = C;
! 175: C = T;
! 176: T = B - q * D;
! 177: B = D;
! 178: D = T;
! 179: T = uh - q * vh;
! 180: uh = vh;
! 181: vh = T;
! 182: }
! 183:
! 184: #if RECORD
! 185: min = MIN (A, min); min = MIN (B, min);
! 186: min = MIN (C, min); min = MIN (D, min);
! 187: max = MAX (A, max); max = MAX (B, max);
! 188: max = MAX (C, max); max = MAX (D, max);
! 189: #endif
! 190:
! 191: if (B == 0)
! 192: {
! 193: mp_limb_t qh;
! 194: mp_size_t i;
! 195:
! 196: /* This is quite rare. I.e., optimize something else! */
! 197:
! 198: /* Normalize V (and shift up U the same amount). */
! 199: count_leading_zeros (cnt, vp[vsize - 1]);
! 200: if (cnt != 0)
! 201: {
! 202: mp_limb_t cy;
! 203: mpn_lshift (vp, vp, vsize, cnt);
! 204: cy = mpn_lshift (up, up, size, cnt);
! 205: up[size] = cy;
! 206: size += cy != 0;
! 207: }
! 208:
! 209: qh = mpn_divmod (up + vsize, up, size, vp, vsize);
! 210: #if EXTEND
! 211: MPN_COPY (tp, s0p, ssize);
! 212: for (i = 0; i < size - vsize; i++)
! 213: {
! 214: mp_limb_t cy;
! 215: cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
! 216: if (cy != 0)
! 217: tp[ssize++] = cy;
! 218: }
! 219: if (qh != 0)
! 220: {
! 221: mp_limb_t cy;
! 222: abort ();
! 223: /* XXX since qh == 1, mpn_addmul_1 is overkill */
! 224: cy = mpn_addmul_1 (tp + size - vsize, s1p, ssize, qh);
! 225: if (cy != 0)
! 226: tp[ssize++] = cy;
! 227: }
! 228: #if 0
! 229: MPN_COPY (s0p, s1p, ssize); /* should be old ssize, kind of */
! 230: MPN_COPY (s1p, tp, ssize);
! 231: #else
! 232: {
! 233: mp_ptr xp;
! 234: xp = s0p; s0p = s1p; s1p = xp;
! 235: xp = s1p; s1p = tp; tp = xp;
! 236: }
! 237: #endif
! 238: #endif
! 239: size = vsize;
! 240: if (cnt != 0)
! 241: {
! 242: mpn_rshift (up, up, size, cnt);
! 243: mpn_rshift (vp, vp, size, cnt);
! 244: }
! 245:
! 246: {
! 247: mp_ptr xp;
! 248: xp = up; up = vp; vp = xp;
! 249: }
! 250: MPN_NORMALIZE (up, size);
! 251: }
! 252: else
! 253: {
! 254: /* T = U*A + V*B
! 255: W = U*C + V*D
! 256: U = T
! 257: V = W */
! 258:
! 259: if (SGN(A) == SGN(B)) /* should be different sign */
! 260: abort ();
! 261: if (SGN(C) == SGN(D)) /* should be different sign */
! 262: abort ();
! 263: #if STAT
! 264: { mp_limb_t x;
! 265: x = ABS (A) | ABS (B) | ABS (C) | ABS (D);
! 266: count_leading_zeros (cnt, x);
! 267: arr[BITS_PER_MP_LIMB - cnt]++; }
! 268: #endif
! 269: if (A == 0)
! 270: {
! 271: if (B != 1) abort ();
! 272: MPN_COPY (tp, vp, size);
! 273: }
! 274: else
! 275: {
! 276: if (A < 0)
! 277: {
! 278: mpn_mul_1 (tp, vp, size, B);
! 279: mpn_submul_1 (tp, up, size, -A);
! 280: }
! 281: else
! 282: {
! 283: mpn_mul_1 (tp, up, size, A);
! 284: mpn_submul_1 (tp, vp, size, -B);
! 285: }
! 286: }
! 287: if (C < 0)
! 288: {
! 289: mpn_mul_1 (wp, vp, size, D);
! 290: mpn_submul_1 (wp, up, size, -C);
! 291: }
! 292: else
! 293: {
! 294: mpn_mul_1 (wp, up, size, C);
! 295: mpn_submul_1 (wp, vp, size, -D);
! 296: }
! 297:
! 298: {
! 299: mp_ptr xp;
! 300: xp = tp; tp = up; up = xp;
! 301: xp = wp; wp = vp; vp = xp;
! 302: }
! 303:
! 304: #if EXTEND
! 305: { mp_limb_t cy;
! 306: MPN_ZERO (tp, orig_size);
! 307: if (A == 0)
! 308: {
! 309: if (B != 1) abort ();
! 310: MPN_COPY (tp, s1p, ssize);
! 311: }
! 312: else
! 313: {
! 314: if (A < 0)
! 315: {
! 316: cy = mpn_mul_1 (tp, s1p, ssize, B);
! 317: cy += mpn_addmul_1 (tp, s0p, ssize, -A);
! 318: }
! 319: else
! 320: {
! 321: cy = mpn_mul_1 (tp, s0p, ssize, A);
! 322: cy += mpn_addmul_1 (tp, s1p, ssize, -B);
! 323: }
! 324: if (cy != 0)
! 325: tp[ssize++] = cy;
! 326: }
! 327: MPN_ZERO (wp, orig_size);
! 328: if (C < 0)
! 329: {
! 330: cy = mpn_mul_1 (wp, s1p, ssize, D);
! 331: cy += mpn_addmul_1 (wp, s0p, ssize, -C);
! 332: }
! 333: else
! 334: {
! 335: cy = mpn_mul_1 (wp, s0p, ssize, C);
! 336: cy += mpn_addmul_1 (wp, s1p, ssize, -D);
! 337: }
! 338: if (cy != 0)
! 339: wp[ssize++] = cy;
! 340: }
! 341: {
! 342: mp_ptr xp;
! 343: xp = tp; tp = s0p; s0p = xp;
! 344: xp = wp; wp = s1p; s1p = xp;
! 345: }
! 346: #endif
! 347: #if 0 /* Is it a win to remove multiple zeros here? */
! 348: MPN_NORMALIZE (up, size);
! 349: #else
! 350: if (up[size - 1] == 0)
! 351: size--;
! 352: #endif
! 353: }
! 354: }
! 355:
! 356: #if RECORD
! 357: printf ("min: %ld\n", min);
! 358: printf ("max: %ld\n", max);
! 359: #endif
! 360:
! 361: if (vsize == 0)
! 362: {
! 363: if (gp != up)
! 364: MPN_COPY (gp, up, size);
! 365: #if EXTEND
! 366: if (orig_s0p != s0p)
! 367: MPN_COPY (orig_s0p, s0p, ssize);
! 368: #endif
! 369: TMP_FREE (mark);
! 370: return size;
! 371: }
! 372: else
! 373: {
! 374: mp_limb_t vl, ul, t;
! 375: #if EXTEND
! 376: mp_limb_t cy;
! 377: mp_size_t i;
! 378: #endif
! 379: vl = vp[0];
! 380: #if EXTEND
! 381: t = mpn_divmod_1 (wp, up, size, vl);
! 382: MPN_COPY (tp, s0p, ssize);
! 383: for (i = 0; i < size; i++)
! 384: {
! 385: cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
! 386: if (cy != 0)
! 387: tp[ssize++] = cy;
! 388: }
! 389: #if 0
! 390: MPN_COPY (s0p, s1p, ssize);
! 391: MPN_COPY (s1p, tp, ssize);
! 392: #else
! 393: {
! 394: mp_ptr xp;
! 395: xp = s0p; s0p = s1p; s1p = xp;
! 396: xp = s1p; s1p = tp; tp = xp;
! 397: }
! 398: #endif
! 399: #else
! 400: t = mpn_mod_1 (up, size, vl);
! 401: #endif
! 402: ul = vl;
! 403: vl = t;
! 404: while (vl != 0)
! 405: {
! 406: mp_limb_t t;
! 407: #if EXTEND
! 408: mp_limb_t q, cy;
! 409: q = ul / vl;
! 410: t = ul - q*vl;
! 411:
! 412: MPN_COPY (tp, s0p, ssize);
! 413: cy = mpn_addmul_1 (tp, s1p, ssize, q);
! 414: if (cy != 0)
! 415: tp[ssize++] = cy;
! 416: #if 0
! 417: MPN_COPY (s0p, s1p, ssize);
! 418: MPN_COPY (s1p, tp, ssize);
! 419: #else
! 420: {
! 421: mp_ptr xp;
! 422: xp = s0p; s0p = s1p; s1p = xp;
! 423: xp = s1p; s1p = tp; tp = xp;
! 424: }
! 425: #endif
! 426:
! 427: #else
! 428: t = ul % vl;
! 429: #endif
! 430: ul = vl;
! 431: vl = t;
! 432: }
! 433: gp[0] = ul;
! 434: #if EXTEND
! 435: if (orig_s0p != s0p)
! 436: MPN_COPY (orig_s0p, s0p, ssize);
! 437: #endif
! 438: TMP_FREE (mark);
! 439: return 1;
! 440: }
! 441: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>