Annotation of OpenXM_contrib/gmp/mpn/generic/gcdext.c, Revision 1.1.1.2
1.1 maekawa 1: /* mpn_gcdext -- Extended Greatest Common Divisor.
2:
1.1.1.2 ! maekawa 3: Copyright (C) 1996, 1998, 2000 Free Software Foundation, Inc.
1.1 maekawa 4:
5: This file is part of the GNU MP Library.
6:
7: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 ! maekawa 8: it under the terms of the GNU Lesser General Public License as published by
! 9: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 10: option) any later version.
11:
12: The GNU MP Library is distributed in the hope that it will be useful, but
13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 ! maekawa 14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 15: License for more details.
16:
1.1.1.2 ! maekawa 17: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: MA 02111-1307, USA. */
21:
22: #include "gmp.h"
23: #include "gmp-impl.h"
24: #include "longlong.h"
25:
1.1.1.2 ! maekawa 26: #ifndef GCDEXT_THRESHOLD
! 27: #define GCDEXT_THRESHOLD 17
! 28: #endif
! 29:
1.1 maekawa 30: #ifndef EXTEND
31: #define EXTEND 1
32: #endif
33:
34: #if STAT
35: int arr[BITS_PER_MP_LIMB];
36: #endif
37:
38:
1.1.1.2 ! maekawa 39: /* mpn_gcdext (GP, SP, SSIZE, UP, USIZE, VP, VSIZE)
! 40:
! 41: Compute the extended GCD of {UP,USIZE} and {VP,VSIZE} and store the
! 42: greatest common divisor at GP (unless it is 0), and the first cofactor at
! 43: SP. Write the size of the cofactor through the pointer SSIZE. Return the
! 44: size of the value at GP. Note that SP might be a negative number; this is
! 45: denoted by storing the negative of the size through SSIZE.
! 46:
! 47: {UP,USIZE} and {VP,VSIZE} are both clobbered.
! 48:
! 49: The space allocation for all four areas needs to be USIZE+1.
! 50:
! 51: Preconditions: 1) U >= V.
! 52: 2) V > 0. */
! 53:
! 54: /* We use Lehmer's algorithm. The idea is to extract the most significant
! 55: bits of the operands, and compute the continued fraction for them. We then
! 56: apply the gathered cofactors to the full operands.
! 57:
! 58: Idea 1: After we have performed a full division, don't shift operands back,
1.1 maekawa 59: but instead account for the extra factors-of-2 thus introduced.
60: Idea 2: Simple generalization to use divide-and-conquer would give us an
61: algorithm that runs faster than O(n^2).
62: Idea 3: The input numbers need less space as the computation progresses,
1.1.1.2 ! maekawa 63: while the s0 and s1 variables need more space. To save memory, we
1.1 maekawa 64: could make them share space, and have the latter variables grow
1.1.1.2 ! maekawa 65: into the former.
! 66: Idea 4: We should not do double-limb arithmetic from the start. Instead,
! 67: do things in single-limb arithmetic until the quotients differ,
! 68: and then switch to double-limb arithmetic. */
! 69:
1.1 maekawa 70:
1.1.1.2 ! maekawa 71: /* Division optimized for small quotients. If the quotient is more than one limb,
! 72: store 1 in *qh and return 0. */
! 73: static mp_limb_t
! 74: #if __STDC__
! 75: div2 (mp_limb_t *qh, mp_limb_t n1, mp_limb_t n0, mp_limb_t d1, mp_limb_t d0)
! 76: #else
! 77: div2 (qh, n1, n0, d1, d0)
! 78: mp_limb_t *qh;
! 79: mp_limb_t n1;
! 80: mp_limb_t n0;
! 81: mp_limb_t d1;
! 82: mp_limb_t d0;
! 83: #endif
! 84: {
! 85: if (d1 == 0)
! 86: {
! 87: *qh = 1;
! 88: return 0;
! 89: }
! 90:
! 91: if ((mp_limb_signed_t) n1 < 0)
! 92: {
! 93: mp_limb_t q;
! 94: int cnt;
! 95: for (cnt = 1; (mp_limb_signed_t) d1 >= 0; cnt++)
! 96: {
! 97: d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1));
! 98: d0 = d0 << 1;
! 99: }
! 100:
! 101: q = 0;
! 102: while (cnt)
! 103: {
! 104: q <<= 1;
! 105: if (n1 > d1 || (n1 == d1 && n0 >= d0))
! 106: {
! 107: sub_ddmmss (n1, n0, n1, n0, d1, d0);
! 108: q |= 1;
! 109: }
! 110: d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1);
! 111: d1 = d1 >> 1;
! 112: cnt--;
! 113: }
! 114:
! 115: *qh = 0;
! 116: return q;
! 117: }
! 118: else
! 119: {
! 120: mp_limb_t q;
! 121: int cnt;
! 122: for (cnt = 0; n1 > d1 || (n1 == d1 && n0 >= d0); cnt++)
! 123: {
! 124: d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1));
! 125: d0 = d0 << 1;
! 126: }
! 127:
! 128: q = 0;
! 129: while (cnt)
! 130: {
! 131: d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1);
! 132: d1 = d1 >> 1;
! 133: q <<= 1;
! 134: if (n1 > d1 || (n1 == d1 && n0 >= d0))
! 135: {
! 136: sub_ddmmss (n1, n0, n1, n0, d1, d0);
! 137: q |= 1;
! 138: }
! 139: cnt--;
! 140: }
! 141:
! 142: *qh = 0;
! 143: return q;
! 144: }
! 145: }
1.1 maekawa 146:
147: mp_size_t
148: #if EXTEND
149: #if __STDC__
1.1.1.2 ! maekawa 150: mpn_gcdext (mp_ptr gp, mp_ptr s0p, mp_size_t *s0size,
1.1 maekawa 151: mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
152: #else
1.1.1.2 ! maekawa 153: mpn_gcdext (gp, s0p, s0size, up, size, vp, vsize)
1.1 maekawa 154: mp_ptr gp;
155: mp_ptr s0p;
1.1.1.2 ! maekawa 156: mp_size_t *s0size;
1.1 maekawa 157: mp_ptr up;
158: mp_size_t size;
159: mp_ptr vp;
160: mp_size_t vsize;
161: #endif
162: #else
163: #if __STDC__
164: mpn_gcd (mp_ptr gp,
165: mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
166: #else
167: mpn_gcd (gp, up, size, vp, vsize)
168: mp_ptr gp;
169: mp_ptr up;
170: mp_size_t size;
171: mp_ptr vp;
172: mp_size_t vsize;
173: #endif
174: #endif
175: {
1.1.1.2 ! maekawa 176: mp_limb_t A, B, C, D;
1.1 maekawa 177: int cnt;
178: mp_ptr tp, wp;
179: #if RECORD
1.1.1.2 ! maekawa 180: mp_limb_t max = 0;
1.1 maekawa 181: #endif
182: #if EXTEND
183: mp_ptr s1p;
184: mp_ptr orig_s0p = s0p;
1.1.1.2 ! maekawa 185: mp_size_t ssize;
! 186: int sign = 1;
! 187: #endif
! 188: int use_double_flag;
1.1 maekawa 189: TMP_DECL (mark);
190:
191: TMP_MARK (mark);
192:
1.1.1.2 ! maekawa 193: use_double_flag = (size >= GCDEXT_THRESHOLD);
! 194:
1.1 maekawa 195: tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
196: wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
1.1.1.2 ! maekawa 197: #if EXTEND
! 198: s1p = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
1.1 maekawa 199:
200: MPN_ZERO (s0p, size);
201: MPN_ZERO (s1p, size);
202:
203: s0p[0] = 1;
204: s1p[0] = 0;
205: ssize = 1;
206: #endif
207:
208: if (size > vsize)
209: {
210: /* Normalize V (and shift up U the same amount). */
211: count_leading_zeros (cnt, vp[vsize - 1]);
212: if (cnt != 0)
213: {
214: mp_limb_t cy;
215: mpn_lshift (vp, vp, vsize, cnt);
216: cy = mpn_lshift (up, up, size, cnt);
217: up[size] = cy;
218: size += cy != 0;
219: }
220:
221: mpn_divmod (up + vsize, up, size, vp, vsize);
222: #if EXTEND
223: /* This is really what it boils down to in this case... */
224: s0p[0] = 0;
225: s1p[0] = 1;
1.1.1.2 ! maekawa 226: sign = -sign;
1.1 maekawa 227: #endif
228: size = vsize;
229: if (cnt != 0)
230: {
231: mpn_rshift (up, up, size, cnt);
232: mpn_rshift (vp, vp, size, cnt);
233: }
1.1.1.2 ! maekawa 234: MP_PTR_SWAP (up, vp);
1.1 maekawa 235: }
236:
237: for (;;)
238: {
1.1.1.2 ! maekawa 239: mp_limb_t asign;
1.1 maekawa 240: /* Figure out exact size of V. */
241: vsize = size;
242: MPN_NORMALIZE (vp, vsize);
243: if (vsize <= 1)
244: break;
245:
1.1.1.2 ! maekawa 246: if (use_double_flag)
1.1 maekawa 247: {
1.1.1.2 ! maekawa 248: mp_limb_t uh, vh, ul, vl;
! 249: /* Let UH,UL be the most significant limbs of U, and let VH,VL be
! 250: the corresponding bits from V. */
! 251: uh = up[size - 1];
! 252: vh = vp[size - 1];
! 253: ul = up[size - 2];
! 254: vl = vp[size - 2];
! 255: count_leading_zeros (cnt, uh);
! 256: if (cnt != 0)
! 257: {
! 258: uh = (uh << cnt) | (ul >> (BITS_PER_MP_LIMB - cnt));
! 259: vh = (vh << cnt) | (vl >> (BITS_PER_MP_LIMB - cnt));
! 260: vl <<= cnt;
! 261: ul <<= cnt;
! 262: if (size >= 3)
! 263: {
! 264: ul |= (up[size - 3] >> (BITS_PER_MP_LIMB - cnt));
! 265: vl |= (vp[size - 3] >> (BITS_PER_MP_LIMB - cnt));
! 266: }
! 267: }
! 268:
! 269: A = 1;
! 270: B = 0;
! 271: C = 0;
! 272: D = 1;
! 273:
! 274: asign = 0;
! 275: for (;;)
! 276: {
! 277: mp_limb_t T;
! 278: mp_limb_t qh, q1, q2;
! 279: mp_limb_t nh, nl, dh, dl;
! 280: mp_limb_t t1, t0;
! 281: mp_limb_t Th, Tl;
! 282:
! 283: sub_ddmmss (dh, dl, vh, vl, 0, C);
! 284: if ((dl | dh) == 0)
! 285: break;
! 286: add_ssaaaa (nh, nl, uh, ul, 0, A);
! 287: q1 = div2 (&qh, nh, nl, dh, dl);
! 288: if (qh != 0)
! 289: break; /* could handle this */
! 290:
! 291: add_ssaaaa (dh, dl, vh, vl, 0, D);
! 292: if ((dl | dh) == 0)
! 293: break;
! 294: sub_ddmmss (nh, nl, uh, ul, 0, B);
! 295: q2 = div2 (&qh, nh, nl, dh, dl);
! 296: if (qh != 0)
! 297: break; /* could handle this */
! 298:
! 299: if (q1 != q2)
! 300: break;
! 301:
! 302: asign = ~asign;
! 303:
! 304: T = A + q1 * C;
! 305: A = C;
! 306: C = T;
! 307: T = B + q1 * D;
! 308: B = D;
! 309: D = T;
! 310: umul_ppmm (t1, t0, q1, vl);
! 311: t1 += q1 * vh;
! 312: sub_ddmmss (Th, Tl, uh, ul, t1, t0);
! 313: uh = vh, ul = vl;
! 314: vh = Th, vl = Tl;
! 315:
! 316: add_ssaaaa (dh, dl, vh, vl, 0, C);
! 317: sub_ddmmss (nh, nl, uh, ul, 0, A);
! 318: q1 = div2 (&qh, nh, nl, dh, dl);
! 319: if (qh != 0)
! 320: break; /* could handle this */
! 321:
! 322: sub_ddmmss (dh, dl, vh, vl, 0, D);
! 323: if ((dl | dh) == 0)
! 324: break;
! 325: add_ssaaaa (nh, nl, uh, ul, 0, B);
! 326: q2 = div2 (&qh, nh, nl, dh, dl);
! 327: if (qh != 0)
! 328: break; /* could handle this */
! 329:
! 330: if (q1 != q2)
! 331: break;
! 332:
! 333: asign = ~asign;
! 334:
! 335: T = A + q1 * C;
! 336: A = C;
! 337: C = T;
! 338: T = B + q1 * D;
! 339: B = D;
! 340: D = T;
! 341: umul_ppmm (t1, t0, q1, vl);
! 342: t1 += q1 * vh;
! 343: sub_ddmmss (Th, Tl, uh, ul, t1, t0);
! 344: uh = vh, ul = vl;
! 345: vh = Th, vl = Tl;
! 346: }
! 347: #if EXTEND
! 348: if (asign)
! 349: sign = -sign;
! 350: #endif
1.1 maekawa 351: }
1.1.1.2 ! maekawa 352: else /* Same, but using single-limb calculations. */
! 353: {
! 354: mp_limb_t uh, vh;
! 355: /* Make UH be the most significant limb of U, and make VH be
! 356: corresponding bits from V. */
! 357: uh = up[size - 1];
! 358: vh = vp[size - 1];
! 359: count_leading_zeros (cnt, uh);
! 360: if (cnt != 0)
! 361: {
! 362: uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
! 363: vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
! 364: }
1.1 maekawa 365:
1.1.1.2 ! maekawa 366: A = 1;
! 367: B = 0;
! 368: C = 0;
! 369: D = 1;
! 370:
! 371: asign = 0;
! 372: for (;;)
! 373: {
! 374: mp_limb_t q, T;
! 375: if (vh - C == 0 || vh + D == 0)
! 376: break;
! 377:
! 378: q = (uh + A) / (vh - C);
! 379: if (q != (uh - B) / (vh + D))
! 380: break;
! 381:
! 382: asign = ~asign;
! 383:
! 384: T = A + q * C;
! 385: A = C;
! 386: C = T;
! 387: T = B + q * D;
! 388: B = D;
! 389: D = T;
! 390: T = uh - q * vh;
! 391: uh = vh;
! 392: vh = T;
! 393:
! 394: if (vh - D == 0)
! 395: break;
! 396:
! 397: q = (uh - A) / (vh + C);
! 398: if (q != (uh + B) / (vh - D))
! 399: break;
! 400:
! 401: asign = ~asign;
! 402:
! 403: T = A + q * C;
! 404: A = C;
! 405: C = T;
! 406: T = B + q * D;
! 407: B = D;
! 408: D = T;
! 409: T = uh - q * vh;
! 410: uh = vh;
! 411: vh = T;
! 412: }
! 413: #if EXTEND
! 414: if (asign)
! 415: sign = -sign;
! 416: #endif
1.1 maekawa 417: }
418:
419: #if RECORD
420: max = MAX (A, max); max = MAX (B, max);
421: max = MAX (C, max); max = MAX (D, max);
422: #endif
423:
424: if (B == 0)
425: {
426: mp_limb_t qh;
427: mp_size_t i;
428: /* This is quite rare. I.e., optimize something else! */
429:
430: /* Normalize V (and shift up U the same amount). */
431: count_leading_zeros (cnt, vp[vsize - 1]);
432: if (cnt != 0)
433: {
434: mp_limb_t cy;
435: mpn_lshift (vp, vp, vsize, cnt);
436: cy = mpn_lshift (up, up, size, cnt);
437: up[size] = cy;
438: size += cy != 0;
439: }
440:
441: qh = mpn_divmod (up + vsize, up, size, vp, vsize);
442: #if EXTEND
443: MPN_COPY (tp, s0p, ssize);
444: {
1.1.1.2 ! maekawa 445: mp_size_t qsize;
! 446:
! 447: qsize = size - vsize; /* size of stored quotient from division */
! 448: if (ssize < qsize)
! 449: {
! 450: MPN_ZERO (tp + ssize, qsize - ssize);
! 451: MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
! 452: for (i = 0; i < ssize; i++)
! 453: {
! 454: mp_limb_t cy;
! 455: cy = mpn_addmul_1 (tp + i, up + vsize, qsize, s1p[i]);
! 456: tp[qsize + i] = cy;
! 457: }
! 458: if (qh != 0)
! 459: {
! 460: mp_limb_t cy;
! 461: cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize);
! 462: if (cy != 0)
! 463: abort ();
! 464: }
! 465: }
! 466: else
! 467: {
! 468: MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
! 469: for (i = 0; i < qsize; i++)
! 470: {
! 471: mp_limb_t cy;
! 472: cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
! 473: tp[ssize + i] = cy;
! 474: }
! 475: if (qh != 0)
! 476: {
! 477: mp_limb_t cy;
! 478: cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize);
! 479: if (cy != 0)
! 480: {
! 481: tp[qsize + ssize] = cy;
! 482: s1p[qsize + ssize] = 0;
! 483: ssize++;
! 484: }
! 485: }
! 486: }
! 487: ssize += qsize;
! 488: ssize -= tp[ssize - 1] == 0;
1.1 maekawa 489: }
1.1.1.2 ! maekawa 490:
! 491: sign = -sign;
! 492: MP_PTR_SWAP (s0p, s1p);
! 493: MP_PTR_SWAP (s1p, tp);
1.1 maekawa 494: #endif
495: size = vsize;
496: if (cnt != 0)
497: {
498: mpn_rshift (up, up, size, cnt);
499: mpn_rshift (vp, vp, size, cnt);
500: }
1.1.1.2 ! maekawa 501: MP_PTR_SWAP (up, vp);
1.1 maekawa 502: }
503: else
504: {
1.1.1.2 ! maekawa 505: #if EXTEND
! 506: mp_size_t tsize, wsize;
! 507: #endif
1.1 maekawa 508: /* T = U*A + V*B
509: W = U*C + V*D
510: U = T
511: V = W */
512:
513: #if STAT
1.1.1.2 ! maekawa 514: { mp_limb_t x; x = A | B | C | D; count_leading_zeros (cnt, x);
! 515: arr[BITS_PER_MP_LIMB - cnt]++; }
1.1 maekawa 516: #endif
517: if (A == 0)
518: {
1.1.1.2 ! maekawa 519: /* B == 1 and C == 1 (D is arbitrary) */
! 520: mp_limb_t cy;
1.1 maekawa 521: MPN_COPY (tp, vp, size);
1.1.1.2 ! maekawa 522: MPN_COPY (wp, up, size);
! 523: mpn_submul_1 (wp, vp, size, D);
! 524: MP_PTR_SWAP (tp, up);
! 525: MP_PTR_SWAP (wp, vp);
1.1 maekawa 526: #if EXTEND
527: MPN_COPY (tp, s1p, ssize);
1.1.1.2 ! maekawa 528: tsize = ssize;
! 529: tp[ssize] = 0; /* must zero since wp might spill below */
! 530: MPN_COPY (wp, s0p, ssize);
! 531: cy = mpn_addmul_1 (wp, s1p, ssize, D);
! 532: wp[ssize] = cy;
! 533: wsize = ssize + (cy != 0);
! 534: MP_PTR_SWAP (tp, s0p);
! 535: MP_PTR_SWAP (wp, s1p);
! 536: ssize = MAX (wsize, tsize);
! 537: #endif
1.1 maekawa 538: }
539: else
540: {
1.1.1.2 ! maekawa 541: if (asign)
1.1 maekawa 542: {
1.1.1.2 ! maekawa 543: mp_limb_t cy;
! 544: mpn_mul_1 (tp, vp, size, B);
! 545: mpn_submul_1 (tp, up, size, A);
! 546: mpn_mul_1 (wp, up, size, C);
! 547: mpn_submul_1 (wp, vp, size, D);
! 548: MP_PTR_SWAP (tp, up);
! 549: MP_PTR_SWAP (wp, vp);
! 550: #if EXTEND
1.1 maekawa 551: cy = mpn_mul_1 (tp, s1p, ssize, B);
1.1.1.2 ! maekawa 552: cy += mpn_addmul_1 (tp, s0p, ssize, A);
! 553: tp[ssize] = cy;
! 554: tsize = ssize + (cy != 0);
! 555: cy = mpn_mul_1 (wp, s0p, ssize, C);
! 556: cy += mpn_addmul_1 (wp, s1p, ssize, D);
! 557: wp[ssize] = cy;
! 558: wsize = ssize + (cy != 0);
! 559: MP_PTR_SWAP (tp, s0p);
! 560: MP_PTR_SWAP (wp, s1p);
! 561: ssize = MAX (wsize, tsize);
! 562: #endif
1.1 maekawa 563: }
564: else
565: {
1.1.1.2 ! maekawa 566: mp_limb_t cy;
! 567: mpn_mul_1 (tp, up, size, A);
! 568: mpn_submul_1 (tp, vp, size, B);
! 569: mpn_mul_1 (wp, vp, size, D);
! 570: mpn_submul_1 (wp, up, size, C);
! 571: MP_PTR_SWAP (tp, up);
! 572: MP_PTR_SWAP (wp, vp);
! 573: #if EXTEND
1.1 maekawa 574: cy = mpn_mul_1 (tp, s0p, ssize, A);
1.1.1.2 ! maekawa 575: cy += mpn_addmul_1 (tp, s1p, ssize, B);
! 576: tp[ssize] = cy;
! 577: tsize = ssize + (cy != 0);
! 578: cy = mpn_mul_1 (wp, s1p, ssize, D);
! 579: cy += mpn_addmul_1 (wp, s0p, ssize, C);
! 580: wp[ssize] = cy;
! 581: wsize = ssize + (cy != 0);
! 582: MP_PTR_SWAP (tp, s0p);
! 583: MP_PTR_SWAP (wp, s1p);
! 584: ssize = MAX (wsize, tsize);
! 585: #endif
1.1 maekawa 586: }
587: }
1.1.1.2 ! maekawa 588:
! 589: size -= up[size - 1] == 0;
1.1 maekawa 590: }
591: }
592:
593: #if RECORD
1.1.1.2 ! maekawa 594: printf ("max: %lx\n", max);
! 595: #endif
! 596:
! 597: #if STAT
! 598: {int i; for (i = 0; i < BITS_PER_MP_LIMB; i++) printf ("%d:%d\n", i, arr[i]);}
1.1 maekawa 599: #endif
600:
601: if (vsize == 0)
602: {
1.1.1.2 ! maekawa 603: if (gp != up && gp != 0)
1.1 maekawa 604: MPN_COPY (gp, up, size);
605: #if EXTEND
1.1.1.2 ! maekawa 606: MPN_NORMALIZE (s0p, ssize);
1.1 maekawa 607: if (orig_s0p != s0p)
608: MPN_COPY (orig_s0p, s0p, ssize);
1.1.1.2 ! maekawa 609: *s0size = sign >= 0 ? ssize : -ssize;
1.1 maekawa 610: #endif
611: TMP_FREE (mark);
612: return size;
613: }
614: else
615: {
616: mp_limb_t vl, ul, t;
617: #if EXTEND
1.1.1.2 ! maekawa 618: mp_size_t qsize, i;
1.1 maekawa 619: #endif
620: vl = vp[0];
621: #if EXTEND
622: t = mpn_divmod_1 (wp, up, size, vl);
1.1.1.2 ! maekawa 623:
1.1 maekawa 624: MPN_COPY (tp, s0p, ssize);
1.1.1.2 ! maekawa 625:
! 626: qsize = size - (wp[size - 1] == 0); /* size of quotient from division */
! 627: if (ssize < qsize)
1.1 maekawa 628: {
1.1.1.2 ! maekawa 629: MPN_ZERO (tp + ssize, qsize - ssize);
! 630: MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
! 631: for (i = 0; i < ssize; i++)
! 632: {
! 633: mp_limb_t cy;
! 634: cy = mpn_addmul_1 (tp + i, wp, qsize, s1p[i]);
! 635: tp[qsize + i] = cy;
! 636: }
! 637: }
! 638: else
! 639: {
! 640: MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */
! 641: for (i = 0; i < qsize; i++)
! 642: {
! 643: mp_limb_t cy;
! 644: cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
! 645: tp[ssize + i] = cy;
! 646: }
! 647: }
! 648: ssize += qsize;
! 649: ssize -= tp[ssize - 1] == 0;
! 650:
! 651: sign = -sign;
! 652: MP_PTR_SWAP (s0p, s1p);
! 653: MP_PTR_SWAP (s1p, tp);
1.1 maekawa 654: #else
655: t = mpn_mod_1 (up, size, vl);
656: #endif
657: ul = vl;
658: vl = t;
659: while (vl != 0)
660: {
661: mp_limb_t t;
662: #if EXTEND
1.1.1.2 ! maekawa 663: mp_limb_t q;
1.1 maekawa 664: q = ul / vl;
1.1.1.2 ! maekawa 665: t = ul - q * vl;
1.1 maekawa 666:
667: MPN_COPY (tp, s0p, ssize);
1.1.1.2 ! maekawa 668:
! 669: MPN_ZERO (s1p + ssize, 1); /* zero s1 too */
! 670:
1.1 maekawa 671: {
1.1.1.2 ! maekawa 672: mp_limb_t cy;
! 673: cy = mpn_addmul_1 (tp, s1p, ssize, q);
! 674: tp[ssize] = cy;
1.1 maekawa 675: }
676:
1.1.1.2 ! maekawa 677: ssize += 1;
! 678: ssize -= tp[ssize - 1] == 0;
! 679:
! 680: sign = -sign;
! 681: MP_PTR_SWAP (s0p, s1p);
! 682: MP_PTR_SWAP (s1p, tp);
1.1 maekawa 683: #else
684: t = ul % vl;
685: #endif
686: ul = vl;
687: vl = t;
688: }
1.1.1.2 ! maekawa 689: if (gp != 0)
! 690: gp[0] = ul;
1.1 maekawa 691: #if EXTEND
1.1.1.2 ! maekawa 692: MPN_NORMALIZE (s0p, ssize);
1.1 maekawa 693: if (orig_s0p != s0p)
694: MPN_COPY (orig_s0p, s0p, ssize);
1.1.1.2 ! maekawa 695: *s0size = sign >= 0 ? ssize : -ssize;
1.1 maekawa 696: #endif
697: TMP_FREE (mark);
698: return 1;
699: }
700: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>