Annotation of OpenXM_contrib/pari-2.2/src/kernel/none/mp.c, Revision 1.1
1.1 ! noro 1: /* $Id: mp.c,v 1.47 2001/09/28 17:16:45 xavier Exp $
! 2:
! 3: Copyright (C) 2000 The PARI group.
! 4:
! 5: This file is part of the PARI/GP package.
! 6:
! 7: PARI/GP is free software; you can redistribute it and/or modify it under the
! 8: terms of the GNU General Public License as published by the Free Software
! 9: Foundation. It is distributed in the hope that it will be useful, but WITHOUT
! 10: ANY WARRANTY WHATSOEVER.
! 11:
! 12: Check the License for details. You should have received a copy of it, along
! 13: with the package; see the file 'COPYING'. If not, write to the Free Software
! 14: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
! 15:
! 16: /***********************************************************************/
! 17: /** **/
! 18: /** MULTIPRECISION KERNEL **/
! 19: /** **/
! 20: /***********************************************************************/
! 21: /* most of the routines in this file are commented out in 68k */
! 22: /* version (#ifdef __M68K__) since they are defined in mp.s */
! 23: #include "pari.h"
! 24:
! 25: /* NOTE: arguments of "spec" routines (muliispec, addiispec, etc.) aren't
! 26: * GENs but pairs (long *a, long na) representing a list of digits (in basis
! 27: * BITS_IN_LONG) : a[0], ..., a[na-1]. [ In ordre to facilitate splitting: no
! 28: * need to reintroduce codewords ]
! 29: * Use speci(a,na) to visualize the coresponding GEN.
! 30: */
! 31:
! 32: /* z2 := z1[imin..imax].f shifted left sh bits (feeding f from the right) */
! 33: #define shift_left2(z2,z1,imin,imax,f, sh,m) {\
! 34: register ulong _l,_k = ((ulong)f)>>m;\
! 35: GEN t1 = z1 + imax, t2 = z2 + imax, T = z1 + imin;\
! 36: while (t1 > T) {\
! 37: _l = *t1--;\
! 38: *t2-- = (_l<<(ulong)sh) | _k;\
! 39: _k = _l>>(ulong)m;\
! 40: }\
! 41: *t2 = (*t1<<(ulong)sh) | _k;\
! 42: }
! 43: #define shift_left(z2,z1,imin,imax,f, sh) {\
! 44: register const ulong _m = BITS_IN_LONG - sh;\
! 45: shift_left2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
! 46: }
! 47:
! 48: /* z2 := f.z1[imin..imax-1] shifted right sh bits (feeding f from the left) */
! 49: #define shift_right2(z2,z1,imin,imax,f, sh,m) {\
! 50: register GEN t1 = z1 + imin, t2 = z2 + imin, T = z1 + imax;\
! 51: register ulong _k,_l = *t1++;\
! 52: *t2++ = (_l>>(ulong)sh) | ((ulong)f<<(ulong)m);\
! 53: while (t1 < T) {\
! 54: _k = _l<<(ulong)m; _l = *t1++;\
! 55: *t2++ = (_l>>(ulong)sh) | _k;\
! 56: }\
! 57: }
! 58: #define shift_right(z2,z1,imin,imax,f, sh) {\
! 59: register const ulong _m = BITS_IN_LONG - sh;\
! 60: shift_right2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
! 61: }
! 62:
! 63: #define MASK(x) (((ulong)(x)) & (LGEFINTBITS | SIGNBITS))
! 64: int
! 65: egalii(GEN x, GEN y)
! 66: {
! 67: long i;
! 68: if (MASK(x[1]) != MASK(y[1])) return 0;
! 69: i = lgefint(x)-1; while (i>1 && x[i]==y[i]) i--;
! 70: return i==1;
! 71: }
! 72: #undef MASK
! 73:
! 74: #ifdef INLINE
! 75: INLINE
! 76: #endif
! 77: GEN
! 78: addsispec(long s, GEN x, long nx)
! 79: {
! 80: GEN xd, zd = (GEN)avma;
! 81: long lz;
! 82: LOCAL_OVERFLOW;
! 83:
! 84: lz = nx+3; (void)new_chunk(lz);
! 85: xd = x + nx;
! 86: *--zd = addll(*--xd, s);
! 87: if (overflow)
! 88: for(;;)
! 89: {
! 90: if (xd == x) { *--zd = 1; break; } /* enlarge z */
! 91: *--zd = ((ulong)*--xd) + 1;
! 92: if (*zd) { lz--; break; }
! 93: }
! 94: else lz--;
! 95: while (xd > x) *--zd = *--xd;
! 96: *--zd = evalsigne(1) | evallgefint(lz);
! 97: *--zd = evaltyp(t_INT) | evallg(lz);
! 98: avma=(long)zd; return zd;
! 99: }
! 100:
! 101: #define swapspec(x,y, nx,ny) {long _a=nx;GEN _z=x; nx=ny; ny=_a; x=y; y=_z;}
! 102:
! 103: #ifdef INLINE
! 104: INLINE
! 105: #endif
! 106: GEN
! 107: addiispec(GEN x, GEN y, long nx, long ny)
! 108: {
! 109: GEN xd,yd,zd;
! 110: long lz;
! 111: LOCAL_OVERFLOW;
! 112:
! 113: if (nx < ny) swapspec(x,y, nx,ny);
! 114: if (ny == 1) return addsispec(*y,x,nx);
! 115: zd = (GEN)avma;
! 116: lz = nx+3; (void)new_chunk(lz);
! 117: xd = x + nx;
! 118: yd = y + ny;
! 119: *--zd = addll(*--xd, *--yd);
! 120: while (yd > y) *--zd = addllx(*--xd, *--yd);
! 121: if (overflow)
! 122: for(;;)
! 123: {
! 124: if (xd == x) { *--zd = 1; break; } /* enlarge z */
! 125: *--zd = ((ulong)*--xd) + 1;
! 126: if (*zd) { lz--; break; }
! 127: }
! 128: else lz--;
! 129: while (xd > x) *--zd = *--xd;
! 130: *--zd = evalsigne(1) | evallgefint(lz);
! 131: *--zd = evaltyp(t_INT) | evallg(lz);
! 132: avma=(long)zd; return zd;
! 133: }
! 134:
! 135: /* assume x >= y */
! 136: #ifdef INLINE
! 137: INLINE
! 138: #endif
! 139: GEN
! 140: subisspec(GEN x, long s, long nx)
! 141: {
! 142: GEN xd, zd = (GEN)avma;
! 143: long lz;
! 144: LOCAL_OVERFLOW;
! 145:
! 146: lz = nx+2; (void)new_chunk(lz);
! 147: xd = x + nx;
! 148: *--zd = subll(*--xd, s);
! 149: if (overflow)
! 150: for(;;)
! 151: {
! 152: *--zd = ((ulong)*--xd) - 1;
! 153: if (*xd) break;
! 154: }
! 155: if (xd == x)
! 156: while (*zd == 0) { zd++; lz--; } /* shorten z */
! 157: else
! 158: do *--zd = *--xd; while (xd > x);
! 159: *--zd = evalsigne(1) | evallgefint(lz);
! 160: *--zd = evaltyp(t_INT) | evallg(lz);
! 161: avma=(long)zd; return zd;
! 162: }
! 163:
! 164: /* assume x > y */
! 165: #ifdef INLINE
! 166: INLINE
! 167: #endif
! 168: GEN
! 169: subiispec(GEN x, GEN y, long nx, long ny)
! 170: {
! 171: GEN xd,yd,zd;
! 172: long lz;
! 173: LOCAL_OVERFLOW;
! 174:
! 175: if (ny==1) return subisspec(x,*y,nx);
! 176: zd = (GEN)avma;
! 177: lz = nx+2; (void)new_chunk(lz);
! 178: xd = x + nx;
! 179: yd = y + ny;
! 180: *--zd = subll(*--xd, *--yd);
! 181: while (yd > y) *--zd = subllx(*--xd, *--yd);
! 182: if (overflow)
! 183: for(;;)
! 184: {
! 185: *--zd = ((ulong)*--xd) - 1;
! 186: if (*xd) break;
! 187: }
! 188: if (xd == x)
! 189: while (*zd == 0) { zd++; lz--; } /* shorten z */
! 190: else
! 191: do *--zd = *--xd; while (xd > x);
! 192: *--zd = evalsigne(1) | evallgefint(lz);
! 193: *--zd = evaltyp(t_INT) | evallg(lz);
! 194: avma=(long)zd; return zd;
! 195: }
! 196:
! 197: #ifndef __M68K__
! 198:
! 199: /* prototype of positive small ints */
! 200: static long pos_s[] = {
! 201: evaltyp(t_INT) | m_evallg(3), evalsigne(1) | evallgefint(3), 0 };
! 202:
! 203: /* prototype of negative small ints */
! 204: static long neg_s[] = {
! 205: evaltyp(t_INT) | m_evallg(3), evalsigne(-1) | evallgefint(3), 0 };
! 206:
! 207: void
! 208: affir(GEN x, GEN y)
! 209: {
! 210: const long s=signe(x),ly=lg(y);
! 211: long lx,sh,i;
! 212:
! 213: if (!s)
! 214: {
! 215: y[1] = evalexpo(-bit_accuracy(ly));
! 216: y[2] = 0; return;
! 217: }
! 218:
! 219: lx=lgefint(x); sh=bfffo(x[2]);
! 220: y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);
! 221: if (sh)
! 222: {
! 223: if (lx>ly)
! 224: { shift_left(y,x,2,ly-1, x[ly],sh); }
! 225: else
! 226: {
! 227: for (i=lx; i<ly; i++) y[i]=0;
! 228: shift_left(y,x,2,lx-1, 0,sh);
! 229: }
! 230: return;
! 231: }
! 232:
! 233: if (lx>=ly)
! 234: for (i=2; i<ly; i++) y[i]=x[i];
! 235: else
! 236: {
! 237: for (i=2; i<lx; i++) y[i]=x[i];
! 238: for ( ; i<ly; i++) y[i]=0;
! 239: }
! 240: }
! 241:
! 242: void
! 243: affrr(GEN x, GEN y)
! 244: {
! 245: long lx,ly,i;
! 246:
! 247: y[1] = x[1]; if (!signe(x)) { y[2]=0; return; }
! 248:
! 249: lx=lg(x); ly=lg(y);
! 250: if (lx>=ly)
! 251: for (i=2; i<ly; i++) y[i]=x[i];
! 252: else
! 253: {
! 254: for (i=2; i<lx; i++) y[i]=x[i];
! 255: for ( ; i<ly; i++) y[i]=0;
! 256: }
! 257: }
! 258:
! 259: GEN
! 260: shifti(GEN x, long n)
! 261: {
! 262: const long s=signe(x);
! 263: long lx,ly,i,m;
! 264: GEN y;
! 265:
! 266: if (!s) return gzero;
! 267: if (!n) return icopy(x);
! 268: lx = lgefint(x);
! 269: if (n > 0)
! 270: {
! 271: GEN z = (GEN)avma;
! 272: long d = n>>TWOPOTBITS_IN_LONG;
! 273:
! 274: ly = lx+d; y = new_chunk(ly);
! 275: for ( ; d; d--) *--z = 0;
! 276: m = n & (BITS_IN_LONG-1);
! 277: if (!m) for (i=2; i<lx; i++) y[i]=x[i];
! 278: else
! 279: {
! 280: register const ulong sh = BITS_IN_LONG - m;
! 281: shift_left2(y,x, 2,lx-1, 0,m,sh);
! 282: i = ((ulong)x[2]) >> sh;
! 283: if (i) { ly++; y = new_chunk(1); y[2] = i; }
! 284: }
! 285: }
! 286: else
! 287: {
! 288: n = -n;
! 289: ly = lx - (n>>TWOPOTBITS_IN_LONG);
! 290: if (ly<3) return gzero;
! 291: y = new_chunk(ly);
! 292: m = n & (BITS_IN_LONG-1);
! 293: if (!m) for (i=2; i<ly; i++) y[i]=x[i];
! 294: else
! 295: {
! 296: shift_right(y,x, 2,ly, 0,m);
! 297: if (y[2] == 0)
! 298: {
! 299: if (ly==3) { avma = (long)(y+3); return gzero; }
! 300: ly--; avma = (long)(++y);
! 301: }
! 302: }
! 303: }
! 304: y[1] = evalsigne(s)|evallgefint(ly);
! 305: y[0] = evaltyp(t_INT)|evallg(ly); return y;
! 306: }
! 307:
! 308: GEN
! 309: mptrunc(GEN x)
! 310: {
! 311: long d,e,i,s,m;
! 312: GEN y;
! 313:
! 314: if (typ(x)==t_INT) return icopy(x);
! 315: if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gzero;
! 316: d = (e>>TWOPOTBITS_IN_LONG) + 3;
! 317: m = e & (BITS_IN_LONG-1);
! 318: if (d > lg(x)) err(precer, "mptrunc (precision loss in truncation)");
! 319:
! 320: y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);
! 321: if (++m == BITS_IN_LONG)
! 322: for (i=2; i<d; i++) y[i]=x[i];
! 323: else
! 324: {
! 325: register const ulong sh = BITS_IN_LONG - m;
! 326: shift_right2(y,x, 2,d,0, sh,m);
! 327: }
! 328: return y;
! 329: }
! 330:
! 331: /* integral part */
! 332: GEN
! 333: mpent(GEN x)
! 334: {
! 335: long d,e,i,lx,m;
! 336: GEN y;
! 337:
! 338: if (typ(x)==t_INT) return icopy(x);
! 339: if (signe(x) >= 0) return mptrunc(x);
! 340: if ((e=expo(x)) < 0) return stoi(-1);
! 341: d = (e>>TWOPOTBITS_IN_LONG) + 3;
! 342: m = e & (BITS_IN_LONG-1);
! 343: lx=lg(x); if (d>lx) err(precer, "mpent (precision loss in trucation)");
! 344: y = new_chunk(d);
! 345: if (++m == BITS_IN_LONG)
! 346: {
! 347: for (i=2; i<d; i++) y[i]=x[i];
! 348: i=d; while (i<lx && !x[i]) i++;
! 349: if (i==lx) goto END;
! 350: }
! 351: else
! 352: {
! 353: register const ulong sh = BITS_IN_LONG - m;
! 354: shift_right2(y,x, 2,d,0, sh,m);
! 355: if (x[d-1]<<m == 0)
! 356: {
! 357: i=d; while (i<lx && !x[i]) i++;
! 358: if (i==lx) goto END;
! 359: }
! 360: }
! 361: /* set y:=y+1 */
! 362: for (i=d-1; i>=2; i--) { y[i]++; if (y[i]) goto END; }
! 363: y=new_chunk(1); y[2]=1; d++;
! 364: END:
! 365: y[1] = evalsigne(-1) | evallgefint(d);
! 366: y[0] = evaltyp(t_INT) | evallg(d); return y;
! 367: }
! 368:
! 369: int
! 370: cmpsi(long x, GEN y)
! 371: {
! 372: ulong p;
! 373:
! 374: if (!x) return -signe(y);
! 375:
! 376: if (x>0)
! 377: {
! 378: if (signe(y)<=0) return 1;
! 379: if (lgefint(y)>3) return -1;
! 380: p=y[2]; if (p == (ulong)x) return 0;
! 381: return p < (ulong)x ? 1 : -1;
! 382: }
! 383:
! 384: if (signe(y)>=0) return -1;
! 385: if (lgefint(y)>3) return 1;
! 386: p=y[2]; if (p == (ulong)-x) return 0;
! 387: return p < (ulong)(-x) ? -1 : 1;
! 388: }
! 389:
! 390: int
! 391: cmpii(GEN x, GEN y)
! 392: {
! 393: const long sx = signe(x), sy = signe(y);
! 394: long lx,ly,i;
! 395:
! 396: if (sx<sy) return -1;
! 397: if (sx>sy) return 1;
! 398: if (!sx) return 0;
! 399:
! 400: lx=lgefint(x); ly=lgefint(y);
! 401: if (lx>ly) return sx;
! 402: if (lx<ly) return -sx;
! 403: i=2; while (i<lx && x[i]==y[i]) i++;
! 404: if (i==lx) return 0;
! 405: return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
! 406: }
! 407:
! 408: int
! 409: cmprr(GEN x, GEN y)
! 410: {
! 411: const long sx = signe(x), sy = signe(y);
! 412: long ex,ey,lx,ly,lz,i;
! 413:
! 414: if (sx<sy) return -1;
! 415: if (sx>sy) return 1;
! 416: if (!sx) return 0;
! 417:
! 418: ex=expo(x); ey=expo(y);
! 419: if (ex>ey) return sx;
! 420: if (ex<ey) return -sx;
! 421:
! 422: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
! 423: i=2; while (i<lz && x[i]==y[i]) i++;
! 424: if (i<lz) return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
! 425: if (lx>=ly)
! 426: {
! 427: while (i<lx && !x[i]) i++;
! 428: return (i==lx) ? 0 : sx;
! 429: }
! 430: while (i<ly && !y[i]) i++;
! 431: return (i==ly) ? 0 : -sx;
! 432: }
! 433:
! 434: GEN
! 435: addss(long x, long y)
! 436: {
! 437: if (!x) return stoi(y);
! 438: if (x>0) { pos_s[2] = x; return addsi(y,pos_s); }
! 439: neg_s[2] = -x; return addsi(y,neg_s);
! 440: }
! 441:
! 442: GEN
! 443: addsi(long x, GEN y)
! 444: {
! 445: long sx,sy,ly;
! 446: GEN z;
! 447:
! 448: if (!x) return icopy(y);
! 449: sy=signe(y); if (!sy) return stoi(x);
! 450: if (x<0) { sx=-1; x=-x; } else sx=1;
! 451: if (sx==sy)
! 452: {
! 453: z = addsispec(x,y+2, lgefint(y)-2);
! 454: setsigne(z,sy); return z;
! 455: }
! 456: ly=lgefint(y);
! 457: if (ly==3)
! 458: {
! 459: const long d = y[2] - x;
! 460: if (!d) return gzero;
! 461: z=cgeti(3);
! 462: if (y[2] < 0 || d > 0) {
! 463: z[1] = evalsigne(sy) | evallgefint(3);
! 464: z[2] = d;
! 465: }
! 466: else {
! 467: z[1] = evalsigne(-sy) | evallgefint(3);
! 468: z[2] =-d;
! 469: }
! 470: return z;
! 471: }
! 472: z = subisspec(y+2,x, ly-2);
! 473: setsigne(z,sy); return z;
! 474: }
! 475:
! 476: GEN
! 477: addii(GEN x, GEN y)
! 478: {
! 479: long sx = signe(x);
! 480: long sy = signe(y);
! 481: long lx,ly;
! 482: GEN z;
! 483:
! 484: if (!sx) return sy? icopy(y): gzero;
! 485: if (!sy) return icopy(x);
! 486: lx=lgefint(x); ly=lgefint(y);
! 487:
! 488: if (sx==sy)
! 489: z = addiispec(x+2,y+2,lx-2,ly-2);
! 490: else
! 491: { /* sx != sy */
! 492: long i = lx - ly;
! 493: if (i==0)
! 494: {
! 495: i = absi_cmp(x,y);
! 496: if (!i) return gzero;
! 497: }
! 498: if (i<0) { sx=sy; swapspec(x,y, lx,ly); } /* ensure |x| >= |y| */
! 499: z = subiispec(x+2,y+2,lx-2,ly-2);
! 500: }
! 501: setsigne(z,sx); return z;
! 502: }
! 503:
! 504: GEN
! 505: addsr(long x, GEN y)
! 506: {
! 507: if (!x) return rcopy(y);
! 508: if (x>0) { pos_s[2]=x; return addir(pos_s,y); }
! 509: neg_s[2] = -x; return addir(neg_s,y);
! 510: }
! 511:
! 512: GEN
! 513: addir(GEN x, GEN y)
! 514: {
! 515: long e,l,ly;
! 516: GEN z;
! 517:
! 518: if (!signe(x)) return rcopy(y);
! 519: e = expo(y)-expi(x);
! 520: if (!signe(y))
! 521: {
! 522: #if 0
! 523: if (e>0) err(adder3);
! 524: #else /* design issue: make 0.0 "absorbing" */
! 525: if (e>0) return rcopy(y);
! 526: #endif
! 527: z = cgetr(3 + ((-e)>>TWOPOTBITS_IN_LONG));
! 528: affir(x,z); return z;
! 529: }
! 530:
! 531: ly=lg(y);
! 532: if (e>0)
! 533: {
! 534: l = ly - (e>>TWOPOTBITS_IN_LONG);
! 535: if (l<3) return rcopy(y);
! 536: }
! 537: else l = ly + ((-e)>>TWOPOTBITS_IN_LONG)+1;
! 538: z=cgetr(l); affir(x,z); y=addrr(z,y);
! 539: z = y+l; ly = lg(y); while (ly--) z[ly] = y[ly];
! 540: avma=(long)z; return z;
! 541: }
! 542:
! 543: GEN
! 544: addrr(GEN x, GEN y)
! 545: {
! 546: long sx=signe(x),sy=signe(y),ex=expo(x),ey=expo(y);
! 547: long e,m,flag,i,j,f2,lx,ly,lz;
! 548: GEN z;
! 549: LOCAL_OVERFLOW;
! 550:
! 551: e = ey-ex;
! 552: if (!sy)
! 553: {
! 554: if (!sx)
! 555: {
! 556: if (e > 0) ex=ey;
! 557: z=cgetr(3); z[1]=evalexpo(ex); z[2]=0; return z;
! 558: }
! 559: if (e > 0) { z=cgetr(3); z[1]=evalexpo(ey); z[2]=0; return z; }
! 560: lz = 3 + ((-e)>>TWOPOTBITS_IN_LONG);
! 561: lx = lg(x); if (lz>lx) lz=lx;
! 562: z = cgetr(lz); while(--lz) z[lz]=x[lz];
! 563: return z;
! 564: }
! 565: if (!sx)
! 566: {
! 567: if (e < 0) { z=cgetr(3); z[1]=evalexpo(ex); z[2]=0; return z; }
! 568: lz = 3 + (e>>TWOPOTBITS_IN_LONG);
! 569: ly = lg(y); if (lz>ly) lz=ly;
! 570: z = cgetr(lz); while (--lz) z[lz]=y[lz];
! 571: return z;
! 572: }
! 573:
! 574: if (e < 0) { z=x; x=y; y=z; ey=ex; i=sx; sx=sy; sy=i; e=-e; }
! 575: /* now ey >= ex */
! 576: lx=lg(x); ly=lg(y);
! 577: if (e)
! 578: {
! 579: long d = e >> TWOPOTBITS_IN_LONG, l = ly-d;
! 580: if (l > lx) { flag=0; lz=lx+d+1; }
! 581: else if (l > 2) { flag=1; lz=ly; lx=l; }
! 582: else return rcopy(y);
! 583: m = e & (BITS_IN_LONG-1);
! 584: }
! 585: else
! 586: {
! 587: if (lx > ly) lx = ly;
! 588: flag=2; lz=lx; m=0;
! 589: }
! 590: z = cgetr(lz);
! 591: if (m)
! 592: { /* shift right x m bits */
! 593: const ulong sh = BITS_IN_LONG-m;
! 594: GEN p1 = x; x = new_chunk(lx+1);
! 595: shift_right2(x,p1,2,lx, 0,m,sh);
! 596: if (flag==0)
! 597: {
! 598: x[lx] = p1[lx-1] << sh;
! 599: if (x[lx]) { flag = 3; lx++; }
! 600: }
! 601: }
! 602:
! 603: if (sx==sy)
! 604: { /* addition */
! 605: i=lz-1; avma = (long)z;
! 606: if (flag==0) { z[i] = y[i]; i--; }
! 607: overflow=0;
! 608: for (j=lx-1; j>=2; i--,j--) z[i] = addllx(x[j],y[i]);
! 609:
! 610: if (overflow)
! 611: for (;;)
! 612: {
! 613: if (i==1)
! 614: {
! 615: shift_right(z,z, 2,lz, 1,1);
! 616: ey=evalexpo(ey+1);
! 617: z[1] = evalsigne(sx) | ey; return z;
! 618: }
! 619: z[i] = y[i]+1; if (z[i--]) break;
! 620: }
! 621: for (; i>=2; i--) z[i]=y[i];
! 622: z[1] = evalsigne(sx) | evalexpo(ey); return z;
! 623: }
! 624:
! 625: /* subtraction */
! 626: if (e) f2 = 1;
! 627: else
! 628: {
! 629: i=2; while (i<lx && x[i]==y[i]) i++;
! 630: if (i==lx)
! 631: {
! 632: avma = (long)(z+lz);
! 633: e = evalexpo(ey - bit_accuracy(lx));
! 634: z=cgetr(3); z[1]=e; z[2]=0; return z;
! 635: }
! 636: f2 = ((ulong)y[i] > (ulong)x[i]);
! 637: }
! 638:
! 639: /* result is non-zero. f2 = (y > x) */
! 640: i=lz-1;
! 641: if (f2)
! 642: {
! 643: if (flag==0) { z[i] = y[i]; i--; }
! 644: j=lx-1; z[i] = subll(y[i],x[j]); i--; j--;
! 645: for (; j>=2; i--,j--) z[i] = subllx(y[i],x[j]);
! 646: if (overflow)
! 647: for (;;) { z[i] = y[i]-1; if (y[i--]) break; }
! 648: for (; i>=2; i--) z[i]=y[i];
! 649: sx = sy;
! 650: }
! 651: else
! 652: {
! 653: if (flag==0) { z[i] = -y[i]; i--; overflow=1; } else overflow=0;
! 654: for (; i>=2; i--) z[i]=subllx(x[i],y[i]);
! 655: }
! 656:
! 657: x = z+2; i=0; while (!x[i]) i++;
! 658: lz -= i; z += i; m = bfffo(z[2]);
! 659: e = evalexpo(ey - (m | (i<<TWOPOTBITS_IN_LONG)));
! 660: if (m) shift_left(z,z,2,lz-1, 0,m);
! 661: z[1] = evalsigne(sx) | e;
! 662: z[0] = evaltyp(t_REAL) | evallg(lz);
! 663: avma = (long)z; return z;
! 664: }
! 665:
! 666: GEN
! 667: mulss(long x, long y)
! 668: {
! 669: long s,p1;
! 670: GEN z;
! 671: LOCAL_HIREMAINDER;
! 672:
! 673: if (!x || !y) return gzero;
! 674: if (x<0) { s = -1; x = -x; } else s=1;
! 675: if (y<0) { s = -s; y = -y; }
! 676: p1 = mulll(x,y);
! 677: if (hiremainder)
! 678: {
! 679: z=cgeti(4); z[1] = evalsigne(s) | evallgefint(4);
! 680: z[2]=hiremainder; z[3]=p1; return z;
! 681: }
! 682: z=cgeti(3); z[1] = evalsigne(s) | evallgefint(3);
! 683: z[2]=p1; return z;
! 684: }
! 685: #endif
! 686:
! 687: GEN
! 688: muluu(ulong x, ulong y)
! 689: {
! 690: long p1;
! 691: GEN z;
! 692: LOCAL_HIREMAINDER;
! 693:
! 694: if (!x || !y) return gzero;
! 695: p1 = mulll(x,y);
! 696: if (hiremainder)
! 697: {
! 698: z=cgeti(4); z[1] = evalsigne(1) | evallgefint(4);
! 699: z[2]=hiremainder; z[3]=p1; return z;
! 700: }
! 701: z=cgeti(3); z[1] = evalsigne(1) | evallgefint(3);
! 702: z[2]=p1; return z;
! 703: }
! 704:
! 705: /* assume ny > 0 */
! 706: #ifdef INLINE
! 707: INLINE
! 708: #endif
! 709: GEN
! 710: mulsispec(long x, GEN y, long ny)
! 711: {
! 712: GEN yd, z = (GEN)avma;
! 713: long lz = ny+3;
! 714: LOCAL_HIREMAINDER;
! 715:
! 716: (void)new_chunk(lz);
! 717: yd = y + ny; *--z = mulll(x, *--yd);
! 718: while (yd > y) *--z = addmul(x,*--yd);
! 719: if (hiremainder) *--z = hiremainder; else lz--;
! 720: *--z = evalsigne(1) | evallgefint(lz);
! 721: *--z = evaltyp(t_INT) | evallg(lz);
! 722: avma=(long)z; return z;
! 723: }
! 724:
! 725: GEN
! 726: mului(ulong x, GEN y)
! 727: {
! 728: long s = signe(y);
! 729: GEN z;
! 730:
! 731: if (!s || !x) return gzero;
! 732: z = mulsispec(x, y+2, lgefint(y)-2);
! 733: setsigne(z,s); return z;
! 734: }
! 735:
! 736: /* a + b*Y, assume Y >= 0, 0 <= a,b <= VERYBIGINT */
! 737: GEN
! 738: addsmulsi(long a, long b, GEN Y)
! 739: {
! 740: GEN yd,y,z;
! 741: long ny,lz;
! 742: LOCAL_HIREMAINDER;
! 743: LOCAL_OVERFLOW;
! 744:
! 745: if (!signe(Y)) return stoi(a);
! 746:
! 747: y = Y+2; z = (GEN)avma;
! 748: ny = lgefint(Y)-2;
! 749: lz = ny+3;
! 750:
! 751: (void)new_chunk(lz);
! 752: yd = y + ny; *--z = addll(a, mulll(b, *--yd));
! 753: if (overflow) hiremainder++; /* can't overflow */
! 754: while (yd > y) *--z = addmul(b,*--yd);
! 755: if (hiremainder) *--z = hiremainder; else lz--;
! 756: *--z = evalsigne(1) | evallgefint(lz);
! 757: *--z = evaltyp(t_INT) | evallg(lz);
! 758: avma=(long)z; return z;
! 759: }
! 760:
! 761: #ifndef __M68K__
! 762:
! 763: GEN
! 764: mulsi(long x, GEN y)
! 765: {
! 766: long s = signe(y);
! 767: GEN z;
! 768:
! 769: if (!s || !x) return gzero;
! 770: if (x<0) { s = -s; x = -x; }
! 771: z = mulsispec(x, y+2, lgefint(y)-2);
! 772: setsigne(z,s); return z;
! 773: }
! 774:
! 775: GEN
! 776: mulsr(long x, GEN y)
! 777: {
! 778: long lx,i,s,garde,e,sh,m;
! 779: GEN z;
! 780: LOCAL_HIREMAINDER;
! 781:
! 782: if (!x) return gzero;
! 783: s = signe(y);
! 784: if (!s)
! 785: {
! 786: if (x<0) x = -x;
! 787: e = y[1] + (BITS_IN_LONG-1)-bfffo(x);
! 788: if (e & ~EXPOBITS) err(muler2);
! 789: z=cgetr(3); z[1]=e; z[2]=0; return z;
! 790: }
! 791: if (x<0) { s = -s; x = -x; }
! 792: if (x==1) { z=rcopy(y); setsigne(z,s); return z; }
! 793:
! 794: lx = lg(y); e = expo(y);
! 795: z=cgetr(lx); y--; garde=mulll(x,y[lx]);
! 796: for (i=lx-1; i>=3; i--) z[i]=addmul(x,y[i]);
! 797: z[2]=hiremainder;
! 798:
! 799: sh = bfffo(hiremainder); m = BITS_IN_LONG-sh;
! 800: if (sh) shift_left2(z,z, 2,lx-1, garde,sh,m);
! 801: e = evalexpo(m+e);
! 802: z[1] = evalsigne(s) | e; return z;
! 803: }
! 804:
! 805: GEN
! 806: mulrr(GEN x, GEN y)
! 807: {
! 808: long sx = signe(x), sy = signe(y);
! 809: long i,j,ly,lz,lzz,e,flag,p1;
! 810: ulong garde;
! 811: GEN z, y1;
! 812: LOCAL_HIREMAINDER;
! 813: LOCAL_OVERFLOW;
! 814:
! 815: e = evalexpo(expo(x)+expo(y));
! 816: if (!sx || !sy) { z=cgetr(3); z[1]=e; z[2]=0; return z; }
! 817: if (sy<0) sx = -sx;
! 818: lz=lg(x); ly=lg(y);
! 819: if (lz>ly) { lz=ly; z=x; x=y; y=z; flag=1; } else flag = (lz!=ly);
! 820: z=cgetr(lz); z[1] = evalsigne(sx) | e;
! 821: if (lz==3)
! 822: {
! 823: if (flag)
! 824: {
! 825: (void)mulll(x[2],y[3]);
! 826: garde = addmul(x[2],y[2]);
! 827: }
! 828: else
! 829: garde = mulll(x[2],y[2]);
! 830: if ((long)hiremainder<0) { z[2]=hiremainder; z[1]++; }
! 831: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
! 832: return z;
! 833: }
! 834:
! 835: if (flag) { (void)mulll(x[2],y[lz]); garde = hiremainder; } else garde=0;
! 836: lzz=lz-1; p1=x[lzz];
! 837: if (p1)
! 838: {
! 839: (void)mulll(p1,y[3]);
! 840: garde = addll(addmul(p1,y[2]), garde);
! 841: z[lzz] = overflow+hiremainder;
! 842: }
! 843: else z[lzz]=0;
! 844: for (j=lz-2, y1=y-j; j>=3; j--)
! 845: {
! 846: p1 = x[j]; y1++;
! 847: if (p1)
! 848: {
! 849: (void)mulll(p1,y1[lz+1]);
! 850: garde = addll(addmul(p1,y1[lz]), garde);
! 851: for (i=lzz; i>j; i--)
! 852: {
! 853: hiremainder += overflow;
! 854: z[i] = addll(addmul(p1,y1[i]), z[i]);
! 855: }
! 856: z[j] = hiremainder+overflow;
! 857: }
! 858: else z[j]=0;
! 859: }
! 860: p1=x[2]; y1++;
! 861: garde = addll(mulll(p1,y1[lz]), garde);
! 862: for (i=lzz; i>2; i--)
! 863: {
! 864: hiremainder += overflow;
! 865: z[i] = addll(addmul(p1,y1[i]), z[i]);
! 866: }
! 867: z[2] = hiremainder+overflow;
! 868: if (z[2] < 0) z[1]++;
! 869: else
! 870: shift_left(z,z,2,lzz,garde, 1);
! 871: return z;
! 872: }
! 873:
! 874: GEN
! 875: mulir(GEN x, GEN y)
! 876: {
! 877: long sx=signe(x),sy,lz,lzz,ey,e,p1,i,j;
! 878: ulong garde;
! 879: GEN z,y1;
! 880: LOCAL_HIREMAINDER;
! 881: LOCAL_OVERFLOW;
! 882:
! 883: if (!sx) return gzero;
! 884: if (!is_bigint(x)) return mulsr(itos(x),y);
! 885: sy=signe(y); ey=expo(y);
! 886: if (!sy)
! 887: {
! 888: e = evalexpo(expi(x)+ey);
! 889: z=cgetr(3); z[1]=e; z[2]=0; return z;
! 890: }
! 891:
! 892: if (sy<0) sx = -sx;
! 893: lz=lg(y); z=cgetr(lz);
! 894: y1=cgetr(lz+1);
! 895: affir(x,y1); x=y; y=y1;
! 896: e = evalexpo(expo(y)+ey);
! 897: z[1] = evalsigne(sx) | e;
! 898: if (lz==3)
! 899: {
! 900: (void)mulll(x[2],y[3]);
! 901: garde=addmul(x[2],y[2]);
! 902: if ((long)hiremainder < 0) { z[2]=hiremainder; z[1]++; }
! 903: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
! 904: avma=(long)z; return z;
! 905: }
! 906:
! 907: (void)mulll(x[2],y[lz]); garde=hiremainder;
! 908: lzz=lz-1; p1=x[lzz];
! 909: if (p1)
! 910: {
! 911: (void)mulll(p1,y[3]);
! 912: garde=addll(addmul(p1,y[2]),garde);
! 913: z[lzz] = overflow+hiremainder;
! 914: }
! 915: else z[lzz]=0;
! 916: for (j=lz-2, y1=y-j; j>=3; j--)
! 917: {
! 918: p1=x[j]; y1++;
! 919: if (p1)
! 920: {
! 921: (void)mulll(p1,y1[lz+1]);
! 922: garde = addll(addmul(p1,y1[lz]), garde);
! 923: for (i=lzz; i>j; i--)
! 924: {
! 925: hiremainder += overflow;
! 926: z[i] = addll(addmul(p1,y1[i]), z[i]);
! 927: }
! 928: z[j] = hiremainder+overflow;
! 929: }
! 930: else z[j]=0;
! 931: }
! 932: p1=x[2]; y1++;
! 933: garde = addll(mulll(p1,y1[lz]), garde);
! 934: for (i=lzz; i>2; i--)
! 935: {
! 936: hiremainder += overflow;
! 937: z[i] = addll(addmul(p1,y1[i]), z[i]);
! 938: }
! 939: z[2] = hiremainder+overflow;
! 940: if (z[2] < 0) z[1]++;
! 941: else
! 942: shift_left(z,z,2,lzz,garde, 1);
! 943: avma=(long)z; return z;
! 944: }
! 945:
! 946: /* written by Bruno Haible following an idea of Robert Harley */
! 947: long
! 948: vals(ulong z)
! 949: {
! 950: static char tab[64]={-1,0,1,12,2,6,-1,13,3,-1,7,-1,-1,-1,-1,14,10,4,-1,-1,8,-1,-1,25,-1,-1,-1,-1,-1,21,27,15,31,11,5,-1,-1,-1,-1,-1,9,-1,-1,24,-1,-1,20,26,30,-1,-1,-1,-1,23,-1,19,29,-1,22,18,28,17,16,-1};
! 951: #ifdef LONG_IS_64BIT
! 952: long s;
! 953: #endif
! 954:
! 955: if (!z) return -1;
! 956: #ifdef LONG_IS_64BIT
! 957: if (! (z&0xffffffff)) { s = 32; z >>=32; } else s = 0;
! 958: #endif
! 959: z |= ~z + 1;
! 960: z += z << 4;
! 961: z += z << 6;
! 962: z ^= z << 16; /* or z -= z<<16 */
! 963: #ifdef LONG_IS_64BIT
! 964: return s + tab[(z&0xffffffff)>>26];
! 965: #else
! 966: return tab[z>>26];
! 967: #endif
! 968: }
! 969:
! 970: GEN
! 971: modss(long x, long y)
! 972: {
! 973: LOCAL_HIREMAINDER;
! 974:
! 975: if (!y) err(moder1);
! 976: if (y<0) y=-y;
! 977: hiremainder=0; divll(labs(x),y);
! 978: if (!hiremainder) return gzero;
! 979: return (x < 0) ? stoi(y-hiremainder) : stoi(hiremainder);
! 980: }
! 981:
! 982: GEN
! 983: resss(long x, long y)
! 984: {
! 985: LOCAL_HIREMAINDER;
! 986:
! 987: if (!y) err(reser1);
! 988: hiremainder=0; divll(labs(x),labs(y));
! 989: return (x < 0) ? stoi(-((long)hiremainder)) : stoi(hiremainder);
! 990: }
! 991:
! 992: GEN
! 993: divsi(long x, GEN y)
! 994: {
! 995: long p1, s = signe(y);
! 996: LOCAL_HIREMAINDER;
! 997:
! 998: if (!s) err(diver2);
! 999: if (!x || lgefint(y)>3 || ((long)y[2])<0)
! 1000: {
! 1001: hiremainder=x; SAVE_HIREMAINDER; return gzero;
! 1002: }
! 1003: hiremainder=0; p1=divll(labs(x),y[2]);
! 1004: if (x<0) { hiremainder = -((long)hiremainder); p1 = -p1; }
! 1005: if (s<0) p1 = -p1;
! 1006: SAVE_HIREMAINDER; return stoi(p1);
! 1007: }
! 1008: #endif
! 1009:
! 1010: GEN
! 1011: modui(ulong x, GEN y)
! 1012: {
! 1013: LOCAL_HIREMAINDER;
! 1014:
! 1015: if (!signe(y)) err(diver2);
! 1016: if (!x || lgefint(y)>3) hiremainder=x;
! 1017: else
! 1018: {
! 1019: hiremainder=0; (void)divll(x,y[2]);
! 1020: }
! 1021: return utoi(hiremainder);
! 1022: }
! 1023:
! 1024: ulong
! 1025: umodiu(GEN y, ulong x)
! 1026: {
! 1027: long sy=signe(y),ly,i;
! 1028: LOCAL_HIREMAINDER;
! 1029:
! 1030: if (!x) err(diver4);
! 1031: if (!sy) return 0;
! 1032: ly = lgefint(y);
! 1033: if (x <= (ulong)y[2]) hiremainder=0;
! 1034: else
! 1035: {
! 1036: if (ly==3) return (sy > 0)? (ulong)y[2]: x - (ulong)y[2];
! 1037: hiremainder=y[2]; ly--; y++;
! 1038: }
! 1039: for (i=2; i<ly; i++) (void)divll(y[i],x);
! 1040: if (!hiremainder) return 0;
! 1041: return (sy > 0)? hiremainder: x - hiremainder;
! 1042: }
! 1043:
! 1044: GEN
! 1045: modiu(GEN y, ulong x) { return utoi(umodiu(y,x)); }
! 1046:
! 1047: #ifndef __M68K__
! 1048:
! 1049: GEN
! 1050: modsi(long x, GEN y)
! 1051: {
! 1052: long s = signe(y);
! 1053: GEN p1;
! 1054: LOCAL_HIREMAINDER;
! 1055:
! 1056: if (!s) err(diver2);
! 1057: if (!x || lgefint(y)>3 || ((long)y[2])<0) hiremainder=x;
! 1058: else
! 1059: {
! 1060: hiremainder=0; (void)divll(labs(x),y[2]);
! 1061: if (x<0) hiremainder = -((long)hiremainder);
! 1062: }
! 1063: if (!hiremainder) return gzero;
! 1064: if (x>0) return stoi(hiremainder);
! 1065: if (s<0)
! 1066: { setsigne(y,1); p1=addsi(hiremainder,y); setsigne(y,-1); }
! 1067: else
! 1068: p1=addsi(hiremainder,y);
! 1069: return p1;
! 1070: }
! 1071:
! 1072: GEN
! 1073: divis(GEN y, long x)
! 1074: {
! 1075: long sy=signe(y),ly,s,i;
! 1076: GEN z;
! 1077: LOCAL_HIREMAINDER;
! 1078:
! 1079: if (!x) err(diver4);
! 1080: if (!sy) { hiremainder=0; SAVE_HIREMAINDER; return gzero; }
! 1081: if (x<0) { s = -sy; x = -x; } else s=sy;
! 1082:
! 1083: ly = lgefint(y);
! 1084: if ((ulong)x <= (ulong)y[2]) hiremainder=0;
! 1085: else
! 1086: {
! 1087: if (ly==3) { hiremainder=itos(y); SAVE_HIREMAINDER; return gzero; }
! 1088: hiremainder=y[2]; ly--; y++;
! 1089: }
! 1090: z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
! 1091: for (i=2; i<ly; i++) z[i]=divll(y[i],x);
! 1092: if (sy<0) hiremainder = - ((long)hiremainder);
! 1093: SAVE_HIREMAINDER; return z;
! 1094: }
! 1095:
! 1096: GEN
! 1097: divir(GEN x, GEN y)
! 1098: {
! 1099: GEN xr,z;
! 1100: long av,ly;
! 1101:
! 1102: if (!signe(y)) err(diver5);
! 1103: if (!signe(x)) return gzero;
! 1104: ly=lg(y); z=cgetr(ly); av=avma; xr=cgetr(ly+1); affir(x,xr);
! 1105: affrr(divrr(xr,y),z); avma=av; return z;
! 1106: }
! 1107:
! 1108: GEN
! 1109: divri(GEN x, GEN y)
! 1110: {
! 1111: GEN yr,z;
! 1112: long av,lx,s=signe(y);
! 1113:
! 1114: if (!s) err(diver8);
! 1115: if (!signe(x))
! 1116: {
! 1117: const long ex = evalexpo(expo(x) - expi(y));
! 1118:
! 1119: if (ex<0) err(diver12);
! 1120: z=cgetr(3); z[1]=ex; z[2]=0; return z;
! 1121: }
! 1122: if (!is_bigint(y)) return divrs(x, s>0? y[2]: -y[2]);
! 1123:
! 1124: lx=lg(x); z=cgetr(lx);
! 1125: av=avma; yr=cgetr(lx+1); affir(y,yr);
! 1126: affrr(divrr(x,yr),z); avma=av; return z;
! 1127: }
! 1128:
! 1129: void
! 1130: diviiz(GEN x, GEN y, GEN z)
! 1131: {
! 1132: long av=avma,lz;
! 1133: GEN xr,yr;
! 1134:
! 1135: if (typ(z)==t_INT) { affii(divii(x,y),z); avma=av; return; }
! 1136: lz=lg(z); xr=cgetr(lz); affir(x,xr); yr=cgetr(lz); affir(y,yr);
! 1137: affrr(divrr(xr,yr),z); avma=av;
! 1138: }
! 1139:
! 1140: void
! 1141: mpdivz(GEN x, GEN y, GEN z)
! 1142: {
! 1143: long av=avma;
! 1144:
! 1145: if (typ(z)==t_INT)
! 1146: {
! 1147: if (typ(x)==t_REAL || typ(y)==t_REAL) err(divzer1);
! 1148: affii(divii(x,y),z); avma=av; return;
! 1149: }
! 1150: if (typ(x)==t_INT)
! 1151: {
! 1152: GEN xr,yr;
! 1153: long lz;
! 1154:
! 1155: if (typ(y)==t_REAL) { affrr(divir(x,y),z); avma=av; return; }
! 1156: lz=lg(z); xr=cgetr(lz); affir(x,xr); yr=cgetr(lz); affir(y,yr);
! 1157: affrr(divrr(xr,yr),z); avma=av; return;
! 1158: }
! 1159: if (typ(y)==t_REAL) { affrr(divrr(x,y),z); avma=av; return; }
! 1160: affrr(divri(x,y),z); avma=av;
! 1161: }
! 1162:
! 1163: GEN
! 1164: divsr(long x, GEN y)
! 1165: {
! 1166: long av,ly;
! 1167: GEN p1,z;
! 1168:
! 1169: if (!signe(y)) err(diver3);
! 1170: if (!x) return gzero;
! 1171: ly=lg(y); z=cgetr(ly); av=avma;
! 1172: p1=cgetr(ly+1); affsr(x,p1); affrr(divrr(p1,y),z);
! 1173: avma=av; return z;
! 1174: }
! 1175:
! 1176: GEN
! 1177: modii(GEN x, GEN y)
! 1178: {
! 1179: switch(signe(x))
! 1180: {
! 1181: case 0: return gzero;
! 1182: case 1: return resii(x,y);
! 1183: default:
! 1184: {
! 1185: long av = avma;
! 1186: (void)new_chunk(lgefint(y));
! 1187: x = resii(x,y); avma=av;
! 1188: if (x==gzero) return x;
! 1189: return subiispec(y+2,x+2,lgefint(y)-2,lgefint(x)-2);
! 1190: }
! 1191: }
! 1192: }
! 1193:
! 1194: void
! 1195: modiiz(GEN x, GEN y, GEN z)
! 1196: {
! 1197: const long av = avma;
! 1198: affii(modii(x,y),z); avma=av;
! 1199: }
! 1200:
! 1201: GEN
! 1202: divrs(GEN x, long y)
! 1203: {
! 1204: long i,lx,ex,garde,sh,s=signe(x);
! 1205: GEN z;
! 1206: LOCAL_HIREMAINDER;
! 1207:
! 1208: if (!y) err(diver6);
! 1209: if (!s)
! 1210: {
! 1211: z=cgetr(3); z[1] = x[1] - (BITS_IN_LONG-1) + bfffo(y);
! 1212: if (z[1]<0) err(diver7);
! 1213: z[2]=0; return z;
! 1214: }
! 1215: if (y<0) { s = -s; y = -y; }
! 1216: if (y==1) { z=rcopy(x); setsigne(z,s); return z; }
! 1217:
! 1218: z=cgetr(lx=lg(x)); hiremainder=0;
! 1219: for (i=2; i<lx; i++) z[i]=divll(x[i],y);
! 1220:
! 1221: /* we may have hiremainder != 0 ==> garde */
! 1222: garde=divll(0,y); sh=bfffo(z[2]); ex=evalexpo(expo(x)-sh);
! 1223: if (sh) shift_left(z,z, 2,lx-1, garde,sh);
! 1224: z[1] = evalsigne(s) | ex;
! 1225: return z;
! 1226: }
! 1227:
! 1228: GEN
! 1229: divrr(GEN x, GEN y)
! 1230: {
! 1231: long sx=signe(x), sy=signe(y), lx,ly,lz,e,i,j;
! 1232: ulong si,saux;
! 1233: GEN z,x1;
! 1234:
! 1235: if (!sy) err(diver9);
! 1236: e = evalexpo(expo(x) - expo(y));
! 1237: if (!sx) { z=cgetr(3); z[1]=e; z[2]=0; return z; }
! 1238: if (sy<0) sx = -sx;
! 1239: e = evalsigne(sx) | e;
! 1240: lx=lg(x); ly=lg(y);
! 1241: if (ly==3)
! 1242: {
! 1243: ulong k = x[2], l = (lx>3)? x[3]: 0;
! 1244: LOCAL_HIREMAINDER;
! 1245: if (k < (ulong)y[2]) e--;
! 1246: else
! 1247: {
! 1248: l >>= 1; if (k&1) l |= HIGHBIT;
! 1249: k >>= 1;
! 1250: }
! 1251: z=cgetr(3); z[1]=e;
! 1252: hiremainder=k; z[2]=divll(l,y[2]); return z;
! 1253: }
! 1254:
! 1255: lz = (lx<=ly)? lx: ly;
! 1256: x1 = (z=new_chunk(lz))-1;
! 1257: x1[1]=0; for (i=2; i<lz; i++) x1[i]=x[i];
! 1258: x1[lz] = (lx>lz)? x[lz]: 0;
! 1259: x=z; si=y[2]; saux=y[3];
! 1260: for (i=0; i<lz-1; i++)
! 1261: { /* x1 = x + (i-1) */
! 1262: ulong k,qp;
! 1263: LOCAL_HIREMAINDER;
! 1264: LOCAL_OVERFLOW;
! 1265: if ((ulong)x1[1] == si)
! 1266: {
! 1267: qp = MAXULONG; k=addll(si,x1[2]);
! 1268: }
! 1269: else
! 1270: {
! 1271: if ((ulong)x1[1] > si) /* can't happen if i=0 */
! 1272: {
! 1273: GEN y1 = y+1;
! 1274: overflow=0;
! 1275: for (j=lz-i; j>0; j--) x1[j] = subllx(x1[j],y1[j]);
! 1276: j=i; do x[--j]++; while (j && !x[j]);
! 1277: }
! 1278: hiremainder=x1[1]; overflow=0;
! 1279: qp=divll(x1[2],si); k=hiremainder;
! 1280: }
! 1281: if (!overflow)
! 1282: {
! 1283: long k3 = subll(mulll(qp,saux), x1[3]);
! 1284: long k4 = subllx(hiremainder,k);
! 1285: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
! 1286: }
! 1287: j = lz-i+1;
! 1288: if (j<ly) (void)mulll(qp,y[j]); else { hiremainder=0 ; j=ly; }
! 1289: for (j--; j>1; j--)
! 1290: {
! 1291: x1[j] = subll(x1[j], addmul(qp,y[j]));
! 1292: hiremainder += overflow;
! 1293: }
! 1294: if ((ulong)x1[1] != hiremainder)
! 1295: {
! 1296: if ((ulong)x1[1] < hiremainder)
! 1297: {
! 1298: qp--;
! 1299: overflow=0; for (j=lz-i; j>1; j--) x1[j]=addllx(x1[j], y[j]);
! 1300: }
! 1301: else
! 1302: {
! 1303: x1[1] -= hiremainder;
! 1304: while (x1[1])
! 1305: {
! 1306: qp++; if (!qp) { j=i; do x[--j]++; while (j && !x[j]); }
! 1307: overflow=0; for (j=lz-i; j>1; j--) x1[j]=subllx(x1[j],y[j]);
! 1308: x1[1] -= overflow;
! 1309: }
! 1310: }
! 1311: }
! 1312: x1[1]=qp; x1++;
! 1313: }
! 1314: x1 = x-1; for (j=lz-1; j>=2; j--) x[j]=x1[j];
! 1315: if (*x) { shift_right(x,x, 2,lz, 1,1); } else e--;
! 1316: x[0]=evaltyp(t_REAL)|evallg(lz);
! 1317: x[1]=e; return x;
! 1318: }
! 1319: #endif /* !defined(__M68K__) */
! 1320: /* The following ones are not in mp.s (mulii is, with a different algorithm) */
! 1321:
! 1322: /* Integer division x / y:
! 1323: * if z = ONLY_REM return remainder, otherwise return quotient
! 1324: * if z != NULL set *z to remainder
! 1325: * *z is the last object on stack (and thus can be disposed of with cgiv
! 1326: * instead of gerepile)
! 1327: * If *z is zero, we put gzero here and no copy.
! 1328: * space needed: lx + ly
! 1329: */
! 1330: GEN
! 1331: dvmdii(GEN x, GEN y, GEN *z)
! 1332: {
! 1333: long sx=signe(x),sy=signe(y);
! 1334: long av,lx,ly,lz,i,j,sh,k,lq,lr;
! 1335: ulong si,qp,saux, *xd,*rd,*qd;
! 1336: GEN r,q,x1;
! 1337:
! 1338: if (!sy) err(dvmer1);
! 1339: if (!sx)
! 1340: {
! 1341: if (!z || z == ONLY_REM) return gzero;
! 1342: *z=gzero; return gzero;
! 1343: }
! 1344: lx=lgefint(x);
! 1345: ly=lgefint(y); lz=lx-ly;
! 1346: if (lz <= 0)
! 1347: {
! 1348: if (lz == 0)
! 1349: {
! 1350: for (i=2; i<lx; i++)
! 1351: if (x[i] != y[i])
! 1352: {
! 1353: if ((ulong)x[i] > (ulong)y[i]) goto DIVIDE;
! 1354: goto TRIVIAL;
! 1355: }
! 1356: if (z == ONLY_REM) return gzero;
! 1357: if (z) *z = gzero;
! 1358: if (sx < 0) sy = -sy;
! 1359: return stoi(sy);
! 1360: }
! 1361: TRIVIAL:
! 1362: if (z == ONLY_REM) return icopy(x);
! 1363: if (z) *z = icopy(x);
! 1364: return gzero;
! 1365: }
! 1366: DIVIDE: /* quotient is non-zero */
! 1367: av=avma; if (sx<0) sy = -sy;
! 1368: if (ly==3)
! 1369: {
! 1370: LOCAL_HIREMAINDER;
! 1371: si = y[2];
! 1372: if (si <= (ulong)x[2]) hiremainder=0;
! 1373: else
! 1374: {
! 1375: hiremainder = x[2]; lx--; x++;
! 1376: }
! 1377: q = new_chunk(lx); for (i=2; i<lx; i++) q[i]=divll(x[i],si);
! 1378: if (z == ONLY_REM)
! 1379: {
! 1380: avma=av; if (!hiremainder) return gzero;
! 1381: r=cgeti(3);
! 1382: r[1] = evalsigne(sx) | evallgefint(3);
! 1383: r[2]=hiremainder; return r;
! 1384: }
! 1385: q[1] = evalsigne(sy) | evallgefint(lx);
! 1386: q[0] = evaltyp(t_INT) | evallg(lx);
! 1387: if (!z) return q;
! 1388: if (!hiremainder) { *z=gzero; return q; }
! 1389: r=cgeti(3);
! 1390: r[1] = evalsigne(sx) | evallgefint(3);
! 1391: r[2] = hiremainder; *z=r; return q;
! 1392: }
! 1393:
! 1394: x1 = new_chunk(lx); sh = bfffo(y[2]);
! 1395: if (sh)
! 1396: { /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/
! 1397: register const ulong m = BITS_IN_LONG - sh;
! 1398: r = new_chunk(ly);
! 1399: shift_left2(r, y,2,ly-1, 0,sh,m); y = r;
! 1400: shift_left2(x1,x,2,lx-1, 0,sh,m);
! 1401: x1[1] = ((ulong)x[2]) >> m;
! 1402: }
! 1403: else
! 1404: {
! 1405: x1[1]=0; for (j=2; j<lx; j++) x1[j]=x[j];
! 1406: }
! 1407: x=x1; si=y[2]; saux=y[3];
! 1408: for (i=0; i<=lz; i++)
! 1409: { /* x1 = x + i */
! 1410: LOCAL_HIREMAINDER;
! 1411: LOCAL_OVERFLOW;
! 1412: if ((ulong)x1[1] == si)
! 1413: {
! 1414: qp = MAXULONG; k=addll(si,x1[2]);
! 1415: }
! 1416: else
! 1417: {
! 1418: hiremainder=x1[1]; overflow=0;
! 1419: qp=divll(x1[2],si); k=hiremainder;
! 1420: }
! 1421: if (!overflow)
! 1422: {
! 1423: long k3 = subll(mulll(qp,saux), x1[3]);
! 1424: long k4 = subllx(hiremainder,k);
! 1425: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
! 1426: }
! 1427: hiremainder=0;
! 1428: for (j=ly-1; j>1; j--)
! 1429: {
! 1430: x1[j] = subll(x1[j], addmul(qp,y[j]));
! 1431: hiremainder+=overflow;
! 1432: }
! 1433: if ((ulong)x1[1] < hiremainder)
! 1434: {
! 1435: overflow=0; qp--;
! 1436: for (j=ly-1; j>1; j--) x1[j] = addllx(x1[j],y[j]);
! 1437: }
! 1438: x1[1]=qp; x1++;
! 1439: }
! 1440:
! 1441: lq = lz+2;
! 1442: if (!z)
! 1443: {
! 1444: qd = (ulong*)av;
! 1445: xd = (ulong*)(x + lq);
! 1446: if (x[1]) { lz++; lq++; }
! 1447: while (lz--) *--qd = *--xd;
! 1448: *--qd = evalsigne(sy) | evallgefint(lq);
! 1449: *--qd = evaltyp(t_INT) | evallg(lq);
! 1450: avma = (long)qd; return (GEN)qd;
! 1451: }
! 1452:
! 1453: j=lq; while (j<lx && !x[j]) j++;
! 1454: lz = lx-j;
! 1455: if (z == ONLY_REM)
! 1456: {
! 1457: if (lz==0) { avma = av; return gzero; }
! 1458: rd = (ulong*)av; lr = lz+2;
! 1459: xd = (ulong*)(x + lx);
! 1460: if (!sh) while (lz--) *--rd = *--xd;
! 1461: else
! 1462: { /* shift remainder right by sh bits */
! 1463: const ulong shl = BITS_IN_LONG - sh;
! 1464: ulong l;
! 1465: xd--;
! 1466: while (--lz) /* fill r[3..] */
! 1467: {
! 1468: l = *xd >> sh;
! 1469: *--rd = l | (*--xd << shl);
! 1470: }
! 1471: l = *xd >> sh;
! 1472: if (l) *--rd = l; else lr--;
! 1473: }
! 1474: *--rd = evalsigne(sx) | evallgefint(lr);
! 1475: *--rd = evaltyp(t_INT) | evallg(lr);
! 1476: avma = (long)rd; return (GEN)rd;
! 1477: }
! 1478:
! 1479: lr = lz+2;
! 1480: rd = NULL; /* gcc -Wall */
! 1481: if (lz)
! 1482: { /* non zero remainder: initialize rd */
! 1483: xd = (ulong*)(x + lx);
! 1484: if (!sh)
! 1485: {
! 1486: rd = (ulong*)avma; (void)new_chunk(lr);
! 1487: while (lz--) *--rd = *--xd;
! 1488: }
! 1489: else
! 1490: { /* shift remainder right by sh bits */
! 1491: const ulong shl = BITS_IN_LONG - sh;
! 1492: ulong l;
! 1493: rd = (ulong*)x; /* overwrite shifted y */
! 1494: xd--;
! 1495: while (--lz)
! 1496: {
! 1497: l = *xd >> sh;
! 1498: *--rd = l | (*--xd << shl);
! 1499: }
! 1500: l = *xd >> sh;
! 1501: if (l) *--rd = l; else lr--;
! 1502: }
! 1503: *--rd = evalsigne(sx) | evallgefint(lr);
! 1504: *--rd = evaltyp(t_INT) | evallg(lr);
! 1505: rd += lr;
! 1506: }
! 1507: qd = (ulong*)av;
! 1508: xd = (ulong*)(x + lq);
! 1509: if (x[1]) lq++;
! 1510: j = lq-2; while (j--) *--qd = *--xd;
! 1511: *--qd = evalsigne(sy) | evallgefint(lq);
! 1512: *--qd = evaltyp(t_INT) | evallg(lq);
! 1513: q = (GEN)qd;
! 1514: if (lr==2) *z = gzero;
! 1515: else
! 1516: { /* rd has been properly initialized: we had lz > 0 */
! 1517: while (lr--) *--qd = *--rd;
! 1518: *z = (GEN)qd;
! 1519: }
! 1520: avma = (long)qd; return q;
! 1521: }
! 1522:
! 1523: /* assume y > x > 0. return y mod x */
! 1524: ulong
! 1525: mppgcd_resiu(GEN y, ulong x)
! 1526: {
! 1527: long i, ly = lgefint(y);
! 1528: LOCAL_HIREMAINDER;
! 1529:
! 1530: hiremainder = 0;
! 1531: for (i=2; i<ly; i++) (void)divll(y[i],x);
! 1532: return hiremainder;
! 1533: }
! 1534:
! 1535: /* Assume x>y>0, both of them odd. return x-y if x=y mod 4, x+y otherwise */
! 1536: void
! 1537: mppgcd_plus_minus(GEN x, GEN y, GEN res)
! 1538: {
! 1539: long av = avma;
! 1540: long lx = lgefint(x)-1;
! 1541: long ly = lgefint(y)-1, lt,m,i;
! 1542: GEN t;
! 1543:
! 1544: if ((x[lx]^y[ly]) & 3) /* x != y mod 4*/
! 1545: t = addiispec(x+2,y+2,lx-1,ly-1);
! 1546: else
! 1547: t = subiispec(x+2,y+2,lx-1,ly-1);
! 1548:
! 1549: lt = lgefint(t)-1; while (!t[lt]) lt--;
! 1550: m = vals(t[lt]); lt++;
! 1551: if (m == 0) /* 2^32 | t */
! 1552: {
! 1553: for (i = 2; i < lt; i++) res[i] = t[i];
! 1554: }
! 1555: else if (t[2] >> m)
! 1556: {
! 1557: shift_right(res,t, 2,lt, 0,m);
! 1558: }
! 1559: else
! 1560: {
! 1561: lt--; t++;
! 1562: shift_right(res,t, 2,lt, t[1],m);
! 1563: }
! 1564: res[1] = evalsigne(1)|evallgefint(lt);
! 1565: avma = av;
! 1566: }
! 1567:
! 1568: /* private version of mulss */
! 1569: ulong
! 1570: smulss(ulong x, ulong y, ulong *rem)
! 1571: {
! 1572: LOCAL_HIREMAINDER;
! 1573: x=mulll(x,y); *rem=hiremainder; return x;
! 1574: }
! 1575:
! 1576: #ifdef LONG_IS_64BIT
! 1577: # define DIVCONVI 7
! 1578: #else
! 1579: # define DIVCONVI 14
! 1580: #endif
! 1581:
! 1582: /* convert integer --> base 10^9 [assume x > 0] */
! 1583: GEN
! 1584: convi(GEN x)
! 1585: {
! 1586: ulong av=avma, lim;
! 1587: long lz;
! 1588: GEN z,p1;
! 1589:
! 1590: lz = 3 + ((lgefint(x)-2)*15)/DIVCONVI;
! 1591: z=new_chunk(lz); z[1] = -1; p1 = z+2;
! 1592: lim = stack_lim(av,1);
! 1593: for(;;)
! 1594: {
! 1595: x = divis(x,1000000000); *p1++ = hiremainder;
! 1596: if (!signe(x)) { avma=av; return p1; }
! 1597: if (low_stack(lim, stack_lim(av,1))) x = gerepileuptoint((long)z,x);
! 1598: }
! 1599: }
! 1600: #undef DIVCONVI
! 1601:
! 1602: /* convert fractional part --> base 10^9 [assume expo(x) < 0] */
! 1603: GEN
! 1604: confrac(GEN x)
! 1605: {
! 1606: long lx=lg(x), ex = -expo(x)-1, d,m,ly,ey,lr,i,j;
! 1607: GEN y,y1;
! 1608:
! 1609: ey = ex + bit_accuracy(lx);
! 1610: ly = 1 + ((ey-1) >> TWOPOTBITS_IN_LONG);
! 1611: d = ex >> TWOPOTBITS_IN_LONG;
! 1612: m = ex & (BITS_IN_LONG-1);
! 1613: y = new_chunk(ly); y1 = y + (d-2);
! 1614: while (d--) y[d]=0;
! 1615: if (!m)
! 1616: for (i=2; i<lx; i++) y1[i] = x[i];
! 1617: else
! 1618: { /* shift x left sh bits */
! 1619: const ulong sh=BITS_IN_LONG-m;
! 1620: ulong k=0, l;
! 1621: for (i=2; i<lx; i++)
! 1622: {
! 1623: l = x[i];
! 1624: y1[i] = (l>>m)|k;
! 1625: k = l<<sh;
! 1626: }
! 1627: y1[i] = k;
! 1628: }
! 1629: lr = 1 + ((long) (ey*L2SL10))/9;
! 1630: y1 = new_chunk(lr);
! 1631: for (j=0; j<lr; j++)
! 1632: {
! 1633: LOCAL_HIREMAINDER;
! 1634: hiremainder=0;
! 1635: for (i=ly-1; i>=0; i--) y[i]=addmul(y[i],1000000000);
! 1636: y1[j]=hiremainder;
! 1637: }
! 1638: return y1;
! 1639: }
! 1640:
! 1641: GEN
! 1642: truedvmdii(GEN x, GEN y, GEN *z)
! 1643: {
! 1644: long av = avma;
! 1645: GEN r, q = dvmdii(x,y,&r); /* assume that r is last on stack */
! 1646: GEN *gptr[2];
! 1647:
! 1648: if (signe(r)>=0)
! 1649: {
! 1650: if (z == ONLY_REM) return gerepileuptoint(av,r);
! 1651: if (z) *z = r; else cgiv(r);
! 1652: return q;
! 1653: }
! 1654:
! 1655: if (z == ONLY_REM)
! 1656: {
! 1657: r = subiispec(y+2,r+2, lgefint(y)-2,lgefint(r)-2);
! 1658: return gerepileuptoint(av, r);
! 1659: }
! 1660: q = addsi(-signe(y),q);
! 1661: if (!z) return gerepileuptoint(av, q);
! 1662:
! 1663: *z = subiispec(y+2,r+2, lgefint(y)-2,lgefint(r)-2);
! 1664: gptr[0]=&q; gptr[1]=z; gerepilemanysp(av,(long)r,gptr,2);
! 1665: return q;
! 1666: }
! 1667:
! 1668: /* EXACT INTEGER DIVISION */
! 1669:
! 1670: /* Find c such that 1=c*b mod 2^BITS_IN_LONG, assuming b odd (unchecked) */
! 1671: static ulong
! 1672: invrev(ulong b)
! 1673: {
! 1674: ulong x;
! 1675: switch(b & 7)
! 1676: {
! 1677: case 3:
! 1678: case 5: x = b+8; break;
! 1679: default: x = b; break;
! 1680: } /* x = b^(-1) mod 2^4 */
! 1681: x = x*(2-b*x);
! 1682: x = x*(2-b*x);
! 1683: x = x*(2-b*x); /* b^(-1) mod 2^32 (Newton applied to 1/x - b = 0) */
! 1684: #ifdef LONG_IS_64BIT
! 1685: x = x*(2-b*x); /* b^(-1) mod 2^64 */
! 1686: #endif
! 1687: return x;
! 1688: }
! 1689:
! 1690: /* assume xy>0, y odd */
! 1691: GEN
! 1692: diviuexact(GEN x, ulong y)
! 1693: {
! 1694: long i,lz,lx;
! 1695: ulong q, yinv;
! 1696: GEN z, z0, x0, x0min;
! 1697:
! 1698: if (y == 1) return icopy(x);
! 1699: lx = lgefint(x);
! 1700: if (lx == 3) return stoi((ulong)x[2] / y);
! 1701: yinv = invrev(y);
! 1702: lz = (y <= (ulong)x[2]) ? lx : lx-1;
! 1703: z = new_chunk(lz);
! 1704: z0 = z + lz;
! 1705: x0 = x + lx; x0min = x + lx-lz+2;
! 1706:
! 1707: while (x0 > x0min)
! 1708: {
! 1709: *--z0 = q = yinv*((ulong)*--x0); /* i-th quotient */
! 1710: if (!q) continue;
! 1711: /* x := x - q * y */
! 1712: { /* update neither lowest word (could set it to 0) nor highest ones */
! 1713: register GEN x1 = x0 - 1;
! 1714: LOCAL_HIREMAINDER;
! 1715: (void)mulll(q,y);
! 1716: if (hiremainder)
! 1717: {
! 1718: if ((ulong)*x1 < hiremainder)
! 1719: {
! 1720: *x1 -= hiremainder;
! 1721: do (*--x1)--; while ((ulong)*x1 == MAXULONG);
! 1722: }
! 1723: else
! 1724: *x1 -= hiremainder;
! 1725: }
! 1726: }
! 1727: }
! 1728: i=2; while(!z[i]) i++;
! 1729: z += i-2; lz -= i-2;
! 1730: z[0] = evaltyp(t_INT)|evallg(lz);
! 1731: z[1] = evalsigne(1)|evallg(lz);
! 1732: avma = (ulong)z; return z;
! 1733: }
! 1734:
! 1735: /* Find z such that x=y*z, knowing that y | x (unchecked)
! 1736: * Method: y0 z0 = x0 mod B = 2^BITS_IN_LONG ==> z0 = 1/y0 mod B.
! 1737: * Set x := (x - z0 y) / B, updating only relevant words, and repeat */
! 1738: GEN
! 1739: diviiexact(GEN x, GEN y)
! 1740: {
! 1741: long av,lx,ly,lz,vy,i,ii,sx = signe(x), sy = signe(y);
! 1742: ulong y0inv,q;
! 1743: GEN z;
! 1744:
! 1745: if (!sy) err(dvmer1);
! 1746: if (!sx) return gzero;
! 1747: vy = vali(y); av = avma;
! 1748: (void)new_chunk(lgefint(x)); /* enough room for z */
! 1749: if (vy)
! 1750: { /* make y odd */
! 1751: #if 0
! 1752: if (vali(x) < vy) err(talker,"impossible division in diviiexact");
! 1753: #endif
! 1754: y = shifti(y,-vy);
! 1755: x = shifti(x,-vy);
! 1756: }
! 1757: else x = icopy(x); /* necessary because we destroy x */
! 1758: avma = av; /* will erase our x,y when exiting */
! 1759: /* now y is odd */
! 1760: ly = lgefint(y);
! 1761: if (ly == 3)
! 1762: {
! 1763: x = diviuexact(x,(ulong)y[2]);
! 1764: if (signe(x)) setsigne(x,sx*sy); /* should have x != 0 at this point */
! 1765: return x;
! 1766: }
! 1767: lx = lgefint(x); if (ly>lx) err(talker,"impossible division in diviiexact");
! 1768: y0inv = invrev(y[ly-1]);
! 1769: i=2; while (i<ly && y[i]==x[i]) i++;
! 1770: lz = (i==ly || (ulong)y[i] < (ulong)x[i]) ? lx-ly+3 : lx-ly+2;
! 1771: z = new_chunk(lz);
! 1772:
! 1773: y += ly - 1; /* now y[-i] = i-th word of y */
! 1774: for (ii=lx-1,i=lz-1; i>=2; i--,ii--)
! 1775: {
! 1776: long limj;
! 1777: LOCAL_HIREMAINDER;
! 1778: LOCAL_OVERFLOW;
! 1779:
! 1780: z[i] = q = y0inv*((ulong)x[ii]); /* i-th quotient */
! 1781: if (!q) continue;
! 1782:
! 1783: /* x := x - q * y */
! 1784: (void)mulll(q,y[0]); limj = max(lx - lz, ii+3-ly);
! 1785: { /* update neither lowest word (could set it to 0) nor highest ones */
! 1786: register GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj;
! 1787: for (; x0 >= xlim; x0--, y0--)
! 1788: {
! 1789: *x0 = subll(*x0, addmul(q,*y0));
! 1790: hiremainder += overflow;
! 1791: }
! 1792: if (hiremainder && limj != lx - lz)
! 1793: {
! 1794: if ((ulong)*x0 < hiremainder)
! 1795: {
! 1796: *x0 -= hiremainder;
! 1797: do (*--x0)--; while ((ulong)*x0 == MAXULONG);
! 1798: }
! 1799: else
! 1800: *x0 -= hiremainder;
! 1801: }
! 1802: }
! 1803: }
! 1804: #if 0
! 1805: i=2; while(i<lz && !z[i]) i++;
! 1806: if (i==lz) err(bugparier,"diviiexact");
! 1807: #else
! 1808: i=2; while(!z[i]) i++;
! 1809: #endif
! 1810: z += i-2; lz -= (i-2);
! 1811: z[0] = evaltyp(t_INT)|evallg(lz);
! 1812: z[1] = evalsigne(sx*sy)|evallg(lz);
! 1813: avma = (ulong)z; return z;
! 1814: }
! 1815:
! 1816: long
! 1817: smodsi(long x, GEN y)
! 1818: {
! 1819: if (x<0) err(talker,"negative small integer in smodsi");
! 1820: (void)divsi(x,y); return hiremainder;
! 1821: }
! 1822:
! 1823: /* x and y are integers. Return 1 if |x| == |y|, 0 otherwise */
! 1824: int
! 1825: absi_equal(GEN x, GEN y)
! 1826: {
! 1827: long lx,i;
! 1828:
! 1829: if (!signe(x)) return !signe(y);
! 1830: if (!signe(y)) return 0;
! 1831:
! 1832: lx=lgefint(x); if (lx != lgefint(y)) return 0;
! 1833: i=2; while (i<lx && x[i]==y[i]) i++;
! 1834: return (i==lx);
! 1835: }
! 1836:
! 1837: /* x and y are integers. Return sign(|x| - |y|) */
! 1838: int
! 1839: absi_cmp(GEN x, GEN y)
! 1840: {
! 1841: long lx,ly,i;
! 1842:
! 1843: if (!signe(x)) return signe(y)? -1: 0;
! 1844: if (!signe(y)) return 1;
! 1845:
! 1846: lx=lgefint(x); ly=lgefint(y);
! 1847: if (lx>ly) return 1;
! 1848: if (lx<ly) return -1;
! 1849: i=2; while (i<lx && x[i]==y[i]) i++;
! 1850: if (i==lx) return 0;
! 1851: return ((ulong)x[i] > (ulong)y[i])? 1: -1;
! 1852: }
! 1853:
! 1854: /* x and y are reals. Return sign(|x| - |y|) */
! 1855: int
! 1856: absr_cmp(GEN x, GEN y)
! 1857: {
! 1858: long ex,ey,lx,ly,lz,i;
! 1859:
! 1860: if (!signe(x)) return signe(y)? -1: 0;
! 1861: if (!signe(y)) return 1;
! 1862:
! 1863: ex=expo(x); ey=expo(y);
! 1864: if (ex>ey) return 1;
! 1865: if (ex<ey) return -1;
! 1866:
! 1867: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
! 1868: i=2; while (i<lz && x[i]==y[i]) i++;
! 1869: if (i<lz) return ((ulong)x[i] > (ulong)y[i])? 1: -1;
! 1870: if (lx>=ly)
! 1871: {
! 1872: while (i<lx && !x[i]) i++;
! 1873: return (i==lx)? 0: 1;
! 1874: }
! 1875: while (i<ly && !y[i]) i++;
! 1876: return (i==ly)? 0: -1;
! 1877: }
! 1878:
! 1879: /********************************************************************/
! 1880: /** **/
! 1881: /** INTEGER MULTIPLICATION (KARATSUBA) **/
! 1882: /** **/
! 1883: /********************************************************************/
! 1884: #define _sqri_l 47
! 1885: #define _muli_l 25 /* optimal on PII 350MHz + gcc 2.7.2.1 (gp-dyn) */
! 1886:
! 1887: #if 0 /* for tunings */
! 1888: long KARATSUBA_SQRI_LIMIT = _sqri_l;
! 1889: long KARATSUBA_MULI_LIMIT = _muli_l;
! 1890:
! 1891: void setsqri(long a) { KARATSUBA_SQRI_LIMIT = a; }
! 1892: void setmuli(long a) { KARATSUBA_MULI_LIMIT = a; }
! 1893:
! 1894: GEN
! 1895: speci(GEN x, long nx)
! 1896: {
! 1897: GEN z;
! 1898: long i;
! 1899: if (!nx) return gzero;
! 1900: z = cgeti(nx+2); z[1] = evalsigne(1)|evallgefint(nx+2);
! 1901: for (i=0; i<nx; i++) z[i+2] = x[i];
! 1902: return z;
! 1903: }
! 1904: #else
! 1905: # define KARATSUBA_SQRI_LIMIT _sqri_l
! 1906: # define KARATSUBA_MULI_LIMIT _muli_l
! 1907: #endif
! 1908:
! 1909: /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
! 1910: #ifdef INLINE
! 1911: INLINE
! 1912: #endif
! 1913: GEN
! 1914: muliispec(GEN x, GEN y, long nx, long ny)
! 1915: {
! 1916: GEN z2e,z2d,yd,xd,ye,zd;
! 1917: long p1,lz;
! 1918: LOCAL_HIREMAINDER;
! 1919:
! 1920: if (!ny) return gzero;
! 1921: zd = (GEN)avma; lz = nx+ny+2;
! 1922: (void)new_chunk(lz);
! 1923: xd = x + nx;
! 1924: yd = y + ny;
! 1925: ye = yd; p1 = *--xd;
! 1926:
! 1927: *--zd = mulll(p1, *--yd); z2e = zd;
! 1928: while (yd > y) *--zd = addmul(p1, *--yd);
! 1929: *--zd = hiremainder;
! 1930:
! 1931: while (xd > x)
! 1932: {
! 1933: LOCAL_OVERFLOW;
! 1934: yd = ye; p1 = *--xd;
! 1935:
! 1936: z2d = --z2e;
! 1937: *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
! 1938: while (yd > y)
! 1939: {
! 1940: hiremainder += overflow;
! 1941: *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
! 1942: }
! 1943: *--zd = hiremainder + overflow;
! 1944: }
! 1945: if (*zd == 0) { zd++; lz--; } /* normalize */
! 1946: *--zd = evalsigne(1) | evallgefint(lz);
! 1947: *--zd = evaltyp(t_INT) | evallg(lz);
! 1948: avma=(long)zd; return zd;
! 1949: }
! 1950:
! 1951: #ifdef INLINE
! 1952: INLINE
! 1953: #endif
! 1954: GEN
! 1955: sqrispec(GEN x, long nx)
! 1956: {
! 1957: GEN z2e,z2d,yd,xd,zd,x0,z0;
! 1958: long p1,lz;
! 1959: LOCAL_HIREMAINDER;
! 1960:
! 1961: if (!nx) return gzero;
! 1962: zd = (GEN)avma; lz = (nx+1) << 1;
! 1963: z0 = new_chunk(lz);
! 1964: if (nx == 1)
! 1965: {
! 1966: *--zd = mulll(*x, *x);
! 1967: *--zd = hiremainder; goto END;
! 1968: }
! 1969: xd = x + nx;
! 1970:
! 1971: /* compute double products --> zd */
! 1972: p1 = *--xd; yd = xd; --zd;
! 1973: *--zd = mulll(p1, *--yd); z2e = zd;
! 1974: while (yd > x) *--zd = addmul(p1, *--yd);
! 1975: *--zd = hiremainder;
! 1976:
! 1977: x0 = x+1;
! 1978: while (xd > x0)
! 1979: {
! 1980: LOCAL_OVERFLOW;
! 1981: p1 = *--xd; yd = xd;
! 1982:
! 1983: z2e -= 2; z2d = z2e;
! 1984: *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
! 1985: while (yd > x)
! 1986: {
! 1987: hiremainder += overflow;
! 1988: *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
! 1989: }
! 1990: *--zd = hiremainder + overflow;
! 1991: }
! 1992: /* multiply zd by 2 (put result in zd - 1) */
! 1993: zd[-1] = ((*zd & HIGHBIT) != 0);
! 1994: shift_left(zd, zd, 0, (nx<<1)-3, 0, 1);
! 1995:
! 1996: /* add the squares */
! 1997: xd = x + nx; zd = z0 + lz;
! 1998: p1 = *--xd;
! 1999: zd--; *zd = mulll(p1,p1);
! 2000: zd--; *zd = addll(hiremainder, *zd);
! 2001: while (xd > x)
! 2002: {
! 2003: p1 = *--xd;
! 2004: zd--; *zd = addll(mulll(p1,p1)+ overflow, *zd);
! 2005: zd--; *zd = addll(hiremainder + overflow, *zd);
! 2006: }
! 2007:
! 2008: END:
! 2009: if (*zd == 0) { zd++; lz--; } /* normalize */
! 2010: *--zd = evalsigne(1) | evallgefint(lz);
! 2011: *--zd = evaltyp(t_INT) | evallg(lz);
! 2012: avma=(long)zd; return zd;
! 2013: }
! 2014:
! 2015: /* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */
! 2016: static GEN
! 2017: addshiftw(GEN x, GEN y, long d)
! 2018: {
! 2019: GEN z,z0,y0,yd, zd = (GEN)avma;
! 2020: long a,lz,ly = lgefint(y);
! 2021:
! 2022: z0 = new_chunk(d);
! 2023: a = ly-2; yd = y+ly;
! 2024: if (a >= d)
! 2025: {
! 2026: y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */
! 2027: a -= d;
! 2028: if (a)
! 2029: z = addiispec(x+2, y+2, lgefint(x)-2, a);
! 2030: else
! 2031: z = icopy(x);
! 2032: }
! 2033: else
! 2034: {
! 2035: y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */
! 2036: while (zd >= z0) *--zd = 0; /* complete with 0s */
! 2037: z = icopy(x);
! 2038: }
! 2039: lz = lgefint(z)+d;
! 2040: z[1] = evalsigne(1) | evallgefint(lz);
! 2041: z[0] = evaltyp(t_INT) | evallg(lz); return z;
! 2042: }
! 2043:
! 2044: /* Fast product (Karatsuba) of integers. a and b are "special" GENs
! 2045: * c,c0,c1,c2 are genuine GENs.
! 2046: */
! 2047: static GEN
! 2048: quickmulii(GEN a, GEN b, long na, long nb)
! 2049: {
! 2050: GEN a0,c,c0;
! 2051: long av,n0,n0a,i;
! 2052:
! 2053: if (na < nb) swapspec(a,b, na,nb);
! 2054: if (nb == 1) return mulsispec(*b, a, na);
! 2055: if (nb == 0) return gzero;
! 2056: if (nb < KARATSUBA_MULI_LIMIT) return muliispec(a,b,na,nb);
! 2057: i=(na>>1); n0=na-i; na=i;
! 2058: av=avma; a0=a+na; n0a=n0;
! 2059: while (!*a0 && n0a) { a0++; n0a--; }
! 2060:
! 2061: if (n0a && nb > n0)
! 2062: { /* nb <= na <= n0 */
! 2063: GEN b0,c1,c2;
! 2064: long n0b;
! 2065:
! 2066: nb -= n0;
! 2067: c = quickmulii(a,b,na,nb);
! 2068: b0 = b+nb; n0b = n0;
! 2069: while (!*b0 && n0b) { b0++; n0b--; }
! 2070: if (n0b)
! 2071: {
! 2072: c0 = quickmulii(a0,b0, n0a,n0b);
! 2073:
! 2074: c2 = addiispec(a0,a, n0a,na);
! 2075: c1 = addiispec(b0,b, n0b,nb);
! 2076: c1 = quickmulii(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
! 2077: c2 = addiispec(c0+2, c+2, lgefint(c0)-2,lgefint(c) -2);
! 2078:
! 2079: c1 = subiispec(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
! 2080: }
! 2081: else
! 2082: {
! 2083: c0 = gzero;
! 2084: c1 = quickmulii(a0,b, n0a,nb);
! 2085: }
! 2086: c = addshiftw(c,c1, n0);
! 2087: }
! 2088: else
! 2089: {
! 2090: c = quickmulii(a,b,na,nb);
! 2091: c0 = quickmulii(a0,b,n0a,nb);
! 2092: }
! 2093: return gerepileuptoint(av, addshiftw(c,c0, n0));
! 2094: }
! 2095:
! 2096: /* actual operations will take place on a+2 and b+2: we strip the codewords */
! 2097: GEN
! 2098: mulii(GEN a,GEN b)
! 2099: {
! 2100: long sa,sb;
! 2101: GEN z;
! 2102:
! 2103: sa=signe(a); if (!sa) return gzero;
! 2104: sb=signe(b); if (!sb) return gzero;
! 2105: if (sb<0) sa = -sa;
! 2106: z = quickmulii(a+2,b+2, lgefint(a)-2,lgefint(b)-2);
! 2107: setsigne(z,sa); return z;
! 2108: }
! 2109:
! 2110: GEN
! 2111: resiimul(GEN x, GEN sy)
! 2112: {
! 2113: GEN r, q, y = (GEN)sy[1], invy;
! 2114: long av = avma, k;
! 2115:
! 2116: k = cmpii(x, y);
! 2117: if (k <= 0) return k? icopy(x): gzero;
! 2118: invy = (GEN)sy[2];
! 2119: q = mulir(x,invy);
! 2120: q = mptrunc(q); /* <= divii(x, y) (at most 1 less) */
! 2121: r = subii(x, mulii(y,q));
! 2122: /* resii(x,y) + y >= r >= resii(x,y) */
! 2123: k = cmpii(r, y);
! 2124: if (k >= 0)
! 2125: {
! 2126: if (k == 0) { avma = av; return gzero; }
! 2127: r = subiispec(r+2, y+2, lgefint(r)-2, lgefint(y)-2);
! 2128: }
! 2129: #if 0
! 2130: q = subii(r,resii(x,y));
! 2131: if (signe(q))
! 2132: err(talker,"bug in resiimul: x = %Z\ny = %Z\ndif = %Z", x,y,q);
! 2133: #endif
! 2134: return gerepileuptoint(av, r); /* = resii(x, y) */
! 2135: }
! 2136:
! 2137: /* x % (2^n), assuming x, n >= 0 */
! 2138: GEN
! 2139: resmod2n(GEN x, long n)
! 2140: {
! 2141: long hi,l,k,lx,ly;
! 2142: GEN z, xd, zd;
! 2143:
! 2144: if (!signe(x) || !n) return gzero;
! 2145:
! 2146: l = n & (BITS_IN_LONG-1); /* n % BITS_IN_LONG */
! 2147: k = n >> TWOPOTBITS_IN_LONG; /* n / BITS_IN_LONG */
! 2148: lx = lgefint(x);
! 2149: if (lx < k+3) return icopy(x);
! 2150:
! 2151: xd = x + (lx-k-1);
! 2152: /* x = |_|...|#|1|...|k| : copy the last l bits of # and the last k words
! 2153: * ^--- initial xd */
! 2154: hi = *xd & ((1<<l) - 1); /* last l bits of # = top bits of result */
! 2155: if (!hi)
! 2156: { /* strip leading zeroes from result */
! 2157: xd++; while (k && !*xd) { k--; xd++; }
! 2158: if (!k) return gzero;
! 2159: ly = k+2; xd--;
! 2160: }
! 2161: else
! 2162: ly = k+3;
! 2163:
! 2164: zd = z = cgeti(ly);
! 2165: *++zd = evalsigne(1) | evallgefint(ly);
! 2166: if (hi) *++zd = hi;
! 2167: for ( ;k; k--) *++zd = *++xd;
! 2168: return z;
! 2169: }
! 2170:
! 2171: static GEN
! 2172: quicksqri(GEN a, long na)
! 2173: {
! 2174: GEN a0,c;
! 2175: long av,n0,n0a,i;
! 2176:
! 2177: if (na < KARATSUBA_SQRI_LIMIT) return sqrispec(a,na);
! 2178: i=(na>>1); n0=na-i; na=i;
! 2179: av=avma; a0=a+na; n0a=n0;
! 2180: while (!*a0 && n0a) { a0++; n0a--; }
! 2181: c = quicksqri(a,na);
! 2182: if (n0a)
! 2183: {
! 2184: GEN t, c1, c0 = quicksqri(a0,n0a);
! 2185: #if 0
! 2186: c1 = shifti(quickmulii(a0,a, n0a,na),1);
! 2187: #else /* slower !!! */
! 2188: t = addiispec(a0,a,n0a,na);
! 2189: t = quicksqri(t+2,lgefint(t)-2);
! 2190: c1= addiispec(c0+2,c+2, lgefint(c0)-2, lgefint(c)-2);
! 2191: c1= subiispec(t+2, c1+2, lgefint(t)-2, lgefint(c1)-2);
! 2192: #endif
! 2193: c = addshiftw(c,c1, n0);
! 2194: c = addshiftw(c,c0, n0);
! 2195: }
! 2196: else
! 2197: c = addshiftw(c,gzero,n0<<1);
! 2198: return gerepileuptoint(av, c);
! 2199: }
! 2200:
! 2201: GEN
! 2202: sqri(GEN a) { return quicksqri(a+2, lgefint(a)-2); }
! 2203:
! 2204: #define MULRR_LIMIT 32
! 2205: #define MULRR2_LIMIT 32
! 2206:
! 2207: #if 0
! 2208: GEN
! 2209: karamulrr1(GEN x, GEN y)
! 2210: {
! 2211: long sx,sy;
! 2212: long i,i1,i2,lx=lg(x),ly=lg(y),e,flag,garde;
! 2213: long lz2,lz3,lz4;
! 2214: GEN z,lo1,lo2,hi;
! 2215:
! 2216: /* ensure that lg(y) >= lg(x) */
! 2217: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
! 2218: if (lx < MULRR_LIMIT) return mulrr(x,y);
! 2219: sx=signe(x); sy=signe(y);
! 2220: e = evalexpo(expo(x)+expo(y));
! 2221: if (!sx || !sy) { z=cgetr(3); z[2]=0; z[1]=e; return z; }
! 2222: if (sy<0) sx = -sx;
! 2223: ly=lx+flag; z=cgetr(lx);
! 2224: lz2 = (lx>>1); lz3 = lx-lz2;
! 2225: x += 2; lx -= 2;
! 2226: y += 2; ly -= 2;
! 2227: hi = quickmulii(x,y,lz2,lz2);
! 2228: i1=lz2; while (i1<lx && !x[i1]) i1++;
! 2229: lo1 = quickmulii(y,x+i1,lz2,lx-i1);
! 2230: i2=lz2; while (i2<ly && !y[i2]) i2++;
! 2231: lo2 = quickmulii(x,y+i2,lz2,ly-i2);
! 2232:
! 2233: if (signe(lo1))
! 2234: {
! 2235: if (flag) { lo2 = addshiftw(lo1,lo2,1); lz3++; } else lo2=addii(lo1,lo2);
! 2236: }
! 2237: lz4=lgefint(lo2)-lz3;
! 2238: if (lz4>0) hi = addiispec(hi+2,lo2+2, lgefint(hi)-2,lz4);
! 2239: if (hi[2] < 0)
! 2240: {
! 2241: e++; garde=hi[lx+2];
! 2242: for (i=lx+1; i>=2 ; i--) z[i]=hi[i];
! 2243: }
! 2244: else
! 2245: {
! 2246: garde = (hi[lx+2] << 1);
! 2247: shift_left(z,hi,2,lx+1, garde, 1);
! 2248: }
! 2249: z[1]=evalsigne(sx) | e;
! 2250: if (garde < 0)
! 2251: { /* round to nearest */
! 2252: i=lx+2; do z[--i]++; while (z[i]==0);
! 2253: if (i==1) z[2]=HIGHBIT;
! 2254: }
! 2255: avma=(long)z; return z;
! 2256: }
! 2257:
! 2258: GEN
! 2259: karamulrr2(GEN x, GEN y)
! 2260: {
! 2261: long sx,sy,i,lx=lg(x),ly=lg(y),e,flag,garde;
! 2262: GEN z,hi;
! 2263:
! 2264: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
! 2265: if (lx < MULRR2_LIMIT) return mulrr(x,y);
! 2266: ly=lx+flag; sx=signe(x); sy=signe(y);
! 2267: e = evalexpo(expo(x)+expo(y));
! 2268: if (!sx || !sy) { z=cgetr(3); z[2]=0; z[1]=e; return z; }
! 2269: if (sy<0) sx = -sx;
! 2270: z=cgetr(lx);
! 2271: hi=quickmulii(y+2,x+2,ly-2,lx-2);
! 2272: if (hi[2] < 0)
! 2273: {
! 2274: e++; garde=hi[lx];
! 2275: for (i=2; i<lx ; i++) z[i]=hi[i];
! 2276: }
! 2277: else
! 2278: {
! 2279: garde = (hi[lx] << 1);
! 2280: shift_left(z,hi,2,lx-1, garde, 1);
! 2281: }
! 2282: z[1]=evalsigne(sx) | e;
! 2283: if (garde < 0)
! 2284: { /* round to nearest */
! 2285: i=lx; do z[--i]++; while (z[i]==0);
! 2286: if (i==1) z[2]=HIGHBIT;
! 2287: }
! 2288: avma=(long)z; return z;
! 2289: }
! 2290:
! 2291: GEN
! 2292: karamulrr(GEN x, GEN y, long flag)
! 2293: {
! 2294: switch(flag)
! 2295: {
! 2296: case 1: return karamulrr1(x,y);
! 2297: case 2: return karamulrr2(x,y);
! 2298: default: err(flagerr,"karamulrr");
! 2299: }
! 2300: return NULL; /* not reached */
! 2301: }
! 2302:
! 2303: GEN
! 2304: karamulir(GEN x, GEN y, long flag)
! 2305: {
! 2306: long sx=signe(x),lz,i;
! 2307: GEN z,temp,z1;
! 2308:
! 2309: if (!sx) return gzero;
! 2310: lz=lg(y); z=cgetr(lz);
! 2311: temp=cgetr(lz+1); affir(x,temp);
! 2312: z1=karamulrr(temp,y,flag);
! 2313: for (i=1; i<lz; i++) z[i]=z1[i];
! 2314: avma=(long)z; return z;
! 2315: }
! 2316: #endif
! 2317:
! 2318: #ifdef LONG_IS_64BIT
! 2319:
! 2320: #if PARI_BYTE_ORDER == LITTLE_ENDIAN_64 || PARI_BYTE_ORDER == BIG_ENDIAN_64
! 2321: #else
! 2322: error... unknown machine
! 2323: #endif
! 2324:
! 2325: GEN
! 2326: dbltor(double x)
! 2327: {
! 2328: GEN z = cgetr(3);
! 2329: long e;
! 2330: union { double f; ulong i; } fi;
! 2331: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2332: const int exp_mid = 0x3ff;/* exponent bias */
! 2333: const int expo_len = 11; /* number of bits of exponent */
! 2334: LOCAL_HIREMAINDER;
! 2335:
! 2336: if (x==0) { z[1]=evalexpo(-308); z[2]=0; return z; }
! 2337: fi.f = x;
! 2338: e = ((fi.i & (HIGHBIT-1)) >> mant_len) - exp_mid;
! 2339: z[1] = evalexpo(e) | evalsigne(x<0? -1: 1);
! 2340: z[2] = (fi.i << expo_len) | HIGHBIT;
! 2341: return z;
! 2342: }
! 2343:
! 2344: double
! 2345: rtodbl(GEN x)
! 2346: {
! 2347: long ex,s=signe(x);
! 2348: ulong a;
! 2349: union { double f; ulong i; } fi;
! 2350: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2351: const int exp_mid = 0x3ff;/* exponent bias */
! 2352: const int expo_len = 11; /* number of bits of exponent */
! 2353: LOCAL_HIREMAINDER;
! 2354:
! 2355: if (typ(x)==t_INT && !s) return 0.0;
! 2356: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
! 2357: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
! 2358:
! 2359: /* start by rounding to closest */
! 2360: a = (x[2] & (HIGHBIT-1)) + 0x400;
! 2361: if (a & HIGHBIT) { ex++; a=0; }
! 2362: if (ex >= exp_mid) err(rtodber);
! 2363: fi.i = ((ex + exp_mid) << mant_len) | (a >> expo_len);
! 2364: if (s<0) fi.i |= HIGHBIT;
! 2365: return fi.f;
! 2366: }
! 2367:
! 2368: #else
! 2369:
! 2370: #if PARI_BYTE_ORDER == LITTLE_ENDIAN
! 2371: # define INDEX0 1
! 2372: # define INDEX1 0
! 2373: #elif PARI_BYTE_ORDER == BIG_ENDIAN
! 2374: # define INDEX0 0
! 2375: # define INDEX1 1
! 2376: #else
! 2377: error... unknown machine
! 2378: #endif
! 2379:
! 2380: GEN
! 2381: dbltor(double x)
! 2382: {
! 2383: GEN z;
! 2384: long e;
! 2385: union { double f; ulong i[2]; } fi;
! 2386: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2387: const int exp_mid = 0x3ff;/* exponent bias */
! 2388: const int shift = mant_len-32;
! 2389: const int expo_len = 11; /* number of bits of exponent */
! 2390:
! 2391: if (x==0) { z=cgetr(3); z[1]=evalexpo(-308); z[2]=0; return z; }
! 2392: fi.f = x; z=cgetr(4);
! 2393: {
! 2394: const ulong a = fi.i[INDEX0];
! 2395: const ulong b = fi.i[INDEX1];
! 2396: e = ((a & (HIGHBIT-1)) >> shift) - exp_mid;
! 2397: z[1] = evalexpo(e) | evalsigne(x<0? -1: 1);
! 2398: z[3] = b << expo_len;
! 2399: z[2] = HIGHBIT | b >> (BITS_IN_LONG-expo_len) | (a << expo_len);
! 2400: }
! 2401: return z;
! 2402: }
! 2403:
! 2404: double
! 2405: rtodbl(GEN x)
! 2406: {
! 2407: long ex,s=signe(x),lx=lg(x);
! 2408: ulong a,b,k;
! 2409: union { double f; ulong i[2]; } fi;
! 2410: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2411: const int exp_mid = 0x3ff;/* exponent bias */
! 2412: const int shift = mant_len-32;
! 2413: const int expo_len = 11; /* number of bits of exponent */
! 2414:
! 2415: if (typ(x)==t_INT && !s) return 0.0;
! 2416: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
! 2417: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
! 2418:
! 2419: /* start by rounding to closest */
! 2420: a = x[2] & (HIGHBIT-1);
! 2421: if (lx > 3)
! 2422: {
! 2423: b = x[3] + 0x400UL; if (b < 0x400UL) a++;
! 2424: if (a & HIGHBIT) { ex++; a=0; }
! 2425: }
! 2426: else b = 0;
! 2427: if (ex > exp_mid) err(rtodber);
! 2428: ex += exp_mid;
! 2429: k = (a >> expo_len) | (ex << shift);
! 2430: if (s<0) k |= HIGHBIT;
! 2431: fi.i[INDEX0] = k;
! 2432: fi.i[INDEX1] = (a << (BITS_IN_LONG-expo_len)) | (b >> expo_len);
! 2433: return fi.f;
! 2434: }
! 2435: #endif
! 2436:
! 2437: /* Old cgiv without reference count (which was not used anyway)
! 2438: * Should be a macro.
! 2439: */
! 2440: void
! 2441: cgiv(GEN x)
! 2442: {
! 2443: if (x == (GEN) avma)
! 2444: avma = (long) (x+lg(x));
! 2445: }
! 2446:
! 2447: /********************************************************************/
! 2448: /** **/
! 2449: /** INTEGER EXTENDED GCD (AND INVMOD) **/
! 2450: /** **/
! 2451: /********************************************************************/
! 2452:
! 2453: /* GN 1998Oct25, originally developed in January 1998 under 2.0.4.alpha,
! 2454: * in the context of trying to improve elliptic curve cryptosystem attacking
! 2455: * algorithms. 2001Jan02 -- added bezout() functionality.
! 2456: *
! 2457: * Two basic ideas - (1) avoid many integer divisions, especially when the
! 2458: * quotient is 1 (which happens more than 40% of the time). (2) Use Lehmer's
! 2459: * trick as modified by Jebelean of extracting a couple of words' worth of
! 2460: * leading bits from both operands, and compute partial quotients from them
! 2461: * as long as we can be sure of their values. The Jebelean modifications
! 2462: * consist in reliable inequalities from which we can decide fast whether
! 2463: * to carry on or to return to the outer loop, and in re-shifting after the
! 2464: * first word's worth of bits has been used up. All of this is described
! 2465: * in R. Lercier's these [pp148-153 & 163f.], except his outer loop isn't
! 2466: * quite right (the catch-up divisions needed when one partial quotient is
! 2467: * larger than a word are missing).
! 2468: *
! 2469: * The API consists of invmod() and bezout() below; the single-word routines
! 2470: * xgcduu and xxgcduu may be called directly if desired; lgcdii() probably
! 2471: * doesn't make much sense out of context.
! 2472: *
! 2473: * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
! 2474: * ptotically about a factor 2.5 .. 3, depending on processor architecture,
! 2475: * than the naive continued-division code. Unfortunately, thanks to the
! 2476: * unrolled loops and all, the code is a bit lengthy.
! 2477: */
! 2478:
! 2479: /*==================================
! 2480: * xgcduu(d,d1,f,v,v1,s)
! 2481: * xxgcduu(d,d1,f,u,u1,v,v1,s)
! 2482: * rgcduu(d,d1,vmax,u,u1,v,v1,s)
! 2483: *==================================*/
! 2484: /*
! 2485: * Fast `final' extended gcd algorithm, acting on two ulongs. Ideally this
! 2486: * should be replaced with assembler versions wherever possible. The present
! 2487: * code essentially does `subtract, compare, and possibly divide' at each step,
! 2488: * which is reasonable when hardware division (a) exists, (b) is a bit slowish
! 2489: * and (c) does not depend a lot on the operand values (as on i486). When
! 2490: * wordsize division is in fact an assembler routine based on subtraction,
! 2491: * this strategy may not be the most efficient one.
! 2492: *
! 2493: * xxgcduu() should be called with d > d1 > 0, returns gcd(d,d1), and assigns
! 2494: * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1] (i.e.,
! 2495: * the product of all the [0, 1; 1 q_j] where the leftmost factor arises from
! 2496: * the quotient of the first division step), and the information about the
! 2497: * implied signs to s (-1 when an odd number of divisions has been done,
! 2498: * 1 otherwise). xgcduu() is exactly the same except that u,u1 are not com-
! 2499: * puted (and not returned, of course).
! 2500: *
! 2501: * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
! 2502: * (so we can stop the chain division one step early: as soon as the remainder
! 2503: * equals 1). Use this when you intend to use only what would be v, or only
! 2504: * what would be u and v, after that final division step, but not u1 and v1.
! 2505: * With the flag in force and thus without that final step, the interesting
! 2506: * quantity/ies will still sit in [u1 and] v1, of course.
! 2507: *
! 2508: * For computing the inverse of a single-word INTMOD known to exist, pass f=1
! 2509: * to xgcduu(), and obtain the result from s and v1. (The routine does the
! 2510: * right thing when d1==1 already.) For finishing a multiword modinv known
! 2511: * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix (with
! 2512: * properly adjusted signs) onto the values v' and v1' previously obtained
! 2513: * from the multiword division steps. Actually, just take the scalar product
! 2514: * of [v',v1'] with [u1,-v1], and change the sign if s==-1. (If the final
! 2515: * step had been carried out, it would be [-u,v], and s would also change.)
! 2516: * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
! 2517: * Finally, f=0 with xxgcduu() is useful for Bezout computations.
! 2518: * [Harrumph. In the above prescription, the sign turns out to be precisely
! 2519: * wrong.]
! 2520: * (It is safe for invmod() to call xgcduu() with f=1, because f&1 doesn't
! 2521: * make a difference when gcd(d,d1)>1. The speedup is negligible.)
! 2522: *
! 2523: * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
! 2524: * recover the final u,u1 given only v,v1 and s. However, it probably isn't
! 2525: * worthwhile, as it trades a few multiplications for a division.
! 2526: *
! 2527: * Note that these routines do not know and do not need to know about the
! 2528: * PARI stack.
! 2529: *
! 2530: * Added 2001Jan15:
! 2531: * rgcduu() is a variant of xxgcduu() which does not have f (the effect is
! 2532: * that of f=0), but instead has a ulong vmax parameter, for use in rational
! 2533: * reconstruction below. It returns when v1 exceeds vmax; v will never
! 2534: * exceed vmax. (vmax=0 is taken as a synonym of MAXULONG i.e. unlimited,
! 2535: * in which case rgcduu behaves exactly like xxgcduu with f=0.) The return
! 2536: * value of rgcduu() is typically meaningless; the interesting part is the
! 2537: * matrix.
! 2538: */
! 2539:
! 2540: ulong
! 2541: xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
! 2542: {
! 2543: ulong xv,xv1, xs, q,res;
! 2544: LOCAL_HIREMAINDER;
! 2545:
! 2546: /* The above blurb contained a lie. The main loop always stops when d1
! 2547: * has become equal to 1. If (d1 == 1 && !(f&1)) after the loop, we do
! 2548: * the final `division' of d by 1 `by hand' as it were.
! 2549: *
! 2550: * The loop has already been unrolled once. Aggressive optimization could
! 2551: * well lead to a totally unrolled assembler version...
! 2552: *
! 2553: * On modern x86 architectures, this loop is a pig anyway. The division
! 2554: * instruction always puts its result into the same pair of registers, and
! 2555: * we always want to use one of them straight away, so pipeline performance
! 2556: * will suck big time. An assembler version should probably do a first loop
! 2557: * computing and storing all the quotients -- their number is bounded in
! 2558: * advance -- and then assembling the matrix in a second pass. On other
! 2559: * architectures where we can cycle through four or so groups of registers
! 2560: * and exploit a fast ALU result-to-operand feedback path, this is much less
! 2561: * of an issue. (Intel sucks. See http://www.x86.org/ ...)
! 2562: */
! 2563: xs = res = 0;
! 2564: xv = 0UL; xv1 = 1UL;
! 2565: while (d1 > 1UL)
! 2566: {
! 2567: d -= d1; /* no need to use subll */
! 2568: if (d >= d1)
! 2569: {
! 2570: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
! 2571: xv += q * xv1;
! 2572: }
! 2573: else
! 2574: xv += xv1;
! 2575: /* possible loop exit */
! 2576: if (d <= 1UL) { xs=1; break; }
! 2577: /* repeat with inverted roles */
! 2578: d1 -= d;
! 2579: if (d1 >= d)
! 2580: {
! 2581: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
! 2582: xv1 += q * xv;
! 2583: }
! 2584: else
! 2585: xv1 += xv;
! 2586: } /* while */
! 2587:
! 2588: if (!(f&1)) /* division by 1 postprocessing if needed */
! 2589: {
! 2590: if (xs && d==1)
! 2591: { xv1 += d1 * xv; xs = 0; res = 1UL; }
! 2592: else if (!xs && d1==1)
! 2593: { xv += d * xv1; xs = 1; res = 1UL; }
! 2594: }
! 2595:
! 2596: if (xs)
! 2597: {
! 2598: *s = -1; *v = xv1; *v1 = xv;
! 2599: return (res ? res : (d==1 ? 1UL : d1));
! 2600: }
! 2601: else
! 2602: {
! 2603: *s = 1; *v = xv; *v1 = xv1;
! 2604: return (res ? res : (d1==1 ? 1UL : d));
! 2605: }
! 2606: }
! 2607:
! 2608:
! 2609: ulong
! 2610: xxgcduu(ulong d, ulong d1, int f,
! 2611: ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
! 2612: {
! 2613: ulong xu,xu1, xv,xv1, xs, q,res;
! 2614: LOCAL_HIREMAINDER;
! 2615:
! 2616: xs = res = 0;
! 2617: xu = xv1 = 1UL;
! 2618: xu1 = xv = 0UL;
! 2619: while (d1 > 1UL)
! 2620: {
! 2621: d -= d1; /* no need to use subll */
! 2622: if (d >= d1)
! 2623: {
! 2624: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
! 2625: xv += q * xv1;
! 2626: xu += q * xu1;
! 2627: }
! 2628: else
! 2629: { xv += xv1; xu += xu1; }
! 2630: /* possible loop exit */
! 2631: if (d <= 1UL) { xs=1; break; }
! 2632: /* repeat with inverted roles */
! 2633: d1 -= d;
! 2634: if (d1 >= d)
! 2635: {
! 2636: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
! 2637: xv1 += q * xv;
! 2638: xu1 += q * xu;
! 2639: }
! 2640: else
! 2641: { xv1 += xv; xu1 += xu; }
! 2642: } /* while */
! 2643:
! 2644: if (!(f&1)) /* division by 1 postprocessing if needed */
! 2645: {
! 2646: if (xs && d==1)
! 2647: {
! 2648: xv1 += d1 * xv;
! 2649: xu1 += d1 * xu;
! 2650: xs = 0; res = 1UL;
! 2651: }
! 2652: else if (!xs && d1==1)
! 2653: {
! 2654: xv += d * xv1;
! 2655: xu += d * xu1;
! 2656: xs = 1; res = 1UL;
! 2657: }
! 2658: }
! 2659:
! 2660: if (xs)
! 2661: {
! 2662: *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
! 2663: return (res ? res : (d==1 ? 1UL : d1));
! 2664: }
! 2665: else
! 2666: {
! 2667: *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
! 2668: return (res ? res : (d1==1 ? 1UL : d));
! 2669: }
! 2670: }
! 2671:
! 2672: ulong
! 2673: rgcduu(ulong d, ulong d1, ulong vmax,
! 2674: ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
! 2675: {
! 2676: ulong xu,xu1, xv,xv1, xs, q, res=0;
! 2677: int f = 0;
! 2678: LOCAL_HIREMAINDER;
! 2679:
! 2680: if (vmax == 0) vmax = MAXULONG;
! 2681: xs = res = 0;
! 2682: xu = xv1 = 1UL;
! 2683: xu1 = xv = 0UL;
! 2684: while (d1 > 1UL)
! 2685: {
! 2686: d -= d1; /* no need to use subll */
! 2687: if (d >= d1)
! 2688: {
! 2689: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
! 2690: xv += q * xv1;
! 2691: xu += q * xu1;
! 2692: }
! 2693: else
! 2694: { xv += xv1; xu += xu1; }
! 2695: /* possible loop exit */
! 2696: if (xv > vmax) { f=xs=1; break; }
! 2697: if (d <= 1UL) { xs=1; break; }
! 2698: /* repeat with inverted roles */
! 2699: d1 -= d;
! 2700: if (d1 >= d)
! 2701: {
! 2702: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
! 2703: xv1 += q * xv;
! 2704: xu1 += q * xu;
! 2705: }
! 2706: else
! 2707: { xv1 += xv; xu1 += xu; }
! 2708: /* possible loop exit */
! 2709: if (xv1 > vmax) { f=1; break; }
! 2710: } /* while */
! 2711:
! 2712: if (!(f&1)) /* division by 1 postprocessing if needed */
! 2713: {
! 2714: if (xs && d==1)
! 2715: {
! 2716: xv1 += d1 * xv;
! 2717: xu1 += d1 * xu;
! 2718: xs = 0; res = 1UL;
! 2719: }
! 2720: else if (!xs && d1==1)
! 2721: {
! 2722: xv += d * xv1;
! 2723: xu += d * xu1;
! 2724: xs = 1; res = 1UL;
! 2725: }
! 2726: }
! 2727:
! 2728: if (xs)
! 2729: {
! 2730: *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
! 2731: return (res ? res : (d==1 ? 1UL : d1));
! 2732: }
! 2733: else
! 2734: {
! 2735: *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
! 2736: return (res ? res : (d1==1 ? 1UL : d));
! 2737: }
! 2738: }
! 2739:
! 2740: /*==================================
! 2741: * lgcdii(d,d1,u,u1,v,v1,vmax)
! 2742: *==================================*/
! 2743: /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
! 2744: *
! 2745: * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
! 2746: * and a quantity of bits from d1 obtained by a shift of the same displacement,
! 2747: * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
! 2748: * the product of all the [0, 1; 1, q_j] thus obtained, where the leftmost
! 2749: * factor arises from the quotient of the first division step.
! 2750: *
! 2751: * For use in rational reconstruction, input param vmax can be given a
! 2752: * nonzero value. In this case, we will return early as soon as v1 > vmax
! 2753: * (i.e. it is guaranteed that v <= vmax). --2001Jan15
! 2754: *
! 2755: * MUST be called with d > d1 > 0, and with d occupying more than one
! 2756: * significant word (if it doesn't, the caller has no business with us;
! 2757: * he/she/it should use xgcduu() instead). Returns the number of reduction/
! 2758: * swap steps carried out, possibly zero, or under certain conditions minus
! 2759: * that number. When the return value is nonzero, the caller should use the
! 2760: * returned recurrence matrix to update its own copies of d,d1. When the
! 2761: * return value is non-positive, and the latest remainder after updating
! 2762: * turns out to be nonzero, the caller should at once attempt a full division,
! 2763: * rather than first trying lgcdii() again -- this typically happens when we
! 2764: * are about to encounter a quotient larger than half a word. (This is not
! 2765: * detected infallibly -- after a positive return value, it is perfectly
! 2766: * possible that the next stage will end up needing a full division. After
! 2767: * a negative return value, however, this is certain, and should be acted
! 2768: * upon.)
! 2769: *
! 2770: * (The sign information, for which xgcduu() has its return argument s, is now
! 2771: * implicit in the LSB of our return value, and the caller may take advantage
! 2772: * of the fact that a return value of +-1 implies u==0,u1==v==1 [only v1 pro-
! 2773: * vides interesting information in this case]. One might also use the fact
! 2774: * that if the return value is +-2, then u==1, but this is rather marginal.)
! 2775: *
! 2776: * If it was not possible to determine even the first quotient, either because
! 2777: * we're too close to an integer quotient or because the quotient would be
! 2778: * larger than one word (if the `leading digit' of d1 after shifting is all
! 2779: * zeros), we return 0 and do not bother to assign anything to the last four
! 2780: * args.
! 2781: *
! 2782: * The division chain might (sometimes) even run to completion. It will be
! 2783: * up to the caller to detect this case.
! 2784: *
! 2785: * This routine does _not_ change d or d1; this will also be up to the caller.
! 2786: *
! 2787: * Note that this routine does not know and does not need to know about the
! 2788: * PARI stack.
! 2789: */
! 2790: /*#define DEBUG_LEHMER 1 */
! 2791: int
! 2792: lgcdii(ulong* d, ulong* d1,
! 2793: ulong* u, ulong* u1, ulong* v, ulong* v1,
! 2794: ulong vmax)
! 2795: {
! 2796: /* Strategy: (1) Extract/shift most significant bits. We assume that d
! 2797: * has at least two significant words, but we can cope with a one-word d1.
! 2798: * Let dd,dd1 be the most significant dividend word and matching part of the
! 2799: * divisor.
! 2800: * (2) Check for overflow on the first division. For our purposes, this
! 2801: * happens when the upper half of dd1 is zero. (Actually this is detected
! 2802: * during extraction.)
! 2803: * (3) Get a fix on the first quotient. We compute q = floor(dd/dd1), which
! 2804: * is an upper bound for floor(d/d1), and which gives the true value of the
! 2805: * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
! 2806: * (If it isn't, we give up. This is annoying because the subsequent full
! 2807: * division will repeat some work already done, but it happens very infre-
! 2808: * quently. Doing the extra-bit-fetch in this case would be awkward.)
! 2809: * (4) Finish initializations.
! 2810: *
! 2811: * The remainder of the action is comparatively boring... The main loop has
! 2812: * been unrolled once (so we don't swap things and we can apply Jebelean's
! 2813: * termination conditions which alternatingly take two different forms during
! 2814: * successive iterations). When we first run out of sufficient bits to form
! 2815: * a quotient, and have an extra word of each operand, we pull out two whole
! 2816: * word's worth of dividend bits, and divisor bits of matching significance;
! 2817: * to these we apply our partial matrix (disregarding overflow because the
! 2818: * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
! 2819: * re-extract one word's worth of the current dividend and a matching amount
! 2820: * of divisor bits. The affair will normally terminate with matrix entries
! 2821: * just short of a whole word. (We terminate the inner loop before these can
! 2822: * possibly overflow.)
! 2823: */
! 2824: ulong dd,dd1,ddlo,dd1lo, sh,shc; /* `digits', shift count */
! 2825: ulong xu,xu1, xv,xv1, q,res; /* recurrences, partial quotient, count */
! 2826: ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
! 2827: long ld, ld1, lz; /* t_INT effective lengths */
! 2828: int skip = 0; /* a boolean flag */
! 2829: LOCAL_OVERFLOW;
! 2830: LOCAL_HIREMAINDER;
! 2831:
! 2832: #ifdef DEBUG_LEHMER
! 2833: voir(d, -1); voir(d1, -1);
! 2834: #endif
! 2835: /* following is just for convenience: vmax==0 means no bound */
! 2836: if (vmax == 0) vmax = MAXULONG;
! 2837: ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
! 2838: if (lz > 1) return 0; /* rare, quick and desperate exit */
! 2839:
! 2840: d += 2; d1 += 2; /* point at the leading `digits' */
! 2841: dd1lo = 0; /* unless we find something better */
! 2842: sh = bfffo(*d); /* obtain dividend left shift count */
! 2843:
! 2844: if (sh)
! 2845: { /* do the shifting */
! 2846: shc = BITS_IN_LONG - sh;
! 2847: if (lz)
! 2848: { /* dividend longer than divisor */
! 2849: dd1 = (*d1 >> shc);
! 2850: if (!(HIGHMASK & dd1)) return 0; /* overflow detected */
! 2851: if (ld1 > 3)
! 2852: dd1lo = (*d1 << sh) + (d1[1] >> shc);
! 2853: else
! 2854: dd1lo = (*d1 << sh);
! 2855: }
! 2856: else
! 2857: { /* dividend and divisor have the same length */
! 2858: /* dd1 = shiftl(d1,sh) would have left hiremainder==0, and dd1 != 0. */
! 2859: dd1 = (*d1 << sh);
! 2860: if (!(HIGHMASK & dd1)) return 0;
! 2861: if (ld1 > 3)
! 2862: {
! 2863: dd1 += (d1[1] >> shc);
! 2864: if (ld1 > 4)
! 2865: dd1lo = (d1[1] << sh) + (d1[2] >> shc);
! 2866: else
! 2867: dd1lo = (d1[1] << sh);
! 2868: }
! 2869: }
! 2870: /* following lines assume d to have 2 or more significant words */
! 2871: dd = (*d << sh) + (d[1] >> shc);
! 2872: if (ld > 4)
! 2873: ddlo = (d[1] << sh) + (d[2] >> shc);
! 2874: else
! 2875: ddlo = (d[1] << sh);
! 2876: }
! 2877: else
! 2878: { /* no shift needed */
! 2879: if (lz) return 0; /* div'd longer than div'r: o'flow automatic */
! 2880: dd1 = *d1;
! 2881: if (!(HIGHMASK & dd1)) return 0;
! 2882: if(ld1 > 3) dd1lo = d1[1];
! 2883: /* assume again that d has another significant word */
! 2884: dd = *d; ddlo = d[1];
! 2885: }
! 2886: #ifdef DEBUG_LEHMER
! 2887: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
! 2888: #endif
! 2889:
! 2890: /* First subtraction/division stage. (If a subtraction initially suffices,
! 2891: * we don't divide at all.) If a Jebelean condition is violated, and we
! 2892: * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
! 2893: * give up and ask for a full division. Otherwise we commit the result,
! 2894: * possibly deciding to re-shift immediately afterwards.
! 2895: */
! 2896: dd -= dd1;
! 2897: if (dd < dd1)
! 2898: { /* first quotient known to be == 1 */
! 2899: xv1 = 1UL;
! 2900: if (!dd) /* !(Jebelean condition), extraspecial case */
! 2901: { /* note this can actually happen... Now
! 2902: * q==1 is known, but we underflow already.
! 2903: * OTOH we've just shortened d by a whole word.
! 2904: * Thus we feel pleased with ourselves and
! 2905: * return. (The re-shift code below would
! 2906: * do so anyway.) */
! 2907: *u = 0; *v = *u1 = *v1 = 1UL;
! 2908: return -1; /* Next step will be a full division. */
! 2909: } /* if !(Jebelean) then */
! 2910: }
! 2911: else
! 2912: { /* division indicated */
! 2913: hiremainder = 0;
! 2914: xv1 = 1 + divll(dd, dd1); /* xv1: alternative spelling of `q', here ;) */
! 2915: dd = hiremainder;
! 2916: if (dd < xv1) /* !(Jebelean cond'), non-extra special case */
! 2917: { /* Attempt to complete the division using the
! 2918: * less significant bits, before skipping right
! 2919: * past the 1st loop to the reshift stage. */
! 2920: ddlo = subll(ddlo, mulll(xv1, dd1lo));
! 2921: dd = subllx(dd, hiremainder);
! 2922:
! 2923: /* If we now have an overflow, q was _certainly_ too large. Thanks to
! 2924: * our decision not to get here unless the original dd1 had bits set in
! 2925: * the upper half of the word, however, we now do know that the correct
! 2926: * quotient is in fact q-1. Adjust our data accordingly. */
! 2927: if (overflow)
! 2928: {
! 2929: xv1--;
! 2930: ddlo = addll(ddlo,dd1lo);
! 2931: dd = addllx(dd,dd1); /* overflows again which cancels the borrow */
! 2932: /* ...and fall through to skip=1 below */
! 2933: }
! 2934: else
! 2935: /* Test Jebelean condition anew, at this point using _all_ the extracted
! 2936: * bits we have. This is clutching at straws; we have a more or less
! 2937: * even chance of succeeding this time. Note that if we fail, we really
! 2938: * do not know whether the correct quotient would have been q or some
! 2939: * smaller value. */
! 2940: if (!dd && ddlo < xv1) return 0;
! 2941:
! 2942: /* Otherwise, we now know that q is correct, but we cannot go into the
! 2943: * 1st loop. Raise a flag so we'll remember to skip past the loop.
! 2944: * Get here also after the q-1 adjustment case. */
! 2945: skip = 1;
! 2946: } /* if !(Jebelean) then */
! 2947: }
! 2948: res = 1;
! 2949: #ifdef DEBUG_LEHMER
! 2950: fprintferr(" q = %ld, %lx, %lx\n", xv1, dd1, dd);
! 2951: #endif
! 2952: if (xv1 > vmax)
! 2953: { /* gone past the bound already */
! 2954: *u = 0UL; *u1 = 1UL; *v = 1UL; *v1 = xv1;
! 2955: return res;
! 2956: }
! 2957: xu = 0UL; xv = xu1 = 1UL;
! 2958:
! 2959: /* Some invariants from here across the first loop:
! 2960: *
! 2961: * At this point, and again after we are finished with the first loop and
! 2962: * subsequent conditional, a division and the associated update of the
! 2963: * recurrence matrix have just been carried out completely. The matrix
! 2964: * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
! 2965: * columns), and the current remainder == next divisor (dd at the moment)
! 2966: * is nonzero (it might be zero here, but then skip will have been set).
! 2967: *
! 2968: * After the first loop, or when skip is set already, it will also be the
! 2969: * case that there aren't sufficiently many bits to continue without re-
! 2970: * shifting. If the divisor after reshifting is zero, or indeed if it
! 2971: * doesn't have more than half a word's worth of bits, we will have to
! 2972: * return at that point. Otherwise, we proceed into the second loop.
! 2973: *
! 2974: * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
! 2975: * already reflect the result of applying the current matrix to the old
! 2976: * ddorig:ddlo and dd1orig:dd1lo. (For the first iteration above, this
! 2977: * was easy to achieve, and we didn't even need to peek into the (now
! 2978: * no longer existent!) saved words. After the loop, we'll stop for a
! 2979: * moment to merge in the ddlo,dd1lo contributions.)
! 2980: *
! 2981: * Note that after the first division, even an a priori quotient of 1 cannot
! 2982: * be trusted until we've checked Jebelean's condition -- it cannot be too
! 2983: * large, of course, but it might be too small.
! 2984: */
! 2985:
! 2986: if (!skip)
! 2987: {
! 2988: for(;;)
! 2989: {
! 2990: /* First half of loop divides dd into dd1, and leaves the recurrence
! 2991: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
! 2992: * entries) when successful. */
! 2993: tmpd = dd1 - dd;
! 2994: if (tmpd < dd)
! 2995: { /* quotient suspected to be 1 */
! 2996: #ifdef DEBUG_LEHMER
! 2997: q = 1;
! 2998: #endif
! 2999: tmpu = xu + xu1; /* cannot overflow -- everything bounded by
! 3000: * the original dd during first loop */
! 3001: tmpv = xv + xv1;
! 3002: }
! 3003: else
! 3004: { /* division indicated */
! 3005: hiremainder = 0;
! 3006: q = 1 + divll(tmpd, dd);
! 3007: tmpd = hiremainder;
! 3008: tmpu = xu + q*xu1; /* can't overflow, but may need to be undone */
! 3009: tmpv = xv + q*xv1;
! 3010: }
! 3011:
! 3012: tmp0 = addll(tmpv, xv1);
! 3013: if ((tmpd < tmpu) || overflow ||
! 3014: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
! 3015: break; /* skip ahead to reshift stage */
! 3016: else
! 3017: { /* commit dd1, xu, xv */
! 3018: res++;
! 3019: dd1 = tmpd; xu = tmpu; xv = tmpv;
! 3020: #ifdef DEBUG_LEHMER
! 3021: fprintferr(" q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
! 3022: q, dd, dd1, xu1, xu, xv1, xv);
! 3023: #endif
! 3024: if (xv > vmax)
! 3025: { /* time to return */
! 3026: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
! 3027: return res;
! 3028: }
! 3029: }
! 3030:
! 3031: /* Second half of loop divides dd1 into dd, and the matrix returns to its
! 3032: * normal arrangement. */
! 3033: tmpd = dd - dd1;
! 3034: if (tmpd < dd1)
! 3035: { /* quotient suspected to be 1 */
! 3036: #ifdef DEBUG_LEHMER
! 3037: q = 1;
! 3038: #endif
! 3039: tmpu = xu1 + xu; /* cannot overflow */
! 3040: tmpv = xv1 + xv;
! 3041: }
! 3042: else
! 3043: { /* division indicated */
! 3044: hiremainder = 0;
! 3045: q = 1 + divll(tmpd, dd1);
! 3046: tmpd = hiremainder;
! 3047: tmpu = xu1 + q*xu;
! 3048: tmpv = xv1 + q*xv;
! 3049: }
! 3050:
! 3051: tmp0 = addll(tmpu, xu);
! 3052: if ((tmpd < tmpv) || overflow ||
! 3053: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
! 3054: break; /* skip ahead to reshift stage */
! 3055: else
! 3056: { /* commit dd, xu1, xv1 */
! 3057: res++;
! 3058: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
! 3059: #ifdef DEBUG_LEHMER
! 3060: fprintferr(" q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
! 3061: q, dd1, dd, xu, xu1, xv, xv1);
! 3062: #endif
! 3063: if (xv1 > vmax)
! 3064: { /* time to return */
! 3065: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
! 3066: return res;
! 3067: }
! 3068: }
! 3069:
! 3070: } /* end of first loop */
! 3071:
! 3072: /* Intermezzo: update dd:ddlo, dd1:dd1lo. (But not if skip is set.) */
! 3073:
! 3074: if (res&1)
! 3075: {
! 3076: /* after failed division in 1st half of loop:
! 3077: * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
! 3078: * * [ -xu, xu1 ; xv, -xv1 ]
! 3079: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
! 3080: * add the high-order remainders + overflows onto [dd1,dd].)
! 3081: */
! 3082: tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
! 3083: tmp1 = subll(mulll(dd1lo,xv), tmp1);
! 3084: dd1 += subllx(hiremainder, tmp0);
! 3085: tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
! 3086: ddlo = subll(tmp2, mulll(dd1lo,xv1));
! 3087: dd += subllx(tmp0, hiremainder);
! 3088: dd1lo = tmp1;
! 3089: }
! 3090: else
! 3091: {
! 3092: /* after failed division in 2nd half of loop:
! 3093: * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
! 3094: * * [ xu1, -xu ; -xv1, xv ]
! 3095: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
! 3096: * add the high-order remainders + overflows onto [dd,dd1].)
! 3097: */
! 3098: tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
! 3099: tmp1 = subll(tmp1, mulll(dd1lo,xv1));
! 3100: dd += subllx(tmp0, hiremainder);
! 3101: tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
! 3102: dd1lo = subll(mulll(dd1lo,xv), tmp2);
! 3103: dd1 += subllx(hiremainder, tmp0);
! 3104: ddlo = tmp1;
! 3105: }
! 3106: #ifdef DEBUG_LEHMER
! 3107: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
! 3108: #endif
! 3109: } /* end of skip-pable section: get here also, with res==1, when there
! 3110: * was a problem immediately after the very first division. */
! 3111:
! 3112: /* Re-shift. Note: the shift count _can_ be zero, viz. under the following
! 3113: * precise conditions: The original dd1 had its topmost bit set, so the 1st
! 3114: * q was 1, and after subtraction, dd had its topmost bit unset. If now
! 3115: * dd==0, we'd have taken the return exit already, so we couldn't have got
! 3116: * here. If not, then it must have been the second division which has gone
! 3117: * amiss (because dd1 was very close to an exact multiple of the remainder
! 3118: * dd value, so this will be very rare). At this point, we'd have a fairly
! 3119: * slim chance of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but
! 3120: * this is not guaranteed to work. Instead of trying, we return at once.
! 3121: * The caller will see to it that the initial subtraction is re-done using
! 3122: * _all_ the bits of both operands, which already helps, and the next round
! 3123: * will either be a full division (if dd occupied a halfword or less), or
! 3124: * another llgcdii() first step. In the latter case, since we try a little
! 3125: * harder during our first step, we may actually be able to fix the problem,
! 3126: * and get here again with improved low-order bits and with another step
! 3127: * under our belt. Otherwise we'll have given up above and forced a full-
! 3128: * blown division.
! 3129: *
! 3130: * If res is even, the shift count _cannot_ be zero. (The first step forces
! 3131: * a zero into the remainder's MSB, and all subsequent remainders will have
! 3132: * inherited it.)
! 3133: *
! 3134: * The re-shift stage exits if the next divisor has at most half a word's
! 3135: * worth of bits.
! 3136: *
! 3137: * For didactic reasons, the second loop will be arranged in the same way
! 3138: * as the first -- beginning with the division of dd into dd1, as if res
! 3139: * was odd. To cater for this, if res is actually even, we swap things
! 3140: * around during reshifting. (During the second loop, the parity of res
! 3141: * does not matter; we know in which half of the loop we are when we decide
! 3142: * to return.)
! 3143: */
! 3144: #ifdef DEBUG_LEHMER
! 3145: fprintferr("(sh)");
! 3146: #endif
! 3147:
! 3148: if (res&1)
! 3149: { /* after odd number of division(s) */
! 3150: if (dd1 && (sh = bfffo(dd1)))
! 3151: {
! 3152: shc = BITS_IN_LONG - sh;
! 3153: dd = (ddlo >> shc) + (dd << sh);
! 3154: if (!(HIGHMASK & dd))
! 3155: {
! 3156: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
! 3157: return -res; /* full division asked for */
! 3158: }
! 3159: dd1 = (dd1lo >> shc) + (dd1 << sh);
! 3160: }
! 3161: else
! 3162: { /* time to return: <= 1 word left, or sh==0 */
! 3163: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
! 3164: return res;
! 3165: }
! 3166: }
! 3167: else
! 3168: { /* after even number of divisions */
! 3169: if (dd)
! 3170: {
! 3171: sh = bfffo(dd); /* Known to be positive. */
! 3172: shc = BITS_IN_LONG - sh;
! 3173: /* dd:ddlo will become the new dd1, and v.v. */
! 3174: tmpd = (ddlo >> shc) + (dd << sh);
! 3175: dd = (dd1lo >> shc) + (dd1 << sh);
! 3176: dd1 = tmpd;
! 3177: /* This has completed the swap; now dd is again the current divisor.
! 3178: * The following test originally inspected dd1 -- a most subtle and
! 3179: * most annoying bug. The Management. */
! 3180: if (HIGHMASK & dd)
! 3181: {
! 3182: /* recurrence matrix is the wrong way round; swap it. */
! 3183: tmp0 = xu; xu = xu1; xu1 = tmp0;
! 3184: tmp0 = xv; xv = xv1; xv1 = tmp0;
! 3185: }
! 3186: else
! 3187: {
! 3188: /* recurrence matrix is the wrong way round; fix this. */
! 3189: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
! 3190: return -res; /* full division asked for */
! 3191: }
! 3192: }
! 3193: else
! 3194: { /* time to return: <= 1 word left */
! 3195: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
! 3196: return res;
! 3197: }
! 3198: } /* end reshift */
! 3199:
! 3200: #ifdef DEBUG_LEHMER
! 3201: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
! 3202: #endif
! 3203:
! 3204: /* The Second Loop. Rip-off of the first, but we now check for overflow
! 3205: * in the recurrences. Returns instead of breaking when we cannot fix the
! 3206: * quotient any longer.
! 3207: */
! 3208:
! 3209: for(;;)
! 3210: {
! 3211: /* First half of loop divides dd into dd1, and leaves the recurrence
! 3212: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
! 3213: * entries) when successful. */
! 3214: tmpd = dd1 - dd;
! 3215: if (tmpd < dd)
! 3216: { /* quotient suspected to be 1 */
! 3217: #ifdef DEBUG_LEHMER
! 3218: q = 1;
! 3219: #endif
! 3220: tmpu = xu + xu1;
! 3221: tmpv = addll(xv, xv1); /* xv,xv1 will overflow first */
! 3222: tmp1 = overflow;
! 3223: }
! 3224: else
! 3225: { /* division indicated */
! 3226: hiremainder = 0;
! 3227: q = 1 + divll(tmpd, dd);
! 3228: tmpd = hiremainder;
! 3229: tmpu = xu + q*xu1;
! 3230: tmpv = addll(xv, mulll(q,xv1));
! 3231: tmp1 = overflow | hiremainder;
! 3232: }
! 3233:
! 3234: tmp0 = addll(tmpv, xv1);
! 3235: if ((tmpd < tmpu) || overflow || tmp1 ||
! 3236: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
! 3237: {
! 3238: /* The recurrence matrix has not yet been warped... */
! 3239: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
! 3240: break;
! 3241: }
! 3242: /* commit dd1, xu, xv */
! 3243: res++;
! 3244: dd1 = tmpd; xu = tmpu; xv = tmpv;
! 3245: #ifdef DEBUG_LEHMER
! 3246: fprintferr(" q = %ld, %lx, %lx\n", q, dd, dd1);
! 3247: #endif
! 3248: if (xv > vmax)
! 3249: { /* time to return */
! 3250: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
! 3251: return res;
! 3252: }
! 3253:
! 3254: /* Second half of loop divides dd1 into dd, and the matrix returns to its
! 3255: * normal arrangement. */
! 3256: tmpd = dd - dd1;
! 3257: if (tmpd < dd1)
! 3258: { /* quotient suspected to be 1 */
! 3259: #ifdef DEBUG_LEHMER
! 3260: q = 1;
! 3261: #endif
! 3262: tmpu = xu1 + xu;
! 3263: tmpv = addll(xv1, xv);
! 3264: tmp1 = overflow;
! 3265: }
! 3266: else
! 3267: { /* division indicated */
! 3268: hiremainder = 0;
! 3269: q = 1 + divll(tmpd, dd1);
! 3270: tmpd = hiremainder;
! 3271: tmpu = xu1 + q*xu;
! 3272: tmpv = addll(xv1, mulll(q, xv));
! 3273: tmp1 = overflow | hiremainder;
! 3274: }
! 3275:
! 3276: tmp0 = addll(tmpu, xu);
! 3277: if ((tmpd < tmpv) || overflow || tmp1 ||
! 3278: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
! 3279: {
! 3280: /* The recurrence matrix has not yet been unwarped, so it is
! 3281: * the wrong way round; fix this. */
! 3282: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
! 3283: break;
! 3284: }
! 3285:
! 3286: res++; /* commit dd, xu1, xv1 */
! 3287: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
! 3288: #ifdef DEBUG_LEHMER
! 3289: fprintferr(" q = %ld, %lx, %lx\n", q, dd1, dd);
! 3290: #endif
! 3291: if (xv1 > vmax)
! 3292: { /* time to return */
! 3293: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
! 3294: return res;
! 3295: }
! 3296: } /* end of second loop */
! 3297:
! 3298: return res;
! 3299: }
! 3300:
! 3301: /*==================================
! 3302: * invmod(a,b,res)
! 3303: *==================================
! 3304: * If a is invertible, return 1, and set res = a^{ -1 }
! 3305: * Otherwise, return 0, and set res = gcd(a,b)
! 3306: *
! 3307: * This is sufficiently different from bezout() to be implemented separately
! 3308: * instead of having a bunch of extra conditionals in a single function body
! 3309: * to meet both purposes.
! 3310: */
! 3311:
! 3312: int
! 3313: invmod(GEN a, GEN b, GEN *res)
! 3314: {
! 3315: GEN v,v1,d,d1,q,r;
! 3316: long av,av1,lim;
! 3317: long s;
! 3318: ulong g;
! 3319: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
! 3320: int lhmres; /* Lehmer stage return value */
! 3321:
! 3322: if (typ(a) != t_INT || typ(b) != t_INT) err(arither1);
! 3323: if (!signe(b)) { *res=absi(a); return 0; }
! 3324: av = avma;
! 3325: if (lgefint(b) == 3) /* single-word affair */
! 3326: {
! 3327: d1 = modiu(a, (ulong)(b[2]));
! 3328: if (d1 == gzero)
! 3329: {
! 3330: if (b[2] == 1L)
! 3331: { *res = gzero; return 1; }
! 3332: else
! 3333: { *res = absi(b); return 0; }
! 3334: }
! 3335: g = xgcduu((ulong)(b[2]), (ulong)(d1[2]), 1, &xv, &xv1, &s);
! 3336: #ifdef DEBUG_LEHMER
! 3337: fprintferr(" <- %lu,%lu\n", (ulong)(b[2]), (ulong)(d1[2]));
! 3338: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
! 3339: #endif
! 3340: avma = av;
! 3341: if (g != 1UL) { *res = utoi(g); return 0; }
! 3342: xv = xv1 % (ulong)(b[2]); if (s < 0) xv = ((ulong)(b[2])) - xv;
! 3343: *res = utoi(xv); return 1;
! 3344: }
! 3345:
! 3346: (void)new_chunk(lgefint(b));
! 3347: d = absi(b); d1 = modii(a,d);
! 3348:
! 3349: v=gzero; v1=gun; /* general case */
! 3350: #ifdef DEBUG_LEHMER
! 3351: fprintferr("INVERT: -------------------------\n");
! 3352: output(d1);
! 3353: #endif
! 3354: av1 = avma; lim = stack_lim(av,1);
! 3355:
! 3356: while (lgefint(d) > 3 && signe(d1))
! 3357: {
! 3358: #ifdef DEBUG_LEHMER
! 3359: fprintferr("Calling Lehmer:\n");
! 3360: #endif
! 3361: lhmres = lgcdii((ulong*)d, (ulong*)d1, &xu, &xu1, &xv, &xv1, MAXULONG);
! 3362: if (lhmres != 0) /* check progress */
! 3363: { /* apply matrix */
! 3364: #ifdef DEBUG_LEHMER
! 3365: fprintferr("Lehmer returned %d [%lu,%lu;%lu,%lu].\n",
! 3366: lhmres, xu, xu1, xv, xv1);
! 3367: #endif
! 3368: if ((lhmres == 1) || (lhmres == -1))
! 3369: {
! 3370: if (xv1 == 1)
! 3371: {
! 3372: r = subii(d,d1); d=d1; d1=r;
! 3373: a = subii(v,v1); v=v1; v1=a;
! 3374: }
! 3375: else
! 3376: {
! 3377: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
! 3378: a = subii(v, mului(xv1,v1)); v=v1; v1=a;
! 3379: }
! 3380: }
! 3381: else
! 3382: {
! 3383: r = subii(muliu(d,xu), muliu(d1,xv));
! 3384: a = subii(muliu(v,xu), muliu(v1,xv));
! 3385: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
! 3386: v1 = subii(muliu(v,xu1), muliu(v1,xv1)); v = a;
! 3387: if (lhmres&1)
! 3388: {
! 3389: setsigne(d,-signe(d));
! 3390: setsigne(v,-signe(v));
! 3391: }
! 3392: else
! 3393: {
! 3394: if (signe(d1)) { setsigne(d1,-signe(d1)); }
! 3395: setsigne(v1,-signe(v1));
! 3396: }
! 3397: }
! 3398: }
! 3399: #ifdef DEBUG_LEHMER
! 3400: else
! 3401: fprintferr("Lehmer returned 0.\n");
! 3402: output(d); output(d1); output(v); output(v1);
! 3403: sleep(1);
! 3404: #endif
! 3405:
! 3406: if (lhmres <= 0 && signe(d1))
! 3407: {
! 3408: q = dvmdii(d,d1,&r);
! 3409: #ifdef DEBUG_LEHMER
! 3410: fprintferr("Full division:\n");
! 3411: printf(" q = "); output(q); sleep (1);
! 3412: #endif
! 3413: a = subii(v,mulii(q,v1));
! 3414: v=v1; v1=a;
! 3415: d=d1; d1=r;
! 3416: }
! 3417: if (low_stack(lim, stack_lim(av,1)))
! 3418: {
! 3419: GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
! 3420: if(DEBUGMEM>1) err(warnmem,"invmod");
! 3421: gerepilemany(av1,gptr,4);
! 3422: }
! 3423: } /* end while */
! 3424:
! 3425: /* Postprocessing - final sprint */
! 3426: if (signe(d1))
! 3427: {
! 3428: /* Assertions: lgefint(d)==lgefint(d1)==3, and
! 3429: * gcd(d,d1) is nonzero and fits into one word
! 3430: */
! 3431: g = xxgcduu((ulong)(d[2]), (ulong)(d1[2]), 1, &xu, &xu1, &xv, &xv1, &s);
! 3432: #ifdef DEBUG_LEHMER
! 3433: output(d);output(d1);output(v);output(v1);
! 3434: fprintferr(" <- %lu,%lu\n", (ulong)(d[2]), (ulong)(d1[2]));
! 3435: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
! 3436: #endif
! 3437: if (g != 1UL) { avma = av; *res = utoi(g); return 0; }
! 3438: /* (From the xgcduu() blurb:)
! 3439: * For finishing the multiword modinv, we now have to multiply the
! 3440: * returned matrix (with properly adjusted signs) onto the values
! 3441: * v' and v1' previously obtained from the multiword division steps.
! 3442: * Actually, it is sufficient to take the scalar product of [v',v1']
! 3443: * with [u1,-v1], and change the sign if s==1.
! 3444: */
! 3445: v = subii(muliu(v,xu1),muliu(v1,xv1));
! 3446: if (s > 0) setsigne(v,-signe(v));
! 3447: avma = av; *res = modii(v,b);
! 3448: #ifdef DEBUG_LEHMER
! 3449: output(*res); fprintfderr("============================Done.\n");
! 3450: sleep(1);
! 3451: #endif
! 3452: return 1;
! 3453: }
! 3454: /* get here when the final sprint was skipped (d1 was zero already) */
! 3455: avma = av;
! 3456: if (!egalii(d,gun)) { *res = icopy(d); return 0; }
! 3457: *res = modii(v,b);
! 3458: #ifdef DEBUG_LEHMER
! 3459: output(*res); fprintferr("============================Done.\n");
! 3460: sleep(1);
! 3461: #endif
! 3462: return 1;
! 3463: }
! 3464:
! 3465: /*==================================
! 3466: * bezout(a,b,pu,pv)
! 3467: *==================================
! 3468: * Return g = gcd(a,b) >= 0, and assign GENs u,v through pointers pu,pv
! 3469: * such that g = u*a + v*b.
! 3470: * Special cases:
! 3471: * a == b == 0 ==> pick u=1, v=0
! 3472: * a != 0 == b ==> keep v=0
! 3473: * a == 0 != b ==> keep u=0
! 3474: * |a| == |b| != 0 ==> keep u=0, set v=+-1
! 3475: * Assignments through pu,pv will be suppressed when the corresponding
! 3476: * pointer is NULL (but the computations will happen nonetheless).
! 3477: */
! 3478:
! 3479: GEN
! 3480: bezout(GEN a, GEN b, GEN *pu, GEN *pv)
! 3481: {
! 3482: GEN t,u,u1,v,v1,d,d1,q,r;
! 3483: GEN *pt;
! 3484: long av,av1,lim;
! 3485: long s;
! 3486: int sa, sb;
! 3487: ulong g;
! 3488: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
! 3489: int lhmres; /* Lehmer stage return value */
! 3490:
! 3491: if (typ(a) != t_INT || typ(b) != t_INT) err(arither1);
! 3492: s = absi_cmp(a,b);
! 3493: if (s < 0)
! 3494: {
! 3495: t=b; b=a; a=t;
! 3496: pt=pu; pu=pv; pv=pt;
! 3497: }
! 3498: /* now |a| >= |b| */
! 3499:
! 3500: sa = signe(a); sb = signe(b);
! 3501: if (!sb)
! 3502: {
! 3503: if (pv != NULL) *pv = gzero;
! 3504: switch(sa)
! 3505: {
! 3506: case 0:
! 3507: if (pu != NULL) *pu = gun; return gzero;
! 3508: case 1:
! 3509: if (pu != NULL) *pu = gun; return icopy(a);
! 3510: case -1:
! 3511: if (pu != NULL) *pu = negi(gun); return(negi(a));
! 3512: }
! 3513: }
! 3514: if (s == 0) /* |a| == |b| != 0 */
! 3515: {
! 3516: if (pu != NULL) *pu = gzero;
! 3517: if (sb > 0)
! 3518: {
! 3519: if (pv != NULL) *pv = gun; return icopy(b);
! 3520: }
! 3521: else
! 3522: {
! 3523: if (pv != NULL) *pv = negi(gun); return(negi(b));
! 3524: }
! 3525: }
! 3526: /* now |a| > |b| > 0 */
! 3527:
! 3528: if (lgefint(a) == 3) /* single-word affair */
! 3529: {
! 3530: g = xxgcduu((ulong)(a[2]), (ulong)(b[2]), 0, &xu, &xu1, &xv, &xv1, &s);
! 3531: sa = s > 0 ? sa : -sa;
! 3532: sb = s > 0 ? -sb : sb;
! 3533: if (pu != NULL)
! 3534: {
! 3535: if (xu == 0) *pu = gzero; /* can happen when b divides a */
! 3536: else if (xu == 1) *pu = sa < 0 ? negi(gun) : gun;
! 3537: else if (xu == 2) *pu = sa < 0 ? negi(gdeux) : gdeux;
! 3538: else
! 3539: {
! 3540: *pu = cgeti(3);
! 3541: (*pu)[1] = evalsigne(sa)|evallgefint(3);
! 3542: (*pu)[2] = xu;
! 3543: }
! 3544: }
! 3545: if (pv != NULL)
! 3546: {
! 3547: if (xv == 1) *pv = sb < 0 ? negi(gun) : gun;
! 3548: else if (xv == 2) *pv = sb < 0 ? negi(gdeux) : gdeux;
! 3549: else
! 3550: {
! 3551: *pv = cgeti(3);
! 3552: (*pv)[1] = evalsigne(sb)|evallgefint(3);
! 3553: (*pv)[2] = xv;
! 3554: }
! 3555: }
! 3556: if (g == 1) return gun;
! 3557: else if (g == 2) return gdeux;
! 3558: else
! 3559: {
! 3560: r = cgeti(3);
! 3561: r[1] = evalsigne(1)|evallgefint(3);
! 3562: r[2] = g;
! 3563: return r;
! 3564: }
! 3565: }
! 3566:
! 3567: /* general case */
! 3568: av = avma;
! 3569: (void)new_chunk(lgefint(b) + (lgefint(a)<<1)); /* room for u,v,gcd */
! 3570: /* if a is significantly larger than b, calling lgcdii() is not the best
! 3571: * way to start -- reduce a mod b first
! 3572: */
! 3573: if (lgefint(a) > lgefint(b))
! 3574: {
! 3575: d = absi(b), q = dvmdii(absi(a), d, &d1);
! 3576: if (!signe(d1)) /* a == qb */
! 3577: {
! 3578: avma = av;
! 3579: if (pu != NULL) *pu = gzero;
! 3580: if (pv != NULL) *pv = sb < 0 ? negi(gun) : gun;
! 3581: return (icopy(d));
! 3582: }
! 3583: else
! 3584: {
! 3585: u = gzero;
! 3586: u1 = v = gun;
! 3587: v1 = negi(q);
! 3588: }
! 3589: /* if this results in lgefint(d) == 3, will fall past main loop */
! 3590: }
! 3591: else
! 3592: {
! 3593: d = absi(a); d1 = absi(b);
! 3594: u = v1 = gun; u1 = v = gzero;
! 3595: }
! 3596: av1 = avma; lim = stack_lim(av, 1);
! 3597:
! 3598: /* main loop is almost identical to that of invmod() */
! 3599: while (lgefint(d) > 3 && signe(d1))
! 3600: {
! 3601: lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, MAXULONG);
! 3602: if (lhmres != 0) /* check progress */
! 3603: { /* apply matrix */
! 3604: if ((lhmres == 1) || (lhmres == -1))
! 3605: {
! 3606: if (xv1 == 1)
! 3607: {
! 3608: r = subii(d,d1); d=d1; d1=r;
! 3609: a = subii(u,u1); u=u1; u1=a;
! 3610: a = subii(v,v1); v=v1; v1=a;
! 3611: }
! 3612: else
! 3613: {
! 3614: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
! 3615: a = subii(u, mului(xv1,u1)); u=u1; u1=a;
! 3616: a = subii(v, mului(xv1,v1)); v=v1; v1=a;
! 3617: }
! 3618: }
! 3619: else
! 3620: {
! 3621: r = subii(muliu(d,xu), muliu(d1,xv));
! 3622: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
! 3623: a = subii(muliu(u,xu), muliu(u1,xv));
! 3624: u1 = subii(muliu(u,xu1), muliu(u1,xv1)); u = a;
! 3625: a = subii(muliu(v,xu), muliu(v1,xv));
! 3626: v1 = subii(muliu(v,xu1), muliu(v1,xv1)); v = a;
! 3627: if (lhmres&1)
! 3628: {
! 3629: setsigne(d,-signe(d));
! 3630: setsigne(u,-signe(u));
! 3631: setsigne(v,-signe(v));
! 3632: }
! 3633: else
! 3634: {
! 3635: if (signe(d1)) { setsigne(d1,-signe(d1)); }
! 3636: setsigne(u1,-signe(u1));
! 3637: setsigne(v1,-signe(v1));
! 3638: }
! 3639: }
! 3640: }
! 3641: if (lhmres <= 0 && signe(d1))
! 3642: {
! 3643: q = dvmdii(d,d1,&r);
! 3644: #ifdef DEBUG_LEHMER
! 3645: fprintferr("Full division:\n");
! 3646: printf(" q = "); output(q); sleep (1);
! 3647: #endif
! 3648: a = subii(u,mulii(q,u1));
! 3649: u=u1; u1=a;
! 3650: a = subii(v,mulii(q,v1));
! 3651: v=v1; v1=a;
! 3652: d=d1; d1=r;
! 3653: }
! 3654: if (low_stack(lim, stack_lim(av,1)))
! 3655: {
! 3656: GEN *gptr[6]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&u; gptr[3]=&u1;
! 3657: gptr[4]=&v; gptr[5]=&v1;
! 3658: if(DEBUGMEM>1) err(warnmem,"bezout");
! 3659: gerepilemany(av1,gptr,6);
! 3660: }
! 3661: } /* end while */
! 3662:
! 3663: /* Postprocessing - final sprint */
! 3664: if (signe(d1))
! 3665: {
! 3666: /* Assertions: lgefint(d)==lgefint(d1)==3, and
! 3667: * gcd(d,d1) is nonzero and fits into one word
! 3668: */
! 3669: g = xxgcduu((ulong)(d[2]), (ulong)(d1[2]), 0, &xu, &xu1, &xv, &xv1, &s);
! 3670: u = subii(muliu(u,xu), muliu(u1, xv));
! 3671: v = subii(muliu(v,xu), muliu(v1, xv));
! 3672: if (s < 0) { sa = -sa; sb = -sb; }
! 3673: avma = av;
! 3674: if (pu != NULL) *pu = sa < 0 ? negi(u) : icopy(u);
! 3675: if (pv != NULL) *pv = sb < 0 ? negi(v) : icopy(v);
! 3676: if (g == 1) return gun;
! 3677: else if (g == 2) return gdeux;
! 3678: else
! 3679: {
! 3680: r = cgeti(3);
! 3681: r[1] = evalsigne(1)|evallgefint(3);
! 3682: r[2] = g;
! 3683: return r;
! 3684: }
! 3685: }
! 3686: /* get here when the final sprint was skipped (d1 was zero already).
! 3687: * Now the matrix is final, and d contains the gcd.
! 3688: */
! 3689: avma = av;
! 3690: if (pu != NULL) *pu = sa < 0 ? negi(u) : icopy(u);
! 3691: if (pv != NULL) *pv = sb < 0 ? negi(v) : icopy(v);
! 3692: return icopy(d);
! 3693: }
! 3694:
! 3695: /*==================================
! 3696: * cbezout(a,b,uu,vv)
! 3697: *==================================
! 3698: * Same as bezout() but for C longs.
! 3699: * Return g = gcd(a,b) >= 0, and assign longs u,v through pointers uu,vv
! 3700: * such that g = u*a + v*b.
! 3701: * Special cases:
! 3702: * a == b == 0 ==> pick u=1, v=0 (and return 1, surprisingly)
! 3703: * a != 0 == b ==> keep v=0
! 3704: * a == 0 != b ==> keep u=0
! 3705: * |a| == |b| != 0 ==> keep u=0, set v=+-1
! 3706: * Assignments through uu,vv happen unconditionally; non-NULL pointers
! 3707: * _must_ be used.
! 3708: */
! 3709: long
! 3710: cbezout(long a,long b,long *uu,long *vv)
! 3711: {
! 3712: long s,*t;
! 3713: ulong d = labs(a), d1 = labs(b);
! 3714: ulong r,u,u1,v,v1;
! 3715:
! 3716: #ifdef DEBUG_CBEZOUT
! 3717: fprintferr("> cbezout(%ld,%ld,%p,%p)\n", a, b, (void *)uu, (void *)vv);
! 3718: #endif
! 3719: if (!b)
! 3720: {
! 3721: *vv=0L;
! 3722: if (!a)
! 3723: {
! 3724: *uu=1L;
! 3725: #ifdef DEBUG_CBEZOUT
! 3726: fprintferr("< %ld (%ld, %ld)\n", 1L, *uu, *vv);
! 3727: #endif
! 3728: return 0L;
! 3729: }
! 3730: *uu = a < 0 ? -1L : 1L;
! 3731: #ifdef DEBUG_CBEZOUT
! 3732: fprintferr("< %ld (%ld, %ld)\n", (long)d, *uu, *vv);
! 3733: #endif
! 3734: return (long)d;
! 3735: }
! 3736: else if (!a || (d == d1))
! 3737: {
! 3738: *uu = 0L; *vv = b < 0 ? -1L : 1L;
! 3739: #ifdef DEBUG_CBEZOUT
! 3740: fprintferr("< %ld (%ld, %ld)\n", (long)d1, *uu, *vv);
! 3741: #endif
! 3742: return (long)d1;
! 3743: }
! 3744: else if (d == 1) /* frequently used by nfinit */
! 3745: {
! 3746: *uu = a; *vv = 0L;
! 3747: #ifdef DEBUG_CBEZOUT
! 3748: fprintferr("< %ld (%ld, %ld)\n", 1L, *uu, *vv);
! 3749: #endif
! 3750: return 1L;
! 3751: }
! 3752: else if (d < d1)
! 3753: {
! 3754: /* bug in gcc-2.95.3:
! 3755: * s = a; a = b; b = s; produces wrong result a = b. This is OK: */
! 3756: { long _x = a; a = b; b = _x; } /* in order to keep the right signs */
! 3757: r = d; d = d1; d1 = r;
! 3758: t = uu; uu = vv; vv = t;
! 3759: #ifdef DEBUG_CBEZOUT
! 3760: fprintferr(" swapping\n");
! 3761: #endif
! 3762: }
! 3763: /* d > d1 > 0 */
! 3764: r = xxgcduu(d, d1, 0, &u, &u1, &v, &v1, &s);
! 3765: if (s < 0)
! 3766: {
! 3767: *uu = a < 0 ? u : -(long)u;
! 3768: *vv = b < 0 ? -(long)v : v;
! 3769: }
! 3770: else
! 3771: {
! 3772: *uu = a < 0 ? -(long)u : u;
! 3773: *vv = b < 0 ? v : -(long)v;
! 3774: }
! 3775: #ifdef DEBUG_CBEZOUT
! 3776: fprintferr("< %ld (%ld, %ld)\n", (long)r, *uu, *vv);
! 3777: #endif
! 3778: return (long)r;
! 3779: }
! 3780:
! 3781: /*==========================================================
! 3782: * ratlift(GEN x, GEN m, GEN *a, GEN *b, GEN amax, GEN bmax)
! 3783: *==========================================================
! 3784: * Reconstruct rational number from its residue x mod m
! 3785: * Given t_INT x, m, amax>=0, bmax>0 such that
! 3786: * 0 <= x < m; 2*amax*bmax < m
! 3787: * attempts to find t_INT a, b such that
! 3788: * (1) a = b*x (mod m)
! 3789: * (2) |a| <= amax, 0 < b <= bmax
! 3790: * (3) gcd(m, b) = gcd(a, b)
! 3791: * If unsuccessful, it will return 0 and leave a,b unchanged (and
! 3792: * caller may deduce no such a,b exist). If successful, sets a,b
! 3793: * and returns 1. If there exist a,b satisfying (1), (2), and
! 3794: * (3') gcd(m, b) = 1
! 3795: * then they are uniquely determined subject to (1),(2) and
! 3796: * (3'') gcd(a, b) = 1,
! 3797: * and will be returned by the routine. (The caller may wish to
! 3798: * check gcd(a,b)==1, either directly or based on known prime
! 3799: * divisors of m, depending on the application.)
! 3800: * Reference:
! 3801: @article {MR97c:11116,
! 3802: AUTHOR = {Collins, George E. and Encarnaci{\'o}n, Mark J.},
! 3803: TITLE = {Efficient rational number reconstruction},
! 3804: JOURNAL = {J. Symbolic Comput.},
! 3805: FJOURNAL = {Journal of Symbolic Computation},
! 3806: VOLUME = {20},
! 3807: YEAR = {1995},
! 3808: NUMBER = {3},
! 3809: PAGES = {287--297},
! 3810: ISSN = {0747-7171},
! 3811: MRCLASS = {11Y16 (68Q40)},
! 3812: MRNUMBER = {97c:11116},
! 3813: MRREVIEWER = {Maurice Mignotte},
! 3814: }
! 3815: * Preprint available from:
! 3816: * ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1994/94-64.ps.gz
! 3817: */
! 3818:
! 3819: /* #define DEBUG_RATLIFT */
! 3820:
! 3821: int
! 3822: ratlift(GEN x, GEN m, GEN *a, GEN *b, GEN amax, GEN bmax)
! 3823: {
! 3824: GEN d,d1,v,v1,q,r;
! 3825: ulong av = avma, av1, lim;
! 3826: long lb,lr,lbb,lbr,s,s0;
! 3827: ulong vmax;
! 3828: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
! 3829: int lhmres; /* Lehmer stage return value */
! 3830:
! 3831: if ((typ(x) | typ(m) | typ(amax) | typ(bmax)) != t_INT) err(arither1);
! 3832: if (signe(bmax) <= 0)
! 3833: err(talker, "ratlift: bmax must be > 0, found\n\tbmax=%Z\n", bmax);
! 3834: if (signe(amax) < 0)
! 3835: err(talker, "ratilft: amax must be >= 0, found\n\tamax=%Z\n", amax);
! 3836: /* check 2*amax*bmax < m */
! 3837: if (cmpii(shifti(mulii(amax, bmax), 1), m) >= 0)
! 3838: err(talker, "ratlift: must have 2*amax*bmax < m, found\n\tamax=%Z\n\tbmax=%Z\n\tm=%Z\n", amax,bmax,m);
! 3839: /* we _could_ silently replace x with modii(x,m) instead of the following,
! 3840: * but let's leave this up to the caller
! 3841: */
! 3842: avma = av; s = signe(x);
! 3843: if (s < 0 || cmpii(x,m) >= 0)
! 3844: err(talker, "ratlift: must have 0 <= x < m, found\n\tx=%Z\n\tm=%Z\n", x,m);
! 3845:
! 3846: /* special cases x=0 and/or amax=0 */
! 3847: if (s == 0)
! 3848: {
! 3849: if (a != NULL) *a = gzero;
! 3850: if (b != NULL) *b = gun;
! 3851: return 1;
! 3852: }
! 3853: else if (signe(amax)==0)
! 3854: return 0;
! 3855: /* assert: m > x > 0, amax > 0 */
! 3856:
! 3857: /* check here whether a=x, b=1 is a solution */
! 3858: if (cmpii(x,amax) <= 0)
! 3859: {
! 3860: if (a != NULL) *a = icopy(x);
! 3861: if (b != NULL) *b = gun;
! 3862: return 1;
! 3863: }
! 3864:
! 3865: /* There is no special case for single-word numbers since this is
! 3866: * mainly meant to be used with large moduli.
! 3867: */
! 3868: (void)new_chunk(lgefint(bmax) + lgefint(amax)); /* room for a,b */
! 3869: d = m; d1 = x;
! 3870: v = gzero; v1 = gun;
! 3871: /* assert d1 > amax, v1 <= bmax here */
! 3872: lb = lgefint(bmax);
! 3873: lbb = bfffo(bmax[2]);
! 3874: s = 1;
! 3875: av1 = avma; lim = stack_lim(av, 1);
! 3876:
! 3877: /* general case: Euclidean division chain starting with m div x, and
! 3878: * with bounds on the sequence of convergents' denoms v_j.
! 3879: * Just to be different from what invmod and bezout are doing, we work
! 3880: * here with the all-nonnegative matrices [u,u1;v,v1]=prod_j([0,1;1,q_j]).
! 3881: * Loop invariants:
! 3882: * (a) (sign)*[-v,v1]*x = [d,d1] (mod m) (componentwise)
! 3883: * (sign initially +1, changes with each Euclidean step)
! 3884: * so [a,b] will be obtained in the form [-+d,v] or [+-d1,v1];
! 3885: * this congruence is a consequence of
! 3886: * (b) [x,m]~ = [u,u1;v,v1]*[d1,d]~,
! 3887: * where u,u1 is the usual numerator sequence starting with 1,0
! 3888: * instead of 0,1 (just multiply the eqn on the left by the inverse
! 3889: * matrix, which is det*[v1,-u1;-v,u], where "det" is the same as the
! 3890: * "(sign)" in (a)). From m = v*d1 + v1*d and
! 3891: * (c) d > d1 >= 0, 0 <= v < v1,
! 3892: * we have d >= m/(2*v1), so while v1 remains smaller than m/(2*amax),
! 3893: * the pair [-(sign)*d,v] satisfies (1) but violates (2) (d > amax).
! 3894: * Conversely, v1 > bmax indicates that no further solutions will be
! 3895: * forthcoming; [-(sign)*d,v] will be the last, and first, candidate.
! 3896: * Thus there's at most one point in the chain division where a solution
! 3897: * can live: v < bmax, v1 >= m/(2*amax) > bmax, and this is acceptable
! 3898: * iff in fact d <= amax (e.g. m=221, x=34 or 35, amax=bmax=10 fail on
! 3899: * this count while x=32,33,36,37 succeed). However, a division may leave
! 3900: * a zero residue before we ever reach this point (consider m=210, x=35,
! 3901: * amax=bmax=10), and our caller may find that gcd(d,v) > 1 (numerous
! 3902: * examples -- keep m=210 and consider any of x=29,31,32,33,34,36,37,38,
! 3903: * 39,40,41).
! 3904: * Furthermore, at the start of the loop body we have in fact
! 3905: * (c') 0 <= v < v1 <= bmax, d > d1 > amax >= 0,
! 3906: * (and are never done already).
! 3907: *
! 3908: * Main loop is similar to those of invmod() and bezout(), except for
! 3909: * having to determine appropriate vmax bounds, and checking termination
! 3910: * conditions. The signe(d1) condition is only for paranoia
! 3911: */
! 3912: while (lgefint(d) > 3 && signe(d1))
! 3913: {
! 3914: /* determine vmax for lgcdii so as to ensure v won't overshoot.
! 3915: * If v+v1 > bmax, the next step would take v1 beyond the limit, so
! 3916: * since [+-d1,v1] is not a solution, we give up. Otherwise if v+v1
! 3917: * is way shorter than bmax, use vmax=MAXULUNG. Otherwise, set vmax
! 3918: * to a crude lower approximation of bmax/(v+v1), or to 1, which will
! 3919: * allow the inner loop to do one step
! 3920: */
! 3921: r = addii(v,v1);
! 3922: lr = lb - lgefint(r);
! 3923: lbr = bfffo(r[2]);
! 3924: if (cmpii(r,bmax) > 0) /* done, not found */
! 3925: {
! 3926: avma = av;
! 3927: return 0;
! 3928: }
! 3929: else if (lr > 1) /* still more than a word's worth to go */
! 3930: {
! 3931: vmax = MAXULONG;
! 3932: }
! 3933: else /* take difference of bit lengths */
! 3934: {
! 3935: lr = (lr << TWOPOTBITS_IN_LONG) - lbb + lbr;
! 3936: if (lr > BITS_IN_LONG)
! 3937: vmax = MAXULONG;
! 3938: else if (lr == 0)
! 3939: vmax = 1UL;
! 3940: else
! 3941: vmax = 1UL << (lr-1);
! 3942: /* the latter is pessimistic but faster than a division */
! 3943: }
! 3944: /* do a Lehmer-Jebelean round */
! 3945: lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, vmax);
! 3946: if (lhmres != 0) /* check progress */
! 3947: { /* apply matrix */
! 3948: if ((lhmres == 1) || (lhmres == -1))
! 3949: {
! 3950: s = -s;
! 3951: if (xv1 == 1)
! 3952: {
! 3953: /* re-use v+v1 computed above */
! 3954: v=v1; v1=r;
! 3955: r = subii(d,d1); d=d1; d1=r;
! 3956: }
! 3957: else
! 3958: {
! 3959: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
! 3960: r = addii(v, mului(xv1,v1)); v=v1; v1=r;
! 3961: }
! 3962: }
! 3963: else
! 3964: {
! 3965: r = subii(muliu(d,xu), muliu(d1,xv));
! 3966: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
! 3967: r = addii(muliu(v,xu), muliu(v1,xv));
! 3968: v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
! 3969: if (lhmres&1)
! 3970: {
! 3971: setsigne(d,-signe(d));
! 3972: s = -s;
! 3973: }
! 3974: else if (signe(d1))
! 3975: {
! 3976: setsigne(d1,-signe(d1));
! 3977: }
! 3978: }
! 3979: /* check whether we're done. Assert v <= bmax here. Examine v1:
! 3980: * if v1 > bmax, check d and return 0 or 1 depending on the outcome;
! 3981: * if v1 <= bmax, check d1 and return 1 if d1 <= amax, otherwise
! 3982: * proceed.
! 3983: */
! 3984: if (cmpii(v1,bmax) > 0) /* certainly done */
! 3985: {
! 3986: avma = av;
! 3987: if (cmpii(d,amax) <= 0) /* done, found */
! 3988: {
! 3989: if (a != NULL)
! 3990: {
! 3991: *a = icopy(d);
! 3992: setsigne(*a,-s); /* sign opposite to s */
! 3993: }
! 3994: if (b != NULL) *b = icopy(v);
! 3995: return 1;
! 3996: }
! 3997: else /* done, not found */
! 3998: return 0;
! 3999: }
! 4000: else if (cmpii(d1,amax) <= 0) /* also done, found */
! 4001: {
! 4002: avma = av;
! 4003: if (a != NULL)
! 4004: {
! 4005: if (signe(d1))
! 4006: {
! 4007: *a = icopy(d1);
! 4008: setsigne(*a,s); /* same sign as s */
! 4009: }
! 4010: else
! 4011: *a = gzero;
! 4012: }
! 4013: if (b != NULL) *b = icopy(v1);
! 4014: return 1;
! 4015: }
! 4016: } /* lhmres != 0 */
! 4017:
! 4018: if (lhmres <= 0 && signe(d1))
! 4019: {
! 4020: q = dvmdii(d,d1,&r);
! 4021: #ifdef DEBUG_LEHMER
! 4022: fprintferr("Full division:\n");
! 4023: printf(" q = "); output(q); sleep (1);
! 4024: #endif
! 4025: d=d1; d1=r;
! 4026: r = addii(v,mulii(q,v1));
! 4027: v=v1; v1=r;
! 4028: s = -s;
! 4029: /* check whether we are done now. Since we weren't before the div, it
! 4030: * suffices to examine v1 and d1 -- the new d (former d1) cannot cut it
! 4031: */
! 4032: if (cmpii(v1,bmax) > 0) /* done, not found */
! 4033: {
! 4034: avma = av;
! 4035: return 0;
! 4036: }
! 4037: else if (cmpii(d1,amax) <= 0) /* done, found */
! 4038: {
! 4039: avma = av;
! 4040: if (a != NULL)
! 4041: {
! 4042: if (signe(d1))
! 4043: {
! 4044: *a = icopy(d1);
! 4045: setsigne(*a,s); /* same sign as s */
! 4046: }
! 4047: else
! 4048: *a = gzero;
! 4049: }
! 4050: if (b != NULL) *b = icopy(v1);
! 4051: return 1;
! 4052: }
! 4053: }
! 4054:
! 4055: if (low_stack(lim, stack_lim(av,1)))
! 4056: {
! 4057: GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
! 4058: if(DEBUGMEM>1) err(warnmem,"ratlift");
! 4059: gerepilemany(av1,gptr,4);
! 4060: }
! 4061: } /* end while */
! 4062:
! 4063: /* Postprocessing - final sprint. Since we usually underestimate vmax,
! 4064: * this function needs a loop here instead of a simple conditional.
! 4065: * Note we can only get here when amax fits into one word (which will
! 4066: * typically not be the case!). The condition is bogus -- d1 is never
! 4067: * zero at the start of the loop. There will be at most a few iterations,
! 4068: * so we don't bother collecting garbage
! 4069: */
! 4070: while (signe(d1))
! 4071: {
! 4072: /* Assertions: lgefint(d)==lgefint(d1)==3.
! 4073: * Moreover, we aren't done already, or we would have returned by now.
! 4074: * Recompute vmax...
! 4075: */
! 4076: #ifdef DEBUG_RATLIFT
! 4077: fprintferr("rl-fs: d,d1=%Z,%Z\n", d, d1);
! 4078: fprintferr("rl-fs: v,v1=%Z,%Z\n", v, v1);
! 4079: #endif
! 4080: r = addii(v,v1);
! 4081: lr = lb - lgefint(r);
! 4082: lbr = bfffo(r[2]);
! 4083: if (cmpii(r,bmax) > 0) /* done, not found */
! 4084: {
! 4085: avma = av;
! 4086: return 0;
! 4087: }
! 4088: else if (lr > 1) /* still more than a word's worth to go */
! 4089: {
! 4090: vmax = MAXULONG; /* (cannot in fact happen) */
! 4091: }
! 4092: else /* take difference of bit lengths */
! 4093: {
! 4094: lr = (lr << TWOPOTBITS_IN_LONG) - lbb + lbr;
! 4095: if (lr > BITS_IN_LONG)
! 4096: vmax = MAXULONG;
! 4097: else if (lr == 0)
! 4098: vmax = 1UL;
! 4099: else
! 4100: vmax = 1UL << (lr-1); /* as above */
! 4101: }
! 4102: #ifdef DEBUG_RATLIFT
! 4103: fprintferr("rl-fs: vmax=%lu\n", vmax);
! 4104: #endif
! 4105: /* single-word "Lehmer", discarding the gcd or whatever it returns */
! 4106: (void)rgcduu((ulong)d[2], (ulong)d1[2], vmax, &xu, &xu1, &xv, &xv1, &s0);
! 4107: #ifdef DEBUG_RATLIFT
! 4108: fprintferr("rl-fs: [%lu,%lu; %lu,%lu] %s\n",
! 4109: xu, xu1, xv, xv1,
! 4110: s0 < 0 ? "-" : "+");
! 4111: #endif
! 4112: if (xv1 == 1) /* avoid multiplications */
! 4113: {
! 4114: /* re-use v+v1 computed above */
! 4115: v=v1; v1=r;
! 4116: r = subii(d,d1); d=d1; d1=r;
! 4117: s = -s;
! 4118: }
! 4119: else if (xu == 0) /* and xv==1, xu1==1, xv1 > 1 */
! 4120: {
! 4121: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
! 4122: r = addii(v, mului(xv1,v1)); v=v1; v1=r;
! 4123: s = -s;
! 4124: }
! 4125: else
! 4126: {
! 4127: r = subii(muliu(d,xu), muliu(d1,xv));
! 4128: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
! 4129: r = addii(muliu(v,xu), muliu(v1,xv));
! 4130: v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
! 4131: if (s0 < 0)
! 4132: {
! 4133: setsigne(d,-signe(d));
! 4134: s = -s;
! 4135: }
! 4136: else if (signe(d1)) /* sic: might vanish now */
! 4137: {
! 4138: setsigne(d1,-signe(d1));
! 4139: }
! 4140: }
! 4141: /* check whether we're done, as above. Assert v <= bmax. Examine v1:
! 4142: * if v1 > bmax, check d and return 0 or 1 depending on the outcome;
! 4143: * if v1 <= bmax, check d1 and return 1 if d1 <= amax, otherwise proceed.
! 4144: */
! 4145: if (cmpii(v1,bmax) > 0) /* certainly done */
! 4146: {
! 4147: avma = av;
! 4148: if (cmpii(d,amax) <= 0) /* done, found */
! 4149: {
! 4150: if (a != NULL)
! 4151: {
! 4152: *a = icopy(d);
! 4153: setsigne(*a,-s); /* sign opposite to s */
! 4154: }
! 4155: if (b != NULL) *b = icopy(v);
! 4156: return 1;
! 4157: }
! 4158: else /* done, not found */
! 4159: return 0;
! 4160: }
! 4161: else if (cmpii(d1,amax) <= 0) /* also done, found */
! 4162: {
! 4163: avma = av;
! 4164: if (a != NULL)
! 4165: {
! 4166: if (signe(d1))
! 4167: {
! 4168: *a = icopy(d1);
! 4169: setsigne(*a,s); /* same sign as s */
! 4170: }
! 4171: else
! 4172: *a = gzero;
! 4173: }
! 4174: if (b != NULL) *b = icopy(v1);
! 4175: return 1;
! 4176: }
! 4177: } /* while */
! 4178:
! 4179: /* get here when we have run into d1 == 0 before returning... in fact,
! 4180: * this cannot happen.
! 4181: */
! 4182: err(talker, "ratlift failed to catch d1 == 0\n");
! 4183: /* NOTREACHED */
! 4184: return 0;
! 4185: }
! 4186:
! 4187: /********************************************************************/
! 4188: /** **/
! 4189: /** INTEGER SQUARE ROOT **/
! 4190: /** **/
! 4191: /********************************************************************/
! 4192:
! 4193: /* sqrt()'s result may be off by 1 when a is not representable exactly as a
! 4194: * double [64bit machine] */
! 4195: ulong
! 4196: usqrtsafe(ulong a)
! 4197: {
! 4198: ulong x = (ulong)sqrt((double)a);
! 4199: #ifdef LONG_IS_64BIT
! 4200: ulong y = x+1; if (y <= MAXHALFULONG && y*y <= a) x = y;
! 4201: #endif
! 4202: return x;
! 4203: }
! 4204:
! 4205: /* Assume a >= 0 has <= 4 words, return trunc[sqrt(a)] */
! 4206: ulong
! 4207: mpsqrtl(GEN a)
! 4208: {
! 4209: long l = lgefint(a);
! 4210: ulong x,y,z,k,m;
! 4211: int L, s;
! 4212:
! 4213: if (l <= 3) return l==2? 0: usqrtsafe((ulong)a[2]);
! 4214: s = bfffo(a[2]);
! 4215: if (s > 1)
! 4216: {
! 4217: s &= ~1UL; /* make it even */
! 4218: z = BITS_IN_LONG - s;
! 4219: m = (ulong)(a[2] << s) | (a[3] >> z);
! 4220: L = z>>1;
! 4221: }
! 4222: else
! 4223: {
! 4224: m = (ulong)a[2];
! 4225: L = BITS_IN_LONG/2;
! 4226: }
! 4227: /* m = BIL-1 (bffo odd) or BIL first bits of a */
! 4228: k = (long) sqrt((double)m);
! 4229: if (L == BITS_IN_LONG/2 && k == MAXHALFULONG)
! 4230: x = MAXULONG;
! 4231: else
! 4232: x = (k+1) << L;
! 4233: do
! 4234: {
! 4235: LOCAL_HIREMAINDER;
! 4236: LOCAL_OVERFLOW;
! 4237: hiremainder = a[2];
! 4238: if (hiremainder >= x) return x;
! 4239: z = addll(x, divll(a[3],x));
! 4240: z >>= 1; if (overflow) z |= HIGHBIT;
! 4241: y = x; x = z;
! 4242: }
! 4243: while (x < y);
! 4244: return y;
! 4245: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>