Annotation of OpenXM_contrib/pari/src/kernel/none/mp.c, Revision 1.1
1.1 ! maekawa 1: /***********************************************************************/
! 2: /** **/
! 3: /** MULTIPRECISION KERNEL **/
! 4: /** **/
! 5: /***********************************************************************/
! 6: /* $Id: mp.c,v 1.1.1.1 1999/09/16 13:47:57 karim Exp $ */
! 7: /* most of the routines in this file are commented out in 68k */
! 8: /* version (#ifdef __M68K__) since they are defined in mp.s */
! 9: #include "pari.h"
! 10:
! 11: /* z2 := z1[imin..imax].f shifted left sh bits (feeding f from the right) */
! 12: #define shift_left2(z2,z1,imin,imax,f, sh,m) {\
! 13: register ulong _i,_l,_k = ((ulong)f)>>m;\
! 14: for (_i=imax; _i>imin; _i--) {\
! 15: _l = z1[_i];\
! 16: z2[_i] = (_l<<(ulong)sh) | _k;\
! 17: _k = _l>>(ulong)m;\
! 18: }\
! 19: z2[imin]=(z1[imin]<<(ulong)sh) | _k;\
! 20: }
! 21: #define shift_left(z2,z1,imin,imax,f, sh) {\
! 22: register const ulong _m = BITS_IN_LONG - sh;\
! 23: shift_left2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
! 24: }
! 25:
! 26: /* z2 := f.z1[imin..imax-1] shifted right sh bits (feeding f from the left) */
! 27: #define shift_right2(z2,z1,imin,imax,f, sh,m) {\
! 28: register ulong _i,_k,_l = z1[2];\
! 29: z2[imin] = (_l>>(ulong)sh) | ((ulong)f<<(ulong)m);\
! 30: for (_i=imin+1; _i<imax; _i++) {\
! 31: _k = _l<<(ulong)m; _l = z1[_i];\
! 32: z2[_i] = (_l>>(ulong)sh) | _k;\
! 33: }\
! 34: }
! 35: #define shift_right(z2,z1,imin,imax,f, sh) {\
! 36: register const ulong _m = BITS_IN_LONG - sh;\
! 37: shift_right2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
! 38: }
! 39:
! 40: #ifdef INLINE
! 41: INLINE
! 42: #endif
! 43: GEN
! 44: addsispec(long s, GEN x, long nx)
! 45: {
! 46: GEN xd, zd = (GEN)avma;
! 47: long lz;
! 48: LOCAL_OVERFLOW;
! 49:
! 50: lz = nx+3; (void)new_chunk(lz);
! 51: xd = x + nx;
! 52: *--zd = addll(*--xd, s);
! 53: if (overflow)
! 54: for(;;)
! 55: {
! 56: if (xd == x) { *--zd = 1; break; } /* enlarge z */
! 57: *--zd = ((ulong)*--xd) + 1;
! 58: if (*zd) { lz--; break; }
! 59: }
! 60: else lz--;
! 61: while (xd > x) *--zd = *--xd;
! 62: *--zd = evalsigne(1) | evallgefint(lz);
! 63: *--zd = evaltyp(t_INT) | evallg(lz);
! 64: avma=(long)zd; return zd;
! 65: }
! 66:
! 67: #define swapspec(x,y, nx,ny) {long _a=nx;GEN _z=x; nx=ny; ny=_a; x=y; y=_z;}
! 68:
! 69: #ifdef INLINE
! 70: INLINE
! 71: #endif
! 72: GEN
! 73: addiispec(GEN x, GEN y, long nx, long ny)
! 74: {
! 75: GEN xd,yd,zd;
! 76: long lz;
! 77: LOCAL_OVERFLOW;
! 78:
! 79: if (nx < ny) swapspec(x,y, nx,ny);
! 80: if (ny == 1) return addsispec(*y,x,nx);
! 81: zd = (GEN)avma;
! 82: lz = nx+3; (void)new_chunk(lz);
! 83: xd = x + nx;
! 84: yd = y + ny;
! 85: *--zd = addll(*--xd, *--yd);
! 86: while (yd > y) *--zd = addllx(*--xd, *--yd);
! 87: if (overflow)
! 88: for(;;)
! 89: {
! 90: if (xd == x) { *--zd = 1; break; } /* enlarge z */
! 91: *--zd = ((ulong)*--xd) + 1;
! 92: if (*zd) { lz--; break; }
! 93: }
! 94: else lz--;
! 95: while (xd > x) *--zd = *--xd;
! 96: *--zd = evalsigne(1) | evallgefint(lz);
! 97: *--zd = evaltyp(t_INT) | evallg(lz);
! 98: avma=(long)zd; return zd;
! 99: }
! 100:
! 101: /* assume x >= y */
! 102: #ifdef INLINE
! 103: INLINE
! 104: #endif
! 105: GEN
! 106: subisspec(GEN x, long s, long nx)
! 107: {
! 108: GEN xd, zd = (GEN)avma;
! 109: long lz;
! 110: LOCAL_OVERFLOW;
! 111:
! 112: lz = nx+2; (void)new_chunk(lz);
! 113: xd = x + nx;
! 114: *--zd = subll(*--xd, s);
! 115: if (overflow)
! 116: for(;;)
! 117: {
! 118: *--zd = ((ulong)*--xd) - 1;
! 119: if (*xd) break;
! 120: }
! 121: if (xd == x)
! 122: while (*zd == 0) { zd++; lz--; } /* shorten z */
! 123: else
! 124: do *--zd = *--xd; while (xd > x);
! 125: *--zd = evalsigne(1) | evallgefint(lz);
! 126: *--zd = evaltyp(t_INT) | evallg(lz);
! 127: avma=(long)zd; return zd;
! 128: }
! 129:
! 130: /* assume x >= y */
! 131: #ifdef INLINE
! 132: INLINE
! 133: #endif
! 134: GEN
! 135: subiispec(GEN x, GEN y, long nx, long ny)
! 136: {
! 137: GEN xd,yd,zd;
! 138: long lz;
! 139: LOCAL_OVERFLOW;
! 140:
! 141: if (ny==1) return subisspec(x,*y,nx);
! 142: zd = (GEN)avma;
! 143: lz = nx+2; (void)new_chunk(lz);
! 144: xd = x + nx;
! 145: yd = y + ny;
! 146: *--zd = subll(*--xd, *--yd);
! 147: while (yd > y) *--zd = subllx(*--xd, *--yd);
! 148: if (overflow)
! 149: for(;;)
! 150: {
! 151: *--zd = ((ulong)*--xd) - 1;
! 152: if (*xd) break;
! 153: }
! 154: if (xd == x)
! 155: while (*zd == 0) { zd++; lz--; } /* shorten z */
! 156: else
! 157: do *--zd = *--xd; while (xd > x);
! 158: *--zd = evalsigne(1) | evallgefint(lz);
! 159: *--zd = evaltyp(t_INT) | evallg(lz);
! 160: avma=(long)zd; return zd;
! 161: }
! 162:
! 163: #ifndef __M68K__
! 164:
! 165: /* prototype of positive small ints */
! 166: static long pos_s[] = {
! 167: evaltyp(t_INT) | m_evallg(3), evalsigne(1) | evallgefint(3), 0 };
! 168:
! 169: /* prototype of negative small ints */
! 170: static long neg_s[] = {
! 171: evaltyp(t_INT) | m_evallg(3), evalsigne(-1) | evallgefint(3), 0 };
! 172:
! 173: void
! 174: affir(GEN x, GEN y)
! 175: {
! 176: const long s=signe(x),ly=lg(y);
! 177: long lx,sh,i;
! 178:
! 179: if (!s)
! 180: {
! 181: y[1] = evalexpo(-bit_accuracy(ly));
! 182: y[2] = 0; return;
! 183: }
! 184:
! 185: lx=lgefint(x); sh=bfffo(x[2]);
! 186: y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);
! 187: if (sh)
! 188: {
! 189: if (lx>ly)
! 190: { shift_left(y,x,2,ly-1, x[ly],sh); }
! 191: else
! 192: {
! 193: for (i=lx; i<ly; i++) y[i]=0;
! 194: shift_left(y,x,2,lx-1, 0,sh);
! 195: }
! 196: return;
! 197: }
! 198:
! 199: if (lx>=ly)
! 200: for (i=2; i<ly; i++) y[i]=x[i];
! 201: else
! 202: {
! 203: for (i=2; i<lx; i++) y[i]=x[i];
! 204: for ( ; i<ly; i++) y[i]=0;
! 205: }
! 206: }
! 207:
! 208: void
! 209: affrr(GEN x, GEN y)
! 210: {
! 211: long lx,ly,i;
! 212:
! 213: y[1] = x[1]; if (!signe(x)) { y[2]=0; return; }
! 214:
! 215: lx=lg(x); ly=lg(y);
! 216: if (lx>=ly)
! 217: for (i=2; i<ly; i++) y[i]=x[i];
! 218: else
! 219: {
! 220: for (i=2; i<lx; i++) y[i]=x[i];
! 221: for ( ; i<ly; i++) y[i]=0;
! 222: }
! 223: }
! 224:
! 225: GEN
! 226: shifti(GEN x, long n)
! 227: {
! 228: const long s=signe(x);
! 229: long lx,ly,i,m;
! 230: GEN y;
! 231:
! 232: if (!s) return gzero;
! 233: if (!n) return icopy(x);
! 234: lx = lgefint(x);
! 235: if (n > 0)
! 236: {
! 237: GEN z = (GEN)avma;
! 238: long d = n>>TWOPOTBITS_IN_LONG;
! 239:
! 240: ly = lx+d; y = new_chunk(ly);
! 241: for ( ; d; d--) *--z = 0;
! 242: m = n & (BITS_IN_LONG-1);
! 243: if (!m) for (i=2; i<lx; i++) y[i]=x[i];
! 244: else
! 245: {
! 246: register const ulong sh = BITS_IN_LONG - m;
! 247: shift_left2(y,x, 2,lx-1, 0,m,sh);
! 248: i = ((ulong)x[2]) >> sh;
! 249: if (i) { ly++; y = new_chunk(1); y[2] = i; }
! 250: }
! 251: }
! 252: else
! 253: {
! 254: n = -n;
! 255: ly = lx - (n>>TWOPOTBITS_IN_LONG);
! 256: if (ly<3) return gzero;
! 257: y = new_chunk(ly);
! 258: m = n & (BITS_IN_LONG-1);
! 259: if (!m) for (i=2; i<ly; i++) y[i]=x[i];
! 260: else
! 261: {
! 262: shift_right(y,x, 2,ly, 0,m);
! 263: if (y[2] == 0)
! 264: {
! 265: if (ly==3) { avma = (long)(y+3); return gzero; }
! 266: ly--; avma = (long)(++y);
! 267: }
! 268: }
! 269: }
! 270: y[1] = evalsigne(s)|evallgefint(ly);
! 271: y[0] = evaltyp(t_INT)|evallg(ly); return y;
! 272: }
! 273:
! 274: GEN
! 275: mptrunc(GEN x)
! 276: {
! 277: long d,e,i,s,m;
! 278: GEN y;
! 279:
! 280: if (typ(x)==t_INT) return icopy(x);
! 281: if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gzero;
! 282: d = (e>>TWOPOTBITS_IN_LONG) + 3;
! 283: m = e & (BITS_IN_LONG-1);
! 284: if (d > lg(x)) err(truer2);
! 285:
! 286: y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);
! 287: if (++m == BITS_IN_LONG)
! 288: for (i=2; i<d; i++) y[i]=x[i];
! 289: else
! 290: {
! 291: const register ulong sh = BITS_IN_LONG - m;
! 292: shift_right2(y,x, 2,d,0, sh,m);
! 293: }
! 294: return y;
! 295: }
! 296:
! 297: /* integral part */
! 298: GEN
! 299: mpent(GEN x)
! 300: {
! 301: long d,e,i,lx,m;
! 302: GEN y;
! 303:
! 304: if (typ(x)==t_INT) return icopy(x);
! 305: if (signe(x) >= 0) return mptrunc(x);
! 306: if ((e=expo(x)) < 0) return stoi(-1);
! 307: d = (e>>TWOPOTBITS_IN_LONG) + 3;
! 308: m = e & (BITS_IN_LONG-1);
! 309: lx=lg(x); if (d>lx) err(truer2);
! 310: y = new_chunk(d);
! 311: if (++m == BITS_IN_LONG)
! 312: {
! 313: for (i=2; i<d; i++) y[i]=x[i];
! 314: i=d; while (i<lx && !x[i]) i++;
! 315: if (i==lx) goto END;
! 316: }
! 317: else
! 318: {
! 319: const register ulong sh = BITS_IN_LONG - m;
! 320: shift_right2(y,x, 2,d,0, sh,m);
! 321: if (x[d-1]<<m == 0)
! 322: {
! 323: i=d; while (i<lx && !x[i]) i++;
! 324: if (i==lx) goto END;
! 325: }
! 326: }
! 327: /* set y:=y+1 */
! 328: for (i=d-1; i>=2; i--) { y[i]++; if (y[i]) goto END; }
! 329: y=new_chunk(1); y[2]=1; d++;
! 330: END:
! 331: y[1] = evalsigne(-1) | evallgefint(d);
! 332: y[0] = evaltyp(t_INT) | evallg(d); return y;
! 333: }
! 334:
! 335: int
! 336: cmpsi(long x, GEN y)
! 337: {
! 338: ulong p;
! 339:
! 340: if (!x) return -signe(y);
! 341:
! 342: if (x>0)
! 343: {
! 344: if (signe(y)<=0) return 1;
! 345: if (lgefint(y)>3) return -1;
! 346: p=y[2]; if (p == (ulong)x) return 0;
! 347: return p < (ulong)x ? 1 : -1;
! 348: }
! 349:
! 350: if (signe(y)>=0) return -1;
! 351: if (lgefint(y)>3) return 1;
! 352: p=y[2]; if (p == (ulong)-x) return 0;
! 353: return p < (ulong)(-x) ? -1 : 1;
! 354: }
! 355:
! 356: int
! 357: cmpii(GEN x, GEN y)
! 358: {
! 359: const long sx = signe(x), sy = signe(y);
! 360: long lx,ly,i;
! 361:
! 362: if (sx<sy) return -1;
! 363: if (sx>sy) return 1;
! 364: if (!sx) return 0;
! 365:
! 366: lx=lgefint(x); ly=lgefint(y);
! 367: if (lx>ly) return sx;
! 368: if (lx<ly) return -sx;
! 369: i=2; while (i<lx && x[i]==y[i]) i++;
! 370: if (i==lx) return 0;
! 371: return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
! 372: }
! 373:
! 374: int
! 375: cmprr(GEN x, GEN y)
! 376: {
! 377: const long sx = signe(x), sy = signe(y);
! 378: long ex,ey,lx,ly,lz,i;
! 379:
! 380: if (sx<sy) return -1;
! 381: if (sx>sy) return 1;
! 382: if (!sx) return 0;
! 383:
! 384: ex=expo(x); ey=expo(y);
! 385: if (ex>ey) return sx;
! 386: if (ex<ey) return -sx;
! 387:
! 388: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
! 389: i=2; while (i<lz && x[i]==y[i]) i++;
! 390: if (i<lz) return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
! 391: if (lx>=ly)
! 392: {
! 393: while (i<lx && !x[i]) i++;
! 394: return (i==lx) ? 0 : sx;
! 395: }
! 396: while (i<ly && !y[i]) i++;
! 397: return (i==ly) ? 0 : -sx;
! 398: }
! 399:
! 400: GEN
! 401: addss(long x, long y)
! 402: {
! 403: if (!x) return stoi(y);
! 404: if (x>0) { pos_s[2] = x; return addsi(y,pos_s); }
! 405: neg_s[2] = -x; return addsi(y,neg_s);
! 406: }
! 407:
! 408: GEN
! 409: addsi(long x, GEN y)
! 410: {
! 411: long sx,sy,ly;
! 412: GEN z;
! 413:
! 414: if (!x) return icopy(y);
! 415: sy=signe(y); if (!sy) return stoi(x);
! 416: if (x<0) { sx=-1; x=-x; } else sx=1;
! 417: if (sx==sy)
! 418: {
! 419: z = addsispec(x,y+2, lgefint(y)-2);
! 420: setsigne(z,sy); return z;
! 421: }
! 422: ly=lgefint(y);
! 423: if (ly==3)
! 424: {
! 425: const long d = y[2] - x;
! 426: if (!d) return gzero;
! 427: z=cgeti(3);
! 428: if (y[2] < 0 || d > 0) {
! 429: z[1] = evalsigne(sy) | evallgefint(3);
! 430: z[2] = d;
! 431: }
! 432: else {
! 433: z[1] = evalsigne(-sy) | evallgefint(3);
! 434: z[2] =-d;
! 435: }
! 436: return z;
! 437: }
! 438: z = subisspec(y+2,x, ly-2);
! 439: setsigne(z,sy); return z;
! 440: }
! 441:
! 442: GEN
! 443: addii(GEN x, GEN y)
! 444: {
! 445: long sx = signe(x);
! 446: long sy = signe(y);
! 447: long lx,ly;
! 448: GEN z;
! 449:
! 450: if (!sx) return sy? icopy(y): gzero;
! 451: if (!sy) return icopy(x);
! 452: lx=lgefint(x); ly=lgefint(y);
! 453:
! 454: if (sx==sy)
! 455: z = addiispec(x+2,y+2,lx-2,ly-2);
! 456: else
! 457: { /* sx != sy */
! 458: long i = lx - ly;
! 459: if (i==0)
! 460: {
! 461: i = absi_cmp(x,y);
! 462: if (!i) return gzero;
! 463: }
! 464: if (i<0) { sx=sy; swapspec(x,y, lx,ly); } /* ensure |x| >= |y| */
! 465: z = subiispec(x+2,y+2,lx-2,ly-2);
! 466: }
! 467: setsigne(z,sx); return z;
! 468: }
! 469:
! 470: GEN
! 471: addsr(long x, GEN y)
! 472: {
! 473: if (!x) return rcopy(y);
! 474: if (x>0) { pos_s[2]=x; return addir(pos_s,y); }
! 475: neg_s[2] = -x; return addir(neg_s,y);
! 476: }
! 477:
! 478: GEN
! 479: addir(GEN x, GEN y)
! 480: {
! 481: long e,l,ly;
! 482: GEN z;
! 483:
! 484: if (!signe(x)) return rcopy(y);
! 485: if (!signe(y))
! 486: {
! 487: l=lgefint(x)-(expo(y)>>TWOPOTBITS_IN_LONG);
! 488: if (l<3) err(adder3);
! 489: z=cgetr(l); affir(x,z); return z;
! 490: }
! 491:
! 492: e = expo(y)-expi(x); ly=lg(y);
! 493: if (e>0)
! 494: {
! 495: l = ly - (e>>TWOPOTBITS_IN_LONG);
! 496: if (l<3) return rcopy(y);
! 497: }
! 498: else l = ly + ((-e)>>TWOPOTBITS_IN_LONG)+1;
! 499: z=cgetr(l); affir(x,z); y=addrr(z,y);
! 500: z = y+l; ly = lg(y); while (ly--) z[ly] = y[ly];
! 501: avma=(long)z; return z;
! 502: }
! 503:
! 504: GEN
! 505: addrr(GEN x, GEN y)
! 506: {
! 507: long sx=signe(x),sy=signe(y),ex=expo(x),ey=expo(y);
! 508: long e,m,flag,i,j,f2,lx,ly,lz;
! 509: GEN z;
! 510: LOCAL_OVERFLOW;
! 511:
! 512: e = ey-ex;
! 513: if (!sy)
! 514: {
! 515: if (!sx)
! 516: {
! 517: if (e > 0) ex=ey;
! 518: z=cgetr(3); z[1]=evalexpo(ex); z[2]=0; return z;
! 519: }
! 520: if (e > 0) { z=cgetr(3); z[1]=evalexpo(ey); z[2]=0; return z; }
! 521: lz = 3 + ((-e)>>TWOPOTBITS_IN_LONG);
! 522: lx = lg(x); if (lz>lx) lz=lx;
! 523: z = cgetr(lz); while(--lz) z[lz]=x[lz];
! 524: return z;
! 525: }
! 526: if (!sx)
! 527: {
! 528: if (e < 0) { z=cgetr(3); z[1]=evalexpo(ex); z[2]=0; return z; }
! 529: lz = 3 + (e>>TWOPOTBITS_IN_LONG);
! 530: ly = lg(y); if (lz>ly) lz=ly;
! 531: z = cgetr(lz); while (--lz) z[lz]=y[lz];
! 532: return z;
! 533: }
! 534:
! 535: if (e < 0) { z=x; x=y; y=z; ey=ex; i=sx; sx=sy; sy=i; e=-e; }
! 536: /* now ey >= ex */
! 537: lx=lg(x); ly=lg(y);
! 538: if (e)
! 539: {
! 540: long d = e >> TWOPOTBITS_IN_LONG, l = ly-d;
! 541: if (l > lx) { flag=0; lz=lx+d+1; }
! 542: else if (l > 2) { flag=1; lz=ly; lx=l; }
! 543: else return rcopy(y);
! 544: m = e & (BITS_IN_LONG-1);
! 545: }
! 546: else
! 547: {
! 548: if (lx > ly) lx = ly;
! 549: flag=2; lz=lx; m=0;
! 550: }
! 551: z = cgetr(lz);
! 552: if (m)
! 553: { /* shift right x m bits */
! 554: const ulong sh = BITS_IN_LONG-m;
! 555: GEN p1 = x; x = new_chunk(lx+1);
! 556: shift_right2(x,p1,2,lx, 0,m,sh);
! 557: if (flag==0)
! 558: {
! 559: x[lx] = p1[lx-1] << sh;
! 560: if (x[lx]) { flag = 3; lx++; }
! 561: }
! 562: }
! 563:
! 564: if (sx==sy)
! 565: { /* addition */
! 566: i=lz-1; avma = (long)z;
! 567: if (flag==0) { z[i] = y[i]; i--; }
! 568: overflow=0;
! 569: for (j=lx-1; j>=2; i--,j--) z[i] = addllx(x[j],y[i]);
! 570:
! 571: if (overflow)
! 572: for (;;)
! 573: {
! 574: if (i==1)
! 575: {
! 576: shift_right(z,z, 2,lz, 1,1);
! 577: ey=evalexpo(ey+1); if (ey & ~EXPOBITS) err(adder4);
! 578: z[1] = evalsigne(sx) | ey; return z;
! 579: }
! 580: z[i] = y[i]+1; if (z[i--]) break;
! 581: }
! 582: for (; i>=2; i--) z[i]=y[i];
! 583: z[1] = evalsigne(sx) | evalexpo(ey); return z;
! 584: }
! 585:
! 586: /* subtraction */
! 587: if (e) f2 = 1;
! 588: else
! 589: {
! 590: i=2; while (i<lx && x[i]==y[i]) i++;
! 591: if (i==lx)
! 592: {
! 593: avma = (long)(z+lz);
! 594: e = evalexpo(ey - bit_accuracy(lx));
! 595: if (e & ~EXPOBITS) err(adder4);
! 596: z=cgetr(3); z[1]=e; z[2]=0; return z;
! 597: }
! 598: f2 = ((ulong)y[i] > (ulong)x[i]);
! 599: }
! 600:
! 601: /* result is non-zero. f2 = (y > x) */
! 602: i=lz-1;
! 603: if (f2)
! 604: {
! 605: if (flag==0) { z[i] = y[i]; i--; }
! 606: j=lx-1; z[i] = subll(y[i],x[j]); i--; j--;
! 607: for (; j>=2; i--,j--) z[i] = subllx(y[i],x[j]);
! 608: if (overflow)
! 609: for (;;) { z[i] = y[i]-1; if (y[i--]) break; }
! 610: for (; i>=2; i--) z[i]=y[i];
! 611: sx = sy;
! 612: }
! 613: else
! 614: {
! 615: if (flag==0) { z[i] = -y[i]; i--; overflow=1; } else overflow=0;
! 616: for (; i>=2; i--) z[i]=subllx(x[i],y[i]);
! 617: }
! 618:
! 619: x = z+2; i=0; while (!x[i]) i++;
! 620: lz -= i; z += i; m = bfffo(z[2]);
! 621: e = evalexpo(ey - (m | (i<<TWOPOTBITS_IN_LONG)));
! 622: if (e & ~EXPOBITS) err(adder5);
! 623: if (m) shift_left(z,z,2,lz-1, 0,m);
! 624: z[1] = evalsigne(sx) | e;
! 625: z[0] = evaltyp(t_REAL) | evallg(lz);
! 626: avma = (long)z; return z;
! 627: }
! 628:
! 629: GEN
! 630: mulss(long x, long y)
! 631: {
! 632: long s,p1;
! 633: GEN z;
! 634: LOCAL_HIREMAINDER;
! 635:
! 636: if (!x || !y) return gzero;
! 637: if (x<0) { s = -1; x = -x; } else s=1;
! 638: if (y<0) { s = -s; y = -y; }
! 639: p1 = mulll(x,y);
! 640: if (hiremainder)
! 641: {
! 642: z=cgeti(4); z[1] = evalsigne(s) | evallgefint(4);
! 643: z[2]=hiremainder; z[3]=p1; return z;
! 644: }
! 645: z=cgeti(3); z[1] = evalsigne(s) | evallgefint(3);
! 646: z[2]=p1; return z;
! 647: }
! 648: #endif
! 649:
! 650: GEN
! 651: muluu(ulong x, ulong y)
! 652: {
! 653: long p1;
! 654: GEN z;
! 655: LOCAL_HIREMAINDER;
! 656:
! 657: if (!x || !y) return gzero;
! 658: p1 = mulll(x,y);
! 659: if (hiremainder)
! 660: {
! 661: z=cgeti(4); z[1] = evalsigne(1) | evallgefint(4);
! 662: z[2]=hiremainder; z[3]=p1; return z;
! 663: }
! 664: z=cgeti(3); z[1] = evalsigne(1) | evallgefint(3);
! 665: z[2]=p1; return z;
! 666: }
! 667:
! 668: /* assume ny > 0 */
! 669: #ifdef INLINE
! 670: INLINE
! 671: #endif
! 672: GEN
! 673: mulsispec(long x, GEN y, long ny)
! 674: {
! 675: GEN yd, z = (GEN)avma;
! 676: long lz = ny+3;
! 677: LOCAL_HIREMAINDER;
! 678:
! 679: (void)new_chunk(lz);
! 680: yd = y + ny; *--z = mulll(x, *--yd);
! 681: while (yd > y) *--z = addmul(x,*--yd);
! 682: if (hiremainder) *--z = hiremainder; else lz--;
! 683: *--z = evalsigne(1) | evallgefint(lz);
! 684: *--z = evaltyp(t_INT) | evallg(lz);
! 685: avma=(long)z; return z;
! 686: }
! 687:
! 688: GEN
! 689: mului(ulong x, GEN y)
! 690: {
! 691: long s = signe(y);
! 692: GEN z;
! 693:
! 694: if (!s || !x) return gzero;
! 695: z = mulsispec(x, y+2, lgefint(y)-2);
! 696: setsigne(z,s); return z;
! 697: }
! 698:
! 699: #ifndef __M68K__
! 700:
! 701: GEN
! 702: mulsi(long x, GEN y)
! 703: {
! 704: long s = signe(y);
! 705: GEN z;
! 706:
! 707: if (!s || !x) return gzero;
! 708: if (x<0) { s = -s; x = -x; }
! 709: z = mulsispec(x, y+2, lgefint(y)-2);
! 710: setsigne(z,s); return z;
! 711: }
! 712:
! 713: GEN
! 714: mulsr(long x, GEN y)
! 715: {
! 716: long lx,i,s,garde,e,sh,m;
! 717: GEN z;
! 718: LOCAL_HIREMAINDER;
! 719:
! 720: if (!x) return gzero;
! 721: s = signe(y);
! 722: if (!s)
! 723: {
! 724: if (x<0) x = -x;
! 725: e = y[1] + (BITS_IN_LONG-1)-bfffo(x);
! 726: if (e & ~EXPOBITS) err(muler2);
! 727: z=cgetr(3); z[1]=e; z[2]=0; return z;
! 728: }
! 729: if (x<0) { s = -s; x = -x; }
! 730: if (x==1) { z=rcopy(y); setsigne(z,s); return z; }
! 731:
! 732: lx = lg(y); e = expo(y);
! 733: z=cgetr(lx); y--; garde=mulll(x,y[lx]);
! 734: for (i=lx-1; i>=3; i--) z[i]=addmul(x,y[i]);
! 735: z[2]=hiremainder;
! 736:
! 737: sh = bfffo(hiremainder); m = BITS_IN_LONG-sh;
! 738: if (sh) shift_left2(z,z, 2,lx-1, garde,sh,m);
! 739: e = evalexpo(m+e); if (e & ~EXPOBITS) err(muler2);
! 740: z[1] = evalsigne(s) | e; return z;
! 741: }
! 742:
! 743: GEN
! 744: mulrr(GEN x, GEN y)
! 745: {
! 746: long sx = signe(x), sy = signe(y);
! 747: long i,j,ly,lz,lzz,e,flag,p1;
! 748: ulong garde;
! 749: GEN z, y1;
! 750: LOCAL_HIREMAINDER;
! 751: LOCAL_OVERFLOW;
! 752:
! 753: e = evalexpo(expo(x)+expo(y)); if (e & ~EXPOBITS) err(muler4);
! 754: if (!sx || !sy) { z=cgetr(3); z[1]=e; z[2]=0; return z; }
! 755: if (sy<0) sx = -sx;
! 756: lz=lg(x); ly=lg(y);
! 757: if (lz>ly) { lz=ly; z=x; x=y; y=z; flag=1; } else flag = (lz!=ly);
! 758: z=cgetr(lz); z[1] = evalsigne(sx) | e;
! 759: if (lz==3)
! 760: {
! 761: if (flag)
! 762: {
! 763: (void)mulll(x[2],y[3]);
! 764: garde = addmul(x[2],y[2]);
! 765: }
! 766: else
! 767: garde = mulll(x[2],y[2]);
! 768: if ((long)hiremainder<0) { z[2]=hiremainder; z[1]++; }
! 769: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
! 770: return z;
! 771: }
! 772:
! 773: if (flag) { (void)mulll(x[2],y[lz]); garde = hiremainder; } else garde=0;
! 774: lzz=lz-1; p1=x[lzz];
! 775: if (p1)
! 776: {
! 777: (void)mulll(p1,y[3]);
! 778: garde = addll(addmul(p1,y[2]), garde);
! 779: z[lzz] = overflow+hiremainder;
! 780: }
! 781: else z[lzz]=0;
! 782: for (j=lz-2, y1=y-j; j>=3; j--)
! 783: {
! 784: p1 = x[j]; y1++;
! 785: if (p1)
! 786: {
! 787: (void)mulll(p1,y1[lz+1]);
! 788: garde = addll(addmul(p1,y1[lz]), garde);
! 789: for (i=lzz; i>j; i--)
! 790: {
! 791: hiremainder += overflow;
! 792: z[i] = addll(addmul(p1,y1[i]), z[i]);
! 793: }
! 794: z[j] = hiremainder+overflow;
! 795: }
! 796: else z[j]=0;
! 797: }
! 798: p1=x[2]; y1++;
! 799: garde = addll(mulll(p1,y1[lz]), garde);
! 800: for (i=lzz; i>2; i--)
! 801: {
! 802: hiremainder += overflow;
! 803: z[i] = addll(addmul(p1,y1[i]), z[i]);
! 804: }
! 805: z[2] = hiremainder+overflow;
! 806: if (z[2] < 0) z[1]++;
! 807: else
! 808: shift_left(z,z,2,lzz,garde, 1);
! 809: return z;
! 810: }
! 811:
! 812: GEN
! 813: mulir(GEN x, GEN y)
! 814: {
! 815: long sx=signe(x),sy,lz,lzz,ey,e,p1,i,j;
! 816: ulong garde;
! 817: GEN z,y1;
! 818: LOCAL_HIREMAINDER;
! 819: LOCAL_OVERFLOW;
! 820:
! 821: if (!sx) return gzero;
! 822: if (!is_bigint(x)) return mulsr(itos(x),y);
! 823: sy=signe(y); ey=expo(y);
! 824: if (!sy)
! 825: {
! 826: e = evalexpo(expi(x)+ey);
! 827: if (e & ~EXPOBITS) err(muler6);
! 828: z=cgetr(3); z[1]=e; z[2]=0; return z;
! 829: }
! 830:
! 831: if (sy<0) sx = -sx;
! 832: lz=lg(y); z=cgetr(lz);
! 833: y1=cgetr(lz+1);
! 834: affir(x,y1); x=y; y=y1;
! 835: e = evalexpo(expo(y)+ey);
! 836: if (e & ~EXPOBITS) err(muler4);
! 837: z[1] = evalsigne(sx) | e;
! 838: if (lz==3)
! 839: {
! 840: (void)mulll(x[2],y[3]);
! 841: garde=addmul(x[2],y[2]);
! 842: if ((long)hiremainder < 0) { z[2]=hiremainder; z[1]++; }
! 843: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
! 844: avma=(long)z; return z;
! 845: }
! 846:
! 847: (void)mulll(x[2],y[lz]); garde=hiremainder;
! 848: lzz=lz-1; p1=x[lzz];
! 849: if (p1)
! 850: {
! 851: (void)mulll(p1,y[3]);
! 852: garde=addll(addmul(p1,y[2]),garde);
! 853: z[lzz] = overflow+hiremainder;
! 854: }
! 855: else z[lzz]=0;
! 856: for (j=lz-2, y1=y-j; j>=3; j--)
! 857: {
! 858: p1=x[j]; y1++;
! 859: if (p1)
! 860: {
! 861: (void)mulll(p1,y1[lz+1]);
! 862: garde = addll(addmul(p1,y1[lz]), garde);
! 863: for (i=lzz; i>j; i--)
! 864: {
! 865: hiremainder += overflow;
! 866: z[i] = addll(addmul(p1,y1[i]), z[i]);
! 867: }
! 868: z[j] = hiremainder+overflow;
! 869: }
! 870: else z[j]=0;
! 871: }
! 872: p1=x[2]; y1++;
! 873: garde = addll(mulll(p1,y1[lz]), garde);
! 874: for (i=lzz; i>2; i--)
! 875: {
! 876: hiremainder += overflow;
! 877: z[i] = addll(addmul(p1,y1[i]), z[i]);
! 878: }
! 879: z[2] = hiremainder+overflow;
! 880: if (z[2] < 0) z[1]++;
! 881: else
! 882: shift_left(z,z,2,lzz,garde, 1);
! 883: avma=(long)z; return z;
! 884: }
! 885:
! 886: /* written by Bruno Haible following an idea of Robert Harley */
! 887: long
! 888: vals(ulong z)
! 889: {
! 890: static char tab[64]={-1,0,1,12,2,6,-1,13,3,-1,7,-1,-1,-1,-1,14,10,4,-1,-1,8,-1,-1,25,-1,-1,-1,-1,-1,21,27,15,31,11,5,-1,-1,-1,-1,-1,9,-1,-1,24,-1,-1,20,26,30,-1,-1,-1,-1,23,-1,19,29,-1,22,18,28,17,16,-1};
! 891: #ifdef LONG_IS_64BIT
! 892: long s;
! 893: #endif
! 894:
! 895: if (!z) return -1;
! 896: #ifdef LONG_IS_64BIT
! 897: if (! (z&0xffffffff)) { s = 32; z >>=32; } else s = 0;
! 898: #endif
! 899: z |= ~z + 1;
! 900: z += z << 4;
! 901: z += z << 6;
! 902: z ^= z << 16; /* or z -= z<<16 */
! 903: #ifdef LONG_IS_64BIT
! 904: return s + tab[(z&0xffffffff)>>26];
! 905: #else
! 906: return tab[z>>26];
! 907: #endif
! 908: }
! 909:
! 910: GEN
! 911: modss(long x, long y)
! 912: {
! 913: LOCAL_HIREMAINDER;
! 914:
! 915: if (!y) err(moder1);
! 916: if (y<0) y=-y;
! 917: hiremainder=0; divll(labs(x),y);
! 918: if (!hiremainder) return gzero;
! 919: return (x < 0) ? stoi(y-hiremainder) : stoi(hiremainder);
! 920: }
! 921:
! 922: GEN
! 923: resss(long x, long y)
! 924: {
! 925: LOCAL_HIREMAINDER;
! 926:
! 927: if (!y) err(reser1);
! 928: hiremainder=0; divll(labs(x),labs(y));
! 929: return (x < 0) ? stoi(-((long)hiremainder)) : stoi(hiremainder);
! 930: }
! 931:
! 932: GEN
! 933: divsi(long x, GEN y)
! 934: {
! 935: long p1, s = signe(y);
! 936: LOCAL_HIREMAINDER;
! 937:
! 938: if (!s) err(diver2);
! 939: if (!x || lgefint(y)>3 || ((long)y[2])<0)
! 940: {
! 941: hiremainder=x; SAVE_HIREMAINDER; return gzero;
! 942: }
! 943: hiremainder=0; p1=divll(labs(x),y[2]);
! 944: if (x<0) { hiremainder = -((long)hiremainder); p1 = -p1; }
! 945: if (s<0) p1 = -p1;
! 946: SAVE_HIREMAINDER; return stoi(p1);
! 947: }
! 948: #endif
! 949:
! 950: GEN
! 951: modui(ulong x, GEN y)
! 952: {
! 953: LOCAL_HIREMAINDER;
! 954:
! 955: if (!signe(y)) err(diver2);
! 956: if (!x || lgefint(y)>3) hiremainder=x;
! 957: else
! 958: {
! 959: hiremainder=0; (void)divll(x,y[2]);
! 960: }
! 961: return utoi(hiremainder);
! 962: }
! 963:
! 964: GEN
! 965: modiu(GEN y, ulong x)
! 966: {
! 967: long sy=signe(y),ly,i;
! 968: LOCAL_HIREMAINDER;
! 969:
! 970: if (!x) err(diver4);
! 971: if (!sy) return gzero;
! 972: ly = lgefint(y);
! 973: if (x <= (ulong)y[2]) hiremainder=0;
! 974: else
! 975: {
! 976: if (ly==3) return utoi((sy > 0)? (ulong)y[2]: x - (ulong)y[2]);
! 977: hiremainder=y[2]; ly--; y++;
! 978: }
! 979: for (i=2; i<ly; i++) (void)divll(y[i],x);
! 980: return utoi((sy > 0)? hiremainder: x - hiremainder);
! 981: }
! 982:
! 983: #ifndef __M68K__
! 984:
! 985: GEN
! 986: modsi(long x, GEN y)
! 987: {
! 988: long s = signe(y);
! 989: GEN p1;
! 990: LOCAL_HIREMAINDER;
! 991:
! 992: if (!s) err(diver2);
! 993: if (!x || lgefint(y)>3 || ((long)y[2])<0) hiremainder=x;
! 994: else
! 995: {
! 996: hiremainder=0; (void)divll(labs(x),y[2]);
! 997: if (x<0) hiremainder = -((long)hiremainder);
! 998: }
! 999: if (!hiremainder) return gzero;
! 1000: if (x>0) return stoi(hiremainder);
! 1001: if (s<0)
! 1002: { setsigne(y,1); p1=addsi(hiremainder,y); setsigne(y,-1); }
! 1003: else
! 1004: p1=addsi(hiremainder,y);
! 1005: return p1;
! 1006: }
! 1007:
! 1008: GEN
! 1009: divis(GEN y, long x)
! 1010: {
! 1011: long sy=signe(y),ly,s,i;
! 1012: GEN z;
! 1013: LOCAL_HIREMAINDER;
! 1014:
! 1015: if (!x) err(diver4);
! 1016: if (!sy) { hiremainder=0; SAVE_HIREMAINDER; return gzero; }
! 1017: if (x<0) { s = -sy; x = -x; } else s=sy;
! 1018:
! 1019: ly = lgefint(y);
! 1020: if ((ulong)x <= (ulong)y[2]) hiremainder=0;
! 1021: else
! 1022: {
! 1023: if (ly==3) { hiremainder=itos(y); SAVE_HIREMAINDER; return gzero; }
! 1024: hiremainder=y[2]; ly--; y++;
! 1025: }
! 1026: z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
! 1027: for (i=2; i<ly; i++) z[i]=divll(y[i],x);
! 1028: if (sy<0) hiremainder = - ((long)hiremainder);
! 1029: SAVE_HIREMAINDER; return z;
! 1030: }
! 1031:
! 1032: GEN
! 1033: divir(GEN x, GEN y)
! 1034: {
! 1035: GEN xr,z;
! 1036: long av,ly;
! 1037:
! 1038: if (!signe(y)) err(diver5);
! 1039: if (!signe(x)) return gzero;
! 1040: ly=lg(y); z=cgetr(ly); av=avma; xr=cgetr(ly+1); affir(x,xr);
! 1041: affrr(divrr(xr,y),z); avma=av; return z;
! 1042: }
! 1043:
! 1044: GEN
! 1045: divri(GEN x, GEN y)
! 1046: {
! 1047: GEN yr,z;
! 1048: long av,lx,s=signe(y);
! 1049:
! 1050: if (!s) err(diver8);
! 1051: if (!signe(x))
! 1052: {
! 1053: const long ex = evalexpo(expo(x) - expi(y));
! 1054:
! 1055: if (ex<0) err(diver12);
! 1056: z=cgetr(3); z[1]=ex; z[2]=0; return z;
! 1057: }
! 1058: if (!is_bigint(y)) return divrs(x, s>0? y[2]: -y[2]);
! 1059:
! 1060: lx=lg(x); z=cgetr(lx);
! 1061: av=avma; yr=cgetr(lx+1); affir(y,yr);
! 1062: affrr(divrr(x,yr),z); avma=av; return z;
! 1063: }
! 1064:
! 1065: void
! 1066: diviiz(GEN x, GEN y, GEN z)
! 1067: {
! 1068: long av=avma,lz;
! 1069: GEN xr,yr;
! 1070:
! 1071: if (typ(z)==t_INT) { affii(divii(x,y),z); avma=av; return; }
! 1072: lz=lg(z); xr=cgetr(lz); affir(x,xr); yr=cgetr(lz); affir(y,yr);
! 1073: affrr(divrr(xr,yr),z); avma=av;
! 1074: }
! 1075:
! 1076: void
! 1077: mpdivz(GEN x, GEN y, GEN z)
! 1078: {
! 1079: long av=avma;
! 1080:
! 1081: if (typ(z)==t_INT)
! 1082: {
! 1083: if (typ(x)==t_REAL || typ(y)==t_REAL) err(divzer1);
! 1084: affii(divii(x,y),z); avma=av; return;
! 1085: }
! 1086: if (typ(x)==t_INT)
! 1087: {
! 1088: GEN xr,yr;
! 1089: long lz;
! 1090:
! 1091: if (typ(y)==t_REAL) { affrr(divir(x,y),z); avma=av; return; }
! 1092: lz=lg(z); xr=cgetr(lz); affir(x,xr); yr=cgetr(lz); affir(y,yr);
! 1093: affrr(divrr(xr,yr),z); avma=av; return;
! 1094: }
! 1095: if (typ(y)==t_REAL) { affrr(divrr(x,y),z); avma=av; return; }
! 1096: affrr(divri(x,y),z); avma=av;
! 1097: }
! 1098:
! 1099: GEN
! 1100: divsr(long x, GEN y)
! 1101: {
! 1102: long av,ly;
! 1103: GEN p1,z;
! 1104:
! 1105: if (!signe(y)) err(diver3);
! 1106: if (!x) return gzero;
! 1107: ly=lg(y); z=cgetr(ly); av=avma;
! 1108: p1=cgetr(ly+1); affsr(x,p1); affrr(divrr(p1,y),z);
! 1109: avma=av; return z;
! 1110: }
! 1111:
! 1112: GEN
! 1113: modii(GEN x, GEN y)
! 1114: {
! 1115: switch(signe(x))
! 1116: {
! 1117: case 0: return gzero;
! 1118: case 1: return resii(x,y);
! 1119: default:
! 1120: {
! 1121: long av = avma;
! 1122: (void)new_chunk(lgefint(y));
! 1123: x = resii(x,y); avma=av;
! 1124: if (x==gzero) return x;
! 1125: return subiispec(y+2,x+2,lgefint(y)-2,lgefint(x)-2);
! 1126: }
! 1127: }
! 1128: }
! 1129:
! 1130: void
! 1131: modiiz(GEN x, GEN y, GEN z)
! 1132: {
! 1133: const long av = avma;
! 1134: affii(modii(x,y),z); avma=av;
! 1135: }
! 1136:
! 1137: GEN
! 1138: divrs(GEN x, long y)
! 1139: {
! 1140: long i,lx,ex,garde,sh,s=signe(x);
! 1141: GEN z;
! 1142: LOCAL_HIREMAINDER;
! 1143:
! 1144: if (!y) err(diver6);
! 1145: if (!s)
! 1146: {
! 1147: z=cgetr(3); z[1] = x[1] - (BITS_IN_LONG-1) + bfffo(y);
! 1148: if (z[1]<0) err(diver7);
! 1149: z[2]=0; return z;
! 1150: }
! 1151: if (y<0) { s = -s; y = -y; }
! 1152: if (y==1) { z=rcopy(x); setsigne(z,s); return z; }
! 1153:
! 1154: z=cgetr(lx=lg(x)); hiremainder=0;
! 1155: for (i=2; i<lx; i++) z[i]=divll(x[i],y);
! 1156:
! 1157: /* we may have hiremainder != 0 ==> garde */
! 1158: garde=divll(0,y); sh=bfffo(z[2]); ex=evalexpo(expo(x)-sh);
! 1159: if (ex & ~EXPOBITS) err(diver7);
! 1160: if (sh) shift_left(z,z, 2,lx-1, garde,sh);
! 1161: z[1] = evalsigne(s) | ex;
! 1162: return z;
! 1163: }
! 1164:
! 1165: GEN
! 1166: divrr(GEN x, GEN y)
! 1167: {
! 1168: long sx=signe(x), sy=signe(y), lx,ly,lz,e,i,j;
! 1169: ulong si,saux;
! 1170: GEN z,x1;
! 1171:
! 1172: if (!sy) err(diver9);
! 1173: e = evalexpo(expo(x) - expo(y));
! 1174: if (e & ~EXPOBITS) err(diver11);
! 1175: if (!sx) { z=cgetr(3); z[1]=e; z[2]=0; return z; }
! 1176: if (sy<0) sx = -sx;
! 1177: e = evalsigne(sx) | e;
! 1178: lx=lg(x); ly=lg(y);
! 1179: if (ly==3)
! 1180: {
! 1181: ulong k = x[2], l = (lx>3)? x[3]: 0;
! 1182: LOCAL_HIREMAINDER;
! 1183: if (k < (ulong)y[2]) e--;
! 1184: else
! 1185: {
! 1186: l >>= 1; if (k&1) l |= HIGHBIT;
! 1187: k >>= 1;
! 1188: }
! 1189: z=cgetr(3); z[1]=e;
! 1190: hiremainder=k; z[2]=divll(l,y[2]); return z;
! 1191: }
! 1192:
! 1193: lz = (lx<=ly)? lx: ly;
! 1194: x1 = (z=new_chunk(lz))-1;
! 1195: x1[1]=0; for (i=2; i<lz; i++) x1[i]=x[i];
! 1196: x1[lz] = (lx>lz)? x[lz]: 0;
! 1197: x=z; si=y[2]; saux=y[3];
! 1198: for (i=0; i<lz-1; i++)
! 1199: { /* x1 = x + (i-1) */
! 1200: ulong k,qp;
! 1201: LOCAL_HIREMAINDER;
! 1202: LOCAL_OVERFLOW;
! 1203: if ((ulong)x1[1] == si)
! 1204: {
! 1205: qp = MAXULONG; k=addll(si,x1[2]);
! 1206: }
! 1207: else
! 1208: {
! 1209: if ((ulong)x1[1] > si) /* can't happen if i=0 */
! 1210: {
! 1211: GEN y1 = y+1;
! 1212: overflow=0;
! 1213: for (j=lz-i; j>0; j--) x1[j] = subllx(x1[j],y1[j]);
! 1214: j=i; do x[--j]++; while (j && !x[j]);
! 1215: }
! 1216: hiremainder=x1[1]; overflow=0;
! 1217: qp=divll(x1[2],si); k=hiremainder;
! 1218: }
! 1219: if (!overflow)
! 1220: {
! 1221: long k3 = subll(mulll(qp,saux), x1[3]);
! 1222: long k4 = subllx(hiremainder,k);
! 1223: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
! 1224: }
! 1225: j = lz-i+1;
! 1226: if (j<ly) (void)mulll(qp,y[j]); else { hiremainder=0 ; j=ly; }
! 1227: for (j--; j>1; j--)
! 1228: {
! 1229: x1[j] = subll(x1[j], addmul(qp,y[j]));
! 1230: hiremainder += overflow;
! 1231: }
! 1232: if (x1[1] != hiremainder)
! 1233: {
! 1234: if ((ulong)x1[1] < hiremainder)
! 1235: {
! 1236: qp--;
! 1237: overflow=0; for (j=lz-i; j>1; j--) x1[j]=addllx(x1[j], y[j]);
! 1238: }
! 1239: else
! 1240: {
! 1241: x1[1] -= hiremainder;
! 1242: while (x1[1])
! 1243: {
! 1244: qp++; if (!qp) { j=i; do x[--j]++; while (j && !x[j]); }
! 1245: overflow=0; for (j=lz-i; j>1; j--) x1[j]=subllx(x1[j],y[j]);
! 1246: x1[1] -= overflow;
! 1247: }
! 1248: }
! 1249: }
! 1250: x1[1]=qp; x1++;
! 1251: }
! 1252: x1 = x-1; for (j=lz-1; j>=2; j--) x[j]=x1[j];
! 1253: if (*x) { shift_right(x,x, 2,lz, 1,1); } else e--;
! 1254: x[0]=evaltyp(t_REAL)|evallg(lz);
! 1255: x[1]=e; return x;
! 1256: }
! 1257: #endif /* !defined(__M68K__) */
! 1258: /* The following ones are not in mp.s (mulii is, with a different algorithm) */
! 1259:
! 1260: /* Integer division x / y:
! 1261: * if z = ONLY_REM return remainder, otherwise return quotient
! 1262: * if z != NULL set *z to remainder
! 1263: * *z is the last object on stack (and thus can be disposed of with cgiv
! 1264: * instead of gerepile)
! 1265: * If *z is zero, we put gzero here and no copy.
! 1266: * space needed: lx + ly
! 1267: */
! 1268: GEN
! 1269: dvmdii(GEN x, GEN y, GEN *z)
! 1270: {
! 1271: long sx=signe(x),sy=signe(y);
! 1272: long av,lx,ly,lz,i,j,sh,k,lq,lr;
! 1273: ulong si,qp,saux, *xd,*rd,*qd;
! 1274: GEN r,q,x1;
! 1275:
! 1276: if (!sy) err(dvmer1);
! 1277: if (!sx)
! 1278: {
! 1279: if (!z || z == ONLY_REM) return gzero;
! 1280: *z=gzero; return gzero;
! 1281: }
! 1282: lx=lgefint(x);
! 1283: ly=lgefint(y); lz=lx-ly;
! 1284: if (lz <= 0)
! 1285: {
! 1286: if (lz == 0)
! 1287: {
! 1288: for (i=2; i<lx; i++)
! 1289: if (x[i] != y[i])
! 1290: {
! 1291: if ((ulong)x[i] > (ulong)y[i]) goto DIVIDE;
! 1292: goto TRIVIAL;
! 1293: }
! 1294: if (z == ONLY_REM) return gzero;
! 1295: if (z) *z = gzero;
! 1296: if (sx < 0) sy = -sy;
! 1297: return stoi(sy);
! 1298: }
! 1299: TRIVIAL:
! 1300: if (z == ONLY_REM) return icopy(x);
! 1301: if (z) *z = icopy(x);
! 1302: return gzero;
! 1303: }
! 1304: DIVIDE: /* quotient is non-zero */
! 1305: av=avma; if (sx<0) sy = -sy;
! 1306: if (ly==3)
! 1307: {
! 1308: LOCAL_HIREMAINDER;
! 1309: si = y[2];
! 1310: if (si <= (ulong)x[2]) hiremainder=0;
! 1311: else
! 1312: {
! 1313: hiremainder = x[2]; lx--; x++;
! 1314: }
! 1315: q = new_chunk(lx); for (i=2; i<lx; i++) q[i]=divll(x[i],si);
! 1316: if (z == ONLY_REM)
! 1317: {
! 1318: avma=av; if (!hiremainder) return gzero;
! 1319: r=cgeti(3);
! 1320: r[1] = evalsigne(sx) | evallgefint(3);
! 1321: r[2]=hiremainder; return r;
! 1322: }
! 1323: q[1] = evalsigne(sy) | evallgefint(lx);
! 1324: q[0] = evaltyp(t_INT) | evallg(lx);
! 1325: if (!z) return q;
! 1326: if (!hiremainder) { *z=gzero; return q; }
! 1327: r=cgeti(3);
! 1328: r[1] = evalsigne(sx) | evallgefint(3);
! 1329: r[2] = hiremainder; *z=r; return q;
! 1330: }
! 1331:
! 1332: x1 = new_chunk(lx); sh = bfffo(y[2]);
! 1333: if (sh)
! 1334: { /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/
! 1335: register const ulong m = BITS_IN_LONG - sh;
! 1336: r = new_chunk(ly);
! 1337: shift_left2(r, y,2,ly-1, 0,sh,m); y = r;
! 1338: shift_left2(x1,x,2,lx-1, 0,sh,m);
! 1339: x1[1] = ((ulong)x[2]) >> m;
! 1340: }
! 1341: else
! 1342: {
! 1343: x1[1]=0; for (j=2; j<lx; j++) x1[j]=x[j];
! 1344: }
! 1345: x=x1; si=y[2]; saux=y[3];
! 1346: for (i=0; i<=lz; i++)
! 1347: { /* x1 = x + i */
! 1348: LOCAL_HIREMAINDER;
! 1349: LOCAL_OVERFLOW;
! 1350: if ((ulong)x1[1] == si)
! 1351: {
! 1352: qp = MAXULONG; k=addll(si,x1[2]);
! 1353: }
! 1354: else
! 1355: {
! 1356: hiremainder=x1[1]; overflow=0;
! 1357: qp=divll(x1[2],si); k=hiremainder;
! 1358: }
! 1359: if (!overflow)
! 1360: {
! 1361: long k3 = subll(mulll(qp,saux), x1[3]);
! 1362: long k4 = subllx(hiremainder,k);
! 1363: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
! 1364: }
! 1365: hiremainder=0;
! 1366: for (j=ly-1; j>1; j--)
! 1367: {
! 1368: x1[j] = subll(x1[j], addmul(qp,y[j]));
! 1369: hiremainder+=overflow;
! 1370: }
! 1371: if ((ulong)x1[1] < hiremainder)
! 1372: {
! 1373: overflow=0; qp--;
! 1374: for (j=ly-1; j>1; j--) x1[j] = addllx(x1[j],y[j]);
! 1375: }
! 1376: x1[1]=qp; x1++;
! 1377: }
! 1378:
! 1379: lq = lz+2;
! 1380: if (!z)
! 1381: {
! 1382: qd = (ulong*)av;
! 1383: xd = (ulong*)(x + lq);
! 1384: if (x[1]) { lz++; lq++; }
! 1385: while (lz--) *--qd = *--xd;
! 1386: *--qd = evalsigne(sy) | evallgefint(lq);
! 1387: *--qd = evaltyp(t_INT) | evallg(lq);
! 1388: avma = (long)qd; return (GEN)qd;
! 1389: }
! 1390:
! 1391: j=lq; while (j<lx && !x[j]) j++;
! 1392: if (z == ONLY_REM)
! 1393: {
! 1394: lz = lx-j; if (lz==0) { avma = av; return gzero; }
! 1395: rd = (ulong*)av; lr = lz+2;
! 1396: xd = (ulong*)(x + lx);
! 1397: if (!sh) while (lz--) *--rd = *--xd;
! 1398: else
! 1399: { /* shift remainder right by sh bits */
! 1400: const ulong shl = BITS_IN_LONG - sh;
! 1401: ulong l;
! 1402: xd--;
! 1403: while (--lz) /* fill r[3..] */
! 1404: {
! 1405: l = *xd >> sh;
! 1406: *--rd = l | (*--xd << shl);
! 1407: }
! 1408: l = *xd >> sh;
! 1409: if (l) *--rd = l; else lr--;
! 1410: }
! 1411: *--rd = evalsigne(sx) | evallgefint(lr);
! 1412: *--rd = evaltyp(t_INT) | evallg(lr);
! 1413: avma = (long)rd; return (GEN)rd;
! 1414: }
! 1415:
! 1416: lz = lx-j; lr = lz+2;
! 1417: if (lz)
! 1418: {
! 1419: xd = (ulong*)(x + lx);
! 1420: if (!sh)
! 1421: {
! 1422: rd = (ulong*)avma; r = new_chunk(lr);
! 1423: while (lz--) *--rd = *--xd;
! 1424: }
! 1425: else
! 1426: { /* shift remainder right by sh bits */
! 1427: const ulong shl = BITS_IN_LONG - sh;
! 1428: ulong l;
! 1429: rd = (ulong*)x; /* overwrite shifted y */
! 1430: xd--;
! 1431: while (--lz)
! 1432: {
! 1433: l = *xd >> sh;
! 1434: *--rd = l | (*--xd << shl);
! 1435: }
! 1436: l = *xd >> sh;
! 1437: if (l) *--rd = l; else lr--;
! 1438: }
! 1439: *--rd = evalsigne(sx) | evallgefint(lr);
! 1440: *--rd = evaltyp(t_INT) | evallg(lr);
! 1441: rd += lr;
! 1442: }
! 1443: qd = (ulong*)av;
! 1444: xd = (ulong*)(x + lq);
! 1445: if (x[1]) lq++;
! 1446: j = lq-2; while (j--) *--qd = *--xd;
! 1447: *--qd = evalsigne(sy) | evallgefint(lq);
! 1448: *--qd = evaltyp(t_INT) | evallg(lq);
! 1449: q = (GEN)qd;
! 1450: if (lr==2) *z = gzero;
! 1451: else
! 1452: {
! 1453: while (lr--) *--qd = *--rd;
! 1454: *z = (GEN)qd;
! 1455: }
! 1456: avma = (long)qd; return q;
! 1457: }
! 1458:
! 1459: /* assume y > x > 0. return y mod x */
! 1460: ulong
! 1461: mppgcd_resiu(GEN y, ulong x)
! 1462: {
! 1463: long i, ly = lgefint(y);
! 1464: LOCAL_HIREMAINDER;
! 1465:
! 1466: hiremainder = 0;
! 1467: for (i=2; i<ly; i++) (void)divll(y[i],x);
! 1468: return hiremainder;
! 1469: }
! 1470:
! 1471: /* Assume x>y>0, both of them odd. return x-y if x=y mod 4, x+y otherwise */
! 1472: void
! 1473: mppgcd_plus_minus(GEN x, GEN y, GEN res)
! 1474: {
! 1475: long av = avma;
! 1476: long lx = lgefint(x)-1;
! 1477: long ly = lgefint(y)-1, lt,m,i;
! 1478: GEN t;
! 1479:
! 1480: if ((x[lx]^y[ly]) & 3) /* x != y mod 4*/
! 1481: t = addiispec(x+2,y+2,lx-1,ly-1);
! 1482: else
! 1483: t = subiispec(x+2,y+2,lx-1,ly-1);
! 1484:
! 1485: lt = lgefint(t)-1; while (!t[lt]) lt--;
! 1486: m = vals(t[lt]); lt++;
! 1487: if (m == 0) /* 2^32 | t */
! 1488: {
! 1489: for (i = 2; i < lt; i++) res[i] = t[i];
! 1490: }
! 1491: else if (t[2] >> m)
! 1492: {
! 1493: shift_right(res,t, 2,lt, 0,m);
! 1494: }
! 1495: else
! 1496: {
! 1497: lt--; t++;
! 1498: shift_right(res,t, 2,lt, t[1],m);
! 1499: }
! 1500: res[1] = evalsigne(1)|evallgefint(lt);
! 1501: avma = av;
! 1502: }
! 1503:
! 1504: /* private version of mulss */
! 1505: ulong
! 1506: smulss(ulong x, ulong y, ulong *rem)
! 1507: {
! 1508: LOCAL_HIREMAINDER;
! 1509: x=mulll(x,y); *rem=hiremainder; return x;
! 1510: }
! 1511:
! 1512: #ifdef LONG_IS_64BIT
! 1513: # define DIVCONVI 7
! 1514: #else
! 1515: # define DIVCONVI 14
! 1516: #endif
! 1517:
! 1518: /* Conversion entier --> base 10^9. Assume x > 0 */
! 1519: GEN
! 1520: convi(GEN x)
! 1521: {
! 1522: ulong av=avma, lim;
! 1523: long lz;
! 1524: GEN z,p1;
! 1525:
! 1526: lz = 3 + ((lgefint(x)-2)*15)/DIVCONVI;
! 1527: z=new_chunk(lz); z[1] = -1; p1 = z+2;
! 1528: lim = stack_lim(av,1);
! 1529: for(;;)
! 1530: {
! 1531: x = divis(x,1000000000); *p1++ = hiremainder;
! 1532: if (!signe(x)) { avma=av; return p1; }
! 1533: if (low_stack(lim, stack_lim(av,1))) x = gerepileuptoint((long)z,x);
! 1534: }
! 1535: }
! 1536: #undef DIVCONVI
! 1537:
! 1538: /* Conversion partie fractionnaire --> base 10^9 (expo(x)<0) */
! 1539: GEN
! 1540: confrac(GEN x)
! 1541: {
! 1542: long lx=lg(x), ex = -expo(x)-1, d,m,ly,ey,lr,nbdec,i,j;
! 1543: GEN y,y1;
! 1544:
! 1545: ey = ex + bit_accuracy(lx);
! 1546: ly = 1 + ((ey-1) >> TWOPOTBITS_IN_LONG);
! 1547: d = ex >> TWOPOTBITS_IN_LONG;
! 1548: m = ex & (BITS_IN_LONG-1);
! 1549: y = new_chunk(ly); y1 = y + (d-2);
! 1550: while (d--) y[d]=0;
! 1551: if (!m)
! 1552: for (i=2; i<lx; i++) y1[i] = x[i];
! 1553: else
! 1554: { /* shift x left sh bits */
! 1555: const ulong sh=BITS_IN_LONG-m;
! 1556: ulong k=0, l;
! 1557: for (i=2; i<lx; i++)
! 1558: {
! 1559: l = x[i];
! 1560: y1[i] = (l>>m)|k;
! 1561: k = l<<sh;
! 1562: }
! 1563: y1[i] = k;
! 1564: }
! 1565: nbdec = (long) (ey*L2SL10)+1; lr=(nbdec+17)/9;
! 1566: y1=new_chunk(lr); *y1=nbdec;
! 1567: for (j=1; j<lr; j++)
! 1568: {
! 1569: LOCAL_HIREMAINDER;
! 1570: hiremainder=0;
! 1571: for (i=ly-1; i>=0; i--) y[i]=addmul(y[i],1000000000);
! 1572: y1[j]=hiremainder;
! 1573: }
! 1574: return y1;
! 1575: }
! 1576:
! 1577: GEN
! 1578: truedvmdii(GEN x, GEN y, GEN *z)
! 1579: {
! 1580: long av=avma,tetpil;
! 1581: GEN res, qu = dvmdii(x,y,&res);
! 1582: GEN *gptr[2];
! 1583:
! 1584: if (signe(res)>=0)
! 1585: {
! 1586: if (z == ONLY_REM) return gerepileuptoint(av,res);
! 1587: if (z) *z = res; else cgiv(res);
! 1588: return qu;
! 1589: }
! 1590:
! 1591: tetpil=avma;
! 1592: if (z == ONLY_REM)
! 1593: {
! 1594: res = subiispec(y+2,res+2, lgefint(y)-2,lgefint(res)-2);
! 1595: return gerepile(av,tetpil,res);
! 1596: }
! 1597: qu = addsi(-signe(y),qu);
! 1598: if (!z) return gerepile(av,tetpil,qu);
! 1599:
! 1600: *z = subiispec(y+2,res+2, lgefint(y)-2,lgefint(res)-2);
! 1601: gptr[0]=&qu; gptr[1]=z; gerepilemanysp(av,tetpil,gptr,2);
! 1602: return qu;
! 1603: }
! 1604:
! 1605: #if 0
! 1606: /* Exact integer division */
! 1607:
! 1608: static ulong
! 1609: invrev(ulong b)
! 1610: /* Find c such that 1=c*b mod B (where B = 2^32 or 2^64), assuming b odd,
! 1611: which is not checked */
! 1612: {
! 1613: int r;
! 1614: ulong x;
! 1615:
! 1616: r=b&7; x=(r==3 || r==5)? b+8: b; /* x=b^(-1) mod 2^4 */
! 1617: x=x*(2-b*x); x=x*(2-b*x); x=x*(2-b*x); /* x=b^(-1) mod 2^32 */
! 1618: #ifdef LONG_IS_64BIT
! 1619: x=x*(2-b*x); /* x=b^(-1) mod 2^64 */
! 1620: #endif
! 1621: return x;
! 1622: }
! 1623:
! 1624: #define divllrev(a,b) (((ulong)a)*invrev(b))
! 1625:
! 1626: /* 2-adic division */
! 1627: GEN
! 1628: diviirev(GEN x, GEN y, long a)
! 1629: /* Find z such that |x|=|y|*z mod B^a, where a<=lgefint(x)-2 */
! 1630: {
! 1631: long lx,lgx,ly,vy,av=avma,tetpil,i,j,ii;
! 1632: ulong binv,q;
! 1633: GEN z;
! 1634:
! 1635: if (!signe(y)) err(dvmer1);
! 1636: if (!signe(x)) return gzero;
! 1637: /* make y odd */
! 1638: vy=vali(y);
! 1639: if (vy)
! 1640: {
! 1641: if (vali(x)<vy) err(talker,"impossible division in diviirev");
! 1642: y=shifti(y,-vy); x=shifti(x,-vy); a-=(vy>>TWOPOTBITS_IN_LONG);
! 1643: }
! 1644: else x=icopy(x); /* necessary because we destroy x */
! 1645: /* improve the above by touching only a words */
! 1646: if (a<=0) {avma=a;return gzero;}
! 1647: /* now y is odd */
! 1648: lx=a+2; ly=lgefint(y); lgx=lgefint(x);
! 1649: if (lx>lgx) err(talker,"3rd parameter too large in diviirev");
! 1650: binv=invrev(y[ly-1]);
! 1651: z=cgeti(lx);
! 1652: for (ii=lgx-1,i=lx-1; i>=2; i--,ii--)
! 1653: {
! 1654: long limj;
! 1655: LOCAL_HIREMAINDER;
! 1656: LOCAL_OVERFLOW;
! 1657:
! 1658: z[i]=q=binv*((ulong)x[ii]); /* this is the i-th quotient */
! 1659: limj=max(lgx-a,3+ii-ly);
! 1660: mulll(q,y[ly-1]); overflow=0;
! 1661: for (j=ii-1; j>=limj; j--)
! 1662: x[j]=subllx(x[j],addmul(q,y[j+ly-ii-1]));
! 1663: }
! 1664: tetpil=avma; i=2; while((i<lx)&&(!z[i])) i++;
! 1665: if (i==lx) {avma=av; return gzero;}
! 1666: y=cgeti(lx-i+2); y[1]=evalsigne(1)|evallgefint(lx-i+2); j=2;
! 1667: for ( ; i<lx; i++) y[j++]=z[i];
! 1668: return gerepile(av,tetpil,y);
! 1669: }
! 1670:
! 1671: GEN
! 1672: diviiexactfullrev(GEN x, GEN y)
! 1673: /* Find z such that x=y*z knowing that y divides x */
! 1674: {
! 1675: long lx,lz,ly,vy,av=avma,tetpil,i,j,ii,sx=signe(x),sy=signe(y);
! 1676: ulong binv,q;
! 1677: GEN z;
! 1678:
! 1679: if (!sy) err(dvmer1);
! 1680: if (!sx) return gzero;
! 1681: /* make y odd */
! 1682: vy=vali(y);
! 1683: if (vy)
! 1684: {
! 1685: if (vali(x)<vy) err(talker,"impossible division in diviirev");
! 1686: y=shifti(y,-vy); x=shifti(x,-vy);
! 1687: }
! 1688: else x=icopy(x); /* necessary because we destroy x */
! 1689: /* now y is odd */
! 1690: ly=lgefint(y); lx=lgefint(x);
! 1691: if (ly>lx) err(talker,"impossible division in diviirev");
! 1692: binv=invrev(y[ly-1]);
! 1693: i=2; while (i<ly && y[i]==x[i]) i++;
! 1694: lz=(i==ly || y[i]<x[i]) ? lx-ly+3 : lx-ly+2;
! 1695: z=cgeti(lz);
! 1696: for (ii=lx-1,i=lz-1; i>=2; i--,ii--)
! 1697: {
! 1698: long limj;
! 1699: LOCAL_HIREMAINDER;
! 1700: LOCAL_OVERFLOW;
! 1701:
! 1702: z[i]=q=binv*((ulong)x[ii]); /* this is the i-th quotient */
! 1703: limj=max(lx-lz+2,3+ii-ly);
! 1704: mulll(q,y[ly-1]); overflow=0;
! 1705: for (j=ii-1; j>=limj; j--)
! 1706: x[j]=subllx(x[j],addmul(q,y[j+ly-ii-1]));
! 1707: }
! 1708: tetpil=avma; i=2; while((i<lz)&&(!z[i])) i++;
! 1709: if (i==lz) err(talker,"bug in diviiexact");
! 1710: y=cgeti(lz-i+2); y[1]=evalsigne(sx*sy) | evallgefint(lz-i+2); j=2;
! 1711: for ( ; i<lz; i++) y[j++]=z[i];
! 1712: return gerepile(av,tetpil,y);
! 1713: }
! 1714:
! 1715: GEN
! 1716: diviiexact2(GEN x, GEN y)
! 1717: /* Find z such that x=y*z assuming y divides x (which is not checked) */
! 1718: {
! 1719: long sx=signe(x),sy=signe(y),av=avma,tetpil,lyinv,lpr,a,lx,ly,lz,lzs,lp1;
! 1720: long i,j,vy;
! 1721: ulong aux;
! 1722: GEN yinv,p1,z,xinit,yinit;
! 1723:
! 1724: if (!sy) err(dvmer1);
! 1725: if (!sx) return gzero;
! 1726: xinit=x; yinit=y;
! 1727: setsigne(y,1);setsigne(x,1);
! 1728: /* make y odd */
! 1729: vy=vali(y);
! 1730: if (vy)
! 1731: {
! 1732: if (vali(x)<vy) err(talker,"impossible division in diviirev");
! 1733: y=shifti(y,-vy); x=shifti(x,-vy);
! 1734: }
! 1735: /* now y is odd */
! 1736: ly=lgefint(y); lx=lgefint(x);
! 1737: if (lx<ly) err(talker,"not an exact division in diviiexact");
! 1738: a=lx-ly+1; lz=a+2;
! 1739: aux=invrev(y[ly-1]);
! 1740: if (aux & HIGHBIT) {yinv=stoi(aux^HIGHBIT);yinv[2]|=HIGHBIT;}
! 1741: else yinv=stoi(aux); /* inverse of y mod 2^32 (or 2^64) */
! 1742: lpr=1; /* current accuracy */
! 1743: while(lpr<a)
! 1744: {
! 1745: long lycut;
! 1746: GEN ycut;
! 1747:
! 1748: lyinv=lgefint(yinv);
! 1749: lycut=min(2*lpr+2,ly);
! 1750: ycut=cgeti(lycut); ycut[1]=evalsigne(1) | evallgefint(lycut);
! 1751: for(i=2; i<lycut; i++) ycut[i]=y[ly+i-lycut];
! 1752: p1=mulii(yinv,ycut); lp1=lgefint(p1);
! 1753: if (lp1>lpr+2)
! 1754: {
! 1755: long lp1cut,lynew,lp2;
! 1756: GEN p1cut,p2,ynew;
! 1757: LOCAL_OVERFLOW;
! 1758:
! 1759: lp1cut=min(lp1-lpr,lpr+2);
! 1760: p1cut=cgeti(lp1cut); p1cut[1]=evalsigne(1) | evallgefint(lp1cut);
! 1761: overflow=0;
! 1762: for (i=lp1cut-1; i>=2; i--) p1cut[i]=subllx(0,p1[i+lp1-lpr-lp1cut]);
! 1763: p2=mulii(p1cut,yinv); lp2=lgefint(p2);
! 1764: lynew=(lp2<=lpr+2) ? lpr+lp2 : 2*lpr+2;
! 1765: ynew=cgeti(lynew); ynew[1]=evalsigne(1) | evallgefint(lynew);
! 1766: for (i=lynew-1; i>=lynew-lpr; i--) ynew[i]=yinv[i+lyinv-lynew];
! 1767: for (i=lynew-lpr-1; i>=2; i--) ynew[i]=p2[i+lp2+lpr-lynew];
! 1768: yinv=ynew;
! 1769: }
! 1770: lpr<<=1;
! 1771: }
! 1772: lyinv=lgefint(yinv); lzs=min(lz,lyinv);
! 1773: z=cgeti(lzs); z[1]=evalsigne(1) | evallgefint(lzs);
! 1774: for(i=2; i<lzs; i++) z[i]=yinv[i+lyinv-lzs];
! 1775: p1=mulii(z,x); lp1=lgefint(p1); lzs=min(lz,lp1);
! 1776: z=cgeti(lzs);
! 1777: for (i=2; i<lzs; i++) z[i]=p1[i+lp1-lzs];
! 1778: i=2; while (i<lzs && !z[i]) i++;
! 1779: if (i==lzs) err(talker,"bug in diviiexact");
! 1780: tetpil=avma; p1=cgeti(lzs-i+2);
! 1781: p1[1]=evalsigne(sx*sy) | evallgefint(lzs-i+2);
! 1782: for (j=2; j<=lzs-i+1; j++) p1[j]=z[j+i-2];
! 1783: setsigne(xinit,sx);setsigne(yinit,sy);
! 1784: return gerepile(av,tetpil,p1);
! 1785: }
! 1786: #endif
! 1787:
! 1788: long
! 1789: smodsi(long x, GEN y)
! 1790: {
! 1791: if (x<0) err(talker,"negative small integer in smodsi");
! 1792: (void)divsi(x,y); return hiremainder;
! 1793: }
! 1794:
! 1795: /* x and y are integers. Return 1 if |x| == |y|, 0 otherwise */
! 1796: int
! 1797: absi_equal(GEN x, GEN y)
! 1798: {
! 1799: long lx,i;
! 1800:
! 1801: if (!signe(x)) return !signe(y);
! 1802: if (!signe(y)) return 0;
! 1803:
! 1804: lx=lgefint(x); if (lx != lgefint(y)) return 0;
! 1805: i=2; while (i<lx && x[i]==y[i]) i++;
! 1806: return (i==lx);
! 1807: }
! 1808:
! 1809: /* x and y are integers. Return sign(|x| - |y|) */
! 1810: int
! 1811: absi_cmp(GEN x, GEN y)
! 1812: {
! 1813: long lx,ly,i;
! 1814:
! 1815: if (!signe(x)) return signe(y)? -1: 0;
! 1816: if (!signe(y)) return 1;
! 1817:
! 1818: lx=lgefint(x); ly=lgefint(y);
! 1819: if (lx>ly) return 1;
! 1820: if (lx<ly) return -1;
! 1821: i=2; while (i<lx && x[i]==y[i]) i++;
! 1822: if (i==lx) return 0;
! 1823: return ((ulong)x[i] > (ulong)y[i])? 1: -1;
! 1824: }
! 1825:
! 1826: /* x and y are reals. Return sign(|x| - |y|) */
! 1827: int
! 1828: absr_cmp(GEN x, GEN y)
! 1829: {
! 1830: long ex,ey,lx,ly,lz,i;
! 1831:
! 1832: if (!signe(x)) return signe(y)? -1: 0;
! 1833: if (!signe(y)) return 1;
! 1834:
! 1835: ex=expo(x); ey=expo(y);
! 1836: if (ex>ey) return 1;
! 1837: if (ex<ey) return -1;
! 1838:
! 1839: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
! 1840: i=2; while (i<lz && x[i]==y[i]) i++;
! 1841: if (i<lz) return ((ulong)x[i] > (ulong)y[i])? 1: -1;
! 1842: if (lx>=ly)
! 1843: {
! 1844: while (i<lx && !x[i]) i++;
! 1845: return (i==lx)? 0: 1;
! 1846: }
! 1847: while (i<ly && !y[i]) i++;
! 1848: return (i==ly)? 0: -1;
! 1849: }
! 1850:
! 1851: /********************************************************************/
! 1852: /** **/
! 1853: /** INTEGER MULTIPLICATION (KARATSUBA) **/
! 1854: /** **/
! 1855: /********************************************************************/
! 1856:
! 1857: #if 0 /* for tunings */
! 1858: long SQRI_LIMIT = 12;
! 1859: long MULII_LIMIT = 16;
! 1860:
! 1861: void
! 1862: setsqi(long a) { SQRI_LIMIT=a; }
! 1863: void
! 1864: setmuli(long a) { MULII_LIMIT=a; }
! 1865:
! 1866: GEN
! 1867: speci(GEN x, long nx)
! 1868: {
! 1869: GEN z = cgeti(nx+2);
! 1870: long i;
! 1871: for (i=0; i<nx; i++) z[i+2] = x[i];
! 1872: z[1]=evalsigne(1)|evallgefint(nx+2);
! 1873: return z;
! 1874: }
! 1875: #else
! 1876: # define SQRI_LIMIT 12
! 1877: # define MULII_LIMIT 16
! 1878: #endif
! 1879:
! 1880: /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
! 1881: #ifdef INLINE
! 1882: INLINE
! 1883: #endif
! 1884: GEN
! 1885: muliispec(GEN x, GEN y, long nx, long ny)
! 1886: {
! 1887: GEN z2e,z2d,yd,xd,ye,zd;
! 1888: long p1,lz;
! 1889: LOCAL_HIREMAINDER;
! 1890:
! 1891: if (!ny) return gzero;
! 1892: zd = (GEN)avma; lz = nx+ny+2;
! 1893: (void)new_chunk(lz);
! 1894: xd = x + nx;
! 1895: yd = y + ny;
! 1896: ye = yd; p1 = *--xd;
! 1897:
! 1898: *--zd = mulll(p1, *--yd); z2e = zd;
! 1899: while (yd > y) *--zd = addmul(p1, *--yd);
! 1900: *--zd = hiremainder;
! 1901:
! 1902: while (xd > x)
! 1903: {
! 1904: LOCAL_OVERFLOW;
! 1905: yd = ye; p1 = *--xd;
! 1906:
! 1907: z2d = --z2e;
! 1908: *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
! 1909: while (yd > y)
! 1910: {
! 1911: hiremainder += overflow;
! 1912: *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
! 1913: }
! 1914: *--zd = hiremainder + overflow;
! 1915: }
! 1916: if (*zd == 0) { zd++; lz--; } /* normalize */
! 1917: *--zd = evalsigne(1) | evallgefint(lz);
! 1918: *--zd = evaltyp(t_INT) | evallg(lz);
! 1919: avma=(long)zd; return zd;
! 1920: }
! 1921:
! 1922: /* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */
! 1923: static GEN
! 1924: addshiftw(GEN x, GEN y, long d)
! 1925: {
! 1926: GEN z,z0,y0,yd, zd = (GEN)avma;
! 1927: long a,lz,ly = lgefint(y);
! 1928:
! 1929: z0 = new_chunk(d);
! 1930: a = ly-2; yd = y+ly;
! 1931: if (a >= d)
! 1932: {
! 1933: y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */
! 1934: a -= d;
! 1935: if (a)
! 1936: z = addiispec(x+2, y+2, lgefint(x)-2, a);
! 1937: else
! 1938: z = icopy(x);
! 1939: }
! 1940: else
! 1941: {
! 1942: y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */
! 1943: while (zd >= z0) *--zd = 0; /* complete with 0s */
! 1944: z = icopy(x);
! 1945: }
! 1946: lz = lgefint(z)+d;
! 1947: z[1] = evalsigne(1) | evallgefint(lz);
! 1948: z[0] = evaltyp(t_INT) | evallg(lz); return z;
! 1949: }
! 1950:
! 1951: /* Fast product (Karatsuba) of integers a,b. These are not real GENs, a+2
! 1952: * and b+2 were sent instead. na, nb = number of digits of a, b.
! 1953: * c,c0,c1,c2 are genuine GENs.
! 1954: */
! 1955: static GEN
! 1956: quickmulii(GEN a, GEN b, long na, long nb)
! 1957: {
! 1958: GEN a0,c,c0;
! 1959: long av,n0,n0a,i;
! 1960:
! 1961: if (na < nb) swapspec(a,b, na,nb);
! 1962: if (nb == 1) return mulsispec(*b, a, na);
! 1963: if (nb == 0) return gzero;
! 1964: if (nb < MULII_LIMIT) return muliispec(a,b,na,nb);
! 1965: i=(na>>1); n0=na-i; na=i;
! 1966: av=avma; a0=a+na; n0a=n0;
! 1967: while (!*a0 && n0a) { a0++; n0a--; }
! 1968:
! 1969: if (n0a && nb > n0)
! 1970: { /* nb <= na <= n0 */
! 1971: GEN b0,c1,c2;
! 1972: long n0b;
! 1973:
! 1974: nb -= n0;
! 1975: c = quickmulii(a,b,na,nb);
! 1976: b0 = b+nb; n0b = n0;
! 1977: while (!*b0 && n0b) { b0++; n0b--; }
! 1978: if (n0b)
! 1979: {
! 1980: c0 = quickmulii(a0,b0, n0a,n0b);
! 1981:
! 1982: c2 = addiispec(a0,a, n0a,na);
! 1983: c1 = addiispec(b0,b, n0b,nb);
! 1984: c1 = quickmulii(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
! 1985: c2 = addiispec(c0+2, c+2, lgefint(c0)-2,lgefint(c) -2);
! 1986:
! 1987: c1 = subiispec(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
! 1988: }
! 1989: else
! 1990: {
! 1991: c0 = gzero;
! 1992: c1 = quickmulii(a0,b, n0a,nb);
! 1993: }
! 1994: c = addshiftw(c,c1, n0);
! 1995: }
! 1996: else
! 1997: {
! 1998: c = quickmulii(a,b,na,nb);
! 1999: c0 = quickmulii(a0,b,n0a,nb);
! 2000: }
! 2001: return gerepileuptoint(av, addshiftw(c,c0, n0));
! 2002: }
! 2003:
! 2004: /* actual operations will take place on a+2 and b+2: we strip the codewords */
! 2005: GEN
! 2006: mulii(GEN a,GEN b)
! 2007: {
! 2008: long sa,sb;
! 2009: GEN z;
! 2010:
! 2011: sa=signe(a); if (!sa) return gzero;
! 2012: sb=signe(b); if (!sb) return gzero;
! 2013: if (sb<0) sa = -sa;
! 2014: z = quickmulii(a+2,b+2, lgefint(a)-2,lgefint(b)-2);
! 2015: setsigne(z,sa); return z;
! 2016: }
! 2017:
! 2018: static GEN
! 2019: quicksqri(GEN a, long na)
! 2020: {
! 2021: GEN a0,c,c0,c1;
! 2022: long av,n0,n0a,i;
! 2023:
! 2024: if (na<SQRI_LIMIT) return muliispec(a,a,na,na);
! 2025: i=(na>>1); n0=na-i; na=i;
! 2026: av=avma; a0=a+na; n0a=n0;
! 2027: while (!*a0 && n0a) { a0++; n0a--; }
! 2028: c = quicksqri(a,na);
! 2029: if (n0a)
! 2030: {
! 2031: c0 = quicksqri(a0,n0a);
! 2032: c1 = shifti(quickmulii(a0,a, n0a,na),1);
! 2033: c = addshiftw(c,c1, n0);
! 2034: c = addshiftw(c,c0, n0);
! 2035: }
! 2036: else
! 2037: c = addshiftw(c,gzero,n0<<1);
! 2038: return gerepileuptoint(av, c);
! 2039: }
! 2040:
! 2041: GEN
! 2042: sqri(GEN a) { return quicksqri(a+2, lgefint(a)-2); }
! 2043:
! 2044: #define MULRR_LIMIT 32
! 2045: #define MULRR2_LIMIT 32
! 2046:
! 2047: #if 0
! 2048: GEN
! 2049: karamulrr1(GEN x, GEN y)
! 2050: {
! 2051: long sx,sy;
! 2052: long i,i1,i2,lx=lg(x),ly=lg(y),e,flag,garde;
! 2053: long lz2,lz3,lz4;
! 2054: GEN z,lo1,lo2,hi;
! 2055:
! 2056: /* ensure that lg(y) >= lg(x) */
! 2057: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
! 2058: if (lx < MULRR_LIMIT) return mulrr(x,y);
! 2059: sx=signe(x); sy=signe(y);
! 2060: e = evalexpo(expo(x)+expo(y)); if (e & ~EXPOBITS) err(muler4);
! 2061: if (!sx || !sy) { z=cgetr(3); z[2]=0; z[1]=e; return z; }
! 2062: if (sy<0) sx = -sx;
! 2063: ly=lx+flag; z=cgetr(lx);
! 2064: lz2 = (lx>>1); lz3 = lx-lz2;
! 2065: x += 2; lx -= 2;
! 2066: y += 2; ly -= 2;
! 2067: hi = quickmulii(x,y,lz2,lz2);
! 2068: i1=lz2; while (i1<lx && !x[i1]) i1++;
! 2069: lo1 = quickmulii(y,x+i1,lz2,lx-i1);
! 2070: i2=lz2; while (i2<ly && !y[i2]) i2++;
! 2071: lo2 = quickmulii(x,y+i2,lz2,ly-i2);
! 2072:
! 2073: if (signe(lo1))
! 2074: {
! 2075: if (flag) { lo2 = addshiftw(lo1,lo2,1); lz3++; } else lo2=addii(lo1,lo2);
! 2076: }
! 2077: lz4=lgefint(lo2)-lz3;
! 2078: if (lz4>0) hi = addiispec(hi+2,lo2+2, lgefint(hi)-2,lz4);
! 2079: if (hi[2] < 0)
! 2080: {
! 2081: e++; garde=hi[lx+2];
! 2082: for (i=lx+1; i>=2 ; i--) z[i]=hi[i];
! 2083: }
! 2084: else
! 2085: {
! 2086: garde = (hi[lx+2] << 1);
! 2087: shift_left(z,hi,2,lx+1, garde, 1);
! 2088: }
! 2089: z[1]=evalsigne(sx) | e;
! 2090: if (garde < 0)
! 2091: { /* round to nearest */
! 2092: i=lx+2; do z[--i]++; while (z[i]==0);
! 2093: if (i==1) z[2]=HIGHBIT;
! 2094: }
! 2095: avma=(long)z; return z;
! 2096: }
! 2097:
! 2098: GEN
! 2099: karamulrr2(GEN x, GEN y)
! 2100: {
! 2101: long sx,sy,i,lx=lg(x),ly=lg(y),e,flag,garde;
! 2102: GEN z,hi;
! 2103:
! 2104: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
! 2105: if (lx < MULRR2_LIMIT) return mulrr(x,y);
! 2106: ly=lx+flag; sx=signe(x); sy=signe(y);
! 2107: e = evalexpo(expo(x)+expo(y)); if (e & ~EXPOBITS) err(muler4);
! 2108: if (!sx || !sy) { z=cgetr(3); z[2]=0; z[1]=e; return z; }
! 2109: if (sy<0) sx = -sx;
! 2110: z=cgetr(lx);
! 2111: hi=quickmulii(y+2,x+2,ly-2,lx-2);
! 2112: if (hi[2] < 0)
! 2113: {
! 2114: e++; garde=hi[lx];
! 2115: for (i=2; i<lx ; i++) z[i]=hi[i];
! 2116: }
! 2117: else
! 2118: {
! 2119: garde = (hi[lx] << 1);
! 2120: shift_left(z,hi,2,lx-1, garde, 1);
! 2121: }
! 2122: z[1]=evalsigne(sx) | e;
! 2123: if (garde < 0)
! 2124: { /* round to nearest */
! 2125: i=lx; do z[--i]++; while (z[i]==0);
! 2126: if (i==1) z[2]=HIGHBIT;
! 2127: }
! 2128: avma=(long)z; return z;
! 2129: }
! 2130:
! 2131: GEN
! 2132: karamulrr(GEN x, GEN y, long flag)
! 2133: {
! 2134: switch(flag)
! 2135: {
! 2136: case 1: return karamulrr1(x,y);
! 2137: case 2: return karamulrr2(x,y);
! 2138: default: err(flagerr,"karamulrr");
! 2139: }
! 2140: return NULL; /* not reached */
! 2141: }
! 2142:
! 2143: GEN
! 2144: karamulir(GEN x, GEN y, long flag)
! 2145: {
! 2146: long sx=signe(x),lz,i;
! 2147: GEN z,temp,z1;
! 2148:
! 2149: if (!sx) return gzero;
! 2150: lz=lg(y); z=cgetr(lz);
! 2151: temp=cgetr(lz+1); affir(x,temp);
! 2152: z1=karamulrr(temp,y,flag);
! 2153: for (i=1; i<lz; i++) z[i]=z1[i];
! 2154: avma=(long)z; return z;
! 2155: }
! 2156: #endif
! 2157:
! 2158: #ifdef LONG_IS_64BIT
! 2159:
! 2160: #if PARI_BYTE_ORDER == LITTLE_ENDIAN_64 || PARI_BYTE_ORDER == BIG_ENDIAN_64
! 2161: #else
! 2162: error... unknown machine
! 2163: #endif
! 2164:
! 2165: GEN
! 2166: dbltor(double x)
! 2167: {
! 2168: GEN z = cgetr(3);
! 2169: long e;
! 2170: union { double f; ulong i; } fi;
! 2171: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2172: const int exp_mid = 0x3ff;/* exponent bias */
! 2173: const int expo_len = 11; /* number of bits of exponent */
! 2174: LOCAL_HIREMAINDER;
! 2175:
! 2176: if (x==0) { z[1]=evalexpo(-308); z[2]=0; return z; }
! 2177: fi.f = x;
! 2178: e = evalexpo(((fi.i & (HIGHBIT-1)) >> mant_len) - exp_mid);
! 2179: z[1] = e | evalsigne(x<0? -1: 1);
! 2180: z[2] = (fi.i << expo_len) | HIGHBIT;
! 2181: return z;
! 2182: }
! 2183:
! 2184: double
! 2185: rtodbl(GEN x)
! 2186: {
! 2187: long ex,s=signe(x);
! 2188: ulong a;
! 2189: union { double f; ulong i; } fi;
! 2190: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2191: const int exp_mid = 0x3ff;/* exponent bias */
! 2192: const int expo_len = 11; /* number of bits of exponent */
! 2193: LOCAL_HIREMAINDER;
! 2194:
! 2195: if (typ(x)==t_INT && !s) return 0.0;
! 2196: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
! 2197: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
! 2198:
! 2199: /* start by rounding to closest */
! 2200: a = (x[2] & (HIGHBIT-1)) + 0x400;
! 2201: if (a & HIGHBIT) { ex++; a=0; }
! 2202: if (ex >= exp_mid) err(rtodber);
! 2203: fi.i = ((ex + exp_mid) << mant_len) | (a >> expo_len);
! 2204: if (s<0) fi.i |= HIGHBIT;
! 2205: return fi.f;
! 2206: }
! 2207:
! 2208: #else
! 2209:
! 2210: #if PARI_BYTE_ORDER == LITTLE_ENDIAN
! 2211: # define INDEX0 1
! 2212: # define INDEX1 0
! 2213: #elif PARI_BYTE_ORDER == BIG_ENDIAN
! 2214: # define INDEX0 0
! 2215: # define INDEX1 1
! 2216: #else
! 2217: error... unknown machine
! 2218: #endif
! 2219:
! 2220: GEN
! 2221: dbltor(double x)
! 2222: {
! 2223: GEN z;
! 2224: long e;
! 2225: union { double f; ulong i[2]; } fi;
! 2226: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2227: const int exp_mid = 0x3ff;/* exponent bias */
! 2228: const int shift = mant_len-32;
! 2229: const int expo_len = 11; /* number of bits of exponent */
! 2230:
! 2231: if (x==0) { z=cgetr(3); z[1]=evalexpo(-308); z[2]=0; return z; }
! 2232: fi.f = x; z=cgetr(4);
! 2233: {
! 2234: const ulong a = fi.i[INDEX0];
! 2235: const ulong b = fi.i[INDEX1];
! 2236: e = evalexpo(((a & (HIGHBIT-1)) >> shift) - exp_mid);
! 2237: z[1] = e | evalsigne(x<0? -1: 1);
! 2238: z[3] = b << expo_len;
! 2239: z[2] = HIGHBIT | b >> (BITS_IN_LONG-expo_len) | (a << expo_len);
! 2240: }
! 2241: return z;
! 2242: }
! 2243:
! 2244: double
! 2245: rtodbl(GEN x)
! 2246: {
! 2247: long ex,s=signe(x),lx=lg(x);
! 2248: ulong a,b,k;
! 2249: union { double f; ulong i[2]; } fi;
! 2250: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2251: const int exp_mid = 0x3ff;/* exponent bias */
! 2252: const int shift = mant_len-32;
! 2253: const int expo_len = 11; /* number of bits of exponent */
! 2254:
! 2255: if (typ(x)==t_INT && !s) return 0.0;
! 2256: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
! 2257: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
! 2258:
! 2259: /* start by rounding to closest */
! 2260: a = x[2] & (HIGHBIT-1);
! 2261: if (lx > 3)
! 2262: {
! 2263: b = x[3] + 0x400UL; if (b < 0x400UL) a++;
! 2264: if (a & HIGHBIT) { ex++; a=0; }
! 2265: }
! 2266: else b = 0;
! 2267: if (ex > exp_mid) err(rtodber);
! 2268: ex += exp_mid;
! 2269: k = (a >> expo_len) | (ex << shift);
! 2270: if (s<0) k |= HIGHBIT;
! 2271: fi.i[INDEX0] = k;
! 2272: fi.i[INDEX1] = (a << (BITS_IN_LONG-expo_len)) | (b >> expo_len);
! 2273: return fi.f;
! 2274: }
! 2275: #endif
! 2276:
! 2277: /* Old cgiv without reference count (which was not used anyway)
! 2278: * Should be a macro.
! 2279: */
! 2280: void
! 2281: cgiv(GEN x)
! 2282: {
! 2283: if (x == (GEN) avma)
! 2284: avma = (long) (x+lg(x));
! 2285: }
! 2286:
! 2287: /********************************************************************/
! 2288: /** **/
! 2289: /** INTEGER EXTENDED GCD (AND INVMOD) **/
! 2290: /** **/
! 2291: /********************************************************************/
! 2292:
! 2293: /* GN 1998Oct25, originally developed in January 1998 under 2.0.4.alpha,
! 2294: * in the context of trying to improve elliptic curve cryptosystem attacking
! 2295: * algorithms.
! 2296: *
! 2297: * Two basic ideas - (1) avoid many integer divisions, especially when the
! 2298: * quotient is 1 (which happens more than 40% of the time). (2) Use Lehmer's
! 2299: * trick as modified by Jebelean of extracting a couple of words' worth of
! 2300: * leading bits from both operands, and compute partial quotients from them
! 2301: * as long as we can be sure of their values. The Jebelean modifications
! 2302: * consist in reliable inequalities from which we can decide fast whether
! 2303: * to carry on or to return to the outer loop, and in re-shifting after the
! 2304: * first word's worth of bits has been used up. All of this is described
! 2305: * in R. Lercier's these [pp148-153 & 163f.], except his outer loop isn't
! 2306: * quite right (the catch-up divisions needed when one partial quotient is
! 2307: * larger than a word are missing).
! 2308: *
! 2309: * The API is mpxgcd() below; the single-word routines xgcduu and xxgcduu
! 2310: * may be called directly if desired; lgcdii() probably doesn't make much
! 2311: * sense out of context.
! 2312: *
! 2313: * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
! 2314: * ptotically about a factor 2.5 .. 3, depending on processor architecture,
! 2315: * than the naive continued-division code. Unfortunately, thanks to the
! 2316: * unrolled loops and all, the code is a bit lengthy.
! 2317: */
! 2318:
! 2319: /*==================================
! 2320: * xgcduu(d,d1,f,v,v1,s)
! 2321: * xxgcduu(d,d1,f,u,u1,v,v1,s)
! 2322: *==================================*/
! 2323: /*
! 2324: * Fast `final' extended gcd algorithm, acting on two ulongs. Ideally this
! 2325: * should be replaced with assembler versions wherever possible. The present
! 2326: * code essentially does `subtract, compare, and possibly divide' at each step,
! 2327: * which is reasonable when hardware division (a) exists, (b) is a bit slowish
! 2328: * and (c) does not depend a lot on the operand values (as on i486). When
! 2329: * wordsize division is in fact an assembler routine based on subtraction,
! 2330: * this strategy may not be the most efficient one.
! 2331: *
! 2332: * xxgcduu() should be called with d > d1 > 0, returns gcd(d,d1), and assigns
! 2333: * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1] (i.e.,
! 2334: * the product of all the [0, 1; 1 q_j] where the leftmost factor arises from
! 2335: * the quotient of the first division step), and the information about the
! 2336: * implied signs to s (-1 when an odd number of divisions has been done,
! 2337: * 1 otherwise). xgcduu() is exactly the same except that u,u1 are not com-
! 2338: * puted (and not returned, of course).
! 2339: *
! 2340: * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
! 2341: * (so we can stop the chain division one step early: as soon as the remainder
! 2342: * equals 1). Use this when you intend to use only what would be v, or only
! 2343: * what would be u and v, after that final division step, but not u1 and v1.
! 2344: * With the flag in force and thus without that final step, the interesting
! 2345: * quantity/ies will still sit in [u1 and] v1, of course.
! 2346: *
! 2347: * For computing the inverse of a single-word INTMOD known to exist, pass f=1
! 2348: * to xgcduu(), and obtain the result from s and v1. (The routine does the
! 2349: * right thing when d1==1 already.) For finishing a multiword modinv known
! 2350: * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix (with
! 2351: * properly adjusted signs) onto the values v' and v1' previously obtained
! 2352: * from the multiword division steps. Actually, just take the scalar product
! 2353: * of [v',v1'] with [u1,-v1], and change the sign if s==-1. (If the final
! 2354: * step had been carried out, it would be [-u,v], and s would also change.)
! 2355: * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
! 2356: * Finally, f=0 with xxgcduu() is useful for Bezout computations.
! 2357: * [Harrumph. In the above prescription, the sign turns out to be precisely
! 2358: * wrong.]
! 2359: * (It is safe for inversemodulo() to call xgcduu() with f=1, because f&1
! 2360: * doesn't make a difference when gcd(d,d1)>1. The speedup is negligible.)
! 2361: *
! 2362: * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
! 2363: * recover the final u,u1 given only v,v1 and s. However, it probably isn't
! 2364: * worthwhile, as it trades a few multiplications for a division.
! 2365: *
! 2366: * Note that these routines do not know and do not need to know about the
! 2367: * PARI stack.
! 2368: */
! 2369:
! 2370: ulong
! 2371: xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
! 2372: {
! 2373: ulong xv,xv1, xs, q,res;
! 2374: LOCAL_HIREMAINDER;
! 2375:
! 2376: /* The above blurb contained a lie. The main loop always stops when d1
! 2377: * has become equal to 1. If (d1 == 1 && !(f&1)) after the loop, we do
! 2378: * the final `division' of d by 1 `by hand' as it were.
! 2379: *
! 2380: * The loop has already been unrolled once. Aggressive optimization could
! 2381: * well lead to a totally unrolled assembler version...
! 2382: *
! 2383: * On modern x86 architectures, this loop is a pig anyway. The division
! 2384: * instruction always puts its result into the same pair of registers, and
! 2385: * we always want to use one of them straight away, so pipeline performance
! 2386: * will suck big time. An assembler version should probably do a first loop
! 2387: * computing and storing all the quotients -- their number is bounded in
! 2388: * advance -- and then assembling the matrix in a second pass. On other
! 2389: * architectures where we can cycle through four or so groups of registers
! 2390: * and exploit a fast ALU result-to-operand feedback path, this is much less
! 2391: * of an issue. (Intel sucks. See http://www.x86.org/ ...)
! 2392: */
! 2393: xs = res = 0;
! 2394: xv = 0UL; xv1 = 1UL;
! 2395: while (d1 > 1UL)
! 2396: {
! 2397: d -= d1; /* no need to use subll */
! 2398: if (d >= d1)
! 2399: {
! 2400: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
! 2401: xv += q * xv1;
! 2402: }
! 2403: else
! 2404: xv += xv1;
! 2405: /* possible loop exit */
! 2406: if (d <= 1UL) { xs=1; break; }
! 2407: /* repeat with inverted roles */
! 2408: d1 -= d;
! 2409: if (d1 >= d)
! 2410: {
! 2411: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
! 2412: xv1 += q * xv;
! 2413: }
! 2414: else
! 2415: xv1 += xv;
! 2416: } /* while */
! 2417:
! 2418: if (!(f&1)) /* division by 1 postprocessing if needed */
! 2419: {
! 2420: if (xs && d==1)
! 2421: { xv1 += d1 * xv; xs = 0; res = 1UL; }
! 2422: else if (!xs && d1==1)
! 2423: { xv += d * xv1; xs = 1; res = 1UL; }
! 2424: }
! 2425:
! 2426: if (xs)
! 2427: {
! 2428: *s = -1; *v = xv1; *v1 = xv;
! 2429: return (res ? res : (d==1 ? 1UL : d1));
! 2430: }
! 2431: else
! 2432: {
! 2433: *s = 1; *v = xv; *v1 = xv1;
! 2434: return (res ? res : (d1==1 ? 1UL : d));
! 2435: }
! 2436: }
! 2437:
! 2438:
! 2439: ulong
! 2440: xxgcduu(ulong d, ulong d1, int f,
! 2441: ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
! 2442: {
! 2443: ulong xu,xu1, xv,xv1, xs, q,res;
! 2444: LOCAL_HIREMAINDER;
! 2445:
! 2446: xs = res = 0;
! 2447: xu = xv1 = 1UL;
! 2448: xu1 = xv = 0UL;
! 2449: while (d1 > 1UL)
! 2450: {
! 2451: d -= d1; /* no need to use subll */
! 2452: if (d >= d1)
! 2453: {
! 2454: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
! 2455: xv += q * xv1;
! 2456: xu += q * xu1;
! 2457: }
! 2458: else
! 2459: { xv += xv1; xu += xu1; }
! 2460: /* possible loop exit */
! 2461: if (d <= 1UL) { xs=1; break; }
! 2462: /* repeat with inverted roles */
! 2463: d1 -= d;
! 2464: if (d1 >= d)
! 2465: {
! 2466: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
! 2467: xv1 += q * xv;
! 2468: xu1 += q * xu;
! 2469: }
! 2470: else
! 2471: { xv1 += xv; xu1 += xu; }
! 2472: } /* while */
! 2473:
! 2474: if (!(f&1)) /* division by 1 postprocessing if needed */
! 2475: {
! 2476: if (xs && d==1)
! 2477: {
! 2478: xv1 += d1 * xv;
! 2479: xu1 += d1 * xu;
! 2480: xs = 0; res = 1UL;
! 2481: }
! 2482: else if (!xs && d1==1)
! 2483: {
! 2484: xv += d * xv1;
! 2485: xu += d * xu1;
! 2486: xs = 1; res = 1UL;
! 2487: }
! 2488: }
! 2489:
! 2490: if (xs)
! 2491: {
! 2492: *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
! 2493: return (res ? res : (d==1 ? 1UL : d1));
! 2494: }
! 2495: else
! 2496: {
! 2497: *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
! 2498: return (res ? res : (d1==1 ? 1UL : d));
! 2499: }
! 2500: }
! 2501:
! 2502: /*==================================
! 2503: * lgcdii(d,d1,u,u1,v,v1)
! 2504: *==================================*/
! 2505: /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
! 2506: *
! 2507: * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
! 2508: * and a quantity of bits from d1 obtained by a shift of the same displacement,
! 2509: * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
! 2510: * the product of all the [0, 1; 1, q_j] thus obtained, where the leftmost
! 2511: * factor arises from the quotient of the first division step.
! 2512: *
! 2513: * MUST be called with d > d1 > 0, and with d occupying more than one
! 2514: * significant word (if it doesn't, the caller has no business with us;
! 2515: * he/she/it should use xgcduu() instead). Returns the number of reduction/
! 2516: * swap steps carried out, possibly zero, or under certain conditions minus
! 2517: * that number. When the return value is nonzero, the caller should use the
! 2518: * returned recurrence matrix to update its own copies of d,d1. When the
! 2519: * return value is non-positive, and the latest remainder after updating
! 2520: * turns out to be nonzero, the caller should at once attempt a full division,
! 2521: * rather than first trying lgcdii() again -- this typically happens when we
! 2522: * are about to encounter a quotient larger than half a word. (This is not
! 2523: * detected infallibly -- after a positive return value, it is perfectly
! 2524: * possible that the next stage will end up needing a full division. After
! 2525: * a negative return value, however, this is certain, and should be acted
! 2526: * upon.)
! 2527: *
! 2528: * (The sign information, for which xgcduu() has its return argument s, is now
! 2529: * implicit in the LSB of our return value, and the caller may take advantage
! 2530: * of the fact that a return value of +-1 implies u==0,u1==v==1 [only v1 pro-
! 2531: * vides interesting information in this case]. One might also use the fact
! 2532: * that if the return value is +-2, then u==1, but this is rather marginal.)
! 2533: *
! 2534: * If it was not possible to determine even the first quotient, either because
! 2535: * we're too close to an integer quotient or because the quotient would be
! 2536: * larger than one word (if the `leading digit' of d1 after shifting is all
! 2537: * zeros), we return 0 and do not bother to assign anything to the last four
! 2538: * args.
! 2539: *
! 2540: * The division chain might (sometimes) even run to completion. It will be
! 2541: * up to the caller to detect this case.
! 2542: *
! 2543: * This routine does _not_ change d or d1; this will also be up to the caller.
! 2544: *
! 2545: * Note that this routine does not know and does not need to know about the
! 2546: * PARI stack.
! 2547: */
! 2548: /*#define DEBUG_LEHMER 1 */
! 2549: int
! 2550: lgcdii(ulong* d, ulong* d1,
! 2551: ulong* u, ulong* u1, ulong* v, ulong* v1)
! 2552: {
! 2553: /* Strategy: (1) Extract/shift most significant bits. We assume that d
! 2554: * has at least two significant words, but we can cope with a one-word d1.
! 2555: * Let dd,dd1 be the most significant dividend word and matching part of the
! 2556: * divisor.
! 2557: * (2) Check for overflow on the first division. For our purposes, this
! 2558: * happens when the upper half of dd1 is zero. (Actually this is detected
! 2559: * during extraction.)
! 2560: * (3) Get a fix on the first quotient. We compute q = floor(dd/dd1), which
! 2561: * is an upper bound for floor(d/d1), and which gives the true value of the
! 2562: * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
! 2563: * (If it isn't, we give up. This is annoying because the subsequent full
! 2564: * division will repeat some work already done, but it happens very infre-
! 2565: * quently. Doing the extra-bit-fetch in this case would be awkward.)
! 2566: * (4) Finish initializations.
! 2567: *
! 2568: * The remainder of the action is comparatively boring... The main loop has
! 2569: * been unrolled once (so we don't swap things and we can apply Jebelean's
! 2570: * termination conditions which alternatingly take two different forms during
! 2571: * successive iterations). When we first run out of sufficient bits to form
! 2572: * a quotient, and have an extra word of each operand, we pull out two whole
! 2573: * word's worth of dividend bits, and divisor bits of matching significance;
! 2574: * to these we apply our partial matrix (disregarding overflow because the
! 2575: * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
! 2576: * re-extract one word's worth of the current dividend and a matching amount
! 2577: * of divisor bits. The affair will normally terminate with matrix entries
! 2578: * just short of a whole word. (We terminate the inner loop before these can
! 2579: * possibly overflow.)
! 2580: */
! 2581: ulong dd,dd1,ddlo,dd1lo, sh,shc; /* `digits', shift count */
! 2582: ulong xu,xu1, xv,xv1, q,res; /* recurrences, partial quotient, count */
! 2583: ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
! 2584: long ld, ld1, lz; /* t_INT effective lengths */
! 2585: int skip = 0; /* a boolean flag */
! 2586: LOCAL_OVERFLOW;
! 2587: LOCAL_HIREMAINDER;
! 2588:
! 2589: #ifdef DEBUG_LEHMER
! 2590: voir(d, -1); voir(d1, -1);
! 2591: #endif
! 2592: ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
! 2593: if (lz > 1) return 0; /* rare, quick and desperate exit */
! 2594:
! 2595: d += 2; d1 += 2; /* point at the leading `digits' */
! 2596: dd1lo = 0; /* unless we find something better */
! 2597: sh = bfffo(*d); /* obtain dividend left shift count */
! 2598:
! 2599: if (sh)
! 2600: { /* do the shifting */
! 2601: shc = BITS_IN_LONG - sh;
! 2602: if (lz)
! 2603: { /* dividend longer than divisor */
! 2604: dd1 = (*d1 >> shc);
! 2605: if (!(HIGHMASK & dd1)) return 0; /* overflow detected */
! 2606: if (ld1 > 3)
! 2607: dd1lo = (*d1 << sh) + (d1[1] >> shc);
! 2608: else
! 2609: dd1lo = (*d1 << sh);
! 2610: }
! 2611: else
! 2612: { /* dividend and divisor have the same length */
! 2613: /* dd1 = shiftl(d1,sh) would have left hiremainder==0, and dd1 != 0. */
! 2614: dd1 = (*d1 << sh);
! 2615: if (!(HIGHMASK & dd1)) return 0;
! 2616: if (ld1 > 3)
! 2617: {
! 2618: dd1 += (d1[1] >> shc);
! 2619: if (ld1 > 4)
! 2620: dd1lo = (d1[1] << sh) + (d1[2] >> shc);
! 2621: else
! 2622: dd1lo = (d1[1] << sh);
! 2623: }
! 2624: }
! 2625: /* following lines assume d to have 2 or more significant words */
! 2626: dd = (*d << sh) + (d[1] >> shc);
! 2627: if (ld > 4)
! 2628: ddlo = (d[1] << sh) + (d[2] >> shc);
! 2629: else
! 2630: ddlo = (d[1] << sh);
! 2631: }
! 2632: else
! 2633: { /* no shift needed */
! 2634: if (lz) return 0; /* div'd longer than div'r: o'flow automatic */
! 2635: dd1 = *d1;
! 2636: if (!(HIGHMASK & dd1)) return 0;
! 2637: if(ld1 > 3) dd1lo = d1[1];
! 2638: /* assume again that d has another significant word */
! 2639: dd = *d; ddlo = d[1];
! 2640: }
! 2641: #ifdef DEBUG_LEHMER
! 2642: fprintf(stderr, " %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
! 2643: #endif
! 2644:
! 2645: /* First subtraction/division stage. (If a subtraction initially suffices,
! 2646: * we don't divide at all.) If a Jebelean condition is violated, and we
! 2647: * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
! 2648: * give up and ask for a full division. Otherwise we commit the result,
! 2649: * possibly deciding to re-shift immediately afterwards.
! 2650: */
! 2651: dd -= dd1;
! 2652: if (dd < dd1)
! 2653: { /* first quotient known to be == 1 */
! 2654: xv1 = 1UL;
! 2655: if (!dd) /* !(Jebelean condition), extraspecial case */
! 2656: { /* note this can actually happen... Now
! 2657: * q==1 is known, but we underflow already.
! 2658: * OTOH we've just shortened d by a whole word.
! 2659: * Thus we feel pleased with ourselves and
! 2660: * return. (The re-shift code below would
! 2661: * do so anyway.) */
! 2662: *u = 0; *v = *u1 = *v1 = 1UL;
! 2663: return -1; /* Next step will be a full division. */
! 2664: } /* if !(Jebelean) then */
! 2665: }
! 2666: else
! 2667: { /* division indicated */
! 2668: hiremainder = 0;
! 2669: xv1 = 1 + divll(dd, dd1); /* xv1: alternative spelling of `q', here ;) */
! 2670: dd = hiremainder;
! 2671: if (dd < xv1) /* !(Jebelean cond'), non-extra special case */
! 2672: { /* Attempt to complete the division using the
! 2673: * less significant bits, before skipping right
! 2674: * past the 1st loop to the reshift stage. */
! 2675: ddlo = subll(ddlo, mulll(xv1, dd1lo));
! 2676: dd = subllx(dd, hiremainder);
! 2677:
! 2678: /* If we now have an overflow, q was _certainly_ too large. Thanks to
! 2679: * our decision not to get here unless the original dd1 had bits set in
! 2680: * the upper half of the word, however, we now do know that the correct
! 2681: * quotient is in fact q-1. Adjust our data accordingly. */
! 2682: if (overflow)
! 2683: {
! 2684: xv1--;
! 2685: ddlo = addll(ddlo,dd1lo);
! 2686: dd = addllx(dd,dd1); /* overflows again which cancels the borrow */
! 2687: /* ...and fall through to skip=1 below */
! 2688: }
! 2689: else
! 2690: /* Test Jebelean condition anew, at this point using _all_ the extracted
! 2691: * bits we have. This is clutching at straws; we have a more or less
! 2692: * even chance of succeeding this time. Note that if we fail, we really
! 2693: * do not know whether the correct quotient would have been q or some
! 2694: * smaller value. */
! 2695: if (!dd && ddlo < xv1) return 0;
! 2696:
! 2697: /* Otherwise, we now know that q is correct, but we cannot go into the
! 2698: * 1st loop. Raise a flag so we'll remember to skip past the loop.
! 2699: * Get here also after the q-1 adjustment case. */
! 2700: skip = 1;
! 2701: } /* if !(Jebelean) then */
! 2702: }
! 2703: xu = 0; xv = xu1 = 1UL; res = 1;
! 2704: #ifdef DEBUG_LEHMER
! 2705: fprintf(stderr, " q = %ld, %lx, %lx\n", xv1, dd1, dd);
! 2706: #endif
! 2707:
! 2708: /* Some invariants from here across the first loop:
! 2709: *
! 2710: * At this point, and again after we are finished with the first loop and
! 2711: * subsequent conditional, a division and the associated update of the
! 2712: * recurrence matrix have just been carried out completely. The matrix
! 2713: * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
! 2714: * columns), and the current remainder == next divisor (dd at the moment)
! 2715: * is nonzero (it might be zero here, but then skip will have been set).
! 2716: *
! 2717: * After the first loop, or when skip is set already, it will also be the
! 2718: * case that there aren't sufficiently many bits to continue without re-
! 2719: * shifting. If the divisor after reshifting is zero, or indeed if it
! 2720: * doesn't have more than half a word's worth of bits, we will have to
! 2721: * return at that point. Otherwise, we proceed into the second loop.
! 2722: *
! 2723: * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
! 2724: * already reflect the result of applying the current matrix to the old
! 2725: * ddorig:ddlo and dd1orig:dd1lo. (For the first iteration above, this
! 2726: * was easy to achieve, and we didn't even need to peek into the (now
! 2727: * no longer existent!) saved words. After the loop, we'll stop for a
! 2728: * moment to merge in the ddlo,dd1lo contributions.)
! 2729: *
! 2730: * Note that after the first division, even an a priori quotient of 1 cannot
! 2731: * be trusted until we've checked Jebelean's condition -- it cannot be too
! 2732: * large, of course, but it might be too small.
! 2733: */
! 2734:
! 2735: if (!skip)
! 2736: {
! 2737: while (1)
! 2738: {
! 2739: /* First half of loop divides dd into dd1, and leaves the recurrence
! 2740: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
! 2741: * entries) when successful. */
! 2742: tmpd = dd1 - dd;
! 2743: if (tmpd < dd)
! 2744: { /* quotient suspected to be 1 */
! 2745: #ifdef DEBUG_LEHMER
! 2746: q = 1;
! 2747: #endif
! 2748: tmpu = xu + xu1; /* cannot overflow -- everything bounded by
! 2749: * the original dd during first loop */
! 2750: tmpv = xv + xv1;
! 2751: }
! 2752: else
! 2753: { /* division indicated */
! 2754: hiremainder = 0;
! 2755: q = 1 + divll(tmpd, dd);
! 2756: tmpd = hiremainder;
! 2757: tmpu = xu + q*xu1; /* can't overflow, but may need to be undone */
! 2758: tmpv = xv + q*xv1;
! 2759: }
! 2760:
! 2761: tmp0 = addll(tmpv, xv1);
! 2762: if ((tmpd < tmpu) || overflow ||
! 2763: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
! 2764: break; /* skip ahead to reshift stage */
! 2765: else
! 2766: { /* commit dd1, xu, xv */
! 2767: res++;
! 2768: dd1 = tmpd; xu = tmpu; xv = tmpv;
! 2769: #ifdef DEBUG_LEHMER
! 2770: fprintf(stderr, " q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
! 2771: q, dd, dd1, xu1, xu, xv1, xv);
! 2772: #endif
! 2773: }
! 2774:
! 2775: /* Second half of loop divides dd1 into dd, and the matrix returns to its
! 2776: * normal arrangement. */
! 2777: tmpd = dd - dd1;
! 2778: if (tmpd < dd1)
! 2779: { /* quotient suspected to be 1 */
! 2780: #ifdef DEBUG_LEHMER
! 2781: q = 1;
! 2782: #endif
! 2783: tmpu = xu1 + xu; /* cannot overflow */
! 2784: tmpv = xv1 + xv;
! 2785: }
! 2786: else
! 2787: { /* division indicated */
! 2788: hiremainder = 0;
! 2789: q = 1 + divll(tmpd, dd1);
! 2790: tmpd = hiremainder;
! 2791: tmpu = xu1 + q*xu;
! 2792: tmpv = xv1 + q*xv;
! 2793: }
! 2794:
! 2795: tmp0 = addll(tmpu, xu);
! 2796: if ((tmpd < tmpv) || overflow ||
! 2797: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
! 2798: break; /* skip ahead to reshift stage */
! 2799: else
! 2800: { /* commit dd, xu1, xv1 */
! 2801: res++;
! 2802: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
! 2803: #ifdef DEBUG_LEHMER
! 2804: fprintf(stderr, " q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
! 2805: q, dd1, dd, xu, xu1, xv, xv1);
! 2806: #endif
! 2807: }
! 2808:
! 2809: } /* end of first loop */
! 2810:
! 2811: /* Intermezzo: update dd:ddlo, dd1:dd1lo. (But not if skip is set.) */
! 2812:
! 2813: if (res&1)
! 2814: {
! 2815: /* after failed division in 1st half of loop:
! 2816: * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
! 2817: * * [ -xu, xu1 ; xv, -xv1 ]
! 2818: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
! 2819: * add the high-order remainders + overflows onto [dd1,dd].)
! 2820: */
! 2821: tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
! 2822: tmp1 = subll(mulll(dd1lo,xv), tmp1);
! 2823: dd1 += subllx(hiremainder, tmp0);
! 2824: tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
! 2825: ddlo = subll(tmp2, mulll(dd1lo,xv1));
! 2826: dd += subllx(tmp0, hiremainder);
! 2827: dd1lo = tmp1;
! 2828: }
! 2829: else
! 2830: {
! 2831: /* after failed division in 2nd half of loop:
! 2832: * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
! 2833: * * [ xu1, -xu ; -xv1, xv ]
! 2834: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
! 2835: * add the high-order remainders + overflows onto [dd,dd1].)
! 2836: */
! 2837: tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
! 2838: tmp1 = subll(tmp1, mulll(dd1lo,xv1));
! 2839: dd += subllx(tmp0, hiremainder);
! 2840: tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
! 2841: dd1lo = subll(mulll(dd1lo,xv), tmp2);
! 2842: dd1 += subllx(hiremainder, tmp0);
! 2843: ddlo = tmp1;
! 2844: }
! 2845: #ifdef DEBUG_LEHMER
! 2846: fprintf(stderr, " %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
! 2847: #endif
! 2848: } /* end of skip-pable section: get here also, with res==1, when there
! 2849: * was a problem immediately after the very first division. */
! 2850:
! 2851: /* Re-shift. Note: the shift count _can_ be zero, viz. under the following
! 2852: * precise conditions: The original dd1 had its topmost bit set, so the 1st
! 2853: * q was 1, and after subtraction, dd had its topmost bit unset. If now
! 2854: * dd==0, we'd have taken the return exit already, so we couldn't have got
! 2855: * here. If not, then it must have been the second division which has gone
! 2856: * amiss (because dd1 was very close to an exact multiple of the remainder
! 2857: * dd value, so this will be very rare). At this point, we'd have a fairly
! 2858: * slim chance of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but
! 2859: * this is not guaranteed to work. Instead of trying, we return at once.
! 2860: * The caller will see to it that the initial subtraction is re-done using
! 2861: * _all_ the bits of both operands, which already helps, and the next round
! 2862: * will either be a full division (if dd occupied a halfword or less), or
! 2863: * another llgcdii() first step. In the latter case, since we try a little
! 2864: * harder during our first step, we may actually be able to fix the problem,
! 2865: * and get here again with improved low-order bits and with another step
! 2866: * under our belt. Otherwise we'll have given up above and forced a full-
! 2867: * blown division.
! 2868: *
! 2869: * If res is even, the shift count _cannot_ be zero. (The first step forces
! 2870: * a zero into the remainder's MSB, and all subsequent remainders will have
! 2871: * inherited it.)
! 2872: *
! 2873: * The re-shift stage exits if the next divisor has at most half a word's
! 2874: * worth of bits.
! 2875: *
! 2876: * For didactic reasons, the second loop will be arranged in the same way
! 2877: * as the first -- beginning with the division of dd into dd1, as if res
! 2878: * was odd. To cater for this, if res is actually even, we swap things
! 2879: * around during reshifting. (During the second loop, the parity of res
! 2880: * does not matter; we know in which half of the loop we are when we decide
! 2881: * to return.)
! 2882: */
! 2883: #ifdef DEBUG_LEHMER
! 2884: fprintf(stderr, "(sh)");
! 2885: #endif
! 2886:
! 2887: if (res&1)
! 2888: { /* after odd number of division(s) */
! 2889: if (dd1 && (sh = bfffo(dd1)))
! 2890: {
! 2891: shc = BITS_IN_LONG - sh;
! 2892: dd = (ddlo >> shc) + (dd << sh);
! 2893: if (!(HIGHMASK & dd))
! 2894: {
! 2895: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
! 2896: return -res; /* full division asked for */
! 2897: }
! 2898: dd1 = (dd1lo >> shc) + (dd1 << sh);
! 2899: }
! 2900: else
! 2901: { /* time to return: <= 1 word left, or sh==0 */
! 2902: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
! 2903: return res;
! 2904: }
! 2905: }
! 2906: else
! 2907: { /* after even number of divisions */
! 2908: if (dd)
! 2909: {
! 2910: sh = bfffo(dd); /* Known to be positive. */
! 2911: shc = BITS_IN_LONG - sh;
! 2912: /* dd:ddlo will become the new dd1, and v.v. */
! 2913: tmpd = (ddlo >> shc) + (dd << sh);
! 2914: dd = (dd1lo >> shc) + (dd1 << sh);
! 2915: dd1 = tmpd;
! 2916: /* This has completed the swap; now dd is again the current divisor.
! 2917: * The following test originally inspected dd1 -- a most subtle and
! 2918: * most annoying bug. The Management. */
! 2919: if (HIGHMASK & dd)
! 2920: {
! 2921: /* recurrence matrix is the wrong way round; swap it. */
! 2922: tmp0 = xu; xu = xu1; xu1 = tmp0;
! 2923: tmp0 = xv; xv = xv1; xv1 = tmp0;
! 2924: }
! 2925: else
! 2926: {
! 2927: /* recurrence matrix is the wrong way round; fix this. */
! 2928: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
! 2929: return -res; /* full division asked for */
! 2930: }
! 2931: }
! 2932: else
! 2933: { /* time to return: <= 1 word left */
! 2934: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
! 2935: return res;
! 2936: }
! 2937: } /* end reshift */
! 2938:
! 2939: #ifdef DEBUG_LEHMER
! 2940: fprintf(stderr, " %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
! 2941: #endif
! 2942:
! 2943: /* The Second Loop. Rip-off of the first, but we now check for overflow
! 2944: * in the recurrences. Returns instead of breaking when we cannot fix the
! 2945: * quotient any longer.
! 2946: */
! 2947:
! 2948: for(;;)
! 2949: {
! 2950: /* First half of loop divides dd into dd1, and leaves the recurrence
! 2951: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
! 2952: * entries) when successful. */
! 2953: tmpd = dd1 - dd;
! 2954: if (tmpd < dd)
! 2955: { /* quotient suspected to be 1 */
! 2956: #ifdef DEBUG_LEHMER
! 2957: q = 1;
! 2958: #endif
! 2959: tmpu = xu + xu1;
! 2960: tmpv = addll(xv, xv1); /* xv,xv1 will overflow first */
! 2961: tmp1 = overflow;
! 2962: }
! 2963: else
! 2964: { /* division indicated */
! 2965: hiremainder = 0;
! 2966: q = 1 + divll(tmpd, dd);
! 2967: tmpd = hiremainder;
! 2968: tmpu = xu + q*xu1;
! 2969: tmpv = addll(xv, mulll(q,xv1));
! 2970: tmp1 = overflow | hiremainder;
! 2971: }
! 2972:
! 2973: tmp0 = addll(tmpv, xv1);
! 2974: if ((tmpd < tmpu) || overflow || tmp1 ||
! 2975: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
! 2976: {
! 2977: /* The recurrence matrix has not yet been warped... */
! 2978: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
! 2979: return res;
! 2980: }
! 2981: else
! 2982: { /* commit dd1, xu, xv */
! 2983: res++;
! 2984: dd1 = tmpd; xu = tmpu; xv = tmpv;
! 2985: #ifdef DEBUG_LEHMER
! 2986: fprintf(stderr, " q = %ld, %lx, %lx\n", q, dd, dd1);
! 2987: #endif
! 2988: }
! 2989:
! 2990: /* Second half of loop divides dd1 into dd, and the matrix returns to its
! 2991: * normal arrangement. */
! 2992: tmpd = dd - dd1;
! 2993: if (tmpd < dd1)
! 2994: { /* quotient suspected to be 1 */
! 2995: #ifdef DEBUG_LEHMER
! 2996: q = 1;
! 2997: #endif
! 2998: tmpu = xu1 + xu;
! 2999: tmpv = addll(xv1, xv);
! 3000: tmp1 = overflow;
! 3001: }
! 3002: else
! 3003: { /* division indicated */
! 3004: hiremainder = 0;
! 3005: q = 1 + divll(tmpd, dd1);
! 3006: tmpd = hiremainder;
! 3007: tmpu = xu1 + q*xu;
! 3008: tmpv = addll(xv1, mulll(q, xv));
! 3009: tmp1 = overflow | hiremainder;
! 3010: }
! 3011:
! 3012: tmp0 = addll(tmpu, xu);
! 3013: if ((tmpd < tmpv) || overflow || tmp1 ||
! 3014: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
! 3015: {
! 3016: /* The recurrence matrix has not yet been unwarped, so it is
! 3017: * the wrong way round; fix this. */
! 3018: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
! 3019: return res;
! 3020: }
! 3021: else
! 3022: { /* commit dd, xu1, xv1 */
! 3023: res++;
! 3024: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
! 3025: #ifdef DEBUG_LEHMER
! 3026: fprintf(stderr, " q = %ld, %lx, %lx\n", q, dd1, dd);
! 3027: #endif
! 3028: }
! 3029:
! 3030: } /* end of second loop */
! 3031:
! 3032: return(0); /* never reached */
! 3033: }
! 3034:
! 3035: /*==================================
! 3036: * invmod(a,b,res)
! 3037: *==================================
! 3038: * If a is invertible, return 1, and set res = a^{ -1 }
! 3039: * Otherwise, return 0, and set res = gcd(a,b)
! 3040:
! 3041: * Temporary - to be replaced with mpxgcd()
! 3042: * I think we won't need to change much ... just the parameter-passing and
! 3043: * return value conventions, and computing a final u in addition to a final
! 3044: * v value before returning (and doing so even when the gcd isn't 1)
! 3045: */
! 3046:
! 3047: #define mysetsigne(d,s) { fprintferr("%ld %ld\n",s,signe(d)); if (signe(d)==-(s)) setsigne(d,s); }
! 3048:
! 3049: int
! 3050: invmod(GEN a, GEN b, GEN *res)
! 3051: {
! 3052: GEN v,v1,d,d1,q,r;
! 3053: long av,av1,lim;
! 3054: long s;
! 3055: ulong g;
! 3056: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
! 3057: int lhmres; /* Lehmer stage return value */
! 3058:
! 3059: if (typ(a) != t_INT || typ(b) != t_INT) err(arither1);
! 3060: if (!signe(b)) { *res=absi(a); return 0; }
! 3061: av = avma;
! 3062: if (lgefint(b) == 3) /* single-word affair */
! 3063: {
! 3064: d1 = modiu(a, (ulong)b[2]);
! 3065: if (d1 == gzero)
! 3066: {
! 3067: if (b[2] == 1L)
! 3068: { *res = gzero; return 1; }
! 3069: else
! 3070: { *res = absi(b); return 0; }
! 3071: }
! 3072: g = xgcduu((ulong)b[2], (ulong)d1[2], 1, &xv, &xv1, &s);
! 3073: #ifdef DEBUG_LEHMER
! 3074: fprintferr(" <- %lu,%lu\n", (ulong)b[2], (ulong)d1[2]);
! 3075: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
! 3076: #endif
! 3077: avma = av;
! 3078: if (g != 1UL) { *res = utoi(g); return 0; }
! 3079: xv = xv1 % (ulong)b[2]; if (s < 0) xv = ((ulong)b[2]) - xv;
! 3080: *res = utoi(xv); return 1;
! 3081: }
! 3082:
! 3083: (void)new_chunk(lgefint(b));
! 3084: d = absi(b); d1 = modii(a,d);
! 3085:
! 3086: v=gzero; v1=gun; /* general case */
! 3087: #ifdef DEBUG_LEHMER
! 3088: fprintferr("INVERT: -------------------------\n");
! 3089: output(d1);
! 3090: #endif
! 3091: av1 = avma; lim = stack_lim(av,1);
! 3092:
! 3093: while (lgefint(d) > 3 && signe(d1))
! 3094: {
! 3095: #ifdef DEBUG_LEHMER
! 3096: fprintferr("Calling Lehmer:\n");
! 3097: #endif
! 3098: lhmres = lgcdii((ulong*)d, (ulong*)d1, &xu, &xu1, &xv, &xv1);
! 3099: if (lhmres != 0) /* check progress */
! 3100: { /* apply matrix */
! 3101: #ifdef DEBUG_LEHMER
! 3102: fprintferr("Lehmer returned %d [%lu,%lu;%lu,%lu].\n",
! 3103: lhmres, xu, xu1, xv, xv1);
! 3104: #endif
! 3105: if ((lhmres == 1) || (lhmres == -1))
! 3106: {
! 3107: if (xv1 == 1)
! 3108: {
! 3109: r = subii(d,d1); d=d1; d1=r;
! 3110: a = subii(v,v1); v=v1; v1=a;
! 3111: }
! 3112: else
! 3113: {
! 3114: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
! 3115: a = subii(v, mului(xv1,v1)); v=v1; v1=a;
! 3116: }
! 3117: }
! 3118: else
! 3119: {
! 3120: r = subii(muliu(d,xu), muliu(d1,xv));
! 3121: a = subii(muliu(v,xu), muliu(v1,xv));
! 3122: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
! 3123: v1 = subii(muliu(v,xu1), muliu(v1,xv1)); v = a;
! 3124: if (lhmres&1) {
! 3125: setsigne(d,-signe(d));
! 3126: setsigne(v,-signe(v));
! 3127: }
! 3128: else {
! 3129: setsigne(d1,-signe(d1));
! 3130: setsigne(v1,-signe(v1));
! 3131: }
! 3132: }
! 3133: }
! 3134: #ifdef DEBUG_LEHMER
! 3135: else
! 3136: fprintferr("Lehmer returned 0.\n");
! 3137: output(d); output(d1); output(v); output(v1);
! 3138: sleep(1);
! 3139: #endif
! 3140:
! 3141: if (lhmres <= 0 && signe(d1))
! 3142: {
! 3143: q = dvmdii(d,d1,&r);
! 3144: #ifdef DEBUG_LEHMER
! 3145: fprintferr("Full division:\n");
! 3146: printf(" q = "); output(q); sleep (1);
! 3147: #endif
! 3148: a = subii(v,mulii(q,v1));
! 3149: v=v1; v1=a;
! 3150: d=d1; d1=r;
! 3151: }
! 3152: if (low_stack(lim, stack_lim(av,1)))
! 3153: {
! 3154: GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
! 3155: if(DEBUGMEM>1) err(warnmem,"invmod");
! 3156: gerepilemany(av1,gptr,4);
! 3157: }
! 3158: } /* end while */
! 3159:
! 3160: /* Postprocessing - final sprint */
! 3161: if (signe(d1))
! 3162: {
! 3163: /* Assertions: lgefint(d)==lgefint(d1)==3, and
! 3164: * gcd(d,d1) is nonzero and fits into one word
! 3165: */
! 3166: g = xxgcduu(d[2], d1[2], 1, &xu, &xu1, &xv, &xv1, &s);
! 3167: #ifdef DEBUG_LEHMER
! 3168: output(d);output(d1);output(v);output(v1);
! 3169: fprintferr(" <- %lu,%lu\n", (ulong)d[2], (ulong)d1[2]);
! 3170: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
! 3171: #endif
! 3172: if (g != 1UL) { avma = av; *res = utoi(g); return 0; }
! 3173: /* (From the xgcduu() blurb:)
! 3174: * For finishing the multiword modinv, we now have to multiply the
! 3175: * returned matrix (with properly adjusted signs) onto the values
! 3176: * v' and v1' previously obtained from the multiword division steps.
! 3177: * Actually, it is sufficient to take the scalar product of [v',v1']
! 3178: * with [u1,-v1], and change the sign if s==1.
! 3179: */
! 3180: v = subii(muliu(v,xu1),muliu(v1,xv1));
! 3181: if (s > 0) setsigne(v,-signe(v));
! 3182: avma = av; *res = modii(v,b);
! 3183: #ifdef DEBUG_LEHMER
! 3184: output(*res); fprintfderr("============================Done.\n");
! 3185: sleep(1);
! 3186: #endif
! 3187: return 1;
! 3188: }
! 3189: /* get here when the final sprint was skipped (d1 was zero already) */
! 3190: avma = av;
! 3191: if (!egalii(d,gun)) { *res = icopy(d); return 0; }
! 3192: *res = modii(v,b);
! 3193: #ifdef DEBUG_LEHMER
! 3194: output(*res); fprintferr("============================Done.\n");
! 3195: sleep(1);
! 3196: #endif
! 3197: return 1;
! 3198: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>