Annotation of OpenXM_contrib/pari/src/kernel/none/mp.c, Revision 1.1.1.1
1.1 maekawa 1: /***********************************************************************/
2: /** **/
3: /** MULTIPRECISION KERNEL **/
4: /** **/
5: /***********************************************************************/
6: /* $Id: mp.c,v 1.1.1.1 1999/09/16 13:47:57 karim Exp $ */
7: /* most of the routines in this file are commented out in 68k */
8: /* version (#ifdef __M68K__) since they are defined in mp.s */
9: #include "pari.h"
10:
11: /* z2 := z1[imin..imax].f shifted left sh bits (feeding f from the right) */
12: #define shift_left2(z2,z1,imin,imax,f, sh,m) {\
13: register ulong _i,_l,_k = ((ulong)f)>>m;\
14: for (_i=imax; _i>imin; _i--) {\
15: _l = z1[_i];\
16: z2[_i] = (_l<<(ulong)sh) | _k;\
17: _k = _l>>(ulong)m;\
18: }\
19: z2[imin]=(z1[imin]<<(ulong)sh) | _k;\
20: }
21: #define shift_left(z2,z1,imin,imax,f, sh) {\
22: register const ulong _m = BITS_IN_LONG - sh;\
23: shift_left2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
24: }
25:
26: /* z2 := f.z1[imin..imax-1] shifted right sh bits (feeding f from the left) */
27: #define shift_right2(z2,z1,imin,imax,f, sh,m) {\
28: register ulong _i,_k,_l = z1[2];\
29: z2[imin] = (_l>>(ulong)sh) | ((ulong)f<<(ulong)m);\
30: for (_i=imin+1; _i<imax; _i++) {\
31: _k = _l<<(ulong)m; _l = z1[_i];\
32: z2[_i] = (_l>>(ulong)sh) | _k;\
33: }\
34: }
35: #define shift_right(z2,z1,imin,imax,f, sh) {\
36: register const ulong _m = BITS_IN_LONG - sh;\
37: shift_right2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
38: }
39:
40: #ifdef INLINE
41: INLINE
42: #endif
43: GEN
44: addsispec(long s, GEN x, long nx)
45: {
46: GEN xd, zd = (GEN)avma;
47: long lz;
48: LOCAL_OVERFLOW;
49:
50: lz = nx+3; (void)new_chunk(lz);
51: xd = x + nx;
52: *--zd = addll(*--xd, s);
53: if (overflow)
54: for(;;)
55: {
56: if (xd == x) { *--zd = 1; break; } /* enlarge z */
57: *--zd = ((ulong)*--xd) + 1;
58: if (*zd) { lz--; break; }
59: }
60: else lz--;
61: while (xd > x) *--zd = *--xd;
62: *--zd = evalsigne(1) | evallgefint(lz);
63: *--zd = evaltyp(t_INT) | evallg(lz);
64: avma=(long)zd; return zd;
65: }
66:
67: #define swapspec(x,y, nx,ny) {long _a=nx;GEN _z=x; nx=ny; ny=_a; x=y; y=_z;}
68:
69: #ifdef INLINE
70: INLINE
71: #endif
72: GEN
73: addiispec(GEN x, GEN y, long nx, long ny)
74: {
75: GEN xd,yd,zd;
76: long lz;
77: LOCAL_OVERFLOW;
78:
79: if (nx < ny) swapspec(x,y, nx,ny);
80: if (ny == 1) return addsispec(*y,x,nx);
81: zd = (GEN)avma;
82: lz = nx+3; (void)new_chunk(lz);
83: xd = x + nx;
84: yd = y + ny;
85: *--zd = addll(*--xd, *--yd);
86: while (yd > y) *--zd = addllx(*--xd, *--yd);
87: if (overflow)
88: for(;;)
89: {
90: if (xd == x) { *--zd = 1; break; } /* enlarge z */
91: *--zd = ((ulong)*--xd) + 1;
92: if (*zd) { lz--; break; }
93: }
94: else lz--;
95: while (xd > x) *--zd = *--xd;
96: *--zd = evalsigne(1) | evallgefint(lz);
97: *--zd = evaltyp(t_INT) | evallg(lz);
98: avma=(long)zd; return zd;
99: }
100:
101: /* assume x >= y */
102: #ifdef INLINE
103: INLINE
104: #endif
105: GEN
106: subisspec(GEN x, long s, long nx)
107: {
108: GEN xd, zd = (GEN)avma;
109: long lz;
110: LOCAL_OVERFLOW;
111:
112: lz = nx+2; (void)new_chunk(lz);
113: xd = x + nx;
114: *--zd = subll(*--xd, s);
115: if (overflow)
116: for(;;)
117: {
118: *--zd = ((ulong)*--xd) - 1;
119: if (*xd) break;
120: }
121: if (xd == x)
122: while (*zd == 0) { zd++; lz--; } /* shorten z */
123: else
124: do *--zd = *--xd; while (xd > x);
125: *--zd = evalsigne(1) | evallgefint(lz);
126: *--zd = evaltyp(t_INT) | evallg(lz);
127: avma=(long)zd; return zd;
128: }
129:
130: /* assume x >= y */
131: #ifdef INLINE
132: INLINE
133: #endif
134: GEN
135: subiispec(GEN x, GEN y, long nx, long ny)
136: {
137: GEN xd,yd,zd;
138: long lz;
139: LOCAL_OVERFLOW;
140:
141: if (ny==1) return subisspec(x,*y,nx);
142: zd = (GEN)avma;
143: lz = nx+2; (void)new_chunk(lz);
144: xd = x + nx;
145: yd = y + ny;
146: *--zd = subll(*--xd, *--yd);
147: while (yd > y) *--zd = subllx(*--xd, *--yd);
148: if (overflow)
149: for(;;)
150: {
151: *--zd = ((ulong)*--xd) - 1;
152: if (*xd) break;
153: }
154: if (xd == x)
155: while (*zd == 0) { zd++; lz--; } /* shorten z */
156: else
157: do *--zd = *--xd; while (xd > x);
158: *--zd = evalsigne(1) | evallgefint(lz);
159: *--zd = evaltyp(t_INT) | evallg(lz);
160: avma=(long)zd; return zd;
161: }
162:
163: #ifndef __M68K__
164:
165: /* prototype of positive small ints */
166: static long pos_s[] = {
167: evaltyp(t_INT) | m_evallg(3), evalsigne(1) | evallgefint(3), 0 };
168:
169: /* prototype of negative small ints */
170: static long neg_s[] = {
171: evaltyp(t_INT) | m_evallg(3), evalsigne(-1) | evallgefint(3), 0 };
172:
173: void
174: affir(GEN x, GEN y)
175: {
176: const long s=signe(x),ly=lg(y);
177: long lx,sh,i;
178:
179: if (!s)
180: {
181: y[1] = evalexpo(-bit_accuracy(ly));
182: y[2] = 0; return;
183: }
184:
185: lx=lgefint(x); sh=bfffo(x[2]);
186: y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);
187: if (sh)
188: {
189: if (lx>ly)
190: { shift_left(y,x,2,ly-1, x[ly],sh); }
191: else
192: {
193: for (i=lx; i<ly; i++) y[i]=0;
194: shift_left(y,x,2,lx-1, 0,sh);
195: }
196: return;
197: }
198:
199: if (lx>=ly)
200: for (i=2; i<ly; i++) y[i]=x[i];
201: else
202: {
203: for (i=2; i<lx; i++) y[i]=x[i];
204: for ( ; i<ly; i++) y[i]=0;
205: }
206: }
207:
208: void
209: affrr(GEN x, GEN y)
210: {
211: long lx,ly,i;
212:
213: y[1] = x[1]; if (!signe(x)) { y[2]=0; return; }
214:
215: lx=lg(x); ly=lg(y);
216: if (lx>=ly)
217: for (i=2; i<ly; i++) y[i]=x[i];
218: else
219: {
220: for (i=2; i<lx; i++) y[i]=x[i];
221: for ( ; i<ly; i++) y[i]=0;
222: }
223: }
224:
225: GEN
226: shifti(GEN x, long n)
227: {
228: const long s=signe(x);
229: long lx,ly,i,m;
230: GEN y;
231:
232: if (!s) return gzero;
233: if (!n) return icopy(x);
234: lx = lgefint(x);
235: if (n > 0)
236: {
237: GEN z = (GEN)avma;
238: long d = n>>TWOPOTBITS_IN_LONG;
239:
240: ly = lx+d; y = new_chunk(ly);
241: for ( ; d; d--) *--z = 0;
242: m = n & (BITS_IN_LONG-1);
243: if (!m) for (i=2; i<lx; i++) y[i]=x[i];
244: else
245: {
246: register const ulong sh = BITS_IN_LONG - m;
247: shift_left2(y,x, 2,lx-1, 0,m,sh);
248: i = ((ulong)x[2]) >> sh;
249: if (i) { ly++; y = new_chunk(1); y[2] = i; }
250: }
251: }
252: else
253: {
254: n = -n;
255: ly = lx - (n>>TWOPOTBITS_IN_LONG);
256: if (ly<3) return gzero;
257: y = new_chunk(ly);
258: m = n & (BITS_IN_LONG-1);
259: if (!m) for (i=2; i<ly; i++) y[i]=x[i];
260: else
261: {
262: shift_right(y,x, 2,ly, 0,m);
263: if (y[2] == 0)
264: {
265: if (ly==3) { avma = (long)(y+3); return gzero; }
266: ly--; avma = (long)(++y);
267: }
268: }
269: }
270: y[1] = evalsigne(s)|evallgefint(ly);
271: y[0] = evaltyp(t_INT)|evallg(ly); return y;
272: }
273:
274: GEN
275: mptrunc(GEN x)
276: {
277: long d,e,i,s,m;
278: GEN y;
279:
280: if (typ(x)==t_INT) return icopy(x);
281: if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gzero;
282: d = (e>>TWOPOTBITS_IN_LONG) + 3;
283: m = e & (BITS_IN_LONG-1);
284: if (d > lg(x)) err(truer2);
285:
286: y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);
287: if (++m == BITS_IN_LONG)
288: for (i=2; i<d; i++) y[i]=x[i];
289: else
290: {
291: const register ulong sh = BITS_IN_LONG - m;
292: shift_right2(y,x, 2,d,0, sh,m);
293: }
294: return y;
295: }
296:
297: /* integral part */
298: GEN
299: mpent(GEN x)
300: {
301: long d,e,i,lx,m;
302: GEN y;
303:
304: if (typ(x)==t_INT) return icopy(x);
305: if (signe(x) >= 0) return mptrunc(x);
306: if ((e=expo(x)) < 0) return stoi(-1);
307: d = (e>>TWOPOTBITS_IN_LONG) + 3;
308: m = e & (BITS_IN_LONG-1);
309: lx=lg(x); if (d>lx) err(truer2);
310: y = new_chunk(d);
311: if (++m == BITS_IN_LONG)
312: {
313: for (i=2; i<d; i++) y[i]=x[i];
314: i=d; while (i<lx && !x[i]) i++;
315: if (i==lx) goto END;
316: }
317: else
318: {
319: const register ulong sh = BITS_IN_LONG - m;
320: shift_right2(y,x, 2,d,0, sh,m);
321: if (x[d-1]<<m == 0)
322: {
323: i=d; while (i<lx && !x[i]) i++;
324: if (i==lx) goto END;
325: }
326: }
327: /* set y:=y+1 */
328: for (i=d-1; i>=2; i--) { y[i]++; if (y[i]) goto END; }
329: y=new_chunk(1); y[2]=1; d++;
330: END:
331: y[1] = evalsigne(-1) | evallgefint(d);
332: y[0] = evaltyp(t_INT) | evallg(d); return y;
333: }
334:
335: int
336: cmpsi(long x, GEN y)
337: {
338: ulong p;
339:
340: if (!x) return -signe(y);
341:
342: if (x>0)
343: {
344: if (signe(y)<=0) return 1;
345: if (lgefint(y)>3) return -1;
346: p=y[2]; if (p == (ulong)x) return 0;
347: return p < (ulong)x ? 1 : -1;
348: }
349:
350: if (signe(y)>=0) return -1;
351: if (lgefint(y)>3) return 1;
352: p=y[2]; if (p == (ulong)-x) return 0;
353: return p < (ulong)(-x) ? -1 : 1;
354: }
355:
356: int
357: cmpii(GEN x, GEN y)
358: {
359: const long sx = signe(x), sy = signe(y);
360: long lx,ly,i;
361:
362: if (sx<sy) return -1;
363: if (sx>sy) return 1;
364: if (!sx) return 0;
365:
366: lx=lgefint(x); ly=lgefint(y);
367: if (lx>ly) return sx;
368: if (lx<ly) return -sx;
369: i=2; while (i<lx && x[i]==y[i]) i++;
370: if (i==lx) return 0;
371: return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
372: }
373:
374: int
375: cmprr(GEN x, GEN y)
376: {
377: const long sx = signe(x), sy = signe(y);
378: long ex,ey,lx,ly,lz,i;
379:
380: if (sx<sy) return -1;
381: if (sx>sy) return 1;
382: if (!sx) return 0;
383:
384: ex=expo(x); ey=expo(y);
385: if (ex>ey) return sx;
386: if (ex<ey) return -sx;
387:
388: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
389: i=2; while (i<lz && x[i]==y[i]) i++;
390: if (i<lz) return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
391: if (lx>=ly)
392: {
393: while (i<lx && !x[i]) i++;
394: return (i==lx) ? 0 : sx;
395: }
396: while (i<ly && !y[i]) i++;
397: return (i==ly) ? 0 : -sx;
398: }
399:
400: GEN
401: addss(long x, long y)
402: {
403: if (!x) return stoi(y);
404: if (x>0) { pos_s[2] = x; return addsi(y,pos_s); }
405: neg_s[2] = -x; return addsi(y,neg_s);
406: }
407:
408: GEN
409: addsi(long x, GEN y)
410: {
411: long sx,sy,ly;
412: GEN z;
413:
414: if (!x) return icopy(y);
415: sy=signe(y); if (!sy) return stoi(x);
416: if (x<0) { sx=-1; x=-x; } else sx=1;
417: if (sx==sy)
418: {
419: z = addsispec(x,y+2, lgefint(y)-2);
420: setsigne(z,sy); return z;
421: }
422: ly=lgefint(y);
423: if (ly==3)
424: {
425: const long d = y[2] - x;
426: if (!d) return gzero;
427: z=cgeti(3);
428: if (y[2] < 0 || d > 0) {
429: z[1] = evalsigne(sy) | evallgefint(3);
430: z[2] = d;
431: }
432: else {
433: z[1] = evalsigne(-sy) | evallgefint(3);
434: z[2] =-d;
435: }
436: return z;
437: }
438: z = subisspec(y+2,x, ly-2);
439: setsigne(z,sy); return z;
440: }
441:
442: GEN
443: addii(GEN x, GEN y)
444: {
445: long sx = signe(x);
446: long sy = signe(y);
447: long lx,ly;
448: GEN z;
449:
450: if (!sx) return sy? icopy(y): gzero;
451: if (!sy) return icopy(x);
452: lx=lgefint(x); ly=lgefint(y);
453:
454: if (sx==sy)
455: z = addiispec(x+2,y+2,lx-2,ly-2);
456: else
457: { /* sx != sy */
458: long i = lx - ly;
459: if (i==0)
460: {
461: i = absi_cmp(x,y);
462: if (!i) return gzero;
463: }
464: if (i<0) { sx=sy; swapspec(x,y, lx,ly); } /* ensure |x| >= |y| */
465: z = subiispec(x+2,y+2,lx-2,ly-2);
466: }
467: setsigne(z,sx); return z;
468: }
469:
470: GEN
471: addsr(long x, GEN y)
472: {
473: if (!x) return rcopy(y);
474: if (x>0) { pos_s[2]=x; return addir(pos_s,y); }
475: neg_s[2] = -x; return addir(neg_s,y);
476: }
477:
478: GEN
479: addir(GEN x, GEN y)
480: {
481: long e,l,ly;
482: GEN z;
483:
484: if (!signe(x)) return rcopy(y);
485: if (!signe(y))
486: {
487: l=lgefint(x)-(expo(y)>>TWOPOTBITS_IN_LONG);
488: if (l<3) err(adder3);
489: z=cgetr(l); affir(x,z); return z;
490: }
491:
492: e = expo(y)-expi(x); ly=lg(y);
493: if (e>0)
494: {
495: l = ly - (e>>TWOPOTBITS_IN_LONG);
496: if (l<3) return rcopy(y);
497: }
498: else l = ly + ((-e)>>TWOPOTBITS_IN_LONG)+1;
499: z=cgetr(l); affir(x,z); y=addrr(z,y);
500: z = y+l; ly = lg(y); while (ly--) z[ly] = y[ly];
501: avma=(long)z; return z;
502: }
503:
504: GEN
505: addrr(GEN x, GEN y)
506: {
507: long sx=signe(x),sy=signe(y),ex=expo(x),ey=expo(y);
508: long e,m,flag,i,j,f2,lx,ly,lz;
509: GEN z;
510: LOCAL_OVERFLOW;
511:
512: e = ey-ex;
513: if (!sy)
514: {
515: if (!sx)
516: {
517: if (e > 0) ex=ey;
518: z=cgetr(3); z[1]=evalexpo(ex); z[2]=0; return z;
519: }
520: if (e > 0) { z=cgetr(3); z[1]=evalexpo(ey); z[2]=0; return z; }
521: lz = 3 + ((-e)>>TWOPOTBITS_IN_LONG);
522: lx = lg(x); if (lz>lx) lz=lx;
523: z = cgetr(lz); while(--lz) z[lz]=x[lz];
524: return z;
525: }
526: if (!sx)
527: {
528: if (e < 0) { z=cgetr(3); z[1]=evalexpo(ex); z[2]=0; return z; }
529: lz = 3 + (e>>TWOPOTBITS_IN_LONG);
530: ly = lg(y); if (lz>ly) lz=ly;
531: z = cgetr(lz); while (--lz) z[lz]=y[lz];
532: return z;
533: }
534:
535: if (e < 0) { z=x; x=y; y=z; ey=ex; i=sx; sx=sy; sy=i; e=-e; }
536: /* now ey >= ex */
537: lx=lg(x); ly=lg(y);
538: if (e)
539: {
540: long d = e >> TWOPOTBITS_IN_LONG, l = ly-d;
541: if (l > lx) { flag=0; lz=lx+d+1; }
542: else if (l > 2) { flag=1; lz=ly; lx=l; }
543: else return rcopy(y);
544: m = e & (BITS_IN_LONG-1);
545: }
546: else
547: {
548: if (lx > ly) lx = ly;
549: flag=2; lz=lx; m=0;
550: }
551: z = cgetr(lz);
552: if (m)
553: { /* shift right x m bits */
554: const ulong sh = BITS_IN_LONG-m;
555: GEN p1 = x; x = new_chunk(lx+1);
556: shift_right2(x,p1,2,lx, 0,m,sh);
557: if (flag==0)
558: {
559: x[lx] = p1[lx-1] << sh;
560: if (x[lx]) { flag = 3; lx++; }
561: }
562: }
563:
564: if (sx==sy)
565: { /* addition */
566: i=lz-1; avma = (long)z;
567: if (flag==0) { z[i] = y[i]; i--; }
568: overflow=0;
569: for (j=lx-1; j>=2; i--,j--) z[i] = addllx(x[j],y[i]);
570:
571: if (overflow)
572: for (;;)
573: {
574: if (i==1)
575: {
576: shift_right(z,z, 2,lz, 1,1);
577: ey=evalexpo(ey+1); if (ey & ~EXPOBITS) err(adder4);
578: z[1] = evalsigne(sx) | ey; return z;
579: }
580: z[i] = y[i]+1; if (z[i--]) break;
581: }
582: for (; i>=2; i--) z[i]=y[i];
583: z[1] = evalsigne(sx) | evalexpo(ey); return z;
584: }
585:
586: /* subtraction */
587: if (e) f2 = 1;
588: else
589: {
590: i=2; while (i<lx && x[i]==y[i]) i++;
591: if (i==lx)
592: {
593: avma = (long)(z+lz);
594: e = evalexpo(ey - bit_accuracy(lx));
595: if (e & ~EXPOBITS) err(adder4);
596: z=cgetr(3); z[1]=e; z[2]=0; return z;
597: }
598: f2 = ((ulong)y[i] > (ulong)x[i]);
599: }
600:
601: /* result is non-zero. f2 = (y > x) */
602: i=lz-1;
603: if (f2)
604: {
605: if (flag==0) { z[i] = y[i]; i--; }
606: j=lx-1; z[i] = subll(y[i],x[j]); i--; j--;
607: for (; j>=2; i--,j--) z[i] = subllx(y[i],x[j]);
608: if (overflow)
609: for (;;) { z[i] = y[i]-1; if (y[i--]) break; }
610: for (; i>=2; i--) z[i]=y[i];
611: sx = sy;
612: }
613: else
614: {
615: if (flag==0) { z[i] = -y[i]; i--; overflow=1; } else overflow=0;
616: for (; i>=2; i--) z[i]=subllx(x[i],y[i]);
617: }
618:
619: x = z+2; i=0; while (!x[i]) i++;
620: lz -= i; z += i; m = bfffo(z[2]);
621: e = evalexpo(ey - (m | (i<<TWOPOTBITS_IN_LONG)));
622: if (e & ~EXPOBITS) err(adder5);
623: if (m) shift_left(z,z,2,lz-1, 0,m);
624: z[1] = evalsigne(sx) | e;
625: z[0] = evaltyp(t_REAL) | evallg(lz);
626: avma = (long)z; return z;
627: }
628:
629: GEN
630: mulss(long x, long y)
631: {
632: long s,p1;
633: GEN z;
634: LOCAL_HIREMAINDER;
635:
636: if (!x || !y) return gzero;
637: if (x<0) { s = -1; x = -x; } else s=1;
638: if (y<0) { s = -s; y = -y; }
639: p1 = mulll(x,y);
640: if (hiremainder)
641: {
642: z=cgeti(4); z[1] = evalsigne(s) | evallgefint(4);
643: z[2]=hiremainder; z[3]=p1; return z;
644: }
645: z=cgeti(3); z[1] = evalsigne(s) | evallgefint(3);
646: z[2]=p1; return z;
647: }
648: #endif
649:
650: GEN
651: muluu(ulong x, ulong y)
652: {
653: long p1;
654: GEN z;
655: LOCAL_HIREMAINDER;
656:
657: if (!x || !y) return gzero;
658: p1 = mulll(x,y);
659: if (hiremainder)
660: {
661: z=cgeti(4); z[1] = evalsigne(1) | evallgefint(4);
662: z[2]=hiremainder; z[3]=p1; return z;
663: }
664: z=cgeti(3); z[1] = evalsigne(1) | evallgefint(3);
665: z[2]=p1; return z;
666: }
667:
668: /* assume ny > 0 */
669: #ifdef INLINE
670: INLINE
671: #endif
672: GEN
673: mulsispec(long x, GEN y, long ny)
674: {
675: GEN yd, z = (GEN)avma;
676: long lz = ny+3;
677: LOCAL_HIREMAINDER;
678:
679: (void)new_chunk(lz);
680: yd = y + ny; *--z = mulll(x, *--yd);
681: while (yd > y) *--z = addmul(x,*--yd);
682: if (hiremainder) *--z = hiremainder; else lz--;
683: *--z = evalsigne(1) | evallgefint(lz);
684: *--z = evaltyp(t_INT) | evallg(lz);
685: avma=(long)z; return z;
686: }
687:
688: GEN
689: mului(ulong x, GEN y)
690: {
691: long s = signe(y);
692: GEN z;
693:
694: if (!s || !x) return gzero;
695: z = mulsispec(x, y+2, lgefint(y)-2);
696: setsigne(z,s); return z;
697: }
698:
699: #ifndef __M68K__
700:
701: GEN
702: mulsi(long x, GEN y)
703: {
704: long s = signe(y);
705: GEN z;
706:
707: if (!s || !x) return gzero;
708: if (x<0) { s = -s; x = -x; }
709: z = mulsispec(x, y+2, lgefint(y)-2);
710: setsigne(z,s); return z;
711: }
712:
713: GEN
714: mulsr(long x, GEN y)
715: {
716: long lx,i,s,garde,e,sh,m;
717: GEN z;
718: LOCAL_HIREMAINDER;
719:
720: if (!x) return gzero;
721: s = signe(y);
722: if (!s)
723: {
724: if (x<0) x = -x;
725: e = y[1] + (BITS_IN_LONG-1)-bfffo(x);
726: if (e & ~EXPOBITS) err(muler2);
727: z=cgetr(3); z[1]=e; z[2]=0; return z;
728: }
729: if (x<0) { s = -s; x = -x; }
730: if (x==1) { z=rcopy(y); setsigne(z,s); return z; }
731:
732: lx = lg(y); e = expo(y);
733: z=cgetr(lx); y--; garde=mulll(x,y[lx]);
734: for (i=lx-1; i>=3; i--) z[i]=addmul(x,y[i]);
735: z[2]=hiremainder;
736:
737: sh = bfffo(hiremainder); m = BITS_IN_LONG-sh;
738: if (sh) shift_left2(z,z, 2,lx-1, garde,sh,m);
739: e = evalexpo(m+e); if (e & ~EXPOBITS) err(muler2);
740: z[1] = evalsigne(s) | e; return z;
741: }
742:
743: GEN
744: mulrr(GEN x, GEN y)
745: {
746: long sx = signe(x), sy = signe(y);
747: long i,j,ly,lz,lzz,e,flag,p1;
748: ulong garde;
749: GEN z, y1;
750: LOCAL_HIREMAINDER;
751: LOCAL_OVERFLOW;
752:
753: e = evalexpo(expo(x)+expo(y)); if (e & ~EXPOBITS) err(muler4);
754: if (!sx || !sy) { z=cgetr(3); z[1]=e; z[2]=0; return z; }
755: if (sy<0) sx = -sx;
756: lz=lg(x); ly=lg(y);
757: if (lz>ly) { lz=ly; z=x; x=y; y=z; flag=1; } else flag = (lz!=ly);
758: z=cgetr(lz); z[1] = evalsigne(sx) | e;
759: if (lz==3)
760: {
761: if (flag)
762: {
763: (void)mulll(x[2],y[3]);
764: garde = addmul(x[2],y[2]);
765: }
766: else
767: garde = mulll(x[2],y[2]);
768: if ((long)hiremainder<0) { z[2]=hiremainder; z[1]++; }
769: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
770: return z;
771: }
772:
773: if (flag) { (void)mulll(x[2],y[lz]); garde = hiremainder; } else garde=0;
774: lzz=lz-1; p1=x[lzz];
775: if (p1)
776: {
777: (void)mulll(p1,y[3]);
778: garde = addll(addmul(p1,y[2]), garde);
779: z[lzz] = overflow+hiremainder;
780: }
781: else z[lzz]=0;
782: for (j=lz-2, y1=y-j; j>=3; j--)
783: {
784: p1 = x[j]; y1++;
785: if (p1)
786: {
787: (void)mulll(p1,y1[lz+1]);
788: garde = addll(addmul(p1,y1[lz]), garde);
789: for (i=lzz; i>j; i--)
790: {
791: hiremainder += overflow;
792: z[i] = addll(addmul(p1,y1[i]), z[i]);
793: }
794: z[j] = hiremainder+overflow;
795: }
796: else z[j]=0;
797: }
798: p1=x[2]; y1++;
799: garde = addll(mulll(p1,y1[lz]), garde);
800: for (i=lzz; i>2; i--)
801: {
802: hiremainder += overflow;
803: z[i] = addll(addmul(p1,y1[i]), z[i]);
804: }
805: z[2] = hiremainder+overflow;
806: if (z[2] < 0) z[1]++;
807: else
808: shift_left(z,z,2,lzz,garde, 1);
809: return z;
810: }
811:
812: GEN
813: mulir(GEN x, GEN y)
814: {
815: long sx=signe(x),sy,lz,lzz,ey,e,p1,i,j;
816: ulong garde;
817: GEN z,y1;
818: LOCAL_HIREMAINDER;
819: LOCAL_OVERFLOW;
820:
821: if (!sx) return gzero;
822: if (!is_bigint(x)) return mulsr(itos(x),y);
823: sy=signe(y); ey=expo(y);
824: if (!sy)
825: {
826: e = evalexpo(expi(x)+ey);
827: if (e & ~EXPOBITS) err(muler6);
828: z=cgetr(3); z[1]=e; z[2]=0; return z;
829: }
830:
831: if (sy<0) sx = -sx;
832: lz=lg(y); z=cgetr(lz);
833: y1=cgetr(lz+1);
834: affir(x,y1); x=y; y=y1;
835: e = evalexpo(expo(y)+ey);
836: if (e & ~EXPOBITS) err(muler4);
837: z[1] = evalsigne(sx) | e;
838: if (lz==3)
839: {
840: (void)mulll(x[2],y[3]);
841: garde=addmul(x[2],y[2]);
842: if ((long)hiremainder < 0) { z[2]=hiremainder; z[1]++; }
843: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
844: avma=(long)z; return z;
845: }
846:
847: (void)mulll(x[2],y[lz]); garde=hiremainder;
848: lzz=lz-1; p1=x[lzz];
849: if (p1)
850: {
851: (void)mulll(p1,y[3]);
852: garde=addll(addmul(p1,y[2]),garde);
853: z[lzz] = overflow+hiremainder;
854: }
855: else z[lzz]=0;
856: for (j=lz-2, y1=y-j; j>=3; j--)
857: {
858: p1=x[j]; y1++;
859: if (p1)
860: {
861: (void)mulll(p1,y1[lz+1]);
862: garde = addll(addmul(p1,y1[lz]), garde);
863: for (i=lzz; i>j; i--)
864: {
865: hiremainder += overflow;
866: z[i] = addll(addmul(p1,y1[i]), z[i]);
867: }
868: z[j] = hiremainder+overflow;
869: }
870: else z[j]=0;
871: }
872: p1=x[2]; y1++;
873: garde = addll(mulll(p1,y1[lz]), garde);
874: for (i=lzz; i>2; i--)
875: {
876: hiremainder += overflow;
877: z[i] = addll(addmul(p1,y1[i]), z[i]);
878: }
879: z[2] = hiremainder+overflow;
880: if (z[2] < 0) z[1]++;
881: else
882: shift_left(z,z,2,lzz,garde, 1);
883: avma=(long)z; return z;
884: }
885:
886: /* written by Bruno Haible following an idea of Robert Harley */
887: long
888: vals(ulong z)
889: {
890: static char tab[64]={-1,0,1,12,2,6,-1,13,3,-1,7,-1,-1,-1,-1,14,10,4,-1,-1,8,-1,-1,25,-1,-1,-1,-1,-1,21,27,15,31,11,5,-1,-1,-1,-1,-1,9,-1,-1,24,-1,-1,20,26,30,-1,-1,-1,-1,23,-1,19,29,-1,22,18,28,17,16,-1};
891: #ifdef LONG_IS_64BIT
892: long s;
893: #endif
894:
895: if (!z) return -1;
896: #ifdef LONG_IS_64BIT
897: if (! (z&0xffffffff)) { s = 32; z >>=32; } else s = 0;
898: #endif
899: z |= ~z + 1;
900: z += z << 4;
901: z += z << 6;
902: z ^= z << 16; /* or z -= z<<16 */
903: #ifdef LONG_IS_64BIT
904: return s + tab[(z&0xffffffff)>>26];
905: #else
906: return tab[z>>26];
907: #endif
908: }
909:
910: GEN
911: modss(long x, long y)
912: {
913: LOCAL_HIREMAINDER;
914:
915: if (!y) err(moder1);
916: if (y<0) y=-y;
917: hiremainder=0; divll(labs(x),y);
918: if (!hiremainder) return gzero;
919: return (x < 0) ? stoi(y-hiremainder) : stoi(hiremainder);
920: }
921:
922: GEN
923: resss(long x, long y)
924: {
925: LOCAL_HIREMAINDER;
926:
927: if (!y) err(reser1);
928: hiremainder=0; divll(labs(x),labs(y));
929: return (x < 0) ? stoi(-((long)hiremainder)) : stoi(hiremainder);
930: }
931:
932: GEN
933: divsi(long x, GEN y)
934: {
935: long p1, s = signe(y);
936: LOCAL_HIREMAINDER;
937:
938: if (!s) err(diver2);
939: if (!x || lgefint(y)>3 || ((long)y[2])<0)
940: {
941: hiremainder=x; SAVE_HIREMAINDER; return gzero;
942: }
943: hiremainder=0; p1=divll(labs(x),y[2]);
944: if (x<0) { hiremainder = -((long)hiremainder); p1 = -p1; }
945: if (s<0) p1 = -p1;
946: SAVE_HIREMAINDER; return stoi(p1);
947: }
948: #endif
949:
950: GEN
951: modui(ulong x, GEN y)
952: {
953: LOCAL_HIREMAINDER;
954:
955: if (!signe(y)) err(diver2);
956: if (!x || lgefint(y)>3) hiremainder=x;
957: else
958: {
959: hiremainder=0; (void)divll(x,y[2]);
960: }
961: return utoi(hiremainder);
962: }
963:
964: GEN
965: modiu(GEN y, ulong x)
966: {
967: long sy=signe(y),ly,i;
968: LOCAL_HIREMAINDER;
969:
970: if (!x) err(diver4);
971: if (!sy) return gzero;
972: ly = lgefint(y);
973: if (x <= (ulong)y[2]) hiremainder=0;
974: else
975: {
976: if (ly==3) return utoi((sy > 0)? (ulong)y[2]: x - (ulong)y[2]);
977: hiremainder=y[2]; ly--; y++;
978: }
979: for (i=2; i<ly; i++) (void)divll(y[i],x);
980: return utoi((sy > 0)? hiremainder: x - hiremainder);
981: }
982:
983: #ifndef __M68K__
984:
985: GEN
986: modsi(long x, GEN y)
987: {
988: long s = signe(y);
989: GEN p1;
990: LOCAL_HIREMAINDER;
991:
992: if (!s) err(diver2);
993: if (!x || lgefint(y)>3 || ((long)y[2])<0) hiremainder=x;
994: else
995: {
996: hiremainder=0; (void)divll(labs(x),y[2]);
997: if (x<0) hiremainder = -((long)hiremainder);
998: }
999: if (!hiremainder) return gzero;
1000: if (x>0) return stoi(hiremainder);
1001: if (s<0)
1002: { setsigne(y,1); p1=addsi(hiremainder,y); setsigne(y,-1); }
1003: else
1004: p1=addsi(hiremainder,y);
1005: return p1;
1006: }
1007:
1008: GEN
1009: divis(GEN y, long x)
1010: {
1011: long sy=signe(y),ly,s,i;
1012: GEN z;
1013: LOCAL_HIREMAINDER;
1014:
1015: if (!x) err(diver4);
1016: if (!sy) { hiremainder=0; SAVE_HIREMAINDER; return gzero; }
1017: if (x<0) { s = -sy; x = -x; } else s=sy;
1018:
1019: ly = lgefint(y);
1020: if ((ulong)x <= (ulong)y[2]) hiremainder=0;
1021: else
1022: {
1023: if (ly==3) { hiremainder=itos(y); SAVE_HIREMAINDER; return gzero; }
1024: hiremainder=y[2]; ly--; y++;
1025: }
1026: z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
1027: for (i=2; i<ly; i++) z[i]=divll(y[i],x);
1028: if (sy<0) hiremainder = - ((long)hiremainder);
1029: SAVE_HIREMAINDER; return z;
1030: }
1031:
1032: GEN
1033: divir(GEN x, GEN y)
1034: {
1035: GEN xr,z;
1036: long av,ly;
1037:
1038: if (!signe(y)) err(diver5);
1039: if (!signe(x)) return gzero;
1040: ly=lg(y); z=cgetr(ly); av=avma; xr=cgetr(ly+1); affir(x,xr);
1041: affrr(divrr(xr,y),z); avma=av; return z;
1042: }
1043:
1044: GEN
1045: divri(GEN x, GEN y)
1046: {
1047: GEN yr,z;
1048: long av,lx,s=signe(y);
1049:
1050: if (!s) err(diver8);
1051: if (!signe(x))
1052: {
1053: const long ex = evalexpo(expo(x) - expi(y));
1054:
1055: if (ex<0) err(diver12);
1056: z=cgetr(3); z[1]=ex; z[2]=0; return z;
1057: }
1058: if (!is_bigint(y)) return divrs(x, s>0? y[2]: -y[2]);
1059:
1060: lx=lg(x); z=cgetr(lx);
1061: av=avma; yr=cgetr(lx+1); affir(y,yr);
1062: affrr(divrr(x,yr),z); avma=av; return z;
1063: }
1064:
1065: void
1066: diviiz(GEN x, GEN y, GEN z)
1067: {
1068: long av=avma,lz;
1069: GEN xr,yr;
1070:
1071: if (typ(z)==t_INT) { affii(divii(x,y),z); avma=av; return; }
1072: lz=lg(z); xr=cgetr(lz); affir(x,xr); yr=cgetr(lz); affir(y,yr);
1073: affrr(divrr(xr,yr),z); avma=av;
1074: }
1075:
1076: void
1077: mpdivz(GEN x, GEN y, GEN z)
1078: {
1079: long av=avma;
1080:
1081: if (typ(z)==t_INT)
1082: {
1083: if (typ(x)==t_REAL || typ(y)==t_REAL) err(divzer1);
1084: affii(divii(x,y),z); avma=av; return;
1085: }
1086: if (typ(x)==t_INT)
1087: {
1088: GEN xr,yr;
1089: long lz;
1090:
1091: if (typ(y)==t_REAL) { affrr(divir(x,y),z); avma=av; return; }
1092: lz=lg(z); xr=cgetr(lz); affir(x,xr); yr=cgetr(lz); affir(y,yr);
1093: affrr(divrr(xr,yr),z); avma=av; return;
1094: }
1095: if (typ(y)==t_REAL) { affrr(divrr(x,y),z); avma=av; return; }
1096: affrr(divri(x,y),z); avma=av;
1097: }
1098:
1099: GEN
1100: divsr(long x, GEN y)
1101: {
1102: long av,ly;
1103: GEN p1,z;
1104:
1105: if (!signe(y)) err(diver3);
1106: if (!x) return gzero;
1107: ly=lg(y); z=cgetr(ly); av=avma;
1108: p1=cgetr(ly+1); affsr(x,p1); affrr(divrr(p1,y),z);
1109: avma=av; return z;
1110: }
1111:
1112: GEN
1113: modii(GEN x, GEN y)
1114: {
1115: switch(signe(x))
1116: {
1117: case 0: return gzero;
1118: case 1: return resii(x,y);
1119: default:
1120: {
1121: long av = avma;
1122: (void)new_chunk(lgefint(y));
1123: x = resii(x,y); avma=av;
1124: if (x==gzero) return x;
1125: return subiispec(y+2,x+2,lgefint(y)-2,lgefint(x)-2);
1126: }
1127: }
1128: }
1129:
1130: void
1131: modiiz(GEN x, GEN y, GEN z)
1132: {
1133: const long av = avma;
1134: affii(modii(x,y),z); avma=av;
1135: }
1136:
1137: GEN
1138: divrs(GEN x, long y)
1139: {
1140: long i,lx,ex,garde,sh,s=signe(x);
1141: GEN z;
1142: LOCAL_HIREMAINDER;
1143:
1144: if (!y) err(diver6);
1145: if (!s)
1146: {
1147: z=cgetr(3); z[1] = x[1] - (BITS_IN_LONG-1) + bfffo(y);
1148: if (z[1]<0) err(diver7);
1149: z[2]=0; return z;
1150: }
1151: if (y<0) { s = -s; y = -y; }
1152: if (y==1) { z=rcopy(x); setsigne(z,s); return z; }
1153:
1154: z=cgetr(lx=lg(x)); hiremainder=0;
1155: for (i=2; i<lx; i++) z[i]=divll(x[i],y);
1156:
1157: /* we may have hiremainder != 0 ==> garde */
1158: garde=divll(0,y); sh=bfffo(z[2]); ex=evalexpo(expo(x)-sh);
1159: if (ex & ~EXPOBITS) err(diver7);
1160: if (sh) shift_left(z,z, 2,lx-1, garde,sh);
1161: z[1] = evalsigne(s) | ex;
1162: return z;
1163: }
1164:
1165: GEN
1166: divrr(GEN x, GEN y)
1167: {
1168: long sx=signe(x), sy=signe(y), lx,ly,lz,e,i,j;
1169: ulong si,saux;
1170: GEN z,x1;
1171:
1172: if (!sy) err(diver9);
1173: e = evalexpo(expo(x) - expo(y));
1174: if (e & ~EXPOBITS) err(diver11);
1175: if (!sx) { z=cgetr(3); z[1]=e; z[2]=0; return z; }
1176: if (sy<0) sx = -sx;
1177: e = evalsigne(sx) | e;
1178: lx=lg(x); ly=lg(y);
1179: if (ly==3)
1180: {
1181: ulong k = x[2], l = (lx>3)? x[3]: 0;
1182: LOCAL_HIREMAINDER;
1183: if (k < (ulong)y[2]) e--;
1184: else
1185: {
1186: l >>= 1; if (k&1) l |= HIGHBIT;
1187: k >>= 1;
1188: }
1189: z=cgetr(3); z[1]=e;
1190: hiremainder=k; z[2]=divll(l,y[2]); return z;
1191: }
1192:
1193: lz = (lx<=ly)? lx: ly;
1194: x1 = (z=new_chunk(lz))-1;
1195: x1[1]=0; for (i=2; i<lz; i++) x1[i]=x[i];
1196: x1[lz] = (lx>lz)? x[lz]: 0;
1197: x=z; si=y[2]; saux=y[3];
1198: for (i=0; i<lz-1; i++)
1199: { /* x1 = x + (i-1) */
1200: ulong k,qp;
1201: LOCAL_HIREMAINDER;
1202: LOCAL_OVERFLOW;
1203: if ((ulong)x1[1] == si)
1204: {
1205: qp = MAXULONG; k=addll(si,x1[2]);
1206: }
1207: else
1208: {
1209: if ((ulong)x1[1] > si) /* can't happen if i=0 */
1210: {
1211: GEN y1 = y+1;
1212: overflow=0;
1213: for (j=lz-i; j>0; j--) x1[j] = subllx(x1[j],y1[j]);
1214: j=i; do x[--j]++; while (j && !x[j]);
1215: }
1216: hiremainder=x1[1]; overflow=0;
1217: qp=divll(x1[2],si); k=hiremainder;
1218: }
1219: if (!overflow)
1220: {
1221: long k3 = subll(mulll(qp,saux), x1[3]);
1222: long k4 = subllx(hiremainder,k);
1223: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
1224: }
1225: j = lz-i+1;
1226: if (j<ly) (void)mulll(qp,y[j]); else { hiremainder=0 ; j=ly; }
1227: for (j--; j>1; j--)
1228: {
1229: x1[j] = subll(x1[j], addmul(qp,y[j]));
1230: hiremainder += overflow;
1231: }
1232: if (x1[1] != hiremainder)
1233: {
1234: if ((ulong)x1[1] < hiremainder)
1235: {
1236: qp--;
1237: overflow=0; for (j=lz-i; j>1; j--) x1[j]=addllx(x1[j], y[j]);
1238: }
1239: else
1240: {
1241: x1[1] -= hiremainder;
1242: while (x1[1])
1243: {
1244: qp++; if (!qp) { j=i; do x[--j]++; while (j && !x[j]); }
1245: overflow=0; for (j=lz-i; j>1; j--) x1[j]=subllx(x1[j],y[j]);
1246: x1[1] -= overflow;
1247: }
1248: }
1249: }
1250: x1[1]=qp; x1++;
1251: }
1252: x1 = x-1; for (j=lz-1; j>=2; j--) x[j]=x1[j];
1253: if (*x) { shift_right(x,x, 2,lz, 1,1); } else e--;
1254: x[0]=evaltyp(t_REAL)|evallg(lz);
1255: x[1]=e; return x;
1256: }
1257: #endif /* !defined(__M68K__) */
1258: /* The following ones are not in mp.s (mulii is, with a different algorithm) */
1259:
1260: /* Integer division x / y:
1261: * if z = ONLY_REM return remainder, otherwise return quotient
1262: * if z != NULL set *z to remainder
1263: * *z is the last object on stack (and thus can be disposed of with cgiv
1264: * instead of gerepile)
1265: * If *z is zero, we put gzero here and no copy.
1266: * space needed: lx + ly
1267: */
1268: GEN
1269: dvmdii(GEN x, GEN y, GEN *z)
1270: {
1271: long sx=signe(x),sy=signe(y);
1272: long av,lx,ly,lz,i,j,sh,k,lq,lr;
1273: ulong si,qp,saux, *xd,*rd,*qd;
1274: GEN r,q,x1;
1275:
1276: if (!sy) err(dvmer1);
1277: if (!sx)
1278: {
1279: if (!z || z == ONLY_REM) return gzero;
1280: *z=gzero; return gzero;
1281: }
1282: lx=lgefint(x);
1283: ly=lgefint(y); lz=lx-ly;
1284: if (lz <= 0)
1285: {
1286: if (lz == 0)
1287: {
1288: for (i=2; i<lx; i++)
1289: if (x[i] != y[i])
1290: {
1291: if ((ulong)x[i] > (ulong)y[i]) goto DIVIDE;
1292: goto TRIVIAL;
1293: }
1294: if (z == ONLY_REM) return gzero;
1295: if (z) *z = gzero;
1296: if (sx < 0) sy = -sy;
1297: return stoi(sy);
1298: }
1299: TRIVIAL:
1300: if (z == ONLY_REM) return icopy(x);
1301: if (z) *z = icopy(x);
1302: return gzero;
1303: }
1304: DIVIDE: /* quotient is non-zero */
1305: av=avma; if (sx<0) sy = -sy;
1306: if (ly==3)
1307: {
1308: LOCAL_HIREMAINDER;
1309: si = y[2];
1310: if (si <= (ulong)x[2]) hiremainder=0;
1311: else
1312: {
1313: hiremainder = x[2]; lx--; x++;
1314: }
1315: q = new_chunk(lx); for (i=2; i<lx; i++) q[i]=divll(x[i],si);
1316: if (z == ONLY_REM)
1317: {
1318: avma=av; if (!hiremainder) return gzero;
1319: r=cgeti(3);
1320: r[1] = evalsigne(sx) | evallgefint(3);
1321: r[2]=hiremainder; return r;
1322: }
1323: q[1] = evalsigne(sy) | evallgefint(lx);
1324: q[0] = evaltyp(t_INT) | evallg(lx);
1325: if (!z) return q;
1326: if (!hiremainder) { *z=gzero; return q; }
1327: r=cgeti(3);
1328: r[1] = evalsigne(sx) | evallgefint(3);
1329: r[2] = hiremainder; *z=r; return q;
1330: }
1331:
1332: x1 = new_chunk(lx); sh = bfffo(y[2]);
1333: if (sh)
1334: { /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/
1335: register const ulong m = BITS_IN_LONG - sh;
1336: r = new_chunk(ly);
1337: shift_left2(r, y,2,ly-1, 0,sh,m); y = r;
1338: shift_left2(x1,x,2,lx-1, 0,sh,m);
1339: x1[1] = ((ulong)x[2]) >> m;
1340: }
1341: else
1342: {
1343: x1[1]=0; for (j=2; j<lx; j++) x1[j]=x[j];
1344: }
1345: x=x1; si=y[2]; saux=y[3];
1346: for (i=0; i<=lz; i++)
1347: { /* x1 = x + i */
1348: LOCAL_HIREMAINDER;
1349: LOCAL_OVERFLOW;
1350: if ((ulong)x1[1] == si)
1351: {
1352: qp = MAXULONG; k=addll(si,x1[2]);
1353: }
1354: else
1355: {
1356: hiremainder=x1[1]; overflow=0;
1357: qp=divll(x1[2],si); k=hiremainder;
1358: }
1359: if (!overflow)
1360: {
1361: long k3 = subll(mulll(qp,saux), x1[3]);
1362: long k4 = subllx(hiremainder,k);
1363: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
1364: }
1365: hiremainder=0;
1366: for (j=ly-1; j>1; j--)
1367: {
1368: x1[j] = subll(x1[j], addmul(qp,y[j]));
1369: hiremainder+=overflow;
1370: }
1371: if ((ulong)x1[1] < hiremainder)
1372: {
1373: overflow=0; qp--;
1374: for (j=ly-1; j>1; j--) x1[j] = addllx(x1[j],y[j]);
1375: }
1376: x1[1]=qp; x1++;
1377: }
1378:
1379: lq = lz+2;
1380: if (!z)
1381: {
1382: qd = (ulong*)av;
1383: xd = (ulong*)(x + lq);
1384: if (x[1]) { lz++; lq++; }
1385: while (lz--) *--qd = *--xd;
1386: *--qd = evalsigne(sy) | evallgefint(lq);
1387: *--qd = evaltyp(t_INT) | evallg(lq);
1388: avma = (long)qd; return (GEN)qd;
1389: }
1390:
1391: j=lq; while (j<lx && !x[j]) j++;
1392: if (z == ONLY_REM)
1393: {
1394: lz = lx-j; if (lz==0) { avma = av; return gzero; }
1395: rd = (ulong*)av; lr = lz+2;
1396: xd = (ulong*)(x + lx);
1397: if (!sh) while (lz--) *--rd = *--xd;
1398: else
1399: { /* shift remainder right by sh bits */
1400: const ulong shl = BITS_IN_LONG - sh;
1401: ulong l;
1402: xd--;
1403: while (--lz) /* fill r[3..] */
1404: {
1405: l = *xd >> sh;
1406: *--rd = l | (*--xd << shl);
1407: }
1408: l = *xd >> sh;
1409: if (l) *--rd = l; else lr--;
1410: }
1411: *--rd = evalsigne(sx) | evallgefint(lr);
1412: *--rd = evaltyp(t_INT) | evallg(lr);
1413: avma = (long)rd; return (GEN)rd;
1414: }
1415:
1416: lz = lx-j; lr = lz+2;
1417: if (lz)
1418: {
1419: xd = (ulong*)(x + lx);
1420: if (!sh)
1421: {
1422: rd = (ulong*)avma; r = new_chunk(lr);
1423: while (lz--) *--rd = *--xd;
1424: }
1425: else
1426: { /* shift remainder right by sh bits */
1427: const ulong shl = BITS_IN_LONG - sh;
1428: ulong l;
1429: rd = (ulong*)x; /* overwrite shifted y */
1430: xd--;
1431: while (--lz)
1432: {
1433: l = *xd >> sh;
1434: *--rd = l | (*--xd << shl);
1435: }
1436: l = *xd >> sh;
1437: if (l) *--rd = l; else lr--;
1438: }
1439: *--rd = evalsigne(sx) | evallgefint(lr);
1440: *--rd = evaltyp(t_INT) | evallg(lr);
1441: rd += lr;
1442: }
1443: qd = (ulong*)av;
1444: xd = (ulong*)(x + lq);
1445: if (x[1]) lq++;
1446: j = lq-2; while (j--) *--qd = *--xd;
1447: *--qd = evalsigne(sy) | evallgefint(lq);
1448: *--qd = evaltyp(t_INT) | evallg(lq);
1449: q = (GEN)qd;
1450: if (lr==2) *z = gzero;
1451: else
1452: {
1453: while (lr--) *--qd = *--rd;
1454: *z = (GEN)qd;
1455: }
1456: avma = (long)qd; return q;
1457: }
1458:
1459: /* assume y > x > 0. return y mod x */
1460: ulong
1461: mppgcd_resiu(GEN y, ulong x)
1462: {
1463: long i, ly = lgefint(y);
1464: LOCAL_HIREMAINDER;
1465:
1466: hiremainder = 0;
1467: for (i=2; i<ly; i++) (void)divll(y[i],x);
1468: return hiremainder;
1469: }
1470:
1471: /* Assume x>y>0, both of them odd. return x-y if x=y mod 4, x+y otherwise */
1472: void
1473: mppgcd_plus_minus(GEN x, GEN y, GEN res)
1474: {
1475: long av = avma;
1476: long lx = lgefint(x)-1;
1477: long ly = lgefint(y)-1, lt,m,i;
1478: GEN t;
1479:
1480: if ((x[lx]^y[ly]) & 3) /* x != y mod 4*/
1481: t = addiispec(x+2,y+2,lx-1,ly-1);
1482: else
1483: t = subiispec(x+2,y+2,lx-1,ly-1);
1484:
1485: lt = lgefint(t)-1; while (!t[lt]) lt--;
1486: m = vals(t[lt]); lt++;
1487: if (m == 0) /* 2^32 | t */
1488: {
1489: for (i = 2; i < lt; i++) res[i] = t[i];
1490: }
1491: else if (t[2] >> m)
1492: {
1493: shift_right(res,t, 2,lt, 0,m);
1494: }
1495: else
1496: {
1497: lt--; t++;
1498: shift_right(res,t, 2,lt, t[1],m);
1499: }
1500: res[1] = evalsigne(1)|evallgefint(lt);
1501: avma = av;
1502: }
1503:
1504: /* private version of mulss */
1505: ulong
1506: smulss(ulong x, ulong y, ulong *rem)
1507: {
1508: LOCAL_HIREMAINDER;
1509: x=mulll(x,y); *rem=hiremainder; return x;
1510: }
1511:
1512: #ifdef LONG_IS_64BIT
1513: # define DIVCONVI 7
1514: #else
1515: # define DIVCONVI 14
1516: #endif
1517:
1518: /* Conversion entier --> base 10^9. Assume x > 0 */
1519: GEN
1520: convi(GEN x)
1521: {
1522: ulong av=avma, lim;
1523: long lz;
1524: GEN z,p1;
1525:
1526: lz = 3 + ((lgefint(x)-2)*15)/DIVCONVI;
1527: z=new_chunk(lz); z[1] = -1; p1 = z+2;
1528: lim = stack_lim(av,1);
1529: for(;;)
1530: {
1531: x = divis(x,1000000000); *p1++ = hiremainder;
1532: if (!signe(x)) { avma=av; return p1; }
1533: if (low_stack(lim, stack_lim(av,1))) x = gerepileuptoint((long)z,x);
1534: }
1535: }
1536: #undef DIVCONVI
1537:
1538: /* Conversion partie fractionnaire --> base 10^9 (expo(x)<0) */
1539: GEN
1540: confrac(GEN x)
1541: {
1542: long lx=lg(x), ex = -expo(x)-1, d,m,ly,ey,lr,nbdec,i,j;
1543: GEN y,y1;
1544:
1545: ey = ex + bit_accuracy(lx);
1546: ly = 1 + ((ey-1) >> TWOPOTBITS_IN_LONG);
1547: d = ex >> TWOPOTBITS_IN_LONG;
1548: m = ex & (BITS_IN_LONG-1);
1549: y = new_chunk(ly); y1 = y + (d-2);
1550: while (d--) y[d]=0;
1551: if (!m)
1552: for (i=2; i<lx; i++) y1[i] = x[i];
1553: else
1554: { /* shift x left sh bits */
1555: const ulong sh=BITS_IN_LONG-m;
1556: ulong k=0, l;
1557: for (i=2; i<lx; i++)
1558: {
1559: l = x[i];
1560: y1[i] = (l>>m)|k;
1561: k = l<<sh;
1562: }
1563: y1[i] = k;
1564: }
1565: nbdec = (long) (ey*L2SL10)+1; lr=(nbdec+17)/9;
1566: y1=new_chunk(lr); *y1=nbdec;
1567: for (j=1; j<lr; j++)
1568: {
1569: LOCAL_HIREMAINDER;
1570: hiremainder=0;
1571: for (i=ly-1; i>=0; i--) y[i]=addmul(y[i],1000000000);
1572: y1[j]=hiremainder;
1573: }
1574: return y1;
1575: }
1576:
1577: GEN
1578: truedvmdii(GEN x, GEN y, GEN *z)
1579: {
1580: long av=avma,tetpil;
1581: GEN res, qu = dvmdii(x,y,&res);
1582: GEN *gptr[2];
1583:
1584: if (signe(res)>=0)
1585: {
1586: if (z == ONLY_REM) return gerepileuptoint(av,res);
1587: if (z) *z = res; else cgiv(res);
1588: return qu;
1589: }
1590:
1591: tetpil=avma;
1592: if (z == ONLY_REM)
1593: {
1594: res = subiispec(y+2,res+2, lgefint(y)-2,lgefint(res)-2);
1595: return gerepile(av,tetpil,res);
1596: }
1597: qu = addsi(-signe(y),qu);
1598: if (!z) return gerepile(av,tetpil,qu);
1599:
1600: *z = subiispec(y+2,res+2, lgefint(y)-2,lgefint(res)-2);
1601: gptr[0]=&qu; gptr[1]=z; gerepilemanysp(av,tetpil,gptr,2);
1602: return qu;
1603: }
1604:
1605: #if 0
1606: /* Exact integer division */
1607:
1608: static ulong
1609: invrev(ulong b)
1610: /* Find c such that 1=c*b mod B (where B = 2^32 or 2^64), assuming b odd,
1611: which is not checked */
1612: {
1613: int r;
1614: ulong x;
1615:
1616: r=b&7; x=(r==3 || r==5)? b+8: b; /* x=b^(-1) mod 2^4 */
1617: x=x*(2-b*x); x=x*(2-b*x); x=x*(2-b*x); /* x=b^(-1) mod 2^32 */
1618: #ifdef LONG_IS_64BIT
1619: x=x*(2-b*x); /* x=b^(-1) mod 2^64 */
1620: #endif
1621: return x;
1622: }
1623:
1624: #define divllrev(a,b) (((ulong)a)*invrev(b))
1625:
1626: /* 2-adic division */
1627: GEN
1628: diviirev(GEN x, GEN y, long a)
1629: /* Find z such that |x|=|y|*z mod B^a, where a<=lgefint(x)-2 */
1630: {
1631: long lx,lgx,ly,vy,av=avma,tetpil,i,j,ii;
1632: ulong binv,q;
1633: GEN z;
1634:
1635: if (!signe(y)) err(dvmer1);
1636: if (!signe(x)) return gzero;
1637: /* make y odd */
1638: vy=vali(y);
1639: if (vy)
1640: {
1641: if (vali(x)<vy) err(talker,"impossible division in diviirev");
1642: y=shifti(y,-vy); x=shifti(x,-vy); a-=(vy>>TWOPOTBITS_IN_LONG);
1643: }
1644: else x=icopy(x); /* necessary because we destroy x */
1645: /* improve the above by touching only a words */
1646: if (a<=0) {avma=a;return gzero;}
1647: /* now y is odd */
1648: lx=a+2; ly=lgefint(y); lgx=lgefint(x);
1649: if (lx>lgx) err(talker,"3rd parameter too large in diviirev");
1650: binv=invrev(y[ly-1]);
1651: z=cgeti(lx);
1652: for (ii=lgx-1,i=lx-1; i>=2; i--,ii--)
1653: {
1654: long limj;
1655: LOCAL_HIREMAINDER;
1656: LOCAL_OVERFLOW;
1657:
1658: z[i]=q=binv*((ulong)x[ii]); /* this is the i-th quotient */
1659: limj=max(lgx-a,3+ii-ly);
1660: mulll(q,y[ly-1]); overflow=0;
1661: for (j=ii-1; j>=limj; j--)
1662: x[j]=subllx(x[j],addmul(q,y[j+ly-ii-1]));
1663: }
1664: tetpil=avma; i=2; while((i<lx)&&(!z[i])) i++;
1665: if (i==lx) {avma=av; return gzero;}
1666: y=cgeti(lx-i+2); y[1]=evalsigne(1)|evallgefint(lx-i+2); j=2;
1667: for ( ; i<lx; i++) y[j++]=z[i];
1668: return gerepile(av,tetpil,y);
1669: }
1670:
1671: GEN
1672: diviiexactfullrev(GEN x, GEN y)
1673: /* Find z such that x=y*z knowing that y divides x */
1674: {
1675: long lx,lz,ly,vy,av=avma,tetpil,i,j,ii,sx=signe(x),sy=signe(y);
1676: ulong binv,q;
1677: GEN z;
1678:
1679: if (!sy) err(dvmer1);
1680: if (!sx) return gzero;
1681: /* make y odd */
1682: vy=vali(y);
1683: if (vy)
1684: {
1685: if (vali(x)<vy) err(talker,"impossible division in diviirev");
1686: y=shifti(y,-vy); x=shifti(x,-vy);
1687: }
1688: else x=icopy(x); /* necessary because we destroy x */
1689: /* now y is odd */
1690: ly=lgefint(y); lx=lgefint(x);
1691: if (ly>lx) err(talker,"impossible division in diviirev");
1692: binv=invrev(y[ly-1]);
1693: i=2; while (i<ly && y[i]==x[i]) i++;
1694: lz=(i==ly || y[i]<x[i]) ? lx-ly+3 : lx-ly+2;
1695: z=cgeti(lz);
1696: for (ii=lx-1,i=lz-1; i>=2; i--,ii--)
1697: {
1698: long limj;
1699: LOCAL_HIREMAINDER;
1700: LOCAL_OVERFLOW;
1701:
1702: z[i]=q=binv*((ulong)x[ii]); /* this is the i-th quotient */
1703: limj=max(lx-lz+2,3+ii-ly);
1704: mulll(q,y[ly-1]); overflow=0;
1705: for (j=ii-1; j>=limj; j--)
1706: x[j]=subllx(x[j],addmul(q,y[j+ly-ii-1]));
1707: }
1708: tetpil=avma; i=2; while((i<lz)&&(!z[i])) i++;
1709: if (i==lz) err(talker,"bug in diviiexact");
1710: y=cgeti(lz-i+2); y[1]=evalsigne(sx*sy) | evallgefint(lz-i+2); j=2;
1711: for ( ; i<lz; i++) y[j++]=z[i];
1712: return gerepile(av,tetpil,y);
1713: }
1714:
1715: GEN
1716: diviiexact2(GEN x, GEN y)
1717: /* Find z such that x=y*z assuming y divides x (which is not checked) */
1718: {
1719: long sx=signe(x),sy=signe(y),av=avma,tetpil,lyinv,lpr,a,lx,ly,lz,lzs,lp1;
1720: long i,j,vy;
1721: ulong aux;
1722: GEN yinv,p1,z,xinit,yinit;
1723:
1724: if (!sy) err(dvmer1);
1725: if (!sx) return gzero;
1726: xinit=x; yinit=y;
1727: setsigne(y,1);setsigne(x,1);
1728: /* make y odd */
1729: vy=vali(y);
1730: if (vy)
1731: {
1732: if (vali(x)<vy) err(talker,"impossible division in diviirev");
1733: y=shifti(y,-vy); x=shifti(x,-vy);
1734: }
1735: /* now y is odd */
1736: ly=lgefint(y); lx=lgefint(x);
1737: if (lx<ly) err(talker,"not an exact division in diviiexact");
1738: a=lx-ly+1; lz=a+2;
1739: aux=invrev(y[ly-1]);
1740: if (aux & HIGHBIT) {yinv=stoi(aux^HIGHBIT);yinv[2]|=HIGHBIT;}
1741: else yinv=stoi(aux); /* inverse of y mod 2^32 (or 2^64) */
1742: lpr=1; /* current accuracy */
1743: while(lpr<a)
1744: {
1745: long lycut;
1746: GEN ycut;
1747:
1748: lyinv=lgefint(yinv);
1749: lycut=min(2*lpr+2,ly);
1750: ycut=cgeti(lycut); ycut[1]=evalsigne(1) | evallgefint(lycut);
1751: for(i=2; i<lycut; i++) ycut[i]=y[ly+i-lycut];
1752: p1=mulii(yinv,ycut); lp1=lgefint(p1);
1753: if (lp1>lpr+2)
1754: {
1755: long lp1cut,lynew,lp2;
1756: GEN p1cut,p2,ynew;
1757: LOCAL_OVERFLOW;
1758:
1759: lp1cut=min(lp1-lpr,lpr+2);
1760: p1cut=cgeti(lp1cut); p1cut[1]=evalsigne(1) | evallgefint(lp1cut);
1761: overflow=0;
1762: for (i=lp1cut-1; i>=2; i--) p1cut[i]=subllx(0,p1[i+lp1-lpr-lp1cut]);
1763: p2=mulii(p1cut,yinv); lp2=lgefint(p2);
1764: lynew=(lp2<=lpr+2) ? lpr+lp2 : 2*lpr+2;
1765: ynew=cgeti(lynew); ynew[1]=evalsigne(1) | evallgefint(lynew);
1766: for (i=lynew-1; i>=lynew-lpr; i--) ynew[i]=yinv[i+lyinv-lynew];
1767: for (i=lynew-lpr-1; i>=2; i--) ynew[i]=p2[i+lp2+lpr-lynew];
1768: yinv=ynew;
1769: }
1770: lpr<<=1;
1771: }
1772: lyinv=lgefint(yinv); lzs=min(lz,lyinv);
1773: z=cgeti(lzs); z[1]=evalsigne(1) | evallgefint(lzs);
1774: for(i=2; i<lzs; i++) z[i]=yinv[i+lyinv-lzs];
1775: p1=mulii(z,x); lp1=lgefint(p1); lzs=min(lz,lp1);
1776: z=cgeti(lzs);
1777: for (i=2; i<lzs; i++) z[i]=p1[i+lp1-lzs];
1778: i=2; while (i<lzs && !z[i]) i++;
1779: if (i==lzs) err(talker,"bug in diviiexact");
1780: tetpil=avma; p1=cgeti(lzs-i+2);
1781: p1[1]=evalsigne(sx*sy) | evallgefint(lzs-i+2);
1782: for (j=2; j<=lzs-i+1; j++) p1[j]=z[j+i-2];
1783: setsigne(xinit,sx);setsigne(yinit,sy);
1784: return gerepile(av,tetpil,p1);
1785: }
1786: #endif
1787:
1788: long
1789: smodsi(long x, GEN y)
1790: {
1791: if (x<0) err(talker,"negative small integer in smodsi");
1792: (void)divsi(x,y); return hiremainder;
1793: }
1794:
1795: /* x and y are integers. Return 1 if |x| == |y|, 0 otherwise */
1796: int
1797: absi_equal(GEN x, GEN y)
1798: {
1799: long lx,i;
1800:
1801: if (!signe(x)) return !signe(y);
1802: if (!signe(y)) return 0;
1803:
1804: lx=lgefint(x); if (lx != lgefint(y)) return 0;
1805: i=2; while (i<lx && x[i]==y[i]) i++;
1806: return (i==lx);
1807: }
1808:
1809: /* x and y are integers. Return sign(|x| - |y|) */
1810: int
1811: absi_cmp(GEN x, GEN y)
1812: {
1813: long lx,ly,i;
1814:
1815: if (!signe(x)) return signe(y)? -1: 0;
1816: if (!signe(y)) return 1;
1817:
1818: lx=lgefint(x); ly=lgefint(y);
1819: if (lx>ly) return 1;
1820: if (lx<ly) return -1;
1821: i=2; while (i<lx && x[i]==y[i]) i++;
1822: if (i==lx) return 0;
1823: return ((ulong)x[i] > (ulong)y[i])? 1: -1;
1824: }
1825:
1826: /* x and y are reals. Return sign(|x| - |y|) */
1827: int
1828: absr_cmp(GEN x, GEN y)
1829: {
1830: long ex,ey,lx,ly,lz,i;
1831:
1832: if (!signe(x)) return signe(y)? -1: 0;
1833: if (!signe(y)) return 1;
1834:
1835: ex=expo(x); ey=expo(y);
1836: if (ex>ey) return 1;
1837: if (ex<ey) return -1;
1838:
1839: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
1840: i=2; while (i<lz && x[i]==y[i]) i++;
1841: if (i<lz) return ((ulong)x[i] > (ulong)y[i])? 1: -1;
1842: if (lx>=ly)
1843: {
1844: while (i<lx && !x[i]) i++;
1845: return (i==lx)? 0: 1;
1846: }
1847: while (i<ly && !y[i]) i++;
1848: return (i==ly)? 0: -1;
1849: }
1850:
1851: /********************************************************************/
1852: /** **/
1853: /** INTEGER MULTIPLICATION (KARATSUBA) **/
1854: /** **/
1855: /********************************************************************/
1856:
1857: #if 0 /* for tunings */
1858: long SQRI_LIMIT = 12;
1859: long MULII_LIMIT = 16;
1860:
1861: void
1862: setsqi(long a) { SQRI_LIMIT=a; }
1863: void
1864: setmuli(long a) { MULII_LIMIT=a; }
1865:
1866: GEN
1867: speci(GEN x, long nx)
1868: {
1869: GEN z = cgeti(nx+2);
1870: long i;
1871: for (i=0; i<nx; i++) z[i+2] = x[i];
1872: z[1]=evalsigne(1)|evallgefint(nx+2);
1873: return z;
1874: }
1875: #else
1876: # define SQRI_LIMIT 12
1877: # define MULII_LIMIT 16
1878: #endif
1879:
1880: /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
1881: #ifdef INLINE
1882: INLINE
1883: #endif
1884: GEN
1885: muliispec(GEN x, GEN y, long nx, long ny)
1886: {
1887: GEN z2e,z2d,yd,xd,ye,zd;
1888: long p1,lz;
1889: LOCAL_HIREMAINDER;
1890:
1891: if (!ny) return gzero;
1892: zd = (GEN)avma; lz = nx+ny+2;
1893: (void)new_chunk(lz);
1894: xd = x + nx;
1895: yd = y + ny;
1896: ye = yd; p1 = *--xd;
1897:
1898: *--zd = mulll(p1, *--yd); z2e = zd;
1899: while (yd > y) *--zd = addmul(p1, *--yd);
1900: *--zd = hiremainder;
1901:
1902: while (xd > x)
1903: {
1904: LOCAL_OVERFLOW;
1905: yd = ye; p1 = *--xd;
1906:
1907: z2d = --z2e;
1908: *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
1909: while (yd > y)
1910: {
1911: hiremainder += overflow;
1912: *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
1913: }
1914: *--zd = hiremainder + overflow;
1915: }
1916: if (*zd == 0) { zd++; lz--; } /* normalize */
1917: *--zd = evalsigne(1) | evallgefint(lz);
1918: *--zd = evaltyp(t_INT) | evallg(lz);
1919: avma=(long)zd; return zd;
1920: }
1921:
1922: /* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */
1923: static GEN
1924: addshiftw(GEN x, GEN y, long d)
1925: {
1926: GEN z,z0,y0,yd, zd = (GEN)avma;
1927: long a,lz,ly = lgefint(y);
1928:
1929: z0 = new_chunk(d);
1930: a = ly-2; yd = y+ly;
1931: if (a >= d)
1932: {
1933: y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */
1934: a -= d;
1935: if (a)
1936: z = addiispec(x+2, y+2, lgefint(x)-2, a);
1937: else
1938: z = icopy(x);
1939: }
1940: else
1941: {
1942: y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */
1943: while (zd >= z0) *--zd = 0; /* complete with 0s */
1944: z = icopy(x);
1945: }
1946: lz = lgefint(z)+d;
1947: z[1] = evalsigne(1) | evallgefint(lz);
1948: z[0] = evaltyp(t_INT) | evallg(lz); return z;
1949: }
1950:
1951: /* Fast product (Karatsuba) of integers a,b. These are not real GENs, a+2
1952: * and b+2 were sent instead. na, nb = number of digits of a, b.
1953: * c,c0,c1,c2 are genuine GENs.
1954: */
1955: static GEN
1956: quickmulii(GEN a, GEN b, long na, long nb)
1957: {
1958: GEN a0,c,c0;
1959: long av,n0,n0a,i;
1960:
1961: if (na < nb) swapspec(a,b, na,nb);
1962: if (nb == 1) return mulsispec(*b, a, na);
1963: if (nb == 0) return gzero;
1964: if (nb < MULII_LIMIT) return muliispec(a,b,na,nb);
1965: i=(na>>1); n0=na-i; na=i;
1966: av=avma; a0=a+na; n0a=n0;
1967: while (!*a0 && n0a) { a0++; n0a--; }
1968:
1969: if (n0a && nb > n0)
1970: { /* nb <= na <= n0 */
1971: GEN b0,c1,c2;
1972: long n0b;
1973:
1974: nb -= n0;
1975: c = quickmulii(a,b,na,nb);
1976: b0 = b+nb; n0b = n0;
1977: while (!*b0 && n0b) { b0++; n0b--; }
1978: if (n0b)
1979: {
1980: c0 = quickmulii(a0,b0, n0a,n0b);
1981:
1982: c2 = addiispec(a0,a, n0a,na);
1983: c1 = addiispec(b0,b, n0b,nb);
1984: c1 = quickmulii(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
1985: c2 = addiispec(c0+2, c+2, lgefint(c0)-2,lgefint(c) -2);
1986:
1987: c1 = subiispec(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
1988: }
1989: else
1990: {
1991: c0 = gzero;
1992: c1 = quickmulii(a0,b, n0a,nb);
1993: }
1994: c = addshiftw(c,c1, n0);
1995: }
1996: else
1997: {
1998: c = quickmulii(a,b,na,nb);
1999: c0 = quickmulii(a0,b,n0a,nb);
2000: }
2001: return gerepileuptoint(av, addshiftw(c,c0, n0));
2002: }
2003:
2004: /* actual operations will take place on a+2 and b+2: we strip the codewords */
2005: GEN
2006: mulii(GEN a,GEN b)
2007: {
2008: long sa,sb;
2009: GEN z;
2010:
2011: sa=signe(a); if (!sa) return gzero;
2012: sb=signe(b); if (!sb) return gzero;
2013: if (sb<0) sa = -sa;
2014: z = quickmulii(a+2,b+2, lgefint(a)-2,lgefint(b)-2);
2015: setsigne(z,sa); return z;
2016: }
2017:
2018: static GEN
2019: quicksqri(GEN a, long na)
2020: {
2021: GEN a0,c,c0,c1;
2022: long av,n0,n0a,i;
2023:
2024: if (na<SQRI_LIMIT) return muliispec(a,a,na,na);
2025: i=(na>>1); n0=na-i; na=i;
2026: av=avma; a0=a+na; n0a=n0;
2027: while (!*a0 && n0a) { a0++; n0a--; }
2028: c = quicksqri(a,na);
2029: if (n0a)
2030: {
2031: c0 = quicksqri(a0,n0a);
2032: c1 = shifti(quickmulii(a0,a, n0a,na),1);
2033: c = addshiftw(c,c1, n0);
2034: c = addshiftw(c,c0, n0);
2035: }
2036: else
2037: c = addshiftw(c,gzero,n0<<1);
2038: return gerepileuptoint(av, c);
2039: }
2040:
2041: GEN
2042: sqri(GEN a) { return quicksqri(a+2, lgefint(a)-2); }
2043:
2044: #define MULRR_LIMIT 32
2045: #define MULRR2_LIMIT 32
2046:
2047: #if 0
2048: GEN
2049: karamulrr1(GEN x, GEN y)
2050: {
2051: long sx,sy;
2052: long i,i1,i2,lx=lg(x),ly=lg(y),e,flag,garde;
2053: long lz2,lz3,lz4;
2054: GEN z,lo1,lo2,hi;
2055:
2056: /* ensure that lg(y) >= lg(x) */
2057: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
2058: if (lx < MULRR_LIMIT) return mulrr(x,y);
2059: sx=signe(x); sy=signe(y);
2060: e = evalexpo(expo(x)+expo(y)); if (e & ~EXPOBITS) err(muler4);
2061: if (!sx || !sy) { z=cgetr(3); z[2]=0; z[1]=e; return z; }
2062: if (sy<0) sx = -sx;
2063: ly=lx+flag; z=cgetr(lx);
2064: lz2 = (lx>>1); lz3 = lx-lz2;
2065: x += 2; lx -= 2;
2066: y += 2; ly -= 2;
2067: hi = quickmulii(x,y,lz2,lz2);
2068: i1=lz2; while (i1<lx && !x[i1]) i1++;
2069: lo1 = quickmulii(y,x+i1,lz2,lx-i1);
2070: i2=lz2; while (i2<ly && !y[i2]) i2++;
2071: lo2 = quickmulii(x,y+i2,lz2,ly-i2);
2072:
2073: if (signe(lo1))
2074: {
2075: if (flag) { lo2 = addshiftw(lo1,lo2,1); lz3++; } else lo2=addii(lo1,lo2);
2076: }
2077: lz4=lgefint(lo2)-lz3;
2078: if (lz4>0) hi = addiispec(hi+2,lo2+2, lgefint(hi)-2,lz4);
2079: if (hi[2] < 0)
2080: {
2081: e++; garde=hi[lx+2];
2082: for (i=lx+1; i>=2 ; i--) z[i]=hi[i];
2083: }
2084: else
2085: {
2086: garde = (hi[lx+2] << 1);
2087: shift_left(z,hi,2,lx+1, garde, 1);
2088: }
2089: z[1]=evalsigne(sx) | e;
2090: if (garde < 0)
2091: { /* round to nearest */
2092: i=lx+2; do z[--i]++; while (z[i]==0);
2093: if (i==1) z[2]=HIGHBIT;
2094: }
2095: avma=(long)z; return z;
2096: }
2097:
2098: GEN
2099: karamulrr2(GEN x, GEN y)
2100: {
2101: long sx,sy,i,lx=lg(x),ly=lg(y),e,flag,garde;
2102: GEN z,hi;
2103:
2104: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
2105: if (lx < MULRR2_LIMIT) return mulrr(x,y);
2106: ly=lx+flag; sx=signe(x); sy=signe(y);
2107: e = evalexpo(expo(x)+expo(y)); if (e & ~EXPOBITS) err(muler4);
2108: if (!sx || !sy) { z=cgetr(3); z[2]=0; z[1]=e; return z; }
2109: if (sy<0) sx = -sx;
2110: z=cgetr(lx);
2111: hi=quickmulii(y+2,x+2,ly-2,lx-2);
2112: if (hi[2] < 0)
2113: {
2114: e++; garde=hi[lx];
2115: for (i=2; i<lx ; i++) z[i]=hi[i];
2116: }
2117: else
2118: {
2119: garde = (hi[lx] << 1);
2120: shift_left(z,hi,2,lx-1, garde, 1);
2121: }
2122: z[1]=evalsigne(sx) | e;
2123: if (garde < 0)
2124: { /* round to nearest */
2125: i=lx; do z[--i]++; while (z[i]==0);
2126: if (i==1) z[2]=HIGHBIT;
2127: }
2128: avma=(long)z; return z;
2129: }
2130:
2131: GEN
2132: karamulrr(GEN x, GEN y, long flag)
2133: {
2134: switch(flag)
2135: {
2136: case 1: return karamulrr1(x,y);
2137: case 2: return karamulrr2(x,y);
2138: default: err(flagerr,"karamulrr");
2139: }
2140: return NULL; /* not reached */
2141: }
2142:
2143: GEN
2144: karamulir(GEN x, GEN y, long flag)
2145: {
2146: long sx=signe(x),lz,i;
2147: GEN z,temp,z1;
2148:
2149: if (!sx) return gzero;
2150: lz=lg(y); z=cgetr(lz);
2151: temp=cgetr(lz+1); affir(x,temp);
2152: z1=karamulrr(temp,y,flag);
2153: for (i=1; i<lz; i++) z[i]=z1[i];
2154: avma=(long)z; return z;
2155: }
2156: #endif
2157:
2158: #ifdef LONG_IS_64BIT
2159:
2160: #if PARI_BYTE_ORDER == LITTLE_ENDIAN_64 || PARI_BYTE_ORDER == BIG_ENDIAN_64
2161: #else
2162: error... unknown machine
2163: #endif
2164:
2165: GEN
2166: dbltor(double x)
2167: {
2168: GEN z = cgetr(3);
2169: long e;
2170: union { double f; ulong i; } fi;
2171: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2172: const int exp_mid = 0x3ff;/* exponent bias */
2173: const int expo_len = 11; /* number of bits of exponent */
2174: LOCAL_HIREMAINDER;
2175:
2176: if (x==0) { z[1]=evalexpo(-308); z[2]=0; return z; }
2177: fi.f = x;
2178: e = evalexpo(((fi.i & (HIGHBIT-1)) >> mant_len) - exp_mid);
2179: z[1] = e | evalsigne(x<0? -1: 1);
2180: z[2] = (fi.i << expo_len) | HIGHBIT;
2181: return z;
2182: }
2183:
2184: double
2185: rtodbl(GEN x)
2186: {
2187: long ex,s=signe(x);
2188: ulong a;
2189: union { double f; ulong i; } fi;
2190: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2191: const int exp_mid = 0x3ff;/* exponent bias */
2192: const int expo_len = 11; /* number of bits of exponent */
2193: LOCAL_HIREMAINDER;
2194:
2195: if (typ(x)==t_INT && !s) return 0.0;
2196: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
2197: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
2198:
2199: /* start by rounding to closest */
2200: a = (x[2] & (HIGHBIT-1)) + 0x400;
2201: if (a & HIGHBIT) { ex++; a=0; }
2202: if (ex >= exp_mid) err(rtodber);
2203: fi.i = ((ex + exp_mid) << mant_len) | (a >> expo_len);
2204: if (s<0) fi.i |= HIGHBIT;
2205: return fi.f;
2206: }
2207:
2208: #else
2209:
2210: #if PARI_BYTE_ORDER == LITTLE_ENDIAN
2211: # define INDEX0 1
2212: # define INDEX1 0
2213: #elif PARI_BYTE_ORDER == BIG_ENDIAN
2214: # define INDEX0 0
2215: # define INDEX1 1
2216: #else
2217: error... unknown machine
2218: #endif
2219:
2220: GEN
2221: dbltor(double x)
2222: {
2223: GEN z;
2224: long e;
2225: union { double f; ulong i[2]; } fi;
2226: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2227: const int exp_mid = 0x3ff;/* exponent bias */
2228: const int shift = mant_len-32;
2229: const int expo_len = 11; /* number of bits of exponent */
2230:
2231: if (x==0) { z=cgetr(3); z[1]=evalexpo(-308); z[2]=0; return z; }
2232: fi.f = x; z=cgetr(4);
2233: {
2234: const ulong a = fi.i[INDEX0];
2235: const ulong b = fi.i[INDEX1];
2236: e = evalexpo(((a & (HIGHBIT-1)) >> shift) - exp_mid);
2237: z[1] = e | evalsigne(x<0? -1: 1);
2238: z[3] = b << expo_len;
2239: z[2] = HIGHBIT | b >> (BITS_IN_LONG-expo_len) | (a << expo_len);
2240: }
2241: return z;
2242: }
2243:
2244: double
2245: rtodbl(GEN x)
2246: {
2247: long ex,s=signe(x),lx=lg(x);
2248: ulong a,b,k;
2249: union { double f; ulong i[2]; } fi;
2250: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2251: const int exp_mid = 0x3ff;/* exponent bias */
2252: const int shift = mant_len-32;
2253: const int expo_len = 11; /* number of bits of exponent */
2254:
2255: if (typ(x)==t_INT && !s) return 0.0;
2256: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
2257: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
2258:
2259: /* start by rounding to closest */
2260: a = x[2] & (HIGHBIT-1);
2261: if (lx > 3)
2262: {
2263: b = x[3] + 0x400UL; if (b < 0x400UL) a++;
2264: if (a & HIGHBIT) { ex++; a=0; }
2265: }
2266: else b = 0;
2267: if (ex > exp_mid) err(rtodber);
2268: ex += exp_mid;
2269: k = (a >> expo_len) | (ex << shift);
2270: if (s<0) k |= HIGHBIT;
2271: fi.i[INDEX0] = k;
2272: fi.i[INDEX1] = (a << (BITS_IN_LONG-expo_len)) | (b >> expo_len);
2273: return fi.f;
2274: }
2275: #endif
2276:
2277: /* Old cgiv without reference count (which was not used anyway)
2278: * Should be a macro.
2279: */
2280: void
2281: cgiv(GEN x)
2282: {
2283: if (x == (GEN) avma)
2284: avma = (long) (x+lg(x));
2285: }
2286:
2287: /********************************************************************/
2288: /** **/
2289: /** INTEGER EXTENDED GCD (AND INVMOD) **/
2290: /** **/
2291: /********************************************************************/
2292:
2293: /* GN 1998Oct25, originally developed in January 1998 under 2.0.4.alpha,
2294: * in the context of trying to improve elliptic curve cryptosystem attacking
2295: * algorithms.
2296: *
2297: * Two basic ideas - (1) avoid many integer divisions, especially when the
2298: * quotient is 1 (which happens more than 40% of the time). (2) Use Lehmer's
2299: * trick as modified by Jebelean of extracting a couple of words' worth of
2300: * leading bits from both operands, and compute partial quotients from them
2301: * as long as we can be sure of their values. The Jebelean modifications
2302: * consist in reliable inequalities from which we can decide fast whether
2303: * to carry on or to return to the outer loop, and in re-shifting after the
2304: * first word's worth of bits has been used up. All of this is described
2305: * in R. Lercier's these [pp148-153 & 163f.], except his outer loop isn't
2306: * quite right (the catch-up divisions needed when one partial quotient is
2307: * larger than a word are missing).
2308: *
2309: * The API is mpxgcd() below; the single-word routines xgcduu and xxgcduu
2310: * may be called directly if desired; lgcdii() probably doesn't make much
2311: * sense out of context.
2312: *
2313: * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
2314: * ptotically about a factor 2.5 .. 3, depending on processor architecture,
2315: * than the naive continued-division code. Unfortunately, thanks to the
2316: * unrolled loops and all, the code is a bit lengthy.
2317: */
2318:
2319: /*==================================
2320: * xgcduu(d,d1,f,v,v1,s)
2321: * xxgcduu(d,d1,f,u,u1,v,v1,s)
2322: *==================================*/
2323: /*
2324: * Fast `final' extended gcd algorithm, acting on two ulongs. Ideally this
2325: * should be replaced with assembler versions wherever possible. The present
2326: * code essentially does `subtract, compare, and possibly divide' at each step,
2327: * which is reasonable when hardware division (a) exists, (b) is a bit slowish
2328: * and (c) does not depend a lot on the operand values (as on i486). When
2329: * wordsize division is in fact an assembler routine based on subtraction,
2330: * this strategy may not be the most efficient one.
2331: *
2332: * xxgcduu() should be called with d > d1 > 0, returns gcd(d,d1), and assigns
2333: * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1] (i.e.,
2334: * the product of all the [0, 1; 1 q_j] where the leftmost factor arises from
2335: * the quotient of the first division step), and the information about the
2336: * implied signs to s (-1 when an odd number of divisions has been done,
2337: * 1 otherwise). xgcduu() is exactly the same except that u,u1 are not com-
2338: * puted (and not returned, of course).
2339: *
2340: * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
2341: * (so we can stop the chain division one step early: as soon as the remainder
2342: * equals 1). Use this when you intend to use only what would be v, or only
2343: * what would be u and v, after that final division step, but not u1 and v1.
2344: * With the flag in force and thus without that final step, the interesting
2345: * quantity/ies will still sit in [u1 and] v1, of course.
2346: *
2347: * For computing the inverse of a single-word INTMOD known to exist, pass f=1
2348: * to xgcduu(), and obtain the result from s and v1. (The routine does the
2349: * right thing when d1==1 already.) For finishing a multiword modinv known
2350: * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix (with
2351: * properly adjusted signs) onto the values v' and v1' previously obtained
2352: * from the multiword division steps. Actually, just take the scalar product
2353: * of [v',v1'] with [u1,-v1], and change the sign if s==-1. (If the final
2354: * step had been carried out, it would be [-u,v], and s would also change.)
2355: * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
2356: * Finally, f=0 with xxgcduu() is useful for Bezout computations.
2357: * [Harrumph. In the above prescription, the sign turns out to be precisely
2358: * wrong.]
2359: * (It is safe for inversemodulo() to call xgcduu() with f=1, because f&1
2360: * doesn't make a difference when gcd(d,d1)>1. The speedup is negligible.)
2361: *
2362: * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
2363: * recover the final u,u1 given only v,v1 and s. However, it probably isn't
2364: * worthwhile, as it trades a few multiplications for a division.
2365: *
2366: * Note that these routines do not know and do not need to know about the
2367: * PARI stack.
2368: */
2369:
2370: ulong
2371: xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
2372: {
2373: ulong xv,xv1, xs, q,res;
2374: LOCAL_HIREMAINDER;
2375:
2376: /* The above blurb contained a lie. The main loop always stops when d1
2377: * has become equal to 1. If (d1 == 1 && !(f&1)) after the loop, we do
2378: * the final `division' of d by 1 `by hand' as it were.
2379: *
2380: * The loop has already been unrolled once. Aggressive optimization could
2381: * well lead to a totally unrolled assembler version...
2382: *
2383: * On modern x86 architectures, this loop is a pig anyway. The division
2384: * instruction always puts its result into the same pair of registers, and
2385: * we always want to use one of them straight away, so pipeline performance
2386: * will suck big time. An assembler version should probably do a first loop
2387: * computing and storing all the quotients -- their number is bounded in
2388: * advance -- and then assembling the matrix in a second pass. On other
2389: * architectures where we can cycle through four or so groups of registers
2390: * and exploit a fast ALU result-to-operand feedback path, this is much less
2391: * of an issue. (Intel sucks. See http://www.x86.org/ ...)
2392: */
2393: xs = res = 0;
2394: xv = 0UL; xv1 = 1UL;
2395: while (d1 > 1UL)
2396: {
2397: d -= d1; /* no need to use subll */
2398: if (d >= d1)
2399: {
2400: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
2401: xv += q * xv1;
2402: }
2403: else
2404: xv += xv1;
2405: /* possible loop exit */
2406: if (d <= 1UL) { xs=1; break; }
2407: /* repeat with inverted roles */
2408: d1 -= d;
2409: if (d1 >= d)
2410: {
2411: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
2412: xv1 += q * xv;
2413: }
2414: else
2415: xv1 += xv;
2416: } /* while */
2417:
2418: if (!(f&1)) /* division by 1 postprocessing if needed */
2419: {
2420: if (xs && d==1)
2421: { xv1 += d1 * xv; xs = 0; res = 1UL; }
2422: else if (!xs && d1==1)
2423: { xv += d * xv1; xs = 1; res = 1UL; }
2424: }
2425:
2426: if (xs)
2427: {
2428: *s = -1; *v = xv1; *v1 = xv;
2429: return (res ? res : (d==1 ? 1UL : d1));
2430: }
2431: else
2432: {
2433: *s = 1; *v = xv; *v1 = xv1;
2434: return (res ? res : (d1==1 ? 1UL : d));
2435: }
2436: }
2437:
2438:
2439: ulong
2440: xxgcduu(ulong d, ulong d1, int f,
2441: ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
2442: {
2443: ulong xu,xu1, xv,xv1, xs, q,res;
2444: LOCAL_HIREMAINDER;
2445:
2446: xs = res = 0;
2447: xu = xv1 = 1UL;
2448: xu1 = xv = 0UL;
2449: while (d1 > 1UL)
2450: {
2451: d -= d1; /* no need to use subll */
2452: if (d >= d1)
2453: {
2454: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
2455: xv += q * xv1;
2456: xu += q * xu1;
2457: }
2458: else
2459: { xv += xv1; xu += xu1; }
2460: /* possible loop exit */
2461: if (d <= 1UL) { xs=1; break; }
2462: /* repeat with inverted roles */
2463: d1 -= d;
2464: if (d1 >= d)
2465: {
2466: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
2467: xv1 += q * xv;
2468: xu1 += q * xu;
2469: }
2470: else
2471: { xv1 += xv; xu1 += xu; }
2472: } /* while */
2473:
2474: if (!(f&1)) /* division by 1 postprocessing if needed */
2475: {
2476: if (xs && d==1)
2477: {
2478: xv1 += d1 * xv;
2479: xu1 += d1 * xu;
2480: xs = 0; res = 1UL;
2481: }
2482: else if (!xs && d1==1)
2483: {
2484: xv += d * xv1;
2485: xu += d * xu1;
2486: xs = 1; res = 1UL;
2487: }
2488: }
2489:
2490: if (xs)
2491: {
2492: *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
2493: return (res ? res : (d==1 ? 1UL : d1));
2494: }
2495: else
2496: {
2497: *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
2498: return (res ? res : (d1==1 ? 1UL : d));
2499: }
2500: }
2501:
2502: /*==================================
2503: * lgcdii(d,d1,u,u1,v,v1)
2504: *==================================*/
2505: /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
2506: *
2507: * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
2508: * and a quantity of bits from d1 obtained by a shift of the same displacement,
2509: * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
2510: * the product of all the [0, 1; 1, q_j] thus obtained, where the leftmost
2511: * factor arises from the quotient of the first division step.
2512: *
2513: * MUST be called with d > d1 > 0, and with d occupying more than one
2514: * significant word (if it doesn't, the caller has no business with us;
2515: * he/she/it should use xgcduu() instead). Returns the number of reduction/
2516: * swap steps carried out, possibly zero, or under certain conditions minus
2517: * that number. When the return value is nonzero, the caller should use the
2518: * returned recurrence matrix to update its own copies of d,d1. When the
2519: * return value is non-positive, and the latest remainder after updating
2520: * turns out to be nonzero, the caller should at once attempt a full division,
2521: * rather than first trying lgcdii() again -- this typically happens when we
2522: * are about to encounter a quotient larger than half a word. (This is not
2523: * detected infallibly -- after a positive return value, it is perfectly
2524: * possible that the next stage will end up needing a full division. After
2525: * a negative return value, however, this is certain, and should be acted
2526: * upon.)
2527: *
2528: * (The sign information, for which xgcduu() has its return argument s, is now
2529: * implicit in the LSB of our return value, and the caller may take advantage
2530: * of the fact that a return value of +-1 implies u==0,u1==v==1 [only v1 pro-
2531: * vides interesting information in this case]. One might also use the fact
2532: * that if the return value is +-2, then u==1, but this is rather marginal.)
2533: *
2534: * If it was not possible to determine even the first quotient, either because
2535: * we're too close to an integer quotient or because the quotient would be
2536: * larger than one word (if the `leading digit' of d1 after shifting is all
2537: * zeros), we return 0 and do not bother to assign anything to the last four
2538: * args.
2539: *
2540: * The division chain might (sometimes) even run to completion. It will be
2541: * up to the caller to detect this case.
2542: *
2543: * This routine does _not_ change d or d1; this will also be up to the caller.
2544: *
2545: * Note that this routine does not know and does not need to know about the
2546: * PARI stack.
2547: */
2548: /*#define DEBUG_LEHMER 1 */
2549: int
2550: lgcdii(ulong* d, ulong* d1,
2551: ulong* u, ulong* u1, ulong* v, ulong* v1)
2552: {
2553: /* Strategy: (1) Extract/shift most significant bits. We assume that d
2554: * has at least two significant words, but we can cope with a one-word d1.
2555: * Let dd,dd1 be the most significant dividend word and matching part of the
2556: * divisor.
2557: * (2) Check for overflow on the first division. For our purposes, this
2558: * happens when the upper half of dd1 is zero. (Actually this is detected
2559: * during extraction.)
2560: * (3) Get a fix on the first quotient. We compute q = floor(dd/dd1), which
2561: * is an upper bound for floor(d/d1), and which gives the true value of the
2562: * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
2563: * (If it isn't, we give up. This is annoying because the subsequent full
2564: * division will repeat some work already done, but it happens very infre-
2565: * quently. Doing the extra-bit-fetch in this case would be awkward.)
2566: * (4) Finish initializations.
2567: *
2568: * The remainder of the action is comparatively boring... The main loop has
2569: * been unrolled once (so we don't swap things and we can apply Jebelean's
2570: * termination conditions which alternatingly take two different forms during
2571: * successive iterations). When we first run out of sufficient bits to form
2572: * a quotient, and have an extra word of each operand, we pull out two whole
2573: * word's worth of dividend bits, and divisor bits of matching significance;
2574: * to these we apply our partial matrix (disregarding overflow because the
2575: * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
2576: * re-extract one word's worth of the current dividend and a matching amount
2577: * of divisor bits. The affair will normally terminate with matrix entries
2578: * just short of a whole word. (We terminate the inner loop before these can
2579: * possibly overflow.)
2580: */
2581: ulong dd,dd1,ddlo,dd1lo, sh,shc; /* `digits', shift count */
2582: ulong xu,xu1, xv,xv1, q,res; /* recurrences, partial quotient, count */
2583: ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
2584: long ld, ld1, lz; /* t_INT effective lengths */
2585: int skip = 0; /* a boolean flag */
2586: LOCAL_OVERFLOW;
2587: LOCAL_HIREMAINDER;
2588:
2589: #ifdef DEBUG_LEHMER
2590: voir(d, -1); voir(d1, -1);
2591: #endif
2592: ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
2593: if (lz > 1) return 0; /* rare, quick and desperate exit */
2594:
2595: d += 2; d1 += 2; /* point at the leading `digits' */
2596: dd1lo = 0; /* unless we find something better */
2597: sh = bfffo(*d); /* obtain dividend left shift count */
2598:
2599: if (sh)
2600: { /* do the shifting */
2601: shc = BITS_IN_LONG - sh;
2602: if (lz)
2603: { /* dividend longer than divisor */
2604: dd1 = (*d1 >> shc);
2605: if (!(HIGHMASK & dd1)) return 0; /* overflow detected */
2606: if (ld1 > 3)
2607: dd1lo = (*d1 << sh) + (d1[1] >> shc);
2608: else
2609: dd1lo = (*d1 << sh);
2610: }
2611: else
2612: { /* dividend and divisor have the same length */
2613: /* dd1 = shiftl(d1,sh) would have left hiremainder==0, and dd1 != 0. */
2614: dd1 = (*d1 << sh);
2615: if (!(HIGHMASK & dd1)) return 0;
2616: if (ld1 > 3)
2617: {
2618: dd1 += (d1[1] >> shc);
2619: if (ld1 > 4)
2620: dd1lo = (d1[1] << sh) + (d1[2] >> shc);
2621: else
2622: dd1lo = (d1[1] << sh);
2623: }
2624: }
2625: /* following lines assume d to have 2 or more significant words */
2626: dd = (*d << sh) + (d[1] >> shc);
2627: if (ld > 4)
2628: ddlo = (d[1] << sh) + (d[2] >> shc);
2629: else
2630: ddlo = (d[1] << sh);
2631: }
2632: else
2633: { /* no shift needed */
2634: if (lz) return 0; /* div'd longer than div'r: o'flow automatic */
2635: dd1 = *d1;
2636: if (!(HIGHMASK & dd1)) return 0;
2637: if(ld1 > 3) dd1lo = d1[1];
2638: /* assume again that d has another significant word */
2639: dd = *d; ddlo = d[1];
2640: }
2641: #ifdef DEBUG_LEHMER
2642: fprintf(stderr, " %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
2643: #endif
2644:
2645: /* First subtraction/division stage. (If a subtraction initially suffices,
2646: * we don't divide at all.) If a Jebelean condition is violated, and we
2647: * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
2648: * give up and ask for a full division. Otherwise we commit the result,
2649: * possibly deciding to re-shift immediately afterwards.
2650: */
2651: dd -= dd1;
2652: if (dd < dd1)
2653: { /* first quotient known to be == 1 */
2654: xv1 = 1UL;
2655: if (!dd) /* !(Jebelean condition), extraspecial case */
2656: { /* note this can actually happen... Now
2657: * q==1 is known, but we underflow already.
2658: * OTOH we've just shortened d by a whole word.
2659: * Thus we feel pleased with ourselves and
2660: * return. (The re-shift code below would
2661: * do so anyway.) */
2662: *u = 0; *v = *u1 = *v1 = 1UL;
2663: return -1; /* Next step will be a full division. */
2664: } /* if !(Jebelean) then */
2665: }
2666: else
2667: { /* division indicated */
2668: hiremainder = 0;
2669: xv1 = 1 + divll(dd, dd1); /* xv1: alternative spelling of `q', here ;) */
2670: dd = hiremainder;
2671: if (dd < xv1) /* !(Jebelean cond'), non-extra special case */
2672: { /* Attempt to complete the division using the
2673: * less significant bits, before skipping right
2674: * past the 1st loop to the reshift stage. */
2675: ddlo = subll(ddlo, mulll(xv1, dd1lo));
2676: dd = subllx(dd, hiremainder);
2677:
2678: /* If we now have an overflow, q was _certainly_ too large. Thanks to
2679: * our decision not to get here unless the original dd1 had bits set in
2680: * the upper half of the word, however, we now do know that the correct
2681: * quotient is in fact q-1. Adjust our data accordingly. */
2682: if (overflow)
2683: {
2684: xv1--;
2685: ddlo = addll(ddlo,dd1lo);
2686: dd = addllx(dd,dd1); /* overflows again which cancels the borrow */
2687: /* ...and fall through to skip=1 below */
2688: }
2689: else
2690: /* Test Jebelean condition anew, at this point using _all_ the extracted
2691: * bits we have. This is clutching at straws; we have a more or less
2692: * even chance of succeeding this time. Note that if we fail, we really
2693: * do not know whether the correct quotient would have been q or some
2694: * smaller value. */
2695: if (!dd && ddlo < xv1) return 0;
2696:
2697: /* Otherwise, we now know that q is correct, but we cannot go into the
2698: * 1st loop. Raise a flag so we'll remember to skip past the loop.
2699: * Get here also after the q-1 adjustment case. */
2700: skip = 1;
2701: } /* if !(Jebelean) then */
2702: }
2703: xu = 0; xv = xu1 = 1UL; res = 1;
2704: #ifdef DEBUG_LEHMER
2705: fprintf(stderr, " q = %ld, %lx, %lx\n", xv1, dd1, dd);
2706: #endif
2707:
2708: /* Some invariants from here across the first loop:
2709: *
2710: * At this point, and again after we are finished with the first loop and
2711: * subsequent conditional, a division and the associated update of the
2712: * recurrence matrix have just been carried out completely. The matrix
2713: * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
2714: * columns), and the current remainder == next divisor (dd at the moment)
2715: * is nonzero (it might be zero here, but then skip will have been set).
2716: *
2717: * After the first loop, or when skip is set already, it will also be the
2718: * case that there aren't sufficiently many bits to continue without re-
2719: * shifting. If the divisor after reshifting is zero, or indeed if it
2720: * doesn't have more than half a word's worth of bits, we will have to
2721: * return at that point. Otherwise, we proceed into the second loop.
2722: *
2723: * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
2724: * already reflect the result of applying the current matrix to the old
2725: * ddorig:ddlo and dd1orig:dd1lo. (For the first iteration above, this
2726: * was easy to achieve, and we didn't even need to peek into the (now
2727: * no longer existent!) saved words. After the loop, we'll stop for a
2728: * moment to merge in the ddlo,dd1lo contributions.)
2729: *
2730: * Note that after the first division, even an a priori quotient of 1 cannot
2731: * be trusted until we've checked Jebelean's condition -- it cannot be too
2732: * large, of course, but it might be too small.
2733: */
2734:
2735: if (!skip)
2736: {
2737: while (1)
2738: {
2739: /* First half of loop divides dd into dd1, and leaves the recurrence
2740: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
2741: * entries) when successful. */
2742: tmpd = dd1 - dd;
2743: if (tmpd < dd)
2744: { /* quotient suspected to be 1 */
2745: #ifdef DEBUG_LEHMER
2746: q = 1;
2747: #endif
2748: tmpu = xu + xu1; /* cannot overflow -- everything bounded by
2749: * the original dd during first loop */
2750: tmpv = xv + xv1;
2751: }
2752: else
2753: { /* division indicated */
2754: hiremainder = 0;
2755: q = 1 + divll(tmpd, dd);
2756: tmpd = hiremainder;
2757: tmpu = xu + q*xu1; /* can't overflow, but may need to be undone */
2758: tmpv = xv + q*xv1;
2759: }
2760:
2761: tmp0 = addll(tmpv, xv1);
2762: if ((tmpd < tmpu) || overflow ||
2763: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
2764: break; /* skip ahead to reshift stage */
2765: else
2766: { /* commit dd1, xu, xv */
2767: res++;
2768: dd1 = tmpd; xu = tmpu; xv = tmpv;
2769: #ifdef DEBUG_LEHMER
2770: fprintf(stderr, " q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
2771: q, dd, dd1, xu1, xu, xv1, xv);
2772: #endif
2773: }
2774:
2775: /* Second half of loop divides dd1 into dd, and the matrix returns to its
2776: * normal arrangement. */
2777: tmpd = dd - dd1;
2778: if (tmpd < dd1)
2779: { /* quotient suspected to be 1 */
2780: #ifdef DEBUG_LEHMER
2781: q = 1;
2782: #endif
2783: tmpu = xu1 + xu; /* cannot overflow */
2784: tmpv = xv1 + xv;
2785: }
2786: else
2787: { /* division indicated */
2788: hiremainder = 0;
2789: q = 1 + divll(tmpd, dd1);
2790: tmpd = hiremainder;
2791: tmpu = xu1 + q*xu;
2792: tmpv = xv1 + q*xv;
2793: }
2794:
2795: tmp0 = addll(tmpu, xu);
2796: if ((tmpd < tmpv) || overflow ||
2797: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
2798: break; /* skip ahead to reshift stage */
2799: else
2800: { /* commit dd, xu1, xv1 */
2801: res++;
2802: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
2803: #ifdef DEBUG_LEHMER
2804: fprintf(stderr, " q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
2805: q, dd1, dd, xu, xu1, xv, xv1);
2806: #endif
2807: }
2808:
2809: } /* end of first loop */
2810:
2811: /* Intermezzo: update dd:ddlo, dd1:dd1lo. (But not if skip is set.) */
2812:
2813: if (res&1)
2814: {
2815: /* after failed division in 1st half of loop:
2816: * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
2817: * * [ -xu, xu1 ; xv, -xv1 ]
2818: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
2819: * add the high-order remainders + overflows onto [dd1,dd].)
2820: */
2821: tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
2822: tmp1 = subll(mulll(dd1lo,xv), tmp1);
2823: dd1 += subllx(hiremainder, tmp0);
2824: tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
2825: ddlo = subll(tmp2, mulll(dd1lo,xv1));
2826: dd += subllx(tmp0, hiremainder);
2827: dd1lo = tmp1;
2828: }
2829: else
2830: {
2831: /* after failed division in 2nd half of loop:
2832: * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
2833: * * [ xu1, -xu ; -xv1, xv ]
2834: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
2835: * add the high-order remainders + overflows onto [dd,dd1].)
2836: */
2837: tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
2838: tmp1 = subll(tmp1, mulll(dd1lo,xv1));
2839: dd += subllx(tmp0, hiremainder);
2840: tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
2841: dd1lo = subll(mulll(dd1lo,xv), tmp2);
2842: dd1 += subllx(hiremainder, tmp0);
2843: ddlo = tmp1;
2844: }
2845: #ifdef DEBUG_LEHMER
2846: fprintf(stderr, " %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
2847: #endif
2848: } /* end of skip-pable section: get here also, with res==1, when there
2849: * was a problem immediately after the very first division. */
2850:
2851: /* Re-shift. Note: the shift count _can_ be zero, viz. under the following
2852: * precise conditions: The original dd1 had its topmost bit set, so the 1st
2853: * q was 1, and after subtraction, dd had its topmost bit unset. If now
2854: * dd==0, we'd have taken the return exit already, so we couldn't have got
2855: * here. If not, then it must have been the second division which has gone
2856: * amiss (because dd1 was very close to an exact multiple of the remainder
2857: * dd value, so this will be very rare). At this point, we'd have a fairly
2858: * slim chance of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but
2859: * this is not guaranteed to work. Instead of trying, we return at once.
2860: * The caller will see to it that the initial subtraction is re-done using
2861: * _all_ the bits of both operands, which already helps, and the next round
2862: * will either be a full division (if dd occupied a halfword or less), or
2863: * another llgcdii() first step. In the latter case, since we try a little
2864: * harder during our first step, we may actually be able to fix the problem,
2865: * and get here again with improved low-order bits and with another step
2866: * under our belt. Otherwise we'll have given up above and forced a full-
2867: * blown division.
2868: *
2869: * If res is even, the shift count _cannot_ be zero. (The first step forces
2870: * a zero into the remainder's MSB, and all subsequent remainders will have
2871: * inherited it.)
2872: *
2873: * The re-shift stage exits if the next divisor has at most half a word's
2874: * worth of bits.
2875: *
2876: * For didactic reasons, the second loop will be arranged in the same way
2877: * as the first -- beginning with the division of dd into dd1, as if res
2878: * was odd. To cater for this, if res is actually even, we swap things
2879: * around during reshifting. (During the second loop, the parity of res
2880: * does not matter; we know in which half of the loop we are when we decide
2881: * to return.)
2882: */
2883: #ifdef DEBUG_LEHMER
2884: fprintf(stderr, "(sh)");
2885: #endif
2886:
2887: if (res&1)
2888: { /* after odd number of division(s) */
2889: if (dd1 && (sh = bfffo(dd1)))
2890: {
2891: shc = BITS_IN_LONG - sh;
2892: dd = (ddlo >> shc) + (dd << sh);
2893: if (!(HIGHMASK & dd))
2894: {
2895: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
2896: return -res; /* full division asked for */
2897: }
2898: dd1 = (dd1lo >> shc) + (dd1 << sh);
2899: }
2900: else
2901: { /* time to return: <= 1 word left, or sh==0 */
2902: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
2903: return res;
2904: }
2905: }
2906: else
2907: { /* after even number of divisions */
2908: if (dd)
2909: {
2910: sh = bfffo(dd); /* Known to be positive. */
2911: shc = BITS_IN_LONG - sh;
2912: /* dd:ddlo will become the new dd1, and v.v. */
2913: tmpd = (ddlo >> shc) + (dd << sh);
2914: dd = (dd1lo >> shc) + (dd1 << sh);
2915: dd1 = tmpd;
2916: /* This has completed the swap; now dd is again the current divisor.
2917: * The following test originally inspected dd1 -- a most subtle and
2918: * most annoying bug. The Management. */
2919: if (HIGHMASK & dd)
2920: {
2921: /* recurrence matrix is the wrong way round; swap it. */
2922: tmp0 = xu; xu = xu1; xu1 = tmp0;
2923: tmp0 = xv; xv = xv1; xv1 = tmp0;
2924: }
2925: else
2926: {
2927: /* recurrence matrix is the wrong way round; fix this. */
2928: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
2929: return -res; /* full division asked for */
2930: }
2931: }
2932: else
2933: { /* time to return: <= 1 word left */
2934: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
2935: return res;
2936: }
2937: } /* end reshift */
2938:
2939: #ifdef DEBUG_LEHMER
2940: fprintf(stderr, " %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
2941: #endif
2942:
2943: /* The Second Loop. Rip-off of the first, but we now check for overflow
2944: * in the recurrences. Returns instead of breaking when we cannot fix the
2945: * quotient any longer.
2946: */
2947:
2948: for(;;)
2949: {
2950: /* First half of loop divides dd into dd1, and leaves the recurrence
2951: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
2952: * entries) when successful. */
2953: tmpd = dd1 - dd;
2954: if (tmpd < dd)
2955: { /* quotient suspected to be 1 */
2956: #ifdef DEBUG_LEHMER
2957: q = 1;
2958: #endif
2959: tmpu = xu + xu1;
2960: tmpv = addll(xv, xv1); /* xv,xv1 will overflow first */
2961: tmp1 = overflow;
2962: }
2963: else
2964: { /* division indicated */
2965: hiremainder = 0;
2966: q = 1 + divll(tmpd, dd);
2967: tmpd = hiremainder;
2968: tmpu = xu + q*xu1;
2969: tmpv = addll(xv, mulll(q,xv1));
2970: tmp1 = overflow | hiremainder;
2971: }
2972:
2973: tmp0 = addll(tmpv, xv1);
2974: if ((tmpd < tmpu) || overflow || tmp1 ||
2975: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
2976: {
2977: /* The recurrence matrix has not yet been warped... */
2978: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
2979: return res;
2980: }
2981: else
2982: { /* commit dd1, xu, xv */
2983: res++;
2984: dd1 = tmpd; xu = tmpu; xv = tmpv;
2985: #ifdef DEBUG_LEHMER
2986: fprintf(stderr, " q = %ld, %lx, %lx\n", q, dd, dd1);
2987: #endif
2988: }
2989:
2990: /* Second half of loop divides dd1 into dd, and the matrix returns to its
2991: * normal arrangement. */
2992: tmpd = dd - dd1;
2993: if (tmpd < dd1)
2994: { /* quotient suspected to be 1 */
2995: #ifdef DEBUG_LEHMER
2996: q = 1;
2997: #endif
2998: tmpu = xu1 + xu;
2999: tmpv = addll(xv1, xv);
3000: tmp1 = overflow;
3001: }
3002: else
3003: { /* division indicated */
3004: hiremainder = 0;
3005: q = 1 + divll(tmpd, dd1);
3006: tmpd = hiremainder;
3007: tmpu = xu1 + q*xu;
3008: tmpv = addll(xv1, mulll(q, xv));
3009: tmp1 = overflow | hiremainder;
3010: }
3011:
3012: tmp0 = addll(tmpu, xu);
3013: if ((tmpd < tmpv) || overflow || tmp1 ||
3014: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
3015: {
3016: /* The recurrence matrix has not yet been unwarped, so it is
3017: * the wrong way round; fix this. */
3018: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3019: return res;
3020: }
3021: else
3022: { /* commit dd, xu1, xv1 */
3023: res++;
3024: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
3025: #ifdef DEBUG_LEHMER
3026: fprintf(stderr, " q = %ld, %lx, %lx\n", q, dd1, dd);
3027: #endif
3028: }
3029:
3030: } /* end of second loop */
3031:
3032: return(0); /* never reached */
3033: }
3034:
3035: /*==================================
3036: * invmod(a,b,res)
3037: *==================================
3038: * If a is invertible, return 1, and set res = a^{ -1 }
3039: * Otherwise, return 0, and set res = gcd(a,b)
3040:
3041: * Temporary - to be replaced with mpxgcd()
3042: * I think we won't need to change much ... just the parameter-passing and
3043: * return value conventions, and computing a final u in addition to a final
3044: * v value before returning (and doing so even when the gcd isn't 1)
3045: */
3046:
3047: #define mysetsigne(d,s) { fprintferr("%ld %ld\n",s,signe(d)); if (signe(d)==-(s)) setsigne(d,s); }
3048:
3049: int
3050: invmod(GEN a, GEN b, GEN *res)
3051: {
3052: GEN v,v1,d,d1,q,r;
3053: long av,av1,lim;
3054: long s;
3055: ulong g;
3056: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
3057: int lhmres; /* Lehmer stage return value */
3058:
3059: if (typ(a) != t_INT || typ(b) != t_INT) err(arither1);
3060: if (!signe(b)) { *res=absi(a); return 0; }
3061: av = avma;
3062: if (lgefint(b) == 3) /* single-word affair */
3063: {
3064: d1 = modiu(a, (ulong)b[2]);
3065: if (d1 == gzero)
3066: {
3067: if (b[2] == 1L)
3068: { *res = gzero; return 1; }
3069: else
3070: { *res = absi(b); return 0; }
3071: }
3072: g = xgcduu((ulong)b[2], (ulong)d1[2], 1, &xv, &xv1, &s);
3073: #ifdef DEBUG_LEHMER
3074: fprintferr(" <- %lu,%lu\n", (ulong)b[2], (ulong)d1[2]);
3075: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
3076: #endif
3077: avma = av;
3078: if (g != 1UL) { *res = utoi(g); return 0; }
3079: xv = xv1 % (ulong)b[2]; if (s < 0) xv = ((ulong)b[2]) - xv;
3080: *res = utoi(xv); return 1;
3081: }
3082:
3083: (void)new_chunk(lgefint(b));
3084: d = absi(b); d1 = modii(a,d);
3085:
3086: v=gzero; v1=gun; /* general case */
3087: #ifdef DEBUG_LEHMER
3088: fprintferr("INVERT: -------------------------\n");
3089: output(d1);
3090: #endif
3091: av1 = avma; lim = stack_lim(av,1);
3092:
3093: while (lgefint(d) > 3 && signe(d1))
3094: {
3095: #ifdef DEBUG_LEHMER
3096: fprintferr("Calling Lehmer:\n");
3097: #endif
3098: lhmres = lgcdii((ulong*)d, (ulong*)d1, &xu, &xu1, &xv, &xv1);
3099: if (lhmres != 0) /* check progress */
3100: { /* apply matrix */
3101: #ifdef DEBUG_LEHMER
3102: fprintferr("Lehmer returned %d [%lu,%lu;%lu,%lu].\n",
3103: lhmres, xu, xu1, xv, xv1);
3104: #endif
3105: if ((lhmres == 1) || (lhmres == -1))
3106: {
3107: if (xv1 == 1)
3108: {
3109: r = subii(d,d1); d=d1; d1=r;
3110: a = subii(v,v1); v=v1; v1=a;
3111: }
3112: else
3113: {
3114: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
3115: a = subii(v, mului(xv1,v1)); v=v1; v1=a;
3116: }
3117: }
3118: else
3119: {
3120: r = subii(muliu(d,xu), muliu(d1,xv));
3121: a = subii(muliu(v,xu), muliu(v1,xv));
3122: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
3123: v1 = subii(muliu(v,xu1), muliu(v1,xv1)); v = a;
3124: if (lhmres&1) {
3125: setsigne(d,-signe(d));
3126: setsigne(v,-signe(v));
3127: }
3128: else {
3129: setsigne(d1,-signe(d1));
3130: setsigne(v1,-signe(v1));
3131: }
3132: }
3133: }
3134: #ifdef DEBUG_LEHMER
3135: else
3136: fprintferr("Lehmer returned 0.\n");
3137: output(d); output(d1); output(v); output(v1);
3138: sleep(1);
3139: #endif
3140:
3141: if (lhmres <= 0 && signe(d1))
3142: {
3143: q = dvmdii(d,d1,&r);
3144: #ifdef DEBUG_LEHMER
3145: fprintferr("Full division:\n");
3146: printf(" q = "); output(q); sleep (1);
3147: #endif
3148: a = subii(v,mulii(q,v1));
3149: v=v1; v1=a;
3150: d=d1; d1=r;
3151: }
3152: if (low_stack(lim, stack_lim(av,1)))
3153: {
3154: GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
3155: if(DEBUGMEM>1) err(warnmem,"invmod");
3156: gerepilemany(av1,gptr,4);
3157: }
3158: } /* end while */
3159:
3160: /* Postprocessing - final sprint */
3161: if (signe(d1))
3162: {
3163: /* Assertions: lgefint(d)==lgefint(d1)==3, and
3164: * gcd(d,d1) is nonzero and fits into one word
3165: */
3166: g = xxgcduu(d[2], d1[2], 1, &xu, &xu1, &xv, &xv1, &s);
3167: #ifdef DEBUG_LEHMER
3168: output(d);output(d1);output(v);output(v1);
3169: fprintferr(" <- %lu,%lu\n", (ulong)d[2], (ulong)d1[2]);
3170: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
3171: #endif
3172: if (g != 1UL) { avma = av; *res = utoi(g); return 0; }
3173: /* (From the xgcduu() blurb:)
3174: * For finishing the multiword modinv, we now have to multiply the
3175: * returned matrix (with properly adjusted signs) onto the values
3176: * v' and v1' previously obtained from the multiword division steps.
3177: * Actually, it is sufficient to take the scalar product of [v',v1']
3178: * with [u1,-v1], and change the sign if s==1.
3179: */
3180: v = subii(muliu(v,xu1),muliu(v1,xv1));
3181: if (s > 0) setsigne(v,-signe(v));
3182: avma = av; *res = modii(v,b);
3183: #ifdef DEBUG_LEHMER
3184: output(*res); fprintfderr("============================Done.\n");
3185: sleep(1);
3186: #endif
3187: return 1;
3188: }
3189: /* get here when the final sprint was skipped (d1 was zero already) */
3190: avma = av;
3191: if (!egalii(d,gun)) { *res = icopy(d); return 0; }
3192: *res = modii(v,b);
3193: #ifdef DEBUG_LEHMER
3194: output(*res); fprintferr("============================Done.\n");
3195: sleep(1);
3196: #endif
3197: return 1;
3198: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>