Annotation of OpenXM_contrib/pari-2.2/src/kernel/none/mp.c, Revision 1.1.1.1
1.1 noro 1: /* $Id: mp.c,v 1.47 2001/09/28 17:16:45 xavier Exp $
2:
3: Copyright (C) 2000 The PARI group.
4:
5: This file is part of the PARI/GP package.
6:
7: PARI/GP is free software; you can redistribute it and/or modify it under the
8: terms of the GNU General Public License as published by the Free Software
9: Foundation. It is distributed in the hope that it will be useful, but WITHOUT
10: ANY WARRANTY WHATSOEVER.
11:
12: Check the License for details. You should have received a copy of it, along
13: with the package; see the file 'COPYING'. If not, write to the Free Software
14: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
15:
16: /***********************************************************************/
17: /** **/
18: /** MULTIPRECISION KERNEL **/
19: /** **/
20: /***********************************************************************/
21: /* most of the routines in this file are commented out in 68k */
22: /* version (#ifdef __M68K__) since they are defined in mp.s */
23: #include "pari.h"
24:
25: /* NOTE: arguments of "spec" routines (muliispec, addiispec, etc.) aren't
26: * GENs but pairs (long *a, long na) representing a list of digits (in base
27: * 2^BITS_IN_LONG) : a[0], ..., a[na-1]. [ In order to facilitate splitting: no
28: * need to reintroduce codewords ]
29: * Use speci(a,na) to visualize the corresponding GEN.
30: */
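/* Illustration: for the two-word positive t_INT z = 2^BITS_IN_LONG + 5 one has
 * lgefint(z) = 4, z[2] = 1, z[3] = 5 (most significant word first); the
 * corresponding "spec" pair is simply a = z+2, na = 2, i.e. a[0] = 1, a[1] = 5,
 * and speci(a,2) rebuilds a GEN equal to z. */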
31:
32: /* z2 := z1[imin..imax].f shifted left sh bits (feeding f from the right) */
33: #define shift_left2(z2,z1,imin,imax,f, sh,m) {\
34: register ulong _l,_k = ((ulong)f)>>m;\
35: GEN t1 = z1 + imax, t2 = z2 + imax, T = z1 + imin;\
36: while (t1 > T) {\
37: _l = *t1--;\
38: *t2-- = (_l<<(ulong)sh) | _k;\
39: _k = _l>>(ulong)m;\
40: }\
41: *t2 = (*t1<<(ulong)sh) | _k;\
42: }
43: #define shift_left(z2,z1,imin,imax,f, sh) {\
44: register const ulong _m = BITS_IN_LONG - sh;\
45: shift_left2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
46: }
47:
48: /* z2 := f.z1[imin..imax-1] shifted right sh bits (feeding f from the left) */
49: #define shift_right2(z2,z1,imin,imax,f, sh,m) {\
50: register GEN t1 = z1 + imin, t2 = z2 + imin, T = z1 + imax;\
51: register ulong _k,_l = *t1++;\
52: *t2++ = (_l>>(ulong)sh) | ((ulong)f<<(ulong)m);\
53: while (t1 < T) {\
54: _k = _l<<(ulong)m; _l = *t1++;\
55: *t2++ = (_l>>(ulong)sh) | _k;\
56: }\
57: }
58: #define shift_right(z2,z1,imin,imax,f, sh) {\
59: register const ulong _m = BITS_IN_LONG - sh;\
60: shift_right2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
61: }
62:
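/* egalii(x,y): equality test for t_INTs. Informally, the masked codewords
 * below compare sign and effective length in one shot, after which only the
 * mantissa words x[2..] and y[2..] need to be compared. */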
63: #define MASK(x) (((ulong)(x)) & (LGEFINTBITS | SIGNBITS))
64: int
65: egalii(GEN x, GEN y)
66: {
67: long i;
68: if (MASK(x[1]) != MASK(y[1])) return 0;
69: i = lgefint(x)-1; while (i>1 && x[i]==y[i]) i--;
70: return i==1;
71: }
72: #undef MASK
73:
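/* addsispec(s, x, nx): informally, add the single word s (viewed as one
 * unsigned digit) to the nx-word "spec" integer x and return the positive
 * sum as a proper t_INT, extended by one word if the carry ripples out. */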
74: #ifdef INLINE
75: INLINE
76: #endif
77: GEN
78: addsispec(long s, GEN x, long nx)
79: {
80: GEN xd, zd = (GEN)avma;
81: long lz;
82: LOCAL_OVERFLOW;
83:
84: lz = nx+3; (void)new_chunk(lz);
85: xd = x + nx;
86: *--zd = addll(*--xd, s);
87: if (overflow)
88: for(;;)
89: {
90: if (xd == x) { *--zd = 1; break; } /* enlarge z */
91: *--zd = ((ulong)*--xd) + 1;
92: if (*zd) { lz--; break; }
93: }
94: else lz--;
95: while (xd > x) *--zd = *--xd;
96: *--zd = evalsigne(1) | evallgefint(lz);
97: *--zd = evaltyp(t_INT) | evallg(lz);
98: avma=(long)zd; return zd;
99: }
100:
101: #define swapspec(x,y, nx,ny) {long _a=nx;GEN _z=x; nx=ny; ny=_a; x=y; y=_z;}
102:
103: #ifdef INLINE
104: INLINE
105: #endif
106: GEN
107: addiispec(GEN x, GEN y, long nx, long ny)
108: {
109: GEN xd,yd,zd;
110: long lz;
111: LOCAL_OVERFLOW;
112:
113: if (nx < ny) swapspec(x,y, nx,ny);
114: if (ny == 1) return addsispec(*y,x,nx);
115: zd = (GEN)avma;
116: lz = nx+3; (void)new_chunk(lz);
117: xd = x + nx;
118: yd = y + ny;
119: *--zd = addll(*--xd, *--yd);
120: while (yd > y) *--zd = addllx(*--xd, *--yd);
121: if (overflow)
122: for(;;)
123: {
124: if (xd == x) { *--zd = 1; break; } /* enlarge z */
125: *--zd = ((ulong)*--xd) + 1;
126: if (*zd) { lz--; break; }
127: }
128: else lz--;
129: while (xd > x) *--zd = *--xd;
130: *--zd = evalsigne(1) | evallgefint(lz);
131: *--zd = evaltyp(t_INT) | evallg(lz);
132: avma=(long)zd; return zd;
133: }
134:
135: /* assume x >= y */
136: #ifdef INLINE
137: INLINE
138: #endif
139: GEN
140: subisspec(GEN x, long s, long nx)
141: {
142: GEN xd, zd = (GEN)avma;
143: long lz;
144: LOCAL_OVERFLOW;
145:
146: lz = nx+2; (void)new_chunk(lz);
147: xd = x + nx;
148: *--zd = subll(*--xd, s);
149: if (overflow)
150: for(;;)
151: {
152: *--zd = ((ulong)*--xd) - 1;
153: if (*xd) break;
154: }
155: if (xd == x)
156: while (*zd == 0) { zd++; lz--; } /* shorten z */
157: else
158: do *--zd = *--xd; while (xd > x);
159: *--zd = evalsigne(1) | evallgefint(lz);
160: *--zd = evaltyp(t_INT) | evallg(lz);
161: avma=(long)zd; return zd;
162: }
163:
164: /* assume x > y */
165: #ifdef INLINE
166: INLINE
167: #endif
168: GEN
169: subiispec(GEN x, GEN y, long nx, long ny)
170: {
171: GEN xd,yd,zd;
172: long lz;
173: LOCAL_OVERFLOW;
174:
175: if (ny==1) return subisspec(x,*y,nx);
176: zd = (GEN)avma;
177: lz = nx+2; (void)new_chunk(lz);
178: xd = x + nx;
179: yd = y + ny;
180: *--zd = subll(*--xd, *--yd);
181: while (yd > y) *--zd = subllx(*--xd, *--yd);
182: if (overflow)
183: for(;;)
184: {
185: *--zd = ((ulong)*--xd) - 1;
186: if (*xd) break;
187: }
188: if (xd == x)
189: while (*zd == 0) { zd++; lz--; } /* shorten z */
190: else
191: do *--zd = *--xd; while (xd > x);
192: *--zd = evalsigne(1) | evallgefint(lz);
193: *--zd = evaltyp(t_INT) | evallg(lz);
194: avma=(long)zd; return zd;
195: }
196:
197: #ifndef __M68K__
198:
199: /* prototype of positive small ints */
200: static long pos_s[] = {
201: evaltyp(t_INT) | m_evallg(3), evalsigne(1) | evallgefint(3), 0 };
202:
203: /* prototype of negative small ints */
204: static long neg_s[] = {
205: evaltyp(t_INT) | m_evallg(3), evalsigne(-1) | evallgefint(3), 0 };
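/* These prototypes are filled in on the fly (e.g. pos_s[2] = x in addss
 * below) so that a small long can be handed to GEN routines without
 * allocating a temporary integer on the stack. */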
206:
207: void
208: affir(GEN x, GEN y)
209: {
210: const long s=signe(x),ly=lg(y);
211: long lx,sh,i;
212:
213: if (!s)
214: {
215: y[1] = evalexpo(-bit_accuracy(ly));
216: y[2] = 0; return;
217: }
218:
219: lx=lgefint(x); sh=bfffo(x[2]);
220: y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);
221: if (sh)
222: {
223: if (lx>ly)
224: { shift_left(y,x,2,ly-1, x[ly],sh); }
225: else
226: {
227: for (i=lx; i<ly; i++) y[i]=0;
228: shift_left(y,x,2,lx-1, 0,sh);
229: }
230: return;
231: }
232:
233: if (lx>=ly)
234: for (i=2; i<ly; i++) y[i]=x[i];
235: else
236: {
237: for (i=2; i<lx; i++) y[i]=x[i];
238: for ( ; i<ly; i++) y[i]=0;
239: }
240: }
241:
242: void
243: affrr(GEN x, GEN y)
244: {
245: long lx,ly,i;
246:
247: y[1] = x[1]; if (!signe(x)) { y[2]=0; return; }
248:
249: lx=lg(x); ly=lg(y);
250: if (lx>=ly)
251: for (i=2; i<ly; i++) y[i]=x[i];
252: else
253: {
254: for (i=2; i<lx; i++) y[i]=x[i];
255: for ( ; i<ly; i++) y[i]=0;
256: }
257: }
258:
259: GEN
260: shifti(GEN x, long n)
261: {
262: const long s=signe(x);
263: long lx,ly,i,m;
264: GEN y;
265:
266: if (!s) return gzero;
267: if (!n) return icopy(x);
268: lx = lgefint(x);
269: if (n > 0)
270: {
271: GEN z = (GEN)avma;
272: long d = n>>TWOPOTBITS_IN_LONG;
273:
274: ly = lx+d; y = new_chunk(ly);
275: for ( ; d; d--) *--z = 0;
276: m = n & (BITS_IN_LONG-1);
277: if (!m) for (i=2; i<lx; i++) y[i]=x[i];
278: else
279: {
280: register const ulong sh = BITS_IN_LONG - m;
281: shift_left2(y,x, 2,lx-1, 0,m,sh);
282: i = ((ulong)x[2]) >> sh;
283: if (i) { ly++; y = new_chunk(1); y[2] = i; }
284: }
285: }
286: else
287: {
288: n = -n;
289: ly = lx - (n>>TWOPOTBITS_IN_LONG);
290: if (ly<3) return gzero;
291: y = new_chunk(ly);
292: m = n & (BITS_IN_LONG-1);
293: if (!m) for (i=2; i<ly; i++) y[i]=x[i];
294: else
295: {
296: shift_right(y,x, 2,ly, 0,m);
297: if (y[2] == 0)
298: {
299: if (ly==3) { avma = (long)(y+3); return gzero; }
300: ly--; avma = (long)(++y);
301: }
302: }
303: }
304: y[1] = evalsigne(s)|evallgefint(ly);
305: y[0] = evaltyp(t_INT)|evallg(ly); return y;
306: }
307:
308: GEN
309: mptrunc(GEN x)
310: {
311: long d,e,i,s,m;
312: GEN y;
313:
314: if (typ(x)==t_INT) return icopy(x);
315: if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gzero;
316: d = (e>>TWOPOTBITS_IN_LONG) + 3;
317: m = e & (BITS_IN_LONG-1);
318: if (d > lg(x)) err(precer, "mptrunc (precision loss in truncation)");
319:
320: y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);
321: if (++m == BITS_IN_LONG)
322: for (i=2; i<d; i++) y[i]=x[i];
323: else
324: {
325: register const ulong sh = BITS_IN_LONG - m;
326: shift_right2(y,x, 2,d,0, sh,m);
327: }
328: return y;
329: }
330:
331: /* integral part */
332: GEN
333: mpent(GEN x)
334: {
335: long d,e,i,lx,m;
336: GEN y;
337:
338: if (typ(x)==t_INT) return icopy(x);
339: if (signe(x) >= 0) return mptrunc(x);
340: if ((e=expo(x)) < 0) return stoi(-1);
341: d = (e>>TWOPOTBITS_IN_LONG) + 3;
342: m = e & (BITS_IN_LONG-1);
343: lx=lg(x); if (d>lx) err(precer, "mpent (precision loss in truncation)");
344: y = new_chunk(d);
345: if (++m == BITS_IN_LONG)
346: {
347: for (i=2; i<d; i++) y[i]=x[i];
348: i=d; while (i<lx && !x[i]) i++;
349: if (i==lx) goto END;
350: }
351: else
352: {
353: register const ulong sh = BITS_IN_LONG - m;
354: shift_right2(y,x, 2,d,0, sh,m);
355: if (x[d-1]<<m == 0)
356: {
357: i=d; while (i<lx && !x[i]) i++;
358: if (i==lx) goto END;
359: }
360: }
361: /* set y:=y+1 */
362: for (i=d-1; i>=2; i--) { y[i]++; if (y[i]) goto END; }
363: y=new_chunk(1); y[2]=1; d++;
364: END:
365: y[1] = evalsigne(-1) | evallgefint(d);
366: y[0] = evaltyp(t_INT) | evallg(d); return y;
367: }
368:
369: int
370: cmpsi(long x, GEN y)
371: {
372: ulong p;
373:
374: if (!x) return -signe(y);
375:
376: if (x>0)
377: {
378: if (signe(y)<=0) return 1;
379: if (lgefint(y)>3) return -1;
380: p=y[2]; if (p == (ulong)x) return 0;
381: return p < (ulong)x ? 1 : -1;
382: }
383:
384: if (signe(y)>=0) return -1;
385: if (lgefint(y)>3) return 1;
386: p=y[2]; if (p == (ulong)-x) return 0;
387: return p < (ulong)(-x) ? -1 : 1;
388: }
389:
390: int
391: cmpii(GEN x, GEN y)
392: {
393: const long sx = signe(x), sy = signe(y);
394: long lx,ly,i;
395:
396: if (sx<sy) return -1;
397: if (sx>sy) return 1;
398: if (!sx) return 0;
399:
400: lx=lgefint(x); ly=lgefint(y);
401: if (lx>ly) return sx;
402: if (lx<ly) return -sx;
403: i=2; while (i<lx && x[i]==y[i]) i++;
404: if (i==lx) return 0;
405: return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
406: }
407:
408: int
409: cmprr(GEN x, GEN y)
410: {
411: const long sx = signe(x), sy = signe(y);
412: long ex,ey,lx,ly,lz,i;
413:
414: if (sx<sy) return -1;
415: if (sx>sy) return 1;
416: if (!sx) return 0;
417:
418: ex=expo(x); ey=expo(y);
419: if (ex>ey) return sx;
420: if (ex<ey) return -sx;
421:
422: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
423: i=2; while (i<lz && x[i]==y[i]) i++;
424: if (i<lz) return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
425: if (lx>=ly)
426: {
427: while (i<lx && !x[i]) i++;
428: return (i==lx) ? 0 : sx;
429: }
430: while (i<ly && !y[i]) i++;
431: return (i==ly) ? 0 : -sx;
432: }
433:
434: GEN
435: addss(long x, long y)
436: {
437: if (!x) return stoi(y);
438: if (x>0) { pos_s[2] = x; return addsi(y,pos_s); }
439: neg_s[2] = -x; return addsi(y,neg_s);
440: }
441:
442: GEN
443: addsi(long x, GEN y)
444: {
445: long sx,sy,ly;
446: GEN z;
447:
448: if (!x) return icopy(y);
449: sy=signe(y); if (!sy) return stoi(x);
450: if (x<0) { sx=-1; x=-x; } else sx=1;
451: if (sx==sy)
452: {
453: z = addsispec(x,y+2, lgefint(y)-2);
454: setsigne(z,sy); return z;
455: }
456: ly=lgefint(y);
457: if (ly==3)
458: {
459: const long d = y[2] - x;
460: if (!d) return gzero;
461: z=cgeti(3);
462: if (y[2] < 0 || d > 0) {
463: z[1] = evalsigne(sy) | evallgefint(3);
464: z[2] = d;
465: }
466: else {
467: z[1] = evalsigne(-sy) | evallgefint(3);
468: z[2] =-d;
469: }
470: return z;
471: }
472: z = subisspec(y+2,x, ly-2);
473: setsigne(z,sy); return z;
474: }
475:
476: GEN
477: addii(GEN x, GEN y)
478: {
479: long sx = signe(x);
480: long sy = signe(y);
481: long lx,ly;
482: GEN z;
483:
484: if (!sx) return sy? icopy(y): gzero;
485: if (!sy) return icopy(x);
486: lx=lgefint(x); ly=lgefint(y);
487:
488: if (sx==sy)
489: z = addiispec(x+2,y+2,lx-2,ly-2);
490: else
491: { /* sx != sy */
492: long i = lx - ly;
493: if (i==0)
494: {
495: i = absi_cmp(x,y);
496: if (!i) return gzero;
497: }
498: if (i<0) { sx=sy; swapspec(x,y, lx,ly); } /* ensure |x| >= |y| */
499: z = subiispec(x+2,y+2,lx-2,ly-2);
500: }
501: setsigne(z,sx); return z;
502: }
503:
504: GEN
505: addsr(long x, GEN y)
506: {
507: if (!x) return rcopy(y);
508: if (x>0) { pos_s[2]=x; return addir(pos_s,y); }
509: neg_s[2] = -x; return addir(neg_s,y);
510: }
511:
512: GEN
513: addir(GEN x, GEN y)
514: {
515: long e,l,ly;
516: GEN z;
517:
518: if (!signe(x)) return rcopy(y);
519: e = expo(y)-expi(x);
520: if (!signe(y))
521: {
522: #if 0
523: if (e>0) err(adder3);
524: #else /* design issue: make 0.0 "absorbing" */
525: if (e>0) return rcopy(y);
526: #endif
527: z = cgetr(3 + ((-e)>>TWOPOTBITS_IN_LONG));
528: affir(x,z); return z;
529: }
530:
531: ly=lg(y);
532: if (e>0)
533: {
534: l = ly - (e>>TWOPOTBITS_IN_LONG);
535: if (l<3) return rcopy(y);
536: }
537: else l = ly + ((-e)>>TWOPOTBITS_IN_LONG)+1;
538: z=cgetr(l); affir(x,z); y=addrr(z,y);
539: z = y+l; ly = lg(y); while (ly--) z[ly] = y[ly];
540: avma=(long)z; return z;
541: }
542:
543: GEN
544: addrr(GEN x, GEN y)
545: {
546: long sx=signe(x),sy=signe(y),ex=expo(x),ey=expo(y);
547: long e,m,flag,i,j,f2,lx,ly,lz;
548: GEN z;
549: LOCAL_OVERFLOW;
550:
551: e = ey-ex;
552: if (!sy)
553: {
554: if (!sx)
555: {
556: if (e > 0) ex=ey;
557: z=cgetr(3); z[1]=evalexpo(ex); z[2]=0; return z;
558: }
559: if (e > 0) { z=cgetr(3); z[1]=evalexpo(ey); z[2]=0; return z; }
560: lz = 3 + ((-e)>>TWOPOTBITS_IN_LONG);
561: lx = lg(x); if (lz>lx) lz=lx;
562: z = cgetr(lz); while(--lz) z[lz]=x[lz];
563: return z;
564: }
565: if (!sx)
566: {
567: if (e < 0) { z=cgetr(3); z[1]=evalexpo(ex); z[2]=0; return z; }
568: lz = 3 + (e>>TWOPOTBITS_IN_LONG);
569: ly = lg(y); if (lz>ly) lz=ly;
570: z = cgetr(lz); while (--lz) z[lz]=y[lz];
571: return z;
572: }
573:
574: if (e < 0) { z=x; x=y; y=z; ey=ex; i=sx; sx=sy; sy=i; e=-e; }
575: /* now ey >= ex */
576: lx=lg(x); ly=lg(y);
577: if (e)
578: {
579: long d = e >> TWOPOTBITS_IN_LONG, l = ly-d;
580: if (l > lx) { flag=0; lz=lx+d+1; }
581: else if (l > 2) { flag=1; lz=ly; lx=l; }
582: else return rcopy(y);
583: m = e & (BITS_IN_LONG-1);
584: }
585: else
586: {
587: if (lx > ly) lx = ly;
588: flag=2; lz=lx; m=0;
589: }
590: z = cgetr(lz);
591: if (m)
592: { /* shift right x m bits */
593: const ulong sh = BITS_IN_LONG-m;
594: GEN p1 = x; x = new_chunk(lx+1);
595: shift_right2(x,p1,2,lx, 0,m,sh);
596: if (flag==0)
597: {
598: x[lx] = p1[lx-1] << sh;
599: if (x[lx]) { flag = 3; lx++; }
600: }
601: }
602:
603: if (sx==sy)
604: { /* addition */
605: i=lz-1; avma = (long)z;
606: if (flag==0) { z[i] = y[i]; i--; }
607: overflow=0;
608: for (j=lx-1; j>=2; i--,j--) z[i] = addllx(x[j],y[i]);
609:
610: if (overflow)
611: for (;;)
612: {
613: if (i==1)
614: {
615: shift_right(z,z, 2,lz, 1,1);
616: ey=evalexpo(ey+1);
617: z[1] = evalsigne(sx) | ey; return z;
618: }
619: z[i] = y[i]+1; if (z[i--]) break;
620: }
621: for (; i>=2; i--) z[i]=y[i];
622: z[1] = evalsigne(sx) | evalexpo(ey); return z;
623: }
624:
625: /* subtraction */
626: if (e) f2 = 1;
627: else
628: {
629: i=2; while (i<lx && x[i]==y[i]) i++;
630: if (i==lx)
631: {
632: avma = (long)(z+lz);
633: e = evalexpo(ey - bit_accuracy(lx));
634: z=cgetr(3); z[1]=e; z[2]=0; return z;
635: }
636: f2 = ((ulong)y[i] > (ulong)x[i]);
637: }
638:
639: /* result is non-zero. f2 = (y > x) */
640: i=lz-1;
641: if (f2)
642: {
643: if (flag==0) { z[i] = y[i]; i--; }
644: j=lx-1; z[i] = subll(y[i],x[j]); i--; j--;
645: for (; j>=2; i--,j--) z[i] = subllx(y[i],x[j]);
646: if (overflow)
647: for (;;) { z[i] = y[i]-1; if (y[i--]) break; }
648: for (; i>=2; i--) z[i]=y[i];
649: sx = sy;
650: }
651: else
652: {
653: if (flag==0) { z[i] = -y[i]; i--; overflow=1; } else overflow=0;
654: for (; i>=2; i--) z[i]=subllx(x[i],y[i]);
655: }
656:
657: x = z+2; i=0; while (!x[i]) i++;
658: lz -= i; z += i; m = bfffo(z[2]);
659: e = evalexpo(ey - (m | (i<<TWOPOTBITS_IN_LONG)));
660: if (m) shift_left(z,z,2,lz-1, 0,m);
661: z[1] = evalsigne(sx) | e;
662: z[0] = evaltyp(t_REAL) | evallg(lz);
663: avma = (long)z; return z;
664: }
665:
666: GEN
667: mulss(long x, long y)
668: {
669: long s,p1;
670: GEN z;
671: LOCAL_HIREMAINDER;
672:
673: if (!x || !y) return gzero;
674: if (x<0) { s = -1; x = -x; } else s=1;
675: if (y<0) { s = -s; y = -y; }
676: p1 = mulll(x,y);
677: if (hiremainder)
678: {
679: z=cgeti(4); z[1] = evalsigne(s) | evallgefint(4);
680: z[2]=hiremainder; z[3]=p1; return z;
681: }
682: z=cgeti(3); z[1] = evalsigne(s) | evallgefint(3);
683: z[2]=p1; return z;
684: }
685: #endif
686:
687: GEN
688: muluu(ulong x, ulong y)
689: {
690: long p1;
691: GEN z;
692: LOCAL_HIREMAINDER;
693:
694: if (!x || !y) return gzero;
695: p1 = mulll(x,y);
696: if (hiremainder)
697: {
698: z=cgeti(4); z[1] = evalsigne(1) | evallgefint(4);
699: z[2]=hiremainder; z[3]=p1; return z;
700: }
701: z=cgeti(3); z[1] = evalsigne(1) | evallgefint(3);
702: z[2]=p1; return z;
703: }
704:
705: /* assume ny > 0 */
706: #ifdef INLINE
707: INLINE
708: #endif
709: GEN
710: mulsispec(long x, GEN y, long ny)
711: {
712: GEN yd, z = (GEN)avma;
713: long lz = ny+3;
714: LOCAL_HIREMAINDER;
715:
716: (void)new_chunk(lz);
717: yd = y + ny; *--z = mulll(x, *--yd);
718: while (yd > y) *--z = addmul(x,*--yd);
719: if (hiremainder) *--z = hiremainder; else lz--;
720: *--z = evalsigne(1) | evallgefint(lz);
721: *--z = evaltyp(t_INT) | evallg(lz);
722: avma=(long)z; return z;
723: }
724:
725: GEN
726: mului(ulong x, GEN y)
727: {
728: long s = signe(y);
729: GEN z;
730:
731: if (!s || !x) return gzero;
732: z = mulsispec(x, y+2, lgefint(y)-2);
733: setsigne(z,s); return z;
734: }
735:
736: /* a + b*Y, assume Y >= 0, 0 <= a,b <= VERYBIGINT */
737: GEN
738: addsmulsi(long a, long b, GEN Y)
739: {
740: GEN yd,y,z;
741: long ny,lz;
742: LOCAL_HIREMAINDER;
743: LOCAL_OVERFLOW;
744:
745: if (!signe(Y)) return stoi(a);
746:
747: y = Y+2; z = (GEN)avma;
748: ny = lgefint(Y)-2;
749: lz = ny+3;
750:
751: (void)new_chunk(lz);
752: yd = y + ny; *--z = addll(a, mulll(b, *--yd));
753: if (overflow) hiremainder++; /* can't overflow */
754: while (yd > y) *--z = addmul(b,*--yd);
755: if (hiremainder) *--z = hiremainder; else lz--;
756: *--z = evalsigne(1) | evallgefint(lz);
757: *--z = evaltyp(t_INT) | evallg(lz);
758: avma=(long)z; return z;
759: }
760:
761: #ifndef __M68K__
762:
763: GEN
764: mulsi(long x, GEN y)
765: {
766: long s = signe(y);
767: GEN z;
768:
769: if (!s || !x) return gzero;
770: if (x<0) { s = -s; x = -x; }
771: z = mulsispec(x, y+2, lgefint(y)-2);
772: setsigne(z,s); return z;
773: }
774:
775: GEN
776: mulsr(long x, GEN y)
777: {
778: long lx,i,s,garde,e,sh,m;
779: GEN z;
780: LOCAL_HIREMAINDER;
781:
782: if (!x) return gzero;
783: s = signe(y);
784: if (!s)
785: {
786: if (x<0) x = -x;
787: e = y[1] + (BITS_IN_LONG-1)-bfffo(x);
788: if (e & ~EXPOBITS) err(muler2);
789: z=cgetr(3); z[1]=e; z[2]=0; return z;
790: }
791: if (x<0) { s = -s; x = -x; }
792: if (x==1) { z=rcopy(y); setsigne(z,s); return z; }
793:
794: lx = lg(y); e = expo(y);
795: z=cgetr(lx); y--; garde=mulll(x,y[lx]);
796: for (i=lx-1; i>=3; i--) z[i]=addmul(x,y[i]);
797: z[2]=hiremainder;
798:
799: sh = bfffo(hiremainder); m = BITS_IN_LONG-sh;
800: if (sh) shift_left2(z,z, 2,lx-1, garde,sh,m);
801: e = evalexpo(m+e);
802: z[1] = evalsigne(s) | e; return z;
803: }
804:
805: GEN
806: mulrr(GEN x, GEN y)
807: {
808: long sx = signe(x), sy = signe(y);
809: long i,j,ly,lz,lzz,e,flag,p1;
810: ulong garde;
811: GEN z, y1;
812: LOCAL_HIREMAINDER;
813: LOCAL_OVERFLOW;
814:
815: e = evalexpo(expo(x)+expo(y));
816: if (!sx || !sy) { z=cgetr(3); z[1]=e; z[2]=0; return z; }
817: if (sy<0) sx = -sx;
818: lz=lg(x); ly=lg(y);
819: if (lz>ly) { lz=ly; z=x; x=y; y=z; flag=1; } else flag = (lz!=ly);
820: z=cgetr(lz); z[1] = evalsigne(sx) | e;
821: if (lz==3)
822: {
823: if (flag)
824: {
825: (void)mulll(x[2],y[3]);
826: garde = addmul(x[2],y[2]);
827: }
828: else
829: garde = mulll(x[2],y[2]);
830: if ((long)hiremainder<0) { z[2]=hiremainder; z[1]++; }
831: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
832: return z;
833: }
834:
835: if (flag) { (void)mulll(x[2],y[lz]); garde = hiremainder; } else garde=0;
836: lzz=lz-1; p1=x[lzz];
837: if (p1)
838: {
839: (void)mulll(p1,y[3]);
840: garde = addll(addmul(p1,y[2]), garde);
841: z[lzz] = overflow+hiremainder;
842: }
843: else z[lzz]=0;
844: for (j=lz-2, y1=y-j; j>=3; j--)
845: {
846: p1 = x[j]; y1++;
847: if (p1)
848: {
849: (void)mulll(p1,y1[lz+1]);
850: garde = addll(addmul(p1,y1[lz]), garde);
851: for (i=lzz; i>j; i--)
852: {
853: hiremainder += overflow;
854: z[i] = addll(addmul(p1,y1[i]), z[i]);
855: }
856: z[j] = hiremainder+overflow;
857: }
858: else z[j]=0;
859: }
860: p1=x[2]; y1++;
861: garde = addll(mulll(p1,y1[lz]), garde);
862: for (i=lzz; i>2; i--)
863: {
864: hiremainder += overflow;
865: z[i] = addll(addmul(p1,y1[i]), z[i]);
866: }
867: z[2] = hiremainder+overflow;
868: if (z[2] < 0) z[1]++;
869: else
870: shift_left(z,z,2,lzz,garde, 1);
871: return z;
872: }
873:
874: GEN
875: mulir(GEN x, GEN y)
876: {
877: long sx=signe(x),sy,lz,lzz,ey,e,p1,i,j;
878: ulong garde;
879: GEN z,y1;
880: LOCAL_HIREMAINDER;
881: LOCAL_OVERFLOW;
882:
883: if (!sx) return gzero;
884: if (!is_bigint(x)) return mulsr(itos(x),y);
885: sy=signe(y); ey=expo(y);
886: if (!sy)
887: {
888: e = evalexpo(expi(x)+ey);
889: z=cgetr(3); z[1]=e; z[2]=0; return z;
890: }
891:
892: if (sy<0) sx = -sx;
893: lz=lg(y); z=cgetr(lz);
894: y1=cgetr(lz+1);
895: affir(x,y1); x=y; y=y1;
896: e = evalexpo(expo(y)+ey);
897: z[1] = evalsigne(sx) | e;
898: if (lz==3)
899: {
900: (void)mulll(x[2],y[3]);
901: garde=addmul(x[2],y[2]);
902: if ((long)hiremainder < 0) { z[2]=hiremainder; z[1]++; }
903: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
904: avma=(long)z; return z;
905: }
906:
907: (void)mulll(x[2],y[lz]); garde=hiremainder;
908: lzz=lz-1; p1=x[lzz];
909: if (p1)
910: {
911: (void)mulll(p1,y[3]);
912: garde=addll(addmul(p1,y[2]),garde);
913: z[lzz] = overflow+hiremainder;
914: }
915: else z[lzz]=0;
916: for (j=lz-2, y1=y-j; j>=3; j--)
917: {
918: p1=x[j]; y1++;
919: if (p1)
920: {
921: (void)mulll(p1,y1[lz+1]);
922: garde = addll(addmul(p1,y1[lz]), garde);
923: for (i=lzz; i>j; i--)
924: {
925: hiremainder += overflow;
926: z[i] = addll(addmul(p1,y1[i]), z[i]);
927: }
928: z[j] = hiremainder+overflow;
929: }
930: else z[j]=0;
931: }
932: p1=x[2]; y1++;
933: garde = addll(mulll(p1,y1[lz]), garde);
934: for (i=lzz; i>2; i--)
935: {
936: hiremainder += overflow;
937: z[i] = addll(addmul(p1,y1[i]), z[i]);
938: }
939: z[2] = hiremainder+overflow;
940: if (z[2] < 0) z[1]++;
941: else
942: shift_left(z,z,2,lzz,garde, 1);
943: avma=(long)z; return z;
944: }
945:
946: /* written by Bruno Haible following an idea of Robert Harley */
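/* vals(z): index of the lowest set bit of z (the 2-adic valuation of z), or
 * -1 if z = 0. Informally, z |= -z produces a mask that depends only on that
 * index, and the shifts below hash it into a 6-bit lookup into tab[]. */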
947: long
948: vals(ulong z)
949: {
950: static char tab[64]={-1,0,1,12,2,6,-1,13,3,-1,7,-1,-1,-1,-1,14,10,4,-1,-1,8,-1,-1,25,-1,-1,-1,-1,-1,21,27,15,31,11,5,-1,-1,-1,-1,-1,9,-1,-1,24,-1,-1,20,26,30,-1,-1,-1,-1,23,-1,19,29,-1,22,18,28,17,16,-1};
951: #ifdef LONG_IS_64BIT
952: long s;
953: #endif
954:
955: if (!z) return -1;
956: #ifdef LONG_IS_64BIT
957: if (! (z&0xffffffff)) { s = 32; z >>=32; } else s = 0;
958: #endif
959: z |= ~z + 1;
960: z += z << 4;
961: z += z << 6;
962: z ^= z << 16; /* or z -= z<<16 */
963: #ifdef LONG_IS_64BIT
964: return s + tab[(z&0xffffffff)>>26];
965: #else
966: return tab[z>>26];
967: #endif
968: }
969:
970: GEN
971: modss(long x, long y)
972: {
973: LOCAL_HIREMAINDER;
974:
975: if (!y) err(moder1);
976: if (y<0) y=-y;
977: hiremainder=0; divll(labs(x),y);
978: if (!hiremainder) return gzero;
979: return (x < 0) ? stoi(y-hiremainder) : stoi(hiremainder);
980: }
981:
982: GEN
983: resss(long x, long y)
984: {
985: LOCAL_HIREMAINDER;
986:
987: if (!y) err(reser1);
988: hiremainder=0; divll(labs(x),labs(y));
989: return (x < 0) ? stoi(-((long)hiremainder)) : stoi(hiremainder);
990: }
991:
992: GEN
993: divsi(long x, GEN y)
994: {
995: long p1, s = signe(y);
996: LOCAL_HIREMAINDER;
997:
998: if (!s) err(diver2);
999: if (!x || lgefint(y)>3 || ((long)y[2])<0)
1000: {
1001: hiremainder=x; SAVE_HIREMAINDER; return gzero;
1002: }
1003: hiremainder=0; p1=divll(labs(x),y[2]);
1004: if (x<0) { hiremainder = -((long)hiremainder); p1 = -p1; }
1005: if (s<0) p1 = -p1;
1006: SAVE_HIREMAINDER; return stoi(p1);
1007: }
1008: #endif
1009:
1010: GEN
1011: modui(ulong x, GEN y)
1012: {
1013: LOCAL_HIREMAINDER;
1014:
1015: if (!signe(y)) err(diver2);
1016: if (!x || lgefint(y)>3) hiremainder=x;
1017: else
1018: {
1019: hiremainder=0; (void)divll(x,y[2]);
1020: }
1021: return utoi(hiremainder);
1022: }
1023:
1024: ulong
1025: umodiu(GEN y, ulong x)
1026: {
1027: long sy=signe(y),ly,i;
1028: LOCAL_HIREMAINDER;
1029:
1030: if (!x) err(diver4);
1031: if (!sy) return 0;
1032: ly = lgefint(y);
1033: if (x <= (ulong)y[2]) hiremainder=0;
1034: else
1035: {
1036: if (ly==3) return (sy > 0)? (ulong)y[2]: x - (ulong)y[2];
1037: hiremainder=y[2]; ly--; y++;
1038: }
1039: for (i=2; i<ly; i++) (void)divll(y[i],x);
1040: if (!hiremainder) return 0;
1041: return (sy > 0)? hiremainder: x - hiremainder;
1042: }
1043:
1044: GEN
1045: modiu(GEN y, ulong x) { return utoi(umodiu(y,x)); }
1046:
1047: #ifndef __M68K__
1048:
1049: GEN
1050: modsi(long x, GEN y)
1051: {
1052: long s = signe(y);
1053: GEN p1;
1054: LOCAL_HIREMAINDER;
1055:
1056: if (!s) err(diver2);
1057: if (!x || lgefint(y)>3 || ((long)y[2])<0) hiremainder=x;
1058: else
1059: {
1060: hiremainder=0; (void)divll(labs(x),y[2]);
1061: if (x<0) hiremainder = -((long)hiremainder);
1062: }
1063: if (!hiremainder) return gzero;
1064: if (x>0) return stoi(hiremainder);
1065: if (s<0)
1066: { setsigne(y,1); p1=addsi(hiremainder,y); setsigne(y,-1); }
1067: else
1068: p1=addsi(hiremainder,y);
1069: return p1;
1070: }
1071:
1072: GEN
1073: divis(GEN y, long x)
1074: {
1075: long sy=signe(y),ly,s,i;
1076: GEN z;
1077: LOCAL_HIREMAINDER;
1078:
1079: if (!x) err(diver4);
1080: if (!sy) { hiremainder=0; SAVE_HIREMAINDER; return gzero; }
1081: if (x<0) { s = -sy; x = -x; } else s=sy;
1082:
1083: ly = lgefint(y);
1084: if ((ulong)x <= (ulong)y[2]) hiremainder=0;
1085: else
1086: {
1087: if (ly==3) { hiremainder=itos(y); SAVE_HIREMAINDER; return gzero; }
1088: hiremainder=y[2]; ly--; y++;
1089: }
1090: z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
1091: for (i=2; i<ly; i++) z[i]=divll(y[i],x);
1092: if (sy<0) hiremainder = - ((long)hiremainder);
1093: SAVE_HIREMAINDER; return z;
1094: }
1095:
1096: GEN
1097: divir(GEN x, GEN y)
1098: {
1099: GEN xr,z;
1100: long av,ly;
1101:
1102: if (!signe(y)) err(diver5);
1103: if (!signe(x)) return gzero;
1104: ly=lg(y); z=cgetr(ly); av=avma; xr=cgetr(ly+1); affir(x,xr);
1105: affrr(divrr(xr,y),z); avma=av; return z;
1106: }
1107:
1108: GEN
1109: divri(GEN x, GEN y)
1110: {
1111: GEN yr,z;
1112: long av,lx,s=signe(y);
1113:
1114: if (!s) err(diver8);
1115: if (!signe(x))
1116: {
1117: const long ex = evalexpo(expo(x) - expi(y));
1118:
1119: if (ex<0) err(diver12);
1120: z=cgetr(3); z[1]=ex; z[2]=0; return z;
1121: }
1122: if (!is_bigint(y)) return divrs(x, s>0? y[2]: -y[2]);
1123:
1124: lx=lg(x); z=cgetr(lx);
1125: av=avma; yr=cgetr(lx+1); affir(y,yr);
1126: affrr(divrr(x,yr),z); avma=av; return z;
1127: }
1128:
1129: void
1130: diviiz(GEN x, GEN y, GEN z)
1131: {
1132: long av=avma,lz;
1133: GEN xr,yr;
1134:
1135: if (typ(z)==t_INT) { affii(divii(x,y),z); avma=av; return; }
1136: lz=lg(z); xr=cgetr(lz); affir(x,xr); yr=cgetr(lz); affir(y,yr);
1137: affrr(divrr(xr,yr),z); avma=av;
1138: }
1139:
1140: void
1141: mpdivz(GEN x, GEN y, GEN z)
1142: {
1143: long av=avma;
1144:
1145: if (typ(z)==t_INT)
1146: {
1147: if (typ(x)==t_REAL || typ(y)==t_REAL) err(divzer1);
1148: affii(divii(x,y),z); avma=av; return;
1149: }
1150: if (typ(x)==t_INT)
1151: {
1152: GEN xr,yr;
1153: long lz;
1154:
1155: if (typ(y)==t_REAL) { affrr(divir(x,y),z); avma=av; return; }
1156: lz=lg(z); xr=cgetr(lz); affir(x,xr); yr=cgetr(lz); affir(y,yr);
1157: affrr(divrr(xr,yr),z); avma=av; return;
1158: }
1159: if (typ(y)==t_REAL) { affrr(divrr(x,y),z); avma=av; return; }
1160: affrr(divri(x,y),z); avma=av;
1161: }
1162:
1163: GEN
1164: divsr(long x, GEN y)
1165: {
1166: long av,ly;
1167: GEN p1,z;
1168:
1169: if (!signe(y)) err(diver3);
1170: if (!x) return gzero;
1171: ly=lg(y); z=cgetr(ly); av=avma;
1172: p1=cgetr(ly+1); affsr(x,p1); affrr(divrr(p1,y),z);
1173: avma=av; return z;
1174: }
1175:
1176: GEN
1177: modii(GEN x, GEN y)
1178: {
1179: switch(signe(x))
1180: {
1181: case 0: return gzero;
1182: case 1: return resii(x,y);
1183: default:
1184: {
1185: long av = avma;
1186: (void)new_chunk(lgefint(y));
1187: x = resii(x,y); avma=av;
1188: if (x==gzero) return x;
1189: return subiispec(y+2,x+2,lgefint(y)-2,lgefint(x)-2);
1190: }
1191: }
1192: }
1193:
1194: void
1195: modiiz(GEN x, GEN y, GEN z)
1196: {
1197: const long av = avma;
1198: affii(modii(x,y),z); avma=av;
1199: }
1200:
1201: GEN
1202: divrs(GEN x, long y)
1203: {
1204: long i,lx,ex,garde,sh,s=signe(x);
1205: GEN z;
1206: LOCAL_HIREMAINDER;
1207:
1208: if (!y) err(diver6);
1209: if (!s)
1210: {
1211: z=cgetr(3); z[1] = x[1] - (BITS_IN_LONG-1) + bfffo(y);
1212: if (z[1]<0) err(diver7);
1213: z[2]=0; return z;
1214: }
1215: if (y<0) { s = -s; y = -y; }
1216: if (y==1) { z=rcopy(x); setsigne(z,s); return z; }
1217:
1218: z=cgetr(lx=lg(x)); hiremainder=0;
1219: for (i=2; i<lx; i++) z[i]=divll(x[i],y);
1220:
1221: /* we may have hiremainder != 0 ==> garde */
1222: garde=divll(0,y); sh=bfffo(z[2]); ex=evalexpo(expo(x)-sh);
1223: if (sh) shift_left(z,z, 2,lx-1, garde,sh);
1224: z[1] = evalsigne(s) | ex;
1225: return z;
1226: }
1227:
1228: GEN
1229: divrr(GEN x, GEN y)
1230: {
1231: long sx=signe(x), sy=signe(y), lx,ly,lz,e,i,j;
1232: ulong si,saux;
1233: GEN z,x1;
1234:
1235: if (!sy) err(diver9);
1236: e = evalexpo(expo(x) - expo(y));
1237: if (!sx) { z=cgetr(3); z[1]=e; z[2]=0; return z; }
1238: if (sy<0) sx = -sx;
1239: e = evalsigne(sx) | e;
1240: lx=lg(x); ly=lg(y);
1241: if (ly==3)
1242: {
1243: ulong k = x[2], l = (lx>3)? x[3]: 0;
1244: LOCAL_HIREMAINDER;
1245: if (k < (ulong)y[2]) e--;
1246: else
1247: {
1248: l >>= 1; if (k&1) l |= HIGHBIT;
1249: k >>= 1;
1250: }
1251: z=cgetr(3); z[1]=e;
1252: hiremainder=k; z[2]=divll(l,y[2]); return z;
1253: }
1254:
1255: lz = (lx<=ly)? lx: ly;
1256: x1 = (z=new_chunk(lz))-1;
1257: x1[1]=0; for (i=2; i<lz; i++) x1[i]=x[i];
1258: x1[lz] = (lx>lz)? x[lz]: 0;
1259: x=z; si=y[2]; saux=y[3];
1260: for (i=0; i<lz-1; i++)
1261: { /* x1 = x + (i-1) */
1262: ulong k,qp;
1263: LOCAL_HIREMAINDER;
1264: LOCAL_OVERFLOW;
1265: if ((ulong)x1[1] == si)
1266: {
1267: qp = MAXULONG; k=addll(si,x1[2]);
1268: }
1269: else
1270: {
1271: if ((ulong)x1[1] > si) /* can't happen if i=0 */
1272: {
1273: GEN y1 = y+1;
1274: overflow=0;
1275: for (j=lz-i; j>0; j--) x1[j] = subllx(x1[j],y1[j]);
1276: j=i; do x[--j]++; while (j && !x[j]);
1277: }
1278: hiremainder=x1[1]; overflow=0;
1279: qp=divll(x1[2],si); k=hiremainder;
1280: }
1281: if (!overflow)
1282: {
1283: long k3 = subll(mulll(qp,saux), x1[3]);
1284: long k4 = subllx(hiremainder,k);
1285: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
1286: }
1287: j = lz-i+1;
1288: if (j<ly) (void)mulll(qp,y[j]); else { hiremainder=0 ; j=ly; }
1289: for (j--; j>1; j--)
1290: {
1291: x1[j] = subll(x1[j], addmul(qp,y[j]));
1292: hiremainder += overflow;
1293: }
1294: if ((ulong)x1[1] != hiremainder)
1295: {
1296: if ((ulong)x1[1] < hiremainder)
1297: {
1298: qp--;
1299: overflow=0; for (j=lz-i; j>1; j--) x1[j]=addllx(x1[j], y[j]);
1300: }
1301: else
1302: {
1303: x1[1] -= hiremainder;
1304: while (x1[1])
1305: {
1306: qp++; if (!qp) { j=i; do x[--j]++; while (j && !x[j]); }
1307: overflow=0; for (j=lz-i; j>1; j--) x1[j]=subllx(x1[j],y[j]);
1308: x1[1] -= overflow;
1309: }
1310: }
1311: }
1312: x1[1]=qp; x1++;
1313: }
1314: x1 = x-1; for (j=lz-1; j>=2; j--) x[j]=x1[j];
1315: if (*x) { shift_right(x,x, 2,lz, 1,1); } else e--;
1316: x[0]=evaltyp(t_REAL)|evallg(lz);
1317: x[1]=e; return x;
1318: }
1319: #endif /* !defined(__M68K__) */
1320: /* The following ones are not in mp.s (mulii is, with a different algorithm) */
1321:
1322: /* Integer division x / y:
1323: * if z = ONLY_REM return remainder, otherwise return quotient
1324: * if z != NULL set *z to remainder
1325: * *z is the last object on stack (and thus can be disposed of with cgiv
1326: * instead of gerepile)
1327: * If *z is zero, we put gzero here and no copy.
1328: * space needed: lx + ly
1329: */
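/* Informal usage sketch:
 *   GEN r, q;
 *   q = dvmdii(x, y, &r);        r gets the remainder (gzero if it vanishes)
 *   q = dvmdii(x, y, NULL);      quotient only
 *   r = dvmdii(x, y, ONLY_REM);  remainder only
 * The quotient is truncated toward 0, so a nonzero remainder has the sign of
 * x; see truedvmdii further down for the variant with non-negative remainder. */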
1330: GEN
1331: dvmdii(GEN x, GEN y, GEN *z)
1332: {
1333: long sx=signe(x),sy=signe(y);
1334: long av,lx,ly,lz,i,j,sh,k,lq,lr;
1335: ulong si,qp,saux, *xd,*rd,*qd;
1336: GEN r,q,x1;
1337:
1338: if (!sy) err(dvmer1);
1339: if (!sx)
1340: {
1341: if (!z || z == ONLY_REM) return gzero;
1342: *z=gzero; return gzero;
1343: }
1344: lx=lgefint(x);
1345: ly=lgefint(y); lz=lx-ly;
1346: if (lz <= 0)
1347: {
1348: if (lz == 0)
1349: {
1350: for (i=2; i<lx; i++)
1351: if (x[i] != y[i])
1352: {
1353: if ((ulong)x[i] > (ulong)y[i]) goto DIVIDE;
1354: goto TRIVIAL;
1355: }
1356: if (z == ONLY_REM) return gzero;
1357: if (z) *z = gzero;
1358: if (sx < 0) sy = -sy;
1359: return stoi(sy);
1360: }
1361: TRIVIAL:
1362: if (z == ONLY_REM) return icopy(x);
1363: if (z) *z = icopy(x);
1364: return gzero;
1365: }
1366: DIVIDE: /* quotient is non-zero */
1367: av=avma; if (sx<0) sy = -sy;
1368: if (ly==3)
1369: {
1370: LOCAL_HIREMAINDER;
1371: si = y[2];
1372: if (si <= (ulong)x[2]) hiremainder=0;
1373: else
1374: {
1375: hiremainder = x[2]; lx--; x++;
1376: }
1377: q = new_chunk(lx); for (i=2; i<lx; i++) q[i]=divll(x[i],si);
1378: if (z == ONLY_REM)
1379: {
1380: avma=av; if (!hiremainder) return gzero;
1381: r=cgeti(3);
1382: r[1] = evalsigne(sx) | evallgefint(3);
1383: r[2]=hiremainder; return r;
1384: }
1385: q[1] = evalsigne(sy) | evallgefint(lx);
1386: q[0] = evaltyp(t_INT) | evallg(lx);
1387: if (!z) return q;
1388: if (!hiremainder) { *z=gzero; return q; }
1389: r=cgeti(3);
1390: r[1] = evalsigne(sx) | evallgefint(3);
1391: r[2] = hiremainder; *z=r; return q;
1392: }
1393:
1394: x1 = new_chunk(lx); sh = bfffo(y[2]);
1395: if (sh)
1396: { /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/
1397: register const ulong m = BITS_IN_LONG - sh;
1398: r = new_chunk(ly);
1399: shift_left2(r, y,2,ly-1, 0,sh,m); y = r;
1400: shift_left2(x1,x,2,lx-1, 0,sh,m);
1401: x1[1] = ((ulong)x[2]) >> m;
1402: }
1403: else
1404: {
1405: x1[1]=0; for (j=2; j<lx; j++) x1[j]=x[j];
1406: }
1407: x=x1; si=y[2]; saux=y[3];
1408: for (i=0; i<=lz; i++)
1409: { /* x1 = x + i */
1410: LOCAL_HIREMAINDER;
1411: LOCAL_OVERFLOW;
1412: if ((ulong)x1[1] == si)
1413: {
1414: qp = MAXULONG; k=addll(si,x1[2]);
1415: }
1416: else
1417: {
1418: hiremainder=x1[1]; overflow=0;
1419: qp=divll(x1[2],si); k=hiremainder;
1420: }
1421: if (!overflow)
1422: {
1423: long k3 = subll(mulll(qp,saux), x1[3]);
1424: long k4 = subllx(hiremainder,k);
1425: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
1426: }
1427: hiremainder=0;
1428: for (j=ly-1; j>1; j--)
1429: {
1430: x1[j] = subll(x1[j], addmul(qp,y[j]));
1431: hiremainder+=overflow;
1432: }
1433: if ((ulong)x1[1] < hiremainder)
1434: {
1435: overflow=0; qp--;
1436: for (j=ly-1; j>1; j--) x1[j] = addllx(x1[j],y[j]);
1437: }
1438: x1[1]=qp; x1++;
1439: }
1440:
1441: lq = lz+2;
1442: if (!z)
1443: {
1444: qd = (ulong*)av;
1445: xd = (ulong*)(x + lq);
1446: if (x[1]) { lz++; lq++; }
1447: while (lz--) *--qd = *--xd;
1448: *--qd = evalsigne(sy) | evallgefint(lq);
1449: *--qd = evaltyp(t_INT) | evallg(lq);
1450: avma = (long)qd; return (GEN)qd;
1451: }
1452:
1453: j=lq; while (j<lx && !x[j]) j++;
1454: lz = lx-j;
1455: if (z == ONLY_REM)
1456: {
1457: if (lz==0) { avma = av; return gzero; }
1458: rd = (ulong*)av; lr = lz+2;
1459: xd = (ulong*)(x + lx);
1460: if (!sh) while (lz--) *--rd = *--xd;
1461: else
1462: { /* shift remainder right by sh bits */
1463: const ulong shl = BITS_IN_LONG - sh;
1464: ulong l;
1465: xd--;
1466: while (--lz) /* fill r[3..] */
1467: {
1468: l = *xd >> sh;
1469: *--rd = l | (*--xd << shl);
1470: }
1471: l = *xd >> sh;
1472: if (l) *--rd = l; else lr--;
1473: }
1474: *--rd = evalsigne(sx) | evallgefint(lr);
1475: *--rd = evaltyp(t_INT) | evallg(lr);
1476: avma = (long)rd; return (GEN)rd;
1477: }
1478:
1479: lr = lz+2;
1480: rd = NULL; /* gcc -Wall */
1481: if (lz)
1482: { /* non zero remainder: initialize rd */
1483: xd = (ulong*)(x + lx);
1484: if (!sh)
1485: {
1486: rd = (ulong*)avma; (void)new_chunk(lr);
1487: while (lz--) *--rd = *--xd;
1488: }
1489: else
1490: { /* shift remainder right by sh bits */
1491: const ulong shl = BITS_IN_LONG - sh;
1492: ulong l;
1493: rd = (ulong*)x; /* overwrite shifted y */
1494: xd--;
1495: while (--lz)
1496: {
1497: l = *xd >> sh;
1498: *--rd = l | (*--xd << shl);
1499: }
1500: l = *xd >> sh;
1501: if (l) *--rd = l; else lr--;
1502: }
1503: *--rd = evalsigne(sx) | evallgefint(lr);
1504: *--rd = evaltyp(t_INT) | evallg(lr);
1505: rd += lr;
1506: }
1507: qd = (ulong*)av;
1508: xd = (ulong*)(x + lq);
1509: if (x[1]) lq++;
1510: j = lq-2; while (j--) *--qd = *--xd;
1511: *--qd = evalsigne(sy) | evallgefint(lq);
1512: *--qd = evaltyp(t_INT) | evallg(lq);
1513: q = (GEN)qd;
1514: if (lr==2) *z = gzero;
1515: else
1516: { /* rd has been properly initialized: we had lz > 0 */
1517: while (lr--) *--qd = *--rd;
1518: *z = (GEN)qd;
1519: }
1520: avma = (long)qd; return q;
1521: }
1522:
1523: /* assume y > x > 0. return y mod x */
1524: ulong
1525: mppgcd_resiu(GEN y, ulong x)
1526: {
1527: long i, ly = lgefint(y);
1528: LOCAL_HIREMAINDER;
1529:
1530: hiremainder = 0;
1531: for (i=2; i<ly; i++) (void)divll(y[i],x);
1532: return hiremainder;
1533: }
1534:
1535: /* Assume x > y > 0, both odd. Set res to (x-y) if x = y mod 4, to (x+y) otherwise, with all powers of 2 divided out (so the result is odd) */
1536: void
1537: mppgcd_plus_minus(GEN x, GEN y, GEN res)
1538: {
1539: long av = avma;
1540: long lx = lgefint(x)-1;
1541: long ly = lgefint(y)-1, lt,m,i;
1542: GEN t;
1543:
1544: if ((x[lx]^y[ly]) & 3) /* x != y mod 4*/
1545: t = addiispec(x+2,y+2,lx-1,ly-1);
1546: else
1547: t = subiispec(x+2,y+2,lx-1,ly-1);
1548:
1549: lt = lgefint(t)-1; while (!t[lt]) lt--;
1550: m = vals(t[lt]); lt++;
1551: if (m == 0) /* 2^BITS_IN_LONG | t */
1552: {
1553: for (i = 2; i < lt; i++) res[i] = t[i];
1554: }
1555: else if (t[2] >> m)
1556: {
1557: shift_right(res,t, 2,lt, 0,m);
1558: }
1559: else
1560: {
1561: lt--; t++;
1562: shift_right(res,t, 2,lt, t[1],m);
1563: }
1564: res[1] = evalsigne(1)|evallgefint(lt);
1565: avma = av;
1566: }
1567:
1568: /* private version of mulss */
1569: ulong
1570: smulss(ulong x, ulong y, ulong *rem)
1571: {
1572: LOCAL_HIREMAINDER;
1573: x=mulll(x,y); *rem=hiremainder; return x;
1574: }
1575:
1576: #ifdef LONG_IS_64BIT
1577: # define DIVCONVI 7
1578: #else
1579: # define DIVCONVI 14
1580: #endif
1581:
1582: /* convert integer --> base 10^9 [assume x > 0] */
1583: GEN
1584: convi(GEN x)
1585: {
1586: ulong av=avma, lim;
1587: long lz;
1588: GEN z,p1;
1589:
1590: lz = 3 + ((lgefint(x)-2)*15)/DIVCONVI;
1591: z=new_chunk(lz); z[1] = -1; p1 = z+2;
1592: lim = stack_lim(av,1);
1593: for(;;)
1594: {
1595: x = divis(x,1000000000); *p1++ = hiremainder;
1596: if (!signe(x)) { avma=av; return p1; }
1597: if (low_stack(lim, stack_lim(av,1))) x = gerepileuptoint((long)z,x);
1598: }
1599: }
1600: #undef DIVCONVI
1601:
1602: /* convert fractional part --> base 10^9 [assume expo(x) < 0] */
1603: GEN
1604: confrac(GEN x)
1605: {
1606: long lx=lg(x), ex = -expo(x)-1, d,m,ly,ey,lr,i,j;
1607: GEN y,y1;
1608:
1609: ey = ex + bit_accuracy(lx);
1610: ly = 1 + ((ey-1) >> TWOPOTBITS_IN_LONG);
1611: d = ex >> TWOPOTBITS_IN_LONG;
1612: m = ex & (BITS_IN_LONG-1);
1613: y = new_chunk(ly); y1 = y + (d-2);
1614: while (d--) y[d]=0;
1615: if (!m)
1616: for (i=2; i<lx; i++) y1[i] = x[i];
1617: else
1618: { /* shift x left sh bits */
1619: const ulong sh=BITS_IN_LONG-m;
1620: ulong k=0, l;
1621: for (i=2; i<lx; i++)
1622: {
1623: l = x[i];
1624: y1[i] = (l>>m)|k;
1625: k = l<<sh;
1626: }
1627: y1[i] = k;
1628: }
1629: lr = 1 + ((long) (ey*L2SL10))/9;
1630: y1 = new_chunk(lr);
1631: for (j=0; j<lr; j++)
1632: {
1633: LOCAL_HIREMAINDER;
1634: hiremainder=0;
1635: for (i=ly-1; i>=0; i--) y[i]=addmul(y[i],1000000000);
1636: y1[j]=hiremainder;
1637: }
1638: return y1;
1639: }
1640:
1641: GEN
1642: truedvmdii(GEN x, GEN y, GEN *z)
1643: {
1644: long av = avma;
1645: GEN r, q = dvmdii(x,y,&r); /* assume that r is last on stack */
1646: GEN *gptr[2];
1647:
1648: if (signe(r)>=0)
1649: {
1650: if (z == ONLY_REM) return gerepileuptoint(av,r);
1651: if (z) *z = r; else cgiv(r);
1652: return q;
1653: }
1654:
1655: if (z == ONLY_REM)
1656: {
1657: r = subiispec(y+2,r+2, lgefint(y)-2,lgefint(r)-2);
1658: return gerepileuptoint(av, r);
1659: }
1660: q = addsi(-signe(y),q);
1661: if (!z) return gerepileuptoint(av, q);
1662:
1663: *z = subiispec(y+2,r+2, lgefint(y)-2,lgefint(r)-2);
1664: gptr[0]=&q; gptr[1]=z; gerepilemanysp(av,(long)r,gptr,2);
1665: return q;
1666: }
1667:
1668: /* EXACT INTEGER DIVISION */
1669:
1670: /* Find c such that 1=c*b mod 2^BITS_IN_LONG, assuming b odd (unchecked) */
1671: static ulong
1672: invrev(ulong b)
1673: {
1674: ulong x;
1675: switch(b & 7)
1676: {
1677: case 3:
1678: case 5: x = b+8; break;
1679: default: x = b; break;
1680: } /* x = b^(-1) mod 2^4 */
1681: x = x*(2-b*x);
1682: x = x*(2-b*x);
1683: x = x*(2-b*x); /* b^(-1) mod 2^32 (Newton applied to 1/x - b = 0) */
1684: #ifdef LONG_IS_64BIT
1685: x = x*(2-b*x); /* b^(-1) mod 2^64 */
1686: #endif
1687: return x;
1688: }
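/* Informally, each step x = x*(2-b*x) above is a Newton iteration for 1/b in
 * the 2-adic sense: if b*x = 1 + e with e divisible by 2^k, then
 * b*(x*(2-b*x)) = (1+e)*(1-e) = 1 - e^2 = 1 mod 2^(2k), so the number of
 * correct low bits doubles. E.g. b = 3: the switch gives x = 11 (3*11 = 33 =
 * 1 mod 2^4), and one step yields x = 171 mod 2^8, with 3*171 = 513 = 1 mod 2^8. */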
1689:
1690: /* assume x > 0 and y odd (in particular y > 0) */
1691: GEN
1692: diviuexact(GEN x, ulong y)
1693: {
1694: long i,lz,lx;
1695: ulong q, yinv;
1696: GEN z, z0, x0, x0min;
1697:
1698: if (y == 1) return icopy(x);
1699: lx = lgefint(x);
1700: if (lx == 3) return stoi((ulong)x[2] / y);
1701: yinv = invrev(y);
1702: lz = (y <= (ulong)x[2]) ? lx : lx-1;
1703: z = new_chunk(lz);
1704: z0 = z + lz;
1705: x0 = x + lx; x0min = x + lx-lz+2;
1706:
1707: while (x0 > x0min)
1708: {
1709: *--z0 = q = yinv*((ulong)*--x0); /* i-th quotient */
1710: if (!q) continue;
1711: /* x := x - q * y */
1712: { /* update neither lowest word (could set it to 0) nor highest ones */
1713: register GEN x1 = x0 - 1;
1714: LOCAL_HIREMAINDER;
1715: (void)mulll(q,y);
1716: if (hiremainder)
1717: {
1718: if ((ulong)*x1 < hiremainder)
1719: {
1720: *x1 -= hiremainder;
1721: do (*--x1)--; while ((ulong)*x1 == MAXULONG);
1722: }
1723: else
1724: *x1 -= hiremainder;
1725: }
1726: }
1727: }
1728: i=2; while(!z[i]) i++;
1729: z += i-2; lz -= i-2;
1730: z[0] = evaltyp(t_INT)|evallg(lz);
1731: z[1] = evalsigne(1)|evallg(lz);
1732: avma = (ulong)z; return z;
1733: }
1734:
1735: /* Find z such that x=y*z, knowing that y | x (unchecked)
1736: * Method: y0 z0 = x0 mod B = 2^BITS_IN_LONG ==> z0 = 1/y0 mod B.
1737: * Set x := (x - z0 y) / B, updating only relevant words, and repeat */
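/* Base-10 analogy of the scheme above (purely illustrative): to divide 3589
 * by 37 knowing 37 | 3589, use 7^(-1) = 3 mod 10; the last quotient digit is
 * 9*3 = 7 mod 10, then (3589 - 7*37)/10 = 333, whose last digit gives
 * 3*3 = 9 mod 10, and (333 - 9*37)/10 = 0, so the quotient is 97. The code
 * below does the same with base-B words, using invrev() for the word inverse. */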
1738: GEN
1739: diviiexact(GEN x, GEN y)
1740: {
1741: long av,lx,ly,lz,vy,i,ii,sx = signe(x), sy = signe(y);
1742: ulong y0inv,q;
1743: GEN z;
1744:
1745: if (!sy) err(dvmer1);
1746: if (!sx) return gzero;
1747: vy = vali(y); av = avma;
1748: (void)new_chunk(lgefint(x)); /* enough room for z */
1749: if (vy)
1750: { /* make y odd */
1751: #if 0
1752: if (vali(x) < vy) err(talker,"impossible division in diviiexact");
1753: #endif
1754: y = shifti(y,-vy);
1755: x = shifti(x,-vy);
1756: }
1757: else x = icopy(x); /* necessary because we destroy x */
1758: avma = av; /* will erase our x,y when exiting */
1759: /* now y is odd */
1760: ly = lgefint(y);
1761: if (ly == 3)
1762: {
1763: x = diviuexact(x,(ulong)y[2]);
1764: if (signe(x)) setsigne(x,sx*sy); /* should have x != 0 at this point */
1765: return x;
1766: }
1767: lx = lgefint(x); if (ly>lx) err(talker,"impossible division in diviiexact");
1768: y0inv = invrev(y[ly-1]);
1769: i=2; while (i<ly && y[i]==x[i]) i++;
1770: lz = (i==ly || (ulong)y[i] < (ulong)x[i]) ? lx-ly+3 : lx-ly+2;
1771: z = new_chunk(lz);
1772:
1773: y += ly - 1; /* now y[-i] = i-th word of y */
1774: for (ii=lx-1,i=lz-1; i>=2; i--,ii--)
1775: {
1776: long limj;
1777: LOCAL_HIREMAINDER;
1778: LOCAL_OVERFLOW;
1779:
1780: z[i] = q = y0inv*((ulong)x[ii]); /* i-th quotient */
1781: if (!q) continue;
1782:
1783: /* x := x - q * y */
1784: (void)mulll(q,y[0]); limj = max(lx - lz, ii+3-ly);
1785: { /* update neither lowest word (could set it to 0) nor highest ones */
1786: register GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj;
1787: for (; x0 >= xlim; x0--, y0--)
1788: {
1789: *x0 = subll(*x0, addmul(q,*y0));
1790: hiremainder += overflow;
1791: }
1792: if (hiremainder && limj != lx - lz)
1793: {
1794: if ((ulong)*x0 < hiremainder)
1795: {
1796: *x0 -= hiremainder;
1797: do (*--x0)--; while ((ulong)*x0 == MAXULONG);
1798: }
1799: else
1800: *x0 -= hiremainder;
1801: }
1802: }
1803: }
1804: #if 0
1805: i=2; while(i<lz && !z[i]) i++;
1806: if (i==lz) err(bugparier,"diviiexact");
1807: #else
1808: i=2; while(!z[i]) i++;
1809: #endif
1810: z += i-2; lz -= (i-2);
1811: z[0] = evaltyp(t_INT)|evallg(lz);
1812: z[1] = evalsigne(sx*sy)|evallg(lz);
1813: avma = (ulong)z; return z;
1814: }
1815:
1816: long
1817: smodsi(long x, GEN y)
1818: {
1819: if (x<0) err(talker,"negative small integer in smodsi");
1820: (void)divsi(x,y); return hiremainder;
1821: }
1822:
1823: /* x and y are integers. Return 1 if |x| == |y|, 0 otherwise */
1824: int
1825: absi_equal(GEN x, GEN y)
1826: {
1827: long lx,i;
1828:
1829: if (!signe(x)) return !signe(y);
1830: if (!signe(y)) return 0;
1831:
1832: lx=lgefint(x); if (lx != lgefint(y)) return 0;
1833: i=2; while (i<lx && x[i]==y[i]) i++;
1834: return (i==lx);
1835: }
1836:
1837: /* x and y are integers. Return sign(|x| - |y|) */
1838: int
1839: absi_cmp(GEN x, GEN y)
1840: {
1841: long lx,ly,i;
1842:
1843: if (!signe(x)) return signe(y)? -1: 0;
1844: if (!signe(y)) return 1;
1845:
1846: lx=lgefint(x); ly=lgefint(y);
1847: if (lx>ly) return 1;
1848: if (lx<ly) return -1;
1849: i=2; while (i<lx && x[i]==y[i]) i++;
1850: if (i==lx) return 0;
1851: return ((ulong)x[i] > (ulong)y[i])? 1: -1;
1852: }
1853:
1854: /* x and y are reals. Return sign(|x| - |y|) */
1855: int
1856: absr_cmp(GEN x, GEN y)
1857: {
1858: long ex,ey,lx,ly,lz,i;
1859:
1860: if (!signe(x)) return signe(y)? -1: 0;
1861: if (!signe(y)) return 1;
1862:
1863: ex=expo(x); ey=expo(y);
1864: if (ex>ey) return 1;
1865: if (ex<ey) return -1;
1866:
1867: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
1868: i=2; while (i<lz && x[i]==y[i]) i++;
1869: if (i<lz) return ((ulong)x[i] > (ulong)y[i])? 1: -1;
1870: if (lx>=ly)
1871: {
1872: while (i<lx && !x[i]) i++;
1873: return (i==lx)? 0: 1;
1874: }
1875: while (i<ly && !y[i]) i++;
1876: return (i==ly)? 0: -1;
1877: }
1878:
1879: /********************************************************************/
1880: /** **/
1881: /** INTEGER MULTIPLICATION (KARATSUBA) **/
1882: /** **/
1883: /********************************************************************/
1884: #define _sqri_l 47
1885: #define _muli_l 25 /* optimal on PII 350MHz + gcc 2.7.2.1 (gp-dyn) */
1886:
1887: #if 0 /* for tunings */
1888: long KARATSUBA_SQRI_LIMIT = _sqri_l;
1889: long KARATSUBA_MULI_LIMIT = _muli_l;
1890:
1891: void setsqri(long a) { KARATSUBA_SQRI_LIMIT = a; }
1892: void setmuli(long a) { KARATSUBA_MULI_LIMIT = a; }
1893:
1894: GEN
1895: speci(GEN x, long nx)
1896: {
1897: GEN z;
1898: long i;
1899: if (!nx) return gzero;
1900: z = cgeti(nx+2); z[1] = evalsigne(1)|evallgefint(nx+2);
1901: for (i=0; i<nx; i++) z[i+2] = x[i];
1902: return z;
1903: }
1904: #else
1905: # define KARATSUBA_SQRI_LIMIT _sqri_l
1906: # define KARATSUBA_MULI_LIMIT _muli_l
1907: #endif
1908:
1909: /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
1910: #ifdef INLINE
1911: INLINE
1912: #endif
1913: GEN
1914: muliispec(GEN x, GEN y, long nx, long ny)
1915: {
1916: GEN z2e,z2d,yd,xd,ye,zd;
1917: long p1,lz;
1918: LOCAL_HIREMAINDER;
1919:
1920: if (!ny) return gzero;
1921: zd = (GEN)avma; lz = nx+ny+2;
1922: (void)new_chunk(lz);
1923: xd = x + nx;
1924: yd = y + ny;
1925: ye = yd; p1 = *--xd;
1926:
1927: *--zd = mulll(p1, *--yd); z2e = zd;
1928: while (yd > y) *--zd = addmul(p1, *--yd);
1929: *--zd = hiremainder;
1930:
1931: while (xd > x)
1932: {
1933: LOCAL_OVERFLOW;
1934: yd = ye; p1 = *--xd;
1935:
1936: z2d = --z2e;
1937: *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
1938: while (yd > y)
1939: {
1940: hiremainder += overflow;
1941: *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
1942: }
1943: *--zd = hiremainder + overflow;
1944: }
1945: if (*zd == 0) { zd++; lz--; } /* normalize */
1946: *--zd = evalsigne(1) | evallgefint(lz);
1947: *--zd = evaltyp(t_INT) | evallg(lz);
1948: avma=(long)zd; return zd;
1949: }
1950:
1951: #ifdef INLINE
1952: INLINE
1953: #endif
1954: GEN
1955: sqrispec(GEN x, long nx)
1956: {
1957: GEN z2e,z2d,yd,xd,zd,x0,z0;
1958: long p1,lz;
1959: LOCAL_HIREMAINDER;
1960:
1961: if (!nx) return gzero;
1962: zd = (GEN)avma; lz = (nx+1) << 1;
1963: z0 = new_chunk(lz);
1964: if (nx == 1)
1965: {
1966: *--zd = mulll(*x, *x);
1967: *--zd = hiremainder; goto END;
1968: }
1969: xd = x + nx;
1970:
1971: /* compute double products --> zd */
1972: p1 = *--xd; yd = xd; --zd;
1973: *--zd = mulll(p1, *--yd); z2e = zd;
1974: while (yd > x) *--zd = addmul(p1, *--yd);
1975: *--zd = hiremainder;
1976:
1977: x0 = x+1;
1978: while (xd > x0)
1979: {
1980: LOCAL_OVERFLOW;
1981: p1 = *--xd; yd = xd;
1982:
1983: z2e -= 2; z2d = z2e;
1984: *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
1985: while (yd > x)
1986: {
1987: hiremainder += overflow;
1988: *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
1989: }
1990: *--zd = hiremainder + overflow;
1991: }
1992: /* multiply zd by 2 (put result in zd - 1) */
1993: zd[-1] = ((*zd & HIGHBIT) != 0);
1994: shift_left(zd, zd, 0, (nx<<1)-3, 0, 1);
1995:
1996: /* add the squares */
1997: xd = x + nx; zd = z0 + lz;
1998: p1 = *--xd;
1999: zd--; *zd = mulll(p1,p1);
2000: zd--; *zd = addll(hiremainder, *zd);
2001: while (xd > x)
2002: {
2003: p1 = *--xd;
2004: zd--; *zd = addll(mulll(p1,p1)+ overflow, *zd);
2005: zd--; *zd = addll(hiremainder + overflow, *zd);
2006: }
2007:
2008: END:
2009: if (*zd == 0) { zd++; lz--; } /* normalize */
2010: *--zd = evalsigne(1) | evallgefint(lz);
2011: *--zd = evaltyp(t_INT) | evallg(lz);
2012: avma=(long)zd; return zd;
2013: }
2014:
2015: /* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */
2016: static GEN
2017: addshiftw(GEN x, GEN y, long d)
2018: {
2019: GEN z,z0,y0,yd, zd = (GEN)avma;
2020: long a,lz,ly = lgefint(y);
2021:
2022: z0 = new_chunk(d);
2023: a = ly-2; yd = y+ly;
2024: if (a >= d)
2025: {
2026: y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */
2027: a -= d;
2028: if (a)
2029: z = addiispec(x+2, y+2, lgefint(x)-2, a);
2030: else
2031: z = icopy(x);
2032: }
2033: else
2034: {
2035: y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */
2036: while (zd >= z0) *--zd = 0; /* complete with 0s */
2037: z = icopy(x);
2038: }
2039: lz = lgefint(z)+d;
2040: z[1] = evalsigne(1) | evallgefint(lz);
2041: z[0] = evaltyp(t_INT) | evallg(lz); return z;
2042: }
2043:
2044: /* Fast product (Karatsuba) of integers. a and b are "special" GENs
2045: * c,c0,c1,c2 are genuine GENs.
2046: */
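/* Informally, with B = 2^BITS_IN_LONG and the splits a = a1*B^n0 + a0,
 * b = b1*B^n0 + b0 chosen below, the recursion uses the Karatsuba identity
 *   a*b = a1*b1*B^(2*n0) + ((a1+a0)*(b1+b0) - a1*b1 - a0*b0)*B^n0 + a0*b0,
 * i.e. three recursive products instead of four, falling back on the
 * schoolbook muliispec below KARATSUBA_MULI_LIMIT words. */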
2047: static GEN
2048: quickmulii(GEN a, GEN b, long na, long nb)
2049: {
2050: GEN a0,c,c0;
2051: long av,n0,n0a,i;
2052:
2053: if (na < nb) swapspec(a,b, na,nb);
2054: if (nb == 1) return mulsispec(*b, a, na);
2055: if (nb == 0) return gzero;
2056: if (nb < KARATSUBA_MULI_LIMIT) return muliispec(a,b,na,nb);
2057: i=(na>>1); n0=na-i; na=i;
2058: av=avma; a0=a+na; n0a=n0;
2059: while (!*a0 && n0a) { a0++; n0a--; }
2060:
2061: if (n0a && nb > n0)
2062: { /* nb <= na <= n0 */
2063: GEN b0,c1,c2;
2064: long n0b;
2065:
2066: nb -= n0;
2067: c = quickmulii(a,b,na,nb);
2068: b0 = b+nb; n0b = n0;
2069: while (!*b0 && n0b) { b0++; n0b--; }
2070: if (n0b)
2071: {
2072: c0 = quickmulii(a0,b0, n0a,n0b);
2073:
2074: c2 = addiispec(a0,a, n0a,na);
2075: c1 = addiispec(b0,b, n0b,nb);
2076: c1 = quickmulii(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
2077: c2 = addiispec(c0+2, c+2, lgefint(c0)-2,lgefint(c) -2);
2078:
2079: c1 = subiispec(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
2080: }
2081: else
2082: {
2083: c0 = gzero;
2084: c1 = quickmulii(a0,b, n0a,nb);
2085: }
2086: c = addshiftw(c,c1, n0);
2087: }
2088: else
2089: {
2090: c = quickmulii(a,b,na,nb);
2091: c0 = quickmulii(a0,b,n0a,nb);
2092: }
2093: return gerepileuptoint(av, addshiftw(c,c0, n0));
2094: }
2095:
2096: /* actual operations will take place on a+2 and b+2: we strip the codewords */
2097: GEN
2098: mulii(GEN a,GEN b)
2099: {
2100: long sa,sb;
2101: GEN z;
2102:
2103: sa=signe(a); if (!sa) return gzero;
2104: sb=signe(b); if (!sb) return gzero;
2105: if (sb<0) sa = -sa;
2106: z = quickmulii(a+2,b+2, lgefint(a)-2,lgefint(b)-2);
2107: setsigne(z,sa); return z;
2108: }
2109:
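/* resiimul(x, sy): reading of the code below -- sy is a pair { y, 1/y } with
 * sy[2] a precomputed t_REAL approximation of 1/y; an approximate quotient is
 * obtained by one real multiplication plus truncation, then corrected by at
 * most one subtraction of y. Intended for x, y > 0. */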
2110: GEN
2111: resiimul(GEN x, GEN sy)
2112: {
2113: GEN r, q, y = (GEN)sy[1], invy;
2114: long av = avma, k;
2115:
2116: k = cmpii(x, y);
2117: if (k <= 0) return k? icopy(x): gzero;
2118: invy = (GEN)sy[2];
2119: q = mulir(x,invy);
2120: q = mptrunc(q); /* <= divii(x, y) (at most 1 less) */
2121: r = subii(x, mulii(y,q));
2122: /* resii(x,y) + y >= r >= resii(x,y) */
2123: k = cmpii(r, y);
2124: if (k >= 0)
2125: {
2126: if (k == 0) { avma = av; return gzero; }
2127: r = subiispec(r+2, y+2, lgefint(r)-2, lgefint(y)-2);
2128: }
2129: #if 0
2130: q = subii(r,resii(x,y));
2131: if (signe(q))
2132: err(talker,"bug in resiimul: x = %Z\ny = %Z\ndif = %Z", x,y,q);
2133: #endif
2134: return gerepileuptoint(av, r); /* = resii(x, y) */
2135: }
2136:
2137: /* x % (2^n), assuming x, n >= 0 */
2138: GEN
2139: resmod2n(GEN x, long n)
2140: {
2141: long hi,l,k,lx,ly;
2142: GEN z, xd, zd;
2143:
2144: if (!signe(x) || !n) return gzero;
2145:
2146: l = n & (BITS_IN_LONG-1); /* n % BITS_IN_LONG */
2147: k = n >> TWOPOTBITS_IN_LONG; /* n / BITS_IN_LONG */
2148: lx = lgefint(x);
2149: if (lx < k+3) return icopy(x);
2150:
2151: xd = x + (lx-k-1);
2152: /* x = |_|...|#|1|...|k| : copy the last l bits of # and the last k words
2153: * ^--- initial xd */
2154: hi = *xd & ((1<<l) - 1); /* last l bits of # = top bits of result */
2155: if (!hi)
2156: { /* strip leading zeroes from result */
2157: xd++; while (k && !*xd) { k--; xd++; }
2158: if (!k) return gzero;
2159: ly = k+2; xd--;
2160: }
2161: else
2162: ly = k+3;
2163:
2164: zd = z = cgeti(ly);
2165: *++zd = evalsigne(1) | evallgefint(ly);
2166: if (hi) *++zd = hi;
2167: for ( ;k; k--) *++zd = *++xd;
2168: return z;
2169: }
2170:
2171: static GEN
2172: quicksqri(GEN a, long na)
2173: {
2174: GEN a0,c;
2175: long av,n0,n0a,i;
2176:
2177: if (na < KARATSUBA_SQRI_LIMIT) return sqrispec(a,na);
2178: i=(na>>1); n0=na-i; na=i;
2179: av=avma; a0=a+na; n0a=n0;
2180: while (!*a0 && n0a) { a0++; n0a--; }
2181: c = quicksqri(a,na);
2182: if (n0a)
2183: {
2184: GEN t, c1, c0 = quicksqri(a0,n0a);
2185: #if 0
2186: c1 = shifti(quickmulii(a0,a, n0a,na),1);
2187: #else /* slower !!! */
2188: t = addiispec(a0,a,n0a,na);
2189: t = quicksqri(t+2,lgefint(t)-2);
2190: c1= addiispec(c0+2,c+2, lgefint(c0)-2, lgefint(c)-2);
2191: c1= subiispec(t+2, c1+2, lgefint(t)-2, lgefint(c1)-2);
2192: #endif
2193: c = addshiftw(c,c1, n0);
2194: c = addshiftw(c,c0, n0);
2195: }
2196: else
2197: c = addshiftw(c,gzero,n0<<1);
2198: return gerepileuptoint(av, c);
2199: }
2200:
2201: GEN
2202: sqri(GEN a) { return quicksqri(a+2, lgefint(a)-2); }
2203:
2204: #define MULRR_LIMIT 32
2205: #define MULRR2_LIMIT 32
2206:
2207: #if 0
2208: GEN
2209: karamulrr1(GEN x, GEN y)
2210: {
2211: long sx,sy;
2212: long i,i1,i2,lx=lg(x),ly=lg(y),e,flag,garde;
2213: long lz2,lz3,lz4;
2214: GEN z,lo1,lo2,hi;
2215:
2216: /* ensure that lg(y) >= lg(x) */
2217: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
2218: if (lx < MULRR_LIMIT) return mulrr(x,y);
2219: sx=signe(x); sy=signe(y);
2220: e = evalexpo(expo(x)+expo(y));
2221: if (!sx || !sy) { z=cgetr(3); z[2]=0; z[1]=e; return z; }
2222: if (sy<0) sx = -sx;
2223: ly=lx+flag; z=cgetr(lx);
2224: lz2 = (lx>>1); lz3 = lx-lz2;
2225: x += 2; lx -= 2;
2226: y += 2; ly -= 2;
2227: hi = quickmulii(x,y,lz2,lz2);
2228: i1=lz2; while (i1<lx && !x[i1]) i1++;
2229: lo1 = quickmulii(y,x+i1,lz2,lx-i1);
2230: i2=lz2; while (i2<ly && !y[i2]) i2++;
2231: lo2 = quickmulii(x,y+i2,lz2,ly-i2);
2232:
2233: if (signe(lo1))
2234: {
2235: if (flag) { lo2 = addshiftw(lo1,lo2,1); lz3++; } else lo2=addii(lo1,lo2);
2236: }
2237: lz4=lgefint(lo2)-lz3;
2238: if (lz4>0) hi = addiispec(hi+2,lo2+2, lgefint(hi)-2,lz4);
2239: if (hi[2] < 0)
2240: {
2241: e++; garde=hi[lx+2];
2242: for (i=lx+1; i>=2 ; i--) z[i]=hi[i];
2243: }
2244: else
2245: {
2246: garde = (hi[lx+2] << 1);
2247: shift_left(z,hi,2,lx+1, garde, 1);
2248: }
2249: z[1]=evalsigne(sx) | e;
2250: if (garde < 0)
2251: { /* round to nearest */
2252: i=lx+2; do z[--i]++; while (z[i]==0);
2253: if (i==1) z[2]=HIGHBIT;
2254: }
2255: avma=(long)z; return z;
2256: }
2257:
2258: GEN
2259: karamulrr2(GEN x, GEN y)
2260: {
2261: long sx,sy,i,lx=lg(x),ly=lg(y),e,flag,garde;
2262: GEN z,hi;
2263:
2264: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
2265: if (lx < MULRR2_LIMIT) return mulrr(x,y);
2266: ly=lx+flag; sx=signe(x); sy=signe(y);
2267: e = evalexpo(expo(x)+expo(y));
2268: if (!sx || !sy) { z=cgetr(3); z[2]=0; z[1]=e; return z; }
2269: if (sy<0) sx = -sx;
2270: z=cgetr(lx);
2271: hi=quickmulii(y+2,x+2,ly-2,lx-2);
2272: if (hi[2] < 0)
2273: {
2274: e++; garde=hi[lx];
2275: for (i=2; i<lx ; i++) z[i]=hi[i];
2276: }
2277: else
2278: {
2279: garde = (hi[lx] << 1);
2280: shift_left(z,hi,2,lx-1, garde, 1);
2281: }
2282: z[1]=evalsigne(sx) | e;
2283: if (garde < 0)
2284: { /* round to nearest */
2285: i=lx; do z[--i]++; while (z[i]==0);
2286: if (i==1) z[2]=HIGHBIT;
2287: }
2288: avma=(long)z; return z;
2289: }
2290:
2291: GEN
2292: karamulrr(GEN x, GEN y, long flag)
2293: {
2294: switch(flag)
2295: {
2296: case 1: return karamulrr1(x,y);
2297: case 2: return karamulrr2(x,y);
2298: default: err(flagerr,"karamulrr");
2299: }
2300: return NULL; /* not reached */
2301: }
2302:
2303: GEN
2304: karamulir(GEN x, GEN y, long flag)
2305: {
2306: long sx=signe(x),lz,i;
2307: GEN z,temp,z1;
2308:
2309: if (!sx) return gzero;
2310: lz=lg(y); z=cgetr(lz);
2311: temp=cgetr(lz+1); affir(x,temp);
2312: z1=karamulrr(temp,y,flag);
2313: for (i=1; i<lz; i++) z[i]=z1[i];
2314: avma=(long)z; return z;
2315: }
2316: #endif
2317:
2318: #ifdef LONG_IS_64BIT
2319:
2320: #if PARI_BYTE_ORDER == LITTLE_ENDIAN_64 || PARI_BYTE_ORDER == BIG_ENDIAN_64
2321: #else
2322: error... unknown machine
2323: #endif
2324:
2325: GEN
2326: dbltor(double x)
2327: {
2328: GEN z = cgetr(3);
2329: long e;
2330: union { double f; ulong i; } fi;
2331: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2332: const int exp_mid = 0x3ff;/* exponent bias */
2333: const int expo_len = 11; /* number of bits of exponent */
2334: LOCAL_HIREMAINDER;
2335:
2336: if (x==0) { z[1]=evalexpo(-308); z[2]=0; return z; }
2337: fi.f = x;
2338: e = ((fi.i & (HIGHBIT-1)) >> mant_len) - exp_mid;
2339: z[1] = evalexpo(e) | evalsigne(x<0? -1: 1);
2340: z[2] = (fi.i << expo_len) | HIGHBIT;
2341: return z;
2342: }
2343:
2344: double
2345: rtodbl(GEN x)
2346: {
2347: long ex,s=signe(x);
2348: ulong a;
2349: union { double f; ulong i; } fi;
2350: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2351: const int exp_mid = 0x3ff;/* exponent bias */
2352: const int expo_len = 11; /* number of bits of exponent */
2353: LOCAL_HIREMAINDER;
2354:
2355: if (typ(x)==t_INT && !s) return 0.0;
2356: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
2357: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
2358:
2359: /* start by rounding to closest */
2360: a = (x[2] & (HIGHBIT-1)) + 0x400;
2361: if (a & HIGHBIT) { ex++; a=0; }
2362: if (ex >= exp_mid) err(rtodber);
2363: fi.i = ((ex + exp_mid) << mant_len) | (a >> expo_len);
2364: if (s<0) fi.i |= HIGHBIT;
2365: return fi.f;
2366: }
2367:
2368: #else
2369:
2370: #if PARI_BYTE_ORDER == LITTLE_ENDIAN
2371: # define INDEX0 1
2372: # define INDEX1 0
2373: #elif PARI_BYTE_ORDER == BIG_ENDIAN
2374: # define INDEX0 0
2375: # define INDEX1 1
2376: #else
2377: error... unknown machine
2378: #endif
2379:
2380: GEN
2381: dbltor(double x)
2382: {
2383: GEN z;
2384: long e;
2385: union { double f; ulong i[2]; } fi;
2386: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2387: const int exp_mid = 0x3ff;/* exponent bias */
2388: const int shift = mant_len-32;
2389: const int expo_len = 11; /* number of bits of exponent */
2390:
2391: if (x==0) { z=cgetr(3); z[1]=evalexpo(-308); z[2]=0; return z; }
2392: fi.f = x; z=cgetr(4);
2393: {
2394: const ulong a = fi.i[INDEX0];
2395: const ulong b = fi.i[INDEX1];
2396: e = ((a & (HIGHBIT-1)) >> shift) - exp_mid;
2397: z[1] = evalexpo(e) | evalsigne(x<0? -1: 1);
2398: z[3] = b << expo_len;
2399: z[2] = HIGHBIT | b >> (BITS_IN_LONG-expo_len) | (a << expo_len);
2400: }
2401: return z;
2402: }
2403:
2404: double
2405: rtodbl(GEN x)
2406: {
2407: long ex,s=signe(x),lx=lg(x);
2408: ulong a,b,k;
2409: union { double f; ulong i[2]; } fi;
2410: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2411: const int exp_mid = 0x3ff;/* exponent bias */
2412: const int shift = mant_len-32;
2413: const int expo_len = 11; /* number of bits of exponent */
2414:
2415: if (typ(x)==t_INT && !s) return 0.0;
2416: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
2417: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
2418:
2419: /* start by rounding to closest */
2420: a = x[2] & (HIGHBIT-1);
2421: if (lx > 3)
2422: {
2423: b = x[3] + 0x400UL; if (b < 0x400UL) a++;
2424: if (a & HIGHBIT) { ex++; a=0; }
2425: }
2426: else b = 0;
2427: if (ex > exp_mid) err(rtodber);
2428: ex += exp_mid;
2429: k = (a >> expo_len) | (ex << shift);
2430: if (s<0) k |= HIGHBIT;
2431: fi.i[INDEX0] = k;
2432: fi.i[INDEX1] = (a << (BITS_IN_LONG-expo_len)) | (b >> expo_len);
2433: return fi.f;
2434: }
2435: #endif
2436:
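/* Round-trip sketch for dbltor()/rtodbl() above (kept disabled): a normal
 * (non-denormal) double within the t_REAL exponent range should convert to
 * a t_REAL and back unchanged, since its 53-bit mantissa fits in the one or
 * two mantissa words allocated above. */
#if 0
static void
dbltor_roundtrip_sketch(void)
{
  long av = avma;
  double x = 3.141592653589793;
  double y = rtodbl(dbltor(x));   /* y == x exactly for such inputs */
  if (y != x) err(talker, "dbltor/rtodbl sketch: mismatch");
  avma = av;
}
#endif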
2437: /* Old cgiv without reference count (which was not used anyway)
2438: * Should be a macro.
2439: */
2440: void
2441: cgiv(GEN x)
2442: {
2443: if (x == (GEN) avma)
2444: avma = (long) (x+lg(x));
2445: }
2446:
2447: /********************************************************************/
2448: /** **/
2449: /** INTEGER EXTENDED GCD (AND INVMOD) **/
2450: /** **/
2451: /********************************************************************/
2452:
2453: /* GN 1998Oct25, originally developed in January 1998 under 2.0.4.alpha,
2454: * in the context of trying to improve elliptic curve cryptosystem attacking
2455: * algorithms. 2001Jan02 -- added bezout() functionality.
2456: *
2457: * Two basic ideas - (1) avoid many integer divisions, especially when the
2458: * quotient is 1 (which happens more than 40% of the time). (2) Use Lehmer's
2459: * trick as modified by Jebelean of extracting a couple of words' worth of
2460: * leading bits from both operands, and compute partial quotients from them
2461: * as long as we can be sure of their values. The Jebelean modifications
2462: * consist in reliable inequalities from which we can decide fast whether
2463: * to carry on or to return to the outer loop, and in re-shifting after the
2464: * first word's worth of bits has been used up. All of this is described
2465: * in R. Lercier's thesis [pp148-153 & 163f.], except his outer loop isn't
2466: * quite right (the catch-up divisions needed when one partial quotient is
2467: * larger than a word are missing).
2468: *
2469: * The API consists of invmod() and bezout() below; the single-word routines
2470: * xgcduu and xxgcduu may be called directly if desired; lgcdii() probably
2471: * doesn't make much sense out of context.
2472: *
2473: * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
2474: * ptotically about a factor 2.5 .. 3, depending on processor architecture,
2475: * than the naive continued-division code. Unfortunately, thanks to the
2476: * unrolled loops and all, the code is a bit lengthy.
2477: */
2478:
2479: /*==================================
2480: * xgcduu(d,d1,f,v,v1,s)
2481: * xxgcduu(d,d1,f,u,u1,v,v1,s)
2482: * rgcduu(d,d1,vmax,u,u1,v,v1,s)
2483: *==================================*/
2484: /*
2485: * Fast `final' extended gcd algorithm, acting on two ulongs. Ideally this
2486: * should be replaced with assembler versions wherever possible. The present
2487: * code essentially does `subtract, compare, and possibly divide' at each step,
2488: * which is reasonable when hardware division (a) exists, (b) is a bit slowish
2489: * and (c) does not depend a lot on the operand values (as on i486). When
2490: * wordsize division is in fact an assembler routine based on subtraction,
2491: * this strategy may not be the most efficient one.
2492: *
2493: * xxgcduu() should be called with d > d1 > 0, returns gcd(d,d1), and assigns
2494: * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1] (i.e.,
2495: * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1] (i.e.,
2496: * the product of all the [0, 1; 1, q_j] where the leftmost factor arises from
2497: * the quotient of the first division step), and the information about the
2497: * implied signs to s (-1 when an odd number of divisions has been done,
2498: * 1 otherwise). xgcduu() is exactly the same except that u,u1 are not com-
2499: * puted (and not returned, of course).
2500: *
2501: * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
2502: * (so we can stop the chain division one step early: as soon as the remainder
2503: * equals 1). Use this when you intend to use only what would be v, or only
2504: * what would be u and v, after that final division step, but not u1 and v1.
2505: * With the flag in force and thus without that final step, the interesting
2506: * quantity/ies will still sit in [u1 and] v1, of course.
2507: *
2508: * For computing the inverse of a single-word INTMOD known to exist, pass f=1
2509: * to xgcduu(), and obtain the result from s and v1. (The routine does the
2510: * right thing when d1==1 already.) For finishing a multiword modinv known
2511: * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix (with
2512: * properly adjusted signs) onto the values v' and v1' previously obtained
2513: * from the multiword division steps. Actually, just take the scalar product
2514: * of [v',v1'] with [u1,-v1], and change the sign if s==-1. (If the final
2515: * step had been carried out, it would be [-u,v], and s would also change.)
2516: * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
2517: * Finally, f=0 with xxgcduu() is useful for Bezout computations.
2518: * [Harrumph. In the above prescription, the sign turns out to be precisely
2519: * wrong.]
2520: * (It is safe for invmod() to call xgcduu() with f=1, because f&1 doesn't
2521: * make a difference when gcd(d,d1)>1. The speedup is negligible.)
2522: *
2523: * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
2524: * recover the final u,u1 given only v,v1 and s. However, it probably isn't
2525: * worthwhile, as it trades a few multiplications for a division.
2526: *
2527: * Note that these routines do not know and do not need to know about the
2528: * PARI stack.
2529: *
2530: * Added 2001Jan15:
2531: * rgcduu() is a variant of xxgcduu() which does not have f (the effect is
2532: * that of f=0), but instead has a ulong vmax parameter, for use in rational
2533: * reconstruction below. It returns when v1 exceeds vmax; v will never
2534: * exceed vmax. (vmax=0 is taken as a synonym of MAXULONG i.e. unlimited,
2535: * in which case rgcduu behaves exactly like xxgcduu with f=0.) The return
2536: * value of rgcduu() is typically meaningless; the interesting part is the
2537: * matrix.
2538: */
2539:
2540: ulong
2541: xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
2542: {
2543: ulong xv,xv1, xs, q,res;
2544: LOCAL_HIREMAINDER;
2545:
2546: /* The above blurb contained a lie. The main loop always stops when d1
2547: * has become equal to 1. If (d1 == 1 && !(f&1)) after the loop, we do
2548: * the final `division' of d by 1 `by hand' as it were.
2549: *
2550: * The loop has already been unrolled once. Aggressive optimization could
2551: * well lead to a totally unrolled assembler version...
2552: *
2553: * On modern x86 architectures, this loop is a pig anyway. The division
2554: * instruction always puts its result into the same pair of registers, and
2555: * we always want to use one of them straight away, so pipeline performance
2556: * will suck big time. An assembler version should probably do a first loop
2557: * computing and storing all the quotients -- their number is bounded in
2558: * advance -- and then assembling the matrix in a second pass. On other
2559: * architectures where we can cycle through four or so groups of registers
2560: * and exploit a fast ALU result-to-operand feedback path, this is much less
2561: * of an issue. (Intel sucks. See http://www.x86.org/ ...)
2562: */
2563: xs = res = 0;
2564: xv = 0UL; xv1 = 1UL;
2565: while (d1 > 1UL)
2566: {
2567: d -= d1; /* no need to use subll */
2568: if (d >= d1)
2569: {
2570: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
2571: xv += q * xv1;
2572: }
2573: else
2574: xv += xv1;
2575: /* possible loop exit */
2576: if (d <= 1UL) { xs=1; break; }
2577: /* repeat with inverted roles */
2578: d1 -= d;
2579: if (d1 >= d)
2580: {
2581: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
2582: xv1 += q * xv;
2583: }
2584: else
2585: xv1 += xv;
2586: } /* while */
2587:
2588: if (!(f&1)) /* division by 1 postprocessing if needed */
2589: {
2590: if (xs && d==1)
2591: { xv1 += d1 * xv; xs = 0; res = 1UL; }
2592: else if (!xs && d1==1)
2593: { xv += d * xv1; xs = 1; res = 1UL; }
2594: }
2595:
2596: if (xs)
2597: {
2598: *s = -1; *v = xv1; *v1 = xv;
2599: return (res ? res : (d==1 ? 1UL : d1));
2600: }
2601: else
2602: {
2603: *s = 1; *v = xv; *v1 = xv1;
2604: return (res ? res : (d1==1 ? 1UL : d));
2605: }
2606: }
2607:
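/* A minimal sketch of the single-word inverse recipe from the blurb above
 * (kept disabled): for b > a > 0 with gcd(a,b) == 1, call xgcduu() with
 * f=1 and read the inverse of a mod b off s and v1, exactly as the
 * one-word branch of invmod() below does.  Names are illustrative only. */
#if 0
static ulong
inverse_mod_word_sketch(ulong a, ulong b)
{
  ulong v, v1;
  long s;
  (void)xgcduu(b, a, 1, &v, &v1, &s); /* g == 1 here since gcd(a,b) == 1 */
  v1 %= b;
  return (s < 0)? b - v1 : v1;
}
#endif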
2608:
2609: ulong
2610: xxgcduu(ulong d, ulong d1, int f,
2611: ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
2612: {
2613: ulong xu,xu1, xv,xv1, xs, q,res;
2614: LOCAL_HIREMAINDER;
2615:
2616: xs = res = 0;
2617: xu = xv1 = 1UL;
2618: xu1 = xv = 0UL;
2619: while (d1 > 1UL)
2620: {
2621: d -= d1; /* no need to use subll */
2622: if (d >= d1)
2623: {
2624: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
2625: xv += q * xv1;
2626: xu += q * xu1;
2627: }
2628: else
2629: { xv += xv1; xu += xu1; }
2630: /* possible loop exit */
2631: if (d <= 1UL) { xs=1; break; }
2632: /* repeat with inverted roles */
2633: d1 -= d;
2634: if (d1 >= d)
2635: {
2636: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
2637: xv1 += q * xv;
2638: xu1 += q * xu;
2639: }
2640: else
2641: { xv1 += xv; xu1 += xu; }
2642: } /* while */
2643:
2644: if (!(f&1)) /* division by 1 postprocessing if needed */
2645: {
2646: if (xs && d==1)
2647: {
2648: xv1 += d1 * xv;
2649: xu1 += d1 * xu;
2650: xs = 0; res = 1UL;
2651: }
2652: else if (!xs && d1==1)
2653: {
2654: xv += d * xv1;
2655: xu += d * xu1;
2656: xs = 1; res = 1UL;
2657: }
2658: }
2659:
2660: if (xs)
2661: {
2662: *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
2663: return (res ? res : (d==1 ? 1UL : d1));
2664: }
2665: else
2666: {
2667: *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
2668: return (res ? res : (d1==1 ? 1UL : d));
2669: }
2670: }
2671:
2672: ulong
2673: rgcduu(ulong d, ulong d1, ulong vmax,
2674: ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
2675: {
2676: ulong xu,xu1, xv,xv1, xs, q, res=0;
2677: int f = 0;
2678: LOCAL_HIREMAINDER;
2679:
2680: if (vmax == 0) vmax = MAXULONG;
2681: xs = res = 0;
2682: xu = xv1 = 1UL;
2683: xu1 = xv = 0UL;
2684: while (d1 > 1UL)
2685: {
2686: d -= d1; /* no need to use subll */
2687: if (d >= d1)
2688: {
2689: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
2690: xv += q * xv1;
2691: xu += q * xu1;
2692: }
2693: else
2694: { xv += xv1; xu += xu1; }
2695: /* possible loop exit */
2696: if (xv > vmax) { f=xs=1; break; }
2697: if (d <= 1UL) { xs=1; break; }
2698: /* repeat with inverted roles */
2699: d1 -= d;
2700: if (d1 >= d)
2701: {
2702: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
2703: xv1 += q * xv;
2704: xu1 += q * xu;
2705: }
2706: else
2707: { xv1 += xv; xu1 += xu; }
2708: /* possible loop exit */
2709: if (xv1 > vmax) { f=1; break; }
2710: } /* while */
2711:
2712: if (!(f&1)) /* division by 1 postprocessing if needed */
2713: {
2714: if (xs && d==1)
2715: {
2716: xv1 += d1 * xv;
2717: xu1 += d1 * xu;
2718: xs = 0; res = 1UL;
2719: }
2720: else if (!xs && d1==1)
2721: {
2722: xv += d * xv1;
2723: xu += d * xu1;
2724: xs = 1; res = 1UL;
2725: }
2726: }
2727:
2728: if (xs)
2729: {
2730: *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
2731: return (res ? res : (d==1 ? 1UL : d1));
2732: }
2733: else
2734: {
2735: *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
2736: return (res ? res : (d1==1 ? 1UL : d));
2737: }
2738: }
2739:
2740: /*==================================
2741: * lgcdii(d,d1,u,u1,v,v1,vmax)
2742: *==================================*/
2743: /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
2744: *
2745: * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
2746: * and a quantity of bits from d1 obtained by a shift of the same displacement,
2747: * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
2748: * the product of all the [0, 1; 1, q_j] thus obtained, where the leftmost
2749: * factor arises from the quotient of the first division step.
2750: *
2751: * For use in rational reconstruction, input param vmax can be given a
2752: * nonzero value. In this case, we will return early as soon as v1 > vmax
2753: * (i.e. it is guaranteed that v <= vmax). --2001Jan15
2754: *
2755: * MUST be called with d > d1 > 0, and with d occupying more than one
2756: * significant word (if it doesn't, the caller has no business with us;
2757: * he/she/it should use xgcduu() instead). Returns the number of reduction/
2758: * swap steps carried out, possibly zero, or under certain conditions minus
2759: * that number. When the return value is nonzero, the caller should use the
2760: * returned recurrence matrix to update its own copies of d,d1. When the
2761: * return value is non-positive, and the latest remainder after updating
2762: * turns out to be nonzero, the caller should at once attempt a full division,
2763: * rather than first trying lgcdii() again -- this typically happens when we
2764: * are about to encounter a quotient larger than half a word. (This is not
2765: * detected infallibly -- after a positive return value, it is perfectly
2766: * possible that the next stage will end up needing a full division. After
2767: * a negative return value, however, this is certain, and should be acted
2768: * upon.)
2769: *
2770: * (The sign information, for which xgcduu() has its return argument s, is now
2771: * implicit in the LSB of our return value, and the caller may take advantage
2772: * of the fact that a return value of +-1 implies u==0,u1==v==1 [only v1 pro-
2773: * vides interesting information in this case]. One might also use the fact
2774: * that if the return value is +-2, then u==1, but this is rather marginal.)
2775: *
2776: * If it was not possible to determine even the first quotient, either because
2777: * we're too close to an integer quotient or because the quotient would be
2778: * larger than one word (if the `leading digit' of d1 after shifting is all
2779: * zeros), we return 0 and do not bother to assign anything to the last four
2780: * args.
2781: *
2782: * The division chain might (sometimes) even run to completion. It will be
2783: * up to the caller to detect this case.
2784: *
2785: * This routine does _not_ change d or d1; this will also be up to the caller.
2786: *
2787: * Note that this routine does not know and does not need to know about the
2788: * PARI stack.
2789: */
2790: /*#define DEBUG_LEHMER 1 */
2791: int
2792: lgcdii(ulong* d, ulong* d1,
2793: ulong* u, ulong* u1, ulong* v, ulong* v1,
2794: ulong vmax)
2795: {
2796: /* Strategy: (1) Extract/shift most significant bits. We assume that d
2797: * has at least two significant words, but we can cope with a one-word d1.
2798: * Let dd,dd1 be the most significant dividend word and matching part of the
2799: * divisor.
2800: * (2) Check for overflow on the first division. For our purposes, this
2801: * happens when the upper half of dd1 is zero. (Actually this is detected
2802: * during extraction.)
2803: * (3) Get a fix on the first quotient. We compute q = floor(dd/dd1), which
2804: * is an upper bound for floor(d/d1), and which gives the true value of the
2805: * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
2806: * (If it isn't, we give up. This is annoying because the subsequent full
2807: * division will repeat some work already done, but it happens very infre-
2808: * quently. Doing the extra-bit-fetch in this case would be awkward.)
2809: * (4) Finish initializations.
2810: *
2811: * The remainder of the action is comparatively boring... The main loop has
2812: * been unrolled once (so we don't swap things and we can apply Jebelean's
2813: * termination conditions which alternatingly take two different forms during
2814: * successive iterations). When we first run out of sufficient bits to form
2815: * a quotient, and have an extra word of each operand, we pull out two whole
2816: * word's worth of dividend bits, and divisor bits of matching significance;
2817: * to these we apply our partial matrix (disregarding overflow because the
2818: * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
2819: * re-extract one word's worth of the current dividend and a matching amount
2820: * of divisor bits. The affair will normally terminate with matrix entries
2821: * just short of a whole word. (We terminate the inner loop before these can
2822: * possibly overflow.)
2823: */
2824: ulong dd,dd1,ddlo,dd1lo, sh,shc; /* `digits', shift count */
2825: ulong xu,xu1, xv,xv1, q,res; /* recurrences, partial quotient, count */
2826: ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
2827: long ld, ld1, lz; /* t_INT effective lengths */
2828: int skip = 0; /* a boolean flag */
2829: LOCAL_OVERFLOW;
2830: LOCAL_HIREMAINDER;
2831:
2832: #ifdef DEBUG_LEHMER
2833: voir(d, -1); voir(d1, -1);
2834: #endif
2835: /* following is just for convenience: vmax==0 means no bound */
2836: if (vmax == 0) vmax = MAXULONG;
2837: ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
2838: if (lz > 1) return 0; /* rare, quick and desperate exit */
2839:
2840: d += 2; d1 += 2; /* point at the leading `digits' */
2841: dd1lo = 0; /* unless we find something better */
2842: sh = bfffo(*d); /* obtain dividend left shift count */
2843:
2844: if (sh)
2845: { /* do the shifting */
2846: shc = BITS_IN_LONG - sh;
2847: if (lz)
2848: { /* dividend longer than divisor */
2849: dd1 = (*d1 >> shc);
2850: if (!(HIGHMASK & dd1)) return 0; /* overflow detected */
2851: if (ld1 > 3)
2852: dd1lo = (*d1 << sh) + (d1[1] >> shc);
2853: else
2854: dd1lo = (*d1 << sh);
2855: }
2856: else
2857: { /* dividend and divisor have the same length */
2858: /* dd1 = shiftl(d1,sh) would have left hiremainder==0, and dd1 != 0. */
2859: dd1 = (*d1 << sh);
2860: if (!(HIGHMASK & dd1)) return 0;
2861: if (ld1 > 3)
2862: {
2863: dd1 += (d1[1] >> shc);
2864: if (ld1 > 4)
2865: dd1lo = (d1[1] << sh) + (d1[2] >> shc);
2866: else
2867: dd1lo = (d1[1] << sh);
2868: }
2869: }
2870: /* following lines assume d to have 2 or more significant words */
2871: dd = (*d << sh) + (d[1] >> shc);
2872: if (ld > 4)
2873: ddlo = (d[1] << sh) + (d[2] >> shc);
2874: else
2875: ddlo = (d[1] << sh);
2876: }
2877: else
2878: { /* no shift needed */
2879: if (lz) return 0; /* div'd longer than div'r: o'flow automatic */
2880: dd1 = *d1;
2881: if (!(HIGHMASK & dd1)) return 0;
2882: if(ld1 > 3) dd1lo = d1[1];
2883: /* assume again that d has another significant word */
2884: dd = *d; ddlo = d[1];
2885: }
2886: #ifdef DEBUG_LEHMER
2887: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
2888: #endif
2889:
2890: /* First subtraction/division stage. (If a subtraction initially suffices,
2891: * we don't divide at all.) If a Jebelean condition is violated, and we
2892: * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
2893: * give up and ask for a full division. Otherwise we commit the result,
2894: * possibly deciding to re-shift immediately afterwards.
2895: */
2896: dd -= dd1;
2897: if (dd < dd1)
2898: { /* first quotient known to be == 1 */
2899: xv1 = 1UL;
2900: if (!dd) /* !(Jebelean condition), extraspecial case */
2901: { /* note this can actually happen... Now
2902: * q==1 is known, but we underflow already.
2903: * OTOH we've just shortened d by a whole word.
2904: * Thus we feel pleased with ourselves and
2905: * return. (The re-shift code below would
2906: * do so anyway.) */
2907: *u = 0; *v = *u1 = *v1 = 1UL;
2908: return -1; /* Next step will be a full division. */
2909: } /* if !(Jebelean) then */
2910: }
2911: else
2912: { /* division indicated */
2913: hiremainder = 0;
2914: xv1 = 1 + divll(dd, dd1); /* xv1: alternative spelling of `q', here ;) */
2915: dd = hiremainder;
2916: if (dd < xv1) /* !(Jebelean cond'), non-extra special case */
2917: { /* Attempt to complete the division using the
2918: * less significant bits, before skipping right
2919: * past the 1st loop to the reshift stage. */
2920: ddlo = subll(ddlo, mulll(xv1, dd1lo));
2921: dd = subllx(dd, hiremainder);
2922:
2923: /* If we now have an overflow, q was _certainly_ too large. Thanks to
2924: * our decision not to get here unless the original dd1 had bits set in
2925: * the upper half of the word, however, we now do know that the correct
2926: * quotient is in fact q-1. Adjust our data accordingly. */
2927: if (overflow)
2928: {
2929: xv1--;
2930: ddlo = addll(ddlo,dd1lo);
2931: dd = addllx(dd,dd1); /* overflows again which cancels the borrow */
2932: /* ...and fall through to skip=1 below */
2933: }
2934: else
2935: /* Test Jebelean condition anew, at this point using _all_ the extracted
2936: * bits we have. This is clutching at straws; we have a more or less
2937: * even chance of succeeding this time. Note that if we fail, we really
2938: * do not know whether the correct quotient would have been q or some
2939: * smaller value. */
2940: if (!dd && ddlo < xv1) return 0;
2941:
2942: /* Otherwise, we now know that q is correct, but we cannot go into the
2943: * 1st loop. Raise a flag so we'll remember to skip past the loop.
2944: * Get here also after the q-1 adjustment case. */
2945: skip = 1;
2946: } /* if !(Jebelean) then */
2947: }
2948: res = 1;
2949: #ifdef DEBUG_LEHMER
2950: fprintferr(" q = %ld, %lx, %lx\n", xv1, dd1, dd);
2951: #endif
2952: if (xv1 > vmax)
2953: { /* gone past the bound already */
2954: *u = 0UL; *u1 = 1UL; *v = 1UL; *v1 = xv1;
2955: return res;
2956: }
2957: xu = 0UL; xv = xu1 = 1UL;
2958:
2959: /* Some invariants from here across the first loop:
2960: *
2961: * At this point, and again after we are finished with the first loop and
2962: * subsequent conditional, a division and the associated update of the
2963: * recurrence matrix have just been carried out completely. The matrix
2964: * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
2965: * columns), and the current remainder == next divisor (dd at the moment)
2966: * is nonzero (it might be zero here, but then skip will have been set).
2967: *
2968: * After the first loop, or when skip is set already, it will also be the
2969: * case that there aren't sufficiently many bits to continue without re-
2970: * shifting. If the divisor after reshifting is zero, or indeed if it
2971: * doesn't have more than half a word's worth of bits, we will have to
2972: * return at that point. Otherwise, we proceed into the second loop.
2973: *
2974: * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
2975: * already reflect the result of applying the current matrix to the old
2976: * ddorig:ddlo and dd1orig:dd1lo. (For the first iteration above, this
2977: * was easy to achieve, and we didn't even need to peek into the (now
2978: * no longer existent!) saved words. After the loop, we'll stop for a
2979: * moment to merge in the ddlo,dd1lo contributions.)
2980: *
2981: * Note that after the first division, even an a priori quotient of 1 cannot
2982: * be trusted until we've checked Jebelean's condition -- it cannot be too
2983: * large, of course, but it might be too small.
2984: */
2985:
2986: if (!skip)
2987: {
2988: for(;;)
2989: {
2990: /* First half of loop divides dd into dd1, and leaves the recurrence
2991: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
2992: * entries) when successful. */
2993: tmpd = dd1 - dd;
2994: if (tmpd < dd)
2995: { /* quotient suspected to be 1 */
2996: #ifdef DEBUG_LEHMER
2997: q = 1;
2998: #endif
2999: tmpu = xu + xu1; /* cannot overflow -- everything bounded by
3000: * the original dd during first loop */
3001: tmpv = xv + xv1;
3002: }
3003: else
3004: { /* division indicated */
3005: hiremainder = 0;
3006: q = 1 + divll(tmpd, dd);
3007: tmpd = hiremainder;
3008: tmpu = xu + q*xu1; /* can't overflow, but may need to be undone */
3009: tmpv = xv + q*xv1;
3010: }
3011:
3012: tmp0 = addll(tmpv, xv1);
3013: if ((tmpd < tmpu) || overflow ||
3014: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
3015: break; /* skip ahead to reshift stage */
3016: else
3017: { /* commit dd1, xu, xv */
3018: res++;
3019: dd1 = tmpd; xu = tmpu; xv = tmpv;
3020: #ifdef DEBUG_LEHMER
3021: fprintferr(" q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
3022: q, dd, dd1, xu1, xu, xv1, xv);
3023: #endif
3024: if (xv > vmax)
3025: { /* time to return */
3026: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3027: return res;
3028: }
3029: }
3030:
3031: /* Second half of loop divides dd1 into dd, and the matrix returns to its
3032: * normal arrangement. */
3033: tmpd = dd - dd1;
3034: if (tmpd < dd1)
3035: { /* quotient suspected to be 1 */
3036: #ifdef DEBUG_LEHMER
3037: q = 1;
3038: #endif
3039: tmpu = xu1 + xu; /* cannot overflow */
3040: tmpv = xv1 + xv;
3041: }
3042: else
3043: { /* division indicated */
3044: hiremainder = 0;
3045: q = 1 + divll(tmpd, dd1);
3046: tmpd = hiremainder;
3047: tmpu = xu1 + q*xu;
3048: tmpv = xv1 + q*xv;
3049: }
3050:
3051: tmp0 = addll(tmpu, xu);
3052: if ((tmpd < tmpv) || overflow ||
3053: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
3054: break; /* skip ahead to reshift stage */
3055: else
3056: { /* commit dd, xu1, xv1 */
3057: res++;
3058: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
3059: #ifdef DEBUG_LEHMER
3060: fprintferr(" q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
3061: q, dd1, dd, xu, xu1, xv, xv1);
3062: #endif
3063: if (xv1 > vmax)
3064: { /* time to return */
3065: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3066: return res;
3067: }
3068: }
3069:
3070: } /* end of first loop */
3071:
3072: /* Intermezzo: update dd:ddlo, dd1:dd1lo. (But not if skip is set.) */
3073:
3074: if (res&1)
3075: {
3076: /* after failed division in 1st half of loop:
3077: * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
3078: * * [ -xu, xu1 ; xv, -xv1 ]
3079: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
3080: * add the high-order remainders + overflows onto [dd1,dd].)
3081: */
3082: tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
3083: tmp1 = subll(mulll(dd1lo,xv), tmp1);
3084: dd1 += subllx(hiremainder, tmp0);
3085: tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
3086: ddlo = subll(tmp2, mulll(dd1lo,xv1));
3087: dd += subllx(tmp0, hiremainder);
3088: dd1lo = tmp1;
3089: }
3090: else
3091: {
3092: /* after failed division in 2nd half of loop:
3093: * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
3094: * * [ xu1, -xu ; -xv1, xv ]
3095: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
3096: * add the high-order remainders + overflows onto [dd,dd1].)
3097: */
3098: tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
3099: tmp1 = subll(tmp1, mulll(dd1lo,xv1));
3100: dd += subllx(tmp0, hiremainder);
3101: tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
3102: dd1lo = subll(mulll(dd1lo,xv), tmp2);
3103: dd1 += subllx(hiremainder, tmp0);
3104: ddlo = tmp1;
3105: }
3106: #ifdef DEBUG_LEHMER
3107: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
3108: #endif
3109: } /* end of skip-pable section: get here also, with res==1, when there
3110: * was a problem immediately after the very first division. */
3111:
3112: /* Re-shift. Note: the shift count _can_ be zero, viz. under the following
3113: * precise conditions: The original dd1 had its topmost bit set, so the 1st
3114: * q was 1, and after subtraction, dd had its topmost bit unset. If now
3115: * dd==0, we'd have taken the return exit already, so we couldn't have got
3116: * here. If not, then it must have been the second division which has gone
3117: * amiss (because dd1 was very close to an exact multiple of the remainder
3118: * dd value, so this will be very rare). At this point, we'd have a fairly
3119: * slim chance of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but
3120: * this is not guaranteed to work. Instead of trying, we return at once.
3121: * The caller will see to it that the initial subtraction is re-done using
3122: * _all_ the bits of both operands, which already helps, and the next round
3123: * will either be a full division (if dd occupied a halfword or less), or
3124: * another lgcdii() first step. In the latter case, since we try a little
3125: * harder during our first step, we may actually be able to fix the problem,
3126: * and get here again with improved low-order bits and with another step
3127: * under our belt. Otherwise we'll have given up above and forced a full-
3128: * blown division.
3129: *
3130: * If res is even, the shift count _cannot_ be zero. (The first step forces
3131: * a zero into the remainder's MSB, and all subsequent remainders will have
3132: * inherited it.)
3133: *
3134: * The re-shift stage exits if the next divisor has at most half a word's
3135: * worth of bits.
3136: *
3137: * For didactic reasons, the second loop will be arranged in the same way
3138: * as the first -- beginning with the division of dd into dd1, as if res
3139: * was odd. To cater for this, if res is actually even, we swap things
3140: * around during reshifting. (During the second loop, the parity of res
3141: * does not matter; we know in which half of the loop we are when we decide
3142: * to return.)
3143: */
3144: #ifdef DEBUG_LEHMER
3145: fprintferr("(sh)");
3146: #endif
3147:
3148: if (res&1)
3149: { /* after odd number of division(s) */
3150: if (dd1 && (sh = bfffo(dd1)))
3151: {
3152: shc = BITS_IN_LONG - sh;
3153: dd = (ddlo >> shc) + (dd << sh);
3154: if (!(HIGHMASK & dd))
3155: {
3156: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3157: return -res; /* full division asked for */
3158: }
3159: dd1 = (dd1lo >> shc) + (dd1 << sh);
3160: }
3161: else
3162: { /* time to return: <= 1 word left, or sh==0 */
3163: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3164: return res;
3165: }
3166: }
3167: else
3168: { /* after even number of divisions */
3169: if (dd)
3170: {
3171: sh = bfffo(dd); /* Known to be positive. */
3172: shc = BITS_IN_LONG - sh;
3173: /* dd:ddlo will become the new dd1, and v.v. */
3174: tmpd = (ddlo >> shc) + (dd << sh);
3175: dd = (dd1lo >> shc) + (dd1 << sh);
3176: dd1 = tmpd;
3177: /* This has completed the swap; now dd is again the current divisor.
3178: * The following test originally inspected dd1 -- a most subtle and
3179: * most annoying bug. The Management. */
3180: if (HIGHMASK & dd)
3181: {
3182: /* recurrence matrix is the wrong way round; swap it. */
3183: tmp0 = xu; xu = xu1; xu1 = tmp0;
3184: tmp0 = xv; xv = xv1; xv1 = tmp0;
3185: }
3186: else
3187: {
3188: /* recurrence matrix is the wrong way round; fix this. */
3189: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3190: return -res; /* full division asked for */
3191: }
3192: }
3193: else
3194: { /* time to return: <= 1 word left */
3195: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3196: return res;
3197: }
3198: } /* end reshift */
3199:
3200: #ifdef DEBUG_LEHMER
3201: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
3202: #endif
3203:
3204: /* The Second Loop. Rip-off of the first, but we now check for overflow
3205: * in the recurrences. Returns instead of breaking when we cannot fix the
3206: * quotient any longer.
3207: */
3208:
3209: for(;;)
3210: {
3211: /* First half of loop divides dd into dd1, and leaves the recurrence
3212: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
3213: * entries) when successful. */
3214: tmpd = dd1 - dd;
3215: if (tmpd < dd)
3216: { /* quotient suspected to be 1 */
3217: #ifdef DEBUG_LEHMER
3218: q = 1;
3219: #endif
3220: tmpu = xu + xu1;
3221: tmpv = addll(xv, xv1); /* xv,xv1 will overflow first */
3222: tmp1 = overflow;
3223: }
3224: else
3225: { /* division indicated */
3226: hiremainder = 0;
3227: q = 1 + divll(tmpd, dd);
3228: tmpd = hiremainder;
3229: tmpu = xu + q*xu1;
3230: tmpv = addll(xv, mulll(q,xv1));
3231: tmp1 = overflow | hiremainder;
3232: }
3233:
3234: tmp0 = addll(tmpv, xv1);
3235: if ((tmpd < tmpu) || overflow || tmp1 ||
3236: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
3237: {
3238: /* The recurrence matrix has not yet been warped... */
3239: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3240: break;
3241: }
3242: /* commit dd1, xu, xv */
3243: res++;
3244: dd1 = tmpd; xu = tmpu; xv = tmpv;
3245: #ifdef DEBUG_LEHMER
3246: fprintferr(" q = %ld, %lx, %lx\n", q, dd, dd1);
3247: #endif
3248: if (xv > vmax)
3249: { /* time to return */
3250: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3251: return res;
3252: }
3253:
3254: /* Second half of loop divides dd1 into dd, and the matrix returns to its
3255: * normal arrangement. */
3256: tmpd = dd - dd1;
3257: if (tmpd < dd1)
3258: { /* quotient suspected to be 1 */
3259: #ifdef DEBUG_LEHMER
3260: q = 1;
3261: #endif
3262: tmpu = xu1 + xu;
3263: tmpv = addll(xv1, xv);
3264: tmp1 = overflow;
3265: }
3266: else
3267: { /* division indicated */
3268: hiremainder = 0;
3269: q = 1 + divll(tmpd, dd1);
3270: tmpd = hiremainder;
3271: tmpu = xu1 + q*xu;
3272: tmpv = addll(xv1, mulll(q, xv));
3273: tmp1 = overflow | hiremainder;
3274: }
3275:
3276: tmp0 = addll(tmpu, xu);
3277: if ((tmpd < tmpv) || overflow || tmp1 ||
3278: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
3279: {
3280: /* The recurrence matrix has not yet been unwarped, so it is
3281: * the wrong way round; fix this. */
3282: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3283: break;
3284: }
3285:
3286: res++; /* commit dd, xu1, xv1 */
3287: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
3288: #ifdef DEBUG_LEHMER
3289: fprintferr(" q = %ld, %lx, %lx\n", q, dd1, dd);
3290: #endif
3291: if (xv1 > vmax)
3292: { /* time to return */
3293: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3294: return res;
3295: }
3296: } /* end of second loop */
3297:
3298: return res;
3299: }
3300:
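/* A minimal sketch of the caller protocol described in the comment before
 * lgcdii() above (kept disabled): apply the returned recurrence matrix to
 * the caller's own copies of d, d1, as the main loops of invmod() and
 * bezout() below do.  Assumes d > d1 > 0 with d longer than one word, as
 * required above; no stack housekeeping; names are illustrative only. */
#if 0
static void
lgcdii_update_sketch(GEN *d, GEN *d1)
{
  ulong xu, xu1, xv, xv1;
  int res = lgcdii((ulong*)*d, (ulong*)*d1, &xu, &xu1, &xv, &xv1, 0);

  if (!res) return; /* no progress: caller must do a full dvmdii() step */
  if (res == 1 || res == -1)
  { /* here xu == 0, xu1 == xv == 1; only xv1 is interesting */
    GEN r = subii(*d, mului(xv1, *d1));
    *d = *d1; *d1 = r;
  }
  else
  {
    GEN nd  = subii(muliu(*d, xu ), muliu(*d1, xv ));
    GEN nd1 = subii(muliu(*d, xu1), muliu(*d1, xv1));
    if (res & 1) setsigne(nd, -signe(nd));        /* odd number of steps */
    else if (signe(nd1)) setsigne(nd1, -signe(nd1));
    *d = nd; *d1 = nd1;
  }
}
#endif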
3301: /*==================================
3302: * invmod(a,b,res)
3303: *==================================
3304: * If a is invertible, return 1, and set res = a^{ -1 }
3305: * Otherwise, return 0, and set res = gcd(a,b)
3306: *
3307: * This is sufficiently different from bezout() to be implemented separately
3308: * instead of having a bunch of extra conditionals in a single function body
3309: * to meet both purposes.
3310: */
3311:
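/* Usage sketch for the specification above (kept disabled): invert 3
 * modulo 7.  utoi() is assumed from elsewhere in the kernel. */
#if 0
static void
invmod_usage_sketch(void)
{
  GEN res;
  if (invmod(utoi(3UL), utoi(7UL), &res))
  { /* res == 5, since 3*5 = 15 = 1 (mod 7) */ }
  else
  { /* res == gcd(3,7); cannot happen for this input */ }
}
#endif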
3312: int
3313: invmod(GEN a, GEN b, GEN *res)
3314: {
3315: GEN v,v1,d,d1,q,r;
3316: long av,av1,lim;
3317: long s;
3318: ulong g;
3319: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
3320: int lhmres; /* Lehmer stage return value */
3321:
3322: if (typ(a) != t_INT || typ(b) != t_INT) err(arither1);
3323: if (!signe(b)) { *res=absi(a); return 0; }
3324: av = avma;
3325: if (lgefint(b) == 3) /* single-word affair */
3326: {
3327: d1 = modiu(a, (ulong)(b[2]));
3328: if (d1 == gzero)
3329: {
3330: if (b[2] == 1L)
3331: { *res = gzero; return 1; }
3332: else
3333: { *res = absi(b); return 0; }
3334: }
3335: g = xgcduu((ulong)(b[2]), (ulong)(d1[2]), 1, &xv, &xv1, &s);
3336: #ifdef DEBUG_LEHMER
3337: fprintferr(" <- %lu,%lu\n", (ulong)(b[2]), (ulong)(d1[2]));
3338: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
3339: #endif
3340: avma = av;
3341: if (g != 1UL) { *res = utoi(g); return 0; }
3342: xv = xv1 % (ulong)(b[2]); if (s < 0) xv = ((ulong)(b[2])) - xv;
3343: *res = utoi(xv); return 1;
3344: }
3345:
3346: (void)new_chunk(lgefint(b));
3347: d = absi(b); d1 = modii(a,d);
3348:
3349: v=gzero; v1=gun; /* general case */
3350: #ifdef DEBUG_LEHMER
3351: fprintferr("INVERT: -------------------------\n");
3352: output(d1);
3353: #endif
3354: av1 = avma; lim = stack_lim(av,1);
3355:
3356: while (lgefint(d) > 3 && signe(d1))
3357: {
3358: #ifdef DEBUG_LEHMER
3359: fprintferr("Calling Lehmer:\n");
3360: #endif
3361: lhmres = lgcdii((ulong*)d, (ulong*)d1, &xu, &xu1, &xv, &xv1, MAXULONG);
3362: if (lhmres != 0) /* check progress */
3363: { /* apply matrix */
3364: #ifdef DEBUG_LEHMER
3365: fprintferr("Lehmer returned %d [%lu,%lu;%lu,%lu].\n",
3366: lhmres, xu, xu1, xv, xv1);
3367: #endif
3368: if ((lhmres == 1) || (lhmres == -1))
3369: {
3370: if (xv1 == 1)
3371: {
3372: r = subii(d,d1); d=d1; d1=r;
3373: a = subii(v,v1); v=v1; v1=a;
3374: }
3375: else
3376: {
3377: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
3378: a = subii(v, mului(xv1,v1)); v=v1; v1=a;
3379: }
3380: }
3381: else
3382: {
3383: r = subii(muliu(d,xu), muliu(d1,xv));
3384: a = subii(muliu(v,xu), muliu(v1,xv));
3385: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
3386: v1 = subii(muliu(v,xu1), muliu(v1,xv1)); v = a;
3387: if (lhmres&1)
3388: {
3389: setsigne(d,-signe(d));
3390: setsigne(v,-signe(v));
3391: }
3392: else
3393: {
3394: if (signe(d1)) { setsigne(d1,-signe(d1)); }
3395: setsigne(v1,-signe(v1));
3396: }
3397: }
3398: }
3399: #ifdef DEBUG_LEHMER
3400: else
3401: fprintferr("Lehmer returned 0.\n");
3402: output(d); output(d1); output(v); output(v1);
3403: sleep(1);
3404: #endif
3405:
3406: if (lhmres <= 0 && signe(d1))
3407: {
3408: q = dvmdii(d,d1,&r);
3409: #ifdef DEBUG_LEHMER
3410: fprintferr("Full division:\n");
3411: printf(" q = "); output(q); sleep (1);
3412: #endif
3413: a = subii(v,mulii(q,v1));
3414: v=v1; v1=a;
3415: d=d1; d1=r;
3416: }
3417: if (low_stack(lim, stack_lim(av,1)))
3418: {
3419: GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
3420: if(DEBUGMEM>1) err(warnmem,"invmod");
3421: gerepilemany(av1,gptr,4);
3422: }
3423: } /* end while */
3424:
3425: /* Postprocessing - final sprint */
3426: if (signe(d1))
3427: {
3428: /* Assertions: lgefint(d)==lgefint(d1)==3, and
3429: * gcd(d,d1) is nonzero and fits into one word
3430: */
3431: g = xxgcduu((ulong)(d[2]), (ulong)(d1[2]), 1, &xu, &xu1, &xv, &xv1, &s);
3432: #ifdef DEBUG_LEHMER
3433: output(d);output(d1);output(v);output(v1);
3434: fprintferr(" <- %lu,%lu\n", (ulong)(d[2]), (ulong)(d1[2]));
3435: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
3436: #endif
3437: if (g != 1UL) { avma = av; *res = utoi(g); return 0; }
3438: /* (From the xgcduu() blurb:)
3439: * For finishing the multiword modinv, we now have to multiply the
3440: * returned matrix (with properly adjusted signs) onto the values
3441: * v' and v1' previously obtained from the multiword division steps.
3442: * Actually, it is sufficient to take the scalar product of [v',v1']
3443: * with [u1,-v1], and change the sign if s==1.
3444: */
3445: v = subii(muliu(v,xu1),muliu(v1,xv1));
3446: if (s > 0) setsigne(v,-signe(v));
3447: avma = av; *res = modii(v,b);
3448: #ifdef DEBUG_LEHMER
3449:     output(*res); fprintferr("============================Done.\n");
3450: sleep(1);
3451: #endif
3452: return 1;
3453: }
3454: /* get here when the final sprint was skipped (d1 was zero already) */
3455: avma = av;
3456: if (!egalii(d,gun)) { *res = icopy(d); return 0; }
3457: *res = modii(v,b);
3458: #ifdef DEBUG_LEHMER
3459: output(*res); fprintferr("============================Done.\n");
3460: sleep(1);
3461: #endif
3462: return 1;
3463: }
3464:
3465: /*==================================
3466: * bezout(a,b,pu,pv)
3467: *==================================
3468: * Return g = gcd(a,b) >= 0, and assign GENs u,v through pointers pu,pv
3469: * such that g = u*a + v*b.
3470: * Special cases:
3471: * a == b == 0 ==> pick u=1, v=0
3472: * a != 0 == b ==> keep v=0
3473: * a == 0 != b ==> keep u=0
3474: * |a| == |b| != 0 ==> keep u=0, set v=+-1
3475: * Assignments through pu,pv will be suppressed when the corresponding
3476: * pointer is NULL (but the computations will happen nonetheless).
3477: */
3478:
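/* Usage sketch matching the specification above (kept disabled):
 * gcd(12,20) = 4 with 4 = 2*12 + (-1)*20.  utoi() assumed. */
#if 0
static void
bezout_usage_sketch(void)
{
  GEN u, v;
  GEN g = bezout(utoi(12UL), utoi(20UL), &u, &v);
  /* here g = 4, u = 2, v = -1, so that u*12 + v*20 = g */
  (void)g;
}
#endif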
3479: GEN
3480: bezout(GEN a, GEN b, GEN *pu, GEN *pv)
3481: {
3482: GEN t,u,u1,v,v1,d,d1,q,r;
3483: GEN *pt;
3484: long av,av1,lim;
3485: long s;
3486: int sa, sb;
3487: ulong g;
3488: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
3489: int lhmres; /* Lehmer stage return value */
3490:
3491: if (typ(a) != t_INT || typ(b) != t_INT) err(arither1);
3492: s = absi_cmp(a,b);
3493: if (s < 0)
3494: {
3495: t=b; b=a; a=t;
3496: pt=pu; pu=pv; pv=pt;
3497: }
3498: /* now |a| >= |b| */
3499:
3500: sa = signe(a); sb = signe(b);
3501: if (!sb)
3502: {
3503: if (pv != NULL) *pv = gzero;
3504: switch(sa)
3505: {
3506: case 0:
3507: if (pu != NULL) *pu = gun; return gzero;
3508: case 1:
3509: if (pu != NULL) *pu = gun; return icopy(a);
3510: case -1:
3511: if (pu != NULL) *pu = negi(gun); return(negi(a));
3512: }
3513: }
3514: if (s == 0) /* |a| == |b| != 0 */
3515: {
3516: if (pu != NULL) *pu = gzero;
3517: if (sb > 0)
3518: {
3519: if (pv != NULL) *pv = gun; return icopy(b);
3520: }
3521: else
3522: {
3523: if (pv != NULL) *pv = negi(gun); return(negi(b));
3524: }
3525: }
3526: /* now |a| > |b| > 0 */
3527:
3528: if (lgefint(a) == 3) /* single-word affair */
3529: {
3530: g = xxgcduu((ulong)(a[2]), (ulong)(b[2]), 0, &xu, &xu1, &xv, &xv1, &s);
3531: sa = s > 0 ? sa : -sa;
3532: sb = s > 0 ? -sb : sb;
3533: if (pu != NULL)
3534: {
3535: if (xu == 0) *pu = gzero; /* can happen when b divides a */
3536: else if (xu == 1) *pu = sa < 0 ? negi(gun) : gun;
3537: else if (xu == 2) *pu = sa < 0 ? negi(gdeux) : gdeux;
3538: else
3539: {
3540: *pu = cgeti(3);
3541: (*pu)[1] = evalsigne(sa)|evallgefint(3);
3542: (*pu)[2] = xu;
3543: }
3544: }
3545: if (pv != NULL)
3546: {
3547: if (xv == 1) *pv = sb < 0 ? negi(gun) : gun;
3548: else if (xv == 2) *pv = sb < 0 ? negi(gdeux) : gdeux;
3549: else
3550: {
3551: *pv = cgeti(3);
3552: (*pv)[1] = evalsigne(sb)|evallgefint(3);
3553: (*pv)[2] = xv;
3554: }
3555: }
3556: if (g == 1) return gun;
3557: else if (g == 2) return gdeux;
3558: else
3559: {
3560: r = cgeti(3);
3561: r[1] = evalsigne(1)|evallgefint(3);
3562: r[2] = g;
3563: return r;
3564: }
3565: }
3566:
3567: /* general case */
3568: av = avma;
3569: (void)new_chunk(lgefint(b) + (lgefint(a)<<1)); /* room for u,v,gcd */
3570: /* if a is significantly larger than b, calling lgcdii() is not the best
3571: * way to start -- reduce a mod b first
3572: */
3573: if (lgefint(a) > lgefint(b))
3574: {
3575: d = absi(b), q = dvmdii(absi(a), d, &d1);
3576: if (!signe(d1)) /* a == qb */
3577: {
3578: avma = av;
3579: if (pu != NULL) *pu = gzero;
3580: if (pv != NULL) *pv = sb < 0 ? negi(gun) : gun;
3581: return (icopy(d));
3582: }
3583: else
3584: {
3585: u = gzero;
3586: u1 = v = gun;
3587: v1 = negi(q);
3588: }
3589: /* if this results in lgefint(d) == 3, will fall past main loop */
3590: }
3591: else
3592: {
3593: d = absi(a); d1 = absi(b);
3594: u = v1 = gun; u1 = v = gzero;
3595: }
3596: av1 = avma; lim = stack_lim(av, 1);
3597:
3598: /* main loop is almost identical to that of invmod() */
3599: while (lgefint(d) > 3 && signe(d1))
3600: {
3601: lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, MAXULONG);
3602: if (lhmres != 0) /* check progress */
3603: { /* apply matrix */
3604: if ((lhmres == 1) || (lhmres == -1))
3605: {
3606: if (xv1 == 1)
3607: {
3608: r = subii(d,d1); d=d1; d1=r;
3609: a = subii(u,u1); u=u1; u1=a;
3610: a = subii(v,v1); v=v1; v1=a;
3611: }
3612: else
3613: {
3614: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
3615: a = subii(u, mului(xv1,u1)); u=u1; u1=a;
3616: a = subii(v, mului(xv1,v1)); v=v1; v1=a;
3617: }
3618: }
3619: else
3620: {
3621: r = subii(muliu(d,xu), muliu(d1,xv));
3622: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
3623: a = subii(muliu(u,xu), muliu(u1,xv));
3624: u1 = subii(muliu(u,xu1), muliu(u1,xv1)); u = a;
3625: a = subii(muliu(v,xu), muliu(v1,xv));
3626: v1 = subii(muliu(v,xu1), muliu(v1,xv1)); v = a;
3627: if (lhmres&1)
3628: {
3629: setsigne(d,-signe(d));
3630: setsigne(u,-signe(u));
3631: setsigne(v,-signe(v));
3632: }
3633: else
3634: {
3635: if (signe(d1)) { setsigne(d1,-signe(d1)); }
3636: setsigne(u1,-signe(u1));
3637: setsigne(v1,-signe(v1));
3638: }
3639: }
3640: }
3641: if (lhmres <= 0 && signe(d1))
3642: {
3643: q = dvmdii(d,d1,&r);
3644: #ifdef DEBUG_LEHMER
3645: fprintferr("Full division:\n");
3646: printf(" q = "); output(q); sleep (1);
3647: #endif
3648: a = subii(u,mulii(q,u1));
3649: u=u1; u1=a;
3650: a = subii(v,mulii(q,v1));
3651: v=v1; v1=a;
3652: d=d1; d1=r;
3653: }
3654: if (low_stack(lim, stack_lim(av,1)))
3655: {
3656: GEN *gptr[6]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&u; gptr[3]=&u1;
3657: gptr[4]=&v; gptr[5]=&v1;
3658: if(DEBUGMEM>1) err(warnmem,"bezout");
3659: gerepilemany(av1,gptr,6);
3660: }
3661: } /* end while */
3662:
3663: /* Postprocessing - final sprint */
3664: if (signe(d1))
3665: {
3666: /* Assertions: lgefint(d)==lgefint(d1)==3, and
3667: * gcd(d,d1) is nonzero and fits into one word
3668: */
3669: g = xxgcduu((ulong)(d[2]), (ulong)(d1[2]), 0, &xu, &xu1, &xv, &xv1, &s);
3670: u = subii(muliu(u,xu), muliu(u1, xv));
3671: v = subii(muliu(v,xu), muliu(v1, xv));
3672: if (s < 0) { sa = -sa; sb = -sb; }
3673: avma = av;
3674: if (pu != NULL) *pu = sa < 0 ? negi(u) : icopy(u);
3675: if (pv != NULL) *pv = sb < 0 ? negi(v) : icopy(v);
3676: if (g == 1) return gun;
3677: else if (g == 2) return gdeux;
3678: else
3679: {
3680: r = cgeti(3);
3681: r[1] = evalsigne(1)|evallgefint(3);
3682: r[2] = g;
3683: return r;
3684: }
3685: }
3686: /* get here when the final sprint was skipped (d1 was zero already).
3687: * Now the matrix is final, and d contains the gcd.
3688: */
3689: avma = av;
3690: if (pu != NULL) *pu = sa < 0 ? negi(u) : icopy(u);
3691: if (pv != NULL) *pv = sb < 0 ? negi(v) : icopy(v);
3692: return icopy(d);
3693: }
3694:
3695: /*==================================
3696: * cbezout(a,b,uu,vv)
3697: *==================================
3698: * Same as bezout() but for C longs.
3699: * Return g = gcd(a,b) >= 0, and assign longs u,v through pointers uu,vv
3700: * such that g = u*a + v*b.
3701: * Special cases:
3702: * a == b == 0 ==> pick u=1, v=0 (and return 0)
3703: * a != 0 == b ==> keep v=0
3704: * a == 0 != b ==> keep u=0
3705: * |a| == |b| != 0 ==> keep u=0, set v=+-1
3706: * Assignments through uu,vv happen unconditionally; non-NULL pointers
3707: * _must_ be used.
3708: */
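/* Usage sketch for the C-long variant documented above (kept disabled). */
#if 0
static void
cbezout_usage_sketch(void)
{
  long u, v;
  long g = cbezout(12L, 20L, &u, &v);
  /* here g = 4, u = 2, v = -1, so that u*12 + v*20 = g */
  (void)g;
}
#endif
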
3709: long
3710: cbezout(long a,long b,long *uu,long *vv)
3711: {
3712: long s,*t;
3713: ulong d = labs(a), d1 = labs(b);
3714: ulong r,u,u1,v,v1;
3715:
3716: #ifdef DEBUG_CBEZOUT
3717: fprintferr("> cbezout(%ld,%ld,%p,%p)\n", a, b, (void *)uu, (void *)vv);
3718: #endif
3719: if (!b)
3720: {
3721: *vv=0L;
3722: if (!a)
3723: {
3724: *uu=1L;
3725: #ifdef DEBUG_CBEZOUT
3726:       fprintferr("< %ld (%ld, %ld)\n", 0L, *uu, *vv);
3727: #endif
3728: return 0L;
3729: }
3730: *uu = a < 0 ? -1L : 1L;
3731: #ifdef DEBUG_CBEZOUT
3732: fprintferr("< %ld (%ld, %ld)\n", (long)d, *uu, *vv);
3733: #endif
3734: return (long)d;
3735: }
3736: else if (!a || (d == d1))
3737: {
3738: *uu = 0L; *vv = b < 0 ? -1L : 1L;
3739: #ifdef DEBUG_CBEZOUT
3740: fprintferr("< %ld (%ld, %ld)\n", (long)d1, *uu, *vv);
3741: #endif
3742: return (long)d1;
3743: }
3744: else if (d == 1) /* frequently used by nfinit */
3745: {
3746: *uu = a; *vv = 0L;
3747: #ifdef DEBUG_CBEZOUT
3748: fprintferr("< %ld (%ld, %ld)\n", 1L, *uu, *vv);
3749: #endif
3750: return 1L;
3751: }
3752: else if (d < d1)
3753: {
3754: /* bug in gcc-2.95.3:
3755: * s = a; a = b; b = s; produces wrong result a = b. This is OK: */
3756: { long _x = a; a = b; b = _x; } /* in order to keep the right signs */
3757: r = d; d = d1; d1 = r;
3758: t = uu; uu = vv; vv = t;
3759: #ifdef DEBUG_CBEZOUT
3760: fprintferr(" swapping\n");
3761: #endif
3762: }
3763: /* d > d1 > 0 */
3764: r = xxgcduu(d, d1, 0, &u, &u1, &v, &v1, &s);
3765: if (s < 0)
3766: {
3767: *uu = a < 0 ? u : -(long)u;
3768: *vv = b < 0 ? -(long)v : v;
3769: }
3770: else
3771: {
3772: *uu = a < 0 ? -(long)u : u;
3773: *vv = b < 0 ? v : -(long)v;
3774: }
3775: #ifdef DEBUG_CBEZOUT
3776: fprintferr("< %ld (%ld, %ld)\n", (long)r, *uu, *vv);
3777: #endif
3778: return (long)r;
3779: }
3780:
3781: /*==========================================================
3782: * ratlift(GEN x, GEN m, GEN *a, GEN *b, GEN amax, GEN bmax)
3783: *==========================================================
3784: * Reconstruct rational number from its residue x mod m
3785: * Given t_INT x, m, amax>=0, bmax>0 such that
3786: * 0 <= x < m; 2*amax*bmax < m
3787: * attempts to find t_INT a, b such that
3788: * (1) a = b*x (mod m)
3789: * (2) |a| <= amax, 0 < b <= bmax
3790: * (3) gcd(m, b) = gcd(a, b)
3791: * If unsuccessful, it will return 0 and leave a,b unchanged (and
3792: * caller may deduce no such a,b exist). If successful, sets a,b
3793: * and returns 1. If there exist a,b satisfying (1), (2), and
3794: * (3') gcd(m, b) = 1
3795: * then they are uniquely determined subject to (1),(2) and
3796: * (3'') gcd(a, b) = 1,
3797: * and will be returned by the routine. (The caller may wish to
3798: * check gcd(a,b)==1, either directly or based on known prime
3799: * divisors of m, depending on the application.)
3800: * Reference:
3801: @article {MR97c:11116,
3802: AUTHOR = {Collins, George E. and Encarnaci{\'o}n, Mark J.},
3803: TITLE = {Efficient rational number reconstruction},
3804: JOURNAL = {J. Symbolic Comput.},
3805: FJOURNAL = {Journal of Symbolic Computation},
3806: VOLUME = {20},
3807: YEAR = {1995},
3808: NUMBER = {3},
3809: PAGES = {287--297},
3810: ISSN = {0747-7171},
3811: MRCLASS = {11Y16 (68Q40)},
3812: MRNUMBER = {97c:11116},
3813: MRREVIEWER = {Maurice Mignotte},
3814: }
3815: * Preprint available from:
3816: * ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1994/94-64.ps.gz
3817: */
3818:
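/* Usage sketch for the specification above (kept disabled): recover the
 * rational 1/3 from its residue 65 mod 97 (3*65 = 195 = 2*97 + 1), with
 * amax = bmax = 6 so that 2*amax*bmax = 72 < 97.  utoi() assumed. */
#if 0
static void
ratlift_usage_sketch(void)
{
  GEN a, b;
  if (ratlift(utoi(65UL), utoi(97UL), &a, &b, utoi(6UL), utoi(6UL)))
  { /* a = 1, b = 3: indeed 1 = 3*65 (mod 97), |a| <= 6, 0 < b <= 6 */ }
}
#endif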
3819: /* #define DEBUG_RATLIFT */
3820:
3821: int
3822: ratlift(GEN x, GEN m, GEN *a, GEN *b, GEN amax, GEN bmax)
3823: {
3824: GEN d,d1,v,v1,q,r;
3825: ulong av = avma, av1, lim;
3826: long lb,lr,lbb,lbr,s,s0;
3827: ulong vmax;
3828: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
3829: int lhmres; /* Lehmer stage return value */
3830:
3831: if ((typ(x) | typ(m) | typ(amax) | typ(bmax)) != t_INT) err(arither1);
3832: if (signe(bmax) <= 0)
3833: err(talker, "ratlift: bmax must be > 0, found\n\tbmax=%Z\n", bmax);
3834: if (signe(amax) < 0)
3835:     err(talker, "ratlift: amax must be >= 0, found\n\tamax=%Z\n", amax);
3836: /* check 2*amax*bmax < m */
3837: if (cmpii(shifti(mulii(amax, bmax), 1), m) >= 0)
3838: err(talker, "ratlift: must have 2*amax*bmax < m, found\n\tamax=%Z\n\tbmax=%Z\n\tm=%Z\n", amax,bmax,m);
3839: /* we _could_ silently replace x with modii(x,m) instead of the following,
3840: * but let's leave this up to the caller
3841: */
3842: avma = av; s = signe(x);
3843: if (s < 0 || cmpii(x,m) >= 0)
3844: err(talker, "ratlift: must have 0 <= x < m, found\n\tx=%Z\n\tm=%Z\n", x,m);
3845:
3846: /* special cases x=0 and/or amax=0 */
3847: if (s == 0)
3848: {
3849: if (a != NULL) *a = gzero;
3850: if (b != NULL) *b = gun;
3851: return 1;
3852: }
3853: else if (signe(amax)==0)
3854: return 0;
3855: /* assert: m > x > 0, amax > 0 */
3856:
3857: /* check here whether a=x, b=1 is a solution */
3858: if (cmpii(x,amax) <= 0)
3859: {
3860: if (a != NULL) *a = icopy(x);
3861: if (b != NULL) *b = gun;
3862: return 1;
3863: }
3864:
3865: /* There is no special case for single-word numbers since this is
3866: * mainly meant to be used with large moduli.
3867: */
3868: (void)new_chunk(lgefint(bmax) + lgefint(amax)); /* room for a,b */
3869: d = m; d1 = x;
3870: v = gzero; v1 = gun;
3871: /* assert d1 > amax, v1 <= bmax here */
3872: lb = lgefint(bmax);
3873: lbb = bfffo(bmax[2]);
3874: s = 1;
3875: av1 = avma; lim = stack_lim(av, 1);
3876:
3877: /* general case: Euclidean division chain starting with m div x, and
3878: * with bounds on the sequence of convergents' denoms v_j.
3879: * Just to be different from what invmod and bezout are doing, we work
3880: * here with the all-nonnegative matrices [u,u1;v,v1]=prod_j([0,1;1,q_j]).
3881: * Loop invariants:
3882: * (a) (sign)*[-v,v1]*x = [d,d1] (mod m) (componentwise)
3883: * (sign initially +1, changes with each Euclidean step)
3884: * so [a,b] will be obtained in the form [-+d,v] or [+-d1,v1];
3885: * this congruence is a consequence of
3886: * (b) [x,m]~ = [u,u1;v,v1]*[d1,d]~,
3887: * where u,u1 is the usual numerator sequence starting with 1,0
3888: * instead of 0,1 (just multiply the eqn on the left by the inverse
3889: * matrix, which is det*[v1,-u1;-v,u], where "det" is the same as the
3890: * "(sign)" in (a)). From m = v*d1 + v1*d and
3891: * (c) d > d1 >= 0, 0 <= v < v1,
3892: * we have d >= m/(2*v1), so while v1 remains smaller than m/(2*amax),
3893: * the pair [-(sign)*d,v] satisfies (1) but violates (2) (d > amax).
3894: * Conversely, v1 > bmax indicates that no further solutions will be
3895: * forthcoming; [-(sign)*d,v] will be the last, and first, candidate.
3896:  * Thus there's at most one point in the division chain where a solution
3897: * can live: v < bmax, v1 >= m/(2*amax) > bmax, and this is acceptable
3898: * iff in fact d <= amax (e.g. m=221, x=34 or 35, amax=bmax=10 fail on
3899: * this count while x=32,33,36,37 succeed). However, a division may leave
3900: * a zero residue before we ever reach this point (consider m=210, x=35,
3901: * amax=bmax=10), and our caller may find that gcd(d,v) > 1 (numerous
3902: * examples -- keep m=210 and consider any of x=29,31,32,33,34,36,37,38,
3903: * 39,40,41).
3904: * Furthermore, at the start of the loop body we have in fact
3905: * (c') 0 <= v < v1 <= bmax, d > d1 > amax >= 0,
3906:  * (and, at that point, we are never already done).
3907: *
3908: * Main loop is similar to those of invmod() and bezout(), except for
3909: * having to determine appropriate vmax bounds, and checking termination
3910: * conditions. The signe(d1) condition is only for paranoia
3911: */
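  /* Worked instance of the m=221 example above, with amax=bmax=10 and x=32:
   * the quotients are 221 = 6*32 + 29, 32 = 1*29 + 3, and the denominators
   * v1 run 1, 6, 7; the chain reaches d1 = 3 <= amax with v1 = 7 <= bmax,
   * giving [a,b] = [3,7] -- indeed 7*32 = 224 == 3 (mod 221).
   */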
3912: while (lgefint(d) > 3 && signe(d1))
3913: {
3914: /* determine vmax for lgcdii so as to ensure v won't overshoot.
3915: * If v+v1 > bmax, the next step would take v1 beyond the limit, so
3916: * since [+-d1,v1] is not a solution, we give up. Otherwise if v+v1
3917:      * is way shorter than bmax, use vmax=MAXULONG.  Otherwise, set vmax
3918: * to a crude lower approximation of bmax/(v+v1), or to 1, which will
3919: * allow the inner loop to do one step
3920: */
3921: r = addii(v,v1);
3922: lr = lb - lgefint(r);
3923: lbr = bfffo(r[2]);
3924: if (cmpii(r,bmax) > 0) /* done, not found */
3925: {
3926: avma = av;
3927: return 0;
3928: }
3929: else if (lr > 1) /* still more than a word's worth to go */
3930: {
3931: vmax = MAXULONG;
3932: }
3933: else /* take difference of bit lengths */
3934: {
3935: lr = (lr << TWOPOTBITS_IN_LONG) - lbb + lbr;
3936: if (lr > BITS_IN_LONG)
3937: vmax = MAXULONG;
3938: else if (lr == 0)
3939: vmax = 1UL;
3940: else
3941: vmax = 1UL << (lr-1);
3942: /* the latter is pessimistic but faster than a division */
3943: }
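    /* (In the last branch lr is bitlength(bmax) - bitlength(v+v1), so all
     * three choices of vmax above stay <= bmax/(v+v1), as intended.) */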
3944: /* do a Lehmer-Jebelean round */
3945: lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, vmax);
3946: if (lhmres != 0) /* check progress */
3947: { /* apply matrix */
3948: if ((lhmres == 1) || (lhmres == -1))
3949: {
3950: s = -s;
3951: if (xv1 == 1)
3952: {
3953: /* re-use v+v1 computed above */
3954: v=v1; v1=r;
3955: r = subii(d,d1); d=d1; d1=r;
3956: }
3957: else
3958: {
3959: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
3960: r = addii(v, mului(xv1,v1)); v=v1; v1=r;
3961: }
3962: }
3963: else
3964: {
3965: r = subii(muliu(d,xu), muliu(d1,xv));
3966: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
3967: r = addii(muliu(v,xu), muliu(v1,xv));
3968: v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
3969: if (lhmres&1)
3970: {
3971: setsigne(d,-signe(d));
3972: s = -s;
3973: }
3974: else if (signe(d1))
3975: {
3976: setsigne(d1,-signe(d1));
3977: }
3978: }
3979: /* check whether we're done. Assert v <= bmax here. Examine v1:
3980: * if v1 > bmax, check d and return 0 or 1 depending on the outcome;
3981: * if v1 <= bmax, check d1 and return 1 if d1 <= amax, otherwise
3982: * proceed.
3983: */
3984: if (cmpii(v1,bmax) > 0) /* certainly done */
3985: {
3986: avma = av;
3987: if (cmpii(d,amax) <= 0) /* done, found */
3988: {
3989: if (a != NULL)
3990: {
3991: *a = icopy(d);
3992: setsigne(*a,-s); /* sign opposite to s */
3993: }
3994: if (b != NULL) *b = icopy(v);
3995: return 1;
3996: }
3997: else /* done, not found */
3998: return 0;
3999: }
4000: else if (cmpii(d1,amax) <= 0) /* also done, found */
4001: {
4002: avma = av;
4003: if (a != NULL)
4004: {
4005: if (signe(d1))
4006: {
4007: *a = icopy(d1);
4008: setsigne(*a,s); /* same sign as s */
4009: }
4010: else
4011: *a = gzero;
4012: }
4013: if (b != NULL) *b = icopy(v1);
4014: return 1;
4015: }
4016: } /* lhmres != 0 */
4017:
4018: if (lhmres <= 0 && signe(d1))
4019: {
4020: q = dvmdii(d,d1,&r);
4021: #ifdef DEBUG_LEHMER
4022: fprintferr("Full division:\n");
4023: printf(" q = "); output(q); sleep (1);
4024: #endif
4025: d=d1; d1=r;
4026: r = addii(v,mulii(q,v1));
4027: v=v1; v1=r;
4028: s = -s;
4029: /* check whether we are done now. Since we weren't before the div, it
4030: * suffices to examine v1 and d1 -- the new d (former d1) cannot cut it
4031: */
4032: if (cmpii(v1,bmax) > 0) /* done, not found */
4033: {
4034: avma = av;
4035: return 0;
4036: }
4037: else if (cmpii(d1,amax) <= 0) /* done, found */
4038: {
4039: avma = av;
4040: if (a != NULL)
4041: {
4042: if (signe(d1))
4043: {
4044: *a = icopy(d1);
4045: setsigne(*a,s); /* same sign as s */
4046: }
4047: else
4048: *a = gzero;
4049: }
4050: if (b != NULL) *b = icopy(v1);
4051: return 1;
4052: }
4053: }
4054:
4055: if (low_stack(lim, stack_lim(av,1)))
4056: {
4057: GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
4058: if(DEBUGMEM>1) err(warnmem,"ratlift");
4059: gerepilemany(av1,gptr,4);
4060: }
4061: } /* end while */
4062:
4063: /* Postprocessing - final sprint. Since we usually underestimate vmax,
4064: * this function needs a loop here instead of a simple conditional.
4065: * Note we can only get here when amax fits into one word (which will
4066:    * typically not be the case!).  The while(signe(d1)) condition is
4067:    * redundant -- d1 is never zero at the start of the loop.  There will
4068:    * be at most a few iterations, so we don't bother collecting garbage
4069: */
4070: while (signe(d1))
4071: {
4072: /* Assertions: lgefint(d)==lgefint(d1)==3.
4073: * Moreover, we aren't done already, or we would have returned by now.
4074: * Recompute vmax...
4075: */
4076: #ifdef DEBUG_RATLIFT
4077: fprintferr("rl-fs: d,d1=%Z,%Z\n", d, d1);
4078: fprintferr("rl-fs: v,v1=%Z,%Z\n", v, v1);
4079: #endif
4080: r = addii(v,v1);
4081: lr = lb - lgefint(r);
4082: lbr = bfffo(r[2]);
4083: if (cmpii(r,bmax) > 0) /* done, not found */
4084: {
4085: avma = av;
4086: return 0;
4087: }
4088: else if (lr > 1) /* still more than a word's worth to go */
4089: {
4090: vmax = MAXULONG; /* (cannot in fact happen) */
4091: }
4092: else /* take difference of bit lengths */
4093: {
4094: lr = (lr << TWOPOTBITS_IN_LONG) - lbb + lbr;
4095: if (lr > BITS_IN_LONG)
4096: vmax = MAXULONG;
4097: else if (lr == 0)
4098: vmax = 1UL;
4099: else
4100: vmax = 1UL << (lr-1); /* as above */
4101: }
4102: #ifdef DEBUG_RATLIFT
4103: fprintferr("rl-fs: vmax=%lu\n", vmax);
4104: #endif
4105: /* single-word "Lehmer", discarding the gcd or whatever it returns */
4106: (void)rgcduu((ulong)d[2], (ulong)d1[2], vmax, &xu, &xu1, &xv, &xv1, &s0);
4107: #ifdef DEBUG_RATLIFT
4108: fprintferr("rl-fs: [%lu,%lu; %lu,%lu] %s\n",
4109: xu, xu1, xv, xv1,
4110: s0 < 0 ? "-" : "+");
4111: #endif
4112: if (xv1 == 1) /* avoid multiplications */
4113: {
4114: /* re-use v+v1 computed above */
4115: v=v1; v1=r;
4116: r = subii(d,d1); d=d1; d1=r;
4117: s = -s;
4118: }
4119: else if (xu == 0) /* and xv==1, xu1==1, xv1 > 1 */
4120: {
4121: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
4122: r = addii(v, mului(xv1,v1)); v=v1; v1=r;
4123: s = -s;
4124: }
4125: else
4126: {
4127: r = subii(muliu(d,xu), muliu(d1,xv));
4128: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
4129: r = addii(muliu(v,xu), muliu(v1,xv));
4130: v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
4131: if (s0 < 0)
4132: {
4133: setsigne(d,-signe(d));
4134: s = -s;
4135: }
4136: else if (signe(d1)) /* sic: might vanish now */
4137: {
4138: setsigne(d1,-signe(d1));
4139: }
4140: }
4141: /* check whether we're done, as above. Assert v <= bmax. Examine v1:
4142: * if v1 > bmax, check d and return 0 or 1 depending on the outcome;
4143: * if v1 <= bmax, check d1 and return 1 if d1 <= amax, otherwise proceed.
4144: */
4145: if (cmpii(v1,bmax) > 0) /* certainly done */
4146: {
4147: avma = av;
4148: if (cmpii(d,amax) <= 0) /* done, found */
4149: {
4150: if (a != NULL)
4151: {
4152: *a = icopy(d);
4153: setsigne(*a,-s); /* sign opposite to s */
4154: }
4155: if (b != NULL) *b = icopy(v);
4156: return 1;
4157: }
4158: else /* done, not found */
4159: return 0;
4160: }
4161: else if (cmpii(d1,amax) <= 0) /* also done, found */
4162: {
4163: avma = av;
4164: if (a != NULL)
4165: {
4166: if (signe(d1))
4167: {
4168: *a = icopy(d1);
4169: setsigne(*a,s); /* same sign as s */
4170: }
4171: else
4172: *a = gzero;
4173: }
4174: if (b != NULL) *b = icopy(v1);
4175: return 1;
4176: }
4177: } /* while */
4178:
4179: /* get here when we have run into d1 == 0 before returning... in fact,
4180: * this cannot happen.
4181: */
4182: err(talker, "ratlift failed to catch d1 == 0\n");
4183: /* NOTREACHED */
4184: return 0;
4185: }
4186:
4187: /********************************************************************/
4188: /** **/
4189: /** INTEGER SQUARE ROOT **/
4190: /** **/
4191: /********************************************************************/
4192:
4193: /* sqrt()'s result may be off by 1 when a is not representable exactly as a
4194:  * double, i.e. has more than 53 significant bits [64-bit machines only] */
4195: ulong
4196: usqrtsafe(ulong a)
4197: {
4198: ulong x = (ulong)sqrt((double)a);
4199: #ifdef LONG_IS_64BIT
4200:   ulong y = x+1; if (y <= MAXHALFULONG && y*y <= a) x = y; /* the bound on y keeps y*y from overflowing */
4201: #endif
4202: return x;
4203: }
4204:
4205: /* Assume a >= 0 has lgefint(a) <= 4 (at most 2 significant words), return trunc[sqrt(a)] */
4206: ulong
4207: mpsqrtl(GEN a)
4208: {
4209: long l = lgefint(a);
4210: ulong x,y,z,k,m;
4211: int L, s;
4212:
4213: if (l <= 3) return l==2? 0: usqrtsafe((ulong)a[2]);
4214: s = bfffo(a[2]);
4215: if (s > 1)
4216: {
4217: s &= ~1UL; /* make it even */
4218: z = BITS_IN_LONG - s;
4219: m = (ulong)(a[2] << s) | (a[3] >> z);
4220: L = z>>1;
4221: }
4222: else
4223: {
4224: m = (ulong)a[2];
4225: L = BITS_IN_LONG/2;
4226: }
4227:   /* m = the leading BIL-1 (if bfffo(a[2]) was odd) or BIL bits of a */
4228: k = (long) sqrt((double)m);
4229: if (L == BITS_IN_LONG/2 && k == MAXHALFULONG)
4230: x = MAXULONG;
4231: else
4232: x = (k+1) << L;
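  /* x is designed to (slightly) over-estimate the root, so that the Newton
   * iteration below decreases towards the truncated square root */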
4233: do
4234: {
4235: LOCAL_HIREMAINDER;
4236: LOCAL_OVERFLOW;
4237: hiremainder = a[2];
4238: if (hiremainder >= x) return x;
4239: z = addll(x, divll(a[3],x));
4240: z >>= 1; if (overflow) z |= HIGHBIT;
4241: y = x; x = z;
4242: }
4243: while (x < y);
4244: return y;
4245: }
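
/* For reference: the loop in mpsqrtl() is Newton's iteration
 * x_{k+1} = (x_k + a/x_k) / 2 applied to the leading two words of a.
 * A minimal single-word sketch of the same scheme -- illustrative only,
 * the name newton_usqrt is made up and nothing in the kernel uses it:
 *
 *   ulong newton_usqrt(ulong a)
 *   {
 *     ulong x, y;
 *     if (a < 2) return a;
 *     x = a; y = (x + a/x) >> 1;       -- start from x = a >= sqrt(a)
 *     while (y < x) { x = y; y = (x + a/x) >> 1; }
 *     return x;                        -- first non-decrease: x = trunc[sqrt(a)]
 *   }
 */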