Annotation of OpenXM_contrib/pari-2.2/src/kernel/none/mp.c, Revision 1.2
1.2 ! noro 1: /* $Id: mp.c,v 1.74 2002/06/08 22:07:38 karim Exp $
1.1 noro 2:
3: Copyright (C) 2000 The PARI group.
4:
5: This file is part of the PARI/GP package.
6:
7: PARI/GP is free software; you can redistribute it and/or modify it under the
8: terms of the GNU General Public License as published by the Free Software
9: Foundation. It is distributed in the hope that it will be useful, but WITHOUT
10: ANY WARRANTY WHATSOEVER.
11:
12: Check the License for details. You should have received a copy of it, along
13: with the package; see the file 'COPYING'. If not, write to the Free Software
14: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
15:
16: /***********************************************************************/
17: /** **/
18: /** MULTIPRECISION KERNEL **/
19: /** **/
20: /***********************************************************************/
21: /* most of the routines in this file are commented out in 68k */
22: /* version (#ifdef __M68K__) since they are defined in mp.s */
23: #include "pari.h"
24:
25: /* NOTE: arguments of "spec" routines (muliispec, addiispec, etc.) aren't
             26:  * GENs but pairs (long *a, long na) representing a list of digits (in base
             27:  * 2^BITS_IN_LONG) : a[0], ..., a[na-1]. [ In order to facilitate splitting: no
             28:  * need to reintroduce codewords ]
             29:  * Use speci(a,na) to visualize the corresponding GEN.
30: */
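/* Illustrative example: a t_INT x (codewords x[0], x[1]) is handed to these
 * "spec" routines as the pair (x+2, lgefint(x)-2); addii() below does
 *   z = addiispec(x+2, y+2, lgefint(x)-2, lgefint(y)-2);  */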
31:
32: /* z2 := z1[imin..imax].f shifted left sh bits (feeding f from the right) */
1.2 ! noro 33: /* These macros work only for sh != 0 !!! */
1.1 noro 34: #define shift_left2(z2,z1,imin,imax,f, sh,m) {\
35: register ulong _l,_k = ((ulong)f)>>m;\
36: GEN t1 = z1 + imax, t2 = z2 + imax, T = z1 + imin;\
37: while (t1 > T) {\
38: _l = *t1--;\
39: *t2-- = (_l<<(ulong)sh) | _k;\
40: _k = _l>>(ulong)m;\
41: }\
42: *t2 = (*t1<<(ulong)sh) | _k;\
43: }
44: #define shift_left(z2,z1,imin,imax,f, sh) {\
45: register const ulong _m = BITS_IN_LONG - sh;\
46: shift_left2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
47: }
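/* Worked example (illustrative, assuming 32-bit words): with sh = 1 and f = 0,
 * shift_left maps the two-word value { 0x00000001, 0x80000000 } (most
 * significant word first) to { 0x00000003, 0x00000000 }; a nonzero f would
 * feed its top sh bits into the low sh bits of the last word. */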
48:
1.2 ! noro 49: #define shift_words_r(target,source,source_end,prepend, sh, sh_complement) {\
! 50: register ulong _k,_l = *source++;\
! 51: *target++ = (_l>>(ulong)sh) | ((ulong)prepend<<(ulong)sh_complement);\
! 52: while (source < source_end) {\
! 53: _k = _l<<(ulong)sh_complement; _l = *source++;\
! 54: *target++ = (_l>>(ulong)sh) | _k;\
! 55: }\
! 56: }
1.1 noro 57: #define shift_right2(z2,z1,imin,imax,f, sh,m) {\
1.2 ! noro 58: register GEN s = (z1) + (imin), ta = (z2) + (imin), se = (z1) + (imax);\
! 59: shift_words_r(ta,s,se,(f),(sh),(m)); \
1.1 noro 60: }
1.2 ! noro 61: /* z2 := f.z1[imin..imax-1] shifted right sh bits (feeding f from the left) */
1.1 noro 62: #define shift_right(z2,z1,imin,imax,f, sh) {\
1.2 ! noro 63: register const ulong _m = BITS_IN_LONG - (sh);\
1.1 noro 64: shift_right2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
65: }
66:
67: #define MASK(x) (((ulong)(x)) & (LGEFINTBITS | SIGNBITS))
68: int
69: egalii(GEN x, GEN y)
70: {
71: long i;
72: if (MASK(x[1]) != MASK(y[1])) return 0;
73: i = lgefint(x)-1; while (i>1 && x[i]==y[i]) i--;
74: return i==1;
75: }
76: #undef MASK
77:
78: #ifdef INLINE
79: INLINE
80: #endif
81: GEN
82: addsispec(long s, GEN x, long nx)
83: {
84: GEN xd, zd = (GEN)avma;
85: long lz;
86: LOCAL_OVERFLOW;
87:
88: lz = nx+3; (void)new_chunk(lz);
89: xd = x + nx;
90: *--zd = addll(*--xd, s);
91: if (overflow)
92: for(;;)
93: {
94: if (xd == x) { *--zd = 1; break; } /* enlarge z */
95: *--zd = ((ulong)*--xd) + 1;
96: if (*zd) { lz--; break; }
97: }
98: else lz--;
99: while (xd > x) *--zd = *--xd;
100: *--zd = evalsigne(1) | evallgefint(lz);
101: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 102: avma=(gpmem_t)zd; return zd;
1.1 noro 103: }
104:
105: #define swapspec(x,y, nx,ny) {long _a=nx;GEN _z=x; nx=ny; ny=_a; x=y; y=_z;}
106:
107: #ifdef INLINE
108: INLINE
109: #endif
110: GEN
111: addiispec(GEN x, GEN y, long nx, long ny)
112: {
113: GEN xd,yd,zd;
114: long lz;
115: LOCAL_OVERFLOW;
116:
117: if (nx < ny) swapspec(x,y, nx,ny);
118: if (ny == 1) return addsispec(*y,x,nx);
119: zd = (GEN)avma;
120: lz = nx+3; (void)new_chunk(lz);
121: xd = x + nx;
122: yd = y + ny;
123: *--zd = addll(*--xd, *--yd);
124: while (yd > y) *--zd = addllx(*--xd, *--yd);
125: if (overflow)
126: for(;;)
127: {
128: if (xd == x) { *--zd = 1; break; } /* enlarge z */
129: *--zd = ((ulong)*--xd) + 1;
130: if (*zd) { lz--; break; }
131: }
132: else lz--;
133: while (xd > x) *--zd = *--xd;
134: *--zd = evalsigne(1) | evallgefint(lz);
135: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 136: avma=(gpmem_t)zd; return zd;
1.1 noro 137: }
138:
139: /* assume x >= y */
140: #ifdef INLINE
141: INLINE
142: #endif
143: GEN
144: subisspec(GEN x, long s, long nx)
145: {
146: GEN xd, zd = (GEN)avma;
147: long lz;
148: LOCAL_OVERFLOW;
149:
150: lz = nx+2; (void)new_chunk(lz);
151: xd = x + nx;
152: *--zd = subll(*--xd, s);
153: if (overflow)
154: for(;;)
155: {
156: *--zd = ((ulong)*--xd) - 1;
157: if (*xd) break;
158: }
159: if (xd == x)
160: while (*zd == 0) { zd++; lz--; } /* shorten z */
161: else
162: do *--zd = *--xd; while (xd > x);
163: *--zd = evalsigne(1) | evallgefint(lz);
164: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 165: avma=(gpmem_t)zd; return zd;
1.1 noro 166: }
167:
168: /* assume x > y */
169: #ifdef INLINE
170: INLINE
171: #endif
172: GEN
173: subiispec(GEN x, GEN y, long nx, long ny)
174: {
175: GEN xd,yd,zd;
176: long lz;
177: LOCAL_OVERFLOW;
178:
179: if (ny==1) return subisspec(x,*y,nx);
180: zd = (GEN)avma;
181: lz = nx+2; (void)new_chunk(lz);
182: xd = x + nx;
183: yd = y + ny;
184: *--zd = subll(*--xd, *--yd);
185: while (yd > y) *--zd = subllx(*--xd, *--yd);
186: if (overflow)
187: for(;;)
188: {
189: *--zd = ((ulong)*--xd) - 1;
190: if (*xd) break;
191: }
192: if (xd == x)
193: while (*zd == 0) { zd++; lz--; } /* shorten z */
194: else
195: do *--zd = *--xd; while (xd > x);
196: *--zd = evalsigne(1) | evallgefint(lz);
197: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 198: avma=(gpmem_t)zd; return zd;
1.1 noro 199: }
200:
201: #ifndef __M68K__
202:
203: /* prototype of positive small ints */
204: static long pos_s[] = {
1.2 ! noro 205: evaltyp(t_INT) | _evallg(3), evalsigne(1) | evallgefint(3), 0 };
1.1 noro 206:
207: /* prototype of negative small ints */
208: static long neg_s[] = {
1.2 ! noro 209: evaltyp(t_INT) | _evallg(3), evalsigne(-1) | evallgefint(3), 0 };
1.1 noro 210:
211: void
212: affir(GEN x, GEN y)
213: {
214: const long s=signe(x),ly=lg(y);
215: long lx,sh,i;
216:
217: if (!s)
218: {
219: y[1] = evalexpo(-bit_accuracy(ly));
1.2 ! noro 220: return;
1.1 noro 221: }
222:
1.2 ! noro 223: lx = lgefint(x); sh = bfffo(x[2]);
1.1 noro 224: y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);
1.2 ! noro 225: if (sh) {
1.1 noro 226: if (lx>ly)
227: { shift_left(y,x,2,ly-1, x[ly],sh); }
228: else
229: {
230: for (i=lx; i<ly; i++) y[i]=0;
231: shift_left(y,x,2,lx-1, 0,sh);
232: }
233: }
1.2 ! noro 234: else {
! 235: if (lx>=ly)
! 236: for (i=2; i<ly; i++) y[i]=x[i];
! 237: else
! 238: {
! 239: for (i=2; i<lx; i++) y[i]=x[i];
! 240: for ( ; i<ly; i++) y[i]=0;
! 241: }
1.1 noro 242: }
243: }
244:
245: void
246: affrr(GEN x, GEN y)
247: {
248: long lx,ly,i;
249:
1.2 ! noro 250: y[1] = x[1]; if (!signe(x)) return;
1.1 noro 251:
252: lx=lg(x); ly=lg(y);
253: if (lx>=ly)
254: for (i=2; i<ly; i++) y[i]=x[i];
255: else
256: {
257: for (i=2; i<lx; i++) y[i]=x[i];
258: for ( ; i<ly; i++) y[i]=0;
259: }
260: }
261:
262: GEN
263: shifti(GEN x, long n)
264: {
1.2 ! noro 265: return shifti3(x, n, 0);
! 266: }
! 267:
! 268: GEN
! 269: shifti3(GEN x, long n, long flag)
! 270: {
1.1 noro 271: const long s=signe(x);
272: long lx,ly,i,m;
273: GEN y;
274:
275: if (!s) return gzero;
276: if (!n) return icopy(x);
277: lx = lgefint(x);
278: if (n > 0)
279: {
280: GEN z = (GEN)avma;
281: long d = n>>TWOPOTBITS_IN_LONG;
282:
283: ly = lx+d; y = new_chunk(ly);
284: for ( ; d; d--) *--z = 0;
285: m = n & (BITS_IN_LONG-1);
286: if (!m) for (i=2; i<lx; i++) y[i]=x[i];
287: else
288: {
289: register const ulong sh = BITS_IN_LONG - m;
290: shift_left2(y,x, 2,lx-1, 0,m,sh);
291: i = ((ulong)x[2]) >> sh;
292: if (i) { ly++; y = new_chunk(1); y[2] = i; }
293: }
294: }
295: else
296: {
1.2 ! noro 297: long lyorig;
! 298:
! 299: if (s > 0)
! 300: flag = 0;
1.1 noro 301: n = -n;
1.2 ! noro 302: ly = lyorig = lx - (n>>TWOPOTBITS_IN_LONG);
! 303: if (ly<3)
! 304: return flag ? stoi(-1) : gzero;
1.1 noro 305: y = new_chunk(ly);
306: m = n & (BITS_IN_LONG-1);
1.2 ! noro 307: if (m) {
1.1 noro 308: shift_right(y,x, 2,ly, 0,m);
309: if (y[2] == 0)
310: {
1.2 ! noro 311: if (ly==3) { avma = (gpmem_t)(y+3); return flag ? stoi(-1) : gzero; }
! 312: ly--; avma = (gpmem_t)(++y);
! 313: }
! 314: } else {
! 315: for (i=2; i<ly; i++) y[i]=x[i];
! 316: }
! 317: /* With FLAG: round up instead of rounding down */
! 318: if (flag) { /* Check low bits of x */
! 319: i = lx; flag = 0;
! 320: while (--i >= lyorig)
! 321: if (x[i]) { flag = 1; break; } /* Need to increment y by 1 */
! 322: if (!flag && m)
! 323: flag = x[lyorig - 1] & ((1<<m) - 1);
! 324: }
! 325: if (flag) { /* Increment y */
! 326: for (i = ly;;)
! 327: {
! 328: if (--i < 2)
! 329: { /* Extend y on the left */
! 330: if (avma <= bot) err(errpile);
! 331: avma = (gpmem_t)(--y); ly++;
! 332: y[2] = 1; break;
! 333: }
! 334: if (++y[i]) break;
! 335: /* Now propagate the bit into the next longword */
1.1 noro 336: }
337: }
338: }
339: y[1] = evalsigne(s)|evallgefint(ly);
340: y[0] = evaltyp(t_INT)|evallg(ly); return y;
341: }
342:
343: GEN
344: mptrunc(GEN x)
345: {
346: long d,e,i,s,m;
347: GEN y;
348:
349: if (typ(x)==t_INT) return icopy(x);
350: if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gzero;
351: d = (e>>TWOPOTBITS_IN_LONG) + 3;
352: m = e & (BITS_IN_LONG-1);
353: if (d > lg(x)) err(precer, "mptrunc (precision loss in truncation)");
354:
355: y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);
356: if (++m == BITS_IN_LONG)
357: for (i=2; i<d; i++) y[i]=x[i];
358: else
359: {
360: register const ulong sh = BITS_IN_LONG - m;
361: shift_right2(y,x, 2,d,0, sh,m);
362: }
363: return y;
364: }
365:
366: /* integral part */
367: GEN
368: mpent(GEN x)
369: {
370: long d,e,i,lx,m;
371: GEN y;
372:
373: if (typ(x)==t_INT) return icopy(x);
374: if (signe(x) >= 0) return mptrunc(x);
375: if ((e=expo(x)) < 0) return stoi(-1);
376: d = (e>>TWOPOTBITS_IN_LONG) + 3;
377: m = e & (BITS_IN_LONG-1);
             378:   lx=lg(x); if (d>lx) err(precer, "mpent (precision loss in truncation)");
379: y = new_chunk(d);
380: if (++m == BITS_IN_LONG)
381: {
382: for (i=2; i<d; i++) y[i]=x[i];
383: i=d; while (i<lx && !x[i]) i++;
384: if (i==lx) goto END;
385: }
386: else
387: {
388: register const ulong sh = BITS_IN_LONG - m;
389: shift_right2(y,x, 2,d,0, sh,m);
390: if (x[d-1]<<m == 0)
391: {
392: i=d; while (i<lx && !x[i]) i++;
393: if (i==lx) goto END;
394: }
395: }
396: /* set y:=y+1 */
397: for (i=d-1; i>=2; i--) { y[i]++; if (y[i]) goto END; }
398: y=new_chunk(1); y[2]=1; d++;
399: END:
400: y[1] = evalsigne(-1) | evallgefint(d);
401: y[0] = evaltyp(t_INT) | evallg(d); return y;
402: }
403:
404: int
405: cmpsi(long x, GEN y)
406: {
407: ulong p;
408:
409: if (!x) return -signe(y);
410:
411: if (x>0)
412: {
413: if (signe(y)<=0) return 1;
414: if (lgefint(y)>3) return -1;
415: p=y[2]; if (p == (ulong)x) return 0;
416: return p < (ulong)x ? 1 : -1;
417: }
418:
419: if (signe(y)>=0) return -1;
420: if (lgefint(y)>3) return 1;
421: p=y[2]; if (p == (ulong)-x) return 0;
422: return p < (ulong)(-x) ? -1 : 1;
423: }
424:
425: int
426: cmpii(GEN x, GEN y)
427: {
428: const long sx = signe(x), sy = signe(y);
429: long lx,ly,i;
430:
431: if (sx<sy) return -1;
432: if (sx>sy) return 1;
433: if (!sx) return 0;
434:
435: lx=lgefint(x); ly=lgefint(y);
436: if (lx>ly) return sx;
437: if (lx<ly) return -sx;
438: i=2; while (i<lx && x[i]==y[i]) i++;
439: if (i==lx) return 0;
440: return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
441: }
442:
443: int
444: cmprr(GEN x, GEN y)
445: {
446: const long sx = signe(x), sy = signe(y);
447: long ex,ey,lx,ly,lz,i;
448:
449: if (sx<sy) return -1;
450: if (sx>sy) return 1;
451: if (!sx) return 0;
452:
453: ex=expo(x); ey=expo(y);
454: if (ex>ey) return sx;
455: if (ex<ey) return -sx;
456:
457: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
458: i=2; while (i<lz && x[i]==y[i]) i++;
459: if (i<lz) return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
460: if (lx>=ly)
461: {
462: while (i<lx && !x[i]) i++;
463: return (i==lx) ? 0 : sx;
464: }
465: while (i<ly && !y[i]) i++;
466: return (i==ly) ? 0 : -sx;
467: }
468:
469: GEN
470: addss(long x, long y)
471: {
472: if (!x) return stoi(y);
473: if (x>0) { pos_s[2] = x; return addsi(y,pos_s); }
474: neg_s[2] = -x; return addsi(y,neg_s);
475: }
476:
477: GEN
478: addsi(long x, GEN y)
479: {
480: long sx,sy,ly;
481: GEN z;
482:
483: if (!x) return icopy(y);
484: sy=signe(y); if (!sy) return stoi(x);
485: if (x<0) { sx=-1; x=-x; } else sx=1;
486: if (sx==sy)
487: {
488: z = addsispec(x,y+2, lgefint(y)-2);
489: setsigne(z,sy); return z;
490: }
491: ly=lgefint(y);
492: if (ly==3)
493: {
494: const long d = y[2] - x;
495: if (!d) return gzero;
496: z=cgeti(3);
497: if (y[2] < 0 || d > 0) {
498: z[1] = evalsigne(sy) | evallgefint(3);
499: z[2] = d;
500: }
501: else {
502: z[1] = evalsigne(-sy) | evallgefint(3);
503: z[2] =-d;
504: }
505: return z;
506: }
507: z = subisspec(y+2,x, ly-2);
508: setsigne(z,sy); return z;
509: }
510:
511: GEN
512: addii(GEN x, GEN y)
513: {
514: long sx = signe(x);
515: long sy = signe(y);
516: long lx,ly;
517: GEN z;
518:
519: if (!sx) return sy? icopy(y): gzero;
520: if (!sy) return icopy(x);
521: lx=lgefint(x); ly=lgefint(y);
522:
523: if (sx==sy)
524: z = addiispec(x+2,y+2,lx-2,ly-2);
525: else
526: { /* sx != sy */
527: long i = lx - ly;
528: if (i==0)
529: {
530: i = absi_cmp(x,y);
531: if (!i) return gzero;
532: }
533: if (i<0) { sx=sy; swapspec(x,y, lx,ly); } /* ensure |x| >= |y| */
534: z = subiispec(x+2,y+2,lx-2,ly-2);
535: }
536: setsigne(z,sx); return z;
537: }
538:
539: GEN
540: addsr(long x, GEN y)
541: {
542: if (!x) return rcopy(y);
543: if (x>0) { pos_s[2]=x; return addir(pos_s,y); }
544: neg_s[2] = -x; return addir(neg_s,y);
545: }
546:
547: GEN
548: addir(GEN x, GEN y)
549: {
550: long e,l,ly;
551: GEN z;
552:
553: if (!signe(x)) return rcopy(y);
554: e = expo(y)-expi(x);
555: if (!signe(y))
556: {
557: #if 0
558: if (e>0) err(adder3);
559: #else /* design issue: make 0.0 "absorbing" */
560: if (e>0) return rcopy(y);
561: #endif
1.2 ! noro 562: return itor(x, 3 + ((-e)>>TWOPOTBITS_IN_LONG));
1.1 noro 563: }
564:
565: ly=lg(y);
1.2 ! noro 566: if (e > 0)
1.1 noro 567: {
568: l = ly - (e>>TWOPOTBITS_IN_LONG);
569: if (l<3) return rcopy(y);
570: }
571: else l = ly + ((-e)>>TWOPOTBITS_IN_LONG)+1;
1.2 ! noro 572: y = addrr(itor(x,l), y);
1.1 noro 573: z = y+l; ly = lg(y); while (ly--) z[ly] = y[ly];
1.2 ! noro 574: avma=(gpmem_t)z; return z;
1.1 noro 575: }
576:
577: GEN
578: addrr(GEN x, GEN y)
579: {
580: long sx=signe(x),sy=signe(y),ex=expo(x),ey=expo(y);
581: long e,m,flag,i,j,f2,lx,ly,lz;
582: GEN z;
583: LOCAL_OVERFLOW;
584:
585: e = ey-ex;
586: if (!sy)
587: {
588: if (!sx)
589: {
590: if (e > 0) ex=ey;
1.2 ! noro 591: return realzero_bit(ex);
1.1 noro 592: }
1.2 ! noro 593: if (e > 0) return realzero_bit(ey);
1.1 noro 594: lz = 3 + ((-e)>>TWOPOTBITS_IN_LONG);
595: lx = lg(x); if (lz>lx) lz=lx;
596: z = cgetr(lz); while(--lz) z[lz]=x[lz];
597: return z;
598: }
599: if (!sx)
600: {
1.2 ! noro 601: if (e < 0) return realzero_bit(ex);
1.1 noro 602: lz = 3 + (e>>TWOPOTBITS_IN_LONG);
603: ly = lg(y); if (lz>ly) lz=ly;
604: z = cgetr(lz); while (--lz) z[lz]=y[lz];
605: return z;
606: }
607:
608: if (e < 0) { z=x; x=y; y=z; ey=ex; i=sx; sx=sy; sy=i; e=-e; }
609: /* now ey >= ex */
610: lx=lg(x); ly=lg(y);
611: if (e)
612: {
613: long d = e >> TWOPOTBITS_IN_LONG, l = ly-d;
614: if (l > lx) { flag=0; lz=lx+d+1; }
615: else if (l > 2) { flag=1; lz=ly; lx=l; }
616: else return rcopy(y);
617: m = e & (BITS_IN_LONG-1);
618: }
619: else
620: {
621: if (lx > ly) lx = ly;
622: flag=2; lz=lx; m=0;
623: }
624: z = cgetr(lz);
625: if (m)
626: { /* shift right x m bits */
627: const ulong sh = BITS_IN_LONG-m;
628: GEN p1 = x; x = new_chunk(lx+1);
629: shift_right2(x,p1,2,lx, 0,m,sh);
630: if (flag==0)
631: {
632: x[lx] = p1[lx-1] << sh;
633: if (x[lx]) { flag = 3; lx++; }
634: }
635: }
636:
637: if (sx==sy)
638: { /* addition */
1.2 ! noro 639: i=lz-1; avma = (gpmem_t)z;
1.1 noro 640: if (flag==0) { z[i] = y[i]; i--; }
641: overflow=0;
642: for (j=lx-1; j>=2; i--,j--) z[i] = addllx(x[j],y[i]);
643:
644: if (overflow)
645: for (;;)
646: {
647: if (i==1)
648: {
649: shift_right(z,z, 2,lz, 1,1);
1.2 ! noro 650: z[1] = evalsigne(sx) | evalexpo(ey+1); return z;
1.1 noro 651: }
652: z[i] = y[i]+1; if (z[i--]) break;
653: }
654: for (; i>=2; i--) z[i]=y[i];
655: z[1] = evalsigne(sx) | evalexpo(ey); return z;
656: }
657:
658: /* subtraction */
659: if (e) f2 = 1;
660: else
661: {
662: i=2; while (i<lx && x[i]==y[i]) i++;
663: if (i==lx)
664: {
1.2 ! noro 665: avma = (gpmem_t)(z+lz);
! 666: return realzero_bit(ey - bit_accuracy(lx));
1.1 noro 667: }
668: f2 = ((ulong)y[i] > (ulong)x[i]);
669: }
670:
671: /* result is non-zero. f2 = (y > x) */
672: i=lz-1;
673: if (f2)
674: {
675: if (flag==0) { z[i] = y[i]; i--; }
676: j=lx-1; z[i] = subll(y[i],x[j]); i--; j--;
677: for (; j>=2; i--,j--) z[i] = subllx(y[i],x[j]);
678: if (overflow)
679: for (;;) { z[i] = y[i]-1; if (y[i--]) break; }
680: for (; i>=2; i--) z[i]=y[i];
681: sx = sy;
682: }
683: else
684: {
685: if (flag==0) { z[i] = -y[i]; i--; overflow=1; } else overflow=0;
686: for (; i>=2; i--) z[i]=subllx(x[i],y[i]);
687: }
688:
689: x = z+2; i=0; while (!x[i]) i++;
690: lz -= i; z += i; m = bfffo(z[2]);
691: if (m) shift_left(z,z,2,lz-1, 0,m);
1.2 ! noro 692: z[1] = evalsigne(sx) | evalexpo(ey - (m | (i<<TWOPOTBITS_IN_LONG)));
1.1 noro 693: z[0] = evaltyp(t_REAL) | evallg(lz);
1.2 ! noro 694: avma = (gpmem_t)z; return z;
1.1 noro 695: }
696:
697: GEN
698: mulss(long x, long y)
699: {
700: long s,p1;
701: GEN z;
702: LOCAL_HIREMAINDER;
703:
704: if (!x || !y) return gzero;
705: if (x<0) { s = -1; x = -x; } else s=1;
706: if (y<0) { s = -s; y = -y; }
707: p1 = mulll(x,y);
708: if (hiremainder)
709: {
710: z=cgeti(4); z[1] = evalsigne(s) | evallgefint(4);
711: z[2]=hiremainder; z[3]=p1; return z;
712: }
713: z=cgeti(3); z[1] = evalsigne(s) | evallgefint(3);
714: z[2]=p1; return z;
715: }
716: #endif
717:
718: GEN
719: muluu(ulong x, ulong y)
720: {
721: long p1;
722: GEN z;
723: LOCAL_HIREMAINDER;
724:
725: if (!x || !y) return gzero;
726: p1 = mulll(x,y);
727: if (hiremainder)
728: {
729: z=cgeti(4); z[1] = evalsigne(1) | evallgefint(4);
730: z[2]=hiremainder; z[3]=p1; return z;
731: }
732: z=cgeti(3); z[1] = evalsigne(1) | evallgefint(3);
733: z[2]=p1; return z;
734: }
735:
736: /* assume ny > 0 */
737: #ifdef INLINE
738: INLINE
739: #endif
740: GEN
741: mulsispec(long x, GEN y, long ny)
742: {
743: GEN yd, z = (GEN)avma;
744: long lz = ny+3;
745: LOCAL_HIREMAINDER;
746:
747: (void)new_chunk(lz);
748: yd = y + ny; *--z = mulll(x, *--yd);
749: while (yd > y) *--z = addmul(x,*--yd);
750: if (hiremainder) *--z = hiremainder; else lz--;
751: *--z = evalsigne(1) | evallgefint(lz);
752: *--z = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 753: avma=(gpmem_t)z; return z;
1.1 noro 754: }
755:
756: GEN
757: mului(ulong x, GEN y)
758: {
759: long s = signe(y);
760: GEN z;
761:
762: if (!s || !x) return gzero;
763: z = mulsispec(x, y+2, lgefint(y)-2);
764: setsigne(z,s); return z;
765: }
766:
767: /* a + b*Y, assume Y >= 0, 0 <= a,b <= VERYBIGINT */
768: GEN
769: addsmulsi(long a, long b, GEN Y)
770: {
771: GEN yd,y,z;
772: long ny,lz;
773: LOCAL_HIREMAINDER;
774: LOCAL_OVERFLOW;
775:
776: if (!signe(Y)) return stoi(a);
777:
778: y = Y+2; z = (GEN)avma;
779: ny = lgefint(Y)-2;
780: lz = ny+3;
781:
782: (void)new_chunk(lz);
783: yd = y + ny; *--z = addll(a, mulll(b, *--yd));
784: if (overflow) hiremainder++; /* can't overflow */
785: while (yd > y) *--z = addmul(b,*--yd);
786: if (hiremainder) *--z = hiremainder; else lz--;
787: *--z = evalsigne(1) | evallgefint(lz);
788: *--z = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 789: avma=(gpmem_t)z; return z;
1.1 noro 790: }
791:
792: #ifndef __M68K__
793:
794: GEN
795: mulsi(long x, GEN y)
796: {
797: long s = signe(y);
798: GEN z;
799:
800: if (!s || !x) return gzero;
801: if (x<0) { s = -s; x = -x; }
802: z = mulsispec(x, y+2, lgefint(y)-2);
803: setsigne(z,s); return z;
804: }
805:
806: GEN
807: mulsr(long x, GEN y)
808: {
809: long lx,i,s,garde,e,sh,m;
810: GEN z;
811: LOCAL_HIREMAINDER;
812:
813: if (!x) return gzero;
814: s = signe(y);
815: if (!s)
816: {
817: if (x<0) x = -x;
1.2 ! noro 818: e = expo(y) + (BITS_IN_LONG-1)-bfffo(x);
! 819: return realzero_bit(e);
1.1 noro 820: }
821: if (x<0) { s = -s; x = -x; }
822: if (x==1) { z=rcopy(y); setsigne(z,s); return z; }
823:
824: lx = lg(y); e = expo(y);
825: z=cgetr(lx); y--; garde=mulll(x,y[lx]);
826: for (i=lx-1; i>=3; i--) z[i]=addmul(x,y[i]);
827: z[2]=hiremainder;
828:
829: sh = bfffo(hiremainder); m = BITS_IN_LONG-sh;
830: if (sh) shift_left2(z,z, 2,lx-1, garde,sh,m);
1.2 ! noro 831: z[1] = evalsigne(s) | evalexpo(m+e); return z;
1.1 noro 832: }
833:
834: GEN
835: mulrr(GEN x, GEN y)
836: {
837: long sx = signe(x), sy = signe(y);
838: long i,j,ly,lz,lzz,e,flag,p1;
839: ulong garde;
840: GEN z, y1;
841: LOCAL_HIREMAINDER;
842: LOCAL_OVERFLOW;
843:
1.2 ! noro 844: e = expo(x)+expo(y);
! 845: if (!sx || !sy) return realzero_bit(e);
1.1 noro 846: if (sy<0) sx = -sx;
847: lz=lg(x); ly=lg(y);
848: if (lz>ly) { lz=ly; z=x; x=y; y=z; flag=1; } else flag = (lz!=ly);
1.2 ! noro 849: z=cgetr(lz);
1.1 noro 850: if (lz==3)
851: {
852: if (flag)
853: {
854: (void)mulll(x[2],y[3]);
855: garde = addmul(x[2],y[2]);
856: }
857: else
858: garde = mulll(x[2],y[2]);
1.2 ! noro 859: if ((long)hiremainder<0) { z[2]=hiremainder; e++; }
1.1 noro 860: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
1.2 ! noro 861: z[1] = evalsigne(sx)|evalexpo(e);
1.1 noro 862: return z;
863: }
864:
865: if (flag) { (void)mulll(x[2],y[lz]); garde = hiremainder; } else garde=0;
866: lzz=lz-1; p1=x[lzz];
867: if (p1)
868: {
869: (void)mulll(p1,y[3]);
870: garde = addll(addmul(p1,y[2]), garde);
871: z[lzz] = overflow+hiremainder;
872: }
873: else z[lzz]=0;
874: for (j=lz-2, y1=y-j; j>=3; j--)
875: {
876: p1 = x[j]; y1++;
877: if (p1)
878: {
879: (void)mulll(p1,y1[lz+1]);
880: garde = addll(addmul(p1,y1[lz]), garde);
881: for (i=lzz; i>j; i--)
882: {
883: hiremainder += overflow;
884: z[i] = addll(addmul(p1,y1[i]), z[i]);
885: }
886: z[j] = hiremainder+overflow;
887: }
888: else z[j]=0;
889: }
890: p1=x[2]; y1++;
891: garde = addll(mulll(p1,y1[lz]), garde);
892: for (i=lzz; i>2; i--)
893: {
894: hiremainder += overflow;
895: z[i] = addll(addmul(p1,y1[i]), z[i]);
896: }
897: z[2] = hiremainder+overflow;
1.2 ! noro 898: if (z[2] < 0) e++; else shift_left(z,z,2,lzz,garde, 1);
! 899: z[1] = evalsigne(sx) | evalexpo(e);
1.1 noro 900: return z;
901: }
902:
903: GEN
904: mulir(GEN x, GEN y)
905: {
906: long sx=signe(x),sy,lz,lzz,ey,e,p1,i,j;
907: ulong garde;
908: GEN z,y1;
909: LOCAL_HIREMAINDER;
910: LOCAL_OVERFLOW;
911:
912: if (!sx) return gzero;
913: if (!is_bigint(x)) return mulsr(itos(x),y);
914: sy=signe(y); ey=expo(y);
1.2 ! noro 915: if (!sy) return realzero_bit(expi(x)+ey);
1.1 noro 916:
917: if (sy<0) sx = -sx;
918: lz=lg(y); z=cgetr(lz);
1.2 ! noro 919: y1 = itor(x, lz+1); x = y; y = y1;
! 920: e = expo(y)+ey;
1.1 noro 921: if (lz==3)
922: {
923: (void)mulll(x[2],y[3]);
924: garde=addmul(x[2],y[2]);
1.2 ! noro 925: if ((long)hiremainder < 0) { z[2]=hiremainder; e++; }
1.1 noro 926: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
1.2 ! noro 927: z[1] = evalsigne(sx) | evalexpo(e);
! 928: avma=(gpmem_t)z; return z;
1.1 noro 929: }
930:
931: (void)mulll(x[2],y[lz]); garde=hiremainder;
932: lzz=lz-1; p1=x[lzz];
933: if (p1)
934: {
935: (void)mulll(p1,y[3]);
936: garde=addll(addmul(p1,y[2]),garde);
937: z[lzz] = overflow+hiremainder;
938: }
939: else z[lzz]=0;
940: for (j=lz-2, y1=y-j; j>=3; j--)
941: {
942: p1=x[j]; y1++;
943: if (p1)
944: {
945: (void)mulll(p1,y1[lz+1]);
946: garde = addll(addmul(p1,y1[lz]), garde);
947: for (i=lzz; i>j; i--)
948: {
949: hiremainder += overflow;
950: z[i] = addll(addmul(p1,y1[i]), z[i]);
951: }
952: z[j] = hiremainder+overflow;
953: }
954: else z[j]=0;
955: }
956: p1=x[2]; y1++;
957: garde = addll(mulll(p1,y1[lz]), garde);
958: for (i=lzz; i>2; i--)
959: {
960: hiremainder += overflow;
961: z[i] = addll(addmul(p1,y1[i]), z[i]);
962: }
963: z[2] = hiremainder+overflow;
1.2 ! noro 964: if (z[2] < 0) e++;
1.1 noro 965: else
966: shift_left(z,z,2,lzz,garde, 1);
1.2 ! noro 967: z[1] = evalsigne(sx) | evalexpo(e);
! 968: avma=(gpmem_t)z; return z;
1.1 noro 969: }
970:
             971: /* 2-adic valuation of z (-1 if z = 0); written by Bruno Haible following an idea of Robert Harley */
972: long
973: vals(ulong z)
974: {
975: static char tab[64]={-1,0,1,12,2,6,-1,13,3,-1,7,-1,-1,-1,-1,14,10,4,-1,-1,8,-1,-1,25,-1,-1,-1,-1,-1,21,27,15,31,11,5,-1,-1,-1,-1,-1,9,-1,-1,24,-1,-1,20,26,30,-1,-1,-1,-1,23,-1,19,29,-1,22,18,28,17,16,-1};
976: #ifdef LONG_IS_64BIT
977: long s;
978: #endif
979:
980: if (!z) return -1;
981: #ifdef LONG_IS_64BIT
982: if (! (z&0xffffffff)) { s = 32; z >>=32; } else s = 0;
983: #endif
984: z |= ~z + 1;
985: z += z << 4;
986: z += z << 6;
987: z ^= z << 16; /* or z -= z<<16 */
988: #ifdef LONG_IS_64BIT
989: return s + tab[(z&0xffffffff)>>26];
990: #else
991: return tab[z>>26];
992: #endif
993: }
994:
995: GEN
996: modss(long x, long y)
997: {
998: LOCAL_HIREMAINDER;
999:
1000: if (!y) err(moder1);
1001: if (y<0) y=-y;
1.2 ! noro 1002: hiremainder=0; (void)divll(labs(x),y);
1.1 noro 1003: if (!hiremainder) return gzero;
1004: return (x < 0) ? stoi(y-hiremainder) : stoi(hiremainder);
1005: }
1006:
1007: GEN
1008: resss(long x, long y)
1009: {
1010: LOCAL_HIREMAINDER;
1011:
1012: if (!y) err(reser1);
1.2 ! noro 1013: hiremainder=0; (void)divll(labs(x),labs(y));
1.1 noro 1014: return (x < 0) ? stoi(-((long)hiremainder)) : stoi(hiremainder);
1015: }
1016:
1017: GEN
1018: divsi(long x, GEN y)
1019: {
1020: long p1, s = signe(y);
1021: LOCAL_HIREMAINDER;
1022:
1023: if (!s) err(diver2);
1024: if (!x || lgefint(y)>3 || ((long)y[2])<0)
1025: {
1026: hiremainder=x; SAVE_HIREMAINDER; return gzero;
1027: }
1028: hiremainder=0; p1=divll(labs(x),y[2]);
1029: if (x<0) { hiremainder = -((long)hiremainder); p1 = -p1; }
1030: if (s<0) p1 = -p1;
1031: SAVE_HIREMAINDER; return stoi(p1);
1032: }
1033: #endif
1034:
1035: GEN
1036: modui(ulong x, GEN y)
1037: {
1038: LOCAL_HIREMAINDER;
1039:
1040: if (!signe(y)) err(diver2);
1041: if (!x || lgefint(y)>3) hiremainder=x;
1042: else
1043: {
1044: hiremainder=0; (void)divll(x,y[2]);
1045: }
1046: return utoi(hiremainder);
1047: }
1048:
1049: ulong
1050: umodiu(GEN y, ulong x)
1051: {
1052: long sy=signe(y),ly,i;
1053: LOCAL_HIREMAINDER;
1054:
1055: if (!x) err(diver4);
1056: if (!sy) return 0;
1057: ly = lgefint(y);
1058: if (x <= (ulong)y[2]) hiremainder=0;
1059: else
1060: {
1061: if (ly==3) return (sy > 0)? (ulong)y[2]: x - (ulong)y[2];
1062: hiremainder=y[2]; ly--; y++;
1063: }
1064: for (i=2; i<ly; i++) (void)divll(y[i],x);
1065: if (!hiremainder) return 0;
1066: return (sy > 0)? hiremainder: x - hiremainder;
1067: }
1068:
1069: GEN
1070: modiu(GEN y, ulong x) { return utoi(umodiu(y,x)); }
1071:
1.2 ! noro 1072: /* return floor(|y| / x); the remainder is left in hiremainder */
! 1073: GEN
! 1074: diviu(GEN y, ulong x)
! 1075: {
! 1076: long sy=signe(y),ly,i;
! 1077: GEN z;
! 1078: LOCAL_HIREMAINDER;
! 1079:
! 1080: if (!x) err(diver4);
! 1081: if (!sy) { hiremainder=0; SAVE_HIREMAINDER; return gzero; }
! 1082:
! 1083: ly = lgefint(y);
! 1084: if (x <= (ulong)y[2]) hiremainder=0;
! 1085: else
! 1086: {
! 1087: if (ly==3) { hiremainder=itou(y); SAVE_HIREMAINDER; return gzero; }
! 1088: hiremainder=y[2]; ly--; y++;
! 1089: }
! 1090: z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(1);
! 1091: for (i=2; i<ly; i++) z[i]=divll(y[i],x);
! 1092: SAVE_HIREMAINDER; return z;
! 1093: }
! 1094:
1.1 noro 1095: #ifndef __M68K__
1096:
1097: GEN
1098: modsi(long x, GEN y)
1099: {
1100: long s = signe(y);
1101: GEN p1;
1102: LOCAL_HIREMAINDER;
1103:
1104: if (!s) err(diver2);
1105: if (!x || lgefint(y)>3 || ((long)y[2])<0) hiremainder=x;
1106: else
1107: {
1108: hiremainder=0; (void)divll(labs(x),y[2]);
1109: if (x<0) hiremainder = -((long)hiremainder);
1110: }
1111: if (!hiremainder) return gzero;
1112: if (x>0) return stoi(hiremainder);
1113: if (s<0)
1114: { setsigne(y,1); p1=addsi(hiremainder,y); setsigne(y,-1); }
1115: else
1116: p1=addsi(hiremainder,y);
1117: return p1;
1118: }
1119:
1120: GEN
1121: divis(GEN y, long x)
1122: {
1123: long sy=signe(y),ly,s,i;
1124: GEN z;
1125: LOCAL_HIREMAINDER;
1126:
1127: if (!x) err(diver4);
1128: if (!sy) { hiremainder=0; SAVE_HIREMAINDER; return gzero; }
1129: if (x<0) { s = -sy; x = -x; } else s=sy;
1130:
1131: ly = lgefint(y);
1132: if ((ulong)x <= (ulong)y[2]) hiremainder=0;
1133: else
1134: {
1135: if (ly==3) { hiremainder=itos(y); SAVE_HIREMAINDER; return gzero; }
1136: hiremainder=y[2]; ly--; y++;
1137: }
1138: z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
1139: for (i=2; i<ly; i++) z[i]=divll(y[i],x);
1140: if (sy<0) hiremainder = - ((long)hiremainder);
1141: SAVE_HIREMAINDER; return z;
1142: }
1143:
1144: GEN
1145: divir(GEN x, GEN y)
1146: {
1.2 ! noro 1147: GEN z;
! 1148: long ly;
! 1149: gpmem_t av;
1.1 noro 1150:
1151: if (!signe(y)) err(diver5);
1152: if (!signe(x)) return gzero;
1.2 ! noro 1153: ly = lg(y); z = cgetr(ly); av = avma;
! 1154: affrr(divrr(itor(x, ly+1), y), z);
! 1155: avma = av; return z;
1.1 noro 1156: }
1157:
1158: GEN
1159: divri(GEN x, GEN y)
1160: {
1.2 ! noro 1161: long lx, s = signe(y);
! 1162: gpmem_t av;
! 1163: GEN z;
1.1 noro 1164:
1165: if (!s) err(diver8);
1.2 ! noro 1166: if (!signe(x)) return realzero_bit(expo(x) - expi(y));
1.1 noro 1167: if (!is_bigint(y)) return divrs(x, s>0? y[2]: -y[2]);
1168:
1.2 ! noro 1169: lx = lg(x); z = cgetr(lx); av = avma;
! 1170: affrr(divrr(x, itor(y, lx+1)), z);
! 1171: avma = av; return z;
1.1 noro 1172: }
1173:
1174: void
1175: diviiz(GEN x, GEN y, GEN z)
1176: {
1.2 ! noro 1177: gpmem_t av = avma;
! 1178: if (typ(z) == t_INT) affii(divii(x,y), z);
! 1179: else {
! 1180: long lz = lg(z);
! 1181: affrr(divrr(itor(x,lz), itor(y,lz)), z);
! 1182: }
! 1183: avma = av;
1.1 noro 1184: }
1185:
1186: void
1187: mpdivz(GEN x, GEN y, GEN z)
1188: {
1.2 ! noro 1189: gpmem_t av = avma;
! 1190: GEN r;
1.1 noro 1191:
1192: if (typ(z)==t_INT)
1193: {
1.2 ! noro 1194: if (typ(x) == t_REAL || typ(y) == t_REAL) err(divzer1);
! 1195: affii(divii(x,y), z);
! 1196: avma = av; return;
1.1 noro 1197: }
1.2 ! noro 1198:
! 1199: if (typ(x) == t_INT)
1.1 noro 1200: {
1.2 ! noro 1201: if (typ(y) == t_REAL)
! 1202: r = divir(x,y);
! 1203: else
! 1204: {
! 1205: long lz = lg(z);
! 1206: r = divrr(itor(x,lz), itor(y,lz));
! 1207: }
1.1 noro 1208: }
1.2 ! noro 1209: else if (typ(y) == t_REAL)
! 1210: r = divrr(x,y);
! 1211: else
! 1212: r = divri(x,y);
! 1213: affrr(r, z); avma = av;
1.1 noro 1214: }
1215:
1216: GEN
1217: divsr(long x, GEN y)
1218: {
1.2 ! noro 1219: gpmem_t av;
! 1220: long ly;
! 1221: GEN z;
1.1 noro 1222:
1223: if (!signe(y)) err(diver3);
1224: if (!x) return gzero;
1.2 ! noro 1225: ly = lg(y); z = cgetr(ly); av = avma;
! 1226: affrr(divrr(stor(x,ly+1), y), z);
! 1227: avma = av; return z;
1.1 noro 1228: }
1229:
1230: GEN
1231: modii(GEN x, GEN y)
1232: {
1233: switch(signe(x))
1234: {
1235: case 0: return gzero;
1236: case 1: return resii(x,y);
1237: default:
1238: {
1.2 ! noro 1239: gpmem_t av = avma;
1.1 noro 1240: (void)new_chunk(lgefint(y));
1241: x = resii(x,y); avma=av;
1242: if (x==gzero) return x;
1243: return subiispec(y+2,x+2,lgefint(y)-2,lgefint(x)-2);
1244: }
1245: }
1246: }
1247:
1248: void
1249: modiiz(GEN x, GEN y, GEN z)
1250: {
1.2 ! noro 1251: const gpmem_t av = avma;
1.1 noro 1252: affii(modii(x,y),z); avma=av;
1253: }
1254:
1255: GEN
1256: divrs(GEN x, long y)
1257: {
1.2 ! noro 1258: long i,lx,garde,sh,s=signe(x);
1.1 noro 1259: GEN z;
1260: LOCAL_HIREMAINDER;
1261:
1262: if (!y) err(diver6);
1.2 ! noro 1263: if (!s) return realzero_bit(expo(x) - (BITS_IN_LONG-1)+bfffo(y));
1.1 noro 1264: if (y<0) { s = -s; y = -y; }
1265: if (y==1) { z=rcopy(x); setsigne(z,s); return z; }
1266:
1267: z=cgetr(lx=lg(x)); hiremainder=0;
1268: for (i=2; i<lx; i++) z[i]=divll(x[i],y);
1269:
1270: /* we may have hiremainder != 0 ==> garde */
1.2 ! noro 1271: garde=divll(0,y); sh=bfffo(z[2]);
1.1 noro 1272: if (sh) shift_left(z,z, 2,lx-1, garde,sh);
1.2 ! noro 1273: z[1] = evalsigne(s) | evalexpo(expo(x)-sh);
1.1 noro 1274: return z;
1275: }
1276:
1277: GEN
1278: divrr(GEN x, GEN y)
1279: {
1280: long sx=signe(x), sy=signe(y), lx,ly,lz,e,i,j;
1281: ulong si,saux;
1282: GEN z,x1;
1283:
1284: if (!sy) err(diver9);
1.2 ! noro 1285: e = expo(x) - expo(y);
! 1286: if (!sx) return realzero_bit(e);
1.1 noro 1287: if (sy<0) sx = -sx;
1288: lx=lg(x); ly=lg(y);
1289: if (ly==3)
1290: {
1291: ulong k = x[2], l = (lx>3)? x[3]: 0;
1292: LOCAL_HIREMAINDER;
1293: if (k < (ulong)y[2]) e--;
1294: else
1295: {
1296: l >>= 1; if (k&1) l |= HIGHBIT;
1297: k >>= 1;
1298: }
1.2 ! noro 1299: z = cgetr(3); z[1] = evalsigne(sx) | evalexpo(e);
1.1 noro 1300: hiremainder=k; z[2]=divll(l,y[2]); return z;
1301: }
1302:
1.2 ! noro 1303: lz = min(lx,ly); z = new_chunk(lz);
! 1304: x1 = z-1;
1.1 noro 1305: x1[1]=0; for (i=2; i<lz; i++) x1[i]=x[i];
1306: x1[lz] = (lx>lz)? x[lz]: 0;
1307: x=z; si=y[2]; saux=y[3];
1308: for (i=0; i<lz-1; i++)
1309: { /* x1 = x + (i-1) */
1310: ulong k,qp;
1311: LOCAL_HIREMAINDER;
1312: LOCAL_OVERFLOW;
1313: if ((ulong)x1[1] == si)
1314: {
1315: qp = MAXULONG; k=addll(si,x1[2]);
1316: }
1317: else
1318: {
1319: if ((ulong)x1[1] > si) /* can't happen if i=0 */
1320: {
1321: GEN y1 = y+1;
1322: overflow=0;
1323: for (j=lz-i; j>0; j--) x1[j] = subllx(x1[j],y1[j]);
1324: j=i; do x[--j]++; while (j && !x[j]);
1325: }
1326: hiremainder=x1[1]; overflow=0;
1327: qp=divll(x1[2],si); k=hiremainder;
1328: }
1329: if (!overflow)
1330: {
1331: long k3 = subll(mulll(qp,saux), x1[3]);
1332: long k4 = subllx(hiremainder,k);
1333: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
1334: }
1335: j = lz-i+1;
1336: if (j<ly) (void)mulll(qp,y[j]); else { hiremainder=0 ; j=ly; }
1337: for (j--; j>1; j--)
1338: {
1339: x1[j] = subll(x1[j], addmul(qp,y[j]));
1340: hiremainder += overflow;
1341: }
1342: if ((ulong)x1[1] != hiremainder)
1343: {
1344: if ((ulong)x1[1] < hiremainder)
1345: {
1346: qp--;
1347: overflow=0; for (j=lz-i; j>1; j--) x1[j]=addllx(x1[j], y[j]);
1348: }
1349: else
1350: {
1351: x1[1] -= hiremainder;
1352: while (x1[1])
1353: {
1354: qp++; if (!qp) { j=i; do x[--j]++; while (j && !x[j]); }
1355: overflow=0; for (j=lz-i; j>1; j--) x1[j]=subllx(x1[j],y[j]);
1356: x1[1] -= overflow;
1357: }
1358: }
1359: }
1360: x1[1]=qp; x1++;
1361: }
1362: x1 = x-1; for (j=lz-1; j>=2; j--) x[j]=x1[j];
1363: if (*x) { shift_right(x,x, 2,lz, 1,1); } else e--;
1.2 ! noro 1364: x[0] = evaltyp(t_REAL)|evallg(lz);
! 1365: x[1] = evalsigne(sx) | evalexpo(e);
! 1366: return x;
1.1 noro 1367: }
1368: #endif /* !defined(__M68K__) */
1.2 ! noro 1369:
1.1 noro 1370: /* The following ones are not in mp.s (mulii is, with a different algorithm) */
1371:
1.2 ! noro 1372: /* Integer division x / y: such that sign(r) = sign(x)
1.1 noro 1373: * if z = ONLY_REM return remainder, otherwise return quotient
1374: * if z != NULL set *z to remainder
1375: * *z is the last object on stack (and thus can be disposed of with cgiv
1376: * instead of gerepile)
1377: * If *z is zero, we put gzero here and no copy.
1378: * space needed: lx + ly
1379: */
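/* Illustrative example of the sign conventions: with x = -7, y = 2, dvmdii
 * returns q = -3 and remainder r = -1 (r has the sign of x), whereas
 * truedvmdii further down returns q = -4 and r = 1 (r always >= 0). */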
1380: GEN
1381: dvmdii(GEN x, GEN y, GEN *z)
1382: {
1383: long sx=signe(x),sy=signe(y);
1.2 ! noro 1384: long lx, ly, lz, i, j, sh, k, lq, lr;
! 1385: gpmem_t av;
1.1 noro 1386: ulong si,qp,saux, *xd,*rd,*qd;
1387: GEN r,q,x1;
1388:
1389: if (!sy) err(dvmer1);
1390: if (!sx)
1391: {
1392: if (!z || z == ONLY_REM) return gzero;
1393: *z=gzero; return gzero;
1394: }
1395: lx=lgefint(x);
1396: ly=lgefint(y); lz=lx-ly;
1397: if (lz <= 0)
1398: {
1399: if (lz == 0)
1400: {
1401: for (i=2; i<lx; i++)
1402: if (x[i] != y[i])
1403: {
1404: if ((ulong)x[i] > (ulong)y[i]) goto DIVIDE;
1405: goto TRIVIAL;
1406: }
1407: if (z == ONLY_REM) return gzero;
1408: if (z) *z = gzero;
1409: if (sx < 0) sy = -sy;
1410: return stoi(sy);
1411: }
1412: TRIVIAL:
1413: if (z == ONLY_REM) return icopy(x);
1414: if (z) *z = icopy(x);
1415: return gzero;
1416: }
1417: DIVIDE: /* quotient is non-zero */
1418: av=avma; if (sx<0) sy = -sy;
1419: if (ly==3)
1420: {
1421: LOCAL_HIREMAINDER;
1422: si = y[2];
1423: if (si <= (ulong)x[2]) hiremainder=0;
1424: else
1425: {
1426: hiremainder = x[2]; lx--; x++;
1427: }
1428: q = new_chunk(lx); for (i=2; i<lx; i++) q[i]=divll(x[i],si);
1429: if (z == ONLY_REM)
1430: {
1431: avma=av; if (!hiremainder) return gzero;
1432: r=cgeti(3);
1433: r[1] = evalsigne(sx) | evallgefint(3);
1434: r[2]=hiremainder; return r;
1435: }
1436: q[1] = evalsigne(sy) | evallgefint(lx);
1437: q[0] = evaltyp(t_INT) | evallg(lx);
1438: if (!z) return q;
1439: if (!hiremainder) { *z=gzero; return q; }
1440: r=cgeti(3);
1441: r[1] = evalsigne(sx) | evallgefint(3);
1442: r[2] = hiremainder; *z=r; return q;
1443: }
1444:
1445: x1 = new_chunk(lx); sh = bfffo(y[2]);
1446: if (sh)
1447: { /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/
1448: register const ulong m = BITS_IN_LONG - sh;
1449: r = new_chunk(ly);
1450: shift_left2(r, y,2,ly-1, 0,sh,m); y = r;
1451: shift_left2(x1,x,2,lx-1, 0,sh,m);
1452: x1[1] = ((ulong)x[2]) >> m;
1453: }
1454: else
1455: {
1456: x1[1]=0; for (j=2; j<lx; j++) x1[j]=x[j];
1457: }
1458: x=x1; si=y[2]; saux=y[3];
1459: for (i=0; i<=lz; i++)
1460: { /* x1 = x + i */
1461: LOCAL_HIREMAINDER;
1462: LOCAL_OVERFLOW;
1463: if ((ulong)x1[1] == si)
1464: {
1465: qp = MAXULONG; k=addll(si,x1[2]);
1466: }
1467: else
1468: {
1469: hiremainder=x1[1]; overflow=0;
1470: qp=divll(x1[2],si); k=hiremainder;
1471: }
1472: if (!overflow)
1473: {
1474: long k3 = subll(mulll(qp,saux), x1[3]);
1475: long k4 = subllx(hiremainder,k);
1476: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
1477: }
1478: hiremainder=0;
1479: for (j=ly-1; j>1; j--)
1480: {
1481: x1[j] = subll(x1[j], addmul(qp,y[j]));
1482: hiremainder+=overflow;
1483: }
1484: if ((ulong)x1[1] < hiremainder)
1485: {
1486: overflow=0; qp--;
1487: for (j=ly-1; j>1; j--) x1[j] = addllx(x1[j],y[j]);
1488: }
1489: x1[1]=qp; x1++;
1490: }
1491:
1492: lq = lz+2;
1493: if (!z)
1494: {
1495: qd = (ulong*)av;
1496: xd = (ulong*)(x + lq);
1497: if (x[1]) { lz++; lq++; }
1498: while (lz--) *--qd = *--xd;
1499: *--qd = evalsigne(sy) | evallgefint(lq);
1500: *--qd = evaltyp(t_INT) | evallg(lq);
1.2 ! noro 1501: avma = (gpmem_t)qd; return (GEN)qd;
1.1 noro 1502: }
1503:
1504: j=lq; while (j<lx && !x[j]) j++;
1505: lz = lx-j;
1506: if (z == ONLY_REM)
1507: {
1508: if (lz==0) { avma = av; return gzero; }
1509: rd = (ulong*)av; lr = lz+2;
1510: xd = (ulong*)(x + lx);
1511: if (!sh) while (lz--) *--rd = *--xd;
1512: else
1513: { /* shift remainder right by sh bits */
1514: const ulong shl = BITS_IN_LONG - sh;
1515: ulong l;
1516: xd--;
1517: while (--lz) /* fill r[3..] */
1518: {
1519: l = *xd >> sh;
1520: *--rd = l | (*--xd << shl);
1521: }
1522: l = *xd >> sh;
1523: if (l) *--rd = l; else lr--;
1524: }
1525: *--rd = evalsigne(sx) | evallgefint(lr);
1526: *--rd = evaltyp(t_INT) | evallg(lr);
1.2 ! noro 1527: avma = (gpmem_t)rd; return (GEN)rd;
1.1 noro 1528: }
1529:
1530: lr = lz+2;
1531: rd = NULL; /* gcc -Wall */
1532: if (lz)
1533: { /* non zero remainder: initialize rd */
1534: xd = (ulong*)(x + lx);
1535: if (!sh)
1536: {
1537: rd = (ulong*)avma; (void)new_chunk(lr);
1538: while (lz--) *--rd = *--xd;
1539: }
1540: else
1541: { /* shift remainder right by sh bits */
1542: const ulong shl = BITS_IN_LONG - sh;
1543: ulong l;
1544: rd = (ulong*)x; /* overwrite shifted y */
1545: xd--;
1546: while (--lz)
1547: {
1548: l = *xd >> sh;
1549: *--rd = l | (*--xd << shl);
1550: }
1551: l = *xd >> sh;
1552: if (l) *--rd = l; else lr--;
1553: }
1554: *--rd = evalsigne(sx) | evallgefint(lr);
1555: *--rd = evaltyp(t_INT) | evallg(lr);
1556: rd += lr;
1557: }
1558: qd = (ulong*)av;
1559: xd = (ulong*)(x + lq);
1560: if (x[1]) lq++;
1561: j = lq-2; while (j--) *--qd = *--xd;
1562: *--qd = evalsigne(sy) | evallgefint(lq);
1563: *--qd = evaltyp(t_INT) | evallg(lq);
1564: q = (GEN)qd;
1565: if (lr==2) *z = gzero;
1566: else
1567: { /* rd has been properly initialized: we had lz > 0 */
1568: while (lr--) *--qd = *--rd;
1569: *z = (GEN)qd;
1570: }
1.2 ! noro 1571: avma = (gpmem_t)qd; return q;
1.1 noro 1572: }
1573:
1574: /* assume y > x > 0. return y mod x */
1575: ulong
1576: mppgcd_resiu(GEN y, ulong x)
1577: {
1578: long i, ly = lgefint(y);
1579: LOCAL_HIREMAINDER;
1580:
1581: hiremainder = 0;
1582: for (i=2; i<ly; i++) (void)divll(y[i],x);
1583: return hiremainder;
1584: }
1585:
             1586: /* Assume x>y>0, both odd. Set res to the odd part of x-y if x = y mod 4, of x+y otherwise */
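/* Illustrative: x = 7, y = 3 agree mod 4, so t = x-y = 4 and res becomes its
 * odd part 1; x = 7, y = 5 differ mod 4, so t = x+y = 12 and res = 3. */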
1587: void
1588: mppgcd_plus_minus(GEN x, GEN y, GEN res)
1589: {
1.2 ! noro 1590: gpmem_t av = avma;
1.1 noro 1591: long lx = lgefint(x)-1;
1592: long ly = lgefint(y)-1, lt,m,i;
1593: GEN t;
1594:
1595: if ((x[lx]^y[ly]) & 3) /* x != y mod 4*/
1596: t = addiispec(x+2,y+2,lx-1,ly-1);
1597: else
1598: t = subiispec(x+2,y+2,lx-1,ly-1);
1599:
1600: lt = lgefint(t)-1; while (!t[lt]) lt--;
1601: m = vals(t[lt]); lt++;
1602: if (m == 0) /* 2^32 | t */
1603: {
1604: for (i = 2; i < lt; i++) res[i] = t[i];
1605: }
1606: else if (t[2] >> m)
1607: {
1608: shift_right(res,t, 2,lt, 0,m);
1609: }
1610: else
1611: {
1612: lt--; t++;
1613: shift_right(res,t, 2,lt, t[1],m);
1614: }
1615: res[1] = evalsigne(1)|evallgefint(lt);
1616: avma = av;
1617: }
1618:
1619: /* private version of mulss */
1620: ulong
1621: smulss(ulong x, ulong y, ulong *rem)
1622: {
1623: LOCAL_HIREMAINDER;
1624: x=mulll(x,y); *rem=hiremainder; return x;
1625: }
1626:
1627: #ifdef LONG_IS_64BIT
1628: # define DIVCONVI 7
1629: #else
1630: # define DIVCONVI 14
1631: #endif
1632:
1.2 ! noro 1633: /* convert integer --> base 10^9 */
1.1 noro 1634: GEN
1635: convi(GEN x)
1636: {
1.2 ! noro 1637: gpmem_t av = avma, lim;
1.1 noro 1638: long lz;
1639: GEN z,p1;
1640:
1641: lz = 3 + ((lgefint(x)-2)*15)/DIVCONVI;
1.2 ! noro 1642: z = new_chunk(lz); z[1] = -1; p1 = z+2;
1.1 noro 1643: lim = stack_lim(av,1);
1644: for(;;)
1645: {
1.2 ! noro 1646: x = diviu(x,1000000000); *p1++ = hiremainder;
! 1647: if (!signe(x)) { avma = av; return p1; }
! 1648: if (low_stack(lim, stack_lim(av,1))) x = gerepileuptoint((gpmem_t)z,x);
1.1 noro 1649: }
1650: }
1651: #undef DIVCONVI
1652:
1653: /* convert fractional part --> base 10^9 [assume expo(x) < 0] */
1654: GEN
1655: confrac(GEN x)
1656: {
1657: long lx=lg(x), ex = -expo(x)-1, d,m,ly,ey,lr,i,j;
1658: GEN y,y1;
1659:
1660: ey = ex + bit_accuracy(lx);
1661: ly = 1 + ((ey-1) >> TWOPOTBITS_IN_LONG);
1662: d = ex >> TWOPOTBITS_IN_LONG;
1663: m = ex & (BITS_IN_LONG-1);
1664: y = new_chunk(ly); y1 = y + (d-2);
1665: while (d--) y[d]=0;
1666: if (!m)
1667: for (i=2; i<lx; i++) y1[i] = x[i];
1668: else
1669: { /* shift x left sh bits */
1670: const ulong sh=BITS_IN_LONG-m;
1671: ulong k=0, l;
1672: for (i=2; i<lx; i++)
1673: {
1674: l = x[i];
1675: y1[i] = (l>>m)|k;
1676: k = l<<sh;
1677: }
1678: y1[i] = k;
1679: }
1680: lr = 1 + ((long) (ey*L2SL10))/9;
1681: y1 = new_chunk(lr);
1682: for (j=0; j<lr; j++)
1683: {
1684: LOCAL_HIREMAINDER;
1685: hiremainder=0;
1686: for (i=ly-1; i>=0; i--) y[i]=addmul(y[i],1000000000);
1687: y1[j]=hiremainder;
1688: }
1689: return y1;
1690: }
1691:
1692: GEN
1693: truedvmdii(GEN x, GEN y, GEN *z)
1694: {
1.2 ! noro 1695: gpmem_t av = avma;
1.1 noro 1696: GEN r, q = dvmdii(x,y,&r); /* assume that r is last on stack */
1697: GEN *gptr[2];
1698:
1699: if (signe(r)>=0)
1700: {
1701: if (z == ONLY_REM) return gerepileuptoint(av,r);
1702: if (z) *z = r; else cgiv(r);
1703: return q;
1704: }
1705:
1706: if (z == ONLY_REM)
1.2 ! noro 1707: { /* r += sign(y) * y, that is |y| */
1.1 noro 1708: r = subiispec(y+2,r+2, lgefint(y)-2,lgefint(r)-2);
1709: return gerepileuptoint(av, r);
1710: }
1711: q = addsi(-signe(y),q);
1712: if (!z) return gerepileuptoint(av, q);
1713:
1714: *z = subiispec(y+2,r+2, lgefint(y)-2,lgefint(r)-2);
1.2 ! noro 1715: gptr[0]=&q; gptr[1]=z; gerepilemanysp(av,(gpmem_t)r,gptr,2);
1.1 noro 1716: return q;
1717: }
1718:
1.2 ! noro 1719: /* Montgomery reduction.
! 1720: * N has k words, assume T >= 0 has less than 2k.
! 1721: * Return res := T / B^k mod N, where B = 2^BIL
! 1722: * such that 0 <= res < T/B^k + N and res has less than k words */
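/* Invariant sketch (illustrative; it assumes, as the "= 0" comments below
 * suggest, that inv == -N^(-1) mod B): each of the k steps picks
 * m = (low word of T) * inv so that the low word of T + m*N vanishes, hence
 * T + m*N is exactly divisible by B; after k such steps the scratch value is
 * congruent to T * B^(-k) mod N. */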
! 1723: GEN
! 1724: red_montgomery(GEN T, GEN N, ulong inv)
! 1725: {
! 1726: gpmem_t av;
! 1727: GEN Te,Td,Ne,Nd, scratch;
! 1728: ulong m,t,d,k = lgefint(N)-2;
! 1729: int carry;
! 1730: long i,j;
! 1731: LOCAL_HIREMAINDER;
! 1732: LOCAL_OVERFLOW;
! 1733:
! 1734: if (k == 0) return gzero;
! 1735: d = lgefint(T)-2; /* <= 2*k */
! 1736: #ifdef DEBUG
! 1737: if (d > 2*k) err(bugparier,"red_montgomery");
! 1738: #endif
! 1739: if (k == 1)
! 1740: { /* as below, special cased for efficiency */
! 1741: ulong n = (ulong)N[2];
! 1742: t = (ulong)T[d+1];
! 1743: m = t * inv;
! 1744: (void)addll(mulll(m, n), t); /* = 0 */
! 1745: t = hiremainder + overflow;
! 1746: if (d == 2)
! 1747: {
! 1748: t = addll((ulong)T[2], t);
! 1749: if (overflow) t -= n; /* t > n doesn't fit in 1 word */
! 1750: }
! 1751: return utoi(t);
! 1752: }
! 1753: /* assume k >= 2 */
! 1754: av = avma; scratch = new_chunk(k<<1); /* >= k + 2: result fits */
! 1755:
! 1756: /* copy T to scratch space (pad with zeroes to 2k words) */
! 1757: Td = (GEN)av;
! 1758: Te = T + (d+2);
! 1759: for (i=0; i < d ; i++) *--Td = *--Te;
! 1760: for ( ; i < (k<<1); i++) *--Td = 0;
! 1761:
! 1762: Te = (GEN)av; /* 1 beyond end of T mantissa */
! 1763: Ne = N + k+2; /* 1 beyond end of N mantissa */
! 1764:
! 1765: carry = 0;
         !  1766:   for (i=0; i<k; i++) /* set T := T/B mod N, k times */
! 1767: {
! 1768: Td = Te; /* one beyond end of (new) T mantissa */
! 1769: Nd = Ne;
! 1770: m = *--Td * inv; /* solve T + m N = O(B) */
! 1771:
! 1772: /* set T := (T + mN) / B */
! 1773: Te = Td;
! 1774: (void)addll(mulll(m, *--Nd), *Td); /* = 0 */
! 1775: for (j=1; j<k; j++)
! 1776: {
! 1777: hiremainder += overflow;
! 1778: t = addll(addmul(m, *--Nd), *--Td); *Td = t;
! 1779: }
! 1780: overflow += hiremainder;
! 1781: t = addll(overflow, *--Td); *Td = t + carry;
! 1782: carry = (overflow || (carry && *Td == 0));
! 1783: }
! 1784: if (carry)
! 1785: { /* Td > N overflows (k+1 words), set Td := Td - N */
! 1786: Td = Te;
! 1787: Nd = Ne;
! 1788: t = subll(*--Td, *--Nd); *Td = t;
! 1789: while (Td > scratch) { t = subllx(*--Td, *--Nd); *Td = t; }
! 1790: }
! 1791:
! 1792: /* copy result */
! 1793: Td = (GEN)av;
! 1794: while (! *scratch) scratch++; /* strip leading zeroes */
! 1795: while (Te > scratch) *--Td = *--Te;
! 1796: k = ((GEN)av - Td) + 2;
! 1797: *--Td = evalsigne(1) | evallgefint(k);
! 1798: *--Td = evaltyp(t_INT) | evallg(k);
! 1799: #ifdef DEBUG
! 1800: {
! 1801: long l = lgefint(N)-2, s = BITS_IN_LONG*l;
! 1802: GEN R = shifti(gun, s);
! 1803: GEN res = resii(mulii(T, mpinvmod(R, N)), N);
! 1804: if (k > lgefint(N)
! 1805: || !egalii(resii(Td,N),res)
! 1806: || cmpii(Td, addii(shifti(T, -s), N)) >= 0) err(bugparier,"red_montgomery");
! 1807: }
! 1808: #endif
! 1809: avma = (gpmem_t)Td; return Td;
! 1810: }
! 1811:
1.1 noro 1812: /* EXACT INTEGER DIVISION */
1813:
1814: /* Find c such that 1=c*b mod 2^BITS_IN_LONG, assuming b odd (unchecked) */
1.2 ! noro 1815: ulong
1.1 noro 1816: invrev(ulong b)
1817: {
1818: ulong x;
1819: switch(b & 7)
1820: {
1821: case 3:
1822: case 5: x = b+8; break;
1823: default: x = b; break;
1824: } /* x = b^(-1) mod 2^4 */
1825: x = x*(2-b*x);
1826: x = x*(2-b*x);
1827: x = x*(2-b*x); /* b^(-1) mod 2^32 (Newton applied to 1/x - b = 0) */
1828: #ifdef LONG_IS_64BIT
1829: x = x*(2-b*x); /* b^(-1) mod 2^64 */
1830: #endif
1831: return x;
1832: }
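/* Worked example (illustrative, 32-bit words): invrev(3) = 0xAAAAAAAB, since
 * 3 * 0xAAAAAAAB = 2^33 + 1 == 1 (mod 2^32); each Newton step x = x*(2-b*x)
 * doubles the number of correct low-order bits. */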
1833:
1834: /* assume xy>0, y odd */
1.2 ! noro 1835: GEN
1.1 noro 1836: diviuexact(GEN x, ulong y)
1837: {
1838: long i,lz,lx;
1839: ulong q, yinv;
1840: GEN z, z0, x0, x0min;
1841:
1842: if (y == 1) return icopy(x);
1843: lx = lgefint(x);
1.2 ! noro 1844: if (lx == 3) return utoi((ulong)x[2] / y);
1.1 noro 1845: yinv = invrev(y);
1846: lz = (y <= (ulong)x[2]) ? lx : lx-1;
1847: z = new_chunk(lz);
1848: z0 = z + lz;
1849: x0 = x + lx; x0min = x + lx-lz+2;
1850:
1851: while (x0 > x0min)
1852: {
1853: *--z0 = q = yinv*((ulong)*--x0); /* i-th quotient */
1854: if (!q) continue;
1855: /* x := x - q * y */
1856: { /* update neither lowest word (could set it to 0) nor highest ones */
1857: register GEN x1 = x0 - 1;
1858: LOCAL_HIREMAINDER;
1859: (void)mulll(q,y);
1860: if (hiremainder)
1861: {
1862: if ((ulong)*x1 < hiremainder)
1863: {
1864: *x1 -= hiremainder;
1865: do (*--x1)--; while ((ulong)*x1 == MAXULONG);
1866: }
1867: else
1868: *x1 -= hiremainder;
1869: }
1870: }
1871: }
1872: i=2; while(!z[i]) i++;
1873: z += i-2; lz -= i-2;
1874: z[0] = evaltyp(t_INT)|evallg(lz);
1875: z[1] = evalsigne(1)|evallg(lz);
1.2 ! noro 1876: avma = (gpmem_t)z; return z;
1.1 noro 1877: }
1878:
1879: /* Find z such that x=y*z, knowing that y | x (unchecked)
             1880:  * Method: y0 z0 = x0 mod B = 2^BITS_IN_LONG ==> z0 = x0 / y0 mod B.
1881: * Set x := (x - z0 y) / B, updating only relevant words, and repeat */
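/* Illustrative base-10 analogue: to divide x = 91 by y = 7 exactly, note
 * 7^(-1) = 3 (mod 10); the low digit of x is 1, so the low quotient digit is
 * 1*3 = 3, then x := (91 - 3*7)/10 = 7 gives the next digit 7*3 == 1 (mod 10),
 * i.e. the quotient 13. */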
1882: GEN
1883: diviiexact(GEN x, GEN y)
1884: {
1.2 ! noro 1885: long lx, ly, lz, vy, i, ii, sx = signe(x), sy = signe(y);
! 1886: gpmem_t av;
1.1 noro 1887: ulong y0inv,q;
1888: GEN z;
1889:
1890: if (!sy) err(dvmer1);
1891: if (!sx) return gzero;
1892: vy = vali(y); av = avma;
1893: (void)new_chunk(lgefint(x)); /* enough room for z */
1894: if (vy)
1895: { /* make y odd */
1896: #if 0
1897: if (vali(x) < vy) err(talker,"impossible division in diviiexact");
1898: #endif
1899: y = shifti(y,-vy);
1900: x = shifti(x,-vy);
1901: }
1902: else x = icopy(x); /* necessary because we destroy x */
1903: avma = av; /* will erase our x,y when exiting */
1904: /* now y is odd */
1905: ly = lgefint(y);
1.2 ! noro 1906: if (ly == 3)
1.1 noro 1907: {
1908: x = diviuexact(x,(ulong)y[2]);
1909: if (signe(x)) setsigne(x,sx*sy); /* should have x != 0 at this point */
1910: return x;
1911: }
1912: lx = lgefint(x); if (ly>lx) err(talker,"impossible division in diviiexact");
1913: y0inv = invrev(y[ly-1]);
1914: i=2; while (i<ly && y[i]==x[i]) i++;
1915: lz = (i==ly || (ulong)y[i] < (ulong)x[i]) ? lx-ly+3 : lx-ly+2;
1916: z = new_chunk(lz);
1917:
1918: y += ly - 1; /* now y[-i] = i-th word of y */
1919: for (ii=lx-1,i=lz-1; i>=2; i--,ii--)
1920: {
1921: long limj;
1922: LOCAL_HIREMAINDER;
1923: LOCAL_OVERFLOW;
1924:
1925: z[i] = q = y0inv*((ulong)x[ii]); /* i-th quotient */
1926: if (!q) continue;
1927:
1928: /* x := x - q * y */
1929: (void)mulll(q,y[0]); limj = max(lx - lz, ii+3-ly);
1930: { /* update neither lowest word (could set it to 0) nor highest ones */
1.2 ! noro 1931: register GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj;
1.1 noro 1932: for (; x0 >= xlim; x0--, y0--)
1933: {
1934: *x0 = subll(*x0, addmul(q,*y0));
1935: hiremainder += overflow;
1936: }
1937: if (hiremainder && limj != lx - lz)
1938: {
1939: if ((ulong)*x0 < hiremainder)
1940: {
1941: *x0 -= hiremainder;
1942: do (*--x0)--; while ((ulong)*x0 == MAXULONG);
1943: }
1944: else
1945: *x0 -= hiremainder;
1946: }
1947: }
1948: }
1949: #if 0
1950: i=2; while(i<lz && !z[i]) i++;
1951: if (i==lz) err(bugparier,"diviiexact");
1952: #else
1953: i=2; while(!z[i]) i++;
1954: #endif
1955: z += i-2; lz -= (i-2);
1956: z[0] = evaltyp(t_INT)|evallg(lz);
1957: z[1] = evalsigne(sx*sy)|evallg(lz);
1.2 ! noro 1958: avma = (gpmem_t)z; return z;
1.1 noro 1959: }
1960:
1961: long
1962: smodsi(long x, GEN y)
1963: {
1964: if (x<0) err(talker,"negative small integer in smodsi");
1965: (void)divsi(x,y); return hiremainder;
1966: }
1967:
1968: /* x and y are integers. Return 1 if |x| == |y|, 0 otherwise */
1969: int
1970: absi_equal(GEN x, GEN y)
1971: {
1972: long lx,i;
1973:
1974: if (!signe(x)) return !signe(y);
1975: if (!signe(y)) return 0;
1976:
1977: lx=lgefint(x); if (lx != lgefint(y)) return 0;
1978: i=2; while (i<lx && x[i]==y[i]) i++;
1979: return (i==lx);
1980: }
1981:
1982: /* x and y are integers. Return sign(|x| - |y|) */
1983: int
1984: absi_cmp(GEN x, GEN y)
1985: {
1986: long lx,ly,i;
1987:
1988: if (!signe(x)) return signe(y)? -1: 0;
1989: if (!signe(y)) return 1;
1990:
1991: lx=lgefint(x); ly=lgefint(y);
1992: if (lx>ly) return 1;
1993: if (lx<ly) return -1;
1994: i=2; while (i<lx && x[i]==y[i]) i++;
1995: if (i==lx) return 0;
1996: return ((ulong)x[i] > (ulong)y[i])? 1: -1;
1997: }
1998:
1999: /* x and y are reals. Return sign(|x| - |y|) */
2000: int
2001: absr_cmp(GEN x, GEN y)
2002: {
2003: long ex,ey,lx,ly,lz,i;
2004:
2005: if (!signe(x)) return signe(y)? -1: 0;
2006: if (!signe(y)) return 1;
2007:
2008: ex=expo(x); ey=expo(y);
2009: if (ex>ey) return 1;
2010: if (ex<ey) return -1;
2011:
2012: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
2013: i=2; while (i<lz && x[i]==y[i]) i++;
2014: if (i<lz) return ((ulong)x[i] > (ulong)y[i])? 1: -1;
2015: if (lx>=ly)
2016: {
2017: while (i<lx && !x[i]) i++;
2018: return (i==lx)? 0: 1;
2019: }
2020: while (i<ly && !y[i]) i++;
2021: return (i==ly)? 0: -1;
2022: }
2023:
2024: /********************************************************************/
2025: /** **/
2026: /** INTEGER MULTIPLICATION (KARATSUBA) **/
2027: /** **/
2028: /********************************************************************/
2029: #define _sqri_l 47
2030: #define _muli_l 25 /* optimal on PII 350MHz + gcc 2.7.2.1 (gp-dyn) */
2031:
1.2 ! noro 2032: #if 1 /* for tunings */
1.1 noro 2033: long KARATSUBA_SQRI_LIMIT = _sqri_l;
2034: long KARATSUBA_MULI_LIMIT = _muli_l;
2035:
2036: void setsqri(long a) { KARATSUBA_SQRI_LIMIT = a; }
2037: void setmuli(long a) { KARATSUBA_MULI_LIMIT = a; }
2038:
2039: GEN
2040: speci(GEN x, long nx)
2041: {
2042: GEN z;
2043: long i;
2044: if (!nx) return gzero;
2045: z = cgeti(nx+2); z[1] = evalsigne(1)|evallgefint(nx+2);
2046: for (i=0; i<nx; i++) z[i+2] = x[i];
2047: return z;
2048: }
2049: #else
2050: # define KARATSUBA_SQRI_LIMIT _sqri_l
2051: # define KARATSUBA_MULI_LIMIT _muli_l
2052: #endif
2053:
2054: /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
2055: #ifdef INLINE
2056: INLINE
2057: #endif
2058: GEN
2059: muliispec(GEN x, GEN y, long nx, long ny)
2060: {
2061: GEN z2e,z2d,yd,xd,ye,zd;
2062: long p1,lz;
2063: LOCAL_HIREMAINDER;
2064:
2065: if (!ny) return gzero;
2066: zd = (GEN)avma; lz = nx+ny+2;
2067: (void)new_chunk(lz);
2068: xd = x + nx;
2069: yd = y + ny;
2070: ye = yd; p1 = *--xd;
2071:
2072: *--zd = mulll(p1, *--yd); z2e = zd;
2073: while (yd > y) *--zd = addmul(p1, *--yd);
2074: *--zd = hiremainder;
2075:
2076: while (xd > x)
2077: {
2078: LOCAL_OVERFLOW;
2079: yd = ye; p1 = *--xd;
2080:
2081: z2d = --z2e;
2082: *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
2083: while (yd > y)
2084: {
2085: hiremainder += overflow;
2086: *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
2087: }
2088: *--zd = hiremainder + overflow;
2089: }
2090: if (*zd == 0) { zd++; lz--; } /* normalize */
2091: *--zd = evalsigne(1) | evallgefint(lz);
2092: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 2093: avma=(gpmem_t)zd; return zd;
1.1 noro 2094: }
2095:
2096: #ifdef INLINE
2097: INLINE
2098: #endif
2099: GEN
2100: sqrispec(GEN x, long nx)
2101: {
2102: GEN z2e,z2d,yd,xd,zd,x0,z0;
2103: long p1,lz;
2104: LOCAL_HIREMAINDER;
2105:
2106: if (!nx) return gzero;
2107: zd = (GEN)avma; lz = (nx+1) << 1;
2108: z0 = new_chunk(lz);
2109: if (nx == 1)
2110: {
2111: *--zd = mulll(*x, *x);
2112: *--zd = hiremainder; goto END;
2113: }
2114: xd = x + nx;
1.2 ! noro 2115:
1.1 noro 2116: /* compute double products --> zd */
2117: p1 = *--xd; yd = xd; --zd;
2118: *--zd = mulll(p1, *--yd); z2e = zd;
2119: while (yd > x) *--zd = addmul(p1, *--yd);
2120: *--zd = hiremainder;
2121:
2122: x0 = x+1;
2123: while (xd > x0)
2124: {
2125: LOCAL_OVERFLOW;
2126: p1 = *--xd; yd = xd;
2127:
2128: z2e -= 2; z2d = z2e;
2129: *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
2130: while (yd > x)
2131: {
2132: hiremainder += overflow;
2133: *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
2134: }
2135: *--zd = hiremainder + overflow;
2136: }
2137: /* multiply zd by 2 (put result in zd - 1) */
2138: zd[-1] = ((*zd & HIGHBIT) != 0);
2139: shift_left(zd, zd, 0, (nx<<1)-3, 0, 1);
2140:
2141: /* add the squares */
2142: xd = x + nx; zd = z0 + lz;
2143: p1 = *--xd;
2144: zd--; *zd = mulll(p1,p1);
2145: zd--; *zd = addll(hiremainder, *zd);
2146: while (xd > x)
2147: {
2148: p1 = *--xd;
2149: zd--; *zd = addll(mulll(p1,p1)+ overflow, *zd);
2150: zd--; *zd = addll(hiremainder + overflow, *zd);
2151: }
2152:
2153: END:
2154: if (*zd == 0) { zd++; lz--; } /* normalize */
2155: *--zd = evalsigne(1) | evallgefint(lz);
2156: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 2157: avma=(gpmem_t)zd; return zd;
1.1 noro 2158: }
2159:
2160: /* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */
2161: static GEN
2162: addshiftw(GEN x, GEN y, long d)
2163: {
2164: GEN z,z0,y0,yd, zd = (GEN)avma;
2165: long a,lz,ly = lgefint(y);
2166:
2167: z0 = new_chunk(d);
2168: a = ly-2; yd = y+ly;
2169: if (a >= d)
2170: {
2171: y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */
2172: a -= d;
2173: if (a)
2174: z = addiispec(x+2, y+2, lgefint(x)-2, a);
2175: else
2176: z = icopy(x);
2177: }
2178: else
2179: {
2180: y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */
2181: while (zd >= z0) *--zd = 0; /* complete with 0s */
2182: z = icopy(x);
2183: }
2184: lz = lgefint(z)+d;
2185: z[1] = evalsigne(1) | evallgefint(lz);
2186: z[0] = evaltyp(t_INT) | evallg(lz); return z;
2187: }
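
/* Illustrative sketch only (kept disabled, like the other #if 0 blocks in this
 * file): on ordinary GENs, addshiftw(x,y,d) above returns the same integer as
 * the naive shift-and-add below, for x > 0, y >= 0, d > 0.  The function name
 * is ad hoc; addii() and shifti() are the generic PARI entry points. */
#if 0
static GEN
addshiftw_reference(GEN x, GEN y, long d)
{
  return addii(shifti(x, d * BITS_IN_LONG), y);
}
#endif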
2188:
2189: /* Fast product (Karatsuba) of integers. a and b are "special" GENs
2190: * c,c0,c1,c2 are genuine GENs.
2191: */
2192: static GEN
2193: quickmulii(GEN a, GEN b, long na, long nb)
2194: {
2195: GEN a0,c,c0;
1.2 ! noro 2196: long n0, n0a, i;
! 2197: gpmem_t av;
1.1 noro 2198:
2199: if (na < nb) swapspec(a,b, na,nb);
2200: if (nb == 1) return mulsispec(*b, a, na);
2201: if (nb == 0) return gzero;
2202: if (nb < KARATSUBA_MULI_LIMIT) return muliispec(a,b,na,nb);
2203: i=(na>>1); n0=na-i; na=i;
2204: av=avma; a0=a+na; n0a=n0;
2205: while (!*a0 && n0a) { a0++; n0a--; }
2206:
2207: if (n0a && nb > n0)
2208:   { /* after the nb -= n0 below: nb <= na <= n0 */
2209: GEN b0,c1,c2;
2210: long n0b;
2211:
2212: nb -= n0;
2213: c = quickmulii(a,b,na,nb);
2214: b0 = b+nb; n0b = n0;
2215: while (!*b0 && n0b) { b0++; n0b--; }
2216: if (n0b)
2217: {
2218: c0 = quickmulii(a0,b0, n0a,n0b);
2219:
2220: c2 = addiispec(a0,a, n0a,na);
2221: c1 = addiispec(b0,b, n0b,nb);
2222: c1 = quickmulii(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
2223: c2 = addiispec(c0+2, c+2, lgefint(c0)-2,lgefint(c) -2);
2224:
2225: c1 = subiispec(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
2226: }
2227: else
2228: {
2229: c0 = gzero;
2230: c1 = quickmulii(a0,b, n0a,nb);
2231: }
2232: c = addshiftw(c,c1, n0);
2233: }
2234: else
2235: {
2236: c = quickmulii(a,b,na,nb);
2237: c0 = quickmulii(a0,b,n0a,nb);
2238: }
2239: return gerepileuptoint(av, addshiftw(c,c0, n0));
2240: }
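
/* Illustrative sketch (disabled): the identity behind the recombination in
 * quickmulii() above.  With B = 2^(n0*BITS_IN_LONG), write a = a1*B + a0 and
 * b = b1*B + b0; then
 *   a*b = (a1*b1)*B^2 + ((a1+a0)*(b1+b0) - a1*b1 - a0*b0)*B + (a0*b0),
 * i.e. three half-size products instead of four.  Written out on ordinary
 * GENs with an ad hoc name; the kernel works on "spec" digit arrays instead. */
#if 0
static GEN
karatsuba_identity(GEN a1, GEN a0, GEN b1, GEN b0, long n0)
{
  long sh = n0 * BITS_IN_LONG;
  GEN hi = mulii(a1,b1), lo = mulii(a0,b0);
  GEN mid = subii(mulii(addii(a1,a0), addii(b1,b0)), addii(hi,lo));
  return addii(shifti(addii(shifti(hi,sh), mid), sh), lo);
}
#endif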
2241:
2242: /* actual operations will take place on a+2 and b+2: we strip the codewords */
2243: GEN
2244: mulii(GEN a,GEN b)
2245: {
2246: long sa,sb;
2247: GEN z;
2248:
2249: sa=signe(a); if (!sa) return gzero;
2250: sb=signe(b); if (!sb) return gzero;
2251: if (sb<0) sa = -sa;
2252: z = quickmulii(a+2,b+2, lgefint(a)-2,lgefint(b)-2);
2253: setsigne(z,sa); return z;
2254: }
2255:
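/* resii(x,y), computed from a precomputed pair sy = [y, invy] where invy is a
 * t_REAL approximation to 1/y: one real multiplication plus a truncation
 * replace the integer division, and at most one correction step is needed. */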
2256: GEN
2257: resiimul(GEN x, GEN sy)
2258: {
2259: GEN r, q, y = (GEN)sy[1], invy;
1.2 ! noro 2260: long k;
! 2261: gpmem_t av = avma;
1.1 noro 2262:
2263: k = cmpii(x, y);
2264: if (k <= 0) return k? icopy(x): gzero;
2265: invy = (GEN)sy[2];
2266: q = mulir(x,invy);
2267: q = mptrunc(q); /* <= divii(x, y) (at most 1 less) */
2268: r = subii(x, mulii(y,q));
2269: /* resii(x,y) + y >= r >= resii(x,y) */
2270: k = cmpii(r, y);
2271: if (k >= 0)
2272: {
2273: if (k == 0) { avma = av; return gzero; }
2274: r = subiispec(r+2, y+2, lgefint(r)-2, lgefint(y)-2);
2275: }
2276: #if 0
2277: q = subii(r,resii(x,y));
2278: if (signe(q))
2279: err(talker,"bug in resiimul: x = %Z\ny = %Z\ndif = %Z", x,y,q);
2280: #endif
2281: return gerepileuptoint(av, r); /* = resii(x, y) */
2282: }
2283:
2284: /* x % (2^n), assuming x, n >= 0 */
2285: GEN
2286: resmod2n(GEN x, long n)
2287: {
2288: long hi,l,k,lx,ly;
2289: GEN z, xd, zd;
1.2 ! noro 2290:
1.1 noro 2291: if (!signe(x) || !n) return gzero;
2292:
1.2 ! noro 2293: l = n & (BITS_IN_LONG-1); /* n % BITS_IN_LONG */
1.1 noro 2294: k = n >> TWOPOTBITS_IN_LONG; /* n / BITS_IN_LONG */
2295: lx = lgefint(x);
2296: if (lx < k+3) return icopy(x);
2297:
2298: xd = x + (lx-k-1);
2299: /* x = |_|...|#|1|...|k| : copy the last l bits of # and the last k words
2300: * ^--- initial xd */
2301: hi = *xd & ((1<<l) - 1); /* last l bits of # = top bits of result */
2302: if (!hi)
2303: { /* strip leading zeroes from result */
2304: xd++; while (k && !*xd) { k--; xd++; }
2305: if (!k) return gzero;
2306: ly = k+2; xd--;
2307: }
2308: else
2309: ly = k+3;
2310:
2311: zd = z = cgeti(ly);
2312: *++zd = evalsigne(1) | evallgefint(ly);
2313: if (hi) *++zd = hi;
2314: for ( ;k; k--) *++zd = *++xd;
2315: return z;
2316: }
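
/* Sanity sketch (disabled, ad hoc name): for x, n >= 0, resmod2n(x,n) should
 * agree with the generic remainder modulo 2^n. */
#if 0
static int
resmod2n_check(GEN x, long n)
{
  gpmem_t av = avma;
  int ok = egalii(resmod2n(x,n), modii(x, shifti(gun,n)));
  avma = av; return ok;
}
#endif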
2317:
2318: static GEN
2319: quicksqri(GEN a, long na)
2320: {
2321: GEN a0,c;
1.2 ! noro 2322: long n0, n0a, i;
! 2323: gpmem_t av;
1.1 noro 2324:
2325: if (na < KARATSUBA_SQRI_LIMIT) return sqrispec(a,na);
2326: i=(na>>1); n0=na-i; na=i;
2327: av=avma; a0=a+na; n0a=n0;
2328: while (!*a0 && n0a) { a0++; n0a--; }
2329: c = quicksqri(a,na);
2330: if (n0a)
2331: {
2332: GEN t, c1, c0 = quicksqri(a0,n0a);
2333: #if 0
2334: c1 = shifti(quickmulii(a0,a, n0a,na),1);
2335: #else /* slower !!! */
2336: t = addiispec(a0,a,n0a,na);
2337: t = quicksqri(t+2,lgefint(t)-2);
2338: c1= addiispec(c0+2,c+2, lgefint(c0)-2, lgefint(c)-2);
2339: c1= subiispec(t+2, c1+2, lgefint(t)-2, lgefint(c1)-2);
2340: #endif
2341: c = addshiftw(c,c1, n0);
2342: c = addshiftw(c,c0, n0);
2343: }
2344: else
2345: c = addshiftw(c,gzero,n0<<1);
2346: return gerepileuptoint(av, c);
2347: }
2348:
2349: GEN
2350: sqri(GEN a) { return quicksqri(a+2, lgefint(a)-2); }
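
/* Illustrative sketch (disabled): the identity behind quicksqri().  With
 * B = 2^(n0*BITS_IN_LONG) and a = a1*B + a0,
 *   a^2 = (a1^2)*B^2 + ((a1+a0)^2 - a1^2 - a0^2)*B + (a0^2),
 * so a full-size square costs three (roughly) half-size squares.  Ordinary
 * GENs, ad hoc name. */
#if 0
static GEN
sqri_identity(GEN a1, GEN a0, long n0)
{
  long sh = n0 * BITS_IN_LONG;
  GEN hi = sqri(a1), lo = sqri(a0);
  GEN mid = subii(sqri(addii(a1,a0)), addii(hi,lo));
  return addii(shifti(addii(shifti(hi,sh), mid), sh), lo);
}
#endif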
2351:
2352: #define MULRR_LIMIT 32
2353: #define MULRR2_LIMIT 32
2354:
2355: #if 0
2356: GEN
2357: karamulrr1(GEN x, GEN y)
2358: {
2359: long sx,sy;
2360: long i,i1,i2,lx=lg(x),ly=lg(y),e,flag,garde;
2361: long lz2,lz3,lz4;
2362: GEN z,lo1,lo2,hi;
2363:
2364: /* ensure that lg(y) >= lg(x) */
2365: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
2366: if (lx < MULRR_LIMIT) return mulrr(x,y);
1.2 ! noro 2367: sx=signe(x); sy=signe(y); e = expo(x)+expo(y);
! 2368: if (!sx || !sy) return realzero_bit(e);
1.1 noro 2369: if (sy<0) sx = -sx;
2370: ly=lx+flag; z=cgetr(lx);
2371: lz2 = (lx>>1); lz3 = lx-lz2;
2372: x += 2; lx -= 2;
2373: y += 2; ly -= 2;
2374: hi = quickmulii(x,y,lz2,lz2);
2375: i1=lz2; while (i1<lx && !x[i1]) i1++;
2376: lo1 = quickmulii(y,x+i1,lz2,lx-i1);
2377: i2=lz2; while (i2<ly && !y[i2]) i2++;
2378: lo2 = quickmulii(x,y+i2,lz2,ly-i2);
2379:
2380: if (signe(lo1))
2381: {
2382: if (flag) { lo2 = addshiftw(lo1,lo2,1); lz3++; } else lo2=addii(lo1,lo2);
2383: }
2384: lz4=lgefint(lo2)-lz3;
2385: if (lz4>0) hi = addiispec(hi+2,lo2+2, lgefint(hi)-2,lz4);
2386: if (hi[2] < 0)
2387: {
2388: e++; garde=hi[lx+2];
2389: for (i=lx+1; i>=2 ; i--) z[i]=hi[i];
2390: }
2391: else
2392: {
2393: garde = (hi[lx+2] << 1);
2394: shift_left(z,hi,2,lx+1, garde, 1);
2395: }
1.2 ! noro 2396: z[1]=evalsigne(sx) | evalexpo(e);
1.1 noro 2397: if (garde < 0)
2398: { /* round to nearest */
2399: i=lx+2; do z[--i]++; while (z[i]==0);
2400: if (i==1) z[2]=HIGHBIT;
2401: }
1.2 ! noro 2402: avma=(gpmem_t)z; return z;
1.1 noro 2403: }
2404:
2405: GEN
2406: karamulrr2(GEN x, GEN y)
2407: {
2408: long sx,sy,i,lx=lg(x),ly=lg(y),e,flag,garde;
2409: GEN z,hi;
2410:
2411: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
2412: if (lx < MULRR2_LIMIT) return mulrr(x,y);
2413: ly=lx+flag; sx=signe(x); sy=signe(y);
1.2 ! noro 2414: e = expo(x)+expo(y);
! 2415: if (!sx || !sy) return realzero_bit(e);
1.1 noro 2416: if (sy<0) sx = -sx;
2417: z=cgetr(lx);
2418: hi=quickmulii(y+2,x+2,ly-2,lx-2);
2419: if (hi[2] < 0)
2420: {
2421: e++; garde=hi[lx];
2422: for (i=2; i<lx ; i++) z[i]=hi[i];
2423: }
2424: else
2425: {
2426: garde = (hi[lx] << 1);
2427: shift_left(z,hi,2,lx-1, garde, 1);
2428: }
1.2 ! noro 2429: z[1]=evalsigne(sx) | evalexpo(e);
1.1 noro 2430: if (garde < 0)
2431: { /* round to nearest */
2432: i=lx; do z[--i]++; while (z[i]==0);
2433: if (i==1) z[2]=HIGHBIT;
2434: }
1.2 ! noro 2435: avma=(gpmem_t)z; return z;
1.1 noro 2436: }
2437:
2438: GEN
2439: karamulrr(GEN x, GEN y, long flag)
2440: {
2441: switch(flag)
2442: {
2443: case 1: return karamulrr1(x,y);
2444: case 2: return karamulrr2(x,y);
2445: default: err(flagerr,"karamulrr");
2446: }
2447: return NULL; /* not reached */
2448: }
2449:
2450: GEN
2451: karamulir(GEN x, GEN y, long flag)
2452: {
2453: long sx=signe(x),lz,i;
1.2 ! noro 2454: GEN z, z1;
1.1 noro 2455:
2456: if (!sx) return gzero;
1.2 ! noro 2457: lz = lg(y); z = cgetr(lz);
! 2458: z1 = karamulrr(itor(x, lz+1), y, flag);
1.1 noro 2459: for (i=1; i<lz; i++) z[i]=z1[i];
1.2 ! noro 2460: avma=(gpmem_t)z; return z;
1.1 noro 2461: }
2462: #endif
2463:
2464: #ifdef LONG_IS_64BIT
1.2 ! noro 2465: long
! 2466: expodb(double x)
! 2467: {
! 2468: union { double f; ulong i; } fi;
! 2469: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2470: const int exp_mid = 0x3ff;/* exponent bias */
1.1 noro 2471:
1.2 ! noro 2472: if (x==0.) return -1023;
! 2473: fi.f = x;
! 2474: return ((fi.i & (HIGHBIT-1)) >> mant_len) - exp_mid;
! 2475: }
1.1 noro 2476:
2477: GEN
2478: dbltor(double x)
2479: {
1.2 ! noro 2480: GEN z;
1.1 noro 2481: long e;
2482: union { double f; ulong i; } fi;
2483: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2484: const int exp_mid = 0x3ff;/* exponent bias */
2485: const int expo_len = 11; /* number of bits of exponent */
2486: LOCAL_HIREMAINDER;
2487:
1.2 ! noro 2488: if (x==0.) return realzero_bit(-1023);
! 2489: fi.f = x; z = cgetr(DEFAULTPREC);
1.1 noro 2490: e = ((fi.i & (HIGHBIT-1)) >> mant_len) - exp_mid;
2491: z[1] = evalexpo(e) | evalsigne(x<0? -1: 1);
2492: z[2] = (fi.i << expo_len) | HIGHBIT;
2493: return z;
2494: }
2495:
2496: double
2497: rtodbl(GEN x)
2498: {
2499: long ex,s=signe(x);
2500: ulong a;
2501: union { double f; ulong i; } fi;
2502: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2503: const int exp_mid = 0x3ff;/* exponent bias */
2504: const int expo_len = 11; /* number of bits of exponent */
2505: LOCAL_HIREMAINDER;
2506:
2507: if (typ(x)==t_INT && !s) return 0.0;
2508: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
2509: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
2510:
2511: /* start by rounding to closest */
2512: a = (x[2] & (HIGHBIT-1)) + 0x400;
2513: if (a & HIGHBIT) { ex++; a=0; }
2514: if (ex >= exp_mid) err(rtodber);
2515: fi.i = ((ex + exp_mid) << mant_len) | (a >> expo_len);
2516: if (s<0) fi.i |= HIGHBIT;
2517: return fi.f;
2518: }
2519:
1.2 ! noro 2520: #else /* LONG_IS_64BIT */
1.1 noro 2521:
1.2 ! noro 2522: #if PARI_DOUBLE_FORMAT == 1
1.1 noro 2523: # define INDEX0 1
2524: # define INDEX1 0
1.2 ! noro 2525: #elif PARI_DOUBLE_FORMAT == 0
1.1 noro 2526: # define INDEX0 0
2527: # define INDEX1 1
2528: #endif
2529:
1.2 ! noro 2530: long
! 2531: expodb(double x)
! 2532: {
! 2533: union { double f; ulong i[2]; } fi;
! 2534: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2535: const int exp_mid = 0x3ff;/* exponent bias */
! 2536: const int shift = mant_len-32;
! 2537:
! 2538: if (x==0.) return -1023;
! 2539: fi.f = x;
! 2540: {
! 2541: const ulong a = fi.i[INDEX0];
! 2542: return ((a & (HIGHBIT-1)) >> shift) - exp_mid;
! 2543: }
! 2544: }
! 2545:
1.1 noro 2546: GEN
2547: dbltor(double x)
2548: {
2549: GEN z;
2550: long e;
2551: union { double f; ulong i[2]; } fi;
2552: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2553: const int exp_mid = 0x3ff;/* exponent bias */
1.2 ! noro 2554: const int expo_len = 11; /* number of bits of exponent */
1.1 noro 2555: const int shift = mant_len-32;
2556:
1.2 ! noro 2557: if (x==0.) return realzero_bit(-1023);
! 2558: fi.f = x; z=cgetr(DEFAULTPREC);
1.1 noro 2559: {
2560: const ulong a = fi.i[INDEX0];
2561: const ulong b = fi.i[INDEX1];
2562: e = ((a & (HIGHBIT-1)) >> shift) - exp_mid;
2563: z[1] = evalexpo(e) | evalsigne(x<0? -1: 1);
2564: z[3] = b << expo_len;
2565: z[2] = HIGHBIT | b >> (BITS_IN_LONG-expo_len) | (a << expo_len);
2566: }
2567: return z;
2568: }
2569:
2570: double
2571: rtodbl(GEN x)
2572: {
2573: long ex,s=signe(x),lx=lg(x);
2574: ulong a,b,k;
2575: union { double f; ulong i[2]; } fi;
2576: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2577: const int exp_mid = 0x3ff;/* exponent bias */
1.2 ! noro 2578: const int expo_len = 11; /* number of bits of exponent */
1.1 noro 2579: const int shift = mant_len-32;
2580:
2581: if (typ(x)==t_INT && !s) return 0.0;
2582: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
2583: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
2584:
2585: /* start by rounding to closest */
2586: a = x[2] & (HIGHBIT-1);
2587: if (lx > 3)
2588: {
2589: b = x[3] + 0x400UL; if (b < 0x400UL) a++;
2590: if (a & HIGHBIT) { ex++; a=0; }
2591: }
2592: else b = 0;
2593: if (ex > exp_mid) err(rtodber);
2594: ex += exp_mid;
2595: k = (a >> expo_len) | (ex << shift);
2596: if (s<0) k |= HIGHBIT;
2597: fi.i[INDEX0] = k;
2598: fi.i[INDEX1] = (a << (BITS_IN_LONG-expo_len)) | (b >> expo_len);
2599: return fi.f;
2600: }
1.2 ! noro 2601: #endif /* LONG_IS_64BIT */
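
/* Round-trip sketch (disabled, ad hoc name): dbltor() packs the IEEE double
 * into a DEFAULTPREC t_REAL and rtodbl() rounds it back to nearest; for
 * normal, finite, nonzero doubles the round trip should be the identity,
 * since DEFAULTPREC carries at least the 53 mantissa bits of a double. */
#if 0
static int
dbl_roundtrip_check(double x)
{
  gpmem_t av = avma;
  int ok = (rtodbl(dbltor(x)) == x);
  avma = av; return ok;
}
#endif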
1.1 noro 2602:
2603: /* Old cgiv without reference count (which was not used anyway)
2604: * Should be a macro.
2605: */
2606: void
2607: cgiv(GEN x)
2608: {
2609: if (x == (GEN) avma)
1.2 ! noro 2610: avma = (gpmem_t) (x+lg(x));
1.1 noro 2611: }
2612:
2613: /********************************************************************/
2614: /** **/
2615: /** INTEGER EXTENDED GCD (AND INVMOD) **/
2616: /** **/
2617: /********************************************************************/
2618:
2619: /* GN 1998Oct25, originally developed in January 1998 under 2.0.4.alpha,
2620: * in the context of trying to improve elliptic curve cryptosystem attacking
2621: * algorithms. 2001Jan02 -- added bezout() functionality.
2622: *
2623: * Two basic ideas - (1) avoid many integer divisions, especially when the
2624: * quotient is 1 (which happens more than 40% of the time). (2) Use Lehmer's
2625: * trick as modified by Jebelean of extracting a couple of words' worth of
2626: * leading bits from both operands, and compute partial quotients from them
2627: * as long as we can be sure of their values. The Jebelean modifications
2628: * consist in reliable inequalities from which we can decide fast whether
2629: * to carry on or to return to the outer loop, and in re-shifting after the
2630: * first word's worth of bits has been used up. All of this is described
2631:  * in R. Lercier's thesis [pp148-153 & 163f.], except his outer loop isn't
2632: * quite right (the catch-up divisions needed when one partial quotient is
2633: * larger than a word are missing).
2634: *
2635: * The API consists of invmod() and bezout() below; the single-word routines
2636: * xgcduu and xxgcduu may be called directly if desired; lgcdii() probably
2637: * doesn't make much sense out of context.
2638: *
2639: * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
2640: * ptotically about a factor 2.5 .. 3, depending on processor architecture,
2641: * than the naive continued-division code. Unfortunately, thanks to the
2642: * unrolled loops and all, the code is a bit lengthy.
2643: */
2644:
2645: /*==================================
2646: * xgcduu(d,d1,f,v,v1,s)
2647: * xxgcduu(d,d1,f,u,u1,v,v1,s)
2648: * rgcduu(d,d1,vmax,u,u1,v,v1,s)
2649: *==================================*/
2650: /*
2651: * Fast `final' extended gcd algorithm, acting on two ulongs. Ideally this
2652: * should be replaced with assembler versions wherever possible. The present
2653: * code essentially does `subtract, compare, and possibly divide' at each step,
2654: * which is reasonable when hardware division (a) exists, (b) is a bit slowish
2655: * and (c) does not depend a lot on the operand values (as on i486). When
2656: * wordsize division is in fact an assembler routine based on subtraction,
2657: * this strategy may not be the most efficient one.
2658: *
2659: * xxgcduu() should be called with d > d1 > 0, returns gcd(d,d1), and assigns
2660: * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1] (i.e.,
2661:  * the product of all the [0, 1; 1, q_j] where the leftmost factor arises from
2662: * the quotient of the first division step), and the information about the
2663: * implied signs to s (-1 when an odd number of divisions has been done,
2664: * 1 otherwise). xgcduu() is exactly the same except that u,u1 are not com-
2665: * puted (and not returned, of course).
2666: *
2667: * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
2668: * (so we can stop the chain division one step early: as soon as the remainder
2669: * equals 1). Use this when you intend to use only what would be v, or only
2670: * what would be u and v, after that final division step, but not u1 and v1.
2671: * With the flag in force and thus without that final step, the interesting
2672: * quantity/ies will still sit in [u1 and] v1, of course.
2673: *
2674: * For computing the inverse of a single-word INTMOD known to exist, pass f=1
2675: * to xgcduu(), and obtain the result from s and v1. (The routine does the
2676: * right thing when d1==1 already.) For finishing a multiword modinv known
2677: * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix (with
2678: * properly adjusted signs) onto the values v' and v1' previously obtained
2679: * from the multiword division steps. Actually, just take the scalar product
2680: * of [v',v1'] with [u1,-v1], and change the sign if s==-1. (If the final
2681: * step had been carried out, it would be [-u,v], and s would also change.)
2682: * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
2683: * Finally, f=0 with xxgcduu() is useful for Bezout computations.
2684: * [Harrumph. In the above prescription, the sign turns out to be precisely
2685: * wrong.]
2686: * (It is safe for invmod() to call xgcduu() with f=1, because f&1 doesn't
2687: * make a difference when gcd(d,d1)>1. The speedup is negligible.)
2688: *
2689: * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
2690: * recover the final u,u1 given only v,v1 and s. However, it probably isn't
2691: * worthwhile, as it trades a few multiplications for a division.
2692: *
2693: * Note that these routines do not know and do not need to know about the
2694: * PARI stack.
1.2 ! noro 2695: *
1.1 noro 2696: * Added 2001Jan15:
2697: * rgcduu() is a variant of xxgcduu() which does not have f (the effect is
2698: * that of f=0), but instead has a ulong vmax parameter, for use in rational
2699: * reconstruction below. It returns when v1 exceeds vmax; v will never
2700: * exceed vmax. (vmax=0 is taken as a synonym of MAXULONG i.e. unlimited,
2701: * in which case rgcduu behaves exactly like xxgcduu with f=0.) The return
2702: * value of rgcduu() is typically meaningless; the interesting part is the
2703: * matrix.
2704: */
2705:
2706: ulong
2707: xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
2708: {
2709: ulong xv,xv1, xs, q,res;
2710: LOCAL_HIREMAINDER;
2711:
2712: /* The above blurb contained a lie. The main loop always stops when d1
2713: * has become equal to 1. If (d1 == 1 && !(f&1)) after the loop, we do
2714: * the final `division' of d by 1 `by hand' as it were.
2715: *
2716: * The loop has already been unrolled once. Aggressive optimization could
2717: * well lead to a totally unrolled assembler version...
2718: *
2719: * On modern x86 architectures, this loop is a pig anyway. The division
2720: * instruction always puts its result into the same pair of registers, and
2721: * we always want to use one of them straight away, so pipeline performance
2722: * will suck big time. An assembler version should probably do a first loop
2723: * computing and storing all the quotients -- their number is bounded in
2724: * advance -- and then assembling the matrix in a second pass. On other
2725: * architectures where we can cycle through four or so groups of registers
2726: * and exploit a fast ALU result-to-operand feedback path, this is much less
2727: * of an issue. (Intel sucks. See http://www.x86.org/ ...)
2728: */
2729: xs = res = 0;
2730: xv = 0UL; xv1 = 1UL;
2731: while (d1 > 1UL)
2732: {
2733: d -= d1; /* no need to use subll */
2734: if (d >= d1)
2735: {
2736: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
2737: xv += q * xv1;
2738: }
2739: else
2740: xv += xv1;
2741: /* possible loop exit */
2742: if (d <= 1UL) { xs=1; break; }
2743: /* repeat with inverted roles */
2744: d1 -= d;
2745: if (d1 >= d)
2746: {
2747: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
2748: xv1 += q * xv;
2749: }
2750: else
2751: xv1 += xv;
2752: } /* while */
2753:
2754: if (!(f&1)) /* division by 1 postprocessing if needed */
2755: {
2756: if (xs && d==1)
2757: { xv1 += d1 * xv; xs = 0; res = 1UL; }
2758: else if (!xs && d1==1)
2759: { xv += d * xv1; xs = 1; res = 1UL; }
2760: }
2761:
2762: if (xs)
2763: {
2764: *s = -1; *v = xv1; *v1 = xv;
2765: return (res ? res : (d==1 ? 1UL : d1));
2766: }
2767: else
2768: {
2769: *s = 1; *v = xv; *v1 = xv1;
2770: return (res ? res : (d1==1 ? 1UL : d));
2771: }
2772: }
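
/* Usage sketch (disabled, ad hoc name): single-word modular inverse through
 * xgcduu(), exactly as described in the blurb above and as done by the
 * single-word branch of invmod() further down.  Assumes 0 < a < m and
 * gcd(a,m) == 1. */
#if 0
static ulong
inv_mod_word(ulong a, ulong m)
{
  ulong v, v1;
  long s;
  (void)xgcduu(m, a, 1, &v, &v1, &s);
  v1 %= m;
  return (s < 0)? m - v1: v1;
}
#endif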
2773:
2774:
2775: ulong
2776: xxgcduu(ulong d, ulong d1, int f,
2777: ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
2778: {
2779: ulong xu,xu1, xv,xv1, xs, q,res;
2780: LOCAL_HIREMAINDER;
2781:
2782: xs = res = 0;
2783: xu = xv1 = 1UL;
2784: xu1 = xv = 0UL;
2785: while (d1 > 1UL)
2786: {
2787: d -= d1; /* no need to use subll */
2788: if (d >= d1)
2789: {
2790: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
2791: xv += q * xv1;
2792: xu += q * xu1;
2793: }
2794: else
2795: { xv += xv1; xu += xu1; }
2796: /* possible loop exit */
2797: if (d <= 1UL) { xs=1; break; }
2798: /* repeat with inverted roles */
2799: d1 -= d;
2800: if (d1 >= d)
2801: {
2802: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
2803: xv1 += q * xv;
2804: xu1 += q * xu;
2805: }
2806: else
2807: { xv1 += xv; xu1 += xu; }
2808: } /* while */
2809:
2810: if (!(f&1)) /* division by 1 postprocessing if needed */
2811: {
2812: if (xs && d==1)
2813: {
2814: xv1 += d1 * xv;
2815: xu1 += d1 * xu;
2816: xs = 0; res = 1UL;
2817: }
2818: else if (!xs && d1==1)
2819: {
2820: xv += d * xv1;
2821: xu += d * xu1;
2822: xs = 1; res = 1UL;
2823: }
2824: }
2825:
2826: if (xs)
2827: {
2828: *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
2829: return (res ? res : (d==1 ? 1UL : d1));
2830: }
2831: else
2832: {
2833: *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
2834: return (res ? res : (d1==1 ? 1UL : d));
2835: }
2836: }
2837:
2838: ulong
2839: rgcduu(ulong d, ulong d1, ulong vmax,
2840: ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
2841: {
2842: ulong xu,xu1, xv,xv1, xs, q, res=0;
2843: int f = 0;
2844: LOCAL_HIREMAINDER;
2845:
2846: if (vmax == 0) vmax = MAXULONG;
2847: xs = res = 0;
2848: xu = xv1 = 1UL;
2849: xu1 = xv = 0UL;
2850: while (d1 > 1UL)
2851: {
2852: d -= d1; /* no need to use subll */
2853: if (d >= d1)
2854: {
2855: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
2856: xv += q * xv1;
2857: xu += q * xu1;
2858: }
2859: else
2860: { xv += xv1; xu += xu1; }
2861: /* possible loop exit */
2862: if (xv > vmax) { f=xs=1; break; }
2863: if (d <= 1UL) { xs=1; break; }
2864: /* repeat with inverted roles */
2865: d1 -= d;
2866: if (d1 >= d)
2867: {
2868: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
2869: xv1 += q * xv;
2870: xu1 += q * xu;
2871: }
2872: else
2873: { xv1 += xv; xu1 += xu; }
2874: /* possible loop exit */
2875: if (xv1 > vmax) { f=1; break; }
2876: } /* while */
2877:
2878: if (!(f&1)) /* division by 1 postprocessing if needed */
2879: {
2880: if (xs && d==1)
2881: {
2882: xv1 += d1 * xv;
2883: xu1 += d1 * xu;
2884: xs = 0; res = 1UL;
2885: }
2886: else if (!xs && d1==1)
2887: {
2888: xv += d * xv1;
2889: xu += d * xu1;
2890: xs = 1; res = 1UL;
2891: }
2892: }
2893:
2894: if (xs)
2895: {
2896: *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
2897: return (res ? res : (d==1 ? 1UL : d1));
2898: }
2899: else
2900: {
2901: *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
2902: return (res ? res : (d1==1 ? 1UL : d));
2903: }
2904: }
2905:
2906: /*==================================
2907: * lgcdii(d,d1,u,u1,v,v1,vmax)
2908: *==================================*/
2909: /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
2910: *
2911: * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
2912: * and a quantity of bits from d1 obtained by a shift of the same displacement,
2913: * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
2914: * the product of all the [0, 1; 1, q_j] thus obtained, where the leftmost
2915: * factor arises from the quotient of the first division step.
1.2 ! noro 2916: *
1.1 noro 2917: * For use in rational reconstruction, input param vmax can be given a
2918: * nonzero value. In this case, we will return early as soon as v1 > vmax
2919: * (i.e. it is guaranteed that v <= vmax). --2001Jan15
2920: *
2921: * MUST be called with d > d1 > 0, and with d occupying more than one
2922: * significant word (if it doesn't, the caller has no business with us;
2923: * he/she/it should use xgcduu() instead). Returns the number of reduction/
2924: * swap steps carried out, possibly zero, or under certain conditions minus
2925: * that number. When the return value is nonzero, the caller should use the
2926: * returned recurrence matrix to update its own copies of d,d1. When the
2927: * return value is non-positive, and the latest remainder after updating
2928: * turns out to be nonzero, the caller should at once attempt a full division,
2929: * rather than first trying lgcdii() again -- this typically happens when we
2930: * are about to encounter a quotient larger than half a word. (This is not
2931: * detected infallibly -- after a positive return value, it is perfectly
2932: * possible that the next stage will end up needing a full division. After
2933: * a negative return value, however, this is certain, and should be acted
2934: * upon.)
2935: *
2936: * (The sign information, for which xgcduu() has its return argument s, is now
2937: * implicit in the LSB of our return value, and the caller may take advantage
2938: * of the fact that a return value of +-1 implies u==0,u1==v==1 [only v1 pro-
2939: * vides interesting information in this case]. One might also use the fact
2940: * that if the return value is +-2, then u==1, but this is rather marginal.)
2941: *
2942: * If it was not possible to determine even the first quotient, either because
2943: * we're too close to an integer quotient or because the quotient would be
2944: * larger than one word (if the `leading digit' of d1 after shifting is all
2945: * zeros), we return 0 and do not bother to assign anything to the last four
2946: * args.
2947: *
2948: * The division chain might (sometimes) even run to completion. It will be
2949: * up to the caller to detect this case.
2950: *
2951: * This routine does _not_ change d or d1; this will also be up to the caller.
2952: *
2953: * Note that this routine does not know and does not need to know about the
2954: * PARI stack.
2955: */
2956: /*#define DEBUG_LEHMER 1 */
2957: int
2958: lgcdii(ulong* d, ulong* d1,
2959: ulong* u, ulong* u1, ulong* v, ulong* v1,
2960: ulong vmax)
2961: {
2962: /* Strategy: (1) Extract/shift most significant bits. We assume that d
2963: * has at least two significant words, but we can cope with a one-word d1.
2964: * Let dd,dd1 be the most significant dividend word and matching part of the
2965: * divisor.
2966: * (2) Check for overflow on the first division. For our purposes, this
2967: * happens when the upper half of dd1 is zero. (Actually this is detected
2968: * during extraction.)
2969: * (3) Get a fix on the first quotient. We compute q = floor(dd/dd1), which
2970: * is an upper bound for floor(d/d1), and which gives the true value of the
2971: * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
2972: * (If it isn't, we give up. This is annoying because the subsequent full
2973: * division will repeat some work already done, but it happens very infre-
2974: * quently. Doing the extra-bit-fetch in this case would be awkward.)
2975: * (4) Finish initializations.
2976: *
2977: * The remainder of the action is comparatively boring... The main loop has
2978: * been unrolled once (so we don't swap things and we can apply Jebelean's
2979: * termination conditions which alternatingly take two different forms during
2980: * successive iterations). When we first run out of sufficient bits to form
2981: * a quotient, and have an extra word of each operand, we pull out two whole
2982: * word's worth of dividend bits, and divisor bits of matching significance;
2983: * to these we apply our partial matrix (disregarding overflow because the
2984: * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
2985: * re-extract one word's worth of the current dividend and a matching amount
2986: * of divisor bits. The affair will normally terminate with matrix entries
2987: * just short of a whole word. (We terminate the inner loop before these can
2988: * possibly overflow.)
2989: */
2990: ulong dd,dd1,ddlo,dd1lo, sh,shc; /* `digits', shift count */
2991: ulong xu,xu1, xv,xv1, q,res; /* recurrences, partial quotient, count */
2992: ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
2993: long ld, ld1, lz; /* t_INT effective lengths */
2994: int skip = 0; /* a boolean flag */
2995: LOCAL_OVERFLOW;
2996: LOCAL_HIREMAINDER;
2997:
2998: #ifdef DEBUG_LEHMER
2999: voir(d, -1); voir(d1, -1);
3000: #endif
3001: /* following is just for convenience: vmax==0 means no bound */
3002: if (vmax == 0) vmax = MAXULONG;
3003: ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
3004: if (lz > 1) return 0; /* rare, quick and desperate exit */
3005:
3006: d += 2; d1 += 2; /* point at the leading `digits' */
3007: dd1lo = 0; /* unless we find something better */
3008: sh = bfffo(*d); /* obtain dividend left shift count */
3009:
3010: if (sh)
3011: { /* do the shifting */
3012: shc = BITS_IN_LONG - sh;
3013: if (lz)
3014: { /* dividend longer than divisor */
3015: dd1 = (*d1 >> shc);
3016: if (!(HIGHMASK & dd1)) return 0; /* overflow detected */
3017: if (ld1 > 3)
3018: dd1lo = (*d1 << sh) + (d1[1] >> shc);
3019: else
3020: dd1lo = (*d1 << sh);
3021: }
3022: else
3023: { /* dividend and divisor have the same length */
3024: /* dd1 = shiftl(d1,sh) would have left hiremainder==0, and dd1 != 0. */
3025: dd1 = (*d1 << sh);
3026: if (!(HIGHMASK & dd1)) return 0;
3027: if (ld1 > 3)
3028: {
3029: dd1 += (d1[1] >> shc);
3030: if (ld1 > 4)
3031: dd1lo = (d1[1] << sh) + (d1[2] >> shc);
3032: else
3033: dd1lo = (d1[1] << sh);
3034: }
3035: }
3036: /* following lines assume d to have 2 or more significant words */
3037: dd = (*d << sh) + (d[1] >> shc);
3038: if (ld > 4)
3039: ddlo = (d[1] << sh) + (d[2] >> shc);
3040: else
3041: ddlo = (d[1] << sh);
3042: }
3043: else
3044: { /* no shift needed */
3045: if (lz) return 0; /* div'd longer than div'r: o'flow automatic */
3046: dd1 = *d1;
3047: if (!(HIGHMASK & dd1)) return 0;
3048: if(ld1 > 3) dd1lo = d1[1];
3049: /* assume again that d has another significant word */
3050: dd = *d; ddlo = d[1];
3051: }
3052: #ifdef DEBUG_LEHMER
3053: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
3054: #endif
3055:
3056: /* First subtraction/division stage. (If a subtraction initially suffices,
3057: * we don't divide at all.) If a Jebelean condition is violated, and we
3058: * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
3059: * give up and ask for a full division. Otherwise we commit the result,
3060: * possibly deciding to re-shift immediately afterwards.
3061: */
3062: dd -= dd1;
3063: if (dd < dd1)
3064: { /* first quotient known to be == 1 */
3065: xv1 = 1UL;
3066: if (!dd) /* !(Jebelean condition), extraspecial case */
3067: { /* note this can actually happen... Now
3068: * q==1 is known, but we underflow already.
3069: * OTOH we've just shortened d by a whole word.
3070: * Thus we feel pleased with ourselves and
3071: * return. (The re-shift code below would
3072: * do so anyway.) */
3073: *u = 0; *v = *u1 = *v1 = 1UL;
3074: return -1; /* Next step will be a full division. */
3075: } /* if !(Jebelean) then */
3076: }
3077: else
3078: { /* division indicated */
3079: hiremainder = 0;
3080: xv1 = 1 + divll(dd, dd1); /* xv1: alternative spelling of `q', here ;) */
3081: dd = hiremainder;
3082: if (dd < xv1) /* !(Jebelean cond'), non-extra special case */
3083: { /* Attempt to complete the division using the
3084: * less significant bits, before skipping right
3085: * past the 1st loop to the reshift stage. */
3086: ddlo = subll(ddlo, mulll(xv1, dd1lo));
3087: dd = subllx(dd, hiremainder);
3088:
3089: /* If we now have an overflow, q was _certainly_ too large. Thanks to
3090: * our decision not to get here unless the original dd1 had bits set in
3091: * the upper half of the word, however, we now do know that the correct
3092: * quotient is in fact q-1. Adjust our data accordingly. */
3093: if (overflow)
3094: {
3095: xv1--;
3096: ddlo = addll(ddlo,dd1lo);
3097: dd = addllx(dd,dd1); /* overflows again which cancels the borrow */
3098: /* ...and fall through to skip=1 below */
3099: }
3100: else
3101: /* Test Jebelean condition anew, at this point using _all_ the extracted
3102: * bits we have. This is clutching at straws; we have a more or less
3103: * even chance of succeeding this time. Note that if we fail, we really
3104: * do not know whether the correct quotient would have been q or some
3105: * smaller value. */
3106: if (!dd && ddlo < xv1) return 0;
3107:
3108: /* Otherwise, we now know that q is correct, but we cannot go into the
3109: * 1st loop. Raise a flag so we'll remember to skip past the loop.
3110: * Get here also after the q-1 adjustment case. */
3111: skip = 1;
3112: } /* if !(Jebelean) then */
3113: }
3114: res = 1;
3115: #ifdef DEBUG_LEHMER
3116: fprintferr(" q = %ld, %lx, %lx\n", xv1, dd1, dd);
3117: #endif
3118: if (xv1 > vmax)
3119: { /* gone past the bound already */
3120: *u = 0UL; *u1 = 1UL; *v = 1UL; *v1 = xv1;
3121: return res;
3122: }
3123: xu = 0UL; xv = xu1 = 1UL;
3124:
3125: /* Some invariants from here across the first loop:
3126: *
3127: * At this point, and again after we are finished with the first loop and
3128: * subsequent conditional, a division and the associated update of the
3129: * recurrence matrix have just been carried out completely. The matrix
3130: * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
3131: * columns), and the current remainder == next divisor (dd at the moment)
3132: * is nonzero (it might be zero here, but then skip will have been set).
3133: *
3134: * After the first loop, or when skip is set already, it will also be the
3135: * case that there aren't sufficiently many bits to continue without re-
3136: * shifting. If the divisor after reshifting is zero, or indeed if it
3137: * doesn't have more than half a word's worth of bits, we will have to
3138: * return at that point. Otherwise, we proceed into the second loop.
3139: *
3140: * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
3141: * already reflect the result of applying the current matrix to the old
3142: * ddorig:ddlo and dd1orig:dd1lo. (For the first iteration above, this
3143: * was easy to achieve, and we didn't even need to peek into the (now
3144: * no longer existent!) saved words. After the loop, we'll stop for a
3145: * moment to merge in the ddlo,dd1lo contributions.)
3146: *
3147: * Note that after the first division, even an a priori quotient of 1 cannot
3148: * be trusted until we've checked Jebelean's condition -- it cannot be too
3149: * large, of course, but it might be too small.
3150: */
3151:
3152: if (!skip)
3153: {
3154: for(;;)
3155: {
3156: /* First half of loop divides dd into dd1, and leaves the recurrence
3157: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
3158: * entries) when successful. */
3159: tmpd = dd1 - dd;
3160: if (tmpd < dd)
3161: { /* quotient suspected to be 1 */
3162: #ifdef DEBUG_LEHMER
3163: q = 1;
3164: #endif
3165: tmpu = xu + xu1; /* cannot overflow -- everything bounded by
3166: * the original dd during first loop */
3167: tmpv = xv + xv1;
3168: }
3169: else
3170: { /* division indicated */
3171: hiremainder = 0;
3172: q = 1 + divll(tmpd, dd);
3173: tmpd = hiremainder;
3174: tmpu = xu + q*xu1; /* can't overflow, but may need to be undone */
3175: tmpv = xv + q*xv1;
3176: }
3177:
3178: tmp0 = addll(tmpv, xv1);
3179: if ((tmpd < tmpu) || overflow ||
3180: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
3181: break; /* skip ahead to reshift stage */
3182: else
3183: { /* commit dd1, xu, xv */
3184: res++;
3185: dd1 = tmpd; xu = tmpu; xv = tmpv;
3186: #ifdef DEBUG_LEHMER
3187: fprintferr(" q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
3188: q, dd, dd1, xu1, xu, xv1, xv);
3189: #endif
3190: if (xv > vmax)
3191: { /* time to return */
3192: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3193: return res;
3194: }
3195: }
3196:
3197: /* Second half of loop divides dd1 into dd, and the matrix returns to its
3198: * normal arrangement. */
3199: tmpd = dd - dd1;
3200: if (tmpd < dd1)
3201: { /* quotient suspected to be 1 */
3202: #ifdef DEBUG_LEHMER
3203: q = 1;
3204: #endif
3205: tmpu = xu1 + xu; /* cannot overflow */
3206: tmpv = xv1 + xv;
3207: }
3208: else
3209: { /* division indicated */
3210: hiremainder = 0;
3211: q = 1 + divll(tmpd, dd1);
3212: tmpd = hiremainder;
3213: tmpu = xu1 + q*xu;
3214: tmpv = xv1 + q*xv;
3215: }
3216:
3217: tmp0 = addll(tmpu, xu);
3218: if ((tmpd < tmpv) || overflow ||
3219: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
3220: break; /* skip ahead to reshift stage */
3221: else
3222: { /* commit dd, xu1, xv1 */
3223: res++;
3224: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
3225: #ifdef DEBUG_LEHMER
3226: fprintferr(" q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
3227: q, dd1, dd, xu, xu1, xv, xv1);
3228: #endif
3229: if (xv1 > vmax)
3230: { /* time to return */
3231: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3232: return res;
3233: }
3234: }
3235:
3236: } /* end of first loop */
3237:
3238: /* Intermezzo: update dd:ddlo, dd1:dd1lo. (But not if skip is set.) */
3239:
3240: if (res&1)
3241: {
3242: /* after failed division in 1st half of loop:
3243: * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
3244: * * [ -xu, xu1 ; xv, -xv1 ]
3245: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
3246: * add the high-order remainders + overflows onto [dd1,dd].)
3247: */
3248: tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
3249: tmp1 = subll(mulll(dd1lo,xv), tmp1);
3250: dd1 += subllx(hiremainder, tmp0);
3251: tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
3252: ddlo = subll(tmp2, mulll(dd1lo,xv1));
3253: dd += subllx(tmp0, hiremainder);
3254: dd1lo = tmp1;
3255: }
3256: else
3257: {
3258: /* after failed division in 2nd half of loop:
3259: * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
3260: * * [ xu1, -xu ; -xv1, xv ]
3261: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
3262: * add the high-order remainders + overflows onto [dd,dd1].)
3263: */
3264: tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
3265: tmp1 = subll(tmp1, mulll(dd1lo,xv1));
3266: dd += subllx(tmp0, hiremainder);
3267: tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
3268: dd1lo = subll(mulll(dd1lo,xv), tmp2);
3269: dd1 += subllx(hiremainder, tmp0);
3270: ddlo = tmp1;
3271: }
3272: #ifdef DEBUG_LEHMER
3273: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
3274: #endif
3275: } /* end of skip-pable section: get here also, with res==1, when there
3276: * was a problem immediately after the very first division. */
3277:
3278: /* Re-shift. Note: the shift count _can_ be zero, viz. under the following
3279: * precise conditions: The original dd1 had its topmost bit set, so the 1st
3280: * q was 1, and after subtraction, dd had its topmost bit unset. If now
3281: * dd==0, we'd have taken the return exit already, so we couldn't have got
3282: * here. If not, then it must have been the second division which has gone
3283: * amiss (because dd1 was very close to an exact multiple of the remainder
3284: * dd value, so this will be very rare). At this point, we'd have a fairly
3285: * slim chance of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but
3286: * this is not guaranteed to work. Instead of trying, we return at once.
3287: * The caller will see to it that the initial subtraction is re-done using
3288: * _all_ the bits of both operands, which already helps, and the next round
3289: * will either be a full division (if dd occupied a halfword or less), or
3290: * another llgcdii() first step. In the latter case, since we try a little
3291: * harder during our first step, we may actually be able to fix the problem,
3292: * and get here again with improved low-order bits and with another step
3293: * under our belt. Otherwise we'll have given up above and forced a full-
3294: * blown division.
3295: *
3296: * If res is even, the shift count _cannot_ be zero. (The first step forces
3297: * a zero into the remainder's MSB, and all subsequent remainders will have
3298: * inherited it.)
3299: *
3300: * The re-shift stage exits if the next divisor has at most half a word's
3301: * worth of bits.
3302: *
3303: * For didactic reasons, the second loop will be arranged in the same way
3304: * as the first -- beginning with the division of dd into dd1, as if res
3305: * was odd. To cater for this, if res is actually even, we swap things
3306: * around during reshifting. (During the second loop, the parity of res
3307: * does not matter; we know in which half of the loop we are when we decide
3308: * to return.)
3309: */
3310: #ifdef DEBUG_LEHMER
3311: fprintferr("(sh)");
3312: #endif
3313:
3314: if (res&1)
3315: { /* after odd number of division(s) */
3316: if (dd1 && (sh = bfffo(dd1)))
3317: {
3318: shc = BITS_IN_LONG - sh;
3319: dd = (ddlo >> shc) + (dd << sh);
3320: if (!(HIGHMASK & dd))
3321: {
3322: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3323: return -res; /* full division asked for */
3324: }
3325: dd1 = (dd1lo >> shc) + (dd1 << sh);
3326: }
3327: else
3328: { /* time to return: <= 1 word left, or sh==0 */
3329: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3330: return res;
3331: }
3332: }
3333: else
3334: { /* after even number of divisions */
3335: if (dd)
3336: {
3337: sh = bfffo(dd); /* Known to be positive. */
3338: shc = BITS_IN_LONG - sh;
3339: /* dd:ddlo will become the new dd1, and v.v. */
3340: tmpd = (ddlo >> shc) + (dd << sh);
3341: dd = (dd1lo >> shc) + (dd1 << sh);
3342: dd1 = tmpd;
3343: /* This has completed the swap; now dd is again the current divisor.
3344: * The following test originally inspected dd1 -- a most subtle and
3345: * most annoying bug. The Management. */
3346: if (HIGHMASK & dd)
3347: {
3348: /* recurrence matrix is the wrong way round; swap it. */
3349: tmp0 = xu; xu = xu1; xu1 = tmp0;
3350: tmp0 = xv; xv = xv1; xv1 = tmp0;
3351: }
3352: else
3353: {
3354: /* recurrence matrix is the wrong way round; fix this. */
3355: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3356: return -res; /* full division asked for */
3357: }
3358: }
3359: else
3360: { /* time to return: <= 1 word left */
3361: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3362: return res;
3363: }
3364: } /* end reshift */
3365:
3366: #ifdef DEBUG_LEHMER
3367: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
3368: #endif
3369:
3370: /* The Second Loop. Rip-off of the first, but we now check for overflow
3371: * in the recurrences. Returns instead of breaking when we cannot fix the
3372: * quotient any longer.
3373: */
3374:
3375: for(;;)
3376: {
3377: /* First half of loop divides dd into dd1, and leaves the recurrence
3378: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
3379: * entries) when successful. */
3380: tmpd = dd1 - dd;
3381: if (tmpd < dd)
3382: { /* quotient suspected to be 1 */
3383: #ifdef DEBUG_LEHMER
3384: q = 1;
3385: #endif
3386: tmpu = xu + xu1;
3387: tmpv = addll(xv, xv1); /* xv,xv1 will overflow first */
3388: tmp1 = overflow;
3389: }
3390: else
3391: { /* division indicated */
3392: hiremainder = 0;
3393: q = 1 + divll(tmpd, dd);
3394: tmpd = hiremainder;
3395: tmpu = xu + q*xu1;
3396: tmpv = addll(xv, mulll(q,xv1));
3397: tmp1 = overflow | hiremainder;
3398: }
3399:
3400: tmp0 = addll(tmpv, xv1);
3401: if ((tmpd < tmpu) || overflow || tmp1 ||
3402: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
3403: {
3404: /* The recurrence matrix has not yet been warped... */
3405: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3406: break;
3407: }
3408: /* commit dd1, xu, xv */
3409: res++;
3410: dd1 = tmpd; xu = tmpu; xv = tmpv;
3411: #ifdef DEBUG_LEHMER
3412: fprintferr(" q = %ld, %lx, %lx\n", q, dd, dd1);
3413: #endif
3414: if (xv > vmax)
3415: { /* time to return */
3416: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3417: return res;
3418: }
3419:
3420: /* Second half of loop divides dd1 into dd, and the matrix returns to its
3421: * normal arrangement. */
3422: tmpd = dd - dd1;
3423: if (tmpd < dd1)
3424: { /* quotient suspected to be 1 */
3425: #ifdef DEBUG_LEHMER
3426: q = 1;
3427: #endif
3428: tmpu = xu1 + xu;
3429: tmpv = addll(xv1, xv);
3430: tmp1 = overflow;
3431: }
3432: else
3433: { /* division indicated */
3434: hiremainder = 0;
3435: q = 1 + divll(tmpd, dd1);
3436: tmpd = hiremainder;
3437: tmpu = xu1 + q*xu;
3438: tmpv = addll(xv1, mulll(q, xv));
3439: tmp1 = overflow | hiremainder;
3440: }
3441:
3442: tmp0 = addll(tmpu, xu);
3443: if ((tmpd < tmpv) || overflow || tmp1 ||
3444: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
3445: {
3446: /* The recurrence matrix has not yet been unwarped, so it is
3447: * the wrong way round; fix this. */
3448: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3449: break;
3450: }
1.2 ! noro 3451:
1.1 noro 3452: res++; /* commit dd, xu1, xv1 */
3453: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
3454: #ifdef DEBUG_LEHMER
3455: fprintferr(" q = %ld, %lx, %lx\n", q, dd1, dd);
3456: #endif
3457: if (xv1 > vmax)
3458: { /* time to return */
3459: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3460: return res;
3461: }
3462: } /* end of second loop */
3463:
3464: return res;
3465: }
3466:
3467: /*==================================
3468: * invmod(a,b,res)
3469: *==================================
3470:  * If a is invertible mod b, return 1, and set res = a^{-1} mod b
3471: * Otherwise, return 0, and set res = gcd(a,b)
1.2 ! noro 3472: *
1.1 noro 3473: * This is sufficiently different from bezout() to be implemented separately
3474: * instead of having a bunch of extra conditionals in a single function body
3475: * to meet both purposes.
3476: */
3477:
3478: int
3479: invmod(GEN a, GEN b, GEN *res)
3480: {
3481: GEN v,v1,d,d1,q,r;
1.2 ! noro 3482: gpmem_t av, av1, lim;
1.1 noro 3483: long s;
3484: ulong g;
3485: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
3486: int lhmres; /* Lehmer stage return value */
3487:
3488: if (typ(a) != t_INT || typ(b) != t_INT) err(arither1);
3489: if (!signe(b)) { *res=absi(a); return 0; }
3490: av = avma;
3491: if (lgefint(b) == 3) /* single-word affair */
3492: {
3493: d1 = modiu(a, (ulong)(b[2]));
3494: if (d1 == gzero)
3495: {
3496: if (b[2] == 1L)
3497: { *res = gzero; return 1; }
3498: else
3499: { *res = absi(b); return 0; }
3500: }
3501: g = xgcduu((ulong)(b[2]), (ulong)(d1[2]), 1, &xv, &xv1, &s);
3502: #ifdef DEBUG_LEHMER
3503: fprintferr(" <- %lu,%lu\n", (ulong)(b[2]), (ulong)(d1[2]));
3504: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
3505: #endif
3506: avma = av;
3507: if (g != 1UL) { *res = utoi(g); return 0; }
3508: xv = xv1 % (ulong)(b[2]); if (s < 0) xv = ((ulong)(b[2])) - xv;
3509: *res = utoi(xv); return 1;
3510: }
3511:
3512: (void)new_chunk(lgefint(b));
3513: d = absi(b); d1 = modii(a,d);
3514:
3515: v=gzero; v1=gun; /* general case */
3516: #ifdef DEBUG_LEHMER
3517: fprintferr("INVERT: -------------------------\n");
3518: output(d1);
3519: #endif
3520: av1 = avma; lim = stack_lim(av,1);
3521:
3522: while (lgefint(d) > 3 && signe(d1))
3523: {
3524: #ifdef DEBUG_LEHMER
3525: fprintferr("Calling Lehmer:\n");
3526: #endif
3527: lhmres = lgcdii((ulong*)d, (ulong*)d1, &xu, &xu1, &xv, &xv1, MAXULONG);
3528: if (lhmres != 0) /* check progress */
3529: { /* apply matrix */
3530: #ifdef DEBUG_LEHMER
3531: fprintferr("Lehmer returned %d [%lu,%lu;%lu,%lu].\n",
3532: lhmres, xu, xu1, xv, xv1);
3533: #endif
3534: if ((lhmres == 1) || (lhmres == -1))
3535: {
3536: if (xv1 == 1)
3537: {
3538: r = subii(d,d1); d=d1; d1=r;
3539: a = subii(v,v1); v=v1; v1=a;
3540: }
3541: else
3542: {
3543: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
3544: a = subii(v, mului(xv1,v1)); v=v1; v1=a;
3545: }
3546: }
3547: else
3548: {
3549: r = subii(muliu(d,xu), muliu(d1,xv));
3550: a = subii(muliu(v,xu), muliu(v1,xv));
3551: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
3552: v1 = subii(muliu(v,xu1), muliu(v1,xv1)); v = a;
3553: if (lhmres&1)
3554: {
3555: setsigne(d,-signe(d));
3556: setsigne(v,-signe(v));
3557: }
3558: else
3559: {
3560: if (signe(d1)) { setsigne(d1,-signe(d1)); }
3561: setsigne(v1,-signe(v1));
3562: }
3563: }
3564: }
3565: #ifdef DEBUG_LEHMER
3566: else
3567: fprintferr("Lehmer returned 0.\n");
3568: output(d); output(d1); output(v); output(v1);
3569: sleep(1);
3570: #endif
3571:
3572: if (lhmres <= 0 && signe(d1))
3573: {
3574: q = dvmdii(d,d1,&r);
3575: #ifdef DEBUG_LEHMER
3576: fprintferr("Full division:\n");
3577: printf(" q = "); output(q); sleep (1);
3578: #endif
3579: a = subii(v,mulii(q,v1));
3580: v=v1; v1=a;
3581: d=d1; d1=r;
3582: }
3583: if (low_stack(lim, stack_lim(av,1)))
3584: {
3585: GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
3586: if(DEBUGMEM>1) err(warnmem,"invmod");
3587: gerepilemany(av1,gptr,4);
3588: }
3589: } /* end while */
3590:
3591: /* Postprocessing - final sprint */
3592: if (signe(d1))
3593: {
3594: /* Assertions: lgefint(d)==lgefint(d1)==3, and
3595: * gcd(d,d1) is nonzero and fits into one word
3596: */
3597: g = xxgcduu((ulong)(d[2]), (ulong)(d1[2]), 1, &xu, &xu1, &xv, &xv1, &s);
3598: #ifdef DEBUG_LEHMER
3599: output(d);output(d1);output(v);output(v1);
3600: fprintferr(" <- %lu,%lu\n", (ulong)(d[2]), (ulong)(d1[2]));
3601: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
3602: #endif
3603: if (g != 1UL) { avma = av; *res = utoi(g); return 0; }
3604: /* (From the xgcduu() blurb:)
3605: * For finishing the multiword modinv, we now have to multiply the
3606: * returned matrix (with properly adjusted signs) onto the values
3607: * v' and v1' previously obtained from the multiword division steps.
3608: * Actually, it is sufficient to take the scalar product of [v',v1']
3609: * with [u1,-v1], and change the sign if s==1.
3610: */
3611: v = subii(muliu(v,xu1),muliu(v1,xv1));
3612: if (s > 0) setsigne(v,-signe(v));
3613: avma = av; *res = modii(v,b);
3614: #ifdef DEBUG_LEHMER
3615:   output(*res); fprintferr("============================Done.\n");
3616: sleep(1);
3617: #endif
3618: return 1;
3619: }
3620: /* get here when the final sprint was skipped (d1 was zero already) */
3621: avma = av;
3622: if (!egalii(d,gun)) { *res = icopy(d); return 0; }
3623: *res = modii(v,b);
3624: #ifdef DEBUG_LEHMER
3625: output(*res); fprintferr("============================Done.\n");
3626: sleep(1);
3627: #endif
3628: return 1;
3629: }
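
/* Usage sketch (disabled): invmod() on small operands, using the same utoi()
 * helper the routine itself relies on.  The names below are ad hoc. */
#if 0
static void
invmod_example(void)
{
  GEN inv;
  if (invmod(utoi(3UL), utoi(7UL), &inv))
  { /* invertible: inv is 3^(-1) mod 7, i.e. 5, since 3*5 = 15 = 1 (mod 7) */ }
  else
  { /* not invertible: inv holds gcd(3,7) instead */ }
}
#endif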
3630:
3631: /*==================================
3632: * bezout(a,b,pu,pv)
3633: *==================================
3634: * Return g = gcd(a,b) >= 0, and assign GENs u,v through pointers pu,pv
3635: * such that g = u*a + v*b.
3636: * Special cases:
3637: * a == b == 0 ==> pick u=1, v=0
3638: * a != 0 == b ==> keep v=0
3639: * a == 0 != b ==> keep u=0
3640: * |a| == |b| != 0 ==> keep u=0, set v=+-1
3641: * Assignments through pu,pv will be suppressed when the corresponding
3642: * pointer is NULL (but the computations will happen nonetheless).
3643: */
3644:
3645: GEN
3646: bezout(GEN a, GEN b, GEN *pu, GEN *pv)
3647: {
3648: GEN t,u,u1,v,v1,d,d1,q,r;
3649: GEN *pt;
1.2 ! noro 3650: gpmem_t av, av1, lim;
1.1 noro 3651: long s;
3652: int sa, sb;
3653: ulong g;
3654: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
3655: int lhmres; /* Lehmer stage return value */
3656:
3657: if (typ(a) != t_INT || typ(b) != t_INT) err(arither1);
3658: s = absi_cmp(a,b);
3659: if (s < 0)
3660: {
3661: t=b; b=a; a=t;
3662: pt=pu; pu=pv; pv=pt;
3663: }
3664: /* now |a| >= |b| */
3665:
3666: sa = signe(a); sb = signe(b);
3667: if (!sb)
3668: {
3669: if (pv != NULL) *pv = gzero;
3670: switch(sa)
3671: {
3672: case 0:
3673: if (pu != NULL) *pu = gun; return gzero;
3674: case 1:
3675: if (pu != NULL) *pu = gun; return icopy(a);
3676: case -1:
3677: if (pu != NULL) *pu = negi(gun); return(negi(a));
3678: }
3679: }
3680: if (s == 0) /* |a| == |b| != 0 */
3681: {
3682: if (pu != NULL) *pu = gzero;
3683: if (sb > 0)
3684: {
3685: if (pv != NULL) *pv = gun; return icopy(b);
3686: }
3687: else
3688: {
3689: if (pv != NULL) *pv = negi(gun); return(negi(b));
3690: }
3691: }
3692: /* now |a| > |b| > 0 */
3693:
3694: if (lgefint(a) == 3) /* single-word affair */
3695: {
3696: g = xxgcduu((ulong)(a[2]), (ulong)(b[2]), 0, &xu, &xu1, &xv, &xv1, &s);
3697: sa = s > 0 ? sa : -sa;
3698: sb = s > 0 ? -sb : sb;
3699: if (pu != NULL)
3700: {
3701: if (xu == 0) *pu = gzero; /* can happen when b divides a */
3702: else if (xu == 1) *pu = sa < 0 ? negi(gun) : gun;
3703: else if (xu == 2) *pu = sa < 0 ? negi(gdeux) : gdeux;
3704: else
3705: {
3706: *pu = cgeti(3);
3707: (*pu)[1] = evalsigne(sa)|evallgefint(3);
3708: (*pu)[2] = xu;
3709: }
3710: }
3711: if (pv != NULL)
3712: {
3713: if (xv == 1) *pv = sb < 0 ? negi(gun) : gun;
3714: else if (xv == 2) *pv = sb < 0 ? negi(gdeux) : gdeux;
3715: else
3716: {
3717: *pv = cgeti(3);
3718: (*pv)[1] = evalsigne(sb)|evallgefint(3);
3719: (*pv)[2] = xv;
3720: }
3721: }
3722: if (g == 1) return gun;
3723: else if (g == 2) return gdeux;
3724: else
3725: {
3726: r = cgeti(3);
3727: r[1] = evalsigne(1)|evallgefint(3);
3728: r[2] = g;
3729: return r;
3730: }
3731: }
3732:
3733: /* general case */
3734: av = avma;
3735: (void)new_chunk(lgefint(b) + (lgefint(a)<<1)); /* room for u,v,gcd */
3736: /* if a is significantly larger than b, calling lgcdii() is not the best
3737: * way to start -- reduce a mod b first
3738: */
3739: if (lgefint(a) > lgefint(b))
3740: {
3741: d = absi(b), q = dvmdii(absi(a), d, &d1);
3742: if (!signe(d1)) /* a == qb */
3743: {
3744: avma = av;
3745: if (pu != NULL) *pu = gzero;
3746: if (pv != NULL) *pv = sb < 0 ? negi(gun) : gun;
3747: return (icopy(d));
3748: }
3749: else
3750: {
3751: u = gzero;
3752: u1 = v = gun;
3753: v1 = negi(q);
3754: }
3755: /* if this results in lgefint(d) == 3, will fall past main loop */
3756: }
3757: else
3758: {
3759: d = absi(a); d1 = absi(b);
3760: u = v1 = gun; u1 = v = gzero;
3761: }
3762: av1 = avma; lim = stack_lim(av, 1);
3763:
3764: /* main loop is almost identical to that of invmod() */
3765: while (lgefint(d) > 3 && signe(d1))
3766: {
3767: lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, MAXULONG);
3768: if (lhmres != 0) /* check progress */
3769: { /* apply matrix */
3770: if ((lhmres == 1) || (lhmres == -1))
3771: {
3772: if (xv1 == 1)
3773: {
3774: r = subii(d,d1); d=d1; d1=r;
3775: a = subii(u,u1); u=u1; u1=a;
3776: a = subii(v,v1); v=v1; v1=a;
3777: }
3778: else
3779: {
3780: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
3781: a = subii(u, mului(xv1,u1)); u=u1; u1=a;
3782: a = subii(v, mului(xv1,v1)); v=v1; v1=a;
3783: }
3784: }
3785: else
3786: {
3787: r = subii(muliu(d,xu), muliu(d1,xv));
3788: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
3789: a = subii(muliu(u,xu), muliu(u1,xv));
3790: u1 = subii(muliu(u,xu1), muliu(u1,xv1)); u = a;
3791: a = subii(muliu(v,xu), muliu(v1,xv));
3792: v1 = subii(muliu(v,xu1), muliu(v1,xv1)); v = a;
3793: if (lhmres&1)
3794: {
3795: setsigne(d,-signe(d));
3796: setsigne(u,-signe(u));
3797: setsigne(v,-signe(v));
3798: }
3799: else
3800: {
3801: if (signe(d1)) { setsigne(d1,-signe(d1)); }
3802: setsigne(u1,-signe(u1));
3803: setsigne(v1,-signe(v1));
3804: }
3805: }
3806: }
3807: if (lhmres <= 0 && signe(d1))
3808: {
3809: q = dvmdii(d,d1,&r);
3810: #ifdef DEBUG_LEHMER
3811: fprintferr("Full division:\n");
3812: printf(" q = "); output(q); sleep (1);
3813: #endif
3814: a = subii(u,mulii(q,u1));
3815: u=u1; u1=a;
3816: a = subii(v,mulii(q,v1));
3817: v=v1; v1=a;
3818: d=d1; d1=r;
3819: }
3820: if (low_stack(lim, stack_lim(av,1)))
3821: {
3822: GEN *gptr[6]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&u; gptr[3]=&u1;
3823: gptr[4]=&v; gptr[5]=&v1;
3824: if(DEBUGMEM>1) err(warnmem,"bezout");
3825: gerepilemany(av1,gptr,6);
3826: }
3827: } /* end while */
3828:
3829: /* Postprocessing - final sprint */
3830: if (signe(d1))
3831: {
3832: /* Assertions: lgefint(d)==lgefint(d1)==3, and
3833: * gcd(d,d1) is nonzero and fits into one word
3834: */
3835: g = xxgcduu((ulong)(d[2]), (ulong)(d1[2]), 0, &xu, &xu1, &xv, &xv1, &s);
3836: u = subii(muliu(u,xu), muliu(u1, xv));
3837: v = subii(muliu(v,xu), muliu(v1, xv));
3838: if (s < 0) { sa = -sa; sb = -sb; }
3839: avma = av;
3840: if (pu != NULL) *pu = sa < 0 ? negi(u) : icopy(u);
3841: if (pv != NULL) *pv = sb < 0 ? negi(v) : icopy(v);
3842: if (g == 1) return gun;
3843: else if (g == 2) return gdeux;
3844: else
3845: {
3846: r = cgeti(3);
3847: r[1] = evalsigne(1)|evallgefint(3);
3848: r[2] = g;
3849: return r;
3850: }
3851: }
3852: /* get here when the final sprint was skipped (d1 was zero already).
3853: * Now the matrix is final, and d contains the gcd.
3854: */
3855: avma = av;
3856: if (pu != NULL) *pu = sa < 0 ? negi(u) : icopy(u);
3857: if (pv != NULL) *pv = sb < 0 ? negi(v) : icopy(v);
3858: return icopy(d);
3859: }
3860:
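/* Illustrative sketch, not part of the original kernel: one way a caller might
 * use bezout() above, assuming its signature is bezout(GEN a, GEN b, GEN *pu,
 * GEN *pv) as the code suggests, and that pari_init() has been called.  The
 * function name bezout_example and the sample operands 42 and 30 are ours; the
 * returned g should satisfy g == u*a + v*b. */
static void
bezout_example(void)
{
  gpmem_t av = avma;
  GEN u, v, a = stoi(42), b = stoi(30);
  GEN g = bezout(a, b, &u, &v); /* g = gcd(a,b) = 6 here */
  if (!egalii(g, addii(mulii(u,a), mulii(v,b))))
    err(talker, "bezout_example: Bezout identity check failed");
  output(g); output(u); output(v);
  avma = av; /* discard everything created in this example */
}
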
3861: /*==================================
3862: * cbezout(a,b,uu,vv)
3863: *==================================
3864: * Same as bezout() but for C longs.
3865: * Return g = gcd(a,b) >= 0, and assign longs u,v through pointers uu,vv
3866: * such that g = u*a + v*b.
3867: * Special cases:
              3868:  *    a == b == 0 ==> pick u=1, v=0 (and return 0)
3869: * a != 0 == b ==> keep v=0
3870: * a == 0 != b ==> keep u=0
3871: * |a| == |b| != 0 ==> keep u=0, set v=+-1
3872: * Assignments through uu,vv happen unconditionally; non-NULL pointers
3873: * _must_ be used.
3874: */
3875: long
3876: cbezout(long a,long b,long *uu,long *vv)
3877: {
3878: long s,*t;
3879: ulong d = labs(a), d1 = labs(b);
3880: ulong r,u,u1,v,v1;
3881:
3882: #ifdef DEBUG_CBEZOUT
3883: fprintferr("> cbezout(%ld,%ld,%p,%p)\n", a, b, (void *)uu, (void *)vv);
3884: #endif
3885: if (!b)
3886: {
3887: *vv=0L;
3888: if (!a)
3889: {
3890: *uu=1L;
3891: #ifdef DEBUG_CBEZOUT
              3892:       fprintferr("< %ld (%ld, %ld)\n", 0L, *uu, *vv);
3893: #endif
3894: return 0L;
3895: }
3896: *uu = a < 0 ? -1L : 1L;
3897: #ifdef DEBUG_CBEZOUT
3898: fprintferr("< %ld (%ld, %ld)\n", (long)d, *uu, *vv);
3899: #endif
3900: return (long)d;
3901: }
3902: else if (!a || (d == d1))
3903: {
3904: *uu = 0L; *vv = b < 0 ? -1L : 1L;
3905: #ifdef DEBUG_CBEZOUT
3906: fprintferr("< %ld (%ld, %ld)\n", (long)d1, *uu, *vv);
3907: #endif
3908: return (long)d1;
3909: }
3910: else if (d == 1) /* frequently used by nfinit */
3911: {
3912: *uu = a; *vv = 0L;
3913: #ifdef DEBUG_CBEZOUT
3914: fprintferr("< %ld (%ld, %ld)\n", 1L, *uu, *vv);
3915: #endif
3916: return 1L;
3917: }
3918: else if (d < d1)
3919: {
3920: /* bug in gcc-2.95.3:
3921: * s = a; a = b; b = s; produces wrong result a = b. This is OK: */
3922: { long _x = a; a = b; b = _x; } /* in order to keep the right signs */
3923: r = d; d = d1; d1 = r;
3924: t = uu; uu = vv; vv = t;
3925: #ifdef DEBUG_CBEZOUT
3926: fprintferr(" swapping\n");
3927: #endif
3928: }
3929: /* d > d1 > 0 */
3930: r = xxgcduu(d, d1, 0, &u, &u1, &v, &v1, &s);
3931: if (s < 0)
3932: {
3933: *uu = a < 0 ? u : -(long)u;
3934: *vv = b < 0 ? -(long)v : v;
3935: }
3936: else
3937: {
3938: *uu = a < 0 ? -(long)u : u;
3939: *vv = b < 0 ? v : -(long)v;
3940: }
3941: #ifdef DEBUG_CBEZOUT
3942: fprintferr("< %ld (%ld, %ld)\n", (long)r, *uu, *vv);
3943: #endif
3944: return (long)r;
3945: }
3946:
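/* Illustrative sketch, not part of the original kernel: calling cbezout()
 * above on word-sized operands and checking the Bezout identity.  The function
 * name and the sample values are ours. */
static void
cbezout_example(void)
{
  long u, v;
  long g = cbezout(42L, -30L, &u, &v);
  /* expect g == 6 with 42*u + (-30)*v == 6 */
  if (g != 6L || 42L*u + (-30L)*v != g)
    err(talker, "cbezout_example: Bezout identity check failed");
}
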
3947: /*==========================================================
3948: * ratlift(GEN x, GEN m, GEN *a, GEN *b, GEN amax, GEN bmax)
3949: *==========================================================
3950: * Reconstruct rational number from its residue x mod m
3951: * Given t_INT x, m, amax>=0, bmax>0 such that
3952: * 0 <= x < m; 2*amax*bmax < m
3953: * attempts to find t_INT a, b such that
3954: * (1) a = b*x (mod m)
3955: * (2) |a| <= amax, 0 < b <= bmax
3956: * (3) gcd(m, b) = gcd(a, b)
3957: * If unsuccessful, it will return 0 and leave a,b unchanged (and
3958: * caller may deduce no such a,b exist). If successful, sets a,b
3959: * and returns 1. If there exist a,b satisfying (1), (2), and
3960: * (3') gcd(m, b) = 1
3961: * then they are uniquely determined subject to (1),(2) and
3962: * (3'') gcd(a, b) = 1,
3963: * and will be returned by the routine. (The caller may wish to
3964: * check gcd(a,b)==1, either directly or based on known prime
3965: * divisors of m, depending on the application.)
3966: * Reference:
3967: @article {MR97c:11116,
3968: AUTHOR = {Collins, George E. and Encarnaci{\'o}n, Mark J.},
3969: TITLE = {Efficient rational number reconstruction},
3970: JOURNAL = {J. Symbolic Comput.},
3971: VOLUME = {20},
3972: YEAR = {1995},
3973: NUMBER = {3},
3974: PAGES = {287--297},
3975: }
1.2 ! noro 3976: * Preprint available from:
! 3977: * ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1994/94-64.ps.gz
1.1 noro 3978: */
3979:
3980: /* #define DEBUG_RATLIFT */
3981:
3982: int
3983: ratlift(GEN x, GEN m, GEN *a, GEN *b, GEN amax, GEN bmax)
3984: {
3985: GEN d,d1,v,v1,q,r;
1.2 ! noro 3986: gpmem_t av = avma, av1, lim;
1.1 noro 3987: long lb,lr,lbb,lbr,s,s0;
3988: ulong vmax;
3989: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
3990: int lhmres; /* Lehmer stage return value */
3991:
3992: if ((typ(x) | typ(m) | typ(amax) | typ(bmax)) != t_INT) err(arither1);
3993: if (signe(bmax) <= 0)
3994: err(talker, "ratlift: bmax must be > 0, found\n\tbmax=%Z\n", bmax);
3995: if (signe(amax) < 0)
              3996:     err(talker, "ratlift: amax must be >= 0, found\n\tamax=%Z\n", amax);
3997: /* check 2*amax*bmax < m */
3998: if (cmpii(shifti(mulii(amax, bmax), 1), m) >= 0)
3999: err(talker, "ratlift: must have 2*amax*bmax < m, found\n\tamax=%Z\n\tbmax=%Z\n\tm=%Z\n", amax,bmax,m);
4000: /* we _could_ silently replace x with modii(x,m) instead of the following,
4001: * but let's leave this up to the caller
4002: */
4003: avma = av; s = signe(x);
4004: if (s < 0 || cmpii(x,m) >= 0)
4005: err(talker, "ratlift: must have 0 <= x < m, found\n\tx=%Z\n\tm=%Z\n", x,m);
4006:
4007: /* special cases x=0 and/or amax=0 */
4008: if (s == 0)
4009: {
4010: if (a != NULL) *a = gzero;
4011: if (b != NULL) *b = gun;
4012: return 1;
4013: }
4014: else if (signe(amax)==0)
4015: return 0;
4016: /* assert: m > x > 0, amax > 0 */
4017:
4018: /* check here whether a=x, b=1 is a solution */
4019: if (cmpii(x,amax) <= 0)
4020: {
4021: if (a != NULL) *a = icopy(x);
4022: if (b != NULL) *b = gun;
4023: return 1;
4024: }
4025:
4026: /* There is no special case for single-word numbers since this is
4027: * mainly meant to be used with large moduli.
4028: */
4029: (void)new_chunk(lgefint(bmax) + lgefint(amax)); /* room for a,b */
4030: d = m; d1 = x;
4031: v = gzero; v1 = gun;
4032: /* assert d1 > amax, v1 <= bmax here */
4033: lb = lgefint(bmax);
4034: lbb = bfffo(bmax[2]);
4035: s = 1;
4036: av1 = avma; lim = stack_lim(av, 1);
4037:
4038: /* general case: Euclidean division chain starting with m div x, and
4039: * with bounds on the sequence of convergents' denoms v_j.
4040: * Just to be different from what invmod and bezout are doing, we work
4041: * here with the all-nonnegative matrices [u,u1;v,v1]=prod_j([0,1;1,q_j]).
4042: * Loop invariants:
4043: * (a) (sign)*[-v,v1]*x = [d,d1] (mod m) (componentwise)
4044: * (sign initially +1, changes with each Euclidean step)
4045: * so [a,b] will be obtained in the form [-+d,v] or [+-d1,v1];
4046: * this congruence is a consequence of
4047: * (b) [x,m]~ = [u,u1;v,v1]*[d1,d]~,
4048: * where u,u1 is the usual numerator sequence starting with 1,0
4049: * instead of 0,1 (just multiply the eqn on the left by the inverse
4050: * matrix, which is det*[v1,-u1;-v,u], where "det" is the same as the
4051: * "(sign)" in (a)). From m = v*d1 + v1*d and
4052: * (c) d > d1 >= 0, 0 <= v < v1,
4053: * we have d >= m/(2*v1), so while v1 remains smaller than m/(2*amax),
4054: * the pair [-(sign)*d,v] satisfies (1) but violates (2) (d > amax).
4055: * Conversely, v1 > bmax indicates that no further solutions will be
4056: * forthcoming; [-(sign)*d,v] will be the last, and first, candidate.
              4057:  * Thus there's at most one point in the division chain where a solution
4058: * can live: v < bmax, v1 >= m/(2*amax) > bmax, and this is acceptable
4059: * iff in fact d <= amax (e.g. m=221, x=34 or 35, amax=bmax=10 fail on
4060: * this count while x=32,33,36,37 succeed). However, a division may leave
4061: * a zero residue before we ever reach this point (consider m=210, x=35,
4062: * amax=bmax=10), and our caller may find that gcd(d,v) > 1 (numerous
4063: * examples -- keep m=210 and consider any of x=29,31,32,33,34,36,37,38,
4064: * 39,40,41).
4065: * Furthermore, at the start of the loop body we have in fact
4066: * (c') 0 <= v < v1 <= bmax, d > d1 > amax >= 0,
4067: * (and are never done already).
1.2 ! noro 4068: *
1.1 noro 4069: * Main loop is similar to those of invmod() and bezout(), except for
4070: * having to determine appropriate vmax bounds, and checking termination
4071: * conditions. The signe(d1) condition is only for paranoia
4072: */
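  /* Worked instance of invariant (a), added for illustration (not in the
   * original source): take m = 13, x = 5.  After one full division step
   * (q = 2, r = 3) we have d = 5, d1 = 3, v = 1, v1 = 2 and sign = -1, and
   * indeed  -(-v)*x =  1*5 =   5 = d  (mod 13),
   *         -( v1)*x = -2*5 = -10 = 3 = d1 (mod 13),
   * i.e. (sign)*[-v,v1]*x = [d,d1] (mod m) componentwise, as claimed. */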
4073: while (lgefint(d) > 3 && signe(d1))
4074: {
4075: /* determine vmax for lgcdii so as to ensure v won't overshoot.
4076: * If v+v1 > bmax, the next step would take v1 beyond the limit, so
4077: * since [+-d1,v1] is not a solution, we give up. Otherwise if v+v1
              4078:      * is way shorter than bmax, use vmax=MAXULONG. Otherwise, set vmax
4079: * to a crude lower approximation of bmax/(v+v1), or to 1, which will
4080: * allow the inner loop to do one step
4081: */
4082: r = addii(v,v1);
4083: lr = lb - lgefint(r);
4084: lbr = bfffo(r[2]);
4085: if (cmpii(r,bmax) > 0) /* done, not found */
4086: {
4087: avma = av;
4088: return 0;
4089: }
4090: else if (lr > 1) /* still more than a word's worth to go */
4091: {
4092: vmax = MAXULONG;
4093: }
4094: else /* take difference of bit lengths */
4095: {
4096: lr = (lr << TWOPOTBITS_IN_LONG) - lbb + lbr;
4097: if (lr > BITS_IN_LONG)
4098: vmax = MAXULONG;
4099: else if (lr == 0)
4100: vmax = 1UL;
4101: else
4102: vmax = 1UL << (lr-1);
4103: /* the latter is pessimistic but faster than a division */
4104: }
4105: /* do a Lehmer-Jebelean round */
4106: lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, vmax);
4107: if (lhmres != 0) /* check progress */
4108: { /* apply matrix */
4109: if ((lhmres == 1) || (lhmres == -1))
4110: {
4111: s = -s;
4112: if (xv1 == 1)
4113: {
4114: /* re-use v+v1 computed above */
4115: v=v1; v1=r;
4116: r = subii(d,d1); d=d1; d1=r;
4117: }
4118: else
4119: {
4120: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
4121: r = addii(v, mului(xv1,v1)); v=v1; v1=r;
4122: }
4123: }
4124: else
4125: {
4126: r = subii(muliu(d,xu), muliu(d1,xv));
4127: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
4128: r = addii(muliu(v,xu), muliu(v1,xv));
4129: v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
4130: if (lhmres&1)
4131: {
4132: setsigne(d,-signe(d));
4133: s = -s;
4134: }
4135: else if (signe(d1))
4136: {
4137: setsigne(d1,-signe(d1));
4138: }
4139: }
4140: /* check whether we're done. Assert v <= bmax here. Examine v1:
4141: * if v1 > bmax, check d and return 0 or 1 depending on the outcome;
4142: * if v1 <= bmax, check d1 and return 1 if d1 <= amax, otherwise
4143: * proceed.
4144: */
4145: if (cmpii(v1,bmax) > 0) /* certainly done */
4146: {
4147: avma = av;
4148: if (cmpii(d,amax) <= 0) /* done, found */
4149: {
4150: if (a != NULL)
4151: {
4152: *a = icopy(d);
4153: setsigne(*a,-s); /* sign opposite to s */
4154: }
4155: if (b != NULL) *b = icopy(v);
4156: return 1;
4157: }
4158: else /* done, not found */
4159: return 0;
4160: }
4161: else if (cmpii(d1,amax) <= 0) /* also done, found */
4162: {
4163: avma = av;
4164: if (a != NULL)
4165: {
4166: if (signe(d1))
4167: {
4168: *a = icopy(d1);
4169: setsigne(*a,s); /* same sign as s */
4170: }
4171: else
4172: *a = gzero;
4173: }
4174: if (b != NULL) *b = icopy(v1);
4175: return 1;
4176: }
4177: } /* lhmres != 0 */
4178:
4179: if (lhmres <= 0 && signe(d1))
4180: {
4181: q = dvmdii(d,d1,&r);
4182: #ifdef DEBUG_LEHMER
4183: fprintferr("Full division:\n");
4184: printf(" q = "); output(q); sleep (1);
4185: #endif
4186: d=d1; d1=r;
4187: r = addii(v,mulii(q,v1));
4188: v=v1; v1=r;
4189: s = -s;
4190: /* check whether we are done now. Since we weren't before the div, it
4191: * suffices to examine v1 and d1 -- the new d (former d1) cannot cut it
4192: */
4193: if (cmpii(v1,bmax) > 0) /* done, not found */
4194: {
4195: avma = av;
4196: return 0;
4197: }
4198: else if (cmpii(d1,amax) <= 0) /* done, found */
4199: {
4200: avma = av;
4201: if (a != NULL)
4202: {
4203: if (signe(d1))
4204: {
4205: *a = icopy(d1);
4206: setsigne(*a,s); /* same sign as s */
4207: }
4208: else
4209: *a = gzero;
4210: }
4211: if (b != NULL) *b = icopy(v1);
4212: return 1;
4213: }
4214: }
4215:
4216: if (low_stack(lim, stack_lim(av,1)))
4217: {
4218: GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
4219: if(DEBUGMEM>1) err(warnmem,"ratlift");
4220: gerepilemany(av1,gptr,4);
4221: }
4222: } /* end while */
4223:
4224: /* Postprocessing - final sprint. Since we usually underestimate vmax,
4225: * this function needs a loop here instead of a simple conditional.
4226: * Note we can only get here when amax fits into one word (which will
4227: * typically not be the case!). The condition is bogus -- d1 is never
4228: * zero at the start of the loop. There will be at most a few iterations,
4229: * so we don't bother collecting garbage
4230: */
4231: while (signe(d1))
4232: {
4233: /* Assertions: lgefint(d)==lgefint(d1)==3.
4234: * Moreover, we aren't done already, or we would have returned by now.
4235: * Recompute vmax...
4236: */
4237: #ifdef DEBUG_RATLIFT
4238: fprintferr("rl-fs: d,d1=%Z,%Z\n", d, d1);
4239: fprintferr("rl-fs: v,v1=%Z,%Z\n", v, v1);
4240: #endif
4241: r = addii(v,v1);
4242: lr = lb - lgefint(r);
4243: lbr = bfffo(r[2]);
4244: if (cmpii(r,bmax) > 0) /* done, not found */
4245: {
4246: avma = av;
4247: return 0;
4248: }
4249: else if (lr > 1) /* still more than a word's worth to go */
4250: {
4251: vmax = MAXULONG; /* (cannot in fact happen) */
4252: }
4253: else /* take difference of bit lengths */
4254: {
4255: lr = (lr << TWOPOTBITS_IN_LONG) - lbb + lbr;
4256: if (lr > BITS_IN_LONG)
4257: vmax = MAXULONG;
4258: else if (lr == 0)
4259: vmax = 1UL;
4260: else
4261: vmax = 1UL << (lr-1); /* as above */
4262: }
4263: #ifdef DEBUG_RATLIFT
4264: fprintferr("rl-fs: vmax=%lu\n", vmax);
4265: #endif
4266: /* single-word "Lehmer", discarding the gcd or whatever it returns */
4267: (void)rgcduu((ulong)d[2], (ulong)d1[2], vmax, &xu, &xu1, &xv, &xv1, &s0);
4268: #ifdef DEBUG_RATLIFT
4269: fprintferr("rl-fs: [%lu,%lu; %lu,%lu] %s\n",
4270: xu, xu1, xv, xv1,
4271: s0 < 0 ? "-" : "+");
4272: #endif
4273: if (xv1 == 1) /* avoid multiplications */
4274: {
4275: /* re-use v+v1 computed above */
4276: v=v1; v1=r;
4277: r = subii(d,d1); d=d1; d1=r;
4278: s = -s;
4279: }
4280: else if (xu == 0) /* and xv==1, xu1==1, xv1 > 1 */
4281: {
4282: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
4283: r = addii(v, mului(xv1,v1)); v=v1; v1=r;
4284: s = -s;
4285: }
4286: else
4287: {
4288: r = subii(muliu(d,xu), muliu(d1,xv));
4289: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
4290: r = addii(muliu(v,xu), muliu(v1,xv));
4291: v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
4292: if (s0 < 0)
4293: {
4294: setsigne(d,-signe(d));
4295: s = -s;
4296: }
4297: else if (signe(d1)) /* sic: might vanish now */
4298: {
4299: setsigne(d1,-signe(d1));
4300: }
4301: }
4302: /* check whether we're done, as above. Assert v <= bmax. Examine v1:
4303: * if v1 > bmax, check d and return 0 or 1 depending on the outcome;
4304: * if v1 <= bmax, check d1 and return 1 if d1 <= amax, otherwise proceed.
4305: */
4306: if (cmpii(v1,bmax) > 0) /* certainly done */
4307: {
4308: avma = av;
4309: if (cmpii(d,amax) <= 0) /* done, found */
4310: {
4311: if (a != NULL)
4312: {
4313: *a = icopy(d);
4314: setsigne(*a,-s); /* sign opposite to s */
4315: }
4316: if (b != NULL) *b = icopy(v);
4317: return 1;
4318: }
4319: else /* done, not found */
4320: return 0;
4321: }
4322: else if (cmpii(d1,amax) <= 0) /* also done, found */
4323: {
4324: avma = av;
4325: if (a != NULL)
4326: {
4327: if (signe(d1))
4328: {
4329: *a = icopy(d1);
4330: setsigne(*a,s); /* same sign as s */
4331: }
4332: else
4333: *a = gzero;
4334: }
4335: if (b != NULL) *b = icopy(v1);
4336: return 1;
4337: }
4338: } /* while */
4339:
4340: /* get here when we have run into d1 == 0 before returning... in fact,
4341: * this cannot happen.
4342: */
4343: err(talker, "ratlift failed to catch d1 == 0\n");
4344: /* NOTREACHED */
4345: return 0;
4346: }
4347:
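/* Illustrative sketch, not part of the original kernel: rational
 * reconstruction with ratlift() above.  With m = 997 and x = 428 (which is
 * 5 * 7^(-1) mod 997), the bounds amax = bmax = 22 satisfy 2*amax*bmax < m,
 * so the unique lift a/b = 5/7 should be recovered.  Assumes pari_init()
 * was called; the function name is ours. */
static void
ratlift_example(void)
{
  gpmem_t av = avma;
  GEN a, b;
  GEN x = stoi(428), m = stoi(997), amax = stoi(22), bmax = stoi(22);
  if (!ratlift(x, m, &a, &b, amax, bmax))
    err(talker, "ratlift_example: no lift found");
  output(a); output(b); /* expect 5 and 7 */
  avma = av;
}
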
4348: /********************************************************************/
4349: /** **/
4350: /** INTEGER SQUARE ROOT **/
4351: /** **/
4352: /********************************************************************/
4353:
4354: /* sqrt()'s result may be off by 1 when a is not representable exactly as a
4355: * double [64bit machine] */
4356: ulong
4357: usqrtsafe(ulong a)
4358: {
4359: ulong x = (ulong)sqrt((double)a);
4360: #ifdef LONG_IS_64BIT
4361: ulong y = x+1; if (y <= MAXHALFULONG && y*y <= a) x = y;
4362: #endif
4363: return x;
4364: }
4365:
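/* Illustrative sketch, not part of the original kernel: checking that
 * usqrtsafe() above returns the truncated square root, i.e.
 * x^2 <= a < (x+1)^2.  The overflow guard reuses MAXHALFULONG as above;
 * the function name and the test value are ours. */
static void
usqrtsafe_example(void)
{
  ulong a = 999999999UL;
  ulong x = usqrtsafe(a);
  /* (x+1)^2 is only computed when it cannot overflow; otherwise it
   * certainly exceeds a */
  if (x*x > a || (x+1 <= MAXHALFULONG && (x+1)*(x+1) <= a))
    err(talker, "usqrtsafe_example: not the truncated square root");
}
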
4366: /* Assume a >= 0 has <= 4 words, return trunc[sqrt(a)] */
4367: ulong
4368: mpsqrtl(GEN a)
4369: {
4370: long l = lgefint(a);
4371: ulong x,y,z,k,m;
4372: int L, s;
1.2 ! noro 4373:
1.1 noro 4374: if (l <= 3) return l==2? 0: usqrtsafe((ulong)a[2]);
4375: s = bfffo(a[2]);
4376: if (s > 1)
4377: {
4378: s &= ~1UL; /* make it even */
4379: z = BITS_IN_LONG - s;
4380: m = (ulong)(a[2] << s) | (a[3] >> z);
4381: L = z>>1;
4382: }
4383: else
4384: {
4385: m = (ulong)a[2];
4386: L = BITS_IN_LONG/2;
4387: }
              4388:   /* m = BIL-1 (bfffo odd) or BIL first bits of a */
4389: k = (long) sqrt((double)m);
4390: if (L == BITS_IN_LONG/2 && k == MAXHALFULONG)
4391: x = MAXULONG;
4392: else
4393: x = (k+1) << L;
4394: do
4395: {
4396: LOCAL_HIREMAINDER;
4397: LOCAL_OVERFLOW;
4398: hiremainder = a[2];
4399: if (hiremainder >= x) return x;
4400: z = addll(x, divll(a[3],x));
4401: z >>= 1; if (overflow) z |= HIGHBIT;
4402: y = x; x = z;
4403: }
4404: while (x < y);
4405: return y;
1.2 ! noro 4406: }
! 4407:
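/* Illustrative sketch, not part of the original kernel: mpsqrtl() above
 * applied to an exact square.  123456789^2 fits in at most two mantissa
 * words on any supported word size, so the argument satisfies the
 * "<= 4 words" assumption above.  Assumes pari_init() was called; the
 * function name is ours. */
static void
mpsqrtl_example(void)
{
  gpmem_t av = avma;
  GEN a = mulii(stoi(123456789L), stoi(123456789L));
  if (mpsqrtl(a) != 123456789UL)
    err(talker, "mpsqrtl_example: wrong integer square root");
  avma = av;
}
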
         ! 4408: /* target should point to a buffer of source_end - source ulongs.
         ! 4409:
         ! 4410:    Fills this buffer with the bits of the ulongs in source..source_end-1,
         ! 4411:    shifted right by sh bits; the "most significant" sh bits of the result
         ! 4412:    are set to the least significant sh bits of prepend.
         ! 4413:
         ! 4414:    The ordering of bits in this bitmap is the same as for t_INT.
         ! 4415:
         ! 4416:    sh should not exceed BITS_IN_LONG.
         ! 4417: */
! 4418: void
! 4419: shift_r(ulong *target, ulong *source, ulong *source_end, ulong prepend, ulong sh)
! 4420: {
! 4421: if (sh) { /* shift_words_r() works */
! 4422: register ulong sh_complement = BITS_IN_LONG - sh;
! 4423:
! 4424: shift_words_r(target, source, source_end, prepend, sh, sh_complement);
! 4425: } else {
! 4426: int i;
! 4427:
! 4428: for (i=0; i < source_end - source; i++)
! 4429: target[i] = source[i];
! 4430: }
1.1 noro 4431: }
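
/* Illustrative sketch, not part of the original kernel: a tiny exercise of
 * shift_r() above.  Words are ordered most significant first, as for t_INT.
 * Shifting {2, 3} right by one bit while prepending a 1 bit should yield
 * {HIGHBIT|1, 1}: the prepended bit becomes the top bit of the first word,
 * and the low bit of 2 (here 0) moves into the top of the second word.
 * The function name and the sample data are ours. */
static void
shift_r_example(void)
{
  ulong in[2], out[2];
  in[0] = 2UL; in[1] = 3UL;
  shift_r(out, in, in + 2, 1UL, 1UL);
  if (out[0] != (HIGHBIT | 1UL) || out[1] != 1UL)
    err(talker, "shift_r_example: unexpected result");
}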