=================================================================== RCS file: /home/cvs/OpenXM_contrib/pari-2.2/src/kernel/none/Attic/mp.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -p -r1.1 -r1.2 --- OpenXM_contrib/pari-2.2/src/kernel/none/Attic/mp.c 2001/10/02 11:17:09 1.1 +++ OpenXM_contrib/pari-2.2/src/kernel/none/Attic/mp.c 2002/09/11 07:27:00 1.2 @@ -1,4 +1,4 @@ -/* $Id: mp.c,v 1.1 2001/10/02 11:17:09 noro Exp $ +/* $Id: mp.c,v 1.2 2002/09/11 07:27:00 noro Exp $ Copyright (C) 2000 The PARI group. @@ -30,6 +30,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, */ /* z2 := z1[imin..imax].f shifted left sh bits (feeding f from the right) */ +/* These macros work only for sh != 0 !!! */ #define shift_left2(z2,z1,imin,imax,f, sh,m) {\ register ulong _l,_k = ((ulong)f)>>m;\ GEN t1 = z1 + imax, t2 = z2 + imax, T = z1 + imin;\ @@ -45,18 +46,21 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, shift_left2((z2),(z1),(imin),(imax),(f),(sh),(_m));\ } -/* z2 := f.z1[imin..imax-1] shifted right sh bits (feeding f from the left) */ -#define shift_right2(z2,z1,imin,imax,f, sh,m) {\ - register GEN t1 = z1 + imin, t2 = z2 + imin, T = z1 + imax;\ - register ulong _k,_l = *t1++;\ - *t2++ = (_l>>(ulong)sh) | ((ulong)f<<(ulong)m);\ - while (t1 < T) {\ - _k = _l<<(ulong)m; _l = *t1++;\ - *t2++ = (_l>>(ulong)sh) | _k;\ +#define shift_words_r(target,source,source_end,prepend, sh, sh_complement) {\ + register ulong _k,_l = *source++;\ + *target++ = (_l>>(ulong)sh) | ((ulong)prepend<<(ulong)sh_complement);\ + while (source < source_end) {\ + _k = _l<<(ulong)sh_complement; _l = *source++;\ + *target++ = (_l>>(ulong)sh) | _k;\ }\ } +#define shift_right2(z2,z1,imin,imax,f, sh,m) {\ + register GEN s = (z1) + (imin), ta = (z2) + (imin), se = (z1) + (imax);\ + shift_words_r(ta,s,se,(f),(sh),(m)); \ +} +/* z2 := f.z1[imin..imax-1] shifted right sh bits (feeding f from the left) */ #define shift_right(z2,z1,imin,imax,f, sh) {\ - register const ulong _m = BITS_IN_LONG - sh;\ + register const ulong _m = BITS_IN_LONG - (sh);\ shift_right2((z2),(z1),(imin),(imax),(f),(sh),(_m));\ } @@ -95,7 +99,7 @@ addsispec(long s, GEN x, long nx) while (xd > x) *--zd = *--xd; *--zd = evalsigne(1) | evallgefint(lz); *--zd = evaltyp(t_INT) | evallg(lz); - avma=(long)zd; return zd; + avma=(gpmem_t)zd; return zd; } #define swapspec(x,y, nx,ny) {long _a=nx;GEN _z=x; nx=ny; ny=_a; x=y; y=_z;} @@ -129,7 +133,7 @@ addiispec(GEN x, GEN y, long nx, long ny) while (xd > x) *--zd = *--xd; *--zd = evalsigne(1) | evallgefint(lz); *--zd = evaltyp(t_INT) | evallg(lz); - avma=(long)zd; return zd; + avma=(gpmem_t)zd; return zd; } /* assume x >= y */ @@ -158,7 +162,7 @@ subisspec(GEN x, long s, long nx) do *--zd = *--xd; while (xd > x); *--zd = evalsigne(1) | evallgefint(lz); *--zd = evaltyp(t_INT) | evallg(lz); - avma=(long)zd; return zd; + avma=(gpmem_t)zd; return zd; } /* assume x > y */ @@ -191,18 +195,18 @@ subiispec(GEN x, GEN y, long nx, long ny) do *--zd = *--xd; while (xd > x); *--zd = evalsigne(1) | evallgefint(lz); *--zd = evaltyp(t_INT) | evallg(lz); - avma=(long)zd; return zd; + avma=(gpmem_t)zd; return zd; } #ifndef __M68K__ /* prototype of positive small ints */ static long pos_s[] = { - evaltyp(t_INT) | m_evallg(3), evalsigne(1) | evallgefint(3), 0 }; + evaltyp(t_INT) | _evallg(3), evalsigne(1) | evallgefint(3), 0 }; /* prototype of negative small ints */ static long neg_s[] = { - evaltyp(t_INT) | m_evallg(3), evalsigne(-1) | evallgefint(3), 0 }; + evaltyp(t_INT) | _evallg(3), evalsigne(-1) | evallgefint(3), 0 }; void affir(GEN x, GEN y) @@ -213,13 +217,12 @@ affir(GEN x, GEN y) if (!s) { y[1] = evalexpo(-bit_accuracy(ly)); - y[2] = 0; return; + return; } - lx=lgefint(x); sh=bfffo(x[2]); + lx = lgefint(x); sh = bfffo(x[2]); y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1); - if (sh) - { + if (sh) { if (lx>ly) { shift_left(y,x,2,ly-1, x[ly],sh); } else @@ -227,15 +230,15 @@ affir(GEN x, GEN y) for (i=lx; i=ly) - for (i=2; i=ly) + for (i=2; i=ly) @@ -259,6 +262,12 @@ affrr(GEN x, GEN y) GEN shifti(GEN x, long n) { + return shifti3(x, n, 0); +} + +GEN +shifti3(GEN x, long n, long flag) +{ const long s=signe(x); long lx,ly,i,m; GEN y; @@ -285,21 +294,47 @@ shifti(GEN x, long n) } else { + long lyorig; + + if (s > 0) + flag = 0; n = -n; - ly = lx - (n>>TWOPOTBITS_IN_LONG); - if (ly<3) return gzero; + ly = lyorig = lx - (n>>TWOPOTBITS_IN_LONG); + if (ly<3) + return flag ? stoi(-1) : gzero; y = new_chunk(ly); m = n & (BITS_IN_LONG-1); - if (!m) for (i=2; i= lyorig) + if (x[i]) { flag = 1; break; } /* Need to increment y by 1 */ + if (!flag && m) + flag = x[lyorig - 1] & ((1<0) return rcopy(y); #endif - z = cgetr(3 + ((-e)>>TWOPOTBITS_IN_LONG)); - affir(x,z); return z; + return itor(x, 3 + ((-e)>>TWOPOTBITS_IN_LONG)); } ly=lg(y); - if (e>0) + if (e > 0) { l = ly - (e>>TWOPOTBITS_IN_LONG); if (l<3) return rcopy(y); } else l = ly + ((-e)>>TWOPOTBITS_IN_LONG)+1; - z=cgetr(l); affir(x,z); y=addrr(z,y); + y = addrr(itor(x,l), y); z = y+l; ly = lg(y); while (ly--) z[ly] = y[ly]; - avma=(long)z; return z; + avma=(gpmem_t)z; return z; } GEN @@ -554,9 +588,9 @@ addrr(GEN x, GEN y) if (!sx) { if (e > 0) ex=ey; - z=cgetr(3); z[1]=evalexpo(ex); z[2]=0; return z; + return realzero_bit(ex); } - if (e > 0) { z=cgetr(3); z[1]=evalexpo(ey); z[2]=0; return z; } + if (e > 0) return realzero_bit(ey); lz = 3 + ((-e)>>TWOPOTBITS_IN_LONG); lx = lg(x); if (lz>lx) lz=lx; z = cgetr(lz); while(--lz) z[lz]=x[lz]; @@ -564,7 +598,7 @@ addrr(GEN x, GEN y) } if (!sx) { - if (e < 0) { z=cgetr(3); z[1]=evalexpo(ex); z[2]=0; return z; } + if (e < 0) return realzero_bit(ex); lz = 3 + (e>>TWOPOTBITS_IN_LONG); ly = lg(y); if (lz>ly) lz=ly; z = cgetr(lz); while (--lz) z[lz]=y[lz]; @@ -602,7 +636,7 @@ addrr(GEN x, GEN y) if (sx==sy) { /* addition */ - i=lz-1; avma = (long)z; + i=lz-1; avma = (gpmem_t)z; if (flag==0) { z[i] = y[i]; i--; } overflow=0; for (j=lx-1; j>=2; i--,j--) z[i] = addllx(x[j],y[i]); @@ -613,8 +647,7 @@ addrr(GEN x, GEN y) if (i==1) { shift_right(z,z, 2,lz, 1,1); - ey=evalexpo(ey+1); - z[1] = evalsigne(sx) | ey; return z; + z[1] = evalsigne(sx) | evalexpo(ey+1); return z; } z[i] = y[i]+1; if (z[i--]) break; } @@ -629,9 +662,8 @@ addrr(GEN x, GEN y) i=2; while (i (ulong)x[i]); } @@ -656,11 +688,10 @@ addrr(GEN x, GEN y) x = z+2; i=0; while (!x[i]) i++; lz -= i; z += i; m = bfffo(z[2]); - e = evalexpo(ey - (m | (i<ly) { lz=ly; z=x; x=y; y=z; flag=1; } else flag = (lz!=ly); - z=cgetr(lz); z[1] = evalsigne(sx) | e; + z=cgetr(lz); if (lz==3) { if (flag) @@ -827,8 +856,9 @@ mulrr(GEN x, GEN y) } else garde = mulll(x[2],y[2]); - if ((long)hiremainder<0) { z[2]=hiremainder; z[1]++; } + if ((long)hiremainder<0) { z[2]=hiremainder; e++; } else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1)); + z[1] = evalsigne(sx)|evalexpo(e); return z; } @@ -865,9 +895,8 @@ mulrr(GEN x, GEN y) z[i] = addll(addmul(p1,y1[i]), z[i]); } z[2] = hiremainder+overflow; - if (z[2] < 0) z[1]++; - else - shift_left(z,z,2,lzz,garde, 1); + if (z[2] < 0) e++; else shift_left(z,z,2,lzz,garde, 1); + z[1] = evalsigne(sx) | evalexpo(e); return z; } @@ -883,25 +912,20 @@ mulir(GEN x, GEN y) if (!sx) return gzero; if (!is_bigint(x)) return mulsr(itos(x),y); sy=signe(y); ey=expo(y); - if (!sy) - { - e = evalexpo(expi(x)+ey); - z=cgetr(3); z[1]=e; z[2]=0; return z; - } + if (!sy) return realzero_bit(expi(x)+ey); if (sy<0) sx = -sx; lz=lg(y); z=cgetr(lz); - y1=cgetr(lz+1); - affir(x,y1); x=y; y=y1; - e = evalexpo(expo(y)+ey); - z[1] = evalsigne(sx) | e; + y1 = itor(x, lz+1); x = y; y = y1; + e = expo(y)+ey; if (lz==3) { (void)mulll(x[2],y[3]); garde=addmul(x[2],y[2]); - if ((long)hiremainder < 0) { z[2]=hiremainder; z[1]++; } + if ((long)hiremainder < 0) { z[2]=hiremainder; e++; } else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1)); - avma=(long)z; return z; + z[1] = evalsigne(sx) | evalexpo(e); + avma=(gpmem_t)z; return z; } (void)mulll(x[2],y[lz]); garde=hiremainder; @@ -937,10 +961,11 @@ mulir(GEN x, GEN y) z[i] = addll(addmul(p1,y1[i]), z[i]); } z[2] = hiremainder+overflow; - if (z[2] < 0) z[1]++; + if (z[2] < 0) e++; else shift_left(z,z,2,lzz,garde, 1); - avma=(long)z; return z; + z[1] = evalsigne(sx) | evalexpo(e); + avma=(gpmem_t)z; return z; } /* written by Bruno Haible following an idea of Robert Harley */ @@ -974,7 +999,7 @@ modss(long x, long y) if (!y) err(moder1); if (y<0) y=-y; - hiremainder=0; divll(labs(x),y); + hiremainder=0; (void)divll(labs(x),y); if (!hiremainder) return gzero; return (x < 0) ? stoi(y-hiremainder) : stoi(hiremainder); } @@ -985,7 +1010,7 @@ resss(long x, long y) LOCAL_HIREMAINDER; if (!y) err(reser1); - hiremainder=0; divll(labs(x),labs(y)); + hiremainder=0; (void)divll(labs(x),labs(y)); return (x < 0) ? stoi(-((long)hiremainder)) : stoi(hiremainder); } @@ -1044,6 +1069,29 @@ umodiu(GEN y, ulong x) GEN modiu(GEN y, ulong x) { return utoi(umodiu(y,x)); } +/* return |y| \/ x */ +GEN +diviu(GEN y, ulong x) +{ + long sy=signe(y),ly,i; + GEN z; + LOCAL_HIREMAINDER; + + if (!x) err(diver4); + if (!sy) { hiremainder=0; SAVE_HIREMAINDER; return gzero; } + + ly = lgefint(y); + if (x <= (ulong)y[2]) hiremainder=0; + else + { + if (ly==3) { hiremainder=itou(y); SAVE_HIREMAINDER; return gzero; } + hiremainder=y[2]; ly--; y++; + } + z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(1); + for (i=2; i0? y[2]: -y[2]); - lx=lg(x); z=cgetr(lx); - av=avma; yr=cgetr(lx+1); affir(y,yr); - affrr(divrr(x,yr),z); avma=av; return z; + lx = lg(x); z = cgetr(lx); av = avma; + affrr(divrr(x, itor(y, lx+1)), z); + avma = av; return z; } void diviiz(GEN x, GEN y, GEN z) { - long av=avma,lz; - GEN xr,yr; - - if (typ(z)==t_INT) { affii(divii(x,y),z); avma=av; return; } - lz=lg(z); xr=cgetr(lz); affir(x,xr); yr=cgetr(lz); affir(y,yr); - affrr(divrr(xr,yr),z); avma=av; + gpmem_t av = avma; + if (typ(z) == t_INT) affii(divii(x,y), z); + else { + long lz = lg(z); + affrr(divrr(itor(x,lz), itor(y,lz)), z); + } + avma = av; } void mpdivz(GEN x, GEN y, GEN z) { - long av=avma; + gpmem_t av = avma; + GEN r; if (typ(z)==t_INT) { - if (typ(x)==t_REAL || typ(y)==t_REAL) err(divzer1); - affii(divii(x,y),z); avma=av; return; + if (typ(x) == t_REAL || typ(y) == t_REAL) err(divzer1); + affii(divii(x,y), z); + avma = av; return; } - if (typ(x)==t_INT) - { - GEN xr,yr; - long lz; - if (typ(y)==t_REAL) { affrr(divir(x,y),z); avma=av; return; } - lz=lg(z); xr=cgetr(lz); affir(x,xr); yr=cgetr(lz); affir(y,yr); - affrr(divrr(xr,yr),z); avma=av; return; + if (typ(x) == t_INT) + { + if (typ(y) == t_REAL) + r = divir(x,y); + else + { + long lz = lg(z); + r = divrr(itor(x,lz), itor(y,lz)); + } } - if (typ(y)==t_REAL) { affrr(divrr(x,y),z); avma=av; return; } - affrr(divri(x,y),z); avma=av; + else if (typ(y) == t_REAL) + r = divrr(x,y); + else + r = divri(x,y); + affrr(r, z); avma = av; } GEN divsr(long x, GEN y) { - long av,ly; - GEN p1,z; + gpmem_t av; + long ly; + GEN z; if (!signe(y)) err(diver3); if (!x) return gzero; - ly=lg(y); z=cgetr(ly); av=avma; - p1=cgetr(ly+1); affsr(x,p1); affrr(divrr(p1,y),z); - avma=av; return z; + ly = lg(y); z = cgetr(ly); av = avma; + affrr(divrr(stor(x,ly+1), y), z); + avma = av; return z; } GEN @@ -1182,7 +1236,7 @@ modii(GEN x, GEN y) case 1: return resii(x,y); default: { - long av = avma; + gpmem_t av = avma; (void)new_chunk(lgefint(y)); x = resii(x,y); avma=av; if (x==gzero) return x; @@ -1194,24 +1248,19 @@ modii(GEN x, GEN y) void modiiz(GEN x, GEN y, GEN z) { - const long av = avma; + const gpmem_t av = avma; affii(modii(x,y),z); avma=av; } GEN divrs(GEN x, long y) { - long i,lx,ex,garde,sh,s=signe(x); + long i,lx,garde,sh,s=signe(x); GEN z; LOCAL_HIREMAINDER; if (!y) err(diver6); - if (!s) - { - z=cgetr(3); z[1] = x[1] - (BITS_IN_LONG-1) + bfffo(y); - if (z[1]<0) err(diver7); - z[2]=0; return z; - } + if (!s) return realzero_bit(expo(x) - (BITS_IN_LONG-1)+bfffo(y)); if (y<0) { s = -s; y = -y; } if (y==1) { z=rcopy(x); setsigne(z,s); return z; } @@ -1219,9 +1268,9 @@ divrs(GEN x, long y) for (i=2; i garde */ - garde=divll(0,y); sh=bfffo(z[2]); ex=evalexpo(expo(x)-sh); + garde=divll(0,y); sh=bfffo(z[2]); if (sh) shift_left(z,z, 2,lx-1, garde,sh); - z[1] = evalsigne(s) | ex; + z[1] = evalsigne(s) | evalexpo(expo(x)-sh); return z; } @@ -1233,10 +1282,9 @@ divrr(GEN x, GEN y) GEN z,x1; if (!sy) err(diver9); - e = evalexpo(expo(x) - expo(y)); - if (!sx) { z=cgetr(3); z[1]=e; z[2]=0; return z; } + e = expo(x) - expo(y); + if (!sx) return realzero_bit(e); if (sy<0) sx = -sx; - e = evalsigne(sx) | e; lx=lg(x); ly=lg(y); if (ly==3) { @@ -1248,12 +1296,12 @@ divrr(GEN x, GEN y) l >>= 1; if (k&1) l |= HIGHBIT; k >>= 1; } - z=cgetr(3); z[1]=e; + z = cgetr(3); z[1] = evalsigne(sx) | evalexpo(e); hiremainder=k; z[2]=divll(l,y[2]); return z; } - lz = (lx<=ly)? lx: ly; - x1 = (z=new_chunk(lz))-1; + lz = min(lx,ly); z = new_chunk(lz); + x1 = z-1; x1[1]=0; for (i=2; ilz)? x[lz]: 0; x=z; si=y[2]; saux=y[3]; @@ -1313,13 +1361,15 @@ divrr(GEN x, GEN y) } x1 = x-1; for (j=lz-1; j>=2; j--) x[j]=x1[j]; if (*x) { shift_right(x,x, 2,lz, 1,1); } else e--; - x[0]=evaltyp(t_REAL)|evallg(lz); - x[1]=e; return x; + x[0] = evaltyp(t_REAL)|evallg(lz); + x[1] = evalsigne(sx) | evalexpo(e); + return x; } #endif /* !defined(__M68K__) */ + /* The following ones are not in mp.s (mulii is, with a different algorithm) */ -/* Integer division x / y: +/* Integer division x / y: such that sign(r) = sign(x) * if z = ONLY_REM return remainder, otherwise return quotient * if z != NULL set *z to remainder * *z is the last object on stack (and thus can be disposed of with cgiv @@ -1331,7 +1381,8 @@ GEN dvmdii(GEN x, GEN y, GEN *z) { long sx=signe(x),sy=signe(y); - long av,lx,ly,lz,i,j,sh,k,lq,lr; + long lx, ly, lz, i, j, sh, k, lq, lr; + gpmem_t av; ulong si,qp,saux, *xd,*rd,*qd; GEN r,q,x1; @@ -1447,7 +1498,7 @@ DIVIDE: /* quotient is non-zero */ while (lz--) *--qd = *--xd; *--qd = evalsigne(sy) | evallgefint(lq); *--qd = evaltyp(t_INT) | evallg(lq); - avma = (long)qd; return (GEN)qd; + avma = (gpmem_t)qd; return (GEN)qd; } j=lq; while (j x > 0. return y mod x */ @@ -1536,7 +1587,7 @@ mppgcd_resiu(GEN y, ulong x) void mppgcd_plus_minus(GEN x, GEN y, GEN res) { - long av = avma; + gpmem_t av = avma; long lx = lgefint(x)-1; long ly = lgefint(y)-1, lt,m,i; GEN t; @@ -1579,22 +1630,22 @@ smulss(ulong x, ulong y, ulong *rem) # define DIVCONVI 14 #endif -/* convert integer --> base 10^9 [assume x > 0] */ +/* convert integer --> base 10^9 */ GEN convi(GEN x) { - ulong av=avma, lim; + gpmem_t av = avma, lim; long lz; GEN z,p1; lz = 3 + ((lgefint(x)-2)*15)/DIVCONVI; - z=new_chunk(lz); z[1] = -1; p1 = z+2; + z = new_chunk(lz); z[1] = -1; p1 = z+2; lim = stack_lim(av,1); for(;;) { - x = divis(x,1000000000); *p1++ = hiremainder; - if (!signe(x)) { avma=av; return p1; } - if (low_stack(lim, stack_lim(av,1))) x = gerepileuptoint((long)z,x); + x = diviu(x,1000000000); *p1++ = hiremainder; + if (!signe(x)) { avma = av; return p1; } + if (low_stack(lim, stack_lim(av,1))) x = gerepileuptoint((gpmem_t)z,x); } } #undef DIVCONVI @@ -1641,7 +1692,7 @@ confrac(GEN x) GEN truedvmdii(GEN x, GEN y, GEN *z) { - long av = avma; + gpmem_t av = avma; GEN r, q = dvmdii(x,y,&r); /* assume that r is last on stack */ GEN *gptr[2]; @@ -1653,7 +1704,7 @@ truedvmdii(GEN x, GEN y, GEN *z) } if (z == ONLY_REM) - { + { /* r += sign(y) * y, that is |y| */ r = subiispec(y+2,r+2, lgefint(y)-2,lgefint(r)-2); return gerepileuptoint(av, r); } @@ -1661,14 +1712,107 @@ truedvmdii(GEN x, GEN y, GEN *z) if (!z) return gerepileuptoint(av, q); *z = subiispec(y+2,r+2, lgefint(y)-2,lgefint(r)-2); - gptr[0]=&q; gptr[1]=z; gerepilemanysp(av,(long)r,gptr,2); + gptr[0]=&q; gptr[1]=z; gerepilemanysp(av,(gpmem_t)r,gptr,2); return q; } +/* Montgomery reduction. + * N has k words, assume T >= 0 has less than 2k. + * Return res := T / B^k mod N, where B = 2^BIL + * such that 0 <= res < T/B^k + N and res has less than k words */ +GEN +red_montgomery(GEN T, GEN N, ulong inv) +{ + gpmem_t av; + GEN Te,Td,Ne,Nd, scratch; + ulong m,t,d,k = lgefint(N)-2; + int carry; + long i,j; + LOCAL_HIREMAINDER; + LOCAL_OVERFLOW; + + if (k == 0) return gzero; + d = lgefint(T)-2; /* <= 2*k */ +#ifdef DEBUG + if (d > 2*k) err(bugparier,"red_montgomery"); +#endif + if (k == 1) + { /* as below, special cased for efficiency */ + ulong n = (ulong)N[2]; + t = (ulong)T[d+1]; + m = t * inv; + (void)addll(mulll(m, n), t); /* = 0 */ + t = hiremainder + overflow; + if (d == 2) + { + t = addll((ulong)T[2], t); + if (overflow) t -= n; /* t > n doesn't fit in 1 word */ + } + return utoi(t); + } + /* assume k >= 2 */ + av = avma; scratch = new_chunk(k<<1); /* >= k + 2: result fits */ + + /* copy T to scratch space (pad with zeroes to 2k words) */ + Td = (GEN)av; + Te = T + (d+2); + for (i=0; i < d ; i++) *--Td = *--Te; + for ( ; i < (k<<1); i++) *--Td = 0; + + Te = (GEN)av; /* 1 beyond end of T mantissa */ + Ne = N + k+2; /* 1 beyond end of N mantissa */ + + carry = 0; + for (i=0; i N overflows (k+1 words), set Td := Td - N */ + Td = Te; + Nd = Ne; + t = subll(*--Td, *--Nd); *Td = t; + while (Td > scratch) { t = subllx(*--Td, *--Nd); *Td = t; } + } + + /* copy result */ + Td = (GEN)av; + while (! *scratch) scratch++; /* strip leading zeroes */ + while (Te > scratch) *--Td = *--Te; + k = ((GEN)av - Td) + 2; + *--Td = evalsigne(1) | evallgefint(k); + *--Td = evaltyp(t_INT) | evallg(k); +#ifdef DEBUG +{ + long l = lgefint(N)-2, s = BITS_IN_LONG*l; + GEN R = shifti(gun, s); + GEN res = resii(mulii(T, mpinvmod(R, N)), N); + if (k > lgefint(N) + || !egalii(resii(Td,N),res) + || cmpii(Td, addii(shifti(T, -s), N)) >= 0) err(bugparier,"red_montgomery"); +} +#endif + avma = (gpmem_t)Td; return Td; +} + /* EXACT INTEGER DIVISION */ /* Find c such that 1=c*b mod 2^BITS_IN_LONG, assuming b odd (unchecked) */ -static ulong +ulong invrev(ulong b) { ulong x; @@ -1688,7 +1832,7 @@ invrev(ulong b) } /* assume xy>0, y odd */ -GEN +GEN diviuexact(GEN x, ulong y) { long i,lz,lx; @@ -1697,7 +1841,7 @@ diviuexact(GEN x, ulong y) if (y == 1) return icopy(x); lx = lgefint(x); - if (lx == 3) return stoi((ulong)x[2] / y); + if (lx == 3) return utoi((ulong)x[2] / y); yinv = invrev(y); lz = (y <= (ulong)x[2]) ? lx : lx-1; z = new_chunk(lz); @@ -1729,7 +1873,7 @@ diviuexact(GEN x, ulong y) z += i-2; lz -= i-2; z[0] = evaltyp(t_INT)|evallg(lz); z[1] = evalsigne(1)|evallg(lz); - avma = (ulong)z; return z; + avma = (gpmem_t)z; return z; } /* Find z such that x=y*z, knowing that y | x (unchecked) @@ -1738,7 +1882,8 @@ diviuexact(GEN x, ulong y) GEN diviiexact(GEN x, GEN y) { - long av,lx,ly,lz,vy,i,ii,sx = signe(x), sy = signe(y); + long lx, ly, lz, vy, i, ii, sx = signe(x), sy = signe(y); + gpmem_t av; ulong y0inv,q; GEN z; @@ -1758,7 +1903,7 @@ diviiexact(GEN x, GEN y) avma = av; /* will erase our x,y when exiting */ /* now y is odd */ ly = lgefint(y); - if (ly == 3) + if (ly == 3) { x = diviuexact(x,(ulong)y[2]); if (signe(x)) setsigne(x,sx*sy); /* should have x != 0 at this point */ @@ -1783,7 +1928,7 @@ diviiexact(GEN x, GEN y) /* x := x - q * y */ (void)mulll(q,y[0]); limj = max(lx - lz, ii+3-ly); { /* update neither lowest word (could set it to 0) nor highest ones */ - register GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj; + register GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj; for (; x0 >= xlim; x0--, y0--) { *x0 = subll(*x0, addmul(q,*y0)); @@ -1810,7 +1955,7 @@ diviiexact(GEN x, GEN y) z += i-2; lz -= (i-2); z[0] = evaltyp(t_INT)|evallg(lz); z[1] = evalsigne(sx*sy)|evallg(lz); - avma = (ulong)z; return z; + avma = (gpmem_t)z; return z; } long @@ -1884,7 +2029,7 @@ absr_cmp(GEN x, GEN y) #define _sqri_l 47 #define _muli_l 25 /* optimal on PII 350MHz + gcc 2.7.2.1 (gp-dyn) */ -#if 0 /* for tunings */ +#if 1 /* for tunings */ long KARATSUBA_SQRI_LIMIT = _sqri_l; long KARATSUBA_MULI_LIMIT = _muli_l; @@ -1945,7 +2090,7 @@ muliispec(GEN x, GEN y, long nx, long ny) if (*zd == 0) { zd++; lz--; } /* normalize */ *--zd = evalsigne(1) | evallgefint(lz); *--zd = evaltyp(t_INT) | evallg(lz); - avma=(long)zd; return zd; + avma=(gpmem_t)zd; return zd; } #ifdef INLINE @@ -1967,7 +2112,7 @@ sqrispec(GEN x, long nx) *--zd = hiremainder; goto END; } xd = x + nx; - + /* compute double products --> zd */ p1 = *--xd; yd = xd; --zd; *--zd = mulll(p1, *--yd); z2e = zd; @@ -2009,7 +2154,7 @@ END: if (*zd == 0) { zd++; lz--; } /* normalize */ *--zd = evalsigne(1) | evallgefint(lz); *--zd = evaltyp(t_INT) | evallg(lz); - avma=(long)zd; return zd; + avma=(gpmem_t)zd; return zd; } /* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */ @@ -2048,7 +2193,8 @@ static GEN quickmulii(GEN a, GEN b, long na, long nb) { GEN a0,c,c0; - long av,n0,n0a,i; + long n0, n0a, i; + gpmem_t av; if (na < nb) swapspec(a,b, na,nb); if (nb == 1) return mulsispec(*b, a, na); @@ -2111,7 +2257,8 @@ GEN resiimul(GEN x, GEN sy) { GEN r, q, y = (GEN)sy[1], invy; - long av = avma, k; + long k; + gpmem_t av = avma; k = cmpii(x, y); if (k <= 0) return k? icopy(x): gzero; @@ -2140,10 +2287,10 @@ resmod2n(GEN x, long n) { long hi,l,k,lx,ly; GEN z, xd, zd; - + if (!signe(x) || !n) return gzero; - l = n & (BITS_IN_LONG-1); /* n % BITS_IN_LONG */ + l = n & (BITS_IN_LONG-1); /* n % BITS_IN_LONG */ k = n >> TWOPOTBITS_IN_LONG; /* n / BITS_IN_LONG */ lx = lgefint(x); if (lx < k+3) return icopy(x); @@ -2172,7 +2319,8 @@ static GEN quicksqri(GEN a, long na) { GEN a0,c; - long av,n0,n0a,i; + long n0, n0a, i; + gpmem_t av; if (na < KARATSUBA_SQRI_LIMIT) return sqrispec(a,na); i=(na>>1); n0=na-i; na=i; @@ -2216,9 +2364,8 @@ karamulrr1(GEN x, GEN y) /* ensure that lg(y) >= lg(x) */ if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly); if (lx < MULRR_LIMIT) return mulrr(x,y); - sx=signe(x); sy=signe(y); - e = evalexpo(expo(x)+expo(y)); - if (!sx || !sy) { z=cgetr(3); z[2]=0; z[1]=e; return z; } + sx=signe(x); sy=signe(y); e = expo(x)+expo(y); + if (!sx || !sy) return realzero_bit(e); if (sy<0) sx = -sx; ly=lx+flag; z=cgetr(lx); lz2 = (lx>>1); lz3 = lx-lz2; @@ -2246,13 +2393,13 @@ karamulrr1(GEN x, GEN y) garde = (hi[lx+2] << 1); shift_left(z,hi,2,lx+1, garde, 1); } - z[1]=evalsigne(sx) | e; + z[1]=evalsigne(sx) | evalexpo(e); if (garde < 0) { /* round to nearest */ i=lx+2; do z[--i]++; while (z[i]==0); if (i==1) z[2]=HIGHBIT; } - avma=(long)z; return z; + avma=(gpmem_t)z; return z; } GEN @@ -2264,8 +2411,8 @@ karamulrr2(GEN x, GEN y) if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly); if (lx < MULRR2_LIMIT) return mulrr(x,y); ly=lx+flag; sx=signe(x); sy=signe(y); - e = evalexpo(expo(x)+expo(y)); - if (!sx || !sy) { z=cgetr(3); z[2]=0; z[1]=e; return z; } + e = expo(x)+expo(y); + if (!sx || !sy) return realzero_bit(e); if (sy<0) sx = -sx; z=cgetr(lx); hi=quickmulii(y+2,x+2,ly-2,lx-2); @@ -2279,13 +2426,13 @@ karamulrr2(GEN x, GEN y) garde = (hi[lx] << 1); shift_left(z,hi,2,lx-1, garde, 1); } - z[1]=evalsigne(sx) | e; + z[1]=evalsigne(sx) | evalexpo(e); if (garde < 0) { /* round to nearest */ i=lx; do z[--i]++; while (z[i]==0); if (i==1) z[2]=HIGHBIT; } - avma=(long)z; return z; + avma=(gpmem_t)z; return z; } GEN @@ -2304,28 +2451,33 @@ GEN karamulir(GEN x, GEN y, long flag) { long sx=signe(x),lz,i; - GEN z,temp,z1; + GEN z, z1; if (!sx) return gzero; - lz=lg(y); z=cgetr(lz); - temp=cgetr(lz+1); affir(x,temp); - z1=karamulrr(temp,y,flag); + lz = lg(y); z = cgetr(lz); + z1 = karamulrr(itor(x, lz+1), y, flag); for (i=1; i