Annotation of OpenXM_contrib/pari-2.2/src/kernel/none/mp.c, Revision 1.2
1.2 ! noro 1: /* $Id: mp.c,v 1.74 2002/06/08 22:07:38 karim Exp $
1.1 noro 2:
3: Copyright (C) 2000 The PARI group.
4:
5: This file is part of the PARI/GP package.
6:
7: PARI/GP is free software; you can redistribute it and/or modify it under the
8: terms of the GNU General Public License as published by the Free Software
9: Foundation. It is distributed in the hope that it will be useful, but WITHOUT
10: ANY WARRANTY WHATSOEVER.
11:
12: Check the License for details. You should have received a copy of it, along
13: with the package; see the file 'COPYING'. If not, write to the Free Software
14: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
15:
16: /***********************************************************************/
17: /** **/
18: /** MULTIPRECISION KERNEL **/
19: /** **/
20: /***********************************************************************/
21: /* most of the routines in this file are commented out in 68k */
22: /* version (#ifdef __M68K__) since they are defined in mp.s */
23: #include "pari.h"
24:
25: /* NOTE: arguments of "spec" routines (muliispec, addiispec, etc.) aren't
             26:  * GENs but pairs (long *a, long na) representing a list of digits (in base
             27:  * 2^BITS_IN_LONG) : a[0], ..., a[na-1]. [ In order to facilitate splitting: no
             28:  * need to reintroduce codewords ]
             29:  * Use speci(a,na) to visualize the corresponding GEN.
30: */
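/* Illustrative example: a t_INT x (codewords x[0], x[1]) is handed to these
 * "spec" routines as the pair (x+2, lgefint(x)-2); addii() below does
 *   z = addiispec(x+2, y+2, lgefint(x)-2, lgefint(y)-2);  */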
31:
32: /* z2 := z1[imin..imax].f shifted left sh bits (feeding f from the right) */
1.2 ! noro 33: /* These macros work only for sh != 0 !!! */
1.1 noro 34: #define shift_left2(z2,z1,imin,imax,f, sh,m) {\
35: register ulong _l,_k = ((ulong)f)>>m;\
36: GEN t1 = z1 + imax, t2 = z2 + imax, T = z1 + imin;\
37: while (t1 > T) {\
38: _l = *t1--;\
39: *t2-- = (_l<<(ulong)sh) | _k;\
40: _k = _l>>(ulong)m;\
41: }\
42: *t2 = (*t1<<(ulong)sh) | _k;\
43: }
44: #define shift_left(z2,z1,imin,imax,f, sh) {\
45: register const ulong _m = BITS_IN_LONG - sh;\
46: shift_left2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
47: }
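/* Worked example (illustrative, assuming 32-bit words): with sh = 1 and f = 0,
 * shift_left maps the two-word value { 0x00000001, 0x80000000 } (most
 * significant word first) to { 0x00000003, 0x00000000 }; a nonzero f would
 * feed its top sh bits into the low sh bits of the last word. */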
48:
1.2 ! noro 49: #define shift_words_r(target,source,source_end,prepend, sh, sh_complement) {\
! 50: register ulong _k,_l = *source++;\
! 51: *target++ = (_l>>(ulong)sh) | ((ulong)prepend<<(ulong)sh_complement);\
! 52: while (source < source_end) {\
! 53: _k = _l<<(ulong)sh_complement; _l = *source++;\
! 54: *target++ = (_l>>(ulong)sh) | _k;\
! 55: }\
! 56: }
1.1 noro 57: #define shift_right2(z2,z1,imin,imax,f, sh,m) {\
1.2 ! noro 58: register GEN s = (z1) + (imin), ta = (z2) + (imin), se = (z1) + (imax);\
! 59: shift_words_r(ta,s,se,(f),(sh),(m)); \
1.1 noro 60: }
1.2 ! noro 61: /* z2 := f.z1[imin..imax-1] shifted right sh bits (feeding f from the left) */
1.1 noro 62: #define shift_right(z2,z1,imin,imax,f, sh) {\
1.2 ! noro 63: register const ulong _m = BITS_IN_LONG - (sh);\
1.1 noro 64: shift_right2((z2),(z1),(imin),(imax),(f),(sh),(_m));\
65: }
66:
67: #define MASK(x) (((ulong)(x)) & (LGEFINTBITS | SIGNBITS))
68: int
69: egalii(GEN x, GEN y)
70: {
71: long i;
72: if (MASK(x[1]) != MASK(y[1])) return 0;
73: i = lgefint(x)-1; while (i>1 && x[i]==y[i]) i--;
74: return i==1;
75: }
76: #undef MASK
77:
78: #ifdef INLINE
79: INLINE
80: #endif
81: GEN
82: addsispec(long s, GEN x, long nx)
83: {
84: GEN xd, zd = (GEN)avma;
85: long lz;
86: LOCAL_OVERFLOW;
87:
88: lz = nx+3; (void)new_chunk(lz);
89: xd = x + nx;
90: *--zd = addll(*--xd, s);
91: if (overflow)
92: for(;;)
93: {
94: if (xd == x) { *--zd = 1; break; } /* enlarge z */
95: *--zd = ((ulong)*--xd) + 1;
96: if (*zd) { lz--; break; }
97: }
98: else lz--;
99: while (xd > x) *--zd = *--xd;
100: *--zd = evalsigne(1) | evallgefint(lz);
101: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 102: avma=(gpmem_t)zd; return zd;
1.1 noro 103: }
104:
105: #define swapspec(x,y, nx,ny) {long _a=nx;GEN _z=x; nx=ny; ny=_a; x=y; y=_z;}
106:
107: #ifdef INLINE
108: INLINE
109: #endif
110: GEN
111: addiispec(GEN x, GEN y, long nx, long ny)
112: {
113: GEN xd,yd,zd;
114: long lz;
115: LOCAL_OVERFLOW;
116:
117: if (nx < ny) swapspec(x,y, nx,ny);
118: if (ny == 1) return addsispec(*y,x,nx);
119: zd = (GEN)avma;
120: lz = nx+3; (void)new_chunk(lz);
121: xd = x + nx;
122: yd = y + ny;
123: *--zd = addll(*--xd, *--yd);
124: while (yd > y) *--zd = addllx(*--xd, *--yd);
125: if (overflow)
126: for(;;)
127: {
128: if (xd == x) { *--zd = 1; break; } /* enlarge z */
129: *--zd = ((ulong)*--xd) + 1;
130: if (*zd) { lz--; break; }
131: }
132: else lz--;
133: while (xd > x) *--zd = *--xd;
134: *--zd = evalsigne(1) | evallgefint(lz);
135: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 136: avma=(gpmem_t)zd; return zd;
1.1 noro 137: }
138:
139: /* assume x >= y */
140: #ifdef INLINE
141: INLINE
142: #endif
143: GEN
144: subisspec(GEN x, long s, long nx)
145: {
146: GEN xd, zd = (GEN)avma;
147: long lz;
148: LOCAL_OVERFLOW;
149:
150: lz = nx+2; (void)new_chunk(lz);
151: xd = x + nx;
152: *--zd = subll(*--xd, s);
153: if (overflow)
154: for(;;)
155: {
156: *--zd = ((ulong)*--xd) - 1;
157: if (*xd) break;
158: }
159: if (xd == x)
160: while (*zd == 0) { zd++; lz--; } /* shorten z */
161: else
162: do *--zd = *--xd; while (xd > x);
163: *--zd = evalsigne(1) | evallgefint(lz);
164: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 165: avma=(gpmem_t)zd; return zd;
1.1 noro 166: }
167:
168: /* assume x > y */
169: #ifdef INLINE
170: INLINE
171: #endif
172: GEN
173: subiispec(GEN x, GEN y, long nx, long ny)
174: {
175: GEN xd,yd,zd;
176: long lz;
177: LOCAL_OVERFLOW;
178:
179: if (ny==1) return subisspec(x,*y,nx);
180: zd = (GEN)avma;
181: lz = nx+2; (void)new_chunk(lz);
182: xd = x + nx;
183: yd = y + ny;
184: *--zd = subll(*--xd, *--yd);
185: while (yd > y) *--zd = subllx(*--xd, *--yd);
186: if (overflow)
187: for(;;)
188: {
189: *--zd = ((ulong)*--xd) - 1;
190: if (*xd) break;
191: }
192: if (xd == x)
193: while (*zd == 0) { zd++; lz--; } /* shorten z */
194: else
195: do *--zd = *--xd; while (xd > x);
196: *--zd = evalsigne(1) | evallgefint(lz);
197: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 198: avma=(gpmem_t)zd; return zd;
1.1 noro 199: }
200:
201: #ifndef __M68K__
202:
203: /* prototype of positive small ints */
204: static long pos_s[] = {
1.2 ! noro 205: evaltyp(t_INT) | _evallg(3), evalsigne(1) | evallgefint(3), 0 };
1.1 noro 206:
207: /* prototype of negative small ints */
208: static long neg_s[] = {
1.2 ! noro 209: evaltyp(t_INT) | _evallg(3), evalsigne(-1) | evallgefint(3), 0 };
1.1 noro 210:
211: void
212: affir(GEN x, GEN y)
213: {
214: const long s=signe(x),ly=lg(y);
215: long lx,sh,i;
216:
217: if (!s)
218: {
219: y[1] = evalexpo(-bit_accuracy(ly));
1.2 ! noro 220: return;
1.1 noro 221: }
222:
1.2 ! noro 223: lx = lgefint(x); sh = bfffo(x[2]);
1.1 noro 224: y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);
1.2 ! noro 225: if (sh) {
1.1 noro 226: if (lx>ly)
227: { shift_left(y,x,2,ly-1, x[ly],sh); }
228: else
229: {
230: for (i=lx; i<ly; i++) y[i]=0;
231: shift_left(y,x,2,lx-1, 0,sh);
232: }
233: }
1.2 ! noro 234: else {
! 235: if (lx>=ly)
! 236: for (i=2; i<ly; i++) y[i]=x[i];
! 237: else
! 238: {
! 239: for (i=2; i<lx; i++) y[i]=x[i];
! 240: for ( ; i<ly; i++) y[i]=0;
! 241: }
1.1 noro 242: }
243: }
244:
245: void
246: affrr(GEN x, GEN y)
247: {
248: long lx,ly,i;
249:
1.2 ! noro 250: y[1] = x[1]; if (!signe(x)) return;
1.1 noro 251:
252: lx=lg(x); ly=lg(y);
253: if (lx>=ly)
254: for (i=2; i<ly; i++) y[i]=x[i];
255: else
256: {
257: for (i=2; i<lx; i++) y[i]=x[i];
258: for ( ; i<ly; i++) y[i]=0;
259: }
260: }
261:
262: GEN
263: shifti(GEN x, long n)
264: {
1.2 ! noro 265: return shifti3(x, n, 0);
! 266: }
! 267:
! 268: GEN
! 269: shifti3(GEN x, long n, long flag)
! 270: {
1.1 noro 271: const long s=signe(x);
272: long lx,ly,i,m;
273: GEN y;
274:
275: if (!s) return gzero;
276: if (!n) return icopy(x);
277: lx = lgefint(x);
278: if (n > 0)
279: {
280: GEN z = (GEN)avma;
281: long d = n>>TWOPOTBITS_IN_LONG;
282:
283: ly = lx+d; y = new_chunk(ly);
284: for ( ; d; d--) *--z = 0;
285: m = n & (BITS_IN_LONG-1);
286: if (!m) for (i=2; i<lx; i++) y[i]=x[i];
287: else
288: {
289: register const ulong sh = BITS_IN_LONG - m;
290: shift_left2(y,x, 2,lx-1, 0,m,sh);
291: i = ((ulong)x[2]) >> sh;
292: if (i) { ly++; y = new_chunk(1); y[2] = i; }
293: }
294: }
295: else
296: {
1.2 ! noro 297: long lyorig;
! 298:
! 299: if (s > 0)
! 300: flag = 0;
1.1 noro 301: n = -n;
1.2 ! noro 302: ly = lyorig = lx - (n>>TWOPOTBITS_IN_LONG);
! 303: if (ly<3)
! 304: return flag ? stoi(-1) : gzero;
1.1 noro 305: y = new_chunk(ly);
306: m = n & (BITS_IN_LONG-1);
1.2 ! noro 307: if (m) {
1.1 noro 308: shift_right(y,x, 2,ly, 0,m);
309: if (y[2] == 0)
310: {
1.2 ! noro 311: if (ly==3) { avma = (gpmem_t)(y+3); return flag ? stoi(-1) : gzero; }
! 312: ly--; avma = (gpmem_t)(++y);
! 313: }
! 314: } else {
! 315: for (i=2; i<ly; i++) y[i]=x[i];
! 316: }
! 317: /* With FLAG: round up instead of rounding down */
! 318: if (flag) { /* Check low bits of x */
! 319: i = lx; flag = 0;
! 320: while (--i >= lyorig)
! 321: if (x[i]) { flag = 1; break; } /* Need to increment y by 1 */
! 322: if (!flag && m)
! 323: flag = x[lyorig - 1] & ((1<<m) - 1);
! 324: }
! 325: if (flag) { /* Increment y */
! 326: for (i = ly;;)
! 327: {
! 328: if (--i < 2)
! 329: { /* Extend y on the left */
! 330: if (avma <= bot) err(errpile);
! 331: avma = (gpmem_t)(--y); ly++;
! 332: y[2] = 1; break;
! 333: }
! 334: if (++y[i]) break;
! 335: /* Now propagate the bit into the next longword */
1.1 noro 336: }
337: }
338: }
339: y[1] = evalsigne(s)|evallgefint(ly);
340: y[0] = evaltyp(t_INT)|evallg(ly); return y;
341: }
342:
343: GEN
344: mptrunc(GEN x)
345: {
346: long d,e,i,s,m;
347: GEN y;
348:
349: if (typ(x)==t_INT) return icopy(x);
350: if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gzero;
351: d = (e>>TWOPOTBITS_IN_LONG) + 3;
352: m = e & (BITS_IN_LONG-1);
353: if (d > lg(x)) err(precer, "mptrunc (precision loss in truncation)");
354:
355: y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);
356: if (++m == BITS_IN_LONG)
357: for (i=2; i<d; i++) y[i]=x[i];
358: else
359: {
360: register const ulong sh = BITS_IN_LONG - m;
361: shift_right2(y,x, 2,d,0, sh,m);
362: }
363: return y;
364: }
365:
366: /* integral part */
367: GEN
368: mpent(GEN x)
369: {
370: long d,e,i,lx,m;
371: GEN y;
372:
373: if (typ(x)==t_INT) return icopy(x);
374: if (signe(x) >= 0) return mptrunc(x);
375: if ((e=expo(x)) < 0) return stoi(-1);
376: d = (e>>TWOPOTBITS_IN_LONG) + 3;
377: m = e & (BITS_IN_LONG-1);
             378:   lx=lg(x); if (d>lx) err(precer, "mpent (precision loss in truncation)");
379: y = new_chunk(d);
380: if (++m == BITS_IN_LONG)
381: {
382: for (i=2; i<d; i++) y[i]=x[i];
383: i=d; while (i<lx && !x[i]) i++;
384: if (i==lx) goto END;
385: }
386: else
387: {
388: register const ulong sh = BITS_IN_LONG - m;
389: shift_right2(y,x, 2,d,0, sh,m);
390: if (x[d-1]<<m == 0)
391: {
392: i=d; while (i<lx && !x[i]) i++;
393: if (i==lx) goto END;
394: }
395: }
396: /* set y:=y+1 */
397: for (i=d-1; i>=2; i--) { y[i]++; if (y[i]) goto END; }
398: y=new_chunk(1); y[2]=1; d++;
399: END:
400: y[1] = evalsigne(-1) | evallgefint(d);
401: y[0] = evaltyp(t_INT) | evallg(d); return y;
402: }
403:
404: int
405: cmpsi(long x, GEN y)
406: {
407: ulong p;
408:
409: if (!x) return -signe(y);
410:
411: if (x>0)
412: {
413: if (signe(y)<=0) return 1;
414: if (lgefint(y)>3) return -1;
415: p=y[2]; if (p == (ulong)x) return 0;
416: return p < (ulong)x ? 1 : -1;
417: }
418:
419: if (signe(y)>=0) return -1;
420: if (lgefint(y)>3) return 1;
421: p=y[2]; if (p == (ulong)-x) return 0;
422: return p < (ulong)(-x) ? -1 : 1;
423: }
424:
425: int
426: cmpii(GEN x, GEN y)
427: {
428: const long sx = signe(x), sy = signe(y);
429: long lx,ly,i;
430:
431: if (sx<sy) return -1;
432: if (sx>sy) return 1;
433: if (!sx) return 0;
434:
435: lx=lgefint(x); ly=lgefint(y);
436: if (lx>ly) return sx;
437: if (lx<ly) return -sx;
438: i=2; while (i<lx && x[i]==y[i]) i++;
439: if (i==lx) return 0;
440: return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
441: }
442:
443: int
444: cmprr(GEN x, GEN y)
445: {
446: const long sx = signe(x), sy = signe(y);
447: long ex,ey,lx,ly,lz,i;
448:
449: if (sx<sy) return -1;
450: if (sx>sy) return 1;
451: if (!sx) return 0;
452:
453: ex=expo(x); ey=expo(y);
454: if (ex>ey) return sx;
455: if (ex<ey) return -sx;
456:
457: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
458: i=2; while (i<lz && x[i]==y[i]) i++;
459: if (i<lz) return ((ulong)x[i] > (ulong)y[i]) ? sx : -sx;
460: if (lx>=ly)
461: {
462: while (i<lx && !x[i]) i++;
463: return (i==lx) ? 0 : sx;
464: }
465: while (i<ly && !y[i]) i++;
466: return (i==ly) ? 0 : -sx;
467: }
468:
469: GEN
470: addss(long x, long y)
471: {
472: if (!x) return stoi(y);
473: if (x>0) { pos_s[2] = x; return addsi(y,pos_s); }
474: neg_s[2] = -x; return addsi(y,neg_s);
475: }
476:
477: GEN
478: addsi(long x, GEN y)
479: {
480: long sx,sy,ly;
481: GEN z;
482:
483: if (!x) return icopy(y);
484: sy=signe(y); if (!sy) return stoi(x);
485: if (x<0) { sx=-1; x=-x; } else sx=1;
486: if (sx==sy)
487: {
488: z = addsispec(x,y+2, lgefint(y)-2);
489: setsigne(z,sy); return z;
490: }
491: ly=lgefint(y);
492: if (ly==3)
493: {
494: const long d = y[2] - x;
495: if (!d) return gzero;
496: z=cgeti(3);
497: if (y[2] < 0 || d > 0) {
498: z[1] = evalsigne(sy) | evallgefint(3);
499: z[2] = d;
500: }
501: else {
502: z[1] = evalsigne(-sy) | evallgefint(3);
503: z[2] =-d;
504: }
505: return z;
506: }
507: z = subisspec(y+2,x, ly-2);
508: setsigne(z,sy); return z;
509: }
510:
511: GEN
512: addii(GEN x, GEN y)
513: {
514: long sx = signe(x);
515: long sy = signe(y);
516: long lx,ly;
517: GEN z;
518:
519: if (!sx) return sy? icopy(y): gzero;
520: if (!sy) return icopy(x);
521: lx=lgefint(x); ly=lgefint(y);
522:
523: if (sx==sy)
524: z = addiispec(x+2,y+2,lx-2,ly-2);
525: else
526: { /* sx != sy */
527: long i = lx - ly;
528: if (i==0)
529: {
530: i = absi_cmp(x,y);
531: if (!i) return gzero;
532: }
533: if (i<0) { sx=sy; swapspec(x,y, lx,ly); } /* ensure |x| >= |y| */
534: z = subiispec(x+2,y+2,lx-2,ly-2);
535: }
536: setsigne(z,sx); return z;
537: }
538:
539: GEN
540: addsr(long x, GEN y)
541: {
542: if (!x) return rcopy(y);
543: if (x>0) { pos_s[2]=x; return addir(pos_s,y); }
544: neg_s[2] = -x; return addir(neg_s,y);
545: }
546:
547: GEN
548: addir(GEN x, GEN y)
549: {
550: long e,l,ly;
551: GEN z;
552:
553: if (!signe(x)) return rcopy(y);
554: e = expo(y)-expi(x);
555: if (!signe(y))
556: {
557: #if 0
558: if (e>0) err(adder3);
559: #else /* design issue: make 0.0 "absorbing" */
560: if (e>0) return rcopy(y);
561: #endif
1.2 ! noro 562: return itor(x, 3 + ((-e)>>TWOPOTBITS_IN_LONG));
1.1 noro 563: }
564:
565: ly=lg(y);
1.2 ! noro 566: if (e > 0)
1.1 noro 567: {
568: l = ly - (e>>TWOPOTBITS_IN_LONG);
569: if (l<3) return rcopy(y);
570: }
571: else l = ly + ((-e)>>TWOPOTBITS_IN_LONG)+1;
1.2 ! noro 572: y = addrr(itor(x,l), y);
1.1 noro 573: z = y+l; ly = lg(y); while (ly--) z[ly] = y[ly];
1.2 ! noro 574: avma=(gpmem_t)z; return z;
1.1 noro 575: }
576:
577: GEN
578: addrr(GEN x, GEN y)
579: {
580: long sx=signe(x),sy=signe(y),ex=expo(x),ey=expo(y);
581: long e,m,flag,i,j,f2,lx,ly,lz;
582: GEN z;
583: LOCAL_OVERFLOW;
584:
585: e = ey-ex;
586: if (!sy)
587: {
588: if (!sx)
589: {
590: if (e > 0) ex=ey;
1.2 ! noro 591: return realzero_bit(ex);
1.1 noro 592: }
1.2 ! noro 593: if (e > 0) return realzero_bit(ey);
1.1 noro 594: lz = 3 + ((-e)>>TWOPOTBITS_IN_LONG);
595: lx = lg(x); if (lz>lx) lz=lx;
596: z = cgetr(lz); while(--lz) z[lz]=x[lz];
597: return z;
598: }
599: if (!sx)
600: {
1.2 ! noro 601: if (e < 0) return realzero_bit(ex);
1.1 noro 602: lz = 3 + (e>>TWOPOTBITS_IN_LONG);
603: ly = lg(y); if (lz>ly) lz=ly;
604: z = cgetr(lz); while (--lz) z[lz]=y[lz];
605: return z;
606: }
607:
608: if (e < 0) { z=x; x=y; y=z; ey=ex; i=sx; sx=sy; sy=i; e=-e; }
609: /* now ey >= ex */
610: lx=lg(x); ly=lg(y);
611: if (e)
612: {
613: long d = e >> TWOPOTBITS_IN_LONG, l = ly-d;
614: if (l > lx) { flag=0; lz=lx+d+1; }
615: else if (l > 2) { flag=1; lz=ly; lx=l; }
616: else return rcopy(y);
617: m = e & (BITS_IN_LONG-1);
618: }
619: else
620: {
621: if (lx > ly) lx = ly;
622: flag=2; lz=lx; m=0;
623: }
624: z = cgetr(lz);
625: if (m)
626: { /* shift right x m bits */
627: const ulong sh = BITS_IN_LONG-m;
628: GEN p1 = x; x = new_chunk(lx+1);
629: shift_right2(x,p1,2,lx, 0,m,sh);
630: if (flag==0)
631: {
632: x[lx] = p1[lx-1] << sh;
633: if (x[lx]) { flag = 3; lx++; }
634: }
635: }
636:
637: if (sx==sy)
638: { /* addition */
1.2 ! noro 639: i=lz-1; avma = (gpmem_t)z;
1.1 noro 640: if (flag==0) { z[i] = y[i]; i--; }
641: overflow=0;
642: for (j=lx-1; j>=2; i--,j--) z[i] = addllx(x[j],y[i]);
643:
644: if (overflow)
645: for (;;)
646: {
647: if (i==1)
648: {
649: shift_right(z,z, 2,lz, 1,1);
1.2 ! noro 650: z[1] = evalsigne(sx) | evalexpo(ey+1); return z;
1.1 noro 651: }
652: z[i] = y[i]+1; if (z[i--]) break;
653: }
654: for (; i>=2; i--) z[i]=y[i];
655: z[1] = evalsigne(sx) | evalexpo(ey); return z;
656: }
657:
658: /* subtraction */
659: if (e) f2 = 1;
660: else
661: {
662: i=2; while (i<lx && x[i]==y[i]) i++;
663: if (i==lx)
664: {
1.2 ! noro 665: avma = (gpmem_t)(z+lz);
! 666: return realzero_bit(ey - bit_accuracy(lx));
1.1 noro 667: }
668: f2 = ((ulong)y[i] > (ulong)x[i]);
669: }
670:
671: /* result is non-zero. f2 = (y > x) */
672: i=lz-1;
673: if (f2)
674: {
675: if (flag==0) { z[i] = y[i]; i--; }
676: j=lx-1; z[i] = subll(y[i],x[j]); i--; j--;
677: for (; j>=2; i--,j--) z[i] = subllx(y[i],x[j]);
678: if (overflow)
679: for (;;) { z[i] = y[i]-1; if (y[i--]) break; }
680: for (; i>=2; i--) z[i]=y[i];
681: sx = sy;
682: }
683: else
684: {
685: if (flag==0) { z[i] = -y[i]; i--; overflow=1; } else overflow=0;
686: for (; i>=2; i--) z[i]=subllx(x[i],y[i]);
687: }
688:
689: x = z+2; i=0; while (!x[i]) i++;
690: lz -= i; z += i; m = bfffo(z[2]);
691: if (m) shift_left(z,z,2,lz-1, 0,m);
1.2 ! noro 692: z[1] = evalsigne(sx) | evalexpo(ey - (m | (i<<TWOPOTBITS_IN_LONG)));
1.1 noro 693: z[0] = evaltyp(t_REAL) | evallg(lz);
1.2 ! noro 694: avma = (gpmem_t)z; return z;
1.1 noro 695: }
696:
697: GEN
698: mulss(long x, long y)
699: {
700: long s,p1;
701: GEN z;
702: LOCAL_HIREMAINDER;
703:
704: if (!x || !y) return gzero;
705: if (x<0) { s = -1; x = -x; } else s=1;
706: if (y<0) { s = -s; y = -y; }
707: p1 = mulll(x,y);
708: if (hiremainder)
709: {
710: z=cgeti(4); z[1] = evalsigne(s) | evallgefint(4);
711: z[2]=hiremainder; z[3]=p1; return z;
712: }
713: z=cgeti(3); z[1] = evalsigne(s) | evallgefint(3);
714: z[2]=p1; return z;
715: }
716: #endif
717:
718: GEN
719: muluu(ulong x, ulong y)
720: {
721: long p1;
722: GEN z;
723: LOCAL_HIREMAINDER;
724:
725: if (!x || !y) return gzero;
726: p1 = mulll(x,y);
727: if (hiremainder)
728: {
729: z=cgeti(4); z[1] = evalsigne(1) | evallgefint(4);
730: z[2]=hiremainder; z[3]=p1; return z;
731: }
732: z=cgeti(3); z[1] = evalsigne(1) | evallgefint(3);
733: z[2]=p1; return z;
734: }
735:
736: /* assume ny > 0 */
737: #ifdef INLINE
738: INLINE
739: #endif
740: GEN
741: mulsispec(long x, GEN y, long ny)
742: {
743: GEN yd, z = (GEN)avma;
744: long lz = ny+3;
745: LOCAL_HIREMAINDER;
746:
747: (void)new_chunk(lz);
748: yd = y + ny; *--z = mulll(x, *--yd);
749: while (yd > y) *--z = addmul(x,*--yd);
750: if (hiremainder) *--z = hiremainder; else lz--;
751: *--z = evalsigne(1) | evallgefint(lz);
752: *--z = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 753: avma=(gpmem_t)z; return z;
1.1 noro 754: }
755:
756: GEN
757: mului(ulong x, GEN y)
758: {
759: long s = signe(y);
760: GEN z;
761:
762: if (!s || !x) return gzero;
763: z = mulsispec(x, y+2, lgefint(y)-2);
764: setsigne(z,s); return z;
765: }
766:
767: /* a + b*Y, assume Y >= 0, 0 <= a,b <= VERYBIGINT */
768: GEN
769: addsmulsi(long a, long b, GEN Y)
770: {
771: GEN yd,y,z;
772: long ny,lz;
773: LOCAL_HIREMAINDER;
774: LOCAL_OVERFLOW;
775:
776: if (!signe(Y)) return stoi(a);
777:
778: y = Y+2; z = (GEN)avma;
779: ny = lgefint(Y)-2;
780: lz = ny+3;
781:
782: (void)new_chunk(lz);
783: yd = y + ny; *--z = addll(a, mulll(b, *--yd));
784: if (overflow) hiremainder++; /* can't overflow */
785: while (yd > y) *--z = addmul(b,*--yd);
786: if (hiremainder) *--z = hiremainder; else lz--;
787: *--z = evalsigne(1) | evallgefint(lz);
788: *--z = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 789: avma=(gpmem_t)z; return z;
1.1 noro 790: }
791:
792: #ifndef __M68K__
793:
794: GEN
795: mulsi(long x, GEN y)
796: {
797: long s = signe(y);
798: GEN z;
799:
800: if (!s || !x) return gzero;
801: if (x<0) { s = -s; x = -x; }
802: z = mulsispec(x, y+2, lgefint(y)-2);
803: setsigne(z,s); return z;
804: }
805:
806: GEN
807: mulsr(long x, GEN y)
808: {
809: long lx,i,s,garde,e,sh,m;
810: GEN z;
811: LOCAL_HIREMAINDER;
812:
813: if (!x) return gzero;
814: s = signe(y);
815: if (!s)
816: {
817: if (x<0) x = -x;
1.2 ! noro 818: e = expo(y) + (BITS_IN_LONG-1)-bfffo(x);
! 819: return realzero_bit(e);
1.1 noro 820: }
821: if (x<0) { s = -s; x = -x; }
822: if (x==1) { z=rcopy(y); setsigne(z,s); return z; }
823:
824: lx = lg(y); e = expo(y);
825: z=cgetr(lx); y--; garde=mulll(x,y[lx]);
826: for (i=lx-1; i>=3; i--) z[i]=addmul(x,y[i]);
827: z[2]=hiremainder;
828:
829: sh = bfffo(hiremainder); m = BITS_IN_LONG-sh;
830: if (sh) shift_left2(z,z, 2,lx-1, garde,sh,m);
1.2 ! noro 831: z[1] = evalsigne(s) | evalexpo(m+e); return z;
1.1 noro 832: }
833:
834: GEN
835: mulrr(GEN x, GEN y)
836: {
837: long sx = signe(x), sy = signe(y);
838: long i,j,ly,lz,lzz,e,flag,p1;
839: ulong garde;
840: GEN z, y1;
841: LOCAL_HIREMAINDER;
842: LOCAL_OVERFLOW;
843:
1.2 ! noro 844: e = expo(x)+expo(y);
! 845: if (!sx || !sy) return realzero_bit(e);
1.1 noro 846: if (sy<0) sx = -sx;
847: lz=lg(x); ly=lg(y);
848: if (lz>ly) { lz=ly; z=x; x=y; y=z; flag=1; } else flag = (lz!=ly);
1.2 ! noro 849: z=cgetr(lz);
1.1 noro 850: if (lz==3)
851: {
852: if (flag)
853: {
854: (void)mulll(x[2],y[3]);
855: garde = addmul(x[2],y[2]);
856: }
857: else
858: garde = mulll(x[2],y[2]);
1.2 ! noro 859: if ((long)hiremainder<0) { z[2]=hiremainder; e++; }
1.1 noro 860: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
1.2 ! noro 861: z[1] = evalsigne(sx)|evalexpo(e);
1.1 noro 862: return z;
863: }
864:
865: if (flag) { (void)mulll(x[2],y[lz]); garde = hiremainder; } else garde=0;
866: lzz=lz-1; p1=x[lzz];
867: if (p1)
868: {
869: (void)mulll(p1,y[3]);
870: garde = addll(addmul(p1,y[2]), garde);
871: z[lzz] = overflow+hiremainder;
872: }
873: else z[lzz]=0;
874: for (j=lz-2, y1=y-j; j>=3; j--)
875: {
876: p1 = x[j]; y1++;
877: if (p1)
878: {
879: (void)mulll(p1,y1[lz+1]);
880: garde = addll(addmul(p1,y1[lz]), garde);
881: for (i=lzz; i>j; i--)
882: {
883: hiremainder += overflow;
884: z[i] = addll(addmul(p1,y1[i]), z[i]);
885: }
886: z[j] = hiremainder+overflow;
887: }
888: else z[j]=0;
889: }
890: p1=x[2]; y1++;
891: garde = addll(mulll(p1,y1[lz]), garde);
892: for (i=lzz; i>2; i--)
893: {
894: hiremainder += overflow;
895: z[i] = addll(addmul(p1,y1[i]), z[i]);
896: }
897: z[2] = hiremainder+overflow;
1.2 ! noro 898: if (z[2] < 0) e++; else shift_left(z,z,2,lzz,garde, 1);
! 899: z[1] = evalsigne(sx) | evalexpo(e);
1.1 noro 900: return z;
901: }
902:
903: GEN
904: mulir(GEN x, GEN y)
905: {
906: long sx=signe(x),sy,lz,lzz,ey,e,p1,i,j;
907: ulong garde;
908: GEN z,y1;
909: LOCAL_HIREMAINDER;
910: LOCAL_OVERFLOW;
911:
912: if (!sx) return gzero;
913: if (!is_bigint(x)) return mulsr(itos(x),y);
914: sy=signe(y); ey=expo(y);
1.2 ! noro 915: if (!sy) return realzero_bit(expi(x)+ey);
1.1 noro 916:
917: if (sy<0) sx = -sx;
918: lz=lg(y); z=cgetr(lz);
1.2 ! noro 919: y1 = itor(x, lz+1); x = y; y = y1;
! 920: e = expo(y)+ey;
1.1 noro 921: if (lz==3)
922: {
923: (void)mulll(x[2],y[3]);
924: garde=addmul(x[2],y[2]);
1.2 ! noro 925: if ((long)hiremainder < 0) { z[2]=hiremainder; e++; }
1.1 noro 926: else z[2]=(hiremainder<<1) | (garde>>(BITS_IN_LONG-1));
1.2 ! noro 927: z[1] = evalsigne(sx) | evalexpo(e);
! 928: avma=(gpmem_t)z; return z;
1.1 noro 929: }
930:
931: (void)mulll(x[2],y[lz]); garde=hiremainder;
932: lzz=lz-1; p1=x[lzz];
933: if (p1)
934: {
935: (void)mulll(p1,y[3]);
936: garde=addll(addmul(p1,y[2]),garde);
937: z[lzz] = overflow+hiremainder;
938: }
939: else z[lzz]=0;
940: for (j=lz-2, y1=y-j; j>=3; j--)
941: {
942: p1=x[j]; y1++;
943: if (p1)
944: {
945: (void)mulll(p1,y1[lz+1]);
946: garde = addll(addmul(p1,y1[lz]), garde);
947: for (i=lzz; i>j; i--)
948: {
949: hiremainder += overflow;
950: z[i] = addll(addmul(p1,y1[i]), z[i]);
951: }
952: z[j] = hiremainder+overflow;
953: }
954: else z[j]=0;
955: }
956: p1=x[2]; y1++;
957: garde = addll(mulll(p1,y1[lz]), garde);
958: for (i=lzz; i>2; i--)
959: {
960: hiremainder += overflow;
961: z[i] = addll(addmul(p1,y1[i]), z[i]);
962: }
963: z[2] = hiremainder+overflow;
1.2 ! noro 964: if (z[2] < 0) e++;
1.1 noro 965: else
966: shift_left(z,z,2,lzz,garde, 1);
1.2 ! noro 967: z[1] = evalsigne(sx) | evalexpo(e);
! 968: avma=(gpmem_t)z; return z;
1.1 noro 969: }
970:
             971: /* 2-adic valuation of z (-1 if z = 0); written by Bruno Haible following an idea of Robert Harley */
972: long
973: vals(ulong z)
974: {
975: static char tab[64]={-1,0,1,12,2,6,-1,13,3,-1,7,-1,-1,-1,-1,14,10,4,-1,-1,8,-1,-1,25,-1,-1,-1,-1,-1,21,27,15,31,11,5,-1,-1,-1,-1,-1,9,-1,-1,24,-1,-1,20,26,30,-1,-1,-1,-1,23,-1,19,29,-1,22,18,28,17,16,-1};
976: #ifdef LONG_IS_64BIT
977: long s;
978: #endif
979:
980: if (!z) return -1;
981: #ifdef LONG_IS_64BIT
982: if (! (z&0xffffffff)) { s = 32; z >>=32; } else s = 0;
983: #endif
984: z |= ~z + 1;
985: z += z << 4;
986: z += z << 6;
987: z ^= z << 16; /* or z -= z<<16 */
988: #ifdef LONG_IS_64BIT
989: return s + tab[(z&0xffffffff)>>26];
990: #else
991: return tab[z>>26];
992: #endif
993: }
994:
995: GEN
996: modss(long x, long y)
997: {
998: LOCAL_HIREMAINDER;
999:
1000: if (!y) err(moder1);
1001: if (y<0) y=-y;
1.2 ! noro 1002: hiremainder=0; (void)divll(labs(x),y);
1.1 noro 1003: if (!hiremainder) return gzero;
1004: return (x < 0) ? stoi(y-hiremainder) : stoi(hiremainder);
1005: }
1006:
1007: GEN
1008: resss(long x, long y)
1009: {
1010: LOCAL_HIREMAINDER;
1011:
1012: if (!y) err(reser1);
1.2 ! noro 1013: hiremainder=0; (void)divll(labs(x),labs(y));
1.1 noro 1014: return (x < 0) ? stoi(-((long)hiremainder)) : stoi(hiremainder);
1015: }
1016:
1017: GEN
1018: divsi(long x, GEN y)
1019: {
1020: long p1, s = signe(y);
1021: LOCAL_HIREMAINDER;
1022:
1023: if (!s) err(diver2);
1024: if (!x || lgefint(y)>3 || ((long)y[2])<0)
1025: {
1026: hiremainder=x; SAVE_HIREMAINDER; return gzero;
1027: }
1028: hiremainder=0; p1=divll(labs(x),y[2]);
1029: if (x<0) { hiremainder = -((long)hiremainder); p1 = -p1; }
1030: if (s<0) p1 = -p1;
1031: SAVE_HIREMAINDER; return stoi(p1);
1032: }
1033: #endif
1034:
1035: GEN
1036: modui(ulong x, GEN y)
1037: {
1038: LOCAL_HIREMAINDER;
1039:
1040: if (!signe(y)) err(diver2);
1041: if (!x || lgefint(y)>3) hiremainder=x;
1042: else
1043: {
1044: hiremainder=0; (void)divll(x,y[2]);
1045: }
1046: return utoi(hiremainder);
1047: }
1048:
1049: ulong
1050: umodiu(GEN y, ulong x)
1051: {
1052: long sy=signe(y),ly,i;
1053: LOCAL_HIREMAINDER;
1054:
1055: if (!x) err(diver4);
1056: if (!sy) return 0;
1057: ly = lgefint(y);
1058: if (x <= (ulong)y[2]) hiremainder=0;
1059: else
1060: {
1061: if (ly==3) return (sy > 0)? (ulong)y[2]: x - (ulong)y[2];
1062: hiremainder=y[2]; ly--; y++;
1063: }
1064: for (i=2; i<ly; i++) (void)divll(y[i],x);
1065: if (!hiremainder) return 0;
1066: return (sy > 0)? hiremainder: x - hiremainder;
1067: }
1068:
1069: GEN
1070: modiu(GEN y, ulong x) { return utoi(umodiu(y,x)); }
1071:
1.2 ! noro 1072: /* return floor(|y| / x); the remainder is left in hiremainder */
! 1073: GEN
! 1074: diviu(GEN y, ulong x)
! 1075: {
! 1076: long sy=signe(y),ly,i;
! 1077: GEN z;
! 1078: LOCAL_HIREMAINDER;
! 1079:
! 1080: if (!x) err(diver4);
! 1081: if (!sy) { hiremainder=0; SAVE_HIREMAINDER; return gzero; }
! 1082:
! 1083: ly = lgefint(y);
! 1084: if (x <= (ulong)y[2]) hiremainder=0;
! 1085: else
! 1086: {
! 1087: if (ly==3) { hiremainder=itou(y); SAVE_HIREMAINDER; return gzero; }
! 1088: hiremainder=y[2]; ly--; y++;
! 1089: }
! 1090: z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(1);
! 1091: for (i=2; i<ly; i++) z[i]=divll(y[i],x);
! 1092: SAVE_HIREMAINDER; return z;
! 1093: }
! 1094:
1.1 noro 1095: #ifndef __M68K__
1096:
1097: GEN
1098: modsi(long x, GEN y)
1099: {
1100: long s = signe(y);
1101: GEN p1;
1102: LOCAL_HIREMAINDER;
1103:
1104: if (!s) err(diver2);
1105: if (!x || lgefint(y)>3 || ((long)y[2])<0) hiremainder=x;
1106: else
1107: {
1108: hiremainder=0; (void)divll(labs(x),y[2]);
1109: if (x<0) hiremainder = -((long)hiremainder);
1110: }
1111: if (!hiremainder) return gzero;
1112: if (x>0) return stoi(hiremainder);
1113: if (s<0)
1114: { setsigne(y,1); p1=addsi(hiremainder,y); setsigne(y,-1); }
1115: else
1116: p1=addsi(hiremainder,y);
1117: return p1;
1118: }
1119:
1120: GEN
1121: divis(GEN y, long x)
1122: {
1123: long sy=signe(y),ly,s,i;
1124: GEN z;
1125: LOCAL_HIREMAINDER;
1126:
1127: if (!x) err(diver4);
1128: if (!sy) { hiremainder=0; SAVE_HIREMAINDER; return gzero; }
1129: if (x<0) { s = -sy; x = -x; } else s=sy;
1130:
1131: ly = lgefint(y);
1132: if ((ulong)x <= (ulong)y[2]) hiremainder=0;
1133: else
1134: {
1135: if (ly==3) { hiremainder=itos(y); SAVE_HIREMAINDER; return gzero; }
1136: hiremainder=y[2]; ly--; y++;
1137: }
1138: z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
1139: for (i=2; i<ly; i++) z[i]=divll(y[i],x);
1140: if (sy<0) hiremainder = - ((long)hiremainder);
1141: SAVE_HIREMAINDER; return z;
1142: }
1143:
1144: GEN
1145: divir(GEN x, GEN y)
1146: {
1.2 ! noro 1147: GEN z;
! 1148: long ly;
! 1149: gpmem_t av;
1.1 noro 1150:
1151: if (!signe(y)) err(diver5);
1152: if (!signe(x)) return gzero;
1.2 ! noro 1153: ly = lg(y); z = cgetr(ly); av = avma;
! 1154: affrr(divrr(itor(x, ly+1), y), z);
! 1155: avma = av; return z;
1.1 noro 1156: }
1157:
1158: GEN
1159: divri(GEN x, GEN y)
1160: {
1.2 ! noro 1161: long lx, s = signe(y);
! 1162: gpmem_t av;
! 1163: GEN z;
1.1 noro 1164:
1165: if (!s) err(diver8);
1.2 ! noro 1166: if (!signe(x)) return realzero_bit(expo(x) - expi(y));
1.1 noro 1167: if (!is_bigint(y)) return divrs(x, s>0? y[2]: -y[2]);
1168:
1.2 ! noro 1169: lx = lg(x); z = cgetr(lx); av = avma;
! 1170: affrr(divrr(x, itor(y, lx+1)), z);
! 1171: avma = av; return z;
1.1 noro 1172: }
1173:
1174: void
1175: diviiz(GEN x, GEN y, GEN z)
1176: {
1.2 ! noro 1177: gpmem_t av = avma;
! 1178: if (typ(z) == t_INT) affii(divii(x,y), z);
! 1179: else {
! 1180: long lz = lg(z);
! 1181: affrr(divrr(itor(x,lz), itor(y,lz)), z);
! 1182: }
! 1183: avma = av;
1.1 noro 1184: }
1185:
1186: void
1187: mpdivz(GEN x, GEN y, GEN z)
1188: {
1.2 ! noro 1189: gpmem_t av = avma;
! 1190: GEN r;
1.1 noro 1191:
1192: if (typ(z)==t_INT)
1193: {
1.2 ! noro 1194: if (typ(x) == t_REAL || typ(y) == t_REAL) err(divzer1);
! 1195: affii(divii(x,y), z);
! 1196: avma = av; return;
1.1 noro 1197: }
1.2 ! noro 1198:
! 1199: if (typ(x) == t_INT)
1.1 noro 1200: {
1.2 ! noro 1201: if (typ(y) == t_REAL)
! 1202: r = divir(x,y);
! 1203: else
! 1204: {
! 1205: long lz = lg(z);
! 1206: r = divrr(itor(x,lz), itor(y,lz));
! 1207: }
1.1 noro 1208: }
1.2 ! noro 1209: else if (typ(y) == t_REAL)
! 1210: r = divrr(x,y);
! 1211: else
! 1212: r = divri(x,y);
! 1213: affrr(r, z); avma = av;
1.1 noro 1214: }
1215:
1216: GEN
1217: divsr(long x, GEN y)
1218: {
1.2 ! noro 1219: gpmem_t av;
! 1220: long ly;
! 1221: GEN z;
1.1 noro 1222:
1223: if (!signe(y)) err(diver3);
1224: if (!x) return gzero;
1.2 ! noro 1225: ly = lg(y); z = cgetr(ly); av = avma;
! 1226: affrr(divrr(stor(x,ly+1), y), z);
! 1227: avma = av; return z;
1.1 noro 1228: }
1229:
1230: GEN
1231: modii(GEN x, GEN y)
1232: {
1233: switch(signe(x))
1234: {
1235: case 0: return gzero;
1236: case 1: return resii(x,y);
1237: default:
1238: {
1.2 ! noro 1239: gpmem_t av = avma;
1.1 noro 1240: (void)new_chunk(lgefint(y));
1241: x = resii(x,y); avma=av;
1242: if (x==gzero) return x;
1243: return subiispec(y+2,x+2,lgefint(y)-2,lgefint(x)-2);
1244: }
1245: }
1246: }
1247:
1248: void
1249: modiiz(GEN x, GEN y, GEN z)
1250: {
1.2 ! noro 1251: const gpmem_t av = avma;
1.1 noro 1252: affii(modii(x,y),z); avma=av;
1253: }
1254:
1255: GEN
1256: divrs(GEN x, long y)
1257: {
1.2 ! noro 1258: long i,lx,garde,sh,s=signe(x);
1.1 noro 1259: GEN z;
1260: LOCAL_HIREMAINDER;
1261:
1262: if (!y) err(diver6);
1.2 ! noro 1263: if (!s) return realzero_bit(expo(x) - (BITS_IN_LONG-1)+bfffo(y));
1.1 noro 1264: if (y<0) { s = -s; y = -y; }
1265: if (y==1) { z=rcopy(x); setsigne(z,s); return z; }
1266:
1267: z=cgetr(lx=lg(x)); hiremainder=0;
1268: for (i=2; i<lx; i++) z[i]=divll(x[i],y);
1269:
1270: /* we may have hiremainder != 0 ==> garde */
1.2 ! noro 1271: garde=divll(0,y); sh=bfffo(z[2]);
1.1 noro 1272: if (sh) shift_left(z,z, 2,lx-1, garde,sh);
1.2 ! noro 1273: z[1] = evalsigne(s) | evalexpo(expo(x)-sh);
1.1 noro 1274: return z;
1275: }
1276:
1277: GEN
1278: divrr(GEN x, GEN y)
1279: {
1280: long sx=signe(x), sy=signe(y), lx,ly,lz,e,i,j;
1281: ulong si,saux;
1282: GEN z,x1;
1283:
1284: if (!sy) err(diver9);
1.2 ! noro 1285: e = expo(x) - expo(y);
! 1286: if (!sx) return realzero_bit(e);
1.1 noro 1287: if (sy<0) sx = -sx;
1288: lx=lg(x); ly=lg(y);
1289: if (ly==3)
1290: {
1291: ulong k = x[2], l = (lx>3)? x[3]: 0;
1292: LOCAL_HIREMAINDER;
1293: if (k < (ulong)y[2]) e--;
1294: else
1295: {
1296: l >>= 1; if (k&1) l |= HIGHBIT;
1297: k >>= 1;
1298: }
1.2 ! noro 1299: z = cgetr(3); z[1] = evalsigne(sx) | evalexpo(e);
1.1 noro 1300: hiremainder=k; z[2]=divll(l,y[2]); return z;
1301: }
1302:
1.2 ! noro 1303: lz = min(lx,ly); z = new_chunk(lz);
! 1304: x1 = z-1;
1.1 noro 1305: x1[1]=0; for (i=2; i<lz; i++) x1[i]=x[i];
1306: x1[lz] = (lx>lz)? x[lz]: 0;
1307: x=z; si=y[2]; saux=y[3];
1308: for (i=0; i<lz-1; i++)
1309: { /* x1 = x + (i-1) */
1310: ulong k,qp;
1311: LOCAL_HIREMAINDER;
1312: LOCAL_OVERFLOW;
1313: if ((ulong)x1[1] == si)
1314: {
1315: qp = MAXULONG; k=addll(si,x1[2]);
1316: }
1317: else
1318: {
1319: if ((ulong)x1[1] > si) /* can't happen if i=0 */
1320: {
1321: GEN y1 = y+1;
1322: overflow=0;
1323: for (j=lz-i; j>0; j--) x1[j] = subllx(x1[j],y1[j]);
1324: j=i; do x[--j]++; while (j && !x[j]);
1325: }
1326: hiremainder=x1[1]; overflow=0;
1327: qp=divll(x1[2],si); k=hiremainder;
1328: }
1329: if (!overflow)
1330: {
1331: long k3 = subll(mulll(qp,saux), x1[3]);
1332: long k4 = subllx(hiremainder,k);
1333: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
1334: }
1335: j = lz-i+1;
1336: if (j<ly) (void)mulll(qp,y[j]); else { hiremainder=0 ; j=ly; }
1337: for (j--; j>1; j--)
1338: {
1339: x1[j] = subll(x1[j], addmul(qp,y[j]));
1340: hiremainder += overflow;
1341: }
1342: if ((ulong)x1[1] != hiremainder)
1343: {
1344: if ((ulong)x1[1] < hiremainder)
1345: {
1346: qp--;
1347: overflow=0; for (j=lz-i; j>1; j--) x1[j]=addllx(x1[j], y[j]);
1348: }
1349: else
1350: {
1351: x1[1] -= hiremainder;
1352: while (x1[1])
1353: {
1354: qp++; if (!qp) { j=i; do x[--j]++; while (j && !x[j]); }
1355: overflow=0; for (j=lz-i; j>1; j--) x1[j]=subllx(x1[j],y[j]);
1356: x1[1] -= overflow;
1357: }
1358: }
1359: }
1360: x1[1]=qp; x1++;
1361: }
1362: x1 = x-1; for (j=lz-1; j>=2; j--) x[j]=x1[j];
1363: if (*x) { shift_right(x,x, 2,lz, 1,1); } else e--;
1.2 ! noro 1364: x[0] = evaltyp(t_REAL)|evallg(lz);
! 1365: x[1] = evalsigne(sx) | evalexpo(e);
! 1366: return x;
1.1 noro 1367: }
1368: #endif /* !defined(__M68K__) */
1.2 ! noro 1369:
1.1 noro 1370: /* The following ones are not in mp.s (mulii is, with a different algorithm) */
1371:
1.2 ! noro 1372: /* Integer division x / y: such that sign(r) = sign(x)
1.1 noro 1373: * if z = ONLY_REM return remainder, otherwise return quotient
1374: * if z != NULL set *z to remainder
1375: * *z is the last object on stack (and thus can be disposed of with cgiv
1376: * instead of gerepile)
1377: * If *z is zero, we put gzero here and no copy.
1378: * space needed: lx + ly
1379: */
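/* Illustrative example of the sign conventions: with x = -7, y = 2, dvmdii
 * returns q = -3 and remainder r = -1 (r has the sign of x), whereas
 * truedvmdii further down returns q = -4 and r = 1 (r always >= 0). */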
1380: GEN
1381: dvmdii(GEN x, GEN y, GEN *z)
1382: {
1383: long sx=signe(x),sy=signe(y);
1.2 ! noro 1384: long lx, ly, lz, i, j, sh, k, lq, lr;
! 1385: gpmem_t av;
1.1 noro 1386: ulong si,qp,saux, *xd,*rd,*qd;
1387: GEN r,q,x1;
1388:
1389: if (!sy) err(dvmer1);
1390: if (!sx)
1391: {
1392: if (!z || z == ONLY_REM) return gzero;
1393: *z=gzero; return gzero;
1394: }
1395: lx=lgefint(x);
1396: ly=lgefint(y); lz=lx-ly;
1397: if (lz <= 0)
1398: {
1399: if (lz == 0)
1400: {
1401: for (i=2; i<lx; i++)
1402: if (x[i] != y[i])
1403: {
1404: if ((ulong)x[i] > (ulong)y[i]) goto DIVIDE;
1405: goto TRIVIAL;
1406: }
1407: if (z == ONLY_REM) return gzero;
1408: if (z) *z = gzero;
1409: if (sx < 0) sy = -sy;
1410: return stoi(sy);
1411: }
1412: TRIVIAL:
1413: if (z == ONLY_REM) return icopy(x);
1414: if (z) *z = icopy(x);
1415: return gzero;
1416: }
1417: DIVIDE: /* quotient is non-zero */
1418: av=avma; if (sx<0) sy = -sy;
1419: if (ly==3)
1420: {
1421: LOCAL_HIREMAINDER;
1422: si = y[2];
1423: if (si <= (ulong)x[2]) hiremainder=0;
1424: else
1425: {
1426: hiremainder = x[2]; lx--; x++;
1427: }
1428: q = new_chunk(lx); for (i=2; i<lx; i++) q[i]=divll(x[i],si);
1429: if (z == ONLY_REM)
1430: {
1431: avma=av; if (!hiremainder) return gzero;
1432: r=cgeti(3);
1433: r[1] = evalsigne(sx) | evallgefint(3);
1434: r[2]=hiremainder; return r;
1435: }
1436: q[1] = evalsigne(sy) | evallgefint(lx);
1437: q[0] = evaltyp(t_INT) | evallg(lx);
1438: if (!z) return q;
1439: if (!hiremainder) { *z=gzero; return q; }
1440: r=cgeti(3);
1441: r[1] = evalsigne(sx) | evallgefint(3);
1442: r[2] = hiremainder; *z=r; return q;
1443: }
1444:
1445: x1 = new_chunk(lx); sh = bfffo(y[2]);
1446: if (sh)
1447: { /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/
1448: register const ulong m = BITS_IN_LONG - sh;
1449: r = new_chunk(ly);
1450: shift_left2(r, y,2,ly-1, 0,sh,m); y = r;
1451: shift_left2(x1,x,2,lx-1, 0,sh,m);
1452: x1[1] = ((ulong)x[2]) >> m;
1453: }
1454: else
1455: {
1456: x1[1]=0; for (j=2; j<lx; j++) x1[j]=x[j];
1457: }
1458: x=x1; si=y[2]; saux=y[3];
1459: for (i=0; i<=lz; i++)
1460: { /* x1 = x + i */
1461: LOCAL_HIREMAINDER;
1462: LOCAL_OVERFLOW;
1463: if ((ulong)x1[1] == si)
1464: {
1465: qp = MAXULONG; k=addll(si,x1[2]);
1466: }
1467: else
1468: {
1469: hiremainder=x1[1]; overflow=0;
1470: qp=divll(x1[2],si); k=hiremainder;
1471: }
1472: if (!overflow)
1473: {
1474: long k3 = subll(mulll(qp,saux), x1[3]);
1475: long k4 = subllx(hiremainder,k);
1476: while (!overflow && k4) { qp--; k3=subll(k3,saux); k4=subllx(k4,si); }
1477: }
1478: hiremainder=0;
1479: for (j=ly-1; j>1; j--)
1480: {
1481: x1[j] = subll(x1[j], addmul(qp,y[j]));
1482: hiremainder+=overflow;
1483: }
1484: if ((ulong)x1[1] < hiremainder)
1485: {
1486: overflow=0; qp--;
1487: for (j=ly-1; j>1; j--) x1[j] = addllx(x1[j],y[j]);
1488: }
1489: x1[1]=qp; x1++;
1490: }
1491:
1492: lq = lz+2;
1493: if (!z)
1494: {
1495: qd = (ulong*)av;
1496: xd = (ulong*)(x + lq);
1497: if (x[1]) { lz++; lq++; }
1498: while (lz--) *--qd = *--xd;
1499: *--qd = evalsigne(sy) | evallgefint(lq);
1500: *--qd = evaltyp(t_INT) | evallg(lq);
1.2 ! noro 1501: avma = (gpmem_t)qd; return (GEN)qd;
1.1 noro 1502: }
1503:
1504: j=lq; while (j<lx && !x[j]) j++;
1505: lz = lx-j;
1506: if (z == ONLY_REM)
1507: {
1508: if (lz==0) { avma = av; return gzero; }
1509: rd = (ulong*)av; lr = lz+2;
1510: xd = (ulong*)(x + lx);
1511: if (!sh) while (lz--) *--rd = *--xd;
1512: else
1513: { /* shift remainder right by sh bits */
1514: const ulong shl = BITS_IN_LONG - sh;
1515: ulong l;
1516: xd--;
1517: while (--lz) /* fill r[3..] */
1518: {
1519: l = *xd >> sh;
1520: *--rd = l | (*--xd << shl);
1521: }
1522: l = *xd >> sh;
1523: if (l) *--rd = l; else lr--;
1524: }
1525: *--rd = evalsigne(sx) | evallgefint(lr);
1526: *--rd = evaltyp(t_INT) | evallg(lr);
1.2 ! noro 1527: avma = (gpmem_t)rd; return (GEN)rd;
1.1 noro 1528: }
1529:
1530: lr = lz+2;
1531: rd = NULL; /* gcc -Wall */
1532: if (lz)
1533: { /* non zero remainder: initialize rd */
1534: xd = (ulong*)(x + lx);
1535: if (!sh)
1536: {
1537: rd = (ulong*)avma; (void)new_chunk(lr);
1538: while (lz--) *--rd = *--xd;
1539: }
1540: else
1541: { /* shift remainder right by sh bits */
1542: const ulong shl = BITS_IN_LONG - sh;
1543: ulong l;
1544: rd = (ulong*)x; /* overwrite shifted y */
1545: xd--;
1546: while (--lz)
1547: {
1548: l = *xd >> sh;
1549: *--rd = l | (*--xd << shl);
1550: }
1551: l = *xd >> sh;
1552: if (l) *--rd = l; else lr--;
1553: }
1554: *--rd = evalsigne(sx) | evallgefint(lr);
1555: *--rd = evaltyp(t_INT) | evallg(lr);
1556: rd += lr;
1557: }
1558: qd = (ulong*)av;
1559: xd = (ulong*)(x + lq);
1560: if (x[1]) lq++;
1561: j = lq-2; while (j--) *--qd = *--xd;
1562: *--qd = evalsigne(sy) | evallgefint(lq);
1563: *--qd = evaltyp(t_INT) | evallg(lq);
1564: q = (GEN)qd;
1565: if (lr==2) *z = gzero;
1566: else
1567: { /* rd has been properly initialized: we had lz > 0 */
1568: while (lr--) *--qd = *--rd;
1569: *z = (GEN)qd;
1570: }
1.2 ! noro 1571: avma = (gpmem_t)qd; return q;
1.1 noro 1572: }
1573:
1574: /* assume y > x > 0. return y mod x */
1575: ulong
1576: mppgcd_resiu(GEN y, ulong x)
1577: {
1578: long i, ly = lgefint(y);
1579: LOCAL_HIREMAINDER;
1580:
1581: hiremainder = 0;
1582: for (i=2; i<ly; i++) (void)divll(y[i],x);
1583: return hiremainder;
1584: }
1585:
             1586: /* Assume x>y>0, both odd. Set res to the odd part of x-y if x = y mod 4, of x+y otherwise */
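/* Illustrative: x = 7, y = 3 agree mod 4, so t = x-y = 4 and res becomes its
 * odd part 1; x = 7, y = 5 differ mod 4, so t = x+y = 12 and res = 3. */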
1587: void
1588: mppgcd_plus_minus(GEN x, GEN y, GEN res)
1589: {
1.2 ! noro 1590: gpmem_t av = avma;
1.1 noro 1591: long lx = lgefint(x)-1;
1592: long ly = lgefint(y)-1, lt,m,i;
1593: GEN t;
1594:
1595: if ((x[lx]^y[ly]) & 3) /* x != y mod 4*/
1596: t = addiispec(x+2,y+2,lx-1,ly-1);
1597: else
1598: t = subiispec(x+2,y+2,lx-1,ly-1);
1599:
1600: lt = lgefint(t)-1; while (!t[lt]) lt--;
1601: m = vals(t[lt]); lt++;
1602: if (m == 0) /* 2^32 | t */
1603: {
1604: for (i = 2; i < lt; i++) res[i] = t[i];
1605: }
1606: else if (t[2] >> m)
1607: {
1608: shift_right(res,t, 2,lt, 0,m);
1609: }
1610: else
1611: {
1612: lt--; t++;
1613: shift_right(res,t, 2,lt, t[1],m);
1614: }
1615: res[1] = evalsigne(1)|evallgefint(lt);
1616: avma = av;
1617: }
1618:
1619: /* private version of mulss */
1620: ulong
1621: smulss(ulong x, ulong y, ulong *rem)
1622: {
1623: LOCAL_HIREMAINDER;
1624: x=mulll(x,y); *rem=hiremainder; return x;
1625: }
1626:
1627: #ifdef LONG_IS_64BIT
1628: # define DIVCONVI 7
1629: #else
1630: # define DIVCONVI 14
1631: #endif
1632:
1.2 ! noro 1633: /* convert integer --> base 10^9 */
1.1 noro 1634: GEN
1635: convi(GEN x)
1636: {
1.2 ! noro 1637: gpmem_t av = avma, lim;
1.1 noro 1638: long lz;
1639: GEN z,p1;
1640:
1641: lz = 3 + ((lgefint(x)-2)*15)/DIVCONVI;
1.2 ! noro 1642: z = new_chunk(lz); z[1] = -1; p1 = z+2;
1.1 noro 1643: lim = stack_lim(av,1);
1644: for(;;)
1645: {
1.2 ! noro 1646: x = diviu(x,1000000000); *p1++ = hiremainder;
! 1647: if (!signe(x)) { avma = av; return p1; }
! 1648: if (low_stack(lim, stack_lim(av,1))) x = gerepileuptoint((gpmem_t)z,x);
1.1 noro 1649: }
1650: }
1651: #undef DIVCONVI
1652:
1653: /* convert fractional part --> base 10^9 [assume expo(x) < 0] */
1654: GEN
1655: confrac(GEN x)
1656: {
1657: long lx=lg(x), ex = -expo(x)-1, d,m,ly,ey,lr,i,j;
1658: GEN y,y1;
1659:
1660: ey = ex + bit_accuracy(lx);
1661: ly = 1 + ((ey-1) >> TWOPOTBITS_IN_LONG);
1662: d = ex >> TWOPOTBITS_IN_LONG;
1663: m = ex & (BITS_IN_LONG-1);
1664: y = new_chunk(ly); y1 = y + (d-2);
1665: while (d--) y[d]=0;
1666: if (!m)
1667: for (i=2; i<lx; i++) y1[i] = x[i];
1668: else
1669: { /* shift x left sh bits */
1670: const ulong sh=BITS_IN_LONG-m;
1671: ulong k=0, l;
1672: for (i=2; i<lx; i++)
1673: {
1674: l = x[i];
1675: y1[i] = (l>>m)|k;
1676: k = l<<sh;
1677: }
1678: y1[i] = k;
1679: }
1680: lr = 1 + ((long) (ey*L2SL10))/9;
1681: y1 = new_chunk(lr);
1682: for (j=0; j<lr; j++)
1683: {
1684: LOCAL_HIREMAINDER;
1685: hiremainder=0;
1686: for (i=ly-1; i>=0; i--) y[i]=addmul(y[i],1000000000);
1687: y1[j]=hiremainder;
1688: }
1689: return y1;
1690: }
1691:
1692: GEN
1693: truedvmdii(GEN x, GEN y, GEN *z)
1694: {
1.2 ! noro 1695: gpmem_t av = avma;
1.1 noro 1696: GEN r, q = dvmdii(x,y,&r); /* assume that r is last on stack */
1697: GEN *gptr[2];
1698:
1699: if (signe(r)>=0)
1700: {
1701: if (z == ONLY_REM) return gerepileuptoint(av,r);
1702: if (z) *z = r; else cgiv(r);
1703: return q;
1704: }
1705:
1706: if (z == ONLY_REM)
1.2 ! noro 1707: { /* r += sign(y) * y, that is |y| */
1.1 noro 1708: r = subiispec(y+2,r+2, lgefint(y)-2,lgefint(r)-2);
1709: return gerepileuptoint(av, r);
1710: }
1711: q = addsi(-signe(y),q);
1712: if (!z) return gerepileuptoint(av, q);
1713:
1714: *z = subiispec(y+2,r+2, lgefint(y)-2,lgefint(r)-2);
1.2 ! noro 1715: gptr[0]=&q; gptr[1]=z; gerepilemanysp(av,(gpmem_t)r,gptr,2);
1.1 noro 1716: return q;
1717: }
1718:
1.2 ! noro 1719: /* Montgomery reduction.
! 1720: * N has k words, assume T >= 0 has less than 2k.
! 1721: * Return res := T / B^k mod N, where B = 2^BIL
! 1722: * such that 0 <= res < T/B^k + N and res has less than k words */
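/* Invariant sketch (illustrative; it assumes, as the "= 0" comments below
 * suggest, that inv == -N^(-1) mod B): each of the k steps picks
 * m = (low word of T) * inv so that the low word of T + m*N vanishes, hence
 * T + m*N is exactly divisible by B; after k such steps the scratch value is
 * congruent to T * B^(-k) mod N. */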
! 1723: GEN
! 1724: red_montgomery(GEN T, GEN N, ulong inv)
! 1725: {
! 1726: gpmem_t av;
! 1727: GEN Te,Td,Ne,Nd, scratch;
! 1728: ulong m,t,d,k = lgefint(N)-2;
! 1729: int carry;
! 1730: long i,j;
! 1731: LOCAL_HIREMAINDER;
! 1732: LOCAL_OVERFLOW;
! 1733:
! 1734: if (k == 0) return gzero;
! 1735: d = lgefint(T)-2; /* <= 2*k */
! 1736: #ifdef DEBUG
! 1737: if (d > 2*k) err(bugparier,"red_montgomery");
! 1738: #endif
! 1739: if (k == 1)
! 1740: { /* as below, special cased for efficiency */
! 1741: ulong n = (ulong)N[2];
! 1742: t = (ulong)T[d+1];
! 1743: m = t * inv;
! 1744: (void)addll(mulll(m, n), t); /* = 0 */
! 1745: t = hiremainder + overflow;
! 1746: if (d == 2)
! 1747: {
! 1748: t = addll((ulong)T[2], t);
! 1749: if (overflow) t -= n; /* t > n doesn't fit in 1 word */
! 1750: }
! 1751: return utoi(t);
! 1752: }
! 1753: /* assume k >= 2 */
! 1754: av = avma; scratch = new_chunk(k<<1); /* >= k + 2: result fits */
! 1755:
! 1756: /* copy T to scratch space (pad with zeroes to 2k words) */
! 1757: Td = (GEN)av;
! 1758: Te = T + (d+2);
! 1759: for (i=0; i < d ; i++) *--Td = *--Te;
! 1760: for ( ; i < (k<<1); i++) *--Td = 0;
! 1761:
! 1762: Te = (GEN)av; /* 1 beyond end of T mantissa */
! 1763: Ne = N + k+2; /* 1 beyond end of N mantissa */
! 1764:
! 1765: carry = 0;
         !  1766:   for (i=0; i<k; i++) /* set T := T/B mod N, k times */
! 1767: {
! 1768: Td = Te; /* one beyond end of (new) T mantissa */
! 1769: Nd = Ne;
! 1770: m = *--Td * inv; /* solve T + m N = O(B) */
! 1771:
! 1772: /* set T := (T + mN) / B */
! 1773: Te = Td;
! 1774: (void)addll(mulll(m, *--Nd), *Td); /* = 0 */
! 1775: for (j=1; j<k; j++)
! 1776: {
! 1777: hiremainder += overflow;
! 1778: t = addll(addmul(m, *--Nd), *--Td); *Td = t;
! 1779: }
! 1780: overflow += hiremainder;
! 1781: t = addll(overflow, *--Td); *Td = t + carry;
! 1782: carry = (overflow || (carry && *Td == 0));
! 1783: }
! 1784: if (carry)
! 1785: { /* Td > N overflows (k+1 words), set Td := Td - N */
! 1786: Td = Te;
! 1787: Nd = Ne;
! 1788: t = subll(*--Td, *--Nd); *Td = t;
! 1789: while (Td > scratch) { t = subllx(*--Td, *--Nd); *Td = t; }
! 1790: }
! 1791:
! 1792: /* copy result */
! 1793: Td = (GEN)av;
! 1794: while (! *scratch) scratch++; /* strip leading zeroes */
! 1795: while (Te > scratch) *--Td = *--Te;
! 1796: k = ((GEN)av - Td) + 2;
! 1797: *--Td = evalsigne(1) | evallgefint(k);
! 1798: *--Td = evaltyp(t_INT) | evallg(k);
! 1799: #ifdef DEBUG
! 1800: {
! 1801: long l = lgefint(N)-2, s = BITS_IN_LONG*l;
! 1802: GEN R = shifti(gun, s);
! 1803: GEN res = resii(mulii(T, mpinvmod(R, N)), N);
! 1804: if (k > lgefint(N)
! 1805: || !egalii(resii(Td,N),res)
! 1806: || cmpii(Td, addii(shifti(T, -s), N)) >= 0) err(bugparier,"red_montgomery");
! 1807: }
! 1808: #endif
! 1809: avma = (gpmem_t)Td; return Td;
! 1810: }
! 1811:
1.1 noro 1812: /* EXACT INTEGER DIVISION */
1813:
1814: /* Find c such that 1=c*b mod 2^BITS_IN_LONG, assuming b odd (unchecked) */
1.2 ! noro 1815: ulong
1.1 noro 1816: invrev(ulong b)
1817: {
1818: ulong x;
1819: switch(b & 7)
1820: {
1821: case 3:
1822: case 5: x = b+8; break;
1823: default: x = b; break;
1824: } /* x = b^(-1) mod 2^4 */
1825: x = x*(2-b*x);
1826: x = x*(2-b*x);
1827: x = x*(2-b*x); /* b^(-1) mod 2^32 (Newton applied to 1/x - b = 0) */
1828: #ifdef LONG_IS_64BIT
1829: x = x*(2-b*x); /* b^(-1) mod 2^64 */
1830: #endif
1831: return x;
1832: }
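/* Worked example (illustrative, 32-bit words): invrev(3) = 0xAAAAAAAB, since
 * 3 * 0xAAAAAAAB = 2^33 + 1 == 1 (mod 2^32); each Newton step x = x*(2-b*x)
 * doubles the number of correct low-order bits. */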
1833:
1834: /* assume xy>0, y odd */
1.2 ! noro 1835: GEN
1.1 noro 1836: diviuexact(GEN x, ulong y)
1837: {
1838: long i,lz,lx;
1839: ulong q, yinv;
1840: GEN z, z0, x0, x0min;
1841:
1842: if (y == 1) return icopy(x);
1843: lx = lgefint(x);
1.2 ! noro 1844: if (lx == 3) return utoi((ulong)x[2] / y);
1.1 noro 1845: yinv = invrev(y);
1846: lz = (y <= (ulong)x[2]) ? lx : lx-1;
1847: z = new_chunk(lz);
1848: z0 = z + lz;
1849: x0 = x + lx; x0min = x + lx-lz+2;
1850:
1851: while (x0 > x0min)
1852: {
1853: *--z0 = q = yinv*((ulong)*--x0); /* i-th quotient */
1854: if (!q) continue;
1855: /* x := x - q * y */
1856: { /* update neither lowest word (could set it to 0) nor highest ones */
1857: register GEN x1 = x0 - 1;
1858: LOCAL_HIREMAINDER;
1859: (void)mulll(q,y);
1860: if (hiremainder)
1861: {
1862: if ((ulong)*x1 < hiremainder)
1863: {
1864: *x1 -= hiremainder;
1865: do (*--x1)--; while ((ulong)*x1 == MAXULONG);
1866: }
1867: else
1868: *x1 -= hiremainder;
1869: }
1870: }
1871: }
1872: i=2; while(!z[i]) i++;
1873: z += i-2; lz -= i-2;
1874: z[0] = evaltyp(t_INT)|evallg(lz);
1875: z[1] = evalsigne(1)|evallg(lz);
1.2 ! noro 1876: avma = (gpmem_t)z; return z;
1.1 noro 1877: }
1878:
1879: /* Find z such that x=y*z, knowing that y | x (unchecked)
             1880:  * Method: y0 z0 = x0 mod B = 2^BITS_IN_LONG ==> z0 = x0 / y0 mod B.
1881: * Set x := (x - z0 y) / B, updating only relevant words, and repeat */
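/* Illustrative base-10 analogue: to divide x = 91 by y = 7 exactly, note
 * 7^(-1) = 3 (mod 10); the low digit of x is 1, so the low quotient digit is
 * 1*3 = 3, then x := (91 - 3*7)/10 = 7 gives the next digit 7*3 == 1 (mod 10),
 * i.e. the quotient 13. */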
1882: GEN
1883: diviiexact(GEN x, GEN y)
1884: {
1.2 ! noro 1885: long lx, ly, lz, vy, i, ii, sx = signe(x), sy = signe(y);
! 1886: gpmem_t av;
1.1 noro 1887: ulong y0inv,q;
1888: GEN z;
1889:
1890: if (!sy) err(dvmer1);
1891: if (!sx) return gzero;
1892: vy = vali(y); av = avma;
1893: (void)new_chunk(lgefint(x)); /* enough room for z */
1894: if (vy)
1895: { /* make y odd */
1896: #if 0
1897: if (vali(x) < vy) err(talker,"impossible division in diviiexact");
1898: #endif
1899: y = shifti(y,-vy);
1900: x = shifti(x,-vy);
1901: }
1902: else x = icopy(x); /* necessary because we destroy x */
1903: avma = av; /* will erase our x,y when exiting */
1904: /* now y is odd */
1905: ly = lgefint(y);
1.2 ! noro 1906: if (ly == 3)
1.1 noro 1907: {
1908: x = diviuexact(x,(ulong)y[2]);
1909: if (signe(x)) setsigne(x,sx*sy); /* should have x != 0 at this point */
1910: return x;
1911: }
1912: lx = lgefint(x); if (ly>lx) err(talker,"impossible division in diviiexact");
1913: y0inv = invrev(y[ly-1]);
1914: i=2; while (i<ly && y[i]==x[i]) i++;
1915: lz = (i==ly || (ulong)y[i] < (ulong)x[i]) ? lx-ly+3 : lx-ly+2;
1916: z = new_chunk(lz);
1917:
1918: y += ly - 1; /* now y[-i] = i-th word of y */
1919: for (ii=lx-1,i=lz-1; i>=2; i--,ii--)
1920: {
1921: long limj;
1922: LOCAL_HIREMAINDER;
1923: LOCAL_OVERFLOW;
1924:
1925: z[i] = q = y0inv*((ulong)x[ii]); /* i-th quotient */
1926: if (!q) continue;
1927:
1928: /* x := x - q * y */
1929: (void)mulll(q,y[0]); limj = max(lx - lz, ii+3-ly);
1930: { /* update neither lowest word (could set it to 0) nor highest ones */
1.2 ! noro 1931: register GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj;
1.1 noro 1932: for (; x0 >= xlim; x0--, y0--)
1933: {
1934: *x0 = subll(*x0, addmul(q,*y0));
1935: hiremainder += overflow;
1936: }
1937: if (hiremainder && limj != lx - lz)
1938: {
1939: if ((ulong)*x0 < hiremainder)
1940: {
1941: *x0 -= hiremainder;
1942: do (*--x0)--; while ((ulong)*x0 == MAXULONG);
1943: }
1944: else
1945: *x0 -= hiremainder;
1946: }
1947: }
1948: }
1949: #if 0
1950: i=2; while(i<lz && !z[i]) i++;
1951: if (i==lz) err(bugparier,"diviiexact");
1952: #else
1953: i=2; while(!z[i]) i++;
1954: #endif
1955: z += i-2; lz -= (i-2);
1956: z[0] = evaltyp(t_INT)|evallg(lz);
1957: z[1] = evalsigne(sx*sy)|evallg(lz);
1.2 ! noro 1958: avma = (gpmem_t)z; return z;
1.1 noro 1959: }
1960:
1961: long
1962: smodsi(long x, GEN y)
1963: {
1964: if (x<0) err(talker,"negative small integer in smodsi");
1965: (void)divsi(x,y); return hiremainder;
1966: }
1967:
1968: /* x and y are integers. Return 1 if |x| == |y|, 0 otherwise */
1969: int
1970: absi_equal(GEN x, GEN y)
1971: {
1972: long lx,i;
1973:
1974: if (!signe(x)) return !signe(y);
1975: if (!signe(y)) return 0;
1976:
1977: lx=lgefint(x); if (lx != lgefint(y)) return 0;
1978: i=2; while (i<lx && x[i]==y[i]) i++;
1979: return (i==lx);
1980: }
1981:
1982: /* x and y are integers. Return sign(|x| - |y|) */
1983: int
1984: absi_cmp(GEN x, GEN y)
1985: {
1986: long lx,ly,i;
1987:
1988: if (!signe(x)) return signe(y)? -1: 0;
1989: if (!signe(y)) return 1;
1990:
1991: lx=lgefint(x); ly=lgefint(y);
1992: if (lx>ly) return 1;
1993: if (lx<ly) return -1;
1994: i=2; while (i<lx && x[i]==y[i]) i++;
1995: if (i==lx) return 0;
1996: return ((ulong)x[i] > (ulong)y[i])? 1: -1;
1997: }
1998:
1999: /* x and y are reals. Return sign(|x| - |y|) */
2000: int
2001: absr_cmp(GEN x, GEN y)
2002: {
2003: long ex,ey,lx,ly,lz,i;
2004:
2005: if (!signe(x)) return signe(y)? -1: 0;
2006: if (!signe(y)) return 1;
2007:
2008: ex=expo(x); ey=expo(y);
2009: if (ex>ey) return 1;
2010: if (ex<ey) return -1;
2011:
2012: lx=lg(x); ly=lg(y); lz = (lx<ly)?lx:ly;
2013: i=2; while (i<lz && x[i]==y[i]) i++;
2014: if (i<lz) return ((ulong)x[i] > (ulong)y[i])? 1: -1;
2015: if (lx>=ly)
2016: {
2017: while (i<lx && !x[i]) i++;
2018: return (i==lx)? 0: 1;
2019: }
2020: while (i<ly && !y[i]) i++;
2021: return (i==ly)? 0: -1;
2022: }
2023:
2024: /********************************************************************/
2025: /** **/
2026: /** INTEGER MULTIPLICATION (KARATSUBA) **/
2027: /** **/
2028: /********************************************************************/
2029: #define _sqri_l 47
2030: #define _muli_l 25 /* optimal on PII 350MHz + gcc 2.7.2.1 (gp-dyn) */
2031:
1.2 ! noro 2032: #if 1 /* for tunings */
1.1 noro 2033: long KARATSUBA_SQRI_LIMIT = _sqri_l;
2034: long KARATSUBA_MULI_LIMIT = _muli_l;
2035:
2036: void setsqri(long a) { KARATSUBA_SQRI_LIMIT = a; }
2037: void setmuli(long a) { KARATSUBA_MULI_LIMIT = a; }
2038:
2039: GEN
2040: speci(GEN x, long nx)
2041: {
2042: GEN z;
2043: long i;
2044: if (!nx) return gzero;
2045: z = cgeti(nx+2); z[1] = evalsigne(1)|evallgefint(nx+2);
2046: for (i=0; i<nx; i++) z[i+2] = x[i];
2047: return z;
2048: }
2049: #else
2050: # define KARATSUBA_SQRI_LIMIT _sqri_l
2051: # define KARATSUBA_MULI_LIMIT _muli_l
2052: #endif
2053:
2054: /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
2055: #ifdef INLINE
2056: INLINE
2057: #endif
2058: GEN
2059: muliispec(GEN x, GEN y, long nx, long ny)
2060: {
2061: GEN z2e,z2d,yd,xd,ye,zd;
2062: long p1,lz;
2063: LOCAL_HIREMAINDER;
2064:
2065: if (!ny) return gzero;
2066: zd = (GEN)avma; lz = nx+ny+2;
2067: (void)new_chunk(lz);
2068: xd = x + nx;
2069: yd = y + ny;
2070: ye = yd; p1 = *--xd;
2071:
2072: *--zd = mulll(p1, *--yd); z2e = zd;
2073: while (yd > y) *--zd = addmul(p1, *--yd);
2074: *--zd = hiremainder;
2075:
2076: while (xd > x)
2077: {
2078: LOCAL_OVERFLOW;
2079: yd = ye; p1 = *--xd;
2080:
2081: z2d = --z2e;
2082: *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
2083: while (yd > y)
2084: {
2085: hiremainder += overflow;
2086: *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
2087: }
2088: *--zd = hiremainder + overflow;
2089: }
2090: if (*zd == 0) { zd++; lz--; } /* normalize */
2091: *--zd = evalsigne(1) | evallgefint(lz);
2092: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 2093: avma=(gpmem_t)zd; return zd;
1.1 noro 2094: }
2095:
2096: #ifdef INLINE
2097: INLINE
2098: #endif
2099: GEN
2100: sqrispec(GEN x, long nx)
2101: {
2102: GEN z2e,z2d,yd,xd,zd,x0,z0;
2103: long p1,lz;
2104: LOCAL_HIREMAINDER;
2105:
2106: if (!nx) return gzero;
2107: zd = (GEN)avma; lz = (nx+1) << 1;
2108: z0 = new_chunk(lz);
2109: if (nx == 1)
2110: {
2111: *--zd = mulll(*x, *x);
2112: *--zd = hiremainder; goto END;
2113: }
2114: xd = x + nx;
1.2 ! noro 2115:
1.1 noro 2116: /* compute double products --> zd */
2117: p1 = *--xd; yd = xd; --zd;
2118: *--zd = mulll(p1, *--yd); z2e = zd;
2119: while (yd > x) *--zd = addmul(p1, *--yd);
2120: *--zd = hiremainder;
2121:
2122: x0 = x+1;
2123: while (xd > x0)
2124: {
2125: LOCAL_OVERFLOW;
2126: p1 = *--xd; yd = xd;
2127:
2128: z2e -= 2; z2d = z2e;
2129: *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
2130: while (yd > x)
2131: {
2132: hiremainder += overflow;
2133: *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
2134: }
2135: *--zd = hiremainder + overflow;
2136: }
2137: /* multiply zd by 2 (put result in zd - 1) */
2138: zd[-1] = ((*zd & HIGHBIT) != 0);
2139: shift_left(zd, zd, 0, (nx<<1)-3, 0, 1);
2140:
2141: /* add the squares */
2142: xd = x + nx; zd = z0 + lz;
2143: p1 = *--xd;
2144: zd--; *zd = mulll(p1,p1);
2145: zd--; *zd = addll(hiremainder, *zd);
2146: while (xd > x)
2147: {
2148: p1 = *--xd;
2149: zd--; *zd = addll(mulll(p1,p1)+ overflow, *zd);
2150: zd--; *zd = addll(hiremainder + overflow, *zd);
2151: }
2152:
2153: END:
2154: if (*zd == 0) { zd++; lz--; } /* normalize */
2155: *--zd = evalsigne(1) | evallgefint(lz);
2156: *--zd = evaltyp(t_INT) | evallg(lz);
1.2 ! noro 2157: avma=(gpmem_t)zd; return zd;
1.1 noro 2158: }
2159:
2160: /* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */
2161: static GEN
2162: addshiftw(GEN x, GEN y, long d)
2163: {
2164: GEN z,z0,y0,yd, zd = (GEN)avma;
2165: long a,lz,ly = lgefint(y);
2166:
2167: z0 = new_chunk(d);
2168: a = ly-2; yd = y+ly;
2169: if (a >= d)
2170: {
2171: y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */
2172: a -= d;
2173: if (a)
2174: z = addiispec(x+2, y+2, lgefint(x)-2, a);
2175: else
2176: z = icopy(x);
2177: }
2178: else
2179: {
2180: y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */
2181: while (zd >= z0) *--zd = 0; /* complete with 0s */
2182: z = icopy(x);
2183: }
2184: lz = lgefint(z)+d;
2185: z[1] = evalsigne(1) | evallgefint(lz);
2186: z[0] = evaltyp(t_INT) | evallg(lz); return z;
2187: }
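
/* Illustrative sketch only (kept disabled, like the other #if 0 blocks in this
 * file): on ordinary GENs, addshiftw(x,y,d) above returns the same integer as
 * the naive shift-and-add below, for x > 0, y >= 0, d > 0.  The function name
 * is ad hoc; addii() and shifti() are the generic PARI entry points. */
#if 0
static GEN
addshiftw_reference(GEN x, GEN y, long d)
{
  return addii(shifti(x, d * BITS_IN_LONG), y);
}
#endif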
2188:
2189: /* Fast product (Karatsuba) of integers. a and b are "special" GENs
2190: * c,c0,c1,c2 are genuine GENs.
2191: */
2192: static GEN
2193: quickmulii(GEN a, GEN b, long na, long nb)
2194: {
2195: GEN a0,c,c0;
1.2 ! noro 2196: long n0, n0a, i;
! 2197: gpmem_t av;
1.1 noro 2198:
2199: if (na < nb) swapspec(a,b, na,nb);
2200: if (nb == 1) return mulsispec(*b, a, na);
2201: if (nb == 0) return gzero;
2202: if (nb < KARATSUBA_MULI_LIMIT) return muliispec(a,b,na,nb);
2203: i=(na>>1); n0=na-i; na=i;
2204: av=avma; a0=a+na; n0a=n0;
2205: while (!*a0 && n0a) { a0++; n0a--; }
2206:
2207: if (n0a && nb > n0)
2208:   { /* after the nb -= n0 below: nb <= na <= n0 */
2209: GEN b0,c1,c2;
2210: long n0b;
2211:
2212: nb -= n0;
2213: c = quickmulii(a,b,na,nb);
2214: b0 = b+nb; n0b = n0;
2215: while (!*b0 && n0b) { b0++; n0b--; }
2216: if (n0b)
2217: {
2218: c0 = quickmulii(a0,b0, n0a,n0b);
2219:
2220: c2 = addiispec(a0,a, n0a,na);
2221: c1 = addiispec(b0,b, n0b,nb);
2222: c1 = quickmulii(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
2223: c2 = addiispec(c0+2, c+2, lgefint(c0)-2,lgefint(c) -2);
2224:
2225: c1 = subiispec(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
2226: }
2227: else
2228: {
2229: c0 = gzero;
2230: c1 = quickmulii(a0,b, n0a,nb);
2231: }
2232: c = addshiftw(c,c1, n0);
2233: }
2234: else
2235: {
2236: c = quickmulii(a,b,na,nb);
2237: c0 = quickmulii(a0,b,n0a,nb);
2238: }
2239: return gerepileuptoint(av, addshiftw(c,c0, n0));
2240: }
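
/* Illustrative sketch (disabled): the identity behind the recombination in
 * quickmulii() above.  With B = 2^(n0*BITS_IN_LONG), write a = a1*B + a0 and
 * b = b1*B + b0; then
 *   a*b = (a1*b1)*B^2 + ((a1+a0)*(b1+b0) - a1*b1 - a0*b0)*B + (a0*b0),
 * i.e. three half-size products instead of four.  Written out on ordinary
 * GENs with an ad hoc name; the kernel works on "spec" digit arrays instead. */
#if 0
static GEN
karatsuba_identity(GEN a1, GEN a0, GEN b1, GEN b0, long n0)
{
  long sh = n0 * BITS_IN_LONG;
  GEN hi = mulii(a1,b1), lo = mulii(a0,b0);
  GEN mid = subii(mulii(addii(a1,a0), addii(b1,b0)), addii(hi,lo));
  return addii(shifti(addii(shifti(hi,sh), mid), sh), lo);
}
#endif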
2241:
2242: /* actual operations will take place on a+2 and b+2: we strip the codewords */
2243: GEN
2244: mulii(GEN a,GEN b)
2245: {
2246: long sa,sb;
2247: GEN z;
2248:
2249: sa=signe(a); if (!sa) return gzero;
2250: sb=signe(b); if (!sb) return gzero;
2251: if (sb<0) sa = -sa;
2252: z = quickmulii(a+2,b+2, lgefint(a)-2,lgefint(b)-2);
2253: setsigne(z,sa); return z;
2254: }
2255:
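/* resii(x,y), computed from a precomputed pair sy = [y, invy] where invy is a
 * t_REAL approximation to 1/y: one real multiplication plus a truncation
 * replace the integer division, and at most one correction step is needed. */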
2256: GEN
2257: resiimul(GEN x, GEN sy)
2258: {
2259: GEN r, q, y = (GEN)sy[1], invy;
1.2 ! noro 2260: long k;
! 2261: gpmem_t av = avma;
1.1 noro 2262:
2263: k = cmpii(x, y);
2264: if (k <= 0) return k? icopy(x): gzero;
2265: invy = (GEN)sy[2];
2266: q = mulir(x,invy);
2267: q = mptrunc(q); /* <= divii(x, y) (at most 1 less) */
2268: r = subii(x, mulii(y,q));
2269: /* resii(x,y) + y >= r >= resii(x,y) */
2270: k = cmpii(r, y);
2271: if (k >= 0)
2272: {
2273: if (k == 0) { avma = av; return gzero; }
2274: r = subiispec(r+2, y+2, lgefint(r)-2, lgefint(y)-2);
2275: }
2276: #if 0
2277: q = subii(r,resii(x,y));
2278: if (signe(q))
2279: err(talker,"bug in resiimul: x = %Z\ny = %Z\ndif = %Z", x,y,q);
2280: #endif
2281: return gerepileuptoint(av, r); /* = resii(x, y) */
2282: }
2283:
2284: /* x % (2^n), assuming x, n >= 0 */
2285: GEN
2286: resmod2n(GEN x, long n)
2287: {
2288: long hi,l,k,lx,ly;
2289: GEN z, xd, zd;
1.2 ! noro 2290:
1.1 noro 2291: if (!signe(x) || !n) return gzero;
2292:
1.2 ! noro 2293: l = n & (BITS_IN_LONG-1); /* n % BITS_IN_LONG */
1.1 noro 2294: k = n >> TWOPOTBITS_IN_LONG; /* n / BITS_IN_LONG */
2295: lx = lgefint(x);
2296: if (lx < k+3) return icopy(x);
2297:
2298: xd = x + (lx-k-1);
2299: /* x = |_|...|#|1|...|k| : copy the last l bits of # and the last k words
2300: * ^--- initial xd */
2301: hi = *xd & ((1<<l) - 1); /* last l bits of # = top bits of result */
2302: if (!hi)
2303: { /* strip leading zeroes from result */
2304: xd++; while (k && !*xd) { k--; xd++; }
2305: if (!k) return gzero;
2306: ly = k+2; xd--;
2307: }
2308: else
2309: ly = k+3;
2310:
2311: zd = z = cgeti(ly);
2312: *++zd = evalsigne(1) | evallgefint(ly);
2313: if (hi) *++zd = hi;
2314: for ( ;k; k--) *++zd = *++xd;
2315: return z;
2316: }
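
/* Sanity sketch (disabled, ad hoc name): for x, n >= 0, resmod2n(x,n) should
 * agree with the generic remainder modulo 2^n. */
#if 0
static int
resmod2n_check(GEN x, long n)
{
  gpmem_t av = avma;
  int ok = egalii(resmod2n(x,n), modii(x, shifti(gun,n)));
  avma = av; return ok;
}
#endif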
2317:
2318: static GEN
2319: quicksqri(GEN a, long na)
2320: {
2321: GEN a0,c;
1.2 ! noro 2322: long n0, n0a, i;
! 2323: gpmem_t av;
1.1 noro 2324:
2325: if (na < KARATSUBA_SQRI_LIMIT) return sqrispec(a,na);
2326: i=(na>>1); n0=na-i; na=i;
2327: av=avma; a0=a+na; n0a=n0;
2328: while (!*a0 && n0a) { a0++; n0a--; }
2329: c = quicksqri(a,na);
2330: if (n0a)
2331: {
2332: GEN t, c1, c0 = quicksqri(a0,n0a);
2333: #if 0
2334: c1 = shifti(quickmulii(a0,a, n0a,na),1);
2335: #else /* slower !!! */
2336: t = addiispec(a0,a,n0a,na);
2337: t = quicksqri(t+2,lgefint(t)-2);
2338: c1= addiispec(c0+2,c+2, lgefint(c0)-2, lgefint(c)-2);
2339: c1= subiispec(t+2, c1+2, lgefint(t)-2, lgefint(c1)-2);
2340: #endif
2341: c = addshiftw(c,c1, n0);
2342: c = addshiftw(c,c0, n0);
2343: }
2344: else
2345: c = addshiftw(c,gzero,n0<<1);
2346: return gerepileuptoint(av, c);
2347: }
2348:
2349: GEN
2350: sqri(GEN a) { return quicksqri(a+2, lgefint(a)-2); }
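
/* Illustrative sketch (disabled): the identity behind quicksqri().  With
 * B = 2^(n0*BITS_IN_LONG) and a = a1*B + a0,
 *   a^2 = (a1^2)*B^2 + ((a1+a0)^2 - a1^2 - a0^2)*B + (a0^2),
 * so a full-size square costs three (roughly) half-size squares.  Ordinary
 * GENs, ad hoc name. */
#if 0
static GEN
sqri_identity(GEN a1, GEN a0, long n0)
{
  long sh = n0 * BITS_IN_LONG;
  GEN hi = sqri(a1), lo = sqri(a0);
  GEN mid = subii(sqri(addii(a1,a0)), addii(hi,lo));
  return addii(shifti(addii(shifti(hi,sh), mid), sh), lo);
}
#endif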
2351:
2352: #define MULRR_LIMIT 32
2353: #define MULRR2_LIMIT 32
2354:
2355: #if 0
2356: GEN
2357: karamulrr1(GEN x, GEN y)
2358: {
2359: long sx,sy;
2360: long i,i1,i2,lx=lg(x),ly=lg(y),e,flag,garde;
2361: long lz2,lz3,lz4;
2362: GEN z,lo1,lo2,hi;
2363:
2364: /* ensure that lg(y) >= lg(x) */
2365: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
2366: if (lx < MULRR_LIMIT) return mulrr(x,y);
1.2 ! noro 2367: sx=signe(x); sy=signe(y); e = expo(x)+expo(y);
! 2368: if (!sx || !sy) return realzero_bit(e);
1.1 noro 2369: if (sy<0) sx = -sx;
2370: ly=lx+flag; z=cgetr(lx);
2371: lz2 = (lx>>1); lz3 = lx-lz2;
2372: x += 2; lx -= 2;
2373: y += 2; ly -= 2;
2374: hi = quickmulii(x,y,lz2,lz2);
2375: i1=lz2; while (i1<lx && !x[i1]) i1++;
2376: lo1 = quickmulii(y,x+i1,lz2,lx-i1);
2377: i2=lz2; while (i2<ly && !y[i2]) i2++;
2378: lo2 = quickmulii(x,y+i2,lz2,ly-i2);
2379:
2380: if (signe(lo1))
2381: {
2382: if (flag) { lo2 = addshiftw(lo1,lo2,1); lz3++; } else lo2=addii(lo1,lo2);
2383: }
2384: lz4=lgefint(lo2)-lz3;
2385: if (lz4>0) hi = addiispec(hi+2,lo2+2, lgefint(hi)-2,lz4);
2386: if (hi[2] < 0)
2387: {
2388: e++; garde=hi[lx+2];
2389: for (i=lx+1; i>=2 ; i--) z[i]=hi[i];
2390: }
2391: else
2392: {
2393: garde = (hi[lx+2] << 1);
2394: shift_left(z,hi,2,lx+1, garde, 1);
2395: }
1.2 ! noro 2396: z[1]=evalsigne(sx) | evalexpo(e);
1.1 noro 2397: if (garde < 0)
2398: { /* round to nearest */
2399: i=lx+2; do z[--i]++; while (z[i]==0);
2400: if (i==1) z[2]=HIGHBIT;
2401: }
1.2 ! noro 2402: avma=(gpmem_t)z; return z;
1.1 noro 2403: }
2404:
2405: GEN
2406: karamulrr2(GEN x, GEN y)
2407: {
2408: long sx,sy,i,lx=lg(x),ly=lg(y),e,flag,garde;
2409: GEN z,hi;
2410:
2411: if (lx>ly) { lx=ly; z=x; x=y; y=z; flag=1; } else flag = (lx!=ly);
2412: if (lx < MULRR2_LIMIT) return mulrr(x,y);
2413: ly=lx+flag; sx=signe(x); sy=signe(y);
1.2 ! noro 2414: e = expo(x)+expo(y);
! 2415: if (!sx || !sy) return realzero_bit(e);
1.1 noro 2416: if (sy<0) sx = -sx;
2417: z=cgetr(lx);
2418: hi=quickmulii(y+2,x+2,ly-2,lx-2);
2419: if (hi[2] < 0)
2420: {
2421: e++; garde=hi[lx];
2422: for (i=2; i<lx ; i++) z[i]=hi[i];
2423: }
2424: else
2425: {
2426: garde = (hi[lx] << 1);
2427: shift_left(z,hi,2,lx-1, garde, 1);
2428: }
1.2 ! noro 2429: z[1]=evalsigne(sx) | evalexpo(e);
1.1 noro 2430: if (garde < 0)
2431: { /* round to nearest */
2432: i=lx; do z[--i]++; while (z[i]==0);
2433: if (i==1) z[2]=HIGHBIT;
2434: }
1.2 ! noro 2435: avma=(gpmem_t)z; return z;
1.1 noro 2436: }
2437:
2438: GEN
2439: karamulrr(GEN x, GEN y, long flag)
2440: {
2441: switch(flag)
2442: {
2443: case 1: return karamulrr1(x,y);
2444: case 2: return karamulrr2(x,y);
2445: default: err(flagerr,"karamulrr");
2446: }
2447: return NULL; /* not reached */
2448: }
2449:
2450: GEN
2451: karamulir(GEN x, GEN y, long flag)
2452: {
2453: long sx=signe(x),lz,i;
1.2 ! noro 2454: GEN z, z1;
1.1 noro 2455:
2456: if (!sx) return gzero;
1.2 ! noro 2457: lz = lg(y); z = cgetr(lz);
! 2458: z1 = karamulrr(itor(x, lz+1), y, flag);
1.1 noro 2459: for (i=1; i<lz; i++) z[i]=z1[i];
1.2 ! noro 2460: avma=(gpmem_t)z; return z;
1.1 noro 2461: }
2462: #endif
2463:
2464: #ifdef LONG_IS_64BIT
1.2 ! noro 2465: long
! 2466: expodb(double x)
! 2467: {
! 2468: union { double f; ulong i; } fi;
! 2469: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2470: const int exp_mid = 0x3ff;/* exponent bias */
1.1 noro 2471:
1.2 ! noro 2472: if (x==0.) return -1023;
! 2473: fi.f = x;
! 2474: return ((fi.i & (HIGHBIT-1)) >> mant_len) - exp_mid;
! 2475: }
1.1 noro 2476:
2477: GEN
2478: dbltor(double x)
2479: {
1.2 ! noro 2480: GEN z;
1.1 noro 2481: long e;
2482: union { double f; ulong i; } fi;
2483: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2484: const int exp_mid = 0x3ff;/* exponent bias */
2485: const int expo_len = 11; /* number of bits of exponent */
2486: LOCAL_HIREMAINDER;
2487:
1.2 ! noro 2488: if (x==0.) return realzero_bit(-1023);
! 2489: fi.f = x; z = cgetr(DEFAULTPREC);
1.1 noro 2490: e = ((fi.i & (HIGHBIT-1)) >> mant_len) - exp_mid;
2491: z[1] = evalexpo(e) | evalsigne(x<0? -1: 1);
2492: z[2] = (fi.i << expo_len) | HIGHBIT;
2493: return z;
2494: }
2495:
2496: double
2497: rtodbl(GEN x)
2498: {
2499: long ex,s=signe(x);
2500: ulong a;
2501: union { double f; ulong i; } fi;
2502: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2503: const int exp_mid = 0x3ff;/* exponent bias */
2504: const int expo_len = 11; /* number of bits of exponent */
2505: LOCAL_HIREMAINDER;
2506:
2507: if (typ(x)==t_INT && !s) return 0.0;
2508: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
2509: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
2510:
2511: /* start by rounding to closest */
2512: a = (x[2] & (HIGHBIT-1)) + 0x400;
2513: if (a & HIGHBIT) { ex++; a=0; }
2514: if (ex >= exp_mid) err(rtodber);
2515: fi.i = ((ex + exp_mid) << mant_len) | (a >> expo_len);
2516: if (s<0) fi.i |= HIGHBIT;
2517: return fi.f;
2518: }
2519:
1.2 ! noro 2520: #else /* LONG_IS_64BIT */
1.1 noro 2521:
1.2 ! noro 2522: #if PARI_DOUBLE_FORMAT == 1
1.1 noro 2523: # define INDEX0 1
2524: # define INDEX1 0
1.2 ! noro 2525: #elif PARI_DOUBLE_FORMAT == 0
1.1 noro 2526: # define INDEX0 0
2527: # define INDEX1 1
2528: #endif
2529:
1.2 ! noro 2530: long
! 2531: expodb(double x)
! 2532: {
! 2533: union { double f; ulong i[2]; } fi;
! 2534: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
! 2535: const int exp_mid = 0x3ff;/* exponent bias */
! 2536: const int shift = mant_len-32;
! 2537:
! 2538: if (x==0.) return -1023;
! 2539: fi.f = x;
! 2540: {
! 2541: const ulong a = fi.i[INDEX0];
! 2542: return ((a & (HIGHBIT-1)) >> shift) - exp_mid;
! 2543: }
! 2544: }
! 2545:
1.1 noro 2546: GEN
2547: dbltor(double x)
2548: {
2549: GEN z;
2550: long e;
2551: union { double f; ulong i[2]; } fi;
2552: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2553: const int exp_mid = 0x3ff;/* exponent bias */
1.2 ! noro 2554: const int expo_len = 11; /* number of bits of exponent */
1.1 noro 2555: const int shift = mant_len-32;
2556:
1.2 ! noro 2557: if (x==0.) return realzero_bit(-1023);
! 2558: fi.f = x; z=cgetr(DEFAULTPREC);
1.1 noro 2559: {
2560: const ulong a = fi.i[INDEX0];
2561: const ulong b = fi.i[INDEX1];
2562: e = ((a & (HIGHBIT-1)) >> shift) - exp_mid;
2563: z[1] = evalexpo(e) | evalsigne(x<0? -1: 1);
2564: z[3] = b << expo_len;
2565: z[2] = HIGHBIT | b >> (BITS_IN_LONG-expo_len) | (a << expo_len);
2566: }
2567: return z;
2568: }
2569:
2570: double
2571: rtodbl(GEN x)
2572: {
2573: long ex,s=signe(x),lx=lg(x);
2574: ulong a,b,k;
2575: union { double f; ulong i[2]; } fi;
2576: const int mant_len = 52; /* mantissa bits (excl. hidden bit) */
2577: const int exp_mid = 0x3ff;/* exponent bias */
1.2 ! noro 2578: const int expo_len = 11; /* number of bits of exponent */
1.1 noro 2579: const int shift = mant_len-32;
2580:
2581: if (typ(x)==t_INT && !s) return 0.0;
2582: if (typ(x)!=t_REAL) err(typeer,"rtodbl");
2583: if (!s || (ex=expo(x)) < - exp_mid) return 0.0;
2584:
2585: /* start by rounding to closest */
2586: a = x[2] & (HIGHBIT-1);
2587: if (lx > 3)
2588: {
2589: b = x[3] + 0x400UL; if (b < 0x400UL) a++;
2590: if (a & HIGHBIT) { ex++; a=0; }
2591: }
2592: else b = 0;
2593: if (ex > exp_mid) err(rtodber);
2594: ex += exp_mid;
2595: k = (a >> expo_len) | (ex << shift);
2596: if (s<0) k |= HIGHBIT;
2597: fi.i[INDEX0] = k;
2598: fi.i[INDEX1] = (a << (BITS_IN_LONG-expo_len)) | (b >> expo_len);
2599: return fi.f;
2600: }
1.2 ! noro 2601: #endif /* LONG_IS_64BIT */
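
/* Round-trip sketch (disabled, ad hoc name): dbltor() packs the IEEE double
 * into a DEFAULTPREC t_REAL and rtodbl() rounds it back to nearest; for
 * normal, finite, nonzero doubles the round trip should be the identity,
 * since DEFAULTPREC carries at least the 53 mantissa bits of a double. */
#if 0
static int
dbl_roundtrip_check(double x)
{
  gpmem_t av = avma;
  int ok = (rtodbl(dbltor(x)) == x);
  avma = av; return ok;
}
#endif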
1.1 noro 2602:
2603: /* Old cgiv without reference count (which was not used anyway)
2604: * Should be a macro.
2605: */
2606: void
2607: cgiv(GEN x)
2608: {
2609: if (x == (GEN) avma)
1.2 ! noro 2610: avma = (gpmem_t) (x+lg(x));
1.1 noro 2611: }
2612:
2613: /********************************************************************/
2614: /** **/
2615: /** INTEGER EXTENDED GCD (AND INVMOD) **/
2616: /** **/
2617: /********************************************************************/
2618:
2619: /* GN 1998Oct25, originally developed in January 1998 under 2.0.4.alpha,
2620: * in the context of trying to improve elliptic curve cryptosystem attacking
2621: * algorithms. 2001Jan02 -- added bezout() functionality.
2622: *
2623: * Two basic ideas - (1) avoid many integer divisions, especially when the
2624: * quotient is 1 (which happens more than 40% of the time). (2) Use Lehmer's
2625: * trick as modified by Jebelean of extracting a couple of words' worth of
2626: * leading bits from both operands, and compute partial quotients from them
2627: * as long as we can be sure of their values. The Jebelean modifications
2628: * consist in reliable inequalities from which we can decide fast whether
2629: * to carry on or to return to the outer loop, and in re-shifting after the
2630: * first word's worth of bits has been used up. All of this is described
2631:  * in R. Lercier's thesis [pp148-153 & 163f.], except his outer loop isn't
2632: * quite right (the catch-up divisions needed when one partial quotient is
2633: * larger than a word are missing).
2634: *
2635: * The API consists of invmod() and bezout() below; the single-word routines
2636: * xgcduu and xxgcduu may be called directly if desired; lgcdii() probably
2637: * doesn't make much sense out of context.
2638: *
2639: * The whole lot is a factor 6 .. 8 faster on word-sized operands, and asym-
2640: * ptotically about a factor 2.5 .. 3, depending on processor architecture,
2641: * than the naive continued-division code. Unfortunately, thanks to the
2642: * unrolled loops and all, the code is a bit lengthy.
2643: */
2644:
2645: /*==================================
2646: * xgcduu(d,d1,f,v,v1,s)
2647: * xxgcduu(d,d1,f,u,u1,v,v1,s)
2648: * rgcduu(d,d1,vmax,u,u1,v,v1,s)
2649: *==================================*/
2650: /*
2651: * Fast `final' extended gcd algorithm, acting on two ulongs. Ideally this
2652: * should be replaced with assembler versions wherever possible. The present
2653: * code essentially does `subtract, compare, and possibly divide' at each step,
2654: * which is reasonable when hardware division (a) exists, (b) is a bit slowish
2655: * and (c) does not depend a lot on the operand values (as on i486). When
2656: * wordsize division is in fact an assembler routine based on subtraction,
2657: * this strategy may not be the most efficient one.
2658: *
2659: * xxgcduu() should be called with d > d1 > 0, returns gcd(d,d1), and assigns
2660: * the usual signless cont.frac. recurrence matrix to [u, u1; v, v1] (i.e.,
2661:  * the product of all the [0, 1; 1, q_j] where the leftmost factor arises from
2662: * the quotient of the first division step), and the information about the
2663: * implied signs to s (-1 when an odd number of divisions has been done,
2664: * 1 otherwise). xgcduu() is exactly the same except that u,u1 are not com-
2665: * puted (and not returned, of course).
2666: *
2667: * The input flag f should be set to 1 if we know in advance that gcd(d,d1)==1
2668: * (so we can stop the chain division one step early: as soon as the remainder
2669: * equals 1). Use this when you intend to use only what would be v, or only
2670: * what would be u and v, after that final division step, but not u1 and v1.
2671: * With the flag in force and thus without that final step, the interesting
2672: * quantity/ies will still sit in [u1 and] v1, of course.
2673: *
2674: * For computing the inverse of a single-word INTMOD known to exist, pass f=1
2675: * to xgcduu(), and obtain the result from s and v1. (The routine does the
2676: * right thing when d1==1 already.) For finishing a multiword modinv known
2677: * to exist, pass f=1 to xxgcduu(), and multiply the returned matrix (with
2678: * properly adjusted signs) onto the values v' and v1' previously obtained
2679: * from the multiword division steps. Actually, just take the scalar product
2680: * of [v',v1'] with [u1,-v1], and change the sign if s==-1. (If the final
2681: * step had been carried out, it would be [-u,v], and s would also change.)
2682: * For reducing a rational number to lowest terms, pass f=0 to xgcduu().
2683: * Finally, f=0 with xxgcduu() is useful for Bezout computations.
2684: * [Harrumph. In the above prescription, the sign turns out to be precisely
2685: * wrong.]
2686: * (It is safe for invmod() to call xgcduu() with f=1, because f&1 doesn't
2687: * make a difference when gcd(d,d1)>1. The speedup is negligible.)
2688: *
2689: * In principle, when gcd(d,d1) is known to be 1, it is straightforward to
2690: * recover the final u,u1 given only v,v1 and s. However, it probably isn't
2691: * worthwhile, as it trades a few multiplications for a division.
2692: *
2693: * Note that these routines do not know and do not need to know about the
2694: * PARI stack.
1.2 ! noro 2695: *
1.1 noro 2696: * Added 2001Jan15:
2697: * rgcduu() is a variant of xxgcduu() which does not have f (the effect is
2698: * that of f=0), but instead has a ulong vmax parameter, for use in rational
2699: * reconstruction below. It returns when v1 exceeds vmax; v will never
2700: * exceed vmax. (vmax=0 is taken as a synonym of MAXULONG i.e. unlimited,
2701: * in which case rgcduu behaves exactly like xxgcduu with f=0.) The return
2702: * value of rgcduu() is typically meaningless; the interesting part is the
2703: * matrix.
2704: */
2705:
2706: ulong
2707: xgcduu(ulong d, ulong d1, int f, ulong* v, ulong* v1, long *s)
2708: {
2709: ulong xv,xv1, xs, q,res;
2710: LOCAL_HIREMAINDER;
2711:
2712: /* The above blurb contained a lie. The main loop always stops when d1
2713: * has become equal to 1. If (d1 == 1 && !(f&1)) after the loop, we do
2714: * the final `division' of d by 1 `by hand' as it were.
2715: *
2716: * The loop has already been unrolled once. Aggressive optimization could
2717: * well lead to a totally unrolled assembler version...
2718: *
2719: * On modern x86 architectures, this loop is a pig anyway. The division
2720: * instruction always puts its result into the same pair of registers, and
2721: * we always want to use one of them straight away, so pipeline performance
2722: * will suck big time. An assembler version should probably do a first loop
2723: * computing and storing all the quotients -- their number is bounded in
2724: * advance -- and then assembling the matrix in a second pass. On other
2725: * architectures where we can cycle through four or so groups of registers
2726: * and exploit a fast ALU result-to-operand feedback path, this is much less
2727: * of an issue. (Intel sucks. See http://www.x86.org/ ...)
2728: */
2729: xs = res = 0;
2730: xv = 0UL; xv1 = 1UL;
2731: while (d1 > 1UL)
2732: {
2733: d -= d1; /* no need to use subll */
2734: if (d >= d1)
2735: {
2736: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
2737: xv += q * xv1;
2738: }
2739: else
2740: xv += xv1;
2741: /* possible loop exit */
2742: if (d <= 1UL) { xs=1; break; }
2743: /* repeat with inverted roles */
2744: d1 -= d;
2745: if (d1 >= d)
2746: {
2747: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
2748: xv1 += q * xv;
2749: }
2750: else
2751: xv1 += xv;
2752: } /* while */
2753:
2754: if (!(f&1)) /* division by 1 postprocessing if needed */
2755: {
2756: if (xs && d==1)
2757: { xv1 += d1 * xv; xs = 0; res = 1UL; }
2758: else if (!xs && d1==1)
2759: { xv += d * xv1; xs = 1; res = 1UL; }
2760: }
2761:
2762: if (xs)
2763: {
2764: *s = -1; *v = xv1; *v1 = xv;
2765: return (res ? res : (d==1 ? 1UL : d1));
2766: }
2767: else
2768: {
2769: *s = 1; *v = xv; *v1 = xv1;
2770: return (res ? res : (d1==1 ? 1UL : d));
2771: }
2772: }
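
/* Usage sketch (disabled, ad hoc name): single-word modular inverse through
 * xgcduu(), exactly as described in the blurb above and as done by the
 * single-word branch of invmod() further down.  Assumes 0 < a < m and
 * gcd(a,m) == 1. */
#if 0
static ulong
inv_mod_word(ulong a, ulong m)
{
  ulong v, v1;
  long s;
  (void)xgcduu(m, a, 1, &v, &v1, &s);
  v1 %= m;
  return (s < 0)? m - v1: v1;
}
#endif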
2773:
2774:
2775: ulong
2776: xxgcduu(ulong d, ulong d1, int f,
2777: ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
2778: {
2779: ulong xu,xu1, xv,xv1, xs, q,res;
2780: LOCAL_HIREMAINDER;
2781:
2782: xs = res = 0;
2783: xu = xv1 = 1UL;
2784: xu1 = xv = 0UL;
2785: while (d1 > 1UL)
2786: {
2787: d -= d1; /* no need to use subll */
2788: if (d >= d1)
2789: {
2790: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
2791: xv += q * xv1;
2792: xu += q * xu1;
2793: }
2794: else
2795: { xv += xv1; xu += xu1; }
2796: /* possible loop exit */
2797: if (d <= 1UL) { xs=1; break; }
2798: /* repeat with inverted roles */
2799: d1 -= d;
2800: if (d1 >= d)
2801: {
2802: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
2803: xv1 += q * xv;
2804: xu1 += q * xu;
2805: }
2806: else
2807: { xv1 += xv; xu1 += xu; }
2808: } /* while */
2809:
2810: if (!(f&1)) /* division by 1 postprocessing if needed */
2811: {
2812: if (xs && d==1)
2813: {
2814: xv1 += d1 * xv;
2815: xu1 += d1 * xu;
2816: xs = 0; res = 1UL;
2817: }
2818: else if (!xs && d1==1)
2819: {
2820: xv += d * xv1;
2821: xu += d * xu1;
2822: xs = 1; res = 1UL;
2823: }
2824: }
2825:
2826: if (xs)
2827: {
2828: *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
2829: return (res ? res : (d==1 ? 1UL : d1));
2830: }
2831: else
2832: {
2833: *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
2834: return (res ? res : (d1==1 ? 1UL : d));
2835: }
2836: }
2837:
2838: ulong
2839: rgcduu(ulong d, ulong d1, ulong vmax,
2840: ulong* u, ulong* u1, ulong* v, ulong* v1, long *s)
2841: {
2842: ulong xu,xu1, xv,xv1, xs, q, res=0;
2843: int f = 0;
2844: LOCAL_HIREMAINDER;
2845:
2846: if (vmax == 0) vmax = MAXULONG;
2847: xs = res = 0;
2848: xu = xv1 = 1UL;
2849: xu1 = xv = 0UL;
2850: while (d1 > 1UL)
2851: {
2852: d -= d1; /* no need to use subll */
2853: if (d >= d1)
2854: {
2855: hiremainder = 0; q = 1 + divll(d,d1); d = hiremainder;
2856: xv += q * xv1;
2857: xu += q * xu1;
2858: }
2859: else
2860: { xv += xv1; xu += xu1; }
2861: /* possible loop exit */
2862: if (xv > vmax) { f=xs=1; break; }
2863: if (d <= 1UL) { xs=1; break; }
2864: /* repeat with inverted roles */
2865: d1 -= d;
2866: if (d1 >= d)
2867: {
2868: hiremainder = 0; q = 1 + divll(d1,d); d1 = hiremainder;
2869: xv1 += q * xv;
2870: xu1 += q * xu;
2871: }
2872: else
2873: { xv1 += xv; xu1 += xu; }
2874: /* possible loop exit */
2875: if (xv1 > vmax) { f=1; break; }
2876: } /* while */
2877:
2878: if (!(f&1)) /* division by 1 postprocessing if needed */
2879: {
2880: if (xs && d==1)
2881: {
2882: xv1 += d1 * xv;
2883: xu1 += d1 * xu;
2884: xs = 0; res = 1UL;
2885: }
2886: else if (!xs && d1==1)
2887: {
2888: xv += d * xv1;
2889: xu += d * xu1;
2890: xs = 1; res = 1UL;
2891: }
2892: }
2893:
2894: if (xs)
2895: {
2896: *s = -1; *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
2897: return (res ? res : (d==1 ? 1UL : d1));
2898: }
2899: else
2900: {
2901: *s = 1; *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
2902: return (res ? res : (d1==1 ? 1UL : d));
2903: }
2904: }
2905:
2906: /*==================================
2907: * lgcdii(d,d1,u,u1,v,v1,vmax)
2908: *==================================*/
2909: /* Lehmer's partial extended gcd algorithm, acting on two t_INT GENs.
2910: *
2911: * Tries to determine, using the leading 2*BITS_IN_LONG significant bits of d
2912: * and a quantity of bits from d1 obtained by a shift of the same displacement,
2913: * as many partial quotients of d/d1 as possible, and assigns to [u,u1;v,v1]
2914: * the product of all the [0, 1; 1, q_j] thus obtained, where the leftmost
2915: * factor arises from the quotient of the first division step.
1.2 ! noro 2916: *
1.1 noro 2917: * For use in rational reconstruction, input param vmax can be given a
2918: * nonzero value. In this case, we will return early as soon as v1 > vmax
2919: * (i.e. it is guaranteed that v <= vmax). --2001Jan15
2920: *
2921: * MUST be called with d > d1 > 0, and with d occupying more than one
2922: * significant word (if it doesn't, the caller has no business with us;
2923: * he/she/it should use xgcduu() instead). Returns the number of reduction/
2924: * swap steps carried out, possibly zero, or under certain conditions minus
2925: * that number. When the return value is nonzero, the caller should use the
2926: * returned recurrence matrix to update its own copies of d,d1. When the
2927: * return value is non-positive, and the latest remainder after updating
2928: * turns out to be nonzero, the caller should at once attempt a full division,
2929: * rather than first trying lgcdii() again -- this typically happens when we
2930: * are about to encounter a quotient larger than half a word. (This is not
2931: * detected infallibly -- after a positive return value, it is perfectly
2932: * possible that the next stage will end up needing a full division. After
2933: * a negative return value, however, this is certain, and should be acted
2934: * upon.)
2935: *
2936: * (The sign information, for which xgcduu() has its return argument s, is now
2937: * implicit in the LSB of our return value, and the caller may take advantage
2938: * of the fact that a return value of +-1 implies u==0,u1==v==1 [only v1 pro-
2939: * vides interesting information in this case]. One might also use the fact
2940: * that if the return value is +-2, then u==1, but this is rather marginal.)
2941: *
2942: * If it was not possible to determine even the first quotient, either because
2943: * we're too close to an integer quotient or because the quotient would be
2944: * larger than one word (if the `leading digit' of d1 after shifting is all
2945: * zeros), we return 0 and do not bother to assign anything to the last four
2946: * args.
2947: *
2948: * The division chain might (sometimes) even run to completion. It will be
2949: * up to the caller to detect this case.
2950: *
2951: * This routine does _not_ change d or d1; this will also be up to the caller.
2952: *
2953: * Note that this routine does not know and does not need to know about the
2954: * PARI stack.
2955: */
2956: /*#define DEBUG_LEHMER 1 */
2957: int
2958: lgcdii(ulong* d, ulong* d1,
2959: ulong* u, ulong* u1, ulong* v, ulong* v1,
2960: ulong vmax)
2961: {
2962: /* Strategy: (1) Extract/shift most significant bits. We assume that d
2963: * has at least two significant words, but we can cope with a one-word d1.
2964: * Let dd,dd1 be the most significant dividend word and matching part of the
2965: * divisor.
2966: * (2) Check for overflow on the first division. For our purposes, this
2967: * happens when the upper half of dd1 is zero. (Actually this is detected
2968: * during extraction.)
2969: * (3) Get a fix on the first quotient. We compute q = floor(dd/dd1), which
2970: * is an upper bound for floor(d/d1), and which gives the true value of the
2971: * latter if (and-almost-only-if) the remainder dd' = dd-q*dd1 is >= q.
2972: * (If it isn't, we give up. This is annoying because the subsequent full
2973: * division will repeat some work already done, but it happens very infre-
2974: * quently. Doing the extra-bit-fetch in this case would be awkward.)
2975: * (4) Finish initializations.
2976: *
2977: * The remainder of the action is comparatively boring... The main loop has
2978: * been unrolled once (so we don't swap things and we can apply Jebelean's
2979: * termination conditions which alternatingly take two different forms during
2980: * successive iterations). When we first run out of sufficient bits to form
2981: * a quotient, and have an extra word of each operand, we pull out two whole
2982: * word's worth of dividend bits, and divisor bits of matching significance;
2983: * to these we apply our partial matrix (disregarding overflow because the
2984: * result mod 2^(2*BITS_IN_LONG) will in fact give the correct values), and
2985: * re-extract one word's worth of the current dividend and a matching amount
2986: * of divisor bits. The affair will normally terminate with matrix entries
2987: * just short of a whole word. (We terminate the inner loop before these can
2988: * possibly overflow.)
2989: */
2990: ulong dd,dd1,ddlo,dd1lo, sh,shc; /* `digits', shift count */
2991: ulong xu,xu1, xv,xv1, q,res; /* recurrences, partial quotient, count */
2992: ulong tmp0,tmp1,tmp2,tmpd,tmpu,tmpv; /* temps */
2993: long ld, ld1, lz; /* t_INT effective lengths */
2994: int skip = 0; /* a boolean flag */
2995: LOCAL_OVERFLOW;
2996: LOCAL_HIREMAINDER;
2997:
2998: #ifdef DEBUG_LEHMER
2999: voir(d, -1); voir(d1, -1);
3000: #endif
3001: /* following is just for convenience: vmax==0 means no bound */
3002: if (vmax == 0) vmax = MAXULONG;
3003: ld = lgefint(d); ld1 = lgefint(d1); lz = ld - ld1; /* >= 0 */
3004: if (lz > 1) return 0; /* rare, quick and desperate exit */
3005:
3006: d += 2; d1 += 2; /* point at the leading `digits' */
3007: dd1lo = 0; /* unless we find something better */
3008: sh = bfffo(*d); /* obtain dividend left shift count */
3009:
3010: if (sh)
3011: { /* do the shifting */
3012: shc = BITS_IN_LONG - sh;
3013: if (lz)
3014: { /* dividend longer than divisor */
3015: dd1 = (*d1 >> shc);
3016: if (!(HIGHMASK & dd1)) return 0; /* overflow detected */
3017: if (ld1 > 3)
3018: dd1lo = (*d1 << sh) + (d1[1] >> shc);
3019: else
3020: dd1lo = (*d1 << sh);
3021: }
3022: else
3023: { /* dividend and divisor have the same length */
3024: /* dd1 = shiftl(d1,sh) would have left hiremainder==0, and dd1 != 0. */
3025: dd1 = (*d1 << sh);
3026: if (!(HIGHMASK & dd1)) return 0;
3027: if (ld1 > 3)
3028: {
3029: dd1 += (d1[1] >> shc);
3030: if (ld1 > 4)
3031: dd1lo = (d1[1] << sh) + (d1[2] >> shc);
3032: else
3033: dd1lo = (d1[1] << sh);
3034: }
3035: }
3036: /* following lines assume d to have 2 or more significant words */
3037: dd = (*d << sh) + (d[1] >> shc);
3038: if (ld > 4)
3039: ddlo = (d[1] << sh) + (d[2] >> shc);
3040: else
3041: ddlo = (d[1] << sh);
3042: }
3043: else
3044: { /* no shift needed */
3045: if (lz) return 0; /* div'd longer than div'r: o'flow automatic */
3046: dd1 = *d1;
3047: if (!(HIGHMASK & dd1)) return 0;
3048: if(ld1 > 3) dd1lo = d1[1];
3049: /* assume again that d has another significant word */
3050: dd = *d; ddlo = d[1];
3051: }
3052: #ifdef DEBUG_LEHMER
3053: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
3054: #endif
3055:
3056: /* First subtraction/division stage. (If a subtraction initially suffices,
3057: * we don't divide at all.) If a Jebelean condition is violated, and we
3058: * can't fix it even by looking at the low-order bits in ddlo,dd1lo, we
3059: * give up and ask for a full division. Otherwise we commit the result,
3060: * possibly deciding to re-shift immediately afterwards.
3061: */
3062: dd -= dd1;
3063: if (dd < dd1)
3064: { /* first quotient known to be == 1 */
3065: xv1 = 1UL;
3066: if (!dd) /* !(Jebelean condition), extraspecial case */
3067: { /* note this can actually happen... Now
3068: * q==1 is known, but we underflow already.
3069: * OTOH we've just shortened d by a whole word.
3070: * Thus we feel pleased with ourselves and
3071: * return. (The re-shift code below would
3072: * do so anyway.) */
3073: *u = 0; *v = *u1 = *v1 = 1UL;
3074: return -1; /* Next step will be a full division. */
3075: } /* if !(Jebelean) then */
3076: }
3077: else
3078: { /* division indicated */
3079: hiremainder = 0;
3080: xv1 = 1 + divll(dd, dd1); /* xv1: alternative spelling of `q', here ;) */
3081: dd = hiremainder;
3082: if (dd < xv1) /* !(Jebelean cond'), non-extra special case */
3083: { /* Attempt to complete the division using the
3084: * less significant bits, before skipping right
3085: * past the 1st loop to the reshift stage. */
3086: ddlo = subll(ddlo, mulll(xv1, dd1lo));
3087: dd = subllx(dd, hiremainder);
3088:
3089: /* If we now have an overflow, q was _certainly_ too large. Thanks to
3090: * our decision not to get here unless the original dd1 had bits set in
3091: * the upper half of the word, however, we now do know that the correct
3092: * quotient is in fact q-1. Adjust our data accordingly. */
3093: if (overflow)
3094: {
3095: xv1--;
3096: ddlo = addll(ddlo,dd1lo);
3097: dd = addllx(dd,dd1); /* overflows again which cancels the borrow */
3098: /* ...and fall through to skip=1 below */
3099: }
3100: else
3101: /* Test Jebelean condition anew, at this point using _all_ the extracted
3102: * bits we have. This is clutching at straws; we have a more or less
3103: * even chance of succeeding this time. Note that if we fail, we really
3104: * do not know whether the correct quotient would have been q or some
3105: * smaller value. */
3106: if (!dd && ddlo < xv1) return 0;
3107:
3108: /* Otherwise, we now know that q is correct, but we cannot go into the
3109: * 1st loop. Raise a flag so we'll remember to skip past the loop.
3110: * Get here also after the q-1 adjustment case. */
3111: skip = 1;
3112: } /* if !(Jebelean) then */
3113: }
3114: res = 1;
3115: #ifdef DEBUG_LEHMER
3116: fprintferr(" q = %ld, %lx, %lx\n", xv1, dd1, dd);
3117: #endif
3118: if (xv1 > vmax)
3119: { /* gone past the bound already */
3120: *u = 0UL; *u1 = 1UL; *v = 1UL; *v1 = xv1;
3121: return res;
3122: }
3123: xu = 0UL; xv = xu1 = 1UL;
3124:
3125: /* Some invariants from here across the first loop:
3126: *
3127: * At this point, and again after we are finished with the first loop and
3128: * subsequent conditional, a division and the associated update of the
3129: * recurrence matrix have just been carried out completely. The matrix
3130: * xu,xu1;xv,xv1 has been initialized (or updated, possibly with permuted
3131: * columns), and the current remainder == next divisor (dd at the moment)
3132: * is nonzero (it might be zero here, but then skip will have been set).
3133: *
3134: * After the first loop, or when skip is set already, it will also be the
3135: * case that there aren't sufficiently many bits to continue without re-
3136: * shifting. If the divisor after reshifting is zero, or indeed if it
3137: * doesn't have more than half a word's worth of bits, we will have to
3138: * return at that point. Otherwise, we proceed into the second loop.
3139: *
3140: * Furthermore, when we reach the re-shift stage, dd:ddlo and dd1:dd1lo will
3141: * already reflect the result of applying the current matrix to the old
3142: * ddorig:ddlo and dd1orig:dd1lo. (For the first iteration above, this
3143: * was easy to achieve, and we didn't even need to peek into the (now
3144: * no longer existent!) saved words. After the loop, we'll stop for a
3145: * moment to merge in the ddlo,dd1lo contributions.)
3146: *
3147: * Note that after the first division, even an a priori quotient of 1 cannot
3148: * be trusted until we've checked Jebelean's condition -- it cannot be too
3149: * large, of course, but it might be too small.
3150: */
3151:
3152: if (!skip)
3153: {
3154: for(;;)
3155: {
3156: /* First half of loop divides dd into dd1, and leaves the recurrence
3157: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
3158: * entries) when successful. */
3159: tmpd = dd1 - dd;
3160: if (tmpd < dd)
3161: { /* quotient suspected to be 1 */
3162: #ifdef DEBUG_LEHMER
3163: q = 1;
3164: #endif
3165: tmpu = xu + xu1; /* cannot overflow -- everything bounded by
3166: * the original dd during first loop */
3167: tmpv = xv + xv1;
3168: }
3169: else
3170: { /* division indicated */
3171: hiremainder = 0;
3172: q = 1 + divll(tmpd, dd);
3173: tmpd = hiremainder;
3174: tmpu = xu + q*xu1; /* can't overflow, but may need to be undone */
3175: tmpv = xv + q*xv1;
3176: }
3177:
3178: tmp0 = addll(tmpv, xv1);
3179: if ((tmpd < tmpu) || overflow ||
3180: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
3181: break; /* skip ahead to reshift stage */
3182: else
3183: { /* commit dd1, xu, xv */
3184: res++;
3185: dd1 = tmpd; xu = tmpu; xv = tmpv;
3186: #ifdef DEBUG_LEHMER
3187: fprintferr(" q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
3188: q, dd, dd1, xu1, xu, xv1, xv);
3189: #endif
3190: if (xv > vmax)
3191: { /* time to return */
3192: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3193: return res;
3194: }
3195: }
3196:
3197: /* Second half of loop divides dd1 into dd, and the matrix returns to its
3198: * normal arrangement. */
3199: tmpd = dd - dd1;
3200: if (tmpd < dd1)
3201: { /* quotient suspected to be 1 */
3202: #ifdef DEBUG_LEHMER
3203: q = 1;
3204: #endif
3205: tmpu = xu1 + xu; /* cannot overflow */
3206: tmpv = xv1 + xv;
3207: }
3208: else
3209: { /* division indicated */
3210: hiremainder = 0;
3211: q = 1 + divll(tmpd, dd1);
3212: tmpd = hiremainder;
3213: tmpu = xu1 + q*xu;
3214: tmpv = xv1 + q*xv;
3215: }
3216:
3217: tmp0 = addll(tmpu, xu);
3218: if ((tmpd < tmpv) || overflow ||
3219: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
3220: break; /* skip ahead to reshift stage */
3221: else
3222: { /* commit dd, xu1, xv1 */
3223: res++;
3224: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
3225: #ifdef DEBUG_LEHMER
3226: fprintferr(" q = %ld, %lx, %lx [%lu,%lu;%lu,%lu]\n",
3227: q, dd1, dd, xu, xu1, xv, xv1);
3228: #endif
3229: if (xv1 > vmax)
3230: { /* time to return */
3231: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3232: return res;
3233: }
3234: }
3235:
3236: } /* end of first loop */
3237:
3238: /* Intermezzo: update dd:ddlo, dd1:dd1lo. (But not if skip is set.) */
3239:
3240: if (res&1)
3241: {
3242: /* after failed division in 1st half of loop:
3243: * [dd1:dd1lo,dd:ddlo] = [ddorig:ddlo,dd1orig:dd1lo]
3244: * * [ -xu, xu1 ; xv, -xv1 ]
3245: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
3246: * add the high-order remainders + overflows onto [dd1,dd].)
3247: */
3248: tmp1 = mulll(ddlo, xu); tmp0 = hiremainder;
3249: tmp1 = subll(mulll(dd1lo,xv), tmp1);
3250: dd1 += subllx(hiremainder, tmp0);
3251: tmp2 = mulll(ddlo, xu1); tmp0 = hiremainder;
3252: ddlo = subll(tmp2, mulll(dd1lo,xv1));
3253: dd += subllx(tmp0, hiremainder);
3254: dd1lo = tmp1;
3255: }
3256: else
3257: {
3258: /* after failed division in 2nd half of loop:
3259: * [dd:ddlo,dd1:dd1lo] = [ddorig:ddlo,dd1orig:dd1lo]
3260: * * [ xu1, -xu ; -xv1, xv ]
3261: * (Actually, we only multiply [ddlo,dd1lo] onto the matrix and
3262: * add the high-order remainders + overflows onto [dd,dd1].)
3263: */
3264: tmp1 = mulll(ddlo, xu1); tmp0 = hiremainder;
3265: tmp1 = subll(tmp1, mulll(dd1lo,xv1));
3266: dd += subllx(tmp0, hiremainder);
3267: tmp2 = mulll(ddlo, xu); tmp0 = hiremainder;
3268: dd1lo = subll(mulll(dd1lo,xv), tmp2);
3269: dd1 += subllx(hiremainder, tmp0);
3270: ddlo = tmp1;
3271: }
3272: #ifdef DEBUG_LEHMER
3273: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
3274: #endif
3275: } /* end of skip-pable section: get here also, with res==1, when there
3276: * was a problem immediately after the very first division. */
3277:
3278: /* Re-shift. Note: the shift count _can_ be zero, viz. under the following
3279: * precise conditions: The original dd1 had its topmost bit set, so the 1st
3280: * q was 1, and after subtraction, dd had its topmost bit unset. If now
3281: * dd==0, we'd have taken the return exit already, so we couldn't have got
3282: * here. If not, then it must have been the second division which has gone
3283: * amiss (because dd1 was very close to an exact multiple of the remainder
3284: * dd value, so this will be very rare). At this point, we'd have a fairly
3285: * slim chance of fixing things by re-examining dd1:dd1lo vs. dd:ddlo, but
3286: * this is not guaranteed to work. Instead of trying, we return at once.
3287: * The caller will see to it that the initial subtraction is re-done using
3288: * _all_ the bits of both operands, which already helps, and the next round
3289: * will either be a full division (if dd occupied a halfword or less), or
3290: * another llgcdii() first step. In the latter case, since we try a little
3291: * harder during our first step, we may actually be able to fix the problem,
3292: * and get here again with improved low-order bits and with another step
3293: * under our belt. Otherwise we'll have given up above and forced a full-
3294: * blown division.
3295: *
3296: * If res is even, the shift count _cannot_ be zero. (The first step forces
3297: * a zero into the remainder's MSB, and all subsequent remainders will have
3298: * inherited it.)
3299: *
3300: * The re-shift stage exits if the next divisor has at most half a word's
3301: * worth of bits.
3302: *
3303: * For didactic reasons, the second loop will be arranged in the same way
3304: * as the first -- beginning with the division of dd into dd1, as if res
3305: * was odd. To cater for this, if res is actually even, we swap things
3306: * around during reshifting. (During the second loop, the parity of res
3307: * does not matter; we know in which half of the loop we are when we decide
3308: * to return.)
3309: */
3310: #ifdef DEBUG_LEHMER
3311: fprintferr("(sh)");
3312: #endif
3313:
3314: if (res&1)
3315: { /* after odd number of division(s) */
3316: if (dd1 && (sh = bfffo(dd1)))
3317: {
3318: shc = BITS_IN_LONG - sh;
3319: dd = (ddlo >> shc) + (dd << sh);
3320: if (!(HIGHMASK & dd))
3321: {
3322: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3323: return -res; /* full division asked for */
3324: }
3325: dd1 = (dd1lo >> shc) + (dd1 << sh);
3326: }
3327: else
3328: { /* time to return: <= 1 word left, or sh==0 */
3329: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3330: return res;
3331: }
3332: }
3333: else
3334: { /* after even number of divisions */
3335: if (dd)
3336: {
3337: sh = bfffo(dd); /* Known to be positive. */
3338: shc = BITS_IN_LONG - sh;
3339: /* dd:ddlo will become the new dd1, and v.v. */
3340: tmpd = (ddlo >> shc) + (dd << sh);
3341: dd = (dd1lo >> shc) + (dd1 << sh);
3342: dd1 = tmpd;
3343: /* This has completed the swap; now dd is again the current divisor.
3344: * The following test originally inspected dd1 -- a most subtle and
3345: * most annoying bug. The Management. */
3346: if (HIGHMASK & dd)
3347: {
3348: /* recurrence matrix is the wrong way round; swap it. */
3349: tmp0 = xu; xu = xu1; xu1 = tmp0;
3350: tmp0 = xv; xv = xv1; xv1 = tmp0;
3351: }
3352: else
3353: {
3354: /* recurrence matrix is the wrong way round; fix this. */
3355: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3356: return -res; /* full division asked for */
3357: }
3358: }
3359: else
3360: { /* time to return: <= 1 word left */
3361: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3362: return res;
3363: }
3364: } /* end reshift */
3365:
3366: #ifdef DEBUG_LEHMER
3367: fprintferr(" %lx:%lx, %lx:%lx\n", dd, ddlo, dd1, dd1lo);
3368: #endif
3369:
3370: /* The Second Loop. Rip-off of the first, but we now check for overflow
3371: * in the recurrences. Returns instead of breaking when we cannot fix the
3372: * quotient any longer.
3373: */
3374:
3375: for(;;)
3376: {
3377: /* First half of loop divides dd into dd1, and leaves the recurrence
3378: * matrix xu,...,xv1 groomed the wrong way round (xu,xv will be the newer
3379: * entries) when successful. */
3380: tmpd = dd1 - dd;
3381: if (tmpd < dd)
3382: { /* quotient suspected to be 1 */
3383: #ifdef DEBUG_LEHMER
3384: q = 1;
3385: #endif
3386: tmpu = xu + xu1;
3387: tmpv = addll(xv, xv1); /* xv,xv1 will overflow first */
3388: tmp1 = overflow;
3389: }
3390: else
3391: { /* division indicated */
3392: hiremainder = 0;
3393: q = 1 + divll(tmpd, dd);
3394: tmpd = hiremainder;
3395: tmpu = xu + q*xu1;
3396: tmpv = addll(xv, mulll(q,xv1));
3397: tmp1 = overflow | hiremainder;
3398: }
3399:
3400: tmp0 = addll(tmpv, xv1);
3401: if ((tmpd < tmpu) || overflow || tmp1 ||
3402: (dd - tmpd < tmp0)) /* !(Jebelean cond.) */
3403: {
3404: /* The recurrence matrix has not yet been warped... */
3405: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3406: break;
3407: }
3408: /* commit dd1, xu, xv */
3409: res++;
3410: dd1 = tmpd; xu = tmpu; xv = tmpv;
3411: #ifdef DEBUG_LEHMER
3412: fprintferr(" q = %ld, %lx, %lx\n", q, dd, dd1);
3413: #endif
3414: if (xv > vmax)
3415: { /* time to return */
3416: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3417: return res;
3418: }
3419:
3420: /* Second half of loop divides dd1 into dd, and the matrix returns to its
3421: * normal arrangement. */
3422: tmpd = dd - dd1;
3423: if (tmpd < dd1)
3424: { /* quotient suspected to be 1 */
3425: #ifdef DEBUG_LEHMER
3426: q = 1;
3427: #endif
3428: tmpu = xu1 + xu;
3429: tmpv = addll(xv1, xv);
3430: tmp1 = overflow;
3431: }
3432: else
3433: { /* division indicated */
3434: hiremainder = 0;
3435: q = 1 + divll(tmpd, dd1);
3436: tmpd = hiremainder;
3437: tmpu = xu1 + q*xu;
3438: tmpv = addll(xv1, mulll(q, xv));
3439: tmp1 = overflow | hiremainder;
3440: }
3441:
3442: tmp0 = addll(tmpu, xu);
3443: if ((tmpd < tmpv) || overflow || tmp1 ||
3444: (dd1 - tmpd < tmp0)) /* !(Jebelean cond.) */
3445: {
3446: /* The recurrence matrix has not yet been unwarped, so it is
3447: * the wrong way round; fix this. */
3448: *u = xu1; *u1 = xu; *v = xv1; *v1 = xv;
3449: break;
3450: }
1.2 ! noro 3451:
1.1 noro 3452: res++; /* commit dd, xu1, xv1 */
3453: dd = tmpd; xu1 = tmpu; xv1 = tmpv;
3454: #ifdef DEBUG_LEHMER
3455: fprintferr(" q = %ld, %lx, %lx\n", q, dd1, dd);
3456: #endif
3457: if (xv1 > vmax)
3458: { /* time to return */
3459: *u = xu; *u1 = xu1; *v = xv; *v1 = xv1;
3460: return res;
3461: }
3462: } /* end of second loop */
3463:
3464: return res;
3465: }
3466:
3467: /*==================================
3468: * invmod(a,b,res)
3469: *==================================
3470:  * If a is invertible mod b, return 1, and set res = a^{-1} mod b
3471: * Otherwise, return 0, and set res = gcd(a,b)
1.2 ! noro 3472: *
1.1 noro 3473: * This is sufficiently different from bezout() to be implemented separately
3474: * instead of having a bunch of extra conditionals in a single function body
3475: * to meet both purposes.
3476: */
3477:
3478: int
3479: invmod(GEN a, GEN b, GEN *res)
3480: {
3481: GEN v,v1,d,d1,q,r;
1.2 ! noro 3482: gpmem_t av, av1, lim;
1.1 noro 3483: long s;
3484: ulong g;
3485: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
3486: int lhmres; /* Lehmer stage return value */
3487:
3488: if (typ(a) != t_INT || typ(b) != t_INT) err(arither1);
3489: if (!signe(b)) { *res=absi(a); return 0; }
3490: av = avma;
3491: if (lgefint(b) == 3) /* single-word affair */
3492: {
3493: d1 = modiu(a, (ulong)(b[2]));
3494: if (d1 == gzero)
3495: {
3496: if (b[2] == 1L)
3497: { *res = gzero; return 1; }
3498: else
3499: { *res = absi(b); return 0; }
3500: }
3501: g = xgcduu((ulong)(b[2]), (ulong)(d1[2]), 1, &xv, &xv1, &s);
3502: #ifdef DEBUG_LEHMER
3503: fprintferr(" <- %lu,%lu\n", (ulong)(b[2]), (ulong)(d1[2]));
3504: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
3505: #endif
3506: avma = av;
3507: if (g != 1UL) { *res = utoi(g); return 0; }
3508: xv = xv1 % (ulong)(b[2]); if (s < 0) xv = ((ulong)(b[2])) - xv;
3509: *res = utoi(xv); return 1;
3510: }
3511:
3512: (void)new_chunk(lgefint(b));
3513: d = absi(b); d1 = modii(a,d);
3514:
3515: v=gzero; v1=gun; /* general case */
3516: #ifdef DEBUG_LEHMER
3517: fprintferr("INVERT: -------------------------\n");
3518: output(d1);
3519: #endif
3520: av1 = avma; lim = stack_lim(av,1);
3521:
3522: while (lgefint(d) > 3 && signe(d1))
3523: {
3524: #ifdef DEBUG_LEHMER
3525: fprintferr("Calling Lehmer:\n");
3526: #endif
3527: lhmres = lgcdii((ulong*)d, (ulong*)d1, &xu, &xu1, &xv, &xv1, MAXULONG);
3528: if (lhmres != 0) /* check progress */
3529: { /* apply matrix */
3530: #ifdef DEBUG_LEHMER
3531: fprintferr("Lehmer returned %d [%lu,%lu;%lu,%lu].\n",
3532: lhmres, xu, xu1, xv, xv1);
3533: #endif
3534: if ((lhmres == 1) || (lhmres == -1))
3535: {
3536: if (xv1 == 1)
3537: {
3538: r = subii(d,d1); d=d1; d1=r;
3539: a = subii(v,v1); v=v1; v1=a;
3540: }
3541: else
3542: {
3543: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
3544: a = subii(v, mului(xv1,v1)); v=v1; v1=a;
3545: }
3546: }
3547: else
3548: {
3549: r = subii(muliu(d,xu), muliu(d1,xv));
3550: a = subii(muliu(v,xu), muliu(v1,xv));
3551: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
3552: v1 = subii(muliu(v,xu1), muliu(v1,xv1)); v = a;
3553: if (lhmres&1)
3554: {
3555: setsigne(d,-signe(d));
3556: setsigne(v,-signe(v));
3557: }
3558: else
3559: {
3560: if (signe(d1)) { setsigne(d1,-signe(d1)); }
3561: setsigne(v1,-signe(v1));
3562: }
3563: }
3564: }
3565: #ifdef DEBUG_LEHMER
3566: else
3567: fprintferr("Lehmer returned 0.\n");
3568: output(d); output(d1); output(v); output(v1);
3569: sleep(1);
3570: #endif
3571:
3572: if (lhmres <= 0 && signe(d1))
3573: {
3574: q = dvmdii(d,d1,&r);
3575: #ifdef DEBUG_LEHMER
3576: fprintferr("Full division:\n");
3577: printf(" q = "); output(q); sleep (1);
3578: #endif
3579: a = subii(v,mulii(q,v1));
3580: v=v1; v1=a;
3581: d=d1; d1=r;
3582: }
3583: if (low_stack(lim, stack_lim(av,1)))
3584: {
3585: GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
3586: if(DEBUGMEM>1) err(warnmem,"invmod");
3587: gerepilemany(av1,gptr,4);
3588: }
3589: } /* end while */
3590:
3591: /* Postprocessing - final sprint */
3592: if (signe(d1))
3593: {
3594: /* Assertions: lgefint(d)==lgefint(d1)==3, and
3595: * gcd(d,d1) is nonzero and fits into one word
3596: */
3597: g = xxgcduu((ulong)(d[2]), (ulong)(d1[2]), 1, &xu, &xu1, &xv, &xv1, &s);
3598: #ifdef DEBUG_LEHMER
3599: output(d);output(d1);output(v);output(v1);
3600: fprintferr(" <- %lu,%lu\n", (ulong)(d[2]), (ulong)(d1[2]));
3601: fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
3602: #endif
3603: if (g != 1UL) { avma = av; *res = utoi(g); return 0; }
3604: /* (From the xgcduu() blurb:)
3605: * For finishing the multiword modinv, we now have to multiply the
3606: * returned matrix (with properly adjusted signs) onto the values
3607: * v' and v1' previously obtained from the multiword division steps.
3608: * Actually, it is sufficient to take the scalar product of [v',v1']
3609: * with [u1,-v1], and change the sign if s==1.
3610: */
3611: v = subii(muliu(v,xu1),muliu(v1,xv1));
3612: if (s > 0) setsigne(v,-signe(v));
3613: avma = av; *res = modii(v,b);
3614: #ifdef DEBUG_LEHMER
3615:   output(*res); fprintferr("============================Done.\n");
3616: sleep(1);
3617: #endif
3618: return 1;
3619: }
3620: /* get here when the final sprint was skipped (d1 was zero already) */
3621: avma = av;
3622: if (!egalii(d,gun)) { *res = icopy(d); return 0; }
3623: *res = modii(v,b);
3624: #ifdef DEBUG_LEHMER
3625: output(*res); fprintferr("============================Done.\n");
3626: sleep(1);
3627: #endif
3628: return 1;
3629: }
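
/* Usage sketch (disabled): invmod() on small operands, using the same utoi()
 * helper the routine itself relies on.  The names below are ad hoc. */
#if 0
static void
invmod_example(void)
{
  GEN inv;
  if (invmod(utoi(3UL), utoi(7UL), &inv))
  { /* invertible: inv is 3^(-1) mod 7, i.e. 5, since 3*5 = 15 = 1 (mod 7) */ }
  else
  { /* not invertible: inv holds gcd(3,7) instead */ }
}
#endif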
3630:
3631: /*==================================
3632: * bezout(a,b,pu,pv)
3633: *==================================
3634: * Return g = gcd(a,b) >= 0, and assign GENs u,v through pointers pu,pv
3635: * such that g = u*a + v*b.
3636: * Special cases:
3637: * a == b == 0 ==> pick u=1, v=0
3638: * a != 0 == b ==> keep v=0
3639: * a == 0 != b ==> keep u=0
3640: * |a| == |b| != 0 ==> keep u=0, set v=+-1
3641: * Assignments through pu,pv will be suppressed when the corresponding
3642: * pointer is NULL (but the computations will happen nonetheless).
3643: */
3644:
3645: GEN
3646: bezout(GEN a, GEN b, GEN *pu, GEN *pv)
3647: {
3648: GEN t,u,u1,v,v1,d,d1,q,r;
3649: GEN *pt;
1.2 ! noro 3650: gpmem_t av, av1, lim;
1.1 noro 3651: long s;
3652: int sa, sb;
3653: ulong g;
3654: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
3655: int lhmres; /* Lehmer stage return value */
3656:
3657: if (typ(a) != t_INT || typ(b) != t_INT) err(arither1);
3658: s = absi_cmp(a,b);
3659: if (s < 0)
3660: {
3661: t=b; b=a; a=t;
3662: pt=pu; pu=pv; pv=pt;
3663: }
3664: /* now |a| >= |b| */
3665:
3666: sa = signe(a); sb = signe(b);
3667: if (!sb)
3668: {
3669: if (pv != NULL) *pv = gzero;
3670: switch(sa)
3671: {
3672: case 0:
3673: if (pu != NULL) *pu = gun; return gzero;
3674: case 1:
3675: if (pu != NULL) *pu = gun; return icopy(a);
3676: case -1:
3677: if (pu != NULL) *pu = negi(gun); return(negi(a));
3678: }
3679: }
3680: if (s == 0) /* |a| == |b| != 0 */
3681: {
3682: if (pu != NULL) *pu = gzero;
3683: if (sb > 0)
3684: {
3685: if (pv != NULL) *pv = gun; return icopy(b);
3686: }
3687: else
3688: {
3689: if (pv != NULL) *pv = negi(gun); return(negi(b));
3690: }
3691: }
3692: /* now |a| > |b| > 0 */
3693:
3694: if (lgefint(a) == 3) /* single-word affair */
3695: {
3696: g = xxgcduu((ulong)(a[2]), (ulong)(b[2]), 0, &xu, &xu1, &xv, &xv1, &s);
3697: sa = s > 0 ? sa : -sa;
3698: sb = s > 0 ? -sb : sb;
3699: if (pu != NULL)
3700: {
3701: if (xu == 0) *pu = gzero; /* can happen when b divides a */
3702: else if (xu == 1) *pu = sa < 0 ? negi(gun) : gun;
3703: else if (xu == 2) *pu = sa < 0 ? negi(gdeux) : gdeux;
3704: else
3705: {
3706: *pu = cgeti(3);
3707: (*pu)[1] = evalsigne(sa)|evallgefint(3);
3708: (*pu)[2] = xu;
3709: }
3710: }
3711: if (pv != NULL)
3712: {
3713: if (xv == 1) *pv = sb < 0 ? negi(gun) : gun;
3714: else if (xv == 2) *pv = sb < 0 ? negi(gdeux) : gdeux;
3715: else
3716: {
3717: *pv = cgeti(3);
3718: (*pv)[1] = evalsigne(sb)|evallgefint(3);
3719: (*pv)[2] = xv;
3720: }
3721: }
3722: if (g == 1) return gun;
3723: else if (g == 2) return gdeux;
3724: else
3725: {
3726: r = cgeti(3);
3727: r[1] = evalsigne(1)|evallgefint(3);
3728: r[2] = g;
3729: return r;
3730: }
3731: }
3732:
3733: /* general case */
3734: av = avma;
3735: (void)new_chunk(lgefint(b) + (lgefint(a)<<1)); /* room for u,v,gcd */
3736: /* if a is significantly larger than b, calling lgcdii() is not the best
3737: * way to start -- reduce a mod b first
3738: */
3739: if (lgefint(a) > lgefint(b))
3740: {
3741: d = absi(b), q = dvmdii(absi(a), d, &d1);
3742: if (!signe(d1)) /* a == qb */
3743: {
3744: avma = av;
3745: if (pu != NULL) *pu = gzero;
3746: if (pv != NULL) *pv = sb < 0 ? negi(gun) : gun;
3747: return (icopy(d));
3748: }
3749: else
3750: {
3751: u = gzero;
3752: u1 = v = gun;
3753: v1 = negi(q);
3754: }
3755: /* if this results in lgefint(d) == 3, will fall past main loop */
3756: }
3757: else
3758: {
3759: d = absi(a); d1 = absi(b);
3760: u = v1 = gun; u1 = v = gzero;
3761: }
3762: av1 = avma; lim = stack_lim(av, 1);
3763:
3764: /* main loop is almost identical to that of invmod() */
3765: while (lgefint(d) > 3 && signe(d1))
3766: {
3767: lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, MAXULONG);
3768: if (lhmres != 0) /* check progress */
3769: { /* apply matrix */
3770: if ((lhmres == 1) || (lhmres == -1))
3771: {
3772: if (xv1 == 1)
3773: {
3774: r = subii(d,d1); d=d1; d1=r;
3775: a = subii(u,u1); u=u1; u1=a;
3776: a = subii(v,v1); v=v1; v1=a;
3777: }
3778: else
3779: {
3780: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
3781: a = subii(u, mului(xv1,u1)); u=u1; u1=a;
3782: a = subii(v, mului(xv1,v1)); v=v1; v1=a;
3783: }
3784: }
3785: else
3786: {
3787: r = subii(muliu(d,xu), muliu(d1,xv));
3788: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
3789: a = subii(muliu(u,xu), muliu(u1,xv));
3790: u1 = subii(muliu(u,xu1), muliu(u1,xv1)); u = a;
3791: a = subii(muliu(v,xu), muliu(v1,xv));
3792: v1 = subii(muliu(v,xu1), muliu(v1,xv1)); v = a;
3793: if (lhmres&1)
3794: {
3795: setsigne(d,-signe(d));
3796: setsigne(u,-signe(u));
3797: setsigne(v,-signe(v));
3798: }
3799: else
3800: {
3801: if (signe(d1)) { setsigne(d1,-signe(d1)); }
3802: setsigne(u1,-signe(u1));
3803: setsigne(v1,-signe(v1));
3804: }
3805: }
3806: }
3807: if (lhmres <= 0 && signe(d1))
3808: {
3809: q = dvmdii(d,d1,&r);
3810: #ifdef DEBUG_LEHMER
3811: fprintferr("Full division:\n");
3812: printf(" q = "); output(q); sleep (1);
3813: #endif
3814: a = subii(u,mulii(q,u1));
3815: u=u1; u1=a;
3816: a = subii(v,mulii(q,v1));
3817: v=v1; v1=a;
3818: d=d1; d1=r;
3819: }
3820: if (low_stack(lim, stack_lim(av,1)))
3821: {
3822: GEN *gptr[6]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&u; gptr[3]=&u1;
3823: gptr[4]=&v; gptr[5]=&v1;
3824: if(DEBUGMEM>1) err(warnmem,"bezout");
3825: gerepilemany(av1,gptr,6);
3826: }
3827: } /* end while */
3828:
3829: /* Postprocessing - final sprint */
3830: if (signe(d1))
3831: {
3832: /* Assertions: lgefint(d)==lgefint(d1)==3, and
3833: * gcd(d,d1) is nonzero and fits into one word
3834: */
3835: g = xxgcduu((ulong)(d[2]), (ulong)(d1[2]), 0, &xu, &xu1, &xv, &xv1, &s);
3836: u = subii(muliu(u,xu), muliu(u1, xv));
3837: v = subii(muliu(v,xu), muliu(v1, xv));
3838: if (s < 0) { sa = -sa; sb = -sb; }
3839: avma = av;
3840: if (pu != NULL) *pu = sa < 0 ? negi(u) : icopy(u);
3841: if (pv != NULL) *pv = sb < 0 ? negi(v) : icopy(v);
3842: if (g == 1) return gun;
3843: else if (g == 2) return gdeux;
3844: else
3845: {
3846: r = cgeti(3);
3847: r[1] = evalsigne(1)|evallgefint(3);
3848: r[2] = g;
3849: return r;
3850: }
3851: }
3852: /* get here when the final sprint was skipped (d1 was zero already).
3853: * Now the matrix is final, and d contains the gcd.
3854: */
3855: avma = av;
3856: if (pu != NULL) *pu = sa < 0 ? negi(u) : icopy(u);
3857: if (pv != NULL) *pv = sb < 0 ? negi(v) : icopy(v);
3858: return icopy(d);
3859: }
3860:
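/* Illustrative sketch, not part of the original kernel: one way a caller might
 * use bezout() above, assuming its signature is bezout(GEN a, GEN b, GEN *pu,
 * GEN *pv) as the code suggests, and that pari_init() has been called.  The
 * function name bezout_example and the sample operands 42 and 30 are ours; the
 * returned g should satisfy g == u*a + v*b. */
static void
bezout_example(void)
{
  gpmem_t av = avma;
  GEN u, v, a = stoi(42), b = stoi(30);
  GEN g = bezout(a, b, &u, &v); /* g = gcd(a,b) = 6 here */
  if (!egalii(g, addii(mulii(u,a), mulii(v,b))))
    err(talker, "bezout_example: Bezout identity check failed");
  output(g); output(u); output(v);
  avma = av; /* discard everything created in this example */
}
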
3861: /*==================================
3862: * cbezout(a,b,uu,vv)
3863: *==================================
3864: * Same as bezout() but for C longs.
3865: * Return g = gcd(a,b) >= 0, and assign longs u,v through pointers uu,vv
3866: * such that g = u*a + v*b.
3867: * Special cases:
              3868:  *    a == b == 0 ==> pick u=1, v=0 (and return 0)
3869: * a != 0 == b ==> keep v=0
3870: * a == 0 != b ==> keep u=0
3871: * |a| == |b| != 0 ==> keep u=0, set v=+-1
3872: * Assignments through uu,vv happen unconditionally; non-NULL pointers
3873: * _must_ be used.
3874: */
3875: long
3876: cbezout(long a,long b,long *uu,long *vv)
3877: {
3878: long s,*t;
3879: ulong d = labs(a), d1 = labs(b);
3880: ulong r,u,u1,v,v1;
3881:
3882: #ifdef DEBUG_CBEZOUT
3883: fprintferr("> cbezout(%ld,%ld,%p,%p)\n", a, b, (void *)uu, (void *)vv);
3884: #endif
3885: if (!b)
3886: {
3887: *vv=0L;
3888: if (!a)
3889: {
3890: *uu=1L;
3891: #ifdef DEBUG_CBEZOUT
              3892:       fprintferr("< %ld (%ld, %ld)\n", 0L, *uu, *vv);
3893: #endif
3894: return 0L;
3895: }
3896: *uu = a < 0 ? -1L : 1L;
3897: #ifdef DEBUG_CBEZOUT
3898: fprintferr("< %ld (%ld, %ld)\n", (long)d, *uu, *vv);
3899: #endif
3900: return (long)d;
3901: }
3902: else if (!a || (d == d1))
3903: {
3904: *uu = 0L; *vv = b < 0 ? -1L : 1L;
3905: #ifdef DEBUG_CBEZOUT
3906: fprintferr("< %ld (%ld, %ld)\n", (long)d1, *uu, *vv);
3907: #endif
3908: return (long)d1;
3909: }
3910: else if (d == 1) /* frequently used by nfinit */
3911: {
3912: *uu = a; *vv = 0L;
3913: #ifdef DEBUG_CBEZOUT
3914: fprintferr("< %ld (%ld, %ld)\n", 1L, *uu, *vv);
3915: #endif
3916: return 1L;
3917: }
3918: else if (d < d1)
3919: {
3920: /* bug in gcc-2.95.3:
3921: * s = a; a = b; b = s; produces wrong result a = b. This is OK: */
3922: { long _x = a; a = b; b = _x; } /* in order to keep the right signs */
3923: r = d; d = d1; d1 = r;
3924: t = uu; uu = vv; vv = t;
3925: #ifdef DEBUG_CBEZOUT
3926: fprintferr(" swapping\n");
3927: #endif
3928: }
3929: /* d > d1 > 0 */
3930: r = xxgcduu(d, d1, 0, &u, &u1, &v, &v1, &s);
3931: if (s < 0)
3932: {
3933: *uu = a < 0 ? u : -(long)u;
3934: *vv = b < 0 ? -(long)v : v;
3935: }
3936: else
3937: {
3938: *uu = a < 0 ? -(long)u : u;
3939: *vv = b < 0 ? v : -(long)v;
3940: }
3941: #ifdef DEBUG_CBEZOUT
3942: fprintferr("< %ld (%ld, %ld)\n", (long)r, *uu, *vv);
3943: #endif
3944: return (long)r;
3945: }
3946:
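/* Illustrative sketch, not part of the original kernel: calling cbezout()
 * above on word-sized operands and checking the Bezout identity.  The function
 * name and the sample values are ours. */
static void
cbezout_example(void)
{
  long u, v;
  long g = cbezout(42L, -30L, &u, &v);
  /* expect g == 6 with 42*u + (-30)*v == 6 */
  if (g != 6L || 42L*u + (-30L)*v != g)
    err(talker, "cbezout_example: Bezout identity check failed");
}
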
3947: /*==========================================================
3948: * ratlift(GEN x, GEN m, GEN *a, GEN *b, GEN amax, GEN bmax)
3949: *==========================================================
3950: * Reconstruct rational number from its residue x mod m
3951: * Given t_INT x, m, amax>=0, bmax>0 such that
3952: * 0 <= x < m; 2*amax*bmax < m
3953: * attempts to find t_INT a, b such that
3954: * (1) a = b*x (mod m)
3955: * (2) |a| <= amax, 0 < b <= bmax
3956: * (3) gcd(m, b) = gcd(a, b)
3957: * If unsuccessful, it will return 0 and leave a,b unchanged (and
3958: * caller may deduce no such a,b exist). If successful, sets a,b
3959: * and returns 1. If there exist a,b satisfying (1), (2), and
3960: * (3') gcd(m, b) = 1
3961: * then they are uniquely determined subject to (1),(2) and
3962: * (3'') gcd(a, b) = 1,
3963: * and will be returned by the routine. (The caller may wish to
3964: * check gcd(a,b)==1, either directly or based on known prime
3965: * divisors of m, depending on the application.)
3966: * Reference:
3967: @article {MR97c:11116,
3968: AUTHOR = {Collins, George E. and Encarnaci{\'o}n, Mark J.},
3969: TITLE = {Efficient rational number reconstruction},
3970: JOURNAL = {J. Symbolic Comput.},
3971: VOLUME = {20},
3972: YEAR = {1995},
3973: NUMBER = {3},
3974: PAGES = {287--297},
3975: }
1.2 ! noro 3976: * Preprint available from:
! 3977: * ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1994/94-64.ps.gz
1.1 noro 3978: */
3979:
3980: /* #define DEBUG_RATLIFT */
3981:
3982: int
3983: ratlift(GEN x, GEN m, GEN *a, GEN *b, GEN amax, GEN bmax)
3984: {
3985: GEN d,d1,v,v1,q,r;
1.2 ! noro 3986: gpmem_t av = avma, av1, lim;
1.1 noro 3987: long lb,lr,lbb,lbr,s,s0;
3988: ulong vmax;
3989: ulong xu,xu1,xv,xv1; /* Lehmer stage recurrence matrix */
3990: int lhmres; /* Lehmer stage return value */
3991:
3992: if ((typ(x) | typ(m) | typ(amax) | typ(bmax)) != t_INT) err(arither1);
3993: if (signe(bmax) <= 0)
3994: err(talker, "ratlift: bmax must be > 0, found\n\tbmax=%Z\n", bmax);
3995: if (signe(amax) < 0)
              3996:     err(talker, "ratlift: amax must be >= 0, found\n\tamax=%Z\n", amax);
3997: /* check 2*amax*bmax < m */
3998: if (cmpii(shifti(mulii(amax, bmax), 1), m) >= 0)
3999: err(talker, "ratlift: must have 2*amax*bmax < m, found\n\tamax=%Z\n\tbmax=%Z\n\tm=%Z\n", amax,bmax,m);
4000: /* we _could_ silently replace x with modii(x,m) instead of the following,
4001: * but let's leave this up to the caller
4002: */
4003: avma = av; s = signe(x);
4004: if (s < 0 || cmpii(x,m) >= 0)
4005: err(talker, "ratlift: must have 0 <= x < m, found\n\tx=%Z\n\tm=%Z\n", x,m);
4006:
4007: /* special cases x=0 and/or amax=0 */
4008: if (s == 0)
4009: {
4010: if (a != NULL) *a = gzero;
4011: if (b != NULL) *b = gun;
4012: return 1;
4013: }
4014: else if (signe(amax)==0)
4015: return 0;
4016: /* assert: m > x > 0, amax > 0 */
4017:
4018: /* check here whether a=x, b=1 is a solution */
4019: if (cmpii(x,amax) <= 0)
4020: {
4021: if (a != NULL) *a = icopy(x);
4022: if (b != NULL) *b = gun;
4023: return 1;
4024: }
4025:
4026: /* There is no special case for single-word numbers since this is
4027: * mainly meant to be used with large moduli.
4028: */
4029: (void)new_chunk(lgefint(bmax) + lgefint(amax)); /* room for a,b */
4030: d = m; d1 = x;
4031: v = gzero; v1 = gun;
4032: /* assert d1 > amax, v1 <= bmax here */
4033: lb = lgefint(bmax);
4034: lbb = bfffo(bmax[2]);
4035: s = 1;
4036: av1 = avma; lim = stack_lim(av, 1);
4037:
4038: /* general case: Euclidean division chain starting with m div x, and
4039: * with bounds on the sequence of convergents' denoms v_j.
4040: * Just to be different from what invmod and bezout are doing, we work
4041: * here with the all-nonnegative matrices [u,u1;v,v1]=prod_j([0,1;1,q_j]).
4042: * Loop invariants:
4043: * (a) (sign)*[-v,v1]*x = [d,d1] (mod m) (componentwise)
4044: * (sign initially +1, changes with each Euclidean step)
4045: * so [a,b] will be obtained in the form [-+d,v] or [+-d1,v1];
4046: * this congruence is a consequence of
4047: * (b) [x,m]~ = [u,u1;v,v1]*[d1,d]~,
4048: * where u,u1 is the usual numerator sequence starting with 1,0
4049: * instead of 0,1 (just multiply the eqn on the left by the inverse
4050: * matrix, which is det*[v1,-u1;-v,u], where "det" is the same as the
4051: * "(sign)" in (a)). From m = v*d1 + v1*d and
4052: * (c) d > d1 >= 0, 0 <= v < v1,
4053: * we have d >= m/(2*v1), so while v1 remains smaller than m/(2*amax),
4054: * the pair [-(sign)*d,v] satisfies (1) but violates (2) (d > amax).
4055: * Conversely, v1 > bmax indicates that no further solutions will be
4056: * forthcoming; [-(sign)*d,v] will be the last, and first, candidate.
              4057:  * Thus there's at most one point in the division chain where a solution
4058: * can live: v < bmax, v1 >= m/(2*amax) > bmax, and this is acceptable
4059: * iff in fact d <= amax (e.g. m=221, x=34 or 35, amax=bmax=10 fail on
4060: * this count while x=32,33,36,37 succeed). However, a division may leave
4061: * a zero residue before we ever reach this point (consider m=210, x=35,
4062: * amax=bmax=10), and our caller may find that gcd(d,v) > 1 (numerous
4063: * examples -- keep m=210 and consider any of x=29,31,32,33,34,36,37,38,
4064: * 39,40,41).
4065: * Furthermore, at the start of the loop body we have in fact
4066: * (c') 0 <= v < v1 <= bmax, d > d1 > amax >= 0,
4067: * (and are never done already).
1.2 ! noro 4068: *
1.1 noro 4069: * Main loop is similar to those of invmod() and bezout(), except for
4070: * having to determine appropriate vmax bounds, and checking termination
4071: * conditions. The signe(d1) condition is only for paranoia
4072: */
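  /* Worked instance of invariant (a), added for illustration (not in the
   * original source): take m = 13, x = 5.  After one full division step
   * (q = 2, r = 3) we have d = 5, d1 = 3, v = 1, v1 = 2 and sign = -1, and
   * indeed  -(-v)*x =  1*5 =   5 = d  (mod 13),
   *         -( v1)*x = -2*5 = -10 = 3 = d1 (mod 13),
   * i.e. (sign)*[-v,v1]*x = [d,d1] (mod m) componentwise, as claimed. */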
4073: while (lgefint(d) > 3 && signe(d1))
4074: {
4075: /* determine vmax for lgcdii so as to ensure v won't overshoot.
4076: * If v+v1 > bmax, the next step would take v1 beyond the limit, so
4077: * since [+-d1,v1] is not a solution, we give up. Otherwise if v+v1
              4078:      * is way shorter than bmax, use vmax=MAXULONG. Otherwise, set vmax
4079: * to a crude lower approximation of bmax/(v+v1), or to 1, which will
4080: * allow the inner loop to do one step
4081: */
4082: r = addii(v,v1);
4083: lr = lb - lgefint(r);
4084: lbr = bfffo(r[2]);
4085: if (cmpii(r,bmax) > 0) /* done, not found */
4086: {
4087: avma = av;
4088: return 0;
4089: }
4090: else if (lr > 1) /* still more than a word's worth to go */
4091: {
4092: vmax = MAXULONG;
4093: }
4094: else /* take difference of bit lengths */
4095: {
4096: lr = (lr << TWOPOTBITS_IN_LONG) - lbb + lbr;
4097: if (lr > BITS_IN_LONG)
4098: vmax = MAXULONG;
4099: else if (lr == 0)
4100: vmax = 1UL;
4101: else
4102: vmax = 1UL << (lr-1);
4103: /* the latter is pessimistic but faster than a division */
4104: }
4105: /* do a Lehmer-Jebelean round */
4106: lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, vmax);
4107: if (lhmres != 0) /* check progress */
4108: { /* apply matrix */
4109: if ((lhmres == 1) || (lhmres == -1))
4110: {
4111: s = -s;
4112: if (xv1 == 1)
4113: {
4114: /* re-use v+v1 computed above */
4115: v=v1; v1=r;
4116: r = subii(d,d1); d=d1; d1=r;
4117: }
4118: else
4119: {
4120: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
4121: r = addii(v, mului(xv1,v1)); v=v1; v1=r;
4122: }
4123: }
4124: else
4125: {
4126: r = subii(muliu(d,xu), muliu(d1,xv));
4127: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
4128: r = addii(muliu(v,xu), muliu(v1,xv));
4129: v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
4130: if (lhmres&1)
4131: {
4132: setsigne(d,-signe(d));
4133: s = -s;
4134: }
4135: else if (signe(d1))
4136: {
4137: setsigne(d1,-signe(d1));
4138: }
4139: }
4140: /* check whether we're done. Assert v <= bmax here. Examine v1:
4141: * if v1 > bmax, check d and return 0 or 1 depending on the outcome;
4142: * if v1 <= bmax, check d1 and return 1 if d1 <= amax, otherwise
4143: * proceed.
4144: */
4145: if (cmpii(v1,bmax) > 0) /* certainly done */
4146: {
4147: avma = av;
4148: if (cmpii(d,amax) <= 0) /* done, found */
4149: {
4150: if (a != NULL)
4151: {
4152: *a = icopy(d);
4153: setsigne(*a,-s); /* sign opposite to s */
4154: }
4155: if (b != NULL) *b = icopy(v);
4156: return 1;
4157: }
4158: else /* done, not found */
4159: return 0;
4160: }
4161: else if (cmpii(d1,amax) <= 0) /* also done, found */
4162: {
4163: avma = av;
4164: if (a != NULL)
4165: {
4166: if (signe(d1))
4167: {
4168: *a = icopy(d1);
4169: setsigne(*a,s); /* same sign as s */
4170: }
4171: else
4172: *a = gzero;
4173: }
4174: if (b != NULL) *b = icopy(v1);
4175: return 1;
4176: }
4177: } /* lhmres != 0 */
4178:
4179: if (lhmres <= 0 && signe(d1))
4180: {
4181: q = dvmdii(d,d1,&r);
4182: #ifdef DEBUG_LEHMER
4183: fprintferr("Full division:\n");
4184: printf(" q = "); output(q); sleep (1);
4185: #endif
4186: d=d1; d1=r;
4187: r = addii(v,mulii(q,v1));
4188: v=v1; v1=r;
4189: s = -s;
4190: /* check whether we are done now. Since we weren't before the div, it
4191: * suffices to examine v1 and d1 -- the new d (former d1) cannot cut it
4192: */
4193: if (cmpii(v1,bmax) > 0) /* done, not found */
4194: {
4195: avma = av;
4196: return 0;
4197: }
4198: else if (cmpii(d1,amax) <= 0) /* done, found */
4199: {
4200: avma = av;
4201: if (a != NULL)
4202: {
4203: if (signe(d1))
4204: {
4205: *a = icopy(d1);
4206: setsigne(*a,s); /* same sign as s */
4207: }
4208: else
4209: *a = gzero;
4210: }
4211: if (b != NULL) *b = icopy(v1);
4212: return 1;
4213: }
4214: }
4215:
4216: if (low_stack(lim, stack_lim(av,1)))
4217: {
4218: GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
4219: if(DEBUGMEM>1) err(warnmem,"ratlift");
4220: gerepilemany(av1,gptr,4);
4221: }
4222: } /* end while */
4223:
4224: /* Postprocessing - final sprint. Since we usually underestimate vmax,
4225: * this function needs a loop here instead of a simple conditional.
4226: * Note we can only get here when amax fits into one word (which will
4227: * typically not be the case!). The condition is bogus -- d1 is never
4228: * zero at the start of the loop. There will be at most a few iterations,
4229: * so we don't bother collecting garbage
4230: */
4231: while (signe(d1))
4232: {
4233: /* Assertions: lgefint(d)==lgefint(d1)==3.
4234: * Moreover, we aren't done already, or we would have returned by now.
4235: * Recompute vmax...
4236: */
4237: #ifdef DEBUG_RATLIFT
4238: fprintferr("rl-fs: d,d1=%Z,%Z\n", d, d1);
4239: fprintferr("rl-fs: v,v1=%Z,%Z\n", v, v1);
4240: #endif
4241: r = addii(v,v1);
4242: lr = lb - lgefint(r);
4243: lbr = bfffo(r[2]);
4244: if (cmpii(r,bmax) > 0) /* done, not found */
4245: {
4246: avma = av;
4247: return 0;
4248: }
4249: else if (lr > 1) /* still more than a word's worth to go */
4250: {
4251: vmax = MAXULONG; /* (cannot in fact happen) */
4252: }
4253: else /* take difference of bit lengths */
4254: {
4255: lr = (lr << TWOPOTBITS_IN_LONG) - lbb + lbr;
4256: if (lr > BITS_IN_LONG)
4257: vmax = MAXULONG;
4258: else if (lr == 0)
4259: vmax = 1UL;
4260: else
4261: vmax = 1UL << (lr-1); /* as above */
4262: }
4263: #ifdef DEBUG_RATLIFT
4264: fprintferr("rl-fs: vmax=%lu\n", vmax);
4265: #endif
4266: /* single-word "Lehmer", discarding the gcd or whatever it returns */
4267: (void)rgcduu((ulong)d[2], (ulong)d1[2], vmax, &xu, &xu1, &xv, &xv1, &s0);
4268: #ifdef DEBUG_RATLIFT
4269: fprintferr("rl-fs: [%lu,%lu; %lu,%lu] %s\n",
4270: xu, xu1, xv, xv1,
4271: s0 < 0 ? "-" : "+");
4272: #endif
4273: if (xv1 == 1) /* avoid multiplications */
4274: {
4275: /* re-use v+v1 computed above */
4276: v=v1; v1=r;
4277: r = subii(d,d1); d=d1; d1=r;
4278: s = -s;
4279: }
4280: else if (xu == 0) /* and xv==1, xu1==1, xv1 > 1 */
4281: {
4282: r = subii(d, mului(xv1,d1)); d=d1; d1=r;
4283: r = addii(v, mului(xv1,v1)); v=v1; v1=r;
4284: s = -s;
4285: }
4286: else
4287: {
4288: r = subii(muliu(d,xu), muliu(d1,xv));
4289: d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
4290: r = addii(muliu(v,xu), muliu(v1,xv));
4291: v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
4292: if (s0 < 0)
4293: {
4294: setsigne(d,-signe(d));
4295: s = -s;
4296: }
4297: else if (signe(d1)) /* sic: might vanish now */
4298: {
4299: setsigne(d1,-signe(d1));
4300: }
4301: }
4302: /* check whether we're done, as above. Assert v <= bmax. Examine v1:
4303: * if v1 > bmax, check d and return 0 or 1 depending on the outcome;
4304: * if v1 <= bmax, check d1 and return 1 if d1 <= amax, otherwise proceed.
4305: */
4306: if (cmpii(v1,bmax) > 0) /* certainly done */
4307: {
4308: avma = av;
4309: if (cmpii(d,amax) <= 0) /* done, found */
4310: {
4311: if (a != NULL)
4312: {
4313: *a = icopy(d);
4314: setsigne(*a,-s); /* sign opposite to s */
4315: }
4316: if (b != NULL) *b = icopy(v);
4317: return 1;
4318: }
4319: else /* done, not found */
4320: return 0;
4321: }
4322: else if (cmpii(d1,amax) <= 0) /* also done, found */
4323: {
4324: avma = av;
4325: if (a != NULL)
4326: {
4327: if (signe(d1))
4328: {
4329: *a = icopy(d1);
4330: setsigne(*a,s); /* same sign as s */
4331: }
4332: else
4333: *a = gzero;
4334: }
4335: if (b != NULL) *b = icopy(v1);
4336: return 1;
4337: }
4338: } /* while */
4339:
4340: /* get here when we have run into d1 == 0 before returning... in fact,
4341: * this cannot happen.
4342: */
4343: err(talker, "ratlift failed to catch d1 == 0\n");
4344: /* NOTREACHED */
4345: return 0;
4346: }
4347:
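/* Illustrative sketch, not part of the original kernel: rational
 * reconstruction with ratlift() above.  With m = 997 and x = 428 (which is
 * 5 * 7^(-1) mod 997), the bounds amax = bmax = 22 satisfy 2*amax*bmax < m,
 * so the unique lift a/b = 5/7 should be recovered.  Assumes pari_init()
 * was called; the function name is ours. */
static void
ratlift_example(void)
{
  gpmem_t av = avma;
  GEN a, b;
  GEN x = stoi(428), m = stoi(997), amax = stoi(22), bmax = stoi(22);
  if (!ratlift(x, m, &a, &b, amax, bmax))
    err(talker, "ratlift_example: no lift found");
  output(a); output(b); /* expect 5 and 7 */
  avma = av;
}
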
4348: /********************************************************************/
4349: /** **/
4350: /** INTEGER SQUARE ROOT **/
4351: /** **/
4352: /********************************************************************/
4353:
4354: /* sqrt()'s result may be off by 1 when a is not representable exactly as a
4355: * double [64bit machine] */
4356: ulong
4357: usqrtsafe(ulong a)
4358: {
4359: ulong x = (ulong)sqrt((double)a);
4360: #ifdef LONG_IS_64BIT
4361: ulong y = x+1; if (y <= MAXHALFULONG && y*y <= a) x = y;
4362: #endif
4363: return x;
4364: }
4365:
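/* Illustrative sketch, not part of the original kernel: checking that
 * usqrtsafe() above returns the truncated square root, i.e.
 * x^2 <= a < (x+1)^2.  The overflow guard reuses MAXHALFULONG as above;
 * the function name and the test value are ours. */
static void
usqrtsafe_example(void)
{
  ulong a = 999999999UL;
  ulong x = usqrtsafe(a);
  /* (x+1)^2 is only computed when it cannot overflow; otherwise it
   * certainly exceeds a */
  if (x*x > a || (x+1 <= MAXHALFULONG && (x+1)*(x+1) <= a))
    err(talker, "usqrtsafe_example: not the truncated square root");
}
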
4366: /* Assume a >= 0 has <= 4 words, return trunc[sqrt(a)] */
4367: ulong
4368: mpsqrtl(GEN a)
4369: {
4370: long l = lgefint(a);
4371: ulong x,y,z,k,m;
4372: int L, s;
1.2 ! noro 4373:
1.1 noro 4374: if (l <= 3) return l==2? 0: usqrtsafe((ulong)a[2]);
4375: s = bfffo(a[2]);
4376: if (s > 1)
4377: {
4378: s &= ~1UL; /* make it even */
4379: z = BITS_IN_LONG - s;
4380: m = (ulong)(a[2] << s) | (a[3] >> z);
4381: L = z>>1;
4382: }
4383: else
4384: {
4385: m = (ulong)a[2];
4386: L = BITS_IN_LONG/2;
4387: }
              4388:   /* m = BIL-1 (bfffo odd) or BIL first bits of a */
4389: k = (long) sqrt((double)m);
4390: if (L == BITS_IN_LONG/2 && k == MAXHALFULONG)
4391: x = MAXULONG;
4392: else
4393: x = (k+1) << L;
4394: do
4395: {
4396: LOCAL_HIREMAINDER;
4397: LOCAL_OVERFLOW;
4398: hiremainder = a[2];
4399: if (hiremainder >= x) return x;
4400: z = addll(x, divll(a[3],x));
4401: z >>= 1; if (overflow) z |= HIGHBIT;
4402: y = x; x = z;
4403: }
4404: while (x < y);
4405: return y;
1.2 ! noro 4406: }
! 4407:
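/* Illustrative sketch, not part of the original kernel: mpsqrtl() above
 * applied to an exact square.  123456789^2 fits in at most two mantissa
 * words on any supported word size, so the argument satisfies the
 * "<= 4 words" assumption above.  Assumes pari_init() was called; the
 * function name is ours. */
static void
mpsqrtl_example(void)
{
  gpmem_t av = avma;
  GEN a = mulii(stoi(123456789L), stoi(123456789L));
  if (mpsqrtl(a) != 123456789UL)
    err(talker, "mpsqrtl_example: wrong integer square root");
  avma = av;
}
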
         ! 4408: /* target should point to a buffer of source_end - source ulongs.
         ! 4409:
         ! 4410:    Fills this buffer with the bits of the ulongs in source..source_end-1,
         ! 4411:    shifted right by sh bits; the "most significant" sh bits of the result
         ! 4412:    are set to the least significant sh bits of prepend.
         ! 4413:
         ! 4414:    The ordering of bits in this bitmap is the same as for t_INT.
         ! 4415:
         ! 4416:    sh should not exceed BITS_IN_LONG.
         ! 4417: */
! 4418: void
! 4419: shift_r(ulong *target, ulong *source, ulong *source_end, ulong prepend, ulong sh)
! 4420: {
! 4421: if (sh) { /* shift_words_r() works */
! 4422: register ulong sh_complement = BITS_IN_LONG - sh;
! 4423:
! 4424: shift_words_r(target, source, source_end, prepend, sh, sh_complement);
! 4425: } else {
! 4426: int i;
! 4427:
! 4428: for (i=0; i < source_end - source; i++)
! 4429: target[i] = source[i];
! 4430: }
1.1 noro 4431: }
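
/* Illustrative sketch, not part of the original kernel: a tiny exercise of
 * shift_r() above.  Words are ordered most significant first, as for t_INT.
 * Shifting {2, 3} right by one bit while prepending a 1 bit should yield
 * {HIGHBIT|1, 1}: the prepended bit becomes the top bit of the first word,
 * and the low bit of 2 (here 0) moves into the top of the second word.
 * The function name and the sample data are ours. */
static void
shift_r_example(void)
{
  ulong in[2], out[2];
  in[0] = 2UL; in[1] = 3UL;
  shift_r(out, in, in + 2, 1UL, 1UL);
  if (out[0] != (HIGHBIT | 1UL) || out[1] != 1UL)
    err(talker, "shift_r_example: unexpected result");
}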