[BACK]Return to up.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib2 / asir2018 / engine

Annotation of OpenXM_contrib2/asir2018/engine/up.c, Revision 1.2

1.1       noro        1: /*
                      2:  * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
                      3:  * All rights reserved.
                      4:  *
                      5:  * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
                      6:  * non-exclusive and royalty-free license to use, copy, modify and
                      7:  * redistribute, solely for non-commercial and non-profit purposes, the
                      8:  * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
                      9:  * conditions of this Agreement. For the avoidance of doubt, you acquire
                     10:  * only a limited right to use the SOFTWARE hereunder, and FLL or any
                     11:  * third party developer retains all rights, including but not limited to
                     12:  * copyrights, in and to the SOFTWARE.
                     13:  *
                     14:  * (1) FLL does not grant you a license in any way for commercial
                     15:  * purposes. You may use the SOFTWARE only for non-commercial and
                     16:  * non-profit purposes only, such as academic, research and internal
                     17:  * business use.
                     18:  * (2) The SOFTWARE is protected by the Copyright Law of Japan and
                     19:  * international copyright treaties. If you make copies of the SOFTWARE,
                     20:  * with or without modification, as permitted hereunder, you shall affix
                     21:  * to all such copies of the SOFTWARE the above copyright notice.
                     22:  * (3) An explicit reference to this SOFTWARE and its copyright owner
                     23:  * shall be made on your publication or presentation in any form of the
                     24:  * results obtained by use of the SOFTWARE.
                     25:  * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
                     26:  * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
                     27:  * for such modification or the source code of the modified part of the
                     28:  * SOFTWARE.
                     29:  *
                     30:  * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
                     31:  * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
                     32:  * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
                     33:  * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
                     34:  * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
                     35:  * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
                     36:  * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
                     37:  * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
                     38:  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
                     39:  * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
                     40:  * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
                     41:  * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
                     42:  * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
                     43:  * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
                     44:  * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
                     45:  * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
                     46:  * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
                     47:  *
1.2     ! noro       48:  * $OpenXM: OpenXM_contrib2/asir2018/engine/up.c,v 1.1 2018/09/19 05:45:07 noro Exp $
1.1       noro       49: */
                     50: #include "ca.h"
                     51: #include <math.h>
                     52:
                     53: struct oEGT eg_chrem,eg_fore,eg_back;
                     54:
                     55: /*
                     56: int up_kara_mag=15;
                     57: int up_tkara_mag=15;
                     58: */
                     59: /*
                     60: int up_kara_mag=30;
                     61: int up_tkara_mag=20;
                     62: */
                     63: int up_kara_mag=20;
                     64: int up_tkara_mag=15;
                     65:
                     66: #if defined(sparc)
                     67: int up_fft_mag=50;
                     68: #else
                     69: int up_fft_mag=80;
                     70: #endif
                     71:
                     72: int up_rem_mag=1;
                     73: int debug_up;
                     74: int up_lazy;
                     75: extern int lm_lazy;
                     76: extern int current_ff;
                     77: extern int GC_dont_gc;
                     78:
                     79: void monicup(UP a,UP *b)
                     80: {
                     81:   UP w;
                     82:
                     83:   if ( !a )
                     84:     *b = 0;
                     85:   else {
                     86:     w = W_UPALLOC(0); w->d = 0;
                     87:     divnum(0,(Num)ONE,(Num)a->c[a->d],(Num *)&w->c[0]);
                     88:     mulup(a,w,b);
                     89:   }
                     90: }
                     91:
                     92: void simpup(UP a,UP *b)
                     93: {
                     94:   int i,d;
                     95:   UP c;
                     96:
                     97:   if ( !a )
                     98:     *b = 0;
                     99:   else {
                    100:     d = a->d;
                    101:     c = UPALLOC(d);
                    102:
                    103:     for ( i = 0; i <= d; i++ )
                    104:       simpnum(a->c[i],&c->c[i]);
                    105:     for ( i = d; i >= 0 && !c->c[i]; i-- );
                    106:     if ( i < 0 )
                    107:       *b = 0;
                    108:     else {
                    109:       c->d = i;
                    110:       *b = c;
                    111:     }
                    112:   }
                    113: }
                    114:
                    115: void simpnum(Num a,Num *b)
                    116: {
                    117:   LM lm;
                    118:   GF2N gf;
                    119:   GFPN gfpn;
                    120:
                    121:   if ( !a )
                    122:     *b = 0;
                    123:   else
                    124:     switch ( NID(a) ) {
                    125:       case N_LM:
                    126:         simplm((LM)a,&lm); *b = (Num)lm; break;
                    127:       case N_GF2N:
                    128:         simpgf2n((GF2N)a,&gf); *b = (Num)gf; break;
                    129:       case N_GFPN:
                    130:         simpgfpn((GFPN)a,&gfpn); *b = (Num)gfpn; break;
                    131:       default:
                    132:         *b = a; break;
                    133:     }
                    134: }
                    135:
                    136: void uremp(P p1,P p2,P *rp)
                    137: {
                    138:   UP n1,n2,r;
                    139:
                    140:   if ( !p1 || NUM(p1) )
                    141:     *rp = p1;
                    142:   else {
                    143:     ptoup(p1,&n1); ptoup(p2,&n2);
                    144:     remup(n1,n2,&r);
                    145:     uptop(r,rp);
                    146:   }
                    147: }
                    148:
                    149: void ugcdp(P p1,P p2,P *rp)
                    150: {
                    151:   UP n1,n2,r;
                    152:
                    153:   ptoup(p1,&n1); ptoup(p2,&n2);
                    154:   gcdup(n1,n2,&r);
                    155:   uptop(r,rp);
                    156: }
                    157:
                    158: void reversep(P p1,Q d,P *rp)
                    159: {
                    160:   UP n1,r;
                    161:
                    162:   if ( !p1 )
                    163:     *rp = 0;
                    164:   else {
                    165:     ptoup(p1,&n1);
1.2     ! noro      166:     reverseup(n1,ZTOS(d),&r);
1.1       noro      167:     uptop(r,rp);
                    168:   }
                    169: }
                    170:
                    171: void invmodp(P p1,Q d,P *rp)
                    172: {
                    173:   UP n1,r;
                    174:
                    175:   if ( !p1 )
                    176:     error("invmodp : invalid input");
                    177:   else {
                    178:     ptoup(p1,&n1);
1.2     ! noro      179:     invmodup(n1,ZTOS(d),&r);
1.1       noro      180:     uptop(r,rp);
                    181:   }
                    182: }
                    183:
                    184: void addup(UP n1,UP n2,UP *nr)
                    185: {
                    186:   UP r,t;
                    187:   int i,d1,d2;
                    188:   Num *c,*c1,*c2;
                    189:
                    190:   if ( !n1 ) {
                    191:     *nr = n2;
                    192:   } else if ( !n2 ) {
                    193:     *nr = n1;
                    194:   } else {
                    195:     if ( n2->d > n1->d ) {
                    196:       t = n1; n1 = n2; n2 = t;
                    197:     }
                    198:     d1 = n1->d; d2 = n2->d;
                    199:     *nr = r = UPALLOC(d1);
                    200:     c1 = n1->c; c2 = n2->c; c = r->c;
                    201:     for ( i = 0; i <= d2; i++ )
                    202:       addnum(0,*c1++,*c2++,c++);
                    203:     for ( ; i <= d1; i++ )
                    204:       *c++ = *c1++;
                    205:     for ( i = d1, c = r->c; i >=0 && !c[i]; i-- );
                    206:     if ( i < 0 )
                    207:       *nr = 0;
                    208:     else
                    209:       r->d = i;
                    210:   }
                    211: }
                    212:
                    213: void subup(UP n1,UP n2,UP *nr)
                    214: {
                    215:   UP r;
                    216:   int i,d1,d2,d;
                    217:   Num *c,*c1,*c2;
                    218:
                    219:   if ( !n1 ) {
                    220:     chsgnup(n2,nr);
                    221:   } else if ( !n2 ) {
                    222:     *nr = n1;
                    223:   } else {
                    224:     d1 = n1->d; d2 = n2->d; d = MAX(d1,d2);
                    225:     *nr = r = UPALLOC(d);
                    226:     c1 = n1->c; c2 = n2->c; c = r->c;
                    227:     if ( d1 >= d2 ) {
                    228:       for ( i = 0; i <= d2; i++ )
                    229:         subnum(0,*c1++,*c2++,c++);
                    230:       for ( ; i <= d1; i++ )
                    231:         *c++ = *c1++;
                    232:     } else {
                    233:       for ( i = 0; i <= d1; i++ )
                    234:         subnum(0,*c1++,*c2++,c++);
                    235:       for ( ; i <= d2; i++ )
                    236:         chsgnnum(*c2++,c++);
                    237:     }
                    238:     for ( i = d, c = r->c; i >=0 && !c[i]; i-- );
                    239:     if ( i < 0 )
                    240:       *nr = 0;
                    241:     else
                    242:       r->d = i;
                    243:   }
                    244: }
                    245:
                    246: void chsgnup(UP n1,UP *nr)
                    247: {
                    248:   UP r;
                    249:   int d1,i;
                    250:   Num *c1,*c;
                    251:
                    252:   if ( !n1 ) {
                    253:     *nr = 0;
                    254:   } else {
                    255:     d1 = n1->d;
                    256:     *nr = r = UPALLOC(d1); r->d = d1;
                    257:     c1 = n1->c; c = r->c;
                    258:     for ( i = 0; i <= d1; i++ )
                    259:       chsgnnum(*c1++,c++);
                    260:   }
                    261: }
                    262:
                    263: void hybrid_mulup(int ff,UP n1,UP n2,UP *nr)
                    264: {
                    265:   if ( !n1 || !n2 )
                    266:     *nr = 0;
                    267:   else if ( MAX(n1->d,n2->d) < up_fft_mag )
                    268:     kmulup(n1,n2,nr);
                    269:   else
                    270:     switch ( ff ) {
                    271:       case FF_GFP:
                    272:         fft_mulup_lm(n1,n2,nr); break;
                    273:       case FF_GF2N:
                    274:         kmulup(n1,n2,nr); break;
                    275:       default:
                    276:         fft_mulup(n1,n2,nr); break;
                    277:     }
                    278: }
                    279:
                    280: void hybrid_squareup(int ff,UP n1,UP *nr)
                    281: {
                    282:   if ( !n1 )
                    283:     *nr = 0;
                    284:   else if ( n1->d < up_fft_mag )
                    285:     ksquareup(n1,nr);
                    286:   else
                    287:     switch ( ff ) {
                    288:       case FF_GFP:
                    289:         fft_squareup_lm(n1,nr); break;
                    290:       case FF_GF2N:
                    291:         ksquareup(n1,nr); break;
                    292:       default:
                    293:         fft_squareup(n1,nr); break;
                    294:     }
                    295: }
                    296:
                    297: void hybrid_tmulup(int ff,UP n1,UP n2,int d,UP *nr)
                    298: {
                    299:   if ( !n1 || !n2 )
                    300:     *nr = 0;
                    301:   else if ( MAX(n1->d,n2->d) < up_fft_mag )
                    302:     tkmulup(n1,n2,d,nr);
                    303:   else
                    304:     switch ( ff ) {
                    305:       case FF_GFP:
                    306:         trunc_fft_mulup_lm(n1,n2,d,nr); break;
                    307:       case FF_GF2N:
                    308:         tkmulup(n1,n2,d,nr); break;
                    309:       default:
                    310:         trunc_fft_mulup(n1,n2,d,nr); break;
                    311:     }
                    312: }
                    313:
                    314: void mulup(UP n1,UP n2,UP *nr)
                    315: {
                    316:   UP r;
                    317:   Num *pc1,*pc,*c1,*c2,*c;
                    318:   Num mul,t,s;
                    319:   int i,j,d1,d2;
                    320:
                    321:   if ( !n1 || !n2 )
                    322:     *nr = 0;
                    323:   else {
                    324:     d1 = n1->d; d2 = n2->d;
                    325:     *nr = r = UPALLOC(d1+d2); r->d = d1+d2;
                    326:     c1 = n1->c; c2 = n2->c; c = r->c;
                    327:     lm_lazy = 1;
                    328:     for ( i = 0; i <= d2; i++, c++ )
                    329:       if ( (mul = *c2++) != 0 )
                    330:         for ( j = 0, pc1 = c1, pc = c; j <= d1; j++ ) {
                    331:           mulnum(0,*pc1++,mul,&t); addnum(0,*pc,t,&s); *pc++ = s;
                    332:       }
                    333:     lm_lazy = 0;
                    334:     if ( !up_lazy )
                    335:       for ( i = 0, c = r->c; i <= r->d; i++ ) {
                    336:         simpnum(c[i],&t); c[i] = t;
                    337:       }
                    338:   }
                    339: }
                    340:
                    341: /* nr = c*n1 */
                    342:
                    343: void mulcup(Num c,UP n1,UP *nr)
                    344: {
                    345:   int d;
                    346:   UP r;
                    347:   Num t;
                    348:   Num *c1,*cr;
                    349:   int i;
                    350:
                    351:   if ( !c || !n1 )
                    352:     *nr = 0;
                    353:   else {
                    354:     d = n1->d;
                    355:     *nr = r = UPALLOC(d); r->d = d;
                    356:     c1 = n1->c; cr = r->c;
                    357:     lm_lazy = 1;
                    358:     for ( i = 0; i <= d; i++ )
                    359:       mulnum(0,c1[i],c,&cr[i]);
                    360:     lm_lazy = 0;
                    361:     if ( !up_lazy )
                    362:       for ( i = 0; i <= d; i++ ) {
                    363:         simpnum(cr[i],&t); cr[i] = t;
                    364:       }
                    365:   }
                    366: }
                    367:
                    368: void tmulup(UP n1,UP n2,int d,UP *nr)
                    369: {
                    370:   UP r;
                    371:   Num *pc1,*pc,*c1,*c2,*c;
                    372:   Num mul,t,s;
                    373:   int i,j,d1,d2,dr;
                    374:
                    375:   if ( !n1 || !n2 )
                    376:     *nr = 0;
                    377:   else {
                    378:     d1 = n1->d; d2 = n2->d;
                    379:     dr = MAX(d-1,d1+d2);
                    380:     *nr = r = UPALLOC(dr);
                    381:     c1 = n1->c; c2 = n2->c; c = r->c;
                    382:     lm_lazy = 1;
                    383:     for ( i = 0; i <= d2; i++, c++ )
                    384:       if ( ( mul = *c2++ ) != 0 )
                    385:         for ( j = 0, pc1 = c1, pc = c; (j <= d1) && (i+j < d); j++ ) {
                    386:           mulnum(0,*pc1++,mul,&t); addnum(0,*pc,t,&s); *pc++ = s;
                    387:       }
                    388:     lm_lazy = 0;
                    389:     c = r->c;
                    390:     if ( !up_lazy )
                    391:       for ( i = 0; i < d; i++ ) {
                    392:         simpnum(c[i],&t); c[i] = t;
                    393:       }
                    394:     for ( i = d-1; i >= 0 && !c[i]; i-- );
                    395:     if ( i < 0 )
                    396:       *nr = 0;
                    397:     else
                    398:       r->d = i;
                    399:   }
                    400: }
                    401:
                    402: void squareup(UP n1,UP *nr)
                    403: {
                    404:   UP r;
                    405:   Num *c1,*c;
                    406:   Num t,s,u;
                    407:   int i,j,h,d1,d;
                    408:
                    409:   if ( !n1 )
                    410:     *nr = 0;
                    411:   else if ( !n1->d ) {
                    412:     *nr = r = UPALLOC(0); r->d = 0;
                    413:     mulnum(0,n1->c[0],n1->c[0],&r->c[0]);
                    414:   } else {
                    415:     d1 = n1->d;
                    416:     d = 2*d1;
                    417:     *nr = r = UPALLOC(2*d); r->d = d;
                    418:     c1 = n1->c; c = r->c;
                    419:     lm_lazy = 1;
                    420:     for ( i = 0; i <= d1; i++ )
                    421:       mulnum(0,c1[i],c1[i],&c[2*i]);
                    422:     for ( j = 1; j < d; j++ ) {
                    423:       h = (j-1)/2;
                    424:       for ( i = MAX(j-d1,0), s = 0; i <= h; i++ ) {
                    425:         mulnum(0,c1[i],c1[j-i],&t); addnum(0,t,s,&u); s = u;
                    426:       }
                    427:       addnum(0,s,s,&t); addnum(0,t,c[j],&u); c[j] = u;
                    428:     }
                    429:     lm_lazy = 0;
                    430:     if ( !up_lazy )
                    431:       for ( i = 0, c = r->c; i <= d; i++ ) {
                    432:         simpnum(c[i],&t); c[i] = t;
                    433:       }
                    434:   }
                    435: }
                    436:
                    437: void remup(UP n1,UP n2,UP *nr)
                    438: {
                    439:   UP w,r;
                    440:
                    441:   if ( !n2 )
                    442:     error("remup : division by 0");
                    443:   else if ( !n1 || !n2->d )
                    444:     *nr = 0;
                    445:   else if ( n1->d < n2->d )
                    446:     *nr = n1;
                    447:   else {
                    448:     w = W_UPALLOC(n1->d); copyup(n1,w);
                    449:     remup_destructive(w,n2);
                    450:     if ( w->d < 0 )
                    451:       *nr = 0;
                    452:     else {
                    453:       *nr = r = UPALLOC(w->d); copyup(w,r);
                    454:     }
                    455:   }
                    456: }
                    457:
                    458: void remup_destructive(UP n1,UP n2)
                    459: {
                    460:   Num *c1,*c2;
                    461:   int d1,d2,i,j;
                    462:   Num m,t,s,mhc;
                    463:
                    464:   c1 = n1->c; c2 = n2->c;
                    465:   d1 = n1->d; d2 = n2->d;
                    466:   chsgnnum(c2[d2],&mhc);
                    467:   for ( i = d1; i >= d2; i-- ) {
                    468:     simpnum(c1[i],&t); c1[i] = t;
                    469:     if ( !c1[i] )
                    470:       continue;
                    471:     divnum(0,c1[i],mhc,&m);
                    472:     lm_lazy = 1;
                    473:     for ( j = d2; j >= 0; j-- ) {
                    474:       mulnum(0,c2[j],m,&t); addnum(0,c1[i+j-d2],t,&s);
                    475:       c1[i+j-d2] = s;
                    476:     }
                    477:     lm_lazy = 0;
                    478:   }
                    479:   for ( i = 0; i < d2; i++ ) {
                    480:     simpnum(c1[i],&t); c1[i] = t;
                    481:   }
                    482:   for ( i = d2-1; i >= 0 && !c1[i]; i-- );
                    483:   n1->d = i;
                    484: }
                    485:
                    486: void qrup(UP n1,UP n2,UP *nq,UP *nr)
                    487: {
                    488:   UP w,r,q;
                    489:   struct oUP t;
                    490:   Num m;
                    491:   int d;
                    492:
                    493:   if ( !n2 )
                    494:     error("qrup : division by 0");
                    495:   else if ( !n1 ) {
                    496:     *nq = 0; *nr = 0;
                    497:   } else if ( !n2->d )
                    498:     if ( !n1->d ) {
                    499:       divnum(0,n1->c[0],n2->c[0],(Num *)nq); *nr = 0;
                    500:     } else {
                    501:       divnum(0,(Num)ONE,n2->c[0],&m);
                    502:       t.d = 0; t.c[0] = m;
                    503:       mulup(n1,&t,nq); *nr = 0;
                    504:     }
                    505:   else if ( n1->d < n2->d ) {
                    506:     *nq = 0; *nr = n1;
                    507:   } else {
                    508:     w = W_UPALLOC(n1->d); copyup(n1,w);
                    509:     qrup_destructive(w,n2);
                    510:     d = n1->d-n2->d;
                    511:     *nq = q = UPALLOC(d); q->d = d;
                    512:     bcopy(w->c+n2->d,q->c,(d+1)*sizeof(Num));
                    513:     if ( w->d < 0 )
                    514:       *nr = 0;
                    515:     else {
                    516:       *nr = r = UPALLOC(w->d); copyup(w,r);
                    517:     }
                    518:   }
                    519: }
                    520:
                    521: void qrup_destructive(UP n1,UP n2)
                    522: {
                    523:   Num *c1,*c2;
                    524:   int d1,d2,i,j;
                    525:   Num q,t,s,hc;
                    526:
                    527:   c1 = n1->c; c2 = n2->c;
                    528:   d1 = n1->d; d2 = n2->d;
                    529:   hc = c2[d2];
                    530:   for ( i = d1; i >= d2; i-- ) {
                    531:     simpnum(c1[i],&t); c1[i] = t;
                    532:     if ( c1[i] ) {
                    533:       divnum(0,c1[i],hc,&q);
                    534:       lm_lazy = 1;
                    535:       for ( j = d2; j >= 0; j-- ) {
                    536:         mulnum(0,c2[j],q,&t); subnum(0,c1[i+j-d2],t,&s);
                    537:         c1[i+j-d2] = s;
                    538:       }
                    539:       lm_lazy = 0;
                    540:     } else
                    541:       q = 0;
                    542:     c1[i] = q;
                    543:   }
                    544:   for ( i = 0; i < d2; i++ ) {
                    545:     simpnum(c1[i],&t); c1[i] = t;
                    546:   }
                    547:   for ( i = d2-1; i >= 0 && !c1[i]; i-- );
                    548:   n1->d = i;
                    549: }
                    550:
                    551: void gcdup(UP n1,UP n2,UP *nr)
                    552: {
                    553:   UP w1,w2,t,r;
                    554:   int d1,d2;
                    555:
                    556:   if ( !n1 )
                    557:     *nr = n2;
                    558:   else if ( !n2 )
                    559:     *nr = n1;
                    560:   else if ( !n1->d || !n2->d ) {
                    561:     *nr = r = UPALLOC(0); r->d = 0;
                    562:     divnum(0,n1->c[n1->d],n1->c[n1->d],&r->c[0]);
                    563:   } else {
                    564:     d1 = n1->d; d2 = n2->d;
                    565:     if ( d2 > d1 ) {
                    566:       w1 = W_UPALLOC(d2); copyup(n2,w1);
                    567:       w2 = W_UPALLOC(d1); copyup(n1,w2);
                    568:     } else {
                    569:       w1 = W_UPALLOC(d1); copyup(n1,w1);
                    570:       w2 = W_UPALLOC(d2); copyup(n2,w2);
                    571:     }
                    572:     d1 = w1->d; d2 = w2->d;
                    573:     while ( 1 ) {
                    574:       remup_destructive(w1,w2);
                    575:       if ( w1->d < 0 ) {
                    576:         *nr = r = UPALLOC(w2->d); copyup(w2,r); return;
                    577:       } else if ( !w1->d ) {
                    578:         *nr = r = UPALLOC(0); r->d = 0;
                    579:         divnum(0,n1->c[n1->d],n1->c[n1->d],&r->c[0]); return;
                    580:       } else {
                    581:         t = w1; w1 = w2; w2 = t;
                    582:       }
                    583:     }
                    584:   }
                    585: }
                    586:
                    587: /* compute r s.t. a*r = 1 mod m */
                    588:
                    589: void extended_gcdup(UP a,UP m,UP *r)
                    590: {
                    591:   UP one,g1,g2,a1,a2,a3,b1,b2,b3,inv,quo,rem,t;
                    592:   Num i;
                    593:
                    594:   one = UPALLOC(0); one->d = 0; one->c[0] = (Num)ONELM;
                    595:   g1 = m; g2 = a;
                    596:   a1 = one; a2 = 0;
                    597:   b1 = 0; b2 = one;
                    598:   /* a2*m+b2*a = g2 always holds */
                    599:   while ( g2->d ) {
                    600:     qrup(g1,g2,&quo,&rem); g1 = g2; g2 = rem;
                    601:     mulup(a2,quo,&t); subup(a1,t,&a3); a1 = a2; a2 = a3;
                    602:     mulup(b2,quo,&t); subup(b1,t,&b3); b1 = b2; b2 = b3;
                    603:   }
                    604:   /* now a2*m+b2*a = g2 (const) */
                    605:   divnum(0,(Num)ONE,g2->c[0],&i);
                    606:   inv = UPALLOC(0); inv->d = 0; inv->c[0] = i;
                    607:   mulup(b2,inv,r);
                    608: }
                    609:
                    610: void reverseup(UP n1,int d,UP *nr)
                    611: {
                    612:   Num *c1,*c;
                    613:   int i,d1;
                    614:   UP r;
                    615:
                    616:   if ( !n1 )
                    617:     *nr = 0;
                    618:   else if ( d < (d1 = n1->d) )
                    619:     error("reverseup : invalid input");
                    620:   else {
                    621:     *nr = r = UPALLOC(d);
                    622:     c = r->c;
                    623:     bzero((char *)c,(d+1)*sizeof(Num));
                    624:     c1 = n1->c;
                    625:     for ( i = 0; i <= d1; i++ )
                    626:       c[d-i] = c1[i];
                    627:     for ( i = d; i >= 0 && !c[i]; i-- );
                    628:     r->d = i;
                    629:     if ( i < 0 )
                    630:       *nr = 0;
                    631:   }
                    632: }
                    633:
                    634: void invmodup(UP n1,int d,UP *nr)
                    635: {
                    636:   UP r;
                    637:   Num s,t,u,hinv;
                    638:   int i,j,dmin;
                    639:   Num *w,*c,*cr;
                    640:
                    641:   if ( !n1 || !n1->c[0] )
                    642:     error("invmodup : invalid input");
                    643:   else if ( !n1->d ) {
                    644:     *nr = r = UPALLOC(0); r->d = 0;
                    645:     divnum(0,(Num)ONE,n1->c[0],&r->c[0]);
                    646:   } else {
                    647:     *nr = r = UPALLOC(d);
                    648:     w = (Num *)ALLOCA((d+1)*sizeof(Q));
                    649:     bzero((char *)w,(d+1)*sizeof(Q));
                    650:     dmin = MIN(d,n1->d);
                    651:     divnum(0,(Num)ONE,n1->c[0],&hinv);
                    652:     for ( i = 0, c = n1->c; i <= dmin; i++ )
                    653:       mulnum(0,c[i],hinv,&w[i]);
                    654:     cr = r->c;
                    655:     cr[0] = w[0];
                    656:     for ( i = 1; i <= d; i++ ) {
                    657:       for ( j = 1, s = w[i]; j < i; j++ ) {
                    658:         mulnum(0,cr[j],w[i-j],&t);
                    659:         addnum(0,s,t,&u);
                    660:         s = u;
                    661:       }
                    662:       chsgnnum(s,&cr[i]);
                    663:     }
                    664:     for ( i = 0; i <= d; i++ ) {
                    665:       mulnum(0,cr[i],hinv,&t); cr[i] = t;
                    666:     }
                    667:     for ( i = d; i >= 0 && !cr[i]; i-- );
                    668:     r->d = i;
                    669:   }
                    670: }
                    671:
                    672: void pwrup(UP n,Z e,UP *nr)
                    673: {
                    674:   UP y,z,t;
                    675:   Z q,r,two;
                    676:
                    677:   y = UPALLOC(0); y->d = 0; y->c[0] = (Num)ONE;
                    678:   z = n;
                    679:   if ( !e )
                    680:     *nr = y;
                    681:   else if ( UNIQ(e) )
                    682:     *nr = n;
                    683:   else {
1.2     ! noro      684:     STOZ(2,two);
1.1       noro      685:     while ( 1 ) {
                    686:       divqrz(e,two,&q,&r); e = q;
                    687:       if ( r ) {
                    688:         mulup(z,y,&t); y = t;
                    689:         if ( !e ) {
                    690:           *nr = y;
                    691:           return;
                    692:         }
                    693:       }
                    694:       mulup(z,z,&t); z = t;
                    695:     }
                    696:   }
                    697: }
                    698:
                    699: int compup(UP n1,UP n2)
                    700: {
                    701:   int i,r;
                    702:
                    703:   if ( !n1 )
                    704:     return n2 ? -1 : 0;
                    705:   else if ( !n2 )
                    706:     return 1;
                    707:   else if ( n1->d > n2->d )
                    708:     return 1;
                    709:   else if ( n1->d < n2->d )
                    710:     return -1;
                    711:   else {
                    712:     for ( i = n1->d; i >= 0; i-- ) {
                    713:       r = compnum(CO,n1->c[i],n2->c[i]);
                    714:       if ( r )
                    715:         return r;
                    716:     }
                    717:     return 0;
                    718:   }
                    719: }
                    720:
                    721: void kmulp(VL vl,P n1,P n2,P *nr)
                    722: {
                    723:   UP b1,b2,br;
                    724:
                    725:   if ( !n1 || !n2 )
                    726:     *nr = 0;
                    727:   else if ( OID(n1) == O_N || OID(n2) == O_N )
                    728:     mulp(vl,n1,n2,nr);
                    729:   else {
                    730:     ptoup(n1,&b1); ptoup(n2,&b2);
                    731:     kmulup(b1,b2,&br);
                    732:     uptop(br,nr);
                    733:   }
                    734: }
                    735:
                    736: void ksquarep(VL vl,P n1,P *nr)
                    737: {
                    738:   UP b1,br;
                    739:
                    740:   if ( !n1 )
                    741:     *nr = 0;
                    742:   else if ( OID(n1) == O_N )
                    743:     mulp(vl,n1,n1,nr);
                    744:   else {
                    745:     ptoup(n1,&b1);
                    746:     ksquareup(b1,&br);
                    747:     uptop(br,nr);
                    748:   }
                    749: }
                    750:
                    751: void kmulup(UP n1,UP n2,UP *nr)
                    752: {
                    753:   int d1,d2;
                    754:
                    755:   if ( !n1 || !n2 ) {
                    756:     *nr = 0; return;
                    757:   }
                    758:   d1 = DEG(n1)+1; d2 = DEG(n2)+1;
                    759:   if ( (d1 < up_kara_mag) && (d2 < up_kara_mag) )
                    760:     mulup(n1,n2,nr);
                    761:   else
                    762:     kmulupmain(n1,n2,nr);
                    763: }
                    764:
                    765: void ksquareup(UP n1,UP *nr)
                    766: {
                    767:   int d1;
                    768:
                    769:   if ( !n1 ) {
                    770:     *nr = 0; return;
                    771:   }
                    772:   d1 = DEG(n1)+1;
                    773:   if ( (d1 < up_kara_mag) )
                    774:     squareup(n1,nr);
                    775:   else
                    776:     ksquareupmain(n1,nr);
                    777: }
                    778:
                    779: void copyup(UP n1,UP n2)
                    780: {
                    781:   n2->d = n1->d;
                    782:   bcopy((char *)n1->c,(char *)n2->c,(n1->d+1)*sizeof(Q));
                    783: }
                    784:
                    785: void c_copyup(UP n,int len,pointer *p)
                    786: {
                    787:   if ( n )
                    788:     bcopy((char *)COEF(n),(char *)p,MIN((DEG(n)+1),len)*sizeof(pointer));
                    789: }
                    790:
                    791: void kmulupmain(UP n1,UP n2,UP *nr)
                    792: {
                    793:   int d1,d2,h;
                    794:   UP n1lo,n1hi,n2lo,n2hi,hi,lo,mid1,mid2,mid,s1,s2,t1,t2;
                    795:
                    796:   d1 = DEG(n1)+1; d2 = DEG(n2)+1; h = (MAX(d1,d2)+1)/2;
                    797:   decompup(n1,h,&n1lo,&n1hi);
                    798:   decompup(n2,h,&n2lo,&n2hi);
                    799:   kmulup(n1lo,n2lo,&lo);
                    800:   kmulup(n1hi,n2hi,&hi);
                    801:   shiftup(hi,2*h,&t2);
                    802:   addup(lo,t2,&t1);
                    803:   addup(hi,lo,&mid1);
                    804:   subup(n1hi,n1lo,&s1); subup(n2lo,n2hi,&s2); kmulup(s1,s2,&mid2);
                    805:   addup(mid1,mid2,&mid);
                    806:   shiftup(mid,h,&t2);
                    807:   addup(t1,t2,nr);
                    808: }
                    809:
                    810: void ksquareupmain(UP n1,UP *nr)
                    811: {
                    812:   int d1,h;
                    813:   UP n1lo,n1hi,hi,lo,mid1,mid2,mid,s1,t1,t2;
                    814:
                    815:   d1 = DEG(n1)+1; h = (d1+1)/2;
                    816:   decompup(n1,h,&n1lo,&n1hi);
                    817:   ksquareup(n1hi,&hi); ksquareup(n1lo,&lo);
                    818:   shiftup(hi,2*h,&t2);
                    819:   addup(lo,t2,&t1);
                    820:   addup(hi,lo,&mid1);
                    821:   subup(n1hi,n1lo,&s1); ksquareup(s1,&mid2);
                    822:   subup(mid1,mid2,&mid);
                    823:   shiftup(mid,h,&t2);
                    824:   addup(t1,t2,nr);
                    825: }
                    826:
                    827: void rembymulup(UP n1,UP n2,UP *nr)
                    828: {
                    829:   int d1,d2,d;
                    830:   UP r1,r2,inv2,t,s,q;
                    831:
                    832:   if ( !n2 )
                    833:     error("rembymulup : division by 0");
                    834:   else if ( !n1 || !n2->d )
                    835:     *nr = 0;
                    836:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    837:     *nr = n1;
                    838:   else {
                    839:     d = d1-d2;
                    840:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    841:     reverseup(n2,d2,&r2);
                    842:     invmodup(r2,d,&inv2);
                    843:     kmulup(r1,inv2,&t);
                    844:     truncup(t,d+1,&s);
                    845:     reverseup(s,d,&q);
                    846:     kmulup(q,n2,&t); subup(n1,t,nr);
                    847:   }
                    848: }
                    849:
                    850: /*
                    851:   deg n2 = d
                    852:   deg n1 <= 2*d-1
                    853:   inv2 = inverse of reversep(n2) mod x^d
                    854: */
                    855:
                    856: void hybrid_rembymulup_special(int ff,UP n1,UP n2,UP inv2,UP *nr)
                    857: {
                    858:   int d1,d2,d;
                    859:   UP r1,t,s,q;
                    860:
                    861:   if ( !n2 )
                    862:     error("hybrid_rembymulup : division by 0");
                    863:   else if ( !n1 || !n2->d )
                    864:     *nr = 0;
                    865:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    866:     *nr = n1;
                    867:   else {
                    868:     d = d1-d2;
                    869:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    870:     hybrid_tmulup(ff,r1,inv2,d+1,&t);
                    871:     truncup(t,d+1,&s);
                    872:     reverseup(s,d,&q);
                    873:
                    874:     hybrid_tmulup(ff,q,n2,d2,&t);
                    875:     truncup(n1,d2,&s);
                    876:     subup(s,t,nr);
                    877:   }
                    878: }
                    879:
                    880: void rembymulup_special(UP n1,UP n2,UP inv2,UP *nr)
                    881: {
                    882:   int d1,d2,d;
                    883:   UP r1,t,s,q;
                    884:
                    885:   if ( !n2 )
                    886:     error("rembymulup : division by 0");
                    887:   else if ( !n1 || !n2->d )
                    888:     *nr = 0;
                    889:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    890:     *nr = n1;
                    891:   else {
                    892:     d = d1-d2;
                    893:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    894:     tkmulup(r1,inv2,d+1,&t);
                    895:     truncup(t,d+1,&s);
                    896:     reverseup(s,d,&q);
                    897:
                    898:     tkmulup(q,n2,d2,&t);
                    899:     truncup(n1,d2,&s);
                    900:     subup(s,t,nr);
                    901:   }
                    902: }
                    903:
                    904: /* *nr = n1*n2 mod x^d */
                    905:
                    906: void tkmulup(UP n1,UP n2,int d,UP *nr)
                    907: {
                    908:   int m;
                    909:   UP n1l,n1h,n2l,n2h,l,h,t,s,u;
                    910:
                    911:   if ( d < 0 )
                    912:     error("tkmulup : invalid argument");
                    913:   else if ( d == 0 )
                    914:     *nr = 0;
                    915:   else {
                    916:     truncup(n1,d,&t); n1 = t;
                    917:     truncup(n2,d,&t); n2 = t;
                    918:     if ( !n1 || !n2 )
                    919:       *nr = 0;
                    920:     else if ( n1->d < up_tkara_mag || n2->d < up_tkara_mag )
                    921:       tmulup(n1,n2,d,nr);
                    922:     else {
                    923:       m = (d+1)/2;
                    924:       decompup(n1,m,&n1l,&n1h);
                    925:       decompup(n2,m,&n2l,&n2h);
                    926:       kmulup(n1l,n2l,&l);
                    927:       tkmulup(n1l,n2h,d-m,&t);
                    928:       tkmulup(n2l,n1h,d-m,&s);
                    929:       addup(t,s,&u);
                    930:       shiftup(u,m,&h);
                    931:       addup(l,h,&t);
                    932:       truncup(t,d,nr);
                    933:     }
                    934:   }
                    935: }
                    936:
                    937: /* n->n*x^d */
                    938:
                    939: void shiftup(UP n,int d,UP *nr)
                    940: {
                    941:   int dr;
                    942:   UP r;
                    943:
                    944:   if ( !n )
                    945:     *nr = 0;
                    946:   else {
                    947:     dr = n->d+d;
                    948:     *nr = r = UPALLOC(dr); r->d = dr;
                    949:     bzero(r->c,(dr+1)*sizeof(Num));
                    950:     bcopy(n->c,r->c+d,(n->d+1)*sizeof(Num));
                    951:   }
                    952: }
                    953:
                    954: void fft_rembymulup_special(UP n1,UP n2,UP inv2,UP *nr)
                    955: {
                    956:   int d1,d2,d;
                    957:   UP r1,t,s,q,u;
                    958:
                    959:   if ( !n2 )
                    960:     error("rembymulup : division by 0");
                    961:   else if ( !n1 || !n2->d )
                    962:     *nr = 0;
                    963:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    964:     *nr = n1;
                    965:   else {
                    966:     d = d1-d2;
                    967:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    968:     trunc_fft_mulup(r1,inv2,d+1,&t);
                    969:     truncup(t,d+1,&s);
                    970:     reverseup(s,d,&q);
                    971:     trunc_fft_mulup(q,n2,d2,&t); truncup(t,d2,&u);
                    972:     truncup(n1,d2,&s);
                    973:     subup(s,u,nr);
                    974:   }
                    975: }
                    976:
                    977: void set_degreeup(UP n,int d)
                    978: {
                    979:   int i;
                    980:
                    981:   for ( i = d; i >= 0 && !n->c[i]; i-- );
                    982:   n->d = i;
                    983: }
                    984:
                    985: /* n -> n0 + x^d n1 */
                    986:
                    987: void decompup(UP n,int d,UP *n0,UP *n1)
                    988: {
                    989:   int dn;
                    990:   UP r0,r1;
                    991:
                    992:   if ( !n || d > n->d ) {
                    993:     *n0 = n; *n1 = 0;
                    994:   } else if ( d < 0 )
                    995:     error("decompup : invalid argument");
                    996:   else if ( d == 0 ) {
                    997:     *n0 = 0; *n1 = n;
                    998:   } else {
                    999:     dn = n->d;
                   1000:     *n0 = r0 = UPALLOC(d-1);
                   1001:     *n1 = r1 = UPALLOC(dn-d);
                   1002:     bcopy(n->c,r0->c,d*sizeof(Num));
                   1003:     bcopy(n->c+d,r1->c,(dn-d+1)*sizeof(Num));
                   1004:     set_degreeup(r0,d-1);
                   1005:     if ( r0->d < 0 )
                   1006:       *n0 = 0;
                   1007:     set_degreeup(r1,dn-d);
                   1008:     if ( r1->d < 0 )
                   1009:       *n1 = 0;
                   1010:   }
                   1011: }
                   1012:
                   1013: /* n -> n mod x^d */
                   1014:
                   1015: void truncup(UP n1,int d,UP *nr)
                   1016: {
                   1017:   int i;
                   1018:   UP r;
                   1019:
                   1020:   if ( !n1 || d > n1->d )
                   1021:     *nr = n1;
                   1022:   else if ( d < 0 )
                   1023:     error("truncup : invalid argument");
                   1024:   else if ( d == 0 )
                   1025:     *nr = 0;
                   1026:   else {
                   1027:      *nr = r = UPALLOC(d-1);
                   1028:     bcopy(n1->c,r->c,d*sizeof(Num));
                   1029:     for ( i = d-1; i >= 0 && !r->c[i]; i-- );
                   1030:     if ( i < 0 )
                   1031:       *nr = 0;
                   1032:     else
                   1033:       r->d = i;
                   1034:   }
                   1035: }
                   1036:
                   1037: int int_bits(int t)
                   1038: {
                   1039:   int k;
                   1040:
                   1041:   for ( k = 0; t; t>>=1, k++);
                   1042:   return k;
                   1043: }
                   1044:
                   1045: /* n is assumed to be LM or integer coefficient */
                   1046:
                   1047: int maxblenup(UP n)
                   1048: {
                   1049:   int m,r,i,d;
                   1050:   Num *c;
                   1051:
                   1052:   if ( !n )
                   1053:     return 0;
                   1054:   d = n->d; c = (Num *)n->c;
                   1055:   for ( r = 0, i = 0; i <= d; i++ )
                   1056:     if ( c[i] ) {
                   1057:       switch ( NID(c[i]) ) {
                   1058:         case N_LM:
                   1059:           m = mpz_sizeinbase(BDY((LM)c[i]),2);
                   1060:           break;
                   1061:         case N_Q:
                   1062:           m = mpz_sizeinbase(BDY((Z)c[i]),2);
                   1063:           break;
                   1064:         default:
                   1065:           error("maxblenup : invalid coefficient");
                   1066:       }
                   1067:       r = MAX(m,r);
                   1068:     }
                   1069:   return r;
                   1070: }
                   1071:
                   1072: /* YYY */
                   1073:
                   1074: void uptofmarray(int mod,UP n,ModNum *f)
                   1075: {
                   1076:   int d,i;
                   1077:   unsigned int r;
                   1078:   Num *c;
                   1079:   Z z;
                   1080:
                   1081:   if ( !n )
                   1082:     return;
                   1083:   else {
                   1084:     d = n->d; c = (Num *)n->c;
                   1085:     for ( i = 0; i <= d; i++ ) {
                   1086:       if ( c[i] ) {
                   1087:         switch ( NID(c[i]) ) {
                   1088:           case N_LM:
                   1089:             MPZTOZ(BDY((LM)c[i]),z);
                   1090:             f[i] = remqi((Q)z,mod);
                   1091:             break;
                   1092:           case N_Q:
                   1093:             f[i] = remqi((Q)c[i],mod);
                   1094:             break;
                   1095:           default:
                   1096:             error("uptofmarray : invalid coefficient");
                   1097:         }
                   1098:       } else
                   1099:         f[i] = 0;
                   1100:     }
                   1101:   }
                   1102: }
                   1103:
                   1104: void fmarraytoup(ModNum *f,int d,UP *nr)
                   1105: {
                   1106:   int i;
                   1107:   Z *c;
                   1108:   UP r;
                   1109:
                   1110:   if ( d < 0 ) {
                   1111:     *nr = 0;
                   1112:   } else {
                   1113:     *nr = r = UPALLOC(d); c = (Z *)r->c;
                   1114:     for ( i = 0; i <= d; i++ ) {
1.2     ! noro     1115:       UTOZ((unsigned int)f[i],c[i]);
1.1       noro     1116:     }
                   1117:     for ( i = d; i >= 0 && !c[i]; i-- );
                   1118:     if ( i < 0 )
                   1119:       *nr = 0;
                   1120:     else
                   1121:       r->d = i;
                   1122:   }
                   1123: }
                   1124:
                   1125: /* f[i]: an array of length n */
                   1126:
                   1127: void uiarraytoup(unsigned int **f,int n,int d,UP *nr)
                   1128: {
                   1129:   int i,j;
                   1130:   Z *c;
                   1131:   UP r;
                   1132:
                   1133:   if ( d < 0 ) {
                   1134:     *nr = 0;
                   1135:   } else {
                   1136:     *nr = r = UPALLOC(d); c = (Z *)r->c;
                   1137:     for ( i = 0; i <= d; i++ )
                   1138:       intarraytoz(n,f[i],&c[i]);
                   1139:     for ( i = d; i >= 0 && !c[i]; i-- );
                   1140:     if ( i < 0 )
                   1141:       *nr = 0;
                   1142:     else
                   1143:       r->d = i;
                   1144:   }
                   1145: }
                   1146:
                   1147: void adj_coefup(UP n,Z m,Z m2,UP *nr)
                   1148: {
                   1149:   int d;
                   1150:   Z *c,*cr;
                   1151:   Z mq;
                   1152:   int i;
                   1153:   UP r;
                   1154:
                   1155:   if ( !n )
                   1156:     *nr = 0;
                   1157:   else {
                   1158:     d = n->d; c = (Z *)n->c;
                   1159:     *nr = r = UPALLOC(d); cr = (Z *)r->c;
                   1160:     for ( i = 0; i <= d; i++ ) {
                   1161:       if ( !c[i] )
                   1162:         continue;
                   1163:       if ( cmpz(c[i],m2) > 0 )
                   1164:         subz(c[i],m,&cr[i]);
                   1165:       else
                   1166:         cr[i] = c[i];
                   1167:     }
                   1168:     for ( i = d; i >= 0 && !cr[i]; i-- );
                   1169:     if ( i < 0 )
                   1170:       *nr = 0;
                   1171:     else
                   1172:       r->d = i;
                   1173:   }
                   1174: }
                   1175:
                   1176: /* n is assumed to have positive integer coefficients. */
                   1177:
                   1178: void remcup(UP n,Z mod,UP *nr)
                   1179: {
                   1180:   int i,d;
                   1181:   Z *c,*cr;
                   1182:   UP r;
                   1183:   Z t;
                   1184:
                   1185:   if ( !n )
                   1186:     *nr = 0;
                   1187:   else {
                   1188:     d = n->d; c = (Z *)n->c;
                   1189:     *nr = r = UPALLOC(d); cr = (Z *)r->c;
                   1190:     for ( i = 0; i <= d; i++ )
                   1191:       remz(c[i],mod,&cr[i]);
                   1192:     for ( i = d; i >= 0 && !cr[i]; i-- );
                   1193:     if ( i < 0 )
                   1194:       *nr = 0;
                   1195:     else
                   1196:       r->d = i;
                   1197:   }
                   1198: }
                   1199:
                   1200: void fft_mulup_main(UP,UP,int,UP *);
                   1201:
                   1202: void fft_mulup(UP n1,UP n2,UP *nr)
                   1203: {
                   1204:   int d1,d2,d,b1,b2,h;
                   1205:   UP n1lo,n1hi,n2lo,n2hi,lo,hi,t1,t2,mid1,mid2,mid,s1,s2;
                   1206:
                   1207:   if ( !n1 || !n2 )
                   1208:     *nr = 0;
                   1209:   else {
                   1210:     d1 = n1->d; d2 = n2->d;
                   1211:     if ( MAX(d1,d2) < up_fft_mag )
                   1212:       kmulup(n1,n2,nr);
                   1213:     else {
                   1214:       b1 = maxblenup(n1); b2 = maxblenup(n2);
                   1215:       if ( fft_available(d1,b1,d2,b2) )
                   1216:         fft_mulup_main(n1,n2,0,nr);
                   1217:       else {
                   1218:         d = MAX(d1,d2)+1; h = (d+1)/2;
                   1219:         decompup(n1,h,&n1lo,&n1hi);
                   1220:         decompup(n2,h,&n2lo,&n2hi);
                   1221:         fft_mulup(n1lo,n2lo,&lo);
                   1222:         fft_mulup(n1hi,n2hi,&hi);
                   1223:         shiftup(hi,2*h,&t2);
                   1224:         addup(lo,t2,&t1);
                   1225:         addup(hi,lo,&mid1);
                   1226:         subup(n1hi,n1lo,&s1); subup(n2lo,n2hi,&s2);
                   1227:         fft_mulup(s1,s2,&mid2);
                   1228:         addup(mid1,mid2,&mid);
                   1229:         shiftup(mid,h,&t2);
                   1230:         addup(t1,t2,nr);
                   1231:       }
                   1232:     }
                   1233:   }
                   1234: }
                   1235:
                   1236: void trunc_fft_mulup(UP n1,UP n2,int dbd,UP *nr)
                   1237: {
                   1238:   int d1,d2,b1,b2,m;
                   1239:   UP n1l,n1h,n2l,n2h,l,h,t,s,u;
                   1240:
                   1241:   if ( !n1 || !n2 )
                   1242:     *nr = 0;
                   1243:   else if ( dbd < 0 )
                   1244:     error("trunc_fft_mulup : invalid argument");
                   1245:   else if ( dbd == 0 )
                   1246:     *nr = 0;
                   1247:   else {
                   1248:     truncup(n1,dbd,&t); n1 = t;
                   1249:     truncup(n2,dbd,&t); n2 = t;
                   1250:     d1 = n1->d; d2 = n2->d;
                   1251:     if ( MAX(d1,d2) < up_fft_mag )
                   1252:       tkmulup(n1,n2,dbd,nr);
                   1253:     else {
                   1254:       b1 = maxblenup(n1); b2 = maxblenup(n2);
                   1255:       if ( fft_available(d1,b1,d2,b2) )
                   1256:         fft_mulup_main(n1,n2,dbd,nr);
                   1257:       else {
                   1258:         m = (dbd+1)/2;
                   1259:         decompup(n1,m,&n1l,&n1h);
                   1260:         decompup(n2,m,&n2l,&n2h);
                   1261:         fft_mulup(n1l,n2l,&l);
                   1262:         trunc_fft_mulup(n1l,n2h,dbd-m,&t);
                   1263:         trunc_fft_mulup(n2l,n1h,dbd-m,&s);
                   1264:         addup(t,s,&u);
                   1265:         shiftup(u,m,&h);
                   1266:         addup(l,h,&t);
                   1267:         truncup(t,dbd,nr);
                   1268:       }
                   1269:     }
                   1270:   }
                   1271: }
                   1272:
                   1273: void fft_squareup(UP n1,UP *nr)
                   1274: {
                   1275:   int d1,d,h,b1;
                   1276:   UP n1lo,n1hi,hi,lo,mid1,mid2,mid,s1,t1,t2;
                   1277:
                   1278:   if ( !n1 )
                   1279:     *nr = 0;
                   1280:   else {
                   1281:     d1 = n1->d;
                   1282:     if ( d1 < up_fft_mag )
                   1283:       ksquareup(n1,nr);
                   1284:     else {
                   1285:       b1 = maxblenup(n1);
                   1286:       if ( fft_available(d1,b1,d1,b1) )
                   1287:         fft_mulup_main(n1,n1,0,nr);
                   1288:       else {
                   1289:         d = d1+1; h = (d1+1)/2;
                   1290:         decompup(n1,h,&n1lo,&n1hi);
                   1291:         fft_squareup(n1hi,&hi);
                   1292:         fft_squareup(n1lo,&lo);
                   1293:         shiftup(hi,2*h,&t2);
                   1294:         addup(lo,t2,&t1);
                   1295:         addup(hi,lo,&mid1);
                   1296:         subup(n1hi,n1lo,&s1);
                   1297:         fft_squareup(s1,&mid2);
                   1298:         subup(mid1,mid2,&mid);
                   1299:         shiftup(mid,h,&t2);
                   1300:         addup(t1,t2,nr);
                   1301:       }
                   1302:     }
                   1303:   }
                   1304: }
                   1305:
                   1306: /*
                   1307:  * dbd == 0 => n1 * n2
                   1308:  * dbd > 0  => n1 * n2 mod x^dbd
                   1309:  * n1 == n2 => squaring
                   1310:  */
                   1311:
                   1312: void fft_mulup_main(UP n1,UP n2,int dbd,UP *nr)
                   1313: {
                   1314:   ModNum *f1,*f2,*w,*fr;
                   1315:   ModNum **frarray,**fa;
                   1316:   int *modarray,*ma;
                   1317:   int modarray_len;
                   1318:   int frarray_index = 0;
                   1319:   Z m,m1,m2,two,rem;
                   1320:   int d1,d2,dmin,i,mod,root,d,cond,bound;
                   1321:   UP r;
                   1322:
                   1323:   if ( !n1 || !n2 ) {
                   1324:     *nr = 0; return;
                   1325:   }
                   1326:   d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
                   1327:   if ( !d1 || !d2 ) {
                   1328:     mulup(n1,n2,nr); return;
                   1329:   }
                   1330:   m = ONE;
                   1331:   bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+1;
                   1332:   f1 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1333:   if ( n1 == n2 )
                   1334:     f2 = 0;
                   1335:   else
                   1336:     f2 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1337:   w = (ModNum *)MALLOC_ATOMIC(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
                   1338:   modarray_len = 1024;
                   1339:   modarray = (int *)MALLOC_ATOMIC(modarray_len*sizeof(unsigned int));
                   1340:   frarray = (ModNum **)MALLOC(modarray_len*sizeof(ModNum *));
                   1341:   for ( i = 0; i < NPrimes; i++ ) {
                   1342:     FFT_primes(i,&mod,&root,&d);
                   1343:     if ( (1<<d) < d1+d2+1 )
                   1344:       continue;
                   1345:     if ( frarray_index == modarray_len ) {
                   1346:       ma = (int *)MALLOC_ATOMIC(2*modarray_len*sizeof(unsigned int));
                   1347:       bcopy(modarray,ma,modarray_len*sizeof(unsigned int));
                   1348:       modarray = ma;
                   1349:       fa = (ModNum **)MALLOC(2*modarray_len*sizeof(ModNum *));
                   1350:       bcopy(frarray,fa,modarray_len*sizeof(ModNum *));
                   1351:       frarray = fa;
                   1352:       modarray_len *= 2;
                   1353:     }
                   1354:     modarray[frarray_index] = mod;
                   1355:     frarray[frarray_index++] = fr
                   1356:       = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1357:     uptofmarray(mod,n1,f1);
                   1358:     if ( !f2 )
                   1359:       cond = FFT_pol_square(d1,f1,fr,i,w);
                   1360:     else {
                   1361:       uptofmarray(mod,n2,f2);
                   1362:       cond = FFT_pol_product(d1,f1,d2,f2,fr,i,w);
                   1363:     }
                   1364:     if ( cond )
                   1365:       error("fft_mulup : error in FFT_pol_product");
1.2     ! noro     1366:     STOZ(mod,m1); mulz(m,m1,&m2); m = m2;
1.1       noro     1367:     if ( z_bits((Q)m) > bound ) {
                   1368:       if ( !dbd )
                   1369:         dbd = d1+d2+1;
                   1370:       crup(frarray,MIN(d1+d2,dbd-1),modarray,frarray_index,m,&r);
1.2     ! noro     1371:       STOZ(2,two);
1.1       noro     1372:       divqrz(m,two,&m2,&rem);
                   1373:       adj_coefup(r,m,m2,nr);
                   1374:       return;
                   1375:     }
                   1376:   }
                   1377:   error("fft_mulup : FFT_primes exhausted");
                   1378: }
                   1379:
                   1380: /* improved version */
                   1381:
                   1382: void crup(ModNum **f,int d,int *mod,int index,Z m,UP *r)
                   1383: {
                   1384:   mpz_t cof,c,rem;
                   1385:   mpz_t *s;
                   1386:   UP u;
                   1387:   int inv,i,j,t;
                   1388:
                   1389:   mpz_init(c); mpz_init(cof); mpz_init(rem);
                   1390:   s = (mpz_t *)MALLOC((d+1)*sizeof(mpz_t));
                   1391:   for ( j = 0; j <= d; j++ )
                   1392:     mpz_init_set_ui(s[j],0);
                   1393:   for ( i = 0; i < index; i++ ) {
                   1394:     mpz_fdiv_q_ui(cof,BDY(m),mod[i]);
                   1395:     t = mpz_fdiv_r_ui(rem,cof,mod[i]);
                   1396:     inv = invm(t,mod[i]);
                   1397:     mpz_mul_ui(c,cof,inv);
                   1398:     /* s += c*f[i] */
                   1399:     for ( j = 0; j <= d; j++ )
                   1400:       if ( f[i][j] )
                   1401:         mpz_addmul_ui(s[j],c,f[i][j]);
                   1402:   }
                   1403:   for ( i = d; i >= 0; i-- )
                   1404:     if ( s[i] )
                   1405:       break;
                   1406:   if ( i < 0 )
                   1407:     *r = 0;
                   1408:   else {
                   1409:     u = UPALLOC(i);
                   1410:     DEG(u) = i;
                   1411:     for ( j = 0; j <= i; j++ )
                   1412:       COEF(u)[j] = (Num)s[j];
                   1413:     remcup(u,m,r);
                   1414:   }
                   1415: }
                   1416:
                   1417: /*
                   1418:  * dbd == 0 => n1 * n2
                   1419:  * dbd > 0  => n1 * n2 mod x^dbd
                   1420:  * n1 == n2 => squaring
                   1421:  * return: n1*n2 mod Primes[modind[0]]*.prime...*Primes[modind[nmod-1]].prime
                   1422:  */
                   1423:
                   1424: void fft_mulup_specialmod_main(UP n1,UP n2,int dbd,int *modind,int nmod,UP *nr)
                   1425: {
                   1426:   ModNum *f1,*f2,*w,*fr;
                   1427:   ModNum **frarray;
                   1428:   Z m,m1,m2;
                   1429:   int *modarray;
                   1430:   int d1,d2,dmin,i,root,d,cond,bound;
                   1431:
                   1432:   if ( !n1 || !n2 ) {
                   1433:     *nr = 0; return;
                   1434:   }
                   1435:   d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
                   1436:   if ( !d1 || !d2 ) {
                   1437:     mulup(n1,n2,nr); return;
                   1438:   }
                   1439:   m = ONE;
                   1440:   bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+1;
                   1441:   f1 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1442:   if ( n1 == n2 )
                   1443:     f2 = 0;
                   1444:   else
                   1445:     f2 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1446:   w = (ModNum *)MALLOC_ATOMIC(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
                   1447:   frarray = (ModNum **)MALLOC(nmod*sizeof(ModNum *));
                   1448:   modarray = (int *)MALLOC_ATOMIC(nmod*sizeof(unsigned int *));
                   1449:
                   1450:   for ( i = 0; i < nmod; i++ ) {
                   1451:     FFT_primes(modind[i],&modarray[i],&root,&d);
                   1452:       if ( (1<<d) < d1+d2+1 )
                   1453:         error("fft_mulup_specialmod_main : invalid modulus");
                   1454:     frarray[i] = fr
                   1455:       = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1456:     uptofmarray(modarray[i],n1,f1);
                   1457:     if ( !f2 )
                   1458:       cond = FFT_pol_square(d1,f1,fr,modind[i],w);
                   1459:     else {
                   1460:       uptofmarray(modarray[i],n2,f2);
                   1461:       cond = FFT_pol_product(d1,f1,d2,f2,fr,modind[i],w);
                   1462:     }
                   1463:     if ( cond )
                   1464:       error("fft_mulup_specialmod_main : error in FFT_pol_product");
1.2     ! noro     1465:     STOZ(modarray[i],m1); mulz(m,m1,&m2); m = m2;
1.1       noro     1466:   }
                   1467:   if ( !dbd )
                   1468:     dbd = d1+d2+1;
                   1469:   crup(frarray,MIN(d1+d2,dbd-1),modarray,nmod,m,nr);
                   1470: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>