[BACK]Return to up.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib2 / asir2018 / engine

Annotation of OpenXM_contrib2/asir2018/engine/up.c, Revision 1.4

1.1       noro        1: /*
                      2:  * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
                      3:  * All rights reserved.
                      4:  *
                      5:  * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
                      6:  * non-exclusive and royalty-free license to use, copy, modify and
                      7:  * redistribute, solely for non-commercial and non-profit purposes, the
                      8:  * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
                      9:  * conditions of this Agreement. For the avoidance of doubt, you acquire
                     10:  * only a limited right to use the SOFTWARE hereunder, and FLL or any
                     11:  * third party developer retains all rights, including but not limited to
                     12:  * copyrights, in and to the SOFTWARE.
                     13:  *
                     14:  * (1) FLL does not grant you a license in any way for commercial
                     15:  * purposes. You may use the SOFTWARE only for non-commercial and
                     16:  * non-profit purposes only, such as academic, research and internal
                     17:  * business use.
                     18:  * (2) The SOFTWARE is protected by the Copyright Law of Japan and
                     19:  * international copyright treaties. If you make copies of the SOFTWARE,
                     20:  * with or without modification, as permitted hereunder, you shall affix
                     21:  * to all such copies of the SOFTWARE the above copyright notice.
                     22:  * (3) An explicit reference to this SOFTWARE and its copyright owner
                     23:  * shall be made on your publication or presentation in any form of the
                     24:  * results obtained by use of the SOFTWARE.
                     25:  * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
                     26:  * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
                     27:  * for such modification or the source code of the modified part of the
                     28:  * SOFTWARE.
                     29:  *
                     30:  * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
                     31:  * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
                     32:  * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
                     33:  * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
                     34:  * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
                     35:  * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
                     36:  * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
                     37:  * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
                     38:  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
                     39:  * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
                     40:  * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
                     41:  * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
                     42:  * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
                     43:  * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
                     44:  * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
                     45:  * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
                     46:  * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
                     47:  *
1.4     ! noro       48:  * $OpenXM: OpenXM_contrib2/asir2018/engine/up.c,v 1.3 2019/03/03 05:21:17 noro Exp $
1.1       noro       49: */
                     50: #include "ca.h"
                     51: #include <math.h>
                     52:
1.4     ! noro       53: extern struct oEGT eg_chrem,eg_fore,eg_back;
1.1       noro       54:
                     55: /*
                     56: int up_kara_mag=15;
                     57: int up_tkara_mag=15;
                     58: */
                     59: /*
                     60: int up_kara_mag=30;
                     61: int up_tkara_mag=20;
                     62: */
                     63: int up_kara_mag=20;
                     64: int up_tkara_mag=15;
                     65:
                     66: #if defined(sparc)
                     67: int up_fft_mag=50;
                     68: #else
                     69: int up_fft_mag=80;
                     70: #endif
                     71:
                     72: int up_rem_mag=1;
                     73: int debug_up;
                     74: int up_lazy;
                     75: extern int lm_lazy;
                     76: extern int current_ff;
                     77: extern int GC_dont_gc;
                     78:
                     79: void monicup(UP a,UP *b)
                     80: {
                     81:   UP w;
                     82:
                     83:   if ( !a )
                     84:     *b = 0;
                     85:   else {
                     86:     w = W_UPALLOC(0); w->d = 0;
                     87:     divnum(0,(Num)ONE,(Num)a->c[a->d],(Num *)&w->c[0]);
                     88:     mulup(a,w,b);
                     89:   }
                     90: }
                     91:
                     92: void simpup(UP a,UP *b)
                     93: {
                     94:   int i,d;
                     95:   UP c;
                     96:
                     97:   if ( !a )
                     98:     *b = 0;
                     99:   else {
                    100:     d = a->d;
                    101:     c = UPALLOC(d);
                    102:
                    103:     for ( i = 0; i <= d; i++ )
                    104:       simpnum(a->c[i],&c->c[i]);
                    105:     for ( i = d; i >= 0 && !c->c[i]; i-- );
                    106:     if ( i < 0 )
                    107:       *b = 0;
                    108:     else {
                    109:       c->d = i;
                    110:       *b = c;
                    111:     }
                    112:   }
                    113: }
                    114:
                    115: void simpnum(Num a,Num *b)
                    116: {
                    117:   LM lm;
                    118:   GF2N gf;
                    119:   GFPN gfpn;
                    120:
                    121:   if ( !a )
                    122:     *b = 0;
                    123:   else
                    124:     switch ( NID(a) ) {
                    125:       case N_LM:
                    126:         simplm((LM)a,&lm); *b = (Num)lm; break;
                    127:       case N_GF2N:
                    128:         simpgf2n((GF2N)a,&gf); *b = (Num)gf; break;
                    129:       case N_GFPN:
                    130:         simpgfpn((GFPN)a,&gfpn); *b = (Num)gfpn; break;
                    131:       default:
                    132:         *b = a; break;
                    133:     }
                    134: }
                    135:
                    136: void uremp(P p1,P p2,P *rp)
                    137: {
                    138:   UP n1,n2,r;
                    139:
                    140:   if ( !p1 || NUM(p1) )
                    141:     *rp = p1;
                    142:   else {
                    143:     ptoup(p1,&n1); ptoup(p2,&n2);
                    144:     remup(n1,n2,&r);
                    145:     uptop(r,rp);
                    146:   }
                    147: }
                    148:
                    149: void ugcdp(P p1,P p2,P *rp)
                    150: {
                    151:   UP n1,n2,r;
                    152:
                    153:   ptoup(p1,&n1); ptoup(p2,&n2);
                    154:   gcdup(n1,n2,&r);
                    155:   uptop(r,rp);
                    156: }
                    157:
                    158: void reversep(P p1,Q d,P *rp)
                    159: {
                    160:   UP n1,r;
                    161:
                    162:   if ( !p1 )
                    163:     *rp = 0;
                    164:   else {
                    165:     ptoup(p1,&n1);
1.2       noro      166:     reverseup(n1,ZTOS(d),&r);
1.1       noro      167:     uptop(r,rp);
                    168:   }
                    169: }
                    170:
                    171: void invmodp(P p1,Q d,P *rp)
                    172: {
                    173:   UP n1,r;
                    174:
                    175:   if ( !p1 )
                    176:     error("invmodp : invalid input");
                    177:   else {
                    178:     ptoup(p1,&n1);
1.2       noro      179:     invmodup(n1,ZTOS(d),&r);
1.1       noro      180:     uptop(r,rp);
                    181:   }
                    182: }
                    183:
                    184: void addup(UP n1,UP n2,UP *nr)
                    185: {
                    186:   UP r,t;
                    187:   int i,d1,d2;
                    188:   Num *c,*c1,*c2;
                    189:
                    190:   if ( !n1 ) {
                    191:     *nr = n2;
                    192:   } else if ( !n2 ) {
                    193:     *nr = n1;
                    194:   } else {
                    195:     if ( n2->d > n1->d ) {
                    196:       t = n1; n1 = n2; n2 = t;
                    197:     }
                    198:     d1 = n1->d; d2 = n2->d;
                    199:     *nr = r = UPALLOC(d1);
                    200:     c1 = n1->c; c2 = n2->c; c = r->c;
                    201:     for ( i = 0; i <= d2; i++ )
                    202:       addnum(0,*c1++,*c2++,c++);
                    203:     for ( ; i <= d1; i++ )
                    204:       *c++ = *c1++;
                    205:     for ( i = d1, c = r->c; i >=0 && !c[i]; i-- );
                    206:     if ( i < 0 )
                    207:       *nr = 0;
                    208:     else
                    209:       r->d = i;
                    210:   }
                    211: }
                    212:
                    213: void subup(UP n1,UP n2,UP *nr)
                    214: {
                    215:   UP r;
                    216:   int i,d1,d2,d;
                    217:   Num *c,*c1,*c2;
                    218:
                    219:   if ( !n1 ) {
                    220:     chsgnup(n2,nr);
                    221:   } else if ( !n2 ) {
                    222:     *nr = n1;
                    223:   } else {
                    224:     d1 = n1->d; d2 = n2->d; d = MAX(d1,d2);
                    225:     *nr = r = UPALLOC(d);
                    226:     c1 = n1->c; c2 = n2->c; c = r->c;
                    227:     if ( d1 >= d2 ) {
                    228:       for ( i = 0; i <= d2; i++ )
                    229:         subnum(0,*c1++,*c2++,c++);
                    230:       for ( ; i <= d1; i++ )
                    231:         *c++ = *c1++;
                    232:     } else {
                    233:       for ( i = 0; i <= d1; i++ )
                    234:         subnum(0,*c1++,*c2++,c++);
                    235:       for ( ; i <= d2; i++ )
                    236:         chsgnnum(*c2++,c++);
                    237:     }
                    238:     for ( i = d, c = r->c; i >=0 && !c[i]; i-- );
                    239:     if ( i < 0 )
                    240:       *nr = 0;
                    241:     else
                    242:       r->d = i;
                    243:   }
                    244: }
                    245:
                    246: void chsgnup(UP n1,UP *nr)
                    247: {
                    248:   UP r;
                    249:   int d1,i;
                    250:   Num *c1,*c;
                    251:
                    252:   if ( !n1 ) {
                    253:     *nr = 0;
                    254:   } else {
                    255:     d1 = n1->d;
                    256:     *nr = r = UPALLOC(d1); r->d = d1;
                    257:     c1 = n1->c; c = r->c;
                    258:     for ( i = 0; i <= d1; i++ )
                    259:       chsgnnum(*c1++,c++);
                    260:   }
                    261: }
                    262:
                    263: void hybrid_mulup(int ff,UP n1,UP n2,UP *nr)
                    264: {
                    265:   if ( !n1 || !n2 )
                    266:     *nr = 0;
                    267:   else if ( MAX(n1->d,n2->d) < up_fft_mag )
                    268:     kmulup(n1,n2,nr);
                    269:   else
                    270:     switch ( ff ) {
                    271:       case FF_GFP:
                    272:         fft_mulup_lm(n1,n2,nr); break;
                    273:       case FF_GF2N:
                    274:         kmulup(n1,n2,nr); break;
                    275:       default:
                    276:         fft_mulup(n1,n2,nr); break;
                    277:     }
                    278: }
                    279:
                    280: void hybrid_squareup(int ff,UP n1,UP *nr)
                    281: {
                    282:   if ( !n1 )
                    283:     *nr = 0;
                    284:   else if ( n1->d < up_fft_mag )
                    285:     ksquareup(n1,nr);
                    286:   else
                    287:     switch ( ff ) {
                    288:       case FF_GFP:
                    289:         fft_squareup_lm(n1,nr); break;
                    290:       case FF_GF2N:
                    291:         ksquareup(n1,nr); break;
                    292:       default:
                    293:         fft_squareup(n1,nr); break;
                    294:     }
                    295: }
                    296:
                    297: void hybrid_tmulup(int ff,UP n1,UP n2,int d,UP *nr)
                    298: {
1.3       noro      299:   UP t;
                    300:
1.1       noro      301:   if ( !n1 || !n2 )
                    302:     *nr = 0;
                    303:   else if ( MAX(n1->d,n2->d) < up_fft_mag )
                    304:     tkmulup(n1,n2,d,nr);
                    305:   else
                    306:     switch ( ff ) {
                    307:       case FF_GFP:
                    308:         trunc_fft_mulup_lm(n1,n2,d,nr); break;
                    309:       case FF_GF2N:
                    310:         tkmulup(n1,n2,d,nr); break;
                    311:       default:
                    312:         trunc_fft_mulup(n1,n2,d,nr); break;
                    313:     }
                    314: }
                    315:
                    316: void mulup(UP n1,UP n2,UP *nr)
                    317: {
                    318:   UP r;
                    319:   Num *pc1,*pc,*c1,*c2,*c;
                    320:   Num mul,t,s;
                    321:   int i,j,d1,d2;
                    322:
                    323:   if ( !n1 || !n2 )
                    324:     *nr = 0;
                    325:   else {
                    326:     d1 = n1->d; d2 = n2->d;
                    327:     *nr = r = UPALLOC(d1+d2); r->d = d1+d2;
                    328:     c1 = n1->c; c2 = n2->c; c = r->c;
                    329:     lm_lazy = 1;
                    330:     for ( i = 0; i <= d2; i++, c++ )
                    331:       if ( (mul = *c2++) != 0 )
                    332:         for ( j = 0, pc1 = c1, pc = c; j <= d1; j++ ) {
                    333:           mulnum(0,*pc1++,mul,&t); addnum(0,*pc,t,&s); *pc++ = s;
                    334:       }
                    335:     lm_lazy = 0;
                    336:     if ( !up_lazy )
                    337:       for ( i = 0, c = r->c; i <= r->d; i++ ) {
                    338:         simpnum(c[i],&t); c[i] = t;
                    339:       }
                    340:   }
                    341: }
                    342:
                    343: /* nr = c*n1 */
                    344:
                    345: void mulcup(Num c,UP n1,UP *nr)
                    346: {
                    347:   int d;
                    348:   UP r;
                    349:   Num t;
                    350:   Num *c1,*cr;
                    351:   int i;
                    352:
                    353:   if ( !c || !n1 )
                    354:     *nr = 0;
                    355:   else {
                    356:     d = n1->d;
                    357:     *nr = r = UPALLOC(d); r->d = d;
                    358:     c1 = n1->c; cr = r->c;
                    359:     lm_lazy = 1;
                    360:     for ( i = 0; i <= d; i++ )
                    361:       mulnum(0,c1[i],c,&cr[i]);
                    362:     lm_lazy = 0;
                    363:     if ( !up_lazy )
                    364:       for ( i = 0; i <= d; i++ ) {
                    365:         simpnum(cr[i],&t); cr[i] = t;
                    366:       }
                    367:   }
                    368: }
                    369:
                    370: void tmulup(UP n1,UP n2,int d,UP *nr)
                    371: {
                    372:   UP r;
                    373:   Num *pc1,*pc,*c1,*c2,*c;
                    374:   Num mul,t,s;
                    375:   int i,j,d1,d2,dr;
                    376:
                    377:   if ( !n1 || !n2 )
                    378:     *nr = 0;
                    379:   else {
                    380:     d1 = n1->d; d2 = n2->d;
                    381:     dr = MAX(d-1,d1+d2);
                    382:     *nr = r = UPALLOC(dr);
                    383:     c1 = n1->c; c2 = n2->c; c = r->c;
                    384:     lm_lazy = 1;
                    385:     for ( i = 0; i <= d2; i++, c++ )
                    386:       if ( ( mul = *c2++ ) != 0 )
                    387:         for ( j = 0, pc1 = c1, pc = c; (j <= d1) && (i+j < d); j++ ) {
                    388:           mulnum(0,*pc1++,mul,&t); addnum(0,*pc,t,&s); *pc++ = s;
                    389:       }
                    390:     lm_lazy = 0;
                    391:     c = r->c;
                    392:     if ( !up_lazy )
                    393:       for ( i = 0; i < d; i++ ) {
                    394:         simpnum(c[i],&t); c[i] = t;
                    395:       }
                    396:     for ( i = d-1; i >= 0 && !c[i]; i-- );
                    397:     if ( i < 0 )
                    398:       *nr = 0;
                    399:     else
                    400:       r->d = i;
                    401:   }
                    402: }
                    403:
                    404: void squareup(UP n1,UP *nr)
                    405: {
                    406:   UP r;
                    407:   Num *c1,*c;
                    408:   Num t,s,u;
                    409:   int i,j,h,d1,d;
                    410:
                    411:   if ( !n1 )
                    412:     *nr = 0;
                    413:   else if ( !n1->d ) {
                    414:     *nr = r = UPALLOC(0); r->d = 0;
                    415:     mulnum(0,n1->c[0],n1->c[0],&r->c[0]);
                    416:   } else {
                    417:     d1 = n1->d;
                    418:     d = 2*d1;
                    419:     *nr = r = UPALLOC(2*d); r->d = d;
                    420:     c1 = n1->c; c = r->c;
                    421:     lm_lazy = 1;
                    422:     for ( i = 0; i <= d1; i++ )
                    423:       mulnum(0,c1[i],c1[i],&c[2*i]);
                    424:     for ( j = 1; j < d; j++ ) {
                    425:       h = (j-1)/2;
                    426:       for ( i = MAX(j-d1,0), s = 0; i <= h; i++ ) {
                    427:         mulnum(0,c1[i],c1[j-i],&t); addnum(0,t,s,&u); s = u;
                    428:       }
                    429:       addnum(0,s,s,&t); addnum(0,t,c[j],&u); c[j] = u;
                    430:     }
                    431:     lm_lazy = 0;
                    432:     if ( !up_lazy )
                    433:       for ( i = 0, c = r->c; i <= d; i++ ) {
                    434:         simpnum(c[i],&t); c[i] = t;
                    435:       }
                    436:   }
                    437: }
                    438:
                    439: void remup(UP n1,UP n2,UP *nr)
                    440: {
                    441:   UP w,r;
                    442:
                    443:   if ( !n2 )
                    444:     error("remup : division by 0");
                    445:   else if ( !n1 || !n2->d )
                    446:     *nr = 0;
                    447:   else if ( n1->d < n2->d )
                    448:     *nr = n1;
                    449:   else {
                    450:     w = W_UPALLOC(n1->d); copyup(n1,w);
                    451:     remup_destructive(w,n2);
                    452:     if ( w->d < 0 )
                    453:       *nr = 0;
                    454:     else {
                    455:       *nr = r = UPALLOC(w->d); copyup(w,r);
                    456:     }
                    457:   }
                    458: }
                    459:
                    460: void remup_destructive(UP n1,UP n2)
                    461: {
                    462:   Num *c1,*c2;
                    463:   int d1,d2,i,j;
                    464:   Num m,t,s,mhc;
                    465:
                    466:   c1 = n1->c; c2 = n2->c;
                    467:   d1 = n1->d; d2 = n2->d;
                    468:   chsgnnum(c2[d2],&mhc);
                    469:   for ( i = d1; i >= d2; i-- ) {
                    470:     simpnum(c1[i],&t); c1[i] = t;
                    471:     if ( !c1[i] )
                    472:       continue;
                    473:     divnum(0,c1[i],mhc,&m);
                    474:     lm_lazy = 1;
                    475:     for ( j = d2; j >= 0; j-- ) {
                    476:       mulnum(0,c2[j],m,&t); addnum(0,c1[i+j-d2],t,&s);
                    477:       c1[i+j-d2] = s;
                    478:     }
                    479:     lm_lazy = 0;
                    480:   }
                    481:   for ( i = 0; i < d2; i++ ) {
                    482:     simpnum(c1[i],&t); c1[i] = t;
                    483:   }
                    484:   for ( i = d2-1; i >= 0 && !c1[i]; i-- );
                    485:   n1->d = i;
                    486: }
                    487:
                    488: void qrup(UP n1,UP n2,UP *nq,UP *nr)
                    489: {
                    490:   UP w,r,q;
                    491:   struct oUP t;
                    492:   Num m;
                    493:   int d;
                    494:
                    495:   if ( !n2 )
                    496:     error("qrup : division by 0");
                    497:   else if ( !n1 ) {
                    498:     *nq = 0; *nr = 0;
                    499:   } else if ( !n2->d )
                    500:     if ( !n1->d ) {
                    501:       divnum(0,n1->c[0],n2->c[0],(Num *)nq); *nr = 0;
                    502:     } else {
                    503:       divnum(0,(Num)ONE,n2->c[0],&m);
                    504:       t.d = 0; t.c[0] = m;
                    505:       mulup(n1,&t,nq); *nr = 0;
                    506:     }
                    507:   else if ( n1->d < n2->d ) {
                    508:     *nq = 0; *nr = n1;
                    509:   } else {
                    510:     w = W_UPALLOC(n1->d); copyup(n1,w);
                    511:     qrup_destructive(w,n2);
                    512:     d = n1->d-n2->d;
                    513:     *nq = q = UPALLOC(d); q->d = d;
                    514:     bcopy(w->c+n2->d,q->c,(d+1)*sizeof(Num));
                    515:     if ( w->d < 0 )
                    516:       *nr = 0;
                    517:     else {
                    518:       *nr = r = UPALLOC(w->d); copyup(w,r);
                    519:     }
                    520:   }
                    521: }
                    522:
                    523: void qrup_destructive(UP n1,UP n2)
                    524: {
                    525:   Num *c1,*c2;
                    526:   int d1,d2,i,j;
                    527:   Num q,t,s,hc;
                    528:
                    529:   c1 = n1->c; c2 = n2->c;
                    530:   d1 = n1->d; d2 = n2->d;
                    531:   hc = c2[d2];
                    532:   for ( i = d1; i >= d2; i-- ) {
                    533:     simpnum(c1[i],&t); c1[i] = t;
                    534:     if ( c1[i] ) {
                    535:       divnum(0,c1[i],hc,&q);
                    536:       lm_lazy = 1;
                    537:       for ( j = d2; j >= 0; j-- ) {
                    538:         mulnum(0,c2[j],q,&t); subnum(0,c1[i+j-d2],t,&s);
                    539:         c1[i+j-d2] = s;
                    540:       }
                    541:       lm_lazy = 0;
                    542:     } else
                    543:       q = 0;
                    544:     c1[i] = q;
                    545:   }
                    546:   for ( i = 0; i < d2; i++ ) {
                    547:     simpnum(c1[i],&t); c1[i] = t;
                    548:   }
                    549:   for ( i = d2-1; i >= 0 && !c1[i]; i-- );
                    550:   n1->d = i;
                    551: }
                    552:
                    553: void gcdup(UP n1,UP n2,UP *nr)
                    554: {
                    555:   UP w1,w2,t,r;
                    556:   int d1,d2;
                    557:
                    558:   if ( !n1 )
                    559:     *nr = n2;
                    560:   else if ( !n2 )
                    561:     *nr = n1;
                    562:   else if ( !n1->d || !n2->d ) {
                    563:     *nr = r = UPALLOC(0); r->d = 0;
                    564:     divnum(0,n1->c[n1->d],n1->c[n1->d],&r->c[0]);
                    565:   } else {
                    566:     d1 = n1->d; d2 = n2->d;
                    567:     if ( d2 > d1 ) {
                    568:       w1 = W_UPALLOC(d2); copyup(n2,w1);
                    569:       w2 = W_UPALLOC(d1); copyup(n1,w2);
                    570:     } else {
                    571:       w1 = W_UPALLOC(d1); copyup(n1,w1);
                    572:       w2 = W_UPALLOC(d2); copyup(n2,w2);
                    573:     }
                    574:     d1 = w1->d; d2 = w2->d;
                    575:     while ( 1 ) {
                    576:       remup_destructive(w1,w2);
                    577:       if ( w1->d < 0 ) {
                    578:         *nr = r = UPALLOC(w2->d); copyup(w2,r); return;
                    579:       } else if ( !w1->d ) {
                    580:         *nr = r = UPALLOC(0); r->d = 0;
                    581:         divnum(0,n1->c[n1->d],n1->c[n1->d],&r->c[0]); return;
                    582:       } else {
                    583:         t = w1; w1 = w2; w2 = t;
                    584:       }
                    585:     }
                    586:   }
                    587: }
                    588:
                    589: /* compute r s.t. a*r = 1 mod m */
                    590:
                    591: void extended_gcdup(UP a,UP m,UP *r)
                    592: {
                    593:   UP one,g1,g2,a1,a2,a3,b1,b2,b3,inv,quo,rem,t;
                    594:   Num i;
                    595:
                    596:   one = UPALLOC(0); one->d = 0; one->c[0] = (Num)ONELM;
                    597:   g1 = m; g2 = a;
                    598:   a1 = one; a2 = 0;
                    599:   b1 = 0; b2 = one;
                    600:   /* a2*m+b2*a = g2 always holds */
                    601:   while ( g2->d ) {
                    602:     qrup(g1,g2,&quo,&rem); g1 = g2; g2 = rem;
                    603:     mulup(a2,quo,&t); subup(a1,t,&a3); a1 = a2; a2 = a3;
                    604:     mulup(b2,quo,&t); subup(b1,t,&b3); b1 = b2; b2 = b3;
                    605:   }
                    606:   /* now a2*m+b2*a = g2 (const) */
                    607:   divnum(0,(Num)ONE,g2->c[0],&i);
                    608:   inv = UPALLOC(0); inv->d = 0; inv->c[0] = i;
                    609:   mulup(b2,inv,r);
                    610: }
                    611:
                    612: void reverseup(UP n1,int d,UP *nr)
                    613: {
                    614:   Num *c1,*c;
                    615:   int i,d1;
                    616:   UP r;
                    617:
                    618:   if ( !n1 )
                    619:     *nr = 0;
                    620:   else if ( d < (d1 = n1->d) )
                    621:     error("reverseup : invalid input");
                    622:   else {
                    623:     *nr = r = UPALLOC(d);
                    624:     c = r->c;
                    625:     bzero((char *)c,(d+1)*sizeof(Num));
                    626:     c1 = n1->c;
                    627:     for ( i = 0; i <= d1; i++ )
                    628:       c[d-i] = c1[i];
                    629:     for ( i = d; i >= 0 && !c[i]; i-- );
                    630:     r->d = i;
                    631:     if ( i < 0 )
                    632:       *nr = 0;
                    633:   }
                    634: }
                    635:
                    636: void invmodup(UP n1,int d,UP *nr)
                    637: {
                    638:   UP r;
                    639:   Num s,t,u,hinv;
                    640:   int i,j,dmin;
                    641:   Num *w,*c,*cr;
                    642:
                    643:   if ( !n1 || !n1->c[0] )
                    644:     error("invmodup : invalid input");
                    645:   else if ( !n1->d ) {
                    646:     *nr = r = UPALLOC(0); r->d = 0;
                    647:     divnum(0,(Num)ONE,n1->c[0],&r->c[0]);
                    648:   } else {
                    649:     *nr = r = UPALLOC(d);
                    650:     w = (Num *)ALLOCA((d+1)*sizeof(Q));
                    651:     bzero((char *)w,(d+1)*sizeof(Q));
                    652:     dmin = MIN(d,n1->d);
                    653:     divnum(0,(Num)ONE,n1->c[0],&hinv);
                    654:     for ( i = 0, c = n1->c; i <= dmin; i++ )
                    655:       mulnum(0,c[i],hinv,&w[i]);
                    656:     cr = r->c;
                    657:     cr[0] = w[0];
                    658:     for ( i = 1; i <= d; i++ ) {
                    659:       for ( j = 1, s = w[i]; j < i; j++ ) {
                    660:         mulnum(0,cr[j],w[i-j],&t);
                    661:         addnum(0,s,t,&u);
                    662:         s = u;
                    663:       }
                    664:       chsgnnum(s,&cr[i]);
                    665:     }
                    666:     for ( i = 0; i <= d; i++ ) {
                    667:       mulnum(0,cr[i],hinv,&t); cr[i] = t;
                    668:     }
                    669:     for ( i = d; i >= 0 && !cr[i]; i-- );
                    670:     r->d = i;
                    671:   }
                    672: }
                    673:
                    674: void pwrup(UP n,Z e,UP *nr)
                    675: {
                    676:   UP y,z,t;
                    677:   Z q,r,two;
                    678:
                    679:   y = UPALLOC(0); y->d = 0; y->c[0] = (Num)ONE;
                    680:   z = n;
                    681:   if ( !e )
                    682:     *nr = y;
                    683:   else if ( UNIQ(e) )
                    684:     *nr = n;
                    685:   else {
1.2       noro      686:     STOZ(2,two);
1.1       noro      687:     while ( 1 ) {
                    688:       divqrz(e,two,&q,&r); e = q;
                    689:       if ( r ) {
                    690:         mulup(z,y,&t); y = t;
                    691:         if ( !e ) {
                    692:           *nr = y;
                    693:           return;
                    694:         }
                    695:       }
                    696:       mulup(z,z,&t); z = t;
                    697:     }
                    698:   }
                    699: }
                    700:
                    701: int compup(UP n1,UP n2)
                    702: {
                    703:   int i,r;
                    704:
                    705:   if ( !n1 )
                    706:     return n2 ? -1 : 0;
                    707:   else if ( !n2 )
                    708:     return 1;
                    709:   else if ( n1->d > n2->d )
                    710:     return 1;
                    711:   else if ( n1->d < n2->d )
                    712:     return -1;
                    713:   else {
                    714:     for ( i = n1->d; i >= 0; i-- ) {
                    715:       r = compnum(CO,n1->c[i],n2->c[i]);
                    716:       if ( r )
                    717:         return r;
                    718:     }
                    719:     return 0;
                    720:   }
                    721: }
                    722:
                    723: void kmulp(VL vl,P n1,P n2,P *nr)
                    724: {
                    725:   UP b1,b2,br;
                    726:
                    727:   if ( !n1 || !n2 )
                    728:     *nr = 0;
                    729:   else if ( OID(n1) == O_N || OID(n2) == O_N )
                    730:     mulp(vl,n1,n2,nr);
                    731:   else {
                    732:     ptoup(n1,&b1); ptoup(n2,&b2);
                    733:     kmulup(b1,b2,&br);
                    734:     uptop(br,nr);
                    735:   }
                    736: }
                    737:
                    738: void ksquarep(VL vl,P n1,P *nr)
                    739: {
                    740:   UP b1,br;
                    741:
                    742:   if ( !n1 )
                    743:     *nr = 0;
                    744:   else if ( OID(n1) == O_N )
                    745:     mulp(vl,n1,n1,nr);
                    746:   else {
                    747:     ptoup(n1,&b1);
                    748:     ksquareup(b1,&br);
                    749:     uptop(br,nr);
                    750:   }
                    751: }
                    752:
                    753: void kmulup(UP n1,UP n2,UP *nr)
                    754: {
                    755:   int d1,d2;
                    756:
                    757:   if ( !n1 || !n2 ) {
                    758:     *nr = 0; return;
                    759:   }
                    760:   d1 = DEG(n1)+1; d2 = DEG(n2)+1;
                    761:   if ( (d1 < up_kara_mag) && (d2 < up_kara_mag) )
                    762:     mulup(n1,n2,nr);
                    763:   else
                    764:     kmulupmain(n1,n2,nr);
                    765: }
                    766:
                    767: void ksquareup(UP n1,UP *nr)
                    768: {
                    769:   int d1;
                    770:
                    771:   if ( !n1 ) {
                    772:     *nr = 0; return;
                    773:   }
                    774:   d1 = DEG(n1)+1;
                    775:   if ( (d1 < up_kara_mag) )
                    776:     squareup(n1,nr);
                    777:   else
                    778:     ksquareupmain(n1,nr);
                    779: }
                    780:
                    781: void copyup(UP n1,UP n2)
                    782: {
                    783:   n2->d = n1->d;
                    784:   bcopy((char *)n1->c,(char *)n2->c,(n1->d+1)*sizeof(Q));
                    785: }
                    786:
                    787: void c_copyup(UP n,int len,pointer *p)
                    788: {
                    789:   if ( n )
                    790:     bcopy((char *)COEF(n),(char *)p,MIN((DEG(n)+1),len)*sizeof(pointer));
                    791: }
                    792:
                    793: void kmulupmain(UP n1,UP n2,UP *nr)
                    794: {
                    795:   int d1,d2,h;
                    796:   UP n1lo,n1hi,n2lo,n2hi,hi,lo,mid1,mid2,mid,s1,s2,t1,t2;
                    797:
                    798:   d1 = DEG(n1)+1; d2 = DEG(n2)+1; h = (MAX(d1,d2)+1)/2;
                    799:   decompup(n1,h,&n1lo,&n1hi);
                    800:   decompup(n2,h,&n2lo,&n2hi);
                    801:   kmulup(n1lo,n2lo,&lo);
                    802:   kmulup(n1hi,n2hi,&hi);
                    803:   shiftup(hi,2*h,&t2);
                    804:   addup(lo,t2,&t1);
                    805:   addup(hi,lo,&mid1);
                    806:   subup(n1hi,n1lo,&s1); subup(n2lo,n2hi,&s2); kmulup(s1,s2,&mid2);
                    807:   addup(mid1,mid2,&mid);
                    808:   shiftup(mid,h,&t2);
                    809:   addup(t1,t2,nr);
                    810: }
                    811:
                    812: void ksquareupmain(UP n1,UP *nr)
                    813: {
                    814:   int d1,h;
                    815:   UP n1lo,n1hi,hi,lo,mid1,mid2,mid,s1,t1,t2;
                    816:
                    817:   d1 = DEG(n1)+1; h = (d1+1)/2;
                    818:   decompup(n1,h,&n1lo,&n1hi);
                    819:   ksquareup(n1hi,&hi); ksquareup(n1lo,&lo);
                    820:   shiftup(hi,2*h,&t2);
                    821:   addup(lo,t2,&t1);
                    822:   addup(hi,lo,&mid1);
                    823:   subup(n1hi,n1lo,&s1); ksquareup(s1,&mid2);
                    824:   subup(mid1,mid2,&mid);
                    825:   shiftup(mid,h,&t2);
                    826:   addup(t1,t2,nr);
                    827: }
                    828:
                    829: void rembymulup(UP n1,UP n2,UP *nr)
                    830: {
                    831:   int d1,d2,d;
                    832:   UP r1,r2,inv2,t,s,q;
                    833:
                    834:   if ( !n2 )
                    835:     error("rembymulup : division by 0");
                    836:   else if ( !n1 || !n2->d )
                    837:     *nr = 0;
                    838:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    839:     *nr = n1;
                    840:   else {
                    841:     d = d1-d2;
                    842:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    843:     reverseup(n2,d2,&r2);
                    844:     invmodup(r2,d,&inv2);
                    845:     kmulup(r1,inv2,&t);
                    846:     truncup(t,d+1,&s);
                    847:     reverseup(s,d,&q);
                    848:     kmulup(q,n2,&t); subup(n1,t,nr);
                    849:   }
                    850: }
                    851:
                    852: /*
                    853:   deg n2 = d
                    854:   deg n1 <= 2*d-1
                    855:   inv2 = inverse of reversep(n2) mod x^d
                    856: */
                    857:
                    858: void hybrid_rembymulup_special(int ff,UP n1,UP n2,UP inv2,UP *nr)
                    859: {
                    860:   int d1,d2,d;
1.3       noro      861:   UP r1,t,s,q,t1;
1.1       noro      862:
                    863:   if ( !n2 )
                    864:     error("hybrid_rembymulup : division by 0");
                    865:   else if ( !n1 || !n2->d )
                    866:     *nr = 0;
                    867:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    868:     *nr = n1;
                    869:   else {
                    870:     d = d1-d2;
                    871:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    872:     hybrid_tmulup(ff,r1,inv2,d+1,&t);
                    873:     truncup(t,d+1,&s);
                    874:     reverseup(s,d,&q);
                    875:
                    876:     hybrid_tmulup(ff,q,n2,d2,&t);
                    877:     truncup(n1,d2,&s);
                    878:     subup(s,t,nr);
                    879:   }
                    880: }
                    881:
                    882: void rembymulup_special(UP n1,UP n2,UP inv2,UP *nr)
                    883: {
                    884:   int d1,d2,d;
                    885:   UP r1,t,s,q;
                    886:
                    887:   if ( !n2 )
                    888:     error("rembymulup : division by 0");
                    889:   else if ( !n1 || !n2->d )
                    890:     *nr = 0;
                    891:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    892:     *nr = n1;
                    893:   else {
                    894:     d = d1-d2;
                    895:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    896:     tkmulup(r1,inv2,d+1,&t);
                    897:     truncup(t,d+1,&s);
                    898:     reverseup(s,d,&q);
                    899:
                    900:     tkmulup(q,n2,d2,&t);
                    901:     truncup(n1,d2,&s);
                    902:     subup(s,t,nr);
                    903:   }
                    904: }
                    905:
                    906: /* *nr = n1*n2 mod x^d */
                    907:
                    908: void tkmulup(UP n1,UP n2,int d,UP *nr)
                    909: {
                    910:   int m;
                    911:   UP n1l,n1h,n2l,n2h,l,h,t,s,u;
                    912:
                    913:   if ( d < 0 )
                    914:     error("tkmulup : invalid argument");
                    915:   else if ( d == 0 )
                    916:     *nr = 0;
                    917:   else {
                    918:     truncup(n1,d,&t); n1 = t;
                    919:     truncup(n2,d,&t); n2 = t;
                    920:     if ( !n1 || !n2 )
                    921:       *nr = 0;
                    922:     else if ( n1->d < up_tkara_mag || n2->d < up_tkara_mag )
                    923:       tmulup(n1,n2,d,nr);
                    924:     else {
                    925:       m = (d+1)/2;
                    926:       decompup(n1,m,&n1l,&n1h);
                    927:       decompup(n2,m,&n2l,&n2h);
                    928:       kmulup(n1l,n2l,&l);
                    929:       tkmulup(n1l,n2h,d-m,&t);
                    930:       tkmulup(n2l,n1h,d-m,&s);
                    931:       addup(t,s,&u);
                    932:       shiftup(u,m,&h);
                    933:       addup(l,h,&t);
                    934:       truncup(t,d,nr);
                    935:     }
                    936:   }
                    937: }
                    938:
                    939: /* n->n*x^d */
                    940:
                    941: void shiftup(UP n,int d,UP *nr)
                    942: {
                    943:   int dr;
                    944:   UP r;
                    945:
                    946:   if ( !n )
                    947:     *nr = 0;
                    948:   else {
                    949:     dr = n->d+d;
                    950:     *nr = r = UPALLOC(dr); r->d = dr;
                    951:     bzero(r->c,(dr+1)*sizeof(Num));
                    952:     bcopy(n->c,r->c+d,(n->d+1)*sizeof(Num));
                    953:   }
                    954: }
                    955:
                    956: void fft_rembymulup_special(UP n1,UP n2,UP inv2,UP *nr)
                    957: {
                    958:   int d1,d2,d;
                    959:   UP r1,t,s,q,u;
                    960:
                    961:   if ( !n2 )
                    962:     error("rembymulup : division by 0");
                    963:   else if ( !n1 || !n2->d )
                    964:     *nr = 0;
                    965:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    966:     *nr = n1;
                    967:   else {
                    968:     d = d1-d2;
                    969:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    970:     trunc_fft_mulup(r1,inv2,d+1,&t);
                    971:     truncup(t,d+1,&s);
                    972:     reverseup(s,d,&q);
                    973:     trunc_fft_mulup(q,n2,d2,&t); truncup(t,d2,&u);
                    974:     truncup(n1,d2,&s);
                    975:     subup(s,u,nr);
                    976:   }
                    977: }
                    978:
                    979: void set_degreeup(UP n,int d)
                    980: {
                    981:   int i;
                    982:
                    983:   for ( i = d; i >= 0 && !n->c[i]; i-- );
                    984:   n->d = i;
                    985: }
                    986:
                    987: /* n -> n0 + x^d n1 */
                    988:
                    989: void decompup(UP n,int d,UP *n0,UP *n1)
                    990: {
                    991:   int dn;
                    992:   UP r0,r1;
                    993:
                    994:   if ( !n || d > n->d ) {
                    995:     *n0 = n; *n1 = 0;
                    996:   } else if ( d < 0 )
                    997:     error("decompup : invalid argument");
                    998:   else if ( d == 0 ) {
                    999:     *n0 = 0; *n1 = n;
                   1000:   } else {
                   1001:     dn = n->d;
                   1002:     *n0 = r0 = UPALLOC(d-1);
                   1003:     *n1 = r1 = UPALLOC(dn-d);
                   1004:     bcopy(n->c,r0->c,d*sizeof(Num));
                   1005:     bcopy(n->c+d,r1->c,(dn-d+1)*sizeof(Num));
                   1006:     set_degreeup(r0,d-1);
                   1007:     if ( r0->d < 0 )
                   1008:       *n0 = 0;
                   1009:     set_degreeup(r1,dn-d);
                   1010:     if ( r1->d < 0 )
                   1011:       *n1 = 0;
                   1012:   }
                   1013: }
                   1014:
                   1015: /* n -> n mod x^d */
                   1016:
                   1017: void truncup(UP n1,int d,UP *nr)
                   1018: {
                   1019:   int i;
                   1020:   UP r;
                   1021:
                   1022:   if ( !n1 || d > n1->d )
                   1023:     *nr = n1;
                   1024:   else if ( d < 0 )
                   1025:     error("truncup : invalid argument");
                   1026:   else if ( d == 0 )
                   1027:     *nr = 0;
                   1028:   else {
                   1029:      *nr = r = UPALLOC(d-1);
                   1030:     bcopy(n1->c,r->c,d*sizeof(Num));
                   1031:     for ( i = d-1; i >= 0 && !r->c[i]; i-- );
                   1032:     if ( i < 0 )
                   1033:       *nr = 0;
                   1034:     else
                   1035:       r->d = i;
                   1036:   }
                   1037: }
                   1038:
                   1039: int int_bits(int t)
                   1040: {
                   1041:   int k;
                   1042:
                   1043:   for ( k = 0; t; t>>=1, k++);
                   1044:   return k;
                   1045: }
                   1046:
                   1047: /* n is assumed to be LM or integer coefficient */
                   1048:
                   1049: int maxblenup(UP n)
                   1050: {
                   1051:   int m,r,i,d;
                   1052:   Num *c;
                   1053:
                   1054:   if ( !n )
                   1055:     return 0;
                   1056:   d = n->d; c = (Num *)n->c;
                   1057:   for ( r = 0, i = 0; i <= d; i++ )
                   1058:     if ( c[i] ) {
                   1059:       switch ( NID(c[i]) ) {
                   1060:         case N_LM:
                   1061:           m = mpz_sizeinbase(BDY((LM)c[i]),2);
                   1062:           break;
                   1063:         case N_Q:
                   1064:           m = mpz_sizeinbase(BDY((Z)c[i]),2);
                   1065:           break;
                   1066:         default:
                   1067:           error("maxblenup : invalid coefficient");
                   1068:       }
                   1069:       r = MAX(m,r);
                   1070:     }
                   1071:   return r;
                   1072: }
                   1073:
                   1074: /* YYY */
                   1075:
                   1076: void uptofmarray(int mod,UP n,ModNum *f)
                   1077: {
                   1078:   int d,i;
                   1079:   unsigned int r;
                   1080:   Num *c;
                   1081:   Z z;
                   1082:
                   1083:   if ( !n )
                   1084:     return;
                   1085:   else {
                   1086:     d = n->d; c = (Num *)n->c;
                   1087:     for ( i = 0; i <= d; i++ ) {
                   1088:       if ( c[i] ) {
                   1089:         switch ( NID(c[i]) ) {
                   1090:           case N_LM:
                   1091:             MPZTOZ(BDY((LM)c[i]),z);
                   1092:             f[i] = remqi((Q)z,mod);
                   1093:             break;
                   1094:           case N_Q:
                   1095:             f[i] = remqi((Q)c[i],mod);
                   1096:             break;
                   1097:           default:
                   1098:             error("uptofmarray : invalid coefficient");
                   1099:         }
                   1100:       } else
                   1101:         f[i] = 0;
                   1102:     }
                   1103:   }
                   1104: }
                   1105:
                   1106: void fmarraytoup(ModNum *f,int d,UP *nr)
                   1107: {
                   1108:   int i;
                   1109:   Z *c;
                   1110:   UP r;
                   1111:
                   1112:   if ( d < 0 ) {
                   1113:     *nr = 0;
                   1114:   } else {
                   1115:     *nr = r = UPALLOC(d); c = (Z *)r->c;
                   1116:     for ( i = 0; i <= d; i++ ) {
1.2       noro     1117:       UTOZ((unsigned int)f[i],c[i]);
1.1       noro     1118:     }
                   1119:     for ( i = d; i >= 0 && !c[i]; i-- );
                   1120:     if ( i < 0 )
                   1121:       *nr = 0;
                   1122:     else
                   1123:       r->d = i;
                   1124:   }
                   1125: }
                   1126:
                   1127: /* f[i]: an array of length n */
                   1128:
                   1129: void uiarraytoup(unsigned int **f,int n,int d,UP *nr)
                   1130: {
                   1131:   int i,j;
                   1132:   Z *c;
                   1133:   UP r;
                   1134:
                   1135:   if ( d < 0 ) {
                   1136:     *nr = 0;
                   1137:   } else {
                   1138:     *nr = r = UPALLOC(d); c = (Z *)r->c;
                   1139:     for ( i = 0; i <= d; i++ )
                   1140:       intarraytoz(n,f[i],&c[i]);
                   1141:     for ( i = d; i >= 0 && !c[i]; i-- );
                   1142:     if ( i < 0 )
                   1143:       *nr = 0;
                   1144:     else
                   1145:       r->d = i;
                   1146:   }
                   1147: }
                   1148:
                   1149: void adj_coefup(UP n,Z m,Z m2,UP *nr)
                   1150: {
                   1151:   int d;
                   1152:   Z *c,*cr;
                   1153:   Z mq;
                   1154:   int i;
                   1155:   UP r;
                   1156:
                   1157:   if ( !n )
                   1158:     *nr = 0;
                   1159:   else {
                   1160:     d = n->d; c = (Z *)n->c;
                   1161:     *nr = r = UPALLOC(d); cr = (Z *)r->c;
                   1162:     for ( i = 0; i <= d; i++ ) {
                   1163:       if ( !c[i] )
                   1164:         continue;
                   1165:       if ( cmpz(c[i],m2) > 0 )
                   1166:         subz(c[i],m,&cr[i]);
                   1167:       else
                   1168:         cr[i] = c[i];
                   1169:     }
                   1170:     for ( i = d; i >= 0 && !cr[i]; i-- );
                   1171:     if ( i < 0 )
                   1172:       *nr = 0;
                   1173:     else
                   1174:       r->d = i;
                   1175:   }
                   1176: }
                   1177:
                   1178: /* n is assumed to have positive integer coefficients. */
                   1179:
                   1180: void remcup(UP n,Z mod,UP *nr)
                   1181: {
                   1182:   int i,d;
                   1183:   Z *c,*cr;
                   1184:   UP r;
                   1185:   Z t;
                   1186:
                   1187:   if ( !n )
                   1188:     *nr = 0;
                   1189:   else {
                   1190:     d = n->d; c = (Z *)n->c;
                   1191:     *nr = r = UPALLOC(d); cr = (Z *)r->c;
                   1192:     for ( i = 0; i <= d; i++ )
                   1193:       remz(c[i],mod,&cr[i]);
                   1194:     for ( i = d; i >= 0 && !cr[i]; i-- );
                   1195:     if ( i < 0 )
                   1196:       *nr = 0;
                   1197:     else
                   1198:       r->d = i;
                   1199:   }
                   1200: }
                   1201:
                   1202: void fft_mulup_main(UP,UP,int,UP *);
                   1203:
                   1204: void fft_mulup(UP n1,UP n2,UP *nr)
                   1205: {
                   1206:   int d1,d2,d,b1,b2,h;
                   1207:   UP n1lo,n1hi,n2lo,n2hi,lo,hi,t1,t2,mid1,mid2,mid,s1,s2;
                   1208:
                   1209:   if ( !n1 || !n2 )
                   1210:     *nr = 0;
                   1211:   else {
                   1212:     d1 = n1->d; d2 = n2->d;
                   1213:     if ( MAX(d1,d2) < up_fft_mag )
                   1214:       kmulup(n1,n2,nr);
                   1215:     else {
                   1216:       b1 = maxblenup(n1); b2 = maxblenup(n2);
                   1217:       if ( fft_available(d1,b1,d2,b2) )
                   1218:         fft_mulup_main(n1,n2,0,nr);
                   1219:       else {
                   1220:         d = MAX(d1,d2)+1; h = (d+1)/2;
                   1221:         decompup(n1,h,&n1lo,&n1hi);
                   1222:         decompup(n2,h,&n2lo,&n2hi);
                   1223:         fft_mulup(n1lo,n2lo,&lo);
                   1224:         fft_mulup(n1hi,n2hi,&hi);
                   1225:         shiftup(hi,2*h,&t2);
                   1226:         addup(lo,t2,&t1);
                   1227:         addup(hi,lo,&mid1);
                   1228:         subup(n1hi,n1lo,&s1); subup(n2lo,n2hi,&s2);
                   1229:         fft_mulup(s1,s2,&mid2);
                   1230:         addup(mid1,mid2,&mid);
                   1231:         shiftup(mid,h,&t2);
                   1232:         addup(t1,t2,nr);
                   1233:       }
                   1234:     }
                   1235:   }
                   1236: }
                   1237:
                   1238: void trunc_fft_mulup(UP n1,UP n2,int dbd,UP *nr)
                   1239: {
                   1240:   int d1,d2,b1,b2,m;
                   1241:   UP n1l,n1h,n2l,n2h,l,h,t,s,u;
                   1242:
                   1243:   if ( !n1 || !n2 )
                   1244:     *nr = 0;
                   1245:   else if ( dbd < 0 )
                   1246:     error("trunc_fft_mulup : invalid argument");
                   1247:   else if ( dbd == 0 )
                   1248:     *nr = 0;
                   1249:   else {
                   1250:     truncup(n1,dbd,&t); n1 = t;
                   1251:     truncup(n2,dbd,&t); n2 = t;
                   1252:     d1 = n1->d; d2 = n2->d;
                   1253:     if ( MAX(d1,d2) < up_fft_mag )
                   1254:       tkmulup(n1,n2,dbd,nr);
                   1255:     else {
                   1256:       b1 = maxblenup(n1); b2 = maxblenup(n2);
                   1257:       if ( fft_available(d1,b1,d2,b2) )
                   1258:         fft_mulup_main(n1,n2,dbd,nr);
                   1259:       else {
                   1260:         m = (dbd+1)/2;
                   1261:         decompup(n1,m,&n1l,&n1h);
                   1262:         decompup(n2,m,&n2l,&n2h);
                   1263:         fft_mulup(n1l,n2l,&l);
                   1264:         trunc_fft_mulup(n1l,n2h,dbd-m,&t);
                   1265:         trunc_fft_mulup(n2l,n1h,dbd-m,&s);
                   1266:         addup(t,s,&u);
                   1267:         shiftup(u,m,&h);
                   1268:         addup(l,h,&t);
                   1269:         truncup(t,dbd,nr);
                   1270:       }
                   1271:     }
                   1272:   }
                   1273: }
                   1274:
                   1275: void fft_squareup(UP n1,UP *nr)
                   1276: {
                   1277:   int d1,d,h,b1;
                   1278:   UP n1lo,n1hi,hi,lo,mid1,mid2,mid,s1,t1,t2;
                   1279:
                   1280:   if ( !n1 )
                   1281:     *nr = 0;
                   1282:   else {
                   1283:     d1 = n1->d;
                   1284:     if ( d1 < up_fft_mag )
                   1285:       ksquareup(n1,nr);
                   1286:     else {
                   1287:       b1 = maxblenup(n1);
                   1288:       if ( fft_available(d1,b1,d1,b1) )
                   1289:         fft_mulup_main(n1,n1,0,nr);
                   1290:       else {
                   1291:         d = d1+1; h = (d1+1)/2;
                   1292:         decompup(n1,h,&n1lo,&n1hi);
                   1293:         fft_squareup(n1hi,&hi);
                   1294:         fft_squareup(n1lo,&lo);
                   1295:         shiftup(hi,2*h,&t2);
                   1296:         addup(lo,t2,&t1);
                   1297:         addup(hi,lo,&mid1);
                   1298:         subup(n1hi,n1lo,&s1);
                   1299:         fft_squareup(s1,&mid2);
                   1300:         subup(mid1,mid2,&mid);
                   1301:         shiftup(mid,h,&t2);
                   1302:         addup(t1,t2,nr);
                   1303:       }
                   1304:     }
                   1305:   }
                   1306: }
                   1307:
                   1308: /*
                   1309:  * dbd == 0 => n1 * n2
                   1310:  * dbd > 0  => n1 * n2 mod x^dbd
                   1311:  * n1 == n2 => squaring
                   1312:  */
                   1313:
                   1314: void fft_mulup_main(UP n1,UP n2,int dbd,UP *nr)
                   1315: {
                   1316:   ModNum *f1,*f2,*w,*fr;
                   1317:   ModNum **frarray,**fa;
                   1318:   int *modarray,*ma;
                   1319:   int modarray_len;
                   1320:   int frarray_index = 0;
                   1321:   Z m,m1,m2,two,rem;
                   1322:   int d1,d2,dmin,i,mod,root,d,cond,bound;
                   1323:   UP r;
                   1324:
                   1325:   if ( !n1 || !n2 ) {
                   1326:     *nr = 0; return;
                   1327:   }
                   1328:   d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
                   1329:   if ( !d1 || !d2 ) {
                   1330:     mulup(n1,n2,nr); return;
                   1331:   }
                   1332:   m = ONE;
                   1333:   bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+1;
                   1334:   f1 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1335:   if ( n1 == n2 )
                   1336:     f2 = 0;
                   1337:   else
                   1338:     f2 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1339:   w = (ModNum *)MALLOC_ATOMIC(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
                   1340:   modarray_len = 1024;
                   1341:   modarray = (int *)MALLOC_ATOMIC(modarray_len*sizeof(unsigned int));
                   1342:   frarray = (ModNum **)MALLOC(modarray_len*sizeof(ModNum *));
                   1343:   for ( i = 0; i < NPrimes; i++ ) {
                   1344:     FFT_primes(i,&mod,&root,&d);
                   1345:     if ( (1<<d) < d1+d2+1 )
                   1346:       continue;
                   1347:     if ( frarray_index == modarray_len ) {
                   1348:       ma = (int *)MALLOC_ATOMIC(2*modarray_len*sizeof(unsigned int));
                   1349:       bcopy(modarray,ma,modarray_len*sizeof(unsigned int));
                   1350:       modarray = ma;
                   1351:       fa = (ModNum **)MALLOC(2*modarray_len*sizeof(ModNum *));
                   1352:       bcopy(frarray,fa,modarray_len*sizeof(ModNum *));
                   1353:       frarray = fa;
                   1354:       modarray_len *= 2;
                   1355:     }
                   1356:     modarray[frarray_index] = mod;
                   1357:     frarray[frarray_index++] = fr
                   1358:       = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1359:     uptofmarray(mod,n1,f1);
                   1360:     if ( !f2 )
                   1361:       cond = FFT_pol_square(d1,f1,fr,i,w);
                   1362:     else {
                   1363:       uptofmarray(mod,n2,f2);
                   1364:       cond = FFT_pol_product(d1,f1,d2,f2,fr,i,w);
                   1365:     }
                   1366:     if ( cond )
                   1367:       error("fft_mulup : error in FFT_pol_product");
1.2       noro     1368:     STOZ(mod,m1); mulz(m,m1,&m2); m = m2;
1.1       noro     1369:     if ( z_bits((Q)m) > bound ) {
                   1370:       if ( !dbd )
                   1371:         dbd = d1+d2+1;
                   1372:       crup(frarray,MIN(d1+d2,dbd-1),modarray,frarray_index,m,&r);
1.2       noro     1373:       STOZ(2,two);
1.1       noro     1374:       divqrz(m,two,&m2,&rem);
                   1375:       adj_coefup(r,m,m2,nr);
                   1376:       return;
                   1377:     }
                   1378:   }
                   1379:   error("fft_mulup : FFT_primes exhausted");
                   1380: }
                   1381:
                   1382: /* improved version */
                   1383:
                   1384: void crup(ModNum **f,int d,int *mod,int index,Z m,UP *r)
                   1385: {
                   1386:   mpz_t cof,c,rem;
                   1387:   mpz_t *s;
                   1388:   UP u;
1.3       noro     1389:   Z z;
1.1       noro     1390:   int inv,i,j,t;
                   1391:
                   1392:   mpz_init(c); mpz_init(cof); mpz_init(rem);
                   1393:   s = (mpz_t *)MALLOC((d+1)*sizeof(mpz_t));
                   1394:   for ( j = 0; j <= d; j++ )
                   1395:     mpz_init_set_ui(s[j],0);
                   1396:   for ( i = 0; i < index; i++ ) {
                   1397:     mpz_fdiv_q_ui(cof,BDY(m),mod[i]);
                   1398:     t = mpz_fdiv_r_ui(rem,cof,mod[i]);
                   1399:     inv = invm(t,mod[i]);
                   1400:     mpz_mul_ui(c,cof,inv);
                   1401:     /* s += c*f[i] */
                   1402:     for ( j = 0; j <= d; j++ )
                   1403:       if ( f[i][j] )
                   1404:         mpz_addmul_ui(s[j],c,f[i][j]);
                   1405:   }
                   1406:   for ( i = d; i >= 0; i-- )
                   1407:     if ( s[i] )
                   1408:       break;
                   1409:   if ( i < 0 )
                   1410:     *r = 0;
                   1411:   else {
                   1412:     u = UPALLOC(i);
                   1413:     DEG(u) = i;
1.3       noro     1414:     for ( j = 0; j <= i; j++ ) {
                   1415:       MPZTOZ(s[j],z); COEF(u)[j] = (Num)z;
                   1416:     }
1.1       noro     1417:     remcup(u,m,r);
                   1418:   }
                   1419: }
                   1420:
                   1421: /*
                   1422:  * dbd == 0 => n1 * n2
                   1423:  * dbd > 0  => n1 * n2 mod x^dbd
                   1424:  * n1 == n2 => squaring
                   1425:  * return: n1*n2 mod Primes[modind[0]]*.prime...*Primes[modind[nmod-1]].prime
                   1426:  */
                   1427:
                   1428: void fft_mulup_specialmod_main(UP n1,UP n2,int dbd,int *modind,int nmod,UP *nr)
                   1429: {
                   1430:   ModNum *f1,*f2,*w,*fr;
                   1431:   ModNum **frarray;
                   1432:   Z m,m1,m2;
                   1433:   int *modarray;
                   1434:   int d1,d2,dmin,i,root,d,cond,bound;
                   1435:
                   1436:   if ( !n1 || !n2 ) {
                   1437:     *nr = 0; return;
                   1438:   }
                   1439:   d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
                   1440:   if ( !d1 || !d2 ) {
                   1441:     mulup(n1,n2,nr); return;
                   1442:   }
                   1443:   m = ONE;
                   1444:   bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+1;
                   1445:   f1 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1446:   if ( n1 == n2 )
                   1447:     f2 = 0;
                   1448:   else
                   1449:     f2 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1450:   w = (ModNum *)MALLOC_ATOMIC(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
                   1451:   frarray = (ModNum **)MALLOC(nmod*sizeof(ModNum *));
                   1452:   modarray = (int *)MALLOC_ATOMIC(nmod*sizeof(unsigned int *));
                   1453:
                   1454:   for ( i = 0; i < nmod; i++ ) {
                   1455:     FFT_primes(modind[i],&modarray[i],&root,&d);
                   1456:       if ( (1<<d) < d1+d2+1 )
                   1457:         error("fft_mulup_specialmod_main : invalid modulus");
                   1458:     frarray[i] = fr
                   1459:       = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1460:     uptofmarray(modarray[i],n1,f1);
                   1461:     if ( !f2 )
                   1462:       cond = FFT_pol_square(d1,f1,fr,modind[i],w);
                   1463:     else {
                   1464:       uptofmarray(modarray[i],n2,f2);
                   1465:       cond = FFT_pol_product(d1,f1,d2,f2,fr,modind[i],w);
                   1466:     }
                   1467:     if ( cond )
                   1468:       error("fft_mulup_specialmod_main : error in FFT_pol_product");
1.2       noro     1469:     STOZ(modarray[i],m1); mulz(m,m1,&m2); m = m2;
1.1       noro     1470:   }
                   1471:   if ( !dbd )
                   1472:     dbd = d1+d2+1;
                   1473:   crup(frarray,MIN(d1+d2,dbd-1),modarray,nmod,m,nr);
                   1474: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>