[BACK]Return to up.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib2 / asir2000 / engine

Annotation of OpenXM_contrib2/asir2000/engine/up.c, Revision 1.7

1.3       noro        1: /*
                      2:  * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
                      3:  * All rights reserved.
                      4:  *
                      5:  * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
                      6:  * non-exclusive and royalty-free license to use, copy, modify and
                      7:  * redistribute, solely for non-commercial and non-profit purposes, the
                      8:  * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
                      9:  * conditions of this Agreement. For the avoidance of doubt, you acquire
                     10:  * only a limited right to use the SOFTWARE hereunder, and FLL or any
                     11:  * third party developer retains all rights, including but not limited to
                     12:  * copyrights, in and to the SOFTWARE.
                     13:  *
                     14:  * (1) FLL does not grant you a license in any way for commercial
                     15:  * purposes. You may use the SOFTWARE only for non-commercial and
                     16:  * non-profit purposes only, such as academic, research and internal
                     17:  * business use.
                     18:  * (2) The SOFTWARE is protected by the Copyright Law of Japan and
                     19:  * international copyright treaties. If you make copies of the SOFTWARE,
                     20:  * with or without modification, as permitted hereunder, you shall affix
                     21:  * to all such copies of the SOFTWARE the above copyright notice.
                     22:  * (3) An explicit reference to this SOFTWARE and its copyright owner
                     23:  * shall be made on your publication or presentation in any form of the
                     24:  * results obtained by use of the SOFTWARE.
                     25:  * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
1.4       noro       26:  * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
1.3       noro       27:  * for such modification or the source code of the modified part of the
                     28:  * SOFTWARE.
                     29:  *
                     30:  * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
                     31:  * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
                     32:  * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
                     33:  * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
                     34:  * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
                     35:  * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
                     36:  * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
                     37:  * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
                     38:  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
                     39:  * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
                     40:  * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
                     41:  * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
                     42:  * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
                     43:  * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
                     44:  * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
                     45:  * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
                     46:  * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
                     47:  *
1.7     ! ohara      48:  * $OpenXM: OpenXM_contrib2/asir2000/engine/up.c,v 1.6 2018/03/29 01:32:52 noro Exp $
1.3       noro       49: */
1.1       noro       50: #include "ca.h"
                     51: #include <math.h>
                     52:
1.7     ! ohara      53: extern struct oEGT eg_chrem,eg_fore,eg_back;
1.1       noro       54:
                     55: /*
                     56: int up_kara_mag=15;
                     57: int up_tkara_mag=15;
                     58: */
                     59: /*
                     60: int up_kara_mag=30;
                     61: int up_tkara_mag=20;
                     62: */
                     63: int up_kara_mag=20;
                     64: int up_tkara_mag=15;
                     65:
                     66: #if defined(sparc)
                     67: int up_fft_mag=50;
                     68: #else
                     69: int up_fft_mag=80;
                     70: #endif
                     71:
                     72: int up_rem_mag=1;
                     73: int debug_up;
                     74: int up_lazy;
                     75: extern int lm_lazy;
                     76: extern int current_ff;
                     77: extern int GC_dont_gc;
                     78:
1.5       noro       79: void monicup(UP a,UP *b)
1.1       noro       80: {
1.6       noro       81:   UP w;
1.1       noro       82:
1.6       noro       83:   if ( !a )
                     84:     *b = 0;
                     85:   else {
                     86:     w = W_UPALLOC(0); w->d = 0;
                     87:     divnum(0,(Num)ONE,(Num)a->c[a->d],(Num *)&w->c[0]);
                     88:     mulup(a,w,b);
                     89:   }
1.1       noro       90: }
                     91:
1.5       noro       92: void simpup(UP a,UP *b)
1.1       noro       93: {
1.6       noro       94:   int i,d;
                     95:   UP c;
1.1       noro       96:
1.6       noro       97:   if ( !a )
                     98:     *b = 0;
                     99:   else {
                    100:     d = a->d;
                    101:     c = UPALLOC(d);
                    102:
                    103:     for ( i = 0; i <= d; i++ )
                    104:       simpnum(a->c[i],&c->c[i]);
                    105:     for ( i = d; i >= 0 && !c->c[i]; i-- );
                    106:     if ( i < 0 )
                    107:       *b = 0;
                    108:     else {
                    109:       c->d = i;
                    110:       *b = c;
                    111:     }
                    112:   }
1.1       noro      113: }
                    114:
1.5       noro      115: void simpnum(Num a,Num *b)
1.1       noro      116: {
1.6       noro      117:   LM lm;
                    118:   GF2N gf;
                    119:   GFPN gfpn;
                    120:
                    121:   if ( !a )
                    122:     *b = 0;
                    123:   else
                    124:     switch ( NID(a) ) {
                    125:       case N_LM:
                    126:         simplm((LM)a,&lm); *b = (Num)lm; break;
                    127:       case N_GF2N:
                    128:         simpgf2n((GF2N)a,&gf); *b = (Num)gf; break;
                    129:       case N_GFPN:
                    130:         simpgfpn((GFPN)a,&gfpn); *b = (Num)gfpn; break;
                    131:       default:
                    132:         *b = a; break;
                    133:     }
1.1       noro      134: }
                    135:
1.5       noro      136: void uremp(P p1,P p2,P *rp)
1.1       noro      137: {
1.6       noro      138:   UP n1,n2,r;
1.1       noro      139:
1.6       noro      140:   if ( !p1 || NUM(p1) )
                    141:     *rp = p1;
                    142:   else {
                    143:     ptoup(p1,&n1); ptoup(p2,&n2);
                    144:     remup(n1,n2,&r);
                    145:     uptop(r,rp);
                    146:   }
1.1       noro      147: }
                    148:
1.5       noro      149: void ugcdp(P p1,P p2,P *rp)
1.1       noro      150: {
1.6       noro      151:   UP n1,n2,r;
1.1       noro      152:
1.6       noro      153:   ptoup(p1,&n1); ptoup(p2,&n2);
                    154:   gcdup(n1,n2,&r);
                    155:   uptop(r,rp);
1.1       noro      156: }
                    157:
1.5       noro      158: void reversep(P p1,Q d,P *rp)
1.1       noro      159: {
1.6       noro      160:   UP n1,r;
1.1       noro      161:
1.6       noro      162:   if ( !p1 )
                    163:     *rp = 0;
                    164:   else {
                    165:     ptoup(p1,&n1);
                    166:     reverseup(n1,QTOS(d),&r);
                    167:     uptop(r,rp);
                    168:   }
1.1       noro      169: }
                    170:
1.5       noro      171: void invmodp(P p1,Q d,P *rp)
1.1       noro      172: {
1.6       noro      173:   UP n1,r;
1.1       noro      174:
1.6       noro      175:   if ( !p1 )
                    176:     error("invmodp : invalid input");
                    177:   else {
                    178:     ptoup(p1,&n1);
                    179:     invmodup(n1,QTOS(d),&r);
                    180:     uptop(r,rp);
                    181:   }
1.1       noro      182: }
                    183:
1.5       noro      184: void addup(UP n1,UP n2,UP *nr)
1.1       noro      185: {
1.6       noro      186:   UP r,t;
                    187:   int i,d1,d2;
                    188:   Num *c,*c1,*c2;
                    189:
                    190:   if ( !n1 ) {
                    191:     *nr = n2;
                    192:   } else if ( !n2 ) {
                    193:     *nr = n1;
                    194:   } else {
                    195:     if ( n2->d > n1->d ) {
                    196:       t = n1; n1 = n2; n2 = t;
                    197:     }
                    198:     d1 = n1->d; d2 = n2->d;
                    199:     *nr = r = UPALLOC(d1);
                    200:     c1 = n1->c; c2 = n2->c; c = r->c;
                    201:     for ( i = 0; i <= d2; i++ )
                    202:       addnum(0,*c1++,*c2++,c++);
                    203:     for ( ; i <= d1; i++ )
                    204:       *c++ = *c1++;
                    205:     for ( i = d1, c = r->c; i >=0 && !c[i]; i-- );
                    206:     if ( i < 0 )
                    207:       *nr = 0;
                    208:     else
                    209:       r->d = i;
                    210:   }
1.1       noro      211: }
                    212:
1.5       noro      213: void subup(UP n1,UP n2,UP *nr)
1.1       noro      214: {
1.6       noro      215:   UP r;
                    216:   int i,d1,d2,d;
                    217:   Num *c,*c1,*c2;
                    218:
                    219:   if ( !n1 ) {
                    220:     chsgnup(n2,nr);
                    221:   } else if ( !n2 ) {
                    222:     *nr = n1;
                    223:   } else {
                    224:     d1 = n1->d; d2 = n2->d; d = MAX(d1,d2);
                    225:     *nr = r = UPALLOC(d);
                    226:     c1 = n1->c; c2 = n2->c; c = r->c;
                    227:     if ( d1 >= d2 ) {
                    228:       for ( i = 0; i <= d2; i++ )
                    229:         subnum(0,*c1++,*c2++,c++);
                    230:       for ( ; i <= d1; i++ )
                    231:         *c++ = *c1++;
                    232:     } else {
                    233:       for ( i = 0; i <= d1; i++ )
                    234:         subnum(0,*c1++,*c2++,c++);
                    235:       for ( ; i <= d2; i++ )
                    236:         chsgnnum(*c2++,c++);
                    237:     }
                    238:     for ( i = d, c = r->c; i >=0 && !c[i]; i-- );
                    239:     if ( i < 0 )
                    240:       *nr = 0;
                    241:     else
                    242:       r->d = i;
                    243:   }
1.1       noro      244: }
                    245:
1.5       noro      246: void chsgnup(UP n1,UP *nr)
1.1       noro      247: {
1.6       noro      248:   UP r;
                    249:   int d1,i;
                    250:   Num *c1,*c;
                    251:
                    252:   if ( !n1 ) {
                    253:     *nr = 0;
                    254:   } else {
                    255:     d1 = n1->d;
                    256:     *nr = r = UPALLOC(d1); r->d = d1;
                    257:     c1 = n1->c; c = r->c;
                    258:     for ( i = 0; i <= d1; i++ )
                    259:       chsgnnum(*c1++,c++);
                    260:   }
1.1       noro      261: }
                    262:
1.5       noro      263: void hybrid_mulup(int ff,UP n1,UP n2,UP *nr)
1.1       noro      264: {
1.6       noro      265:   if ( !n1 || !n2 )
                    266:     *nr = 0;
                    267:   else if ( MAX(n1->d,n2->d) < up_fft_mag )
                    268:     kmulup(n1,n2,nr);
                    269:   else
                    270:     switch ( ff ) {
                    271:       case FF_GFP:
                    272:         fft_mulup_lm(n1,n2,nr); break;
                    273:       case FF_GF2N:
                    274:         kmulup(n1,n2,nr); break;
                    275:       default:
                    276:         fft_mulup(n1,n2,nr); break;
                    277:     }
1.1       noro      278: }
                    279:
1.5       noro      280: void hybrid_squareup(int ff,UP n1,UP *nr)
1.1       noro      281: {
1.6       noro      282:   if ( !n1 )
                    283:     *nr = 0;
                    284:   else if ( n1->d < up_fft_mag )
                    285:     ksquareup(n1,nr);
                    286:   else
                    287:     switch ( ff ) {
                    288:       case FF_GFP:
                    289:         fft_squareup_lm(n1,nr); break;
                    290:       case FF_GF2N:
                    291:         ksquareup(n1,nr); break;
                    292:       default:
                    293:         fft_squareup(n1,nr); break;
                    294:     }
1.1       noro      295: }
                    296:
1.5       noro      297: void hybrid_tmulup(int ff,UP n1,UP n2,int d,UP *nr)
1.1       noro      298: {
1.6       noro      299:   if ( !n1 || !n2 )
                    300:     *nr = 0;
                    301:   else if ( MAX(n1->d,n2->d) < up_fft_mag )
                    302:     tkmulup(n1,n2,d,nr);
                    303:   else
                    304:     switch ( ff ) {
                    305:       case FF_GFP:
                    306:         trunc_fft_mulup_lm(n1,n2,d,nr); break;
                    307:       case FF_GF2N:
                    308:         tkmulup(n1,n2,d,nr); break;
                    309:       default:
                    310:         trunc_fft_mulup(n1,n2,d,nr); break;
                    311:     }
1.1       noro      312: }
                    313:
1.5       noro      314: void mulup(UP n1,UP n2,UP *nr)
1.1       noro      315: {
1.6       noro      316:   UP r;
                    317:   Num *pc1,*pc,*c1,*c2,*c;
                    318:   Num mul,t,s;
                    319:   int i,j,d1,d2;
                    320:
                    321:   if ( !n1 || !n2 )
                    322:     *nr = 0;
                    323:   else {
                    324:     d1 = n1->d; d2 = n2->d;
                    325:     *nr = r = UPALLOC(d1+d2); r->d = d1+d2;
                    326:     c1 = n1->c; c2 = n2->c; c = r->c;
                    327:     lm_lazy = 1;
                    328:     for ( i = 0; i <= d2; i++, c++ )
                    329:       if ( mul = *c2++ )
                    330:         for ( j = 0, pc1 = c1, pc = c; j <= d1; j++ ) {
                    331:           mulnum(0,*pc1++,mul,&t); addnum(0,*pc,t,&s); *pc++ = s;
                    332:       }
                    333:     lm_lazy = 0;
                    334:     if ( !up_lazy )
                    335:       for ( i = 0, c = r->c; i <= r->d; i++ ) {
                    336:         simpnum(c[i],&t); c[i] = t;
                    337:       }
                    338:   }
1.1       noro      339: }
                    340:
                    341: /* nr = c*n1 */
                    342:
1.5       noro      343: void mulcup(Num c,UP n1,UP *nr)
1.1       noro      344: {
1.6       noro      345:   int d;
                    346:   UP r;
                    347:   Num t;
                    348:   Num *c1,*cr;
                    349:   int i;
                    350:
                    351:   if ( !c || !n1 )
                    352:     *nr = 0;
                    353:   else {
                    354:     d = n1->d;
                    355:     *nr = r = UPALLOC(d); r->d = d;
                    356:     c1 = n1->c; cr = r->c;
                    357:     lm_lazy = 1;
                    358:     for ( i = 0; i <= d; i++ )
                    359:       mulnum(0,c1[i],c,&cr[i]);
                    360:     lm_lazy = 0;
                    361:     if ( !up_lazy )
                    362:       for ( i = 0; i <= d; i++ ) {
                    363:         simpnum(cr[i],&t); cr[i] = t;
                    364:       }
                    365:   }
1.1       noro      366: }
                    367:
1.5       noro      368: void tmulup(UP n1,UP n2,int d,UP *nr)
1.1       noro      369: {
1.6       noro      370:   UP r;
                    371:   Num *pc1,*pc,*c1,*c2,*c;
                    372:   Num mul,t,s;
                    373:   int i,j,d1,d2,dr;
                    374:
                    375:   if ( !n1 || !n2 )
                    376:     *nr = 0;
                    377:   else {
                    378:     d1 = n1->d; d2 = n2->d;
                    379:     dr = MAX(d-1,d1+d2);
                    380:     *nr = r = UPALLOC(dr);
                    381:     c1 = n1->c; c2 = n2->c; c = r->c;
                    382:     lm_lazy = 1;
                    383:     for ( i = 0; i <= d2; i++, c++ )
                    384:       if ( mul = *c2++ )
                    385:         for ( j = 0, pc1 = c1, pc = c; (j <= d1) && (i+j < d); j++ ) {
                    386:           mulnum(0,*pc1++,mul,&t); addnum(0,*pc,t,&s); *pc++ = s;
                    387:       }
                    388:     lm_lazy = 0;
                    389:     c = r->c;
                    390:     if ( !up_lazy )
                    391:       for ( i = 0; i < d; i++ ) {
                    392:         simpnum(c[i],&t); c[i] = t;
                    393:       }
                    394:     for ( i = d-1; i >= 0 && !c[i]; i-- );
                    395:     if ( i < 0 )
                    396:       *nr = 0;
                    397:     else
                    398:       r->d = i;
                    399:   }
1.1       noro      400: }
                    401:
1.5       noro      402: void squareup(UP n1,UP *nr)
1.1       noro      403: {
1.6       noro      404:   UP r;
                    405:   Num *c1,*c;
                    406:   Num t,s,u;
                    407:   int i,j,h,d1,d;
                    408:
                    409:   if ( !n1 )
                    410:     *nr = 0;
                    411:   else if ( !n1->d ) {
                    412:     *nr = r = UPALLOC(0); r->d = 0;
                    413:     mulnum(0,n1->c[0],n1->c[0],&r->c[0]);
                    414:   } else {
                    415:     d1 = n1->d;
                    416:     d = 2*d1;
                    417:     *nr = r = UPALLOC(2*d); r->d = d;
                    418:     c1 = n1->c; c = r->c;
                    419:     lm_lazy = 1;
                    420:     for ( i = 0; i <= d1; i++ )
                    421:       mulnum(0,c1[i],c1[i],&c[2*i]);
                    422:     for ( j = 1; j < d; j++ ) {
                    423:       h = (j-1)/2;
                    424:       for ( i = MAX(j-d1,0), s = 0; i <= h; i++ ) {
                    425:         mulnum(0,c1[i],c1[j-i],&t); addnum(0,t,s,&u); s = u;
                    426:       }
                    427:       addnum(0,s,s,&t); addnum(0,t,c[j],&u); c[j] = u;
                    428:     }
                    429:     lm_lazy = 0;
                    430:     if ( !up_lazy )
                    431:       for ( i = 0, c = r->c; i <= d; i++ ) {
                    432:         simpnum(c[i],&t); c[i] = t;
                    433:       }
                    434:   }
1.1       noro      435: }
                    436:
1.5       noro      437: void remup(UP n1,UP n2,UP *nr)
1.1       noro      438: {
1.6       noro      439:   UP w,r;
1.1       noro      440:
1.6       noro      441:   if ( !n2 )
                    442:     error("remup : division by 0");
                    443:   else if ( !n1 || !n2->d )
                    444:     *nr = 0;
                    445:   else if ( n1->d < n2->d )
                    446:     *nr = n1;
                    447:   else {
                    448:     w = W_UPALLOC(n1->d); copyup(n1,w);
                    449:     remup_destructive(w,n2);
                    450:     if ( w->d < 0 )
                    451:       *nr = 0;
                    452:     else {
                    453:       *nr = r = UPALLOC(w->d); copyup(w,r);
                    454:     }
                    455:   }
1.1       noro      456: }
                    457:
1.5       noro      458: void remup_destructive(UP n1,UP n2)
1.1       noro      459: {
1.6       noro      460:   Num *c1,*c2;
                    461:   int d1,d2,i,j;
                    462:   Num m,t,s,mhc;
                    463:
                    464:   c1 = n1->c; c2 = n2->c;
                    465:   d1 = n1->d; d2 = n2->d;
                    466:   chsgnnum(c2[d2],&mhc);
                    467:   for ( i = d1; i >= d2; i-- ) {
                    468:     simpnum(c1[i],&t); c1[i] = t;
                    469:     if ( !c1[i] )
                    470:       continue;
                    471:     divnum(0,c1[i],mhc,&m);
                    472:     lm_lazy = 1;
                    473:     for ( j = d2; j >= 0; j-- ) {
                    474:       mulnum(0,c2[j],m,&t); addnum(0,c1[i+j-d2],t,&s);
                    475:       c1[i+j-d2] = s;
                    476:     }
                    477:     lm_lazy = 0;
                    478:   }
                    479:   for ( i = 0; i < d2; i++ ) {
                    480:     simpnum(c1[i],&t); c1[i] = t;
                    481:   }
                    482:   for ( i = d2-1; i >= 0 && !c1[i]; i-- );
                    483:   n1->d = i;
1.1       noro      484: }
                    485:
1.5       noro      486: void qrup(UP n1,UP n2,UP *nq,UP *nr)
1.1       noro      487: {
1.6       noro      488:   UP w,r,q;
                    489:   struct oUP t;
                    490:   Num m;
                    491:   int d;
                    492:
                    493:   if ( !n2 )
                    494:     error("qrup : division by 0");
                    495:   else if ( !n1 ) {
                    496:     *nq = 0; *nr = 0;
                    497:   } else if ( !n2->d )
                    498:     if ( !n1->d ) {
                    499:       divnum(0,n1->c[0],n2->c[0],(Num *)nq); *nr = 0;
                    500:     } else {
                    501:       divnum(0,(Num)ONE,n2->c[0],&m);
                    502:       t.d = 0; t.c[0] = m;
                    503:       mulup(n1,&t,nq); *nr = 0;
                    504:     }
                    505:   else if ( n1->d < n2->d ) {
                    506:     *nq = 0; *nr = n1;
                    507:   } else {
                    508:     w = W_UPALLOC(n1->d); copyup(n1,w);
                    509:     qrup_destructive(w,n2);
                    510:     d = n1->d-n2->d;
                    511:     *nq = q = UPALLOC(d); q->d = d;
                    512:     bcopy(w->c+n2->d,q->c,(d+1)*sizeof(Num));
                    513:     if ( w->d < 0 )
                    514:       *nr = 0;
                    515:     else {
                    516:       *nr = r = UPALLOC(w->d); copyup(w,r);
                    517:     }
                    518:   }
1.1       noro      519: }
                    520:
1.5       noro      521: void qrup_destructive(UP n1,UP n2)
1.1       noro      522: {
1.6       noro      523:   Num *c1,*c2;
                    524:   int d1,d2,i,j;
                    525:   Num q,t,s,hc;
                    526:
                    527:   c1 = n1->c; c2 = n2->c;
                    528:   d1 = n1->d; d2 = n2->d;
                    529:   hc = c2[d2];
                    530:   for ( i = d1; i >= d2; i-- ) {
                    531:     simpnum(c1[i],&t); c1[i] = t;
                    532:     if ( c1[i] ) {
                    533:       divnum(0,c1[i],hc,&q);
                    534:       lm_lazy = 1;
                    535:       for ( j = d2; j >= 0; j-- ) {
                    536:         mulnum(0,c2[j],q,&t); subnum(0,c1[i+j-d2],t,&s);
                    537:         c1[i+j-d2] = s;
                    538:       }
                    539:       lm_lazy = 0;
                    540:     } else
                    541:       q = 0;
                    542:     c1[i] = q;
                    543:   }
                    544:   for ( i = 0; i < d2; i++ ) {
                    545:     simpnum(c1[i],&t); c1[i] = t;
                    546:   }
                    547:   for ( i = d2-1; i >= 0 && !c1[i]; i-- );
                    548:   n1->d = i;
1.1       noro      549: }
                    550:
1.5       noro      551: void gcdup(UP n1,UP n2,UP *nr)
1.1       noro      552: {
1.6       noro      553:   UP w1,w2,t,r;
                    554:   int d1,d2;
1.1       noro      555:
1.6       noro      556:   if ( !n1 )
                    557:     *nr = n2;
                    558:   else if ( !n2 )
                    559:     *nr = n1;
                    560:   else if ( !n1->d || !n2->d ) {
                    561:     *nr = r = UPALLOC(0); r->d = 0;
                    562:     divnum(0,n1->c[n1->d],n1->c[n1->d],&r->c[0]);
                    563:   } else {
                    564:     d1 = n1->d; d2 = n2->d;
                    565:     if ( d2 > d1 ) {
                    566:       w1 = W_UPALLOC(d2); copyup(n2,w1);
                    567:       w2 = W_UPALLOC(d1); copyup(n1,w2);
                    568:     } else {
                    569:       w1 = W_UPALLOC(d1); copyup(n1,w1);
                    570:       w2 = W_UPALLOC(d2); copyup(n2,w2);
                    571:     }
                    572:     d1 = w1->d; d2 = w2->d;
                    573:     while ( 1 ) {
                    574:       remup_destructive(w1,w2);
                    575:       if ( w1->d < 0 ) {
                    576:         *nr = r = UPALLOC(w2->d); copyup(w2,r); return;
                    577:       } else if ( !w1->d ) {
                    578:         *nr = r = UPALLOC(0); r->d = 0;
                    579:         divnum(0,n1->c[n1->d],n1->c[n1->d],&r->c[0]); return;
                    580:       } else {
                    581:         t = w1; w1 = w2; w2 = t;
                    582:       }
                    583:     }
                    584:   }
1.1       noro      585: }
                    586:
                    587: /* compute r s.t. a*r = 1 mod m */
                    588:
1.5       noro      589: void extended_gcdup(UP a,UP m,UP *r)
1.1       noro      590: {
1.6       noro      591:   UP one,g1,g2,a1,a2,a3,b1,b2,b3,inv,quo,rem,t;
                    592:   Num i;
1.1       noro      593:
1.6       noro      594:   one = UPALLOC(0); one->d = 0; one->c[0] = (Num)ONELM;
                    595:   g1 = m; g2 = a;
                    596:   a1 = one; a2 = 0;
                    597:   b1 = 0; b2 = one;
                    598:   /* a2*m+b2*a = g2 always holds */
                    599:   while ( g2->d ) {
                    600:     qrup(g1,g2,&quo,&rem); g1 = g2; g2 = rem;
                    601:     mulup(a2,quo,&t); subup(a1,t,&a3); a1 = a2; a2 = a3;
                    602:     mulup(b2,quo,&t); subup(b1,t,&b3); b1 = b2; b2 = b3;
                    603:   }
                    604:   /* now a2*m+b2*a = g2 (const) */
                    605:   divnum(0,(Num)ONE,g2->c[0],&i);
                    606:   inv = UPALLOC(0); inv->d = 0; inv->c[0] = i;
                    607:   mulup(b2,inv,r);
1.1       noro      608: }
                    609:
1.5       noro      610: void reverseup(UP n1,int d,UP *nr)
1.1       noro      611: {
1.6       noro      612:   Num *c1,*c;
                    613:   int i,d1;
                    614:   UP r;
                    615:
                    616:   if ( !n1 )
                    617:     *nr = 0;
                    618:   else if ( d < (d1 = n1->d) )
                    619:     error("reverseup : invalid input");
                    620:   else {
                    621:     *nr = r = UPALLOC(d);
                    622:     c = r->c;
                    623:     bzero((char *)c,(d+1)*sizeof(Num));
                    624:     c1 = n1->c;
                    625:     for ( i = 0; i <= d1; i++ )
                    626:       c[d-i] = c1[i];
                    627:     for ( i = d; i >= 0 && !c[i]; i-- );
                    628:     r->d = i;
                    629:     if ( i < 0 )
                    630:       *nr = 0;
                    631:   }
1.1       noro      632: }
                    633:
1.5       noro      634: void invmodup(UP n1,int d,UP *nr)
1.1       noro      635: {
1.6       noro      636:   UP r;
                    637:   Num s,t,u,hinv;
                    638:   int i,j,dmin;
                    639:   Num *w,*c,*cr;
                    640:
                    641:   if ( !n1 || !n1->c[0] )
                    642:     error("invmodup : invalid input");
                    643:   else if ( !n1->d ) {
                    644:     *nr = r = UPALLOC(0); r->d = 0;
                    645:     divnum(0,(Num)ONE,n1->c[0],&r->c[0]);
                    646:   } else {
                    647:     *nr = r = UPALLOC(d);
                    648:     w = (Num *)ALLOCA((d+1)*sizeof(Q));
                    649:     bzero((char *)w,(d+1)*sizeof(Q));
                    650:     dmin = MIN(d,n1->d);
                    651:     divnum(0,(Num)ONE,n1->c[0],&hinv);
                    652:     for ( i = 0, c = n1->c; i <= dmin; i++ )
                    653:       mulnum(0,c[i],hinv,&w[i]);
                    654:     cr = r->c;
                    655:     cr[0] = w[0];
                    656:     for ( i = 1; i <= d; i++ ) {
                    657:       for ( j = 1, s = w[i]; j < i; j++ ) {
                    658:         mulnum(0,cr[j],w[i-j],&t);
                    659:         addnum(0,s,t,&u);
                    660:         s = u;
                    661:       }
                    662:       chsgnnum(s,&cr[i]);
                    663:     }
                    664:     for ( i = 0; i <= d; i++ ) {
                    665:       mulnum(0,cr[i],hinv,&t); cr[i] = t;
                    666:     }
                    667:     for ( i = d; i >= 0 && !cr[i]; i-- );
                    668:     r->d = i;
                    669:   }
1.1       noro      670: }
                    671:
1.5       noro      672: void pwrup(UP n,Q e,UP *nr)
1.1       noro      673: {
1.6       noro      674:   UP y,z,t;
                    675:   N en,qn;
                    676:   int r;
                    677:
                    678:   y = UPALLOC(0); y->d = 0; y->c[0] = (Num)ONE;
                    679:   z = n;
                    680:   if ( !e )
                    681:     *nr = y;
                    682:   else if ( UNIQ(e) )
                    683:     *nr = n;
                    684:   else {
                    685:     en = NM(e);
                    686:     while ( 1 ) {
                    687:       r = divin(en,2,&qn); en = qn;
                    688:       if ( r ) {
                    689:         mulup(z,y,&t); y = t;
                    690:         if ( !en ) {
                    691:           *nr = y;
                    692:           return;
                    693:         }
                    694:       }
                    695:       mulup(z,z,&t); z = t;
                    696:     }
                    697:   }
1.1       noro      698: }
                    699:
1.5       noro      700: int compup(UP n1,UP n2)
1.1       noro      701: {
1.6       noro      702:   int i,r;
1.1       noro      703:
1.6       noro      704:   if ( !n1 )
                    705:     return n2 ? -1 : 0;
                    706:   else if ( !n2 )
                    707:     return 1;
                    708:   else if ( n1->d > n2->d )
                    709:     return 1;
                    710:   else if ( n1->d < n2->d )
                    711:     return -1;
                    712:   else {
                    713:     for ( i = n1->d; i >= 0; i-- ) {
                    714:       r = compnum(CO,n1->c[i],n2->c[i]);
                    715:       if ( r )
                    716:         return r;
                    717:     }
                    718:     return 0;
                    719:   }
1.1       noro      720: }
                    721:
1.5       noro      722: void kmulp(VL vl,P n1,P n2,P *nr)
1.1       noro      723: {
1.6       noro      724:   UP b1,b2,br;
1.1       noro      725:
1.6       noro      726:   if ( !n1 || !n2 )
                    727:     *nr = 0;
                    728:   else if ( OID(n1) == O_N || OID(n2) == O_N )
                    729:     mulp(vl,n1,n2,nr);
                    730:   else {
                    731:     ptoup(n1,&b1); ptoup(n2,&b2);
                    732:     kmulup(b1,b2,&br);
                    733:     uptop(br,nr);
                    734:   }
1.1       noro      735: }
                    736:
1.5       noro      737: void ksquarep(VL vl,P n1,P *nr)
1.1       noro      738: {
1.6       noro      739:   UP b1,br;
1.1       noro      740:
1.6       noro      741:   if ( !n1 )
                    742:     *nr = 0;
                    743:   else if ( OID(n1) == O_N )
                    744:     mulp(vl,n1,n1,nr);
                    745:   else {
                    746:     ptoup(n1,&b1);
                    747:     ksquareup(b1,&br);
                    748:     uptop(br,nr);
                    749:   }
1.1       noro      750: }
                    751:
1.5       noro      752: void kmulup(UP n1,UP n2,UP *nr)
1.1       noro      753: {
1.6       noro      754:   int d1,d2;
1.1       noro      755:
1.6       noro      756:   if ( !n1 || !n2 ) {
                    757:     *nr = 0; return;
                    758:   }
                    759:   d1 = DEG(n1)+1; d2 = DEG(n2)+1;
                    760:   if ( (d1 < up_kara_mag) && (d2 < up_kara_mag) )
                    761:     mulup(n1,n2,nr);
                    762:   else
                    763:     kmulupmain(n1,n2,nr);
1.1       noro      764: }
                    765:
1.5       noro      766: void ksquareup(UP n1,UP *nr)
1.1       noro      767: {
1.6       noro      768:   int d1;
                    769:   extern Q TWO;
1.1       noro      770:
1.6       noro      771:   if ( !n1 ) {
                    772:     *nr = 0; return;
                    773:   }
                    774:   d1 = DEG(n1)+1;
                    775:   if ( (d1 < up_kara_mag) )
                    776:     squareup(n1,nr);
                    777:   else
                    778:     ksquareupmain(n1,nr);
1.1       noro      779: }
                    780:
1.5       noro      781: void copyup(UP n1,UP n2)
1.1       noro      782: {
1.6       noro      783:   n2->d = n1->d;
                    784:   bcopy((char *)n1->c,(char *)n2->c,(n1->d+1)*sizeof(Q));
1.1       noro      785: }
                    786:
1.5       noro      787: void c_copyup(UP n,int len,pointer *p)
1.1       noro      788: {
1.6       noro      789:   if ( n )
                    790:     bcopy((char *)COEF(n),(char *)p,MIN((DEG(n)+1),len)*sizeof(pointer));
1.1       noro      791: }
                    792:
1.5       noro      793: void kmulupmain(UP n1,UP n2,UP *nr)
1.1       noro      794: {
1.6       noro      795:   int d1,d2,h;
                    796:   UP n1lo,n1hi,n2lo,n2hi,hi,lo,mid1,mid2,mid,s1,s2,t1,t2;
1.1       noro      797:
1.6       noro      798:   d1 = DEG(n1)+1; d2 = DEG(n2)+1; h = (MAX(d1,d2)+1)/2;
                    799:   decompup(n1,h,&n1lo,&n1hi);
                    800:   decompup(n2,h,&n2lo,&n2hi);
                    801:   kmulup(n1lo,n2lo,&lo);
                    802:   kmulup(n1hi,n2hi,&hi);
                    803:   shiftup(hi,2*h,&t2);
                    804:   addup(lo,t2,&t1);
                    805:   addup(hi,lo,&mid1);
                    806:   subup(n1hi,n1lo,&s1); subup(n2lo,n2hi,&s2); kmulup(s1,s2,&mid2);
                    807:   addup(mid1,mid2,&mid);
                    808:   shiftup(mid,h,&t2);
                    809:   addup(t1,t2,nr);
1.1       noro      810: }
                    811:
1.5       noro      812: void ksquareupmain(UP n1,UP *nr)
1.1       noro      813: {
1.6       noro      814:   int d1,h;
                    815:   UP n1lo,n1hi,hi,lo,mid1,mid2,mid,s1,t1,t2;
1.1       noro      816:
1.6       noro      817:   d1 = DEG(n1)+1; h = (d1+1)/2;
                    818:   decompup(n1,h,&n1lo,&n1hi);
                    819:   ksquareup(n1hi,&hi); ksquareup(n1lo,&lo);
                    820:   shiftup(hi,2*h,&t2);
                    821:   addup(lo,t2,&t1);
                    822:   addup(hi,lo,&mid1);
                    823:   subup(n1hi,n1lo,&s1); ksquareup(s1,&mid2);
                    824:   subup(mid1,mid2,&mid);
                    825:   shiftup(mid,h,&t2);
                    826:   addup(t1,t2,nr);
1.1       noro      827: }
                    828:
1.5       noro      829: void rembymulup(UP n1,UP n2,UP *nr)
1.1       noro      830: {
1.6       noro      831:   int d1,d2,d;
                    832:   UP r1,r2,inv2,t,s,q;
1.1       noro      833:
1.6       noro      834:   if ( !n2 )
                    835:     error("rembymulup : division by 0");
                    836:   else if ( !n1 || !n2->d )
                    837:     *nr = 0;
                    838:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    839:     *nr = n1;
                    840:   else {
                    841:     d = d1-d2;
                    842:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    843:     reverseup(n2,d2,&r2);
                    844:     invmodup(r2,d,&inv2);
                    845:     kmulup(r1,inv2,&t);
                    846:     truncup(t,d+1,&s);
                    847:     reverseup(s,d,&q);
                    848:     kmulup(q,n2,&t); subup(n1,t,nr);
                    849:   }
1.1       noro      850: }
                    851:
                    852: /*
1.6       noro      853:   deg n2 = d
                    854:   deg n1 <= 2*d-1
                    855:   inv2 = inverse of reversep(n2) mod x^d
1.1       noro      856: */
                    857:
1.5       noro      858: void hybrid_rembymulup_special(int ff,UP n1,UP n2,UP inv2,UP *nr)
1.1       noro      859: {
1.6       noro      860:   int d1,d2,d;
                    861:   UP r1,t,s,q;
1.1       noro      862:
1.6       noro      863:   if ( !n2 )
                    864:     error("hybrid_rembymulup : division by 0");
                    865:   else if ( !n1 || !n2->d )
                    866:     *nr = 0;
                    867:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    868:     *nr = n1;
                    869:   else {
                    870:     d = d1-d2;
                    871:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    872:     hybrid_tmulup(ff,r1,inv2,d+1,&t);
                    873:     truncup(t,d+1,&s);
                    874:     reverseup(s,d,&q);
                    875:
                    876:     hybrid_tmulup(ff,q,n2,d2,&t);
                    877:     truncup(n1,d2,&s);
                    878:     subup(s,t,nr);
                    879:   }
1.1       noro      880: }
                    881:
1.5       noro      882: void rembymulup_special(UP n1,UP n2,UP inv2,UP *nr)
1.1       noro      883: {
1.6       noro      884:   int d1,d2,d;
                    885:   UP r1,t,s,q;
1.1       noro      886:
1.6       noro      887:   if ( !n2 )
                    888:     error("rembymulup : division by 0");
                    889:   else if ( !n1 || !n2->d )
                    890:     *nr = 0;
                    891:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    892:     *nr = n1;
                    893:   else {
                    894:     d = d1-d2;
                    895:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    896:     tkmulup(r1,inv2,d+1,&t);
                    897:     truncup(t,d+1,&s);
                    898:     reverseup(s,d,&q);
                    899:
                    900:     tkmulup(q,n2,d2,&t);
                    901:     truncup(n1,d2,&s);
                    902:     subup(s,t,nr);
                    903:   }
1.1       noro      904: }
                    905:
                    906: /* *nr = n1*n2 mod x^d */
                    907:
1.5       noro      908: void tkmulup(UP n1,UP n2,int d,UP *nr)
1.1       noro      909: {
1.6       noro      910:   int m;
                    911:   UP n1l,n1h,n2l,n2h,l,h,t,s,u;
1.1       noro      912:
1.6       noro      913:   if ( d < 0 )
                    914:     error("tkmulup : invalid argument");
                    915:   else if ( d == 0 )
                    916:     *nr = 0;
                    917:   else {
                    918:     truncup(n1,d,&t); n1 = t;
                    919:     truncup(n2,d,&t); n2 = t;
                    920:     if ( !n1 || !n2 )
                    921:       *nr = 0;
                    922:     else if ( n1->d < up_tkara_mag || n2->d < up_tkara_mag )
                    923:       tmulup(n1,n2,d,nr);
                    924:     else {
                    925:       m = (d+1)/2;
                    926:       decompup(n1,m,&n1l,&n1h);
                    927:       decompup(n2,m,&n2l,&n2h);
                    928:       kmulup(n1l,n2l,&l);
                    929:       tkmulup(n1l,n2h,d-m,&t);
                    930:       tkmulup(n2l,n1h,d-m,&s);
                    931:       addup(t,s,&u);
                    932:       shiftup(u,m,&h);
                    933:       addup(l,h,&t);
                    934:       truncup(t,d,nr);
                    935:     }
                    936:   }
1.1       noro      937: }
                    938:
                    939: /* n->n*x^d */
                    940:
1.5       noro      941: void shiftup(UP n,int d,UP *nr)
1.1       noro      942: {
1.6       noro      943:   int dr;
                    944:   UP r;
1.1       noro      945:
1.6       noro      946:   if ( !n )
                    947:     *nr = 0;
                    948:   else {
                    949:     dr = n->d+d;
                    950:     *nr = r = UPALLOC(dr); r->d = dr;
                    951:     bzero(r->c,(dr+1)*sizeof(Num));
                    952:     bcopy(n->c,r->c+d,(n->d+1)*sizeof(Num));
                    953:   }
1.1       noro      954: }
                    955:
1.5       noro      956: void fft_rembymulup_special(UP n1,UP n2,UP inv2,UP *nr)
1.1       noro      957: {
1.6       noro      958:   int d1,d2,d;
                    959:   UP r1,t,s,q,u;
1.1       noro      960:
1.6       noro      961:   if ( !n2 )
                    962:     error("rembymulup : division by 0");
                    963:   else if ( !n1 || !n2->d )
                    964:     *nr = 0;
                    965:   else if ( (d1 = n1->d) < (d2 = n2->d) )
                    966:     *nr = n1;
                    967:   else {
                    968:     d = d1-d2;
                    969:     reverseup(n1,d1,&t); truncup(t,d+1,&r1);
                    970:     trunc_fft_mulup(r1,inv2,d+1,&t);
                    971:     truncup(t,d+1,&s);
                    972:     reverseup(s,d,&q);
                    973:     trunc_fft_mulup(q,n2,d2,&t); truncup(t,d2,&u);
                    974:     truncup(n1,d2,&s);
                    975:     subup(s,u,nr);
                    976:   }
1.1       noro      977: }
                    978:
1.5       noro      979: void set_degreeup(UP n,int d)
1.1       noro      980: {
1.6       noro      981:   int i;
1.1       noro      982:
1.6       noro      983:   for ( i = d; i >= 0 && !n->c[i]; i-- );
                    984:   n->d = i;
1.1       noro      985: }
                    986:
                    987: /* n -> n0 + x^d n1 */
                    988:
1.5       noro      989: void decompup(UP n,int d,UP *n0,UP *n1)
1.1       noro      990: {
1.6       noro      991:   int dn;
                    992:   UP r0,r1;
1.1       noro      993:
1.6       noro      994:   if ( !n || d > n->d ) {
                    995:     *n0 = n; *n1 = 0;
                    996:   } else if ( d < 0 )
                    997:     error("decompup : invalid argument");
                    998:   else if ( d == 0 ) {
                    999:     *n0 = 0; *n1 = n;
                   1000:   } else {
                   1001:     dn = n->d;
                   1002:     *n0 = r0 = UPALLOC(d-1);
                   1003:     *n1 = r1 = UPALLOC(dn-d);
                   1004:     bcopy(n->c,r0->c,d*sizeof(Num));
                   1005:     bcopy(n->c+d,r1->c,(dn-d+1)*sizeof(Num));
                   1006:     set_degreeup(r0,d-1);
                   1007:     if ( r0->d < 0 )
                   1008:       *n0 = 0;
                   1009:     set_degreeup(r1,dn-d);
                   1010:     if ( r1->d < 0 )
                   1011:       *n1 = 0;
                   1012:   }
1.1       noro     1013: }
                   1014:
                   1015: /* n -> n mod x^d */
                   1016:
1.5       noro     1017: void truncup(UP n1,int d,UP *nr)
1.1       noro     1018: {
1.6       noro     1019:   int i;
                   1020:   UP r;
1.1       noro     1021:
1.6       noro     1022:   if ( !n1 || d > n1->d )
                   1023:     *nr = n1;
                   1024:   else if ( d < 0 )
                   1025:     error("truncup : invalid argument");
                   1026:   else if ( d == 0 )
                   1027:     *nr = 0;
                   1028:   else {
                   1029:      *nr = r = UPALLOC(d-1);
                   1030:     bcopy(n1->c,r->c,d*sizeof(Num));
                   1031:     for ( i = d-1; i >= 0 && !r->c[i]; i-- );
                   1032:     if ( i < 0 )
                   1033:       *nr = 0;
                   1034:     else
                   1035:       r->d = i;
                   1036:   }
1.1       noro     1037: }
                   1038:
1.5       noro     1039: int int_bits(int t)
1.1       noro     1040: {
1.6       noro     1041:   int k;
1.1       noro     1042:
1.6       noro     1043:   for ( k = 0; t; t>>=1, k++);
                   1044:   return k;
1.1       noro     1045: }
                   1046:
                   1047: /* n is assumed to be LM or integer coefficient */
                   1048:
1.5       noro     1049: int maxblenup(UP n)
1.1       noro     1050: {
1.6       noro     1051:   int m,r,i,d;
                   1052:   Num *c;
1.1       noro     1053:
1.6       noro     1054:   if ( !n )
                   1055:     return 0;
                   1056:   d = n->d; c = (Num *)n->c;
                   1057:   for ( r = 0, i = 0; i <= d; i++ )
                   1058:     if ( c[i] ) {
                   1059:       switch ( NID(c[i]) ) {
                   1060:         case N_LM:
                   1061:           m = n_bits(((LM)c[i])->body);
                   1062:           break;
                   1063:         case N_Q:
                   1064:           m = n_bits(((Q)c[i])->nm);
                   1065:           break;
                   1066:         default:
                   1067:           error("maxblenup : invalid coefficient");
                   1068:       }
                   1069:       r = MAX(m,r);
                   1070:     }
                   1071:   return r;
1.1       noro     1072: }
                   1073:
1.5       noro     1074: void uptofmarray(int mod,UP n,ModNum *f)
1.1       noro     1075: {
1.6       noro     1076:   int d,i;
                   1077:   unsigned int r;
                   1078:   Num *c;
                   1079:
                   1080:   if ( !n )
                   1081:     return;
                   1082:   else {
                   1083:     d = n->d; c = (Num *)n->c;
                   1084:     for ( i = 0; i <= d; i++ ) {
                   1085:       if ( c[i] ) {
                   1086:         switch ( NID(c[i]) ) {
                   1087:           case N_LM:
                   1088:             f[i] = (ModNum)rem(((LM)c[i])->body,mod);
                   1089:             break;
                   1090:           case N_Q:
                   1091:             r = rem(NM((Q)c[i]),mod);
                   1092:             if ( r && (SGN((Q)c[i])<0) )
                   1093:               r = mod-r;
                   1094:             f[i] = (ModNum)r;
                   1095:             break;
                   1096:           default:
                   1097:             error("uptofmarray : invalid coefficient");
                   1098:         }
                   1099:       } else
                   1100:         f[i] = 0;
                   1101:     }
                   1102:   }
1.1       noro     1103: }
                   1104:
1.5       noro     1105: void fmarraytoup(ModNum *f,int d,UP *nr)
1.1       noro     1106: {
1.6       noro     1107:   int i;
                   1108:   Q *c;
                   1109:   UP r;
                   1110:
                   1111:   if ( d < 0 ) {
                   1112:     *nr = 0;
                   1113:   } else {
                   1114:     *nr = r = UPALLOC(d); c = (Q *)r->c;
                   1115:     for ( i = 0; i <= d; i++ ) {
                   1116:       UTOQ((unsigned int)f[i],c[i]);
                   1117:     }
                   1118:     for ( i = d; i >= 0 && !c[i]; i-- );
                   1119:     if ( i < 0 )
                   1120:       *nr = 0;
                   1121:     else
                   1122:       r->d = i;
                   1123:   }
1.1       noro     1124: }
                   1125:
                   1126: /* f[i]: an array of length n */
                   1127:
1.5       noro     1128: void uiarraytoup(unsigned int **f,int n,int d,UP *nr)
1.1       noro     1129: {
1.6       noro     1130:   int i,j;
                   1131:   unsigned int *fi;
                   1132:   N ci;
                   1133:   Q *c;
                   1134:   UP r;
                   1135:
                   1136:   if ( d < 0 ) {
                   1137:     *nr = 0;
                   1138:   } else {
                   1139:     *nr = r = UPALLOC(d); c = (Q *)r->c;
                   1140:     for ( i = 0; i <= d; i++ ) {
                   1141:       fi = f[i];
                   1142:       for ( j = n-1; j >= 0 && !fi[j]; j-- );
                   1143:       j++;
                   1144:       if ( j ) {
                   1145:         ci = NALLOC(j);
                   1146:         PL(ci) = j;
                   1147:         bcopy(fi,BD(ci),j*sizeof(unsigned int));
                   1148:         NTOQ(ci,1,c[i]);
                   1149:       } else
                   1150:         c[i] = 0;
                   1151:     }
                   1152:     for ( i = d; i >= 0 && !c[i]; i-- );
                   1153:     if ( i < 0 )
                   1154:       *nr = 0;
                   1155:     else
                   1156:       r->d = i;
                   1157:   }
1.1       noro     1158: }
                   1159:
1.5       noro     1160: void adj_coefup(UP n,N m,N m2,UP *nr)
1.1       noro     1161: {
1.6       noro     1162:   int d;
                   1163:   Q *c,*cr;
                   1164:   Q mq;
                   1165:   int i;
                   1166:   UP r;
                   1167:
                   1168:   if ( !n )
                   1169:     *nr = 0;
                   1170:   else {
                   1171:     d = n->d; c = (Q *)n->c;
                   1172:     *nr = r = UPALLOC(d); cr = (Q *)r->c;
                   1173:     NTOQ(m,1,mq);
                   1174:     for ( i = 0; i <= d; i++ ) {
                   1175:       if ( !c[i] )
                   1176:         continue;
                   1177:       if ( cmpn(NM(c[i]),m2) > 0 )
                   1178:         subq(c[i],mq,&cr[i]);
                   1179:       else
                   1180:         cr[i] = c[i];
                   1181:     }
                   1182:     for ( i = d; i >= 0 && !cr[i]; i-- );
                   1183:     if ( i < 0 )
                   1184:       *nr = 0;
                   1185:     else
                   1186:       r->d = i;
                   1187:   }
1.1       noro     1188: }
                   1189:
                   1190: /* n is assumed to have positive integer coefficients. */
                   1191:
1.5       noro     1192: void remcup(UP n,N mod,UP *nr)
1.1       noro     1193: {
1.6       noro     1194:   int i,d;
                   1195:   Q *c,*cr;
                   1196:   UP r;
                   1197:   N t;
                   1198:
                   1199:   if ( !n )
                   1200:     *nr = 0;
                   1201:   else {
                   1202:     d = n->d; c = (Q *)n->c;
                   1203:     *nr = r = UPALLOC(d); cr = (Q *)r->c;
                   1204:     for ( i = 0; i <= d; i++ )
                   1205:       if ( c[i] ) {
                   1206:         remn(NM(c[i]),mod,&t);
                   1207:         if ( t )
                   1208:           NTOQ(t,1,cr[i]);
                   1209:         else
                   1210:           cr[i] = 0;
                   1211:       } else
                   1212:         cr[i] = 0;
                   1213:     for ( i = d; i >= 0 && !cr[i]; i-- );
                   1214:     if ( i < 0 )
                   1215:       *nr = 0;
                   1216:     else
                   1217:       r->d = i;
                   1218:   }
1.1       noro     1219: }
                   1220:
                   1221: void fft_mulup_main(UP,UP,int,UP *);
                   1222:
1.5       noro     1223: void fft_mulup(UP n1,UP n2,UP *nr)
1.1       noro     1224: {
1.6       noro     1225:   int d1,d2,d,b1,b2,h;
                   1226:   UP n1lo,n1hi,n2lo,n2hi,lo,hi,t1,t2,mid1,mid2,mid,s1,s2;
1.1       noro     1227:
1.6       noro     1228:   if ( !n1 || !n2 )
                   1229:     *nr = 0;
                   1230:   else {
                   1231:     d1 = n1->d; d2 = n2->d;
                   1232:     if ( MAX(d1,d2) < up_fft_mag )
                   1233:       kmulup(n1,n2,nr);
                   1234:     else {
                   1235:       b1 = maxblenup(n1); b2 = maxblenup(n2);
                   1236:       if ( fft_available(d1,b1,d2,b2) )
                   1237:         fft_mulup_main(n1,n2,0,nr);
                   1238:       else {
                   1239:         d = MAX(d1,d2)+1; h = (d+1)/2;
                   1240:         decompup(n1,h,&n1lo,&n1hi);
                   1241:         decompup(n2,h,&n2lo,&n2hi);
                   1242:         fft_mulup(n1lo,n2lo,&lo);
                   1243:         fft_mulup(n1hi,n2hi,&hi);
                   1244:         shiftup(hi,2*h,&t2);
                   1245:         addup(lo,t2,&t1);
                   1246:         addup(hi,lo,&mid1);
                   1247:         subup(n1hi,n1lo,&s1); subup(n2lo,n2hi,&s2);
                   1248:         fft_mulup(s1,s2,&mid2);
                   1249:         addup(mid1,mid2,&mid);
                   1250:         shiftup(mid,h,&t2);
                   1251:         addup(t1,t2,nr);
                   1252:       }
                   1253:     }
                   1254:   }
1.1       noro     1255: }
                   1256:
1.5       noro     1257: void trunc_fft_mulup(UP n1,UP n2,int dbd,UP *nr)
1.1       noro     1258: {
1.6       noro     1259:   int d1,d2,b1,b2,m;
                   1260:   UP n1l,n1h,n2l,n2h,l,h,t,s,u;
1.1       noro     1261:
1.6       noro     1262:   if ( !n1 || !n2 )
                   1263:     *nr = 0;
                   1264:   else if ( dbd < 0 )
                   1265:     error("trunc_fft_mulup : invalid argument");
                   1266:   else if ( dbd == 0 )
                   1267:     *nr = 0;
                   1268:   else {
                   1269:     truncup(n1,dbd,&t); n1 = t;
                   1270:     truncup(n2,dbd,&t); n2 = t;
                   1271:     d1 = n1->d; d2 = n2->d;
                   1272:     if ( MAX(d1,d2) < up_fft_mag )
                   1273:       tkmulup(n1,n2,dbd,nr);
                   1274:     else {
                   1275:       b1 = maxblenup(n1); b2 = maxblenup(n2);
                   1276:       if ( fft_available(d1,b1,d2,b2) )
                   1277:         fft_mulup_main(n1,n2,dbd,nr);
                   1278:       else {
                   1279:         m = (dbd+1)/2;
                   1280:         decompup(n1,m,&n1l,&n1h);
                   1281:         decompup(n2,m,&n2l,&n2h);
                   1282:         fft_mulup(n1l,n2l,&l);
                   1283:         trunc_fft_mulup(n1l,n2h,dbd-m,&t);
                   1284:         trunc_fft_mulup(n2l,n1h,dbd-m,&s);
                   1285:         addup(t,s,&u);
                   1286:         shiftup(u,m,&h);
                   1287:         addup(l,h,&t);
                   1288:         truncup(t,dbd,nr);
                   1289:       }
                   1290:     }
                   1291:   }
1.1       noro     1292: }
                   1293:
1.5       noro     1294: void fft_squareup(UP n1,UP *nr)
1.1       noro     1295: {
1.6       noro     1296:   int d1,d,h,b1;
                   1297:   UP n1lo,n1hi,hi,lo,mid1,mid2,mid,s1,t1,t2;
1.1       noro     1298:
1.6       noro     1299:   if ( !n1 )
                   1300:     *nr = 0;
                   1301:   else {
                   1302:     d1 = n1->d;
                   1303:     if ( d1 < up_fft_mag )
                   1304:       ksquareup(n1,nr);
                   1305:     else {
                   1306:       b1 = maxblenup(n1);
                   1307:       if ( fft_available(d1,b1,d1,b1) )
                   1308:         fft_mulup_main(n1,n1,0,nr);
                   1309:       else {
                   1310:         d = d1+1; h = (d1+1)/2;
                   1311:         decompup(n1,h,&n1lo,&n1hi);
                   1312:         fft_squareup(n1hi,&hi);
                   1313:         fft_squareup(n1lo,&lo);
                   1314:         shiftup(hi,2*h,&t2);
                   1315:         addup(lo,t2,&t1);
                   1316:         addup(hi,lo,&mid1);
                   1317:         subup(n1hi,n1lo,&s1);
                   1318:         fft_squareup(s1,&mid2);
                   1319:         subup(mid1,mid2,&mid);
                   1320:         shiftup(mid,h,&t2);
                   1321:         addup(t1,t2,nr);
                   1322:       }
                   1323:     }
                   1324:   }
1.1       noro     1325: }
                   1326:
                   1327: /*
                   1328:  * dbd == 0 => n1 * n2
                   1329:  * dbd > 0  => n1 * n2 mod x^dbd
                   1330:  * n1 == n2 => squaring
                   1331:  */
                   1332:
1.5       noro     1333: void fft_mulup_main(UP n1,UP n2,int dbd,UP *nr)
1.1       noro     1334: {
1.6       noro     1335:   ModNum *f1,*f2,*w,*fr;
                   1336:   ModNum **frarray,**fa;
                   1337:   unsigned int *modarray,*ma;
                   1338:   int modarray_len;
                   1339:   int frarray_index = 0;
                   1340:   N m,m1,m2;
                   1341:   int d1,d2,dmin,i,mod,root,d,cond,bound;
                   1342:   UP r;
                   1343:
                   1344:   if ( !n1 || !n2 ) {
                   1345:     *nr = 0; return;
                   1346:   }
                   1347:   d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
                   1348:   if ( !d1 || !d2 ) {
                   1349:     mulup(n1,n2,nr); return;
                   1350:   }
                   1351:   m = ONEN;
                   1352:   bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+1;
                   1353:   f1 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1354:   if ( n1 == n2 )
                   1355:     f2 = 0;
                   1356:   else
                   1357:     f2 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1358:   w = (ModNum *)MALLOC_ATOMIC(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
                   1359:   modarray_len = 1024;
                   1360:   modarray = (unsigned int *)MALLOC_ATOMIC(modarray_len*sizeof(unsigned int));
                   1361:   frarray = (ModNum **)MALLOC(modarray_len*sizeof(ModNum *));
                   1362:   for ( i = 0; i < NPrimes; i++ ) {
                   1363:     FFT_primes(i,&mod,&root,&d);
                   1364:     if ( (1<<d) < d1+d2+1 )
                   1365:       continue;
                   1366:     if ( frarray_index == modarray_len ) {
                   1367:       ma = (unsigned int *)MALLOC_ATOMIC(2*modarray_len*sizeof(unsigned int));
                   1368:       bcopy(modarray,ma,modarray_len*sizeof(unsigned int));
                   1369:       modarray = ma;
                   1370:       fa = (ModNum **)MALLOC(2*modarray_len*sizeof(ModNum *));
                   1371:       bcopy(frarray,fa,modarray_len*sizeof(ModNum *));
                   1372:       frarray = fa;
                   1373:       modarray_len *= 2;
                   1374:     }
                   1375:     modarray[frarray_index] = mod;
                   1376:     frarray[frarray_index++] = fr
                   1377:       = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1378:     uptofmarray(mod,n1,f1);
                   1379:     if ( !f2 )
                   1380:       cond = FFT_pol_square(d1,f1,fr,i,w);
                   1381:     else {
                   1382:       uptofmarray(mod,n2,f2);
                   1383:       cond = FFT_pol_product(d1,f1,d2,f2,fr,i,w);
                   1384:     }
                   1385:     if ( cond )
                   1386:       error("fft_mulup : error in FFT_pol_product");
                   1387:     STON(mod,m1); muln(m,m1,&m2); m = m2;
                   1388:     if ( n_bits(m) > bound ) {
                   1389:       if ( !dbd )
                   1390:         dbd = d1+d2+1;
                   1391:       crup(frarray,MIN(d1+d2,dbd-1),modarray,frarray_index,m,&r);
                   1392:       divin(m,2,&m2);
                   1393:       adj_coefup(r,m,m2,nr);
                   1394:       return;
                   1395:     }
                   1396:   }
                   1397:   error("fft_mulup : FFT_primes exhausted");
1.1       noro     1398: }
                   1399: #if 0
                   1400: /* inefficient version */
                   1401:
1.5       noro     1402: void crup(ModNum **f,int d,int *mod,int index,N m,UP *r)
1.1       noro     1403: {
1.6       noro     1404:   N *cof,*c;
                   1405:   int *inv;
                   1406:   struct oUP w;
                   1407:   int i;
                   1408:   UP s,s1,s2;
                   1409:   N t;
                   1410:   UP *g;
                   1411:   Q q;
                   1412:   struct oEGT eg0,eg1;
                   1413:
                   1414:   get_eg(&eg0);
                   1415:   w.d = 0;
                   1416:   inv = (int *)ALLOCA(index*sizeof(int));
                   1417:   cof = (N *)ALLOCA(index*sizeof(N));
                   1418:   c = (N *)ALLOCA(index*sizeof(N));
                   1419:   g = (UP *)ALLOCA(index*sizeof(UP));
                   1420:   up_lazy = 1;
                   1421:   for ( i = 0, s = 0; i < index; i++ ) {
                   1422:     divin(m,mod[i],&cof[i]);
                   1423:     inv[i] = invm(rem(cof[i],mod[i]),mod[i]);
                   1424:     STON(inv[i],t);
                   1425:     muln(cof[i],t,&c[i]);
                   1426:     NTOQ(c[i],1,q); w.c[0] = (Num)q;
                   1427:     fmarraytoup(f[i],d,&g[i]);
                   1428:     mulup(g[i],&w,&s1);
                   1429:     addup(s,s1,&s2);
                   1430:     s = s2;
                   1431:   }
                   1432:   up_lazy = 0;
                   1433:   remcup(s,m,r);
                   1434:   get_eg(&eg1);
                   1435:   add_eg(&eg_chrem,&eg0,&eg1);
1.1       noro     1436: }
                   1437:
                   1438: #else
                   1439: /* improved version */
                   1440:
1.5       noro     1441: void crup(ModNum **f,int d,int *mod,int index,N m,UP *r)
1.1       noro     1442: {
1.6       noro     1443:   N cof,c,t,w,w1;
                   1444:   struct oN fc;
                   1445:   N *s;
                   1446:   UP u;
                   1447:   Q q;
                   1448:   int inv,i,j,rlen;
                   1449:
                   1450:   rlen = PL(m)+10; /* XXX */
                   1451:   PL(&fc) = 1;
                   1452:   c = NALLOC(rlen);
                   1453:   w = NALLOC(rlen);
                   1454:   w1 = NALLOC(rlen);
                   1455:   s = (N *)MALLOC((d+1)*sizeof(N));
                   1456:   for ( i = 0; i <= d; i++ ) {
                   1457:     s[i] = NALLOC(rlen);
                   1458:     PL(s[i]) = 0;
                   1459:     bzero(BD(s[i]),rlen*sizeof(unsigned int));
                   1460:   }
                   1461:   for ( i = 0; i < index; i++ ) {
                   1462:     divin(m,mod[i],&cof);
                   1463:     inv = invm(rem(cof,mod[i]),mod[i]); STON(inv,t);
                   1464:     _muln(cof,t,c);
                   1465:     /* s += c*f[i] */
                   1466:     for ( j = 0; j <= d; j++ )
                   1467:       if ( f[i][j] ) {
                   1468:         BD(&fc)[0] = f[i][j];
                   1469:         _muln(c,&fc,w);
                   1470:         _addn(s[j],w,w1);
                   1471:         dupn(w1,s[j]);
                   1472:       }
                   1473:   }
                   1474:   for ( i = d; i >= 0; i-- )
                   1475:     if ( s[i] )
                   1476:       break;
                   1477:   if ( i < 0 )
                   1478:     *r = 0;
                   1479:   else {
                   1480:     u = UPALLOC(i);
                   1481:     DEG(u) = i;
                   1482:     for ( j = 0; j <= i; j++ ) {
                   1483:       if ( PL(s[j]) )
                   1484:         NTOQ(s[j],1,q);
                   1485:       else
                   1486:         q = 0;
                   1487:       COEF(u)[j] = (Num)q;
                   1488:     }
                   1489:     remcup(u,m,r);
                   1490:   }
1.1       noro     1491: }
                   1492: #endif
                   1493:
1.2       noro     1494: /*
                   1495:  * dbd == 0 => n1 * n2
                   1496:  * dbd > 0  => n1 * n2 mod x^dbd
                   1497:  * n1 == n2 => squaring
                   1498:  * return: n1*n2 mod Primes[modind[0]]*.prime...*Primes[modind[nmod-1]].prime
                   1499:  */
                   1500:
1.5       noro     1501: void fft_mulup_specialmod_main(UP n1,UP n2,int dbd,int *modind,int nmod,UP *nr)
1.2       noro     1502: {
1.6       noro     1503:   ModNum *f1,*f2,*w,*fr;
                   1504:   ModNum **frarray;
                   1505:   N m,m1,m2;
                   1506:   unsigned int *modarray;
                   1507:   int d1,d2,dmin,i,root,d,cond,bound;
                   1508:
                   1509:   if ( !n1 || !n2 ) {
                   1510:     *nr = 0; return;
                   1511:   }
                   1512:   d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
                   1513:   if ( !d1 || !d2 ) {
                   1514:     mulup(n1,n2,nr); return;
                   1515:   }
                   1516:   m = ONEN;
                   1517:   bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+1;
                   1518:   f1 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1519:   if ( n1 == n2 )
                   1520:     f2 = 0;
                   1521:   else
                   1522:     f2 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1523:   w = (ModNum *)MALLOC_ATOMIC(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
                   1524:   frarray = (ModNum **)MALLOC(nmod*sizeof(ModNum *));
                   1525:   modarray = (unsigned int *)MALLOC_ATOMIC(nmod*sizeof(unsigned int *));
                   1526:
                   1527:   for ( i = 0; i < nmod; i++ ) {
                   1528:     FFT_primes(modind[i],&modarray[i],&root,&d);
                   1529:       if ( (1<<d) < d1+d2+1 )
                   1530:         error("fft_mulup_specialmod_main : invalid modulus");
                   1531:     frarray[i] = fr
                   1532:       = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
                   1533:     uptofmarray(modarray[i],n1,f1);
                   1534:     if ( !f2 )
                   1535:       cond = FFT_pol_square(d1,f1,fr,modind[i],w);
                   1536:     else {
                   1537:       uptofmarray(modarray[i],n2,f2);
                   1538:       cond = FFT_pol_product(d1,f1,d2,f2,fr,modind[i],w);
                   1539:     }
                   1540:     if ( cond )
                   1541:       error("fft_mulup_specialmod_main : error in FFT_pol_product");
                   1542:     STON(modarray[i],m1); muln(m,m1,&m2); m = m2;
                   1543:   }
                   1544:   if ( !dbd )
                   1545:     dbd = d1+d2+1;
                   1546:   crup(frarray,MIN(d1+d2,dbd-1),modarray,nmod,m,nr);
1.2       noro     1547: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>