Annotation of OpenXM_contrib2/asir2000/engine/up.c, Revision 1.6
1.3 noro 1: /*
2: * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
3: * All rights reserved.
4: *
5: * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
6: * non-exclusive and royalty-free license to use, copy, modify and
7: * redistribute, solely for non-commercial and non-profit purposes, the
8: * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
9: * conditions of this Agreement. For the avoidance of doubt, you acquire
10: * only a limited right to use the SOFTWARE hereunder, and FLL or any
11: * third party developer retains all rights, including but not limited to
12: * copyrights, in and to the SOFTWARE.
13: *
14: * (1) FLL does not grant you a license in any way for commercial
15: * purposes. You may use the SOFTWARE only for non-commercial and
16: * non-profit purposes only, such as academic, research and internal
17: * business use.
18: * (2) The SOFTWARE is protected by the Copyright Law of Japan and
19: * international copyright treaties. If you make copies of the SOFTWARE,
20: * with or without modification, as permitted hereunder, you shall affix
21: * to all such copies of the SOFTWARE the above copyright notice.
22: * (3) An explicit reference to this SOFTWARE and its copyright owner
23: * shall be made on your publication or presentation in any form of the
24: * results obtained by use of the SOFTWARE.
25: * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
1.4 noro 26: * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
1.3 noro 27: * for such modification or the source code of the modified part of the
28: * SOFTWARE.
29: *
30: * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
31: * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
32: * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
33: * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
34: * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
35: * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
36: * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
37: * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
38: * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
39: * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
40: * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
41: * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
42: * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
43: * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
44: * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
45: * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
46: * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
47: *
1.6 ! noro 48: * $OpenXM: OpenXM_contrib2/asir2000/engine/up.c,v 1.5 2001/10/09 01:36:13 noro Exp $
1.3 noro 49: */
1.1 noro 50: #include "ca.h"
51: #include <math.h>
52:
53: struct oEGT eg_chrem,eg_fore,eg_back;
54:
55: /*
56: int up_kara_mag=15;
57: int up_tkara_mag=15;
58: */
59: /*
60: int up_kara_mag=30;
61: int up_tkara_mag=20;
62: */
63: int up_kara_mag=20;
64: int up_tkara_mag=15;
65:
66: #if defined(sparc)
67: int up_fft_mag=50;
68: #else
69: int up_fft_mag=80;
70: #endif
71:
72: int up_rem_mag=1;
73: int debug_up;
74: int up_lazy;
75: extern int lm_lazy;
76: extern int current_ff;
77: extern int GC_dont_gc;
78:
1.5 noro 79: void monicup(UP a,UP *b)
1.1 noro 80: {
1.6 ! noro 81: UP w;
1.1 noro 82:
1.6 ! noro 83: if ( !a )
! 84: *b = 0;
! 85: else {
! 86: w = W_UPALLOC(0); w->d = 0;
! 87: divnum(0,(Num)ONE,(Num)a->c[a->d],(Num *)&w->c[0]);
! 88: mulup(a,w,b);
! 89: }
1.1 noro 90: }
91:
1.5 noro 92: void simpup(UP a,UP *b)
1.1 noro 93: {
1.6 ! noro 94: int i,d;
! 95: UP c;
1.1 noro 96:
1.6 ! noro 97: if ( !a )
! 98: *b = 0;
! 99: else {
! 100: d = a->d;
! 101: c = UPALLOC(d);
! 102:
! 103: for ( i = 0; i <= d; i++ )
! 104: simpnum(a->c[i],&c->c[i]);
! 105: for ( i = d; i >= 0 && !c->c[i]; i-- );
! 106: if ( i < 0 )
! 107: *b = 0;
! 108: else {
! 109: c->d = i;
! 110: *b = c;
! 111: }
! 112: }
1.1 noro 113: }
114:
1.5 noro 115: void simpnum(Num a,Num *b)
1.1 noro 116: {
1.6 ! noro 117: LM lm;
! 118: GF2N gf;
! 119: GFPN gfpn;
! 120:
! 121: if ( !a )
! 122: *b = 0;
! 123: else
! 124: switch ( NID(a) ) {
! 125: case N_LM:
! 126: simplm((LM)a,&lm); *b = (Num)lm; break;
! 127: case N_GF2N:
! 128: simpgf2n((GF2N)a,&gf); *b = (Num)gf; break;
! 129: case N_GFPN:
! 130: simpgfpn((GFPN)a,&gfpn); *b = (Num)gfpn; break;
! 131: default:
! 132: *b = a; break;
! 133: }
1.1 noro 134: }
135:
1.5 noro 136: void uremp(P p1,P p2,P *rp)
1.1 noro 137: {
1.6 ! noro 138: UP n1,n2,r;
1.1 noro 139:
1.6 ! noro 140: if ( !p1 || NUM(p1) )
! 141: *rp = p1;
! 142: else {
! 143: ptoup(p1,&n1); ptoup(p2,&n2);
! 144: remup(n1,n2,&r);
! 145: uptop(r,rp);
! 146: }
1.1 noro 147: }
148:
1.5 noro 149: void ugcdp(P p1,P p2,P *rp)
1.1 noro 150: {
1.6 ! noro 151: UP n1,n2,r;
1.1 noro 152:
1.6 ! noro 153: ptoup(p1,&n1); ptoup(p2,&n2);
! 154: gcdup(n1,n2,&r);
! 155: uptop(r,rp);
1.1 noro 156: }
157:
1.5 noro 158: void reversep(P p1,Q d,P *rp)
1.1 noro 159: {
1.6 ! noro 160: UP n1,r;
1.1 noro 161:
1.6 ! noro 162: if ( !p1 )
! 163: *rp = 0;
! 164: else {
! 165: ptoup(p1,&n1);
! 166: reverseup(n1,QTOS(d),&r);
! 167: uptop(r,rp);
! 168: }
1.1 noro 169: }
170:
1.5 noro 171: void invmodp(P p1,Q d,P *rp)
1.1 noro 172: {
1.6 ! noro 173: UP n1,r;
1.1 noro 174:
1.6 ! noro 175: if ( !p1 )
! 176: error("invmodp : invalid input");
! 177: else {
! 178: ptoup(p1,&n1);
! 179: invmodup(n1,QTOS(d),&r);
! 180: uptop(r,rp);
! 181: }
1.1 noro 182: }
183:
1.5 noro 184: void addup(UP n1,UP n2,UP *nr)
1.1 noro 185: {
1.6 ! noro 186: UP r,t;
! 187: int i,d1,d2;
! 188: Num *c,*c1,*c2;
! 189:
! 190: if ( !n1 ) {
! 191: *nr = n2;
! 192: } else if ( !n2 ) {
! 193: *nr = n1;
! 194: } else {
! 195: if ( n2->d > n1->d ) {
! 196: t = n1; n1 = n2; n2 = t;
! 197: }
! 198: d1 = n1->d; d2 = n2->d;
! 199: *nr = r = UPALLOC(d1);
! 200: c1 = n1->c; c2 = n2->c; c = r->c;
! 201: for ( i = 0; i <= d2; i++ )
! 202: addnum(0,*c1++,*c2++,c++);
! 203: for ( ; i <= d1; i++ )
! 204: *c++ = *c1++;
! 205: for ( i = d1, c = r->c; i >=0 && !c[i]; i-- );
! 206: if ( i < 0 )
! 207: *nr = 0;
! 208: else
! 209: r->d = i;
! 210: }
1.1 noro 211: }
212:
1.5 noro 213: void subup(UP n1,UP n2,UP *nr)
1.1 noro 214: {
1.6 ! noro 215: UP r;
! 216: int i,d1,d2,d;
! 217: Num *c,*c1,*c2;
! 218:
! 219: if ( !n1 ) {
! 220: chsgnup(n2,nr);
! 221: } else if ( !n2 ) {
! 222: *nr = n1;
! 223: } else {
! 224: d1 = n1->d; d2 = n2->d; d = MAX(d1,d2);
! 225: *nr = r = UPALLOC(d);
! 226: c1 = n1->c; c2 = n2->c; c = r->c;
! 227: if ( d1 >= d2 ) {
! 228: for ( i = 0; i <= d2; i++ )
! 229: subnum(0,*c1++,*c2++,c++);
! 230: for ( ; i <= d1; i++ )
! 231: *c++ = *c1++;
! 232: } else {
! 233: for ( i = 0; i <= d1; i++ )
! 234: subnum(0,*c1++,*c2++,c++);
! 235: for ( ; i <= d2; i++ )
! 236: chsgnnum(*c2++,c++);
! 237: }
! 238: for ( i = d, c = r->c; i >=0 && !c[i]; i-- );
! 239: if ( i < 0 )
! 240: *nr = 0;
! 241: else
! 242: r->d = i;
! 243: }
1.1 noro 244: }
245:
1.5 noro 246: void chsgnup(UP n1,UP *nr)
1.1 noro 247: {
1.6 ! noro 248: UP r;
! 249: int d1,i;
! 250: Num *c1,*c;
! 251:
! 252: if ( !n1 ) {
! 253: *nr = 0;
! 254: } else {
! 255: d1 = n1->d;
! 256: *nr = r = UPALLOC(d1); r->d = d1;
! 257: c1 = n1->c; c = r->c;
! 258: for ( i = 0; i <= d1; i++ )
! 259: chsgnnum(*c1++,c++);
! 260: }
1.1 noro 261: }
262:
1.5 noro 263: void hybrid_mulup(int ff,UP n1,UP n2,UP *nr)
1.1 noro 264: {
1.6 ! noro 265: if ( !n1 || !n2 )
! 266: *nr = 0;
! 267: else if ( MAX(n1->d,n2->d) < up_fft_mag )
! 268: kmulup(n1,n2,nr);
! 269: else
! 270: switch ( ff ) {
! 271: case FF_GFP:
! 272: fft_mulup_lm(n1,n2,nr); break;
! 273: case FF_GF2N:
! 274: kmulup(n1,n2,nr); break;
! 275: default:
! 276: fft_mulup(n1,n2,nr); break;
! 277: }
1.1 noro 278: }
279:
1.5 noro 280: void hybrid_squareup(int ff,UP n1,UP *nr)
1.1 noro 281: {
1.6 ! noro 282: if ( !n1 )
! 283: *nr = 0;
! 284: else if ( n1->d < up_fft_mag )
! 285: ksquareup(n1,nr);
! 286: else
! 287: switch ( ff ) {
! 288: case FF_GFP:
! 289: fft_squareup_lm(n1,nr); break;
! 290: case FF_GF2N:
! 291: ksquareup(n1,nr); break;
! 292: default:
! 293: fft_squareup(n1,nr); break;
! 294: }
1.1 noro 295: }
296:
1.5 noro 297: void hybrid_tmulup(int ff,UP n1,UP n2,int d,UP *nr)
1.1 noro 298: {
1.6 ! noro 299: if ( !n1 || !n2 )
! 300: *nr = 0;
! 301: else if ( MAX(n1->d,n2->d) < up_fft_mag )
! 302: tkmulup(n1,n2,d,nr);
! 303: else
! 304: switch ( ff ) {
! 305: case FF_GFP:
! 306: trunc_fft_mulup_lm(n1,n2,d,nr); break;
! 307: case FF_GF2N:
! 308: tkmulup(n1,n2,d,nr); break;
! 309: default:
! 310: trunc_fft_mulup(n1,n2,d,nr); break;
! 311: }
1.1 noro 312: }
313:
1.5 noro 314: void mulup(UP n1,UP n2,UP *nr)
1.1 noro 315: {
1.6 ! noro 316: UP r;
! 317: Num *pc1,*pc,*c1,*c2,*c;
! 318: Num mul,t,s;
! 319: int i,j,d1,d2;
! 320:
! 321: if ( !n1 || !n2 )
! 322: *nr = 0;
! 323: else {
! 324: d1 = n1->d; d2 = n2->d;
! 325: *nr = r = UPALLOC(d1+d2); r->d = d1+d2;
! 326: c1 = n1->c; c2 = n2->c; c = r->c;
! 327: lm_lazy = 1;
! 328: for ( i = 0; i <= d2; i++, c++ )
! 329: if ( mul = *c2++ )
! 330: for ( j = 0, pc1 = c1, pc = c; j <= d1; j++ ) {
! 331: mulnum(0,*pc1++,mul,&t); addnum(0,*pc,t,&s); *pc++ = s;
! 332: }
! 333: lm_lazy = 0;
! 334: if ( !up_lazy )
! 335: for ( i = 0, c = r->c; i <= r->d; i++ ) {
! 336: simpnum(c[i],&t); c[i] = t;
! 337: }
! 338: }
1.1 noro 339: }
340:
341: /* nr = c*n1 */
342:
1.5 noro 343: void mulcup(Num c,UP n1,UP *nr)
1.1 noro 344: {
1.6 ! noro 345: int d;
! 346: UP r;
! 347: Num t;
! 348: Num *c1,*cr;
! 349: int i;
! 350:
! 351: if ( !c || !n1 )
! 352: *nr = 0;
! 353: else {
! 354: d = n1->d;
! 355: *nr = r = UPALLOC(d); r->d = d;
! 356: c1 = n1->c; cr = r->c;
! 357: lm_lazy = 1;
! 358: for ( i = 0; i <= d; i++ )
! 359: mulnum(0,c1[i],c,&cr[i]);
! 360: lm_lazy = 0;
! 361: if ( !up_lazy )
! 362: for ( i = 0; i <= d; i++ ) {
! 363: simpnum(cr[i],&t); cr[i] = t;
! 364: }
! 365: }
1.1 noro 366: }
367:
1.5 noro 368: void tmulup(UP n1,UP n2,int d,UP *nr)
1.1 noro 369: {
1.6 ! noro 370: UP r;
! 371: Num *pc1,*pc,*c1,*c2,*c;
! 372: Num mul,t,s;
! 373: int i,j,d1,d2,dr;
! 374:
! 375: if ( !n1 || !n2 )
! 376: *nr = 0;
! 377: else {
! 378: d1 = n1->d; d2 = n2->d;
! 379: dr = MAX(d-1,d1+d2);
! 380: *nr = r = UPALLOC(dr);
! 381: c1 = n1->c; c2 = n2->c; c = r->c;
! 382: lm_lazy = 1;
! 383: for ( i = 0; i <= d2; i++, c++ )
! 384: if ( mul = *c2++ )
! 385: for ( j = 0, pc1 = c1, pc = c; (j <= d1) && (i+j < d); j++ ) {
! 386: mulnum(0,*pc1++,mul,&t); addnum(0,*pc,t,&s); *pc++ = s;
! 387: }
! 388: lm_lazy = 0;
! 389: c = r->c;
! 390: if ( !up_lazy )
! 391: for ( i = 0; i < d; i++ ) {
! 392: simpnum(c[i],&t); c[i] = t;
! 393: }
! 394: for ( i = d-1; i >= 0 && !c[i]; i-- );
! 395: if ( i < 0 )
! 396: *nr = 0;
! 397: else
! 398: r->d = i;
! 399: }
1.1 noro 400: }
401:
1.5 noro 402: void squareup(UP n1,UP *nr)
1.1 noro 403: {
1.6 ! noro 404: UP r;
! 405: Num *c1,*c;
! 406: Num t,s,u;
! 407: int i,j,h,d1,d;
! 408:
! 409: if ( !n1 )
! 410: *nr = 0;
! 411: else if ( !n1->d ) {
! 412: *nr = r = UPALLOC(0); r->d = 0;
! 413: mulnum(0,n1->c[0],n1->c[0],&r->c[0]);
! 414: } else {
! 415: d1 = n1->d;
! 416: d = 2*d1;
! 417: *nr = r = UPALLOC(2*d); r->d = d;
! 418: c1 = n1->c; c = r->c;
! 419: lm_lazy = 1;
! 420: for ( i = 0; i <= d1; i++ )
! 421: mulnum(0,c1[i],c1[i],&c[2*i]);
! 422: for ( j = 1; j < d; j++ ) {
! 423: h = (j-1)/2;
! 424: for ( i = MAX(j-d1,0), s = 0; i <= h; i++ ) {
! 425: mulnum(0,c1[i],c1[j-i],&t); addnum(0,t,s,&u); s = u;
! 426: }
! 427: addnum(0,s,s,&t); addnum(0,t,c[j],&u); c[j] = u;
! 428: }
! 429: lm_lazy = 0;
! 430: if ( !up_lazy )
! 431: for ( i = 0, c = r->c; i <= d; i++ ) {
! 432: simpnum(c[i],&t); c[i] = t;
! 433: }
! 434: }
1.1 noro 435: }
436:
1.5 noro 437: void remup(UP n1,UP n2,UP *nr)
1.1 noro 438: {
1.6 ! noro 439: UP w,r;
1.1 noro 440:
1.6 ! noro 441: if ( !n2 )
! 442: error("remup : division by 0");
! 443: else if ( !n1 || !n2->d )
! 444: *nr = 0;
! 445: else if ( n1->d < n2->d )
! 446: *nr = n1;
! 447: else {
! 448: w = W_UPALLOC(n1->d); copyup(n1,w);
! 449: remup_destructive(w,n2);
! 450: if ( w->d < 0 )
! 451: *nr = 0;
! 452: else {
! 453: *nr = r = UPALLOC(w->d); copyup(w,r);
! 454: }
! 455: }
1.1 noro 456: }
457:
1.5 noro 458: void remup_destructive(UP n1,UP n2)
1.1 noro 459: {
1.6 ! noro 460: Num *c1,*c2;
! 461: int d1,d2,i,j;
! 462: Num m,t,s,mhc;
! 463:
! 464: c1 = n1->c; c2 = n2->c;
! 465: d1 = n1->d; d2 = n2->d;
! 466: chsgnnum(c2[d2],&mhc);
! 467: for ( i = d1; i >= d2; i-- ) {
! 468: simpnum(c1[i],&t); c1[i] = t;
! 469: if ( !c1[i] )
! 470: continue;
! 471: divnum(0,c1[i],mhc,&m);
! 472: lm_lazy = 1;
! 473: for ( j = d2; j >= 0; j-- ) {
! 474: mulnum(0,c2[j],m,&t); addnum(0,c1[i+j-d2],t,&s);
! 475: c1[i+j-d2] = s;
! 476: }
! 477: lm_lazy = 0;
! 478: }
! 479: for ( i = 0; i < d2; i++ ) {
! 480: simpnum(c1[i],&t); c1[i] = t;
! 481: }
! 482: for ( i = d2-1; i >= 0 && !c1[i]; i-- );
! 483: n1->d = i;
1.1 noro 484: }
485:
1.5 noro 486: void qrup(UP n1,UP n2,UP *nq,UP *nr)
1.1 noro 487: {
1.6 ! noro 488: UP w,r,q;
! 489: struct oUP t;
! 490: Num m;
! 491: int d;
! 492:
! 493: if ( !n2 )
! 494: error("qrup : division by 0");
! 495: else if ( !n1 ) {
! 496: *nq = 0; *nr = 0;
! 497: } else if ( !n2->d )
! 498: if ( !n1->d ) {
! 499: divnum(0,n1->c[0],n2->c[0],(Num *)nq); *nr = 0;
! 500: } else {
! 501: divnum(0,(Num)ONE,n2->c[0],&m);
! 502: t.d = 0; t.c[0] = m;
! 503: mulup(n1,&t,nq); *nr = 0;
! 504: }
! 505: else if ( n1->d < n2->d ) {
! 506: *nq = 0; *nr = n1;
! 507: } else {
! 508: w = W_UPALLOC(n1->d); copyup(n1,w);
! 509: qrup_destructive(w,n2);
! 510: d = n1->d-n2->d;
! 511: *nq = q = UPALLOC(d); q->d = d;
! 512: bcopy(w->c+n2->d,q->c,(d+1)*sizeof(Num));
! 513: if ( w->d < 0 )
! 514: *nr = 0;
! 515: else {
! 516: *nr = r = UPALLOC(w->d); copyup(w,r);
! 517: }
! 518: }
1.1 noro 519: }
520:
1.5 noro 521: void qrup_destructive(UP n1,UP n2)
1.1 noro 522: {
1.6 ! noro 523: Num *c1,*c2;
! 524: int d1,d2,i,j;
! 525: Num q,t,s,hc;
! 526:
! 527: c1 = n1->c; c2 = n2->c;
! 528: d1 = n1->d; d2 = n2->d;
! 529: hc = c2[d2];
! 530: for ( i = d1; i >= d2; i-- ) {
! 531: simpnum(c1[i],&t); c1[i] = t;
! 532: if ( c1[i] ) {
! 533: divnum(0,c1[i],hc,&q);
! 534: lm_lazy = 1;
! 535: for ( j = d2; j >= 0; j-- ) {
! 536: mulnum(0,c2[j],q,&t); subnum(0,c1[i+j-d2],t,&s);
! 537: c1[i+j-d2] = s;
! 538: }
! 539: lm_lazy = 0;
! 540: } else
! 541: q = 0;
! 542: c1[i] = q;
! 543: }
! 544: for ( i = 0; i < d2; i++ ) {
! 545: simpnum(c1[i],&t); c1[i] = t;
! 546: }
! 547: for ( i = d2-1; i >= 0 && !c1[i]; i-- );
! 548: n1->d = i;
1.1 noro 549: }
550:
1.5 noro 551: void gcdup(UP n1,UP n2,UP *nr)
1.1 noro 552: {
1.6 ! noro 553: UP w1,w2,t,r;
! 554: int d1,d2;
1.1 noro 555:
1.6 ! noro 556: if ( !n1 )
! 557: *nr = n2;
! 558: else if ( !n2 )
! 559: *nr = n1;
! 560: else if ( !n1->d || !n2->d ) {
! 561: *nr = r = UPALLOC(0); r->d = 0;
! 562: divnum(0,n1->c[n1->d],n1->c[n1->d],&r->c[0]);
! 563: } else {
! 564: d1 = n1->d; d2 = n2->d;
! 565: if ( d2 > d1 ) {
! 566: w1 = W_UPALLOC(d2); copyup(n2,w1);
! 567: w2 = W_UPALLOC(d1); copyup(n1,w2);
! 568: } else {
! 569: w1 = W_UPALLOC(d1); copyup(n1,w1);
! 570: w2 = W_UPALLOC(d2); copyup(n2,w2);
! 571: }
! 572: d1 = w1->d; d2 = w2->d;
! 573: while ( 1 ) {
! 574: remup_destructive(w1,w2);
! 575: if ( w1->d < 0 ) {
! 576: *nr = r = UPALLOC(w2->d); copyup(w2,r); return;
! 577: } else if ( !w1->d ) {
! 578: *nr = r = UPALLOC(0); r->d = 0;
! 579: divnum(0,n1->c[n1->d],n1->c[n1->d],&r->c[0]); return;
! 580: } else {
! 581: t = w1; w1 = w2; w2 = t;
! 582: }
! 583: }
! 584: }
1.1 noro 585: }
586:
587: /* compute r s.t. a*r = 1 mod m */
588:
1.5 noro 589: void extended_gcdup(UP a,UP m,UP *r)
1.1 noro 590: {
1.6 ! noro 591: UP one,g1,g2,a1,a2,a3,b1,b2,b3,inv,quo,rem,t;
! 592: Num i;
1.1 noro 593:
1.6 ! noro 594: one = UPALLOC(0); one->d = 0; one->c[0] = (Num)ONELM;
! 595: g1 = m; g2 = a;
! 596: a1 = one; a2 = 0;
! 597: b1 = 0; b2 = one;
! 598: /* a2*m+b2*a = g2 always holds */
! 599: while ( g2->d ) {
! 600: qrup(g1,g2,&quo,&rem); g1 = g2; g2 = rem;
! 601: mulup(a2,quo,&t); subup(a1,t,&a3); a1 = a2; a2 = a3;
! 602: mulup(b2,quo,&t); subup(b1,t,&b3); b1 = b2; b2 = b3;
! 603: }
! 604: /* now a2*m+b2*a = g2 (const) */
! 605: divnum(0,(Num)ONE,g2->c[0],&i);
! 606: inv = UPALLOC(0); inv->d = 0; inv->c[0] = i;
! 607: mulup(b2,inv,r);
1.1 noro 608: }
609:
1.5 noro 610: void reverseup(UP n1,int d,UP *nr)
1.1 noro 611: {
1.6 ! noro 612: Num *c1,*c;
! 613: int i,d1;
! 614: UP r;
! 615:
! 616: if ( !n1 )
! 617: *nr = 0;
! 618: else if ( d < (d1 = n1->d) )
! 619: error("reverseup : invalid input");
! 620: else {
! 621: *nr = r = UPALLOC(d);
! 622: c = r->c;
! 623: bzero((char *)c,(d+1)*sizeof(Num));
! 624: c1 = n1->c;
! 625: for ( i = 0; i <= d1; i++ )
! 626: c[d-i] = c1[i];
! 627: for ( i = d; i >= 0 && !c[i]; i-- );
! 628: r->d = i;
! 629: if ( i < 0 )
! 630: *nr = 0;
! 631: }
1.1 noro 632: }
633:
1.5 noro 634: void invmodup(UP n1,int d,UP *nr)
1.1 noro 635: {
1.6 ! noro 636: UP r;
! 637: Num s,t,u,hinv;
! 638: int i,j,dmin;
! 639: Num *w,*c,*cr;
! 640:
! 641: if ( !n1 || !n1->c[0] )
! 642: error("invmodup : invalid input");
! 643: else if ( !n1->d ) {
! 644: *nr = r = UPALLOC(0); r->d = 0;
! 645: divnum(0,(Num)ONE,n1->c[0],&r->c[0]);
! 646: } else {
! 647: *nr = r = UPALLOC(d);
! 648: w = (Num *)ALLOCA((d+1)*sizeof(Q));
! 649: bzero((char *)w,(d+1)*sizeof(Q));
! 650: dmin = MIN(d,n1->d);
! 651: divnum(0,(Num)ONE,n1->c[0],&hinv);
! 652: for ( i = 0, c = n1->c; i <= dmin; i++ )
! 653: mulnum(0,c[i],hinv,&w[i]);
! 654: cr = r->c;
! 655: cr[0] = w[0];
! 656: for ( i = 1; i <= d; i++ ) {
! 657: for ( j = 1, s = w[i]; j < i; j++ ) {
! 658: mulnum(0,cr[j],w[i-j],&t);
! 659: addnum(0,s,t,&u);
! 660: s = u;
! 661: }
! 662: chsgnnum(s,&cr[i]);
! 663: }
! 664: for ( i = 0; i <= d; i++ ) {
! 665: mulnum(0,cr[i],hinv,&t); cr[i] = t;
! 666: }
! 667: for ( i = d; i >= 0 && !cr[i]; i-- );
! 668: r->d = i;
! 669: }
1.1 noro 670: }
671:
1.5 noro 672: void pwrup(UP n,Q e,UP *nr)
1.1 noro 673: {
1.6 ! noro 674: UP y,z,t;
! 675: N en,qn;
! 676: int r;
! 677:
! 678: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)ONE;
! 679: z = n;
! 680: if ( !e )
! 681: *nr = y;
! 682: else if ( UNIQ(e) )
! 683: *nr = n;
! 684: else {
! 685: en = NM(e);
! 686: while ( 1 ) {
! 687: r = divin(en,2,&qn); en = qn;
! 688: if ( r ) {
! 689: mulup(z,y,&t); y = t;
! 690: if ( !en ) {
! 691: *nr = y;
! 692: return;
! 693: }
! 694: }
! 695: mulup(z,z,&t); z = t;
! 696: }
! 697: }
1.1 noro 698: }
699:
1.5 noro 700: int compup(UP n1,UP n2)
1.1 noro 701: {
1.6 ! noro 702: int i,r;
1.1 noro 703:
1.6 ! noro 704: if ( !n1 )
! 705: return n2 ? -1 : 0;
! 706: else if ( !n2 )
! 707: return 1;
! 708: else if ( n1->d > n2->d )
! 709: return 1;
! 710: else if ( n1->d < n2->d )
! 711: return -1;
! 712: else {
! 713: for ( i = n1->d; i >= 0; i-- ) {
! 714: r = compnum(CO,n1->c[i],n2->c[i]);
! 715: if ( r )
! 716: return r;
! 717: }
! 718: return 0;
! 719: }
1.1 noro 720: }
721:
1.5 noro 722: void kmulp(VL vl,P n1,P n2,P *nr)
1.1 noro 723: {
1.6 ! noro 724: UP b1,b2,br;
1.1 noro 725:
1.6 ! noro 726: if ( !n1 || !n2 )
! 727: *nr = 0;
! 728: else if ( OID(n1) == O_N || OID(n2) == O_N )
! 729: mulp(vl,n1,n2,nr);
! 730: else {
! 731: ptoup(n1,&b1); ptoup(n2,&b2);
! 732: kmulup(b1,b2,&br);
! 733: uptop(br,nr);
! 734: }
1.1 noro 735: }
736:
1.5 noro 737: void ksquarep(VL vl,P n1,P *nr)
1.1 noro 738: {
1.6 ! noro 739: UP b1,br;
1.1 noro 740:
1.6 ! noro 741: if ( !n1 )
! 742: *nr = 0;
! 743: else if ( OID(n1) == O_N )
! 744: mulp(vl,n1,n1,nr);
! 745: else {
! 746: ptoup(n1,&b1);
! 747: ksquareup(b1,&br);
! 748: uptop(br,nr);
! 749: }
1.1 noro 750: }
751:
1.5 noro 752: void kmulup(UP n1,UP n2,UP *nr)
1.1 noro 753: {
1.6 ! noro 754: int d1,d2;
1.1 noro 755:
1.6 ! noro 756: if ( !n1 || !n2 ) {
! 757: *nr = 0; return;
! 758: }
! 759: d1 = DEG(n1)+1; d2 = DEG(n2)+1;
! 760: if ( (d1 < up_kara_mag) && (d2 < up_kara_mag) )
! 761: mulup(n1,n2,nr);
! 762: else
! 763: kmulupmain(n1,n2,nr);
1.1 noro 764: }
765:
1.5 noro 766: void ksquareup(UP n1,UP *nr)
1.1 noro 767: {
1.6 ! noro 768: int d1;
! 769: extern Q TWO;
1.1 noro 770:
1.6 ! noro 771: if ( !n1 ) {
! 772: *nr = 0; return;
! 773: }
! 774: d1 = DEG(n1)+1;
! 775: if ( (d1 < up_kara_mag) )
! 776: squareup(n1,nr);
! 777: else
! 778: ksquareupmain(n1,nr);
1.1 noro 779: }
780:
1.5 noro 781: void copyup(UP n1,UP n2)
1.1 noro 782: {
1.6 ! noro 783: n2->d = n1->d;
! 784: bcopy((char *)n1->c,(char *)n2->c,(n1->d+1)*sizeof(Q));
1.1 noro 785: }
786:
1.5 noro 787: void c_copyup(UP n,int len,pointer *p)
1.1 noro 788: {
1.6 ! noro 789: if ( n )
! 790: bcopy((char *)COEF(n),(char *)p,MIN((DEG(n)+1),len)*sizeof(pointer));
1.1 noro 791: }
792:
1.5 noro 793: void kmulupmain(UP n1,UP n2,UP *nr)
1.1 noro 794: {
1.6 ! noro 795: int d1,d2,h;
! 796: UP n1lo,n1hi,n2lo,n2hi,hi,lo,mid1,mid2,mid,s1,s2,t1,t2;
1.1 noro 797:
1.6 ! noro 798: d1 = DEG(n1)+1; d2 = DEG(n2)+1; h = (MAX(d1,d2)+1)/2;
! 799: decompup(n1,h,&n1lo,&n1hi);
! 800: decompup(n2,h,&n2lo,&n2hi);
! 801: kmulup(n1lo,n2lo,&lo);
! 802: kmulup(n1hi,n2hi,&hi);
! 803: shiftup(hi,2*h,&t2);
! 804: addup(lo,t2,&t1);
! 805: addup(hi,lo,&mid1);
! 806: subup(n1hi,n1lo,&s1); subup(n2lo,n2hi,&s2); kmulup(s1,s2,&mid2);
! 807: addup(mid1,mid2,&mid);
! 808: shiftup(mid,h,&t2);
! 809: addup(t1,t2,nr);
1.1 noro 810: }
811:
1.5 noro 812: void ksquareupmain(UP n1,UP *nr)
1.1 noro 813: {
1.6 ! noro 814: int d1,h;
! 815: UP n1lo,n1hi,hi,lo,mid1,mid2,mid,s1,t1,t2;
1.1 noro 816:
1.6 ! noro 817: d1 = DEG(n1)+1; h = (d1+1)/2;
! 818: decompup(n1,h,&n1lo,&n1hi);
! 819: ksquareup(n1hi,&hi); ksquareup(n1lo,&lo);
! 820: shiftup(hi,2*h,&t2);
! 821: addup(lo,t2,&t1);
! 822: addup(hi,lo,&mid1);
! 823: subup(n1hi,n1lo,&s1); ksquareup(s1,&mid2);
! 824: subup(mid1,mid2,&mid);
! 825: shiftup(mid,h,&t2);
! 826: addup(t1,t2,nr);
1.1 noro 827: }
828:
1.5 noro 829: void rembymulup(UP n1,UP n2,UP *nr)
1.1 noro 830: {
1.6 ! noro 831: int d1,d2,d;
! 832: UP r1,r2,inv2,t,s,q;
1.1 noro 833:
1.6 ! noro 834: if ( !n2 )
! 835: error("rembymulup : division by 0");
! 836: else if ( !n1 || !n2->d )
! 837: *nr = 0;
! 838: else if ( (d1 = n1->d) < (d2 = n2->d) )
! 839: *nr = n1;
! 840: else {
! 841: d = d1-d2;
! 842: reverseup(n1,d1,&t); truncup(t,d+1,&r1);
! 843: reverseup(n2,d2,&r2);
! 844: invmodup(r2,d,&inv2);
! 845: kmulup(r1,inv2,&t);
! 846: truncup(t,d+1,&s);
! 847: reverseup(s,d,&q);
! 848: kmulup(q,n2,&t); subup(n1,t,nr);
! 849: }
1.1 noro 850: }
851:
852: /*
1.6 ! noro 853: deg n2 = d
! 854: deg n1 <= 2*d-1
! 855: inv2 = inverse of reversep(n2) mod x^d
1.1 noro 856: */
857:
1.5 noro 858: void hybrid_rembymulup_special(int ff,UP n1,UP n2,UP inv2,UP *nr)
1.1 noro 859: {
1.6 ! noro 860: int d1,d2,d;
! 861: UP r1,t,s,q;
1.1 noro 862:
1.6 ! noro 863: if ( !n2 )
! 864: error("hybrid_rembymulup : division by 0");
! 865: else if ( !n1 || !n2->d )
! 866: *nr = 0;
! 867: else if ( (d1 = n1->d) < (d2 = n2->d) )
! 868: *nr = n1;
! 869: else {
! 870: d = d1-d2;
! 871: reverseup(n1,d1,&t); truncup(t,d+1,&r1);
! 872: hybrid_tmulup(ff,r1,inv2,d+1,&t);
! 873: truncup(t,d+1,&s);
! 874: reverseup(s,d,&q);
! 875:
! 876: hybrid_tmulup(ff,q,n2,d2,&t);
! 877: truncup(n1,d2,&s);
! 878: subup(s,t,nr);
! 879: }
1.1 noro 880: }
881:
1.5 noro 882: void rembymulup_special(UP n1,UP n2,UP inv2,UP *nr)
1.1 noro 883: {
1.6 ! noro 884: int d1,d2,d;
! 885: UP r1,t,s,q;
1.1 noro 886:
1.6 ! noro 887: if ( !n2 )
! 888: error("rembymulup : division by 0");
! 889: else if ( !n1 || !n2->d )
! 890: *nr = 0;
! 891: else if ( (d1 = n1->d) < (d2 = n2->d) )
! 892: *nr = n1;
! 893: else {
! 894: d = d1-d2;
! 895: reverseup(n1,d1,&t); truncup(t,d+1,&r1);
! 896: tkmulup(r1,inv2,d+1,&t);
! 897: truncup(t,d+1,&s);
! 898: reverseup(s,d,&q);
! 899:
! 900: tkmulup(q,n2,d2,&t);
! 901: truncup(n1,d2,&s);
! 902: subup(s,t,nr);
! 903: }
1.1 noro 904: }
905:
906: /* *nr = n1*n2 mod x^d */
907:
1.5 noro 908: void tkmulup(UP n1,UP n2,int d,UP *nr)
1.1 noro 909: {
1.6 ! noro 910: int m;
! 911: UP n1l,n1h,n2l,n2h,l,h,t,s,u;
1.1 noro 912:
1.6 ! noro 913: if ( d < 0 )
! 914: error("tkmulup : invalid argument");
! 915: else if ( d == 0 )
! 916: *nr = 0;
! 917: else {
! 918: truncup(n1,d,&t); n1 = t;
! 919: truncup(n2,d,&t); n2 = t;
! 920: if ( !n1 || !n2 )
! 921: *nr = 0;
! 922: else if ( n1->d < up_tkara_mag || n2->d < up_tkara_mag )
! 923: tmulup(n1,n2,d,nr);
! 924: else {
! 925: m = (d+1)/2;
! 926: decompup(n1,m,&n1l,&n1h);
! 927: decompup(n2,m,&n2l,&n2h);
! 928: kmulup(n1l,n2l,&l);
! 929: tkmulup(n1l,n2h,d-m,&t);
! 930: tkmulup(n2l,n1h,d-m,&s);
! 931: addup(t,s,&u);
! 932: shiftup(u,m,&h);
! 933: addup(l,h,&t);
! 934: truncup(t,d,nr);
! 935: }
! 936: }
1.1 noro 937: }
938:
939: /* n->n*x^d */
940:
1.5 noro 941: void shiftup(UP n,int d,UP *nr)
1.1 noro 942: {
1.6 ! noro 943: int dr;
! 944: UP r;
1.1 noro 945:
1.6 ! noro 946: if ( !n )
! 947: *nr = 0;
! 948: else {
! 949: dr = n->d+d;
! 950: *nr = r = UPALLOC(dr); r->d = dr;
! 951: bzero(r->c,(dr+1)*sizeof(Num));
! 952: bcopy(n->c,r->c+d,(n->d+1)*sizeof(Num));
! 953: }
1.1 noro 954: }
955:
1.5 noro 956: void fft_rembymulup_special(UP n1,UP n2,UP inv2,UP *nr)
1.1 noro 957: {
1.6 ! noro 958: int d1,d2,d;
! 959: UP r1,t,s,q,u;
1.1 noro 960:
1.6 ! noro 961: if ( !n2 )
! 962: error("rembymulup : division by 0");
! 963: else if ( !n1 || !n2->d )
! 964: *nr = 0;
! 965: else if ( (d1 = n1->d) < (d2 = n2->d) )
! 966: *nr = n1;
! 967: else {
! 968: d = d1-d2;
! 969: reverseup(n1,d1,&t); truncup(t,d+1,&r1);
! 970: trunc_fft_mulup(r1,inv2,d+1,&t);
! 971: truncup(t,d+1,&s);
! 972: reverseup(s,d,&q);
! 973: trunc_fft_mulup(q,n2,d2,&t); truncup(t,d2,&u);
! 974: truncup(n1,d2,&s);
! 975: subup(s,u,nr);
! 976: }
1.1 noro 977: }
978:
1.5 noro 979: void set_degreeup(UP n,int d)
1.1 noro 980: {
1.6 ! noro 981: int i;
1.1 noro 982:
1.6 ! noro 983: for ( i = d; i >= 0 && !n->c[i]; i-- );
! 984: n->d = i;
1.1 noro 985: }
986:
987: /* n -> n0 + x^d n1 */
988:
1.5 noro 989: void decompup(UP n,int d,UP *n0,UP *n1)
1.1 noro 990: {
1.6 ! noro 991: int dn;
! 992: UP r0,r1;
1.1 noro 993:
1.6 ! noro 994: if ( !n || d > n->d ) {
! 995: *n0 = n; *n1 = 0;
! 996: } else if ( d < 0 )
! 997: error("decompup : invalid argument");
! 998: else if ( d == 0 ) {
! 999: *n0 = 0; *n1 = n;
! 1000: } else {
! 1001: dn = n->d;
! 1002: *n0 = r0 = UPALLOC(d-1);
! 1003: *n1 = r1 = UPALLOC(dn-d);
! 1004: bcopy(n->c,r0->c,d*sizeof(Num));
! 1005: bcopy(n->c+d,r1->c,(dn-d+1)*sizeof(Num));
! 1006: set_degreeup(r0,d-1);
! 1007: if ( r0->d < 0 )
! 1008: *n0 = 0;
! 1009: set_degreeup(r1,dn-d);
! 1010: if ( r1->d < 0 )
! 1011: *n1 = 0;
! 1012: }
1.1 noro 1013: }
1014:
1015: /* n -> n mod x^d */
1016:
1.5 noro 1017: void truncup(UP n1,int d,UP *nr)
1.1 noro 1018: {
1.6 ! noro 1019: int i;
! 1020: UP r;
1.1 noro 1021:
1.6 ! noro 1022: if ( !n1 || d > n1->d )
! 1023: *nr = n1;
! 1024: else if ( d < 0 )
! 1025: error("truncup : invalid argument");
! 1026: else if ( d == 0 )
! 1027: *nr = 0;
! 1028: else {
! 1029: *nr = r = UPALLOC(d-1);
! 1030: bcopy(n1->c,r->c,d*sizeof(Num));
! 1031: for ( i = d-1; i >= 0 && !r->c[i]; i-- );
! 1032: if ( i < 0 )
! 1033: *nr = 0;
! 1034: else
! 1035: r->d = i;
! 1036: }
1.1 noro 1037: }
1038:
1.5 noro 1039: int int_bits(int t)
1.1 noro 1040: {
1.6 ! noro 1041: int k;
1.1 noro 1042:
1.6 ! noro 1043: for ( k = 0; t; t>>=1, k++);
! 1044: return k;
1.1 noro 1045: }
1046:
1047: /* n is assumed to be LM or integer coefficient */
1048:
1.5 noro 1049: int maxblenup(UP n)
1.1 noro 1050: {
1.6 ! noro 1051: int m,r,i,d;
! 1052: Num *c;
1.1 noro 1053:
1.6 ! noro 1054: if ( !n )
! 1055: return 0;
! 1056: d = n->d; c = (Num *)n->c;
! 1057: for ( r = 0, i = 0; i <= d; i++ )
! 1058: if ( c[i] ) {
! 1059: switch ( NID(c[i]) ) {
! 1060: case N_LM:
! 1061: m = n_bits(((LM)c[i])->body);
! 1062: break;
! 1063: case N_Q:
! 1064: m = n_bits(((Q)c[i])->nm);
! 1065: break;
! 1066: default:
! 1067: error("maxblenup : invalid coefficient");
! 1068: }
! 1069: r = MAX(m,r);
! 1070: }
! 1071: return r;
1.1 noro 1072: }
1073:
1.5 noro 1074: void uptofmarray(int mod,UP n,ModNum *f)
1.1 noro 1075: {
1.6 ! noro 1076: int d,i;
! 1077: unsigned int r;
! 1078: Num *c;
! 1079:
! 1080: if ( !n )
! 1081: return;
! 1082: else {
! 1083: d = n->d; c = (Num *)n->c;
! 1084: for ( i = 0; i <= d; i++ ) {
! 1085: if ( c[i] ) {
! 1086: switch ( NID(c[i]) ) {
! 1087: case N_LM:
! 1088: f[i] = (ModNum)rem(((LM)c[i])->body,mod);
! 1089: break;
! 1090: case N_Q:
! 1091: r = rem(NM((Q)c[i]),mod);
! 1092: if ( r && (SGN((Q)c[i])<0) )
! 1093: r = mod-r;
! 1094: f[i] = (ModNum)r;
! 1095: break;
! 1096: default:
! 1097: error("uptofmarray : invalid coefficient");
! 1098: }
! 1099: } else
! 1100: f[i] = 0;
! 1101: }
! 1102: }
1.1 noro 1103: }
1104:
1.5 noro 1105: void fmarraytoup(ModNum *f,int d,UP *nr)
1.1 noro 1106: {
1.6 ! noro 1107: int i;
! 1108: Q *c;
! 1109: UP r;
! 1110:
! 1111: if ( d < 0 ) {
! 1112: *nr = 0;
! 1113: } else {
! 1114: *nr = r = UPALLOC(d); c = (Q *)r->c;
! 1115: for ( i = 0; i <= d; i++ ) {
! 1116: UTOQ((unsigned int)f[i],c[i]);
! 1117: }
! 1118: for ( i = d; i >= 0 && !c[i]; i-- );
! 1119: if ( i < 0 )
! 1120: *nr = 0;
! 1121: else
! 1122: r->d = i;
! 1123: }
1.1 noro 1124: }
1125:
1126: /* f[i]: an array of length n */
1127:
1.5 noro 1128: void uiarraytoup(unsigned int **f,int n,int d,UP *nr)
1.1 noro 1129: {
1.6 ! noro 1130: int i,j;
! 1131: unsigned int *fi;
! 1132: N ci;
! 1133: Q *c;
! 1134: UP r;
! 1135:
! 1136: if ( d < 0 ) {
! 1137: *nr = 0;
! 1138: } else {
! 1139: *nr = r = UPALLOC(d); c = (Q *)r->c;
! 1140: for ( i = 0; i <= d; i++ ) {
! 1141: fi = f[i];
! 1142: for ( j = n-1; j >= 0 && !fi[j]; j-- );
! 1143: j++;
! 1144: if ( j ) {
! 1145: ci = NALLOC(j);
! 1146: PL(ci) = j;
! 1147: bcopy(fi,BD(ci),j*sizeof(unsigned int));
! 1148: NTOQ(ci,1,c[i]);
! 1149: } else
! 1150: c[i] = 0;
! 1151: }
! 1152: for ( i = d; i >= 0 && !c[i]; i-- );
! 1153: if ( i < 0 )
! 1154: *nr = 0;
! 1155: else
! 1156: r->d = i;
! 1157: }
1.1 noro 1158: }
1159:
1.5 noro 1160: void adj_coefup(UP n,N m,N m2,UP *nr)
1.1 noro 1161: {
1.6 ! noro 1162: int d;
! 1163: Q *c,*cr;
! 1164: Q mq;
! 1165: int i;
! 1166: UP r;
! 1167:
! 1168: if ( !n )
! 1169: *nr = 0;
! 1170: else {
! 1171: d = n->d; c = (Q *)n->c;
! 1172: *nr = r = UPALLOC(d); cr = (Q *)r->c;
! 1173: NTOQ(m,1,mq);
! 1174: for ( i = 0; i <= d; i++ ) {
! 1175: if ( !c[i] )
! 1176: continue;
! 1177: if ( cmpn(NM(c[i]),m2) > 0 )
! 1178: subq(c[i],mq,&cr[i]);
! 1179: else
! 1180: cr[i] = c[i];
! 1181: }
! 1182: for ( i = d; i >= 0 && !cr[i]; i-- );
! 1183: if ( i < 0 )
! 1184: *nr = 0;
! 1185: else
! 1186: r->d = i;
! 1187: }
1.1 noro 1188: }
1189:
1190: /* n is assumed to have positive integer coefficients. */
1191:
1.5 noro 1192: void remcup(UP n,N mod,UP *nr)
1.1 noro 1193: {
1.6 ! noro 1194: int i,d;
! 1195: Q *c,*cr;
! 1196: UP r;
! 1197: N t;
! 1198:
! 1199: if ( !n )
! 1200: *nr = 0;
! 1201: else {
! 1202: d = n->d; c = (Q *)n->c;
! 1203: *nr = r = UPALLOC(d); cr = (Q *)r->c;
! 1204: for ( i = 0; i <= d; i++ )
! 1205: if ( c[i] ) {
! 1206: remn(NM(c[i]),mod,&t);
! 1207: if ( t )
! 1208: NTOQ(t,1,cr[i]);
! 1209: else
! 1210: cr[i] = 0;
! 1211: } else
! 1212: cr[i] = 0;
! 1213: for ( i = d; i >= 0 && !cr[i]; i-- );
! 1214: if ( i < 0 )
! 1215: *nr = 0;
! 1216: else
! 1217: r->d = i;
! 1218: }
1.1 noro 1219: }
1220:
1221: void fft_mulup_main(UP,UP,int,UP *);
1222:
1.5 noro 1223: void fft_mulup(UP n1,UP n2,UP *nr)
1.1 noro 1224: {
1.6 ! noro 1225: int d1,d2,d,b1,b2,h;
! 1226: UP n1lo,n1hi,n2lo,n2hi,lo,hi,t1,t2,mid1,mid2,mid,s1,s2;
1.1 noro 1227:
1.6 ! noro 1228: if ( !n1 || !n2 )
! 1229: *nr = 0;
! 1230: else {
! 1231: d1 = n1->d; d2 = n2->d;
! 1232: if ( MAX(d1,d2) < up_fft_mag )
! 1233: kmulup(n1,n2,nr);
! 1234: else {
! 1235: b1 = maxblenup(n1); b2 = maxblenup(n2);
! 1236: if ( fft_available(d1,b1,d2,b2) )
! 1237: fft_mulup_main(n1,n2,0,nr);
! 1238: else {
! 1239: d = MAX(d1,d2)+1; h = (d+1)/2;
! 1240: decompup(n1,h,&n1lo,&n1hi);
! 1241: decompup(n2,h,&n2lo,&n2hi);
! 1242: fft_mulup(n1lo,n2lo,&lo);
! 1243: fft_mulup(n1hi,n2hi,&hi);
! 1244: shiftup(hi,2*h,&t2);
! 1245: addup(lo,t2,&t1);
! 1246: addup(hi,lo,&mid1);
! 1247: subup(n1hi,n1lo,&s1); subup(n2lo,n2hi,&s2);
! 1248: fft_mulup(s1,s2,&mid2);
! 1249: addup(mid1,mid2,&mid);
! 1250: shiftup(mid,h,&t2);
! 1251: addup(t1,t2,nr);
! 1252: }
! 1253: }
! 1254: }
1.1 noro 1255: }
1256:
1.5 noro 1257: void trunc_fft_mulup(UP n1,UP n2,int dbd,UP *nr)
1.1 noro 1258: {
1.6 ! noro 1259: int d1,d2,b1,b2,m;
! 1260: UP n1l,n1h,n2l,n2h,l,h,t,s,u;
1.1 noro 1261:
1.6 ! noro 1262: if ( !n1 || !n2 )
! 1263: *nr = 0;
! 1264: else if ( dbd < 0 )
! 1265: error("trunc_fft_mulup : invalid argument");
! 1266: else if ( dbd == 0 )
! 1267: *nr = 0;
! 1268: else {
! 1269: truncup(n1,dbd,&t); n1 = t;
! 1270: truncup(n2,dbd,&t); n2 = t;
! 1271: d1 = n1->d; d2 = n2->d;
! 1272: if ( MAX(d1,d2) < up_fft_mag )
! 1273: tkmulup(n1,n2,dbd,nr);
! 1274: else {
! 1275: b1 = maxblenup(n1); b2 = maxblenup(n2);
! 1276: if ( fft_available(d1,b1,d2,b2) )
! 1277: fft_mulup_main(n1,n2,dbd,nr);
! 1278: else {
! 1279: m = (dbd+1)/2;
! 1280: decompup(n1,m,&n1l,&n1h);
! 1281: decompup(n2,m,&n2l,&n2h);
! 1282: fft_mulup(n1l,n2l,&l);
! 1283: trunc_fft_mulup(n1l,n2h,dbd-m,&t);
! 1284: trunc_fft_mulup(n2l,n1h,dbd-m,&s);
! 1285: addup(t,s,&u);
! 1286: shiftup(u,m,&h);
! 1287: addup(l,h,&t);
! 1288: truncup(t,dbd,nr);
! 1289: }
! 1290: }
! 1291: }
1.1 noro 1292: }
1293:
1.5 noro 1294: void fft_squareup(UP n1,UP *nr)
1.1 noro 1295: {
1.6 ! noro 1296: int d1,d,h,b1;
! 1297: UP n1lo,n1hi,hi,lo,mid1,mid2,mid,s1,t1,t2;
1.1 noro 1298:
1.6 ! noro 1299: if ( !n1 )
! 1300: *nr = 0;
! 1301: else {
! 1302: d1 = n1->d;
! 1303: if ( d1 < up_fft_mag )
! 1304: ksquareup(n1,nr);
! 1305: else {
! 1306: b1 = maxblenup(n1);
! 1307: if ( fft_available(d1,b1,d1,b1) )
! 1308: fft_mulup_main(n1,n1,0,nr);
! 1309: else {
! 1310: d = d1+1; h = (d1+1)/2;
! 1311: decompup(n1,h,&n1lo,&n1hi);
! 1312: fft_squareup(n1hi,&hi);
! 1313: fft_squareup(n1lo,&lo);
! 1314: shiftup(hi,2*h,&t2);
! 1315: addup(lo,t2,&t1);
! 1316: addup(hi,lo,&mid1);
! 1317: subup(n1hi,n1lo,&s1);
! 1318: fft_squareup(s1,&mid2);
! 1319: subup(mid1,mid2,&mid);
! 1320: shiftup(mid,h,&t2);
! 1321: addup(t1,t2,nr);
! 1322: }
! 1323: }
! 1324: }
1.1 noro 1325: }
1326:
1327: /*
1328: * dbd == 0 => n1 * n2
1329: * dbd > 0 => n1 * n2 mod x^dbd
1330: * n1 == n2 => squaring
1331: */
1332:
1.5 noro 1333: void fft_mulup_main(UP n1,UP n2,int dbd,UP *nr)
1.1 noro 1334: {
1.6 ! noro 1335: ModNum *f1,*f2,*w,*fr;
! 1336: ModNum **frarray,**fa;
! 1337: unsigned int *modarray,*ma;
! 1338: int modarray_len;
! 1339: int frarray_index = 0;
! 1340: N m,m1,m2;
! 1341: int d1,d2,dmin,i,mod,root,d,cond,bound;
! 1342: UP r;
! 1343:
! 1344: if ( !n1 || !n2 ) {
! 1345: *nr = 0; return;
! 1346: }
! 1347: d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
! 1348: if ( !d1 || !d2 ) {
! 1349: mulup(n1,n2,nr); return;
! 1350: }
! 1351: m = ONEN;
! 1352: bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+1;
! 1353: f1 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
! 1354: if ( n1 == n2 )
! 1355: f2 = 0;
! 1356: else
! 1357: f2 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
! 1358: w = (ModNum *)MALLOC_ATOMIC(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
! 1359: modarray_len = 1024;
! 1360: modarray = (unsigned int *)MALLOC_ATOMIC(modarray_len*sizeof(unsigned int));
! 1361: frarray = (ModNum **)MALLOC(modarray_len*sizeof(ModNum *));
! 1362: for ( i = 0; i < NPrimes; i++ ) {
! 1363: FFT_primes(i,&mod,&root,&d);
! 1364: if ( (1<<d) < d1+d2+1 )
! 1365: continue;
! 1366: if ( frarray_index == modarray_len ) {
! 1367: ma = (unsigned int *)MALLOC_ATOMIC(2*modarray_len*sizeof(unsigned int));
! 1368: bcopy(modarray,ma,modarray_len*sizeof(unsigned int));
! 1369: modarray = ma;
! 1370: fa = (ModNum **)MALLOC(2*modarray_len*sizeof(ModNum *));
! 1371: bcopy(frarray,fa,modarray_len*sizeof(ModNum *));
! 1372: frarray = fa;
! 1373: modarray_len *= 2;
! 1374: }
! 1375: modarray[frarray_index] = mod;
! 1376: frarray[frarray_index++] = fr
! 1377: = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
! 1378: uptofmarray(mod,n1,f1);
! 1379: if ( !f2 )
! 1380: cond = FFT_pol_square(d1,f1,fr,i,w);
! 1381: else {
! 1382: uptofmarray(mod,n2,f2);
! 1383: cond = FFT_pol_product(d1,f1,d2,f2,fr,i,w);
! 1384: }
! 1385: if ( cond )
! 1386: error("fft_mulup : error in FFT_pol_product");
! 1387: STON(mod,m1); muln(m,m1,&m2); m = m2;
! 1388: if ( n_bits(m) > bound ) {
! 1389: if ( !dbd )
! 1390: dbd = d1+d2+1;
! 1391: crup(frarray,MIN(d1+d2,dbd-1),modarray,frarray_index,m,&r);
! 1392: divin(m,2,&m2);
! 1393: adj_coefup(r,m,m2,nr);
! 1394: return;
! 1395: }
! 1396: }
! 1397: error("fft_mulup : FFT_primes exhausted");
1.1 noro 1398: }
1399: #if 0
1400: /* inefficient version */
1401:
1.5 noro 1402: void crup(ModNum **f,int d,int *mod,int index,N m,UP *r)
1.1 noro 1403: {
1.6 ! noro 1404: N *cof,*c;
! 1405: int *inv;
! 1406: struct oUP w;
! 1407: int i;
! 1408: UP s,s1,s2;
! 1409: N t;
! 1410: UP *g;
! 1411: Q q;
! 1412: struct oEGT eg0,eg1;
! 1413:
! 1414: get_eg(&eg0);
! 1415: w.d = 0;
! 1416: inv = (int *)ALLOCA(index*sizeof(int));
! 1417: cof = (N *)ALLOCA(index*sizeof(N));
! 1418: c = (N *)ALLOCA(index*sizeof(N));
! 1419: g = (UP *)ALLOCA(index*sizeof(UP));
! 1420: up_lazy = 1;
! 1421: for ( i = 0, s = 0; i < index; i++ ) {
! 1422: divin(m,mod[i],&cof[i]);
! 1423: inv[i] = invm(rem(cof[i],mod[i]),mod[i]);
! 1424: STON(inv[i],t);
! 1425: muln(cof[i],t,&c[i]);
! 1426: NTOQ(c[i],1,q); w.c[0] = (Num)q;
! 1427: fmarraytoup(f[i],d,&g[i]);
! 1428: mulup(g[i],&w,&s1);
! 1429: addup(s,s1,&s2);
! 1430: s = s2;
! 1431: }
! 1432: up_lazy = 0;
! 1433: remcup(s,m,r);
! 1434: get_eg(&eg1);
! 1435: add_eg(&eg_chrem,&eg0,&eg1);
1.1 noro 1436: }
1437:
1438: #else
1439: /* improved version */
1440:
1.5 noro 1441: void crup(ModNum **f,int d,int *mod,int index,N m,UP *r)
1.1 noro 1442: {
1.6 ! noro 1443: N cof,c,t,w,w1;
! 1444: struct oN fc;
! 1445: N *s;
! 1446: UP u;
! 1447: Q q;
! 1448: int inv,i,j,rlen;
! 1449:
! 1450: rlen = PL(m)+10; /* XXX */
! 1451: PL(&fc) = 1;
! 1452: c = NALLOC(rlen);
! 1453: w = NALLOC(rlen);
! 1454: w1 = NALLOC(rlen);
! 1455: s = (N *)MALLOC((d+1)*sizeof(N));
! 1456: for ( i = 0; i <= d; i++ ) {
! 1457: s[i] = NALLOC(rlen);
! 1458: PL(s[i]) = 0;
! 1459: bzero(BD(s[i]),rlen*sizeof(unsigned int));
! 1460: }
! 1461: for ( i = 0; i < index; i++ ) {
! 1462: divin(m,mod[i],&cof);
! 1463: inv = invm(rem(cof,mod[i]),mod[i]); STON(inv,t);
! 1464: _muln(cof,t,c);
! 1465: /* s += c*f[i] */
! 1466: for ( j = 0; j <= d; j++ )
! 1467: if ( f[i][j] ) {
! 1468: BD(&fc)[0] = f[i][j];
! 1469: _muln(c,&fc,w);
! 1470: _addn(s[j],w,w1);
! 1471: dupn(w1,s[j]);
! 1472: }
! 1473: }
! 1474: for ( i = d; i >= 0; i-- )
! 1475: if ( s[i] )
! 1476: break;
! 1477: if ( i < 0 )
! 1478: *r = 0;
! 1479: else {
! 1480: u = UPALLOC(i);
! 1481: DEG(u) = i;
! 1482: for ( j = 0; j <= i; j++ ) {
! 1483: if ( PL(s[j]) )
! 1484: NTOQ(s[j],1,q);
! 1485: else
! 1486: q = 0;
! 1487: COEF(u)[j] = (Num)q;
! 1488: }
! 1489: remcup(u,m,r);
! 1490: }
1.1 noro 1491: }
1492: #endif
1493:
1.2 noro 1494: /*
1495: * dbd == 0 => n1 * n2
1496: * dbd > 0 => n1 * n2 mod x^dbd
1497: * n1 == n2 => squaring
1498: * return: n1*n2 mod Primes[modind[0]]*.prime...*Primes[modind[nmod-1]].prime
1499: */
1500:
1.5 noro 1501: void fft_mulup_specialmod_main(UP n1,UP n2,int dbd,int *modind,int nmod,UP *nr)
1.2 noro 1502: {
1.6 ! noro 1503: ModNum *f1,*f2,*w,*fr;
! 1504: ModNum **frarray;
! 1505: N m,m1,m2;
! 1506: unsigned int *modarray;
! 1507: int d1,d2,dmin,i,root,d,cond,bound;
! 1508:
! 1509: if ( !n1 || !n2 ) {
! 1510: *nr = 0; return;
! 1511: }
! 1512: d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
! 1513: if ( !d1 || !d2 ) {
! 1514: mulup(n1,n2,nr); return;
! 1515: }
! 1516: m = ONEN;
! 1517: bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+1;
! 1518: f1 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
! 1519: if ( n1 == n2 )
! 1520: f2 = 0;
! 1521: else
! 1522: f2 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
! 1523: w = (ModNum *)MALLOC_ATOMIC(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
! 1524: frarray = (ModNum **)MALLOC(nmod*sizeof(ModNum *));
! 1525: modarray = (unsigned int *)MALLOC_ATOMIC(nmod*sizeof(unsigned int *));
! 1526:
! 1527: for ( i = 0; i < nmod; i++ ) {
! 1528: FFT_primes(modind[i],&modarray[i],&root,&d);
! 1529: if ( (1<<d) < d1+d2+1 )
! 1530: error("fft_mulup_specialmod_main : invalid modulus");
! 1531: frarray[i] = fr
! 1532: = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
! 1533: uptofmarray(modarray[i],n1,f1);
! 1534: if ( !f2 )
! 1535: cond = FFT_pol_square(d1,f1,fr,modind[i],w);
! 1536: else {
! 1537: uptofmarray(modarray[i],n2,f2);
! 1538: cond = FFT_pol_product(d1,f1,d2,f2,fr,modind[i],w);
! 1539: }
! 1540: if ( cond )
! 1541: error("fft_mulup_specialmod_main : error in FFT_pol_product");
! 1542: STON(modarray[i],m1); muln(m,m1,&m2); m = m2;
! 1543: }
! 1544: if ( !dbd )
! 1545: dbd = d1+d2+1;
! 1546: crup(frarray,MIN(d1+d2,dbd-1),modarray,nmod,m,nr);
1.2 noro 1547: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>