Annotation of OpenXM_contrib2/asir2000/engine/up.c, Revision 1.5
1.3 noro 1: /*
2: * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
3: * All rights reserved.
4: *
5: * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
6: * non-exclusive and royalty-free license to use, copy, modify and
7: * redistribute, solely for non-commercial and non-profit purposes, the
8: * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
9: * conditions of this Agreement. For the avoidance of doubt, you acquire
10: * only a limited right to use the SOFTWARE hereunder, and FLL or any
11: * third party developer retains all rights, including but not limited to
12: * copyrights, in and to the SOFTWARE.
13: *
14: * (1) FLL does not grant you a license in any way for commercial
15: * purposes. You may use the SOFTWARE only for non-commercial and
16: * non-profit purposes only, such as academic, research and internal
17: * business use.
18: * (2) The SOFTWARE is protected by the Copyright Law of Japan and
19: * international copyright treaties. If you make copies of the SOFTWARE,
20: * with or without modification, as permitted hereunder, you shall affix
21: * to all such copies of the SOFTWARE the above copyright notice.
22: * (3) An explicit reference to this SOFTWARE and its copyright owner
23: * shall be made on your publication or presentation in any form of the
24: * results obtained by use of the SOFTWARE.
25: * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
1.4 noro 26: * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
1.3 noro 27: * for such modification or the source code of the modified part of the
28: * SOFTWARE.
29: *
30: * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
31: * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
32: * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
33: * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
34: * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
35: * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
36: * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
37: * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
38: * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
39: * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
40: * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
41: * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
42: * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
43: * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
44: * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
45: * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
46: * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
47: *
1.5 ! noro 48: * $OpenXM: OpenXM_contrib2/asir2000/engine/up.c,v 1.4 2000/08/22 05:04:06 noro Exp $
1.3 noro 49: */
1.1 noro 50: #include "ca.h"
51: #include <math.h>
52:
53: struct oEGT eg_chrem,eg_fore,eg_back;
54:
55: /*
56: int up_kara_mag=15;
57: int up_tkara_mag=15;
58: */
59: /*
60: int up_kara_mag=30;
61: int up_tkara_mag=20;
62: */
63: int up_kara_mag=20;
64: int up_tkara_mag=15;
65:
66: #if defined(sparc)
67: int up_fft_mag=50;
68: #else
69: int up_fft_mag=80;
70: #endif
71:
72: int up_rem_mag=1;
73: int debug_up;
74: int up_lazy;
75: extern int lm_lazy;
76: extern int current_ff;
77: extern int GC_dont_gc;
78:
1.5 ! noro 79: void monicup(UP a,UP *b)
1.1 noro 80: {
81: UP w;
82:
83: if ( !a )
84: *b = 0;
85: else {
86: w = W_UPALLOC(0); w->d = 0;
87: divnum(0,(Num)ONE,(Num)a->c[a->d],(Num *)&w->c[0]);
88: mulup(a,w,b);
89: }
90: }
91:
1.5 ! noro 92: void simpup(UP a,UP *b)
1.1 noro 93: {
94: int i,d;
95: UP c;
96:
97: if ( !a )
98: *b = 0;
99: else {
100: d = a->d;
101: c = UPALLOC(d);
102:
103: for ( i = 0; i <= d; i++ )
104: simpnum(a->c[i],&c->c[i]);
105: for ( i = d; i >= 0 && !c->c[i]; i-- );
106: if ( i < 0 )
107: *b = 0;
108: else {
109: c->d = i;
110: *b = c;
111: }
112: }
113: }
114:
1.5 ! noro 115: void simpnum(Num a,Num *b)
1.1 noro 116: {
117: LM lm;
118: GF2N gf;
119: GFPN gfpn;
120:
121: if ( !a )
122: *b = 0;
123: else
124: switch ( NID(a) ) {
125: case N_LM:
126: simplm((LM)a,&lm); *b = (Num)lm; break;
127: case N_GF2N:
128: simpgf2n((GF2N)a,&gf); *b = (Num)gf; break;
129: case N_GFPN:
130: simpgfpn((GFPN)a,&gfpn); *b = (Num)gfpn; break;
131: default:
132: *b = a; break;
133: }
134: }
135:
1.5 ! noro 136: void uremp(P p1,P p2,P *rp)
1.1 noro 137: {
138: UP n1,n2,r;
139:
140: if ( !p1 || NUM(p1) )
141: *rp = p1;
142: else {
143: ptoup(p1,&n1); ptoup(p2,&n2);
144: remup(n1,n2,&r);
145: uptop(r,rp);
146: }
147: }
148:
1.5 ! noro 149: void ugcdp(P p1,P p2,P *rp)
1.1 noro 150: {
151: UP n1,n2,r;
152:
153: ptoup(p1,&n1); ptoup(p2,&n2);
154: gcdup(n1,n2,&r);
155: uptop(r,rp);
156: }
157:
1.5 ! noro 158: void reversep(P p1,Q d,P *rp)
1.1 noro 159: {
160: UP n1,r;
161:
162: if ( !p1 )
163: *rp = 0;
164: else {
165: ptoup(p1,&n1);
166: reverseup(n1,QTOS(d),&r);
167: uptop(r,rp);
168: }
169: }
170:
1.5 ! noro 171: void invmodp(P p1,Q d,P *rp)
1.1 noro 172: {
173: UP n1,r;
174:
175: if ( !p1 )
176: error("invmodp : invalid input");
177: else {
178: ptoup(p1,&n1);
179: invmodup(n1,QTOS(d),&r);
180: uptop(r,rp);
181: }
182: }
183:
1.5 ! noro 184: void addup(UP n1,UP n2,UP *nr)
1.1 noro 185: {
186: UP r,t;
187: int i,d1,d2;
188: Num *c,*c1,*c2;
189:
190: if ( !n1 ) {
191: *nr = n2;
192: } else if ( !n2 ) {
193: *nr = n1;
194: } else {
195: if ( n2->d > n1->d ) {
196: t = n1; n1 = n2; n2 = t;
197: }
198: d1 = n1->d; d2 = n2->d;
199: *nr = r = UPALLOC(d1);
200: c1 = n1->c; c2 = n2->c; c = r->c;
201: for ( i = 0; i <= d2; i++ )
202: addnum(0,*c1++,*c2++,c++);
203: for ( ; i <= d1; i++ )
204: *c++ = *c1++;
205: for ( i = d1, c = r->c; i >=0 && !c[i]; i-- );
206: if ( i < 0 )
207: *nr = 0;
208: else
209: r->d = i;
210: }
211: }
212:
1.5 ! noro 213: void subup(UP n1,UP n2,UP *nr)
1.1 noro 214: {
215: UP r;
216: int i,d1,d2,d;
217: Num *c,*c1,*c2;
218:
219: if ( !n1 ) {
220: chsgnup(n2,nr);
221: } else if ( !n2 ) {
222: *nr = n1;
223: } else {
224: d1 = n1->d; d2 = n2->d; d = MAX(d1,d2);
225: *nr = r = UPALLOC(d);
226: c1 = n1->c; c2 = n2->c; c = r->c;
227: if ( d1 >= d2 ) {
228: for ( i = 0; i <= d2; i++ )
229: subnum(0,*c1++,*c2++,c++);
230: for ( ; i <= d1; i++ )
231: *c++ = *c1++;
232: } else {
233: for ( i = 0; i <= d1; i++ )
234: subnum(0,*c1++,*c2++,c++);
235: for ( ; i <= d2; i++ )
236: chsgnnum(*c2++,c++);
237: }
238: for ( i = d, c = r->c; i >=0 && !c[i]; i-- );
239: if ( i < 0 )
240: *nr = 0;
241: else
242: r->d = i;
243: }
244: }
245:
1.5 ! noro 246: void chsgnup(UP n1,UP *nr)
1.1 noro 247: {
248: UP r;
249: int d1,i;
250: Num *c1,*c;
251:
252: if ( !n1 ) {
253: *nr = 0;
254: } else {
255: d1 = n1->d;
256: *nr = r = UPALLOC(d1); r->d = d1;
257: c1 = n1->c; c = r->c;
258: for ( i = 0; i <= d1; i++ )
259: chsgnnum(*c1++,c++);
260: }
261: }
262:
1.5 ! noro 263: void hybrid_mulup(int ff,UP n1,UP n2,UP *nr)
1.1 noro 264: {
265: if ( !n1 || !n2 )
266: *nr = 0;
267: else if ( MAX(n1->d,n2->d) < up_fft_mag )
268: kmulup(n1,n2,nr);
269: else
270: switch ( ff ) {
271: case FF_GFP:
272: fft_mulup_lm(n1,n2,nr); break;
273: case FF_GF2N:
274: kmulup(n1,n2,nr); break;
275: default:
276: fft_mulup(n1,n2,nr); break;
277: }
278: }
279:
1.5 ! noro 280: void hybrid_squareup(int ff,UP n1,UP *nr)
1.1 noro 281: {
282: if ( !n1 )
283: *nr = 0;
284: else if ( n1->d < up_fft_mag )
285: ksquareup(n1,nr);
286: else
287: switch ( ff ) {
288: case FF_GFP:
289: fft_squareup_lm(n1,nr); break;
290: case FF_GF2N:
291: ksquareup(n1,nr); break;
292: default:
293: fft_squareup(n1,nr); break;
294: }
295: }
296:
1.5 ! noro 297: void hybrid_tmulup(int ff,UP n1,UP n2,int d,UP *nr)
1.1 noro 298: {
299: if ( !n1 || !n2 )
300: *nr = 0;
301: else if ( MAX(n1->d,n2->d) < up_fft_mag )
302: tkmulup(n1,n2,d,nr);
303: else
304: switch ( ff ) {
305: case FF_GFP:
306: trunc_fft_mulup_lm(n1,n2,d,nr); break;
307: case FF_GF2N:
308: tkmulup(n1,n2,d,nr); break;
309: default:
310: trunc_fft_mulup(n1,n2,d,nr); break;
311: }
312: }
313:
1.5 ! noro 314: void mulup(UP n1,UP n2,UP *nr)
1.1 noro 315: {
316: UP r;
317: Num *pc1,*pc,*c1,*c2,*c;
318: Num mul,t,s;
319: int i,j,d1,d2;
320:
321: if ( !n1 || !n2 )
322: *nr = 0;
323: else {
324: d1 = n1->d; d2 = n2->d;
325: *nr = r = UPALLOC(d1+d2); r->d = d1+d2;
326: c1 = n1->c; c2 = n2->c; c = r->c;
327: lm_lazy = 1;
328: for ( i = 0; i <= d2; i++, c++ )
329: if ( mul = *c2++ )
330: for ( j = 0, pc1 = c1, pc = c; j <= d1; j++ ) {
331: mulnum(0,*pc1++,mul,&t); addnum(0,*pc,t,&s); *pc++ = s;
332: }
333: lm_lazy = 0;
334: if ( !up_lazy )
335: for ( i = 0, c = r->c; i <= r->d; i++ ) {
336: simpnum(c[i],&t); c[i] = t;
337: }
338: }
339: }
340:
341: /* nr = c*n1 */
342:
1.5 ! noro 343: void mulcup(Num c,UP n1,UP *nr)
1.1 noro 344: {
345: int d;
346: UP r;
347: Num t;
348: Num *c1,*cr;
349: int i;
350:
351: if ( !c || !n1 )
352: *nr = 0;
353: else {
354: d = n1->d;
355: *nr = r = UPALLOC(d); r->d = d;
356: c1 = n1->c; cr = r->c;
357: lm_lazy = 1;
358: for ( i = 0; i <= d; i++ )
359: mulnum(0,c1[i],c,&cr[i]);
360: lm_lazy = 0;
361: if ( !up_lazy )
362: for ( i = 0; i <= d; i++ ) {
363: simpnum(cr[i],&t); cr[i] = t;
364: }
365: }
366: }
367:
1.5 ! noro 368: void tmulup(UP n1,UP n2,int d,UP *nr)
1.1 noro 369: {
370: UP r;
371: Num *pc1,*pc,*c1,*c2,*c;
372: Num mul,t,s;
373: int i,j,d1,d2,dr;
374:
375: if ( !n1 || !n2 )
376: *nr = 0;
377: else {
378: d1 = n1->d; d2 = n2->d;
379: dr = MAX(d-1,d1+d2);
380: *nr = r = UPALLOC(dr);
381: c1 = n1->c; c2 = n2->c; c = r->c;
382: lm_lazy = 1;
383: for ( i = 0; i <= d2; i++, c++ )
384: if ( mul = *c2++ )
385: for ( j = 0, pc1 = c1, pc = c; (j <= d1) && (i+j < d); j++ ) {
386: mulnum(0,*pc1++,mul,&t); addnum(0,*pc,t,&s); *pc++ = s;
387: }
388: lm_lazy = 0;
389: c = r->c;
390: if ( !up_lazy )
391: for ( i = 0; i < d; i++ ) {
392: simpnum(c[i],&t); c[i] = t;
393: }
394: for ( i = d-1; i >= 0 && !c[i]; i-- );
395: if ( i < 0 )
396: *nr = 0;
397: else
398: r->d = i;
399: }
400: }
401:
1.5 ! noro 402: void squareup(UP n1,UP *nr)
1.1 noro 403: {
404: UP r;
405: Num *c1,*c;
406: Num t,s,u;
407: int i,j,h,d1,d;
408:
409: if ( !n1 )
410: *nr = 0;
411: else if ( !n1->d ) {
412: *nr = r = UPALLOC(0); r->d = 0;
413: mulnum(0,n1->c[0],n1->c[0],&r->c[0]);
414: } else {
415: d1 = n1->d;
416: d = 2*d1;
417: *nr = r = UPALLOC(2*d); r->d = d;
418: c1 = n1->c; c = r->c;
419: lm_lazy = 1;
420: for ( i = 0; i <= d1; i++ )
421: mulnum(0,c1[i],c1[i],&c[2*i]);
422: for ( j = 1; j < d; j++ ) {
423: h = (j-1)/2;
424: for ( i = MAX(j-d1,0), s = 0; i <= h; i++ ) {
425: mulnum(0,c1[i],c1[j-i],&t); addnum(0,t,s,&u); s = u;
426: }
427: addnum(0,s,s,&t); addnum(0,t,c[j],&u); c[j] = u;
428: }
429: lm_lazy = 0;
430: if ( !up_lazy )
431: for ( i = 0, c = r->c; i <= d; i++ ) {
432: simpnum(c[i],&t); c[i] = t;
433: }
434: }
435: }
436:
1.5 ! noro 437: void remup(UP n1,UP n2,UP *nr)
1.1 noro 438: {
439: UP w,r;
440:
441: if ( !n2 )
442: error("remup : division by 0");
443: else if ( !n1 || !n2->d )
444: *nr = 0;
445: else if ( n1->d < n2->d )
446: *nr = n1;
447: else {
448: w = W_UPALLOC(n1->d); copyup(n1,w);
449: remup_destructive(w,n2);
450: if ( w->d < 0 )
451: *nr = 0;
452: else {
453: *nr = r = UPALLOC(w->d); copyup(w,r);
454: }
455: }
456: }
457:
1.5 ! noro 458: void remup_destructive(UP n1,UP n2)
1.1 noro 459: {
460: Num *c1,*c2;
461: int d1,d2,i,j;
462: Num m,t,s,mhc;
463:
464: c1 = n1->c; c2 = n2->c;
465: d1 = n1->d; d2 = n2->d;
466: chsgnnum(c2[d2],&mhc);
467: for ( i = d1; i >= d2; i-- ) {
468: simpnum(c1[i],&t); c1[i] = t;
469: if ( !c1[i] )
470: continue;
471: divnum(0,c1[i],mhc,&m);
472: lm_lazy = 1;
473: for ( j = d2; j >= 0; j-- ) {
474: mulnum(0,c2[j],m,&t); addnum(0,c1[i+j-d2],t,&s);
475: c1[i+j-d2] = s;
476: }
477: lm_lazy = 0;
478: }
479: for ( i = 0; i < d2; i++ ) {
480: simpnum(c1[i],&t); c1[i] = t;
481: }
482: for ( i = d2-1; i >= 0 && !c1[i]; i-- );
483: n1->d = i;
484: }
485:
1.5 ! noro 486: void qrup(UP n1,UP n2,UP *nq,UP *nr)
1.1 noro 487: {
488: UP w,r,q;
489: struct oUP t;
490: Num m;
491: int d;
492:
493: if ( !n2 )
494: error("qrup : division by 0");
495: else if ( !n1 ) {
496: *nq = 0; *nr = 0;
497: } else if ( !n2->d )
498: if ( !n1->d ) {
499: divnum(0,n1->c[0],n2->c[0],(Num *)nq); *nr = 0;
500: } else {
501: divnum(0,(Num)ONE,n2->c[0],&m);
502: t.d = 0; t.c[0] = m;
503: mulup(n1,&t,nq); *nr = 0;
504: }
505: else if ( n1->d < n2->d ) {
506: *nq = 0; *nr = n1;
507: } else {
508: w = W_UPALLOC(n1->d); copyup(n1,w);
509: qrup_destructive(w,n2);
510: d = n1->d-n2->d;
511: *nq = q = UPALLOC(d); q->d = d;
512: bcopy(w->c+n2->d,q->c,(d+1)*sizeof(Num));
513: if ( w->d < 0 )
514: *nr = 0;
515: else {
516: *nr = r = UPALLOC(w->d); copyup(w,r);
517: }
518: }
519: }
520:
1.5 ! noro 521: void qrup_destructive(UP n1,UP n2)
1.1 noro 522: {
523: Num *c1,*c2;
524: int d1,d2,i,j;
525: Num q,t,s,hc;
526:
527: c1 = n1->c; c2 = n2->c;
528: d1 = n1->d; d2 = n2->d;
529: hc = c2[d2];
530: for ( i = d1; i >= d2; i-- ) {
531: simpnum(c1[i],&t); c1[i] = t;
532: if ( c1[i] ) {
533: divnum(0,c1[i],hc,&q);
534: lm_lazy = 1;
535: for ( j = d2; j >= 0; j-- ) {
536: mulnum(0,c2[j],q,&t); subnum(0,c1[i+j-d2],t,&s);
537: c1[i+j-d2] = s;
538: }
539: lm_lazy = 0;
540: } else
541: q = 0;
542: c1[i] = q;
543: }
544: for ( i = 0; i < d2; i++ ) {
545: simpnum(c1[i],&t); c1[i] = t;
546: }
547: for ( i = d2-1; i >= 0 && !c1[i]; i-- );
548: n1->d = i;
549: }
550:
1.5 ! noro 551: void gcdup(UP n1,UP n2,UP *nr)
1.1 noro 552: {
553: UP w1,w2,t,r;
554: int d1,d2;
555:
556: if ( !n1 )
557: *nr = n2;
558: else if ( !n2 )
559: *nr = n1;
560: else if ( !n1->d || !n2->d ) {
561: *nr = r = UPALLOC(0); r->d = 0;
562: divnum(0,n1->c[n1->d],n1->c[n1->d],&r->c[0]);
563: } else {
564: d1 = n1->d; d2 = n2->d;
565: if ( d2 > d1 ) {
566: w1 = W_UPALLOC(d2); copyup(n2,w1);
567: w2 = W_UPALLOC(d1); copyup(n1,w2);
568: } else {
569: w1 = W_UPALLOC(d1); copyup(n1,w1);
570: w2 = W_UPALLOC(d2); copyup(n2,w2);
571: }
572: d1 = w1->d; d2 = w2->d;
573: while ( 1 ) {
574: remup_destructive(w1,w2);
575: if ( w1->d < 0 ) {
576: *nr = r = UPALLOC(w2->d); copyup(w2,r); return;
577: } else if ( !w1->d ) {
578: *nr = r = UPALLOC(0); r->d = 0;
579: divnum(0,n1->c[n1->d],n1->c[n1->d],&r->c[0]); return;
580: } else {
581: t = w1; w1 = w2; w2 = t;
582: }
583: }
584: }
585: }
586:
587: /* compute r s.t. a*r = 1 mod m */
588:
1.5 ! noro 589: void extended_gcdup(UP a,UP m,UP *r)
1.1 noro 590: {
591: UP one,g1,g2,a1,a2,a3,b1,b2,b3,inv,quo,rem,t;
592: Num i;
593:
594: one = UPALLOC(0); one->d = 0; one->c[0] = (Num)ONELM;
595: g1 = m; g2 = a;
596: a1 = one; a2 = 0;
597: b1 = 0; b2 = one;
598: /* a2*m+b2*a = g2 always holds */
599: while ( g2->d ) {
600: qrup(g1,g2,&quo,&rem); g1 = g2; g2 = rem;
601: mulup(a2,quo,&t); subup(a1,t,&a3); a1 = a2; a2 = a3;
602: mulup(b2,quo,&t); subup(b1,t,&b3); b1 = b2; b2 = b3;
603: }
604: /* now a2*m+b2*a = g2 (const) */
605: divnum(0,(Num)ONE,g2->c[0],&i);
606: inv = UPALLOC(0); inv->d = 0; inv->c[0] = i;
607: mulup(b2,inv,r);
608: }
609:
1.5 ! noro 610: void reverseup(UP n1,int d,UP *nr)
1.1 noro 611: {
612: Num *c1,*c;
613: int i,d1;
614: UP r;
615:
616: if ( !n1 )
617: *nr = 0;
618: else if ( d < (d1 = n1->d) )
619: error("reverseup : invalid input");
620: else {
621: *nr = r = UPALLOC(d);
622: c = r->c;
623: bzero((char *)c,(d+1)*sizeof(Num));
624: c1 = n1->c;
625: for ( i = 0; i <= d1; i++ )
626: c[d-i] = c1[i];
627: for ( i = d; i >= 0 && !c[i]; i-- );
628: r->d = i;
629: if ( i < 0 )
630: *nr = 0;
631: }
632: }
633:
1.5 ! noro 634: void invmodup(UP n1,int d,UP *nr)
1.1 noro 635: {
636: UP r;
637: Num s,t,u,hinv;
638: int i,j,dmin;
639: Num *w,*c,*cr;
640:
641: if ( !n1 || !n1->c[0] )
642: error("invmodup : invalid input");
643: else if ( !n1->d ) {
644: *nr = r = UPALLOC(0); r->d = 0;
645: divnum(0,(Num)ONE,n1->c[0],&r->c[0]);
646: } else {
647: *nr = r = UPALLOC(d);
648: w = (Num *)ALLOCA((d+1)*sizeof(Q));
649: bzero((char *)w,(d+1)*sizeof(Q));
650: dmin = MIN(d,n1->d);
651: divnum(0,(Num)ONE,n1->c[0],&hinv);
652: for ( i = 0, c = n1->c; i <= dmin; i++ )
653: mulnum(0,c[i],hinv,&w[i]);
654: cr = r->c;
655: cr[0] = w[0];
656: for ( i = 1; i <= d; i++ ) {
657: for ( j = 1, s = w[i]; j < i; j++ ) {
658: mulnum(0,cr[j],w[i-j],&t);
659: addnum(0,s,t,&u);
660: s = u;
661: }
662: chsgnnum(s,&cr[i]);
663: }
664: for ( i = 0; i <= d; i++ ) {
665: mulnum(0,cr[i],hinv,&t); cr[i] = t;
666: }
667: for ( i = d; i >= 0 && !cr[i]; i-- );
668: r->d = i;
669: }
670: }
671:
1.5 ! noro 672: void pwrup(UP n,Q e,UP *nr)
1.1 noro 673: {
674: UP y,z,t;
675: N en,qn;
676: int r;
677:
678: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)ONE;
679: z = n;
680: if ( !e )
681: *nr = y;
682: else if ( UNIQ(e) )
683: *nr = n;
684: else {
685: en = NM(e);
686: while ( 1 ) {
687: r = divin(en,2,&qn); en = qn;
688: if ( r ) {
689: mulup(z,y,&t); y = t;
690: if ( !en ) {
691: *nr = y;
692: return;
693: }
694: }
695: mulup(z,z,&t); z = t;
696: }
697: }
698: }
699:
1.5 ! noro 700: int compup(UP n1,UP n2)
1.1 noro 701: {
702: int i,r;
703:
704: if ( !n1 )
705: return n2 ? -1 : 0;
706: else if ( !n2 )
707: return 1;
708: else if ( n1->d > n2->d )
709: return 1;
710: else if ( n1->d < n2->d )
711: return -1;
712: else {
713: for ( i = n1->d; i >= 0; i-- ) {
714: r = compnum(CO,n1->c[i],n2->c[i]);
715: if ( r )
716: return r;
717: }
718: return 0;
719: }
720: }
721:
1.5 ! noro 722: void kmulp(VL vl,P n1,P n2,P *nr)
1.1 noro 723: {
724: UP b1,b2,br;
725:
726: if ( !n1 || !n2 )
727: *nr = 0;
728: else if ( OID(n1) == O_N || OID(n2) == O_N )
729: mulp(vl,n1,n2,nr);
730: else {
731: ptoup(n1,&b1); ptoup(n2,&b2);
732: kmulup(b1,b2,&br);
733: uptop(br,nr);
734: }
735: }
736:
1.5 ! noro 737: void ksquarep(VL vl,P n1,P *nr)
1.1 noro 738: {
739: UP b1,br;
740:
741: if ( !n1 )
742: *nr = 0;
743: else if ( OID(n1) == O_N )
744: mulp(vl,n1,n1,nr);
745: else {
746: ptoup(n1,&b1);
747: ksquareup(b1,&br);
748: uptop(br,nr);
749: }
750: }
751:
1.5 ! noro 752: void kmulup(UP n1,UP n2,UP *nr)
1.1 noro 753: {
1.5 ! noro 754: int d1,d2;
1.1 noro 755:
756: if ( !n1 || !n2 ) {
757: *nr = 0; return;
758: }
759: d1 = DEG(n1)+1; d2 = DEG(n2)+1;
760: if ( (d1 < up_kara_mag) && (d2 < up_kara_mag) )
761: mulup(n1,n2,nr);
762: else
763: kmulupmain(n1,n2,nr);
764: }
765:
1.5 ! noro 766: void ksquareup(UP n1,UP *nr)
1.1 noro 767: {
768: int d1;
769: extern Q TWO;
770:
771: if ( !n1 ) {
772: *nr = 0; return;
773: }
774: d1 = DEG(n1)+1;
775: if ( (d1 < up_kara_mag) )
776: squareup(n1,nr);
777: else
778: ksquareupmain(n1,nr);
779: }
780:
1.5 ! noro 781: void copyup(UP n1,UP n2)
1.1 noro 782: {
783: n2->d = n1->d;
784: bcopy((char *)n1->c,(char *)n2->c,(n1->d+1)*sizeof(Q));
785: }
786:
1.5 ! noro 787: void c_copyup(UP n,int len,pointer *p)
1.1 noro 788: {
789: if ( n )
790: bcopy((char *)COEF(n),(char *)p,MIN((DEG(n)+1),len)*sizeof(pointer));
791: }
792:
1.5 ! noro 793: void kmulupmain(UP n1,UP n2,UP *nr)
1.1 noro 794: {
1.5 ! noro 795: int d1,d2,h;
1.1 noro 796: UP n1lo,n1hi,n2lo,n2hi,hi,lo,mid1,mid2,mid,s1,s2,t1,t2;
797:
798: d1 = DEG(n1)+1; d2 = DEG(n2)+1; h = (MAX(d1,d2)+1)/2;
799: decompup(n1,h,&n1lo,&n1hi);
800: decompup(n2,h,&n2lo,&n2hi);
801: kmulup(n1lo,n2lo,&lo);
802: kmulup(n1hi,n2hi,&hi);
803: shiftup(hi,2*h,&t2);
804: addup(lo,t2,&t1);
805: addup(hi,lo,&mid1);
806: subup(n1hi,n1lo,&s1); subup(n2lo,n2hi,&s2); kmulup(s1,s2,&mid2);
807: addup(mid1,mid2,&mid);
808: shiftup(mid,h,&t2);
809: addup(t1,t2,nr);
810: }
811:
1.5 ! noro 812: void ksquareupmain(UP n1,UP *nr)
1.1 noro 813: {
1.5 ! noro 814: int d1,h;
1.1 noro 815: UP n1lo,n1hi,hi,lo,mid1,mid2,mid,s1,t1,t2;
816:
817: d1 = DEG(n1)+1; h = (d1+1)/2;
818: decompup(n1,h,&n1lo,&n1hi);
819: ksquareup(n1hi,&hi); ksquareup(n1lo,&lo);
820: shiftup(hi,2*h,&t2);
821: addup(lo,t2,&t1);
822: addup(hi,lo,&mid1);
823: subup(n1hi,n1lo,&s1); ksquareup(s1,&mid2);
824: subup(mid1,mid2,&mid);
825: shiftup(mid,h,&t2);
826: addup(t1,t2,nr);
827: }
828:
1.5 ! noro 829: void rembymulup(UP n1,UP n2,UP *nr)
1.1 noro 830: {
831: int d1,d2,d;
832: UP r1,r2,inv2,t,s,q;
833:
834: if ( !n2 )
835: error("rembymulup : division by 0");
836: else if ( !n1 || !n2->d )
837: *nr = 0;
838: else if ( (d1 = n1->d) < (d2 = n2->d) )
839: *nr = n1;
840: else {
841: d = d1-d2;
842: reverseup(n1,d1,&t); truncup(t,d+1,&r1);
843: reverseup(n2,d2,&r2);
844: invmodup(r2,d,&inv2);
845: kmulup(r1,inv2,&t);
846: truncup(t,d+1,&s);
847: reverseup(s,d,&q);
848: kmulup(q,n2,&t); subup(n1,t,nr);
849: }
850: }
851:
852: /*
853: deg n2 = d
854: deg n1 <= 2*d-1
855: inv2 = inverse of reversep(n2) mod x^d
856: */
857:
1.5 ! noro 858: void hybrid_rembymulup_special(int ff,UP n1,UP n2,UP inv2,UP *nr)
1.1 noro 859: {
860: int d1,d2,d;
1.5 ! noro 861: UP r1,t,s,q;
1.1 noro 862:
863: if ( !n2 )
864: error("hybrid_rembymulup : division by 0");
865: else if ( !n1 || !n2->d )
866: *nr = 0;
867: else if ( (d1 = n1->d) < (d2 = n2->d) )
868: *nr = n1;
869: else {
870: d = d1-d2;
871: reverseup(n1,d1,&t); truncup(t,d+1,&r1);
872: hybrid_tmulup(ff,r1,inv2,d+1,&t);
873: truncup(t,d+1,&s);
874: reverseup(s,d,&q);
875:
876: hybrid_tmulup(ff,q,n2,d2,&t);
877: truncup(n1,d2,&s);
878: subup(s,t,nr);
879: }
880: }
881:
1.5 ! noro 882: void rembymulup_special(UP n1,UP n2,UP inv2,UP *nr)
1.1 noro 883: {
884: int d1,d2,d;
1.5 ! noro 885: UP r1,t,s,q;
1.1 noro 886:
887: if ( !n2 )
888: error("rembymulup : division by 0");
889: else if ( !n1 || !n2->d )
890: *nr = 0;
891: else if ( (d1 = n1->d) < (d2 = n2->d) )
892: *nr = n1;
893: else {
894: d = d1-d2;
895: reverseup(n1,d1,&t); truncup(t,d+1,&r1);
896: tkmulup(r1,inv2,d+1,&t);
897: truncup(t,d+1,&s);
898: reverseup(s,d,&q);
899:
900: tkmulup(q,n2,d2,&t);
901: truncup(n1,d2,&s);
902: subup(s,t,nr);
903: }
904: }
905:
906: /* *nr = n1*n2 mod x^d */
907:
1.5 ! noro 908: void tkmulup(UP n1,UP n2,int d,UP *nr)
1.1 noro 909: {
910: int m;
1.5 ! noro 911: UP n1l,n1h,n2l,n2h,l,h,t,s,u;
1.1 noro 912:
913: if ( d < 0 )
914: error("tkmulup : invalid argument");
915: else if ( d == 0 )
916: *nr = 0;
917: else {
918: truncup(n1,d,&t); n1 = t;
919: truncup(n2,d,&t); n2 = t;
920: if ( !n1 || !n2 )
921: *nr = 0;
922: else if ( n1->d < up_tkara_mag || n2->d < up_tkara_mag )
923: tmulup(n1,n2,d,nr);
924: else {
925: m = (d+1)/2;
926: decompup(n1,m,&n1l,&n1h);
927: decompup(n2,m,&n2l,&n2h);
928: kmulup(n1l,n2l,&l);
929: tkmulup(n1l,n2h,d-m,&t);
930: tkmulup(n2l,n1h,d-m,&s);
931: addup(t,s,&u);
932: shiftup(u,m,&h);
933: addup(l,h,&t);
934: truncup(t,d,nr);
935: }
936: }
937: }
938:
939: /* n->n*x^d */
940:
1.5 ! noro 941: void shiftup(UP n,int d,UP *nr)
1.1 noro 942: {
943: int dr;
944: UP r;
945:
946: if ( !n )
947: *nr = 0;
948: else {
949: dr = n->d+d;
950: *nr = r = UPALLOC(dr); r->d = dr;
951: bzero(r->c,(dr+1)*sizeof(Num));
952: bcopy(n->c,r->c+d,(n->d+1)*sizeof(Num));
953: }
954: }
955:
1.5 ! noro 956: void fft_rembymulup_special(UP n1,UP n2,UP inv2,UP *nr)
1.1 noro 957: {
958: int d1,d2,d;
959: UP r1,t,s,q,u;
960:
961: if ( !n2 )
962: error("rembymulup : division by 0");
963: else if ( !n1 || !n2->d )
964: *nr = 0;
965: else if ( (d1 = n1->d) < (d2 = n2->d) )
966: *nr = n1;
967: else {
968: d = d1-d2;
969: reverseup(n1,d1,&t); truncup(t,d+1,&r1);
970: trunc_fft_mulup(r1,inv2,d+1,&t);
971: truncup(t,d+1,&s);
972: reverseup(s,d,&q);
973: trunc_fft_mulup(q,n2,d2,&t); truncup(t,d2,&u);
974: truncup(n1,d2,&s);
975: subup(s,u,nr);
976: }
977: }
978:
1.5 ! noro 979: void set_degreeup(UP n,int d)
1.1 noro 980: {
981: int i;
982:
983: for ( i = d; i >= 0 && !n->c[i]; i-- );
984: n->d = i;
985: }
986:
987: /* n -> n0 + x^d n1 */
988:
1.5 ! noro 989: void decompup(UP n,int d,UP *n0,UP *n1)
1.1 noro 990: {
991: int dn;
992: UP r0,r1;
993:
994: if ( !n || d > n->d ) {
995: *n0 = n; *n1 = 0;
996: } else if ( d < 0 )
997: error("decompup : invalid argument");
998: else if ( d == 0 ) {
999: *n0 = 0; *n1 = n;
1000: } else {
1001: dn = n->d;
1002: *n0 = r0 = UPALLOC(d-1);
1003: *n1 = r1 = UPALLOC(dn-d);
1004: bcopy(n->c,r0->c,d*sizeof(Num));
1005: bcopy(n->c+d,r1->c,(dn-d+1)*sizeof(Num));
1006: set_degreeup(r0,d-1);
1007: if ( r0->d < 0 )
1008: *n0 = 0;
1009: set_degreeup(r1,dn-d);
1010: if ( r1->d < 0 )
1011: *n1 = 0;
1012: }
1013: }
1014:
1015: /* n -> n mod x^d */
1016:
1.5 ! noro 1017: void truncup(UP n1,int d,UP *nr)
1.1 noro 1018: {
1019: int i;
1020: UP r;
1021:
1022: if ( !n1 || d > n1->d )
1023: *nr = n1;
1024: else if ( d < 0 )
1025: error("truncup : invalid argument");
1026: else if ( d == 0 )
1027: *nr = 0;
1028: else {
1029: *nr = r = UPALLOC(d-1);
1030: bcopy(n1->c,r->c,d*sizeof(Num));
1031: for ( i = d-1; i >= 0 && !r->c[i]; i-- );
1032: if ( i < 0 )
1033: *nr = 0;
1034: else
1035: r->d = i;
1036: }
1037: }
1038:
1.5 ! noro 1039: int int_bits(int t)
1.1 noro 1040: {
1041: int k;
1042:
1043: for ( k = 0; t; t>>=1, k++);
1044: return k;
1045: }
1046:
1047: /* n is assumed to be LM or integer coefficient */
1048:
1.5 ! noro 1049: int maxblenup(UP n)
1.1 noro 1050: {
1051: int m,r,i,d;
1052: Num *c;
1053:
1054: if ( !n )
1055: return 0;
1056: d = n->d; c = (Num *)n->c;
1057: for ( r = 0, i = 0; i <= d; i++ )
1058: if ( c[i] ) {
1059: switch ( NID(c[i]) ) {
1060: case N_LM:
1061: m = n_bits(((LM)c[i])->body);
1062: break;
1063: case N_Q:
1064: m = n_bits(((Q)c[i])->nm);
1065: break;
1066: default:
1067: error("maxblenup : invalid coefficient");
1068: }
1069: r = MAX(m,r);
1070: }
1071: return r;
1072: }
1073:
1.5 ! noro 1074: void uptofmarray(int mod,UP n,ModNum *f)
1.1 noro 1075: {
1076: int d,i;
1077: unsigned int r;
1078: Num *c;
1079:
1080: if ( !n )
1081: return;
1082: else {
1083: d = n->d; c = (Num *)n->c;
1084: for ( i = 0; i <= d; i++ ) {
1085: if ( c[i] ) {
1086: switch ( NID(c[i]) ) {
1087: case N_LM:
1088: f[i] = (ModNum)rem(((LM)c[i])->body,mod);
1089: break;
1090: case N_Q:
1091: r = rem(NM((Q)c[i]),mod);
1092: if ( r && (SGN((Q)c[i])<0) )
1093: r = mod-r;
1094: f[i] = (ModNum)r;
1095: break;
1096: default:
1097: error("uptofmarray : invalid coefficient");
1098: }
1099: } else
1100: f[i] = 0;
1101: }
1102: }
1103: }
1104:
1.5 ! noro 1105: void fmarraytoup(ModNum *f,int d,UP *nr)
1.1 noro 1106: {
1107: int i;
1108: Q *c;
1109: UP r;
1110:
1111: if ( d < 0 ) {
1112: *nr = 0;
1113: } else {
1114: *nr = r = UPALLOC(d); c = (Q *)r->c;
1115: for ( i = 0; i <= d; i++ ) {
1116: UTOQ((unsigned int)f[i],c[i]);
1117: }
1118: for ( i = d; i >= 0 && !c[i]; i-- );
1119: if ( i < 0 )
1120: *nr = 0;
1121: else
1122: r->d = i;
1123: }
1124: }
1125:
1126: /* f[i]: an array of length n */
1127:
1.5 ! noro 1128: void uiarraytoup(unsigned int **f,int n,int d,UP *nr)
1.1 noro 1129: {
1130: int i,j;
1131: unsigned int *fi;
1132: N ci;
1133: Q *c;
1134: UP r;
1135:
1136: if ( d < 0 ) {
1137: *nr = 0;
1138: } else {
1139: *nr = r = UPALLOC(d); c = (Q *)r->c;
1140: for ( i = 0; i <= d; i++ ) {
1141: fi = f[i];
1142: for ( j = n-1; j >= 0 && !fi[j]; j-- );
1143: j++;
1144: if ( j ) {
1145: ci = NALLOC(j);
1146: PL(ci) = j;
1147: bcopy(fi,BD(ci),j*sizeof(unsigned int));
1148: NTOQ(ci,1,c[i]);
1149: } else
1150: c[i] = 0;
1151: }
1152: for ( i = d; i >= 0 && !c[i]; i-- );
1153: if ( i < 0 )
1154: *nr = 0;
1155: else
1156: r->d = i;
1157: }
1158: }
1159:
1.5 ! noro 1160: void adj_coefup(UP n,N m,N m2,UP *nr)
1.1 noro 1161: {
1162: int d;
1163: Q *c,*cr;
1164: Q mq;
1165: int i;
1166: UP r;
1167:
1168: if ( !n )
1169: *nr = 0;
1170: else {
1171: d = n->d; c = (Q *)n->c;
1172: *nr = r = UPALLOC(d); cr = (Q *)r->c;
1173: NTOQ(m,1,mq);
1174: for ( i = 0; i <= d; i++ ) {
1175: if ( !c[i] )
1176: continue;
1177: if ( cmpn(NM(c[i]),m2) > 0 )
1178: subq(c[i],mq,&cr[i]);
1179: else
1180: cr[i] = c[i];
1181: }
1182: for ( i = d; i >= 0 && !cr[i]; i-- );
1183: if ( i < 0 )
1184: *nr = 0;
1185: else
1186: r->d = i;
1187: }
1188: }
1189:
1190: /* n is assumed to have positive integer coefficients. */
1191:
1.5 ! noro 1192: void remcup(UP n,N mod,UP *nr)
1.1 noro 1193: {
1194: int i,d;
1195: Q *c,*cr;
1196: UP r;
1197: N t;
1198:
1199: if ( !n )
1200: *nr = 0;
1201: else {
1202: d = n->d; c = (Q *)n->c;
1203: *nr = r = UPALLOC(d); cr = (Q *)r->c;
1204: for ( i = 0; i <= d; i++ )
1205: if ( c[i] ) {
1206: remn(NM(c[i]),mod,&t);
1207: if ( t )
1208: NTOQ(t,1,cr[i]);
1209: else
1210: cr[i] = 0;
1211: } else
1212: cr[i] = 0;
1213: for ( i = d; i >= 0 && !cr[i]; i-- );
1214: if ( i < 0 )
1215: *nr = 0;
1216: else
1217: r->d = i;
1218: }
1219: }
1220:
1221: void fft_mulup_main(UP,UP,int,UP *);
1222:
1.5 ! noro 1223: void fft_mulup(UP n1,UP n2,UP *nr)
1.1 noro 1224: {
1225: int d1,d2,d,b1,b2,h;
1226: UP n1lo,n1hi,n2lo,n2hi,lo,hi,t1,t2,mid1,mid2,mid,s1,s2;
1227:
1228: if ( !n1 || !n2 )
1229: *nr = 0;
1230: else {
1231: d1 = n1->d; d2 = n2->d;
1232: if ( MAX(d1,d2) < up_fft_mag )
1233: kmulup(n1,n2,nr);
1234: else {
1235: b1 = maxblenup(n1); b2 = maxblenup(n2);
1236: if ( fft_available(d1,b1,d2,b2) )
1237: fft_mulup_main(n1,n2,0,nr);
1238: else {
1239: d = MAX(d1,d2)+1; h = (d+1)/2;
1240: decompup(n1,h,&n1lo,&n1hi);
1241: decompup(n2,h,&n2lo,&n2hi);
1242: fft_mulup(n1lo,n2lo,&lo);
1243: fft_mulup(n1hi,n2hi,&hi);
1244: shiftup(hi,2*h,&t2);
1245: addup(lo,t2,&t1);
1246: addup(hi,lo,&mid1);
1247: subup(n1hi,n1lo,&s1); subup(n2lo,n2hi,&s2);
1248: fft_mulup(s1,s2,&mid2);
1249: addup(mid1,mid2,&mid);
1250: shiftup(mid,h,&t2);
1251: addup(t1,t2,nr);
1252: }
1253: }
1254: }
1255: }
1256:
1.5 ! noro 1257: void trunc_fft_mulup(UP n1,UP n2,int dbd,UP *nr)
1.1 noro 1258: {
1259: int d1,d2,b1,b2,m;
1260: UP n1l,n1h,n2l,n2h,l,h,t,s,u;
1261:
1262: if ( !n1 || !n2 )
1263: *nr = 0;
1264: else if ( dbd < 0 )
1265: error("trunc_fft_mulup : invalid argument");
1266: else if ( dbd == 0 )
1267: *nr = 0;
1268: else {
1269: truncup(n1,dbd,&t); n1 = t;
1270: truncup(n2,dbd,&t); n2 = t;
1271: d1 = n1->d; d2 = n2->d;
1272: if ( MAX(d1,d2) < up_fft_mag )
1273: tkmulup(n1,n2,dbd,nr);
1274: else {
1275: b1 = maxblenup(n1); b2 = maxblenup(n2);
1276: if ( fft_available(d1,b1,d2,b2) )
1277: fft_mulup_main(n1,n2,dbd,nr);
1278: else {
1279: m = (dbd+1)/2;
1280: decompup(n1,m,&n1l,&n1h);
1281: decompup(n2,m,&n2l,&n2h);
1282: fft_mulup(n1l,n2l,&l);
1283: trunc_fft_mulup(n1l,n2h,dbd-m,&t);
1284: trunc_fft_mulup(n2l,n1h,dbd-m,&s);
1285: addup(t,s,&u);
1286: shiftup(u,m,&h);
1287: addup(l,h,&t);
1288: truncup(t,dbd,nr);
1289: }
1290: }
1291: }
1292: }
1293:
1.5 ! noro 1294: void fft_squareup(UP n1,UP *nr)
1.1 noro 1295: {
1.5 ! noro 1296: int d1,d,h,b1;
1.1 noro 1297: UP n1lo,n1hi,hi,lo,mid1,mid2,mid,s1,t1,t2;
1298:
1299: if ( !n1 )
1300: *nr = 0;
1301: else {
1302: d1 = n1->d;
1303: if ( d1 < up_fft_mag )
1304: ksquareup(n1,nr);
1305: else {
1306: b1 = maxblenup(n1);
1307: if ( fft_available(d1,b1,d1,b1) )
1308: fft_mulup_main(n1,n1,0,nr);
1309: else {
1310: d = d1+1; h = (d1+1)/2;
1311: decompup(n1,h,&n1lo,&n1hi);
1312: fft_squareup(n1hi,&hi);
1313: fft_squareup(n1lo,&lo);
1314: shiftup(hi,2*h,&t2);
1315: addup(lo,t2,&t1);
1316: addup(hi,lo,&mid1);
1317: subup(n1hi,n1lo,&s1);
1318: fft_squareup(s1,&mid2);
1319: subup(mid1,mid2,&mid);
1320: shiftup(mid,h,&t2);
1321: addup(t1,t2,nr);
1322: }
1323: }
1324: }
1325: }
1326:
1327: /*
1328: * dbd == 0 => n1 * n2
1329: * dbd > 0 => n1 * n2 mod x^dbd
1330: * n1 == n2 => squaring
1331: */
1332:
1.5 ! noro 1333: void fft_mulup_main(UP n1,UP n2,int dbd,UP *nr)
1.1 noro 1334: {
1335: ModNum *f1,*f2,*w,*fr;
1336: ModNum **frarray,**fa;
1337: unsigned int *modarray,*ma;
1338: int modarray_len;
1339: int frarray_index = 0;
1340: N m,m1,m2;
1341: int d1,d2,dmin,i,mod,root,d,cond,bound;
1342: UP r;
1343:
1344: if ( !n1 || !n2 ) {
1345: *nr = 0; return;
1346: }
1347: d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
1348: if ( !d1 || !d2 ) {
1349: mulup(n1,n2,nr); return;
1350: }
1351: m = ONEN;
1352: bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+1;
1353: f1 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
1354: if ( n1 == n2 )
1355: f2 = 0;
1356: else
1357: f2 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
1358: w = (ModNum *)MALLOC_ATOMIC(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
1359: modarray_len = 1024;
1360: modarray = (unsigned int *)MALLOC_ATOMIC(modarray_len*sizeof(unsigned int));
1361: frarray = (ModNum **)MALLOC(modarray_len*sizeof(ModNum *));
1362: for ( i = 0; i < NPrimes; i++ ) {
1363: FFT_primes(i,&mod,&root,&d);
1364: if ( (1<<d) < d1+d2+1 )
1365: continue;
1366: if ( frarray_index == modarray_len ) {
1367: ma = (unsigned int *)MALLOC_ATOMIC(2*modarray_len*sizeof(unsigned int));
1368: bcopy(modarray,ma,modarray_len*sizeof(unsigned int));
1369: modarray = ma;
1370: fa = (ModNum **)MALLOC(2*modarray_len*sizeof(ModNum *));
1371: bcopy(frarray,fa,modarray_len*sizeof(ModNum *));
1372: frarray = fa;
1373: modarray_len *= 2;
1374: }
1375: modarray[frarray_index] = mod;
1376: frarray[frarray_index++] = fr
1377: = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
1378: uptofmarray(mod,n1,f1);
1379: if ( !f2 )
1380: cond = FFT_pol_square(d1,f1,fr,i,w);
1381: else {
1382: uptofmarray(mod,n2,f2);
1383: cond = FFT_pol_product(d1,f1,d2,f2,fr,i,w);
1384: }
1385: if ( cond )
1386: error("fft_mulup : error in FFT_pol_product");
1387: STON(mod,m1); muln(m,m1,&m2); m = m2;
1388: if ( n_bits(m) > bound ) {
1389: if ( !dbd )
1390: dbd = d1+d2+1;
1391: crup(frarray,MIN(d1+d2,dbd-1),modarray,frarray_index,m,&r);
1392: divin(m,2,&m2);
1393: adj_coefup(r,m,m2,nr);
1394: return;
1395: }
1396: }
1397: error("fft_mulup : FFT_primes exhausted");
1398: }
1399: #if 0
1400: /* inefficient version */
1401:
1.5 ! noro 1402: void crup(ModNum **f,int d,int *mod,int index,N m,UP *r)
1.1 noro 1403: {
1404: N *cof,*c;
1405: int *inv;
1406: struct oUP w;
1407: int i;
1408: UP s,s1,s2;
1409: N t;
1410: UP *g;
1411: Q q;
1412: struct oEGT eg0,eg1;
1413:
1414: get_eg(&eg0);
1415: w.d = 0;
1416: inv = (int *)ALLOCA(index*sizeof(int));
1417: cof = (N *)ALLOCA(index*sizeof(N));
1418: c = (N *)ALLOCA(index*sizeof(N));
1419: g = (UP *)ALLOCA(index*sizeof(UP));
1420: up_lazy = 1;
1421: for ( i = 0, s = 0; i < index; i++ ) {
1422: divin(m,mod[i],&cof[i]);
1423: inv[i] = invm(rem(cof[i],mod[i]),mod[i]);
1424: STON(inv[i],t);
1425: muln(cof[i],t,&c[i]);
1426: NTOQ(c[i],1,q); w.c[0] = (Num)q;
1427: fmarraytoup(f[i],d,&g[i]);
1428: mulup(g[i],&w,&s1);
1429: addup(s,s1,&s2);
1430: s = s2;
1431: }
1432: up_lazy = 0;
1433: remcup(s,m,r);
1434: get_eg(&eg1);
1435: add_eg(&eg_chrem,&eg0,&eg1);
1436: }
1437:
1438: #else
1439: /* improved version */
1440:
1.5 ! noro 1441: void crup(ModNum **f,int d,int *mod,int index,N m,UP *r)
1.1 noro 1442: {
1443: N cof,c,t,w,w1;
1444: struct oN fc;
1445: N *s;
1446: UP u;
1447: Q q;
1448: int inv,i,j,rlen;
1449:
1450: rlen = PL(m)+10; /* XXX */
1451: PL(&fc) = 1;
1452: c = NALLOC(rlen);
1453: w = NALLOC(rlen);
1454: w1 = NALLOC(rlen);
1455: s = (N *)MALLOC((d+1)*sizeof(N));
1456: for ( i = 0; i <= d; i++ ) {
1457: s[i] = NALLOC(rlen);
1458: PL(s[i]) = 0;
1459: bzero(BD(s[i]),rlen*sizeof(unsigned int));
1460: }
1461: for ( i = 0; i < index; i++ ) {
1462: divin(m,mod[i],&cof);
1463: inv = invm(rem(cof,mod[i]),mod[i]); STON(inv,t);
1464: _muln(cof,t,c);
1465: /* s += c*f[i] */
1466: for ( j = 0; j <= d; j++ )
1467: if ( f[i][j] ) {
1468: BD(&fc)[0] = f[i][j];
1469: _muln(c,&fc,w);
1470: _addn(s[j],w,w1);
1471: dupn(w1,s[j]);
1472: }
1473: }
1474: for ( i = d; i >= 0; i-- )
1475: if ( s[i] )
1476: break;
1477: if ( i < 0 )
1478: *r = 0;
1479: else {
1480: u = UPALLOC(i);
1481: DEG(u) = i;
1482: for ( j = 0; j <= i; j++ ) {
1483: if ( PL(s[j]) )
1484: NTOQ(s[j],1,q);
1485: else
1486: q = 0;
1487: COEF(u)[j] = (Num)q;
1488: }
1489: remcup(u,m,r);
1490: }
1491: }
1492: #endif
1493:
1.2 noro 1494: /*
1495: * dbd == 0 => n1 * n2
1496: * dbd > 0 => n1 * n2 mod x^dbd
1497: * n1 == n2 => squaring
1498: * return: n1*n2 mod Primes[modind[0]]*.prime...*Primes[modind[nmod-1]].prime
1499: */
1500:
1.5 ! noro 1501: void fft_mulup_specialmod_main(UP n1,UP n2,int dbd,int *modind,int nmod,UP *nr)
1.2 noro 1502: {
1503: ModNum *f1,*f2,*w,*fr;
1.5 ! noro 1504: ModNum **frarray;
1.2 noro 1505: N m,m1,m2;
1506: unsigned int *modarray;
1.5 ! noro 1507: int d1,d2,dmin,i,root,d,cond,bound;
1.2 noro 1508:
1509: if ( !n1 || !n2 ) {
1510: *nr = 0; return;
1511: }
1512: d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
1513: if ( !d1 || !d2 ) {
1514: mulup(n1,n2,nr); return;
1515: }
1516: m = ONEN;
1517: bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+1;
1518: f1 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
1519: if ( n1 == n2 )
1520: f2 = 0;
1521: else
1522: f2 = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
1523: w = (ModNum *)MALLOC_ATOMIC(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
1524: frarray = (ModNum **)MALLOC(nmod*sizeof(ModNum *));
1525: modarray = (unsigned int *)MALLOC_ATOMIC(nmod*sizeof(unsigned int *));
1526:
1527: for ( i = 0; i < nmod; i++ ) {
1528: FFT_primes(modind[i],&modarray[i],&root,&d);
1529: if ( (1<<d) < d1+d2+1 )
1530: error("fft_mulup_specialmod_main : invalid modulus");
1531: frarray[i] = fr
1532: = (ModNum *)MALLOC_ATOMIC((d1+d2+1)*sizeof(ModNum));
1533: uptofmarray(modarray[i],n1,f1);
1534: if ( !f2 )
1535: cond = FFT_pol_square(d1,f1,fr,modind[i],w);
1536: else {
1537: uptofmarray(modarray[i],n2,f2);
1538: cond = FFT_pol_product(d1,f1,d2,f2,fr,modind[i],w);
1539: }
1540: if ( cond )
1541: error("fft_mulup_specialmod_main : error in FFT_pol_product");
1542: STON(modarray[i],m1); muln(m,m1,&m2); m = m2;
1543: }
1544: if ( !dbd )
1545: dbd = d1+d2+1;
1546: crup(frarray,MIN(d1+d2,dbd-1),modarray,nmod,m,nr);
1547: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>