Annotation of OpenXM_contrib2/asir2000/engine/up_gfpn.c, Revision 1.1.1.1
1.1 noro 1: /* $OpenXM: OpenXM/src/asir99/engine/up_gfpn.c,v 1.1.1.1 1999/11/10 08:12:26 noro Exp $ */
2: #include "ca.h"
3: #include <math.h>
4:
5: extern GC_dont_gc;
6: extern struct oEGT eg_chrem,eg_fore,eg_back;
7: extern int debug_up;
8: extern int up_lazy;
9:
10: void crup_lm(ModNum **,int,int *,int,N,N,UP *);
11:
12: void fft_mulup_lm(n1,n2,nr)
13: UP n1,n2;
14: UP *nr;
15: {
16: ModNum *f1,*f2,*w,*fr;
17: ModNum *frarray[1024];
18: int modarray[1024];
19: int frarray_index = 0;
20: N m,m1,m2,lm_mod;
21: int d1,d2,dmin,i,mod,root,d,cond,bound;
22: UP r;
23:
24: if ( !n1 || !n2 ) {
25: *nr = 0; return;
26: }
27: d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
28: if ( !d1 || !d2 ) {
29: mulup(n1,n2,nr); return;
30: }
31: getmod_lm(&lm_mod);
32: if ( !lm_mod )
33: error("fft_mulup_lm : current_mod_lm is not set");
34: m = ONEN;
35: bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+2;
36: f1 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
37: f2 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
38: w = (ModNum *)ALLOCA(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
39: for ( i = 0; i < NPrimes; i++ ) {
40: FFT_primes(i,&mod,&root,&d);
41: if ( (1<<d) < d1+d2+1 )
42: continue;
43: modarray[frarray_index] = mod;
44: frarray[frarray_index++] = fr
45: = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
46: uptofmarray(mod,n1,f1);
47: uptofmarray(mod,n2,f2);
48: cond = FFT_pol_product(d1,f1,d2,f2,fr,i,w);
49: if ( cond )
50: error("fft_mulup : error in FFT_pol_product");
51: STON(mod,m1); muln(m,m1,&m2); m = m2;
52: if ( n_bits(m) > bound ) {
53: /* GC_dont_gc = 1; */
54: crup_lm(frarray,d1+d2,modarray,frarray_index,m,lm_mod,&r);
55: uptolmup(r,nr);
56: GC_dont_gc = 0;
57: return;
58: }
59: }
60: error("fft_mulup : FFT_primes exhausted");
61: }
62:
63: void fft_squareup_lm(n1,nr)
64: UP n1;
65: UP *nr;
66: {
67: ModNum *f1,*w,*fr;
68: ModNum *frarray[1024];
69: int modarray[1024];
70: int frarray_index = 0;
71: N m,m1,m2,lm_mod;
72: int d1,dmin,i,mod,root,d,cond,bound;
73: UP r;
74:
75: if ( !n1 ) {
76: *nr = 0; return;
77: }
78: d1 = n1->d; dmin = d1;
79: if ( !d1 ) {
80: mulup(n1,n1,nr); return;
81: }
82: getmod_lm(&lm_mod);
83: if ( !lm_mod )
84: error("fft_squareup_lm : current_mod_lm is not set");
85: m = ONEN;
86: bound = 2*maxblenup(n1)+int_bits(d1)+2;
87: f1 = (ModNum *)ALLOCA((2*d1+1)*sizeof(ModNum));
88: w = (ModNum *)ALLOCA(6*(1<<int_bits(2*d1+1))*sizeof(ModNum));
89: for ( i = 0; i < NPrimes; i++ ) {
90: FFT_primes(i,&mod,&root,&d);
91: if ( (1<<d) < 2*d1+1 )
92: continue;
93: modarray[frarray_index] = mod;
94: frarray[frarray_index++] = fr
95: = (ModNum *)ALLOCA((2*d1+1)*sizeof(ModNum));
96: uptofmarray(mod,n1,f1);
97: cond = FFT_pol_square(d1,f1,fr,i,w);
98: if ( cond )
99: error("fft_mulup : error in FFT_pol_product");
100: STON(mod,m1); muln(m,m1,&m2); m = m2;
101: if ( n_bits(m) > bound ) {
102: /* GC_dont_gc = 1; */
103: crup_lm(frarray,2*d1,modarray,frarray_index,m,lm_mod,&r);
104: uptolmup(r,nr);
105: GC_dont_gc = 0;
106: return;
107: }
108: }
109: error("fft_squareup : FFT_primes exhausted");
110: }
111:
112: void trunc_fft_mulup_lm(n1,n2,dbd,nr)
113: UP n1,n2;
114: int dbd;
115: UP *nr;
116: {
117: ModNum *f1,*f2,*fr,*w;
118: ModNum *frarray[1024];
119: int modarray[1024];
120: int frarray_index = 0;
121: N m,m1,m2,lm_mod;
122: int d1,d2,dmin,i,mod,root,d,cond,bound;
123: UP r;
124:
125: if ( !n1 || !n2 ) {
126: *nr = 0; return;
127: }
128: d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
129: if ( !d1 || !d2 ) {
130: tmulup(n1,n2,dbd,nr); return;
131: }
132: getmod_lm(&lm_mod);
133: if ( !lm_mod )
134: error("trunc_fft_mulup_lm : current_mod_lm is not set");
135: m = ONEN;
136: bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+2;
137: f1 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
138: f2 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
139: w = (ModNum *)ALLOCA(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
140: for ( i = 0; i < NPrimes; i++ ) {
141: FFT_primes(i,&mod,&root,&d);
142: if ( (1<<d) < d1+d2+1 )
143: continue;
144:
145: modarray[frarray_index] = mod;
146: frarray[frarray_index++] = fr
147: = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
148: uptofmarray(mod,n1,f1);
149: uptofmarray(mod,n2,f2);
150: cond = FFT_pol_product(d1,f1,d2,f2,fr,i,w);
151: if ( cond )
152: error("fft_mulup : error in FFT_pol_product");
153: STON(mod,m1); muln(m,m1,&m2); m = m2;
154: if ( n_bits(m) > bound ) {
155: /* GC_dont_gc = 1; */
156: crup_lm(frarray,MIN(dbd-1,d1+d2),modarray,frarray_index,m,lm_mod,&r);
157: uptolmup(r,nr);
158: GC_dont_gc = 0;
159: return;
160: }
161: }
162: error("trunc_fft_mulup : FFT_primes exhausted");
163: }
164:
165: void crup_lm(f,d,mod,index,m,lm_mod,r)
166: ModNum **f;
167: int d;
168: int *mod;
169: int index;
170: N m;
171: N lm_mod;
172: UP *r;
173: {
174: double *k;
175: double c2;
176: unsigned int *w;
177: unsigned int zi,au,al;
178: UL a;
179: int i,j,l,len;
180: UP s,s1,u;
181: struct oUP c;
182: N t,ci,mi,qn;
183: unsigned int **sum;
184: unsigned int *sum_b;
185: Q q;
186: struct oEGT eg0,eg1;
187:
188: if ( !lm_mod )
189: error("crup_lm : current_mod_lm is not set");
190: k = (double *)ALLOCA((d+1)*sizeof(double));
191: for ( j = 0; j <= d; j++ )
192: k[j] = 0.5;
193: up_lazy = 1;
194: sum = (unsigned int **)ALLOCA((d+1)*sizeof(unsigned int *));
195: len = (int_bits(index)+n_bits(lm_mod)+31)/32+1;
196: w = (unsigned int *)ALLOCA((len+1)*sizeof(unsigned int));
197: sum_b = (unsigned int *)MALLOC_ATOMIC((d+1)*len*sizeof(unsigned int));
198: for ( j = 0; j <= d; j++ ) {
199: sum[j] = sum_b+len*j;
200: bzero((char *)sum[j],len*sizeof(unsigned int));
201: }
202: for ( i = 0, s = 0; i < index; i++ ) {
203: divin(m,mod[i],&ci);
204: zi = (unsigned int)invm((unsigned int)rem(ci,mod[i]),mod[i]);
205:
206: STON(zi,t); muln(ci,t,&mi);
207: divn(mi,lm_mod,&qn,&t);
208: if ( t )
209: for ( j = 0; j <= d; j++ ) {
210: bzero((char *)w,(len+1)*sizeof(unsigned int));
211: muln_1(BD(t),PL(t),(unsigned int)f[i][j],w);
212: for ( l = PL(t); l >= 0 && !w[l]; l--);
213: l++;
214: if ( l )
215: addarray_to(w,l,sum[j],len);
216: }
217: c2 = (double)zi/(double)mod[i];
218: for ( j = 0; j <= d; j++ )
219: k[j] += c2*f[i][j];
220: }
221: uiarraytoup(sum,len,d,&s);
222: GC_free(sum_b);
223:
224: u = UPALLOC(d);
225: for ( j = 0; j <= d; j++ ) {
226: #if 1
227: a = (UL)floor(k[j]);
228: #if defined(i386) || defined(__alpha) || defined(VISUAL)
229: au = ((unsigned int *)&a)[1];
230: al = ((unsigned int *)&a)[0];
231: #else
232: al = ((unsigned int *)&a)[1];
233: au = ((unsigned int *)&a)[0];
234: #endif
235: if ( au ) {
236: NEWQ(q); SGN(q) = 1; NM(q)=NALLOC(2); DN(q)=0;
237: PL(NM(q))=2; BD(NM(q))[0]=al; BD(NM(q))[1] = au;
238: } else
239: UTOQ(al,q);
240: #else
241: al = (int)floor(k[j]); STOQ(al,q);
242: #endif
243: u->c[j] = (Num)q;
244: }
245: for ( j = d; j >= 0 && !u->c[j]; j-- );
246: if ( j < 0 )
247: u = 0;
248: else
249: u->d = j;
250: divn(m,lm_mod,&qn,&t); NTOQ(t,-1,q);
251: c.d = 0; c.c[0] = (Num)q;
252: mulup(u,&c,&s1);
253: up_lazy = 0;
254:
255: addup(s,s1,r);
256: }
257:
258: void fft_rembymulup_special_lm(n1,n2,inv2,nr)
259: UP n1,n2,inv2;
260: UP *nr;
261: {
262: int d1,d2,d;
263: UP r1,t,s,q,u;
264:
265: if ( !n2 )
266: error("rembymulup : division by 0");
267: else if ( !n1 || !n2->d )
268: *nr = 0;
269: else if ( (d1 = n1->d) < (d2 = n2->d) )
270: *nr = n1;
271: else {
272: d = d1-d2;
273: reverseup(n1,d1,&t); truncup(t,d+1,&r1);
274: trunc_fft_mulup_lm(r1,inv2,d+1,&t);
275: truncup(t,d+1,&s);
276: reverseup(s,d,&q);
277: trunc_fft_mulup_lm(q,n2,d2,&t); truncup(t,d2,&u);
278: truncup(n1,d2,&s);
279: subup(s,u,nr);
280: }
281: }
282:
283: void uptolmup(n,nr)
284: UP n;
285: UP *nr;
286: {
287: int i,d;
288: Q *c;
289: LM *cr;
290: UP r;
291:
292: if ( !n )
293: *nr = 0;
294: else {
295: d = n->d; c = (Q *)n->c;
296: *nr = r = UPALLOC(d); cr = (LM *)r->c;
297: for ( i = 0; i <= d; i++ )
298: qtolm(c[i],&cr[i]);
299: for ( i = d; i >= 0 && !cr[i]; i-- );
300: if ( i < 0 )
301: *nr = 0;
302: else
303: r->d = i;
304: }
305: }
306:
307: void hybrid_powermodup(f,xp)
308: UP f;
309: UP *xp;
310: {
311: N n;
312: UP x,y,t,invf,s;
313: int k;
314: LM lm;
315: struct oEGT eg_sq,eg_rem,eg_mul,eg_inv,eg0,eg1,eg2,eg3;
316:
317: getmod_lm(&n);
318: if ( !n )
319: error("hybrid_powermodup : current_mod_lm is not set");
320: MKLM(ONEN,lm);
321: x = UPALLOC(1); x->d = 1; x->c[1] = (Num)lm;
322: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
323:
324: reverseup(f,f->d,&t);
325: invmodup(t,f->d,&invf);
326: for ( k = n_bits(n)-1; k >= 0; k-- ) {
327: hybrid_squareup(FF_GFP,y,&t);
328: hybrid_rembymulup_special(FF_GFP,t,f,invf,&s);
329: y = s;
330: if ( n->b[k/32] & (1<<(k%32)) ) {
331: mulup(y,x,&t);
332: remup(t,f,&s);
333: y = s;
334: }
335: }
336: *xp = y;
337: }
338:
339: void powermodup(f,xp)
340: UP f;
341: UP *xp;
342: {
343: N n;
344: UP x,y,t,invf,s;
345: int k;
346: LM lm;
347: struct oEGT eg_sq,eg_rem,eg_mul,eg_inv,eg0,eg1,eg2,eg3;
348:
349: getmod_lm(&n);
350: if ( !n )
351: error("powermodup : current_mod_lm is not set");
352: MKLM(ONEN,lm);
353: x = UPALLOC(1); x->d = 1; x->c[1] = (Num)lm;
354: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
355:
356: reverseup(f,f->d,&t);
357: invmodup(t,f->d,&invf);
358: for ( k = n_bits(n)-1; k >= 0; k-- ) {
359: ksquareup(y,&t);
360: rembymulup_special(t,f,invf,&s);
361: y = s;
362: if ( n->b[k/32] & (1<<(k%32)) ) {
363: mulup(y,x,&t);
364: remup(t,f,&s);
365: y = s;
366: }
367: }
368: *xp = y;
369: }
370:
371: /* g^d mod f */
372:
373: void hybrid_generic_powermodup(g,f,d,xp)
374: UP g,f;
375: Q d;
376: UP *xp;
377: {
378: N e;
379: UP x,y,t,invf,s;
380: int k;
381: LM lm;
382: struct oEGT eg_sq,eg_rem,eg_mul,eg_inv,eg0,eg1,eg2,eg3;
383:
384: e = NM(d);
385: MKLM(ONEN,lm);
386: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
387: remup(g,f,&x);
388: if ( !x ) {
389: *xp = !d ? y : 0;
390: return;
391: } else if ( !x->d ) {
392: pwrup(x,d,xp);
393: return;
394: }
395: reverseup(f,f->d,&t);
396: invmodup(t,f->d,&invf);
397: for ( k = n_bits(e)-1; k >= 0; k-- ) {
398: hybrid_squareup(FF_GFP,y,&t);
399: hybrid_rembymulup_special(FF_GFP,t,f,invf,&s);
400: y = s;
401: if ( e->b[k/32] & (1<<(k%32)) ) {
402: mulup(y,x,&t);
403: remup(t,f,&s);
404: y = s;
405: }
406: }
407: *xp = y;
408: }
409:
410: void generic_powermodup(g,f,d,xp)
411: UP g,f;
412: Q d;
413: UP *xp;
414: {
415: N e;
416: UP x,y,t,invf,s;
417: int k;
418: LM lm;
419: struct oEGT eg_sq,eg_rem,eg_mul,eg_inv,eg0,eg1,eg2,eg3;
420:
421: e = NM(d);
422: MKLM(ONEN,lm);
423: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
424: remup(g,f,&x);
425: if ( !x ) {
426: *xp = !d ? y : 0;
427: return;
428: } else if ( !x->d ) {
429: pwrup(x,d,xp);
430: return;
431: }
432: reverseup(f,f->d,&t);
433: invmodup(t,f->d,&invf);
434: for ( k = n_bits(e)-1; k >= 0; k-- ) {
435: ksquareup(y,&t);
436: rembymulup_special(t,f,invf,&s);
437: y = s;
438: if ( e->b[k/32] & (1<<(k%32)) ) {
439: mulup(y,x,&t);
440: remup(t,f,&s);
441: y = s;
442: }
443: }
444: *xp = y;
445: }
446:
447: void hybrid_powertabup(f,xp,tab)
448: UP f;
449: UP xp;
450: UP *tab;
451: {
452: UP y,t,invf;
453: int i,d;
454: LM lm;
455: struct oEGT eg_rem,eg_mul,eg0,eg1,eg2;
456:
457: d = f->d;
458: MKLM(ONEN,lm);
459: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
460: tab[0] = y;
461: tab[1] = xp;
462:
463: reverseup(f,f->d,&t);
464: invmodup(t,f->d,&invf);
465:
466: for ( i = 2; i < d; i++ ) {
467: if ( debug_up )
468: fprintf(stderr,".");
469: hybrid_mulup(FF_GFP,tab[i-1],xp,&t);
470: hybrid_rembymulup_special(FF_GFP,t,f,invf,&tab[i]);
471: }
472: }
473:
474: void powertabup(f,xp,tab)
475: UP f;
476: UP xp;
477: UP *tab;
478: {
479: UP y,t,invf;
480: int i,d;
481: LM lm;
482: struct oEGT eg_rem,eg_mul,eg0,eg1,eg2;
483:
484: d = f->d;
485: MKLM(ONEN,lm);
486: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
487: tab[0] = y;
488: tab[1] = xp;
489:
490: reverseup(f,f->d,&t);
491: invmodup(t,f->d,&invf);
492:
493: for ( i = 2; i < d; i++ ) {
494: if ( debug_up )
495: fprintf(stderr,".");
496: kmulup(tab[i-1],xp,&t);
497: rembymulup_special(t,f,invf,&tab[i]);
498: }
499: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>