Annotation of OpenXM_contrib2/asir2000/engine/up_lm.c, Revision 1.1.1.1
1.1 noro 1: /* $OpenXM: OpenXM/src/asir99/engine/up_lm.c,v 1.2 1999/11/18 05:42:02 noro Exp $ */
2: #include "ca.h"
3: #include <math.h>
4:
5: extern GC_dont_gc;
6: extern struct oEGT eg_chrem,eg_fore,eg_back;
7: extern int debug_up;
8: extern int up_lazy;
9:
10: void crup_lm(ModNum **,int,int *,int,N,N,UP *);
11:
12: void fft_mulup_lm(n1,n2,nr)
13: UP n1,n2;
14: UP *nr;
15: {
16: ModNum *f1,*f2,*w,*fr;
17: ModNum *frarray[1024];
18: int modarray[1024];
19: int frarray_index = 0;
20: N m,m1,m2,lm_mod;
21: int d1,d2,dmin,i,mod,root,d,cond,bound;
22: UP r;
23:
24: if ( !n1 || !n2 ) {
25: *nr = 0; return;
26: }
27: d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
28: if ( !d1 || !d2 ) {
29: mulup(n1,n2,nr); return;
30: }
31: getmod_lm(&lm_mod);
32: if ( !lm_mod )
33: error("fft_mulup_lm : current_mod_lm is not set");
34: m = ONEN;
35: bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+2;
36: f1 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
37: f2 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
38: w = (ModNum *)ALLOCA(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
39: for ( i = 0; i < NPrimes; i++ ) {
40: FFT_primes(i,&mod,&root,&d);
41: if ( (1<<d) < d1+d2+1 )
42: continue;
43: modarray[frarray_index] = mod;
44: frarray[frarray_index++] = fr
45: = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
46: uptofmarray(mod,n1,f1);
47: uptofmarray(mod,n2,f2);
48: cond = FFT_pol_product(d1,f1,d2,f2,fr,i,w);
49: if ( cond )
50: error("fft_mulup : error in FFT_pol_product");
51: STON(mod,m1); muln(m,m1,&m2); m = m2;
52: if ( n_bits(m) > bound ) {
53: /* GC_dont_gc = 1; */
54: crup_lm(frarray,d1+d2,modarray,frarray_index,m,lm_mod,&r);
55: uptolmup(r,nr);
56: GC_dont_gc = 0;
57: return;
58: }
59: }
60: error("fft_mulup : FFT_primes exhausted");
61: }
62:
63: void fft_squareup_lm(n1,nr)
64: UP n1;
65: UP *nr;
66: {
67: ModNum *f1,*w,*fr;
68: ModNum *frarray[1024];
69: int modarray[1024];
70: int frarray_index = 0;
71: N m,m1,m2,lm_mod;
72: int d1,dmin,i,mod,root,d,cond,bound;
73: UP r;
74:
75: if ( !n1 ) {
76: *nr = 0; return;
77: }
78: d1 = n1->d; dmin = d1;
79: if ( !d1 ) {
80: mulup(n1,n1,nr); return;
81: }
82: getmod_lm(&lm_mod);
83: if ( !lm_mod )
84: error("fft_squareup_lm : current_mod_lm is not set");
85: m = ONEN;
86: bound = 2*maxblenup(n1)+int_bits(d1)+2;
87: f1 = (ModNum *)ALLOCA((2*d1+1)*sizeof(ModNum));
88: w = (ModNum *)ALLOCA(6*(1<<int_bits(2*d1+1))*sizeof(ModNum));
89: for ( i = 0; i < NPrimes; i++ ) {
90: FFT_primes(i,&mod,&root,&d);
91: if ( (1<<d) < 2*d1+1 )
92: continue;
93: modarray[frarray_index] = mod;
94: frarray[frarray_index++] = fr
95: = (ModNum *)ALLOCA((2*d1+1)*sizeof(ModNum));
96: uptofmarray(mod,n1,f1);
97: cond = FFT_pol_square(d1,f1,fr,i,w);
98: if ( cond )
99: error("fft_mulup : error in FFT_pol_product");
100: STON(mod,m1); muln(m,m1,&m2); m = m2;
101: if ( n_bits(m) > bound ) {
102: /* GC_dont_gc = 1; */
103: crup_lm(frarray,2*d1,modarray,frarray_index,m,lm_mod,&r);
104: uptolmup(r,nr);
105: GC_dont_gc = 0;
106: return;
107: }
108: }
109: error("fft_squareup : FFT_primes exhausted");
110: }
111:
112: void trunc_fft_mulup_lm(n1,n2,dbd,nr)
113: UP n1,n2;
114: int dbd;
115: UP *nr;
116: {
117: ModNum *f1,*f2,*fr,*w;
118: ModNum *frarray[1024];
119: int modarray[1024];
120: int frarray_index = 0;
121: N m,m1,m2,lm_mod;
122: int d1,d2,dmin,i,mod,root,d,cond,bound;
123: UP r;
124:
125: if ( !n1 || !n2 ) {
126: *nr = 0; return;
127: }
128: d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
129: if ( !d1 || !d2 ) {
130: tmulup(n1,n2,dbd,nr); return;
131: }
132: getmod_lm(&lm_mod);
133: if ( !lm_mod )
134: error("trunc_fft_mulup_lm : current_mod_lm is not set");
135: m = ONEN;
136: bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+2;
137: f1 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
138: f2 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
139: w = (ModNum *)ALLOCA(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
140: for ( i = 0; i < NPrimes; i++ ) {
141: FFT_primes(i,&mod,&root,&d);
142: if ( (1<<d) < d1+d2+1 )
143: continue;
144:
145: modarray[frarray_index] = mod;
146: frarray[frarray_index++] = fr
147: = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
148: uptofmarray(mod,n1,f1);
149: uptofmarray(mod,n2,f2);
150: cond = FFT_pol_product(d1,f1,d2,f2,fr,i,w);
151: if ( cond )
152: error("fft_mulup : error in FFT_pol_product");
153: STON(mod,m1); muln(m,m1,&m2); m = m2;
154: if ( n_bits(m) > bound ) {
155: /* GC_dont_gc = 1; */
156: crup_lm(frarray,MIN(dbd-1,d1+d2),modarray,frarray_index,m,lm_mod,&r);
157: uptolmup(r,nr);
158: GC_dont_gc = 0;
159: return;
160: }
161: }
162: error("trunc_fft_mulup : FFT_primes exhausted");
163: }
164:
165: void crup_lm(f,d,mod,index,m,lm_mod,r)
166: ModNum **f;
167: int d;
168: int *mod;
169: int index;
170: N m;
171: N lm_mod;
172: UP *r;
173: {
174: double *k;
175: double c2;
176: unsigned int *w;
177: unsigned int zi,au,al;
178: UL a;
179: int i,j,l,len;
180: UP s,s1,u;
181: struct oUP c;
182: N t,ci,mi,qn;
183: unsigned int **sum;
184: unsigned int *sum_b;
185: Q q;
186: struct oEGT eg0,eg1;
187:
188: if ( !lm_mod )
189: error("crup_lm : current_mod_lm is not set");
190: k = (double *)ALLOCA((d+1)*sizeof(double));
191: for ( j = 0; j <= d; j++ )
192: k[j] = 0.5;
193: up_lazy = 1;
194: sum = (unsigned int **)ALLOCA((d+1)*sizeof(unsigned int *));
195: len = (int_bits(index)+n_bits(lm_mod)+31)/32+1;
196: w = (unsigned int *)ALLOCA((len+1)*sizeof(unsigned int));
197: sum_b = (unsigned int *)MALLOC_ATOMIC((d+1)*len*sizeof(unsigned int));
198: for ( j = 0; j <= d; j++ ) {
199: sum[j] = sum_b+len*j;
200: bzero((char *)sum[j],len*sizeof(unsigned int));
201: }
202: for ( i = 0, s = 0; i < index; i++ ) {
203: divin(m,mod[i],&ci);
204: zi = (unsigned int)invm((unsigned int)rem(ci,mod[i]),mod[i]);
205:
206: STON(zi,t); muln(ci,t,&mi);
207: divn(mi,lm_mod,&qn,&t);
208: if ( t )
209: for ( j = 0; j <= d; j++ ) {
210: bzero((char *)w,(len+1)*sizeof(unsigned int));
211: muln_1(BD(t),PL(t),(unsigned int)f[i][j],w);
212: for ( l = PL(t); l >= 0 && !w[l]; l--);
213: l++;
214: if ( l )
215: addarray_to(w,l,sum[j],len);
216: }
217: c2 = (double)zi/(double)mod[i];
218: for ( j = 0; j <= d; j++ )
219: k[j] += c2*f[i][j];
220: }
221: uiarraytoup(sum,len,d,&s);
222: GC_free(sum_b);
223:
224: u = UPALLOC(d);
225: for ( j = 0; j <= d; j++ ) {
226: #if 1
227: a = (UL)floor(k[j]);
228: #if defined(i386) || defined(__alpha) || defined(VISUAL)
229: au = ((unsigned int *)&a)[1];
230: al = ((unsigned int *)&a)[0];
231: #else
232: al = ((unsigned int *)&a)[1];
233: au = ((unsigned int *)&a)[0];
234: #endif
235: if ( au ) {
236: NEWQ(q); SGN(q) = 1; NM(q)=NALLOC(2); DN(q)=0;
237: PL(NM(q))=2; BD(NM(q))[0]=al; BD(NM(q))[1] = au;
238: } else
239: UTOQ(al,q);
240: #else
241: al = (int)floor(k[j]); STOQ(al,q);
242: #endif
243: u->c[j] = (Num)q;
244: }
245: for ( j = d; j >= 0 && !u->c[j]; j-- );
246: if ( j < 0 )
247: u = 0;
248: else
249: u->d = j;
250: divn(m,lm_mod,&qn,&t); NTOQ(t,-1,q);
251: c.d = 0; c.c[0] = (Num)q;
252: mulup(u,&c,&s1);
253: up_lazy = 0;
254:
255: addup(s,s1,r);
256: }
257:
258: void fft_rembymulup_special_lm(n1,n2,inv2,nr)
259: UP n1,n2,inv2;
260: UP *nr;
261: {
262: int d1,d2,d;
263: UP r1,t,s,q,u;
264:
265: if ( !n2 )
266: error("rembymulup : division by 0");
267: else if ( !n1 || !n2->d )
268: *nr = 0;
269: else if ( (d1 = n1->d) < (d2 = n2->d) )
270: *nr = n1;
271: else {
272: d = d1-d2;
273: reverseup(n1,d1,&t); truncup(t,d+1,&r1);
274: trunc_fft_mulup_lm(r1,inv2,d+1,&t);
275: truncup(t,d+1,&s);
276: reverseup(s,d,&q);
277: trunc_fft_mulup_lm(q,n2,d2,&t); truncup(t,d2,&u);
278: truncup(n1,d2,&s);
279: subup(s,u,nr);
280: }
281: }
282:
283: void uptolmup(n,nr)
284: UP n;
285: UP *nr;
286: {
287: int i,d;
288: Q *c;
289: LM *cr;
290: UP r;
291:
292: if ( !n )
293: *nr = 0;
294: else {
295: d = n->d; c = (Q *)n->c;
296: *nr = r = UPALLOC(d); cr = (LM *)r->c;
297: for ( i = 0; i <= d; i++ )
298: qtolm(c[i],&cr[i]);
299: for ( i = d; i >= 0 && !cr[i]; i-- );
300: if ( i < 0 )
301: *nr = 0;
302: else
303: r->d = i;
304: }
305: }
306:
307: save_up(obj,name)
308: UP obj;
309: char *name;
310: {
311: P p;
312: Obj ret;
313: NODE n0,n1;
314: STRING s;
315:
316: uptop(obj,&p);
317: MKSTR(s,name);
318: MKNODE(n1,s,0); MKNODE(n0,p,n1);
319: Pbsave(n0,&ret);
320: }
321:
322: void hybrid_powermodup(f,xp)
323: UP f;
324: UP *xp;
325: {
326: N n;
327: UP x,y,t,invf,s;
328: int k;
329: LM lm;
330: struct oEGT eg_sq,eg_rem,eg_mul,eg_inv,eg0,eg1,eg2,eg3;
331: char name[BUFSIZ];
332:
333: getmod_lm(&n);
334: if ( !n )
335: error("hybrid_powermodup : current_mod_lm is not set");
336: MKLM(ONEN,lm);
337: x = UPALLOC(1); x->d = 1; x->c[1] = (Num)lm;
338: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
339:
340: reverseup(f,f->d,&t);
341: invmodup(t,f->d,&s); uptolmup(s,&invf);
342: for ( k = n_bits(n)-1; k >= 0; k-- ) {
343: hybrid_squareup(FF_GFP,y,&t);
344: hybrid_rembymulup_special(FF_GFP,t,f,invf,&s);
345: y = s;
346: if ( n->b[k/32] & (1<<(k%32)) ) {
347: mulup(y,x,&t);
348: remup(t,f,&s);
349: y = s;
350: }
351: }
352: *xp = y;
353: }
354:
355: void powermodup(f,xp)
356: UP f;
357: UP *xp;
358: {
359: N n;
360: UP x,y,t,invf,s;
361: int k;
362: LM lm;
363: struct oEGT eg_sq,eg_rem,eg_mul,eg_inv,eg0,eg1,eg2,eg3;
364:
365: field_order_ff(&n);
366: if ( !n )
367: error("powermodup : current_mod_lm is not set");
368: MKLM(ONEN,lm);
369: x = UPALLOC(1); x->d = 1; x->c[1] = (Num)lm;
370: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
371:
372: reverseup(f,f->d,&t);
373: invmodup(t,f->d,&s); uptolmup(s,&invf);
374: for ( k = n_bits(n)-1; k >= 0; k-- ) {
375: ksquareup(y,&t);
376: rembymulup_special(t,f,invf,&s);
377: y = s;
378: if ( n->b[k/32] & (1<<(k%32)) ) {
379: mulup(y,x,&t);
380: remup(t,f,&s);
381: y = s;
382: }
383: }
384: *xp = y;
385: }
386:
387: /* g^d mod f */
388:
389: void hybrid_generic_powermodup(g,f,d,xp)
390: UP g,f;
391: Q d;
392: UP *xp;
393: {
394: N e;
395: UP x,y,t,invf,s;
396: int k;
397: LM lm;
398: struct oEGT eg_sq,eg_rem,eg_mul,eg_inv,eg0,eg1,eg2,eg3;
399:
400: e = NM(d);
401: MKLM(ONEN,lm);
402: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
403: remup(g,f,&x);
404: if ( !x ) {
405: *xp = !d ? y : 0;
406: return;
407: } else if ( !x->d ) {
408: pwrup(x,d,xp);
409: return;
410: }
411: reverseup(f,f->d,&t);
412: invmodup(t,f->d,&invf);
413: for ( k = n_bits(e)-1; k >= 0; k-- ) {
414: hybrid_squareup(FF_GFP,y,&t);
415: hybrid_rembymulup_special(FF_GFP,t,f,invf,&s);
416: y = s;
417: if ( e->b[k/32] & (1<<(k%32)) ) {
418: mulup(y,x,&t);
419: remup(t,f,&s);
420: y = s;
421: }
422: }
423: *xp = y;
424: }
425:
426: void generic_powermodup(g,f,d,xp)
427: UP g,f;
428: Q d;
429: UP *xp;
430: {
431: N e;
432: UP x,y,t,invf,s;
433: int k;
434: LM lm;
435: struct oEGT eg_sq,eg_rem,eg_mul,eg_inv,eg0,eg1,eg2,eg3;
436:
437: e = NM(d);
438: MKLM(ONEN,lm);
439: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
440: remup(g,f,&x);
441: if ( !x ) {
442: *xp = !d ? y : 0;
443: return;
444: } else if ( !x->d ) {
445: pwrup(x,d,xp);
446: return;
447: }
448: reverseup(f,f->d,&t);
449: invmodup(t,f->d,&invf);
450: for ( k = n_bits(e)-1; k >= 0; k-- ) {
451: ksquareup(y,&t);
452: rembymulup_special(t,f,invf,&s);
453: y = s;
454: if ( e->b[k/32] & (1<<(k%32)) ) {
455: mulup(y,x,&t);
456: remup(t,f,&s);
457: y = s;
458: }
459: }
460: *xp = y;
461: }
462:
463: void hybrid_powertabup(f,xp,tab)
464: UP f;
465: UP xp;
466: UP *tab;
467: {
468: UP y,t,invf;
469: int i,d;
470: LM lm;
471: struct oEGT eg_rem,eg_mul,eg0,eg1,eg2;
472:
473: d = f->d;
474: MKLM(ONEN,lm);
475: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
476: tab[0] = y;
477: tab[1] = xp;
478:
479: reverseup(f,f->d,&t);
480: invmodup(t,f->d,&invf);
481:
482: for ( i = 2; i < d; i++ ) {
483: if ( debug_up )
484: fprintf(stderr,".");
485: hybrid_mulup(FF_GFP,tab[i-1],xp,&t);
486: hybrid_rembymulup_special(FF_GFP,t,f,invf,&tab[i]);
487: }
488: }
489:
490: void powertabup(f,xp,tab)
491: UP f;
492: UP xp;
493: UP *tab;
494: {
495: UP y,t,invf;
496: int i,d;
497: LM lm;
498: struct oEGT eg_rem,eg_mul,eg0,eg1,eg2;
499:
500: d = f->d;
501: MKLM(ONEN,lm);
502: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
503: tab[0] = y;
504: tab[1] = xp;
505:
506: reverseup(f,f->d,&t);
507: invmodup(t,f->d,&invf);
508:
509: for ( i = 2; i < d; i++ ) {
510: if ( debug_up )
511: fprintf(stderr,".");
512: kmulup(tab[i-1],xp,&t);
513: rembymulup_special(t,f,invf,&tab[i]);
514: }
515: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>