Annotation of OpenXM_contrib2/asir2000/engine/up_lm.c, Revision 1.7
1.2 noro 1: /*
2: * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
3: * All rights reserved.
4: *
5: * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
6: * non-exclusive and royalty-free license to use, copy, modify and
7: * redistribute, solely for non-commercial and non-profit purposes, the
8: * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
9: * conditions of this Agreement. For the avoidance of doubt, you acquire
10: * only a limited right to use the SOFTWARE hereunder, and FLL or any
11: * third party developer retains all rights, including but not limited to
12: * copyrights, in and to the SOFTWARE.
13: *
14: * (1) FLL does not grant you a license in any way for commercial
15: * purposes. You may use the SOFTWARE only for non-commercial and
16: * non-profit purposes only, such as academic, research and internal
17: * business use.
18: * (2) The SOFTWARE is protected by the Copyright Law of Japan and
19: * international copyright treaties. If you make copies of the SOFTWARE,
20: * with or without modification, as permitted hereunder, you shall affix
21: * to all such copies of the SOFTWARE the above copyright notice.
22: * (3) An explicit reference to this SOFTWARE and its copyright owner
23: * shall be made on your publication or presentation in any form of the
24: * results obtained by use of the SOFTWARE.
25: * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
1.3 noro 26: * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
1.2 noro 27: * for such modification or the source code of the modified part of the
28: * SOFTWARE.
29: *
30: * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
31: * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
32: * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
33: * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
34: * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
35: * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
36: * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
37: * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
38: * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
39: * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
40: * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
41: * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
42: * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
43: * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
44: * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
45: * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
46: * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
47: *
1.7 ! noro 48: * $OpenXM: OpenXM_contrib2/asir2000/engine/up_lm.c,v 1.6 2001/10/09 01:36:14 noro Exp $
1.2 noro 49: */
1.1 noro 50: #include "ca.h"
51: #include <math.h>
52:
53: extern GC_dont_gc;
54: extern struct oEGT eg_chrem,eg_fore,eg_back;
55: extern int debug_up;
56: extern int up_lazy;
1.4 noro 57: extern int current_ff;
1.1 noro 58:
1.6 noro 59: void fft_mulup_lm(UP n1,UP n2,UP *nr)
1.1 noro 60: {
61: ModNum *f1,*f2,*w,*fr;
62: ModNum *frarray[1024];
63: int modarray[1024];
64: int frarray_index = 0;
65: N m,m1,m2,lm_mod;
66: int d1,d2,dmin,i,mod,root,d,cond,bound;
67: UP r;
68:
69: if ( !n1 || !n2 ) {
70: *nr = 0; return;
71: }
72: d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
73: if ( !d1 || !d2 ) {
74: mulup(n1,n2,nr); return;
75: }
76: getmod_lm(&lm_mod);
77: if ( !lm_mod )
78: error("fft_mulup_lm : current_mod_lm is not set");
79: m = ONEN;
80: bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+2;
81: f1 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
82: f2 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
83: w = (ModNum *)ALLOCA(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
84: for ( i = 0; i < NPrimes; i++ ) {
85: FFT_primes(i,&mod,&root,&d);
86: if ( (1<<d) < d1+d2+1 )
87: continue;
88: modarray[frarray_index] = mod;
89: frarray[frarray_index++] = fr
90: = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
91: uptofmarray(mod,n1,f1);
92: uptofmarray(mod,n2,f2);
93: cond = FFT_pol_product(d1,f1,d2,f2,fr,i,w);
94: if ( cond )
95: error("fft_mulup : error in FFT_pol_product");
96: STON(mod,m1); muln(m,m1,&m2); m = m2;
97: if ( n_bits(m) > bound ) {
98: /* GC_dont_gc = 1; */
99: crup_lm(frarray,d1+d2,modarray,frarray_index,m,lm_mod,&r);
100: uptolmup(r,nr);
101: GC_dont_gc = 0;
102: return;
103: }
104: }
105: error("fft_mulup : FFT_primes exhausted");
106: }
107:
1.6 noro 108: void fft_squareup_lm(UP n1,UP *nr)
1.1 noro 109: {
110: ModNum *f1,*w,*fr;
111: ModNum *frarray[1024];
112: int modarray[1024];
113: int frarray_index = 0;
114: N m,m1,m2,lm_mod;
115: int d1,dmin,i,mod,root,d,cond,bound;
116: UP r;
117:
118: if ( !n1 ) {
119: *nr = 0; return;
120: }
121: d1 = n1->d; dmin = d1;
122: if ( !d1 ) {
123: mulup(n1,n1,nr); return;
124: }
125: getmod_lm(&lm_mod);
126: if ( !lm_mod )
127: error("fft_squareup_lm : current_mod_lm is not set");
128: m = ONEN;
129: bound = 2*maxblenup(n1)+int_bits(d1)+2;
130: f1 = (ModNum *)ALLOCA((2*d1+1)*sizeof(ModNum));
131: w = (ModNum *)ALLOCA(6*(1<<int_bits(2*d1+1))*sizeof(ModNum));
132: for ( i = 0; i < NPrimes; i++ ) {
133: FFT_primes(i,&mod,&root,&d);
134: if ( (1<<d) < 2*d1+1 )
135: continue;
136: modarray[frarray_index] = mod;
137: frarray[frarray_index++] = fr
138: = (ModNum *)ALLOCA((2*d1+1)*sizeof(ModNum));
139: uptofmarray(mod,n1,f1);
140: cond = FFT_pol_square(d1,f1,fr,i,w);
141: if ( cond )
142: error("fft_mulup : error in FFT_pol_product");
143: STON(mod,m1); muln(m,m1,&m2); m = m2;
144: if ( n_bits(m) > bound ) {
145: /* GC_dont_gc = 1; */
146: crup_lm(frarray,2*d1,modarray,frarray_index,m,lm_mod,&r);
147: uptolmup(r,nr);
148: GC_dont_gc = 0;
149: return;
150: }
151: }
152: error("fft_squareup : FFT_primes exhausted");
153: }
154:
1.6 noro 155: void trunc_fft_mulup_lm(UP n1,UP n2,int dbd,UP *nr)
1.1 noro 156: {
157: ModNum *f1,*f2,*fr,*w;
158: ModNum *frarray[1024];
159: int modarray[1024];
160: int frarray_index = 0;
161: N m,m1,m2,lm_mod;
162: int d1,d2,dmin,i,mod,root,d,cond,bound;
163: UP r;
164:
165: if ( !n1 || !n2 ) {
166: *nr = 0; return;
167: }
168: d1 = n1->d; d2 = n2->d; dmin = MIN(d1,d2);
169: if ( !d1 || !d2 ) {
170: tmulup(n1,n2,dbd,nr); return;
171: }
172: getmod_lm(&lm_mod);
173: if ( !lm_mod )
174: error("trunc_fft_mulup_lm : current_mod_lm is not set");
175: m = ONEN;
176: bound = maxblenup(n1)+maxblenup(n2)+int_bits(dmin)+2;
177: f1 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
178: f2 = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
179: w = (ModNum *)ALLOCA(6*(1<<int_bits(d1+d2+1))*sizeof(ModNum));
180: for ( i = 0; i < NPrimes; i++ ) {
181: FFT_primes(i,&mod,&root,&d);
182: if ( (1<<d) < d1+d2+1 )
183: continue;
184:
185: modarray[frarray_index] = mod;
186: frarray[frarray_index++] = fr
187: = (ModNum *)ALLOCA((d1+d2+1)*sizeof(ModNum));
188: uptofmarray(mod,n1,f1);
189: uptofmarray(mod,n2,f2);
190: cond = FFT_pol_product(d1,f1,d2,f2,fr,i,w);
191: if ( cond )
192: error("fft_mulup : error in FFT_pol_product");
193: STON(mod,m1); muln(m,m1,&m2); m = m2;
194: if ( n_bits(m) > bound ) {
195: /* GC_dont_gc = 1; */
196: crup_lm(frarray,MIN(dbd-1,d1+d2),modarray,frarray_index,m,lm_mod,&r);
197: uptolmup(r,nr);
198: GC_dont_gc = 0;
199: return;
200: }
201: }
202: error("trunc_fft_mulup : FFT_primes exhausted");
203: }
204:
1.6 noro 205: void crup_lm(ModNum **f,int d,int *mod,int index,N m,N lm_mod,UP *r)
1.1 noro 206: {
207: double *k;
208: double c2;
209: unsigned int *w;
210: unsigned int zi,au,al;
211: UL a;
212: int i,j,l,len;
213: UP s,s1,u;
214: struct oUP c;
215: N t,ci,mi,qn;
216: unsigned int **sum;
217: unsigned int *sum_b;
218: Q q;
219:
220: if ( !lm_mod )
221: error("crup_lm : current_mod_lm is not set");
222: k = (double *)ALLOCA((d+1)*sizeof(double));
223: for ( j = 0; j <= d; j++ )
224: k[j] = 0.5;
225: up_lazy = 1;
226: sum = (unsigned int **)ALLOCA((d+1)*sizeof(unsigned int *));
227: len = (int_bits(index)+n_bits(lm_mod)+31)/32+1;
228: w = (unsigned int *)ALLOCA((len+1)*sizeof(unsigned int));
229: sum_b = (unsigned int *)MALLOC_ATOMIC((d+1)*len*sizeof(unsigned int));
230: for ( j = 0; j <= d; j++ ) {
231: sum[j] = sum_b+len*j;
232: bzero((char *)sum[j],len*sizeof(unsigned int));
233: }
234: for ( i = 0, s = 0; i < index; i++ ) {
235: divin(m,mod[i],&ci);
236: zi = (unsigned int)invm((unsigned int)rem(ci,mod[i]),mod[i]);
237:
238: STON(zi,t); muln(ci,t,&mi);
239: divn(mi,lm_mod,&qn,&t);
240: if ( t )
241: for ( j = 0; j <= d; j++ ) {
242: bzero((char *)w,(len+1)*sizeof(unsigned int));
243: muln_1(BD(t),PL(t),(unsigned int)f[i][j],w);
244: for ( l = PL(t); l >= 0 && !w[l]; l--);
245: l++;
246: if ( l )
247: addarray_to(w,l,sum[j],len);
248: }
249: c2 = (double)zi/(double)mod[i];
250: for ( j = 0; j <= d; j++ )
251: k[j] += c2*f[i][j];
252: }
253: uiarraytoup(sum,len,d,&s);
254: GC_free(sum_b);
255:
256: u = UPALLOC(d);
257: for ( j = 0; j <= d; j++ ) {
258: #if 1
259: a = (UL)floor(k[j]);
1.7 ! noro 260: #if defined(i386) || defined(__alpha) || defined(VISUAL) || defined(__x86_64)
1.1 noro 261: au = ((unsigned int *)&a)[1];
262: al = ((unsigned int *)&a)[0];
263: #else
264: al = ((unsigned int *)&a)[1];
265: au = ((unsigned int *)&a)[0];
266: #endif
267: if ( au ) {
268: NEWQ(q); SGN(q) = 1; NM(q)=NALLOC(2); DN(q)=0;
269: PL(NM(q))=2; BD(NM(q))[0]=al; BD(NM(q))[1] = au;
270: } else
271: UTOQ(al,q);
272: #else
273: al = (int)floor(k[j]); STOQ(al,q);
274: #endif
275: u->c[j] = (Num)q;
276: }
277: for ( j = d; j >= 0 && !u->c[j]; j-- );
278: if ( j < 0 )
279: u = 0;
280: else
281: u->d = j;
282: divn(m,lm_mod,&qn,&t); NTOQ(t,-1,q);
283: c.d = 0; c.c[0] = (Num)q;
284: mulup(u,&c,&s1);
285: up_lazy = 0;
286:
287: addup(s,s1,r);
288: }
289:
1.6 noro 290: void fft_rembymulup_special_lm(UP n1,UP n2,UP inv2,UP *nr)
1.1 noro 291: {
292: int d1,d2,d;
293: UP r1,t,s,q,u;
294:
295: if ( !n2 )
296: error("rembymulup : division by 0");
297: else if ( !n1 || !n2->d )
298: *nr = 0;
299: else if ( (d1 = n1->d) < (d2 = n2->d) )
300: *nr = n1;
301: else {
302: d = d1-d2;
303: reverseup(n1,d1,&t); truncup(t,d+1,&r1);
304: trunc_fft_mulup_lm(r1,inv2,d+1,&t);
305: truncup(t,d+1,&s);
306: reverseup(s,d,&q);
307: trunc_fft_mulup_lm(q,n2,d2,&t); truncup(t,d2,&u);
308: truncup(n1,d2,&s);
309: subup(s,u,nr);
310: }
311: }
312:
1.6 noro 313: void uptolmup(UP n,UP *nr)
1.1 noro 314: {
315: int i,d;
316: Q *c;
317: LM *cr;
318: UP r;
319:
320: if ( !n )
321: *nr = 0;
322: else {
323: d = n->d; c = (Q *)n->c;
324: *nr = r = UPALLOC(d); cr = (LM *)r->c;
325: for ( i = 0; i <= d; i++ )
326: qtolm(c[i],&cr[i]);
327: for ( i = d; i >= 0 && !cr[i]; i-- );
328: if ( i < 0 )
329: *nr = 0;
330: else
331: r->d = i;
332: }
333: }
334:
1.6 noro 335: void save_up(UP obj,char *name)
1.1 noro 336: {
337: P p;
338: Obj ret;
339: NODE n0,n1;
340: STRING s;
1.6 noro 341: void Pbsave();
1.1 noro 342:
343: uptop(obj,&p);
344: MKSTR(s,name);
345: MKNODE(n1,s,0); MKNODE(n0,p,n1);
346: Pbsave(n0,&ret);
347: }
348:
1.6 noro 349: void hybrid_powermodup(UP f,UP *xp)
1.1 noro 350: {
351: N n;
352: UP x,y,t,invf,s;
353: int k;
354: LM lm;
355:
356: getmod_lm(&n);
357: if ( !n )
358: error("hybrid_powermodup : current_mod_lm is not set");
359: MKLM(ONEN,lm);
360: x = UPALLOC(1); x->d = 1; x->c[1] = (Num)lm;
361: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
362:
363: reverseup(f,f->d,&t);
364: invmodup(t,f->d,&s); uptolmup(s,&invf);
365: for ( k = n_bits(n)-1; k >= 0; k-- ) {
366: hybrid_squareup(FF_GFP,y,&t);
367: hybrid_rembymulup_special(FF_GFP,t,f,invf,&s);
368: y = s;
369: if ( n->b[k/32] & (1<<(k%32)) ) {
370: mulup(y,x,&t);
371: remup(t,f,&s);
372: y = s;
373: }
374: }
375: *xp = y;
376: }
377:
1.6 noro 378: void powermodup(UP f,UP *xp)
1.1 noro 379: {
380: N n;
381: UP x,y,t,invf,s;
382: int k;
1.4 noro 383: Num c;
1.1 noro 384:
1.4 noro 385: if ( !current_ff )
386: error("powermodup : current_ff is not set");
1.1 noro 387: field_order_ff(&n);
1.5 noro 388: one_ff(&c);
1.4 noro 389: x = UPALLOC(1); x->d = 1; x->c[1] = c;
390: y = UPALLOC(0); y->d = 0; y->c[0] = c;
1.1 noro 391:
392: reverseup(f,f->d,&t);
1.4 noro 393: invmodup(t,f->d,&s);
394: switch ( current_ff ) {
395: case FF_GFP:
396: case FF_GFPN:
397: uptolmup(s,&invf);
398: break;
399: case FF_GFS:
1.5 noro 400: case FF_GFSN:
1.4 noro 401: invf = s; /* XXX */
402: break;
403: default:
404: error("powermodup : not implemented yet");
405: }
1.1 noro 406: for ( k = n_bits(n)-1; k >= 0; k-- ) {
407: ksquareup(y,&t);
408: rembymulup_special(t,f,invf,&s);
409: y = s;
410: if ( n->b[k/32] & (1<<(k%32)) ) {
411: mulup(y,x,&t);
412: remup(t,f,&s);
413: y = s;
414: }
415: }
416: *xp = y;
417: }
418:
419: /* g^d mod f */
420:
1.6 noro 421: void hybrid_generic_powermodup(UP g,UP f,Q d,UP *xp)
1.1 noro 422: {
423: N e;
424: UP x,y,t,invf,s;
425: int k;
426: LM lm;
427:
428: e = NM(d);
429: MKLM(ONEN,lm);
430: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
431: remup(g,f,&x);
432: if ( !x ) {
433: *xp = !d ? y : 0;
434: return;
435: } else if ( !x->d ) {
436: pwrup(x,d,xp);
437: return;
438: }
439: reverseup(f,f->d,&t);
440: invmodup(t,f->d,&invf);
441: for ( k = n_bits(e)-1; k >= 0; k-- ) {
442: hybrid_squareup(FF_GFP,y,&t);
443: hybrid_rembymulup_special(FF_GFP,t,f,invf,&s);
444: y = s;
445: if ( e->b[k/32] & (1<<(k%32)) ) {
446: mulup(y,x,&t);
447: remup(t,f,&s);
448: y = s;
449: }
450: }
451: *xp = y;
452: }
453:
1.6 noro 454: void generic_powermodup(UP g,UP f,Q d,UP *xp)
1.1 noro 455: {
456: N e;
457: UP x,y,t,invf,s;
458: int k;
1.4 noro 459: Num c;
1.1 noro 460:
461: e = NM(d);
1.5 noro 462: one_ff(&c);
1.4 noro 463: y = UPALLOC(0); y->d = 0; y->c[0] = c;
1.1 noro 464: remup(g,f,&x);
465: if ( !x ) {
466: *xp = !d ? y : 0;
467: return;
468: } else if ( !x->d ) {
469: pwrup(x,d,xp);
470: return;
471: }
472: reverseup(f,f->d,&t);
473: invmodup(t,f->d,&invf);
474: for ( k = n_bits(e)-1; k >= 0; k-- ) {
475: ksquareup(y,&t);
476: rembymulup_special(t,f,invf,&s);
477: y = s;
478: if ( e->b[k/32] & (1<<(k%32)) ) {
479: mulup(y,x,&t);
480: remup(t,f,&s);
481: y = s;
482: }
483: }
484: *xp = y;
485: }
486:
1.6 noro 487: void hybrid_powertabup(UP f,UP xp,UP *tab)
1.1 noro 488: {
489: UP y,t,invf;
490: int i,d;
491: LM lm;
492:
493: d = f->d;
494: MKLM(ONEN,lm);
495: y = UPALLOC(0); y->d = 0; y->c[0] = (Num)lm;
496: tab[0] = y;
497: tab[1] = xp;
498:
499: reverseup(f,f->d,&t);
500: invmodup(t,f->d,&invf);
501:
502: for ( i = 2; i < d; i++ ) {
503: if ( debug_up )
504: fprintf(stderr,".");
505: hybrid_mulup(FF_GFP,tab[i-1],xp,&t);
506: hybrid_rembymulup_special(FF_GFP,t,f,invf,&tab[i]);
507: }
508: }
509:
1.6 noro 510: void powertabup(UP f,UP xp,UP *tab)
1.1 noro 511: {
512: UP y,t,invf;
513: int i,d;
1.4 noro 514: Num c;
1.1 noro 515:
516: d = f->d;
1.5 noro 517: one_ff(&c);
1.4 noro 518: y = UPALLOC(0); y->d = 0; y->c[0] = c;
1.1 noro 519: tab[0] = y;
520: tab[1] = xp;
521:
522: reverseup(f,f->d,&t);
523: invmodup(t,f->d,&invf);
524:
525: for ( i = 2; i < d; i++ ) {
526: if ( debug_up )
527: fprintf(stderr,".");
528: kmulup(tab[i-1],xp,&t);
529: rembymulup_special(t,f,invf,&tab[i]);
530: }
531: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>