Annotation of OpenXM_contrib2/asir2000/fft/polmul.c, Revision 1.3
1.2 noro 1: /*
2: * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
3: * All rights reserved.
4: *
5: * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
6: * non-exclusive and royalty-free license to use, copy, modify and
7: * redistribute, solely for non-commercial and non-profit purposes, the
8: * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
9: * conditions of this Agreement. For the avoidance of doubt, you acquire
10: * only a limited right to use the SOFTWARE hereunder, and FLL or any
11: * third party developer retains all rights, including but not limited to
12: * copyrights, in and to the SOFTWARE.
13: *
14: * (1) FLL does not grant you a license in any way for commercial
15: * purposes. You may use the SOFTWARE only for non-commercial and
16: * non-profit purposes only, such as academic, research and internal
17: * business use.
18: * (2) The SOFTWARE is protected by the Copyright Law of Japan and
19: * international copyright treaties. If you make copies of the SOFTWARE,
20: * with or without modification, as permitted hereunder, you shall affix
21: * to all such copies of the SOFTWARE the above copyright notice.
22: * (3) An explicit reference to this SOFTWARE and its copyright owner
23: * shall be made on your publication or presentation in any form of the
24: * results obtained by use of the SOFTWARE.
25: * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
1.3 ! noro 26: * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
1.2 noro 27: * for such modification or the source code of the modified part of the
28: * SOFTWARE.
29: *
30: * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
31: * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
32: * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
33: * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
34: * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
35: * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
36: * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
37: * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
38: * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
39: * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
40: * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
41: * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
42: * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
43: * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
44: * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
45: * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
46: * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
47: *
1.3 ! noro 48: * $OpenXM: OpenXM_contrib2/asir2000/fft/polmul.c,v 1.2 2000/08/21 08:31:34 noro Exp $
1.2 noro 49: */
1.1 noro 50: #include "dft.h"
51: extern struct PrimesS Primes[];
52:
53: /*
54: #define TIMING
55: */
56:
57: #ifdef TIMING
58:
59: #define MAXTIMING 20
60:
61: #include <sys/time.h>
62: #include <sys/resource.h>
63: #if 0
64: #if define(hitm)
65: #include <machine/xclock.h>
66: #endif
67: #endif
68:
69: static struct rusage ru_time[MAXTIMING];
70:
71: struct timing {
72: struct timeval user, sys;
73: };
74: static struct timing time_duration[MAXTIMING];
75:
76: #define RECORD_TIME(i) getrusage( RUSAGE_SELF, &ru_time[i] )
77: #endif /* TIMING */
78:
79: void FFT_primes(Num, p_prime, p_primroot, p_d)
80: int Num, *p_prime, *p_primroot, *p_d;
81: {
82: *p_prime = Primes[Num].prime;
83: *p_primroot = Primes[Num].primroot;
84: *p_d = Primes[Num].d;
85: }
86:
87: int FFT_pol_square( d1, C1, Prod, MinMod, wk)
88: unsigned int d1;
89: int MinMod;
90: unsigned int C1[], Prod[], wk[];
91: {
92:
93: unsigned int Low0bits;
94: unsigned int Proot, Prime;
95: double Pinv;
96: void MNpol_square_DFT();
97:
98: if ( MinMod < 0 || MinMod > NPrimes - 1 ) return 2;
99: Prime = Primes[MinMod].prime;
100: Proot = Primes[MinMod].primroot;
101: Low0bits = Primes[MinMod].d;
102: Pinv = (((double)1.0)/((double)Prime));
103:
104: MNpol_square_DFT( d1, C1, Prod, Proot, Low0bits, Prime, Pinv, wk );
105:
106: return 0;
107: }
108:
109: int FFT_pol_product( d1, C1, d2, C2, Prod, MinMod, wk)
110: unsigned int d1, d2;
111: int MinMod;
112: unsigned int C1[], C2[], Prod[], wk[];
113: {
114:
115: unsigned int Low0bits;
116: unsigned int Proot, Prime;
117: double Pinv;
118: void MNpol_product_DFT();
119:
120: if ( MinMod < 0 || MinMod > NPrimes - 1 ) return 2;
121: Prime = Primes[MinMod].prime;
122: Proot = Primes[MinMod].primroot;
123: Low0bits = Primes[MinMod].d;
124: Pinv = (((double)1.0)/((double)Prime));
125: MNpol_product_DFT( d1, C1, d2, C2, Prod, Proot, Low0bits, Prime, Pinv, wk );
126: return 0;
127: }
128:
129: struct oEGT {
130: double exectime,gctime;
131: };
132:
133: extern struct oEGT eg_fore,eg_back;
134:
135: void get_eg(struct oEGT *);
136: void add_eg(struct oEGT *,struct oEGT *, struct oEGT *);
137:
138: void MNpol_product_DFT( d1, C1, d2, C2, Prod, a, low0s, P, Pinv, wk )
139: unsigned int d1, d2, low0s;
140: unsigned int C1[], C2[], Prod[], a, P, wk[];
141: double Pinv;
142: /*
143: * The amount of space of wk[] must be >= (11/2)*2^{\lceil \log_2(d1+d2+1) \rceil}.
144: */
145: {
146: int i, d, n, log2n, halfn;
147: unsigned int *dft1, *dft2, *dftprod, *powa, *true_wk, ninv;
148: struct oEGT eg0,eg1;
149:
150: d = d1 + d2;
151: /**/
152: for ( n = 2, log2n = 1; n <= d; n <<= 1) log2n++;
153: halfn = n >> 1;
154: dft1 = wk, dft2 = &wk[n];
155: dftprod = &dft2[n];
156: powa = &dftprod[n];
157: true_wk = &powa[halfn]; /* 2*n areas */
158: /**/
159: C_PREP_ALPHA( a, low0s, log2n, halfn -1, powa, P, Pinv );
160: #ifdef TIMING
161: RECORD_TIME(1);
162: #endif
163: /**/
164: get_eg(&eg0);
165: C_DFT_FORE( C1, d1 + 1, 1, log2n, powa, dft1, 1, P, Pinv, true_wk );
166: #ifdef TIMING
167: RECORD_TIME(2);
168: #endif
169: C_DFT_FORE( C2, d2 + 1, 1, log2n, powa, dft2, 1, P, Pinv, true_wk );
170: get_eg(&eg1); add_eg(&eg_fore,&eg0,&eg1);
171: #ifdef TIMING
172: RECORD_TIME(3);
173: #endif
174: /**/
175: for ( i = 0; i < n; i++ ) {
176: AxBmodP( dftprod[i], unsigned int, dft1[i], dft2[i], P, Pinv );
177: }
178: #ifdef TIMING
179: RECORD_TIME(4);
180: #endif
181: /**/
182: ninv = P - (unsigned int)(((int)P) >> log2n);
183: get_eg(&eg0);
184: C_DFT_BACK( dftprod, n, 1, log2n, powa, Prod, 1, 0, d + 1, ninv, P, Pinv, true_wk );
185: get_eg(&eg1); add_eg(&eg_back,&eg0,&eg1);
186: }
187:
188: void MNpol_square_DFT( d1, C1, Prod, a, low0s, P, Pinv, wk )
189: unsigned int d1, low0s;
190: unsigned int C1[], Prod[], a, P, wk[];
191: double Pinv;
192: /*
193: * The amount of space of wk[] must be >= (11/2)*2^{\lceil \log_2(d1+d2+1) \rceil}.
194: */
195: {
196: int i, d, n, log2n, halfn;
197: unsigned int *dft1, *dft2, *dftprod, *powa, *true_wk, ninv;
198: struct oEGT eg0,eg1;
199:
200: d = d1 + d1;
201: /**/
202: for ( n = 2, log2n = 1; n <= d; n <<= 1) log2n++;
203: halfn = n >> 1;
204: dft1 = wk, dft2 = &wk[n];
205: dftprod = &dft2[n];
206: powa = &dftprod[n];
207: true_wk = &powa[halfn]; /* 2*n areas */
208: /**/
209: C_PREP_ALPHA( a, low0s, log2n, halfn -1, powa, P, Pinv );
210: #ifdef TIMING
211: RECORD_TIME(1);
212: #endif
213: /**/
214: get_eg(&eg0);
215: C_DFT_FORE( C1, d1 + 1, 1, log2n, powa, dft1, 1, P, Pinv, true_wk );
216: get_eg(&eg1); add_eg(&eg_fore,&eg0,&eg1);
217: /**/
218: for ( i = 0; i < n; i++ ) {
219: AxBmodP( dftprod[i], unsigned int, dft1[i], dft1[i], P, Pinv );
220: }
221: #ifdef TIMING
222: RECORD_TIME(4);
223: #endif
224: /**/
225: ninv = P - (unsigned int)(((int)P) >> log2n);
226: get_eg(&eg0);
227: C_DFT_BACK( dftprod, n, 1, log2n, powa, Prod, 1, 0, d + 1, ninv, P, Pinv, true_wk );
228: get_eg(&eg1); add_eg(&eg_back,&eg0,&eg1);
229: }
230:
231: /****************** TEST CODE ****************/
232:
233: /*
234: #define TEST
235: */
236: #ifdef TEST
237:
238: #define RETI 100
239:
240: #include <stdio.h>
241:
242: #define Prime 65537 /* 2^16 + 1 */
243: #define Low0bits 16
244: #define PrimRoot 3
245:
246: #define PInv (((double)1.0)/((double)Prime))
247:
248: #define MaxNPnts (1<<Low0bits)
249:
250: static unsigned int Pol1[MaxNPnts], Pol2[MaxNPnts], PolProdDFT[MaxNPnts], PolProd[MaxNPnts], wk[6*MaxNPnts];
251:
252: static void generate_pol(), dump_pol(), compare_vals();
253:
254: #ifdef TIMING
255: static void time_spent();
256:
257: #define pr_timing(pTiming) fprintf(stderr, "user:%3d.%06d + sys:%3d.%06d = %3d.%06d", \
258: (pTiming)->user.tv_sec,(pTiming)->user.tv_usec, \
259: (pTiming)->sys.tv_sec,(pTiming)->sys.tv_usec, \
260: (pTiming)->user.tv_sec+(pTiming)->sys.tv_sec, \
261: (pTiming)->user.tv_usec+(pTiming)->sys.tv_usec)
262:
263: #define Pr_timing(i) pr_timing(&time_duration[i])
264: #endif /* TIMING */
265: void main( argc, argv )
266: int argc;
267: char **argv;
268: {
269: int i, ac = argc - 1, d1, d2, d;
270:
271: int ii, *error;
272:
273: char **av = &argv[1];
274: unsigned int mnP = (unsigned int)Prime, mnProot = (unsigned int)PrimRoot;
275: double Pinv = PInv;
276:
277: for ( ; ac >= 2; ac -= 2, av += 2 ) {
278: d = (d1 = atoi( av[0] )) + (d2 = atoi( av[1] ));
279: if ( d1 <= 0 || d2 <= 0 || d >= MaxNPnts ) {
280: fprintf( stderr, "*** invalid degrees %s & %s. (sum < %d)\n",
281: av[0], av[1], MaxNPnts );
282: continue;
283: }
284: generate_pol( d1, Pol1 );
285: /* dump_pol( d1, Pol1 ); */
286: generate_pol( d2, Pol2 );
287: /* dump_pol( d2, Pol2 ); */
288: /**/
289:
290:
291:
292: for ( ii=0; ii < RETI; ii++ ){
293: #ifdef TIMING
294: RECORD_TIME(0);
295: #endif
296: MNpol_product_DFT( d1, Pol1, d2, Pol2, PolProdDFT, mnProot, Low0bits, mnP, Pinv, wk );
297: #ifdef TIMING
298: RECORD_TIME(5);
299: time_spent( 0, 1, 0 );
300: time_spent( 1, 2, 1 );
301: time_spent( 2, 3, 2 );
302: time_spent( 3, 4, 3 );
303: time_spent( 4, 5, 4 );
304: time_spent( 0, 5, 5 );
305: #endif
306: }
307:
308:
309: fprintf( stderr, "DFT mul done ( %d times )\n", RETI );
310:
311:
312:
313:
314:
315: for ( ii=0; ii < RETI; ii++ ){
316: #ifdef TIMING
317: RECORD_TIME(10);
318: #endif
319: MNpol_product( d1, Pol1, d2, Pol2, PolProd, mnP, Pinv );
320: #ifdef TIMING
321: RECORD_TIME(11);
322: time_spent( 10, 11, 10 );
323: #endif
324: }
325:
326:
327:
328: fprintf( stderr, "mul by classical alg. done ( %d times )\n", RETI );
329: /**/
330:
331:
332: /* PolProdDFT[20] = 0.0; */
333:
334:
335: compare_vals( PolProdDFT, PolProd, d+1 , error);
336: if ( *error == 0)
337: fprintf( stderr, "******* Result is OK *******\n" );
338: else
339: fprintf( stderr, "******* Result is NG *******\n" );
340:
341: /**/
342: #ifdef TIMING
343: fprintf( stderr, "DFT mul prep: " ); Pr_timing( 0 );
344: fprintf( stderr, "\nDFT transform 1: " ); Pr_timing( 1 );
345: fprintf( stderr, "\nDFT transform 2: " ); Pr_timing( 2 );
346: fprintf( stderr, "\nDFT mult: " ); Pr_timing( 3 );
347: fprintf( stderr, "\nDFT inv trans: " ); Pr_timing( 4 );
348: fprintf( stderr, "\nDFT pol-mult Tot: " ); Pr_timing( 5 );
349: /**/
350: fprintf( stderr, "\nClassical mult: " ); Pr_timing( 10 );
351: fprintf( stderr, "\n" );
352: #endif /* TIMING */
353: }
354: }
355:
356: static void generate_pol( d, cf )
357: int d;
358: unsigned int cf[];
359: {
360: unsigned int mnP = (unsigned int)Prime;
361: int i, c;
362:
363: for ( i = 0; i < d; i++ ) cf[i] = random() % Prime;
364: while ( (c = random() % Prime) == 0 ) ;
365: cf[d] = c;
366: }
367:
368: static void dump_pol( d, cf )
369: int d;
370: unsigned int cf[];
371: {
372: int i;
373:
374: printf( "Pol of degree %d over Z/(%d) :\n", d, Prime );
375: for ( i = 0; i <= d; i++ ) {
376: if ( (i%5) == 0 ) printf( "\t" );
377: printf( "%10d,", (int)cf[i] );
378: if ( (i%5) == 4 ) printf( "\n" );
379: }
380: if ( (i%5) != 0 ) printf( "\n" );
381: }
382:
383: void compare_vals( v1, v2, n , error)
384: unsigned int v1[], v2[];
385: int n;
386: int *error;
387: {
388: int i, tmp;
389:
390: *error = 0;
391:
392: for ( i = 0; i < n; i ++) {
393: tmp = (int)v1[i] - (int)v2[i];
394: tmp = abs(tmp);
395: if ( tmp > *error ) *error = tmp;
396: }
397:
398:
399:
400: /*
401: int i, j;
402:
403:
404:
405: for ( i = 0; i < n; i += 5 ) {
406: printf( " %6d:%10d", i, (int)v1[i] );
407: for ( j = 1; j < 5 && i + j < n; j++ ) printf( ",%10d", (int)v1[i+j] );
408: printf( "\n" );
409: if ( (int)v1[i] == (int)v2[i] && (int)v1[i+1] == (int)v2[i+1]
410: && (int)v1[i+2] == (int)v2[i+2] && (int)v1[i+3] == (int)v2[i+3]
411: && (int)v1[i+4] == (int)v2[i+4] ) continue;
412: printf( " " );
413: for ( j = 0; j < 5 && i + j < n; j++ )
414: if ( (int)v1[i+j] == (int)v2[i+j] ) printf( " " );
415: else printf( "%10d,", (int)v2[i+j] );
416: printf( "\n" );
417: }
418: */
419:
420:
421:
422: }
423:
424: #ifdef TIMING
425:
426: static void time_spent( s, e, n )
427: int s, e, n;
428: {
429: struct rusage *rus = &ru_time[s], *rue = &ru_time[e];
430: struct timeval *tms, *tme;
431: long sec, usec;
432: struct timing *pt = &time_duration[n];
433:
434: tms = &rus->ru_utime, tme = &rue->ru_utime;
435: sec = tme->tv_sec - tms->tv_sec,
436: usec = tme->tv_usec - tms->tv_usec;
437: if ( usec < 0 ) usec += 1000000, sec--;
438: /* pt->user.tv_sec = sec, pt->user.tv_usec = usec; */
439: pt->user.tv_sec += sec, pt->user.tv_usec += usec;
440:
441:
442:
443: if ( pt->user.tv_usec > 999999 ){
444: pt->user.tv_usec -= 1000000;
445: pt->user.tv_sec++;
446: }
447:
448:
449:
450: /**/
451: tms = &rus->ru_stime, tme = &rue->ru_stime;
452: sec = tme->tv_sec - tms->tv_sec,
453: usec = tme->tv_usec - tms->tv_usec;
454: if ( usec < 0 ) usec += 1000000, sec--;
455: /* pt->sys.tv_sec = sec, pt->sys.tv_usec = usec; */
456: pt->sys.tv_sec += sec, pt->sys.tv_usec += usec;
457:
458:
459:
460: if ( pt->sys.tv_usec > 999999 ){
461: pt->sys.tv_usec -= 1000000;
462: pt->sys.tv_sec++;
463: }
464:
465:
466: }
467: #endif /* TIMING */
468:
469: #endif /* TEST */
470:
471: void MNpol_product( d1, C1, d2, C2, Prod, P, Pinv )
472: unsigned int d1, d2;
473: unsigned int C1[], C2[], Prod[], P;
474: double Pinv;
475: {
476: unsigned int i, j;
477: unsigned int c;
478:
479: c = C1[0];
480: AxBmodP( Prod[0], unsigned int, c, C2[0], P, Pinv );
481: for ( i = 1; i <= d2; i++ ) {
482: AxBmodPnostrchk( Prod[i], unsigned int, c, C2[i], P, Pinv );
483: }
484: c = C1[d1];
485: if ( d1 > d2 ) {
486: for ( i = d2 + 1; i < d1; i++ ) Prod[i] = (unsigned int)0;
487: for ( i = 0; i < d2; i++ ) {
488: AxBmodPnostrchk( Prod[i+d1], unsigned int, c, C2[i], P, Pinv );
489: }
490: } else {
491: j = d2 - d1;
492: for ( i = 0; i <= j; i++ ) {
493: AxBplusCmodPnostrchk( Prod[i+d1], unsigned int, c, C2[i], Prod[i+d1], P, Pinv );
494: }
495: for ( ; i < d2; i++ ) {
496: AxBmodPnostrchk( Prod[i+d1], unsigned int, c, C2[i], P, Pinv );
497: }
498: }
499: AxBmodP( Prod[d1+d2], unsigned int, c, C2[d2], P, Pinv );
500: /**/
501: for ( j = 1; j < d1 - 1; j++ ) {
502: c = C1[j];
503: if ( c == (unsigned int)0 ) {
504: Prod[j] = AstrictmodP( Prod[j], P );
505: continue;
506: }
507: AxBplusCmodP( Prod[j], unsigned int, c, C2[0], Prod[j], P, Pinv );
508: for ( i = 1; i <= d2; i++ ) {
509: AxBplusCmodPnostrchk( Prod[i+j], unsigned int, c, C2[i], Prod[i+j], P, Pinv );
510: }
511: }
512: c = C1[j = d1-1];
513: for ( i = 0; i <= d2; i++ ) {
514: AxBplusCmodP( Prod[i+j], unsigned int, c, C2[i], Prod[i+j], P, Pinv );
515: }
516: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>