Annotation of OpenXM_contrib2/asir2018/fft/polmul.c, Revision 1.1
1.1 ! noro 1: /*
! 2: * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
! 3: * All rights reserved.
! 4: *
! 5: * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
! 6: * non-exclusive and royalty-free license to use, copy, modify and
! 7: * redistribute, solely for non-commercial and non-profit purposes, the
! 8: * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
! 9: * conditions of this Agreement. For the avoidance of doubt, you acquire
! 10: * only a limited right to use the SOFTWARE hereunder, and FLL or any
! 11: * third party developer retains all rights, including but not limited to
! 12: * copyrights, in and to the SOFTWARE.
! 13: *
! 14: * (1) FLL does not grant you a license in any way for commercial
! 15: * purposes. You may use the SOFTWARE only for non-commercial and
! 16: * non-profit purposes only, such as academic, research and internal
! 17: * business use.
! 18: * (2) The SOFTWARE is protected by the Copyright Law of Japan and
! 19: * international copyright treaties. If you make copies of the SOFTWARE,
! 20: * with or without modification, as permitted hereunder, you shall affix
! 21: * to all such copies of the SOFTWARE the above copyright notice.
! 22: * (3) An explicit reference to this SOFTWARE and its copyright owner
! 23: * shall be made on your publication or presentation in any form of the
! 24: * results obtained by use of the SOFTWARE.
! 25: * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
! 26: * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
! 27: * for such modification or the source code of the modified part of the
! 28: * SOFTWARE.
! 29: *
! 30: * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
! 31: * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
! 32: * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
! 33: * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
! 34: * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
! 35: * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
! 36: * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
! 37: * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
! 38: * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
! 39: * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
! 40: * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
! 41: * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
! 42: * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
! 43: * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
! 44: * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
! 45: * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
! 46: * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
! 47: *
! 48: * $OpenXM$
! 49: */
! 50: #include "dft.h"
! 51: extern struct PrimesS Primes[];
! 52:
! 53: /*
! 54: #define TIMING
! 55: */
! 56:
! 57: #ifdef TIMING
! 58:
! 59: #define MAXTIMING 20
! 60:
! 61: #include <sys/time.h>
! 62: #include <sys/resource.h>
! 63: #if 0
! 64: #if define(hitm)
! 65: #include <machine/xclock.h>
! 66: #endif
! 67: #endif
! 68:
! 69: static struct rusage ru_time[MAXTIMING];
! 70:
! 71: struct timing {
! 72: struct timeval user, sys;
! 73: };
! 74: static struct timing time_duration[MAXTIMING];
! 75:
! 76: #define RECORD_TIME(i) getrusage( RUSAGE_SELF, &ru_time[i] )
! 77: #endif /* TIMING */
! 78:
! 79: void FFT_primes(Num, p_prime, p_primroot, p_d)
! 80: int Num, *p_prime, *p_primroot, *p_d;
! 81: {
! 82: *p_prime = Primes[Num].prime;
! 83: *p_primroot = Primes[Num].primroot;
! 84: *p_d = Primes[Num].d;
! 85: }
! 86:
! 87: int FFT_pol_square( d1, C1, Prod, MinMod, wk)
! 88: unsigned int d1;
! 89: int MinMod;
! 90: unsigned int C1[], Prod[], wk[];
! 91: {
! 92:
! 93: unsigned int Low0bits;
! 94: unsigned int Proot, Prime;
! 95: double Pinv;
! 96: void MNpol_square_DFT();
! 97:
! 98: if ( MinMod < 0 || MinMod > NPrimes - 1 ) return 2;
! 99: Prime = Primes[MinMod].prime;
! 100: Proot = Primes[MinMod].primroot;
! 101: Low0bits = Primes[MinMod].d;
! 102: Pinv = (((double)1.0)/((double)Prime));
! 103:
! 104: MNpol_square_DFT( d1, C1, Prod, Proot, Low0bits, Prime, Pinv, wk );
! 105:
! 106: return 0;
! 107: }
! 108:
! 109: int FFT_pol_product( d1, C1, d2, C2, Prod, MinMod, wk)
! 110: unsigned int d1, d2;
! 111: int MinMod;
! 112: unsigned int C1[], C2[], Prod[], wk[];
! 113: {
! 114:
! 115: unsigned int Low0bits;
! 116: unsigned int Proot, Prime;
! 117: double Pinv;
! 118: void MNpol_product_DFT();
! 119:
! 120: if ( MinMod < 0 || MinMod > NPrimes - 1 ) return 2;
! 121: Prime = Primes[MinMod].prime;
! 122: Proot = Primes[MinMod].primroot;
! 123: Low0bits = Primes[MinMod].d;
! 124: Pinv = (((double)1.0)/((double)Prime));
! 125: MNpol_product_DFT( d1, C1, d2, C2, Prod, Proot, Low0bits, Prime, Pinv, wk );
! 126: return 0;
! 127: }
! 128:
! 129: struct oEGT {
! 130: double exectime,gctime;
! 131: };
! 132:
! 133: extern struct oEGT eg_fore,eg_back;
! 134:
! 135: void get_eg(struct oEGT *);
! 136: void add_eg(struct oEGT *,struct oEGT *, struct oEGT *);
! 137:
! 138: void MNpol_product_DFT( d1, C1, d2, C2, Prod, a, low0s, P, Pinv, wk )
! 139: unsigned int d1, d2, low0s;
! 140: unsigned int C1[], C2[], Prod[], a, P, wk[];
! 141: double Pinv;
! 142: /*
! 143: * The amount of space of wk[] must be >= (11/2)*2^{\lceil \log_2(d1+d2+1) \rceil}.
! 144: */
! 145: {
! 146: int i, d, n, log2n, halfn;
! 147: unsigned int *dft1, *dft2, *dftprod, *powa, *true_wk, ninv;
! 148: struct oEGT eg0,eg1;
! 149:
! 150: d = d1 + d2;
! 151: /**/
! 152: for ( n = 2, log2n = 1; n <= d; n <<= 1) log2n++;
! 153: halfn = n >> 1;
! 154: dft1 = wk, dft2 = &wk[n];
! 155: dftprod = &dft2[n];
! 156: powa = &dftprod[n];
! 157: true_wk = &powa[halfn]; /* 2*n areas */
! 158: /**/
! 159: C_PREP_ALPHA( a, low0s, log2n, halfn -1, powa, P, Pinv );
! 160: #ifdef TIMING
! 161: RECORD_TIME(1);
! 162: #endif
! 163: /**/
! 164: get_eg(&eg0);
! 165: C_DFT_FORE( C1, d1 + 1, 1, log2n, powa, dft1, 1, P, Pinv, true_wk );
! 166: #ifdef TIMING
! 167: RECORD_TIME(2);
! 168: #endif
! 169: C_DFT_FORE( C2, d2 + 1, 1, log2n, powa, dft2, 1, P, Pinv, true_wk );
! 170: get_eg(&eg1); add_eg(&eg_fore,&eg0,&eg1);
! 171: #ifdef TIMING
! 172: RECORD_TIME(3);
! 173: #endif
! 174: /**/
! 175: for ( i = 0; i < n; i++ ) {
! 176: AxBmodP( dftprod[i], unsigned int, dft1[i], dft2[i], P, Pinv );
! 177: }
! 178: #ifdef TIMING
! 179: RECORD_TIME(4);
! 180: #endif
! 181: /**/
! 182: ninv = P - (unsigned int)(((int)P) >> log2n);
! 183: get_eg(&eg0);
! 184: C_DFT_BACK( dftprod, n, 1, log2n, powa, Prod, 1, 0, d + 1, ninv, P, Pinv, true_wk );
! 185: get_eg(&eg1); add_eg(&eg_back,&eg0,&eg1);
! 186: }
! 187:
! 188: void MNpol_square_DFT( d1, C1, Prod, a, low0s, P, Pinv, wk )
! 189: unsigned int d1, low0s;
! 190: unsigned int C1[], Prod[], a, P, wk[];
! 191: double Pinv;
! 192: /*
! 193: * The amount of space of wk[] must be >= (11/2)*2^{\lceil \log_2(d1+d2+1) \rceil}.
! 194: */
! 195: {
! 196: int i, d, n, log2n, halfn;
! 197: unsigned int *dft1, *dft2, *dftprod, *powa, *true_wk, ninv;
! 198: struct oEGT eg0,eg1;
! 199:
! 200: d = d1 + d1;
! 201: /**/
! 202: for ( n = 2, log2n = 1; n <= d; n <<= 1) log2n++;
! 203: halfn = n >> 1;
! 204: dft1 = wk, dft2 = &wk[n];
! 205: dftprod = &dft2[n];
! 206: powa = &dftprod[n];
! 207: true_wk = &powa[halfn]; /* 2*n areas */
! 208: /**/
! 209: C_PREP_ALPHA( a, low0s, log2n, halfn -1, powa, P, Pinv );
! 210: #ifdef TIMING
! 211: RECORD_TIME(1);
! 212: #endif
! 213: /**/
! 214: get_eg(&eg0);
! 215: C_DFT_FORE( C1, d1 + 1, 1, log2n, powa, dft1, 1, P, Pinv, true_wk );
! 216: get_eg(&eg1); add_eg(&eg_fore,&eg0,&eg1);
! 217: /**/
! 218: for ( i = 0; i < n; i++ ) {
! 219: AxBmodP( dftprod[i], unsigned int, dft1[i], dft1[i], P, Pinv );
! 220: }
! 221: #ifdef TIMING
! 222: RECORD_TIME(4);
! 223: #endif
! 224: /**/
! 225: ninv = P - (unsigned int)(((int)P) >> log2n);
! 226: get_eg(&eg0);
! 227: C_DFT_BACK( dftprod, n, 1, log2n, powa, Prod, 1, 0, d + 1, ninv, P, Pinv, true_wk );
! 228: get_eg(&eg1); add_eg(&eg_back,&eg0,&eg1);
! 229: }
! 230:
! 231: /****************** TEST CODE ****************/
! 232:
! 233: /*
! 234: #define TEST
! 235: */
! 236: #ifdef TEST
! 237:
! 238: #define RETI 100
! 239:
! 240: #include <stdio.h>
! 241:
! 242: #define Prime 65537 /* 2^16 + 1 */
! 243: #define Low0bits 16
! 244: #define PrimRoot 3
! 245:
! 246: #define PInv (((double)1.0)/((double)Prime))
! 247:
! 248: #define MaxNPnts (1<<Low0bits)
! 249:
! 250: static unsigned int Pol1[MaxNPnts], Pol2[MaxNPnts], PolProdDFT[MaxNPnts], PolProd[MaxNPnts], wk[6*MaxNPnts];
! 251:
! 252: static void generate_pol(), dump_pol(), compare_vals();
! 253:
! 254: #ifdef TIMING
! 255: static void time_spent();
! 256:
! 257: #define pr_timing(pTiming) fprintf(stderr, "user:%3d.%06d + sys:%3d.%06d = %3d.%06d", \
! 258: (pTiming)->user.tv_sec,(pTiming)->user.tv_usec, \
! 259: (pTiming)->sys.tv_sec,(pTiming)->sys.tv_usec, \
! 260: (pTiming)->user.tv_sec+(pTiming)->sys.tv_sec, \
! 261: (pTiming)->user.tv_usec+(pTiming)->sys.tv_usec)
! 262:
! 263: #define Pr_timing(i) pr_timing(&time_duration[i])
! 264: #endif /* TIMING */
! 265: void main( argc, argv )
! 266: int argc;
! 267: char **argv;
! 268: {
! 269: int i, ac = argc - 1, d1, d2, d;
! 270:
! 271: int ii, *error;
! 272:
! 273: char **av = &argv[1];
! 274: unsigned int mnP = (unsigned int)Prime, mnProot = (unsigned int)PrimRoot;
! 275: double Pinv = PInv;
! 276:
! 277: for ( ; ac >= 2; ac -= 2, av += 2 ) {
! 278: d = (d1 = atoi( av[0] )) + (d2 = atoi( av[1] ));
! 279: if ( d1 <= 0 || d2 <= 0 || d >= MaxNPnts ) {
! 280: fprintf( stderr, "*** invalid degrees %s & %s. (sum < %d)\n",
! 281: av[0], av[1], MaxNPnts );
! 282: continue;
! 283: }
! 284: generate_pol( d1, Pol1 );
! 285: /* dump_pol( d1, Pol1 ); */
! 286: generate_pol( d2, Pol2 );
! 287: /* dump_pol( d2, Pol2 ); */
! 288: /**/
! 289:
! 290:
! 291:
! 292: for ( ii=0; ii < RETI; ii++ ){
! 293: #ifdef TIMING
! 294: RECORD_TIME(0);
! 295: #endif
! 296: MNpol_product_DFT( d1, Pol1, d2, Pol2, PolProdDFT, mnProot, Low0bits, mnP, Pinv, wk );
! 297: #ifdef TIMING
! 298: RECORD_TIME(5);
! 299: time_spent( 0, 1, 0 );
! 300: time_spent( 1, 2, 1 );
! 301: time_spent( 2, 3, 2 );
! 302: time_spent( 3, 4, 3 );
! 303: time_spent( 4, 5, 4 );
! 304: time_spent( 0, 5, 5 );
! 305: #endif
! 306: }
! 307:
! 308:
! 309: fprintf( stderr, "DFT mul done ( %d times )\n", RETI );
! 310:
! 311:
! 312:
! 313:
! 314:
! 315: for ( ii=0; ii < RETI; ii++ ){
! 316: #ifdef TIMING
! 317: RECORD_TIME(10);
! 318: #endif
! 319: MNpol_product( d1, Pol1, d2, Pol2, PolProd, mnP, Pinv );
! 320: #ifdef TIMING
! 321: RECORD_TIME(11);
! 322: time_spent( 10, 11, 10 );
! 323: #endif
! 324: }
! 325:
! 326:
! 327:
! 328: fprintf( stderr, "mul by classical alg. done ( %d times )\n", RETI );
! 329: /**/
! 330:
! 331:
! 332: /* PolProdDFT[20] = 0.0; */
! 333:
! 334:
! 335: compare_vals( PolProdDFT, PolProd, d+1 , error);
! 336: if ( *error == 0)
! 337: fprintf( stderr, "******* Result is OK *******\n" );
! 338: else
! 339: fprintf( stderr, "******* Result is NG *******\n" );
! 340:
! 341: /**/
! 342: #ifdef TIMING
! 343: fprintf( stderr, "DFT mul prep: " ); Pr_timing( 0 );
! 344: fprintf( stderr, "\nDFT transform 1: " ); Pr_timing( 1 );
! 345: fprintf( stderr, "\nDFT transform 2: " ); Pr_timing( 2 );
! 346: fprintf( stderr, "\nDFT mult: " ); Pr_timing( 3 );
! 347: fprintf( stderr, "\nDFT inv trans: " ); Pr_timing( 4 );
! 348: fprintf( stderr, "\nDFT pol-mult Tot: " ); Pr_timing( 5 );
! 349: /**/
! 350: fprintf( stderr, "\nClassical mult: " ); Pr_timing( 10 );
! 351: fprintf( stderr, "\n" );
! 352: #endif /* TIMING */
! 353: }
! 354: }
! 355:
! 356: static void generate_pol( d, cf )
! 357: int d;
! 358: unsigned int cf[];
! 359: {
! 360: unsigned int mnP = (unsigned int)Prime;
! 361: int i, c;
! 362:
! 363: for ( i = 0; i < d; i++ ) cf[i] = random() % Prime;
! 364: while ( (c = random() % Prime) == 0 ) ;
! 365: cf[d] = c;
! 366: }
! 367:
! 368: static void dump_pol( d, cf )
! 369: int d;
! 370: unsigned int cf[];
! 371: {
! 372: int i;
! 373:
! 374: printf( "Pol of degree %d over Z/(%d) :\n", d, Prime );
! 375: for ( i = 0; i <= d; i++ ) {
! 376: if ( (i%5) == 0 ) printf( "\t" );
! 377: printf( "%10d,", (int)cf[i] );
! 378: if ( (i%5) == 4 ) printf( "\n" );
! 379: }
! 380: if ( (i%5) != 0 ) printf( "\n" );
! 381: }
! 382:
! 383: void compare_vals( v1, v2, n , error)
! 384: unsigned int v1[], v2[];
! 385: int n;
! 386: int *error;
! 387: {
! 388: int i, tmp;
! 389:
! 390: *error = 0;
! 391:
! 392: for ( i = 0; i < n; i ++) {
! 393: tmp = (int)v1[i] - (int)v2[i];
! 394: tmp = abs(tmp);
! 395: if ( tmp > *error ) *error = tmp;
! 396: }
! 397:
! 398:
! 399:
! 400: /*
! 401: int i, j;
! 402:
! 403:
! 404:
! 405: for ( i = 0; i < n; i += 5 ) {
! 406: printf( " %6d:%10d", i, (int)v1[i] );
! 407: for ( j = 1; j < 5 && i + j < n; j++ ) printf( ",%10d", (int)v1[i+j] );
! 408: printf( "\n" );
! 409: if ( (int)v1[i] == (int)v2[i] && (int)v1[i+1] == (int)v2[i+1]
! 410: && (int)v1[i+2] == (int)v2[i+2] && (int)v1[i+3] == (int)v2[i+3]
! 411: && (int)v1[i+4] == (int)v2[i+4] ) continue;
! 412: printf( " " );
! 413: for ( j = 0; j < 5 && i + j < n; j++ )
! 414: if ( (int)v1[i+j] == (int)v2[i+j] ) printf( " " );
! 415: else printf( "%10d,", (int)v2[i+j] );
! 416: printf( "\n" );
! 417: }
! 418: */
! 419:
! 420:
! 421:
! 422: }
! 423:
! 424: #ifdef TIMING
! 425:
! 426: static void time_spent( s, e, n )
! 427: int s, e, n;
! 428: {
! 429: struct rusage *rus = &ru_time[s], *rue = &ru_time[e];
! 430: struct timeval *tms, *tme;
! 431: long sec, usec;
! 432: struct timing *pt = &time_duration[n];
! 433:
! 434: tms = &rus->ru_utime, tme = &rue->ru_utime;
! 435: sec = tme->tv_sec - tms->tv_sec,
! 436: usec = tme->tv_usec - tms->tv_usec;
! 437: if ( usec < 0 ) usec += 1000000, sec--;
! 438: /* pt->user.tv_sec = sec, pt->user.tv_usec = usec; */
! 439: pt->user.tv_sec += sec, pt->user.tv_usec += usec;
! 440:
! 441:
! 442:
! 443: if ( pt->user.tv_usec > 999999 ){
! 444: pt->user.tv_usec -= 1000000;
! 445: pt->user.tv_sec++;
! 446: }
! 447:
! 448:
! 449:
! 450: /**/
! 451: tms = &rus->ru_stime, tme = &rue->ru_stime;
! 452: sec = tme->tv_sec - tms->tv_sec,
! 453: usec = tme->tv_usec - tms->tv_usec;
! 454: if ( usec < 0 ) usec += 1000000, sec--;
! 455: /* pt->sys.tv_sec = sec, pt->sys.tv_usec = usec; */
! 456: pt->sys.tv_sec += sec, pt->sys.tv_usec += usec;
! 457:
! 458:
! 459:
! 460: if ( pt->sys.tv_usec > 999999 ){
! 461: pt->sys.tv_usec -= 1000000;
! 462: pt->sys.tv_sec++;
! 463: }
! 464:
! 465:
! 466: }
! 467: #endif /* TIMING */
! 468:
! 469: #endif /* TEST */
! 470:
! 471: void MNpol_product( d1, C1, d2, C2, Prod, P, Pinv )
! 472: unsigned int d1, d2;
! 473: unsigned int C1[], C2[], Prod[], P;
! 474: double Pinv;
! 475: {
! 476: unsigned int i, j;
! 477: unsigned int c;
! 478:
! 479: c = C1[0];
! 480: AxBmodP( Prod[0], unsigned int, c, C2[0], P, Pinv );
! 481: for ( i = 1; i <= d2; i++ ) {
! 482: AxBmodPnostrchk( Prod[i], unsigned int, c, C2[i], P, Pinv );
! 483: }
! 484: c = C1[d1];
! 485: if ( d1 > d2 ) {
! 486: for ( i = d2 + 1; i < d1; i++ ) Prod[i] = (unsigned int)0;
! 487: for ( i = 0; i < d2; i++ ) {
! 488: AxBmodPnostrchk( Prod[i+d1], unsigned int, c, C2[i], P, Pinv );
! 489: }
! 490: } else {
! 491: j = d2 - d1;
! 492: for ( i = 0; i <= j; i++ ) {
! 493: AxBplusCmodPnostrchk( Prod[i+d1], unsigned int, c, C2[i], Prod[i+d1], P, Pinv );
! 494: }
! 495: for ( ; i < d2; i++ ) {
! 496: AxBmodPnostrchk( Prod[i+d1], unsigned int, c, C2[i], P, Pinv );
! 497: }
! 498: }
! 499: AxBmodP( Prod[d1+d2], unsigned int, c, C2[d2], P, Pinv );
! 500: /**/
! 501: for ( j = 1; j < d1 - 1; j++ ) {
! 502: c = C1[j];
! 503: if ( c == (unsigned int)0 ) {
! 504: Prod[j] = AstrictmodP( Prod[j], P );
! 505: continue;
! 506: }
! 507: AxBplusCmodP( Prod[j], unsigned int, c, C2[0], Prod[j], P, Pinv );
! 508: for ( i = 1; i <= d2; i++ ) {
! 509: AxBplusCmodPnostrchk( Prod[i+j], unsigned int, c, C2[i], Prod[i+j], P, Pinv );
! 510: }
! 511: }
! 512: c = C1[j = d1-1];
! 513: for ( i = 0; i <= d2; i++ ) {
! 514: AxBplusCmodP( Prod[i+j], unsigned int, c, C2[i], Prod[i+j], P, Pinv );
! 515: }
! 516: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>