Annotation of OpenXM_contrib2/asir2018/fft/dft.c, Revision 1.1
1.1 ! noro 1: /*
! 2: * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
! 3: * All rights reserved.
! 4: *
! 5: * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
! 6: * non-exclusive and royalty-free license to use, copy, modify and
! 7: * redistribute, solely for non-commercial and non-profit purposes, the
! 8: * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
! 9: * conditions of this Agreement. For the avoidance of doubt, you acquire
! 10: * only a limited right to use the SOFTWARE hereunder, and FLL or any
! 11: * third party developer retains all rights, including but not limited to
! 12: * copyrights, in and to the SOFTWARE.
! 13: *
! 14: * (1) FLL does not grant you a license in any way for commercial
! 15: * purposes. You may use the SOFTWARE only for non-commercial and
! 16: * non-profit purposes only, such as academic, research and internal
! 17: * business use.
! 18: * (2) The SOFTWARE is protected by the Copyright Law of Japan and
! 19: * international copyright treaties. If you make copies of the SOFTWARE,
! 20: * with or without modification, as permitted hereunder, you shall affix
! 21: * to all such copies of the SOFTWARE the above copyright notice.
! 22: * (3) An explicit reference to this SOFTWARE and its copyright owner
! 23: * shall be made on your publication or presentation in any form of the
! 24: * results obtained by use of the SOFTWARE.
! 25: * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
! 26: * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
! 27: * for such modification or the source code of the modified part of the
! 28: * SOFTWARE.
! 29: *
! 30: * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
! 31: * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
! 32: * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
! 33: * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
! 34: * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
! 35: * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
! 36: * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
! 37: * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
! 38: * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
! 39: * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
! 40: * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
! 41: * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
! 42: * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
! 43: * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
! 44: * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
! 45: * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
! 46: * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
! 47: *
! 48: * $OpenXM$
! 49: */
! 50: /*
! 51: * store arith modulus
! 52: * -------------------------------------------------------
! 53: * IEEE: float (23+1) <-> double (52+1) 24 bits
! 54: * int (32) <-> double (52+1) 26 bits
! 55: * 370: float (24) <-> double (56) 24/25 bits
! 56: * int (32) <-> double (56) 28 bits
! 57: */
! 58:
! 59:
! 60: #include "dft.h"
! 61:
! 62:
! 63: /*
! 64: #define C_DFT_FORE C_DFTfore
! 65: #define C_DFT_BACK C_DFTback
! 66: #define C_PREP_ALPHA C_prep_alpha
! 67: */
! 68:
! 69:
! 70: /*****************************************************/
! 71:
! 72: void C_DFT_FORE( in, nin, i1, K, powa,
! 73: #ifdef POWA_STRIDE
! 74: a1,
! 75: #endif
! 76: out, o1, P, Pinv, wk )
! 77: ModNum in[], powa[], out[], wk[];
! 78: ModNum P;
! 79: int nin, i1, K, o1;
! 80: #ifdef POWA_STRIDE
! 81: int a1;
! 82: #endif
! 83: double Pinv;
! 84: /*
! 85: * Let N = 2^K, and \alpha be a primitive N-th root of 1.
! 86: * out[0:N-1][o1] = DFT of in[0:nin-1][i1] using \alpha modulo P.
! 87: * The powers \alpha (to the degrees upto N/2=2^{K-1} at least) are given in powa[*],
! 88: * (or powa[a1**]).
! 89: * nin <= N assumed.
! 90: * wk[] must have 2^K entries at least.
! 91: */
! 92: {
! 93: int istep, ostep, i, j, k, idist, odist, n, Ndiv2n, K_k,
! 94: #ifndef OPT
! 95: K_k_1,
! 96: #endif
! 97: #ifdef POWA_STRIDE
! 98: aj,
! 99: #endif
! 100: inwk;
! 101: int idistwk, idistout, iostep, oostep;
! 102: ModNum *qi, *qo, *qis, *qos, a, t0, tn;
! 103:
! 104: /*
! 105: * for k = K-1, ..., 0 do the following:
! 106: *
! 107: * Out[2^{K-k}*i + j] <= t0 + \alpha^{2^k * j} * tn;
! 108: * Out[2^{K-k}*i + j + 2^{K-k-1}] <= t0 - \alpha^{2^k * j} * tn;
! 109: *
! 110: * where t0 = In[2^{K-k-1}*i + j] and
! 111: * tn = In[2^{K-k-1}*i + j + 2^{K-1}],
! 112: * for 0 <= i < 2^k and 0 <= j < 2^{K-k-1}.
! 113: *
! 114: */
! 115: /* In the following, n = 2^k and Ndiv2n = 2^{K-k-1}.
! 116: */
! 117: n = 1 << (k = K - 1);
! 118: idistwk = n, idistout = o1 << k;
! 119: /* when k = K-1, Out[2*i] <= In[i] + In[i+2^{K-1}],
! 120: * and Out[2*i+1] <= In[i] - In[i+2^{K-1}], for 0 <= i < 2^{K-1}.
! 121: */
! 122: if ( k&1 ) qo = wk, ostep = 2, inwk = 1, odist = 1;
! 123: else qo = out, ostep = o1 << 1, inwk = 0, odist = o1;
! 124: qi = in;
! 125: /**/
! 126: if ( nin <= n ) {
! 127: for ( i = nin; i > 0; i--, qi += i1, qo += ostep ) qo[0] = qo[odist] = qi[0];
! 128: for ( i = n - nin; i > 0; i--, qo += ostep ) qo[0] = qo[odist] = 0;
! 129: } else {
! 130: idist = i1 << k; /* distance = 2^{K-1} */
! 131: for ( j = i = nin - n; i > 0; i--, qi += i1, qo += ostep ) {
! 132: t0 = qi[0], tn = qi[idist];
! 133: if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
! 134: else {
! 135: qo[0] = AplusBmodP( t0, tn, P );
! 136: qo[odist] = A_BmodP( t0, tn, P );
! 137: }
! 138: }
! 139: for ( i = n - j; i > 0; i--, qi += i1, qo += ostep )
! 140: qo[0] = qo[odist] = qi[0];
! 141: }
! 142: /******/
! 143: #ifndef J_INNER
! 144: for ( K_k = 1, Ndiv2n = 1; --k > 0; inwk = 1 - inwk ) {
! 145: #ifndef OPT
! 146: K_k = (K_k_1 = K_k) + 1;
! 147: #endif
! 148: n >>= 1; /* == 2^k */
! 149: Ndiv2n <<= 1; /* == 2^{K-k-1} */
! 150: if ( inwk )
! 151: qi = wk, qo = out, iostep = 1, oostep = o1, idist = idistwk;
! 152: else
! 153: qi = out, qo = wk, iostep = o1, oostep = 1, idist = idistout;
! 154: qis = qi, qos = qo;
! 155: /**/
! 156: istep = ostep; /* = iostep << K_k_1; */
! 157: #ifndef OPT
! 158: ostep = oostep << K_k;
! 159: odist = oostep << K_k_1;
! 160: #else
! 161: odist = oostep << K_k;
! 162: ++K_k;
! 163: ostep = oostep << K_k;
! 164: #endif /* OPT */
! 165: /* j = 0 */
! 166: for ( i = n; i-- > 0; qi += istep, qo += ostep ) {
! 167: t0 = qi[0], tn = qi[idist];
! 168: if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
! 169: else {
! 170: qo[0] = AplusBmodP( t0, tn, P );
! 171: qo[odist] = A_BmodP( t0, tn, P );
! 172: }
! 173: }
! 174: /**/
! 175: #ifdef POWA_STRIDE
! 176: for ( aj = a1, j = 1; j < Ndiv2n; aj += a1, j++ ) {
! 177: a = powa[aj << k];
! 178: #else
! 179: for ( j = 1; j < Ndiv2n; j++ ) {
! 180: a = powa[j << k];
! 181: #endif
! 182: qi = (qis += iostep), qo = (qos += oostep);
! 183: for ( i = 0; i < n; i++, qi += istep, qo += ostep ) {
! 184: t0 = qi[0], tn = qi[idist];
! 185: if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
! 186: else {
! 187: AxBmodP( tn, ModNum, tn, a, P, Pinv );
! 188: qo[0] = AplusBmodP( t0, tn, P );
! 189: qo[odist] = A_BmodP( t0, tn, P );
! 190: }
! 191: }
! 192: }
! 193: }
! 194: /*
! 195: * When k = 0, for i = 0 (0 <= i < 2^0=1) and 0 <= j < 2^{K-1},
! 196: * Out[j] <= t0 + \alpha^{j}*tn and Out[j+2^{K-1}] <= t0 - \alpha^{j}*tn,
! 197: * where t0 = In[j] and tn = In[j + 2^{K-1}].
! 198: */
! 199: qi = wk, qo = out, idist = idistwk, odist = idistout;
! 200: /* == 2^{K-1}, == o1 << (K-1) */
! 201: t0 = qi[0], tn = qi[idist];
! 202: if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
! 203: else {
! 204: qo[0] = AplusBmodP( t0, tn, P );
! 205: qo[odist] = A_BmodP( t0, tn, P );
! 206: }
! 207: #ifdef POWA_STRIDE
! 208: for ( k = o1, aj = a1, j = 1; j < idist; aj += a1, j++ ) {
! 209: #else
! 210: for ( k = o1, j = 1; j < idist; j++ ) {
! 211: #endif
! 212: qi++, qo += k;
! 213: t0 = qi[0], tn = qi[idist];
! 214: if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
! 215: else {
! 216: #ifdef POWA_STRIDE
! 217: a = powa[aj];
! 218: #else
! 219: a = powa[j];
! 220: #endif
! 221: AxBmodP( tn, ModNum, tn, a, P, Pinv );
! 222: qo[0] = AplusBmodP( t0, tn, P );
! 223: qo[odist] = A_BmodP( t0, tn, P );
! 224: }
! 225: }
! 226: #else
! 227: for ( K_k = 1, Ndiv2n = 1; --k >= 0; inwk = 1 - inwk ) {
! 228: #ifndef OPT
! 229: K_k = (K_k_1 = K_k) + 1;
! 230: #endif
! 231: n >>= 1; /* == 2^k */
! 232: Ndiv2n <<= 1; /* == 2^{K-k-1} */
! 233: if ( inwk ) qi = wk, qo = out, istep = 1, ostep = o1, idist = idistwk;
! 234: else qi = out, qo = wk, istep = o1, ostep = 1, idist = idistout;
! 235: qis = qi, qos = qo;
! 236: /**/
! 237: iostep = oostep; /* = istep << K_k_1; */
! 238: #ifndef OPT
! 239: oostep = ostep << K_k;
! 240: odist = ostep << K_k_1;
! 241: #else
! 242: odist = ostep << K_k;
! 243: ++K_k;
! 244: oostep = ostep << K_k;
! 245: #endif /* OPT */
! 246: for ( i = n; i-- > 0; qis += iostep, qos += oostep ) {
! 247: /* j = 0 */
! 248: t0 = qis[0], tn = qis[idist];
! 249: if ( tn == (ModNum)0 ) qos[0] = qos[odist] = t0;
! 250: else {
! 251: qos[0] = AplusBmodP( t0, tn, P );
! 252: qos[odist] = A_BmodP( t0, tn, P );
! 253: }
! 254: #ifdef POWA_STRIDE
! 255: for ( aj = a1, j = 1, qi = qis, qo = qos; j < Ndiv2n; aj += a1, j++ ) {
! 256: #else
! 257: for ( j = 1, qi = qis, qo = qos; j < Ndiv2n; j++ ) {
! 258: #endif
! 259: qi += istep, qo += ostep;
! 260: t0 = qi[0], tn = qi[idist];
! 261: if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
! 262: else {
! 263: #ifdef POWA_STRIDE
! 264: a = powa[aj << k];
! 265: #else
! 266: a = powa[j << k];
! 267: #endif
! 268: AxBmodP( tn, ModNum, tn, a, P, Pinv );
! 269: qo[0] = AplusBmodP( t0, tn, P );
! 270: qo[odist] = A_BmodP( t0, tn, P );
! 271: }
! 272: }
! 273: }
! 274: }
! 275: #endif
! 276: }
! 277:
! 278: /*********************************************************************
! 279: *
! 280: * NOTE:
! 281: * (1) Let \alpha be a primitive N-th root of unity modulo P,
! 282: * where N is even,
! 283: * and t be an integer s.t. 0 <= t < N/2.
! 284: * Then, 1/\alpha^t is given by (- \alpha^s) with s = N/2 - t.
! 285: *
! 286: * (2) Let P be a prime s.t. P = 2^n*Q + 1, where Q is an odd integer.
! 287: * Then, for 0 < s <= n, 2^{-s} \equiv - 2^{n-s}*Q \bmod P,
! 288: * because 1 \equiv - 2^n*Q \bmod P.
! 289: *
! 290: **********************************************************************/
! 291:
! 292: void C_DFT_BACK( in, N, i1, log2N, powa,
! 293: #ifdef POWA_STRIDE
! 294: a1,
! 295: #endif
! 296: out, o1, osi, nout, Ninv, P, Pinv, wk )
! 297: ModNum in[], powa[], out[], wk[];
! 298: int N, log2N, osi, nout, i1, o1;
! 299: #ifdef POWA_STRIDE
! 300: int a1;
! 301: #endif
! 302: ModNum P, Ninv;
! 303: double Pinv;
! 304: /*
! 305: * Let K denote log2N. Let N = 2^K, and \alpha be a primitive N-th root of 1.
! 306: * This routine computes the inverse discrete-Fourier transform of in[0:N-1][i1]
! 307: * using \alpha^{-1} modulo P, and store the `nout' transformed data from the
! 308: * `osi'-th in out[0:nout-1][o1]. 0 <= osi < N-1 and 0 < nout <= N.
! 309: * The powers of \alpha (to the degrees upto N/2=2^{K-1} at least) are given in powa[*] (or powa[a1* *]).
! 310: * The area wk[] is used to contain the intermediate transformed data, and thus,
! 311: * must have 2*N entries at least. Notice that `out' cannot be used because
! 312: * its space amount (`nout') may not be sufficient for this purpose.
! 313: */
! 314: {
! 315: int i, j, k, n, is, os, istep, ostep, idist, inwk, halfN;
! 316: #ifdef POWA_STRIDE
! 317: int aj, halfNa;
! 318: #endif
! 319: ModNum *qi, *qo, *qis, *qos, a, t0, tn, tt;
! 320:
! 321: /*
! 322: * for k = K-1, ..., 0 do the following:
! 323: *
! 324: * Out[2^{K-k}*i + j] <= t0 + \alpha^{- 2^k * j} * tn;
! 325: * = t0 - \alpha^{N/2 - 2^k * j}*tn;
! 326: * Out[2^{K-k}*i + j + 2^{K-k-1}] <= t0 - \alpha^{- 2^k * j} * tn;
! 327: * = t0 + \alpha^{N/2 - 2^k * j}*tn;
! 328: *
! 329: * where t0 = In[2^{K-k-1}*i + j] and
! 330: * tn = In[2^{K-k-1}*i + j + 2^{K-1}],
! 331: * for 0 <= i < 2^k and 0 <= j < 2^{K-k-1}.
! 332: *
! 333: */
! 334: if ( log2N == 1 ) {
! 335: /* K = 1, k = 0, 0 <= i < 1, 0 <= j < 1.
! 336: * Out[0] <= t0 + tn, Out[1] <= t0 - tn,
! 337: * where t0 = In[0] and Tn = In[1].
! 338: */
! 339: t0 = in[0], tn = in[i1];
! 340: if ( osi == 0 ) {
! 341: /* out[0] = (t0 + tn)*Ninv; */
! 342: tt = tn == (ModNum)0 ? t0 : AplusBmodP( t0, tn, P );
! 343: if ( tt != (ModNum)0 ) {
! 344: AxBmodP( tt, ModNum, tt, Ninv, P, Pinv );
! 345: }
! 346: out[0] = tt;
! 347: i = 1;
! 348: } else i = 0;
! 349: /**/
! 350: if ( osi + nout >= 2 ) {
! 351: /* out[osi == 0 ? 1 : 0] = (t0 - tn)*Ninv; */
! 352: tt = tn == (ModNum)0 ? t0 : A_BmodP( t0, tn, P );
! 353: if ( tt != (ModNum)0 ) {
! 354: AxBmodP( tt, ModNum, tt, Ninv, P, Pinv );
! 355: }
! 356: out[i] = tt;
! 357: }
! 358: return;
! 359: }
! 360: /****/
! 361: halfN = n = 1 << (k = log2N - 1); /* == 2^{K-1} */
! 362: #ifdef POWA_STRIDE
! 363: halfNa = a1 << k;
! 364: #endif
! 365: /*
! 366: * when k = K-1,
! 367: * Out[2*i] <= t0 + tn, and Out[2*i+1] <= t0 - tn,
! 368: * where t0 = In[i] and tn = In[i+2^{K-1}],
! 369: * for 0 <= i < 2^{K-1}.
! 370: */
! 371: qi = in, istep = i1, idist = i1 << k, qo = wk, inwk = 0;
! 372: for ( i = n; i-- > 0; qi += istep, qo += 2 ) {
! 373: t0 = qi[0], tn = qi[idist];
! 374: if ( tn == (ModNum)0 ) qo[0] = qo[1] = t0;
! 375: else {
! 376: qo[0] = AplusBmodP( t0, tn, P );
! 377: qo[1] = A_BmodP( t0, tn, P );
! 378: }
! 379: }
! 380: #ifdef EBUG
! 381: fprintf( stderr, "::: DFT^{-1} ::: after the first step\n" );
! 382: for ( qo = wk, i = 0; i < N; i++ ) {
! 383: if ( (i%5) == 0 ) fprintf( stderr, "\t" );
! 384: fprintf( stderr, "%10d,", (int)wk[i] );
! 385: if ( (i%5) == 4 ) fprintf( stderr, "\n" );
! 386: }
! 387: if ( (i%5) != 0 ) fprintf( stderr, "\n" );
! 388: #endif
! 389: /**/
! 390: idist = halfN;
! 391: for ( ostep = 2; --k > 0; inwk = 1 - inwk ) {
! 392: n >>= 1;
! 393: istep = ostep; /* == 2^{K-k-1} */
! 394: ostep <<= 1; /* == 2^{K-k} */
! 395: if ( inwk ) qi = &wk[N], qo = wk;
! 396: else qi = wk, qo = &wk[N];
! 397: qis = qi, qos = qo;
! 398: /*
! 399: * for j = 0,
! 400: * Out[2^{K-k}*i] <= t0 + tn, and Out[2^{K-k}*i + 2^{K-k-1}] <= t0 - tn,
! 401: * where t0 = In[2^{K-k-1}*i] and tn = In[2^{K-k-1}*i + 2^{K-1}].
! 402: */
! 403: for ( i = n, is = os = 0; i-- > 0; qi += istep, qo += ostep ) {
! 404: t0 = qi[0], tn = qi[idist];
! 405: if ( tn == (ModNum)0 ) qo[0] = qo[istep] = t0;
! 406: else {
! 407: qo[0] = AplusBmodP( t0, tn, P );
! 408: qo[istep] = A_BmodP( t0, tn, P );
! 409: }
! 410: }
! 411: #ifdef POWA_STRIDE
! 412: for ( aj = a1, j = 1; j < istep; aj += a1, j++ ) {
! 413: /*** a = P - powa[halfNa - (aj << k)]; ***/
! 414: a = powa[halfNa - (aj << k)];
! 415: #else
! 416: for ( j = 1; j < istep; j++ ) {
! 417: /*** a = P - powa[halfN - (j << k)]; ***/
! 418: a = powa[halfN - (j << k)];
! 419: #endif
! 420: qi = ++qis, qo = ++qos;
! 421: for ( i = n; i-- > 0; qi += istep, qo += ostep ) {
! 422: t0 = qi[0], tn = qi[idist];
! 423: if ( tn == (ModNum)0 ) qo[0] = qo[istep] = t0;
! 424: else {
! 425: AxBmodP( tn, ModNum, tn, a, P, Pinv );
! 426: /*** qo[0] = AplusBmodP( t0, tn, P ); ***/
! 427: /*** qo[istep] = A_BmodP( t0, tn, P ); ***/
! 428: qo[0] = A_BmodP( t0, tn, P );
! 429: qo[istep] = AplusBmodP( t0, tn, P );
! 430: }
! 431: }
! 432: }
! 433: }
! 434: #if 0
! 435: final:
! 436: #endif
! 437: /*
! 438: * The final step of k = 0. i can take only the value 0.
! 439: *
! 440: * Out[j] <= t0 + \alpha^{-j} * tn;
! 441: * Out[j + 2^{K-1}] <= t0 - \alpha^{-j} * tn;
! 442: *
! 443: * where t0 = In[j] and tn = In[j + 2^{K-1}],
! 444: * for 0 <= j < 2^{K-1}.
! 445: *
! 446: */
! 447: qo = out, qi = &wk[inwk ? N : 0];
! 448: if ( (n = osi + nout) > N ) nout = (n = N) - osi; /* true nout */
! 449: if ( (k = nout - halfN) <= 0 ) { /* no overlap, i.e., no +- */
! 450: if ( osi <= 0 ) {
! 451: t0 = qi[0], tn = qi[idist];
! 452: if ( tn != (ModNum)0 ) t0 = AplusBmodP( t0, tn, P );
! 453: if ( t0 == (ModNum)0 ) qo[0] = t0;
! 454: else {
! 455: AxBmodP( qo[0], ModNum, t0, Ninv, P, Pinv );
! 456: }
! 457: #ifdef POWA_STRIDE
! 458: aj = a1;
! 459: #endif
! 460: j = 1, k = n, i = 0, qo += o1;
! 461: } else {
! 462: j = osi;
! 463: #ifdef POWA_STRIDE
! 464: aj = osi*a1;
! 465: #endif
! 466: if ( n <= halfN ) i = 0, k = n; /* lower only */
! 467: else if ( j == halfN ) goto L_halfN; /* start with j = N/2 */
! 468: else if ( j > halfN ) goto L_upper; /* upper only */
! 469: else i = n - halfN + 1, k = halfN; /* we have lower, and i upper */
! 470: }
! 471: } else {
! 472: os = o1*halfN;
! 473: #ifdef EBUG
! 474: fprintf( stderr, "::::: DFT^{-1} ::: os=%d*%d=%d\n", o1, halfN, os );
! 475: #endif
! 476: if ( osi <= 0 ) {
! 477: t0 = qi[0], tn = qi[idist];
! 478: if ( tn == (ModNum)0 ) {
! 479: if ( t0 == (ModNum)0 ) tt = t0;
! 480: else { AxBmodP( tt, ModNum, t0, Ninv, P, Pinv ); }
! 481: qo[0] = qo[os] = tt;
! 482: } else {
! 483: tt = AplusBmodP( t0, tn, P );
! 484: if ( tt == (ModNum)0 ) qo[0] = tt;
! 485: else { AxBmodP( qo[0], ModNum, tt, Ninv, P, Pinv ); }
! 486: tt = A_BmodP( t0, tn, P );
! 487: if ( tt == (ModNum)0 ) qo[os] = tt;
! 488: else { AxBmodP( qo[os], ModNum, tt, Ninv, P, Pinv ); }
! 489: }
! 490: #ifdef EBUG
! 491: fprintf( stderr, "::::: DFT^{-1} ::: [0]=%d, [%d]=%d\n", (int)qo[0], os, (int)qo[os] );
! 492: #endif
! 493: #ifdef POWA_STRIDE
! 494: j = 1, aj = a1, n = halfN, i = 0, qo += o1, k--;
! 495: } else j = osi, aj = osi*a1, i = (n -= k) - halfN;
! 496: /**/
! 497: for ( ; k-- > 0; aj += a1, j++, qo += o1 ) {
! 498: #else
! 499: j = 1, n = halfN, i = 0, qo += o1, k--;
! 500: } else j = osi, i = (n -= k) - halfN;
! 501: /**/
! 502: for ( ; k-- > 0; j++, qo += o1 ) {
! 503: #endif
! 504: t0 = qi[j], tn = qi[j+idist];
! 505: if ( tn == (ModNum)0 ) {
! 506: if ( t0 != (ModNum)0 ) {
! 507: AxBmodP( t0, ModNum, t0, Ninv, P, Pinv );
! 508: }
! 509: qo[0] = qo[os] = t0;
! 510: } else {
! 511: #ifdef POWA_STRIDE
! 512: /*** a = P - powa[halfNa - aj]; ***/
! 513: a = powa[halfNa - aj];
! 514: #else
! 515: /*** a = P - powa[halfN - j]; ***/
! 516: a = powa[halfN - j];
! 517: #endif
! 518: AxBmodP( tn, ModNum, tn, a, P, Pinv );
! 519: /*** tt = AplusBmodP( t0, tn, P ); ***/
! 520: tt = A_BmodP( t0, tn, P );
! 521: if ( tt == (ModNum)0 ) qo[0] = tt;
! 522: else { AxBmodP( qo[0], ModNum, tt, Ninv, P, Pinv ); }
! 523: /*** tt = A_BmodP( t0, tn, P ); ***/
! 524: tt = AplusBmodP( t0, tn, P );
! 525: if ( tt == (ModNum)0 ) qo[os] = tt;
! 526: else { AxBmodP( qo[os], ModNum, tt, Ninv, P, Pinv ); }
! 527: }
! 528: }
! 529: k = halfN;
! 530: }
! 531: /*
! 532: * At this point, k is an upper limit < N/2 for j, i is the number of j's
! 533: * > N/2, and n is the real upper limit of j.
! 534: */
! 535: #ifdef POWA_STRIDE
! 536: for ( ; j < k; aj += a1, j++, qo += o1 ) {
! 537: #else
! 538: for ( ; j < k; j++, qo += o1 ) {
! 539: #endif
! 540: t0 = qi[j], tn = qi[j+idist];
! 541: if ( tn == (ModNum)0 ) {
! 542: if ( t0 == (ModNum)0 ) qo[0] = t0;
! 543: else { AxBmodP( qo[0], ModNum, t0, Ninv, P, Pinv ); }
! 544: } else {
! 545: #ifdef POWA_STRIDE
! 546: a = P - powa[halfNa - aj];
! 547: #else
! 548: a = P - powa[halfN - j];
! 549: #endif
! 550: AxBplusCmodP( tt, ModNum, tn, a, t0, P, Pinv );
! 551: if ( tt == (ModNum)0 ) qo[0] = tt;
! 552: else { AxBmodP( qo[0], ModNum, tt, Ninv, P, Pinv ); }
! 553: }
! 554: }
! 555: if ( i <= 0 ) return;
! 556: /**/
! 557: L_halfN:
! 558: t0 = qi[0], tn = qi[idist];
! 559: tt = tn == (ModNum)0 ? t0 : A_BmodP( t0, tn, P );
! 560: if ( tt == (ModNum)0 ) qo[0] = tt;
! 561: else { AxBmodP( qo[0], ModNum, tt, Ninv, P, Pinv ); }
! 562: qo += o1, j++;
! 563: /**/
! 564: L_upper:
! 565: #ifdef POWA_STRIDE
! 566: for ( n -= halfN, aj = a1, j = 1; j < n; aj += a1, j++, qo += o1 ) {
! 567: #else
! 568: for ( n -= halfN, j = 1; j < n; j++, qo += o1 ) {
! 569: #endif
! 570: t0 = qi[j], tn = qi[j+idist];
! 571: if ( tn == (ModNum)0 ) {
! 572: if ( t0 == (ModNum)0 ) qo[0] = t0;
! 573: else { AxBmodP( qo[0], ModNum, t0, Ninv, P, Pinv ); }
! 574: } else {
! 575: #ifdef POWA_STRIDE
! 576: a = powa[halfNa - aj];
! 577: #else
! 578: a = powa[halfN - j];
! 579: #endif
! 580: AxBplusCmodP( tt, ModNum, tn, a, t0, P, Pinv );
! 581: if ( tt == (ModNum)0 ) qo[0] = tt;
! 582: else { AxBmodP( qo[0], ModNum, tt, Ninv, P, Pinv ); }
! 583: }
! 584: }
! 585: }
! 586:
! 587: #if defined(USE_FLOAT)
! 588: void C_PREP_ALPHA( r, log2ord, log2k, n, tbl, P, Pinv )
! 589: ModNum r, tbl[], P;
! 590: int log2ord, log2k, n;
! 591: double Pinv;
! 592: /*
! 593: * Let K and k denote log2ord and log2k, respectively.
! 594: * Let r be a primitive (2^K)-th root of unity in Z/(P), where P is a prime.
! 595: * Compute a = r^{2^{K-k}}, a primitive (2^k)-th root of unity, and
! 596: * its powers to the degrees upto n in tbl[*].
! 597: */
! 598: {
! 599: int i;
! 600: ModNum *q;
! 601: double t, w, dp = (double) P;
! 602:
! 603: for ( w = (double) r, i = log2ord - log2k; i > 0; i-- ) {
! 604: w *= w;
! 605: if ( w < dp ) continue;
! 606: w -= (dp*(double)((int) (w*Pinv)));
! 607: }
! 608: tbl[0] = 1;
! 609: tbl[1] = (ModNum)(t = w);
! 610: for ( q = &tbl[2], i = n - 1; i > 0; i-- ) {
! 611: t *= w;
! 612: if ( t >= dp ) t -= (dp*(double)((int) (t*Pinv)));
! 613: *q++ = t;
! 614: }
! 615: }
! 616: #else
! 617:
! 618: void C_PREP_ALPHA( r, log2ord, log2k, n, tbl, P, Pinv )
! 619: ModNum r, tbl[], P;
! 620: int log2ord, log2k, n;
! 621: double Pinv;
! 622: /*
! 623: * Let K and k denote log2ord and log2k, respectively.
! 624: * Let r be a primitive (2^K)-th root of unity in Z/(P), where P is a prime.
! 625: * Compute a = r^{2^{K-k}}, a primitive (2^k)-th root of unity, and
! 626: * its powers to the degrees upto n in tbl[*].
! 627: */
! 628: {
! 629: int i;
! 630: ModNum *q;
! 631: ModNum t, w, s;
! 632:
! 633: for ( w = r, i = log2ord - log2k; i > 0; i-- ) {
! 634: AxBmodP( t, ModNum, w, w, P, Pinv );
! 635: w = t;
! 636: }
! 637: tbl[0] = 1;
! 638: tbl[1] = (ModNum)(t = w);
! 639: for ( q = &tbl[2], i = n - 1; i > 0; i-- ) {
! 640: AxBmodP( s, ModNum, t, w, P, Pinv );
! 641: t = s;
! 642: *q++ = t;
! 643: }
! 644: }
! 645: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>