[BACK]Return to dft.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib2 / asir2000 / fft

Annotation of OpenXM_contrib2/asir2000/fft/dft.c, Revision 1.4

1.2       noro        1: /*
                      2:  * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
                      3:  * All rights reserved.
                      4:  *
                      5:  * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
                      6:  * non-exclusive and royalty-free license to use, copy, modify and
                      7:  * redistribute, solely for non-commercial and non-profit purposes, the
                      8:  * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
                      9:  * conditions of this Agreement. For the avoidance of doubt, you acquire
                     10:  * only a limited right to use the SOFTWARE hereunder, and FLL or any
                     11:  * third party developer retains all rights, including but not limited to
                     12:  * copyrights, in and to the SOFTWARE.
                     13:  *
                     14:  * (1) FLL does not grant you a license in any way for commercial
                     15:  * purposes. You may use the SOFTWARE only for non-commercial and
                     16:  * non-profit purposes only, such as academic, research and internal
                     17:  * business use.
                     18:  * (2) The SOFTWARE is protected by the Copyright Law of Japan and
                     19:  * international copyright treaties. If you make copies of the SOFTWARE,
                     20:  * with or without modification, as permitted hereunder, you shall affix
                     21:  * to all such copies of the SOFTWARE the above copyright notice.
                     22:  * (3) An explicit reference to this SOFTWARE and its copyright owner
                     23:  * shall be made on your publication or presentation in any form of the
                     24:  * results obtained by use of the SOFTWARE.
                     25:  * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
1.3       noro       26:  * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
1.2       noro       27:  * for such modification or the source code of the modified part of the
                     28:  * SOFTWARE.
                     29:  *
                     30:  * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
                     31:  * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
                     32:  * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
                     33:  * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
                     34:  * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
                     35:  * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
                     36:  * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
                     37:  * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
                     38:  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
                     39:  * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
                     40:  * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
                     41:  * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
                     42:  * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
                     43:  * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
                     44:  * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
                     45:  * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
                     46:  * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
                     47:  *
1.4     ! ohara      48:  * $OpenXM: OpenXM_contrib2/asir2000/fft/dft.c,v 1.3 2000/08/22 05:04:12 noro Exp $
1.2       noro       49: */
1.1       noro       50: /*
                     51:  *             store                   arith           modulus
                     52:  *     -------------------------------------------------------
                     53:  *     IEEE:   float (23+1)    <->     double (52+1)   24 bits
                     54:  *             int   (32)      <->     double (52+1)   26 bits
                     55:  *     370:    float (24)      <->     double (56)     24/25 bits
                     56:  *             int   (32)      <->     double (56)     28 bits
                     57:  */
                     58:
                     59:
                     60: #include "dft.h"
                     61:
                     62:
                     63: /*
                     64: #define C_DFT_FORE C_DFTfore
                     65: #define C_DFT_BACK C_DFTback
                     66: #define C_PREP_ALPHA C_prep_alpha
                     67: */
                     68:
                     69:
                     70: /*****************************************************/
                     71:
                     72: void C_DFT_FORE( in, nin, i1, K, powa,
                     73: #ifdef POWA_STRIDE
                     74:                 a1,
                     75: #endif
                     76:                 out, o1, P, Pinv, wk )
                     77: ModNum in[], powa[], out[], wk[];
                     78: ModNum P;
                     79: int nin, i1, K, o1;
                     80: #ifdef POWA_STRIDE
                     81: int a1;
                     82: #endif
                     83: double Pinv;
                     84: /*
                     85:  *  Let N = 2^K, and \alpha be a primitive N-th root of 1.
                     86:  *  out[0:N-1][o1] = DFT of in[0:nin-1][i1] using \alpha modulo P.
                     87:  *  The powers \alpha (to the degrees upto N/2=2^{K-1} at least) are given in powa[*],
                     88:  *  (or powa[a1**]).
                     89:  *  nin <= N assumed.
                     90:  *  wk[] must have 2^K entries at least.
                     91:  */
                     92: {
                     93:   int istep, ostep, i, j, k, idist, odist, n, Ndiv2n, K_k,
                     94: #ifndef OPT
                     95:       K_k_1,
                     96: #endif
                     97: #ifdef POWA_STRIDE
                     98:       aj,
                     99: #endif
                    100:       inwk;
                    101:   int idistwk, idistout, iostep, oostep;
                    102:   ModNum *qi, *qo, *qis, *qos, a, t0, tn;
                    103:
                    104:   /*
                    105:    *   for k = K-1, ..., 0 do the following:
                    106:    *
                    107:    *       Out[2^{K-k}*i + j]             <= t0 + \alpha^{2^k * j} * tn;
                    108:    *       Out[2^{K-k}*i + j + 2^{K-k-1}] <= t0 - \alpha^{2^k * j} * tn;
                    109:    *
                    110:    *      where t0 = In[2^{K-k-1}*i + j] and
                    111:    *           tn = In[2^{K-k-1}*i + j + 2^{K-1}],
                    112:    *      for 0 <= i < 2^k and 0 <= j < 2^{K-k-1}.
                    113:    *
                    114:    */
                    115:   /*  In the following, n = 2^k and Ndiv2n = 2^{K-k-1}.
                    116:    */
                    117:   n = 1 << (k = K - 1);
                    118:   idistwk = n,  idistout = o1 << k;
                    119:   /*  when k = K-1, Out[2*i] <= In[i] + In[i+2^{K-1}],
                    120:    *  and           Out[2*i+1] <= In[i] - In[i+2^{K-1}], for 0 <= i < 2^{K-1}.
                    121:    */
                    122:   if ( k&1 ) qo = wk, ostep = 2, inwk = 1, odist = 1;
                    123:   else qo = out, ostep = o1 << 1, inwk = 0, odist = o1;
                    124:   qi = in;
                    125:   /**/
                    126:   if ( nin <= n ) {
                    127:     for ( i = nin; i > 0; i--, qi += i1, qo += ostep ) qo[0] = qo[odist] = qi[0];
                    128:     for ( i = n - nin; i > 0; i--, qo += ostep ) qo[0] = qo[odist] = 0;
                    129:   } else {
                    130:     idist = i1 << k;   /* distance = 2^{K-1} */
                    131:     for ( j = i = nin - n; i > 0; i--, qi += i1, qo += ostep ) {
                    132:       t0 = qi[0],  tn = qi[idist];
                    133:       if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
                    134:       else {
                    135:        qo[0] = AplusBmodP( t0, tn, P );
                    136:        qo[odist] = A_BmodP( t0, tn, P );
                    137:       }
                    138:     }
                    139:     for ( i = n - j; i > 0; i--, qi += i1, qo += ostep )
                    140:       qo[0] = qo[odist] = qi[0];
                    141:   }
                    142:   /******/
                    143: #ifndef J_INNER
                    144:   for ( K_k = 1, Ndiv2n = 1; --k > 0; inwk = 1 - inwk ) {
                    145: #ifndef OPT
                    146:     K_k = (K_k_1 = K_k) + 1;
                    147: #endif
                    148:     n >>= 1;           /* == 2^k */
                    149:     Ndiv2n <<= 1;      /* == 2^{K-k-1} */
                    150:     if ( inwk )
                    151:       qi = wk, qo = out, iostep = 1, oostep = o1, idist = idistwk;
                    152:     else
                    153:       qi = out, qo = wk, iostep = o1, oostep = 1, idist = idistout;
                    154:     qis = qi,  qos = qo;
                    155:     /**/
                    156:     istep = ostep;     /* = iostep << K_k_1; */
                    157: #ifndef OPT
                    158:     ostep = oostep << K_k;
                    159:     odist = oostep << K_k_1;
                    160: #else
                    161:     odist = oostep << K_k;
                    162:     ++K_k;
                    163:     ostep = oostep << K_k;
                    164: #endif /* OPT */
                    165:     /* j = 0 */
                    166:     for ( i = n; i-- > 0; qi += istep, qo += ostep ) {
                    167:       t0 = qi[0],  tn = qi[idist];
                    168:       if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
                    169:       else {
                    170:        qo[0] = AplusBmodP( t0, tn, P );
                    171:        qo[odist] = A_BmodP( t0, tn, P );
                    172:       }
                    173:     }
                    174:     /**/
                    175: #ifdef POWA_STRIDE
                    176:     for ( aj = a1, j = 1; j < Ndiv2n; aj += a1, j++ ) {
                    177:       a = powa[aj << k];
                    178: #else
                    179:     for ( j = 1; j < Ndiv2n; j++ ) {
                    180:       a = powa[j << k];
                    181: #endif
                    182:       qi = (qis += iostep),  qo = (qos += oostep);
                    183:       for ( i = 0; i < n; i++, qi += istep, qo += ostep ) {
                    184:        t0 = qi[0],  tn = qi[idist];
                    185:        if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
                    186:        else {
                    187:          AxBmodP( tn, ModNum, tn, a, P, Pinv );
                    188:          qo[0] = AplusBmodP( t0, tn, P );
                    189:          qo[odist] = A_BmodP( t0, tn, P );
                    190:        }
                    191:       }
                    192:     }
                    193:   }
                    194:   /*
                    195:    *  When k = 0, for i = 0 (0 <= i < 2^0=1) and 0 <= j < 2^{K-1},
                    196:    *   Out[j] <= t0 + \alpha^{j}*tn and Out[j+2^{K-1}] <= t0 - \alpha^{j}*tn,
                    197:    *  where t0 = In[j] and tn = In[j + 2^{K-1}].
                    198:    */
                    199:   qi = wk, qo = out, idist = idistwk, odist = idistout;
                    200:   /*                       == 2^{K-1},      == o1 << (K-1) */
                    201:   t0 = qi[0],  tn = qi[idist];
                    202:   if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
                    203:   else {
                    204:     qo[0] = AplusBmodP( t0, tn, P );
                    205:     qo[odist] = A_BmodP( t0, tn, P );
                    206:   }
                    207: #ifdef POWA_STRIDE
                    208:   for ( k = o1, aj = a1, j = 1; j < idist; aj += a1, j++ ) {
                    209: #else
                    210:   for ( k = o1, j = 1; j < idist; j++ ) {
                    211: #endif
                    212:     qi++,  qo += k;
                    213:     t0 = qi[0],  tn = qi[idist];
                    214:     if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
                    215:     else {
                    216: #ifdef POWA_STRIDE
                    217:       a = powa[aj];
                    218: #else
                    219:       a = powa[j];
                    220: #endif
                    221:       AxBmodP( tn, ModNum, tn, a, P, Pinv );
                    222:       qo[0] = AplusBmodP( t0, tn, P );
                    223:       qo[odist] = A_BmodP( t0, tn, P );
                    224:     }
                    225:   }
                    226: #else
                    227:   for ( K_k = 1, Ndiv2n = 1; --k >= 0; inwk = 1 - inwk ) {
                    228: #ifndef OPT
                    229:     K_k = (K_k_1 = K_k) + 1;
                    230: #endif
                    231:     n >>= 1;           /* == 2^k */
                    232:     Ndiv2n <<= 1;      /* == 2^{K-k-1} */
                    233:     if ( inwk ) qi = wk, qo = out, istep = 1, ostep = o1, idist = idistwk;
                    234:     else qi = out, qo = wk, istep = o1, ostep = 1, idist = idistout;
                    235:     qis = qi,  qos = qo;
                    236:     /**/
                    237:     iostep = oostep;   /* = istep << K_k_1; */
                    238: #ifndef OPT
                    239:     oostep = ostep << K_k;
                    240:     odist = ostep << K_k_1;
                    241: #else
                    242:     odist = ostep << K_k;
                    243:     ++K_k;
                    244:     oostep = ostep << K_k;
                    245: #endif /* OPT */
                    246:     for ( i = n; i-- > 0; qis += iostep, qos += oostep ) {
                    247:       /* j = 0 */
                    248:       t0 = qis[0],  tn = qis[idist];
                    249:       if ( tn == (ModNum)0 ) qos[0] = qos[odist] = t0;
                    250:       else {
                    251:        qos[0] = AplusBmodP( t0, tn, P );
                    252:        qos[odist] = A_BmodP( t0, tn, P );
                    253:       }
                    254: #ifdef POWA_STRIDE
                    255:       for ( aj = a1, j = 1, qi = qis, qo = qos; j < Ndiv2n; aj += a1, j++ ) {
                    256: #else
                    257:       for ( j = 1, qi = qis, qo = qos; j < Ndiv2n; j++ ) {
                    258: #endif
                    259:        qi += istep,  qo += ostep;
                    260:        t0 = qi[0],  tn = qi[idist];
                    261:        if ( tn == (ModNum)0 ) qo[0] = qo[odist] = t0;
                    262:        else {
                    263: #ifdef POWA_STRIDE
                    264:          a = powa[aj << k];
                    265: #else
                    266:          a = powa[j << k];
                    267: #endif
                    268:          AxBmodP( tn, ModNum, tn, a, P, Pinv );
                    269:          qo[0] = AplusBmodP( t0, tn, P );
                    270:          qo[odist] = A_BmodP( t0, tn, P );
                    271:        }
                    272:       }
                    273:     }
                    274:   }
                    275: #endif
                    276: }
                    277:
                    278: /*********************************************************************
                    279:  *
                    280:  *  NOTE:
                    281:  *  (1) Let \alpha be a primitive N-th root of unity modulo P,
                    282:  *  where N is even,
                    283:  *     and t be an integer s.t. 0 <= t < N/2.
                    284:  *     Then, 1/\alpha^t is given by (- \alpha^s) with s = N/2 - t.
                    285:  *
                    286:  *  (2) Let P be a prime s.t. P = 2^n*Q + 1, where Q is an odd integer.
                    287:  *     Then, for 0 < s <= n, 2^{-s} \equiv - 2^{n-s}*Q \bmod P,
                    288:  *     because 1 \equiv - 2^n*Q \bmod P.
                    289:  *
                    290:  **********************************************************************/
                    291:
                    292: void C_DFT_BACK( in, N, i1, log2N, powa,
                    293: #ifdef POWA_STRIDE
                    294:                 a1,
                    295: #endif
                    296:                 out, o1, osi, nout, Ninv, P, Pinv, wk )
                    297: ModNum in[], powa[], out[], wk[];
                    298: int N, log2N, osi, nout, i1, o1;
                    299: #ifdef POWA_STRIDE
                    300: int a1;
                    301: #endif
                    302: ModNum P, Ninv;
                    303: double Pinv;
                    304: /*
                    305:  *  Let K denote log2N.  Let N = 2^K, and \alpha be a primitive N-th root of 1.
                    306:  *  This routine computes the inverse discrete-Fourier transform of in[0:N-1][i1]
                    307:  *  using \alpha^{-1} modulo P, and store the `nout' transformed data from the
                    308:  *  `osi'-th in out[0:nout-1][o1].  0 <= osi < N-1 and 0 < nout <= N.
                    309:  *  The powers of \alpha (to the degrees upto N/2=2^{K-1} at least) are given in powa[*] (or powa[a1* *]).
                    310:  *  The area wk[] is used to contain the intermediate transformed data, and thus,
                    311:  *  must have 2*N entries at least.  Notice that `out' cannot be used because
                    312:  * its space amount (`nout') may not be sufficient for this purpose.
                    313:  */
                    314: {
                    315:   int i, j, k, n, is, os, istep, ostep, idist, inwk, halfN;
                    316: #ifdef POWA_STRIDE
                    317:   int aj, halfNa;
                    318: #endif
                    319:   ModNum *qi, *qo, *qis, *qos, a, t0, tn, tt;
                    320:
                    321:   /*
                    322:    *   for k = K-1, ..., 0 do the following:
                    323:    *
                    324:    *       Out[2^{K-k}*i + j]             <= t0 + \alpha^{- 2^k * j} * tn;
                    325:    *                                       = t0 - \alpha^{N/2 - 2^k * j}*tn;
                    326:    *       Out[2^{K-k}*i + j + 2^{K-k-1}] <= t0 - \alpha^{- 2^k * j} * tn;
                    327:    *                                       = t0 + \alpha^{N/2 - 2^k * j}*tn;
                    328:    *
                    329:    *      where t0 = In[2^{K-k-1}*i + j] and
                    330:    *           tn = In[2^{K-k-1}*i + j + 2^{K-1}],
                    331:    *      for 0 <= i < 2^k and 0 <= j < 2^{K-k-1}.
                    332:    *
                    333:    */
                    334:   if ( log2N == 1 ) {
                    335:     /*  K = 1, k = 0, 0 <= i < 1, 0 <= j < 1.
                    336:      * Out[0] <= t0 + tn, Out[1] <= t0 - tn,
                    337:      *  where t0 = In[0] and Tn = In[1].
                    338:      */
                    339:     t0 = in[0],  tn = in[i1];
                    340:     if ( osi == 0 ) {
                    341:       /*  out[0] = (t0 + tn)*Ninv; */
                    342:       tt = tn == (ModNum)0 ? t0 : AplusBmodP( t0, tn, P );
                    343:       if ( tt != (ModNum)0 ) {
                    344:        AxBmodP( tt, ModNum, tt, Ninv, P, Pinv );
                    345:       }
                    346:       out[0] = tt;
                    347:       i = 1;
                    348:     } else i = 0;
                    349:     /**/
                    350:     if ( osi + nout >= 2 ) {
                    351:       /*  out[osi == 0 ? 1 : 0] = (t0 - tn)*Ninv; */
                    352:       tt = tn == (ModNum)0 ? t0 : A_BmodP( t0, tn, P );
                    353:       if ( tt != (ModNum)0 ) {
                    354:        AxBmodP( tt, ModNum, tt, Ninv, P, Pinv );
                    355:       }
                    356:       out[i] = tt;
                    357:     }
                    358:     return;
                    359:   }
                    360:   /****/
                    361:   halfN = n = 1 << (k = log2N - 1);    /* == 2^{K-1} */
                    362: #ifdef POWA_STRIDE
                    363:   halfNa = a1 << k;
                    364: #endif
                    365:   /*
                    366:    *  when k = K-1,
                    367:    *   Out[2*i] <= t0 + tn, and Out[2*i+1] <= t0 - tn,
                    368:    *  where t0 = In[i] and tn = In[i+2^{K-1}],
                    369:    *  for 0 <= i < 2^{K-1}.
                    370:    */
                    371:   qi = in, istep = i1, idist = i1 << k, qo = wk, inwk = 0;
                    372:   for ( i = n; i-- > 0; qi += istep, qo += 2 ) {
                    373:     t0 = qi[0],  tn = qi[idist];
                    374:     if ( tn == (ModNum)0 ) qo[0] = qo[1] = t0;
                    375:     else {
                    376:       qo[0] = AplusBmodP( t0, tn, P );
                    377:       qo[1] = A_BmodP( t0, tn, P );
                    378:     }
                    379:   }
                    380: #ifdef EBUG
                    381:   fprintf( stderr, "::: DFT^{-1} ::: after the first step\n" );
                    382:   for ( qo = wk, i = 0; i < N; i++ ) {
                    383:     if ( (i%5) == 0 ) fprintf( stderr, "\t" );
                    384:     fprintf( stderr, "%10d,", (int)wk[i] );
                    385:     if ( (i%5) == 4 ) fprintf( stderr, "\n" );
                    386:   }
                    387:   if ( (i%5) != 0 ) fprintf( stderr, "\n" );
                    388: #endif
                    389:   /**/
                    390:   idist = halfN;
                    391:   for ( ostep = 2; --k > 0; inwk = 1 - inwk ) {
                    392:     n >>= 1;
                    393:     istep = ostep;     /* == 2^{K-k-1} */
                    394:     ostep <<= 1;       /* == 2^{K-k} */
                    395:     if ( inwk ) qi = &wk[N], qo = wk;
                    396:     else qi = wk, qo = &wk[N];
                    397:     qis = qi,  qos = qo;
                    398:     /*
                    399:      *  for j = 0,
                    400:      * Out[2^{K-k}*i] <= t0 + tn, and Out[2^{K-k}*i + 2^{K-k-1}] <= t0 - tn,
                    401:      * where t0 = In[2^{K-k-1}*i] and tn = In[2^{K-k-1}*i + 2^{K-1}].
                    402:      */
                    403:     for ( i = n, is = os = 0; i-- > 0; qi += istep, qo += ostep ) {
                    404:       t0 = qi[0],  tn = qi[idist];
                    405:       if ( tn == (ModNum)0 ) qo[0] = qo[istep] = t0;
                    406:       else {
                    407:        qo[0] = AplusBmodP( t0, tn, P );
                    408:        qo[istep] = A_BmodP( t0, tn, P );
                    409:       }
                    410:     }
                    411: #ifdef POWA_STRIDE
                    412:     for ( aj = a1, j = 1; j < istep; aj += a1, j++ ) {
                    413: /***      a = P - powa[halfNa - (aj << k)]; ***/
                    414:       a = powa[halfNa - (aj << k)];
                    415: #else
                    416:     for ( j = 1; j < istep; j++ ) {
                    417: /***      a = P - powa[halfN - (j << k)]; ***/
                    418:       a = powa[halfN - (j << k)];
                    419: #endif
                    420:       qi = ++qis,  qo = ++qos;
                    421:       for ( i = n; i-- > 0; qi += istep, qo += ostep ) {
                    422:        t0 = qi[0],  tn = qi[idist];
                    423:        if ( tn == (ModNum)0 ) qo[0] = qo[istep] = t0;
                    424:        else {
                    425:          AxBmodP( tn, ModNum, tn, a, P, Pinv );
                    426: /***     qo[0] = AplusBmodP( t0, tn, P ); ***/
                    427: /***     qo[istep] = A_BmodP( t0, tn, P ); ***/
                    428:          qo[0] = A_BmodP( t0, tn, P );
                    429:          qo[istep] = AplusBmodP( t0, tn, P );
                    430:        }
                    431:       }
                    432:     }
                    433:   }
                    434: #if 0
                    435:  final:
                    436: #endif
                    437:   /*
                    438:    *  The final step of k = 0.  i can take only the value 0.
                    439:    *
                    440:    *       Out[j]           <= t0 + \alpha^{-j} * tn;
                    441:    *       Out[j + 2^{K-1}] <= t0 - \alpha^{-j} * tn;
                    442:    *
                    443:    *      where t0 = In[j] and tn = In[j + 2^{K-1}],
                    444:    *      for 0 <= j < 2^{K-1}.
                    445:    *
                    446:    */
                    447:   qo = out,  qi = &wk[inwk ? N : 0];
                    448:   if ( (n = osi + nout) > N ) nout = (n = N) - osi;    /* true nout */
                    449:   if ( (k = nout - halfN) <= 0 ) {     /* no overlap, i.e., no +- */
                    450:     if ( osi <= 0 ) {
                    451:       t0 = qi[0],  tn = qi[idist];
                    452:       if ( tn != (ModNum)0 ) t0 = AplusBmodP( t0, tn, P );
                    453:       if ( t0 == (ModNum)0 ) qo[0] = t0;
                    454:       else {
                    455:        AxBmodP( qo[0], ModNum, t0, Ninv, P, Pinv );
                    456:       }
                    457: #ifdef POWA_STRIDE
                    458:       aj = a1;
                    459: #endif
                    460:       j = 1,  k = n,  i = 0,  qo += o1;
                    461:     } else {
                    462:       j = osi;
                    463: #ifdef POWA_STRIDE
                    464:       aj = osi*a1;
                    465: #endif
                    466:       if ( n <= halfN ) i = 0, k = n;          /* lower only */
                    467:       else if ( j == halfN ) goto L_halfN;     /* start with j = N/2 */
                    468:       else if ( j > halfN ) goto L_upper;      /* upper only */
                    469:       else i = n - halfN + 1, k = halfN;       /* we have lower, and i upper */
                    470:     }
                    471:   } else {
                    472:     os = o1*halfN;
                    473: #ifdef EBUG
                    474:     fprintf( stderr, "::::: DFT^{-1} ::: os=%d*%d=%d\n", o1, halfN, os );
                    475: #endif
                    476:     if ( osi <= 0 ) {
                    477:       t0 = qi[0],  tn = qi[idist];
                    478:       if ( tn == (ModNum)0 ) {
                    479:        if ( t0 == (ModNum)0 ) tt = t0;
                    480:        else { AxBmodP( tt, ModNum, t0, Ninv, P, Pinv ); }
                    481:        qo[0] = qo[os] = tt;
                    482:       } else {
                    483:        tt = AplusBmodP( t0, tn, P );
                    484:        if ( tt == (ModNum)0 ) qo[0] = tt;
                    485:        else { AxBmodP( qo[0], ModNum, tt, Ninv, P, Pinv ); }
                    486:        tt = A_BmodP( t0, tn, P );
                    487:        if ( tt == (ModNum)0 ) qo[os] = tt;
                    488:        else { AxBmodP( qo[os], ModNum, tt, Ninv, P, Pinv ); }
                    489:       }
                    490: #ifdef EBUG
                    491:     fprintf( stderr, "::::: DFT^{-1} ::: [0]=%d, [%d]=%d\n", (int)qo[0], os, (int)qo[os] );
                    492: #endif
                    493: #ifdef POWA_STRIDE
                    494:       j = 1,  aj = a1, n = halfN,  i = 0,  qo += o1,  k--;
                    495:     } else j = osi, aj = osi*a1,  i = (n -= k) - halfN;
                    496:     /**/
                    497:     for ( ; k-- > 0; aj += a1, j++, qo += o1 ) {
                    498: #else
                    499:       j = 1,  n = halfN,  i = 0,  qo += o1,  k--;
                    500:     } else j = osi,  i = (n -= k) - halfN;
                    501:     /**/
                    502:     for ( ; k-- > 0; j++, qo += o1 ) {
                    503: #endif
                    504:       t0 = qi[j],  tn = qi[j+idist];
                    505:       if ( tn == (ModNum)0 ) {
                    506:        if ( t0 != (ModNum)0 ) {
                    507:          AxBmodP( t0, ModNum, t0, Ninv, P, Pinv );
                    508:        }
                    509:        qo[0] = qo[os] = t0;
                    510:       } else {
                    511: #ifdef POWA_STRIDE
                    512: /***   a = P - powa[halfNa - aj]; ***/
                    513:        a = powa[halfNa - aj];
                    514: #else
                    515: /***   a = P - powa[halfN - j]; ***/
                    516:        a = powa[halfN - j];
                    517: #endif
                    518:        AxBmodP( tn, ModNum, tn, a, P, Pinv );
                    519: /***   tt = AplusBmodP( t0, tn, P ); ***/
                    520:        tt = A_BmodP( t0, tn, P );
                    521:        if ( tt == (ModNum)0 ) qo[0] = tt;
                    522:        else { AxBmodP( qo[0], ModNum, tt, Ninv, P, Pinv ); }
                    523: /***   tt = A_BmodP( t0, tn, P ); ***/
                    524:        tt = AplusBmodP( t0, tn, P );
                    525:        if ( tt == (ModNum)0 ) qo[os] = tt;
                    526:        else { AxBmodP( qo[os], ModNum, tt, Ninv, P, Pinv ); }
                    527:       }
                    528:     }
                    529:     k = halfN;
                    530:   }
                    531:   /*
                    532:    *  At this point, k is an upper limit < N/2 for j, i is the number of j's
                    533:    *  > N/2, and n is the real upper limit of j.
                    534:    */
                    535: #ifdef POWA_STRIDE
                    536:   for ( ; j < k; aj += a1, j++, qo += o1 ) {
                    537: #else
                    538:   for ( ; j < k; j++, qo += o1 ) {
                    539: #endif
                    540:     t0 = qi[j],  tn = qi[j+idist];
                    541:     if ( tn == (ModNum)0 ) {
                    542:       if ( t0 == (ModNum)0 ) qo[0] = t0;
                    543:       else { AxBmodP( qo[0], ModNum, t0, Ninv, P, Pinv ); }
                    544:     } else {
                    545: #ifdef POWA_STRIDE
                    546:       a = P - powa[halfNa - aj];
                    547: #else
                    548:       a = P - powa[halfN - j];
                    549: #endif
                    550:       AxBplusCmodP( tt, ModNum, tn, a, t0, P, Pinv );
                    551:       if ( tt == (ModNum)0 ) qo[0] = tt;
                    552:       else { AxBmodP( qo[0], ModNum, tt, Ninv, P, Pinv ); }
                    553:     }
                    554:   }
                    555:   if ( i <= 0 ) return;
                    556:   /**/
                    557:  L_halfN:
                    558:   t0 = qi[0],  tn = qi[idist];
                    559:   tt = tn == (ModNum)0 ? t0 : A_BmodP( t0, tn, P );
                    560:   if ( tt == (ModNum)0 ) qo[0] = tt;
                    561:   else { AxBmodP( qo[0], ModNum, tt, Ninv, P, Pinv ); }
                    562:   qo += o1,  j++;
                    563:   /**/
                    564:  L_upper:
                    565: #ifdef POWA_STRIDE
                    566:   for ( n -= halfN, aj = a1, j = 1; j < n; aj += a1, j++, qo += o1 ) {
                    567: #else
                    568:   for ( n -= halfN, j = 1; j < n; j++, qo += o1 ) {
                    569: #endif
                    570:     t0 = qi[j],  tn = qi[j+idist];
                    571:     if ( tn == (ModNum)0 ) {
                    572:       if ( t0 == (ModNum)0 ) qo[0] = t0;
                    573:       else { AxBmodP( qo[0], ModNum, t0, Ninv, P, Pinv ); }
                    574:     } else {
                    575: #ifdef POWA_STRIDE
                    576:       a = powa[halfNa - aj];
                    577: #else
                    578:       a = powa[halfN - j];
                    579: #endif
                    580:       AxBplusCmodP( tt, ModNum, tn, a, t0, P, Pinv );
                    581:       if ( tt == (ModNum)0 ) qo[0] = tt;
                    582:       else { AxBmodP( qo[0], ModNum, tt, Ninv, P, Pinv ); }
                    583:     }
                    584:   }
                    585: }
                    586:
1.4     ! ohara     587: #if defined(USE_FLOAT)
1.1       noro      588: void C_PREP_ALPHA( r, log2ord, log2k, n, tbl, P, Pinv )
                    589: ModNum r, tbl[], P;
                    590: int log2ord, log2k, n;
                    591: double Pinv;
                    592: /*
                    593:  *  Let K and k denote log2ord and log2k, respectively.
                    594:  *  Let r be a primitive (2^K)-th root of unity in Z/(P), where P is a prime.
                    595:  *  Compute a = r^{2^{K-k}}, a primitive (2^k)-th root of unity, and
                    596:  *  its powers to the degrees upto n in tbl[*].
                    597:  */
                    598: {
                    599:   int i;
                    600:   ModNum *q;
                    601:   double t, w, dp = (double) P;
                    602:
                    603:   for ( w = (double) r, i = log2ord - log2k; i > 0; i-- ) {
                    604:     w *= w;
                    605:     if ( w < dp ) continue;
                    606:     w -= (dp*(double)((int) (w*Pinv)));
                    607:   }
                    608:   tbl[0] = 1;
                    609:   tbl[1] = (ModNum)(t = w);
                    610:   for ( q = &tbl[2], i = n - 1; i > 0; i-- ) {
                    611:     t *= w;
                    612:     if ( t >= dp ) t -= (dp*(double)((int) (t*Pinv)));
                    613:     *q++ = t;
                    614:   }
                    615: }
                    616: #else
                    617:
                    618: void C_PREP_ALPHA( r, log2ord, log2k, n, tbl, P, Pinv )
                    619: ModNum r, tbl[], P;
                    620: int log2ord, log2k, n;
                    621: double Pinv;
                    622: /*
                    623:  *  Let K and k denote log2ord and log2k, respectively.
                    624:  *  Let r be a primitive (2^K)-th root of unity in Z/(P), where P is a prime.
                    625:  *  Compute a = r^{2^{K-k}}, a primitive (2^k)-th root of unity, and
                    626:  *  its powers to the degrees upto n in tbl[*].
                    627:  */
                    628: {
                    629:   int i;
                    630:   ModNum *q;
                    631:   ModNum t, w, s;
                    632:
                    633:   for ( w = r, i = log2ord - log2k; i > 0; i-- ) {
                    634:     AxBmodP( t, ModNum, w, w, P, Pinv );
                    635:     w = t;
                    636:   }
                    637:   tbl[0] = 1;
                    638:   tbl[1] = (ModNum)(t = w);
                    639:   for ( q = &tbl[2], i = n - 1; i > 0; i-- ) {
                    640:     AxBmodP( s, ModNum, t, w, P, Pinv );
                    641:     t = s;
                    642:     *q++ = t;
                    643:   }
                    644: }
                    645: #endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>