OpenXM_contrib2/asir2000/engine-27/N-27.c - annotate

Return to N-27.c CVS log
Up to [local] / OpenXM_contrib2 / asir2000 / engine-27
Annotation of OpenXM_contrib2/asir2000/engine-27/N-27.c, Revision 1.3

1.2       noro        1: /*
                      2:  * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
                      3:  * All rights reserved.
                      4:  *
                      5:  * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
                      6:  * non-exclusive and royalty-free license to use, copy, modify and
                      7:  * redistribute, solely for non-commercial and non-profit purposes, the
                      8:  * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
                      9:  * conditions of this Agreement. For the avoidance of doubt, you acquire
                     10:  * only a limited right to use the SOFTWARE hereunder, and FLL or any
                     11:  * third party developer retains all rights, including but not limited to
                     12:  * copyrights, in and to the SOFTWARE.
                     13:  *
                     14:  * (1) FLL does not grant you a license in any way for commercial
                     15:  * purposes. You may use the SOFTWARE only for non-commercial and
                     16:  * non-profit purposes only, such as academic, research and internal
                     17:  * business use.
                     18:  * (2) The SOFTWARE is protected by the Copyright Law of Japan and
                     19:  * international copyright treaties. If you make copies of the SOFTWARE,
                     20:  * with or without modification, as permitted hereunder, you shall affix
                     21:  * to all such copies of the SOFTWARE the above copyright notice.
                     22:  * (3) An explicit reference to this SOFTWARE and its copyright owner
                     23:  * shall be made on your publication or presentation in any form of the
                     24:  * results obtained by use of the SOFTWARE.
                     25:  * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
1.3     ! noro       26:  * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
1.2       noro       27:  * for such modification or the source code of the modified part of the
                     28:  * SOFTWARE.
                     29:  *
                     30:  * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
                     31:  * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
                     32:  * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
                     33:  * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
                     34:  * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
                     35:  * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
                     36:  * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
                     37:  * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
                     38:  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
                     39:  * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
                     40:  * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
                     41:  * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
                     42:  * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
                     43:  * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
                     44:  * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
                     45:  * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
                     46:  * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
                     47:  *
1.3     ! noro       48:  * $OpenXM: OpenXM_contrib2/asir2000/engine-27/N-27.c,v 1.2 2000/08/21 08:31:32 noro Exp $
1.2       noro       49: */
1.1       noro       50: #include "ca-27.h"
                     51: #include "base.h"
                     52:
                     53: #ifndef HMEXT
                     54: #define HMEXT
                     55: #endif
                     56:
                     57: #ifdef HMEXT
                     58: #undef FULLSET
                     59: #undef CALL
                     60: #undef DIVISION
                     61:
                     62: int igcd_algorithm = 0;
                     63:     /* == 0 : Euclid,
                     64:      * == 1 : binary,
                     65:      * == 2 : bmod,
                     66:      * >= 3 : (Weber's accelerated)/(Jebelean's generalized binary) algorithm,
                     67:      */
                     68: int igcd_thre_inidiv = 50;
                     69:     /*
                     70:      *  In the non-Euclidean algorithms, if the ratio of the lengths (number
                     71:      *  of words) of two integers is >= igcd_thre_inidiv, we first perform
                     72:      *  remainder calculation.
                     73:      *  If == 0, this remainder calculation is not performed.
                     74:      */
                     75: int igcdacc_thre = 10;
                     76:     /*
                     77:      *  In the accelerated algorithm, if the bit-lengths of two integers is
                     78:      *  > igcdacc_thre, "bmod" reduction is done.
                     79:      */
                     80:
                     81: #include "inline.h"
                     82:
                     83: #define TRAILINGZEROS(t,cntr) for(cntr=0;(t&1)==0;t>>=1)cntr++;
                     84:
                     85: #define W_NALLOC(d) ((N)ALLOCA(TRUESIZE(oN,(d)-1,int)))
                     86:
                     87: #define ShouldCompRemInit(n1,n2) (igcd_thre_inidiv != 0 && PL(n1) >= igcd_thre_inidiv*PL(n2))
                     88:
                     89: #define IniDiv(n1,n2) \
                     90:        if ( ShouldCompRemInit(n1,n2) ) {\
                     91:                N q, r; int w, b; \
                     92:                divn_27(n1,n2,&q,&r); \
                     93:                if ( !r ) return(n2); \
                     94:                b = trailingzerosn( r, &w ); \
                     95:                q = n1;  n1 = n2;  n2 = q; \
                     96:                rshiftn( r, w, b, n2 ); \
                     97:        }
                     98:
                     99: /*
                    100:  *     Binary GCD algorithm by J.Stein
                    101:  *     [J. Comp. Phys. Vol. 1 (1967), pp. 397-405)]:
                    102:  *     The right-shift binary algorithm is used.
                    103:  */
                    104:
                    105:
                    106: /*
                    107:  *     subsidiary routines for gcdbinn below.
                    108:  */
                    109: static int /* number of bits */ trailingzeros_nbd( /* BD of N */ nbd, pnw )
                    110: int *nbd, *pnw /* number of zero words */;
                    111: {
                    112:        int nw, nb, w;
                    113:
                    114:        for ( nw = 0; (w = *nbd) == 0; nbd++ ) nw++;
                    115:        TRAILINGZEROS(w,nb);
                    116:        *pnw = nw;
                    117:        return nb;
                    118: }
                    119:
                    120: #define trailingzerosn(n,pnw) trailingzeros_nbd(BD(n),pnw)
                    121:
                    122: static int /* PL of N */ rshift_nbd( /* BD of N */ nbd, /* PL of N */ nl,
                    123:                       /* # words */ shw, /* # bits */ shb, /* BD of N */ p )
                    124: int *nbd, nl, shw, shb, *p;
                    125: {
                    126:        int i, v, w, lshb;
                    127:
                    128:        nbd += shw,  i = (nl -= shw);
                    129:        if ( shb == 0 ) {
                    130:                for ( ; nl > 0; nl-- ) *p++ = *nbd++;
                    131:                return i;
                    132:        } else if ( nl < 2 ) {
                    133:                *p = (*nbd) >> shb;
                    134:                return 1;
                    135:        }
                    136:        for ( lshb = BSH27 - shb, v = *nbd++; --nl > 0; v = w ) {
                    137:                w = *nbd++;
                    138:                *p++ = (v >> shb) | ((w << lshb)&BMASK27);
                    139:        }
                    140:        if ( (v >>= shb) == 0 ) return( i-1 );
                    141:        *p = v;
                    142:        return i;
                    143: }
                    144:
                    145: #define rshiftn(ns,shw,shb,nd) (PL(nd)=rshift_nbd(BD(ns),PL(ns),shw,shb,BD(nd)))
                    146:      /* nd <= ns << (shb + shw*BSH27), returns PL of the result */
                    147:
                    148: #ifdef FULLSET
                    149: static N N_of_i_lshifted_by_wb( i, gw, gb )
                    150: int i, gw, gb;
                    151:        /*
                    152:         *      returns pointer to a new struct (N)(((int)i) >> (gb + gw*BSH27))
                    153:         */
                    154: {
                    155:        int j, l, *p;
                    156:        N n;
                    157:
                    158:        j = i >> (BSH27 - gb);
                    159:        i = (i << gb)&BMASK27;
                    160:        l = j != 0 ? gw + 2 : gw + 1;
                    161:        n = NALLOC(l);
                    162:        PL(n) = l;
                    163:        for ( p = BD(n); gw-- > 0; ) *p++ = 0;
                    164:        *p++ = i;
                    165:        if ( j != 0 ) *p = j;
                    166:        return n;
                    167: }
                    168: #endif /* FULLSET */
                    169:
                    170: /*
                    171:  *     routines to make a new struct
                    172:  *             (N)(((BD of N)(b[0],...,b[lb-1])) << (gb + gw*BSH27))
                    173:  */
                    174: static N N_of_nbd_lshifted_by_wb( /* BD of N */ b, /* PL of N */ lb, gw, gb )
                    175: int *b, lb, gw, gb;
                    176:        /*
                    177:         *      returns pointer to a new struct
                    178:         *              (N)(((BD of N)(b[0],...,b[lb-1])) << (gb + gw*BSH27))
                    179:         */
                    180: {
                    181:        int rsh, s, t, *p, l;
                    182:        N n;
                    183:
                    184:        l = lb + gw;
                    185:        if ( gb == 0 ) {
                    186:                n = NALLOC(l);
                    187:                PL(n) = l;
                    188:                for ( p = BD(n); gw-- > 0; ) *p++ = 0;
                    189:                while ( lb-- > 0 ) *p++ = *b++;
                    190:                return n;
                    191:        }
                    192:        rsh = BSH27 - gb;  s = b[lb-1];
                    193:        if ( (t = s >> rsh) != 0 ) {
                    194:                n = NALLOC(l+1);
                    195:                PL(n) = l+1;
                    196:                (p = BD(n))[l] = t;
                    197:        } else {
                    198:                n = NALLOC(l);
                    199:                PL(n) = l;
                    200:                p = BD(n);
                    201:        }
                    202:        while ( gw-- > 0 ) *p++ = 0;
                    203:        *p++ = ((t = *b++) << gb)&BMASK27;
                    204:        for ( ; --lb > 0; t = s )
                    205:                *p++ = (t >> rsh) | (((s = *b++) << gb)&BMASK27);
                    206:        return n;
                    207: }
                    208:
                    209: #define N_of_n_lshifted_by_wb(a,gw,gb) N_of_nbd_lshifted_by_wb(BD(a),PL(a),gw,gb)
                    210:
                    211: #define SWAP(a,b,Type) { Type temp=a;a=b;b=temp;}
                    212: #define SIGNED_VAL(a,s) ((s)>0?(a):-(a))
                    213:
                    214:
                    215: #ifdef CALL
                    216: static int bw_int32( n )
                    217: int n;
                    218: {
                    219:        int w;
                    220:
                    221:        w = 0;
                    222: #if BSH27 >= 32
                    223:        if ( n > 0xffffffff ) w += 32, n >>= 32;
                    224: #endif
                    225:        if ( n >= 0x10000 ) w += 16, n >>= 16;
                    226:        if ( n >=   0x100 ) w +=  8, n >>=  8;
                    227:        if ( n >=    0x10 ) w +=  4, n >>=  4;
                    228:        if ( n >=     0x4 ) w +=  2, n >>=  2;
                    229:        if ( n >=     0x2 ) w +=  1, n >>=  1;
                    230:        if ( n != 0 ) ++w;
                    231:        return w;
                    232: }
                    233: #define BitWidth(n,bw) bw = bw_int32( n )
                    234: #else
                    235:
                    236: #if BSH27 >= 32
                    237: #define BitWidth(n,bw) {\
                    238:        int k = (n); \
                    239:        bw = 0; \
                    240:        if ( BSH27 >= 32 ) if ( k > 0xffffffff ) bw += 32, k >>= 32; \
                    241:        if ( k >= 0x10000 ) bw += 16, k >>= 16; \
                    242:        if ( k >=   0x100 ) bw +=  8, k >>=  8; \
                    243:        if ( k >=    0x10 ) bw +=  4, k >>=  4; \
                    244:        if ( k >=     0x4 ) bw +=  2, k >>=  2; \
                    245:        if ( k >=     0x2 ) bw +=  1, k >>=  1; \
                    246:        if ( k != 0 ) bw++; \
                    247: }
                    248: #else
                    249: #define BitWidth(n,bw) {\
                    250:        int k = (n); \
                    251:        bw = 0; \
                    252:        if ( k >= 0x10000 ) bw += 16, k >>= 16; \
                    253:        if ( k >=   0x100 ) bw +=  8, k >>=  8; \
                    254:        if ( k >=    0x10 ) bw +=  4, k >>=  4; \
                    255:        if ( k >=     0x4 ) bw +=  2, k >>=  2; \
                    256:        if ( k >=     0x2 ) bw +=  1, k >>=  1; \
                    257:        if ( k != 0 ) bw++; \
                    258: }
                    259: #endif
                    260: #endif
                    261:
                    262: #include "igcdhack.c"
                    263:
                    264: /*
                    265:  *     Implementation of the binary GCD algorithm for two oN structs
                    266:  *     (big-integers) in risa.
                    267:  *
                    268:  *     The major operations in the following algorithms are the binary-shifts
                    269:  *     and the updates of (u, v) by (min(u,v), |u-v|), and are to be open-coded
                    270:  *     without using routines for oN structures just as in addn() or subn().
                    271:  */
                    272:
                    273: static int igcd_binary_2w( u, lu, v, lv, pans )
                    274: int *u, lu, *v, lv, *pans;
                    275:        /*  both u[0:lu-1] and v[0:lv-1] are assumed to be odd */
                    276: {
                    277:        int i, h1, l1, h2, l2;
                    278:
                    279:        l1 = u[0],  l2 = v[0];
                    280:        h1 = lu <= 1 ? 0 : u[1];
                    281:        h2 = lv <= 1 ? 0 : v[1];
                    282:        /**/
                    283: loop:  if ( h1 == 0 ) {
                    284:        no_hi1: if ( h2 == 0 ) goto one_word;
                    285:        no_hi1n:if ( l1 == 1 ) return 0;
                    286:                if ( (l2 -= l1) == 0 ) {
                    287:                        for ( l2 = h2; (l2&1) == 0; l2 >>= 1 ) ;
                    288:                        goto one_word;
                    289:                } else if ( l2 < 0 ) h2--, l2 += BASE27;
                    290:                i = 0;  do { l2 >>= 1, i++; } while ( (l2&1) == 0 );
                    291:                l2 |= ((h2 << (BSH27 - i)) & BMASK27);
                    292:                h2 >>= i;
                    293:                goto no_hi1;
                    294:        } else if ( h2 == 0 ) {
                    295:        no_hi2: if ( l2 == 1 ) return 0;
                    296:                if ( (l1 -= l2) == 0 ) {
                    297:                        for ( l1 = h1; (l1&1) == 0; l1 >>= 1 ) ;
                    298:                        goto one_word;
                    299:                } else if ( l1 < 0 ) h1--, l1 += BASE27;
                    300:                i = 0;  do { l1 >>= 1, i++; } while ( (l1&1) == 0 );
                    301:                l1 |= ((h1 << (BSH27 - i)) & BMASK27);
                    302:                if ( (h1 >>= i) == 0 ) goto one_word;
                    303:                goto no_hi2;
                    304:        } else if ( l1 == l2 ) {
                    305:                if ( h1 == h2 ) {
                    306:                        pans[0] = l1, pans[1] = h1;
                    307:                        return 2;
                    308:                } else if ( h1 > h2 ) {
                    309:                        for ( l1 = h1 - h2; (l1&1) == 0; l1 >>= 1 ) ;
                    310:                        goto no_hi1n;
                    311:                } else {
                    312:                        for ( l2 = h2 - h1; (l2&1) == 0; l2 >>= 1 ) ;
                    313:                        goto no_hi2;
                    314:                }
                    315:        } else if ( h1 == h2 ) {
                    316:                if ( l1 > l2 ) {
                    317:                        for ( l1 -= l2; (l1&1) == 0; l1 >>= 1 ) ;
                    318:                        goto no_hi1n;
                    319:                } else {
                    320:                        for ( l2 -= l1; (l2&1) == 0; l2 >>= 1 ) ;
                    321:                        goto no_hi2;
                    322:                }
                    323:        } else if ( h1 > h2 ) {
                    324:                h1 -= h2;
                    325:                if ( (l1 -= l2) < 0 ) h1--, l1 += BASE27;
                    326:                i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 );
                    327:                l1 |= ((h1 << (BSH27 - i)) & BMASK27);
                    328:                h1 >>= i;
                    329:        } else {
                    330:                h2 -= h1;
                    331:                if ( (l2 -= l1) < 0 ) h2--, l2 += BASE27;
                    332:                i = 0;  do { l2 >>= 1, i++; } while ( (l2&1) == 0 );
                    333:                l2 |= ((h2 << (BSH27 - i)) & BMASK27);
                    334:                h2 >>= i;
                    335:        }
                    336:        goto loop;
                    337: one_word:
                    338:        if ( l1 == 1 || l2 == 1 ) return 0;
                    339:        else if ( l1 == l2 ) {
                    340:                pans[0] = l1;
                    341:                return 1;
                    342:        }
                    343: one_word_neq:
                    344:        if ( l1 > l2 ) {
                    345:                l1 -= l2;
                    346:                do { l1 >>= 1; } while ( (l1&1) == 0 );
                    347:                goto one_word;
                    348:        } else {
                    349:                l2 -= l1;
                    350:                do { l2 >>= 1; } while ( (l2&1) == 0 );
                    351:                goto one_word;
                    352:        }
                    353: }
                    354:
                    355: static N igcd_binary( n1, n2, nt )
                    356: N n1, n2, nt;
                    357:        /*  both n1 and n2 are assumed to be odd */
                    358: {
                    359:        int l1, *b1, l2, *b2, *bt = BD(nt);
                    360:        int l;
                    361:
                    362:        if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
                    363:        else if ( l < 0 ) { SWAP( n1, n2, N ); }
                    364:        IniDiv( n1, n2 );
                    365:        if ( UNIN(n2) ) return 0;
                    366:        l1 = PL(n1), b1 = BD(n1),  l2 = PL(n2), b2 = BD(n2);
                    367: loop:  if ( l1 <= 2 && l2 <= 2 ) {
                    368:                l = igcd_binary_2w( b1, l1, b2, l2, bt );
                    369:                if ( l == 0 ) return 0;
                    370:                PL(nt) = l;
                    371:                return nt;
                    372:        }
                    373:        /**/
                    374:        l = abs_U_V_maxrshift( b1, l1, b2, l2, bt );
                    375:        /**/
                    376:        if ( l == 0 ) {
                    377:                PL(n1) = l1;
                    378:                return n1;
                    379:        } else if ( l > 0 ) {
                    380:                l1 = l;
                    381:                SWAP( b1, bt, int * ); SWAP( n1, nt, N );
                    382:        } else {
                    383:                l2 = -l;
                    384:                SWAP( b2, bt, int * ); SWAP( n2, nt, N );
                    385:        }
                    386:        goto loop;
                    387: }
                    388:
                    389: #define RetTrueGCD(p,gw,gb,nr,l0) \
                    390:   if (p==0) { l0: if (gw==0&&gb==0) { *(nr)=ONEN; return; } else p=ONEN; } \
                    391:   *(nr) = N_of_n_lshifted_by_wb(p,gw,gb); \
                    392:   return;
                    393:
                    394: void gcdbinn( n1, n2, nr )
                    395: N n1, n2, *nr;
                    396: {
                    397:        int s1, s2, gw, gb, t1, t2;
                    398:        int w1, w2;
                    399:        N tn1, tn2, tnt, p;
                    400:
                    401:        if ( !n1 ) {
                    402:                *nr = n2;
                    403:                return;
                    404:        } else if ( !n2 ) {
                    405:                *nr = n1;
                    406:                return;
                    407:        }
                    408:        s1 = trailingzerosn( n1, &w1 );
                    409:        s2 = trailingzerosn( n2, &w2 );
                    410:        if ( w1 == w2 ) gw = w1,  gb = s1 <= s2 ? s1 : s2;
                    411:        else if ( w1 < w2 ) gw = w1,  gb = s1;
                    412:        else gw = w2,  gb = s2;
                    413:        /*
                    414:         *      true GCD must be multiplied by 2^{gw*BSH27+gb}.
                    415:         */
                    416:        t1 = PL(n1) - w1;
                    417:        t2 = PL(n2) - w2;
                    418:        if ( t1 < t2 ) t1 = t2;
                    419:        tn1 = W_NALLOC(t1);  tn2 = W_NALLOC(t1);  tnt = W_NALLOC(t1);
                    420:        rshiftn( n1, w1, s1, tn1 );
                    421:        rshiftn( n2, w2, s2, tn2 );
                    422:        p = igcd_binary( tn1, tn2, tnt );
                    423:        RetTrueGCD( p, gw, gb, nr, L0 )
                    424: }
                    425:
                    426:
                    427: /*
                    428:  *     The bmod gcd algorithm stated briefly in K.Weber's paper
                    429:  *     [ACM TOMS, Vol.21, No. 1 (1995), pp. 111-122].
                    430:  *     It replaces the subtraction (n1 - n2) in the binary algorithm
                    431:  *     by (n1 - S*n2) with such an S that (n1 - S*n2) \equiv 0 \bmod 2^BSH27,
                    432:  *     which should improve the efficiency when n1 \gg n2.
                    433:  */
                    434:
                    435: /* subsidiary routines */
                    436: #ifdef CALL
                    437: #ifndef DIVISION
                    438: static int u_div_v_mod_2tos( u, v, s )
                    439: int u, v, s;
                    440:        /*
                    441:         *    u/v mod 2^s.
                    442:         */
                    443: {
                    444:        int i,lsh_i, mask, m, two_to_s;
                    445:
                    446:        mask = (two_to_s = 1 << s) - 1;
                    447:        lsh_i = (sizeof(int) << 3) - 1;
                    448:        m = i = 0;
                    449:        u &= mask,  v &= mask;
                    450:        do {
                    451:                if ( u == 0 ) break;
                    452:                if ( (u << lsh_i) != 0 ) {
                    453:                        m += (1 << i);
                    454:                        u -= (v << i);
                    455:                        u &= mask;
                    456:                }
                    457:                lsh_i--;
                    458:        } while ( ++i != s );
                    459:        return m;
                    460: }
                    461: #else
                    462: static int u_div_v_mod_2tos( u, v, s )
                    463: int u, v, s;
                    464: {
                    465:        int f1 = 1 << s, f2 = u, q, r, c1 = 0, c2 = v, m;
                    466:
                    467:        m = f1 - 1;
                    468:        do {    q = f1 / f2;
                    469:                r = f1 - q*f2;  f1 = f2;  f2 = r;
                    470:                r = c1 - q*c2;  c1 = c2;  c2 = r;
                    471:        } while ( f2 != 1 );
                    472:        return( c2 & m );
                    473: }
                    474: #endif /* DIVISION */
                    475:
                    476: #define Comp_U_div_V_mod_BASE27(U,V,R) R = u_div_v_mod_2tos(U,V,BSH27)
                    477: #else
                    478: #ifndef DIVISION
                    479: #define Comp_U_div_V_mod_BASE27(U,V,R) {\
                    480:        int u = (U), v = (V), i, lsh; \
                    481:        /* U and V are assumed to be odd */ \
                    482:        i = R = 1, lsh = (sizeof(int) << 3) - 2;  u = (u - v) & BMASK27; \
                    483:        do {    if ( u == 0 ) break; \
                    484:                if ( (u << lsh) != 0 ) R += (1 << i),  u = (u - (v << i)) & BMASK27; \
                    485:                i++, lsh--; \
                    486:        } while ( i < BSH27 ); \
                    487: }
                    488: #else
                    489: #define Comp_U_div_V_mod_BASE27(U,V,R) {\
                    490:        int f1 = BASE27, f2 = (V), q, r, c1 = 0, c2 = (U); \
                    491:        do {    q = f1 / f2; \
                    492:                r = f1 - q*f2;  f1 = f2;  f2 = r; \
                    493:                r = c1 - q*c2;  c1 = c2;  c2 = r; \
                    494:        } while ( f2 != 1 ); \
                    495:        R = c2 & BMASK27; \
                    496: }
                    497: #endif /* DIVISION */
                    498: #endif
                    499:
                    500:
                    501: static int bmod_n( nu, nv, na )
                    502: N nu, nv, na;
                    503:        /*
                    504:         *      Computes (u[] \bmod v[]) >> (as much as possible) in r[].
                    505:         */
                    506: {
                    507:        int *u = BD(nu), lu = PL(nu), *v = BD(nv), lv = PL(nv),
                    508:                     *r = BD(na);
                    509:        int *p, a, t, l, z, v0, vh, bv, v0r;
                    510:
                    511:        v0 = v[0];
                    512:        if ( lv == 1 ) {
                    513:                if ( lu == 1 ) a = u[0] % v0;
                    514:                else {
                    515:                        p = &u[--lu];
                    516:                        a = (*p) % v0, t = BASE27 % v0;
                    517:                        for ( ; --lu >= 0; a = l ) {
                    518:                                --p;
                    519:                                DMAR(a,t,*p,v0,l)
                    520:                                /*  l <= (a*t + p[0])%v0   */
                    521:                        }
                    522:                }
                    523:                if ( a == 0 ) return 0;
                    524:                while ( (a&1) == 0 ) a >>= 1;
                    525:                *r = a;
                    526:                return( PL(na) = 1 );
                    527:        }
                    528:        Comp_U_div_V_mod_BASE27( 1, v0, v0r );
                    529:        vh = v[lv -1];
                    530:        BitWidth( vh, bv );
                    531:        bv--;
                    532:        t = 1 << bv;
                    533:        l = lv + 1;
                    534:        for ( z = -1; lu > l || lu == l && u[lu-1] >= t; z = -z ) {
                    535:                a = (v0r*u[0])&BMASK27;
                    536:                /**/
                    537:                lu = abs_U_aV_maxrshift( u, lu, a, v, lv, r );
                    538:                /**/
                    539:                if ( lu == 0 ) return 0;
                    540:                p = r;
                    541:                r = u;
                    542:                u = p;
                    543:        }
                    544:        if ( lu < lv ) goto ret;
                    545:        t = u[lu-1];
                    546:        if ( lu > lv ) l = BSH27;
                    547:        else if ( t < vh ) goto ret;
                    548:        else l = 0;
                    549:        BitWidth( t, a );
                    550:        l += (a - bv);
                    551:        a = (v0r*u[0])&(BMASK27 >> (BSH27 - l));
                    552:        /**/
                    553:        lu = abs_U_aV_maxrshift( u, lu, a, v, lv, r );
                    554:        /**/
                    555:        if ( lu == 0 ) return 0;
                    556:        z = -z;
                    557: ret:   if ( z > 0 ) return( PL(na) = lu );
                    558:        PL(nu) = lu;
                    559:        return( -lu );
                    560: }
                    561:
                    562:
                    563: static N igcd_bmod( n1, n2, nt )
                    564: N n1, n2, nt;
                    565:        /*  both n1 and n2 are assumed to be odd */
                    566: {
                    567:        int l1, l2;
                    568:        int l;
                    569:
                    570:        if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
                    571:        else if ( l < 0 ) { SWAP( n1, n2, N ); }
                    572:        IniDiv( n1, n2 );
                    573:        if ( UNIN(n2) ) return 0;
                    574: loop:  if ( (l1 = PL(n1)) <= 2 && (l2 = PL(n2)) <= 2 ) {
                    575:                l = igcd_binary_2w( BD(n1), l1, BD(n2), l2, BD(nt) );
                    576:                if ( l == 0 ) return 0;
                    577:                PL(nt) = l;
                    578:                return nt;
                    579:        }
                    580:        /**/
                    581:        l = bmod_n( n1, n2, nt );
                    582:        /**/
                    583:        if ( l == 0 ) return n2;
                    584:        else if ( l > 0 ) {
                    585:                N tmp = n1;
                    586:
                    587:                n1 = n2;
                    588:                n2 = nt;
                    589:                nt = tmp;
                    590:        } else SWAP( n1, n2, N );
                    591:        goto loop;
                    592: }
                    593:
                    594: void gcdbmodn( n1, n2, nr )
                    595: N n1, n2, *nr;
                    596: {
                    597:        int s1, s2, gw, gb, t1, t2;
                    598:        int w1, w2;
                    599:        N tn1, tn2, tnt, p;
                    600:
                    601:        if ( !n1 ) {
                    602:                *nr = n2;
                    603:                return;
                    604:        } else if ( !n2 ) {
                    605:                *nr = n1;
                    606:                return;
                    607:        }
                    608:        s1 = trailingzerosn( n1, &w1 );
                    609:        s2 = trailingzerosn( n2, &w2 );
                    610:        if ( w1 == w2 ) gw = w1,  gb = s1 <= s2 ? s1 : s2;
                    611:        else if ( w1 < w2 ) gw = w1,  gb = s1;
                    612:        else gw = w2,  gb = s2;
                    613:        /*
                    614:         *      true GCD must be multiplied by 2^{gw*BSH27+gs}.
                    615:         */
                    616:        t1 = PL(n1) - w1;
                    617:        t2 = PL(n2) - w2;
                    618:        if ( t1 < t2 ) t1 = t2;
                    619:        tn1 = W_NALLOC(t1);  tn2 = W_NALLOC(t1);  tnt = W_NALLOC(t1);
                    620:        rshiftn( n1, w1, s1, tn1 );
                    621:        rshiftn( n2, w2, s2, tn2 );
                    622:        p = igcd_bmod( tn1, tn2, tnt );
                    623:        RetTrueGCD( p, gw, gb, nr, L0 )
                    624: }
                    625:
                    626: /*
                    627:  *     The accelerated integer GCD algorithm by K.Weber
                    628:  *     [ACM TOMS, Vol.21, No. 1 (1995), pp. 111-122]:
                    629:  */
                    630:
                    631: static int ReducedRatMod( x, y, pn, pd )
                    632: N x, y;
                    633: int *pn, *pd;
                    634:        /*
                    635:         *    Let m = 2^{2*BSH27} = 2*BASE27.  We assume x, y > 0 and \gcd(x,m)
                    636:         *    = \gcd(y,m) = 1.  This routine computes n and d (resp. returned
                    637:         *    in *pn and *pd) such that 0 < n, |d| < \sqrt{m} and
                    638:         *    n*y \equiv x*d \bmod m.
                    639:         */
                    640: {
                    641:        int n1h, n1l, d1h, d1l, n2h, n2l, d2h, d2l;
                    642:        int s1, s2, l1, l2;
                    643:
                    644:
                    645:        {
                    646:                int xh, xl, yh, yl, tl, i, lsh_i;
                    647:                int th;
                    648:
                    649:                xl = BD(x)[0];
                    650:                xh = PL(x) > 1 ? BD(x)[1] : 0;
                    651:                yl = BD(y)[0];
                    652:                yh = PL(y) > 1 ? BD(y)[1] : 0;
                    653:                Comp_U_div_V_mod_BASE27( xl, yl, n2l );
                    654:                DM27(n2l,yl,th,tl)  /* n2l*yl = tl+th*BASE27, where tl==xl. */;
                    655:                if ( xh > th ) xh -= th;
                    656:                else xh += (BASE27 - th);
                    657:                DM27(n2l,yh,th,tl)  /* n2l*yh = tl+th*BASE27. */;
                    658:                if ( xh > tl ) xh -= tl;
                    659:                else xh += (BASE27 - tl);
                    660:                n2h = i = 0,  lsh_i = 31;
                    661:                do {
                    662:                        if ( xh == 0 ) break;
                    663:                        if ( (xh << lsh_i) != 0 ) {
                    664:                                n2h += (1 << i);
                    665:                                tl = (yl << i)&BMASK27;
                    666:                                if ( xh > tl ) xh -= tl;
                    667:                                else xh += (BASE27 - tl);
                    668:                        }
                    669:                        lsh_i--;
                    670:                } while ( ++i != BSH27 );
                    671:        }
                    672:        /*
                    673:         *      n2l + n2h*BASE27 = x/y mod 2^{2*BSH27}.
                    674:         */
                    675:        n1h = BASE27, n1l = 0, l1 = BSH27,
                    676:        d1h = d1l = 0, s1 = 0,
                    677:        d2h = 0, d2l = 1, s2 = 1;
                    678:        /**/
                    679:        while ( n2h != 0 ) {
                    680:                int i, ir, th, tl;
                    681:
                    682:                BitWidth( n2h, l2 );
                    683:                ir = BSH27 - (i = l1 - l2);
                    684:                do {
                    685:                        if ( i == 0 ) th = n2h, tl = n2l;
                    686:                        else
                    687:                                th = (n2h << i) | (n2l >> ir),
                    688:                                tl = (n2l << i) & BMASK27;
                    689:                        if ( th > n1h || (th == n1h && tl > n1l) ) goto next_i;
                    690:                        if ( tl <= n1l ) n1l -= tl;
                    691:                        else n1l += (BASE27 - tl), n1h--;
                    692:                        n1h -= th;
                    693:                        /*  (s1:d1h,d1l) -= ((s2:d2h,d2l) << i);  */
                    694:                        if ( s2 != 0 ) {
                    695:                                if ( i == 0 ) th = d2h, tl = d2l;
                    696:                                else
                    697:                                        th = (d2h << i)&BMASK27 | (d2l >> ir),
                    698:                                        tl = (d2l << i)&BMASK27;
                    699:                                if ( s1 == 0 )
                    700:                                        s1 = -s2, d1h = th, d1l = tl;
                    701:                                else if ( s1 != s2 ) {
                    702:                                        tl += d1l;
                    703:                                        d1l = tl&BMASK27;
                    704:                                        d1h = (th + (tl >> BSH27))&BMASK27;
                    705:                                        if ( d1h == 0 && d1l == 0 ) s1 = 0;
                    706:                                } else if ( d1h > th ) {
                    707:                                        if ( d1l >= tl ) d1l -= tl;
                    708:                                        else d1l += (BASE27 - tl), d1h--;
                    709:                                        d1h -= th;
                    710:                                } else if ( d1h == th ) {
                    711:                                        d1h = 0;
                    712:                                        if ( d1l == tl ) s1 = d2h = 0;
                    713:                                        else if ( d1l > tl ) d1l -= tl;
                    714:                                        else d1l = tl - d1l, s1 = -s1;
                    715:                                } else {
                    716:                                        if ( tl >= d1l ) d1l = tl - d1l;
                    717:                                        else d1l = tl + (BASE27 - d1l), th--;
                    718:                                        d1h = th - d1h;
                    719:                                        s1 = -s1;
                    720:                                }
                    721:                        }
                    722:                next_i: i--, ir++;
                    723:                } while ( n1h > n2h || (n1h == n2h && n1l >= n2l) );
                    724:                /*  swap 1 and 2  */
                    725:                th = n1h, tl = n1l;
                    726:                n1h = n2h, n1l = n2l;
                    727:                n2h = th, n2l = tl;
                    728:                l1 = l2;
                    729:                th = d1h, tl = d1l, i = s1;
                    730:                d1h = d2h, d1l = d2l, s1 = s2;
                    731:                d2h = th, d2l = tl, s2 = i;
                    732:        }
                    733:        /**/
                    734:        *pn = n2l,  *pd = d2l;
                    735:        return s2;
                    736: }
                    737:
                    738: static int igcd_spurious_factor;
                    739:
                    740: #define SaveN(s,d) {\
                    741:        int i, l; \
                    742:        for ( l = PL(d) = PL(s), i = 0; i < l; i++ ) BD(d)[i] = BD(s)[i]; \
                    743: }
                    744:
                    745: static N igcd_acc( n1, n2, nt )
                    746: N n1, n2, nt;
                    747:        /*  both n1 and n2 are assumed to be odd */
                    748: {
                    749:        int l1, l2, *b1, *b2, bw1, bw2;
                    750:        int l;
                    751:        int n, d;
                    752:        N p, s1, s2;
                    753:
                    754:        if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
                    755:        else if ( l < 0 ) { SWAP( n1, n2, N ); }
                    756:        if ( ShouldCompRemInit(n1,n2) ) {
                    757:                int w, b;
                    758:
                    759:                divn_27( n1, n2, &s1, &s2 );
                    760:                if ( !s2 ) return n2;
                    761:                b = trailingzerosn( s2, &w );
                    762:                p = n1;  n1 = n2;  n2 = p;
                    763:                rshiftn( s2, w, b, n2 );
                    764:                if ( UNIN(n2) ) return 0;
                    765:                l1 = PL(n1);
                    766:                if ( !s1 || PL(s1) < l1 ) s1 = NALLOC(l1);
                    767:        } else if ( UNIN(n2) ) return 0;
                    768:        else {
                    769:                s1 = NALLOC(PL(n1));
                    770:                s2 = NALLOC(PL(n2));
                    771:        }
                    772:        SaveN( n1, s1 );
                    773:        SaveN( n2, s2 );
                    774:        igcd_spurious_factor = 0;
                    775: loop:  l1 = PL(n1), l2 = PL(n2);
                    776:        if ( l1 <= 2 && l2 <= 2 ) {
                    777:                l = igcd_binary_2w( BD(n1), l1, BD(n2), l2, BD(nt) );
                    778:                if ( l == 0 ) return 0;
                    779:                PL(nt) = l;
                    780:                SWAP( n2, nt, N );
                    781:                goto ret;
                    782:        }
                    783:        /**/
                    784:        b1 = BD(n1), b2 = BD(n2);
                    785:        BitWidth( b1[l1 -1], bw1 );
                    786:        BitWidth( b2[l2 -1], bw2 );
                    787:        if ( (l1*BSH27 + bw1) - (l2*BSH27 + bw2) <= igcdacc_thre ) {
                    788:                l = ReducedRatMod( n1, n2, &n, &d );
                    789:                l = l < 0 ? aUplusbV_maxrshift( n, b2, l2, d, b1, l1, BD(nt) ) :
                    790:                    abs_axU_bxV_maxrshift( n, b2, l2, d, b1, l1, BD(nt) );
                    791:                igcd_spurious_factor++;
                    792:                if ( l == 0 ) goto ret;
                    793:                PL(nt) = l;
                    794:        } else {
                    795:                l = bmod_n( n1, n2, nt );
                    796:                if ( l == 0 ) goto ret;
                    797:                else if ( l < 0 ) {
                    798:                        SWAP( n1, n2, N );
                    799:                        goto loop;
                    800:                }
                    801:        }
                    802:        p = n1;
                    803:        n1 = n2;
                    804:        n2 = nt;
                    805:        nt = p;
                    806:        goto loop;
                    807:        /**/
                    808: ret:   if ( igcd_spurious_factor != 0 && !UNIN(n2) ) {
                    809:                if ( (p = igcd_bmod( n2, s1, n1 )) == 0 ) return 0;
                    810:                if ( (p = igcd_bmod( p, s2, nt )) == 0 ) return 0;
                    811:                return p;
                    812:        } else return n2;
                    813: }
                    814:
                    815: void gcdaccn( n1, n2, nr )
                    816: N n1, n2, *nr;
                    817: {
                    818:        int s1, s2, gw, gb, t1, t2;
                    819:        int w1, w2;
                    820:        N tn1, tn2, tnt, p;
                    821:
                    822:        if ( !n1 ) {
                    823:                *nr = n2;
                    824:                return;
                    825:        } else if ( !n2 ) {
                    826:                *nr = n1;
                    827:                return;
                    828:        }
                    829:        s1 = trailingzerosn( n1, &w1 );
                    830:        s2 = trailingzerosn( n2, &w2 );
                    831:        if ( w1 == w2 ) gw = w1,  gb = s1 <= s2 ? s1 : s2;
                    832:        else if ( w1 < w2 ) gw = w1,  gb = s1;
                    833:        else gw = w2,  gb = s2;
                    834:        /*
                    835:         *      true GCD must be multiplied by 2^{gw*BSH27+gs}.
                    836:         */
                    837:        t1 = PL(n1) - w1;
                    838:        t2 = PL(n2) - w2;
                    839:        if ( t1 < t2 ) t1 = t2;
                    840:        tn1 = W_NALLOC(t1);  tn2 = W_NALLOC(t1);  tnt = W_NALLOC(t1);
                    841:        rshiftn( n1, w1, s1, tn1 );
                    842:        rshiftn( n2, w2, s2, tn2 );
                    843:        /**/
                    844:        p = igcd_acc( tn1, tn2, tnt );
                    845:        /**/
                    846:        if ( p == 0 ) goto L0;
                    847:        RetTrueGCD( p, gw, gb, nr, L0 )
                    848: }
                    849:
                    850:
                    851: /********************************/
                    852:
                    853: void gcdBinary_27n( n1, n2, nr )
                    854: N n1, n2, *nr;
                    855: {
                    856:        int b1, b2, w1, w2, gw, gb;
                    857:        int l1, l2;
                    858:        N tn1, tn2, tnt, a;
                    859:
                    860:        if ( !n1 ) {
                    861:                *nr = n2; return;
                    862:        } else if ( !n2 ) {
                    863:                *nr = n1; return;
                    864:        }
                    865:        b1 = trailingzerosn( n1, &w1 );
                    866:        b2 = trailingzerosn( n2, &w2 );
                    867:        if ( w1 == w2 ) gw = w1, gb = b1 <= b2 ? b1 : b2;
                    868:        else if ( w1 < w2 ) gw = w1, gb = b1;
                    869:        else gw = w2, gb = b2;
                    870:        /*
                    871:         *      true GCD must be multiplied by 2^{gw*BSH27+gb}.
                    872:         */
                    873:        l1 = PL(n1) - w1;
                    874:        l2 = PL(n2) - w2;
                    875:        if ( l1 < l2 ) l1 = l2;
                    876:        tn1 = W_NALLOC( l1 );  tn2 = W_NALLOC( l1 );  tnt = W_NALLOC( l1 );
                    877:        rshiftn( n1, w1, b1, tn1 );
                    878:        rshiftn( n2, w2, b2, tn2 );
                    879:        /**/
                    880:        if ( igcd_algorithm == 1 ) {
                    881:                a = igcd_binary( tn1, tn2, tnt );
                    882:        } else if ( igcd_algorithm == 2 ) {
                    883:                a = igcd_bmod( tn1, tn2, tnt );
                    884:        } else {
                    885:                a = igcd_acc( tn1, tn2, tnt, -igcd_algorithm );
                    886:                if ( igcd_spurious_factor != 0 ) {
                    887:                }
                    888:        }
                    889:        RetTrueGCD( a, gw, gb, nr, L0 )
                    890: }
                    891:
                    892: /**************************/
                    893: N maxrshn( n, p )
                    894: N n;
                    895: int *p;
                    896: {
                    897:        int nw, nb, c, l;
                    898:        N new;
                    899:
                    900:        nb = trailingzerosn( n, &nw );
                    901:        l = PL(n);
                    902:        c = BD(n)[l -1];
                    903:        l -= nw;
                    904:        if ( (c >> nb) == 0 ) l--;
                    905:        new = NALLOC(l);
                    906:        rshiftn( n, nw, nb, new );
                    907:        *p = nb + nw*BSH27;
                    908:        return new;
                    909: }
                    910: #endif /* HMEXT */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>