/* * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED * All rights reserved. * * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited, * non-exclusive and royalty-free license to use, copy, modify and * redistribute, solely for non-commercial and non-profit purposes, the * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and * conditions of this Agreement. For the avoidance of doubt, you acquire * only a limited right to use the SOFTWARE hereunder, and FLL or any * third party developer retains all rights, including but not limited to * copyrights, in and to the SOFTWARE. * * (1) FLL does not grant you a license in any way for commercial * purposes. You may use the SOFTWARE only for non-commercial and * non-profit purposes only, such as academic, research and internal * business use. * (2) The SOFTWARE is protected by the Copyright Law of Japan and * international copyright treaties. If you make copies of the SOFTWARE, * with or without modification, as permitted hereunder, you shall affix * to all such copies of the SOFTWARE the above copyright notice. * (3) An explicit reference to this SOFTWARE and its copyright owner * shall be made on your publication or presentation in any form of the * results obtained by use of the SOFTWARE. * (4) In the event that you modify the SOFTWARE, you shall notify FLL by * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification * for such modification or the source code of the modified part of the * SOFTWARE. * * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES' * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY. * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT, * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE, * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE. * */ /* #include "ca.h" #include "base.h" */ #undef CALL /**** Machine specific ****/ #define BIT_WIDTH_OF_INT 32 #if BSH == BIT_WIDTH_OF_INT #define BMask ((unsigned)(-1)) #define BMASK BMask #endif int igcd_algorithm = 0; /* == 0 : Euclid, * == 1 : binary, * == 2 : bmod, * >= 3 : (Weber's accelerated)/(Jebelean's generalized binary) algorithm, */ int igcd_thre_inidiv = 50; /* * In the non-Euclidean algorithms, if the ratio of the lengths (number * of words) of two integers is >= igcd_thre_inidiv, we first perform * remainder calculation. * If == 0, this remainder calculation is not performed. */ int igcdacc_thre = 10; /* * In the accelerated algorithm, if the bit-lengths of two integers is * > igcdacc_thre, "bmod" reduction is done. */ #include "inline.h" #define TRAILINGZEROS(t,cntr) for(cntr=0;(t&1)==0;t>>=1)cntr++; #define W_NALLOC(d) ((N)ALLOCA(TRUESIZE(oN,(d)-1,int))) #define ShouldCompRemInit(n1,n2) (igcd_thre_inidiv != 0 && PL(n1) >= igcd_thre_inidiv*PL(n2)) #define IniDiv(n1,n2) \ if ( ShouldCompRemInit(n1,n2) ) {\ N q, r; int w, b; \ divn(n1,n2,&q,&r); \ if ( !r ) return(n2); \ b = trailingzerosn( r, &w ); \ q = n1; n1 = n2; n2 = q; \ rshiftn( r, w, b, n2 ); \ } /* * Binary GCD algorithm by J.Stein * [J. Comp. Phys. Vol. 1 (1967), pp. 397-405)]: * The right-shift binary algorithm is used. */ /* * subsidiary routines for gcdbinn below. */ static int /* number of bits */ trailingzeros_nbd( /* BD of N */ nbd, pnw ) int *nbd, *pnw /* number of zero words */; { #if BSH == BIT_WIDTH_OF_INT unsigned #endif int nw, nb, w; for ( nw = 0; (w = *nbd) == 0; nbd++ ) nw++; TRAILINGZEROS(w,nb); *pnw = nw; return nb; } #define trailingzerosn(n,pnw) trailingzeros_nbd(BD(n),pnw) static int /* PL of N */ rshift_nbd( /* BD of N */ nbd, /* PL of N */ nl, /* # words */ shw, /* # bits */ shb, /* BD of N */ p ) #if BSH == BIT_WIDTH_OF_INT unsigned #endif int *nbd, nl, shw, shb, *p; { unsigned int i, v, w, lshb; /* <---- */ nbd += shw, i = (nl -= shw); if ( shb == 0 ) { for ( ; nl > 0; nl-- ) *p++ = *nbd++; return i; } else if ( nl < 2 ) { *p = (*nbd) >> shb; return 1; } for ( lshb = BSH - shb, v = *nbd++; --nl > 0; v = w ) { w = *nbd++; #if BSH == BIT_WIDTH_OF_INT *p++ = (v >> shb) | (w << lshb); /********/ #else *p++ = (v >> shb) | ((w << lshb)&BMASK); #endif } if ( (v >>= shb) == 0 ) return( i-1 ); *p = v; return i; } #define rshiftn(ns,shw,shb,nd) (PL(nd)=rshift_nbd(BD(ns),PL(ns),shw,shb,BD(nd))) /* nd <= ns << (shb + shw*BSH), returns PL of the result */ #ifdef FULLSET static N N_of_i_lshifted_by_wb( i, gw, gb ) int i, gw, gb; /* * returns pointer to a new struct (N)(((int)i) >> (gb + gw*BSH)) */ { unsigned int j, l, *p; /* <---- */ N n; j = i >> (BSH - gb); #if BSH == BIT_WIDTH_OF_INT i = (i << gb); /********/ #else i = (i << gb)&BMASK; #endif l = j != 0 ? gw + 2 : gw + 1; n = NALLOC(l); PL(n) = l; for ( p = BD(n); gw-- > 0; ) *p++ = 0; *p++ = i; if ( j != 0 ) *p = j; return n; } #endif /* FULLSET */ /* * routines to make a new struct * (N)(((BD of N)(b[0],...,b[lb-1])) << (gb + gw*BSH)) */ static N N_of_nbd_lshifted_by_wb( /* BD of N */ b, /* PL of N */ lb, gw, gb ) int *b, lb, gw, gb; /* * returns pointer to a new struct * (N)(((BD of N)(b[0],...,b[lb-1])) << (gb + gw*BSH)) */ { unsigned int rsh, s, t, *p, l; /* <---- */ N n; l = lb + gw; if ( gb == 0 ) { n = NALLOC(l); PL(n) = l; for ( p = BD(n); gw-- > 0; ) *p++ = 0; while ( lb-- > 0 ) *p++ = *b++; return n; } rsh = BSH - gb; s = b[lb-1]; if ( (t = s >> rsh) != 0 ) { n = NALLOC(l+1); PL(n) = l+1; (p = BD(n))[l] = t; } else { n = NALLOC(l); PL(n) = l; p = BD(n); } while ( gw-- > 0 ) *p++ = 0; #if BSH == BIT_WIDTH_OF_INT *p++ = (t = *b++) << gb; /********/ for ( ; --lb > 0; t = s ) *p++ = (t >> rsh) | ((s = *b++) << gb); /********/ #else *p++ = ((t = *b++) << gb)&BMASK; for ( ; --lb > 0; t = s ) *p++ = (t >> rsh) | (((s = *b++) << gb)&BMASK); #endif return n; } #define N_of_n_lshifted_by_wb(a,gw,gb) N_of_nbd_lshifted_by_wb(BD(a),PL(a),gw,gb) #define SWAP(a,b,Type) { Type temp=a;a=b;b=temp;} #define SIGNED_VAL(a,s) ((s)>0?(a):-(a)) #ifdef CALL static int bw_int32( n ) unsigned int n; /* <---- */ { int w; w = 0; #if BSH > 32 if ( n > 0xffffffff ) w += 32, n >>= 32; #endif if ( n >= 0x10000 ) w += 16, n >>= 16; if ( n >= 0x100 ) w += 8, n >>= 8; if ( n >= 0x10 ) w += 4, n >>= 4; if ( n >= 0x4 ) w += 2, n >>= 2; if ( n >= 0x2 ) w += 1, n >>= 1; if ( n != 0 ) ++w; return w; } #define BitWidth(n,bw) bw = bw_int32( n ) #else #if BSH > 32 #define BitWidth(n,bw) {\ unsigned int k = (n); \ bw = 0; \ if ( k > 0xffffffff ) bw += 32, k >>= 32; \ if ( k >= 0x10000 ) bw += 16, k >>= 16; \ if ( k >= 0x100 ) bw += 8, k >>= 8; \ if ( k >= 0x10 ) bw += 4, k >>= 4; \ if ( k >= 0x4 ) bw += 2, k >>= 2; \ if ( k >= 0x2 ) bw += 1, k >>= 1; \ if ( k != 0 ) bw++; \ } #else #define BitWidth(n,bw) {\ unsigned int k = (n); \ bw = 0; \ if ( k >= 0x10000 ) bw += 16, k >>= 16; \ if ( k >= 0x100 ) bw += 8, k >>= 8; \ if ( k >= 0x10 ) bw += 4, k >>= 4; \ if ( k >= 0x4 ) bw += 2, k >>= 2; \ if ( k >= 0x2 ) bw += 1, k >>= 1; \ if ( k != 0 ) bw++; \ } #endif #endif #include "igcdhack.c" /* * Implementation of the binary GCD algorithm for two oN structs * (big-integers) in risa. * * The major operations in the following algorithms are the binary-shifts * and the updates of (u, v) by (min(u,v), |u-v|), and are to be open-coded * without using routines for oN structures just as in addn() or subn(). */ static int igcd_binary_2w( u, lu, v, lv, pans ) #if BSH == BIT_WIDTH_OF_INT unsigned #endif int *u, lu, *v, lv, *pans; /* <---- */ /* both u[0:lu-1] and v[0:lv-1] are assumed to be odd */ { #if BSH == BIT_WIDTH_OF_INT unsigned #endif int i, h1, l1, h2, l2; /* <---- */ l1 = u[0], l2 = v[0]; h1 = lu <= 1 ? 0 : u[1]; h2 = lv <= 1 ? 0 : v[1]; /**/ loop: if ( h1 == 0 ) { no_hi1: if ( h2 == 0 ) goto one_word; no_hi1n:if ( l1 == 1 ) return 0; #if BSH == BIT_WIDTH_OF_INT if ( l2 == l1 ) { for ( l2 = h2; (l2&1) == 0; l2 >>= 1 ) ; goto one_word; } else if ( l2 < l1 ) { l2 -= l1, h2--; } else l2 -= l1; i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 ); l2 |= (h2 << (BSH - i)); #else if ( (l2 -= l1) == 0 ) { for ( l2 = h2; (l2&1) == 0; l2 >>= 1 ) ; goto one_word; } else if ( l2 < 0 ) h2--, l2 += BASE; i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 ); l2 |= ((h2 << (BSH - i)) & BMASK); #endif h2 >>= i; goto no_hi1; } else if ( h2 == 0 ) { no_hi2: if ( l2 == 1 ) return 0; #if BSH == BIT_WIDTH_OF_INT if ( l1 == l2 ) { for ( l1 = h1; (l1&1) == 0; l1 >>= 1 ) ; goto one_word; } else if ( l1 < l2 ) { l1 -= l2, h1--; } else l1 -= l2; i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 ); l1 |= (h1 << (BSH - i)); #else if ( (l1 -= l2) == 0 ) { for ( l1 = h1; (l1&1) == 0; l1 >>= 1 ) ; goto one_word; } else if ( l1 < 0 ) h1--, l1 += BASE; i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 ); l1 |= ((h1 << (BSH - i)) & BMASK); #endif if ( (h1 >>= i) == 0 ) goto one_word; goto no_hi2; } else if ( l1 == l2 ) { if ( h1 == h2 ) { pans[0] = l1, pans[1] = h1; return 2; } else if ( h1 > h2 ) { for ( l1 = h1 - h2; (l1&1) == 0; l1 >>= 1 ) ; goto no_hi1n; } else { for ( l2 = h2 - h1; (l2&1) == 0; l2 >>= 1 ) ; goto no_hi2; } } else if ( h1 == h2 ) { if ( l1 > l2 ) { for ( l1 -= l2; (l1&1) == 0; l1 >>= 1 ) ; goto no_hi1n; } else { for ( l2 -= l1; (l2&1) == 0; l2 >>= 1 ) ; goto no_hi2; } } else if ( h1 > h2 ) { h1 -= h2; #if BSH == BIT_WIDTH_OF_INT if ( l1 < l2 ) l1 -= l2, h1--; else l1 -= l2; i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 ); l1 |= (h1 << (BSH - i)); #else if ( (l1 -= l2) < 0 ) h1--, l1 += BASE; i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 ); l1 |= ((h1 << (BSH - i)) & BMASK); #endif h1 >>= i; } else { h2 -= h1; #if BSH == BIT_WIDTH_OF_INT if ( l2 < l1 ) l2 -= l1, h2--; else l2 -= l1; i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 ); l2 |= (h2 << (BSH - i)); #else if ( (l2 -= l1) < 0 ) h2--, l2 += BASE; i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 ); l2 |= ((h2 << (BSH - i)) & BMASK); #endif h2 >>= i; } goto loop; one_word: if ( l1 == 1 || l2 == 1 ) return 0; else if ( l1 == l2 ) { pans[0] = l1; return 1; } one_word_neq: if ( l1 > l2 ) { l1 -= l2; do { l1 >>= 1; } while ( (l1&1) == 0 ); goto one_word; } else { l2 -= l1; do { l2 >>= 1; } while ( (l2&1) == 0 ); goto one_word; } } static N igcd_binary( n1, n2, nt ) N n1, n2, nt; /* both n1 and n2 are assumed to be odd */ { int l1, *b1, l2, *b2, *bt = BD(nt); int l; if ( (l = cmpn( n1, n2 )) == 0 ) return n1; else if ( l < 0 ) { SWAP( n1, n2, N ); } IniDiv( n1, n2 ); if ( UNIN(n2) ) return 0; l1 = PL(n1), b1 = BD(n1), l2 = PL(n2), b2 = BD(n2); loop: #if 0000 { int i, b, w; printf( "===============\n" ); for ( i = 0; i < l1; i++ ) printf( "0x%08x ", b1[i] ); printf( "\n" ); for ( i = 0; i < l2; i++ ) printf( "0x%08x ", b2[i] ); printf( "\n" ); } #endif if ( l1 <= 2 && l2 <= 2 ) { l = igcd_binary_2w( b1, l1, b2, l2, bt ); if ( l == 0 ) return 0; PL(nt) = l; return nt; } /**/ l = abs_U_V_maxrshift( b1, l1, b2, l2, bt ); /**/ if ( l == 0 ) { PL(n1) = l1; return n1; } else if ( l > 0 ) { l1 = l; SWAP( b1, bt, int * ); SWAP( n1, nt, N ); } else { l2 = -l; SWAP( b2, bt, int * ); SWAP( n2, nt, N ); } goto loop; } #define RetTrueGCD(p,gw,gb,nr,l0) \ if (p==0) { l0: if (gw==0&&gb==0) { *(nr)=ONEN; return; } else p=ONEN; } \ *(nr) = N_of_n_lshifted_by_wb(p,gw,gb); \ return; void gcdbinn( n1, n2, nr ) N n1, n2, *nr; { int s1, s2, gw, gb, t1, t2; int w1, w2; N tn1, tn2, tnt, p; if ( !n1 ) { *nr = n2; return; } else if ( !n2 ) { *nr = n1; return; } s1 = trailingzerosn( n1, &w1 ); s2 = trailingzerosn( n2, &w2 ); if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2; else if ( w1 < w2 ) gw = w1, gb = s1; else gw = w2, gb = s2; /* * true GCD must be multiplied by 2^{gw*BSH+gb}. */ t1 = PL(n1) - w1; t2 = PL(n2) - w2; if ( t1 < t2 ) t1 = t2; tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1); rshiftn( n1, w1, s1, tn1 ); rshiftn( n2, w2, s2, tn2 ); p = igcd_binary( tn1, tn2, tnt ); RetTrueGCD( p, gw, gb, nr, L0 ) } /* * The bmod gcd algorithm stated briefly in K.Weber's paper * [ACM TOMS, Vol.21, No. 1 (1995), pp. 111-122]. * It replaces the subtraction (n1 - n2) in the binary algorithm * by (n1 - S*n2) with such an S that (n1 - S*n2) \equiv 0 \bmod 2^BSH, * which should improve the efficiency when n1 \gg n2. */ /* subsidiary routines */ #if BSH == BIT_WIDTH_OF_INT #ifdef CALL static int u_div_v_mod_2toBSH( u, v ) unsigned int u, v; /* * u/v mod 2^BSH. */ { unsigned int i, lsh_i, m; lsh_i = (sizeof(int) << 3) - 1; m = i = 0; do { if ( u == 0 ) break; if ( (u << lsh_i) != 0 ) { m += (1 << i); u -= (v << i); } lsh_i--; } while ( ++i != BSH ); return m; } #define Comp_U_div_V_mod_BASE(U,V,R) R = u_div_v_mod_2toBSH(U,V) #else /* CALL */ #define Comp_U_div_V_mod_BASE(U,V,R) {\ unsigned int u = (U), v = (V), i, lsh; \ /* U and V are assumed to be odd */ \ i = R = 1, lsh = (sizeof(int) << 3) - 2; u = (u - v); \ do { if ( u == 0 ) break; \ if ( (u << lsh) != 0 ) R += (1 << i), u = (u - (v << i)); \ i++, lsh--; \ } while ( i < BSH ); \ } #endif /* CALL */ #else #ifdef CALL static int u_div_v_mod_2tos( u, v, s ) int u, v, s; /* * u/v mod 2^s. */ { int i, lsh_i, mask, m; mask = (1 << s) - 1; lsh_i = (sizeof(int) << 3) - 1; m = i = 0; u &= mask, v &= mask; do { if ( u == 0 ) break; if ( (u << lsh_i) != 0 ) { m += (1 << i); u -= (v << i); u &= mask; } lsh_i--; } while ( ++i != s ); return m; } #define Comp_U_div_V_mod_BASE(U,V,R) R = u_div_v_mod_2tos(U,V,BSH) #else #define Comp_U_div_V_mod_BASE(U,V,R) {\ int u = (U), v = (V), i, lsh; \ /* U and V are assumed to be odd */ \ i = R = 1, lsh = (sizeof(int) << 3) - 2; u = (u - v) & BMASK; \ do { if ( u == 0 ) break; \ if ( (u << lsh) != 0 ) R += (1 << i), u = (u - (v << i)) & BMASK; \ i++, lsh--; \ } while ( i < BSH ); \ } #endif #endif static int bmod_n( nu, nv, na ) N nu, nv, na; /* * Computes (u[] \bmod v[]) >> (as much as possible) in r[]. */ { #if BSH == BIT_WIDTH_OF_INT unsigned int *u = BD(nu), *v = BD(nv), *r = BD(na); unsigned int *p, a, t, l, v0, vh, bv, v0r; int lu = PL(nu), lv = PL(nv), z; #else int *u = BD(nu), lu = PL(nu), *v = BD(nv), lv = PL(nv), *r = BD(na); int *p, a, t, l, z, v0, vh, bv, v0r; #endif v0 = v[0]; if ( lv == 1 ) { if ( lu == 1 ) a = u[0] % v0; else { p = &u[--lu]; #if BSH == BIT_WIDTH_OF_INT a = (*p) % v0, t = (unsigned)(-((int)v0)) % v0; #else a = (*p) % v0, t = BASE % v0; #endif for ( ; --lu >= 0; a = l ) { --p; DMAR(a,t,*p,v0,l); /* l <= (a*t + p[0])%v0 */ } } if ( a == 0 ) return 0; while ( (a&1) == 0 ) a >>= 1; *r = a; return( PL(na) = 1 ); } Comp_U_div_V_mod_BASE( 1, v0, v0r ); vh = v[lv -1]; BitWidth( vh, bv ); bv--; t = 1 << bv; l = lv + 1; for ( z = -1; lu > l || lu == l && u[lu-1] >= t; z = -z ) { #if BSH == BIT_WIDTH_OF_INT a = (v0r*u[0]); #else a = (v0r*u[0])&BMASK; #endif /**/ #if 0000 { int i; for ( i = 0; i < lu; i++ ) printf( "0x%08x ", u[i] ); printf( "\n- a=0x%08x, %u*\n", a, a ); for ( i = 0; i < lv; i++ ) printf( "0x%08x ", v[i] ); printf( "\n=>\n" ); } #endif lu = abs_U_aV_maxrshift( u, lu, a, v, lv, r ); /**/ #if 0000 printf( "***lu=%d\n", lu ); if ( lu != 0 ) { int i; for ( i = 0; i < lu; i++ ) printf( "0x%08x ", r[i] ); printf( "\n" ); } #endif if ( lu == 0 ) return 0; p = r; r = u; u = p; } if ( lu < lv ) goto ret; t = u[lu-1]; if ( lu > lv ) l = BSH; else if ( t < vh ) goto ret; else l = 0; BitWidth( t, a ); l += (a - bv); #if BSH == BIT_WIDTH_OF_INT a = (v0r*u[0])&((unsigned)(-1) >> (BSH - l)); #else a = (v0r*u[0])&(BMASK >> (BSH - l)); #endif #if 0000 { int i; for ( i = 0; i < lu; i++ ) printf( "0x%08x ", u[i] ); printf( "\n - a=0x%08x, %u*\n", a, a ); for ( i = 0; i < lv; i++ ) printf( "0x%08x ", v[i] ); printf( "\n =>\n" ); } #endif /**/ lu = abs_U_aV_maxrshift( u, lu, a, v, lv, r ); /**/ #if 0000 printf( "::: lu=%d\n", lu ); if ( lu != 0 ) { int i; for ( i = 0; i < lu; i++ ) printf( "0x%08x ", r[i] ); printf( "\n" ); } #endif if ( lu == 0 ) return 0; z = -z; ret: if ( z > 0 ) return( PL(na) = lu ); PL(nu) = lu; return( -lu ); } static N igcd_bmod( n1, n2, nt ) N n1, n2, nt; /* both n1 and n2 are assumed to be odd */ { int l1, l2; int l; if ( (l = cmpn( n1, n2 )) == 0 ) return n1; else if ( l < 0 ) { SWAP( n1, n2, N ); } IniDiv( n1, n2 ); if ( UNIN(n2) ) return 0; loop: if ( (l1 = PL(n1)) <= 2 && (l2 = PL(n2)) <= 2 ) { l = igcd_binary_2w( BD(n1), l1, BD(n2), l2, BD(nt) ); if ( l == 0 ) return 0; PL(nt) = l; return nt; } /**/ l = bmod_n( n1, n2, nt ); /**/ if ( l == 0 ) return n2; else if ( l > 0 ) { N tmp = n1; n1 = n2; n2 = nt; nt = tmp; } else SWAP( n1, n2, N ); goto loop; } void gcdbmodn( n1, n2, nr ) N n1, n2, *nr; { int s1, s2, gw, gb, t1, t2; int w1, w2; N tn1, tn2, tnt, p; if ( !n1 ) { *nr = n2; return; } else if ( !n2 ) { *nr = n1; return; } s1 = trailingzerosn( n1, &w1 ); s2 = trailingzerosn( n2, &w2 ); if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2; else if ( w1 < w2 ) gw = w1, gb = s1; else gw = w2, gb = s2; /* * true GCD must be multiplied by 2^{gw*BSH+gs}. */ t1 = PL(n1) - w1; t2 = PL(n2) - w2; if ( t1 < t2 ) t1 = t2; tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1); rshiftn( n1, w1, s1, tn1 ); rshiftn( n2, w2, s2, tn2 ); p = igcd_bmod( tn1, tn2, tnt ); RetTrueGCD( p, gw, gb, nr, L0 ) } /* * The accelerated integer GCD algorithm by K.Weber * [ACM TOMS, Vol.21, No. 1 (1995), pp. 111-122]: */ static int ReducedRatMod( x, y, pn, pd ) N x, y; #if BSH == BIT_WIDTH_OF_INT unsigned #endif int *pn, *pd; /* * Let m = 2^{2*BSH} = 2*BASE. We assume x, y > 0 and \gcd(x,m) * = \gcd(y,m) = 1. This routine computes n and d (resp. returned * in *pn and *pd) such that 0 < n, |d| < \sqrt{m} and * n*y \equiv x*d \bmod m. */ { #if BSH == BIT_WIDTH_OF_INT unsigned int n1h, n1l, d1h, d1l, n2h, n2l, d2h, d2l; unsigned int th, tl, l1, l2, i, ir; int s1, s2; #else int n1h, n1l, d1h, d1l, n2h, n2l, d2h, d2l; int th, tl, l1, l2, i, ir; int s1, s2; #endif { #if BSH == BIT_WIDTH_OF_INT unsigned #endif int xh, xl, yh, yl, lsh_i; xl = BD(x)[0]; xh = PL(x) > 1 ? BD(x)[1] : 0; yl = BD(y)[0]; yh = PL(y) > 1 ? BD(y)[1] : 0; #if 0000 printf( "*** RedRatMod: (0x%08x:0x%08x=%u*2^%d+%u)\n /(0x%08x:0x%08x=%u*2^%d+%u) mod 2^%d\n", xh,xl, xh,BSH,xl, yh,yl, yh,BSH,yl, BSH ); #endif Comp_U_div_V_mod_BASE( xl, yl, n2l ); DM(n2l,yl,th,tl) /* n2l*yl = tl+th*BASE, where tl==xl. */; #if BSH == BIT_WIDTH_OF_INT xh -= th; #else if ( xh > th ) xh -= th; else xh += (BASE - th); #endif DM(n2l,yh,th,tl) /* n2l*yh = tl+th*BASE. */; #if BSH == BIT_WIDTH_OF_INT xh -= tl; #else if ( xh > tl ) xh -= tl; else xh += (BASE - tl); #endif /* n2h = i = 0, lsh_i = 31;*/ n2h = i = 0, lsh_i = BIT_WIDTH_OF_INT -1; do { if ( xh == 0 ) break; if ( (xh << lsh_i) != 0 ) { n2h += (1 << i); #if BSH == BIT_WIDTH_OF_INT tl = yl << i; xh -= tl; #else tl = (yl << i)&BMASK; if ( xh > tl ) xh -= tl; else xh += (BASE - tl); #endif } lsh_i--; } while ( ++i != BSH ); } /* * n2l + n2h*BASE = x/y mod 2^{2*BSH}. */ #if 0000 printf( "=====> 0x%08x(%u) + 2^%d*0x%08x(%u)\n", n2l, n2l, BSH, n2h, n2h ); #endif d2h = 0, d2l = 1, s2 = 1; #if BSH == BIT_WIDTH_OF_INT if ( n2h == 0 ) goto done; BitWidth( n2h, l2 ); if ( l2 == BSH ) { d1h = 0, d1l = 1, s1 = -1; th = n2h, tl = n2l; } else { i = BSH - l2; d1h = 0, d1l = 1 << i, s1 = -1; th = (n2h << i) | (n2l >> l2); tl = n2l << i; } if ( tl == 0 ) n1h = - th, n1l = 0; else n1h = BMASK - th, n1l = - tl; /**/ if ( n1h < n2h || (n1h == n2h && n1l < n2l) ) goto swap12; BitWidth( n1h, l1 ); goto sub12; #else n1h = BASE, n1l = 0, l1 = BSH, d1h = d1l = 0, s1 = 0; #endif /**/ while ( n2h != 0 ) { BitWidth( n2h, l2 ); sub12: ir = BSH - (i = l1 - l2); do { if ( i == 0 ) th = n2h, tl = n2l; else th = (n2h << i) | (n2l >> ir), #if BSH == BIT_WIDTH_OF_INT tl = n2l << i; #else tl = (n2l << i) & BMASK; #endif if ( th > n1h || (th == n1h && tl > n1l) ) goto next_i; #if BSH == BIT_WIDTH_OF_INT if ( tl > n1l ) n1h--; n1l -= tl; #else if ( tl <= n1l ) n1l -= tl; else n1l += (BASE - tl), n1h--; #endif n1h -= th; /* (s1:d1h,d1l) -= ((s2:d2h,d2l) << i); */ if ( s2 != 0 ) { if ( i == 0 ) th = d2h, tl = d2l; else #if BSH == BIT_WIDTH_OF_INT th = (d2h << i) | (d2l >> ir), tl = (d2l << i); #else th = (d2h << i)&BMASK | (d2l >> ir), tl = (d2l << i)&BMASK; #endif if ( s1 == 0 ) s1 = -s2, d1h = th, d1l = tl; else if ( s1 != s2 ) { #if BSH == BIT_WIDTH_OF_INT d1l += tl, d1h += th; if ( d1l < tl ) d1h++; #else tl += d1l; d1l = tl&BMASK; d1h = (d1h + th + (tl >> BSH))&BMASK; #endif if ( d1h == 0 && d1l == 0 ) s1 = 0; } else if ( d1h > th ) { #if BSH == BIT_WIDTH_OF_INT if ( d1l < tl ) d1h--; d1l -= tl; #else if ( d1l >= tl ) d1l -= tl; else d1l += (BASE - tl), d1h--; #endif d1h -= th; } else if ( d1h == th ) { d1h = 0; if ( d1l == tl ) s1 = d2h = 0; else if ( d1l > tl ) d1l -= tl; else d1l = tl - d1l, s1 = -s1; } else { #if BSH == BIT_WIDTH_OF_INT if ( tl < d1l ) th--; d1l = tl - d1l; #else if ( tl >= d1l ) d1l = tl - d1l; else d1l = tl + (BASE - d1l), th--; #endif d1h = th - d1h; s1 = -s1; } } next_i: i--, ir++; } while ( n1h > n2h || (n1h == n2h && n1l >= n2l) ); swap12: /* swap 1 and 2 */ th = n1h, tl = n1l; n1h = n2h, n1l = n2l; n2h = th, n2l = tl; l1 = l2; th = d1h, tl = d1l, i = s1; d1h = d2h, d1l = d2l, s1 = s2; d2h = th, d2l = tl, s2 = i; } /**/ done: *pn = n2l, *pd = d2l; return s2; } static int igcd_spurious_factor; #define SaveN(s,d) {\ int i, l; \ for ( l = PL(d) = PL(s), i = 0; i < l; i++ ) BD(d)[i] = BD(s)[i]; \ } static N igcd_acc( n1, n2, nt ) N n1, n2, nt; /* both n1 and n2 are assumed to be odd */ { int l1, l2, *b1, *b2, bw1, bw2; int l; int n, d; N p, s1, s2; if ( (l = cmpn( n1, n2 )) == 0 ) return n1; else if ( l < 0 ) { SWAP( n1, n2, N ); } if ( ShouldCompRemInit(n1,n2) ) { int w, b; divn( n1, n2, &s1, &s2 ); if ( !s2 ) return n2; b = trailingzerosn( s2, &w ); p = n1; n1 = n2; n2 = p; rshiftn( s2, w, b, n2 ); if ( UNIN(n2) ) return 0; l1 = PL(n1); if ( !s1 || PL(s1) < l1 ) s1 = NALLOC(l1); } else if ( UNIN(n2) ) return 0; else { s1 = NALLOC(PL(n1)); s2 = NALLOC(PL(n2)); } SaveN( n1, s1 ); SaveN( n2, s2 ); igcd_spurious_factor = 0; loop: l1 = PL(n1), l2 = PL(n2); if ( l1 <= 2 && l2 <= 2 ) { l = igcd_binary_2w( BD(n1), l1, BD(n2), l2, BD(nt) ); if ( l == 0 ) return 0; PL(nt) = l; SWAP( n2, nt, N ); goto ret; } /**/ b1 = BD(n1), b2 = BD(n2); BitWidth( b1[l1 -1], bw1 ); BitWidth( b2[l2 -1], bw2 ); if ( (l1*BSH + bw1) - (l2*BSH + bw2) <= igcdacc_thre ) { l = ReducedRatMod( n1, n2, &n, &d ); l = l < 0 ? aUplusbV_maxrshift( n, b2, l2, d, b1, l1, BD(nt) ) : abs_axU_bxV_maxrshift( n, b2, l2, d, b1, l1, BD(nt) ); igcd_spurious_factor++; if ( l == 0 ) goto ret; PL(nt) = l; } else { l = bmod_n( n1, n2, nt ); if ( l == 0 ) goto ret; else if ( l < 0 ) { SWAP( n1, n2, N ); goto loop; } } p = n1; n1 = n2; n2 = nt; nt = p; goto loop; /**/ ret: if ( igcd_spurious_factor != 0 && !UNIN(n2) ) { if ( (p = igcd_bmod( n2, s1, n1 )) == 0 ) return 0; if ( (p = igcd_bmod( p, s2, nt )) == 0 ) return 0; return p; } else return n2; } void gcdaccn( n1, n2, nr ) N n1, n2, *nr; { int s1, s2, gw, gb, t1, t2; int w1, w2; N tn1, tn2, tnt, p; if ( !n1 ) { *nr = n2; return; } else if ( !n2 ) { *nr = n1; return; } s1 = trailingzerosn( n1, &w1 ); s2 = trailingzerosn( n2, &w2 ); if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2; else if ( w1 < w2 ) gw = w1, gb = s1; else gw = w2, gb = s2; /* * true GCD must be multiplied by 2^{gw*BSH+gs}. */ t1 = PL(n1) - w1; t2 = PL(n2) - w2; if ( t1 < t2 ) t1 = t2; tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1); rshiftn( n1, w1, s1, tn1 ); rshiftn( n2, w2, s2, tn2 ); /**/ p = igcd_acc( tn1, tn2, tnt ); /**/ if ( p == 0 ) goto L0; RetTrueGCD( p, gw, gb, nr, L0 ) } /********************************/ void gcdn_HMEXT( n1, n2, nr ) N n1, n2, *nr; { int b1, b2, w1, w2, gw, gb; int l1, l2; N tn1, tn2, tnt, a; if ( !n1 ) { *nr = n2; return; } else if ( !n2 ) { *nr = n1; return; } b1 = trailingzerosn( n1, &w1 ); b2 = trailingzerosn( n2, &w2 ); if ( w1 == w2 ) gw = w1, gb = b1 <= b2 ? b1 : b2; else if ( w1 < w2 ) gw = w1, gb = b1; else gw = w2, gb = b2; /* * true GCD must be multiplied by 2^{gw*BSH+gb}. */ l1 = PL(n1) - w1; l2 = PL(n2) - w2; if ( l1 < l2 ) l1 = l2; tn1 = W_NALLOC( l1 ); tn2 = W_NALLOC( l1 ); tnt = W_NALLOC( l1 ); rshiftn( n1, w1, b1, tn1 ); rshiftn( n2, w2, b2, tn2 ); /**/ if ( igcd_algorithm == 1 ) { a = igcd_binary( tn1, tn2, tnt ); } else if ( igcd_algorithm == 2 ) { a = igcd_bmod( tn1, tn2, tnt ); } else { a = igcd_acc( tn1, tn2, tnt ); if ( igcd_spurious_factor != 0 ) { } } RetTrueGCD( a, gw, gb, nr, L0 ) } /**************************/ #if 111 N maxrshn( n, p ) N n; int *p; { int nw, nb, c, l; N new; nb = trailingzerosn( n, &nw ); l = PL(n); c = BD(n)[l -1]; l -= nw; if ( (c >> nb) == 0 ) l--; new = NALLOC(l); rshiftn( n, nw, nb, new ); *p = nb + nw*BSH; return new; } #endif