Annotation of OpenXM_contrib2/asir2000/engine-27/N-27.c, Revision 1.1
1.1 ! noro 1: /* $OpenXM: OpenXM/src/asir99/engine-27/N-27.c,v 1.1.1.1 1999/11/10 08:12:27 noro Exp $ */
! 2: #include "ca-27.h"
! 3: #include "base.h"
! 4:
! 5: #ifndef HMEXT
! 6: #define HMEXT
! 7: #endif
! 8:
! 9: #ifdef HMEXT
! 10: #undef FULLSET
! 11: #undef CALL
! 12: #undef DIVISION
! 13:
! 14: int igcd_algorithm = 0;
! 15: /* == 0 : Euclid,
! 16: * == 1 : binary,
! 17: * == 2 : bmod,
! 18: * >= 3 : (Weber's accelerated)/(Jebelean's generalized binary) algorithm,
! 19: */
! 20: int igcd_thre_inidiv = 50;
! 21: /*
! 22: * In the non-Euclidean algorithms, if the ratio of the lengths (number
! 23: * of words) of two integers is >= igcd_thre_inidiv, we first perform
! 24: * remainder calculation.
! 25: * If == 0, this remainder calculation is not performed.
! 26: */
! 27: int igcdacc_thre = 10;
! 28: /*
! 29: * In the accelerated algorithm, if the bit-lengths of two integers is
! 30: * > igcdacc_thre, "bmod" reduction is done.
! 31: */
! 32:
! 33: #include "inline.h"
! 34:
! 35: #define TRAILINGZEROS(t,cntr) for(cntr=0;(t&1)==0;t>>=1)cntr++;
! 36:
! 37: #define W_NALLOC(d) ((N)ALLOCA(TRUESIZE(oN,(d)-1,int)))
! 38:
! 39: #define ShouldCompRemInit(n1,n2) (igcd_thre_inidiv != 0 && PL(n1) >= igcd_thre_inidiv*PL(n2))
! 40:
! 41: #define IniDiv(n1,n2) \
! 42: if ( ShouldCompRemInit(n1,n2) ) {\
! 43: N q, r; int w, b; \
! 44: divn_27(n1,n2,&q,&r); \
! 45: if ( !r ) return(n2); \
! 46: b = trailingzerosn( r, &w ); \
! 47: q = n1; n1 = n2; n2 = q; \
! 48: rshiftn( r, w, b, n2 ); \
! 49: }
! 50:
! 51: /*
! 52: * Binary GCD algorithm by J.Stein
! 53: * [J. Comp. Phys. Vol. 1 (1967), pp. 397-405)]:
! 54: * The right-shift binary algorithm is used.
! 55: */
! 56:
! 57:
! 58: /*
! 59: * subsidiary routines for gcdbinn below.
! 60: */
! 61: static int /* number of bits */ trailingzeros_nbd( /* BD of N */ nbd, pnw )
! 62: int *nbd, *pnw /* number of zero words */;
! 63: {
! 64: int nw, nb, w;
! 65:
! 66: for ( nw = 0; (w = *nbd) == 0; nbd++ ) nw++;
! 67: TRAILINGZEROS(w,nb);
! 68: *pnw = nw;
! 69: return nb;
! 70: }
! 71:
! 72: #define trailingzerosn(n,pnw) trailingzeros_nbd(BD(n),pnw)
! 73:
! 74: static int /* PL of N */ rshift_nbd( /* BD of N */ nbd, /* PL of N */ nl,
! 75: /* # words */ shw, /* # bits */ shb, /* BD of N */ p )
! 76: int *nbd, nl, shw, shb, *p;
! 77: {
! 78: int i, v, w, lshb;
! 79:
! 80: nbd += shw, i = (nl -= shw);
! 81: if ( shb == 0 ) {
! 82: for ( ; nl > 0; nl-- ) *p++ = *nbd++;
! 83: return i;
! 84: } else if ( nl < 2 ) {
! 85: *p = (*nbd) >> shb;
! 86: return 1;
! 87: }
! 88: for ( lshb = BSH27 - shb, v = *nbd++; --nl > 0; v = w ) {
! 89: w = *nbd++;
! 90: *p++ = (v >> shb) | ((w << lshb)&BMASK27);
! 91: }
! 92: if ( (v >>= shb) == 0 ) return( i-1 );
! 93: *p = v;
! 94: return i;
! 95: }
! 96:
! 97: #define rshiftn(ns,shw,shb,nd) (PL(nd)=rshift_nbd(BD(ns),PL(ns),shw,shb,BD(nd)))
! 98: /* nd <= ns << (shb + shw*BSH27), returns PL of the result */
! 99:
! 100: #ifdef FULLSET
! 101: static N N_of_i_lshifted_by_wb( i, gw, gb )
! 102: int i, gw, gb;
! 103: /*
! 104: * returns pointer to a new struct (N)(((int)i) >> (gb + gw*BSH27))
! 105: */
! 106: {
! 107: int j, l, *p;
! 108: N n;
! 109:
! 110: j = i >> (BSH27 - gb);
! 111: i = (i << gb)&BMASK27;
! 112: l = j != 0 ? gw + 2 : gw + 1;
! 113: n = NALLOC(l);
! 114: PL(n) = l;
! 115: for ( p = BD(n); gw-- > 0; ) *p++ = 0;
! 116: *p++ = i;
! 117: if ( j != 0 ) *p = j;
! 118: return n;
! 119: }
! 120: #endif /* FULLSET */
! 121:
! 122: /*
! 123: * routines to make a new struct
! 124: * (N)(((BD of N)(b[0],...,b[lb-1])) << (gb + gw*BSH27))
! 125: */
! 126: static N N_of_nbd_lshifted_by_wb( /* BD of N */ b, /* PL of N */ lb, gw, gb )
! 127: int *b, lb, gw, gb;
! 128: /*
! 129: * returns pointer to a new struct
! 130: * (N)(((BD of N)(b[0],...,b[lb-1])) << (gb + gw*BSH27))
! 131: */
! 132: {
! 133: int rsh, s, t, *p, l;
! 134: N n;
! 135:
! 136: l = lb + gw;
! 137: if ( gb == 0 ) {
! 138: n = NALLOC(l);
! 139: PL(n) = l;
! 140: for ( p = BD(n); gw-- > 0; ) *p++ = 0;
! 141: while ( lb-- > 0 ) *p++ = *b++;
! 142: return n;
! 143: }
! 144: rsh = BSH27 - gb; s = b[lb-1];
! 145: if ( (t = s >> rsh) != 0 ) {
! 146: n = NALLOC(l+1);
! 147: PL(n) = l+1;
! 148: (p = BD(n))[l] = t;
! 149: } else {
! 150: n = NALLOC(l);
! 151: PL(n) = l;
! 152: p = BD(n);
! 153: }
! 154: while ( gw-- > 0 ) *p++ = 0;
! 155: *p++ = ((t = *b++) << gb)&BMASK27;
! 156: for ( ; --lb > 0; t = s )
! 157: *p++ = (t >> rsh) | (((s = *b++) << gb)&BMASK27);
! 158: return n;
! 159: }
! 160:
! 161: #define N_of_n_lshifted_by_wb(a,gw,gb) N_of_nbd_lshifted_by_wb(BD(a),PL(a),gw,gb)
! 162:
! 163: #define SWAP(a,b,Type) { Type temp=a;a=b;b=temp;}
! 164: #define SIGNED_VAL(a,s) ((s)>0?(a):-(a))
! 165:
! 166:
! 167: #ifdef CALL
! 168: static int bw_int32( n )
! 169: int n;
! 170: {
! 171: int w;
! 172:
! 173: w = 0;
! 174: #if BSH27 >= 32
! 175: if ( n > 0xffffffff ) w += 32, n >>= 32;
! 176: #endif
! 177: if ( n >= 0x10000 ) w += 16, n >>= 16;
! 178: if ( n >= 0x100 ) w += 8, n >>= 8;
! 179: if ( n >= 0x10 ) w += 4, n >>= 4;
! 180: if ( n >= 0x4 ) w += 2, n >>= 2;
! 181: if ( n >= 0x2 ) w += 1, n >>= 1;
! 182: if ( n != 0 ) ++w;
! 183: return w;
! 184: }
! 185: #define BitWidth(n,bw) bw = bw_int32( n )
! 186: #else
! 187:
! 188: #if BSH27 >= 32
! 189: #define BitWidth(n,bw) {\
! 190: int k = (n); \
! 191: bw = 0; \
! 192: if ( BSH27 >= 32 ) if ( k > 0xffffffff ) bw += 32, k >>= 32; \
! 193: if ( k >= 0x10000 ) bw += 16, k >>= 16; \
! 194: if ( k >= 0x100 ) bw += 8, k >>= 8; \
! 195: if ( k >= 0x10 ) bw += 4, k >>= 4; \
! 196: if ( k >= 0x4 ) bw += 2, k >>= 2; \
! 197: if ( k >= 0x2 ) bw += 1, k >>= 1; \
! 198: if ( k != 0 ) bw++; \
! 199: }
! 200: #else
! 201: #define BitWidth(n,bw) {\
! 202: int k = (n); \
! 203: bw = 0; \
! 204: if ( k >= 0x10000 ) bw += 16, k >>= 16; \
! 205: if ( k >= 0x100 ) bw += 8, k >>= 8; \
! 206: if ( k >= 0x10 ) bw += 4, k >>= 4; \
! 207: if ( k >= 0x4 ) bw += 2, k >>= 2; \
! 208: if ( k >= 0x2 ) bw += 1, k >>= 1; \
! 209: if ( k != 0 ) bw++; \
! 210: }
! 211: #endif
! 212: #endif
! 213:
! 214: #include "igcdhack.c"
! 215:
! 216: /*
! 217: * Implementation of the binary GCD algorithm for two oN structs
! 218: * (big-integers) in risa.
! 219: *
! 220: * The major operations in the following algorithms are the binary-shifts
! 221: * and the updates of (u, v) by (min(u,v), |u-v|), and are to be open-coded
! 222: * without using routines for oN structures just as in addn() or subn().
! 223: */
! 224:
! 225: static int igcd_binary_2w( u, lu, v, lv, pans )
! 226: int *u, lu, *v, lv, *pans;
! 227: /* both u[0:lu-1] and v[0:lv-1] are assumed to be odd */
! 228: {
! 229: int i, h1, l1, h2, l2;
! 230:
! 231: l1 = u[0], l2 = v[0];
! 232: h1 = lu <= 1 ? 0 : u[1];
! 233: h2 = lv <= 1 ? 0 : v[1];
! 234: /**/
! 235: loop: if ( h1 == 0 ) {
! 236: no_hi1: if ( h2 == 0 ) goto one_word;
! 237: no_hi1n:if ( l1 == 1 ) return 0;
! 238: if ( (l2 -= l1) == 0 ) {
! 239: for ( l2 = h2; (l2&1) == 0; l2 >>= 1 ) ;
! 240: goto one_word;
! 241: } else if ( l2 < 0 ) h2--, l2 += BASE27;
! 242: i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 );
! 243: l2 |= ((h2 << (BSH27 - i)) & BMASK27);
! 244: h2 >>= i;
! 245: goto no_hi1;
! 246: } else if ( h2 == 0 ) {
! 247: no_hi2: if ( l2 == 1 ) return 0;
! 248: if ( (l1 -= l2) == 0 ) {
! 249: for ( l1 = h1; (l1&1) == 0; l1 >>= 1 ) ;
! 250: goto one_word;
! 251: } else if ( l1 < 0 ) h1--, l1 += BASE27;
! 252: i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 );
! 253: l1 |= ((h1 << (BSH27 - i)) & BMASK27);
! 254: if ( (h1 >>= i) == 0 ) goto one_word;
! 255: goto no_hi2;
! 256: } else if ( l1 == l2 ) {
! 257: if ( h1 == h2 ) {
! 258: pans[0] = l1, pans[1] = h1;
! 259: return 2;
! 260: } else if ( h1 > h2 ) {
! 261: for ( l1 = h1 - h2; (l1&1) == 0; l1 >>= 1 ) ;
! 262: goto no_hi1n;
! 263: } else {
! 264: for ( l2 = h2 - h1; (l2&1) == 0; l2 >>= 1 ) ;
! 265: goto no_hi2;
! 266: }
! 267: } else if ( h1 == h2 ) {
! 268: if ( l1 > l2 ) {
! 269: for ( l1 -= l2; (l1&1) == 0; l1 >>= 1 ) ;
! 270: goto no_hi1n;
! 271: } else {
! 272: for ( l2 -= l1; (l2&1) == 0; l2 >>= 1 ) ;
! 273: goto no_hi2;
! 274: }
! 275: } else if ( h1 > h2 ) {
! 276: h1 -= h2;
! 277: if ( (l1 -= l2) < 0 ) h1--, l1 += BASE27;
! 278: i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 );
! 279: l1 |= ((h1 << (BSH27 - i)) & BMASK27);
! 280: h1 >>= i;
! 281: } else {
! 282: h2 -= h1;
! 283: if ( (l2 -= l1) < 0 ) h2--, l2 += BASE27;
! 284: i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 );
! 285: l2 |= ((h2 << (BSH27 - i)) & BMASK27);
! 286: h2 >>= i;
! 287: }
! 288: goto loop;
! 289: one_word:
! 290: if ( l1 == 1 || l2 == 1 ) return 0;
! 291: else if ( l1 == l2 ) {
! 292: pans[0] = l1;
! 293: return 1;
! 294: }
! 295: one_word_neq:
! 296: if ( l1 > l2 ) {
! 297: l1 -= l2;
! 298: do { l1 >>= 1; } while ( (l1&1) == 0 );
! 299: goto one_word;
! 300: } else {
! 301: l2 -= l1;
! 302: do { l2 >>= 1; } while ( (l2&1) == 0 );
! 303: goto one_word;
! 304: }
! 305: }
! 306:
! 307: static N igcd_binary( n1, n2, nt )
! 308: N n1, n2, nt;
! 309: /* both n1 and n2 are assumed to be odd */
! 310: {
! 311: int l1, *b1, l2, *b2, *bt = BD(nt);
! 312: int l;
! 313:
! 314: if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
! 315: else if ( l < 0 ) { SWAP( n1, n2, N ); }
! 316: IniDiv( n1, n2 );
! 317: if ( UNIN(n2) ) return 0;
! 318: l1 = PL(n1), b1 = BD(n1), l2 = PL(n2), b2 = BD(n2);
! 319: loop: if ( l1 <= 2 && l2 <= 2 ) {
! 320: l = igcd_binary_2w( b1, l1, b2, l2, bt );
! 321: if ( l == 0 ) return 0;
! 322: PL(nt) = l;
! 323: return nt;
! 324: }
! 325: /**/
! 326: l = abs_U_V_maxrshift( b1, l1, b2, l2, bt );
! 327: /**/
! 328: if ( l == 0 ) {
! 329: PL(n1) = l1;
! 330: return n1;
! 331: } else if ( l > 0 ) {
! 332: l1 = l;
! 333: SWAP( b1, bt, int * ); SWAP( n1, nt, N );
! 334: } else {
! 335: l2 = -l;
! 336: SWAP( b2, bt, int * ); SWAP( n2, nt, N );
! 337: }
! 338: goto loop;
! 339: }
! 340:
! 341: #define RetTrueGCD(p,gw,gb,nr,l0) \
! 342: if (p==0) { l0: if (gw==0&&gb==0) { *(nr)=ONEN; return; } else p=ONEN; } \
! 343: *(nr) = N_of_n_lshifted_by_wb(p,gw,gb); \
! 344: return;
! 345:
! 346: void gcdbinn( n1, n2, nr )
! 347: N n1, n2, *nr;
! 348: {
! 349: int s1, s2, gw, gb, t1, t2;
! 350: int w1, w2;
! 351: N tn1, tn2, tnt, p;
! 352:
! 353: if ( !n1 ) {
! 354: *nr = n2;
! 355: return;
! 356: } else if ( !n2 ) {
! 357: *nr = n1;
! 358: return;
! 359: }
! 360: s1 = trailingzerosn( n1, &w1 );
! 361: s2 = trailingzerosn( n2, &w2 );
! 362: if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2;
! 363: else if ( w1 < w2 ) gw = w1, gb = s1;
! 364: else gw = w2, gb = s2;
! 365: /*
! 366: * true GCD must be multiplied by 2^{gw*BSH27+gb}.
! 367: */
! 368: t1 = PL(n1) - w1;
! 369: t2 = PL(n2) - w2;
! 370: if ( t1 < t2 ) t1 = t2;
! 371: tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1);
! 372: rshiftn( n1, w1, s1, tn1 );
! 373: rshiftn( n2, w2, s2, tn2 );
! 374: p = igcd_binary( tn1, tn2, tnt );
! 375: RetTrueGCD( p, gw, gb, nr, L0 )
! 376: }
! 377:
! 378:
! 379: /*
! 380: * The bmod gcd algorithm stated briefly in K.Weber's paper
! 381: * [ACM TOMS, Vol.21, No. 1 (1995), pp. 111-122].
! 382: * It replaces the subtraction (n1 - n2) in the binary algorithm
! 383: * by (n1 - S*n2) with such an S that (n1 - S*n2) \equiv 0 \bmod 2^BSH27,
! 384: * which should improve the efficiency when n1 \gg n2.
! 385: */
! 386:
! 387: /* subsidiary routines */
! 388: #ifdef CALL
! 389: #ifndef DIVISION
! 390: static int u_div_v_mod_2tos( u, v, s )
! 391: int u, v, s;
! 392: /*
! 393: * u/v mod 2^s.
! 394: */
! 395: {
! 396: int i,lsh_i, mask, m, two_to_s;
! 397:
! 398: mask = (two_to_s = 1 << s) - 1;
! 399: lsh_i = (sizeof(int) << 3) - 1;
! 400: m = i = 0;
! 401: u &= mask, v &= mask;
! 402: do {
! 403: if ( u == 0 ) break;
! 404: if ( (u << lsh_i) != 0 ) {
! 405: m += (1 << i);
! 406: u -= (v << i);
! 407: u &= mask;
! 408: }
! 409: lsh_i--;
! 410: } while ( ++i != s );
! 411: return m;
! 412: }
! 413: #else
! 414: static int u_div_v_mod_2tos( u, v, s )
! 415: int u, v, s;
! 416: {
! 417: int f1 = 1 << s, f2 = u, q, r, c1 = 0, c2 = v, m;
! 418:
! 419: m = f1 - 1;
! 420: do { q = f1 / f2;
! 421: r = f1 - q*f2; f1 = f2; f2 = r;
! 422: r = c1 - q*c2; c1 = c2; c2 = r;
! 423: } while ( f2 != 1 );
! 424: return( c2 & m );
! 425: }
! 426: #endif /* DIVISION */
! 427:
! 428: #define Comp_U_div_V_mod_BASE27(U,V,R) R = u_div_v_mod_2tos(U,V,BSH27)
! 429: #else
! 430: #ifndef DIVISION
! 431: #define Comp_U_div_V_mod_BASE27(U,V,R) {\
! 432: int u = (U), v = (V), i, lsh; \
! 433: /* U and V are assumed to be odd */ \
! 434: i = R = 1, lsh = (sizeof(int) << 3) - 2; u = (u - v) & BMASK27; \
! 435: do { if ( u == 0 ) break; \
! 436: if ( (u << lsh) != 0 ) R += (1 << i), u = (u - (v << i)) & BMASK27; \
! 437: i++, lsh--; \
! 438: } while ( i < BSH27 ); \
! 439: }
! 440: #else
! 441: #define Comp_U_div_V_mod_BASE27(U,V,R) {\
! 442: int f1 = BASE27, f2 = (V), q, r, c1 = 0, c2 = (U); \
! 443: do { q = f1 / f2; \
! 444: r = f1 - q*f2; f1 = f2; f2 = r; \
! 445: r = c1 - q*c2; c1 = c2; c2 = r; \
! 446: } while ( f2 != 1 ); \
! 447: R = c2 & BMASK27; \
! 448: }
! 449: #endif /* DIVISION */
! 450: #endif
! 451:
! 452:
! 453: static int bmod_n( nu, nv, na )
! 454: N nu, nv, na;
! 455: /*
! 456: * Computes (u[] \bmod v[]) >> (as much as possible) in r[].
! 457: */
! 458: {
! 459: int *u = BD(nu), lu = PL(nu), *v = BD(nv), lv = PL(nv),
! 460: *r = BD(na);
! 461: int *p, a, t, l, z, v0, vh, bv, v0r;
! 462:
! 463: v0 = v[0];
! 464: if ( lv == 1 ) {
! 465: if ( lu == 1 ) a = u[0] % v0;
! 466: else {
! 467: p = &u[--lu];
! 468: a = (*p) % v0, t = BASE27 % v0;
! 469: for ( ; --lu >= 0; a = l ) {
! 470: --p;
! 471: DMAR(a,t,*p,v0,l)
! 472: /* l <= (a*t + p[0])%v0 */
! 473: }
! 474: }
! 475: if ( a == 0 ) return 0;
! 476: while ( (a&1) == 0 ) a >>= 1;
! 477: *r = a;
! 478: return( PL(na) = 1 );
! 479: }
! 480: Comp_U_div_V_mod_BASE27( 1, v0, v0r );
! 481: vh = v[lv -1];
! 482: BitWidth( vh, bv );
! 483: bv--;
! 484: t = 1 << bv;
! 485: l = lv + 1;
! 486: for ( z = -1; lu > l || lu == l && u[lu-1] >= t; z = -z ) {
! 487: a = (v0r*u[0])&BMASK27;
! 488: /**/
! 489: lu = abs_U_aV_maxrshift( u, lu, a, v, lv, r );
! 490: /**/
! 491: if ( lu == 0 ) return 0;
! 492: p = r;
! 493: r = u;
! 494: u = p;
! 495: }
! 496: if ( lu < lv ) goto ret;
! 497: t = u[lu-1];
! 498: if ( lu > lv ) l = BSH27;
! 499: else if ( t < vh ) goto ret;
! 500: else l = 0;
! 501: BitWidth( t, a );
! 502: l += (a - bv);
! 503: a = (v0r*u[0])&(BMASK27 >> (BSH27 - l));
! 504: /**/
! 505: lu = abs_U_aV_maxrshift( u, lu, a, v, lv, r );
! 506: /**/
! 507: if ( lu == 0 ) return 0;
! 508: z = -z;
! 509: ret: if ( z > 0 ) return( PL(na) = lu );
! 510: PL(nu) = lu;
! 511: return( -lu );
! 512: }
! 513:
! 514:
! 515: static N igcd_bmod( n1, n2, nt )
! 516: N n1, n2, nt;
! 517: /* both n1 and n2 are assumed to be odd */
! 518: {
! 519: int l1, l2;
! 520: int l;
! 521:
! 522: if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
! 523: else if ( l < 0 ) { SWAP( n1, n2, N ); }
! 524: IniDiv( n1, n2 );
! 525: if ( UNIN(n2) ) return 0;
! 526: loop: if ( (l1 = PL(n1)) <= 2 && (l2 = PL(n2)) <= 2 ) {
! 527: l = igcd_binary_2w( BD(n1), l1, BD(n2), l2, BD(nt) );
! 528: if ( l == 0 ) return 0;
! 529: PL(nt) = l;
! 530: return nt;
! 531: }
! 532: /**/
! 533: l = bmod_n( n1, n2, nt );
! 534: /**/
! 535: if ( l == 0 ) return n2;
! 536: else if ( l > 0 ) {
! 537: N tmp = n1;
! 538:
! 539: n1 = n2;
! 540: n2 = nt;
! 541: nt = tmp;
! 542: } else SWAP( n1, n2, N );
! 543: goto loop;
! 544: }
! 545:
! 546: void gcdbmodn( n1, n2, nr )
! 547: N n1, n2, *nr;
! 548: {
! 549: int s1, s2, gw, gb, t1, t2;
! 550: int w1, w2;
! 551: N tn1, tn2, tnt, p;
! 552:
! 553: if ( !n1 ) {
! 554: *nr = n2;
! 555: return;
! 556: } else if ( !n2 ) {
! 557: *nr = n1;
! 558: return;
! 559: }
! 560: s1 = trailingzerosn( n1, &w1 );
! 561: s2 = trailingzerosn( n2, &w2 );
! 562: if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2;
! 563: else if ( w1 < w2 ) gw = w1, gb = s1;
! 564: else gw = w2, gb = s2;
! 565: /*
! 566: * true GCD must be multiplied by 2^{gw*BSH27+gs}.
! 567: */
! 568: t1 = PL(n1) - w1;
! 569: t2 = PL(n2) - w2;
! 570: if ( t1 < t2 ) t1 = t2;
! 571: tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1);
! 572: rshiftn( n1, w1, s1, tn1 );
! 573: rshiftn( n2, w2, s2, tn2 );
! 574: p = igcd_bmod( tn1, tn2, tnt );
! 575: RetTrueGCD( p, gw, gb, nr, L0 )
! 576: }
! 577:
! 578: /*
! 579: * The accelerated integer GCD algorithm by K.Weber
! 580: * [ACM TOMS, Vol.21, No. 1 (1995), pp. 111-122]:
! 581: */
! 582:
! 583: static int ReducedRatMod( x, y, pn, pd )
! 584: N x, y;
! 585: int *pn, *pd;
! 586: /*
! 587: * Let m = 2^{2*BSH27} = 2*BASE27. We assume x, y > 0 and \gcd(x,m)
! 588: * = \gcd(y,m) = 1. This routine computes n and d (resp. returned
! 589: * in *pn and *pd) such that 0 < n, |d| < \sqrt{m} and
! 590: * n*y \equiv x*d \bmod m.
! 591: */
! 592: {
! 593: int n1h, n1l, d1h, d1l, n2h, n2l, d2h, d2l;
! 594: int s1, s2, l1, l2;
! 595:
! 596:
! 597: {
! 598: int xh, xl, yh, yl, tl, i, lsh_i;
! 599: int th;
! 600:
! 601: xl = BD(x)[0];
! 602: xh = PL(x) > 1 ? BD(x)[1] : 0;
! 603: yl = BD(y)[0];
! 604: yh = PL(y) > 1 ? BD(y)[1] : 0;
! 605: Comp_U_div_V_mod_BASE27( xl, yl, n2l );
! 606: DM27(n2l,yl,th,tl) /* n2l*yl = tl+th*BASE27, where tl==xl. */;
! 607: if ( xh > th ) xh -= th;
! 608: else xh += (BASE27 - th);
! 609: DM27(n2l,yh,th,tl) /* n2l*yh = tl+th*BASE27. */;
! 610: if ( xh > tl ) xh -= tl;
! 611: else xh += (BASE27 - tl);
! 612: n2h = i = 0, lsh_i = 31;
! 613: do {
! 614: if ( xh == 0 ) break;
! 615: if ( (xh << lsh_i) != 0 ) {
! 616: n2h += (1 << i);
! 617: tl = (yl << i)&BMASK27;
! 618: if ( xh > tl ) xh -= tl;
! 619: else xh += (BASE27 - tl);
! 620: }
! 621: lsh_i--;
! 622: } while ( ++i != BSH27 );
! 623: }
! 624: /*
! 625: * n2l + n2h*BASE27 = x/y mod 2^{2*BSH27}.
! 626: */
! 627: n1h = BASE27, n1l = 0, l1 = BSH27,
! 628: d1h = d1l = 0, s1 = 0,
! 629: d2h = 0, d2l = 1, s2 = 1;
! 630: /**/
! 631: while ( n2h != 0 ) {
! 632: int i, ir, th, tl;
! 633:
! 634: BitWidth( n2h, l2 );
! 635: ir = BSH27 - (i = l1 - l2);
! 636: do {
! 637: if ( i == 0 ) th = n2h, tl = n2l;
! 638: else
! 639: th = (n2h << i) | (n2l >> ir),
! 640: tl = (n2l << i) & BMASK27;
! 641: if ( th > n1h || (th == n1h && tl > n1l) ) goto next_i;
! 642: if ( tl <= n1l ) n1l -= tl;
! 643: else n1l += (BASE27 - tl), n1h--;
! 644: n1h -= th;
! 645: /* (s1:d1h,d1l) -= ((s2:d2h,d2l) << i); */
! 646: if ( s2 != 0 ) {
! 647: if ( i == 0 ) th = d2h, tl = d2l;
! 648: else
! 649: th = (d2h << i)&BMASK27 | (d2l >> ir),
! 650: tl = (d2l << i)&BMASK27;
! 651: if ( s1 == 0 )
! 652: s1 = -s2, d1h = th, d1l = tl;
! 653: else if ( s1 != s2 ) {
! 654: tl += d1l;
! 655: d1l = tl&BMASK27;
! 656: d1h = (th + (tl >> BSH27))&BMASK27;
! 657: if ( d1h == 0 && d1l == 0 ) s1 = 0;
! 658: } else if ( d1h > th ) {
! 659: if ( d1l >= tl ) d1l -= tl;
! 660: else d1l += (BASE27 - tl), d1h--;
! 661: d1h -= th;
! 662: } else if ( d1h == th ) {
! 663: d1h = 0;
! 664: if ( d1l == tl ) s1 = d2h = 0;
! 665: else if ( d1l > tl ) d1l -= tl;
! 666: else d1l = tl - d1l, s1 = -s1;
! 667: } else {
! 668: if ( tl >= d1l ) d1l = tl - d1l;
! 669: else d1l = tl + (BASE27 - d1l), th--;
! 670: d1h = th - d1h;
! 671: s1 = -s1;
! 672: }
! 673: }
! 674: next_i: i--, ir++;
! 675: } while ( n1h > n2h || (n1h == n2h && n1l >= n2l) );
! 676: /* swap 1 and 2 */
! 677: th = n1h, tl = n1l;
! 678: n1h = n2h, n1l = n2l;
! 679: n2h = th, n2l = tl;
! 680: l1 = l2;
! 681: th = d1h, tl = d1l, i = s1;
! 682: d1h = d2h, d1l = d2l, s1 = s2;
! 683: d2h = th, d2l = tl, s2 = i;
! 684: }
! 685: /**/
! 686: *pn = n2l, *pd = d2l;
! 687: return s2;
! 688: }
! 689:
! 690: static int igcd_spurious_factor;
! 691:
! 692: #define SaveN(s,d) {\
! 693: int i, l; \
! 694: for ( l = PL(d) = PL(s), i = 0; i < l; i++ ) BD(d)[i] = BD(s)[i]; \
! 695: }
! 696:
! 697: static N igcd_acc( n1, n2, nt )
! 698: N n1, n2, nt;
! 699: /* both n1 and n2 are assumed to be odd */
! 700: {
! 701: int l1, l2, *b1, *b2, bw1, bw2;
! 702: int l;
! 703: int n, d;
! 704: N p, s1, s2;
! 705:
! 706: if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
! 707: else if ( l < 0 ) { SWAP( n1, n2, N ); }
! 708: if ( ShouldCompRemInit(n1,n2) ) {
! 709: int w, b;
! 710:
! 711: divn_27( n1, n2, &s1, &s2 );
! 712: if ( !s2 ) return n2;
! 713: b = trailingzerosn( s2, &w );
! 714: p = n1; n1 = n2; n2 = p;
! 715: rshiftn( s2, w, b, n2 );
! 716: if ( UNIN(n2) ) return 0;
! 717: l1 = PL(n1);
! 718: if ( !s1 || PL(s1) < l1 ) s1 = NALLOC(l1);
! 719: } else if ( UNIN(n2) ) return 0;
! 720: else {
! 721: s1 = NALLOC(PL(n1));
! 722: s2 = NALLOC(PL(n2));
! 723: }
! 724: SaveN( n1, s1 );
! 725: SaveN( n2, s2 );
! 726: igcd_spurious_factor = 0;
! 727: loop: l1 = PL(n1), l2 = PL(n2);
! 728: if ( l1 <= 2 && l2 <= 2 ) {
! 729: l = igcd_binary_2w( BD(n1), l1, BD(n2), l2, BD(nt) );
! 730: if ( l == 0 ) return 0;
! 731: PL(nt) = l;
! 732: SWAP( n2, nt, N );
! 733: goto ret;
! 734: }
! 735: /**/
! 736: b1 = BD(n1), b2 = BD(n2);
! 737: BitWidth( b1[l1 -1], bw1 );
! 738: BitWidth( b2[l2 -1], bw2 );
! 739: if ( (l1*BSH27 + bw1) - (l2*BSH27 + bw2) <= igcdacc_thre ) {
! 740: l = ReducedRatMod( n1, n2, &n, &d );
! 741: l = l < 0 ? aUplusbV_maxrshift( n, b2, l2, d, b1, l1, BD(nt) ) :
! 742: abs_axU_bxV_maxrshift( n, b2, l2, d, b1, l1, BD(nt) );
! 743: igcd_spurious_factor++;
! 744: if ( l == 0 ) goto ret;
! 745: PL(nt) = l;
! 746: } else {
! 747: l = bmod_n( n1, n2, nt );
! 748: if ( l == 0 ) goto ret;
! 749: else if ( l < 0 ) {
! 750: SWAP( n1, n2, N );
! 751: goto loop;
! 752: }
! 753: }
! 754: p = n1;
! 755: n1 = n2;
! 756: n2 = nt;
! 757: nt = p;
! 758: goto loop;
! 759: /**/
! 760: ret: if ( igcd_spurious_factor != 0 && !UNIN(n2) ) {
! 761: if ( (p = igcd_bmod( n2, s1, n1 )) == 0 ) return 0;
! 762: if ( (p = igcd_bmod( p, s2, nt )) == 0 ) return 0;
! 763: return p;
! 764: } else return n2;
! 765: }
! 766:
! 767: void gcdaccn( n1, n2, nr )
! 768: N n1, n2, *nr;
! 769: {
! 770: int s1, s2, gw, gb, t1, t2;
! 771: int w1, w2;
! 772: N tn1, tn2, tnt, p;
! 773:
! 774: if ( !n1 ) {
! 775: *nr = n2;
! 776: return;
! 777: } else if ( !n2 ) {
! 778: *nr = n1;
! 779: return;
! 780: }
! 781: s1 = trailingzerosn( n1, &w1 );
! 782: s2 = trailingzerosn( n2, &w2 );
! 783: if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2;
! 784: else if ( w1 < w2 ) gw = w1, gb = s1;
! 785: else gw = w2, gb = s2;
! 786: /*
! 787: * true GCD must be multiplied by 2^{gw*BSH27+gs}.
! 788: */
! 789: t1 = PL(n1) - w1;
! 790: t2 = PL(n2) - w2;
! 791: if ( t1 < t2 ) t1 = t2;
! 792: tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1);
! 793: rshiftn( n1, w1, s1, tn1 );
! 794: rshiftn( n2, w2, s2, tn2 );
! 795: /**/
! 796: p = igcd_acc( tn1, tn2, tnt );
! 797: /**/
! 798: if ( p == 0 ) goto L0;
! 799: RetTrueGCD( p, gw, gb, nr, L0 )
! 800: }
! 801:
! 802:
! 803: /********************************/
! 804:
! 805: void gcdBinary_27n( n1, n2, nr )
! 806: N n1, n2, *nr;
! 807: {
! 808: int b1, b2, w1, w2, gw, gb;
! 809: int l1, l2;
! 810: N tn1, tn2, tnt, a;
! 811:
! 812: if ( !n1 ) {
! 813: *nr = n2; return;
! 814: } else if ( !n2 ) {
! 815: *nr = n1; return;
! 816: }
! 817: b1 = trailingzerosn( n1, &w1 );
! 818: b2 = trailingzerosn( n2, &w2 );
! 819: if ( w1 == w2 ) gw = w1, gb = b1 <= b2 ? b1 : b2;
! 820: else if ( w1 < w2 ) gw = w1, gb = b1;
! 821: else gw = w2, gb = b2;
! 822: /*
! 823: * true GCD must be multiplied by 2^{gw*BSH27+gb}.
! 824: */
! 825: l1 = PL(n1) - w1;
! 826: l2 = PL(n2) - w2;
! 827: if ( l1 < l2 ) l1 = l2;
! 828: tn1 = W_NALLOC( l1 ); tn2 = W_NALLOC( l1 ); tnt = W_NALLOC( l1 );
! 829: rshiftn( n1, w1, b1, tn1 );
! 830: rshiftn( n2, w2, b2, tn2 );
! 831: /**/
! 832: if ( igcd_algorithm == 1 ) {
! 833: a = igcd_binary( tn1, tn2, tnt );
! 834: } else if ( igcd_algorithm == 2 ) {
! 835: a = igcd_bmod( tn1, tn2, tnt );
! 836: } else {
! 837: a = igcd_acc( tn1, tn2, tnt, -igcd_algorithm );
! 838: if ( igcd_spurious_factor != 0 ) {
! 839: }
! 840: }
! 841: RetTrueGCD( a, gw, gb, nr, L0 )
! 842: }
! 843:
! 844: /**************************/
! 845: N maxrshn( n, p )
! 846: N n;
! 847: int *p;
! 848: {
! 849: int nw, nb, c, l;
! 850: N new;
! 851:
! 852: nb = trailingzerosn( n, &nw );
! 853: l = PL(n);
! 854: c = BD(n)[l -1];
! 855: l -= nw;
! 856: if ( (c >> nb) == 0 ) l--;
! 857: new = NALLOC(l);
! 858: rshiftn( n, nw, nb, new );
! 859: *p = nb + nw*BSH27;
! 860: return new;
! 861: }
! 862: #endif /* HMEXT */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>