Annotation of OpenXM_contrib2/asir2000/engine/Ngcd.c, Revision 1.2
1.1 murao 1: /*
1.2 ! murao 2: * $OpenXM$
1.1 murao 3: */
4: /*
5: #include "ca.h"
6: #include "base.h"
7: */
8:
9: #undef CALL
10:
11: /**** Machine specific ****/
12: #define BIT_WIDTH_OF_INT 32
13:
14:
15: #if BSH == BIT_WIDTH_OF_INT
16: #define BMask ((unsigned)(-1))
17: #define BMASK BMask
18: #endif
19:
20:
21: int igcd_algorithm = 0;
22: /* == 0 : Euclid,
23: * == 1 : binary,
24: * == 2 : bmod,
25: * >= 3 : (Weber's accelerated)/(Jebelean's generalized binary) algorithm,
26: */
27: int igcd_thre_inidiv = 50;
28: /*
29: * In the non-Euclidean algorithms, if the ratio of the lengths (number
30: * of words) of two integers is >= igcd_thre_inidiv, we first perform
31: * remainder calculation.
32: * If == 0, this remainder calculation is not performed.
33: */
34: int igcdacc_thre = 10;
35: /*
36: * In the accelerated algorithm, if the bit-lengths of two integers is
37: * > igcdacc_thre, "bmod" reduction is done.
38: */
39:
40: #include "inline.h"
41:
42: #define TRAILINGZEROS(t,cntr) for(cntr=0;(t&1)==0;t>>=1)cntr++;
43:
44: #define W_NALLOC(d) ((N)ALLOCA(TRUESIZE(oN,(d)-1,int)))
45:
46: #define ShouldCompRemInit(n1,n2) (igcd_thre_inidiv != 0 && PL(n1) >= igcd_thre_inidiv*PL(n2))
47:
48: #define IniDiv(n1,n2) \
49: if ( ShouldCompRemInit(n1,n2) ) {\
50: N q, r; int w, b; \
51: divn(n1,n2,&q,&r); \
52: if ( !r ) return(n2); \
53: b = trailingzerosn( r, &w ); \
54: q = n1; n1 = n2; n2 = q; \
55: rshiftn( r, w, b, n2 ); \
56: }
57:
58: /*
59: * Binary GCD algorithm by J.Stein
60: * [J. Comp. Phys. Vol. 1 (1967), pp. 397-405)]:
61: * The right-shift binary algorithm is used.
62: */
63:
64:
65: /*
66: * subsidiary routines for gcdbinn below.
67: */
68: static int /* number of bits */ trailingzeros_nbd( /* BD of N */ nbd, pnw )
69: int *nbd, *pnw /* number of zero words */;
70: {
71: #if BSH == BIT_WIDTH_OF_INT
72: unsigned
73: #endif
74: int nw, nb, w;
75:
76: for ( nw = 0; (w = *nbd) == 0; nbd++ ) nw++;
77: TRAILINGZEROS(w,nb);
78: *pnw = nw;
79: return nb;
80: }
81:
82: #define trailingzerosn(n,pnw) trailingzeros_nbd(BD(n),pnw)
83:
84: static int /* PL of N */ rshift_nbd( /* BD of N */ nbd, /* PL of N */ nl,
85: /* # words */ shw, /* # bits */ shb, /* BD of N */ p )
86: #if BSH == BIT_WIDTH_OF_INT
87: unsigned
88: #endif
89: int *nbd, nl, shw, shb, *p;
90: {
91: unsigned int i, v, w, lshb; /* <---- */
92:
93: nbd += shw, i = (nl -= shw);
94: if ( shb == 0 ) {
95: for ( ; nl > 0; nl-- ) *p++ = *nbd++;
96: return i;
97: } else if ( nl < 2 ) {
98: *p = (*nbd) >> shb;
99: return 1;
100: }
101: for ( lshb = BSH - shb, v = *nbd++; --nl > 0; v = w ) {
102: w = *nbd++;
103: #if BSH == BIT_WIDTH_OF_INT
104: *p++ = (v >> shb) | (w << lshb); /********/
105: #else
106: *p++ = (v >> shb) | ((w << lshb)&BMASK);
107: #endif
108: }
109: if ( (v >>= shb) == 0 ) return( i-1 );
110: *p = v;
111: return i;
112: }
113:
114: #define rshiftn(ns,shw,shb,nd) (PL(nd)=rshift_nbd(BD(ns),PL(ns),shw,shb,BD(nd)))
115: /* nd <= ns << (shb + shw*BSH), returns PL of the result */
116:
117: #ifdef FULLSET
118: static N N_of_i_lshifted_by_wb( i, gw, gb )
119: int i, gw, gb;
120: /*
121: * returns pointer to a new struct (N)(((int)i) >> (gb + gw*BSH))
122: */
123: {
124: unsigned int j, l, *p; /* <---- */
125: N n;
126:
127: j = i >> (BSH - gb);
128: #if BSH == BIT_WIDTH_OF_INT
129: i = (i << gb); /********/
130: #else
131: i = (i << gb)&BMASK;
132: #endif
133: l = j != 0 ? gw + 2 : gw + 1;
134: n = NALLOC(l);
135: PL(n) = l;
136: for ( p = BD(n); gw-- > 0; ) *p++ = 0;
137: *p++ = i;
138: if ( j != 0 ) *p = j;
139: return n;
140: }
141: #endif /* FULLSET */
142:
143: /*
144: * routines to make a new struct
145: * (N)(((BD of N)(b[0],...,b[lb-1])) << (gb + gw*BSH))
146: */
147: static N N_of_nbd_lshifted_by_wb( /* BD of N */ b, /* PL of N */ lb, gw, gb )
148: int *b, lb, gw, gb;
149: /*
150: * returns pointer to a new struct
151: * (N)(((BD of N)(b[0],...,b[lb-1])) << (gb + gw*BSH))
152: */
153: {
154: unsigned int rsh, s, t, *p, l; /* <---- */
155: N n;
156:
157: l = lb + gw;
158: if ( gb == 0 ) {
159: n = NALLOC(l);
160: PL(n) = l;
161: for ( p = BD(n); gw-- > 0; ) *p++ = 0;
162: while ( lb-- > 0 ) *p++ = *b++;
163: return n;
164: }
165: rsh = BSH - gb; s = b[lb-1];
166: if ( (t = s >> rsh) != 0 ) {
167: n = NALLOC(l+1);
168: PL(n) = l+1;
169: (p = BD(n))[l] = t;
170: } else {
171: n = NALLOC(l);
172: PL(n) = l;
173: p = BD(n);
174: }
175: while ( gw-- > 0 ) *p++ = 0;
176: #if BSH == BIT_WIDTH_OF_INT
177: *p++ = (t = *b++) << gb; /********/
178: for ( ; --lb > 0; t = s )
179: *p++ = (t >> rsh) | ((s = *b++) << gb); /********/
180: #else
181: *p++ = ((t = *b++) << gb)&BMASK;
182: for ( ; --lb > 0; t = s )
183: *p++ = (t >> rsh) | (((s = *b++) << gb)&BMASK);
184: #endif
185: return n;
186: }
187:
188: #define N_of_n_lshifted_by_wb(a,gw,gb) N_of_nbd_lshifted_by_wb(BD(a),PL(a),gw,gb)
189:
190: #define SWAP(a,b,Type) { Type temp=a;a=b;b=temp;}
191: #define SIGNED_VAL(a,s) ((s)>0?(a):-(a))
192:
193:
194: #ifdef CALL
195: static int bw_int32( n )
196: unsigned int n; /* <---- */
197: {
198: int w;
199:
200: w = 0;
201: #if BSH > 32
202: if ( n > 0xffffffff ) w += 32, n >>= 32;
203: #endif
204: if ( n >= 0x10000 ) w += 16, n >>= 16;
205: if ( n >= 0x100 ) w += 8, n >>= 8;
206: if ( n >= 0x10 ) w += 4, n >>= 4;
207: if ( n >= 0x4 ) w += 2, n >>= 2;
208: if ( n >= 0x2 ) w += 1, n >>= 1;
209: if ( n != 0 ) ++w;
210: return w;
211: }
212: #define BitWidth(n,bw) bw = bw_int32( n )
213: #else
214:
215: #if BSH > 32
216: #define BitWidth(n,bw) {\
217: unsigned int k = (n); \
218: bw = 0; \
219: if ( k > 0xffffffff ) bw += 32, k >>= 32; \
220: if ( k >= 0x10000 ) bw += 16, k >>= 16; \
221: if ( k >= 0x100 ) bw += 8, k >>= 8; \
222: if ( k >= 0x10 ) bw += 4, k >>= 4; \
223: if ( k >= 0x4 ) bw += 2, k >>= 2; \
224: if ( k >= 0x2 ) bw += 1, k >>= 1; \
225: if ( k != 0 ) bw++; \
226: }
227: #else
228: #define BitWidth(n,bw) {\
229: unsigned int k = (n); \
230: bw = 0; \
231: if ( k >= 0x10000 ) bw += 16, k >>= 16; \
232: if ( k >= 0x100 ) bw += 8, k >>= 8; \
233: if ( k >= 0x10 ) bw += 4, k >>= 4; \
234: if ( k >= 0x4 ) bw += 2, k >>= 2; \
235: if ( k >= 0x2 ) bw += 1, k >>= 1; \
236: if ( k != 0 ) bw++; \
237: }
238: #endif
239: #endif
240:
241: #include "igcdhack.c"
242:
243: /*
244: * Implementation of the binary GCD algorithm for two oN structs
245: * (big-integers) in risa.
246: *
247: * The major operations in the following algorithms are the binary-shifts
248: * and the updates of (u, v) by (min(u,v), |u-v|), and are to be open-coded
249: * without using routines for oN structures just as in addn() or subn().
250: */
251:
252: static int igcd_binary_2w( u, lu, v, lv, pans )
253: #if BSH == BIT_WIDTH_OF_INT
254: unsigned
255: #endif
256: int *u, lu, *v, lv, *pans; /* <---- */
257: /* both u[0:lu-1] and v[0:lv-1] are assumed to be odd */
258: {
259: #if BSH == BIT_WIDTH_OF_INT
260: unsigned
261: #endif
262: int i, h1, l1, h2, l2; /* <---- */
263:
264: l1 = u[0], l2 = v[0];
265: h1 = lu <= 1 ? 0 : u[1];
266: h2 = lv <= 1 ? 0 : v[1];
267: /**/
268: loop: if ( h1 == 0 ) {
269: no_hi1: if ( h2 == 0 ) goto one_word;
270: no_hi1n:if ( l1 == 1 ) return 0;
271: #if BSH == BIT_WIDTH_OF_INT
272: if ( l2 == l1 ) {
273: for ( l2 = h2; (l2&1) == 0; l2 >>= 1 ) ;
274: goto one_word;
275: } else if ( l2 < l1 ) {
276: l2 -= l1, h2--;
277: } else l2 -= l1;
278: i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 );
279: l2 |= (h2 << (BSH - i));
280: #else
281: if ( (l2 -= l1) == 0 ) {
282: for ( l2 = h2; (l2&1) == 0; l2 >>= 1 ) ;
283: goto one_word;
284: } else if ( l2 < 0 ) h2--, l2 += BASE;
285: i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 );
286: l2 |= ((h2 << (BSH - i)) & BMASK);
287: #endif
288: h2 >>= i;
289: goto no_hi1;
290: } else if ( h2 == 0 ) {
291: no_hi2: if ( l2 == 1 ) return 0;
292: #if BSH == BIT_WIDTH_OF_INT
293: if ( l1 == l2 ) {
294: for ( l1 = h1; (l1&1) == 0; l1 >>= 1 ) ;
295: goto one_word;
296: } else if ( l1 < l2 ) {
297: l1 -= l2, h1--;
298: } else l1 -= l2;
299: i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 );
300: l1 |= (h1 << (BSH - i));
301: #else
302: if ( (l1 -= l2) == 0 ) {
303: for ( l1 = h1; (l1&1) == 0; l1 >>= 1 ) ;
304: goto one_word;
305: } else if ( l1 < 0 ) h1--, l1 += BASE;
306: i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 );
307: l1 |= ((h1 << (BSH - i)) & BMASK);
308: #endif
309: if ( (h1 >>= i) == 0 ) goto one_word;
310: goto no_hi2;
311: } else if ( l1 == l2 ) {
312: if ( h1 == h2 ) {
313: pans[0] = l1, pans[1] = h1;
314: return 2;
315: } else if ( h1 > h2 ) {
316: for ( l1 = h1 - h2; (l1&1) == 0; l1 >>= 1 ) ;
317: goto no_hi1n;
318: } else {
319: for ( l2 = h2 - h1; (l2&1) == 0; l2 >>= 1 ) ;
320: goto no_hi2;
321: }
322: } else if ( h1 == h2 ) {
323: if ( l1 > l2 ) {
324: for ( l1 -= l2; (l1&1) == 0; l1 >>= 1 ) ;
325: goto no_hi1n;
326: } else {
327: for ( l2 -= l1; (l2&1) == 0; l2 >>= 1 ) ;
328: goto no_hi2;
329: }
330: } else if ( h1 > h2 ) {
331: h1 -= h2;
332: #if BSH == BIT_WIDTH_OF_INT
333: if ( l1 < l2 ) l1 -= l2, h1--;
334: else l1 -= l2;
335: i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 );
336: l1 |= (h1 << (BSH - i));
337: #else
338: if ( (l1 -= l2) < 0 ) h1--, l1 += BASE;
339: i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 );
340: l1 |= ((h1 << (BSH - i)) & BMASK);
341: #endif
342: h1 >>= i;
343: } else {
344: h2 -= h1;
345: #if BSH == BIT_WIDTH_OF_INT
346: if ( l2 < l1 ) l2 -= l1, h2--;
347: else l2 -= l1;
348: i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 );
349: l2 |= (h2 << (BSH - i));
350: #else
351: if ( (l2 -= l1) < 0 ) h2--, l2 += BASE;
352: i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 );
353: l2 |= ((h2 << (BSH - i)) & BMASK);
354: #endif
355: h2 >>= i;
356: }
357: goto loop;
358: one_word:
359: if ( l1 == 1 || l2 == 1 ) return 0;
360: else if ( l1 == l2 ) {
361: pans[0] = l1;
362: return 1;
363: }
364: one_word_neq:
365: if ( l1 > l2 ) {
366: l1 -= l2;
367: do { l1 >>= 1; } while ( (l1&1) == 0 );
368: goto one_word;
369: } else {
370: l2 -= l1;
371: do { l2 >>= 1; } while ( (l2&1) == 0 );
372: goto one_word;
373: }
374: }
375:
376: static N igcd_binary( n1, n2, nt )
377: N n1, n2, nt;
378: /* both n1 and n2 are assumed to be odd */
379: {
380: int l1, *b1, l2, *b2, *bt = BD(nt);
381: int l;
382:
383: if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
384: else if ( l < 0 ) { SWAP( n1, n2, N ); }
385: IniDiv( n1, n2 );
386: if ( UNIN(n2) ) return 0;
387: l1 = PL(n1), b1 = BD(n1), l2 = PL(n2), b2 = BD(n2);
388: loop:
389: #if 0000
390: {
391: int i, b, w;
392:
393: printf( "===============\n" );
394: for ( i = 0; i < l1; i++ ) printf( "0x%08x ", b1[i] );
395: printf( "\n" );
396: for ( i = 0; i < l2; i++ ) printf( "0x%08x ", b2[i] );
397: printf( "\n" );
398: }
399: #endif
400: if ( l1 <= 2 && l2 <= 2 ) {
401: l = igcd_binary_2w( b1, l1, b2, l2, bt );
402: if ( l == 0 ) return 0;
403: PL(nt) = l;
404: return nt;
405: }
406: /**/
407: l = abs_U_V_maxrshift( b1, l1, b2, l2, bt );
408: /**/
409: if ( l == 0 ) {
410: PL(n1) = l1;
411: return n1;
412: } else if ( l > 0 ) {
413: l1 = l;
414: SWAP( b1, bt, int * ); SWAP( n1, nt, N );
415: } else {
416: l2 = -l;
417: SWAP( b2, bt, int * ); SWAP( n2, nt, N );
418: }
419: goto loop;
420: }
421:
422: #define RetTrueGCD(p,gw,gb,nr,l0) \
423: if (p==0) { l0: if (gw==0&&gb==0) { *(nr)=ONEN; return; } else p=ONEN; } \
424: *(nr) = N_of_n_lshifted_by_wb(p,gw,gb); \
425: return;
426:
427: void gcdbinn( n1, n2, nr )
428: N n1, n2, *nr;
429: {
430: int s1, s2, gw, gb, t1, t2;
431: int w1, w2;
432: N tn1, tn2, tnt, p;
433:
434: if ( !n1 ) {
435: *nr = n2;
436: return;
437: } else if ( !n2 ) {
438: *nr = n1;
439: return;
440: }
441: s1 = trailingzerosn( n1, &w1 );
442: s2 = trailingzerosn( n2, &w2 );
443: if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2;
444: else if ( w1 < w2 ) gw = w1, gb = s1;
445: else gw = w2, gb = s2;
446: /*
447: * true GCD must be multiplied by 2^{gw*BSH+gb}.
448: */
449: t1 = PL(n1) - w1;
450: t2 = PL(n2) - w2;
451: if ( t1 < t2 ) t1 = t2;
452: tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1);
453: rshiftn( n1, w1, s1, tn1 );
454: rshiftn( n2, w2, s2, tn2 );
455: p = igcd_binary( tn1, tn2, tnt );
456: RetTrueGCD( p, gw, gb, nr, L0 )
457: }
458:
459:
460: /*
461: * The bmod gcd algorithm stated briefly in K.Weber's paper
462: * [ACM TOMS, Vol.21, No. 1 (1995), pp. 111-122].
463: * It replaces the subtraction (n1 - n2) in the binary algorithm
464: * by (n1 - S*n2) with such an S that (n1 - S*n2) \equiv 0 \bmod 2^BSH,
465: * which should improve the efficiency when n1 \gg n2.
466: */
467:
468: /* subsidiary routines */
469: #if BSH == BIT_WIDTH_OF_INT
470: #ifdef CALL
471: static int u_div_v_mod_2toBSH( u, v )
472: unsigned int u, v;
473: /*
474: * u/v mod 2^BSH.
475: */
476: {
477: unsigned int i, lsh_i, m;
478:
479: lsh_i = (sizeof(int) << 3) - 1;
480: m = i = 0;
481: do {
482: if ( u == 0 ) break;
483: if ( (u << lsh_i) != 0 ) {
484: m += (1 << i);
485: u -= (v << i);
486: }
487: lsh_i--;
488: } while ( ++i != BSH );
489: return m;
490: }
491:
492: #define Comp_U_div_V_mod_BASE(U,V,R) R = u_div_v_mod_2toBSH(U,V)
493: #else /* CALL */
494: #define Comp_U_div_V_mod_BASE(U,V,R) {\
495: unsigned int u = (U), v = (V), i, lsh; \
496: /* U and V are assumed to be odd */ \
497: i = R = 1, lsh = (sizeof(int) << 3) - 2; u = (u - v); \
498: do { if ( u == 0 ) break; \
499: if ( (u << lsh) != 0 ) R += (1 << i), u = (u - (v << i)); \
500: i++, lsh--; \
501: } while ( i < BSH ); \
502: }
503: #endif /* CALL */
504: #else
505: #ifdef CALL
506: static int u_div_v_mod_2tos( u, v, s )
507: int u, v, s;
508: /*
509: * u/v mod 2^s.
510: */
511: {
512: int i, lsh_i, mask, m;
513:
514: mask = (1 << s) - 1;
515: lsh_i = (sizeof(int) << 3) - 1;
516: m = i = 0;
517: u &= mask, v &= mask;
518: do {
519: if ( u == 0 ) break;
520: if ( (u << lsh_i) != 0 ) {
521: m += (1 << i);
522: u -= (v << i);
523: u &= mask;
524: }
525: lsh_i--;
526: } while ( ++i != s );
527: return m;
528: }
529:
530: #define Comp_U_div_V_mod_BASE(U,V,R) R = u_div_v_mod_2tos(U,V,BSH)
531: #else
532: #define Comp_U_div_V_mod_BASE(U,V,R) {\
533: int u = (U), v = (V), i, lsh; \
534: /* U and V are assumed to be odd */ \
535: i = R = 1, lsh = (sizeof(int) << 3) - 2; u = (u - v) & BMASK; \
536: do { if ( u == 0 ) break; \
537: if ( (u << lsh) != 0 ) R += (1 << i), u = (u - (v << i)) & BMASK; \
538: i++, lsh--; \
539: } while ( i < BSH ); \
540: }
541: #endif
542: #endif
543:
544:
545: static int bmod_n( nu, nv, na )
546: N nu, nv, na;
547: /*
548: * Computes (u[] \bmod v[]) >> (as much as possible) in r[].
549: */
550: {
551: #if BSH == BIT_WIDTH_OF_INT
552: unsigned int *u = BD(nu), *v = BD(nv), *r = BD(na);
553: unsigned int *p, a, t, l, v0, vh, bv, v0r;
554: int lu = PL(nu), lv = PL(nv), z;
555: #else
556: int *u = BD(nu), lu = PL(nu), *v = BD(nv), lv = PL(nv),
557: *r = BD(na);
558: int *p, a, t, l, z, v0, vh, bv, v0r;
559: #endif
560:
561: v0 = v[0];
562: if ( lv == 1 ) {
563: if ( lu == 1 ) a = u[0] % v0;
564: else {
565: p = &u[--lu];
566: #if BSH == BIT_WIDTH_OF_INT
567: a = (*p) % v0, t = (unsigned)(-((int)v0)) % v0;
568: #else
569: a = (*p) % v0, t = BASE % v0;
570: #endif
571: for ( ; --lu >= 0; a = l ) {
572: --p;
573: DMAR(a,t,*p,v0,l);
574: /* l <= (a*t + p[0])%v0 */
575: }
576: }
577: if ( a == 0 ) return 0;
578: while ( (a&1) == 0 ) a >>= 1;
579: *r = a;
580: return( PL(na) = 1 );
581: }
582: Comp_U_div_V_mod_BASE( 1, v0, v0r );
583: vh = v[lv -1];
584: BitWidth( vh, bv );
585: bv--;
586: t = 1 << bv;
587: l = lv + 1;
588: for ( z = -1; lu > l || lu == l && u[lu-1] >= t; z = -z ) {
589: #if BSH == BIT_WIDTH_OF_INT
590: a = (v0r*u[0]);
591: #else
592: a = (v0r*u[0])&BMASK;
593: #endif
594: /**/
595: #if 0000
596: {
597: int i;
598: for ( i = 0; i < lu; i++ ) printf( "0x%08x ", u[i] );
599: printf( "\n- a=0x%08x, %u*\n", a, a );
600: for ( i = 0; i < lv; i++ ) printf( "0x%08x ", v[i] );
601: printf( "\n=>\n" );
602: }
603: #endif
604: lu = abs_U_aV_maxrshift( u, lu, a, v, lv, r );
605: /**/
606: #if 0000
607: printf( "***lu=%d\n", lu );
608: if ( lu != 0 ) {
609: int i;
610: for ( i = 0; i < lu; i++ ) printf( "0x%08x ", r[i] );
611: printf( "\n" );
612: }
613: #endif
614: if ( lu == 0 ) return 0;
615: p = r;
616: r = u;
617: u = p;
618: }
619: if ( lu < lv ) goto ret;
620: t = u[lu-1];
621: if ( lu > lv ) l = BSH;
622: else if ( t < vh ) goto ret;
623: else l = 0;
624: BitWidth( t, a );
625: l += (a - bv);
626: #if BSH == BIT_WIDTH_OF_INT
627: a = (v0r*u[0])&((unsigned)(-1) >> (BSH - l));
628: #else
629: a = (v0r*u[0])&(BMASK >> (BSH - l));
630: #endif
631: #if 0000
632: {
633: int i;
634: for ( i = 0; i < lu; i++ ) printf( "0x%08x ", u[i] );
635: printf( "\n - a=0x%08x, %u*\n", a, a );
636: for ( i = 0; i < lv; i++ ) printf( "0x%08x ", v[i] );
637: printf( "\n =>\n" );
638: }
639: #endif
640: /**/
641: lu = abs_U_aV_maxrshift( u, lu, a, v, lv, r );
642: /**/
643: #if 0000
644: printf( "::: lu=%d\n", lu );
645: if ( lu != 0 ) {
646: int i;
647: for ( i = 0; i < lu; i++ ) printf( "0x%08x ", r[i] );
648: printf( "\n" );
649: }
650: #endif
651: if ( lu == 0 ) return 0;
652: z = -z;
653: ret: if ( z > 0 ) return( PL(na) = lu );
654: PL(nu) = lu;
655: return( -lu );
656: }
657:
658:
659: static N igcd_bmod( n1, n2, nt )
660: N n1, n2, nt;
661: /* both n1 and n2 are assumed to be odd */
662: {
663: int l1, l2;
664: int l;
665:
666: if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
667: else if ( l < 0 ) { SWAP( n1, n2, N ); }
668: IniDiv( n1, n2 );
669: if ( UNIN(n2) ) return 0;
670: loop: if ( (l1 = PL(n1)) <= 2 && (l2 = PL(n2)) <= 2 ) {
671: l = igcd_binary_2w( BD(n1), l1, BD(n2), l2, BD(nt) );
672: if ( l == 0 ) return 0;
673: PL(nt) = l;
674: return nt;
675: }
676: /**/
677: l = bmod_n( n1, n2, nt );
678: /**/
679: if ( l == 0 ) return n2;
680: else if ( l > 0 ) {
681: N tmp = n1;
682:
683: n1 = n2;
684: n2 = nt;
685: nt = tmp;
686: } else SWAP( n1, n2, N );
687: goto loop;
688: }
689:
690: void gcdbmodn( n1, n2, nr )
691: N n1, n2, *nr;
692: {
693: int s1, s2, gw, gb, t1, t2;
694: int w1, w2;
695: N tn1, tn2, tnt, p;
696:
697: if ( !n1 ) {
698: *nr = n2;
699: return;
700: } else if ( !n2 ) {
701: *nr = n1;
702: return;
703: }
704: s1 = trailingzerosn( n1, &w1 );
705: s2 = trailingzerosn( n2, &w2 );
706: if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2;
707: else if ( w1 < w2 ) gw = w1, gb = s1;
708: else gw = w2, gb = s2;
709: /*
710: * true GCD must be multiplied by 2^{gw*BSH+gs}.
711: */
712: t1 = PL(n1) - w1;
713: t2 = PL(n2) - w2;
714: if ( t1 < t2 ) t1 = t2;
715: tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1);
716: rshiftn( n1, w1, s1, tn1 );
717: rshiftn( n2, w2, s2, tn2 );
718: p = igcd_bmod( tn1, tn2, tnt );
719: RetTrueGCD( p, gw, gb, nr, L0 )
720: }
721:
722: /*
723: * The accelerated integer GCD algorithm by K.Weber
724: * [ACM TOMS, Vol.21, No. 1 (1995), pp. 111-122]:
725: */
726:
727: static int ReducedRatMod( x, y, pn, pd )
728: N x, y;
729: #if BSH == BIT_WIDTH_OF_INT
730: unsigned
731: #endif
732: int *pn, *pd;
733: /*
734: * Let m = 2^{2*BSH} = 2*BASE. We assume x, y > 0 and \gcd(x,m)
735: * = \gcd(y,m) = 1. This routine computes n and d (resp. returned
736: * in *pn and *pd) such that 0 < n, |d| < \sqrt{m} and
737: * n*y \equiv x*d \bmod m.
738: */
739: {
740: #if BSH == BIT_WIDTH_OF_INT
741: unsigned int n1h, n1l, d1h, d1l, n2h, n2l, d2h, d2l;
742: unsigned int th, tl, l1, l2, i, ir;
743: int s1, s2;
744: #else
745: int n1h, n1l, d1h, d1l, n2h, n2l, d2h, d2l;
746: int th, tl, l1, l2, i, ir;
747: int s1, s2;
748: #endif
749:
750: {
751: #if BSH == BIT_WIDTH_OF_INT
752: unsigned
753: #endif
754: int xh, xl, yh, yl, lsh_i;
755:
756: xl = BD(x)[0];
757: xh = PL(x) > 1 ? BD(x)[1] : 0;
758: yl = BD(y)[0];
759: yh = PL(y) > 1 ? BD(y)[1] : 0;
760: #if 0000
761: printf( "*** RedRatMod: (0x%08x:0x%08x=%u*2^%d+%u)\n /(0x%08x:0x%08x=%u*2^%d+%u) mod 2^%d\n",
762: xh,xl, xh,BSH,xl, yh,yl, yh,BSH,yl, BSH );
763: #endif
764: Comp_U_div_V_mod_BASE( xl, yl, n2l );
765: DM(n2l,yl,th,tl) /* n2l*yl = tl+th*BASE, where tl==xl. */;
766: #if BSH == BIT_WIDTH_OF_INT
767: xh -= th;
768: #else
769: if ( xh > th ) xh -= th;
770: else xh += (BASE - th);
771: #endif
772: DM(n2l,yh,th,tl) /* n2l*yh = tl+th*BASE. */;
773: #if BSH == BIT_WIDTH_OF_INT
774: xh -= tl;
775: #else
776: if ( xh > tl ) xh -= tl;
777: else xh += (BASE - tl);
778: #endif
779: /* n2h = i = 0, lsh_i = 31;*/
780: n2h = i = 0, lsh_i = BIT_WIDTH_OF_INT -1;
781: do {
782: if ( xh == 0 ) break;
783: if ( (xh << lsh_i) != 0 ) {
784: n2h += (1 << i);
785: #if BSH == BIT_WIDTH_OF_INT
786: tl = yl << i;
787: xh -= tl;
788: #else
789: tl = (yl << i)&BMASK;
790: if ( xh > tl ) xh -= tl;
791: else xh += (BASE - tl);
792: #endif
793: }
794: lsh_i--;
795: } while ( ++i != BSH );
796: }
797: /*
798: * n2l + n2h*BASE = x/y mod 2^{2*BSH}.
799: */
800: #if 0000
801: printf( "=====> 0x%08x(%u) + 2^%d*0x%08x(%u)\n", n2l, n2l, BSH, n2h, n2h );
802: #endif
803: d2h = 0, d2l = 1, s2 = 1;
804: #if BSH == BIT_WIDTH_OF_INT
805: if ( n2h == 0 ) goto done;
806: BitWidth( n2h, l2 );
807: if ( l2 == BSH ) {
808: d1h = 0, d1l = 1, s1 = -1;
809: th = n2h, tl = n2l;
810: } else {
811: i = BSH - l2;
812: d1h = 0, d1l = 1 << i, s1 = -1;
813: th = (n2h << i) | (n2l >> l2);
814: tl = n2l << i;
815: }
816: if ( tl == 0 ) n1h = - th, n1l = 0;
817: else n1h = BMASK - th, n1l = - tl;
818: /**/
819: if ( n1h < n2h || (n1h == n2h && n1l < n2l) ) goto swap12;
820: BitWidth( n1h, l1 );
821: goto sub12;
822: #else
823: n1h = BASE, n1l = 0, l1 = BSH,
824: d1h = d1l = 0, s1 = 0;
825: #endif
826: /**/
827: while ( n2h != 0 ) {
828: BitWidth( n2h, l2 );
829: sub12: ir = BSH - (i = l1 - l2);
830: do {
831: if ( i == 0 ) th = n2h, tl = n2l;
832: else
833: th = (n2h << i) | (n2l >> ir),
834: #if BSH == BIT_WIDTH_OF_INT
835: tl = n2l << i;
836: #else
837: tl = (n2l << i) & BMASK;
838: #endif
839: if ( th > n1h || (th == n1h && tl > n1l) ) goto next_i;
840: #if BSH == BIT_WIDTH_OF_INT
841: if ( tl > n1l ) n1h--;
842: n1l -= tl;
843: #else
844: if ( tl <= n1l ) n1l -= tl;
845: else n1l += (BASE - tl), n1h--;
846: #endif
847: n1h -= th;
848: /* (s1:d1h,d1l) -= ((s2:d2h,d2l) << i); */
849: if ( s2 != 0 ) {
850: if ( i == 0 ) th = d2h, tl = d2l;
851: else
852: #if BSH == BIT_WIDTH_OF_INT
853: th = (d2h << i) | (d2l >> ir),
854: tl = (d2l << i);
855: #else
856: th = (d2h << i)&BMASK | (d2l >> ir),
857: tl = (d2l << i)&BMASK;
858: #endif
859: if ( s1 == 0 )
860: s1 = -s2, d1h = th, d1l = tl;
861: else if ( s1 != s2 ) {
862: #if BSH == BIT_WIDTH_OF_INT
863: d1l += tl,
864: d1h += th;
865: if ( d1l < tl ) d1h++;
866: #else
867: tl += d1l;
868: d1l = tl&BMASK;
869: d1h = (d1h + th + (tl >> BSH))&BMASK;
870: #endif
871: if ( d1h == 0 && d1l == 0 ) s1 = 0;
872: } else if ( d1h > th ) {
873: #if BSH == BIT_WIDTH_OF_INT
874: if ( d1l < tl ) d1h--;
875: d1l -= tl;
876: #else
877: if ( d1l >= tl ) d1l -= tl;
878: else d1l += (BASE - tl), d1h--;
879: #endif
880: d1h -= th;
881: } else if ( d1h == th ) {
882: d1h = 0;
883: if ( d1l == tl ) s1 = d2h = 0;
884: else if ( d1l > tl ) d1l -= tl;
885: else d1l = tl - d1l, s1 = -s1;
886: } else {
887: #if BSH == BIT_WIDTH_OF_INT
888: if ( tl < d1l ) th--;
889: d1l = tl - d1l;
890: #else
891: if ( tl >= d1l ) d1l = tl - d1l;
892: else d1l = tl + (BASE - d1l), th--;
893: #endif
894: d1h = th - d1h;
895: s1 = -s1;
896: }
897: }
898: next_i: i--, ir++;
899: } while ( n1h > n2h || (n1h == n2h && n1l >= n2l) );
900: swap12: /* swap 1 and 2 */
901: th = n1h, tl = n1l;
902: n1h = n2h, n1l = n2l;
903: n2h = th, n2l = tl;
904: l1 = l2;
905: th = d1h, tl = d1l, i = s1;
906: d1h = d2h, d1l = d2l, s1 = s2;
907: d2h = th, d2l = tl, s2 = i;
908: }
909: /**/
910: done: *pn = n2l, *pd = d2l;
911: return s2;
912: }
913:
914: static int igcd_spurious_factor;
915:
916: #define SaveN(s,d) {\
917: int i, l; \
918: for ( l = PL(d) = PL(s), i = 0; i < l; i++ ) BD(d)[i] = BD(s)[i]; \
919: }
920:
921: static N igcd_acc( n1, n2, nt )
922: N n1, n2, nt;
923: /* both n1 and n2 are assumed to be odd */
924: {
925: int l1, l2, *b1, *b2, bw1, bw2;
926: int l;
927: int n, d;
928: N p, s1, s2;
929:
930: if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
931: else if ( l < 0 ) { SWAP( n1, n2, N ); }
932: if ( ShouldCompRemInit(n1,n2) ) {
933: int w, b;
934:
935: divn( n1, n2, &s1, &s2 );
936: if ( !s2 ) return n2;
937: b = trailingzerosn( s2, &w );
938: p = n1; n1 = n2; n2 = p;
939: rshiftn( s2, w, b, n2 );
940: if ( UNIN(n2) ) return 0;
941: l1 = PL(n1);
942: if ( !s1 || PL(s1) < l1 ) s1 = NALLOC(l1);
943: } else if ( UNIN(n2) ) return 0;
944: else {
945: s1 = NALLOC(PL(n1));
946: s2 = NALLOC(PL(n2));
947: }
948: SaveN( n1, s1 );
949: SaveN( n2, s2 );
950: igcd_spurious_factor = 0;
951: loop: l1 = PL(n1), l2 = PL(n2);
952: if ( l1 <= 2 && l2 <= 2 ) {
953: l = igcd_binary_2w( BD(n1), l1, BD(n2), l2, BD(nt) );
954: if ( l == 0 ) return 0;
955: PL(nt) = l;
956: SWAP( n2, nt, N );
957: goto ret;
958: }
959: /**/
960: b1 = BD(n1), b2 = BD(n2);
961: BitWidth( b1[l1 -1], bw1 );
962: BitWidth( b2[l2 -1], bw2 );
963: if ( (l1*BSH + bw1) - (l2*BSH + bw2) <= igcdacc_thre ) {
964: l = ReducedRatMod( n1, n2, &n, &d );
965: l = l < 0 ? aUplusbV_maxrshift( n, b2, l2, d, b1, l1, BD(nt) ) :
966: abs_axU_bxV_maxrshift( n, b2, l2, d, b1, l1, BD(nt) );
967: igcd_spurious_factor++;
968: if ( l == 0 ) goto ret;
969: PL(nt) = l;
970: } else {
971: l = bmod_n( n1, n2, nt );
972: if ( l == 0 ) goto ret;
973: else if ( l < 0 ) {
974: SWAP( n1, n2, N );
975: goto loop;
976: }
977: }
978: p = n1;
979: n1 = n2;
980: n2 = nt;
981: nt = p;
982: goto loop;
983: /**/
984: ret: if ( igcd_spurious_factor != 0 && !UNIN(n2) ) {
985: if ( (p = igcd_bmod( n2, s1, n1 )) == 0 ) return 0;
986: if ( (p = igcd_bmod( p, s2, nt )) == 0 ) return 0;
987: return p;
988: } else return n2;
989: }
990:
991: void gcdaccn( n1, n2, nr )
992: N n1, n2, *nr;
993: {
994: int s1, s2, gw, gb, t1, t2;
995: int w1, w2;
996: N tn1, tn2, tnt, p;
997:
998: if ( !n1 ) {
999: *nr = n2;
1000: return;
1001: } else if ( !n2 ) {
1002: *nr = n1;
1003: return;
1004: }
1005: s1 = trailingzerosn( n1, &w1 );
1006: s2 = trailingzerosn( n2, &w2 );
1007: if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2;
1008: else if ( w1 < w2 ) gw = w1, gb = s1;
1009: else gw = w2, gb = s2;
1010: /*
1011: * true GCD must be multiplied by 2^{gw*BSH+gs}.
1012: */
1013: t1 = PL(n1) - w1;
1014: t2 = PL(n2) - w2;
1015: if ( t1 < t2 ) t1 = t2;
1016: tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1);
1017: rshiftn( n1, w1, s1, tn1 );
1018: rshiftn( n2, w2, s2, tn2 );
1019: /**/
1020: p = igcd_acc( tn1, tn2, tnt );
1021: /**/
1022: if ( p == 0 ) goto L0;
1023: RetTrueGCD( p, gw, gb, nr, L0 )
1024: }
1025:
1026:
1027: /********************************/
1028:
1029: void gcdn_HMEXT( n1, n2, nr )
1030: N n1, n2, *nr;
1031: {
1032: int b1, b2, w1, w2, gw, gb;
1033: int l1, l2;
1034: N tn1, tn2, tnt, a;
1035:
1036: if ( !n1 ) {
1037: *nr = n2; return;
1038: } else if ( !n2 ) {
1039: *nr = n1; return;
1040: }
1041: b1 = trailingzerosn( n1, &w1 );
1042: b2 = trailingzerosn( n2, &w2 );
1043: if ( w1 == w2 ) gw = w1, gb = b1 <= b2 ? b1 : b2;
1044: else if ( w1 < w2 ) gw = w1, gb = b1;
1045: else gw = w2, gb = b2;
1046: /*
1047: * true GCD must be multiplied by 2^{gw*BSH+gb}.
1048: */
1049: l1 = PL(n1) - w1;
1050: l2 = PL(n2) - w2;
1051: if ( l1 < l2 ) l1 = l2;
1052: tn1 = W_NALLOC( l1 ); tn2 = W_NALLOC( l1 ); tnt = W_NALLOC( l1 );
1053: rshiftn( n1, w1, b1, tn1 );
1054: rshiftn( n2, w2, b2, tn2 );
1055: /**/
1056: if ( igcd_algorithm == 1 ) {
1057: a = igcd_binary( tn1, tn2, tnt );
1058: } else if ( igcd_algorithm == 2 ) {
1059: a = igcd_bmod( tn1, tn2, tnt );
1060: } else {
1061: a = igcd_acc( tn1, tn2, tnt );
1062: if ( igcd_spurious_factor != 0 ) {
1063: }
1064: }
1065: RetTrueGCD( a, gw, gb, nr, L0 )
1066: }
1067:
1068:
1069: /**************************/
1070: #if 111
1071: N maxrshn( n, p )
1072: N n;
1073: int *p;
1074: {
1075: int nw, nb, c, l;
1076: N new;
1077:
1078: nb = trailingzerosn( n, &nw );
1079: l = PL(n);
1080: c = BD(n)[l -1];
1081: l -= nw;
1082: if ( (c >> nb) == 0 ) l--;
1083: new = NALLOC(l);
1084: rshiftn( n, nw, nb, new );
1085: *p = nb + nw*BSH;
1086: return new;
1087: }
1088: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>