Annotation of OpenXM_contrib2/asir2000/engine-27/N-27.c, Revision 1.3
1.2 noro 1: /*
2: * Copyright (c) 1994-2000 FUJITSU LABORATORIES LIMITED
3: * All rights reserved.
4: *
5: * FUJITSU LABORATORIES LIMITED ("FLL") hereby grants you a limited,
6: * non-exclusive and royalty-free license to use, copy, modify and
7: * redistribute, solely for non-commercial and non-profit purposes, the
8: * computer program, "Risa/Asir" ("SOFTWARE"), subject to the terms and
9: * conditions of this Agreement. For the avoidance of doubt, you acquire
10: * only a limited right to use the SOFTWARE hereunder, and FLL or any
11: * third party developer retains all rights, including but not limited to
12: * copyrights, in and to the SOFTWARE.
13: *
14: * (1) FLL does not grant you a license in any way for commercial
15: * purposes. You may use the SOFTWARE only for non-commercial and
16: * non-profit purposes only, such as academic, research and internal
17: * business use.
18: * (2) The SOFTWARE is protected by the Copyright Law of Japan and
19: * international copyright treaties. If you make copies of the SOFTWARE,
20: * with or without modification, as permitted hereunder, you shall affix
21: * to all such copies of the SOFTWARE the above copyright notice.
22: * (3) An explicit reference to this SOFTWARE and its copyright owner
23: * shall be made on your publication or presentation in any form of the
24: * results obtained by use of the SOFTWARE.
25: * (4) In the event that you modify the SOFTWARE, you shall notify FLL by
1.3 ! noro 26: * e-mail at risa-admin@sec.flab.fujitsu.co.jp of the detailed specification
1.2 noro 27: * for such modification or the source code of the modified part of the
28: * SOFTWARE.
29: *
30: * THE SOFTWARE IS PROVIDED AS IS WITHOUT ANY WARRANTY OF ANY KIND. FLL
31: * MAKES ABSOLUTELY NO WARRANTIES, EXPRESSED, IMPLIED OR STATUTORY, AND
32: * EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS
33: * FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT OF THIRD PARTIES'
34: * RIGHTS. NO FLL DEALER, AGENT, EMPLOYEES IS AUTHORIZED TO MAKE ANY
35: * MODIFICATIONS, EXTENSIONS, OR ADDITIONS TO THIS WARRANTY.
36: * UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, TORT, CONTRACT,
37: * OR OTHERWISE, SHALL FLL BE LIABLE TO YOU OR ANY OTHER PERSON FOR ANY
38: * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL
39: * DAMAGES OF ANY CHARACTER, INCLUDING, WITHOUT LIMITATION, DAMAGES
40: * ARISING OUT OF OR RELATING TO THE SOFTWARE OR THIS AGREEMENT, DAMAGES
41: * FOR LOSS OF GOODWILL, WORK STOPPAGE, OR LOSS OF DATA, OR FOR ANY
42: * DAMAGES, EVEN IF FLL SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF
43: * SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. EVEN IF A PART
44: * OF THE SOFTWARE HAS BEEN DEVELOPED BY A THIRD PARTY, THE THIRD PARTY
45: * DEVELOPER SHALL HAVE NO LIABILITY IN CONNECTION WITH THE USE,
46: * PERFORMANCE OR NON-PERFORMANCE OF THE SOFTWARE.
47: *
1.3 ! noro 48: * $OpenXM: OpenXM_contrib2/asir2000/engine-27/N-27.c,v 1.2 2000/08/21 08:31:32 noro Exp $
1.2 noro 49: */
1.1 noro 50: #include "ca-27.h"
51: #include "base.h"
52:
53: #ifndef HMEXT
54: #define HMEXT
55: #endif
56:
57: #ifdef HMEXT
58: #undef FULLSET
59: #undef CALL
60: #undef DIVISION
61:
62: int igcd_algorithm = 0;
63: /* == 0 : Euclid,
64: * == 1 : binary,
65: * == 2 : bmod,
66: * >= 3 : (Weber's accelerated)/(Jebelean's generalized binary) algorithm,
67: */
68: int igcd_thre_inidiv = 50;
69: /*
70: * In the non-Euclidean algorithms, if the ratio of the lengths (number
71: * of words) of two integers is >= igcd_thre_inidiv, we first perform
72: * remainder calculation.
73: * If == 0, this remainder calculation is not performed.
74: */
75: int igcdacc_thre = 10;
76: /*
77: * In the accelerated algorithm, if the bit-lengths of two integers is
78: * > igcdacc_thre, "bmod" reduction is done.
79: */
80:
81: #include "inline.h"
82:
83: #define TRAILINGZEROS(t,cntr) for(cntr=0;(t&1)==0;t>>=1)cntr++;
84:
85: #define W_NALLOC(d) ((N)ALLOCA(TRUESIZE(oN,(d)-1,int)))
86:
87: #define ShouldCompRemInit(n1,n2) (igcd_thre_inidiv != 0 && PL(n1) >= igcd_thre_inidiv*PL(n2))
88:
89: #define IniDiv(n1,n2) \
90: if ( ShouldCompRemInit(n1,n2) ) {\
91: N q, r; int w, b; \
92: divn_27(n1,n2,&q,&r); \
93: if ( !r ) return(n2); \
94: b = trailingzerosn( r, &w ); \
95: q = n1; n1 = n2; n2 = q; \
96: rshiftn( r, w, b, n2 ); \
97: }
98:
99: /*
100: * Binary GCD algorithm by J.Stein
101: * [J. Comp. Phys. Vol. 1 (1967), pp. 397-405)]:
102: * The right-shift binary algorithm is used.
103: */
104:
105:
106: /*
107: * subsidiary routines for gcdbinn below.
108: */
109: static int /* number of bits */ trailingzeros_nbd( /* BD of N */ nbd, pnw )
110: int *nbd, *pnw /* number of zero words */;
111: {
112: int nw, nb, w;
113:
114: for ( nw = 0; (w = *nbd) == 0; nbd++ ) nw++;
115: TRAILINGZEROS(w,nb);
116: *pnw = nw;
117: return nb;
118: }
119:
120: #define trailingzerosn(n,pnw) trailingzeros_nbd(BD(n),pnw)
121:
122: static int /* PL of N */ rshift_nbd( /* BD of N */ nbd, /* PL of N */ nl,
123: /* # words */ shw, /* # bits */ shb, /* BD of N */ p )
124: int *nbd, nl, shw, shb, *p;
125: {
126: int i, v, w, lshb;
127:
128: nbd += shw, i = (nl -= shw);
129: if ( shb == 0 ) {
130: for ( ; nl > 0; nl-- ) *p++ = *nbd++;
131: return i;
132: } else if ( nl < 2 ) {
133: *p = (*nbd) >> shb;
134: return 1;
135: }
136: for ( lshb = BSH27 - shb, v = *nbd++; --nl > 0; v = w ) {
137: w = *nbd++;
138: *p++ = (v >> shb) | ((w << lshb)&BMASK27);
139: }
140: if ( (v >>= shb) == 0 ) return( i-1 );
141: *p = v;
142: return i;
143: }
144:
145: #define rshiftn(ns,shw,shb,nd) (PL(nd)=rshift_nbd(BD(ns),PL(ns),shw,shb,BD(nd)))
146: /* nd <= ns << (shb + shw*BSH27), returns PL of the result */
147:
148: #ifdef FULLSET
149: static N N_of_i_lshifted_by_wb( i, gw, gb )
150: int i, gw, gb;
151: /*
152: * returns pointer to a new struct (N)(((int)i) >> (gb + gw*BSH27))
153: */
154: {
155: int j, l, *p;
156: N n;
157:
158: j = i >> (BSH27 - gb);
159: i = (i << gb)&BMASK27;
160: l = j != 0 ? gw + 2 : gw + 1;
161: n = NALLOC(l);
162: PL(n) = l;
163: for ( p = BD(n); gw-- > 0; ) *p++ = 0;
164: *p++ = i;
165: if ( j != 0 ) *p = j;
166: return n;
167: }
168: #endif /* FULLSET */
169:
170: /*
171: * routines to make a new struct
172: * (N)(((BD of N)(b[0],...,b[lb-1])) << (gb + gw*BSH27))
173: */
174: static N N_of_nbd_lshifted_by_wb( /* BD of N */ b, /* PL of N */ lb, gw, gb )
175: int *b, lb, gw, gb;
176: /*
177: * returns pointer to a new struct
178: * (N)(((BD of N)(b[0],...,b[lb-1])) << (gb + gw*BSH27))
179: */
180: {
181: int rsh, s, t, *p, l;
182: N n;
183:
184: l = lb + gw;
185: if ( gb == 0 ) {
186: n = NALLOC(l);
187: PL(n) = l;
188: for ( p = BD(n); gw-- > 0; ) *p++ = 0;
189: while ( lb-- > 0 ) *p++ = *b++;
190: return n;
191: }
192: rsh = BSH27 - gb; s = b[lb-1];
193: if ( (t = s >> rsh) != 0 ) {
194: n = NALLOC(l+1);
195: PL(n) = l+1;
196: (p = BD(n))[l] = t;
197: } else {
198: n = NALLOC(l);
199: PL(n) = l;
200: p = BD(n);
201: }
202: while ( gw-- > 0 ) *p++ = 0;
203: *p++ = ((t = *b++) << gb)&BMASK27;
204: for ( ; --lb > 0; t = s )
205: *p++ = (t >> rsh) | (((s = *b++) << gb)&BMASK27);
206: return n;
207: }
208:
209: #define N_of_n_lshifted_by_wb(a,gw,gb) N_of_nbd_lshifted_by_wb(BD(a),PL(a),gw,gb)
210:
211: #define SWAP(a,b,Type) { Type temp=a;a=b;b=temp;}
212: #define SIGNED_VAL(a,s) ((s)>0?(a):-(a))
213:
214:
215: #ifdef CALL
216: static int bw_int32( n )
217: int n;
218: {
219: int w;
220:
221: w = 0;
222: #if BSH27 >= 32
223: if ( n > 0xffffffff ) w += 32, n >>= 32;
224: #endif
225: if ( n >= 0x10000 ) w += 16, n >>= 16;
226: if ( n >= 0x100 ) w += 8, n >>= 8;
227: if ( n >= 0x10 ) w += 4, n >>= 4;
228: if ( n >= 0x4 ) w += 2, n >>= 2;
229: if ( n >= 0x2 ) w += 1, n >>= 1;
230: if ( n != 0 ) ++w;
231: return w;
232: }
233: #define BitWidth(n,bw) bw = bw_int32( n )
234: #else
235:
236: #if BSH27 >= 32
237: #define BitWidth(n,bw) {\
238: int k = (n); \
239: bw = 0; \
240: if ( BSH27 >= 32 ) if ( k > 0xffffffff ) bw += 32, k >>= 32; \
241: if ( k >= 0x10000 ) bw += 16, k >>= 16; \
242: if ( k >= 0x100 ) bw += 8, k >>= 8; \
243: if ( k >= 0x10 ) bw += 4, k >>= 4; \
244: if ( k >= 0x4 ) bw += 2, k >>= 2; \
245: if ( k >= 0x2 ) bw += 1, k >>= 1; \
246: if ( k != 0 ) bw++; \
247: }
248: #else
249: #define BitWidth(n,bw) {\
250: int k = (n); \
251: bw = 0; \
252: if ( k >= 0x10000 ) bw += 16, k >>= 16; \
253: if ( k >= 0x100 ) bw += 8, k >>= 8; \
254: if ( k >= 0x10 ) bw += 4, k >>= 4; \
255: if ( k >= 0x4 ) bw += 2, k >>= 2; \
256: if ( k >= 0x2 ) bw += 1, k >>= 1; \
257: if ( k != 0 ) bw++; \
258: }
259: #endif
260: #endif
261:
262: #include "igcdhack.c"
263:
264: /*
265: * Implementation of the binary GCD algorithm for two oN structs
266: * (big-integers) in risa.
267: *
268: * The major operations in the following algorithms are the binary-shifts
269: * and the updates of (u, v) by (min(u,v), |u-v|), and are to be open-coded
270: * without using routines for oN structures just as in addn() or subn().
271: */
272:
273: static int igcd_binary_2w( u, lu, v, lv, pans )
274: int *u, lu, *v, lv, *pans;
275: /* both u[0:lu-1] and v[0:lv-1] are assumed to be odd */
276: {
277: int i, h1, l1, h2, l2;
278:
279: l1 = u[0], l2 = v[0];
280: h1 = lu <= 1 ? 0 : u[1];
281: h2 = lv <= 1 ? 0 : v[1];
282: /**/
283: loop: if ( h1 == 0 ) {
284: no_hi1: if ( h2 == 0 ) goto one_word;
285: no_hi1n:if ( l1 == 1 ) return 0;
286: if ( (l2 -= l1) == 0 ) {
287: for ( l2 = h2; (l2&1) == 0; l2 >>= 1 ) ;
288: goto one_word;
289: } else if ( l2 < 0 ) h2--, l2 += BASE27;
290: i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 );
291: l2 |= ((h2 << (BSH27 - i)) & BMASK27);
292: h2 >>= i;
293: goto no_hi1;
294: } else if ( h2 == 0 ) {
295: no_hi2: if ( l2 == 1 ) return 0;
296: if ( (l1 -= l2) == 0 ) {
297: for ( l1 = h1; (l1&1) == 0; l1 >>= 1 ) ;
298: goto one_word;
299: } else if ( l1 < 0 ) h1--, l1 += BASE27;
300: i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 );
301: l1 |= ((h1 << (BSH27 - i)) & BMASK27);
302: if ( (h1 >>= i) == 0 ) goto one_word;
303: goto no_hi2;
304: } else if ( l1 == l2 ) {
305: if ( h1 == h2 ) {
306: pans[0] = l1, pans[1] = h1;
307: return 2;
308: } else if ( h1 > h2 ) {
309: for ( l1 = h1 - h2; (l1&1) == 0; l1 >>= 1 ) ;
310: goto no_hi1n;
311: } else {
312: for ( l2 = h2 - h1; (l2&1) == 0; l2 >>= 1 ) ;
313: goto no_hi2;
314: }
315: } else if ( h1 == h2 ) {
316: if ( l1 > l2 ) {
317: for ( l1 -= l2; (l1&1) == 0; l1 >>= 1 ) ;
318: goto no_hi1n;
319: } else {
320: for ( l2 -= l1; (l2&1) == 0; l2 >>= 1 ) ;
321: goto no_hi2;
322: }
323: } else if ( h1 > h2 ) {
324: h1 -= h2;
325: if ( (l1 -= l2) < 0 ) h1--, l1 += BASE27;
326: i = 0; do { l1 >>= 1, i++; } while ( (l1&1) == 0 );
327: l1 |= ((h1 << (BSH27 - i)) & BMASK27);
328: h1 >>= i;
329: } else {
330: h2 -= h1;
331: if ( (l2 -= l1) < 0 ) h2--, l2 += BASE27;
332: i = 0; do { l2 >>= 1, i++; } while ( (l2&1) == 0 );
333: l2 |= ((h2 << (BSH27 - i)) & BMASK27);
334: h2 >>= i;
335: }
336: goto loop;
337: one_word:
338: if ( l1 == 1 || l2 == 1 ) return 0;
339: else if ( l1 == l2 ) {
340: pans[0] = l1;
341: return 1;
342: }
343: one_word_neq:
344: if ( l1 > l2 ) {
345: l1 -= l2;
346: do { l1 >>= 1; } while ( (l1&1) == 0 );
347: goto one_word;
348: } else {
349: l2 -= l1;
350: do { l2 >>= 1; } while ( (l2&1) == 0 );
351: goto one_word;
352: }
353: }
354:
355: static N igcd_binary( n1, n2, nt )
356: N n1, n2, nt;
357: /* both n1 and n2 are assumed to be odd */
358: {
359: int l1, *b1, l2, *b2, *bt = BD(nt);
360: int l;
361:
362: if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
363: else if ( l < 0 ) { SWAP( n1, n2, N ); }
364: IniDiv( n1, n2 );
365: if ( UNIN(n2) ) return 0;
366: l1 = PL(n1), b1 = BD(n1), l2 = PL(n2), b2 = BD(n2);
367: loop: if ( l1 <= 2 && l2 <= 2 ) {
368: l = igcd_binary_2w( b1, l1, b2, l2, bt );
369: if ( l == 0 ) return 0;
370: PL(nt) = l;
371: return nt;
372: }
373: /**/
374: l = abs_U_V_maxrshift( b1, l1, b2, l2, bt );
375: /**/
376: if ( l == 0 ) {
377: PL(n1) = l1;
378: return n1;
379: } else if ( l > 0 ) {
380: l1 = l;
381: SWAP( b1, bt, int * ); SWAP( n1, nt, N );
382: } else {
383: l2 = -l;
384: SWAP( b2, bt, int * ); SWAP( n2, nt, N );
385: }
386: goto loop;
387: }
388:
389: #define RetTrueGCD(p,gw,gb,nr,l0) \
390: if (p==0) { l0: if (gw==0&&gb==0) { *(nr)=ONEN; return; } else p=ONEN; } \
391: *(nr) = N_of_n_lshifted_by_wb(p,gw,gb); \
392: return;
393:
394: void gcdbinn( n1, n2, nr )
395: N n1, n2, *nr;
396: {
397: int s1, s2, gw, gb, t1, t2;
398: int w1, w2;
399: N tn1, tn2, tnt, p;
400:
401: if ( !n1 ) {
402: *nr = n2;
403: return;
404: } else if ( !n2 ) {
405: *nr = n1;
406: return;
407: }
408: s1 = trailingzerosn( n1, &w1 );
409: s2 = trailingzerosn( n2, &w2 );
410: if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2;
411: else if ( w1 < w2 ) gw = w1, gb = s1;
412: else gw = w2, gb = s2;
413: /*
414: * true GCD must be multiplied by 2^{gw*BSH27+gb}.
415: */
416: t1 = PL(n1) - w1;
417: t2 = PL(n2) - w2;
418: if ( t1 < t2 ) t1 = t2;
419: tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1);
420: rshiftn( n1, w1, s1, tn1 );
421: rshiftn( n2, w2, s2, tn2 );
422: p = igcd_binary( tn1, tn2, tnt );
423: RetTrueGCD( p, gw, gb, nr, L0 )
424: }
425:
426:
427: /*
428: * The bmod gcd algorithm stated briefly in K.Weber's paper
429: * [ACM TOMS, Vol.21, No. 1 (1995), pp. 111-122].
430: * It replaces the subtraction (n1 - n2) in the binary algorithm
431: * by (n1 - S*n2) with such an S that (n1 - S*n2) \equiv 0 \bmod 2^BSH27,
432: * which should improve the efficiency when n1 \gg n2.
433: */
434:
435: /* subsidiary routines */
436: #ifdef CALL
437: #ifndef DIVISION
438: static int u_div_v_mod_2tos( u, v, s )
439: int u, v, s;
440: /*
441: * u/v mod 2^s.
442: */
443: {
444: int i,lsh_i, mask, m, two_to_s;
445:
446: mask = (two_to_s = 1 << s) - 1;
447: lsh_i = (sizeof(int) << 3) - 1;
448: m = i = 0;
449: u &= mask, v &= mask;
450: do {
451: if ( u == 0 ) break;
452: if ( (u << lsh_i) != 0 ) {
453: m += (1 << i);
454: u -= (v << i);
455: u &= mask;
456: }
457: lsh_i--;
458: } while ( ++i != s );
459: return m;
460: }
461: #else
462: static int u_div_v_mod_2tos( u, v, s )
463: int u, v, s;
464: {
465: int f1 = 1 << s, f2 = u, q, r, c1 = 0, c2 = v, m;
466:
467: m = f1 - 1;
468: do { q = f1 / f2;
469: r = f1 - q*f2; f1 = f2; f2 = r;
470: r = c1 - q*c2; c1 = c2; c2 = r;
471: } while ( f2 != 1 );
472: return( c2 & m );
473: }
474: #endif /* DIVISION */
475:
476: #define Comp_U_div_V_mod_BASE27(U,V,R) R = u_div_v_mod_2tos(U,V,BSH27)
477: #else
478: #ifndef DIVISION
479: #define Comp_U_div_V_mod_BASE27(U,V,R) {\
480: int u = (U), v = (V), i, lsh; \
481: /* U and V are assumed to be odd */ \
482: i = R = 1, lsh = (sizeof(int) << 3) - 2; u = (u - v) & BMASK27; \
483: do { if ( u == 0 ) break; \
484: if ( (u << lsh) != 0 ) R += (1 << i), u = (u - (v << i)) & BMASK27; \
485: i++, lsh--; \
486: } while ( i < BSH27 ); \
487: }
488: #else
489: #define Comp_U_div_V_mod_BASE27(U,V,R) {\
490: int f1 = BASE27, f2 = (V), q, r, c1 = 0, c2 = (U); \
491: do { q = f1 / f2; \
492: r = f1 - q*f2; f1 = f2; f2 = r; \
493: r = c1 - q*c2; c1 = c2; c2 = r; \
494: } while ( f2 != 1 ); \
495: R = c2 & BMASK27; \
496: }
497: #endif /* DIVISION */
498: #endif
499:
500:
501: static int bmod_n( nu, nv, na )
502: N nu, nv, na;
503: /*
504: * Computes (u[] \bmod v[]) >> (as much as possible) in r[].
505: */
506: {
507: int *u = BD(nu), lu = PL(nu), *v = BD(nv), lv = PL(nv),
508: *r = BD(na);
509: int *p, a, t, l, z, v0, vh, bv, v0r;
510:
511: v0 = v[0];
512: if ( lv == 1 ) {
513: if ( lu == 1 ) a = u[0] % v0;
514: else {
515: p = &u[--lu];
516: a = (*p) % v0, t = BASE27 % v0;
517: for ( ; --lu >= 0; a = l ) {
518: --p;
519: DMAR(a,t,*p,v0,l)
520: /* l <= (a*t + p[0])%v0 */
521: }
522: }
523: if ( a == 0 ) return 0;
524: while ( (a&1) == 0 ) a >>= 1;
525: *r = a;
526: return( PL(na) = 1 );
527: }
528: Comp_U_div_V_mod_BASE27( 1, v0, v0r );
529: vh = v[lv -1];
530: BitWidth( vh, bv );
531: bv--;
532: t = 1 << bv;
533: l = lv + 1;
534: for ( z = -1; lu > l || lu == l && u[lu-1] >= t; z = -z ) {
535: a = (v0r*u[0])&BMASK27;
536: /**/
537: lu = abs_U_aV_maxrshift( u, lu, a, v, lv, r );
538: /**/
539: if ( lu == 0 ) return 0;
540: p = r;
541: r = u;
542: u = p;
543: }
544: if ( lu < lv ) goto ret;
545: t = u[lu-1];
546: if ( lu > lv ) l = BSH27;
547: else if ( t < vh ) goto ret;
548: else l = 0;
549: BitWidth( t, a );
550: l += (a - bv);
551: a = (v0r*u[0])&(BMASK27 >> (BSH27 - l));
552: /**/
553: lu = abs_U_aV_maxrshift( u, lu, a, v, lv, r );
554: /**/
555: if ( lu == 0 ) return 0;
556: z = -z;
557: ret: if ( z > 0 ) return( PL(na) = lu );
558: PL(nu) = lu;
559: return( -lu );
560: }
561:
562:
563: static N igcd_bmod( n1, n2, nt )
564: N n1, n2, nt;
565: /* both n1 and n2 are assumed to be odd */
566: {
567: int l1, l2;
568: int l;
569:
570: if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
571: else if ( l < 0 ) { SWAP( n1, n2, N ); }
572: IniDiv( n1, n2 );
573: if ( UNIN(n2) ) return 0;
574: loop: if ( (l1 = PL(n1)) <= 2 && (l2 = PL(n2)) <= 2 ) {
575: l = igcd_binary_2w( BD(n1), l1, BD(n2), l2, BD(nt) );
576: if ( l == 0 ) return 0;
577: PL(nt) = l;
578: return nt;
579: }
580: /**/
581: l = bmod_n( n1, n2, nt );
582: /**/
583: if ( l == 0 ) return n2;
584: else if ( l > 0 ) {
585: N tmp = n1;
586:
587: n1 = n2;
588: n2 = nt;
589: nt = tmp;
590: } else SWAP( n1, n2, N );
591: goto loop;
592: }
593:
594: void gcdbmodn( n1, n2, nr )
595: N n1, n2, *nr;
596: {
597: int s1, s2, gw, gb, t1, t2;
598: int w1, w2;
599: N tn1, tn2, tnt, p;
600:
601: if ( !n1 ) {
602: *nr = n2;
603: return;
604: } else if ( !n2 ) {
605: *nr = n1;
606: return;
607: }
608: s1 = trailingzerosn( n1, &w1 );
609: s2 = trailingzerosn( n2, &w2 );
610: if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2;
611: else if ( w1 < w2 ) gw = w1, gb = s1;
612: else gw = w2, gb = s2;
613: /*
614: * true GCD must be multiplied by 2^{gw*BSH27+gs}.
615: */
616: t1 = PL(n1) - w1;
617: t2 = PL(n2) - w2;
618: if ( t1 < t2 ) t1 = t2;
619: tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1);
620: rshiftn( n1, w1, s1, tn1 );
621: rshiftn( n2, w2, s2, tn2 );
622: p = igcd_bmod( tn1, tn2, tnt );
623: RetTrueGCD( p, gw, gb, nr, L0 )
624: }
625:
626: /*
627: * The accelerated integer GCD algorithm by K.Weber
628: * [ACM TOMS, Vol.21, No. 1 (1995), pp. 111-122]:
629: */
630:
631: static int ReducedRatMod( x, y, pn, pd )
632: N x, y;
633: int *pn, *pd;
634: /*
635: * Let m = 2^{2*BSH27} = 2*BASE27. We assume x, y > 0 and \gcd(x,m)
636: * = \gcd(y,m) = 1. This routine computes n and d (resp. returned
637: * in *pn and *pd) such that 0 < n, |d| < \sqrt{m} and
638: * n*y \equiv x*d \bmod m.
639: */
640: {
641: int n1h, n1l, d1h, d1l, n2h, n2l, d2h, d2l;
642: int s1, s2, l1, l2;
643:
644:
645: {
646: int xh, xl, yh, yl, tl, i, lsh_i;
647: int th;
648:
649: xl = BD(x)[0];
650: xh = PL(x) > 1 ? BD(x)[1] : 0;
651: yl = BD(y)[0];
652: yh = PL(y) > 1 ? BD(y)[1] : 0;
653: Comp_U_div_V_mod_BASE27( xl, yl, n2l );
654: DM27(n2l,yl,th,tl) /* n2l*yl = tl+th*BASE27, where tl==xl. */;
655: if ( xh > th ) xh -= th;
656: else xh += (BASE27 - th);
657: DM27(n2l,yh,th,tl) /* n2l*yh = tl+th*BASE27. */;
658: if ( xh > tl ) xh -= tl;
659: else xh += (BASE27 - tl);
660: n2h = i = 0, lsh_i = 31;
661: do {
662: if ( xh == 0 ) break;
663: if ( (xh << lsh_i) != 0 ) {
664: n2h += (1 << i);
665: tl = (yl << i)&BMASK27;
666: if ( xh > tl ) xh -= tl;
667: else xh += (BASE27 - tl);
668: }
669: lsh_i--;
670: } while ( ++i != BSH27 );
671: }
672: /*
673: * n2l + n2h*BASE27 = x/y mod 2^{2*BSH27}.
674: */
675: n1h = BASE27, n1l = 0, l1 = BSH27,
676: d1h = d1l = 0, s1 = 0,
677: d2h = 0, d2l = 1, s2 = 1;
678: /**/
679: while ( n2h != 0 ) {
680: int i, ir, th, tl;
681:
682: BitWidth( n2h, l2 );
683: ir = BSH27 - (i = l1 - l2);
684: do {
685: if ( i == 0 ) th = n2h, tl = n2l;
686: else
687: th = (n2h << i) | (n2l >> ir),
688: tl = (n2l << i) & BMASK27;
689: if ( th > n1h || (th == n1h && tl > n1l) ) goto next_i;
690: if ( tl <= n1l ) n1l -= tl;
691: else n1l += (BASE27 - tl), n1h--;
692: n1h -= th;
693: /* (s1:d1h,d1l) -= ((s2:d2h,d2l) << i); */
694: if ( s2 != 0 ) {
695: if ( i == 0 ) th = d2h, tl = d2l;
696: else
697: th = (d2h << i)&BMASK27 | (d2l >> ir),
698: tl = (d2l << i)&BMASK27;
699: if ( s1 == 0 )
700: s1 = -s2, d1h = th, d1l = tl;
701: else if ( s1 != s2 ) {
702: tl += d1l;
703: d1l = tl&BMASK27;
704: d1h = (th + (tl >> BSH27))&BMASK27;
705: if ( d1h == 0 && d1l == 0 ) s1 = 0;
706: } else if ( d1h > th ) {
707: if ( d1l >= tl ) d1l -= tl;
708: else d1l += (BASE27 - tl), d1h--;
709: d1h -= th;
710: } else if ( d1h == th ) {
711: d1h = 0;
712: if ( d1l == tl ) s1 = d2h = 0;
713: else if ( d1l > tl ) d1l -= tl;
714: else d1l = tl - d1l, s1 = -s1;
715: } else {
716: if ( tl >= d1l ) d1l = tl - d1l;
717: else d1l = tl + (BASE27 - d1l), th--;
718: d1h = th - d1h;
719: s1 = -s1;
720: }
721: }
722: next_i: i--, ir++;
723: } while ( n1h > n2h || (n1h == n2h && n1l >= n2l) );
724: /* swap 1 and 2 */
725: th = n1h, tl = n1l;
726: n1h = n2h, n1l = n2l;
727: n2h = th, n2l = tl;
728: l1 = l2;
729: th = d1h, tl = d1l, i = s1;
730: d1h = d2h, d1l = d2l, s1 = s2;
731: d2h = th, d2l = tl, s2 = i;
732: }
733: /**/
734: *pn = n2l, *pd = d2l;
735: return s2;
736: }
737:
738: static int igcd_spurious_factor;
739:
740: #define SaveN(s,d) {\
741: int i, l; \
742: for ( l = PL(d) = PL(s), i = 0; i < l; i++ ) BD(d)[i] = BD(s)[i]; \
743: }
744:
745: static N igcd_acc( n1, n2, nt )
746: N n1, n2, nt;
747: /* both n1 and n2 are assumed to be odd */
748: {
749: int l1, l2, *b1, *b2, bw1, bw2;
750: int l;
751: int n, d;
752: N p, s1, s2;
753:
754: if ( (l = cmpn( n1, n2 )) == 0 ) return n1;
755: else if ( l < 0 ) { SWAP( n1, n2, N ); }
756: if ( ShouldCompRemInit(n1,n2) ) {
757: int w, b;
758:
759: divn_27( n1, n2, &s1, &s2 );
760: if ( !s2 ) return n2;
761: b = trailingzerosn( s2, &w );
762: p = n1; n1 = n2; n2 = p;
763: rshiftn( s2, w, b, n2 );
764: if ( UNIN(n2) ) return 0;
765: l1 = PL(n1);
766: if ( !s1 || PL(s1) < l1 ) s1 = NALLOC(l1);
767: } else if ( UNIN(n2) ) return 0;
768: else {
769: s1 = NALLOC(PL(n1));
770: s2 = NALLOC(PL(n2));
771: }
772: SaveN( n1, s1 );
773: SaveN( n2, s2 );
774: igcd_spurious_factor = 0;
775: loop: l1 = PL(n1), l2 = PL(n2);
776: if ( l1 <= 2 && l2 <= 2 ) {
777: l = igcd_binary_2w( BD(n1), l1, BD(n2), l2, BD(nt) );
778: if ( l == 0 ) return 0;
779: PL(nt) = l;
780: SWAP( n2, nt, N );
781: goto ret;
782: }
783: /**/
784: b1 = BD(n1), b2 = BD(n2);
785: BitWidth( b1[l1 -1], bw1 );
786: BitWidth( b2[l2 -1], bw2 );
787: if ( (l1*BSH27 + bw1) - (l2*BSH27 + bw2) <= igcdacc_thre ) {
788: l = ReducedRatMod( n1, n2, &n, &d );
789: l = l < 0 ? aUplusbV_maxrshift( n, b2, l2, d, b1, l1, BD(nt) ) :
790: abs_axU_bxV_maxrshift( n, b2, l2, d, b1, l1, BD(nt) );
791: igcd_spurious_factor++;
792: if ( l == 0 ) goto ret;
793: PL(nt) = l;
794: } else {
795: l = bmod_n( n1, n2, nt );
796: if ( l == 0 ) goto ret;
797: else if ( l < 0 ) {
798: SWAP( n1, n2, N );
799: goto loop;
800: }
801: }
802: p = n1;
803: n1 = n2;
804: n2 = nt;
805: nt = p;
806: goto loop;
807: /**/
808: ret: if ( igcd_spurious_factor != 0 && !UNIN(n2) ) {
809: if ( (p = igcd_bmod( n2, s1, n1 )) == 0 ) return 0;
810: if ( (p = igcd_bmod( p, s2, nt )) == 0 ) return 0;
811: return p;
812: } else return n2;
813: }
814:
815: void gcdaccn( n1, n2, nr )
816: N n1, n2, *nr;
817: {
818: int s1, s2, gw, gb, t1, t2;
819: int w1, w2;
820: N tn1, tn2, tnt, p;
821:
822: if ( !n1 ) {
823: *nr = n2;
824: return;
825: } else if ( !n2 ) {
826: *nr = n1;
827: return;
828: }
829: s1 = trailingzerosn( n1, &w1 );
830: s2 = trailingzerosn( n2, &w2 );
831: if ( w1 == w2 ) gw = w1, gb = s1 <= s2 ? s1 : s2;
832: else if ( w1 < w2 ) gw = w1, gb = s1;
833: else gw = w2, gb = s2;
834: /*
835: * true GCD must be multiplied by 2^{gw*BSH27+gs}.
836: */
837: t1 = PL(n1) - w1;
838: t2 = PL(n2) - w2;
839: if ( t1 < t2 ) t1 = t2;
840: tn1 = W_NALLOC(t1); tn2 = W_NALLOC(t1); tnt = W_NALLOC(t1);
841: rshiftn( n1, w1, s1, tn1 );
842: rshiftn( n2, w2, s2, tn2 );
843: /**/
844: p = igcd_acc( tn1, tn2, tnt );
845: /**/
846: if ( p == 0 ) goto L0;
847: RetTrueGCD( p, gw, gb, nr, L0 )
848: }
849:
850:
851: /********************************/
852:
853: void gcdBinary_27n( n1, n2, nr )
854: N n1, n2, *nr;
855: {
856: int b1, b2, w1, w2, gw, gb;
857: int l1, l2;
858: N tn1, tn2, tnt, a;
859:
860: if ( !n1 ) {
861: *nr = n2; return;
862: } else if ( !n2 ) {
863: *nr = n1; return;
864: }
865: b1 = trailingzerosn( n1, &w1 );
866: b2 = trailingzerosn( n2, &w2 );
867: if ( w1 == w2 ) gw = w1, gb = b1 <= b2 ? b1 : b2;
868: else if ( w1 < w2 ) gw = w1, gb = b1;
869: else gw = w2, gb = b2;
870: /*
871: * true GCD must be multiplied by 2^{gw*BSH27+gb}.
872: */
873: l1 = PL(n1) - w1;
874: l2 = PL(n2) - w2;
875: if ( l1 < l2 ) l1 = l2;
876: tn1 = W_NALLOC( l1 ); tn2 = W_NALLOC( l1 ); tnt = W_NALLOC( l1 );
877: rshiftn( n1, w1, b1, tn1 );
878: rshiftn( n2, w2, b2, tn2 );
879: /**/
880: if ( igcd_algorithm == 1 ) {
881: a = igcd_binary( tn1, tn2, tnt );
882: } else if ( igcd_algorithm == 2 ) {
883: a = igcd_bmod( tn1, tn2, tnt );
884: } else {
885: a = igcd_acc( tn1, tn2, tnt, -igcd_algorithm );
886: if ( igcd_spurious_factor != 0 ) {
887: }
888: }
889: RetTrueGCD( a, gw, gb, nr, L0 )
890: }
891:
892: /**************************/
893: N maxrshn( n, p )
894: N n;
895: int *p;
896: {
897: int nw, nb, c, l;
898: N new;
899:
900: nb = trailingzerosn( n, &nw );
901: l = PL(n);
902: c = BD(n)[l -1];
903: l -= nw;
904: if ( (c >> nb) == 0 ) l--;
905: new = NALLOC(l);
906: rshiftn( n, nw, nb, new );
907: *p = nb + nw*BSH27;
908: return new;
909: }
910: #endif /* HMEXT */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>