Annotation of OpenXM_contrib/pari/src/basemath/ifactor1.c, Revision 1.1
1.1 ! maekawa 1: /********************************************************************/
! 2: /** **/
! 3: /** INTEGER FACTORIZATION **/
! 4: /** **/
! 5: /********************************************************************/
! 6: /* $Id: ifactor1.c,v 1.1.1.1 1999/09/16 13:47:33 karim Exp $ */
! 7: #include "pari.h"
! 8:
! 9: /*********************************************************************/
! 10: /** **/
! 11: /** PSEUDO PRIMALITY **/
! 12: /** **/
! 13: /*********************************************************************/
! 14: static GEN sqrt1, sqrt2, t1, t;
! 15: static long r1;
! 16:
! 17: /* The following two internal routines don't restore avma -- the caller
! 18: must do so at the end. */
! 19: static GEN
! 20: init_miller(GEN n)
! 21: {
! 22: if (signe(n) < 0) n = absi(n);
! 23: t=addsi(-1,n); r1=vali(t); t1 = shifti(t,-r1);
! 24: sqrt1=cgeti(lg(t)); sqrt1[1]=evalsigne(0)|evallgefint(2);
! 25: sqrt2=cgeti(lg(t)); sqrt2[1]=evalsigne(0)|evallgefint(2);
! 26: return n;
! 27: }
! 28:
! 29: /* is n strong pseudo-prime for base a ? `End matching' (check for square
! 30: * roots of -1) added by GN */
! 31: /* TODO: If ends do mismatch, then we have factored n, and this information
! 32: should somehow be made available to the factoring machinery. --GN */
! 33: static int
! 34: bad_for_base(GEN n, GEN a)
! 35: {
! 36: long r, av=avma, lim=stack_lim(av,1);
! 37: GEN c2, c = powmodulo(a,t1,n);
! 38:
! 39: if (!is_pm1(c) && !egalii(t,c)) /* go fishing for -1, not for 1 */
! 40: {
! 41: for (r=r1-1; r; r--) /* (this saves one squaring/reduction) */
! 42: {
! 43: c2=c; c=resii(sqri(c),n);
! 44: if (egalii(t,c)) break;
! 45: if (low_stack(lim, stack_lim(av,1)))
! 46: {
! 47: GEN *gsav[2]; gsav[0]=&c; gsav[1]=&c2;
! 48: if(DEBUGMEM>1) err(warnmem,"miller(rabin)");
! 49: gerepilemany(av, gsav, 2);
! 50: }
! 51: }
! 52: if (!r) return 1;
! 53: /* sqrt(-1) seen, compare or remember */
! 54: if (signe(sqrt1)) /* we saw one earlier: compare */
! 55: {
! 56: /* check if too many sqrt(-1)s mod n */
! 57: if (!egalii(c2,sqrt1) && !egalii(c2,sqrt2)) return 1;
! 58: }
! 59: else { affii(c2,sqrt1); affii(subii(n,c2),sqrt2); } /* remember */
! 60: }
! 61: return 0;
! 62: }
! 63:
! 64: /* Miller-Rabin test for k random bases */
! 65: long
! 66: millerrabin(GEN n, long k)
! 67: {
! 68: long r,i,av2, av = avma;
! 69:
! 70: if (!signe(n)) return 0;
! 71: /* If |n| <= 3, check if n = +- 1 */
! 72: if (lgefint(n)==3 && (ulong)(n[2])<=3) return (n[2] != 1);
! 73:
! 74: if (!mod2(n)) return 0;
! 75: n = init_miller(n); av2=avma;
! 76: for (i=1; i<=k; i++)
! 77: {
! 78: do r = smodsi(mymyrand(),n); while (!r);
! 79: if (DEBUGLEVEL > 4)
! 80: fprintferr("Miller-Rabin: testing base %ld\n",
! 81: r);
! 82: if (bad_for_base(n, stoi(r))) { avma=av; return 0; }
! 83: avma=av2;
! 84: }
! 85: avma=av; return 1;
! 86: }
! 87:
! 88: /* As above for k bases taken in pr (i.e not random).
! 89: * We must have |n|>2 and 1<=k<=11 (not checked) or k in {16,17} to select
! 90: * some special sets of bases.
! 91: *
! 92: * By computations of Gerhard Jaeschke, `On strong pseudoprimes to several
! 93: * bases', Math.Comp. 61 (1993), 915--926 (see also Chris Caldwell's Prime
! 94: * Number Pages at http://www.utm.edu/research/primes/prove2.html), we have:
! 95: *
! 96: * k == 4 (bases 2,3,5,7) correctly detects all composites
! 97: * n < 118 670 087 467 == 172243 * 688969 with the single exception of
! 98: * n == 3 215 031 751 == 151 * 751 * 28351,
! 99: *
! 100: * k == 5 (bases 2,3,5,7,11) correctly detects all composites
! 101: * n < 2 152 302 898 747 == 6763 * 10627 * 29947,
! 102: *
! 103: * k == 6 (bases 2,3,...,13) correctly detects all composites
! 104: * n < 3 474 749 660 383 == 1303 * 16927 * 157543,
! 105: *
! 106: * k == 7 (bases 2,3,...,17) correctly detects all composites
! 107: * n < 341 550 071 728 321 == 10670053 * 32010157,
! 108: * and even this limiting value is caught by an end mismatch between bases
! 109: * 2 and 5 (or 5 and 17).
! 110: *
! 111: * Moreover, the four bases chosen at
! 112: *
! 113: * k == 16 (2,13,23,1662803) will correctly detect all composites up
! 114: * to at least 10^12, and the combination at
! 115: *
! 116: * k == 17 (31,73) detects most odd composites without prime factors > 100
! 117: * in the range n < 2^36 (with less than 250 exceptions, indeed with fewer
! 118: * than 1400 exceptions up to 2^42). --GN
! 119: * (DATA TO BE COMPLETED)
! 120: */
! 121: int /* no longer static -- needed in mpqs.c */
! 122: miller(GEN n, long k)
! 123: {
! 124: long r,i,av2, av = avma;
! 125: static long pr[] =
! 126: { 0, 2,3,5,7,11,13,17,19,23,29, 31,73, 2,13,23,1662803UL, };
! 127: long *p;
! 128:
! 129: if (!mod2(n)) return 0;
! 130: if (k==16)
! 131: { /* use smaller (faster) bases if possible */
! 132: if (lgefint(n)==3 && (ulong)(n[2]) < 3215031751UL) p = pr; /* 2,3,5,7 */
! 133: else p = pr+13; /* 2,13,23,1662803 */
! 134: k=4;
! 135: }
! 136: else if (k==17)
! 137: {
! 138: if (lgefint(n)==3 && (ulong)(n[2]) < 1373653) p = pr; /* 2,3 */
! 139: else p = pr+11; /* 31,73 */
! 140: k=2;
! 141: }
! 142: else p = pr; /* 2,3,5,... */
! 143: n = init_miller(n); av2=avma;
! 144: for (i=1; i<=k; i++)
! 145: {
! 146: r = smodsi(p[i],n); if (!r) break;
! 147: if (bad_for_base(n, stoi(r))) { avma = av; return 0; }
! 148: avma=av2;
! 149: }
! 150: avma=av; return 1;
! 151: }
! 152:
! 153: /***********************************************************************/
! 154: /** **/
! 155: /** PRIMES IN SUCCESSION **/
! 156: /** (abstracted by GN 1998Aug21 mainly for use in ellfacteur() below) **/
! 157: /** **/
! 158: /***********************************************************************/
! 159:
! 160: /* map from prime residue classes mod 210 to their numbers in {0...47}.
! 161: Subscripts into this array take the form ((k-1)%210)/2, ranging from
! 162: 0 to 104. Unused entries are 128 */
! 163: #define NPRC 128
! 164:
! 165: static
! 166: unsigned char prc210_no[] =
! 167: {
! 168: 0, NPRC, NPRC, NPRC, NPRC, 1, 2, NPRC, 3, 4, NPRC, /* 21 */
! 169: 5, NPRC, NPRC, 6, 7, NPRC, NPRC, 8, NPRC, 9, /* 41 */
! 170: 10, NPRC, 11, NPRC, NPRC, 12, NPRC, NPRC, 13, 14, NPRC, /* 63 */
! 171: NPRC, 15, NPRC, 16, 17, NPRC, NPRC, 18, NPRC, 19, /* 83 */
! 172: NPRC, NPRC, 20, NPRC, NPRC, NPRC, 21, NPRC, 22, 23, NPRC, /* 105 */
! 173: 24, 25, NPRC, 26, NPRC, NPRC, NPRC, 27, NPRC, NPRC, /* 125 */
! 174: 28, NPRC, 29, NPRC, NPRC, 30, 31, NPRC, 32, NPRC, NPRC, /* 147 */
! 175: 33, 34, NPRC, NPRC, 35, NPRC, NPRC, 36, NPRC, 37, /* 167 */
! 176: 38, NPRC, 39, NPRC, NPRC, 40, 41, NPRC, NPRC, 42, NPRC, /* 189 */
! 177: 43, 44, NPRC, 45, 46, NPRC, NPRC, NPRC, NPRC, 47, /* 209 */
! 178: };
! 179:
! 180: /* map from prime residue classes mod 210 (by number) to their smallest
! 181: positive representatives */
! 182: static
! 183: unsigned char prc210_rp[] =
! 184: {
! 185: 1, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79,
! 186: 83, 89, 97, 101, 103, 107, 109, 113, 121, 127, 131, 137, 139, 143, 149,
! 187: 151, 157, 163, 167, 169, 173, 179, 181, 187, 191, 193, 197, 199, 209,
! 188: };
! 189:
! 190: /* first differences of the preceding */
! 191: static
! 192: unsigned char prc210_d1[] =
! 193: {
! 194: 10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6,
! 195: 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6,
! 196: 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2, 10, 2,
! 197: };
! 198:
! 199: GEN
! 200: nextprime(GEN n)
! 201: {
! 202: long rc,rc0,rcd,rcn,av1,av2, av = avma;
! 203:
! 204: if (typ(n) != t_INT) n=gceil(n); /* accept arguments in R --GN */
! 205: if (typ(n) != t_INT) err(arither1);
! 206: if (signe(n) <= 0) { avma=av; return gdeux; }
! 207: if (lgefint(n) <= 3)
! 208: { /* check if n <= 7 */
! 209: ulong k = n[2];
! 210: if (k <= 2) { avma=av; return gdeux; }
! 211: if (k == 3) { avma = av; return stoi(3); }
! 212: if (k <= 5) { avma = av; return stoi(5); }
! 213: if (k <= 7) { avma = av; return stoi(7); }
! 214: }
! 215: /* here n > 7 */
! 216: if (!(mod2(n))) n = addsi(1,n);
! 217: rc = rc0 = smodis(n, 210);
! 218: rcn = (long)(prc210_no[rc0>>1]);
! 219: /* find next prime residue class mod 210 */
! 220: while (rcn == NPRC)
! 221: {
! 222: rc += 2; /* cannot wrap since 209 is coprime */
! 223: rcn = (long)(prc210_no[rc>>1]);
! 224: }
! 225: if (rc > rc0) n = addsi(rc - rc0, n);
! 226: /* now find an actual prime */
! 227: av2 = av1 = avma;
! 228: for(;;)
! 229: {
! 230: if (miller(n,10)) break;
! 231: av1 = avma;
! 232: rcd = prc210_d1[rcn];
! 233: if (++rcn > 47) rcn = 0;
! 234: n = addsi(rcd,n);
! 235: }
! 236: if (av1!=av2) return gerepile(av,av1,n);
! 237: return (av1==av)? icopy(n): n;
! 238: }
! 239:
! 240: GEN
! 241: precprime(GEN n)
! 242: {
! 243: long rc,rc0,rcd,rcn,av1,av2, av = avma;
! 244:
! 245: if (typ(n) != t_INT) n=gfloor(n); /* accept arguments in R --GN */
! 246: if (typ(n) != t_INT) err(arither1);
! 247: if (signe(n)<=0) { avma=av; return gzero; }
! 248: if (lgefint(n) <= 3)
! 249: { /* check if n <= 10 */
! 250: ulong k = n[2];
! 251: if (k <= 1) { avma=av; return gzero; }
! 252: if (k == 2) { avma=av; return gdeux; }
! 253: if (k <= 4) { avma=av; return stoi(3); }
! 254: if (k <= 6) { avma=av; return stoi(5); }
! 255: if (k <= 10) { avma=av; return stoi(7); }
! 256: }
! 257: /* here n >= 11 */
! 258: if (!(mod2(n))) n = addsi(-1,n);
! 259: rc = rc0 = smodis(n, 210);
! 260: rcn = (long)(prc210_no[rc0>>1]);
! 261: /* find last prime residue class mod 210 */
! 262: while (rcn == NPRC)
! 263: {
! 264: rc -= 2; /* cannot wrap since 1 is coprime */
! 265: rcn = (long)(prc210_no[rc>>1]);
! 266: }
! 267: if (rc < rc0) n = addsi(rc - rc0, n);
! 268: /* now find an actual prime */
! 269: av2 = av1 = avma;
! 270: for(;;)
! 271: {
! 272: if (miller(n,10)) break;
! 273: av1 = avma;
! 274: if (rcn == 0)
! 275: { rcd = 2; rcn = 47; }
! 276: else
! 277: rcd = prc210_d1[--rcn];
! 278: n = addsi(-rcd,n);
! 279: }
! 280: if (av1!=av2) return gerepile(av,av1,n);
! 281: return (av1==av)? icopy(n): n;
! 282: }
! 283:
! 284: /* find next single-word prime strictly larger than p. If **d is non-NULL,
! 285: this will be p + *(*d)++, using the diffptr table. Otherwise imitate
! 286: nextprime(). Apart from *d, caller must supply a long variable to which
! 287: rcn points, initialized either to NPRC or to the correct residue class
! 288: number for the current p; we'll use this to track the current prime
! 289: residue class mod 210 once we're out of range of the diffptr table, and
! 290: we'll update it before that if it isn't NPRC. *q is incremented when-
! 291: ever q!=NULL and we wrap from 209 mod 210 to 1 mod 210; this make sense
! 292: only when *rcn already held the correct value. Caller must also supply
! 293: the second argument for miller(). --GN1998Aug22 */
! 294: ulong
! 295: snextpr(ulong p, byteptr *d, long *rcn, long *q, long k)
! 296: {
! 297: static ulong pp[] =
! 298: { evaltyp(t_INT)|m_evallg(3), evalsigne(1)|evallgefint(3), 0 };
! 299: static ulong *pp2 = pp + 2;
! 300: static GEN gp = (GEN)pp;
! 301: long d1 = **d, rcn0;
! 302:
! 303: if (d1)
! 304: {
! 305: if (*rcn != NPRC)
! 306: {
! 307: rcn0 = *rcn;
! 308: while (d1 > 0)
! 309: {
! 310: d1 -= prc210_d1[*rcn];
! 311: if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
! 312: }
! 313: if (d1 < 0)
! 314: {
! 315: fprintferr("snextpr: prime %lu wasn\'t %lu mod 210\n",
! 316: p, (ulong)prc210_rp[rcn0]);
! 317: err(bugparier, "[caller of] snextpr");
! 318: }
! 319: }
! 320: return p + *(*d)++;
! 321: }
! 322: /* we are beyond the diffptr table */
! 323: if (*rcn == NPRC) /* we need to initialize this now */
! 324: {
! 325: *rcn = prc210_no[(p % 210) >> 1];
! 326: if (*rcn == NPRC)
! 327: {
! 328: fprintferr("snextpr: %lu should have been prime but isn\'t\n", p);
! 329: err(bugparier, "[caller of] snextpr");
! 330: }
! 331: }
! 332: /* look for the next one */
! 333: *pp2 = p;
! 334: *pp2 += prc210_d1[*rcn];
! 335: if (++*rcn > 47) *rcn = 0;
! 336: while (!miller(gp, k))
! 337: {
! 338: *pp2 += prc210_d1[*rcn];
! 339: if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
! 340: if (*pp2 <= 11) /* wraparound mod 2^BITS_IN_LONG */
! 341: {
! 342: fprintferr("snextpr: integer wraparound after prime %lu\n", p);
! 343: err(bugparier, "[caller of] snextpr");
! 344: }
! 345: }
! 346: return *pp2;
! 347: }
! 348:
! 349:
! 350: /***********************************************************************/
! 351: /** **/
! 352: /** FACTORIZATION (ECM) **/
! 353: /** Integer factorization using the elliptic curves method (ECM). **/
! 354: /** ellfacteur() returns a non trivial factor of N, assuming N>0, **/
! 355: /** is composite, and has no prime divisor below 2^14 or so. **/
! 356: /** Extensively modified by GN Jul-Aug 1998, with much helpful **/
! 357: /** advice by Paul Zimmermann. Thanks also to Guillaume Hanrot **/
! 358: /** and Igor Schein for providing many CPU cycles whilst testing. **/
! 359: /** **/
! 360: /***********************************************************************/
! 361:
! 362: static GEN N, gl, *XAUX;
! 363: #define nbcmax 64 /* max number of simultaneous curves */
! 364: #define bstpmax 1024 /* max number of baby step table entries */
! 365:
! 366: /* addition/doubling/multiplication of a point on an `elliptic curve'
! 367: mod N may result in one of three things: a new bona fide point,
! 368: a point at infinity (betraying itself by a denominator divisible
! 369: by N), or a point which is at infinity mod some nontrivial factor
! 370: of N but finite mod some other factor (betraying itself by a denom-
! 371: inator which has nontrivial gcd with N, and this is of course what
! 372: we want). */
! 373: /* (In the second case, addition/doubling will simply abort, copying one
! 374: of the summands to the destination array of points unless they coincide.
! 375: Multiplication will stop at some unpredictable intermediate stage: The
! 376: destination will contain _some_ multiple of the input point, but not
! 377: necessarily the desired one, which doesn't matter. As long as we're
! 378: multiplying (B1 phase) we simply carry on with the next multiplier.
! 379: During the B2 phase, the only additions are the giant steps, and the
! 380: worst that can happen here is that we lose one residue class mod 210
! 381: of prime multipliers on 4 of the curves, so again, we ignore the problem
! 382: and just carry on.) */
! 383: /* The idea is: Select a handful of curves mod N and one point P on each of
! 384: them. Try to compute, for each such point, the multiple [M]P = Q where
! 385: M is the product of all powers <= B2 of primes <= nextprime(B1), for some
! 386: suitably chosen B1 and B2. Then check whether multiplying Q by one of the
! 387: primes < nextprime(B2) would betray a factor. This second stage proceeds
! 388: by looking separately at the primes in each residue class mod 210, four
! 389: curves at a time, and stepping additively to ever larger multipliers,
! 390: by comparing X coordinates of points which we would need to add in order
! 391: to reach another prime multiplier in the same residue class. `Comparing'
! 392: means that we accumulate a product of differences of X coordinates, and
! 393: from time to time take a gcd of this product with N. */
! 394: /* Montgomery's trick of hiding the cost of computing inverses mod N at a
! 395: price of three extra multiplications mod N, by working on up to 64 or
! 396: even 128 points in parallel, is used heavily. --GN */
! 397:
! 398: /* *** auxiliary functions for ellfacteur: *** */
! 399:
! 400: /* Parallel addition on nbc curves, assigning the result to locations at and
! 401: following *X3, *Y3. Safe to be called with X3,Y3 equal to X2,Y2 (_not_
! 402: to X1,Y1). It is also safe to overwrite Y2 with X3. (If Y coords of
! 403: result not desired, set Y3=NULL.) If nbc1 < nbc, the first summand is
! 404: assumed to hold only nbc1 distinct points, which are repeated as often
! 405: as we need them (useful for adding one point on each of a few curves
! 406: to several other points on the same curves).
! 407: Return 0 when successful, 1 when we hit a denominator divisible by N,
! 408: and 2 when gcd(denominator, N) is a nontrivial factor of N, which will
! 409: be preserved in gl.
! 410: We use more stack space than the old code did, and thus run a bit of a
! 411: risk of overflowing it, but it's still bounded by a constant multiple
! 412: of lgefint(N)*nbc, as it was in the old version --GN1998Jul02,Aug12 */
! 413: /* (Lessee: Second phase creates 12 items on the stack, per iteration,
! 414: of which four are twice as long and one is thrice as long as N --
! 415: makes 18 units per iteration. First phase creates 4 units. Total
! 416: can be as large as about 4*nbcmax+18*8 units. And elladd2() is just
! 417: as bad, and elldouble() comes to about 3*nbcmax+29*8 units. A few
! 418: strategic garbage collections every 8 iterations should help when nbc
! 419: is large...) --GN1998Aug23 */
! 420:
! 421: static int
! 422: elladd0(long nbc, long nbc1,
! 423: GEN *X1, GEN *Y1, GEN *X2, GEN *Y2, GEN *X3, GEN *Y3)
! 424: {
! 425: GEN lambda;
! 426: GEN W[2*nbcmax], *A=W+nbc; /* W[0],A[0] never used */
! 427: long i, av=avma, tetpil;
! 428: ulong mask = ~0UL;
! 429:
! 430: /* actually, this is only ever called with nbc1==nbc or nbc1==4, so: */
! 431: if (nbc1 == 4) mask = 3;
! 432: else if (nbc1 < nbc) err(bugparier, "[caller of] elladd0");
! 433:
! 434: /* W[0] = gun; */
! 435: W[1] = /* A[0] =*/ subii(X1[0], X2[0]);
! 436: for (i=1; i<nbc; i++)
! 437: {
! 438: A[i] = subii(X1[i&mask], X2[i]); /* don't waste time reducing mod N here */
! 439: W[i+1] = modii(mulii(A[i], W[i]), N);
! 440: }
! 441: tetpil = avma;
! 442:
! 443: /* if gl != N we have a factor */
! 444: if (!invmod(W[nbc], N, &gl))
! 445: {
! 446: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
! 447: if (X2 != X3)
! 448: {
! 449: long k;
! 450: /* cannot add on one of the curves mod N: make sure X3 contains
! 451: something useful before letting the caller proceed */
! 452: for (k = 2*nbc; k--; ) affii(X2[k],X3[k]);
! 453: }
! 454: avma = av; return 1;
! 455: }
! 456:
! 457: while (i--) /* nbc times, actually */
! 458: {
! 459: lambda = modii(mulii(subii(Y1[i&mask], Y2[i]),
! 460: i?mulii(gl, W[i]):gl), N);
! 461: modiiz(subii(sqri(lambda), addii(X2[i], X1[i&mask])), N, X3[i]);
! 462: if (Y3)
! 463: modiiz(subii(mulii(lambda, subii(X1[i&mask], X3[i])),
! 464: Y1[i&mask]),
! 465: N, Y3[i]);
! 466: if (!i) break;
! 467: gl = modii(mulii(gl, A[i]), N);
! 468: if (!(i&7)) gl = gerepileupto(tetpil, gl);
! 469: }
! 470: avma=av; return 0;
! 471: }
! 472:
! 473: /* Shortcut variant, for use in cases where Y coordinates follow their
! 474: corresponding X coordinates, and the first summand doesn't need to be
! 475: repeated */
! 476: static int
! 477: elladd(long nbc, GEN *X1, GEN *X2, GEN *X3)
! 478: {
! 479: return elladd0(nbc, nbc, X1, X1+nbc, X2, X2+nbc, X3, X3+nbc);
! 480: }
! 481: /* this could perhaps become a macro --GN */
! 482:
! 483: /* The next one is exactly the same except it does twice as many additions
! 484: (and thus hides even more of the cost of the modular inverse); the net
! 485: effect is the same as elladd(nbc,X1,X2,X3) followed by elladd(nbc,X4,X5,X6).
! 486: Safe to have X2==X3 and/or X5==X6, and of course safe to have X1 or X2
! 487: coincide with X4 or X5, in any order. */
! 488:
! 489: static int
! 490: elladd2(long nbc, GEN *X1, GEN *X2, GEN *X3, GEN *X4, GEN *X5, GEN *X6)
! 491: {
! 492: GEN lambda, *Y1 = X1+nbc, *Y2 = X2+nbc, *Y3 = X3+nbc;
! 493: GEN *Y4 = X4+nbc, *Y5 = X5+nbc, *Y6 = X6+nbc;
! 494: GEN W[4*nbcmax], *A=W+2*nbc; /* W[0],A[0] never used */
! 495: long i,j, av=avma, tetpil;
! 496: /* W[0] = gun; */
! 497: W[1] = /* A[0] =*/ subii(X1[0], X2[0]);
! 498: for (i=1; i<nbc; i++)
! 499: {
! 500: A[i] = subii(X1[i], X2[i]); /* don't waste time reducing mod N here */
! 501: W[i+1] = modii(mulii(A[i], W[i]), N);
! 502: }
! 503: for (j=0; j<nbc; i++,j++)
! 504: {
! 505: A[i] = subii(X4[j], X5[j]);
! 506: W[i+1] = modii(mulii(A[i], W[i]), N);
! 507: }
! 508: tetpil = avma;
! 509:
! 510: /* if gl != N we have a factor */
! 511: if (!invmod(W[2*nbc], N, &gl))
! 512: {
! 513: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
! 514: if (X2 != X3)
! 515: {
! 516: long k;
! 517: /* cannot add on one of the curves mod N: make sure X3 contains
! 518: something useful before letting the caller proceed */
! 519: for (k = 2*nbc; k--; ) affii(X2[k],X3[k]);
! 520: }
! 521: if (X5 != X6)
! 522: {
! 523: long k;
! 524: /* same for X6 */
! 525: for (k = 2*nbc; k--; ) affii(X5[k],X6[k]);
! 526: }
! 527: avma = av; return 1;
! 528: }
! 529:
! 530: while (j--) /* nbc times, actually */
! 531: {
! 532: i--;
! 533: lambda = modii(mulii(subii(Y4[j], Y5[j]),
! 534: mulii(gl, W[i])), N);
! 535: modiiz(subii(sqri(lambda), addii(X5[j], X4[j])), N, X6[j]);
! 536: modiiz(subii(mulii(lambda, subii(X4[j], X6[j])), Y4[j]), N, Y6[j]);
! 537: gl = modii(mulii(gl, A[i]), N);
! 538: if (!(i&7)) gl = gerepileupto(tetpil, gl);
! 539: }
! 540: while (i--) /* nbc times */
! 541: {
! 542: lambda = modii(mulii(subii(Y1[i], Y2[i]),
! 543: i?mulii(gl, W[i]):gl), N);
! 544: modiiz(subii(sqri(lambda), addii(X2[i], X1[i])), N, X3[i]);
! 545: modiiz(subii(mulii(lambda, subii(X1[i], X3[i])), Y1[i]), N, Y3[i]);
! 546: if (!i) break;
! 547: gl = modii(mulii(gl, A[i]), N);
! 548: if (!(i&7)) gl = gerepileupto(tetpil, gl);
! 549: }
! 550: avma=av; return 0;
! 551: }
! 552:
! 553: /* Parallel doubling on nbc curves, assigning the result to locations at
! 554: and following *X2. Safe to be called with X2 equal to X1. Return
! 555: value as for elladd() above. If we find a point at infinity mod N,
! 556: and if X1 != X2, we copy the points at X1 to X2.
! 557: Use fewer assignments than the old code. Strangely, whereas this gains
! 558: about 3% on my P133 with elladd(), it makes hardly any difference here
! 559: with elldouble() --GN */
! 560: static int
! 561: elldouble(long nbc, GEN *X1, GEN *X2)
! 562: {
! 563: GEN lambda,v, *Y1 = X1+nbc, *Y2 = X2+nbc;
! 564: GEN W[nbcmax+1]; /* W[0] never used */
! 565: long i, av=avma, tetpil;
! 566: /*W[0] = gun;*/ W[1] = Y1[0];
! 567: for (i=1; i<nbc; i++)
! 568: W[i+1] = modii(mulii(Y1[i], W[i]), N);
! 569: tetpil = avma;
! 570:
! 571: if (!invmod(W[nbc], N, &gl))
! 572: {
! 573: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
! 574: if (X1 != X2)
! 575: {
! 576: long k;
! 577: /* cannot double on one of the curves mod N: make sure X2 contains
! 578: something useful before letting the caller proceed */
! 579: for (k = 2*nbc; k--; ) affii(X1[k],X2[k]);
! 580: }
! 581: avma = av; return 1;
! 582: }
! 583:
! 584: while (i--) /* nbc times, actually */
! 585: {
! 586: lambda = modii(mulii(addsi(1, mulsi(3, sqri(X1[i]))),
! 587: i?mulii(gl,W[i]):gl), N);
! 588: if (signe(lambda)) /* half of zero is still zero */
! 589: lambda = shifti(mod2(lambda)? addii(lambda, N): lambda, -1);
! 590: v = modii(subii(sqri(lambda), shifti(X1[i],1)), N);
! 591: if (i) gl = modii(mulii(gl, Y1[i]), N);
! 592: modiiz(subii(mulii(lambda, subii(X1[i], v)), Y1[i]), N, Y2[i]);
! 593: affii(v, X2[i]);
! 594: if (!(i&7) && i) gl = gerepileupto(tetpil, gl);
! 595: }
! 596: avma = av; return 0;
! 597: }
! 598:
! 599: /* Parallel multiplication by an odd prime k on nbc curves, storing the
! 600: result to locations at and following *X2. Safe to be called with X2
! 601: equal to X1. Return values as for elladd() and elldouble().
! 602: Uses (a simplified variant of) Peter Montgomery's PRAC (PRactical Addition
! 603: Chain) algorithm; see ftp://ftp.cwi.nl/pub/pmontgom/Lucas.ps.gz .
! 604: With thanks to Paul Zimmermann for the reference. --GN1998Aug13 */
! 605:
! 606: /* We use an array of GENs pointed at by XAUX as a scratchpad; this will
! 607: have been set up by ellfacteur() (so we don't need to reinitialize it
! 608: each time). */
! 609:
! 610: static int
! 611: ellmult(long nbc, ulong k, GEN *X1, GEN *X2) /* k>2 prime, not checked */
! 612: {
! 613: long i,d,e,e1,r,av=avma,tetpil;
! 614: int res;
! 615: GEN *A=X2, *B=XAUX, *S, *T=XAUX+2*nbc;
! 616:
! 617: for (i = 2*nbc; i--; ) { affii(X1[i],XAUX[i]); }
! 618: tetpil = avma;
! 619:
! 620: /* first doubling picks up X1; after this we'll be working in XAUX and
! 621: X2 only, mostly via A and B and T */
! 622: if ((res = elldouble(nbc, X1, X2)) != 0)
! 623: {
! 624: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 625: return res;
! 626: }
! 627:
! 628: /* split the work at the golden ratio */
! 629: r = (long)(k*0.61803398875 + .5);
! 630: d = k - r; e = r - d; /* NB d+e == r, so no danger of ofl below */
! 631:
! 632: while (d != e)
! 633: {
! 634:
! 635: /* apply one of the nine transformations from PM's Table 4. We first
! 636: figure out which, and then go into an eight-way switch, because
! 637: some of the transformations are similar enough to share code. */
! 638:
! 639: if (d <= e + (e>>2)) /* floor(1.25*e) */
! 640: {
! 641: if ((d+e)%3 == 0)
! 642: { i = 0; goto apply; } /* Table 4, rule 1 */
! 643: else if ((d-e)%6 == 0)
! 644: { i = 1; goto apply; } /* rule 2 */
! 645: /* else fall through */
! 646: }
! 647: if ((d+3)>>2 <= e) /* equiv to d <= 4*e but cannot ofl */
! 648: { i = 2; goto apply; } /* rule 3, the most common case */
! 649: if ((d&1)==(e&1))
! 650: { i = 1; goto apply; } /* rule 4, which does the same as rule 2 */
! 651: if (!(d&1))
! 652: { i = 3; goto apply; } /* rule 5 */
! 653: if (d%3 == 0)
! 654: { i = 4; goto apply; } /* rule 6 */
! 655: if ((d+e)%3 == 0)
! 656: { i = 5; goto apply; } /* rule 7 */
! 657: if ((d-e)%3 == 0)
! 658: { i = 6; goto apply; } /* rule 8 */
! 659: /* when we get here, e must be even, for otherwise one of rules 4,5
! 660: would have applied */
! 661: i = 7; /* rule 9 */
! 662:
! 663: apply:
! 664: switch(i) /* i takes values in {0,...,7} here */
! 665: {
! 666: case 0: /* rule 1 */
! 667: e1 = d - e; d = (d + e1)/3; e = (e - e1)/3;
! 668: if ((res = elladd(nbc, A, B, T)) != 0)
! 669: {
! 670: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 671: return res;
! 672: }
! 673: if ((res = elladd2(nbc, T, A, A, T, B, B)) != 0)
! 674: {
! 675: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 676: return res;
! 677: }
! 678: break; /* end of rule 1 */
! 679: case 1: /* rules 2 and 4, part 1 */
! 680: d -= e;
! 681: if ((res = elladd(nbc, A, B, B)) != 0)
! 682: {
! 683: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 684: return res;
! 685: }
! 686: /* FALL THROUGH */
! 687: case 3: /* rule 5, and 2nd part of rules 2 and 4 */
! 688: d >>= 1;
! 689: if ((res = elldouble(nbc, A, A)) != 0)
! 690: {
! 691: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 692: return res;
! 693: }
! 694: break; /* end of rules 2, 4, and 5 */
! 695: case 4: /* rule 6 */
! 696: d /= 3;
! 697: if ((res = elldouble(nbc, A, T)) != 0)
! 698: {
! 699: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 700: return res;
! 701: }
! 702: if ((res = elladd(nbc, T, A, A)) != 0)
! 703: {
! 704: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 705: return res;
! 706: }
! 707: /* FALL THROUGH */
! 708: case 2: /* rule 3, and 2nd part of rule 6 */
! 709: d -= e;
! 710: if ((res = elladd(nbc, A, B, B)) != 0)
! 711: {
! 712: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 713: return res;
! 714: }
! 715: break; /* end of rules 3 and 6 */
! 716: case 5: /* rule 7 */
! 717: d = (d - e - e)/3;
! 718: if ((res = elldouble(nbc, A, T)) != 0)
! 719: {
! 720: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 721: return res;
! 722: }
! 723: if ((res = elladd2(nbc, T, A, A, T, B, B)) != 0)
! 724: {
! 725: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 726: return res;
! 727: }
! 728: break; /* end of rule 7 */
! 729: case 6: /* rule 8 */
! 730: d = (d - e)/3;
! 731: if ((res = elladd(nbc, A, B, B)) != 0)
! 732: {
! 733: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 734: return res;
! 735: }
! 736: if ((res = elldouble(nbc, A, T)) != 0)
! 737: {
! 738: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 739: return res;
! 740: }
! 741: if ((res = elladd(nbc, T, A, A)) != 0)
! 742: {
! 743: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 744: return res;
! 745: }
! 746: break; /* end of rule 8 */
! 747: case 7: /* rule 9 */
! 748: e >>= 1;
! 749: if ((res = elldouble(nbc, B, B)) != 0)
! 750: {
! 751: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 752: return res;
! 753: }
! 754: break; /* end of rule 9 */
! 755: default: /* never reached */
! 756: break;
! 757: }
! 758: /* end of Table 4 processing */
! 759:
! 760: /* swap d <-> e and A <-> B if necessary */
! 761: if (d < e) { r = d; d = e; e = r; S = A; A = B; B = S; }
! 762: } /* while */
! 763: if ((res = elladd(nbc, XAUX, X2, X2)) != 0)
! 764: {
! 765: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 766: return res;
! 767: }
! 768: avma = av; return 0;
! 769: }
! 770:
! 771: /* PRAC implementation notes - main changes against the paper version:
! 772: (1) The general function [m+n]P = f([m]P,[n]P,[m-n]P) collapses (for
! 773: m!=n) to an elladd() which does not depend on the third argument; and
! 774: thus all references to the third variable (C in the paper) can be elimi-
! 775: nated. (2) Since our multipliers are prime, the outer loop of the paper
! 776: version executes only once, and thus is invisible above. (3) The first
! 777: step in the inner loop of the paper version will always be rule 3, but
! 778: the addition requested by this rule amounts to a doubling, and it will
! 779: always be followed by a swap, so we have unrolled this first iteration.
! 780: (4) Some simplifications in rules 6 and 7 are possible given the above,
! 781: and we can save one addition in each of the two cases. NB one can show
! 782: that none of the other elladd()s in the loop can ever turn out to de-
! 783: generate into an elldouble. (5) I tried to optimize for rule 3, which
! 784: is used far more frequently than all others together, but it didn't
! 785: improve things, so I removed the nested tight loop again. --GN */
! 786:
! 787: /* The main loop body of ellfacteur() runs slightly _slower_ under PRAC than
! 788: under a straightforward left-shift binary multiplication algorithm when
! 789: N has <30 digits and B1 is small; PRAC wins when N and B1 get larger.
! 790: Weird. --GN */
! 791:
! 792:
! 793: /* memory layout in ellfacteur(): We'll have a large-ish array of GEN
! 794: pointers, and one huge chunk of memory containing all the actual GEN
! 795: (t_INT) objects.
! 796: nbc will be held constant throughout the invocation. */
! 797: /* The B1 stage of each iteration through the main loop needs little
! 798: space: enough for the X and Y coordinates of the current points,
! 799: and twice as much again as scratchpad for ellmult(). */
! 800: /* The B2 stage, starting from some current set of points Q, needs, in
! 801: succession:
! 802: - space for [2]Q, [4]Q, ..., [10]Q, and [p]Q for building the helix;
! 803: - space for 48*nbc X and Y coordinates to hold the helix. Now this
! 804: could re-use [2]Q,...,[8]Q, but only with difficulty, since we don't
! 805: know in advance which residue class mod 210 our p is going to be in.
! 806: It can and should re-use [p]Q, though;
! 807: - space for (temporarily [30]Q and then) [210]Q, [420]Q, and several
! 808: further doublings until the giant step multiplier is reached. This
! 809: _can_ re-use the remaining cells from above. The computation of [210]Q
! 810: will have been the last call to ellmult() within this iteration of the
! 811: main loop, so the scratchpad is now also free to be re-used. We also
! 812: compute [630]Q by a parallel addition; we'll need it later to get the
! 813: baby-step table bootstrapped a little faster. */
! 814: /* Finally, for no more than 4 curves at a time, room for up to 1024 X
! 815: coordinates only (the Y coordinates needed whilst setting up this baby
! 816: step table are temporarily stored in the upper half, and overwritten
! 817: during the last series of additions). */
! 818:
! 819: /* Graphically: after end of B1 stage (X,Y are the coords of Q):
! 820: +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
! 821: | X Y | scratch | [2]Q| [4]Q| [6]Q| [8]Q|[10]Q| ... | ...
! 822: +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
! 823: *X *XAUX *XT *XD *XB
! 824:
! 825: [30]Q is computed from [10]Q. [210]Q can go into XY, etc:
! 826: +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
! 827: |[210]|[420]|[630]|[840]|[1680,3360,6720,...,2048*210] |bstp table...
! 828: +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
! 829: *X *XAUX *XT *XD [*XG, somewhere here] *XB .... *XH
! 830:
! 831: So we need (13 + 48) * 2 * nbc slots here, and another 4096 slots for
! 832: the baby step table (not all of which will be used when we start with a
! 833: small B1, but it's better to allocate and initialize ahead of time all
! 834: the slots that might be needed later). */
! 835:
! 836: /* Note on memory locality: During the B2 phase, accesses to the helix
! 837: (once it has been set up) will be clustered by curves (4 out of nbc at
! 838: a time). Accesses to the baby steps table will wander from one end of
! 839: the array to the other and back, one such cycle per giant step, and
! 840: during a full cycle we would expect on the order of 2E4 accesses when
! 841: using the largest giant step size. Thus we shouldn't be doing too bad
! 842: with respect to thrashing a (512KBy) L2 cache. However, we don't want
! 843: the baby step table to grow larger than this, even if it would reduce
! 844: the number of E.C. operations by a few more per cent for very large B2,
! 845: lest cache thrashing slow down everything disproportionally. --GN */
! 846:
! 847: /* parameters for miller() via snextpr(), for use by ellfacteur() */
! 848: #define miller_k1 16 /* B1 phase, foolproof below 10^12 */
! 849: #define miller_k2 1 /* B2 phase, not foolproof, much faster */
! 850: /* (miller_k2 will let thousands of composites slip through, which doesn't
! 851: harm ECM, but ellmult() during the B1 phase should only be fed primes
! 852: which really are prime) */
! 853:
! 854: /* ellfacteur() has been re-tuned to be useful as a first stage before
! 855: MPQS, especially for _large_ arguments, when insist is false, and now
! 856: also for the case when insist is true, vaguely following suggestions
! 857: by Paul Zimmermann (see http://www.loria.fr/~zimmerma/ and especially
! 858: http://www.loria.fr/~zimmerma/records/ecmnet.html) of INRIA/LORIA.
! 859: --GN 1998Jul,Aug */
! 860: GEN
! 861: ellfacteur(GEN n, int insist)
! 862: {
! 863: static ulong TB1[] =
! 864: {
! 865: /* table revised, cf. below 1998Aug15 --GN */
! 866: 142,172,208,252,305,370,450,545,661,801,972,1180,1430,
! 867: 1735,2100,2550,3090,3745,4540,5505,6675,8090,9810,11900,
! 868: 14420,17490,21200,25700,31160,37780UL,45810UL,55550UL,67350UL,
! 869: 81660UL,99010UL,120050UL,145550UL,176475UL,213970UL,259430UL,
! 870: 314550UL,381380UL,462415UL,560660UL,679780UL,824220UL,999340UL,
! 871: 1211670UL,1469110UL,1781250UL,2159700UL,2618600UL,3175000UL,
! 872: 3849600UL,4667500UL,5659200UL,6861600UL,8319500UL,10087100UL,
! 873: 12230300UL,14828900UL,17979600UL,21799700UL,26431500UL,
! 874: 32047300UL,38856400UL, /* 110 times that still fits into 32bits */
! 875: #ifdef LONG_IS_64BITS
! 876: 47112200UL,57122100UL,69258800UL,83974200UL,101816200UL,
! 877: 123449000UL,149678200UL,181480300UL,220039400UL,266791100UL,
! 878: 323476100UL,392204900UL,475536500UL,576573500UL,699077800UL,
! 879: 847610500UL,1027701900UL,1246057200UL,1510806400UL,1831806700UL,
! 880: 2221009800UL,2692906700UL,3265067200UL,3958794400UL,4799917500UL,
! 881: /* the only reason to stop here is that I got bored (and that users
! 882: will get bored watching their 64bit machines churning on such large
! 883: numbers for month after month). Someone can extend this table when
! 884: the hardware has gotten 100 times faster than now --GN */
! 885: #endif
! 886: };
! 887: static ulong TB1_for_stage[] =
! 888: {
! 889: /* table revised 1998Aug11 --GN. The idea is to start a little below
! 890: the optimal B1 for finding factors which would just have been missed
! 891: by pollardbrent(), and escalate gradually, changing curves suf-
! 892: ficiently frequently to give good coverage of the small factor
! 893: ranges. The table entries grow a bit faster than what Paul says
! 894: would be optimal, but having a single table instead of a 2D array
! 895: keeps the code simple */
! 896: 500,520,560,620,700,800,900,1000,1150,1300,1450,1600,1800,2000,
! 897: 2200,2450,2700,2950,3250,3600,4000,4400,4850,5300,5800,6400,
! 898: 7100,7850,8700,9600,10600,11700,12900,14200,15700,17300,
! 899: 19000,21000,23200,25500,28000,31000,34500UL,38500UL,43000UL,
! 900: 48000UL,53800UL,60400UL,67750UL,76000UL,85300UL,95700UL,
! 901: 107400UL,120500UL,135400UL,152000UL,170800UL,191800UL,215400UL,
! 902: 241800UL,271400UL,304500UL,341500UL,383100UL,429700UL,481900UL,
! 903: 540400UL,606000UL,679500UL,761800UL,854100UL,957500UL,1073500UL,
! 904: };
! 905: long nbc,nbc2,dsn,dsnmax,rep,spc,gse,gss,rcn,rcn0,bstp,bstp0;
! 906: long a,i,j,k, av,av1,lim, size = expi(n) + 1, tf = lgefint(n);
! 907: ulong B1,B2,B2_p,B2_rt,m,p,p0,p2,dp;
! 908: GEN w,w0,x,*X,*XT,*XD,*XG,*YG,*XH,*XB,*XB2,*Xh,*Yh,*Xb, res = cgeti(tf);
! 909: int rflag, use_clones = 0;
! 910: byteptr d, d0;
! 911:
! 912: av = avma; /* taking res into account */
! 913: N = n; /* make n known to auxiliary functions */
! 914: /* determine where we'll start, how long we'll persist, and how many
! 915: curves we'll use in parallel */
! 916: if (insist)
! 917: {
! 918: dsnmax = (size >> 2) - 10;
! 919: if (dsnmax < 0) dsnmax = 0;
! 920: #ifdef LONG_IS_64BITS
! 921: else if (dsnmax > 90) dsnmax = 90;
! 922: #else
! 923: else if (dsnmax > 65) dsnmax = 65;
! 924: #endif
! 925: dsn = (size >> 3) - 5;
! 926: if (dsn < 0) dsn = 0;
! 927: else if (dsn > 47) dsn = 47;
! 928: /* pick up the torch where non-insistent stage would have given up */
! 929: nbc = dsn + (dsn >> 2) + 9; /* 8 or more curves in parallel */
! 930: nbc &= ~3; /* nbc is always a multiple of 4 */
! 931: if (nbc > nbcmax) nbc = nbcmax;
! 932: a = 1 + (nbcmax<<7); /* seed for choice of curves */
! 933: }
! 934: else
! 935: {
! 936: dsn = (size - 140) >> 3;
! 937: if (dsn > 12) dsn = 12;
! 938: dsnmax = 72;
! 939: if (dsn < 0) /* < 140 bits: decline the task */
! 940: {
! 941: #ifdef __EMX__
! 942: /* MPQS's disk access under DOS/EMX would be abysmally slow, so... */
! 943: dsn = 0;
! 944: rep = 20;
! 945: nbc = 8;
! 946: #else
! 947: if (DEBUGLEVEL >= 4)
! 948: {
! 949: fprintferr("ECM: number too small to justify this stage\n");
! 950: flusherr();
! 951: }
! 952: avma = av; return NULL;
! 953: #endif
! 954: }
! 955: else
! 956: {
! 957: rep = (size <= 248 ?
! 958: (size <= 176 ? (size - 124) >> 4 : (size - 148) >> 3) :
! 959: (size - 224) >> 1);
! 960: nbc = ((size >> 3) << 2) - 80;
! 961: if (nbc < 8) nbc = 8;
! 962: else if (nbc > nbcmax) nbc = nbcmax;
! 963: #ifdef __EMX__
! 964: rep += 20;
! 965: #endif
! 966: }
! 967:
! 968: /* it may be convenient to use disjoint sets of curves for the non-insist
! 969: and insist phases; moreover, repeated non-insistent calls acting on
! 970: factors of the same original number should try to use fresh curves.
! 971: The following achieves this */
! 972: a = 1 + (nbcmax<<3)*(size & 0xf);
! 973: }
! 974: if (dsn > dsnmax) dsn = dsnmax;
! 975:
! 976: if (DEBUGLEVEL >= 4)
! 977: {
! 978: (void) timer2(); /* clear timer */
! 979: fprintferr("ECM: working on %ld curves at a time; initializing", nbc);
! 980: if (!insist)
! 981: {
! 982: if (rep == 1)
! 983: fprintferr(" for one round");
! 984: else
! 985: fprintferr(" for up to %ld rounds", rep);
! 986: }
! 987: fprintferr("...\n");
! 988: }
! 989:
! 990: /* The auxiliary routines above need < (3*nbc+240)*tf words on the PARI
! 991: stack, in addition to the spc*(tf+1) words occupied by our main table.
! 992: If stack space is already tight, try the heap, using newbloc() and
! 993: killbloc() */
! 994: nbc2 = nbc << 1;
! 995: spc = (13 + 48) * nbc2 + bstpmax * 4;
! 996: if ((long)((GEN)avma - (GEN)bot) < spc + 385 + (spc + 3*nbc + 240)*tf)
! 997: {
! 998: if (DEBUGLEVEL >= 5)
! 999: {
! 1000: fprintferr("ECM: stack tight, using clone space on the heap\n");
! 1001: }
! 1002: use_clones = 1;
! 1003: x = newbloc(spc + 385);
! 1004: }
! 1005: else
! 1006: x = new_chunk(spc + 385);
! 1007: X = 1 + (GEN*)x; /* B1 phase: current point */
! 1008: XAUX = X + nbc2; /* scratchpad for ellmult() */
! 1009: XT = XAUX + nbc2; /* ditto, will later hold [3*210]Q */
! 1010: XD = XT + nbc2; /* room for various multiples */
! 1011: XB = XD + 20*nbc; /* start of baby steps table */
! 1012: XB2 = XB + 2 * bstpmax; /* middle of baby steps table */
! 1013: XH = XB2 + 2 * bstpmax; /* end of bstps table, start of helix */
! 1014: Xh = XH + 96*nbc; /* little helix, X coords */
! 1015: Yh = XH + 192; /* ditto, Y coords */
! 1016: /* XG will be set later, inside the main loop, since it depends on B2 */
! 1017:
! 1018: {
! 1019: long tw = evallg(tf) | evaltyp(t_INT);
! 1020:
! 1021: if (use_clones)
! 1022: w = newbloc(spc*tf);
! 1023: else
! 1024: w = new_chunk(spc*tf);
! 1025: w0 = w; /* remember this for later... */
! 1026: for (i = spc; i--; )
! 1027: {
! 1028: *w = tw; X[i] = w; w += tf; /* hack for: w = cgeti(tf) */
! 1029: }
! 1030: /* Xh range of 384 pointers not set; these will later duplicate the
! 1031: pointers in the XH range, 4 curves at a time. Some of the cells
! 1032: reserved here for the XB range will never be used, instead, we'll
! 1033: warp the pointers to connect to (read-only) GENs in the X/XD range;
! 1034: it would be complicated to skip them here to conserve merely a few
! 1035: KBy of stack or heap space. --GN */
! 1036: }
! 1037:
! 1038: /* *** ECM MAIN LOOP *** */
! 1039: for(;;)
! 1040: {
! 1041: d = diffptr; rcn = NPRC; /* multipliers begin at the beginning */
! 1042:
! 1043: /* pick curves */
! 1044: for (i = nbc2; i--; ) affsi(a++, X[i]);
! 1045: /* pick bounds */
! 1046: B1 = insist ? TB1[dsn] : TB1_for_stage[dsn];
! 1047: B2 = 110*B1;
! 1048: B2_rt = (ulong)(sqrt((double)B2));
! 1049: /* pick giant step exponent and size.
! 1050: With 32 baby steps, a giant step corresponds to 32*420 = 13440, appro-
! 1051: priate for the smallest B2s. With 1024, a giant step will be 430080;
! 1052: this will be appropriate for B1 >~ 42000, where 512 baby steps would
! 1053: imply roughly the same number of E.C. additions. */
! 1054: gse = (B1 < 656 ?
! 1055: (B1 < 200 ? 5 : 6) :
! 1056: (B1 < 10500 ?
! 1057: (B1 < 2625 ? 7 : 8) :
! 1058: (B1 < 42000 ? 9 : 10)
! 1059: )
! 1060: );
! 1061: gss = 1UL << gse;
! 1062: XG = XT + gse*nbc2; /* will later hold [2^(gse+1)*210]Q */
! 1063: YG = XG + nbc;
! 1064:
! 1065: if (DEBUGLEVEL >= 4)
! 1066: {
! 1067: fprintferr("ECM: time = %6ld ms\nECM: dsn = %2ld,\tB1 = %4lu,",
! 1068: timer2(), dsn, B1);
! 1069: fprintferr("\tB2 = %6lu,\tgss = %4ld*420\n", B2, gss);
! 1070: flusherr();
! 1071: }
! 1072: p = *d++;
! 1073:
! 1074: /* ---B1 PHASE--- */
! 1075: /* treat p=2 separately */
! 1076: B2_p = B2 >> 1;
! 1077: for (m=1; m<=B2_p; m<<=1)
! 1078: {
! 1079: if ((rflag = elldouble(nbc, X, X)) > 1) goto fin;
! 1080: else if (rflag) break;
! 1081: }
! 1082:
! 1083: /* p=3,...,nextprime(B1) */
! 1084: while (p < B1 && p <= B2_rt)
! 1085: {
! 1086: p = snextpr(p, &d, &rcn, NULL, miller_k1);
! 1087: B2_p = B2/p; /* beware integer overflow on 32-bit CPUs */
! 1088: for (m=1; m<=B2_p; m*=p)
! 1089: {
! 1090: if ((rflag = ellmult(nbc, p, X, X)) > 1) goto fin;
! 1091: else if (rflag) break;
! 1092: }
! 1093: }
! 1094: /* primes p larger than sqrt(B2) can appear only to the 1st power */
! 1095: while (p < B1)
! 1096: {
! 1097: p = snextpr(p, &d, &rcn, NULL, miller_k1);
! 1098: if (ellmult(nbc, p, X, X) > 1) goto fin; /* p^2 > B2: no loop */
! 1099: }
! 1100:
! 1101: if (DEBUGLEVEL >= 4)
! 1102: {
! 1103: fprintferr("ECM: time = %6ld ms, B1 phase done, ", timer2());
! 1104: fprintferr("p = %lu, setting up for B2\n", p);
! 1105: }
! 1106:
! 1107: /* ---B2 PHASE--- */
! 1108: /* compute [2]Q,...,[10]Q, which we need to build the helix */
! 1109: if (elldouble(nbc, X, XD) > 1)
! 1110: goto fin; /* [2]Q */
! 1111: if (elldouble(nbc, XD, XD + nbc2) > 1)
! 1112: goto fin; /* [4]Q */
! 1113: if (elladd(nbc, XD, XD + nbc2, XD + (nbc<<2)) > 1)
! 1114: goto fin; /* [6]Q */
! 1115: if (elladd2(nbc,
! 1116: XD, XD + (nbc<<2), XT + (nbc<<3),
! 1117: XD + nbc2, XD + (nbc<<2), XD + (nbc<<3)) > 1)
! 1118: goto fin; /* [8]Q and [10]Q */
! 1119: if (DEBUGLEVEL >= 7)
! 1120: fprintferr("\t(got [2]Q...[10]Q)\n");
! 1121:
! 1122: /* get next prime (still using the foolproof test) */
! 1123: p = snextpr(p, &d, &rcn, NULL, miller_k1);
! 1124: /* make sure we have the residue class number (mod 210) */
! 1125: if (rcn == NPRC)
! 1126: {
! 1127: rcn = prc210_no[(p % 210) >> 1];
! 1128: if (rcn == NPRC)
! 1129: {
! 1130: fprintferr("ECM: %lu should have been prime but isn\'t\n", p);
! 1131: err(bugparier, "ellfacteur");
! 1132: }
! 1133: }
! 1134:
! 1135: /* compute [p]Q and put it into its place in the helix */
! 1136: if (ellmult(nbc, p, X, XH + rcn*nbc2) > 1) goto fin;
! 1137: if (DEBUGLEVEL >= 7)
! 1138: fprintferr("\t(got [p]Q, p = %lu = %lu mod 210)\n",
! 1139: p, (ulong)(prc210_rp[rcn]));
! 1140:
! 1141: /* save current p, d, and rcn; we'll need them more than once below */
! 1142: p0 = p;
! 1143: d0 = d;
! 1144: rcn0 = rcn; /* remember where the helix wraps */
! 1145: bstp0 = 0; /* p is at baby-step offset 0 from itself */
! 1146:
! 1147: /* fill up the helix, stepping forward through the prime residue classes
! 1148: mod 210 until we're back at the r'class of p0. Keep updating p so
! 1149: that we can print meaningful diagnostics if a factor shows up; but
! 1150: don't bother checking which of these p's are in fact prime */
! 1151: for (i = 47; i; i--) /* 47 iterations */
! 1152: {
! 1153: p += (dp = (ulong)prc210_d1[rcn]);
! 1154: if (rcn == 47)
! 1155: { /* wrap mod 210 */
! 1156: if (elladd(nbc, XT + dp*nbc, XH + rcn*nbc2, XH) > 1)
! 1157: goto fin;
! 1158: rcn = 0;
! 1159: continue;
! 1160: }
! 1161: if (elladd(nbc, XT + dp*nbc, XH + rcn*nbc2, XH + rcn*nbc2 + nbc2) > 1)
! 1162: goto fin;
! 1163: rcn++;
! 1164: }
! 1165: if (DEBUGLEVEL >= 7)
! 1166: fprintferr("\t(got initial helix)\n");
! 1167:
! 1168: /* compute [210]Q etc, which will be needed for the baby step table */
! 1169: if (ellmult(nbc, 3, XD + (nbc<<3), X) > 1) goto fin;
! 1170: if (ellmult(nbc, 7, X, X) > 1) goto fin; /* [210]Q */
! 1171: /* this was the last call to ellmult() in the main loop body; may now
! 1172: overwrite XAUX and slots XD and following */
! 1173: if (elldouble(nbc, X, XAUX) > 1) goto fin; /* [420]Q */
! 1174: if (elladd(nbc, X, XAUX, XT) > 1) goto fin; /* [630]Q */
! 1175: if (elladd(nbc, X, XT, XD) > 1) goto fin; /* [840]Q */
! 1176: for (i=1; i <= gse; i++) /* gse successive doublings */
! 1177: {
! 1178: if (elldouble(nbc, XT + i*nbc2, XD + i*nbc2) > 1) goto fin;
! 1179: }
! 1180: /* (the last iteration has initialized XG to [210*2^(gse+1)]Q) */
! 1181:
! 1182: if (DEBUGLEVEL >= 4)
! 1183: {
! 1184: fprintferr("ECM: time = %6ld ms, entering B2 phase, p = %lu\n",
! 1185: timer2(), p);
! 1186: }
! 1187:
! 1188: /* inner loop over small sets of 4 curves at a time */
! 1189: for (i = nbc - 4; i >= 0; i -= 4)
! 1190: {
! 1191: if (DEBUGLEVEL >= 6)
! 1192: fprintferr("ECM: finishing curves %ld...%ld\n", i, i+3);
! 1193: /* copy relevant pointers from XH to Xh. Recall memory layout in XH
! 1194: is: nbc X coordinates followed by nbc Y coordinates for residue
! 1195: class 1 mod 210, then the same for r.c. 11 mod 210, etc. Memory
! 1196: layout for Xh is: four X coords for 1 mod 210, four for 11 mod 210,
! 1197: etc, four for 209 mod 210, and then the corresponding Y coordinates
! 1198: in the same order. This will allow us to do a giant step on Xh
! 1199: using just three calls to elladd0() each acting on 64 points in
! 1200: parallel */
! 1201: for (j = 48; j--; )
! 1202: {
! 1203: k = nbc2*j + i;
! 1204: m = j << 2; /* X coordinates */
! 1205: Xh[m] = XH[k]; Xh[m+1] = XH[k+1];
! 1206: Xh[m+2] = XH[k+2]; Xh[m+3] = XH[k+3];
! 1207: k += nbc; /* Y coordinates */
! 1208: Yh[m] = XH[k]; Yh[m+1] = XH[k+1];
! 1209: Yh[m+2] = XH[k+2]; Yh[m+3] = XH[k+3];
! 1210: }
! 1211: /* build baby step table of X coords of multiples of [210]Q. XB[4*j]
! 1212: will point at X coords on four curves from [(j+1)*210]Q. Until
! 1213: we're done, we need some Y coords as well, which we keep in the
! 1214: second half of the table, overwriting them at the end when gse==10.
! 1215: Those multiples which we already have (by 1,2,3,4,8,16,...,2^gse)
! 1216: are entered simply by copying the pointers, ignoring the small
! 1217: number of slots in w that were initially reserved for them.
! 1218: Here are the initial entries... */
! 1219: for (Xb=XB,k=2,j=i; k--; Xb=XB2,j+=nbc) /* do first X, then Y coords */
! 1220: {
! 1221: Xb[0] = X[j]; Xb[1] = X[j+1]; /* [210]Q */
! 1222: Xb[2] = X[j+2]; Xb[3] = X[j+3];
! 1223: Xb[4] = XAUX[j]; Xb[5] = XAUX[j+1]; /* [420]Q */
! 1224: Xb[6] = XAUX[j+2]; Xb[7] = XAUX[j+3];
! 1225: Xb[8] = XT[j]; Xb[9] = XT[j+1]; /* [630]Q */
! 1226: Xb[10] = XT[j+2]; Xb[11] = XT[j+3];
! 1227: Xb += 4; /* this points at [420]Q */
! 1228: /* ... entries at powers of 2 times 210 .... */
! 1229: for (m = 2; m < gse+k; m++) /* omit Y coords of [2^gse*210]Q */
! 1230: {
! 1231: long m2 = m*nbc2 + j;
! 1232: Xb += (2UL<<m); /* points now at [2^m*210]Q */
! 1233: Xb[0] = XAUX[m2]; Xb[1] = XAUX[m2+1];
! 1234: Xb[2] = XAUX[m2+2]; Xb[3] = XAUX[m2+3];
! 1235: }
! 1236: }
! 1237: if (DEBUGLEVEL >= 7)
! 1238: fprintferr("\t(extracted precomputed helix / baby step entries)\n");
! 1239: /* ... glue in between, up to 16*210 ... */
! 1240: if (elladd0(12, 4, /* 12 pts + (4 pts replicated thrice) */
! 1241: XB + 12, XB2 + 12,
! 1242: XB, XB2,
! 1243: XB + 16, XB2 + 16)
! 1244: > 1) goto fin; /* 4 + {1,2,3} = {5,6,7} */
! 1245: if (elladd0(28, 4, /* 28 pts + (4 pts replicated 7fold) */
! 1246: XB + 28, XB2 + 28,
! 1247: XB, XB2,
! 1248: XB + 32, XB2 + 32)
! 1249: > 1) goto fin; /* 8 + {1,...,7} = {9,...,15} */
! 1250: /* ... and the remainder of the lot */
! 1251: for (m = 5; m <= gse; m++)
! 1252: {
! 1253: /* fill in from 2^(m-1)+1 to 2^m-1 in chunks of 64 and 60 points */
! 1254: ulong m2 = 2UL << m; /* will point at 2^(m-1)+1 */
! 1255: for (j = 0; j < m2-64; j+=64) /* executed 0 times when m == 5 */
! 1256: {
! 1257: if (elladd0(64, 4,
! 1258: XB + m2 - 4, XB2 + m2 - 4,
! 1259: XB + j, XB2 + j,
! 1260: XB + m2 + j,
! 1261: (m<gse ? XB2 + m2 + j : NULL))
! 1262: > 1) goto fin;
! 1263: } /* j == m2-64 here, 60 points left */
! 1264: if (elladd0(60, 4,
! 1265: XB + m2 - 4, XB2 + m2 - 4,
! 1266: XB + j, XB2 + j,
! 1267: XB + m2 + j,
! 1268: (m<gse ? XB2 + m2 + j : NULL))
! 1269: > 1) goto fin;
! 1270: /* (when m==gse, drop Y coords of result, and when both equal 1024,
! 1271: overwrite Y coords of second argument with X coords of result) */
! 1272: }
! 1273: if (DEBUGLEVEL >= 7)
! 1274: fprintferr("\t(baby step table complete)\n");
! 1275: /* initialize a few other things */
! 1276: bstp = bstp0;
! 1277: p = p0; d = d0; rcn = rcn0;
! 1278: gl = gun;
! 1279: av1 = avma;
! 1280: lim=stack_lim(av1,1);
! 1281: /* the correct entry in XB to use depends on bstp and on where we are
! 1282: on the helix. As we skip from prime to prime, bstp will be incre-
! 1283: mented by snextpr() each time we wrap around through residue class
! 1284: number 0 (1 mod 210), but the baby step should not be taken until
! 1285: rcn>=rcn0 (i.e. until we pass again the residue class of p0).
! 1286: The correct signed multiplier is thus k = bstp - (rcn < rcn0),
! 1287: and the offset from XB is four times (|k| - 1). When k==0, we may
! 1288: ignore the current prime (if it had led to a factorization, this
! 1289: would have been noted during the last giant step, or -- when we
! 1290: first get here -- whilst initializing the helix). When k > gss,
! 1291: we must do a giant step and bump bstp back by -2*gss.
! 1292: The gcd of the product of X coord differences against N is taken just
! 1293: before we do a giant step. */
! 1294:
! 1295: /* loop over probable primes p0 < p <= nextprime(B2),
! 1296: inserting giant steps as necessary */
! 1297: while (p < B2)
! 1298: {
! 1299: /* save current p for diagnostics */
! 1300: p2 = p;
! 1301: /* get next probable prime */
! 1302: p = snextpr(p, &d, &rcn, &bstp, miller_k2);
! 1303: /* work out the corresponding baby-step multiplier */
! 1304: k = bstp - (rcn < rcn0 ? 1 : 0);
! 1305: /* check whether it's giant-step time */
! 1306: if (k > gss)
! 1307: {
! 1308: /* take gcd */
! 1309: gl = mppgcd(gl, n);
! 1310: if (!is_pm1(gl) && !egalii(gl, n)) { p = p2; goto fin; }
! 1311: gl = gun;
! 1312: avma = av1;
! 1313: while (k > gss) /* hm, just how large are those prime gaps? */
! 1314: {
! 1315: /* giant step */
! 1316: if (DEBUGLEVEL >= 7)
! 1317: fprintferr("\t(giant step at p = %lu)\n", p);
! 1318: if (elladd0(64, 4,
! 1319: XG + i, YG + i,
! 1320: Xh, Yh, Xh, Yh) > 1) goto fin;
! 1321: if (elladd0(64, 4,
! 1322: XG + i, YG + i,
! 1323: Xh + 64, Yh + 64, Xh + 64, Yh + 64) > 1) goto fin;
! 1324: if (elladd0(64, 4,
! 1325: XG + i, YG + i,
! 1326: Xh + 128, Yh + 128, Xh + 128, Yh + 128)
! 1327: > 1) goto fin;
! 1328: bstp -= (gss << 1);
! 1329: /* recompute multiplier */
! 1330: k = bstp - (rcn < rcn0 ? 1 : 0);
! 1331: }
! 1332: }
! 1333: if (!k) continue; /* point of interest is already in Xh */
! 1334: if (k < 0) k = -k;
! 1335: m = ((ulong)k - 1) << 2;
! 1336: /* accumulate product of differences of X coordinates */
! 1337: j = rcn<<2;
! 1338: gl = modii(mulii(gl, subii(XB[m], Xh[j])), n);
! 1339: gl = modii(mulii(gl, subii(XB[m+1], Xh[j+1])), n);
! 1340: gl = modii(mulii(gl, subii(XB[m+2], Xh[j+2])), n);
! 1341: gl = modii(mulii(gl, subii(XB[m+3], Xh[j+3])), n);
! 1342: if (low_stack(lim, stack_lim(av1,1)))
! 1343: {
! 1344: if(DEBUGMEM>1) err(warnmem,"ellfacteur");
! 1345: gl = gerepileupto(av1, gl);
! 1346: }
! 1347: } /* loop over p */
! 1348: avma = av1;
! 1349: } /* for i (loop over sets of 4 curves) */
! 1350:
! 1351: /* continuation part of main loop */
! 1352:
! 1353: if (dsn < dsnmax)
! 1354: {
! 1355: dsn += insist ? 1 : 2;
! 1356: if (dsn > dsnmax) dsn = dsnmax;
! 1357: }
! 1358:
! 1359: if (!insist && !--rep)
! 1360: {
! 1361: if (DEBUGLEVEL >= 4)
! 1362: {
! 1363: fprintferr("ECM: time = %6ld ms,\tellfacteur giving up.\n",
! 1364: timer2());
! 1365: flusherr();
! 1366: }
! 1367: avma = av;
! 1368: if (use_clones) { gunclone(w0); gunclone(x); }
! 1369: return NULL;
! 1370: }
! 1371: }
! 1372: /* *** END OF ECM MAIN LOOP *** */
! 1373: fin:
! 1374: affii(gl, res);
! 1375:
! 1376: if (DEBUGLEVEL >= 4)
! 1377: {
! 1378: fprintferr("ECM: time = %6ld ms,\tp <= %6lu,\n\tfound factor = %Z\n",
! 1379: timer2(), p, res);
! 1380: flusherr();
! 1381: }
! 1382: avma=av;
! 1383: if (use_clones) { gunclone(w0); gunclone(x); }
! 1384: return res;
! 1385: }
! 1386:
! 1387: /***********************************************************************/
! 1388: /** **/
! 1389: /** FACTORIZATION (Pollard-Brent rho) **/
! 1390: /** pollardbrent() returns a non trivial factor of n, assuming n is **/
! 1391: /** composite and has no small prime divisor, or NULL if going on **/
! 1392: /** would take more time than we want to spend. GN1998Jun18-26 **/
! 1393: /** (Cf. Algorithm 8.5.2 in ACiCNT) **/
! 1394: /** **/
! 1395: /***********************************************************************/
! 1396: static void
! 1397: rho_dbg(long c, long msg_mask)
! 1398: {
! 1399: if (c & msg_mask) return;
! 1400: fprintferr("Rho: time = %6ld ms,\t%3ld round%s\n",
! 1401: timer2(), c, (c==1?"":"s"));
! 1402: flusherr();
! 1403: }
! 1404:
! 1405: /* Tuning parameter: for input up to 64 bits long, we must not spend more
! 1406: * than a very short time, for fear of slowing things down on average.
! 1407: * With the current tuning formula, increase our efforts somewhat at 49 bit
! 1408: * input (an extra round for each bit at first), and go up more and more
! 1409: * rapidly after we pass 80 bits. */
! 1410:
! 1411: #define tune_pb_min 14 /* even 15 seems too much */
! 1412:
! 1413: /* We return NULL when we run out of time, or a single t_INT containing a
! 1414: nontrivial factor of n, or a vector of t_INTs, each triple of successive
! 1415: entries containing a factor, an exponent (equal to un), and a factor
! 1416: class (NULL for unknown or zero for known composite), matching the
! 1417: internal representation used by the ifac_*() routines below. Repeated
! 1418: factors can arise and are legal; the caller will be sorting the factors
! 1419: anyway. */
! 1420: GEN
! 1421: pollardbrent(GEN n)
! 1422: {
! 1423: long tf = lgefint(n), size = 0, delta, retries = 0, msg_mask;
! 1424: long c0, c, k, k1, l, avP, avx, GGG, av = avma;
! 1425: GEN x, x1, y, P, g, g1, res;
! 1426:
! 1427: if (DEBUGLEVEL > 3) (void)timer2(); /* clear timer */
! 1428:
! 1429: if (tf >= 4)
! 1430: size = expi(n) + 1;
! 1431: else if (tf == 3) /* try to keep purify happy... */
! 1432: size = BITS_IN_LONG - bfffo(n[2]);
! 1433:
! 1434: if (size <= 32)
! 1435: c0 = 32; /* amounts very nearly to `insist' */
! 1436: else if (size <= 48)
! 1437: c0 = tune_pb_min;
! 1438: else if (size <= 72)
! 1439: c0 = tune_pb_min + size - 24;
! 1440: else if (size <= 301)
! 1441: /* nonlinear increase in effort, kicking in around 80 bits */
! 1442: /* 301 gives 48121 + tune_pb_min */
! 1443: c0 = tune_pb_min + size - 60 +
! 1444: ((size-73)>>1)*((size-70)>>3)*((size-56)>>4);
! 1445: else
! 1446: c0 = 49152; /* ECM is faster when it'd take longer */
! 1447:
! 1448: c = c0 << 5; /* 32 iterations per round */
! 1449: msg_mask = (size >= 448? 0x1fff:
! 1450: (size >= 192? (256L<<((size-128)>>6))-1: 0xff));
! 1451: PB_RETRY:
! 1452: /* trick to make a `random' choice determined by n. Don't use x^2+0 or
! 1453: * x^2-2, ever. Don't use x^2-3 or x^2-7 with a starting value of 2.
! 1454: * x^2+4, x^2+9 are affine conjugate to x^2+1, so don't use them either.
! 1455: *
! 1456: * (the point being that when we get called again on a composite cofactor
! 1457: * of something we've already seen, we had better avoid the same delta) */
! 1458: switch ((size + retries) & 7)
! 1459: {
! 1460: case 0: delta= 1; break;
! 1461: case 1: delta= -1; break;
! 1462: case 2: delta= 3; break;
! 1463: case 3: delta= 5; break;
! 1464: case 4: delta= -5; break;
! 1465: case 5: delta= 7; break;
! 1466: case 6: delta= 11; break;
! 1467: case 7: delta=-11; break;
! 1468: }
! 1469: if (DEBUGLEVEL > 3)
! 1470: {
! 1471: if (!retries)
! 1472: {
! 1473: if (size < 1536)
! 1474: fprintferr("Rho: searching small factor of %ld-bit integer\n", size);
! 1475: else
! 1476: fprintferr("Rho: searching small factor of %ld-word integer\n", tf-2);
! 1477: }
! 1478: else
! 1479: fprintferr("Rho: restarting for remaining rounds...\n");
! 1480: fprintferr("Rho: using X^2%+1ld for up to %ld rounds of 32 iterations\n",
! 1481: delta, c >> 5);
! 1482: flusherr();
! 1483: }
! 1484: x=gdeux; P=gun; g1 = NULL; k = 1; l = 1;
! 1485: (void)new_chunk(10 + 6 * tf); /* enough for cgetg(10) + 3 divii */
! 1486: y = cgeti(tf); affsi(2, y);
! 1487: x1= cgeti(tf); affsi(2, x1);
! 1488: avx = avma;
! 1489: avP = (long)new_chunk(2 * tf); /* enough for x = addsi(tf+1) */
! 1490: GGG = (long)new_chunk(4 * tf); /* enough for P = modii(2tf+1, tf) */
! 1491:
! 1492: for (;;) /* terminated under the control of c */
! 1493: {
! 1494: /* use the polynomial x^2 + delta */
! 1495: #define one_iter() {\
! 1496: avma = GGG; x = resii(sqri(x), n); /* to garbage zone */\
! 1497: avma = avx; x = addsi(delta,x); /* erase garbage */\
! 1498: avma = GGG; P = mulii(P, subii(x1, x));\
! 1499: avma = avP; P = modii(P,n); }
! 1500:
! 1501: one_iter();
! 1502:
! 1503: if ((--c & 0x1f)==0) /* one round complete */
! 1504: {
! 1505: g = mppgcd(n, P);
! 1506: if (!is_pm1(g)) goto fin; /* caught something */
! 1507: if (c <= 0)
! 1508: { /* getting bored */
! 1509: if (DEBUGLEVEL > 3)
! 1510: {
! 1511: fprintferr("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
! 1512: timer2());
! 1513: flusherr();
! 1514: }
! 1515: avma=av; return NULL;
! 1516: }
! 1517: P = gun; /* not necessary, but saves 1 mulii/round */
! 1518: if (DEBUGLEVEL > 3) rho_dbg(c0-(c>>5), msg_mask);
! 1519: affii(x,y);
! 1520: }
! 1521:
! 1522: if (--k) continue; /* normal end of loop body */
! 1523:
! 1524: if (c & 0x1f) /* otherwise, we already checked */
! 1525: {
! 1526: g = mppgcd(n, P);
! 1527: if (!is_pm1(g)) goto fin;
! 1528: P = gun;
! 1529: }
! 1530:
! 1531: /* Fast forward phase, doing l inner iterations without computing gcds.
! 1532: * Check first whether it would take us beyond the alloted time.
! 1533: * Fast forward rounds count only half (although they're taking
! 1534: * more like 2/3 the time of normal rounds). This to counteract the
! 1535: * nuisance that all c0 between 4096 and 6144 would act exactly as
! 1536: * 4096; with the halving trick only the range 4096..5120 collapses
! 1537: * (similarly for all other powers of two) */
! 1538: if ((c-=(l>>1)) <= 0)
! 1539: { /* got bored */
! 1540: if (DEBUGLEVEL > 3)
! 1541: {
! 1542: fprintferr("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
! 1543: timer2());
! 1544: flusherr();
! 1545: }
! 1546: avma=av; return NULL;
! 1547: }
! 1548: c &= ~0x1f; /* keep it on multiples of 32 */
! 1549:
! 1550: /* Fast forward loop */
! 1551: affii(x, x1); k = l; l <<= 1;
! 1552: /* don't show this for the first several (short) fast forward phases. */
! 1553: if (DEBUGLEVEL > 3 && (l>>7) > msg_mask)
! 1554: {
! 1555: fprintferr("Rho: fast forward phase (%ld rounds of 64)...\n", l>>7);
! 1556: flusherr();
! 1557: }
! 1558: for (k1=k; k1; k1--) one_iter();
! 1559: if (DEBUGLEVEL > 3 && (l>>7) > msg_mask)
! 1560: {
! 1561: fprintferr("Rho: time = %6ld ms,\t%3ld rounds, back to normal mode\n",
! 1562: timer2(), c0-(c>>5));
! 1563: flusherr();
! 1564: }
! 1565:
! 1566: affii(x,y);
! 1567: } /* forever */
! 1568:
! 1569: fin:
! 1570: /* An accumulated gcd was > 1 */
! 1571: /* if it isn't n, and looks prime, return it */
! 1572: if (!egalii(g,n))
! 1573: {
! 1574: if (miller(g,17))
! 1575: {
! 1576: if (DEBUGLEVEL > 3)
! 1577: {
! 1578: rho_dbg(c0-(c>>5), 0);
! 1579: fprintferr("\tfound factor = %Z\n",g);
! 1580: flusherr();
! 1581: }
! 1582: avma=av; return icopy(g);
! 1583: }
! 1584: avma = avx; g1 = icopy(g); /* known composite, keep it safe */
! 1585: avx = avma;
! 1586: }
! 1587: else g1 = n; /* and work modulo g1 for backtracking */
! 1588:
! 1589: /* Here g1 is known composite */
! 1590: if (DEBUGLEVEL > 3 && size > 192)
! 1591: {
! 1592: fprintferr("Rho: hang on a second, we got something here...\n");
! 1593: flusherr();
! 1594: }
! 1595: for(;;) /* backtrack until period recovered. Must terminate */
! 1596: {
! 1597: avma = GGG; y = resii(sqri(y), g1);
! 1598: avma = avx; y = addsi(delta,y);
! 1599: g = mppgcd(subii(x1, y), g1);
! 1600: if (!is_pm1(g)) break;
! 1601:
! 1602: if (DEBUGLEVEL > 3 && (--c & 0x1f) == 0) rho_dbg(c0-(c>>5), msg_mask);
! 1603: }
! 1604:
! 1605: avma = av; /* safe */
! 1606: if (g1 == n || egalii(g,g1))
! 1607: {
! 1608: if (g1 == n && egalii(g,g1))
! 1609: { /* out of luck */
! 1610: if (DEBUGLEVEL > 3)
! 1611: {
! 1612: rho_dbg(c0-(c>>5), 0);
! 1613: fprintferr("\tPollard-Brent failed.\n"); flusherr();
! 1614: }
! 1615: if (++retries >= 4) return NULL;
! 1616: goto PB_RETRY;
! 1617: }
! 1618: /* half lucky: we've split n, but g1 equals either g or n */
! 1619: if (DEBUGLEVEL > 3)
! 1620: {
! 1621: rho_dbg(c0-(c>>5), 0);
! 1622: fprintferr("\tfound %sfactor = %Z\n",
! 1623: (g1!=n ? "composite " : ""), g);
! 1624: flusherr();
! 1625: }
! 1626: res = cgetg(7, t_VEC);
! 1627: res[1] = licopy(g); /* factor */
! 1628: res[2] = un; /* exponent 1 */
! 1629: res[3] = (g1!=n? zero: (long)NULL); /* known composite when g1!=n */
! 1630:
! 1631: res[4] = ldivii(n,g); /* cofactor */
! 1632: res[5] = un; /* exponent 1 */
! 1633: res[6] = (long)NULL; /* unknown */
! 1634: return res;
! 1635: }
! 1636: /* g < g1 < n : our lucky day -- we've split g1, too */
! 1637: res = cgetg(10, t_VEC);
! 1638: /* unknown status for all three factors */
! 1639: res[1] = licopy(g); res[2] = un; res[3] = (long)NULL;
! 1640: res[4] = ldivii(g1,g); res[5] = un; res[6] = (long)NULL;
! 1641: res[7] = ldivii(n,g1); res[8] = un; res[9] = (long)NULL;
! 1642: if (DEBUGLEVEL > 3)
! 1643: {
! 1644: rho_dbg(c0-(c>>5), 0);
! 1645: fprintferr("\tfound factors = %Z, %Z,\n\tand %Z\n",
! 1646: res[1], res[4], res[7]);
! 1647: flusherr();
! 1648: }
! 1649: return res;
! 1650: }
! 1651:
! 1652: /***********************************************************************/
! 1653: /** **/
! 1654: /** DETECTING ODD POWERS **/
! 1655: /** Factoring engines like MPQS which ultimately rely on computing **/
! 1656: /** gcd(N, x^2-y^2) to find a nontrivial factor of N are fundamen- **/
! 1657: /** tally incapable of splitting a proper power of an odd prime, **/
! 1658: /** because of the cyclicity of the prime residue class group. We **/
! 1659: /** already have a square-detection function carrecomplet(), which **/
! 1660: /** also returns the square root if appropriate. Here's an analogue **/
! 1661: /** for cubes, fifth and 7th powers. 11th powers are a non-issue so **/
! 1662: /** long as mpqs() gives up beyond 100 decimal digits (since ECM **/
! 1663: /** easily find a 10-digit prime factor of a 100-digit number). **/
! 1664: /** GN1998Jun28 **/
! 1665: /** **/
! 1666: /***********************************************************************/
! 1667:
! 1668: /* Use a multistage sieve. First stages work mod 211, 209, 61, 203;
! 1669: if the argument is larger than a word, we first reduce mod the product
! 1670: of these and then take the remainder apart. Second stages use 117,
! 1671: 31, 43, 71 in this order. Moduli which are no longer interesting are
! 1672: skipped. Everything is encoded in a single table of 106 24-bit masks.
! 1673: We only need the first half of the residues. Three bits per modulus
! 1674: indicate which residues are 7th (bit 2), 5th (bit 1) powers or cubes
! 1675: (bit 0); the eight moduli above are assigned right-to-left. The table
! 1676: will err on the side of safety if one of the moduli divides the number
! 1677: to be tested, but as this leads to inefficiency it should still be
! 1678: avoided. */
! 1679:
! 1680: static ulong powersmod[106] = {
! 1681: 077777777ul, /* 0 */
! 1682: 077777777ul, /* 1 */
! 1683: 013562440ul, /* 2 */
! 1684: 012462540ul, /* 3 */
! 1685: 013562440ul, /* 4 */
! 1686: 052662441ul, /* 5 */
! 1687: 016663440ul, /* 6 */
! 1688: 016463450ul, /* 7 */
! 1689: 013573551ul, /* 8 */
! 1690: 012462540ul, /* 9 */
! 1691: 012462464ul, /* 10 */
! 1692: 013462771ul, /* 11 */
! 1693: 012466473ul, /* 12 */
! 1694: 012463641ul, /* 13 */
! 1695: 052463646ul, /* 14 */
! 1696: 012563446ul, /* 15 */
! 1697: 013762440ul, /* 16 */
! 1698: 052766440ul, /* 17 */
! 1699: 012772451ul, /* 18 */
! 1700: 012762454ul, /* 19 */
! 1701: 032763550ul, /* 20 */
! 1702: 013763664ul, /* 21 */
! 1703: 017763460ul, /* 22 */
! 1704: 037762565ul, /* 23 */
! 1705: 017762540ul, /* 24 */
! 1706: 057762441ul, /* 25 */
! 1707: 037772452ul, /* 26 */
! 1708: 017773551ul, /* 27 */
! 1709: 017767541ul, /* 28 */
! 1710: 017767640ul, /* 29 */
! 1711: 037766450ul, /* 30 */
! 1712: 017762752ul, /* 31 */
! 1713: 037762762ul, /* 32 */
! 1714: 017762742ul, /* 33 */
! 1715: 037763762ul, /* 34 */
! 1716: 017763740ul, /* 35 */
! 1717: 077763740ul, /* 36 */
! 1718: 077762750ul, /* 37 */
! 1719: 077762752ul, /* 38 */
! 1720: 077762750ul, /* 39 */
! 1721: 077762743ul, /* 40 */
! 1722: 077767740ul, /* 41 */
! 1723: 077763741ul, /* 42 */
! 1724: 077763762ul, /* 43 */
! 1725: 077772760ul, /* 44 */
! 1726: 077762770ul, /* 45 */
! 1727: 077766750ul, /* 46 */
! 1728: 077762740ul, /* 47 */
! 1729: 077763740ul, /* 48 */
! 1730: 077763750ul, /* 49 */
! 1731: 077763752ul, /* 50 */
! 1732: 077762740ul, /* 51 */
! 1733: 077762740ul, /* 52 */
! 1734: 077772740ul, /* 53 */
! 1735: 077762762ul, /* 54 */
! 1736: 077763765ul, /* 55 */
! 1737: 077763770ul, /* 56 */
! 1738: 077767750ul, /* 57 */
! 1739: 077766753ul, /* 58 */
! 1740: 077776740ul, /* 59 */
! 1741: 077772741ul, /* 60 */
! 1742: 077772744ul, /* 61 */
! 1743: 077773740ul, /* 62 */
! 1744: 077773743ul, /* 63 */
! 1745: 077773751ul, /* 64 */
! 1746: 077772771ul, /* 65 */
! 1747: 077772760ul, /* 66 */
! 1748: 077772763ul, /* 67 */
! 1749: 077772751ul, /* 68 */
! 1750: 077773750ul, /* 69 */
! 1751: 077777740ul, /* 70 */
! 1752: 077773745ul, /* 71 */
! 1753: 077772740ul, /* 72 */
! 1754: 077772742ul, /* 73 */
! 1755: 077772744ul, /* 74 */
! 1756: 077776750ul, /* 75 */
! 1757: 077773771ul, /* 76 */
! 1758: 077773774ul, /* 77 */
! 1759: 077773760ul, /* 78 */
! 1760: 077772741ul, /* 79 */
! 1761: 077772740ul, /* 80 */
! 1762: 077772740ul, /* 81 */
! 1763: 077772741ul, /* 82 */
! 1764: 077773754ul, /* 83 */
! 1765: 077773750ul, /* 84 */
! 1766: 077773740ul, /* 85 */
! 1767: 077776741ul, /* 86 */
! 1768: 077776771ul, /* 87 */
! 1769: 077776773ul, /* 88 */
! 1770: 077772761ul, /* 89 */
! 1771: 077773741ul, /* 90 */
! 1772: 077773740ul, /* 91 */
! 1773: 077773740ul, /* 92 */
! 1774: 077772740ul, /* 93 */
! 1775: 077772752ul, /* 94 */
! 1776: 077772750ul, /* 95 */
! 1777: 077772751ul, /* 96 */
! 1778: 077773741ul, /* 97 */
! 1779: 077773761ul, /* 98 */
! 1780: 077777760ul, /* 99 */
! 1781: 077772765ul, /* 100 */
! 1782: 077772742ul, /* 101 */
! 1783: 077777751ul, /* 102 */
! 1784: 077777750ul, /* 103 */
! 1785: 077777745ul, /* 104 */
! 1786: 077777770ul /* 105 */
! 1787: };
! 1788:
! 1789: /* Returns 3, 5, or 7 if x is a cube (but not a 5th or 7th power), a 5th
! 1790: power (but not a 7th), or a 7th power, and in this case creates the
! 1791: base on the stack and assigns its address to *pt. Otherwise returns 0.
! 1792: x must be of type t_INT and nonzero; this is not checked. The *mask
! 1793: argument tells us which things to check -- bit 0: 3rd, bit 1: 5th,
! 1794: bit 2: 7th pwr; set a bit to have the corresponding power examined --
! 1795: and is updated appropriately for a possible follow-up call */
! 1796:
! 1797: long /* no longer static -- used in mpqs.c */
! 1798: is_odd_power(GEN x, GEN *pt, long *mask)
! 1799: {
! 1800: long av=avma, tetpil, lgx=lgefint(x), exponent=0, residue, resbyte;
! 1801: GEN y;
! 1802:
! 1803: *mask &= 7; /* paranoia */
! 1804: if (!*mask) return 0; /* useful when running in a loop */
! 1805: if (signe(x) < 0) x=absi(x);
! 1806:
! 1807: if (DEBUGLEVEL >= 5)
! 1808: {
! 1809: fprintferr("OddPwrs: is %Z\n\t...a", x);
! 1810: if (*mask&1) fprintferr(" 3rd%s",
! 1811: (*mask==7?",":(*mask!=1?" or":"")));
! 1812: if (*mask&2) fprintferr(" 5th%s",
! 1813: (*mask==7?", or":(*mask&4?" or":"")));
! 1814: if (*mask&4) fprintferr(" 7th");
! 1815: fprintferr(" power?\n");
! 1816: }
! 1817: if (lgx > 3) residue = smodis(x, 211*209*61*203);
! 1818: else residue = x[2];
! 1819:
! 1820: resbyte=residue%211; if (resbyte > 105) resbyte = 211 - resbyte;
! 1821: *mask &= powersmod[resbyte];
! 1822: if (DEBUGLEVEL >= 5)
! 1823: {
! 1824: fprintferr("\tmodulo: resid. (remaining possibilities)\n");
! 1825: fprintferr("\t 211: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 1826: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 1827: }
! 1828: if (!*mask) { avma=av; return 0; }
! 1829:
! 1830: if (*mask & 3)
! 1831: {
! 1832: resbyte=residue%209; if (resbyte > 104) resbyte = 209 - resbyte;
! 1833: *mask &= (powersmod[resbyte] >> 3);
! 1834: if (DEBUGLEVEL >= 5)
! 1835: fprintferr("\t 209: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 1836: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 1837: if (!*mask) { avma=av; return 0; }
! 1838: }
! 1839: if (*mask & 3)
! 1840: {
! 1841: resbyte=residue%61; if (resbyte > 30) resbyte = 61 - resbyte;
! 1842: *mask &= (powersmod[resbyte] >> 6);
! 1843: if (DEBUGLEVEL >= 5)
! 1844: fprintferr("\t 61: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 1845: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 1846: if (!*mask) { avma=av; return 0; }
! 1847: }
! 1848: if (*mask & 5)
! 1849: {
! 1850: resbyte=residue%203; if (resbyte > 101) resbyte = 203 - resbyte;
! 1851: *mask &= (powersmod[resbyte] >> 9);
! 1852: if (DEBUGLEVEL >= 5)
! 1853: fprintferr("\t 203: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 1854: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 1855: if (!*mask) { avma=av; return 0; }
! 1856: }
! 1857:
! 1858: if (lgx > 3) residue = smodis(x, 117*31*43*71);
! 1859: else residue = x[2];
! 1860:
! 1861: if (*mask & 1)
! 1862: {
! 1863: resbyte=residue%117; if (resbyte > 58) resbyte = 117 - resbyte;
! 1864: *mask &= (powersmod[resbyte] >> 12);
! 1865: if (DEBUGLEVEL >= 5)
! 1866: fprintferr("\t 117: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 1867: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 1868: if (!*mask) { avma=av; return 0; }
! 1869: }
! 1870: if (*mask & 3)
! 1871: {
! 1872: resbyte=residue%31; if (resbyte > 15) resbyte = 31 - resbyte;
! 1873: *mask &= (powersmod[resbyte] >> 15);
! 1874: if (DEBUGLEVEL >= 5)
! 1875: fprintferr("\t 31: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 1876: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 1877: if (!*mask) { avma=av; return 0; }
! 1878: }
! 1879: if (*mask & 5)
! 1880: {
! 1881: resbyte=residue%43; if (resbyte > 21) resbyte = 43 - resbyte;
! 1882: *mask &= (powersmod[resbyte] >> 18);
! 1883: if (DEBUGLEVEL >= 5)
! 1884: fprintferr("\t 43: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 1885: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 1886: if (!*mask) { avma=av; return 0; }
! 1887: }
! 1888: if (*mask & 6)
! 1889: {
! 1890: resbyte=residue%71; if (resbyte > 35) resbyte = 71 - resbyte;
! 1891: *mask &= (powersmod[resbyte] >> 21);
! 1892: if (DEBUGLEVEL >= 5)
! 1893: fprintferr("\t 71: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 1894: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 1895: if (!*mask) { avma=av; return 0; }
! 1896: }
! 1897:
! 1898: /* priority to higher powers -- if we have a 21st, it'll be easier to
! 1899: rediscover that its 7th root is a cube than that its cube root is
! 1900: a 7th power */
! 1901: if ((resbyte = *mask & 4)) /* assignment */
! 1902: exponent = 7;
! 1903: else if ((resbyte = *mask & 2))
! 1904: exponent = 5;
! 1905: else
! 1906: { resbyte = 1; exponent = 3; }
! 1907: /* leave that mask bit on for the moment, we might need it for a
! 1908: subsequent call */
! 1909:
! 1910: /* precision in the following is one extra significant word (overkill) */
! 1911: y=ground(gpow(x, ginv(stoi(exponent)), lgx));
! 1912: if (!egalii(gpowgs(y, exponent), x))
! 1913: {
! 1914: if (DEBUGLEVEL >= 5)
! 1915: {
! 1916: if (exponent == 3)
! 1917: fprintferr("\tBut it nevertheless wasn't a cube.\n");
! 1918: else
! 1919: fprintferr("\tBut it nevertheless wasn't a %ldth power.\n",
! 1920: exponent);
! 1921: }
! 1922: *mask &= ~resbyte; /* _now_ turn the bit off */
! 1923: avma=av; return 0;
! 1924: }
! 1925: /* caller (ifac_crack() below) will report the final result if it was
! 1926: a pure power, so no further diagnostics here */
! 1927:
! 1928: tetpil=avma;
! 1929: if (!pt) { avma=av; return exponent; } /* this branch not used */
! 1930: *pt=gerepile(av,tetpil,icopy(y));
! 1931: return exponent;
! 1932: }
! 1933:
! 1934: /***********************************************************************/
! 1935: /** **/
! 1936: /** FACTORIZATION (master iteration) **/
! 1937: /** Driver for the various methods of finding large factors **/
! 1938: /** (after trial division has cast out the very small ones). **/
! 1939: /** GN1998Jun24--30 **/
! 1940: /** **/
! 1941: /***********************************************************************/
! 1942:
! 1943: /** Direct use:
! 1944: ** ifac_start() registers a number (without prime factors < 100)
! 1945: ** with the iterative factorizer, and also registers whether or
! 1946: ** not we should terminate early if we find that the number is
! 1947: ** not squarefree, and a hint about which method(s) to use. This
! 1948: ** must always be called first. The input _must_ have been checked
! 1949: ** to be composite by the caller. The routine immediately tries
! 1950: ** to decompose it nontrivially into a product of two factors,
! 1951: ** except in squarefreeness (`Moebius') mode.
! 1952: ** ifac_primary_factor() returns a prime divisor (not necessarily
! 1953: ** the smallest) and the corresponding exponent. */
! 1954:
! 1955: /** Encapsulated user interface:
! 1956: ** ifac_decomp() does the right thing for auxdecomp() (put a succession
! 1957: ** of prime divisor / exponent pairs onto the stack, not necessarily
! 1958: ** sorted, although in practice they will tend not to be too far from
! 1959: ** the correct order).
! 1960: **
! 1961: ** For each of the additive/multiplicative arithmetic functions, there is
! 1962: ** a `contributor' below, to be called on any large composite cofactor
! 1963: ** left over after trial division by small primes, whose result can then
! 1964: ** be added to or multiplied with whatever we already have:
! 1965: ** ifac_moebius() ifac_issquarefree() ifac_totient() ifac_omega()
! 1966: ** ifac_bigomega() ifac_numdiv() ifac_sumdiv() ifac_sumdivk() */
! 1967:
! 1968: /* We never test whether the input number is prime or composite, since
! 1969: presumably it will have come out of the small factors finder stage
! 1970: (which doesn't really exist yet but which will test the left-over
! 1971: cofactor for primality once it does). */
! 1972:
! 1973: /* The data structure in which we preserve whatever we know at any given
! 1974: time about our number N is kept on the PARI stack, and updated as needed.
! 1975: This makes the machinery re-entrant (you can have more than one fac-
! 1976: torization using ifac_start()/ifac_primary_factor() in progress simul-
! 1977: taneously so long as you preserve the GEN across garbage collections),
! 1978: and which avoids memory leaks when a lengthy factorization is interrupted.
! 1979: We also make an effort to keep the whole affair connected, and the parent
! 1980: object will always be older than its children. This may in rare cases
! 1981: lead to some extra copying around, and knowing what is garbage at any
! 1982: given time is not entirely trivial. See below for examples how to do
! 1983: it right. (Connectedness can be destroyed if callers of ifac_main()
! 1984: create other stuff on the stack in between calls. This is harmless
! 1985: as long as ifac_realloc() is used to re-create a connected object at
! 1986: the head of the stack just before collecting garbage.) */
! 1987:
! 1988: /* Note that a PARI integer can have hundreds of millions of distinct prime
! 1989: factors larger than 2^16, given enough memory. And since there's no
! 1990: guarantee that we will find factors in order of increasing size, we must
! 1991: be prepared to drag a very large amount of data around (although this
! 1992: will _very_ rarely happen for random input!). So we start with a small
! 1993: structure and extend it when necessary. */
! 1994:
! 1995: /* The idea of data structure and algorithm is:
! 1996: Let N0 be whatever is currently left of N after dividing off all the
! 1997: prime powers we have already returned to the caller. Then we maintain
! 1998: N0 as a product
! 1999: (1) N0 = \prod_i P_i^{e_i} * \prod_j Q_j^{f_j} * \prod_k C_k^{g_k}
! 2000: where the P_i and Q_j are distinct primes, each C_k is known composite,
! 2001: none of the P_i divides any C_k, and we also know the total ordering
! 2002: of all the P_i, Q_j and C_k (in particular, we will never try to divide
! 2003: a C_k by a larger Q_j). Some of the C_k may have common factors, although
! 2004: this will not often be the case. */
! 2005:
! 2006: /* Caveat implementor: Taking gcds among C_k's is very likely to cost at
! 2007: least as much time as dividing off any primes as we find them, and book-
! 2008: keeping would be a nightmare (since D=gcd(C_1,C_2) can still have common
! 2009: factors with both C_1/D and C_2/D, and so on...). */
! 2010:
! 2011: /* At startup, we just initialize the structure to
! 2012: (2) N = C_1^1 (composite). */
! 2013:
! 2014: /* Whenever ifac_primary_factor() or ifac_decomp() (or, mutatis mutandis,
! 2015: one of the three arithmetic user interface routines) needs a primary
! 2016: factor, and the smallest thing in our list is P_1, we return that and
! 2017: its exponent, and remove it from our list.
! 2018: (When nothing is left, we return a sentinel value -- gun. And in Moebius
! 2019: mode, when we see something with exponent > 1, whether prime or composite,
! 2020: we yell at our caller by returning gzero or 0, depending on the function).
! 2021: In all other cases, ifac_main() iterates the following steps until we have
! 2022: a P_1 in the smallest position. */
! 2023:
! 2024: /* When the smallest item is C_1 (as it is initially):
! 2025: (3.1) Crack C_1 into a nontrivial product U_1 * U_2 by whatever method
! 2026: comes to mind for this size. (U for `unknown'.) Cracking will detect
! 2027: squares (and biquadrates etc), and it may detect odd powers, so we
! 2028: might instead see a power of some U_1 here, or even something of the form
! 2029: U_1^k*U_2^k. (Of course the exponent already attached to C_1 is taken
! 2030: into account in the following.)
! 2031: (3.2) If we have U_1*U_2, sort the two factors; convert to U_1^2 if they
! 2032: happen to be equal (which they shouldn't -- squares should have been
! 2033: caught at the preceding stage). Note that U_1 and (if it exists) U_2
! 2034: are automatically smaller than anything else in our list.
! 2035: (3.3) Check U_1 (and U_2) for primality, and flag them accordingly.
! 2036: (3.4) Iterate. */
! 2037:
! 2038: /* When the smallest item is Q_1:
! 2039: This is the potentially unpleasant case. The idea is to go through the
! 2040: entire list and try to divide Q_1 off each of the current C_k's, which
! 2041: will usually fail, but may succeed several times. When a division was
! 2042: successful, the corresponding C_k is removed from our list, and the co-
! 2043: factor becomes a U_l for the moment unless it is 1 (which happens when
! 2044: C_k was a power of Q_1). When we're through we upgrade Q_1 to P_1 status,
! 2045: and then do a primality check on each U_l and sort it back into the list
! 2046: either as a Q_j or as a C_k. If during the insertion sort we discover
! 2047: that some U_l equals some P_i or Q_j or C_k we already have, we just add
! 2048: U_l's exponent to that of its twin. (The sorting should therefore happen
! 2049: before the primality test).
! 2050: Note that this may produce one or more elements smaller than the P_1
! 2051: we just confirmed, so we may have to repeat the iteration. */
! 2052:
! 2053: /* There's a little trick that avoids some Q_1 instances. Just after we do
! 2054: a sweep to classify all current unknowns as either composites or primes,
! 2055: we do another downward sweep beginning with the largest current factor
! 2056: and stopping just above the largest current composite. Every Q_j we
! 2057: pass is turned into a P_i. (Different primes are automatically coprime
! 2058: among each other, and primes tend not to divide smaller composites.) */
! 2059:
! 2060: /* (We have no use for comparing the square of a prime to N0. Normally
! 2061: we will get called after casting out only the smallest primes, and
! 2062: since we cannot guarantee that we see the large prime factors in as-
! 2063: cending order, we cannot stop when we find one larger than sqrt(N0).) */
! 2064:
! 2065: /* Data structure: We keep everything in a single t_VEC of t_INTs. The
! 2066: first component records whether we're doing full (NULL) or Moebius (un)
! 2067: factorization; in the latter case many subroutines return a sentinel
! 2068: value as soon as they spot an exponent > 1. The second component records
! 2069: the hint from factorint()'s optional flag, for use by ifac_crack().
! 2070: The remaining components (initially 15) are used in groups of three:
! 2071: a GEN pointer at the t_INT value of the factor, a pointer at the t_INT
! 2072: exponent (usually gun or gdeux so we don't clutter up the stack too
! 2073: much), and another t_INT GEN pointer to record the class of the factor:
! 2074: NULL for unknown, zero for known composite C_k, un for known prime Q_j
! 2075: awaiting trial division, and deux for finished prime P_i. */
! 2076:
! 2077: /* When during the division stage we re-sort a C_k-turned-U_l to a lower
! 2078: position, we rotate any intervening material upward towards its old
! 2079: slot. When a C_k was divided down to 1, its slot is left empty at
! 2080: first; similarly when the re-sorting detects a repeated factor.
! 2081: After the sorting phase, we de-fragment the list and squeeze all the
! 2082: occupied slots together to the high end, so that ifac_crack() has room
! 2083: for new factors. When this doesn't suffice, we abandon the current
! 2084: vector and allocate a somewhat larger one, defragmenting again during
! 2085: copying. */
! 2086:
! 2087: /* (For internal use, note that all exponents will fit into C longs, given
! 2088: PARI's lgefint field size. When we work with them, we sometimes read
! 2089: out the GEN pointer, and sometimes do an itos, whatever is more con-
! 2090: venient for the task at hand.) */
! 2091:
! 2092:
! 2093: /*** Overview and forward declarations: ***/
! 2094:
! 2095: /* The `*where' argument in the following points into *partial at the
! 2096: first of the three fields of the first occupied slot. It's there
! 2097: because the caller would already know where `here' is, so we don't
! 2098: want to search for it again, although it wouldn't take much time.
! 2099: On the other hand, we do not preserve this from one user-interface
! 2100: call to the next. */
! 2101:
! 2102: static GEN
! 2103: ifac_find(GEN *partial, GEN *where);
! 2104: /* Return GEN pointing at the first nonempty slot strictly behind the
! 2105: current *where, or NULL if such doesn't exist. Can be used to skip
! 2106: a range of vacant slots, or to initialize *where in the first place
! 2107: (pass partial in both args). Does not modify its argument pointers. */
! 2108:
! 2109: void
! 2110: ifac_realloc(GEN *partial, GEN *where, long new_lg);
! 2111: /* Move to a larger main vector, updating *where if it points into it.
! 2112: Certainly updates *partial. Can be used as a specialized gcopy before
! 2113: a gerepileupto()/gerepilemanysp() (pass 0 as the new length).
! 2114: Normally, one would pass new_lg=1 to let this function guess the
! 2115: new size. To be used sparingly. */
! 2116:
! 2117: static long
! 2118: ifac_crack(GEN *partial, GEN *where);
! 2119: /* Split the first (composite) entry. There _must_ already be room for
! 2120: another factor below *where, and *where will be updated. Factor and
! 2121: cofactor will be inserted in the correct order, updating *where, or
! 2122: factor^k will be inserted if such should be the case (leaving *where
! 2123: unchanged). The factor or factors will be set to unknown, and inherit
! 2124: the exponent (or a multiple thereof) of its/their ancestor. Returns
! 2125: number of factors written into the structure (normally 2, but 1 if a
! 2126: factor equalled its cofactor, and may be more than 1 if a factoring
! 2127: engine returned a vector of factors instead of a single factor). Can
! 2128: reallocate the data structure in the vector-of-factors case (but not
! 2129: in the more common single-factor case) */
! 2130:
! 2131: static long
! 2132: ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec);
! 2133: /* Gets called to complete ifac_crack()'s job when a factoring engine
! 2134: splits the current factor into a product of three or more new factors.
! 2135: Makes room for them if necessary, sorts them, gives them the right
! 2136: exponents and class etc. Also returns the number of factors actually
! 2137: written, which may be less than the number of components in facvec
! 2138: if there are duplicates.--- Vectors of factors (cf pollardbrent()
! 2139: above) actually contain `slots' of three GENs per factor with the
! 2140: three fields being interpreted exactly as in our partial factorization
! 2141: data structure. Thus `engines' can tell us what they already happen to
! 2142: know about factors being prime or composite and/or appearing to a power
! 2143: larger than the first */
! 2144:
! 2145: static long
! 2146: ifac_divide(GEN *partial, GEN *where);
! 2147: /* Divide all current composites by first (prime, class Q) entry, updating
! 2148: its exponent, and turning it into a finished prime (class P). Return 1
! 2149: if any such divisions succeeded (in Moebius mode, the update may then
! 2150: not have been completed), or 0 if none of them succeeded. Doesn't
! 2151: modify *where. */
! 2152:
! 2153: static long
! 2154: ifac_sort_one(GEN *partial, GEN *where, GEN washere);
! 2155: /* re-sort one (typically unknown) entry from washere to a new position,
! 2156: rotating intervening entries upward to fill the vacant space. It may
! 2157: happen (rarely) that the new position is the same as the old one, or
! 2158: that the new value of the entry coincides with a value already occupying
! 2159: a lower slot, in which latter case we just add exponents (and use the
! 2160: `more known' class, and return 1 immediately when in Moebius mode).
! 2161: The slots between *where and washere must be in sorted order, so a
! 2162: sweep using this to re-sort several unknowns must proceed upward (see
! 2163: ifac_resort() below). Return 1 if we see an exponent > 1 (in Moebius
! 2164: mode without completing the update), 0 otherwise. */
! 2165:
! 2166: static long
! 2167: ifac_resort(GEN *partial, GEN *where);
! 2168: /* sort all current unknowns downward to where they belong. Sweeps
! 2169: in the upward direction. Not needed after ifac_crack(), only when
! 2170: ifac_divide() returned true. May update *where. Returns 1 when an
! 2171: ifac_sort_one() call does so to indicate a repeated factor, or 0 if
! 2172: any and all such calls returned 0 */
! 2173:
! 2174: static void
! 2175: ifac_defrag(GEN *partial, GEN *where);
! 2176: /* defragment: collect and squeeze out any unoccupied slots above *where
! 2177: during a downward sweep. Unoccupied slots arise when a composite factor
! 2178: dissolves completely whilst dividing off a prime, or when ifac_resort()
! 2179: spots a coincidence and merges two factors. *where will be updated */
! 2180:
! 2181: static void
! 2182: ifac_whoiswho(GEN *partial, GEN *where, long after_crack);
! 2183: /* determine primality or compositeness of all current unknowns, and set
! 2184: class Q primes to finished (class P) if everything larger is already
! 2185: known to be prime. When after_crack is nonnegative, only look at the
! 2186: first after_crack things in the list (do nothing when it's zero) */
! 2187:
! 2188: static GEN
! 2189: ifac_main(GEN *partial);
! 2190: /* main loop: iterate until smallest entry is a finished prime; returns
! 2191: a `where' pointer, or gun if nothing left, or gzero in Moebius mode if
! 2192: we aren't squarefree */
! 2193:
! 2194: /* NB In the most common cases, control flows from the user interface to
! 2195: ifac_main() and then to a succession of ifac_crack()s and ifac_divide()s,
! 2196: with (typically) none of the latter finding anything. */
! 2197:
! 2198: /** user interface: **/
! 2199: /* return initial data structure, see ifac_crack() below for semantics
! 2200: of the hint argument */
! 2201: GEN
! 2202: ifac_start(GEN n, long moebius, long hint);
! 2203:
! 2204: /* run main loop until primary factor is found, return the prime and
! 2205: assign the exponent. If nothing left, return gun and set exponent
! 2206: to 0; if in Moebius mode and a square factor is discovered, return
! 2207: gzero and set exponent to 0 */
! 2208: GEN
! 2209: ifac_primary_factor(GEN *partial, long *exponent);
! 2210:
! 2211: /* call ifac_start() and run main loop until factorization is complete,
! 2212: accumulating prime / exponent pairs on the PARI stack to be picked up
! 2213: by aux_end(). Return number of distinct primes found */
! 2214: long
! 2215: ifac_decomp(GEN n, long hint);
! 2216:
! 2217: /* completely encapsulated functions; these call ifac_start() themselves,
! 2218: and ensure proper stack housekeeping etc. Call them on any large
! 2219: composite left over after trial division, and multiply/add the result
! 2220: onto whatever you already have from the small factors. Don't call
! 2221: them on large primes; they will run into trouble */
! 2222: long
! 2223: ifac_moebius(GEN n, long hint);
! 2224:
! 2225: long
! 2226: ifac_issquarefree(GEN n, long hint);
! 2227:
! 2228: long
! 2229: ifac_omega(GEN n, long hint);
! 2230:
! 2231: long
! 2232: ifac_bigomega(GEN n, long hint);
! 2233:
! 2234: GEN
! 2235: ifac_totient(GEN n, long hint); /* for gp's eulerphi() */
! 2236:
! 2237: GEN
! 2238: ifac_numdiv(GEN n, long hint);
! 2239:
! 2240: GEN
! 2241: ifac_sumdiv(GEN n, long hint);
! 2242:
! 2243: GEN
! 2244: ifac_sumdivk(GEN n, long k, long hint);
! 2245:
! 2246: /*** implementation ***/
! 2247:
! 2248: #define ifac_initial_length 24 /* codeword, moebius flag, hint, 7 slots */
! 2249: /* (more than enough in most cases -- a 512-bit product of distinct 8-bit
! 2250: primes needs at most 7 slots at a time) */
! 2251:
! 2252: GEN
! 2253: ifac_start(GEN n, long moebius, long hint)
! 2254: {
! 2255: GEN part, here;
! 2256:
! 2257: if (typ(n) != t_INT) err(typeer, "ifac_start");
! 2258: if (signe(n) == 0)
! 2259: err(talker, "factoring 0 in ifac_start");
! 2260:
! 2261: part = cgetg(ifac_initial_length, t_VEC);
! 2262: here = part + ifac_initial_length;
! 2263: part[1] = moebius? un : (long)NULL;
! 2264: switch(hint)
! 2265: {
! 2266: case 0:
! 2267: part[2] = zero; break;
! 2268: case 1:
! 2269: part[2] = un; break;
! 2270: case 2:
! 2271: part[2] = deux; break;
! 2272: default:
! 2273: part[2] = (long)stoi(hint);
! 2274: }
! 2275: if (isonstack(n))
! 2276: n = absi(n);
! 2277: /* make copy, because we'll later want to mpdivis() into it in place.
! 2278: If it's not on stack, then we assume it is a clone made for us by
! 2279: auxdecomp0(), and we assume the sign has already been set positive */
! 2280: /* fill first slot at the top end */
! 2281: *--here = zero; /* initially composite */
! 2282: *--here = un; /* initial exponent 1 */
! 2283: *--here = (long) n;
! 2284: /* and NULL out the remaining slots */
! 2285: while (here > part + 3) *--here = (long)NULL;
! 2286: return part;
! 2287: }
! 2288:
! 2289: static GEN
! 2290: ifac_find(GEN *partial, GEN *where)
! 2291: {
! 2292: long lgp = lg(*partial);
! 2293: GEN end = *partial + lgp;
! 2294: GEN scan = *where + 3;
! 2295:
! 2296: if (DEBUGLEVEL >= 5)
! 2297: {
! 2298: if (!*partial || typ(*partial) != t_VEC)
! 2299: err(typeer, "ifac_find");
! 2300: if (lg(*partial) < ifac_initial_length)
! 2301: err(talker, "partial impossibly short in ifac_find");
! 2302: if (!(*where) ||
! 2303: *where > *partial + lgp - 3 ||
! 2304: *where < *partial) /* sic */
! 2305: err(talker, "`*where\' out of bounds in ifac_find");
! 2306: }
! 2307: while (scan < end && !*scan) scan += 3;
! 2308: /* paranoia -- check completely NULLed ? nope -- we never inspect the
! 2309: exponent field for deciding whether a slot is empty or occupied */
! 2310: if (scan < end)
! 2311: {
! 2312: if (DEBUGLEVEL >= 5)
! 2313: {
! 2314: if (!scan[1])
! 2315: err(talker, "factor has NULL exponent in ifac_find");
! 2316: }
! 2317: return scan;
! 2318: }
! 2319: return NULL;
! 2320: }
! 2321:
! 2322: /* simple defragmenter */
! 2323: static void
! 2324: ifac_defrag(GEN *partial, GEN *where)
! 2325: {
! 2326: long lgp = lg(*partial);
! 2327: GEN scan_new = *partial + lgp - 3, scan_old = scan_new;
! 2328:
! 2329: while (scan_old >= *where)
! 2330: {
! 2331: if (*scan_old) /* slot occupied? */
! 2332: {
! 2333: if (scan_old < scan_new)
! 2334: {
! 2335: scan_new[2] = scan_old[2];
! 2336: scan_new[1] = scan_old[1];
! 2337: *scan_new = *scan_old;
! 2338: }
! 2339: scan_new -= 3; /* point at next slot to be written */
! 2340: }
! 2341: scan_old -= 3;
! 2342: }
! 2343: scan_new += 3; /* back up to last slot written */
! 2344: *where = scan_new;
! 2345: while (scan_new > *partial + 3)
! 2346: *--scan_new = (long)NULL; /* erase junk */
! 2347: }
! 2348:
! 2349: /* and complex version combined with reallocation. If new_lg is 0, we
! 2350: use the old length, so this acts just like gcopy except that the where
! 2351: pointer is carried along; if it is 1, we make an educated guess.
! 2352: Exception: If new_lg is 0, the vector is full to the brim, and the
! 2353: first entry is composite, we make it longer to avoid being called again
! 2354: a microsecond later (at significant cost).
! 2355: It is safe to call this with NULL for the where argument; if it doesn't
! 2356: point anywhere within the old structure, it will be left alone */
! 2357: void
! 2358: ifac_realloc(GEN *partial, GEN *where, long new_lg)
! 2359: {
! 2360: long old_lg = lg(*partial);
! 2361: GEN newpart, scan_new, scan_old;
! 2362:
! 2363: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
! 2364: {
! 2365: if (!*partial || typ(*partial) != t_VEC)
! 2366: err(typeer, "ifac_realloc");
! 2367: if (lg(*partial) < ifac_initial_length)
! 2368: err(talker, "partial impossibly short in ifac_realloc");
! 2369: }
! 2370:
! 2371: if (new_lg == 1)
! 2372: new_lg = 2*old_lg - 6; /* from 7 slots to 13 to 25... */
! 2373: else if (new_lg <= old_lg) /* includes case new_lg == 0 */
! 2374: {
! 2375: new_lg = old_lg;
! 2376: if ((*partial)[3] && /* structure full */
! 2377: ((*partial)[5]==zero || (*partial)[5]==(long)NULL))
! 2378: /* and first entry composite or unknown */
! 2379: new_lg += 6; /* give it a little more breathing space */
! 2380: }
! 2381: newpart = cgetg(new_lg, t_VEC);
! 2382: if (DEBUGMEM >= 3)
! 2383: {
! 2384: fprintferr("IFAC: new partial factorization structure (%ld slots)\n",
! 2385: (new_lg - 3)/3);
! 2386: flusherr();
! 2387: }
! 2388: newpart[1] = (*partial)[1]; /* moebius */
! 2389: newpart[2] = (*partial)[2]; /* hint */
! 2390: /* downward sweep through the old *partial, picking up where1 and carry-
! 2391: ing it over if and when we pass it. (This will only be useful if
! 2392: it pointed at a non-empty slot.) Factors are licopy()d so that we
! 2393: again have a nice object (parent older than children, connected),
! 2394: except the one factor that may still be living in a clone where n
! 2395: originally was; exponents are similarly copied if they aren't global
! 2396: constants; class-of-factor fields are always global constants so we
! 2397: need only copy them as pointers. Caller may then do a gerepileupto()
! 2398: or a gerepilemanysp() */
! 2399: scan_new = newpart + new_lg - 3;
! 2400: scan_old = *partial + old_lg - 3;
! 2401: for (; scan_old > *partial + 2; scan_old -= 3)
! 2402: {
! 2403: if (*where == scan_old) *where = scan_new;
! 2404: if (!*scan_old) continue; /* skip empty slots */
! 2405:
! 2406: *scan_new =
! 2407: isonstack((GEN)(*scan_old)) ?
! 2408: licopy((GEN)(*scan_old)) : *scan_old;
! 2409: scan_new[1] =
! 2410: isonstack((GEN)(scan_old[1])) ?
! 2411: licopy((GEN)(scan_old[1])) : scan_old[1];
! 2412: scan_new[2] = scan_old[2];
! 2413: scan_new -= 3;
! 2414: }
! 2415: scan_new += 3; /* back up to last slot written */
! 2416: while (scan_new > newpart + 3)
! 2417: *--scan_new = (long)NULL;
! 2418: *partial = newpart;
! 2419: }
! 2420:
! 2421: #define moebius_mode ((*partial)[1])
! 2422:
! 2423: /* Bubble-sort-of-thing sort. Won't be exercised frequently,
! 2424: so this is ok. */
! 2425: static long
! 2426: ifac_sort_one(GEN *partial, GEN *where, GEN washere)
! 2427: {
! 2428: GEN scan = washere - 3;
! 2429: GEN value, exponent, class0, class1;
! 2430: long cmp_res;
! 2431:
! 2432: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
! 2433: {
! 2434: long lgp;
! 2435: if (!*partial || typ(*partial) != t_VEC)
! 2436: err(typeer, "ifac_sort_one");
! 2437: if ((lgp = lg(*partial)) < ifac_initial_length)
! 2438: err(talker, "partial impossibly short in ifac_sort_one");
! 2439: if (!(*where) ||
! 2440: *where < *partial + 3 ||
! 2441: *where > *partial + lgp - 3)
! 2442: err(talker, "`*where\' out of bounds in ifac_sort_one");
! 2443: if (!washere ||
! 2444: washere < *where ||
! 2445: washere > *partial + lgp - 3)
! 2446: err(talker, "`washere\' out of bounds in ifac_sort_one");
! 2447: }
! 2448: value = (GEN)(*washere);
! 2449: exponent = (GEN)(washere[1]);
! 2450: if (exponent != gun && moebius_mode && cmpsi(1,exponent) < 0)
! 2451: return 1; /* should have been detected by caller */
! 2452: class0 = (GEN)(washere[2]);
! 2453:
! 2454: if (scan < *where) return 0; /* nothing to do, washere==*where */
! 2455:
! 2456: cmp_res = -1; /* sentinel */
! 2457: while (scan >= *where) /* therefore at least once */
! 2458: {
! 2459: if (*scan) /* current slot nonempty */
! 2460: {
! 2461: /* check against where */
! 2462: cmp_res = cmpii(value, (GEN)(*scan));
! 2463: if (cmp_res >= 0) break; /* have found where to stop */
! 2464: }
! 2465: /* copy current slot upward by one position and move pointers down */
! 2466: scan[5] = scan[2];
! 2467: scan[4] = scan[1];
! 2468: scan[3] = *scan;
! 2469: scan -= 3;
! 2470: }
! 2471: scan += 3;
! 2472: /* at this point there are the following possibilities:
! 2473: (*) cmp_res == -1. Either value is less than that at *where, or for
! 2474: some reason *where was pointing at one or more vacant slots and any
! 2475: factors we saw en route were larger than value. At any rate,
! 2476: scan == *where now, and scan is pointing at an empty slot, into
! 2477: which we'll stash our entry.
! 2478: (*) cmp_res == 0. The entry at scan-3 is the one, we compare class0
! 2479: fields and add exponents, and put it all into the vacated scan slot,
! 2480: NULLing the one at scan-3 (and possibly updating *where).
! 2481: (*) cmp_res == 1. The slot at scan is the one to store our entry
! 2482: into. */
! 2483: if (cmp_res != 0)
! 2484: {
! 2485: if (cmp_res < 0 && scan != *where)
! 2486: err(talker, "misaligned partial detected in ifac_sort_one");
! 2487: *scan = (long)value;
! 2488: scan[1] = (long)exponent;
! 2489: scan[2] = (long)class0;
! 2490: return 0;
! 2491: }
! 2492: /* case cmp_res == 0: repeated factor detected */
! 2493: if (DEBUGLEVEL >= 4)
! 2494: {
! 2495: fprintferr("IFAC: repeated factor %Z\n\tdetected in ifac_sort_one\n",
! 2496: value);
! 2497: flusherr();
! 2498: }
! 2499: if (moebius_mode) return 1; /* not squarefree */
! 2500: /* if old class0 was composite and new is prime, or vice versa,
! 2501: complain (and if one class0 was unknown and the other wasn't,
! 2502: use the known one) */
! 2503: class1 = (GEN)(scan[-1]);
! 2504: if (class0) /* should never be used */
! 2505: {
! 2506: if(class1)
! 2507: {
! 2508: if (class0 == gzero && class1 != gzero)
! 2509: err(talker, "composite equals prime in ifac_sort_one");
! 2510: else if (class0 != gzero && class1 == gzero)
! 2511: err(talker, "prime equals composite in ifac_sort_one");
! 2512: else if (class0 == gdeux) /* should happen even less */
! 2513: scan[2] = (long)class0; /* use it */
! 2514: }
! 2515: else /* shouldn't happen either */
! 2516: scan[2] = (long)class0; /* use it */
! 2517: }
! 2518: /* else stay with the existing known class0 */
! 2519: scan[2] = (long)class1;
! 2520: /* in any case, add exponents */
! 2521: if (scan[-2] == un && exponent == gun)
! 2522: scan[1] = deux;
! 2523: else
! 2524: scan[1] = laddii((GEN)(scan[-2]), exponent);
! 2525: /* move the value over */
! 2526: *scan = scan[-3];
! 2527: /* null out the vacated slot below */
! 2528: *--scan = (long)NULL;
! 2529: *--scan = (long)NULL;
! 2530: *--scan = (long)NULL;
! 2531: /* finally, see whether *where should be pulled in */
! 2532: if (scan == *where) *where += 3;
! 2533: return 0;
! 2534: }
! 2535:
! 2536: /* the following loop around the former doesn't need to check moebius_mode
! 2537: because ifac_sort_one() never returns 1 in normal mode */
! 2538: static long
! 2539: ifac_resort(GEN *partial, GEN *where)
! 2540: {
! 2541: long lgp = lg(*partial), res = 0;
! 2542: GEN scan = *where;
! 2543:
! 2544: for (; scan < *partial + lgp; scan += 3)
! 2545: {
! 2546: if (*scan && /* slot occupied */
! 2547: !scan[2]) /* with an unknown */
! 2548: {
! 2549: res |= ifac_sort_one(partial, where, scan);
! 2550: if (res) return res; /* early exit */
! 2551: }
! 2552: }
! 2553: return res;
! 2554: }
! 2555:
! 2556: /* sweep downward so we can with luck turn some Qs into Ps */
! 2557: static void
! 2558: ifac_whoiswho(GEN *partial, GEN *where, long after_crack)
! 2559: {
! 2560: long lgp = lg(*partial), larger_compos = 0;
! 2561: GEN scan, scan_end = *partial + lgp - 3;
! 2562:
! 2563: if (DEBUGLEVEL >= 5)
! 2564: {
! 2565: if (!*partial || typ(*partial) != t_VEC)
! 2566: err(typeer, "ifac_whoiswho");
! 2567: if (lg(*partial) < ifac_initial_length)
! 2568: err(talker, "partial impossibly short in ifac_whoiswho");
! 2569: if (!(*where) ||
! 2570: *where > scan_end ||
! 2571: *where < *partial + 3)
! 2572: err(talker, "`*where\' out of bounds in ifac_whoiswho");
! 2573: }
! 2574:
! 2575: if (after_crack == 0) return;
! 2576: if (after_crack > 0)
! 2577: {
! 2578: larger_compos = 1; /* disable Q-to-P trick */
! 2579: scan = *where + 3*(after_crack - 1);
! 2580: /* check at most after_crack entries */
! 2581: if (scan > scan_end) /* ooops... */
! 2582: {
! 2583: err(warner, "avoiding nonexistent factors in ifac_whoiswho");
! 2584: scan = scan_end;
! 2585: }
! 2586: }
! 2587: else { larger_compos = 0; scan = scan_end; }
! 2588:
! 2589: for (; scan >= *where; scan -= 3)
! 2590: {
! 2591: if (scan[2]) /* known class of factor */
! 2592: {
! 2593: if (scan[2] == zero) larger_compos = 1;
! 2594: else if (!larger_compos && scan[2] == un)
! 2595: {
! 2596: if (DEBUGLEVEL >= 3)
! 2597: {
! 2598: fprintferr("IFAC: factor %Z\n\tis prime (no larger composite)\n",
! 2599: **where);
! 2600: fprintferr("IFAC: prime %Z\n\tappears with exponent = %ld\n",
! 2601: **where, itos((GEN)(*where)[1]));
! 2602: }
! 2603: scan[2] = deux;
! 2604: } /* no else case */
! 2605: continue;
! 2606: }
! 2607: scan[2] =
! 2608: (isprime((GEN)(*scan)) ?
! 2609: (larger_compos ? un : deux) : /* un- or finished prime */
! 2610: zero); /* composite */
! 2611:
! 2612: if (scan[2] == zero) larger_compos = 1;
! 2613: if (DEBUGLEVEL >= 3)
! 2614: {
! 2615: fprintferr("IFAC: factor %Z\n\tis %s\n", *scan,
! 2616: (scan[2] == zero ? "composite" : "prime"));
! 2617: }
! 2618: }
! 2619: }
! 2620:
! 2621: /* Here we normally do not check that the first entry is a not-finished
! 2622: prime. Stack management: we may allocate a new exponent */
! 2623: static long
! 2624: ifac_divide(GEN *partial, GEN *where)
! 2625: {
! 2626: long lgp = lg(*partial);
! 2627: GEN scan = *where + 3;
! 2628: long res = 0, exponent, newexp, otherexp;
! 2629:
! 2630: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
! 2631: {
! 2632: if (!*partial || typ(*partial) != t_VEC)
! 2633: err(typeer, "ifac_divide");
! 2634: if (lg(*partial) < ifac_initial_length)
! 2635: err(talker, "partial impossibly short in ifac_divide");
! 2636: if (!(*where) ||
! 2637: *where > *partial + lgp - 3 ||
! 2638: *where < *partial + 3)
! 2639: err(talker, "`*where\' out of bounds in ifac_divide");
! 2640: if ((*where)[2] != un)
! 2641: err(talker, "division by composite or finished prime in ifac_divide");
! 2642: }
! 2643: if (!(**where)) /* always test just this one */
! 2644: err(talker, "division by nothing in ifac_divide");
! 2645:
! 2646: newexp = exponent = itos((GEN)((*where)[1]));
! 2647: if (exponent > 1 && moebius_mode) return 1;
! 2648: /* should've been caught by caller already */
! 2649:
! 2650: /* go for it */
! 2651: for (; scan < *partial + lgp; scan += 3)
! 2652: {
! 2653: if (scan[2] != zero) continue; /* the other thing ain't composite */
! 2654: otherexp = 0;
! 2655: /* let mpdivis divide the other factor in place to keep stack clutter
! 2656: minimal */
! 2657: while (mpdivis((GEN)(*scan), (GEN)(**where), (GEN)(*scan)))
! 2658: {
! 2659: if (moebius_mode) return 1; /* immediately */
! 2660: if (!otherexp) otherexp = itos((GEN)(scan[1]));
! 2661: newexp += otherexp;
! 2662: }
! 2663: if (newexp > exponent) /* did anything happen? */
! 2664: {
! 2665: (*where)[1] = (newexp == 2 ? deux : (long)(stoi(newexp)));
! 2666: exponent = newexp;
! 2667: if (is_pm1((GEN)(*scan))) /* factor dissolved completely */
! 2668: {
! 2669: *scan = scan[1] = (long)NULL;
! 2670: if (DEBUGLEVEL >= 4)
! 2671: fprintferr("IFAC: a factor was a power of another prime factor\n");
! 2672: }
! 2673: else if (DEBUGLEVEL >= 4)
! 2674: {
! 2675: fprintferr("IFAC: a factor was divisible by another prime factor,\n");
! 2676: fprintferr("\tleaving a cofactor = %Z\n", *scan);
! 2677: }
! 2678: scan[2] = (long)NULL; /* at any rate it's Unknown now */
! 2679: res = 1;
! 2680: if (DEBUGLEVEL >= 5)
! 2681: {
! 2682: fprintferr("IFAC: prime %Z\n\tappears at least to the power %ld\n",
! 2683: **where, newexp);
! 2684: }
! 2685: }
! 2686: } /* for */
! 2687: (*where)[2] = deux; /* make it a finished prime */
! 2688: if (DEBUGLEVEL >= 3)
! 2689: {
! 2690: fprintferr("IFAC: prime %Z\n\tappears with exponent = %ld\n",
! 2691: **where, newexp);
! 2692: }
! 2693: return res;
! 2694: }
! 2695:
! 2696:
! 2697: GEN mpqs(GEN N); /* in src/modules/mpqs.c, maybe a dummy,
! 2698: returns a factor, or a vector of factors,
! 2699: or NULL */
! 2700:
! 2701: /* The following takes the place of 2.0.9.alpha's find_factor(). */
! 2702:
! 2703: /* The meaning of the hint changes against 2.0.9.alpha to:
! 2704: hint == 0 : Use our own strategy, and this should be the default
! 2705: hint & 1 : Avoid mpqs(), use ellfacteur() after pollardbrent()
! 2706: hint & 2 : Avoid first-stage ellfacteur() in favour of mpqs()
! 2707: (which may still fall back to ellfacteur() if mpqs() is not installed
! 2708: or gives up)
! 2709: hint & 4 : Avoid even the pollardbrent() stage
! 2710: hint & 8 : Avoid final ellfacteur(); this may `declare' a composite
! 2711: to be prime. */
! 2712:
! 2713: /* stack housekeeping: this routine may create one or more objects (a new
! 2714: factor, or possibly several, and perhaps one or more new exponents > 2) */
! 2715: static long
! 2716: ifac_crack(GEN *partial, GEN *where)
! 2717: {
! 2718: long hint, cmp_res, exp1 = 1, exp2 = 1, av;
! 2719: GEN factor = NULL, exponent;
! 2720:
! 2721: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
! 2722: {
! 2723: long lgp;
! 2724: if (!*partial || typ(*partial) != t_VEC)
! 2725: err(typeer, "ifac_crack");
! 2726: if ((lgp = lg(*partial)) < ifac_initial_length)
! 2727: err(talker, "partial impossibly short in ifac_crack");
! 2728: if (!(*where) ||
! 2729: *where < *partial + 6 || /* sic -- caller must realloc first */
! 2730: *where > *partial + lgp - 3)
! 2731: err(talker, "`*where\' out of bounds in ifac_crack");
! 2732: if (!(**where) || typ((GEN)(**where)) != t_INT)
! 2733: err(typeer, "ifac_crack");
! 2734: if ((*where)[2] != zero)
! 2735: err(talker, "operand not known composite in ifac_crack");
! 2736: }
! 2737: hint = itos((GEN)((*partial)[2])) & 15;
! 2738: exponent = (GEN)((*where)[1]);
! 2739:
! 2740: if (DEBUGLEVEL >= 3)
! 2741: fprintferr("IFAC: cracking composite\n\t%Z\n", **where);
! 2742:
! 2743: /* crack squares. Quite fast due to the initial square residue test */
! 2744: if (DEBUGLEVEL >= 4)
! 2745: fprintferr("IFAC: checking for pure square\n");
! 2746: av = avma;
! 2747: while (carrecomplet((GEN)(**where), &factor))
! 2748: {
! 2749: if (DEBUGLEVEL >= 4)
! 2750: fprintferr("IFAC: found %Z =\n\t%Z ^2\n", **where, factor);
! 2751: affii(factor, (GEN)(**where)); avma = av; factor = NULL;
! 2752: if (exponent == gun)
! 2753: (*where)[1] = deux;
! 2754: else if (exponent == gdeux)
! 2755: { (*where)[1] = (long)stoi(4); av = avma; }
! 2756: else
! 2757: { affii(shifti(exponent, 1), (GEN)((*where)[1])); avma = av; }
! 2758: exponent = (GEN)((*where)[1]);
! 2759: if (moebius_mode) return 0; /* no need to carry on... */
! 2760: exp1 = 2;
! 2761: } /* while carrecomplet */
! 2762:
! 2763: /* check whether our composite hasn't become prime */
! 2764: if (exp1 > 1 && isprime((GEN)(**where)))
! 2765: {
! 2766: (*where)[2] = un;
! 2767: if (DEBUGLEVEL >= 4)
! 2768: {
! 2769: fprintferr("IFAC: factor %Z\n\tis prime\n",**where);
! 2770: flusherr();
! 2771: }
! 2772: return 0; /* bypass subsequent ifac_whoiswho() call */
! 2773: }
! 2774: /* still composite -- carry on */
! 2775:
! 2776: /* MPQS cannot factor prime powers; check for cubes/5th/7th powers.
! 2777: Do this even if MPQS is blocked by hint -- it still serves a useful
! 2778: purpose in bounded factorization */
! 2779: {
! 2780: long mask = 7;
! 2781: if (DEBUGLEVEL == 4)
! 2782: fprintferr("IFAC: checking for odd power\n");
! 2783: /* (At debug levels > 4, is_odd_power() itself prints something more
! 2784: informative) */
! 2785: av = avma;
! 2786: while ((exp1 = /* assignment */
! 2787: is_odd_power((GEN)(**where), &factor, &mask)))
! 2788: {
! 2789: if (exp2 == 1) exp2 = exp1; /* remember this after the loop */
! 2790: if (DEBUGLEVEL >= 4)
! 2791: fprintferr("IFAC: found %Z =\n\t%Z ^%ld\n", **where, factor, exp1);
! 2792: affii(factor, (GEN)(**where)); avma = av; factor = NULL;
! 2793: if (exponent == gun)
! 2794: { (*where)[1] = (long)stoi(exp1); av = avma; }
! 2795: else if (exponent == gdeux)
! 2796: { (*where)[1] = (long)stoi(exp1<<1); av = avma; }
! 2797: else
! 2798: { affii(mulsi(exp1, exponent), (GEN)((*where)[1])); avma = av; }
! 2799: exponent = (GEN)((*where)[1]);
! 2800: if (moebius_mode) return 0; /* no need to carry on... */
! 2801: } /* while is_odd_power */
! 2802:
! 2803: if (exp2 > 1)
! 2804: { /* Something nice has happened */
! 2805: /* check whether our composite hasn't become prime */
! 2806: if (isprime((GEN)(**where)))
! 2807: {
! 2808: (*where)[2] = un;
! 2809: if (DEBUGLEVEL >= 4)
! 2810: {
! 2811: fprintferr("IFAC: factor %Z\n\tis prime\n", **where);
! 2812: flusherr();
! 2813: }
! 2814: return 0; /* bypass subsequent ifac_whoiswho() call */
! 2815: }
! 2816: /* base of power is still composite (an exceedingly rare case),
! 2817: fall through */
! 2818: }
! 2819: } /* odd power stage */
! 2820:
! 2821: /* pollardbrent() Rho usually gets a first chance */
! 2822: if (!(hint & 4))
! 2823: {
! 2824: if (DEBUGLEVEL >= 4)
! 2825: fprintferr("IFAC: trying Pollard-Brent rho method first\n");
! 2826: factor = pollardbrent((GEN)(**where));
! 2827: } /* Rho stage */
! 2828:
! 2829: /* if this didn't work, try one of our high-power beasties */
! 2830: if (!factor && !(hint & 2))
! 2831: {
! 2832: if (DEBUGLEVEL >= 4)
! 2833: fprintferr("IFAC: trying Lenstra-Montgomery ECM\n");
! 2834: factor = ellfacteur((GEN)(**where), 0); /* do not insist */
! 2835: } /* First ECM stage */
! 2836:
! 2837: if (!factor && !(hint & 1))
! 2838: {
! 2839: if (DEBUGLEVEL >= 4)
! 2840: fprintferr("IFAC: trying Multi-Polynomial Quadratic Sieve\n");
! 2841: factor = mpqs((GEN)(**where));
! 2842: } /* MPQS stage */
! 2843:
! 2844: if (!factor)
! 2845: {
! 2846: if (!(hint & 8)) /* still no luck? force it */
! 2847: {
! 2848: if (DEBUGLEVEL >= 4)
! 2849: fprintferr("IFAC: forcing ECM, may take some time\n");
! 2850: factor = ellfacteur((GEN)(**where), 1);
! 2851: } /* final ECM stage, guaranteed to succeed */
! 2852: else /* limited factorization */
! 2853: {
! 2854: if (DEBUGLEVEL >= 2)
! 2855: {
! 2856: err(warner, "IFAC: unfactored composite declared prime");
! 2857: /* don't print it out at level 3 or above, where it would appear
! 2858: several times before and after this message already */
! 2859: if (DEBUGLEVEL == 2)
! 2860: {
! 2861: fprintferr("\t%Z\n",**where);
! 2862: flusherr();
! 2863: }
! 2864: }
! 2865: (*where)[2] = un; /* might as well trial-divide by it... */
! 2866: return 1;
! 2867: }
! 2868: } /* Final ECM stage */
! 2869:
! 2870: if (DEBUGLEVEL >= 1)
! 2871: {
! 2872: if (!factor) /* never reached */
! 2873: err(talker, "all available factoring methods failed in ifac_crack");
! 2874: }
! 2875: if (typ(factor) == t_VEC) /* delegate this case */
! 2876: return ifac_insert_multiplet(partial, where, factor);
! 2877:
! 2878: else if (typ(factor) != t_INT)
! 2879: {
! 2880: fprintferr("IFAC: factorizer returned strange object to ifac_crack\n");
! 2881: outerr(factor);
! 2882: err(bugparier, "factoring");
! 2883: }
! 2884:
! 2885: /* got single integer back: work out the cofactor (in place) */
! 2886: if (!mpdivis((GEN)(**where), factor, (GEN)(**where)))
! 2887: {
! 2888: fprintferr("IFAC: factoring %Z\n", **where);
! 2889: fprintferr("\tyielded `factor\' %Z\n\twhich isn't!\n", factor);
! 2890: err(bugparier, "factoring");
! 2891: }
! 2892:
! 2893: /* the factoring engines report the factor found when DEBUGLEVEL is
! 2894: large enough; let's tell about the cofactor */
! 2895: if (DEBUGLEVEL >= 4)
! 2896: fprintferr("IFAC: cofactor = %Z\n", **where);
! 2897:
! 2898: /* ok, now `factor' is one factor and **where is the other, find out
! 2899: which is larger */
! 2900: cmp_res = cmpii(factor, (GEN)(**where));
! 2901: if (cmp_res < 0) /* common case */
! 2902: {
! 2903: (*where)[2] = (long)NULL; /* mark cofactor `unknown' */
! 2904: (*where)[-1] = (long)NULL; /* mark factor `unknown' */
! 2905: (*where)[-2] =
! 2906: isonstack(exponent) ? licopy(exponent) : (long)exponent;
! 2907: *where -= 3;
! 2908: **where = (long)factor;
! 2909: return 2;
! 2910: }
! 2911: else if (cmp_res == 0) /* hep, split a square in the middle */
! 2912: {
! 2913: err(warner,
! 2914: "square not found by carrecomplet, ifac_crack recovering");
! 2915: cgiv(factor);
! 2916: (*where)[2] = (long)NULL; /* mark the sqrt `unknown' */
! 2917: if (exponent == gun) /* double the exponent */
! 2918: (*where)[1] = deux;
! 2919: else if (exponent == gdeux)
! 2920: (*where)[1] = (long)stoi(4); /* make a new one */
! 2921: else /* overwrite old exponent */
! 2922: {
! 2923: av = avma;
! 2924: affii(shifti(exponent, 1), (GEN)((*where)[1]));
! 2925: avma = av;
! 2926: /* leave *where unchanged */
! 2927: }
! 2928: if (moebius_mode) return 0;
! 2929: return 1;
! 2930: }
! 2931: else /* factor > cofactor, rearrange */
! 2932: {
! 2933: (*where)[2] = (long)NULL; /* mark factor `unknown' */
! 2934: (*where)[-1] = (long)NULL; /* mark cofactor `unknown' */
! 2935: (*where)[-2] =
! 2936: isonstack(exponent) ? licopy(exponent) : (long)exponent;
! 2937: *where -= 3;
! 2938: **where = (*where)[3]; /* move cofactor pointer to lowest slot */
! 2939: (*where)[3] = (long)factor; /* save factor */
! 2940: return 2;
! 2941: }
! 2942: }
! 2943:
! 2944: /* the following doesn't collect garbage; caller's caller should do it
! 2945: (which means ifac_main()). No diagnostics either, the factoring engine
! 2946: should have printed what it found when DEBUGLEVEL>=4 or so. Note facvec
! 2947: contains slots of three components per factor; repeated factors are
! 2948: expressly allowed (and their classes shouldn't contradict each other
! 2949: whereas their exponents will be added up) */
! 2950: static long
! 2951: ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec)
! 2952: {
! 2953: long j,k=1, lfv=lg(facvec)-1, nf=lfv/3, room=(long)(*where-*partial);
! 2954: /* one of the factors will go into the *where slot, so room is now
! 2955: 3 times the number of slots we can use */
! 2956: long needroom = lfv - room;
! 2957: GEN sorted, auxvec = cgetg(nf+1, t_VEC), factor;
! 2958: long exponent = itos((GEN)((*where)[1])); /* the old exponent */
! 2959: GEN newexp;
! 2960:
! 2961: if (DEBUGLEVEL >= 5)
! 2962: fprintferr("IFAC: incorporating set of %ld factors%s\n",
! 2963: nf, (DEBUGLEVEL >=6 ? "..." : ""));
! 2964: if (needroom > 0)
! 2965: ifac_realloc(partial, where, lg(*partial) + needroom + 3);
! 2966: /* one extra slot for paranoia, errm, future use */
! 2967:
! 2968: /* create sort permutation from the values of the factors */
! 2969: for (j=nf; j; j--) auxvec[j] = facvec[3*j-2]; /* just the pointers */
! 2970: sorted = sindexsort(auxvec);
! 2971: /* and readjust the result for the triple spacing */
! 2972: for (j=nf; j; j--) sorted[j] = 3*sorted[j]-2;
! 2973: if (DEBUGLEVEL >= 6)
! 2974: fprintferr("\tsorted them...\n");
! 2975:
! 2976: /* store factors, beginning at *where, and catching any duplicates */
! 2977: **where = facvec[sorted[nf]];
! 2978: if ((newexp = (GEN)(facvec[sorted[nf]+1])) != gun) /* new exponent > 1 */
! 2979: {
! 2980: if (exponent == 1)
! 2981: (*where)[1] = isonstack(newexp) ? licopy(newexp) : (long)newexp;
! 2982: else
! 2983: (*where)[1] = lmulsi(exponent, newexp);
! 2984: } /* if new exponent is 1, the old exponent already in place will do */
! 2985: (*where)[2] = facvec[sorted[nf]+2]; /* copy class */
! 2986: if (DEBUGLEVEL >= 6)
! 2987: fprintferr("\tstored (largest) factor no. %ld...\n", nf);
! 2988:
! 2989: for (j=nf-1; j; j--)
! 2990: {
! 2991: factor = (GEN)(facvec[sorted[j]]);
! 2992: if (egalii(factor, (GEN)(**where)))
! 2993: {
! 2994: if (DEBUGLEVEL >= 6)
! 2995: fprintferr("\tfactor no. %ld is a duplicate%s\n",
! 2996: j, (j>1 ? "..." : ""));
! 2997: /* update exponent, ignore class which would already have been set,
! 2998: and then forget current factor */
! 2999: if ((newexp = (GEN)(facvec[sorted[j]+1])) != gun) /* new exp > 1 */
! 3000: { /* now we have at least 3 */
! 3001: (*where)[1] = laddii((GEN)((*where)[1]),
! 3002: mulsi(exponent, newexp));
! 3003: }
! 3004: else
! 3005: {
! 3006: if ((*where)[1] == un && exponent == 1)
! 3007: (*where)[1] = deux;
! 3008: else
! 3009: (*where)[1] = laddsi(exponent, (GEN)((*where)[1]));
! 3010: /* not safe to add 1 in place -- that might overwrite gdeux,
! 3011: with `interesting' consequences */
! 3012: }
! 3013: if (moebius_mode) return 0; /* stop now, but with exponent updated */
! 3014: continue;
! 3015: }
! 3016: (*where)[-1] = facvec[sorted[j]+2]; /* class as given */
! 3017: if ((newexp = (GEN)(facvec[sorted[j]+1])) != gun) /* new exp > 1 */
! 3018: {
! 3019: if (exponent == 1 && newexp == gdeux)
! 3020: (*where)[-2] = deux;
! 3021: else /* exponent*newexp > 2 */
! 3022: (*where)[-2] = lmulsi(exponent, newexp);
! 3023: }
! 3024: else
! 3025: {
! 3026: (*where)[-2] = (exponent == 1 ? un :
! 3027: (exponent == 2 ? deux :
! 3028: (long)stoi(exponent))); /* inherit parent's exponent */
! 3029: }
! 3030: (*where)[-3] = isonstack(factor) ? licopy(factor) : (long)factor;
! 3031: /* keep components younger than *partial */
! 3032: *where -= 3;
! 3033: k++;
! 3034: if (DEBUGLEVEL >= 6)
! 3035: fprintferr("\tfactor no. %ld was unique%s\n",
! 3036: j, (j>1 ? " (so far)..." : ""));
! 3037: }
! 3038: /* make the `sorted' object safe for garbage collection (probably not a
! 3039: problem, since it should be in the garbage zone from everybody's
! 3040: perspective, but it's easy to do it) */
! 3041: *sorted = evaltyp(t_INT) | evallg(nf+1);
! 3042: return k;
! 3043: }
! 3044:
! 3045: static GEN
! 3046: ifac_main(GEN *partial)
! 3047: {
! 3048: /* leave the basic error checking to ifac_find() */
! 3049: GEN here = ifac_find(partial, partial);
! 3050: long res, nf;
! 3051:
! 3052: /* if nothing left, return gun */
! 3053: if (!here) return gun;
! 3054:
! 3055: /* if we are in Moebius mode and have already detected a repeated factor,
! 3056: stop right here. Shouldn't really happen */
! 3057: if (moebius_mode && here[1] != un)
! 3058: {
! 3059: if (DEBUGLEVEL >= 3)
! 3060: {
! 3061: fprintferr("IFAC: main loop: repeated old factor\n\t%Z\n", *here);
! 3062: flusherr();
! 3063: }
! 3064: return gzero;
! 3065: }
! 3066:
! 3067: /* loop until first entry is a finished prime. May involve reallocations
! 3068: and thus updates of *partial */
! 3069: while (here[2] != deux)
! 3070: {
! 3071: /* if it's unknown, something has gone wrong; try to recover */
! 3072: if (!(here[2]))
! 3073: {
! 3074: err(warner, "IFAC: unknown factor seen in main loop");
! 3075: res = ifac_resort(partial, &here);
! 3076: if (res) return gzero; /* can only happen in Moebius mode */
! 3077: ifac_whoiswho(partial, &here, -1);
! 3078: /* defrag for good measure */
! 3079: ifac_defrag(partial, &here);
! 3080: continue;
! 3081: }
! 3082: /* if it's composite, crack it */
! 3083: if (here[2] == zero)
! 3084: {
! 3085: /* make sure there's room for another factor */
! 3086: if (here < *partial + 6)
! 3087: { /* try defrag first */
! 3088: ifac_defrag(partial, &here);
! 3089: if (here < *partial + 6) /* no luck */
! 3090: {
! 3091: ifac_realloc(partial, &here, 1); /* guaranteed to work */
! 3092: /* Unfortunately, we can't do a garbage collection here since we
! 3093: know too little about where in the stack the old components
! 3094: were. */
! 3095: }
! 3096: }
! 3097: nf = ifac_crack(partial, &here);
! 3098: if (moebius_mode && here[1] != un) /* that was a power */
! 3099: {
! 3100: if (DEBUGLEVEL >= 3)
! 3101: {
! 3102: fprintferr("IFAC: main loop: repeated new factor\n\t%Z\n", *here);
! 3103: flusherr();
! 3104: }
! 3105: return gzero;
! 3106: }
! 3107: /* deal with the new unknowns. No resort, since ifac_crack will
! 3108: already have sorted them */
! 3109: ifac_whoiswho(partial, &here, nf);
! 3110: continue;
! 3111: }
! 3112: /* if it's prime but not yet finished, finish it */
! 3113: if (here[2] == un)
! 3114: {
! 3115: res = ifac_divide(partial, &here);
! 3116: if (res)
! 3117: {
! 3118: if (moebius_mode)
! 3119: {
! 3120: if (DEBUGLEVEL >= 3)
! 3121: {
! 3122: fprintferr("IFAC: main loop: another factor was divisible by\n");
! 3123: fprintferr("\t%Z\n", *here); flusherr();
! 3124: }
! 3125: return gzero;
! 3126: }
! 3127: ifac_defrag(partial, &here);
! 3128: (void)(ifac_resort(partial, &here)); /* sort new cofactors down */
! 3129: /* it doesn't matter right now whether this finds a repeated factor,
! 3130: since we never get to this point in Moebius mode */
! 3131: ifac_defrag(partial, &here); /* resort may have created new gaps */
! 3132: ifac_whoiswho(partial, &here, -1);
! 3133: }
! 3134: continue;
! 3135: }
! 3136: /* there are no other cases, never reached */
! 3137: err(talker, "non-existent factor class in ifac_main");
! 3138: } /* while */
! 3139: if (moebius_mode && here[1] != un)
! 3140: {
! 3141: if (DEBUGLEVEL >= 3)
! 3142: {
! 3143: fprintferr("IFAC: after main loop: repeated old factor\n\t%Z\n", *here);
! 3144: flusherr();
! 3145: }
! 3146: return gzero; /* just a safety net */
! 3147: }
! 3148: if (DEBUGLEVEL >= 4)
! 3149: {
! 3150: long nf = (*partial + lg(*partial) - here - 3)/3;
! 3151: if (nf)
! 3152: fprintferr("IFAC: main loop: %ld factor%s left\n",
! 3153: nf, (nf>1 ? "s" : ""));
! 3154: else
! 3155: fprintferr("IFAC: main loop: this was the last factor\n");
! 3156: flusherr();
! 3157: }
! 3158: return here;
! 3159: }
! 3160:
! 3161: /* Caller of the following should worry about stack management, it makes
! 3162: a rather shameless mess :^) */
! 3163: GEN
! 3164: ifac_primary_factor(GEN *partial, long *exponent)
! 3165: {
! 3166: GEN here = ifac_main(partial);
! 3167: GEN res;
! 3168:
! 3169: if (here == gun) { *exponent = 0; return gun; }
! 3170: else if (here == gzero) { *exponent = 0; return gzero; }
! 3171:
! 3172: res = icopy((GEN)(*here));
! 3173: *exponent = itos((GEN)(here[1]));
! 3174: here[2] = here[1] = *here = (long)NULL;
! 3175: return res;
! 3176: }
! 3177:
! 3178: /* encapsulated routines */
! 3179:
! 3180: /* prime/exponent pairs need to appear contiguously on the stack, but we
! 3181: also need to have our data structure somewhere, and we don't know in
! 3182: advance how many primes will turn up. The following discipline achieves
! 3183: this: When ifac_decomp() is called, n should point at an object older
! 3184: than the oldest small prime/exponent pair (auxdecomp0() guarantees
! 3185: this easily since it mpdivis()es any divisors it discovers off its own
! 3186: copy of the original N). We allocate sufficient space to accommodate
! 3187: several pairs -- eleven pairs ought to fit in a space not much larger
! 3188: than n itself -- before calling ifac_start(). If we manage to complete
! 3189: the factorization before we run out of space, we free the data structure
! 3190: and cull the excess reserved space before returning. When we do run out,
! 3191: we have to leapfrog to generate more (guesstimating the requirements
! 3192: from what is left in the partial factorization structure); room for
! 3193: fresh pairs is allocated at the head of the stack, followed by an
! 3194: ifac_realloc() to reconnect the data structure and move it out of the
! 3195: way, followed by a few pointer tweaks to connect the new pairs space
! 3196: to the old one.-- This whole affair translates into a surprisingly
! 3197: compact little routine. */
! 3198:
! 3199: #define ifac_overshoot 64 /* lgefint(n)+64 words reserved */
! 3200:
! 3201: long
! 3202: ifac_decomp(GEN n, long hint)
! 3203: {
! 3204: long tf=lgefint(n), av=avma, lim=stack_lim(av,1);
! 3205: long nb=0;
! 3206: GEN part, here, workspc = new_chunk(tf + ifac_overshoot), pairs = (GEN)av;
! 3207: /* workspc will be doled out by us in pairs of smaller t_INTs */
! 3208: long tetpil = avma; /* remember head of workspc zone */
! 3209:
! 3210: if (!n || typ(n) != t_INT) err(typeer, "ifac_decomp");
! 3211: if (!signe(n) || tf < 3) err(talker, "factoring 0 in ifac_decomp");
! 3212:
! 3213: part = ifac_start(n, 0, hint);
! 3214: here = ifac_main(&part);
! 3215:
! 3216: while (here != gun)
! 3217: {
! 3218: long lf=lgefint((GEN)(*here));
! 3219: if (pairs - workspc < lf + 3) /* out of room, leapfrog */
! 3220: {
! 3221: /* the ifac_realloc() below will clear tetpil - avma words
! 3222: on the stack, which should be about enough for the extra
! 3223: primes we're going to see, and we'll want several more to
! 3224: accommodate further exponents. In most cases, the lf + 3
! 3225: below is pure paranoia, but the factor we're about to copy
! 3226: might be the one sitting off the stack in the original n,
! 3227: so let's play safe */
! 3228: workspc = new_chunk(lf + 3 + ifac_overshoot);
! 3229: ifac_realloc(&part, &here, 0);
! 3230: here = ifac_find(&part, &part);
! 3231: tetpil = (long)workspc;
! 3232: }
! 3233: /* room enough now */
! 3234: nb++;
! 3235: pairs -= lf;
! 3236: *pairs = evaltyp(t_INT) | evallg(lf);
! 3237: affii((GEN)(*here), pairs);
! 3238: pairs -= 3;
! 3239: *pairs = evaltyp(t_INT) | evallg(3);
! 3240: affii((GEN)(here[1]), pairs);
! 3241: here[2] = here[1] = *here = (long)NULL;
! 3242: here = ifac_main(&part);
! 3243: if (low_stack(lim, stack_lim(av,1)))
! 3244: {
! 3245: if(DEBUGMEM>1) err(warnmem,"[2] ifac_decomp");
! 3246: ifac_realloc(&part, &here, 0);
! 3247: part = gerepileupto(tetpil, part);
! 3248: }
! 3249: }
! 3250: avma = (long)pairs;
! 3251: if (DEBUGLEVEL >= 3)
! 3252: {
! 3253: fprintferr("IFAC: found %ld large prime (power) factor%s.\n",
! 3254: nb, (nb>1? "s": ""));
! 3255: flusherr();
! 3256: }
! 3257: return nb;
! 3258: }
! 3259:
! 3260: long
! 3261: ifac_moebius(GEN n, long hint)
! 3262: {
! 3263: long mu=1, av=avma, lim=stack_lim(av,1);
! 3264: GEN part = ifac_start(n, 1, hint);
! 3265: GEN here = ifac_main(&part);
! 3266:
! 3267: while (here != gun && here != gzero)
! 3268: {
! 3269: if (itos((GEN)(here[1])) > 1)
! 3270: { here = gzero; break; } /* shouldn't happen */
! 3271: mu = -mu;
! 3272: here[2] = here[1] = *here = (long)NULL;
! 3273: here = ifac_main(&part);
! 3274: if (low_stack(lim, stack_lim(av,1)))
! 3275: {
! 3276: if(DEBUGMEM>1) err(warnmem,"ifac_moebius");
! 3277: ifac_realloc(&part, &here, 0);
! 3278: part = gerepileupto(av, part);
! 3279: }
! 3280: }
! 3281: avma = av;
! 3282: return (here == gun ? mu : 0);
! 3283: }
! 3284:
! 3285: long
! 3286: ifac_issquarefree(GEN n, long hint)
! 3287: {
! 3288: long av=avma, lim=stack_lim(av,1);
! 3289: GEN part = ifac_start(n, 1, hint);
! 3290: GEN here = ifac_main(&part);
! 3291:
! 3292: while (here != gun && here != gzero)
! 3293: {
! 3294: if (itos((GEN)(here[1])) > 1)
! 3295: { here = gzero; break; } /* shouldn't happen */
! 3296: here[2] = here[1] = *here = (long)NULL;
! 3297: here = ifac_main(&part);
! 3298: if (low_stack(lim, stack_lim(av,1)))
! 3299: {
! 3300: if(DEBUGMEM>1) err(warnmem,"ifac_issquarefree");
! 3301: ifac_realloc(&part, &here, 0);
! 3302: part = gerepileupto(av, part);
! 3303: }
! 3304: }
! 3305: avma = av;
! 3306: return (here == gun ? 1 : 0);
! 3307: }
! 3308:
! 3309: long
! 3310: ifac_omega(GEN n, long hint)
! 3311: {
! 3312: long omega=0, av=avma, lim=stack_lim(av,1);
! 3313: GEN part = ifac_start(n, 0, hint);
! 3314: GEN here = ifac_main(&part);
! 3315:
! 3316: while (here != gun)
! 3317: {
! 3318: omega++;
! 3319: here[2] = here[1] = *here = (long)NULL;
! 3320: here = ifac_main(&part);
! 3321: if (low_stack(lim, stack_lim(av,1)))
! 3322: {
! 3323: if(DEBUGMEM>1) err(warnmem,"ifac_omega");
! 3324: ifac_realloc(&part, &here, 0);
! 3325: part = gerepileupto(av, part);
! 3326: }
! 3327: }
! 3328: avma = av;
! 3329: return omega;
! 3330: }
! 3331:
! 3332: long
! 3333: ifac_bigomega(GEN n, long hint)
! 3334: {
! 3335: long Omega=0, av=avma, lim=stack_lim(av,1);
! 3336: GEN part = ifac_start(n, 0, hint);
! 3337: GEN here = ifac_main(&part);
! 3338:
! 3339: while (here != gun)
! 3340: {
! 3341: Omega += itos((GEN)(here[1]));
! 3342: here[2] = here[1] = *here = (long)NULL;
! 3343: here = ifac_main(&part);
! 3344: if (low_stack(lim, stack_lim(av,1)))
! 3345: {
! 3346: if(DEBUGMEM>1) err(warnmem,"ifac_bigomega");
! 3347: ifac_realloc(&part, &here, 0);
! 3348: part = gerepileupto(av, part);
! 3349: }
! 3350: }
! 3351: avma = av;
! 3352: return Omega;
! 3353: }
! 3354:
! 3355: GEN
! 3356: ifac_totient(GEN n, long hint)
! 3357: {
! 3358: GEN res = cgeti(lgefint(n));
! 3359: long exponent, av=avma, tetpil, lim=stack_lim(av,1);
! 3360: GEN phi = gun;
! 3361: GEN part = ifac_start(n, 0, hint);
! 3362: GEN here = ifac_main(&part);
! 3363:
! 3364: while (here != gun)
! 3365: {
! 3366: phi = mulii(phi, addsi(-1, (GEN)(*here)));
! 3367: if (here[1] != un)
! 3368: {
! 3369: if (here[1] == deux)
! 3370: {
! 3371: phi = mulii(phi, (GEN)(*here));
! 3372: }
! 3373: else
! 3374: {
! 3375: exponent = itos((GEN)(here[1]));
! 3376: phi = mulii(phi, gpowgs((GEN)(*here), exponent-1));
! 3377: }
! 3378: }
! 3379: here[2] = here[1] = *here = (long)NULL;
! 3380: here = ifac_main(&part);
! 3381: if (low_stack(lim, stack_lim(av,1)))
! 3382: {
! 3383: GEN *gsav[2];
! 3384: if(DEBUGMEM>1) err(warnmem,"ifac_totient");
! 3385: tetpil = avma;
! 3386: ifac_realloc(&part, &here, 0);
! 3387: phi = icopy(phi);
! 3388: gsav[0] = φ gsav[1] = ∂
! 3389: gerepilemanysp(av, tetpil, gsav, 2);
! 3390: /* don't try to preserve here, safer to pick it up again
! 3391: (and ifac_find does a lot of sanity checking at high
! 3392: debuglevels) */
! 3393: here = ifac_find(&part, &part);
! 3394: }
! 3395: }
! 3396: affii(phi, res);
! 3397: avma = av;
! 3398: return res;
! 3399: }
! 3400:
! 3401: GEN
! 3402: ifac_numdiv(GEN n, long hint)
! 3403: {
! 3404: /* we don't preallocate since it's too hard to guess the right
! 3405: size here */
! 3406: GEN res;
! 3407: long av=avma, tetpil, lim=stack_lim(av,1);
! 3408: GEN exponent, tau = gun;
! 3409: GEN part = ifac_start(n, 0, hint);
! 3410: GEN here = ifac_main(&part);
! 3411:
! 3412: while (here != gun)
! 3413: {
! 3414: exponent = (GEN)(here[1]);
! 3415: tau = mulii(tau, addsi(1, exponent));
! 3416: here[2] = here[1] = *here = (long)NULL;
! 3417: here = ifac_main(&part);
! 3418: if (low_stack(lim, stack_lim(av,1)))
! 3419: {
! 3420: GEN *gsav[2];
! 3421: if(DEBUGMEM>1) err(warnmem,"ifac_numdiv");
! 3422: tetpil = avma;
! 3423: ifac_realloc(&part, &here, 0);
! 3424: tau = icopy(tau);
! 3425: gsav[0] = τ gsav[1] = ∂
! 3426: gerepilemanysp(av, tetpil, gsav, 2);
! 3427: /* (see ifac_totient()) */
! 3428: here = ifac_find(&part, &part);
! 3429: }
! 3430: }
! 3431: tetpil = avma;
! 3432: res = icopy(tau);
! 3433: return gerepile(av, tetpil, res);
! 3434: }
! 3435:
! 3436: GEN
! 3437: ifac_sumdiv(GEN n, long hint)
! 3438: {
! 3439: /* don't preallocate */
! 3440: GEN res;
! 3441: long exponent, av=avma, tetpil, lim=stack_lim(av,1);
! 3442: GEN contrib, sigma = gun;
! 3443: GEN part = ifac_start(n, 0, hint);
! 3444: GEN here = ifac_main(&part);
! 3445:
! 3446: while (here != gun)
! 3447: {
! 3448: exponent = itos((GEN)(here[1]));
! 3449: contrib = addsi(1, (GEN)(*here));
! 3450: for (; exponent > 1; exponent--)
! 3451: contrib = addsi(1, mulii((GEN)(*here), contrib));
! 3452: sigma = mulii(sigma, contrib);
! 3453: here[2] = here[1] = *here = (long)NULL;
! 3454: here = ifac_main(&part);
! 3455: if (low_stack(lim, stack_lim(av,1)))
! 3456: {
! 3457: GEN *gsav[2];
! 3458: if(DEBUGMEM>1) err(warnmem,"ifac_sumdiv");
! 3459: tetpil = avma;
! 3460: ifac_realloc(&part, &here, 0);
! 3461: sigma = icopy(sigma);
! 3462: gsav[0] = σ gsav[1] = ∂
! 3463: gerepilemanysp(av, tetpil, gsav, 2);
! 3464: /* (see ifac_totient()) */
! 3465: here = ifac_find(&part, &part);
! 3466: }
! 3467: }
! 3468: tetpil = avma;
! 3469: res = icopy(sigma);
! 3470: return gerepile(av, tetpil, res);
! 3471: }
! 3472:
! 3473: /* k should be positive, and indeed it had better be > 1 (not checked).
! 3474: The calling function knows what to do with the other cases. */
! 3475: GEN
! 3476: ifac_sumdivk(GEN n, long k, long hint)
! 3477: {
! 3478: /* don't preallocate */
! 3479: GEN res;
! 3480: long exponent, av=avma, tetpil, lim=stack_lim(av,1);
! 3481: GEN contrib, q, sigma = gun;
! 3482: GEN part = ifac_start(n, 0, hint);
! 3483: GEN here = ifac_main(&part);
! 3484:
! 3485: while (here != gun)
! 3486: {
! 3487: exponent = itos((GEN)(here[1]));
! 3488: q = gpowgs((GEN)(*here), k);
! 3489: contrib = addsi(1, q);
! 3490: for (; exponent > 1; exponent--)
! 3491: contrib = addsi(1, mulii(q, contrib));
! 3492: sigma = mulii(sigma, contrib);
! 3493: here[2] = here[1] = *here = (long)NULL;
! 3494: here = ifac_main(&part);
! 3495: if (low_stack(lim, stack_lim(av,1)))
! 3496: {
! 3497: GEN *gsav[2];
! 3498: if(DEBUGMEM>1) err(warnmem,"ifac_sumdivk");
! 3499: tetpil = avma;
! 3500: ifac_realloc(&part, &here, 0);
! 3501: sigma = icopy(sigma);
! 3502: gsav[0] = σ gsav[1] = ∂
! 3503: gerepilemanysp(av, tetpil, gsav, 2);
! 3504: /* (see ifac_totient()) */
! 3505: here = ifac_find(&part, &part);
! 3506: }
! 3507: }
! 3508: tetpil = avma;
! 3509: res = icopy(sigma);
! 3510: return gerepile(av, tetpil, res);
! 3511: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>