Annotation of OpenXM_contrib/pari-2.2/src/basemath/ifactor1.c, Revision 1.1
1.1 ! noro 1: /* $Id: ifactor1.c,v 1.23 2001/05/23 18:42:21 karim Exp $
! 2:
! 3: Copyright (C) 2000 The PARI group.
! 4:
! 5: This file is part of the PARI/GP package.
! 6:
! 7: PARI/GP is free software; you can redistribute it and/or modify it under the
! 8: terms of the GNU General Public License as published by the Free Software
! 9: Foundation. It is distributed in the hope that it will be useful, but WITHOUT
! 10: ANY WARRANTY WHATSOEVER.
! 11:
! 12: Check the License for details. You should have received a copy of it, along
! 13: with the package; see the file 'COPYING'. If not, write to the Free Software
! 14: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
! 15:
! 16: /********************************************************************/
! 17: /** **/
! 18: /** INTEGER FACTORIZATION **/
! 19: /** **/
! 20: /********************************************************************/
! 21: #include "pari.h"
! 22:
! 23: /*********************************************************************/
! 24: /** **/
! 25: /** PSEUDO PRIMALITY **/
! 26: /** **/
! 27: /*********************************************************************/
! 28: static GEN sqrt1, sqrt2, t1, t;
! 29: static long r1;
! 30:
! 31: /* The following two internal routines don't restore avma -- the caller
! 32: must do so at the end. */
! 33: static GEN
! 34: init_miller(GEN n)
! 35: {
! 36: if (signe(n) < 0) n = absi(n);
! 37: t=addsi(-1,n); r1=vali(t); t1 = shifti(t,-r1);
! 38: sqrt1=cgeti(lg(t)); sqrt1[1]=evalsigne(0)|evallgefint(2);
! 39: sqrt2=cgeti(lg(t)); sqrt2[1]=evalsigne(0)|evallgefint(2);
! 40: return n;
! 41: }
! 42:
! 43: /* is n strong pseudo-prime for base a ? `End matching' (check for square
! 44: * roots of -1) added by GN */
! 45: /* TODO: If ends do mismatch, then we have factored n, and this information
! 46: should somehow be made available to the factoring machinery. --GN */
! 47: static int
! 48: bad_for_base(GEN n, GEN a)
! 49: {
! 50: long r, av=avma, lim=stack_lim(av,1);
! 51: GEN c2, c = powmodulo(a,t1,n);
! 52:
! 53: if (!is_pm1(c) && !egalii(t,c)) /* go fishing for -1, not for 1 */
! 54: {
! 55: for (r=r1-1; r; r--) /* (this saves one squaring/reduction) */
! 56: {
! 57: c2=c; c=resii(sqri(c),n);
! 58: if (egalii(t,c)) break;
! 59: if (low_stack(lim, stack_lim(av,1)))
! 60: {
! 61: GEN *gsav[2]; gsav[0]=&c; gsav[1]=&c2;
! 62: if(DEBUGMEM>1) err(warnmem,"miller(rabin)");
! 63: gerepilemany(av, gsav, 2);
! 64: }
! 65: }
! 66: if (!r) return 1;
! 67: /* sqrt(-1) seen, compare or remember */
! 68: if (signe(sqrt1)) /* we saw one earlier: compare */
! 69: {
! 70: /* check if too many sqrt(-1)s mod n */
! 71: if (!egalii(c2,sqrt1) && !egalii(c2,sqrt2)) return 1;
! 72: }
! 73: else { affii(c2,sqrt1); affii(subii(n,c2),sqrt2); } /* remember */
! 74: }
! 75: return 0;
! 76: }
! 77:
! 78: /* Miller-Rabin test for k random bases */
! 79: long
! 80: millerrabin(GEN n, long k)
! 81: {
! 82: long r,i,av2, av = avma;
! 83:
! 84: if (!signe(n)) return 0;
! 85: /* If |n| <= 3, check if n = +- 1 */
! 86: if (lgefint(n)==3 && (ulong)(n[2])<=3) return (n[2] != 1);
! 87:
! 88: if (!mod2(n)) return 0;
! 89: n = init_miller(n); av2=avma;
! 90: for (i=1; i<=k; i++)
! 91: {
! 92: do r = smodsi(mymyrand(),n); while (!r);
! 93: if (DEBUGLEVEL > 4)
! 94: fprintferr("Miller-Rabin: testing base %ld\n",
! 95: r);
! 96: if (bad_for_base(n, stoi(r))) { avma=av; return 0; }
! 97: avma=av2;
! 98: }
! 99: avma=av; return 1;
! 100: }
! 101:
! 102: /* As above for k bases taken in pr (i.e not random).
! 103: * We must have |n|>2 and 1<=k<=11 (not checked) or k in {16,17} to select
! 104: * some special sets of bases.
! 105: *
! 106: * By computations of Gerhard Jaeschke, `On strong pseudoprimes to several
! 107: * bases', Math.Comp. 61 (1993), 915--926 (see also Chris Caldwell's Prime
! 108: * Number Pages at http://www.utm.edu/research/primes/prove2.html), we have:
! 109: *
! 110: * k == 4 (bases 2,3,5,7) correctly detects all composites
! 111: * n < 118 670 087 467 == 172243 * 688969 with the single exception of
! 112: * n == 3 215 031 751 == 151 * 751 * 28351,
! 113: *
! 114: * k == 5 (bases 2,3,5,7,11) correctly detects all composites
! 115: * n < 2 152 302 898 747 == 6763 * 10627 * 29947,
! 116: *
! 117: * k == 6 (bases 2,3,...,13) correctly detects all composites
! 118: * n < 3 474 749 660 383 == 1303 * 16927 * 157543,
! 119: *
! 120: * k == 7 (bases 2,3,...,17) correctly detects all composites
! 121: * n < 341 550 071 728 321 == 10670053 * 32010157,
! 122: * and even this limiting value is caught by an end mismatch between bases
! 123: * 2 and 5 (or 5 and 17).
! 124: *
! 125: * Moreover, the four bases chosen at
! 126: *
! 127: * k == 16 (2,13,23,1662803) will correctly detect all composites up
! 128: * to at least 10^12, and the combination at
! 129: *
! 130: * k == 17 (31,73) detects most odd composites without prime factors > 100
! 131: * in the range n < 2^36 (with less than 250 exceptions, indeed with fewer
! 132: * than 1400 exceptions up to 2^42). --GN
! 133: * (DATA TO BE COMPLETED)
! 134: */
! 135: int /* no longer static -- needed in mpqs.c */
! 136: miller(GEN n, long k)
! 137: {
! 138: long r,i,av2, av = avma;
! 139: static long pr[] =
! 140: { 0, 2,3,5,7,11,13,17,19,23,29, 31,73, 2,13,23,1662803UL, };
! 141: long *p;
! 142:
! 143: if (!mod2(n)) return 0;
! 144: if (k==16)
! 145: { /* use smaller (faster) bases if possible */
! 146: if (lgefint(n)==3 && (ulong)(n[2]) < 3215031751UL) p = pr; /* 2,3,5,7 */
! 147: else p = pr+13; /* 2,13,23,1662803 */
! 148: k=4;
! 149: }
! 150: else if (k==17)
! 151: {
! 152: if (lgefint(n)==3 && (ulong)(n[2]) < 1373653) p = pr; /* 2,3 */
! 153: else p = pr+11; /* 31,73 */
! 154: k=2;
! 155: }
! 156: else p = pr; /* 2,3,5,... */
! 157: n = init_miller(n); av2=avma;
! 158: for (i=1; i<=k; i++)
! 159: {
! 160: r = smodsi(p[i],n); if (!r) break;
! 161: if (bad_for_base(n, stoi(r))) { avma = av; return 0; }
! 162: avma=av2;
! 163: }
! 164: avma=av; return 1;
! 165: }
! 166: /***********************************************************************/
! 167: /** **/
! 168: /** Pocklington-Lehmer **/
! 169: /** P-1 primality test **/
! 170: /** Crude implementation BA 2000Apr21 **/
! 171: /***********************************************************************/
! 172:
! 173: /*assume n>=2*/
! 174: static long pl831(GEN N, GEN p)
! 175: {
! 176: ulong ltop=avma,av;
! 177: long a;
! 178: GEN Nmun,Nmunp;
! 179: Nmun=addis(N,-1);
! 180: Nmunp=divii(Nmun,p);
! 181: av=avma;
! 182: for(a=2;;a++)
! 183: {
! 184: GEN b;
! 185: b=powmodulo(stoi(a),Nmunp,N);
! 186: if (gcmp1(powmodulo(b,p,N)))
! 187: {
! 188: GEN g;
! 189: g=mppgcd(addis(b,-1),N);
! 190: if (gcmp1(g))
! 191: {
! 192: avma=ltop;
! 193: return a;
! 194: }
! 195: if (!gegal(g,N))
! 196: {
! 197: avma=ltop;
! 198: return 0;
! 199: }
! 200: }
! 201: else
! 202: {
! 203: avma=ltop;
! 204: return 0;
! 205: }
! 206: avma=av;
! 207: }
! 208: }
! 209: /*
! 210: * flag 0: return gun (prime), gzero (composite)
! 211: * flag 1: return gzero (composite), gun (small prime), matrix (large prime)
! 212: *
! 213: * The matrix has 3 columns, [a,b,c] with
! 214: * a[i] prime factor of N-1,
! 215: * b[i] witness for a[i] as in pl831
! 216: * c[i] plisprime(a[i])
! 217: */
! 218: extern GEN decomp_limit(GEN n, GEN limit);
! 219: GEN
! 220: plisprime(GEN N, long flag)
! 221: {
! 222: ulong ltop=avma;
! 223: long i;
! 224: int eps;
! 225: GEN C,F;
! 226: if ( typ(N) != t_INT ) err(arither1);
! 227: eps = absi_cmp(N,gdeux);
! 228: if (eps<=0) return eps? gzero: gun;
! 229: N = absi(N);
! 230: /* Use Jaeschke results. See above */
! 231: if (miller(N,7))
! 232: { /* compare to 341550071728321 */
! 233: if (cmpii(N, u2toi(0x136a3, 0x52b2c8c1)) < 0) { avma=ltop; return gun; }
! 234: }
! 235: else { avma=ltop; return gzero; }
! 236: F=(GEN)decomp_limit(addis(N,-1),racine(N))[1];
! 237: if (DEBUGLEVEL>=3) fprintferr("P.L.:factor O.K.\n");
! 238: C=cgetg(4,t_MAT);
! 239: C[1]=lgetg(lg(F),t_COL);
! 240: C[2]=lgetg(lg(F),t_COL);
! 241: C[3]=lgetg(lg(F),t_COL);
! 242: for(i=1;i<lg(F);i++)
! 243: {
! 244: long witness;
! 245: GEN p;
! 246: p=(GEN)F[i];
! 247: witness=pl831(N,p);
! 248: if (!witness)
! 249: {
! 250: avma=ltop;
! 251: return gzero;
! 252: }
! 253: mael(C,1,i)=lcopy(p);
! 254: mael(C,2,i)=lstoi(witness);
! 255: mael(C,3,i)=(long)plisprime(p,flag);
! 256: if (gmael(C,3,i)==gzero)
! 257: err(talker,"Sorry false prime number %Z in plisprime",p);
! 258: }
! 259: if (!flag) { avma=ltop; return gun; }
! 260: return gerepileupto(ltop,C);
! 261: }
! 262:
! 263: /***********************************************************************/
! 264: /** **/
! 265: /** PRIMES IN SUCCESSION **/
! 266: /** (abstracted by GN 1998Aug21 mainly for use in ellfacteur() below) **/
! 267: /** **/
! 268: /***********************************************************************/
! 269:
! 270: /* map from prime residue classes mod 210 to their numbers in {0...47}.
! 271: * Subscripts into this array take the form ((k-1)%210)/2, ranging from
! 272: * 0 to 104. Unused entries are 128
! 273: */
! 274: #define NPRC 128 /* non-prime residue class */
! 275:
! 276: static
! 277: unsigned char prc210_no[] =
! 278: {
! 279: 0, NPRC, NPRC, NPRC, NPRC, 1, 2, NPRC, 3, 4, NPRC, /* 21 */
! 280: 5, NPRC, NPRC, 6, 7, NPRC, NPRC, 8, NPRC, 9, /* 41 */
! 281: 10, NPRC, 11, NPRC, NPRC, 12, NPRC, NPRC, 13, 14, NPRC, /* 63 */
! 282: NPRC, 15, NPRC, 16, 17, NPRC, NPRC, 18, NPRC, 19, /* 83 */
! 283: NPRC, NPRC, 20, NPRC, NPRC, NPRC, 21, NPRC, 22, 23, NPRC, /* 105 */
! 284: 24, 25, NPRC, 26, NPRC, NPRC, NPRC, 27, NPRC, NPRC, /* 125 */
! 285: 28, NPRC, 29, NPRC, NPRC, 30, 31, NPRC, 32, NPRC, NPRC, /* 147 */
! 286: 33, 34, NPRC, NPRC, 35, NPRC, NPRC, 36, NPRC, 37, /* 167 */
! 287: 38, NPRC, 39, NPRC, NPRC, 40, 41, NPRC, NPRC, 42, NPRC, /* 189 */
! 288: 43, 44, NPRC, 45, 46, NPRC, NPRC, NPRC, NPRC, 47, /* 209 */
! 289: };
! 290:
! 291: /* map from prime residue classes mod 210 (by number) to their smallest
! 292: * positive representatives
! 293: */
! 294: static
! 295: unsigned char prc210_rp[] =
! 296: {
! 297: 1, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79,
! 298: 83, 89, 97, 101, 103, 107, 109, 113, 121, 127, 131, 137, 139, 143, 149,
! 299: 151, 157, 163, 167, 169, 173, 179, 181, 187, 191, 193, 197, 199, 209,
! 300: };
! 301:
! 302: /* first differences of the preceding */
! 303: static
! 304: unsigned char prc210_d1[] =
! 305: {
! 306: 10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6,
! 307: 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6,
! 308: 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2, 10, 2,
! 309: };
! 310:
! 311: GEN
! 312: nextprime(GEN n)
! 313: {
! 314: long rc,rc0,rcd,rcn,av1,av2, av = avma;
! 315:
! 316: if (typ(n) != t_INT) n=gceil(n); /* accept arguments in R --GN */
! 317: if (typ(n) != t_INT) err(arither1);
! 318: if (signe(n) <= 0) { avma=av; return gdeux; }
! 319: if (lgefint(n) <= 3)
! 320: { /* check if n <= 7 */
! 321: ulong k = n[2];
! 322: if (k <= 2) { avma=av; return gdeux; }
! 323: if (k == 3) { avma = av; return stoi(3); }
! 324: if (k <= 5) { avma = av; return stoi(5); }
! 325: if (k <= 7) { avma = av; return stoi(7); }
! 326: }
! 327: /* here n > 7 */
! 328: if (!(mod2(n))) n = addsi(1,n);
! 329: rc = rc0 = smodis(n, 210);
! 330: rcn = (long)(prc210_no[rc0>>1]);
! 331: /* find next prime residue class mod 210 */
! 332: while (rcn == NPRC)
! 333: {
! 334: rc += 2; /* cannot wrap since 209 is coprime */
! 335: rcn = (long)(prc210_no[rc>>1]);
! 336: }
! 337: if (rc > rc0) n = addsi(rc - rc0, n);
! 338: /* now find an actual prime */
! 339: av2 = av1 = avma;
! 340: for(;;)
! 341: {
! 342: if (miller(n,10)) break;
! 343: av1 = avma;
! 344: rcd = prc210_d1[rcn];
! 345: if (++rcn > 47) rcn = 0;
! 346: n = addsi(rcd,n);
! 347: }
! 348: if (av1!=av2) return gerepile(av,av1,n);
! 349: return (av1==av)? icopy(n): n;
! 350: }
! 351:
! 352: GEN
! 353: precprime(GEN n)
! 354: {
! 355: long rc,rc0,rcd,rcn,av1,av2, av = avma;
! 356:
! 357: if (typ(n) != t_INT) n=gfloor(n); /* accept arguments in R --GN */
! 358: if (typ(n) != t_INT) err(arither1);
! 359: if (signe(n)<=0) { avma=av; return gzero; }
! 360: if (lgefint(n) <= 3)
! 361: { /* check if n <= 10 */
! 362: ulong k = n[2];
! 363: if (k <= 1) { avma=av; return gzero; }
! 364: if (k == 2) { avma=av; return gdeux; }
! 365: if (k <= 4) { avma=av; return stoi(3); }
! 366: if (k <= 6) { avma=av; return stoi(5); }
! 367: if (k <= 10) { avma=av; return stoi(7); }
! 368: }
! 369: /* here n >= 11 */
! 370: if (!(mod2(n))) n = addsi(-1,n);
! 371: rc = rc0 = smodis(n, 210);
! 372: rcn = (long)(prc210_no[rc0>>1]);
! 373: /* find last prime residue class mod 210 */
! 374: while (rcn == NPRC)
! 375: {
! 376: rc -= 2; /* cannot wrap since 1 is coprime */
! 377: rcn = (long)(prc210_no[rc>>1]);
! 378: }
! 379: if (rc < rc0) n = addsi(rc - rc0, n);
! 380: /* now find an actual prime */
! 381: av2 = av1 = avma;
! 382: for(;;)
! 383: {
! 384: if (miller(n,10)) break;
! 385: av1 = avma;
! 386: if (rcn == 0)
! 387: { rcd = 2; rcn = 47; }
! 388: else
! 389: rcd = prc210_d1[--rcn];
! 390: n = addsi(-rcd,n);
! 391: }
! 392: if (av1!=av2) return gerepile(av,av1,n);
! 393: return (av1==av)? icopy(n): n;
! 394: }
! 395:
! 396: /* find next single-word prime strictly larger than p. If **d is non-NULL,
! 397: * this will be p + *(*d)++, using the diffptr table. Otherwise imitate
! 398: * nextprime(). Apart from *d, caller must supply a long variable to which
! 399: * rcn points, initialized either to NPRC or to the correct residue class
! 400: * number for the current p; we'll use this to track the current prime
! 401: * residue class mod 210 once we're out of range of the diffptr table, and
! 402: * we'll update it before that if it isn't NPRC. *q is incremented when-
! 403: * ever q!=NULL and we wrap from 209 mod 210 to 1 mod 210; this makes sense
! 404: * only when *rcn already held the correct value. Caller must also supply
! 405: * the second argument for miller(). --GN1998Aug22
! 406: */
! 407: ulong
! 408: snextpr(ulong p, byteptr *d, long *rcn, long *q, long k)
! 409: {
! 410: static ulong pp[] =
! 411: { evaltyp(t_INT)|m_evallg(3), evalsigne(1)|evallgefint(3), 0 };
! 412: static ulong *pp2 = pp + 2;
! 413: static GEN gp = (GEN)pp;
! 414: long d1 = **d, rcn0;
! 415:
! 416: if (d1)
! 417: {
! 418: if (*rcn != NPRC)
! 419: {
! 420: rcn0 = *rcn;
! 421: while (d1 > 0)
! 422: {
! 423: d1 -= prc210_d1[*rcn];
! 424: if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
! 425: }
! 426: if (d1 < 0)
! 427: {
! 428: fprintferr("snextpr: prime %lu wasn\'t %lu mod 210\n",
! 429: p, (ulong)prc210_rp[rcn0]);
! 430: err(bugparier, "[caller of] snextpr");
! 431: }
! 432: }
! 433: return p + *(*d)++;
! 434: }
! 435: /* we are beyond the diffptr table */
! 436: if (*rcn == NPRC) /* we need to initialize this now */
! 437: {
! 438: *rcn = prc210_no[(p % 210) >> 1];
! 439: if (*rcn == NPRC)
! 440: {
! 441: fprintferr("snextpr: %lu should have been prime but isn\'t\n", p);
! 442: err(bugparier, "[caller of] snextpr");
! 443: }
! 444: }
! 445: /* look for the next one */
! 446: *pp2 = p;
! 447: *pp2 += prc210_d1[*rcn];
! 448: if (++*rcn > 47) *rcn = 0;
! 449: while (!miller(gp, k))
! 450: {
! 451: *pp2 += prc210_d1[*rcn];
! 452: if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
! 453: if (*pp2 <= 11) /* wraparound mod 2^BITS_IN_LONG */
! 454: {
! 455: fprintferr("snextpr: integer wraparound after prime %lu\n", p);
! 456: err(bugparier, "[caller of] snextpr");
! 457: }
! 458: }
! 459: return *pp2;
! 460: }
! 461:
! 462:
! 463: /***********************************************************************/
! 464: /** **/
! 465: /** FACTORIZATION (ECM) **/
! 466: /** Integer factorization using the elliptic curves method (ECM). **/
! 467: /** ellfacteur() returns a non trivial factor of N, assuming N>0, **/
! 468: /** is composite, and has no prime divisor below 2^14 or so. **/
! 469: /** Extensively modified by GN Jul-Aug 1998, with much helpful **/
! 470: /** advice by Paul Zimmermann. Thanks also to Guillaume Hanrot **/
! 471: /** and Igor Schein for providing many CPU cycles whilst testing. **/
! 472: /** **/
! 473: /***********************************************************************/
! 474:
! 475: static GEN N, gl, *XAUX;
! 476: #define nbcmax 64 /* max number of simultaneous curves */
! 477: #define bstpmax 1024 /* max number of baby step table entries */
! 478:
! 479: /* addition/doubling/multiplication of a point on an `elliptic curve'
! 480: * mod N may result in one of three things: a new bona fide point,
! 481: * a point at infinity (betraying itself by a denominator divisible
! 482: * by N), or a point which is at infinity mod some nontrivial factor
! 483: * of N but finite mod some other factor (betraying itself by a denom-
! 484: * inator which has nontrivial gcd with N, and this is of course what
! 485: * we want).
! 486: */
! 487: /* (In the second case, addition/doubling will simply abort, copying one
! 488: * of the summands to the destination array of points unless they coincide.
! 489: * Multiplication will stop at some unpredictable intermediate stage: The
! 490: * destination will contain _some_ multiple of the input point, but not
! 491: * necessarily the desired one, which doesn't matter. As long as we're
! 492: * multiplying (B1 phase) we simply carry on with the next multiplier.
! 493: * During the B2 phase, the only additions are the giant steps, and the
! 494: * worst that can happen here is that we lose one residue class mod 210
! 495: * of prime multipliers on 4 of the curves, so again, we ignore the problem
! 496: * and just carry on.) */
! 497: /* The idea is: Select a handful of curves mod N and one point P on each of
! 498: * them. Try to compute, for each such point, the multiple [M]P = Q where
! 499: * M is the product of all powers <= B2 of primes <= nextprime(B1), for some
! 500: * suitably chosen B1 and B2. Then check whether multiplying Q by one of the
! 501: * primes < nextprime(B2) would betray a factor. This second stage proceeds
! 502: * by looking separately at the primes in each residue class mod 210, four
! 503: * curves at a time, and stepping additively to ever larger multipliers,
! 504: * by comparing X coordinates of points which we would need to add in order
! 505: * to reach another prime multiplier in the same residue class. `Comparing'
! 506: * means that we accumulate a product of differences of X coordinates, and
! 507: * from time to time take a gcd of this product with N.
! 508: */
! 509: /* Montgomery's trick of hiding the cost of computing inverses mod N at a
! 510: * price of three extra multiplications mod N, by working on up to 64 or
! 511: * even 128 points in parallel, is used heavily. --GN
! 512: */
! 513:
! 514: /* *** auxiliary functions for ellfacteur: *** */
! 515:
! 516: /* Parallel addition on nbc curves, assigning the result to locations at and
! 517: * following *X3, *Y3. Safe to be called with X3,Y3 equal to X2,Y2 (_not_
! 518: * to X1,Y1). It is also safe to overwrite Y2 with X3. (If Y coords of
! 519: * result not desired, set Y3=NULL.) If nbc1 < nbc, the first summand is
! 520: * assumed to hold only nbc1 distinct points, which are repeated as often
! 521: * as we need them (useful for adding one point on each of a few curves
! 522: * to several other points on the same curves).
! 523: * Return 0 when successful, 1 when we hit a denominator divisible by N,
! 524: * and 2 when gcd(denominator, N) is a nontrivial factor of N, which will
! 525: * be preserved in gl.
! 526: * We use more stack space than the old code did, and thus run a bit of a
! 527: * risk of overflowing it, but it's still bounded by a constant multiple
! 528: * of lgefint(N)*nbc, as it was in the old version --GN1998Jul02,Aug12
! 529: */
! 530: /* (Lessee: Second phase creates 12 items on the stack, per iteration,
! 531: * of which four are twice as long and one is thrice as long as N --
! 532: * makes 18 units per iteration. First phase creates 4 units. Total
! 533: * can be as large as about 4*nbcmax+18*8 units. And elladd2() is just
! 534: * as bad, and elldouble() comes to about 3*nbcmax+29*8 units. A few
! 535: * strategic garbage collections every 8 iterations should help when nbc
! 536: * is large...) --GN1998Aug23
! 537: */
! 538:
! 539: static int
! 540: elladd0(long nbc, long nbc1,
! 541: GEN *X1, GEN *Y1, GEN *X2, GEN *Y2, GEN *X3, GEN *Y3)
! 542: {
! 543: GEN lambda;
! 544: GEN W[2*nbcmax], *A=W+nbc; /* W[0],A[0] never used */
! 545: long i, av=avma, tetpil;
! 546: ulong mask = ~0UL;
! 547:
! 548: /* actually, this is only ever called with nbc1==nbc or nbc1==4, so: */
! 549: if (nbc1 == 4) mask = 3;
! 550: else if (nbc1 < nbc) err(bugparier, "[caller of] elladd0");
! 551:
! 552: /* W[0] = gun; */
! 553: W[1] = /* A[0] =*/ subii(X1[0], X2[0]);
! 554: for (i=1; i<nbc; i++)
! 555: {
! 556: A[i] = subii(X1[i&mask], X2[i]); /* don't waste time reducing mod N here */
! 557: W[i+1] = modii(mulii(A[i], W[i]), N);
! 558: }
! 559: tetpil = avma;
! 560:
! 561: /* if gl != N we have a factor */
! 562: if (!invmod(W[nbc], N, &gl))
! 563: {
! 564: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
! 565: if (X2 != X3)
! 566: {
! 567: long k;
! 568: /* cannot add on one of the curves mod N: make sure X3 contains
! 569: * something useful before letting the caller proceed
! 570: */
! 571: for (k = 2*nbc; k--; ) affii(X2[k],X3[k]);
! 572: }
! 573: avma = av; return 1;
! 574: }
! 575:
! 576: while (i--) /* nbc times, actually */
! 577: {
! 578: lambda = modii(mulii(subii(Y1[i&mask], Y2[i]),
! 579: i?mulii(gl, W[i]):gl), N);
! 580: modiiz(subii(sqri(lambda), addii(X2[i], X1[i&mask])), N, X3[i]);
! 581: if (Y3)
! 582: modiiz(subii(mulii(lambda, subii(X1[i&mask], X3[i])),
! 583: Y1[i&mask]),
! 584: N, Y3[i]);
! 585: if (!i) break;
! 586: gl = modii(mulii(gl, A[i]), N);
! 587: if (!(i&7)) gl = gerepileupto(tetpil, gl);
! 588: }
! 589: avma=av; return 0;
! 590: }
! 591:
! 592: /* Shortcut variant, for use in cases where Y coordinates follow their
! 593: * corresponding X coordinates, and the first summand doesn't need to be
! 594: * repeated
! 595: */
! 596: static int
! 597: elladd(long nbc, GEN *X1, GEN *X2, GEN *X3)
! 598: {
! 599: return elladd0(nbc, nbc, X1, X1+nbc, X2, X2+nbc, X3, X3+nbc);
! 600: }
! 601: /* this could perhaps become a macro --GN */
! 602:
! 603: /* The next one is exactly the same except it does twice as many additions
! 604: * (and thus hides even more of the cost of the modular inverse); the net
! 605: * effect is the same as elladd(nbc,X1,X2,X3) followed by elladd(nbc,X4,X5,X6).
! 606: * Safe to have X2==X3 and/or X5==X6, and of course safe to have X1 or X2
! 607: * coincide with X4 or X5, in any order.
! 608: */
! 609:
! 610: static int
! 611: elladd2(long nbc, GEN *X1, GEN *X2, GEN *X3, GEN *X4, GEN *X5, GEN *X6)
! 612: {
! 613: GEN lambda, *Y1 = X1+nbc, *Y2 = X2+nbc, *Y3 = X3+nbc;
! 614: GEN *Y4 = X4+nbc, *Y5 = X5+nbc, *Y6 = X6+nbc;
! 615: GEN W[4*nbcmax], *A=W+2*nbc; /* W[0],A[0] never used */
! 616: long i,j, av=avma, tetpil;
! 617: /* W[0] = gun; */
! 618: W[1] = /* A[0] =*/ subii(X1[0], X2[0]);
! 619: for (i=1; i<nbc; i++)
! 620: {
! 621: A[i] = subii(X1[i], X2[i]); /* don't waste time reducing mod N here */
! 622: W[i+1] = modii(mulii(A[i], W[i]), N);
! 623: }
! 624: for (j=0; j<nbc; i++,j++)
! 625: {
! 626: A[i] = subii(X4[j], X5[j]);
! 627: W[i+1] = modii(mulii(A[i], W[i]), N);
! 628: }
! 629: tetpil = avma;
! 630:
! 631: /* if gl != N we have a factor */
! 632: if (!invmod(W[2*nbc], N, &gl))
! 633: {
! 634: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
! 635: if (X2 != X3)
! 636: {
! 637: long k;
! 638: /* cannot add on one of the curves mod N: make sure X3 contains
! 639: * something useful before letting the caller proceed
! 640: */
! 641: for (k = 2*nbc; k--; ) affii(X2[k],X3[k]);
! 642: }
! 643: if (X5 != X6)
! 644: {
! 645: long k;
! 646: /* same for X6 */
! 647: for (k = 2*nbc; k--; ) affii(X5[k],X6[k]);
! 648: }
! 649: avma = av; return 1;
! 650: }
! 651:
! 652: while (j--) /* nbc times, actually */
! 653: {
! 654: i--;
! 655: lambda = modii(mulii(subii(Y4[j], Y5[j]),
! 656: mulii(gl, W[i])), N);
! 657: modiiz(subii(sqri(lambda), addii(X5[j], X4[j])), N, X6[j]);
! 658: modiiz(subii(mulii(lambda, subii(X4[j], X6[j])), Y4[j]), N, Y6[j]);
! 659: gl = modii(mulii(gl, A[i]), N);
! 660: if (!(i&7)) gl = gerepileupto(tetpil, gl);
! 661: }
! 662: while (i--) /* nbc times */
! 663: {
! 664: lambda = modii(mulii(subii(Y1[i], Y2[i]),
! 665: i?mulii(gl, W[i]):gl), N);
! 666: modiiz(subii(sqri(lambda), addii(X2[i], X1[i])), N, X3[i]);
! 667: modiiz(subii(mulii(lambda, subii(X1[i], X3[i])), Y1[i]), N, Y3[i]);
! 668: if (!i) break;
! 669: gl = modii(mulii(gl, A[i]), N);
! 670: if (!(i&7)) gl = gerepileupto(tetpil, gl);
! 671: }
! 672: avma=av; return 0;
! 673: }
! 674:
! 675: /* Parallel doubling on nbc curves, assigning the result to locations at
! 676: * and following *X2. Safe to be called with X2 equal to X1. Return
! 677: * value as for elladd() above. If we find a point at infinity mod N,
! 678: * and if X1 != X2, we copy the points at X1 to X2.
! 679: * Use fewer assignments than the old code. Strangely, whereas this gains
! 680: * about 3% on my P133 with elladd(), it makes hardly any difference here
! 681: * with elldouble() --GN
! 682: */
! 683: static int
! 684: elldouble(long nbc, GEN *X1, GEN *X2)
! 685: {
! 686: GEN lambda,v, *Y1 = X1+nbc, *Y2 = X2+nbc;
! 687: GEN W[nbcmax+1]; /* W[0] never used */
! 688: long i, av=avma, tetpil;
! 689: /*W[0] = gun;*/ W[1] = Y1[0];
! 690: for (i=1; i<nbc; i++)
! 691: W[i+1] = modii(mulii(Y1[i], W[i]), N);
! 692: tetpil = avma;
! 693:
! 694: if (!invmod(W[nbc], N, &gl))
! 695: {
! 696: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
! 697: if (X1 != X2)
! 698: {
! 699: long k;
! 700: /* cannot double on one of the curves mod N: make sure X2 contains
! 701: * something useful before letting the caller proceed
! 702: */
! 703: for (k = 2*nbc; k--; ) affii(X1[k],X2[k]);
! 704: }
! 705: avma = av; return 1;
! 706: }
! 707:
! 708: while (i--) /* nbc times, actually */
! 709: {
! 710: lambda = modii(mulii(addsi(1, mulsi(3, sqri(X1[i]))),
! 711: i?mulii(gl,W[i]):gl), N);
! 712: if (signe(lambda)) /* half of zero is still zero */
! 713: lambda = shifti(mod2(lambda)? addii(lambda, N): lambda, -1);
! 714: v = modii(subii(sqri(lambda), shifti(X1[i],1)), N);
! 715: if (i) gl = modii(mulii(gl, Y1[i]), N);
! 716: modiiz(subii(mulii(lambda, subii(X1[i], v)), Y1[i]), N, Y2[i]);
! 717: affii(v, X2[i]);
! 718: if (!(i&7) && i) gl = gerepileupto(tetpil, gl);
! 719: }
! 720: avma = av; return 0;
! 721: }
! 722:
! 723: /* Parallel multiplication by an odd prime k on nbc curves, storing the
! 724: * result to locations at and following *X2. Safe to be called with X2
! 725: * equal to X1. Return values as for elladd() and elldouble().
! 726: * Uses (a simplified variant of) Peter Montgomery's PRAC (PRactical Addition
! 727: * Chain) algorithm; see ftp://ftp.cwi.nl/pub/pmontgom/Lucas.ps.gz .
! 728: * With thanks to Paul Zimmermann for the reference. --GN1998Aug13
! 729: */
! 730:
! 731: /* We use an array of GENs pointed at by XAUX as a scratchpad; this will
! 732: * have been set up by ellfacteur() (so we don't need to reinitialize it
! 733: * each time).
! 734: */
! 735:
! 736: static int
! 737: ellmult(long nbc, ulong k, GEN *X1, GEN *X2) /* k>2 prime, not checked */
! 738: {
! 739: long i,d,e,e1,r,av=avma,tetpil;
! 740: int res;
! 741: GEN *A=X2, *B=XAUX, *S, *T=XAUX+2*nbc;
! 742:
! 743: for (i = 2*nbc; i--; ) { affii(X1[i],XAUX[i]); }
! 744: tetpil = avma;
! 745:
! 746: /* first doubling picks up X1; after this we'll be working in XAUX and
! 747: * X2 only, mostly via A and B and T
! 748: */
! 749: if ((res = elldouble(nbc, X1, X2)) != 0)
! 750: {
! 751: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 752: return res;
! 753: }
! 754:
! 755: /* split the work at the golden ratio */
! 756: r = (long)(k*0.61803398875 + .5);
! 757: d = k - r; e = r - d; /* NB d+e == r, so no danger of ofl below */
! 758:
! 759: while (d != e)
! 760: {
! 761:
! 762: /* apply one of the nine transformations from PM's Table 4. We first
! 763: * figure out which, and then go into an eight-way switch, because
! 764: * some of the transformations are similar enough to share code.
! 765: */
! 766: if (d <= e + (e>>2)) /* floor(1.25*e) */
! 767: {
! 768: if ((d+e)%3 == 0)
! 769: { i = 0; goto apply; } /* Table 4, rule 1 */
! 770: else if ((d-e)%6 == 0)
! 771: { i = 1; goto apply; } /* rule 2 */
! 772: /* else fall through */
! 773: }
! 774: if ((d+3)>>2 <= e) /* equiv to d <= 4*e but cannot ofl */
! 775: { i = 2; goto apply; } /* rule 3, the most common case */
! 776: if ((d&1)==(e&1))
! 777: { i = 1; goto apply; } /* rule 4, which does the same as rule 2 */
! 778: if (!(d&1))
! 779: { i = 3; goto apply; } /* rule 5 */
! 780: if (d%3 == 0)
! 781: { i = 4; goto apply; } /* rule 6 */
! 782: if ((d+e)%3 == 0)
! 783: { i = 5; goto apply; } /* rule 7 */
! 784: if ((d-e)%3 == 0)
! 785: { i = 6; goto apply; } /* rule 8 */
! 786: /* when we get here, e must be even, for otherwise one of rules 4,5
! 787: * would have applied
! 788: */
! 789: i = 7; /* rule 9 */
! 790:
! 791: apply:
! 792: switch(i) /* i takes values in {0,...,7} here */
! 793: {
! 794: case 0: /* rule 1 */
! 795: e1 = d - e; d = (d + e1)/3; e = (e - e1)/3;
! 796: if ((res = elladd(nbc, A, B, T)) != 0)
! 797: {
! 798: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 799: return res;
! 800: }
! 801: if ((res = elladd2(nbc, T, A, A, T, B, B)) != 0)
! 802: {
! 803: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 804: return res;
! 805: }
! 806: break; /* end of rule 1 */
! 807: case 1: /* rules 2 and 4, part 1 */
! 808: d -= e;
! 809: if ((res = elladd(nbc, A, B, B)) != 0)
! 810: {
! 811: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 812: return res;
! 813: }
! 814: /* FALL THROUGH */
! 815: case 3: /* rule 5, and 2nd part of rules 2 and 4 */
! 816: d >>= 1;
! 817: if ((res = elldouble(nbc, A, A)) != 0)
! 818: {
! 819: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 820: return res;
! 821: }
! 822: break; /* end of rules 2, 4, and 5 */
! 823: case 4: /* rule 6 */
! 824: d /= 3;
! 825: if ((res = elldouble(nbc, A, T)) != 0)
! 826: {
! 827: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 828: return res;
! 829: }
! 830: if ((res = elladd(nbc, T, A, A)) != 0)
! 831: {
! 832: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 833: return res;
! 834: }
! 835: /* FALL THROUGH */
! 836: case 2: /* rule 3, and 2nd part of rule 6 */
! 837: d -= e;
! 838: if ((res = elladd(nbc, A, B, B)) != 0)
! 839: {
! 840: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 841: return res;
! 842: }
! 843: break; /* end of rules 3 and 6 */
! 844: case 5: /* rule 7 */
! 845: d = (d - e - e)/3;
! 846: if ((res = elldouble(nbc, A, T)) != 0)
! 847: {
! 848: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 849: return res;
! 850: }
! 851: if ((res = elladd2(nbc, T, A, A, T, B, B)) != 0)
! 852: {
! 853: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 854: return res;
! 855: }
! 856: break; /* end of rule 7 */
! 857: case 6: /* rule 8 */
! 858: d = (d - e)/3;
! 859: if ((res = elladd(nbc, A, B, B)) != 0)
! 860: {
! 861: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 862: return res;
! 863: }
! 864: if ((res = elldouble(nbc, A, T)) != 0)
! 865: {
! 866: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 867: return res;
! 868: }
! 869: if ((res = elladd(nbc, T, A, A)) != 0)
! 870: {
! 871: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 872: return res;
! 873: }
! 874: break; /* end of rule 8 */
! 875: case 7: /* rule 9 */
! 876: e >>= 1;
! 877: if ((res = elldouble(nbc, B, B)) != 0)
! 878: {
! 879: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 880: return res;
! 881: }
! 882: break; /* end of rule 9 */
! 883: default: /* never reached */
! 884: break;
! 885: }
! 886: /* end of Table 4 processing */
! 887:
! 888: /* swap d <-> e and A <-> B if necessary */
! 889: if (d < e) { r = d; d = e; e = r; S = A; A = B; B = S; }
! 890: } /* while */
! 891: if ((res = elladd(nbc, XAUX, X2, X2)) != 0)
! 892: {
! 893: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
! 894: return res;
! 895: }
! 896: avma = av; return 0;
! 897: }
! 898:
! 899: /* PRAC implementation notes - main changes against the paper version:
! 900: * (1) The general function [m+n]P = f([m]P,[n]P,[m-n]P) collapses (for
! 901: * m!=n) to an elladd() which does not depend on the third argument; and
! 902: * thus all references to the third variable (C in the paper) can be elimi-
! 903: * nated. (2) Since our multipliers are prime, the outer loop of the paper
! 904: * version executes only once, and thus is invisible above. (3) The first
! 905: * step in the inner loop of the paper version will always be rule 3, but
! 906: * the addition requested by this rule amounts to a doubling, and it will
! 907: * always be followed by a swap, so we have unrolled this first iteration.
! 908: * (4) Some simplifications in rules 6 and 7 are possible given the above,
! 909: * and we can save one addition in each of the two cases. NB one can show
! 910: * that none of the other elladd()s in the loop can ever turn out to de-
! 911: * generate into an elldouble. (5) I tried to optimize for rule 3, which
! 912: * is used far more frequently than all others together, but it didn't
! 913: * improve things, so I removed the nested tight loop again. --GN
! 914: */
! 915:
! 916: /* The main loop body of ellfacteur() runs slightly _slower_ under PRAC than
! 917: * under a straightforward left-shift binary multiplication algorithm when
! 918: * N has <30 digits and B1 is small; PRAC wins when N and B1 get larger.
! 919: * Weird. --GN
! 920: */
! 921:
! 922: /* memory layout in ellfacteur(): We'll have a large-ish array of GEN
! 923: * pointers, and one huge chunk of memory containing all the actual GEN
! 924: * (t_INT) objects.
! 925: * nbc will be held constant throughout the invocation.
! 926: */
! 927: /* The B1 stage of each iteration through the main loop needs little
! 928: * space: enough for the X and Y coordinates of the current points,
! 929: * and twice as much again as scratchpad for ellmult().
! 930: */
! 931: /* The B2 stage, starting from some current set of points Q, needs, in
! 932: * succession:
! 933: * - space for [2]Q, [4]Q, ..., [10]Q, and [p]Q for building the helix;
! 934: * - space for 48*nbc X and Y coordinates to hold the helix. Now this
! 935: * could re-use [2]Q,...,[8]Q, but only with difficulty, since we don't
! 936: * know in advance which residue class mod 210 our p is going to be in.
! 937: * It can and should re-use [p]Q, though;
! 938: * - space for (temporarily [30]Q and then) [210]Q, [420]Q, and several
! 939: * further doublings until the giant step multiplier is reached. This
! 940: * _can_ re-use the remaining cells from above. The computation of [210]Q
! 941: * will have been the last call to ellmult() within this iteration of the
! 942: * main loop, so the scratchpad is now also free to be re-used. We also
! 943: * compute [630]Q by a parallel addition; we'll need it later to get the
! 944: * baby-step table bootstrapped a little faster.
! 945: */
! 946: /* Finally, for no more than 4 curves at a time, room for up to 1024 X
! 947: * coordinates only (the Y coordinates needed whilst setting up this baby
! 948: * step table are temporarily stored in the upper half, and overwritten
! 949: * during the last series of additions).
! 950: */
! 951: /* Graphically: after end of B1 stage (X,Y are the coords of Q):
! 952: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
! 953: * | X Y | scratch | [2]Q| [4]Q| [6]Q| [8]Q|[10]Q| ... | ...
! 954: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
! 955: * *X *XAUX *XT *XD *XB
! 956: *
! 957: * [30]Q is computed from [10]Q. [210]Q can go into XY, etc:
! 958: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
! 959: * |[210]|[420]|[630]|[840]|[1680,3360,6720,...,2048*210] |bstp table...
! 960: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
! 961: * *X *XAUX *XT *XD [*XG, somewhere here] *XB .... *XH
! 962: *
! 963: * So we need (13 + 48) * 2 * nbc slots here, and another 4096 slots for
! 964: * the baby step table (not all of which will be used when we start with a
! 965: * small B1, but it's better to allocate and initialize ahead of time all
! 966: * the slots that might be needed later).
! 967: */
! 968: /* Note on memory locality: During the B2 phase, accesses to the helix
! 969: * (once it has been set up) will be clustered by curves (4 out of nbc at
! 970: * a time). Accesses to the baby steps table will wander from one end of
! 971: * the array to the other and back, one such cycle per giant step, and
! 972: * during a full cycle we would expect on the order of 2E4 accesses when
! 973: * using the largest giant step size. Thus we shouldn't be doing too bad
! 974: * with respect to thrashing a (512KBy) L2 cache. However, we don't want
! 975: * the baby step table to grow larger than this, even if it would reduce
! 976: * the number of E.C. operations by a few more per cent for very large B2,
! 977: * lest cache thrashing slow down everything disproportionally. --GN
! 978: */
! 979:
! 980: /* parameters for miller() via snextpr(), for use by ellfacteur() */
! 981: #define miller_k1 16 /* B1 phase, foolproof below 10^12 */
! 982: #define miller_k2 1 /* B2 phase, not foolproof, much faster */
! 983: /* (miller_k2 will let thousands of composites slip through, which doesn't
! 984: * harm ECM, but ellmult() during the B1 phase should only be fed primes
! 985: * which really are prime)
! 986: */
! 987: /* ellfacteur() has been re-tuned to be useful as a first stage before
! 988: * MPQS, especially for _large_ arguments, when insist is false, and now
! 989: * also for the case when insist is true, vaguely following suggestions
! 990: * by Paul Zimmermann (see http://www.loria.fr/~zimmerma/ and especially
! 991: * http://www.loria.fr/~zimmerma/records/ecmnet.html) of INRIA/LORIA.
! 992: * --GN 1998Jul,Aug
! 993: */
! 994: GEN
! 995: ellfacteur(GEN n, int insist)
! 996: {
! 997: static ulong TB1[] =
! 998: {
! 999: /* table revised, cf. below 1998Aug15 --GN */
! 1000: 142,172,208,252,305,370,450,545,661,801,972,1180,1430,
! 1001: 1735,2100,2550,3090,3745,4540,5505,6675,8090,9810,11900,
! 1002: 14420,17490,21200,25700,31160,37780UL,45810UL,55550UL,67350UL,
! 1003: 81660UL,99010UL,120050UL,145550UL,176475UL,213970UL,259430UL,
! 1004: 314550UL,381380UL,462415UL,560660UL,679780UL,824220UL,999340UL,
! 1005: 1211670UL,1469110UL,1781250UL,2159700UL,2618600UL,3175000UL,
! 1006: 3849600UL,4667500UL,5659200UL,6861600UL,8319500UL,10087100UL,
! 1007: 12230300UL,14828900UL,17979600UL,21799700UL,26431500UL,
! 1008: 32047300UL,38856400UL, /* 110 times that still fits into 32bits */
! 1009: #ifdef LONG_IS_64BIT
! 1010: 47112200UL,57122100UL,69258800UL,83974200UL,101816200UL,
! 1011: 123449000UL,149678200UL,181480300UL,220039400UL,266791100UL,
! 1012: 323476100UL,392204900UL,475536500UL,576573500UL,699077800UL,
! 1013: 847610500UL,1027701900UL,1246057200UL,1510806400UL,1831806700UL,
! 1014: 2221009800UL,2692906700UL,3265067200UL,3958794400UL,4799917500UL,
! 1015: /* the only reason to stop here is that I got bored (and that users
! 1016: * will get bored watching their 64bit machines churning on such large
! 1017: * numbers for month after month). Someone can extend this table when
! 1018: * the hardware has gotten 100 times faster than now --GN
! 1019: */
! 1020: #endif
! 1021: };
! 1022: static ulong TB1_for_stage[] =
! 1023: {
! 1024: /* table revised 1998Aug11 --GN. The idea is to start a little below
! 1025: * the optimal B1 for finding factors which would just have been missed
! 1026: * by pollardbrent(), and escalate gradually, changing curves suf-
! 1027: * ficiently frequently to give good coverage of the small factor
! 1028: * ranges. The table entries grow a bit faster than what Paul says
! 1029: * would be optimal, but having a single table instead of a 2D array
! 1030: * keeps the code simple
! 1031: */
! 1032: 500,520,560,620,700,800,900,1000,1150,1300,1450,1600,1800,2000,
! 1033: 2200,2450,2700,2950,3250,3600,4000,4400,4850,5300,5800,6400,
! 1034: 7100,7850,8700,9600,10600,11700,12900,14200,15700,17300,
! 1035: 19000,21000,23200,25500,28000,31000,34500UL,38500UL,43000UL,
! 1036: 48000UL,53800UL,60400UL,67750UL,76000UL,85300UL,95700UL,
! 1037: 107400UL,120500UL,135400UL,152000UL,170800UL,191800UL,215400UL,
! 1038: 241800UL,271400UL,304500UL,341500UL,383100UL,429700UL,481900UL,
! 1039: 540400UL,606000UL,679500UL,761800UL,854100UL,957500UL,1073500UL,
! 1040: };
! 1041: long nbc,nbc2,dsn,dsnmax,rep,spc,gse,gss,rcn,rcn0,bstp,bstp0;
! 1042: long a,i,j,k, av,av1,avtmp, size = expi(n) + 1, tf = lgefint(n);
! 1043: ulong B1,B2,B2_p,B2_rt,m,p,p0,p2,dp;
! 1044: GEN w,w0,x,*X,*XT,*XD,*XG,*YG,*XH,*XB,*XB2,*Xh,*Yh,*Xb, res = cgeti(tf);
! 1045: int rflag, use_clones = 0;
! 1046: byteptr d, d0;
! 1047:
! 1048: av = avma; /* taking res into account */
! 1049: N = n; /* make n known to auxiliary functions */
! 1050: /* determine where we'll start, how long we'll persist, and how many
! 1051: * curves we'll use in parallel
! 1052: */
! 1053: if (insist)
! 1054: {
! 1055: dsnmax = (size >> 2) - 10;
! 1056: if (dsnmax < 0) dsnmax = 0;
! 1057: #ifdef LONG_IS_64BIT
! 1058: else if (dsnmax > 90) dsnmax = 90;
! 1059: #else
! 1060: else if (dsnmax > 65) dsnmax = 65;
! 1061: #endif
! 1062: dsn = (size >> 3) - 5;
! 1063: if (dsn < 0) dsn = 0;
! 1064: else if (dsn > 47) dsn = 47;
! 1065: /* pick up the torch where non-insistent stage would have given up */
! 1066: nbc = dsn + (dsn >> 2) + 9; /* 8 or more curves in parallel */
! 1067: nbc &= ~3; /* nbc is always a multiple of 4 */
! 1068: if (nbc > nbcmax) nbc = nbcmax;
! 1069: a = 1 + (nbcmax<<7); /* seed for choice of curves */
! 1070: rep = 0; /* gcc -Wall */
! 1071: }
! 1072: else
! 1073: {
! 1074: dsn = (size - 140) >> 3;
! 1075: if (dsn > 12) dsn = 12;
! 1076: dsnmax = 72;
! 1077: if (dsn < 0) /* < 140 bits: decline the task */
! 1078: {
! 1079: #ifdef __EMX__
! 1080: /* MPQS's disk access under DOS/EMX would be abysmally slow, so... */
! 1081: dsn = 0;
! 1082: rep = 20;
! 1083: nbc = 8;
! 1084: #else
! 1085: if (DEBUGLEVEL >= 4)
! 1086: {
! 1087: fprintferr("ECM: number too small to justify this stage\n");
! 1088: flusherr();
! 1089: }
! 1090: avma = av; return NULL;
! 1091: #endif
! 1092: }
! 1093: else
! 1094: {
! 1095: rep = (size <= 248 ?
! 1096: (size <= 176 ? (size - 124) >> 4 : (size - 148) >> 3) :
! 1097: (size - 224) >> 1);
! 1098: nbc = ((size >> 3) << 2) - 80;
! 1099: if (nbc < 8) nbc = 8;
! 1100: else if (nbc > nbcmax) nbc = nbcmax;
! 1101: #ifdef __EMX__
! 1102: rep += 20;
! 1103: #endif
! 1104: }
! 1105:
! 1106: /* it may be convenient to use disjoint sets of curves for the non-insist
! 1107: * and insist phases; moreover, repeated non-insistent calls acting on
! 1108: * factors of the same original number should try to use fresh curves.
! 1109: * The following achieves this
! 1110: */
! 1111: a = 1 + (nbcmax<<3)*(size & 0xf);
! 1112: }
! 1113: if (dsn > dsnmax) dsn = dsnmax;
! 1114:
! 1115: if (DEBUGLEVEL >= 4)
! 1116: {
! 1117: (void) timer2(); /* clear timer */
! 1118: fprintferr("ECM: working on %ld curves at a time; initializing", nbc);
! 1119: if (!insist)
! 1120: {
! 1121: if (rep == 1)
! 1122: fprintferr(" for one round");
! 1123: else
! 1124: fprintferr(" for up to %ld rounds", rep);
! 1125: }
! 1126: fprintferr("...\n");
! 1127: }
! 1128:
! 1129: /* The auxiliary routines above need < (3*nbc+240)*tf words on the PARI
! 1130: * stack, in addition to the spc*(tf+1) words occupied by our main table.
! 1131: * If stack space is already tight, try the heap, using newbloc() and
! 1132: * killbloc()
! 1133: */
! 1134: nbc2 = nbc << 1;
! 1135: spc = (13 + 48) * nbc2 + bstpmax * 4;
! 1136: if ((long)((GEN)avma - (GEN)bot) < spc + 385 + (spc + 3*nbc + 240)*tf)
! 1137: {
! 1138: if (DEBUGLEVEL >= 5)
! 1139: {
! 1140: fprintferr("ECM: stack tight, using clone space on the heap\n");
! 1141: }
! 1142: use_clones = 1;
! 1143: x = newbloc(spc + 385);
! 1144: }
! 1145: else
! 1146: x = new_chunk(spc + 385);
! 1147: X = 1 + (GEN*)x; /* B1 phase: current point */
! 1148: XAUX = X + nbc2; /* scratchpad for ellmult() */
! 1149: XT = XAUX + nbc2; /* ditto, will later hold [3*210]Q */
! 1150: XD = XT + nbc2; /* room for various multiples */
! 1151: XB = XD + 20*nbc; /* start of baby steps table */
! 1152: XB2 = XB + 2 * bstpmax; /* middle of baby steps table */
! 1153: XH = XB2 + 2 * bstpmax; /* end of bstps table, start of helix */
! 1154: Xh = XH + 96*nbc; /* little helix, X coords */
! 1155: Yh = XH + 192; /* ditto, Y coords */
! 1156: /* XG will be set later, inside the main loop, since it depends on B2 */
! 1157:
! 1158: {
! 1159: long tw = evallg(tf) | evaltyp(t_INT);
! 1160:
! 1161: if (use_clones)
! 1162: w = newbloc(spc*tf);
! 1163: else
! 1164: w = new_chunk(spc*tf);
! 1165: w0 = w; /* remember this for later... */
! 1166: for (i = spc; i--; )
! 1167: {
! 1168: *w = tw; X[i] = w; w += tf; /* hack for: w = cgeti(tf) */
! 1169: }
! 1170: /* Xh range of 384 pointers not set; these will later duplicate the
! 1171: * pointers in the XH range, 4 curves at a time. Some of the cells
! 1172: * reserved here for the XB range will never be used, instead, we'll
! 1173: * warp the pointers to connect to (read-only) GENs in the X/XD range;
! 1174: * it would be complicated to skip them here to conserve merely a few
! 1175: * KBy of stack or heap space. --GN
! 1176: */
! 1177: }
! 1178:
! 1179: /* *** ECM MAIN LOOP *** */
! 1180: for(;;)
! 1181: {
! 1182: d = diffptr; rcn = NPRC; /* multipliers begin at the beginning */
! 1183:
! 1184: /* pick curves */
! 1185: for (i = nbc2; i--; ) affsi(a++, X[i]);
! 1186: /* pick bounds */
! 1187: B1 = insist ? TB1[dsn] : TB1_for_stage[dsn];
! 1188: B2 = 110*B1;
! 1189: B2_rt = (ulong)(sqrt((double)B2));
! 1190: /* pick giant step exponent and size.
! 1191: * With 32 baby steps, a giant step corresponds to 32*420 = 13440, appro-
! 1192: * priate for the smallest B2s. With 1024, a giant step will be 430080;
! 1193: * this will be appropriate for B1 >~ 42000, where 512 baby steps would
! 1194: * imply roughly the same number of E.C. additions.
! 1195: */
! 1196: gse = (B1 < 656 ?
! 1197: (B1 < 200 ? 5 : 6) :
! 1198: (B1 < 10500 ?
! 1199: (B1 < 2625 ? 7 : 8) :
! 1200: (B1 < 42000 ? 9 : 10)
! 1201: )
! 1202: );
! 1203: gss = 1UL << gse;
! 1204: XG = XT + gse*nbc2; /* will later hold [2^(gse+1)*210]Q */
! 1205: YG = XG + nbc;
! 1206:
! 1207: if (DEBUGLEVEL >= 4)
! 1208: {
! 1209: fprintferr("ECM: time = %6ld ms\nECM: dsn = %2ld,\tB1 = %4lu,",
! 1210: timer2(), dsn, B1);
! 1211: fprintferr("\tB2 = %6lu,\tgss = %4ld*420\n", B2, gss);
! 1212: flusherr();
! 1213: }
! 1214: p = *d++;
! 1215:
! 1216: /* ---B1 PHASE--- */
! 1217: /* treat p=2 separately */
! 1218: B2_p = B2 >> 1;
! 1219: for (m=1; m<=B2_p; m<<=1)
! 1220: {
! 1221: if ((rflag = elldouble(nbc, X, X)) > 1) goto fin;
! 1222: else if (rflag) break;
! 1223: }
! 1224:
! 1225: /* p=3,...,nextprime(B1) */
! 1226: while (p < B1 && p <= B2_rt)
! 1227: {
! 1228: p = snextpr(p, &d, &rcn, NULL, miller_k1);
! 1229: B2_p = B2/p; /* beware integer overflow on 32-bit CPUs */
! 1230: for (m=1; m<=B2_p; m*=p)
! 1231: {
! 1232: if ((rflag = ellmult(nbc, p, X, X)) > 1) goto fin;
! 1233: else if (rflag) break;
! 1234: }
! 1235: }
! 1236: /* primes p larger than sqrt(B2) can appear only to the 1st power */
! 1237: while (p < B1)
! 1238: {
! 1239: p = snextpr(p, &d, &rcn, NULL, miller_k1);
! 1240: if (ellmult(nbc, p, X, X) > 1) goto fin; /* p^2 > B2: no loop */
! 1241: }
! 1242:
! 1243: if (DEBUGLEVEL >= 4)
! 1244: {
! 1245: fprintferr("ECM: time = %6ld ms, B1 phase done, ", timer2());
! 1246: fprintferr("p = %lu, setting up for B2\n", p);
! 1247: }
! 1248:
! 1249: /* ---B2 PHASE--- */
! 1250: /* compute [2]Q,...,[10]Q, which we need to build the helix */
! 1251: if (elldouble(nbc, X, XD) > 1)
! 1252: goto fin; /* [2]Q */
! 1253: if (elldouble(nbc, XD, XD + nbc2) > 1)
! 1254: goto fin; /* [4]Q */
! 1255: if (elladd(nbc, XD, XD + nbc2, XD + (nbc<<2)) > 1)
! 1256: goto fin; /* [6]Q */
! 1257: if (elladd2(nbc,
! 1258: XD, XD + (nbc<<2), XT + (nbc<<3),
! 1259: XD + nbc2, XD + (nbc<<2), XD + (nbc<<3)) > 1)
! 1260: goto fin; /* [8]Q and [10]Q */
! 1261: if (DEBUGLEVEL >= 7)
! 1262: fprintferr("\t(got [2]Q...[10]Q)\n");
! 1263:
! 1264: /* get next prime (still using the foolproof test) */
! 1265: p = snextpr(p, &d, &rcn, NULL, miller_k1);
! 1266: /* make sure we have the residue class number (mod 210) */
! 1267: if (rcn == NPRC)
! 1268: {
! 1269: rcn = prc210_no[(p % 210) >> 1];
! 1270: if (rcn == NPRC)
! 1271: {
! 1272: fprintferr("ECM: %lu should have been prime but isn\'t\n", p);
! 1273: err(bugparier, "ellfacteur");
! 1274: }
! 1275: }
! 1276:
! 1277: /* compute [p]Q and put it into its place in the helix */
! 1278: if (ellmult(nbc, p, X, XH + rcn*nbc2) > 1) goto fin;
! 1279: if (DEBUGLEVEL >= 7)
! 1280: fprintferr("\t(got [p]Q, p = %lu = %lu mod 210)\n",
! 1281: p, (ulong)(prc210_rp[rcn]));
! 1282:
! 1283: /* save current p, d, and rcn; we'll need them more than once below */
! 1284: p0 = p;
! 1285: d0 = d;
! 1286: rcn0 = rcn; /* remember where the helix wraps */
! 1287: bstp0 = 0; /* p is at baby-step offset 0 from itself */
! 1288:
! 1289: /* fill up the helix, stepping forward through the prime residue classes
! 1290: * mod 210 until we're back at the r'class of p0. Keep updating p so
! 1291: * that we can print meaningful diagnostics if a factor shows up; but
! 1292: * don't bother checking which of these p's are in fact prime
! 1293: */
! 1294: for (i = 47; i; i--) /* 47 iterations */
! 1295: {
! 1296: p += (dp = (ulong)prc210_d1[rcn]);
! 1297: if (rcn == 47)
! 1298: { /* wrap mod 210 */
! 1299: if (elladd(nbc, XT + dp*nbc, XH + rcn*nbc2, XH) > 1)
! 1300: goto fin;
! 1301: rcn = 0;
! 1302: continue;
! 1303: }
! 1304: if (elladd(nbc, XT + dp*nbc, XH + rcn*nbc2, XH + rcn*nbc2 + nbc2) > 1)
! 1305: goto fin;
! 1306: rcn++;
! 1307: }
! 1308: if (DEBUGLEVEL >= 7)
! 1309: fprintferr("\t(got initial helix)\n");
! 1310:
! 1311: /* compute [210]Q etc, which will be needed for the baby step table */
! 1312: if (ellmult(nbc, 3, XD + (nbc<<3), X) > 1) goto fin;
! 1313: if (ellmult(nbc, 7, X, X) > 1) goto fin; /* [210]Q */
! 1314: /* this was the last call to ellmult() in the main loop body; may now
! 1315: * overwrite XAUX and slots XD and following
! 1316: */
! 1317: if (elldouble(nbc, X, XAUX) > 1) goto fin; /* [420]Q */
! 1318: if (elladd(nbc, X, XAUX, XT) > 1) goto fin; /* [630]Q */
! 1319: if (elladd(nbc, X, XT, XD) > 1) goto fin; /* [840]Q */
! 1320: for (i=1; i <= gse; i++) /* gse successive doublings */
! 1321: {
! 1322: if (elldouble(nbc, XT + i*nbc2, XD + i*nbc2) > 1) goto fin;
! 1323: }
! 1324: /* (the last iteration has initialized XG to [210*2^(gse+1)]Q) */
! 1325:
! 1326: if (DEBUGLEVEL >= 4)
! 1327: {
! 1328: fprintferr("ECM: time = %6ld ms, entering B2 phase, p = %lu\n",
! 1329: timer2(), p);
! 1330: }
! 1331:
! 1332: /* inner loop over small sets of 4 curves at a time */
! 1333: for (i = nbc - 4; i >= 0; i -= 4)
! 1334: {
! 1335: if (DEBUGLEVEL >= 6)
! 1336: fprintferr("ECM: finishing curves %ld...%ld\n", i, i+3);
! 1337: /* copy relevant pointers from XH to Xh. Recall memory layout in XH
! 1338: * is: nbc X coordinates followed by nbc Y coordinates for residue
! 1339: * class 1 mod 210, then the same for r.c. 11 mod 210, etc. Memory
! 1340: * layout for Xh is: four X coords for 1 mod 210, four for 11 mod 210,
! 1341: * etc, four for 209 mod 210, and then the corresponding Y coordinates
! 1342: * in the same order. This will allow us to do a giant step on Xh
! 1343: * using just three calls to elladd0() each acting on 64 points in
! 1344: * parallel
! 1345: */
! 1346: for (j = 48; j--; )
! 1347: {
! 1348: k = nbc2*j + i;
! 1349: m = j << 2; /* X coordinates */
! 1350: Xh[m] = XH[k]; Xh[m+1] = XH[k+1];
! 1351: Xh[m+2] = XH[k+2]; Xh[m+3] = XH[k+3];
! 1352: k += nbc; /* Y coordinates */
! 1353: Yh[m] = XH[k]; Yh[m+1] = XH[k+1];
! 1354: Yh[m+2] = XH[k+2]; Yh[m+3] = XH[k+3];
! 1355: }
! 1356: /* build baby step table of X coords of multiples of [210]Q. XB[4*j]
! 1357: * will point at X coords on four curves from [(j+1)*210]Q. Until
! 1358: * we're done, we need some Y coords as well, which we keep in the
! 1359: * second half of the table, overwriting them at the end when gse==10.
! 1360: * Those multiples which we already have (by 1,2,3,4,8,16,...,2^gse)
! 1361: * are entered simply by copying the pointers, ignoring the small
! 1362: * number of slots in w that were initially reserved for them.
! 1363: * Here are the initial entries...
! 1364: */
! 1365: for (Xb=XB,k=2,j=i; k--; Xb=XB2,j+=nbc) /* do first X, then Y coords */
! 1366: {
! 1367: Xb[0] = X[j]; Xb[1] = X[j+1]; /* [210]Q */
! 1368: Xb[2] = X[j+2]; Xb[3] = X[j+3];
! 1369: Xb[4] = XAUX[j]; Xb[5] = XAUX[j+1]; /* [420]Q */
! 1370: Xb[6] = XAUX[j+2]; Xb[7] = XAUX[j+3];
! 1371: Xb[8] = XT[j]; Xb[9] = XT[j+1]; /* [630]Q */
! 1372: Xb[10] = XT[j+2]; Xb[11] = XT[j+3];
! 1373: Xb += 4; /* this points at [420]Q */
! 1374: /* ... entries at powers of 2 times 210 .... */
! 1375: for (m = 2; m < (ulong)gse+k; m++) /* omit Y coords of [2^gse*210]Q */
! 1376: {
! 1377: long m2 = m*nbc2 + j;
! 1378: Xb += (2UL<<m); /* points now at [2^m*210]Q */
! 1379: Xb[0] = XAUX[m2]; Xb[1] = XAUX[m2+1];
! 1380: Xb[2] = XAUX[m2+2]; Xb[3] = XAUX[m2+3];
! 1381: }
! 1382: }
! 1383: if (DEBUGLEVEL >= 7)
! 1384: fprintferr("\t(extracted precomputed helix / baby step entries)\n");
! 1385: /* ... glue in between, up to 16*210 ... */
! 1386: if (elladd0(12, 4, /* 12 pts + (4 pts replicated thrice) */
! 1387: XB + 12, XB2 + 12,
! 1388: XB, XB2,
! 1389: XB + 16, XB2 + 16)
! 1390: > 1) goto fin; /* 4 + {1,2,3} = {5,6,7} */
! 1391: if (elladd0(28, 4, /* 28 pts + (4 pts replicated 7fold) */
! 1392: XB + 28, XB2 + 28,
! 1393: XB, XB2,
! 1394: XB + 32, XB2 + 32)
! 1395: > 1) goto fin; /* 8 + {1,...,7} = {9,...,15} */
! 1396: /* ... and the remainder of the lot */
! 1397: for (m = 5; m <= (ulong)gse; m++)
! 1398: {
! 1399: /* fill in from 2^(m-1)+1 to 2^m-1 in chunks of 64 and 60 points */
! 1400: ulong m2 = 2UL << m; /* will point at 2^(m-1)+1 */
! 1401: for (j = 0; (ulong)j < m2-64; j+=64) /* executed 0 times when m == 5 */
! 1402: {
! 1403: if (elladd0(64, 4,
! 1404: XB + m2 - 4, XB2 + m2 - 4,
! 1405: XB + j, XB2 + j,
! 1406: XB + m2 + j,
! 1407: (m<(ulong)gse ? XB2 + m2 + j : NULL))
! 1408: > 1) goto fin;
! 1409: } /* j == m2-64 here, 60 points left */
! 1410: if (elladd0(60, 4,
! 1411: XB + m2 - 4, XB2 + m2 - 4,
! 1412: XB + j, XB2 + j,
! 1413: XB + m2 + j,
! 1414: (m<(ulong)gse ? XB2 + m2 + j : NULL))
! 1415: > 1) goto fin;
! 1416: /* (when m==gse, drop Y coords of result, and when both equal 1024,
! 1417: * overwrite Y coords of second argument with X coords of result)
! 1418: */
! 1419: }
! 1420: if (DEBUGLEVEL >= 7)
! 1421: fprintferr("\t(baby step table complete)\n");
! 1422: /* initialize a few other things */
! 1423: bstp = bstp0;
! 1424: p = p0; d = d0; rcn = rcn0;
! 1425: gl = gun;
! 1426: av1 = avma;
! 1427: /* scratchspace for prod (x_i-x_j) */
! 1428: avtmp = (long)new_chunk(8 * lgefint(n));
! 1429: /* the correct entry in XB to use depends on bstp and on where we are
! 1430: * on the helix. As we skip from prime to prime, bstp will be incre-
! 1431: * mented by snextpr() each time we wrap around through residue class
! 1432: * number 0 (1 mod 210), but the baby step should not be taken until
! 1433: * rcn>=rcn0 (i.e. until we pass again the residue class of p0).
! 1434: * The correct signed multiplier is thus k = bstp - (rcn < rcn0),
! 1435: * and the offset from XB is four times (|k| - 1). When k==0, we may
! 1436: * ignore the current prime (if it had led to a factorization, this
! 1437: * would have been noted during the last giant step, or -- when we
! 1438: * first get here -- whilst initializing the helix). When k > gss,
! 1439: * we must do a giant step and bump bstp back by -2*gss.
! 1440: * The gcd of the product of X coord differences against N is taken just
! 1441: * before we do a giant step.
! 1442: */
! 1443: /* loop over probable primes p0 < p <= nextprime(B2),
! 1444: * inserting giant steps as necessary
! 1445: */
! 1446: while (p < B2)
! 1447: {
! 1448: /* save current p for diagnostics */
! 1449: p2 = p;
! 1450: /* get next probable prime */
! 1451: p = snextpr(p, &d, &rcn, &bstp, miller_k2);
! 1452: /* work out the corresponding baby-step multiplier */
! 1453: k = bstp - (rcn < rcn0 ? 1 : 0);
! 1454: /* check whether it's giant-step time */
! 1455: if (k > gss)
! 1456: {
! 1457: /* take gcd */
! 1458: gl = mppgcd(gl, n);
! 1459: if (!is_pm1(gl) && !egalii(gl, n)) { p = p2; goto fin; }
! 1460: gl = gun;
! 1461: avma = av1;
! 1462: while (k > gss) /* hm, just how large are those prime gaps? */
! 1463: {
! 1464: /* giant step */
! 1465: if (DEBUGLEVEL >= 7)
! 1466: fprintferr("\t(giant step at p = %lu)\n", p);
! 1467: if (elladd0(64, 4,
! 1468: XG + i, YG + i,
! 1469: Xh, Yh, Xh, Yh) > 1) goto fin;
! 1470: if (elladd0(64, 4,
! 1471: XG + i, YG + i,
! 1472: Xh + 64, Yh + 64, Xh + 64, Yh + 64) > 1) goto fin;
! 1473: if (elladd0(64, 4,
! 1474: XG + i, YG + i,
! 1475: Xh + 128, Yh + 128, Xh + 128, Yh + 128)
! 1476: > 1) goto fin;
! 1477: bstp -= (gss << 1);
! 1478: /* recompute multiplier */
! 1479: k = bstp - (rcn < rcn0 ? 1 : 0);
! 1480: }
! 1481: }
! 1482: if (!k) continue; /* point of interest is already in Xh */
! 1483: if (k < 0) k = -k;
! 1484: m = ((ulong)k - 1) << 2;
! 1485: /* accumulate product of differences of X coordinates */
! 1486: j = rcn<<2;
! 1487: avma = avtmp; /* go to garbage zone */
! 1488: gl = modii(mulii(gl, subii(XB[m], Xh[j])), n);
! 1489: gl = modii(mulii(gl, subii(XB[m+1], Xh[j+1])), n);
! 1490: gl = modii(mulii(gl, subii(XB[m+2], Xh[j+2])), n);
! 1491: gl = mulii(gl, subii(XB[m+3], Xh[j+3]));
! 1492: avma = av1;
! 1493: gl = modii(gl, n);
! 1494: } /* loop over p */
! 1495: avma = av1;
! 1496: } /* for i (loop over sets of 4 curves) */
! 1497:
! 1498: /* continuation part of main loop */
! 1499:
! 1500: if (dsn < dsnmax)
! 1501: {
! 1502: dsn += insist ? 1 : 2;
! 1503: if (dsn > dsnmax) dsn = dsnmax;
! 1504: }
! 1505:
! 1506: if (!insist && !--rep)
! 1507: {
! 1508: if (DEBUGLEVEL >= 4)
! 1509: {
! 1510: fprintferr("ECM: time = %6ld ms,\tellfacteur giving up.\n",
! 1511: timer2());
! 1512: flusherr();
! 1513: }
! 1514: avma = av;
! 1515: if (use_clones) { gunclone(w0); gunclone(x); }
! 1516: return NULL;
! 1517: }
! 1518: }
! 1519: /* *** END OF ECM MAIN LOOP *** */
! 1520: fin:
! 1521: affii(gl, res);
! 1522:
! 1523: if (DEBUGLEVEL >= 4)
! 1524: {
! 1525: fprintferr("ECM: time = %6ld ms,\tp <= %6lu,\n\tfound factor = %Z\n",
! 1526: timer2(), p, res);
! 1527: flusherr();
! 1528: }
! 1529: avma=av;
! 1530: if (use_clones) { gunclone(w0); gunclone(x); }
! 1531: return res;
! 1532: }
! 1533:
! 1534: /***********************************************************************/
! 1535: /** **/
! 1536: /** FACTORIZATION (Pollard-Brent rho) **/
! 1537: /** pollardbrent() returns a nontrivial factor of n, assuming n is **/
! 1538: /** composite and has no small prime divisor, or NULL if going on **/
! 1539: /** would take more time than we want to spend. Sometimes it will **/
! 1540: /** find more than one factor, and return a structure suitable for **/
! 1541: /** interpretation by ifac_crack() below. GN1998Jun18-26 **/
! 1542: /** (Cf. Algorithm 8.5.2 in ACiCNT) **/
! 1543: /** **/
! 1544: /***********************************************************************/
! 1545:
! 1546: static void
! 1547: rho_dbg(long c, long msg_mask)
! 1548: {
! 1549: if (c & msg_mask) return;
! 1550: fprintferr("Rho: time = %6ld ms,\t%3ld round%s\n",
! 1551: timer2(), c, (c==1?"":"s"));
! 1552: flusherr();
! 1553: }
! 1554:
! 1555: /* Tuning parameter: for input up to 64 bits long, we must not spend more
! 1556: * than a very short time, for fear of slowing things down on average.
! 1557: * With the current tuning formula, increase our efforts somewhat at 49 bit
! 1558: * input (an extra round for each bit at first), and go up more and more
! 1559: * rapidly after we pass 80 bits.-- Changed this (again...) to adjust for
! 1560: * the presence of squfof, which will finish input up to 59 bits quickly.
! 1561: */
! 1562:
! 1563: #define tune_pb_min 14 /* even 15 seems too much. */
! 1564:
! 1565: /* We return NULL when we run out of time, or a single t_INT containing a
! 1566: * nontrivial factor of n, or a vector of t_INTs, each triple of successive
! 1567: * entries containing a factor, an exponent (equal to un), and a factor
! 1568: * class (NULL for unknown or zero for known composite), matching the
! 1569: * internal representation used by the ifac_*() routines below. Repeated
! 1570: * factors can arise and are legal; the caller will be sorting the factors
! 1571: * anyway.
! 1572: */
! 1573: GEN
! 1574: pollardbrent(GEN n)
! 1575: {
! 1576: long tf = lgefint(n), size = 0, delta, retries = 0, msg_mask;
! 1577: long c0, c, k, k1, l, avP, avx, GGG, av = avma;
! 1578: GEN x, x1, y, P, g, g1, res;
! 1579:
! 1580: if (DEBUGLEVEL >= 4) (void)timer2(); /* clear timer */
! 1581:
! 1582: if (tf >= 4)
! 1583: size = expi(n) + 1;
! 1584: else if (tf == 3) /* try to keep purify happy... */
! 1585: size = BITS_IN_LONG - bfffo(n[2]);
! 1586:
! 1587: if (size <= 28)
! 1588: c0 = 32; /* amounts very nearly to `insist'.
! 1589: * Now that we have squfof(), we don't insist
! 1590: * any more when input is 2^29 ... 2^32
! 1591: */
! 1592: else if (size <= 42)
! 1593: c0 = tune_pb_min;
! 1594: else if (size <= 59) /* match squfof() cutoff point */
! 1595: c0 = tune_pb_min + ((size - 42)<<1);
! 1596: else if (size <= 72)
! 1597: c0 = tune_pb_min + size - 24;
! 1598: else if (size <= 301)
! 1599: /* nonlinear increase in effort, kicking in around 80 bits */
! 1600: /* 301 gives 48121 + tune_pb_min */
! 1601: c0 = tune_pb_min + size - 60 +
! 1602: ((size-73)>>1)*((size-70)>>3)*((size-56)>>4);
! 1603: else
! 1604: c0 = 49152; /* ECM is faster when it'd take longer */
! 1605:
! 1606: c = c0 << 5; /* 32 iterations per round */
! 1607: msg_mask = (size >= 448? 0x1fff:
! 1608: (size >= 192? (256L<<((size-128)>>6))-1: 0xff));
! 1609: PB_RETRY:
! 1610: /* trick to make a `random' choice determined by n. Don't use x^2+0 or
! 1611: * x^2-2, ever. Don't use x^2-3 or x^2-7 with a starting value of 2.
! 1612: * x^2+4, x^2+9 are affine conjugate to x^2+1, so don't use them either.
! 1613: *
! 1614: * (the point being that when we get called again on a composite cofactor
! 1615: * of something we've already seen, we had better avoid the same delta)
! 1616: */
! 1617: switch ((size + retries) & 7)
! 1618: {
! 1619: case 0: delta= 1; break;
! 1620: case 1: delta= -1; break;
! 1621: case 2: delta= 3; break;
! 1622: case 3: delta= 5; break;
! 1623: case 4: delta= -5; break;
! 1624: case 5: delta= 7; break;
! 1625: case 6: delta= 11; break;
! 1626: /* case 7: */
! 1627: default: delta=-11; break;
! 1628: }
! 1629: if (DEBUGLEVEL >= 4)
! 1630: {
! 1631: if (!retries)
! 1632: {
! 1633: if (size < 1536)
! 1634: fprintferr("Rho: searching small factor of %ld-bit integer\n", size);
! 1635: else
! 1636: fprintferr("Rho: searching small factor of %ld-word integer\n", tf-2);
! 1637: }
! 1638: else
! 1639: fprintferr("Rho: restarting for remaining rounds...\n");
! 1640: fprintferr("Rho: using X^2%+1ld for up to %ld rounds of 32 iterations\n",
! 1641: delta, c >> 5);
! 1642: flusherr();
! 1643: }
! 1644: x=gdeux; P=gun; g1 = NULL; k = 1; l = 1;
! 1645: (void)new_chunk(10 + 6 * tf); /* enough for cgetg(10) + 3 divii */
! 1646: y = cgeti(tf); affsi(2, y);
! 1647: x1= cgeti(tf); affsi(2, x1);
! 1648: avx = avma;
! 1649: avP = (long)new_chunk(2 * tf); /* enough for x = addsi(tf+1) */
! 1650: GGG = (long)new_chunk(4 * tf); /* enough for P = modii(2tf+1, tf) */
! 1651:
! 1652: for (;;) /* terminated under the control of c */
! 1653: {
! 1654: /* use the polynomial x^2 + delta */
! 1655: #define one_iter() {\
! 1656: avma = GGG; x = resii(sqri(x), n); /* to garbage zone */\
! 1657: avma = avx; x = addsi(delta,x); /* erase garbage */\
! 1658: avma = GGG; P = mulii(P, subii(x1, x));\
! 1659: avma = avP; P = modii(P,n); }
! 1660:
! 1661: one_iter();
! 1662:
! 1663: if ((--c & 0x1f)==0) /* one round complete */
! 1664: {
! 1665: g = mppgcd(n, P);
! 1666: if (!is_pm1(g)) goto fin; /* caught something */
! 1667: if (c <= 0)
! 1668: { /* getting bored */
! 1669: if (DEBUGLEVEL >= 4)
! 1670: {
! 1671: fprintferr("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
! 1672: timer2());
! 1673: flusherr();
! 1674: }
! 1675: avma=av; return NULL;
! 1676: }
! 1677: P = gun; /* not necessary, but saves 1 mulii/round */
! 1678: if (DEBUGLEVEL >= 4) rho_dbg(c0-(c>>5), msg_mask);
! 1679: affii(x,y);
! 1680: }
! 1681:
! 1682: if (--k) continue; /* normal end of loop body */
! 1683:
! 1684: if (c & 0x1f) /* otherwise, we already checked */
! 1685: {
! 1686: g = mppgcd(n, P);
! 1687: if (!is_pm1(g)) goto fin;
! 1688: P = gun;
! 1689: }
! 1690:
! 1691: /* Fast forward phase, doing l inner iterations without computing gcds.
! 1692: * Check first whether it would take us beyond the alloted time.
! 1693: * Fast forward rounds count only half (although they're taking
! 1694: * more like 2/3 the time of normal rounds). This to counteract the
! 1695: * nuisance that all c0 between 4096 and 6144 would act exactly as
! 1696: * 4096; with the halving trick only the range 4096..5120 collapses
! 1697: * (similarly for all other powers of two)
! 1698: */
! 1699: if ((c-=(l>>1)) <= 0)
! 1700: { /* got bored */
! 1701: if (DEBUGLEVEL >= 4)
! 1702: {
! 1703: fprintferr("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
! 1704: timer2());
! 1705: flusherr();
! 1706: }
! 1707: avma=av; return NULL;
! 1708: }
! 1709: c &= ~0x1f; /* keep it on multiples of 32 */
! 1710:
! 1711: /* Fast forward loop */
! 1712: affii(x, x1); k = l; l <<= 1;
! 1713: /* don't show this for the first several (short) fast forward phases. */
! 1714: if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
! 1715: {
! 1716: fprintferr("Rho: fast forward phase (%ld rounds of 64)...\n", l>>7);
! 1717: flusherr();
! 1718: }
! 1719: for (k1=k; k1; k1--) one_iter();
! 1720: if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
! 1721: {
! 1722: fprintferr("Rho: time = %6ld ms,\t%3ld rounds, back to normal mode\n",
! 1723: timer2(), c0-(c>>5));
! 1724: flusherr();
! 1725: }
! 1726:
! 1727: affii(x,y);
! 1728: } /* forever */
! 1729:
! 1730: fin:
! 1731: /* An accumulated gcd was > 1 */
! 1732: /* if it isn't n, and looks prime, return it */
! 1733: if (!egalii(g,n))
! 1734: {
! 1735: if (miller(g,17))
! 1736: {
! 1737: if (DEBUGLEVEL >= 4)
! 1738: {
! 1739: rho_dbg(c0-(c>>5), 0);
! 1740: fprintferr("\tfound factor = %Z\n",g);
! 1741: flusherr();
! 1742: }
! 1743: avma=av; return icopy(g);
! 1744: }
! 1745: avma = avx; g1 = icopy(g); /* known composite, keep it safe */
! 1746: avx = avma;
! 1747: }
! 1748: else g1 = n; /* and work modulo g1 for backtracking */
! 1749:
! 1750: /* Here g1 is known composite */
! 1751: if (DEBUGLEVEL >= 4 && size > 192)
! 1752: {
! 1753: fprintferr("Rho: hang on a second, we got something here...\n");
! 1754: flusherr();
! 1755: }
! 1756: for(;;) /* backtrack until period recovered. Must terminate */
! 1757: {
! 1758: avma = GGG; y = resii(sqri(y), g1);
! 1759: avma = avx; y = addsi(delta,y);
! 1760: g = mppgcd(subii(x1, y), g1);
! 1761: if (!is_pm1(g)) break;
! 1762:
! 1763: if (DEBUGLEVEL >= 4 && (--c & 0x1f) == 0) rho_dbg(c0-(c>>5), msg_mask);
! 1764: }
! 1765:
! 1766: avma = av; /* safe */
! 1767: if (g1 == n || egalii(g,g1))
! 1768: {
! 1769: if (g1 == n && egalii(g,g1))
! 1770: { /* out of luck */
! 1771: if (DEBUGLEVEL >= 4)
! 1772: {
! 1773: rho_dbg(c0-(c>>5), 0);
! 1774: fprintferr("\tPollard-Brent failed.\n"); flusherr();
! 1775: }
! 1776: if (++retries >= 4) return NULL;
! 1777: goto PB_RETRY;
! 1778: }
! 1779: /* half lucky: we've split n, but g1 equals either g or n */
! 1780: if (DEBUGLEVEL >= 4)
! 1781: {
! 1782: rho_dbg(c0-(c>>5), 0);
! 1783: fprintferr("\tfound %sfactor = %Z\n",
! 1784: (g1!=n ? "composite " : ""), g);
! 1785: flusherr();
! 1786: }
! 1787: res = cgetg(7, t_VEC);
! 1788: res[1] = licopy(g); /* factor */
! 1789: res[2] = un; /* exponent 1 */
! 1790: res[3] = (g1!=n? zero: (long)NULL); /* known composite when g1!=n */
! 1791:
! 1792: res[4] = ldivii(n,g); /* cofactor */
! 1793: res[5] = un; /* exponent 1 */
! 1794: res[6] = (long)NULL; /* unknown */
! 1795: return res;
! 1796: }
! 1797: /* g < g1 < n : our lucky day -- we've split g1, too */
! 1798: res = cgetg(10, t_VEC);
! 1799: /* unknown status for all three factors */
! 1800: res[1] = licopy(g); res[2] = un; res[3] = (long)NULL;
! 1801: res[4] = ldivii(g1,g); res[5] = un; res[6] = (long)NULL;
! 1802: res[7] = ldivii(n,g1); res[8] = un; res[9] = (long)NULL;
! 1803: if (DEBUGLEVEL >= 4)
! 1804: {
! 1805: rho_dbg(c0-(c>>5), 0);
! 1806: fprintferr("\tfound factors = %Z, %Z,\n\tand %Z\n",
! 1807: res[1], res[4], res[7]);
! 1808: flusherr();
! 1809: }
! 1810: return res;
! 1811: }
! 1812:
! 1813: /***********************************************************************/
! 1814: /** **/
! 1815: /** FACTORIZATION (Shanks' SQUFOF) **/
! 1816: /** squfof() returns a nontrivial factor of n, assuming n is odd, **/
! 1817: /** composite, not a pure square, and has no small prime divisor, **/
! 1818: /** or NULL if it fails to find one. It works on two discriminants **/
! 1819: /** simultaneously (n and 5n for n=1(4), 3n and 4n for n=3(4)). **/
! 1820: /** The present implementation is limited to input <2^59, and will **/
! 1821: /** work most of the time in signed arithmetic on integers <2^31 in **/
! 1822: /** absolute size. Occasionally, it may find a factor which is a **/
! 1823: /** square.-- Since this will be used in the double-large-prime **/
! 1824: /** variation of MPQS, we provide a way of suppressing debugging **/
! 1825: /** output even at high debuglevels. GN2000Sep30-Oct01 **/
! 1826: /** (Cf. Algorithm 8.7.2 in ACiCNT) **/
! 1827: /** **/
! 1828: /***********************************************************************/
! 1829: extern ulong ucarrecomplet(ulong A);
! 1830: static long squfof_ambig(long a, long B, long dd, GEN D, long *cntamb);
! 1831:
! 1832: #define SQUFOF_BLACKLIST_SZ 64
! 1833:
! 1834: GEN
! 1835: squfof(GEN n, long quiet)
! 1836: {
! 1837: long tf = lgefint(n), nm4, cnt = 0, cntamb;
! 1838: long a1, b1, c1, d1, dd1, L1, a2, b2, c2, d2, dd2, L2, a, q, c, qc, qcb;
! 1839: GEN D1, D2, Q, res;
! 1840: long av = avma;
! 1841: static long blacklist1[SQUFOF_BLACKLIST_SZ], blacklist2[SQUFOF_BLACKLIST_SZ];
! 1842: long blp1 = 0, blp2 = 0;
! 1843: long mydebug = DEBUGLEVEL - quiet;
! 1844: int act1 = 1, act2 = 1;
! 1845:
! 1846: if (cmpis(n,5) <= 0) return NULL; /* input n <= 5 */
! 1847:
! 1848: #ifdef LONG_IS_64BIT
! 1849: if (tf > 3 || (tf == 3 && bfffo(n[2]) < 5)) /* n too large */
! 1850: return NULL;
! 1851: #else /* 32 bits */
! 1852: if (tf > 4 || (tf == 4 && bfffo(n[2]) < 5)) /* n too large */
! 1853: return NULL;
! 1854: #endif
! 1855: /* now we have 5 < n < 2^59 */
! 1856:
! 1857: nm4 = mod4(n);
! 1858: if (!(nm4 & 1)) return gdeux; /* n even */
! 1859:
! 1860: if (nm4 == 1)
! 1861: { /* case n = 1 (mod4): run one iteration on D1 = n, another on D2 = 5n */
! 1862: D1 = n; /* no need to copy */
! 1863: Q = racine(D1); d1 = itos(Q); L1 = itos(racine(Q));
! 1864: dd1 = (d1>>1) + (d1&1); /* rounding trick, see below */
! 1865: b1 = ((d1-1) & (~1UL)) + 1; /* largest odd number not exceeding d1 */
! 1866: c1 = itos(shifti(subii(D1, sqri(stoi(b1))), -2));
! 1867: if (c1 == 0) /* n was a square */
! 1868: {
! 1869: avma = av;
! 1870: res = cgetg(4, t_VEC);
! 1871: res[1] = lstoi(d1); /* factor */
! 1872: res[2] = deux; /* exponent 2 */
! 1873: res[3] = (long)NULL; /* unknown whether prime or composite */
! 1874: return res;
! 1875: }
! 1876: D2 = mulsi(5,n);
! 1877: Q = racine(D2); d2 = itos(Q); L2 = itos(racine(Q));
! 1878: dd2 = (d2>>1) + (d2&1);
! 1879: b2 = ((d2-1) & (~1UL)) + 1; /* b1, b2 will always stay odd */
! 1880: c2 = itos(shifti(subii(D2, sqri(stoi(b2))), -2));
! 1881: if (c2 == 0) /* 5n is a square, caller should avoid this */
! 1882: {
! 1883: avma = av;
! 1884: res = cgetg(4, t_VEC);
! 1885: res[1] = lstoi(d2/5); /* factor */
! 1886: res[2] = deux; /* exponent 2 */
! 1887: res[3] = (long)NULL; /* unknown whether prime or composite */
! 1888: return res;
! 1889: }
! 1890: }
! 1891: else
! 1892: { /* case n = 3 (mod4): run one iteration on D1 = 3n, another on D2 = 4n */
! 1893: D1 = mulsi(3,n);
! 1894: Q = racine(D1); d1 = itos(Q); L1 = itos(racine(Q));
! 1895: dd1 = (d1>>1) + (d1&1);
! 1896: b1 = ((d1-1) & (~1UL)) + 1; /* will always stay odd */
! 1897: c1 = itos(shifti(subii(D1, sqri(stoi(b1))), -2));
! 1898: if (c1 == 0) /* 3n is a square, caller should avoid this */
! 1899: {
! 1900: avma = av;
! 1901: res = cgetg(4, t_VEC);
! 1902: res[1] = lstoi(d1/3); /* factor */
! 1903: res[2] = deux; /* exponent 2 */
! 1904: res[3] = (long)NULL; /* unknown whether prime or composite */
! 1905: return res;
! 1906: }
! 1907: D2 = shifti(n,2);
! 1908: Q = racine(D2); d2 = itos(Q); L2 = itos(racine(Q));
! 1909: dd2 = d2>>1; /* no rounding trick here */
! 1910: b2 = (d2 & (~1UL)); /* largest even below d2, will stay even */
! 1911: c2 = itos(shifti(subii(D2, sqri(stoi(b2))), -2));
! 1912: /* c2 cannot vanish -- n = 3(mod 4) cannot be a square */
! 1913: }
! 1914: a1 = a2 = 1;
! 1915: /* This completes the setup of the two (identity) forms (a1,b1,-c1) and
! 1916: * (a2,b2,-c2).
! 1917: *
! 1918: * Attentive readers will notice that a1 and c1 represent the absolute
! 1919: * values of the a,c coefficients; we keep track of the sign separately,
! 1920: * in fact the sign info is contained in the rightmost bit of the iteration
! 1921: * counter cnt: when cnt is even, c is understood to be negative, else c
! 1922: * is positive and a < 0.
! 1923: *
! 1924: * The quantities dd1, dd2 are used to compute floor((d1+b1)/2) etc., with-
! 1925: * out overflowing the 31bit signed integer size limit, as dd1+floor(b1/2)
! 1926: * etc. This is the "rounding trick" alluded to above.
! 1927: *
! 1928: * L1, L2 are the limits for blacklisting small leading coefficients
! 1929: * on the principal cycle, to guarantee that when we find a square form,
! 1930: * its square root will belong to an ambiguous cycle (i.e. won't be an
! 1931: * earlier form on the principal cycle).
! 1932: *
! 1933: * When n = 3(mod 4), D2 = 12(mod 16), and b^2 is always 0 or 4 mod 16.
! 1934: * It follows that 4*a*c must be 4 or 8 mod 16, respectively, so at most
! 1935: * one of a,c can be divisible by 2 at most to the first power. This fact
! 1936: * is used a couple of times below.
! 1937: *
! 1938: * The flags act1, act2 remain true while the respective cycle is still
! 1939: * active; we drop them to false when we return to the identity form with-
! 1940: * out having found a square form (or when the blacklist overflows, which
! 1941: * shouldn't happen).
! 1942: */
! 1943:
! 1944: if (mydebug >= 4)
! 1945: {
! 1946: fprintferr("SQUFOF: entering main loop with forms\n"
! 1947: "\t(1, %ld, %ld) and (1, %ld, %ld)\n\tof discriminants\n"
! 1948: "\t%Z and %Z, respectively\n",
! 1949: b1, -c1, b2, -c2, D1, D2);
! 1950: flusherr();
! 1951: (void)timer2(); /* clear timer */
! 1952: }
! 1953:
! 1954: /* MAIN LOOP: walk around the principal cycle looking for a square form.
! 1955: * Blacklist small leading coefficients.
! 1956: *
! 1957: * The reduction operator can be computed entirely in 32-bit arithmetic:
! 1958: * Let q = floor(floor((d1+b1)/2)/c1) (when c1>dd1, q=1, which happens
! 1959: * often enough to special-case it). Then the new b1 = (q*c1-b1) + q*c1,
! 1960: * which can be computed without overflowing, and the new c1 equals
! 1961: * a1 - q*(q*c1-b1), where the righthand term is bounded by d1 in abs
! 1962: * size since both the old and the new a1 are positive and bounded by d1.
! 1963: */
! 1964: while (act1 + act2 > 0)
! 1965: {
! 1966: /* send first form through reduction operator if active */
! 1967: if (act1)
! 1968: {
! 1969: c = c1;
! 1970: if (c > dd1)
! 1971: q = 1;
! 1972: else
! 1973: q = (dd1 + (b1>>1)) / c;
! 1974: if (q == 1)
! 1975: {
! 1976: qcb = c - b1; b1 = c + qcb; c1 = a1 - qcb;
! 1977: }
! 1978: else
! 1979: {
! 1980: qc = q*c; qcb = qc - b1; b1 = qc + qcb; c1 = a1 - q*qcb;
! 1981: }
! 1982: a1 = c;
! 1983:
! 1984: if (a1 <= L1) /* blacklist this */
! 1985: {
! 1986: if (blp1 >= SQUFOF_BLACKLIST_SZ)
! 1987: /* blacklist overflows: shouldn't happen */
! 1988: act1 = 0; /* silently */
! 1989: else
! 1990: {
! 1991: if (mydebug >= 6)
! 1992: fprintferr("SQUFOF: blacklisting a = %ld on first cycle\n", a1);
! 1993: blacklist1[blp1++] = a1;
! 1994: }
! 1995: }
! 1996: }
! 1997:
! 1998: /* send second form through reduction operator if active */
! 1999: if (act2)
! 2000: {
! 2001: c = c2;
! 2002: if (c > dd2)
! 2003: q = 1;
! 2004: else
! 2005: q = (dd2 + (b2>>1)) / c;
! 2006: if (q == 1)
! 2007: {
! 2008: qcb = c - b2; b2 = c + qcb; c2 = a2 - qcb;
! 2009: }
! 2010: else
! 2011: {
! 2012: qc = q*c; qcb = qc - b2; b2 = qc + qcb; c2 = a2 - q*qcb;
! 2013: }
! 2014: a2 = c;
! 2015:
! 2016: if (a2 <= L2) /* blacklist this */
! 2017: {
! 2018: if (blp2 >= SQUFOF_BLACKLIST_SZ)
! 2019: /* blacklist overflows: shouldn't happen */
! 2020: act2 = 0; /* silently */
! 2021: else
! 2022: {
! 2023: if (mydebug >= 6)
! 2024: fprintferr("SQUFOF: blacklisting a = %ld on second cycle\n", a2);
! 2025: blacklist2[blp2++] = a2;
! 2026: }
! 2027: }
! 2028: }
! 2029:
! 2030: /* bump counter, loop if this is an odd iteration (i.e. if the real
! 2031: * leading coefficients are negative)
! 2032: */
! 2033: if (++cnt & 1) continue;
! 2034:
! 2035: /* second half of main loop entered only when the leading coefficients
! 2036: * are positive (i.e., during even-numbered iterations)
! 2037: */
! 2038:
! 2039: /* examine first form if active */
! 2040: if (act1 && a1 == 1) /* back to identity form */
! 2041: {
! 2042: act1 = 0; /* drop this discriminant */
! 2043: if (mydebug >= 4)
! 2044: {
! 2045: fprintferr("SQUFOF: first cycle exhausted after %ld iterations,\n"
! 2046: "\tdropping it\n",
! 2047: cnt);
! 2048: flusherr();
! 2049: }
! 2050: }
! 2051: if (act1)
! 2052: {
! 2053: if ((a = ucarrecomplet(a1)) != 0) /* square form? */
! 2054: {
! 2055: if (mydebug >= 4)
! 2056: {
! 2057: fprintferr("SQUFOF: square form (%ld^2, %ld, %ld) on first cycle\n"
! 2058: "\tafter %ld iterations, time = %ld ms\n",
! 2059: a, b1, -c1, cnt, timer2());
! 2060: /* flusherr delayed until we've dealt with it */
! 2061: }
! 2062: /* blacklisted? */
! 2063: if (a <= L1)
! 2064: {
! 2065: int j;
! 2066: for (j = 0; j < blp1; j++)
! 2067: if (a == blacklist1[j]) { a = 0; break; }
! 2068: }
! 2069: if (a > 0) /* not blacklisted */
! 2070: {
! 2071: /* imprimitive form? */
! 2072: q = cgcd(a, b1);
! 2073: if (nm4 == 3 && cgcd(q, 3) > 1) /* paranoia */
! 2074: {
! 2075: avma = av;
! 2076: /* we really possess q^2/3 here, but let the caller sort this
! 2077: * out. His fault for calling us with a multiple of 3. We
! 2078: * cannot claim (q/3)^2 as a known factor here since q might
! 2079: * equal 3, in which case 3 is the correct answer to return.
! 2080: */
! 2081: if (q == 3)
! 2082: {
! 2083: if (mydebug >= 4)
! 2084: {
! 2085: fprintferr("SQUFOF: found factor 3\n");
! 2086: flusherr();
! 2087: }
! 2088: return stoi(3);
! 2089: }
! 2090: else q /= 3; /* and fall through to the next conditional */
! 2091: }
! 2092: if (q > 1) /* q^2 divides D1 and, in fact, n */
! 2093: {
! 2094: avma = av;
! 2095: if (mydebug >= 4)
! 2096: {
! 2097: fprintferr("SQUFOF: found factor %ld^2\n", q);
! 2098: flusherr();
! 2099: }
! 2100: res = cgetg(4, t_VEC);
! 2101: res[1] = lstoi(q); /* factor */
! 2102: res[2] = deux; /* exponent 2 */
! 2103: res[3] = (long)NULL; /* unknown whether prime or composite */
! 2104: return res;
! 2105: }
! 2106:
! 2107: /* chase the inverse root form back along the ambiguous cycle */
! 2108: q = squfof_ambig(a, b1, dd1, D1, &cntamb);
! 2109: if (mydebug >= 6)
! 2110: fprintferr("SQUFOF: squfof_ambig returned %ld\n", q);
! 2111: if (nm4 == 3) q /= cgcd(q, 3);
! 2112:
! 2113: /* return if successful */
! 2114: if (q > 1)
! 2115: {
! 2116: avma = av;
! 2117: if (mydebug >= 4)
! 2118: {
! 2119: fprintferr("SQUFOF: found factor %ld from ambiguous form\n"
! 2120: "\tafter %ld steps on the ambiguous cycle, "
! 2121: "time = %ld ms\n",
! 2122: q, cntamb, timer2());
! 2123: flusherr();
! 2124: }
! 2125: res = stoi(q);
! 2126: return res;
! 2127: }
! 2128: else if (mydebug >= 4) /* nothing found */
! 2129: {
! 2130: fprintferr("SQUFOF: ...found nothing useful on the ambiguous "
! 2131: "cycle\n"
! 2132: "\tafter %ld steps there, time = %ld ms\n",
! 2133: cntamb, timer2());
! 2134: flusherr();
! 2135: }
! 2136: }
! 2137: else if (mydebug >= 4) /* blacklisted */
! 2138: {
! 2139: fprintferr("SQUFOF: ...but the root form seems to be on the "
! 2140: "principal cycle\n");
! 2141: flusherr();
! 2142: }
! 2143: }
! 2144: /* else proceed */
! 2145: }
! 2146:
! 2147: /* examine second form if active */
! 2148: if (act2 && a2 == 1) /* back to identity form */
! 2149: {
! 2150: act2 = 0; /* drop this discriminant */
! 2151: if (mydebug >= 4)
! 2152: {
! 2153: fprintferr("SQUFOF: second cycle exhausted after %ld iterations,\n"
! 2154: "\tdropping it\n",
! 2155: cnt);
! 2156: flusherr();
! 2157: }
! 2158: }
! 2159: if (act2)
! 2160: {
! 2161: if ((a = ucarrecomplet(a2)) != 0) /* square form? */
! 2162: {
! 2163: if (mydebug >= 4)
! 2164: {
! 2165: fprintferr("SQUFOF: square form (%ld^2, %ld, %ld) on second cycle\n"
! 2166: "\tafter %ld iterations, time = %ld ms\n",
! 2167: a, b2, -c2, cnt, timer2());
! 2168: flusherr();
! 2169: }
! 2170: /* blacklisted? */
! 2171: if (a <= L2)
! 2172: {
! 2173: int j;
! 2174: for (j = 0; j < blp2; j++)
! 2175: if (a == blacklist2[j]) { a = 0; break; }
! 2176: }
! 2177: if (a > 0) /* not blacklisted */
! 2178: {
! 2179: /* imprimitive form? */
! 2180: q = cgcd(a, b2);
! 2181: /* NB if b2 is even, a is odd, so the gcd is always odd */
! 2182: if (nm4 == 1 && cgcd(q, 5) > 1) /* paranoia */
! 2183: {
! 2184: avma = av;
! 2185: /* we really possess q^2/5 here, but let the caller sort this
! 2186: * out. His fault for calling us with a multiple of 5. We
! 2187: * cannot claim (q/5)^2 as a known factor here since q might
! 2188: * equal 5, in which case 5 is the correct answer to return.
! 2189: */
! 2190: if (q == 5)
! 2191: {
! 2192: if (mydebug >= 4)
! 2193: {
! 2194: fprintferr("SQUFOF: found factor 5\n");
! 2195: flusherr();
! 2196: }
! 2197: return stoi(5);
! 2198: }
! 2199: else q /= 5; /* and fall through to the next conditional */
! 2200: }
! 2201: if (q > 1) /* q^2 divides D2 */
! 2202: {
! 2203: avma = av;
! 2204: if (mydebug >= 4)
! 2205: {
! 2206: fprintferr("SQUFOF: found factor %ld^2\n", q);
! 2207: flusherr();
! 2208: }
! 2209: res = cgetg(4, t_VEC);
! 2210: res[1] = lstoi(q); /* factor */
! 2211: res[2] = deux; /* exponent 2 */
! 2212: res[3] = (long)NULL; /* unknown whether prime or composite */
! 2213: return res;
! 2214: }
! 2215:
! 2216: /* chase the inverse root form along the ambiguous cycle */
! 2217: q = squfof_ambig(a, b2, dd2, D2, &cntamb);
! 2218: if (mydebug >= 6)
! 2219: fprintferr("SQUFOF: squfof_ambig returned %ld\n", q);
! 2220: if (nm4 == 1) q /= cgcd(q, 5);
! 2221:
! 2222: /* return if successful */
! 2223: if (q > 1)
! 2224: {
! 2225: avma = av;
! 2226: if (mydebug >= 4)
! 2227: {
! 2228: fprintferr("SQUFOF: found factor %ld from ambiguous form\n"
! 2229: "\tafter %ld steps on the ambiguous cycle, "
! 2230: "time = %ld ms\n",
! 2231: q, cntamb, timer2());
! 2232: flusherr();
! 2233: }
! 2234: res = stoi(q);
! 2235: return res;
! 2236: }
! 2237: else if (mydebug >= 4) /* nothing found */
! 2238: {
! 2239: fprintferr("SQUFOF: ...found nothing useful on the ambiguous "
! 2240: "cycle\n"
! 2241: "\tafter %ld steps there, time = %ld ms\n",
! 2242: cntamb, timer2());
! 2243: flusherr();
! 2244: }
! 2245: }
! 2246: else if (mydebug >= 4) /* blacklisted */
! 2247: {
! 2248: fprintferr("SQUFOF: ...but the root form seems to be on the "
! 2249: "principal cycle\n");
! 2250: flusherr();
! 2251: }
! 2252: }
! 2253: /* else proceed */
! 2254: }
! 2255:
! 2256: } /* end main loop */
! 2257:
! 2258: /* when we get here, both discriminants have, alas, turned out to be
! 2259: * useless.
! 2260: */
! 2261: if (mydebug >= 4)
! 2262: {
! 2263: fprintferr("SQUFOF: giving up, time = %ld ms\n", timer2());
! 2264: flusherr();
! 2265: }
! 2266:
! 2267: avma = av;
! 2268: return NULL;
! 2269: }
! 2270:
! 2271: /* The following is invoked to walk back along the ambiguous cycle
! 2272: * until we hit an ambiguous form and thus the desired factor, which
! 2273: * it returns. If it fails for any reason, it returns 0. It doesn't
! 2274: * interfere with timing and diagnostics, which it leaves to squfof().
! 2275: *
! 2276: * Before we invoke this, we've found a form (A, B, -C) with A = a^2,
! 2277: * where a isn't blacklisted and where gcd(a, B) = 1. According to
! 2278: * ACiCANT, we should now proceed reducing the form (a, -B, -aC), but
! 2279: * it is easy to show that the first reduction step always sends this
! 2280: * to (-aC, B, a), and the next one, with q computed as usual from B
! 2281: * and a (occupying the c position), gives a reduced form, whose third
! 2282: * member is easiest to recover by going back to D. From this point
! 2283: * onwards, we're once again working with single-word numbers.
! 2284: * NB here there is no need to track signs, we just work with the abs
! 2285: * values of the coefficients.
! 2286: */
! 2287: static
! 2288: long
! 2289: squfof_ambig(long a, long B, long dd, GEN D, long *cntamb)
! 2290: {
! 2291: long b, c, q, qc, qcb, av = avma;
! 2292: long a0, b0, b1, c0;
! 2293:
! 2294: q = (dd + (B>>1)) / a; qc = q*a; qcb = qc - B;
! 2295: b = qc + qcb;
! 2296: c = itos(divis(shifti(subii(D, sqri(stoi(b))), -2), a));
! 2297: #ifdef DEBUG_SQUFOF
! 2298: fprintferr("SQUFOF: ambigous cycle of discriminant %Z\n", D);
! 2299: fprintferr("SQUFOF: Form on ambigous cycle (%ld, %ld, %ld)\n",
! 2300: a, b, c);
! 2301: #endif
! 2302:
! 2303: avma = av; /* no further stack operations follow */
! 2304: *cntamb = 0; /* count reduction steps on the cycle */
! 2305: a0 = a; b0 = b1 = b; /* end of loop detection and safeguard */
! 2306:
! 2307: for (;;) /* reduced cycles are finite */
! 2308: {
! 2309: /* reduction step */
! 2310: c0 = c;
! 2311: if (c0 > dd)
! 2312: q = 1;
! 2313: else
! 2314: q = (dd + (b>>1)) / c0;
! 2315: if (q == 1)
! 2316: {
! 2317: qcb = c0 - b; b = c0 + qcb; c = a - qcb;
! 2318: }
! 2319: else
! 2320: {
! 2321: qc = q*c0; qcb = qc - b; b = qc + qcb; c = a - q*qcb;
! 2322: }
! 2323: a = c0;
! 2324:
! 2325: (*cntamb)++;
! 2326:
! 2327: /* check whether we're done */
! 2328: if (b == b1) return (a&1 ? a : a>>1);
! 2329:
! 2330: /* safeguard against infinite loop: recognize when we've walked
! 2331: * around the entire cycle in vain. (I don't think this can
! 2332: * actually happen -- exercise. But better safe than sorry.)
! 2333: */
! 2334: if (b == b0 && a == a0) return 0;
! 2335:
! 2336: /* prepare for next iteration */
! 2337: b1 = b;
! 2338: }
! 2339: /* NOT REACHED */
! 2340: return 0;
! 2341: }
! 2342:
! 2343: /***********************************************************************/
! 2344: /** **/
! 2345: /** DETECTING ODD POWERS **/
! 2346: /** Factoring engines like MPQS which ultimately rely on computing **/
! 2347: /** gcd(N, x^2-y^2) to find a nontrivial factor of N are fundamen- **/
! 2348: /** tally incapable of splitting a proper power of an odd prime, **/
! 2349: /** because of the cyclicity of the prime residue class group. We **/
! 2350: /** already have a square-detection function carrecomplet(), which **/
! 2351: /** also returns the square root if appropriate. Here's an analogue **/
! 2352: /** for cubes, fifth and 7th powers. 11th powers are a non-issue so **/
! 2353: /** long as mpqs() gives up beyond 100 decimal digits (since ECM **/
! 2354: /** easily finds a 10-digit prime factor of a 100-digit number). **/
! 2355: /** GN1998Jun28 **/
! 2356: /** **/
! 2357: /***********************************************************************/
! 2358:
! 2359: /* Use a multistage sieve. First stages work mod 211, 209, 61, 203;
! 2360: * if the argument is larger than a word, we first reduce mod the product
! 2361: * of these and then take the remainder apart. Second stages use 117,
! 2362: * 31, 43, 71 in this order. Moduli which are no longer interesting are
! 2363: * skipped. Everything is encoded in a single table of 106 24-bit masks.
! 2364: * We only need the first half of the residues. Three bits per modulus
! 2365: * indicate which residues are 7th (bit 2), 5th (bit 1) powers or cubes
! 2366: * (bit 0); the eight moduli above are assigned right-to-left. The table
! 2367: * will err on the side of safety if one of the moduli divides the number
! 2368: * to be tested, but as this leads to inefficiency it should still be
! 2369: * avoided.
! 2370: */
! 2371:
! 2372: static ulong powersmod[106] = {
! 2373: 077777777ul, /* 0 */
! 2374: 077777777ul, /* 1 */
! 2375: 013562440ul, /* 2 */
! 2376: 012462540ul, /* 3 */
! 2377: 013562440ul, /* 4 */
! 2378: 052662441ul, /* 5 */
! 2379: 016663440ul, /* 6 */
! 2380: 016463450ul, /* 7 */
! 2381: 013573551ul, /* 8 */
! 2382: 012462540ul, /* 9 */
! 2383: 012462464ul, /* 10 */
! 2384: 013462771ul, /* 11 */
! 2385: 012466473ul, /* 12 */
! 2386: 012463641ul, /* 13 */
! 2387: 052463646ul, /* 14 */
! 2388: 012563446ul, /* 15 */
! 2389: 013762440ul, /* 16 */
! 2390: 052766440ul, /* 17 */
! 2391: 012772451ul, /* 18 */
! 2392: 012762454ul, /* 19 */
! 2393: 032763550ul, /* 20 */
! 2394: 013763664ul, /* 21 */
! 2395: 017763460ul, /* 22 */
! 2396: 037762565ul, /* 23 */
! 2397: 017762540ul, /* 24 */
! 2398: 057762441ul, /* 25 */
! 2399: 037772452ul, /* 26 */
! 2400: 017773551ul, /* 27 */
! 2401: 017767541ul, /* 28 */
! 2402: 017767640ul, /* 29 */
! 2403: 037766450ul, /* 30 */
! 2404: 017762752ul, /* 31 */
! 2405: 037762762ul, /* 32 */
! 2406: 017762742ul, /* 33 */
! 2407: 037763762ul, /* 34 */
! 2408: 017763740ul, /* 35 */
! 2409: 077763740ul, /* 36 */
! 2410: 077762750ul, /* 37 */
! 2411: 077762752ul, /* 38 */
! 2412: 077762750ul, /* 39 */
! 2413: 077762743ul, /* 40 */
! 2414: 077767740ul, /* 41 */
! 2415: 077763741ul, /* 42 */
! 2416: 077763762ul, /* 43 */
! 2417: 077772760ul, /* 44 */
! 2418: 077762770ul, /* 45 */
! 2419: 077766750ul, /* 46 */
! 2420: 077762740ul, /* 47 */
! 2421: 077763740ul, /* 48 */
! 2422: 077763750ul, /* 49 */
! 2423: 077763752ul, /* 50 */
! 2424: 077762740ul, /* 51 */
! 2425: 077762740ul, /* 52 */
! 2426: 077772740ul, /* 53 */
! 2427: 077762762ul, /* 54 */
! 2428: 077763765ul, /* 55 */
! 2429: 077763770ul, /* 56 */
! 2430: 077767750ul, /* 57 */
! 2431: 077766753ul, /* 58 */
! 2432: 077776740ul, /* 59 */
! 2433: 077772741ul, /* 60 */
! 2434: 077772744ul, /* 61 */
! 2435: 077773740ul, /* 62 */
! 2436: 077773743ul, /* 63 */
! 2437: 077773751ul, /* 64 */
! 2438: 077772771ul, /* 65 */
! 2439: 077772760ul, /* 66 */
! 2440: 077772763ul, /* 67 */
! 2441: 077772751ul, /* 68 */
! 2442: 077773750ul, /* 69 */
! 2443: 077777740ul, /* 70 */
! 2444: 077773745ul, /* 71 */
! 2445: 077772740ul, /* 72 */
! 2446: 077772742ul, /* 73 */
! 2447: 077772744ul, /* 74 */
! 2448: 077776750ul, /* 75 */
! 2449: 077773771ul, /* 76 */
! 2450: 077773774ul, /* 77 */
! 2451: 077773760ul, /* 78 */
! 2452: 077772741ul, /* 79 */
! 2453: 077772740ul, /* 80 */
! 2454: 077772740ul, /* 81 */
! 2455: 077772741ul, /* 82 */
! 2456: 077773754ul, /* 83 */
! 2457: 077773750ul, /* 84 */
! 2458: 077773740ul, /* 85 */
! 2459: 077776741ul, /* 86 */
! 2460: 077776771ul, /* 87 */
! 2461: 077776773ul, /* 88 */
! 2462: 077772761ul, /* 89 */
! 2463: 077773741ul, /* 90 */
! 2464: 077773740ul, /* 91 */
! 2465: 077773740ul, /* 92 */
! 2466: 077772740ul, /* 93 */
! 2467: 077772752ul, /* 94 */
! 2468: 077772750ul, /* 95 */
! 2469: 077772751ul, /* 96 */
! 2470: 077773741ul, /* 97 */
! 2471: 077773761ul, /* 98 */
! 2472: 077777760ul, /* 99 */
! 2473: 077772765ul, /* 100 */
! 2474: 077772742ul, /* 101 */
! 2475: 077777751ul, /* 102 */
! 2476: 077777750ul, /* 103 */
! 2477: 077777745ul, /* 104 */
! 2478: 077777770ul /* 105 */
! 2479: };
! 2480:
! 2481: /* Returns 3, 5, or 7 if x is a cube (but not a 5th or 7th power), a 5th
! 2482: * power (but not a 7th), or a 7th power, and in this case creates the
! 2483: * base on the stack and assigns its address to *pt. Otherwise returns 0.
! 2484: * x must be of type t_INT and nonzero; this is not checked. The *mask
! 2485: * argument tells us which things to check -- bit 0: 3rd, bit 1: 5th,
! 2486: * bit 2: 7th pwr; set a bit to have the corresponding power examined --
! 2487: * and is updated appropriately for a possible follow-up call
! 2488: */
! 2489:
! 2490: long /* no longer static -- used in mpqs.c */
! 2491: is_odd_power(GEN x, GEN *pt, long *mask)
! 2492: {
! 2493: long av=avma, tetpil, lgx=lgefint(x), exponent=0, residue, resbyte;
! 2494: GEN y;
! 2495:
! 2496: *mask &= 7; /* paranoia */
! 2497: if (!*mask) return 0; /* useful when running in a loop */
! 2498: if (signe(x) < 0) x=absi(x);
! 2499:
! 2500: if (DEBUGLEVEL >= 5)
! 2501: {
! 2502: fprintferr("OddPwrs: is %Z\n\t...a", x);
! 2503: if (*mask&1) fprintferr(" 3rd%s",
! 2504: (*mask==7?",":(*mask!=1?" or":"")));
! 2505: if (*mask&2) fprintferr(" 5th%s",
! 2506: (*mask==7?", or":(*mask&4?" or":"")));
! 2507: if (*mask&4) fprintferr(" 7th");
! 2508: fprintferr(" power?\n");
! 2509: }
! 2510: if (lgx > 3) residue = smodis(x, 211*209*61*203);
! 2511: else residue = x[2];
! 2512:
! 2513: resbyte=residue%211; if (resbyte > 105) resbyte = 211 - resbyte;
! 2514: *mask &= powersmod[resbyte];
! 2515: if (DEBUGLEVEL >= 5)
! 2516: {
! 2517: fprintferr("\tmodulo: resid. (remaining possibilities)\n");
! 2518: fprintferr("\t 211: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 2519: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 2520: }
! 2521: if (!*mask) { avma=av; return 0; }
! 2522:
! 2523: if (*mask & 3)
! 2524: {
! 2525: resbyte=residue%209; if (resbyte > 104) resbyte = 209 - resbyte;
! 2526: *mask &= (powersmod[resbyte] >> 3);
! 2527: if (DEBUGLEVEL >= 5)
! 2528: fprintferr("\t 209: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 2529: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 2530: if (!*mask) { avma=av; return 0; }
! 2531: }
! 2532: if (*mask & 3)
! 2533: {
! 2534: resbyte=residue%61; if (resbyte > 30) resbyte = 61 - resbyte;
! 2535: *mask &= (powersmod[resbyte] >> 6);
! 2536: if (DEBUGLEVEL >= 5)
! 2537: fprintferr("\t 61: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 2538: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 2539: if (!*mask) { avma=av; return 0; }
! 2540: }
! 2541: if (*mask & 5)
! 2542: {
! 2543: resbyte=residue%203; if (resbyte > 101) resbyte = 203 - resbyte;
! 2544: *mask &= (powersmod[resbyte] >> 9);
! 2545: if (DEBUGLEVEL >= 5)
! 2546: fprintferr("\t 203: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 2547: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 2548: if (!*mask) { avma=av; return 0; }
! 2549: }
! 2550:
! 2551: if (lgx > 3) residue = smodis(x, 117*31*43*71);
! 2552: else residue = x[2];
! 2553:
! 2554: if (*mask & 1)
! 2555: {
! 2556: resbyte=residue%117; if (resbyte > 58) resbyte = 117 - resbyte;
! 2557: *mask &= (powersmod[resbyte] >> 12);
! 2558: if (DEBUGLEVEL >= 5)
! 2559: fprintferr("\t 117: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 2560: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 2561: if (!*mask) { avma=av; return 0; }
! 2562: }
! 2563: if (*mask & 3)
! 2564: {
! 2565: resbyte=residue%31; if (resbyte > 15) resbyte = 31 - resbyte;
! 2566: *mask &= (powersmod[resbyte] >> 15);
! 2567: if (DEBUGLEVEL >= 5)
! 2568: fprintferr("\t 31: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 2569: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 2570: if (!*mask) { avma=av; return 0; }
! 2571: }
! 2572: if (*mask & 5)
! 2573: {
! 2574: resbyte=residue%43; if (resbyte > 21) resbyte = 43 - resbyte;
! 2575: *mask &= (powersmod[resbyte] >> 18);
! 2576: if (DEBUGLEVEL >= 5)
! 2577: fprintferr("\t 43: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 2578: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 2579: if (!*mask) { avma=av; return 0; }
! 2580: }
! 2581: if (*mask & 6)
! 2582: {
! 2583: resbyte=residue%71; if (resbyte > 35) resbyte = 71 - resbyte;
! 2584: *mask &= (powersmod[resbyte] >> 21);
! 2585: if (DEBUGLEVEL >= 5)
! 2586: fprintferr("\t 71: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
! 2587: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
! 2588: if (!*mask) { avma=av; return 0; }
! 2589: }
! 2590:
! 2591: /* priority to higher powers -- if we have a 21st, it'll be easier to
! 2592: * rediscover that its 7th root is a cube than that its cube root is
! 2593: * a 7th power
! 2594: */
! 2595: if ((resbyte = *mask & 4)) /* assignment */
! 2596: exponent = 7;
! 2597: else if ((resbyte = *mask & 2))
! 2598: exponent = 5;
! 2599: else
! 2600: { resbyte = 1; exponent = 3; }
! 2601: /* leave that mask bit on for the moment, we might need it for a
! 2602: * subsequent call
! 2603: */
! 2604:
! 2605: /* precision in the following is one extra significant word (overkill) */
! 2606: y=ground(gpow(x, ginv(stoi(exponent)), lgx));
! 2607: if (!egalii(gpowgs(y, exponent), x))
! 2608: {
! 2609: if (DEBUGLEVEL >= 5)
! 2610: {
! 2611: if (exponent == 3)
! 2612: fprintferr("\tBut it nevertheless wasn't a cube.\n");
! 2613: else
! 2614: fprintferr("\tBut it nevertheless wasn't a %ldth power.\n",
! 2615: exponent);
! 2616: }
! 2617: *mask &= ~resbyte; /* _now_ turn the bit off */
! 2618: avma=av; return 0;
! 2619: }
! 2620: /* caller (ifac_crack() below) will report the final result if it was
! 2621: * a pure power, so no further diagnostics here
! 2622: */
! 2623: tetpil=avma;
! 2624: if (!pt) { avma=av; return exponent; } /* this branch not used */
! 2625: *pt=gerepile(av,tetpil,icopy(y));
! 2626: return exponent;
! 2627: }
! 2628:
! 2629: /***********************************************************************/
! 2630: /** **/
! 2631: /** FACTORIZATION (master iteration) **/
! 2632: /** Driver for the various methods of finding large factors **/
! 2633: /** (after trial division has cast out the very small ones). **/
! 2634: /** GN1998Jun24--30 **/
! 2635: /** **/
! 2636: /***********************************************************************/
! 2637:
! 2638: /** Direct use:
! 2639: ** ifac_start() registers a number (without prime factors < 100)
! 2640: ** with the iterative factorizer, and also registers whether or
! 2641: ** not we should terminate early if we find that the number is
! 2642: ** not squarefree, and a hint about which method(s) to use. This
! 2643: ** must always be called first. The input _must_ have been checked
! 2644: ** to be composite by the caller. The routine immediately tries
! 2645: ** to decompose it nontrivially into a product of two factors,
! 2646: ** except in squarefreeness (`Moebius') mode.
! 2647: ** ifac_primary_factor() returns a prime divisor (not necessarily
! 2648: ** the smallest) and the corresponding exponent. */
! 2649:
! 2650: /** Encapsulated user interface:
! 2651: ** ifac_decomp() does the right thing for auxdecomp() (put a succession
! 2652: ** of prime divisor / exponent pairs onto the stack, not necessarily
! 2653: ** sorted, although in practice they will tend not to be too far from
! 2654: ** the correct order).
! 2655: **
! 2656: ** For each of the additive/multiplicative arithmetic functions, there is
! 2657: ** a `contributor' below, to be called on any large composite cofactor
! 2658: ** left over after trial division by small primes, whose result can then
! 2659: ** be added to or multiplied with whatever we already have:
! 2660: ** ifac_moebius() ifac_issquarefree() ifac_totient() ifac_omega()
! 2661: ** ifac_bigomega() ifac_numdiv() ifac_sumdiv() ifac_sumdivk() */
! 2662:
! 2663: /* We never test whether the input number is prime or composite, since
! 2664: * presumably it will have come out of the small factors finder stage
! 2665: * (which doesn't really exist yet but which will test the left-over
! 2666: * cofactor for primality once it does).
! 2667: */
! 2668: /* The data structure in which we preserve whatever we know at any given
! 2669: * time about our number N is kept on the PARI stack, and updated as needed.
! 2670: * This makes the machinery re-entrant (you can have more than one fac-
! 2671: * torization using ifac_start()/ifac_primary_factor() in progress simul-
! 2672: * taneously so long as you preserve the GEN across garbage collections),
! 2673: * and which avoids memory leaks when a lengthy factorization is interrupted.
! 2674: * We also make an effort to keep the whole affair connected, and the parent
! 2675: * object will always be older than its children. This may in rare cases
! 2676: * lead to some extra copying around, and knowing what is garbage at any
! 2677: * given time is not entirely trivial. See below for examples how to do
! 2678: * it right. (Connectedness can be destroyed if callers of ifac_main()
! 2679: * create other stuff on the stack in between calls. This is harmless
! 2680: * as long as ifac_realloc() is used to re-create a connected object at
! 2681: * the head of the stack just before collecting garbage.)
! 2682: */
! 2683: /* Note that a PARI integer can have hundreds of millions of distinct prime
! 2684: * factors larger than 2^16, given enough memory. And since there's no
! 2685: * guarantee that we will find factors in order of increasing size, we must
! 2686: * be prepared to drag a very large amount of data around (although this
! 2687: * will _very_ rarely happen for random input!). So we start with a small
! 2688: * structure and extend it when necessary.
! 2689: */
! 2690: /* The idea of data structure and algorithm is:
! 2691: * Let N0 be whatever is currently left of N after dividing off all the
! 2692: * prime powers we have already returned to the caller. Then we maintain
! 2693: * N0 as a product
! 2694: * (1) N0 = \prod_i P_i^{e_i} * \prod_j Q_j^{f_j} * \prod_k C_k^{g_k}
! 2695: * where the P_i and Q_j are distinct primes, each C_k is known composite,
! 2696: * none of the P_i divides any C_k, and we also know the total ordering
! 2697: * of all the P_i, Q_j and C_k (in particular, we will never try to divide
! 2698: * a C_k by a larger Q_j). Some of the C_k may have common factors, although
! 2699: * this will not often be the case.
! 2700: */
! 2701: /* Caveat implementor: Taking gcds among C_k's is very likely to cost at
! 2702: * least as much time as dividing off any primes as we find them, and book-
! 2703: * keeping would be a nightmare (since D=gcd(C_1,C_2) can still have common
! 2704: * factors with both C_1/D and C_2/D, and so on...).
! 2705: */
! 2706: /* At startup, we just initialize the structure to
! 2707: * (2) N = C_1^1 (composite).
! 2708: */
! 2709: /* Whenever ifac_primary_factor() or ifac_decomp() (or, mutatis mutandis,
! 2710: * one of the three arithmetic user interface routines) needs a primary
! 2711: * factor, and the smallest thing in our list is P_1, we return that and
! 2712: * its exponent, and remove it from our list.
! 2713: * (When nothing is left, we return a sentinel value -- gun. And in Moebius
! 2714: * mode, when we see something with exponent > 1, whether prime or composite,
! 2715: * we yell at our caller by returning gzero or 0, depending on the function).
! 2716: * In all other cases, ifac_main() iterates the following steps until we have
! 2717: * a P_1 in the smallest position.
! 2718: */
! 2719: /* When the smallest item is C_1 (as it is initially):
! 2720: * (3.1) Crack C_1 into a nontrivial product U_1 * U_2 by whatever method
! 2721: * comes to mind for this size. (U for `unknown'.) Cracking will detect
! 2722: * squares (and biquadrates etc), and it may detect odd powers, so we
! 2723: * might instead see a power of some U_1 here, or even something of the form
! 2724: * U_1^k*U_2^k. (Of course the exponent already attached to C_1 is taken
! 2725: * into account in the following.)
! 2726: * (3.2) If we have U_1*U_2, sort the two factors; convert to U_1^2 if they
! 2727: * happen to be equal (which they shouldn't -- squares should have been
! 2728: * caught at the preceding stage). Note that U_1 and (if it exists) U_2
! 2729: * are automatically smaller than anything else in our list.
! 2730: * (3.3) Check U_1 (and U_2) for primality, and flag them accordingly.
! 2731: * (3.4) Iterate.
! 2732: */
! 2733: /* When the smallest item is Q_1:
! 2734: * This is the potentially unpleasant case. The idea is to go through the
! 2735: * entire list and try to divide Q_1 off each of the current C_k's, which
! 2736: * will usually fail, but may succeed several times. When a division was
! 2737: * successful, the corresponding C_k is removed from our list, and the co-
! 2738: * factor becomes a U_l for the moment unless it is 1 (which happens when
! 2739: * C_k was a power of Q_1). When we're through we upgrade Q_1 to P_1 status,
! 2740: * and then do a primality check on each U_l and sort it back into the list
! 2741: * either as a Q_j or as a C_k. If during the insertion sort we discover
! 2742: * that some U_l equals some P_i or Q_j or C_k we already have, we just add
! 2743: * U_l's exponent to that of its twin. (The sorting should therefore happen
! 2744: * before the primality test).
! 2745: * Note that this may produce one or more elements smaller than the P_1
! 2746: * we just confirmed, so we may have to repeat the iteration.
! 2747: */
! 2748: /* There's a little trick that avoids some Q_1 instances. Just after we do
! 2749: * a sweep to classify all current unknowns as either composites or primes,
! 2750: * we do another downward sweep beginning with the largest current factor
! 2751: * and stopping just above the largest current composite. Every Q_j we
! 2752: * pass is turned into a P_i. (Different primes are automatically coprime
! 2753: * among each other, and primes tend not to divide smaller composites.)
! 2754: */
! 2755: /* (We have no use for comparing the square of a prime to N0. Normally
! 2756: * we will get called after casting out only the smallest primes, and
! 2757: * since we cannot guarantee that we see the large prime factors in as-
! 2758: * cending order, we cannot stop when we find one larger than sqrt(N0).)
! 2759: */
! 2760: /* Data structure: We keep everything in a single t_VEC of t_INTs. The
! 2761: * first component records whether we're doing full (NULL) or Moebius (un)
! 2762: * factorization; in the latter case many subroutines return a sentinel
! 2763: * value as soon as they spot an exponent > 1. The second component records
! 2764: * the hint from factorint()'s optional flag, for use by ifac_crack().
! 2765: * The remaining components (initially 15) are used in groups of three:
! 2766: * a GEN pointer at the t_INT value of the factor, a pointer at the t_INT
! 2767: * exponent (usually gun or gdeux so we don't clutter up the stack too
! 2768: * much), and another t_INT GEN pointer to record the class of the factor:
! 2769: * NULL for unknown, zero for known composite C_k, un for known prime Q_j
! 2770: * awaiting trial division, and deux for finished prime P_i.
! 2771: */
! 2772: /* When during the division stage we re-sort a C_k-turned-U_l to a lower
! 2773: * position, we rotate any intervening material upward towards its old
! 2774: * slot. When a C_k was divided down to 1, its slot is left empty at
! 2775: * first; similarly when the re-sorting detects a repeated factor.
! 2776: * After the sorting phase, we de-fragment the list and squeeze all the
! 2777: * occupied slots together to the high end, so that ifac_crack() has room
! 2778: * for new factors. When this doesn't suffice, we abandon the current
! 2779: * vector and allocate a somewhat larger one, defragmenting again during
! 2780: * copying.
! 2781: */
! 2782: /* (For internal use, note that all exponents will fit into C longs, given
! 2783: * PARI's lgefint field size. When we work with them, we sometimes read
! 2784: * out the GEN pointer, and sometimes do an itos, whatever is more con-
! 2785: * venient for the task at hand.)
! 2786: */
! 2787:
! 2788: /*** Overview and forward declarations: ***/
! 2789:
! 2790: /* The `*where' argument in the following points into *partial at the
! 2791: * first of the three fields of the first occupied slot. It's there
! 2792: * because the caller would already know where `here' is, so we don't
! 2793: * want to search for it again, although it wouldn't take much time.
! 2794: * On the other hand, we do not preserve this from one user-interface
! 2795: * call to the next.
! 2796: */
! 2797:
! 2798: static GEN
! 2799: ifac_find(GEN *partial, GEN *where);
! 2800: /* Return GEN pointing at the first nonempty slot strictly behind the
! 2801: * current *where, or NULL if such doesn't exist. Can be used to skip
! 2802: * a range of vacant slots, or to initialize *where in the first place
! 2803: * (pass partial in both args). Does not modify its argument pointers.
! 2804: */
! 2805:
! 2806: void
! 2807: ifac_realloc(GEN *partial, GEN *where, long new_lg);
! 2808: /* Move to a larger main vector, updating *where if it points into it.
! 2809: * Certainly updates *partial. Can be used as a specialized gcopy before
! 2810: * a gerepileupto()/gerepilemanysp() (pass 0 as the new length).
! 2811: * Normally, one would pass new_lg=1 to let this function guess the
! 2812: * new size. To be used sparingly.
! 2813: */
! 2814:
! 2815: static long
! 2816: ifac_crack(GEN *partial, GEN *where);
! 2817: /* Split the first (composite) entry. There _must_ already be room for
! 2818: * another factor below *where, and *where will be updated. Factor and
! 2819: * cofactor will be inserted in the correct order, updating *where, or
! 2820: * factor^k will be inserted if such should be the case (leaving *where
! 2821: * unchanged). The factor or factors will be set to unknown, and inherit
! 2822: * the exponent (or a multiple thereof) of its/their ancestor. Returns
! 2823: * number of factors written into the structure (normally 2, but 1 if a
! 2824: * factor equalled its cofactor, and may be more than 1 if a factoring
! 2825: * engine returned a vector of factors instead of a single factor). Can
! 2826: * reallocate the data structure in the vector-of-factors case (but not
! 2827: * in the more common single-factor case)
! 2828: */
! 2829:
! 2830: static long
! 2831: ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec);
! 2832: /* Gets called to complete ifac_crack()'s job when a factoring engine
! 2833: * splits the current factor into a product of three or more new factors.
! 2834: * Makes room for them if necessary, sorts them, gives them the right
! 2835: * exponents and class etc. Also returns the number of factors actually
! 2836: * written, which may be less than the number of components in facvec
! 2837: * if there are duplicates.--- Vectors of factors (cf pollardbrent()
! 2838: * above) actually contain `slots' of three GENs per factor with the
! 2839: * three fields being interpreted exactly as in our partial factorization
! 2840: * data structure. Thus `engines' can tell us what they already happen to
! 2841: * know about factors being prime or composite and/or appearing to a power
! 2842: * larger than the first
! 2843: */
! 2844:
! 2845: static long
! 2846: ifac_divide(GEN *partial, GEN *where);
! 2847: /* Divide all current composites by first (prime, class Q) entry, updating
! 2848: * its exponent, and turning it into a finished prime (class P). Return 1
! 2849: * if any such divisions succeeded (in Moebius mode, the update may then
! 2850: * not have been completed), or 0 if none of them succeeded. Doesn't
! 2851: * modify *where.
! 2852: */
! 2853:
! 2854: static long
! 2855: ifac_sort_one(GEN *partial, GEN *where, GEN washere);
! 2856: /* re-sort one (typically unknown) entry from washere to a new position,
! 2857: * rotating intervening entries upward to fill the vacant space. It may
! 2858: * happen (rarely) that the new position is the same as the old one, or
! 2859: * that the new value of the entry coincides with a value already occupying
! 2860: * a lower slot, in which latter case we just add exponents (and use the
! 2861: * `more known' class, and return 1 immediately when in Moebius mode).
! 2862: * The slots between *where and washere must be in sorted order, so a
! 2863: * sweep using this to re-sort several unknowns must proceed upward (see
! 2864: * ifac_resort() below). Return 1 if we see an exponent > 1 (in Moebius
! 2865: * mode without completing the update), 0 otherwise.
! 2866: */
! 2867:
! 2868: static long
! 2869: ifac_resort(GEN *partial, GEN *where);
! 2870: /* sort all current unknowns downward to where they belong. Sweeps
! 2871: * in the upward direction. Not needed after ifac_crack(), only when
! 2872: * ifac_divide() returned true. May update *where. Returns 1 when an
! 2873: * ifac_sort_one() call does so to indicate a repeated factor, or 0 if
! 2874: * any and all such calls returned 0
! 2875: */
! 2876:
! 2877: static void
! 2878: ifac_defrag(GEN *partial, GEN *where);
! 2879: /* defragment: collect and squeeze out any unoccupied slots above *where
! 2880: * during a downward sweep. Unoccupied slots arise when a composite factor
! 2881: * dissolves completely whilst dividing off a prime, or when ifac_resort()
! 2882: * spots a coincidence and merges two factors. *where will be updated
! 2883: */
! 2884:
! 2885: static void
! 2886: ifac_whoiswho(GEN *partial, GEN *where, long after_crack);
! 2887: /* determine primality or compositeness of all current unknowns, and set
! 2888: * class Q primes to finished (class P) if everything larger is already
! 2889: * known to be prime. When after_crack is nonnegative, only look at the
! 2890: * first after_crack things in the list (do nothing when it's zero)
! 2891: */
! 2892:
! 2893: static GEN
! 2894: ifac_main(GEN *partial);
! 2895: /* main loop: iterate until smallest entry is a finished prime; returns
! 2896: * a `where' pointer, or gun if nothing left, or gzero in Moebius mode if
! 2897: * we aren't squarefree
! 2898: */
! 2899:
! 2900: /* NB In the most common cases, control flows from the user interface to
! 2901: * ifac_main() and then to a succession of ifac_crack()s and ifac_divide()s,
! 2902: * with (typically) none of the latter finding anything.
! 2903: */
! 2904:
! 2905: /** user interface: **/
! 2906: /* return initial data structure, see ifac_crack() below for semantics
! 2907: * of the hint argument
! 2908: */
! 2909: GEN
! 2910: ifac_start(GEN n, long moebius, long hint);
! 2911:
! 2912: /* run main loop until primary factor is found, return the prime and
! 2913: * assign the exponent. If nothing left, return gun and set exponent
! 2914: * to 0; if in Moebius mode and a square factor is discovered, return
! 2915: * gzero and set exponent to 0
! 2916: */
! 2917: GEN
! 2918: ifac_primary_factor(GEN *partial, long *exponent);
! 2919:
! 2920: /* call ifac_start() and run main loop until factorization is complete,
! 2921: * accumulating prime / exponent pairs on the PARI stack to be picked up
! 2922: * by aux_end(). Return number of distinct primes found
! 2923: */
! 2924: long
! 2925: ifac_decomp(GEN n, long hint);
! 2926:
! 2927: /* completely encapsulated functions; these call ifac_start() themselves,
! 2928: * and ensure proper stack housekeeping etc. Call them on any large
! 2929: * composite left over after trial division, and multiply/add the result
! 2930: * onto whatever you already have from the small factors. Don't call
! 2931: * them on large primes; they will run into trouble
! 2932: */
! 2933: long
! 2934: ifac_moebius(GEN n, long hint);
! 2935:
! 2936: long
! 2937: ifac_issquarefree(GEN n, long hint);
! 2938:
! 2939: long
! 2940: ifac_omega(GEN n, long hint);
! 2941:
! 2942: long
! 2943: ifac_bigomega(GEN n, long hint);
! 2944:
! 2945: GEN
! 2946: ifac_totient(GEN n, long hint); /* for gp's eulerphi() */
! 2947:
! 2948: GEN
! 2949: ifac_numdiv(GEN n, long hint);
! 2950:
! 2951: GEN
! 2952: ifac_sumdiv(GEN n, long hint);
! 2953:
! 2954: GEN
! 2955: ifac_sumdivk(GEN n, long k, long hint);
! 2956:
! 2957: /*** implementation ***/
! 2958:
! 2959: #define ifac_initial_length 24 /* codeword, moebius flag, hint, 7 slots */
! 2960: /* (more than enough in most cases -- a 512-bit product of distinct 8-bit
! 2961: * primes needs at most 7 slots at a time)
! 2962: */
! 2963:
! 2964: GEN
! 2965: ifac_start(GEN n, long moebius, long hint)
! 2966: {
! 2967: GEN part, here;
! 2968:
! 2969: if (typ(n) != t_INT) err(typeer, "ifac_start");
! 2970: if (signe(n) == 0)
! 2971: err(talker, "factoring 0 in ifac_start");
! 2972:
! 2973: part = cgetg(ifac_initial_length, t_VEC);
! 2974: here = part + ifac_initial_length;
! 2975: part[1] = moebius? un : (long)NULL;
! 2976: switch(hint)
! 2977: {
! 2978: case 0:
! 2979: part[2] = zero; break;
! 2980: case 1:
! 2981: part[2] = un; break;
! 2982: case 2:
! 2983: part[2] = deux; break;
! 2984: default:
! 2985: part[2] = (long)stoi(hint);
! 2986: }
! 2987: if (isonstack(n))
! 2988: n = absi(n);
! 2989: /* make copy, because we'll later want to mpdivis() into it in place.
! 2990: * If it's not on stack, then we assume it is a clone made for us by
! 2991: * auxdecomp0(), and we assume the sign has already been set positive
! 2992: */
! 2993: /* fill first slot at the top end */
! 2994: *--here = zero; /* initially composite */
! 2995: *--here = un; /* initial exponent 1 */
! 2996: *--here = (long) n;
! 2997: /* and NULL out the remaining slots */
! 2998: while (here > part + 3) *--here = (long)NULL;
! 2999: return part;
! 3000: }
! 3001:
! 3002: static GEN
! 3003: ifac_find(GEN *partial, GEN *where)
! 3004: {
! 3005: long lgp = lg(*partial);
! 3006: GEN end = *partial + lgp;
! 3007: GEN scan = *where + 3;
! 3008:
! 3009: if (DEBUGLEVEL >= 5)
! 3010: {
! 3011: if (!*partial || typ(*partial) != t_VEC)
! 3012: err(typeer, "ifac_find");
! 3013: if (lg(*partial) < ifac_initial_length)
! 3014: err(talker, "partial impossibly short in ifac_find");
! 3015: if (!(*where) ||
! 3016: *where > *partial + lgp - 3 ||
! 3017: *where < *partial) /* sic */
! 3018: err(talker, "`*where\' out of bounds in ifac_find");
! 3019: }
! 3020: while (scan < end && !*scan) scan += 3;
! 3021: /* paranoia -- check completely NULLed ? nope -- we never inspect the
! 3022: * exponent field for deciding whether a slot is empty or occupied
! 3023: */
! 3024: if (scan < end)
! 3025: {
! 3026: if (DEBUGLEVEL >= 5)
! 3027: {
! 3028: if (!scan[1])
! 3029: err(talker, "factor has NULL exponent in ifac_find");
! 3030: }
! 3031: return scan;
! 3032: }
! 3033: return NULL;
! 3034: }
! 3035:
! 3036: /* simple defragmenter */
! 3037: static void
! 3038: ifac_defrag(GEN *partial, GEN *where)
! 3039: {
! 3040: long lgp = lg(*partial);
! 3041: GEN scan_new = *partial + lgp - 3, scan_old = scan_new;
! 3042:
! 3043: while (scan_old >= *where)
! 3044: {
! 3045: if (*scan_old) /* slot occupied? */
! 3046: {
! 3047: if (scan_old < scan_new)
! 3048: {
! 3049: scan_new[2] = scan_old[2];
! 3050: scan_new[1] = scan_old[1];
! 3051: *scan_new = *scan_old;
! 3052: }
! 3053: scan_new -= 3; /* point at next slot to be written */
! 3054: }
! 3055: scan_old -= 3;
! 3056: }
! 3057: scan_new += 3; /* back up to last slot written */
! 3058: *where = scan_new;
! 3059: while (scan_new > *partial + 3)
! 3060: *--scan_new = (long)NULL; /* erase junk */
! 3061: }
! 3062:
! 3063: /* and complex version combined with reallocation. If new_lg is 0, we
! 3064: * use the old length, so this acts just like gcopy except that the where
! 3065: * pointer is carried along; if it is 1, we make an educated guess.
! 3066: * Exception: If new_lg is 0, the vector is full to the brim, and the
! 3067: * first entry is composite, we make it longer to avoid being called again
! 3068: * a microsecond later (at significant cost).
! 3069: * It is safe to call this with NULL for the where argument; if it doesn't
! 3070: * point anywhere within the old structure, it will be left alone
! 3071: */
! 3072: void
! 3073: ifac_realloc(GEN *partial, GEN *where, long new_lg)
! 3074: {
! 3075: long old_lg = lg(*partial);
! 3076: GEN newpart, scan_new, scan_old;
! 3077:
! 3078: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
! 3079: {
! 3080: if (!*partial || typ(*partial) != t_VEC)
! 3081: err(typeer, "ifac_realloc");
! 3082: if (lg(*partial) < ifac_initial_length)
! 3083: err(talker, "partial impossibly short in ifac_realloc");
! 3084: }
! 3085:
! 3086: if (new_lg == 1)
! 3087: new_lg = 2*old_lg - 6; /* from 7 slots to 13 to 25... */
! 3088: else if (new_lg <= old_lg) /* includes case new_lg == 0 */
! 3089: {
! 3090: new_lg = old_lg;
! 3091: if ((*partial)[3] && /* structure full */
! 3092: ((*partial)[5]==zero || (*partial)[5]==(long)NULL))
! 3093: /* and first entry composite or unknown */
! 3094: new_lg += 6; /* give it a little more breathing space */
! 3095: }
! 3096: newpart = cgetg(new_lg, t_VEC);
! 3097: if (DEBUGMEM >= 3)
! 3098: {
! 3099: fprintferr("IFAC: new partial factorization structure (%ld slots)\n",
! 3100: (new_lg - 3)/3);
! 3101: flusherr();
! 3102: }
! 3103: newpart[1] = (*partial)[1]; /* moebius */
! 3104: newpart[2] = (*partial)[2]; /* hint */
! 3105: /* downward sweep through the old *partial, picking up where1 and carry-
! 3106: * ing it over if and when we pass it. (This will only be useful if
! 3107: * it pointed at a non-empty slot.) Factors are licopy()d so that we
! 3108: * again have a nice object (parent older than children, connected),
! 3109: * except the one factor that may still be living in a clone where n
! 3110: * originally was; exponents are similarly copied if they aren't global
! 3111: * constants; class-of-factor fields are always global constants so we
! 3112: * need only copy them as pointers. Caller may then do a gerepileupto()
! 3113: * or a gerepilemanysp()
! 3114: */
! 3115: scan_new = newpart + new_lg - 3;
! 3116: scan_old = *partial + old_lg - 3;
! 3117: for (; scan_old > *partial + 2; scan_old -= 3)
! 3118: {
! 3119: if (*where == scan_old) *where = scan_new;
! 3120: if (!*scan_old) continue; /* skip empty slots */
! 3121:
! 3122: *scan_new =
! 3123: isonstack((GEN)(*scan_old)) ?
! 3124: licopy((GEN)(*scan_old)) : *scan_old;
! 3125: scan_new[1] =
! 3126: isonstack((GEN)(scan_old[1])) ?
! 3127: licopy((GEN)(scan_old[1])) : scan_old[1];
! 3128: scan_new[2] = scan_old[2];
! 3129: scan_new -= 3;
! 3130: }
! 3131: scan_new += 3; /* back up to last slot written */
! 3132: while (scan_new > newpart + 3)
! 3133: *--scan_new = (long)NULL;
! 3134: *partial = newpart;
! 3135: }
! 3136:
! 3137: #define moebius_mode ((*partial)[1])
! 3138:
! 3139: /* Bubble-sort-of-thing sort. Won't be exercised frequently,
! 3140: * so this is ok.
! 3141: */
! 3142: static long
! 3143: ifac_sort_one(GEN *partial, GEN *where, GEN washere)
! 3144: {
! 3145: GEN scan = washere - 3;
! 3146: GEN value, exponent, class0, class1;
! 3147: long cmp_res;
! 3148:
! 3149: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
! 3150: {
! 3151: long lgp;
! 3152: if (!*partial || typ(*partial) != t_VEC)
! 3153: err(typeer, "ifac_sort_one");
! 3154: if ((lgp = lg(*partial)) < ifac_initial_length)
! 3155: err(talker, "partial impossibly short in ifac_sort_one");
! 3156: if (!(*where) ||
! 3157: *where < *partial + 3 ||
! 3158: *where > *partial + lgp - 3)
! 3159: err(talker, "`*where\' out of bounds in ifac_sort_one");
! 3160: if (!washere ||
! 3161: washere < *where ||
! 3162: washere > *partial + lgp - 3)
! 3163: err(talker, "`washere\' out of bounds in ifac_sort_one");
! 3164: }
! 3165: value = (GEN)(*washere);
! 3166: exponent = (GEN)(washere[1]);
! 3167: if (exponent != gun && moebius_mode && cmpsi(1,exponent) < 0)
! 3168: return 1; /* should have been detected by caller */
! 3169: class0 = (GEN)(washere[2]);
! 3170:
! 3171: if (scan < *where) return 0; /* nothing to do, washere==*where */
! 3172:
! 3173: cmp_res = -1; /* sentinel */
! 3174: while (scan >= *where) /* therefore at least once */
! 3175: {
! 3176: if (*scan) /* current slot nonempty */
! 3177: {
! 3178: /* check against where */
! 3179: cmp_res = cmpii(value, (GEN)(*scan));
! 3180: if (cmp_res >= 0) break; /* have found where to stop */
! 3181: }
! 3182: /* copy current slot upward by one position and move pointers down */
! 3183: scan[5] = scan[2];
! 3184: scan[4] = scan[1];
! 3185: scan[3] = *scan;
! 3186: scan -= 3;
! 3187: }
! 3188: scan += 3;
! 3189: /* at this point there are the following possibilities:
! 3190: * (*) cmp_res == -1. Either value is less than that at *where, or for
! 3191: * some reason *where was pointing at one or more vacant slots and any
! 3192: * factors we saw en route were larger than value. At any rate,
! 3193: * scan == *where now, and scan is pointing at an empty slot, into
! 3194: * which we'll stash our entry.
! 3195: * (*) cmp_res == 0. The entry at scan-3 is the one, we compare class0
! 3196: * fields and add exponents, and put it all into the vacated scan slot,
! 3197: * NULLing the one at scan-3 (and possibly updating *where).
! 3198: * (*) cmp_res == 1. The slot at scan is the one to store our entry
! 3199: * into.
! 3200: */
! 3201: if (cmp_res != 0)
! 3202: {
! 3203: if (cmp_res < 0 && scan != *where)
! 3204: err(talker, "misaligned partial detected in ifac_sort_one");
! 3205: *scan = (long)value;
! 3206: scan[1] = (long)exponent;
! 3207: scan[2] = (long)class0;
! 3208: return 0;
! 3209: }
! 3210: /* case cmp_res == 0: repeated factor detected */
! 3211: if (DEBUGLEVEL >= 4)
! 3212: {
! 3213: fprintferr("IFAC: repeated factor %Z\n\tdetected in ifac_sort_one\n",
! 3214: value);
! 3215: flusherr();
! 3216: }
! 3217: if (moebius_mode) return 1; /* not squarefree */
! 3218: /* if old class0 was composite and new is prime, or vice versa,
! 3219: * complain (and if one class0 was unknown and the other wasn't,
! 3220: * use the known one)
! 3221: */
! 3222: class1 = (GEN)(scan[-1]);
! 3223: if (class0) /* should never be used */
! 3224: {
! 3225: if(class1)
! 3226: {
! 3227: if (class0 == gzero && class1 != gzero)
! 3228: err(talker, "composite equals prime in ifac_sort_one");
! 3229: else if (class0 != gzero && class1 == gzero)
! 3230: err(talker, "prime equals composite in ifac_sort_one");
! 3231: else if (class0 == gdeux) /* should happen even less */
! 3232: scan[2] = (long)class0; /* use it */
! 3233: }
! 3234: else /* shouldn't happen either */
! 3235: scan[2] = (long)class0; /* use it */
! 3236: }
! 3237: /* else stay with the existing known class0 */
! 3238: scan[2] = (long)class1;
! 3239: /* in any case, add exponents */
! 3240: if (scan[-2] == un && exponent == gun)
! 3241: scan[1] = deux;
! 3242: else
! 3243: scan[1] = laddii((GEN)(scan[-2]), exponent);
! 3244: /* move the value over */
! 3245: *scan = scan[-3];
! 3246: /* null out the vacated slot below */
! 3247: *--scan = (long)NULL;
! 3248: *--scan = (long)NULL;
! 3249: *--scan = (long)NULL;
! 3250: /* finally, see whether *where should be pulled in */
! 3251: if (scan == *where) *where += 3;
! 3252: return 0;
! 3253: }
! 3254:
! 3255: /* the following loop around the former doesn't need to check moebius_mode
! 3256: * because ifac_sort_one() never returns 1 in normal mode
! 3257: */
! 3258: static long
! 3259: ifac_resort(GEN *partial, GEN *where)
! 3260: {
! 3261: long lgp = lg(*partial), res = 0;
! 3262: GEN scan = *where;
! 3263:
! 3264: for (; scan < *partial + lgp; scan += 3)
! 3265: {
! 3266: if (*scan && /* slot occupied */
! 3267: !scan[2]) /* with an unknown */
! 3268: {
! 3269: res |= ifac_sort_one(partial, where, scan);
! 3270: if (res) return res; /* early exit */
! 3271: }
! 3272: }
! 3273: return res;
! 3274: }
! 3275:
! 3276: /* sweep downward so we can with luck turn some Qs into Ps */
! 3277: static void
! 3278: ifac_whoiswho(GEN *partial, GEN *where, long after_crack)
! 3279: {
! 3280: long lgp = lg(*partial), larger_compos = 0;
! 3281: GEN scan, scan_end = *partial + lgp - 3;
! 3282:
! 3283: if (DEBUGLEVEL >= 5)
! 3284: {
! 3285: if (!*partial || typ(*partial) != t_VEC)
! 3286: err(typeer, "ifac_whoiswho");
! 3287: if (lg(*partial) < ifac_initial_length)
! 3288: err(talker, "partial impossibly short in ifac_whoiswho");
! 3289: if (!(*where) ||
! 3290: *where > scan_end ||
! 3291: *where < *partial + 3)
! 3292: err(talker, "`*where\' out of bounds in ifac_whoiswho");
! 3293: }
! 3294:
! 3295: if (after_crack == 0) return;
! 3296: if (after_crack > 0)
! 3297: {
! 3298: larger_compos = 1; /* disable Q-to-P trick */
! 3299: scan = *where + 3*(after_crack - 1);
! 3300: /* check at most after_crack entries */
! 3301: if (scan > scan_end) /* ooops... */
! 3302: {
! 3303: err(warner, "avoiding nonexistent factors in ifac_whoiswho");
! 3304: scan = scan_end;
! 3305: }
! 3306: }
! 3307: else { larger_compos = 0; scan = scan_end; }
! 3308:
! 3309: for (; scan >= *where; scan -= 3)
! 3310: {
! 3311: if (scan[2]) /* known class of factor */
! 3312: {
! 3313: if (scan[2] == zero) larger_compos = 1;
! 3314: else if (!larger_compos && scan[2] == un)
! 3315: {
! 3316: if (DEBUGLEVEL >= 3)
! 3317: {
! 3318: fprintferr("IFAC: factor %Z\n\tis prime (no larger composite)\n",
! 3319: **where);
! 3320: fprintferr("IFAC: prime %Z\n\tappears with exponent = %ld\n",
! 3321: **where, itos((GEN)(*where)[1]));
! 3322: }
! 3323: scan[2] = deux;
! 3324: } /* no else case */
! 3325: continue;
! 3326: }
! 3327: scan[2] =
! 3328: (isprime((GEN)(*scan)) ?
! 3329: (larger_compos ? un : deux) : /* un- or finished prime */
! 3330: zero); /* composite */
! 3331:
! 3332: if (scan[2] == zero) larger_compos = 1;
! 3333: if (DEBUGLEVEL >= 3)
! 3334: {
! 3335: fprintferr("IFAC: factor %Z\n\tis %s\n", *scan,
! 3336: (scan[2] == zero ? "composite" : "prime"));
! 3337: }
! 3338: }
! 3339: }
! 3340:
! 3341: /* Here we normally do not check that the first entry is a not-finished
! 3342: * prime. Stack management: we may allocate a new exponent
! 3343: */
! 3344: static long
! 3345: ifac_divide(GEN *partial, GEN *where)
! 3346: {
! 3347: long lgp = lg(*partial);
! 3348: GEN scan = *where + 3;
! 3349: long res = 0, exponent, newexp, otherexp;
! 3350:
! 3351: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
! 3352: {
! 3353: if (!*partial || typ(*partial) != t_VEC)
! 3354: err(typeer, "ifac_divide");
! 3355: if (lg(*partial) < ifac_initial_length)
! 3356: err(talker, "partial impossibly short in ifac_divide");
! 3357: if (!(*where) ||
! 3358: *where > *partial + lgp - 3 ||
! 3359: *where < *partial + 3)
! 3360: err(talker, "`*where\' out of bounds in ifac_divide");
! 3361: if ((*where)[2] != un)
! 3362: err(talker, "division by composite or finished prime in ifac_divide");
! 3363: }
! 3364: if (!(**where)) /* always test just this one */
! 3365: err(talker, "division by nothing in ifac_divide");
! 3366:
! 3367: newexp = exponent = itos((GEN)((*where)[1]));
! 3368: if (exponent > 1 && moebius_mode) return 1;
! 3369: /* should've been caught by caller already */
! 3370:
! 3371: /* go for it */
! 3372: for (; scan < *partial + lgp; scan += 3)
! 3373: {
! 3374: if (scan[2] != zero) continue; /* the other thing ain't composite */
! 3375: otherexp = 0;
! 3376: /* let mpdivis divide the other factor in place to keep stack clutter
! 3377: minimal */
! 3378: while (mpdivis((GEN)(*scan), (GEN)(**where), (GEN)(*scan)))
! 3379: {
! 3380: if (moebius_mode) return 1; /* immediately */
! 3381: if (!otherexp) otherexp = itos((GEN)(scan[1]));
! 3382: newexp += otherexp;
! 3383: }
! 3384: if (newexp > exponent) /* did anything happen? */
! 3385: {
! 3386: (*where)[1] = (newexp == 2 ? deux : (long)(stoi(newexp)));
! 3387: exponent = newexp;
! 3388: if (is_pm1((GEN)(*scan))) /* factor dissolved completely */
! 3389: {
! 3390: *scan = scan[1] = (long)NULL;
! 3391: if (DEBUGLEVEL >= 4)
! 3392: fprintferr("IFAC: a factor was a power of another prime factor\n");
! 3393: }
! 3394: else if (DEBUGLEVEL >= 4)
! 3395: {
! 3396: fprintferr("IFAC: a factor was divisible by another prime factor,\n");
! 3397: fprintferr("\tleaving a cofactor = %Z\n", *scan);
! 3398: }
! 3399: scan[2] = (long)NULL; /* at any rate it's Unknown now */
! 3400: res = 1;
! 3401: if (DEBUGLEVEL >= 5)
! 3402: {
! 3403: fprintferr("IFAC: prime %Z\n\tappears at least to the power %ld\n",
! 3404: **where, newexp);
! 3405: }
! 3406: }
! 3407: } /* for */
! 3408: (*where)[2] = deux; /* make it a finished prime */
! 3409: if (DEBUGLEVEL >= 3)
! 3410: {
! 3411: fprintferr("IFAC: prime %Z\n\tappears with exponent = %ld\n",
! 3412: **where, newexp);
! 3413: }
! 3414: return res;
! 3415: }
! 3416:
! 3417:
! 3418: GEN mpqs(GEN N); /* in src/modules/mpqs.c, maybe a dummy,
! 3419: * returns a factor, or a vector of factors,
! 3420: * or NULL
! 3421: */
! 3422:
! 3423: /* The following takes the place of 2.0.9.alpha's find_factor(). */
! 3424:
! 3425: /* The meaning of the hint changes against 2.0.9.alpha to:
! 3426: * hint == 0 : Use our own strategy, and this should be the default
! 3427: * hint & 1 : Avoid mpqs(), use ellfacteur() after pollardbrent()
! 3428: * hint & 2 : Avoid first-stage ellfacteur() in favour of mpqs()
! 3429: * (which may still fall back to ellfacteur() if mpqs() is not installed
! 3430: * or gives up)
! 3431: * hint & 4 : Avoid even the pollardbrent() and squfof() stages
! 3432: * hint & 8 : Avoid final ellfacteur(); this may `declare' a composite
! 3433: * to be prime.
! 3434: */
! 3435:
! 3436: /* stack housekeeping: this routine may create one or more objects (a new
! 3437: * factor, or possibly several, and perhaps one or more new exponents > 2)
! 3438: * Added squfof --GN2000Oct01
! 3439: */
! 3440: static long
! 3441: ifac_crack(GEN *partial, GEN *where)
! 3442: {
! 3443: long hint, cmp_res, exp1 = 1, exp2 = 1, av;
! 3444: GEN factor = NULL, exponent;
! 3445:
! 3446: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
! 3447: {
! 3448: long lgp;
! 3449: if (!*partial || typ(*partial) != t_VEC)
! 3450: err(typeer, "ifac_crack");
! 3451: if ((lgp = lg(*partial)) < ifac_initial_length)
! 3452: err(talker, "partial impossibly short in ifac_crack");
! 3453: if (!(*where) ||
! 3454: *where < *partial + 6 || /* sic -- caller must realloc first */
! 3455: *where > *partial + lgp - 3)
! 3456: err(talker, "`*where\' out of bounds in ifac_crack");
! 3457: if (!(**where) || typ((GEN)(**where)) != t_INT)
! 3458: err(typeer, "ifac_crack");
! 3459: if ((*where)[2] != zero)
! 3460: err(talker, "operand not known composite in ifac_crack");
! 3461: }
! 3462: hint = itos((GEN)((*partial)[2])) & 15;
! 3463: exponent = (GEN)((*where)[1]);
! 3464:
! 3465: if (DEBUGLEVEL >= 3)
! 3466: fprintferr("IFAC: cracking composite\n\t%Z\n", **where);
! 3467:
! 3468: /* crack squares. Quite fast due to the initial square residue test */
! 3469: if (DEBUGLEVEL >= 4)
! 3470: fprintferr("IFAC: checking for pure square\n");
! 3471: av = avma;
! 3472: while (carrecomplet((GEN)(**where), &factor))
! 3473: {
! 3474: if (DEBUGLEVEL >= 4)
! 3475: fprintferr("IFAC: found %Z =\n\t%Z ^2\n", **where, factor);
! 3476: affii(factor, (GEN)(**where)); avma = av; factor = NULL;
! 3477: if (exponent == gun)
! 3478: (*where)[1] = deux;
! 3479: else if (exponent == gdeux)
! 3480: { (*where)[1] = (long)stoi(4); av = avma; }
! 3481: else
! 3482: { affii(shifti(exponent, 1), (GEN)((*where)[1])); avma = av; }
! 3483: exponent = (GEN)((*where)[1]);
! 3484: if (moebius_mode) return 0; /* no need to carry on... */
! 3485: exp1 = 2;
! 3486: } /* while carrecomplet */
! 3487:
! 3488: /* check whether our composite hasn't become prime */
! 3489: if (exp1 > 1 && hint != 15 && isprime((GEN)(**where)))
! 3490: {
! 3491: (*where)[2] = un;
! 3492: if (DEBUGLEVEL >= 4)
! 3493: {
! 3494: fprintferr("IFAC: factor %Z\n\tis prime\n",**where);
! 3495: flusherr();
! 3496: }
! 3497: return 0; /* bypass subsequent ifac_whoiswho() call */
! 3498: }
! 3499: /* still composite -- carry on */
! 3500:
! 3501: /* MPQS cannot factor prime powers; check for cubes/5th/7th powers.
! 3502: * Do this even if MPQS is blocked by hint -- it still serves a useful
! 3503: * purpose in bounded factorization
! 3504: */
! 3505: {
! 3506: long mask = 7;
! 3507: if (DEBUGLEVEL == 4)
! 3508: fprintferr("IFAC: checking for odd power\n");
! 3509: /* (At debug levels > 4, is_odd_power() itself prints something more
! 3510: * informative)
! 3511: */
! 3512: av = avma;
! 3513: while ((exp1 = /* assignment */
! 3514: is_odd_power((GEN)(**where), &factor, &mask)))
! 3515: {
! 3516: if (exp2 == 1) exp2 = exp1; /* remember this after the loop */
! 3517: if (DEBUGLEVEL >= 4)
! 3518: fprintferr("IFAC: found %Z =\n\t%Z ^%ld\n", **where, factor, exp1);
! 3519: affii(factor, (GEN)(**where)); avma = av; factor = NULL;
! 3520: if (exponent == gun)
! 3521: { (*where)[1] = (long)stoi(exp1); av = avma; }
! 3522: else if (exponent == gdeux)
! 3523: { (*where)[1] = (long)stoi(exp1<<1); av = avma; }
! 3524: else
! 3525: { affii(mulsi(exp1, exponent), (GEN)((*where)[1])); avma = av; }
! 3526: exponent = (GEN)((*where)[1]);
! 3527: if (moebius_mode) return 0; /* no need to carry on... */
! 3528: } /* while is_odd_power */
! 3529:
! 3530: if (exp2 > 1 && hint != 15 && isprime((GEN)(**where)))
! 3531: { /* Something nice has happened and our composite has become prime */
! 3532: (*where)[2] = un;
! 3533: if (DEBUGLEVEL >= 4)
! 3534: {
! 3535: fprintferr("IFAC: factor %Z\n\tis prime\n", **where);
! 3536: flusherr();
! 3537: }
! 3538: return 0; /* bypass subsequent ifac_whoiswho() call */
! 3539: }
! 3540: } /* odd power stage */
! 3541:
! 3542: /* pollardbrent() Rho usually gets a first chance */
! 3543: if (!(hint & 4))
! 3544: {
! 3545: if (DEBUGLEVEL >= 4)
! 3546: fprintferr("IFAC: trying Pollard-Brent rho method first\n");
! 3547: factor = pollardbrent((GEN)(**where));
! 3548: } /* Rho stage */
! 3549:
! 3550: /* Shanks' squfof() is next. It will pass up the chance silently when
! 3551: * the input number is too large. We put this under the same governing
! 3552: * bit of the hint parameter, for no very good reason other than avoiding
! 3553: * a proliferation of further meaningful bits in all the wrong order.
! 3554: */
! 3555: if (!factor && !(hint & 4))
! 3556: {
! 3557: if (DEBUGLEVEL >= 4)
! 3558: fprintferr("IFAC: trying Shanks' SQUFOF, will fail silently if input\n"
! 3559: " is too large for it.\n");
! 3560: factor = squfof((GEN)(**where), 0); /* allow squfof's own diagnostics */
! 3561: }
! 3562:
! 3563: /* if this didn't work, try one of our high-power beasties */
! 3564: if (!factor && !(hint & 2))
! 3565: {
! 3566: if (DEBUGLEVEL >= 4)
! 3567: fprintferr("IFAC: trying Lenstra-Montgomery ECM\n");
! 3568: factor = ellfacteur((GEN)(**where), 0); /* do not insist */
! 3569: } /* First ECM stage */
! 3570:
! 3571: if (!factor && !(hint & 1))
! 3572: {
! 3573: if (DEBUGLEVEL >= 4)
! 3574: fprintferr("IFAC: trying Multi-Polynomial Quadratic Sieve\n");
! 3575: factor = mpqs((GEN)(**where));
! 3576: } /* MPQS stage */
! 3577:
! 3578: if (!factor)
! 3579: {
! 3580: if (!(hint & 8)) /* still no luck? force it */
! 3581: {
! 3582: if (DEBUGLEVEL >= 4)
! 3583: fprintferr("IFAC: forcing ECM, may take some time\n");
! 3584: factor = ellfacteur((GEN)(**where), 1);
! 3585: } /* final ECM stage, guaranteed to succeed */
! 3586: else /* limited factorization */
! 3587: {
! 3588: if (DEBUGLEVEL >= 2)
! 3589: {
! 3590: err(warner, "IFAC: unfactored composite declared prime");
! 3591: /* don't print it out at level 3 or above, where it would appear
! 3592: * several times before and after this message already
! 3593: */
! 3594: if (DEBUGLEVEL == 2)
! 3595: {
! 3596: fprintferr("\t%Z\n",**where);
! 3597: flusherr();
! 3598: }
! 3599: }
! 3600: (*where)[2] = un; /* might as well trial-divide by it... */
! 3601: return 1;
! 3602: }
! 3603: } /* Final ECM stage */
! 3604:
! 3605: if (DEBUGLEVEL >= 1)
! 3606: {
! 3607: if (!factor) /* never reached */
! 3608: err(talker, "all available factoring methods failed in ifac_crack");
! 3609: }
! 3610: if (typ(factor) == t_VEC) /* delegate this case */
! 3611: return ifac_insert_multiplet(partial, where, factor);
! 3612:
! 3613: else if (typ(factor) != t_INT)
! 3614: {
! 3615: fprintferr("IFAC: factorizer returned strange object to ifac_crack\n");
! 3616: outerr(factor);
! 3617: err(bugparier, "factoring");
! 3618: }
! 3619:
! 3620: /* got single integer back: work out the cofactor (in place) */
! 3621: if (!mpdivis((GEN)(**where), factor, (GEN)(**where)))
! 3622: {
! 3623: fprintferr("IFAC: factoring %Z\n", **where);
! 3624: fprintferr("\tyielded `factor\' %Z\n\twhich isn't!\n", factor);
! 3625: err(bugparier, "factoring");
! 3626: }
! 3627:
! 3628: /* the factoring engines report the factor found when DEBUGLEVEL is
! 3629: * large enough; let's tell about the cofactor
! 3630: */
! 3631: if (DEBUGLEVEL >= 4)
! 3632: fprintferr("IFAC: cofactor = %Z\n", **where);
! 3633:
! 3634: /* ok, now `factor' is one factor and **where is the other, find out
! 3635: * which is larger
! 3636: */
! 3637: cmp_res = cmpii(factor, (GEN)(**where));
! 3638: if (cmp_res < 0) /* common case */
! 3639: {
! 3640: (*where)[2] = (long)NULL; /* mark cofactor `unknown' */
! 3641: (*where)[-1] = (long)NULL; /* mark factor `unknown' */
! 3642: (*where)[-2] =
! 3643: isonstack(exponent) ? licopy(exponent) : (long)exponent;
! 3644: *where -= 3;
! 3645: **where = (long)factor;
! 3646: return 2;
! 3647: }
! 3648: else if (cmp_res == 0) /* hep, split a square in the middle */
! 3649: {
! 3650: err(warner,
! 3651: "square not found by carrecomplet, ifac_crack recovering");
! 3652: cgiv(factor);
! 3653: (*where)[2] = (long)NULL; /* mark the sqrt `unknown' */
! 3654: if (exponent == gun) /* double the exponent */
! 3655: (*where)[1] = deux;
! 3656: else if (exponent == gdeux)
! 3657: (*where)[1] = (long)stoi(4); /* make a new one */
! 3658: else /* overwrite old exponent */
! 3659: {
! 3660: av = avma;
! 3661: affii(shifti(exponent, 1), (GEN)((*where)[1]));
! 3662: avma = av;
! 3663: /* leave *where unchanged */
! 3664: }
! 3665: if (moebius_mode) return 0;
! 3666: return 1;
! 3667: }
! 3668: else /* factor > cofactor, rearrange */
! 3669: {
! 3670: (*where)[2] = (long)NULL; /* mark factor `unknown' */
! 3671: (*where)[-1] = (long)NULL; /* mark cofactor `unknown' */
! 3672: (*where)[-2] =
! 3673: isonstack(exponent) ? licopy(exponent) : (long)exponent;
! 3674: *where -= 3;
! 3675: **where = (*where)[3]; /* move cofactor pointer to lowest slot */
! 3676: (*where)[3] = (long)factor; /* save factor */
! 3677: return 2;
! 3678: }
! 3679: }
! 3680:
! 3681: /* the following doesn't collect garbage; caller's caller should do it
! 3682: * (which means ifac_main()). No diagnostics either, the factoring engine
! 3683: * should have printed what it found when DEBUGLEVEL>=4 or so. Note facvec
! 3684: * contains slots of three components per factor; repeated factors are
! 3685: * expressly allowed (and their classes shouldn't contradict each other
! 3686: * whereas their exponents will be added up)
! 3687: */
! 3688: static long
! 3689: ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec)
! 3690: {
! 3691: long j,k=1, lfv=lg(facvec)-1, nf=lfv/3, room=(long)(*where-*partial);
! 3692: /* one of the factors will go into the *where slot, so room is now
! 3693: * 3 times the number of slots we can use
! 3694: */
! 3695: long needroom = lfv - room;
! 3696: GEN sorted, auxvec = cgetg(nf+1, t_VEC), factor;
! 3697: long exponent = itos((GEN)((*where)[1])); /* the old exponent */
! 3698: GEN newexp;
! 3699:
! 3700: if (DEBUGLEVEL >= 5)
! 3701: fprintferr("IFAC: incorporating set of %ld factor(s)%s\n",
! 3702: nf, (DEBUGLEVEL >=6 ? "..." : ""));
! 3703: /* fixed: squfof may return a single, squared, factor as a set
! 3704: * --GN2000Oct01
! 3705: */
! 3706: if (needroom > 0)
! 3707: ifac_realloc(partial, where, lg(*partial) + needroom + 3);
! 3708: /* one extra slot for paranoia, errm, future use */
! 3709:
! 3710: /* create sort permutation from the values of the factors */
! 3711: for (j=nf; j; j--) auxvec[j] = facvec[3*j-2]; /* just the pointers */
! 3712: sorted = sindexsort(auxvec);
! 3713: /* and readjust the result for the triple spacing */
! 3714: for (j=nf; j; j--) sorted[j] = 3*sorted[j]-2;
! 3715: if (DEBUGLEVEL >= 6)
! 3716: fprintferr("\tsorted them...\n");
! 3717:
! 3718: /* store factors, beginning at *where, and catching any duplicates */
! 3719: **where = facvec[sorted[nf]];
! 3720: if ((newexp = (GEN)(facvec[sorted[nf]+1])) != gun) /* new exponent > 1 */
! 3721: {
! 3722: if (exponent == 1)
! 3723: (*where)[1] = isonstack(newexp) ? licopy(newexp) : (long)newexp;
! 3724: else
! 3725: (*where)[1] = lmulsi(exponent, newexp);
! 3726: } /* if new exponent is 1, the old exponent already in place will do */
! 3727: (*where)[2] = facvec[sorted[nf]+2]; /* copy class */
! 3728: if (DEBUGLEVEL >= 6)
! 3729: fprintferr("\tstored (largest) factor no. %ld...\n", nf);
! 3730:
! 3731: for (j=nf-1; j; j--)
! 3732: {
! 3733: factor = (GEN)(facvec[sorted[j]]);
! 3734: if (egalii(factor, (GEN)(**where)))
! 3735: {
! 3736: if (DEBUGLEVEL >= 6)
! 3737: fprintferr("\tfactor no. %ld is a duplicate%s\n",
! 3738: j, (j>1 ? "..." : ""));
! 3739: /* update exponent, ignore class which would already have been set,
! 3740: * and then forget current factor
! 3741: */
! 3742: if ((newexp = (GEN)(facvec[sorted[j]+1])) != gun) /* new exp > 1 */
! 3743: { /* now we have at least 3 */
! 3744: (*where)[1] = laddii((GEN)((*where)[1]),
! 3745: mulsi(exponent, newexp));
! 3746: }
! 3747: else
! 3748: {
! 3749: if ((*where)[1] == un && exponent == 1)
! 3750: (*where)[1] = deux;
! 3751: else
! 3752: (*where)[1] = laddsi(exponent, (GEN)((*where)[1]));
! 3753: /* not safe to add 1 in place -- that might overwrite gdeux,
! 3754: * with `interesting' consequences
! 3755: */
! 3756: }
! 3757: if (moebius_mode) return 0; /* stop now, but with exponent updated */
! 3758: continue;
! 3759: }
! 3760: (*where)[-1] = facvec[sorted[j]+2]; /* class as given */
! 3761: if ((newexp = (GEN)(facvec[sorted[j]+1])) != gun) /* new exp > 1 */
! 3762: {
! 3763: if (exponent == 1 && newexp == gdeux)
! 3764: (*where)[-2] = deux;
! 3765: else /* exponent*newexp > 2 */
! 3766: (*where)[-2] = lmulsi(exponent, newexp);
! 3767: }
! 3768: else
! 3769: {
! 3770: (*where)[-2] = (exponent == 1 ? un :
! 3771: (exponent == 2 ? deux :
! 3772: (long)stoi(exponent))); /* inherit parent's exponent */
! 3773: }
! 3774: (*where)[-3] = isonstack(factor) ? licopy(factor) : (long)factor;
! 3775: /* keep components younger than *partial */
! 3776: *where -= 3;
! 3777: k++;
! 3778: if (DEBUGLEVEL >= 6)
! 3779: fprintferr("\tfactor no. %ld was unique%s\n",
! 3780: j, (j>1 ? " (so far)..." : ""));
! 3781: }
! 3782: /* make the `sorted' object safe for garbage collection (probably not a
! 3783: * problem, since it should be in the garbage zone from everybody's
! 3784: * perspective, but it's easy to do it)
! 3785: */
! 3786: *sorted = evaltyp(t_INT) | evallg(nf+1);
! 3787: return k;
! 3788: }
! 3789:
! 3790: static GEN
! 3791: ifac_main(GEN *partial)
! 3792: {
! 3793: /* leave the basic error checking to ifac_find() */
! 3794: GEN here = ifac_find(partial, partial);
! 3795: long res, nf;
! 3796:
! 3797: /* if nothing left, return gun */
! 3798: if (!here) return gun;
! 3799:
! 3800: /* if we are in Moebius mode and have already detected a repeated factor,
! 3801: * stop right here. Shouldn't really happen
! 3802: */
! 3803: if (moebius_mode && here[1] != un)
! 3804: {
! 3805: if (DEBUGLEVEL >= 3)
! 3806: {
! 3807: fprintferr("IFAC: main loop: repeated old factor\n\t%Z\n", *here);
! 3808: flusherr();
! 3809: }
! 3810: return gzero;
! 3811: }
! 3812:
! 3813: /* loop until first entry is a finished prime. May involve reallocations
! 3814: * and thus updates of *partial
! 3815: */
! 3816: while (here[2] != deux)
! 3817: {
! 3818: /* if it's unknown, something has gone wrong; try to recover */
! 3819: if (!(here[2]))
! 3820: {
! 3821: err(warner, "IFAC: unknown factor seen in main loop");
! 3822: res = ifac_resort(partial, &here);
! 3823: if (res) return gzero; /* can only happen in Moebius mode */
! 3824: ifac_whoiswho(partial, &here, -1);
! 3825: /* defrag for good measure */
! 3826: ifac_defrag(partial, &here);
! 3827: continue;
! 3828: }
! 3829: /* if it's composite, crack it */
! 3830: if (here[2] == zero)
! 3831: {
! 3832: /* make sure there's room for another factor */
! 3833: if (here < *partial + 6)
! 3834: { /* try defrag first */
! 3835: ifac_defrag(partial, &here);
! 3836: if (here < *partial + 6) /* no luck */
! 3837: {
! 3838: ifac_realloc(partial, &here, 1); /* guaranteed to work */
! 3839: /* Unfortunately, we can't do a garbage collection here since we
! 3840: * know too little about where in the stack the old components
! 3841: * were.
! 3842: */
! 3843: }
! 3844: }
! 3845: nf = ifac_crack(partial, &here);
! 3846: if (moebius_mode && here[1] != un) /* that was a power */
! 3847: {
! 3848: if (DEBUGLEVEL >= 3)
! 3849: {
! 3850: fprintferr("IFAC: main loop: repeated new factor\n\t%Z\n", *here);
! 3851: flusherr();
! 3852: }
! 3853: return gzero;
! 3854: }
! 3855: /* deal with the new unknowns. No resort, since ifac_crack will
! 3856: * already have sorted them
! 3857: */
! 3858: ifac_whoiswho(partial, &here, nf);
! 3859: continue;
! 3860: }
! 3861: /* if it's prime but not yet finished, finish it */
! 3862: if (here[2] == un)
! 3863: {
! 3864: res = ifac_divide(partial, &here);
! 3865: if (res)
! 3866: {
! 3867: if (moebius_mode)
! 3868: {
! 3869: if (DEBUGLEVEL >= 3)
! 3870: {
! 3871: fprintferr("IFAC: main loop: another factor was divisible by\n");
! 3872: fprintferr("\t%Z\n", *here); flusherr();
! 3873: }
! 3874: return gzero;
! 3875: }
! 3876: ifac_defrag(partial, &here);
! 3877: (void)(ifac_resort(partial, &here)); /* sort new cofactors down */
! 3878: /* it doesn't matter right now whether this finds a repeated factor,
! 3879: * since we never get to this point in Moebius mode
! 3880: */
! 3881: ifac_defrag(partial, &here); /* resort may have created new gaps */
! 3882: ifac_whoiswho(partial, &here, -1);
! 3883: }
! 3884: continue;
! 3885: }
! 3886: /* there are no other cases, never reached */
! 3887: err(talker, "non-existent factor class in ifac_main");
! 3888: } /* while */
! 3889: if (moebius_mode && here[1] != un)
! 3890: {
! 3891: if (DEBUGLEVEL >= 3)
! 3892: {
! 3893: fprintferr("IFAC: after main loop: repeated old factor\n\t%Z\n", *here);
! 3894: flusherr();
! 3895: }
! 3896: return gzero; /* just a safety net */
! 3897: }
! 3898: if (DEBUGLEVEL >= 4)
! 3899: {
! 3900: long nf = (*partial + lg(*partial) - here - 3)/3;
! 3901: if (nf)
! 3902: fprintferr("IFAC: main loop: %ld factor%s left\n",
! 3903: nf, (nf>1 ? "s" : ""));
! 3904: else
! 3905: fprintferr("IFAC: main loop: this was the last factor\n");
! 3906: flusherr();
! 3907: }
! 3908: return here;
! 3909: }
! 3910:
! 3911: /* Caller of the following should worry about stack management, it makes
! 3912: * a rather shameless mess :^)
! 3913: */
! 3914: GEN
! 3915: ifac_primary_factor(GEN *partial, long *exponent)
! 3916: {
! 3917: GEN here = ifac_main(partial);
! 3918: GEN res;
! 3919:
! 3920: if (here == gun) { *exponent = 0; return gun; }
! 3921: else if (here == gzero) { *exponent = 0; return gzero; }
! 3922:
! 3923: res = icopy((GEN)(*here));
! 3924: *exponent = itos((GEN)(here[1]));
! 3925: here[2] = here[1] = *here = (long)NULL;
! 3926: return res;
! 3927: }
! 3928:
! 3929: /* encapsulated routines */
! 3930:
! 3931: /* prime/exponent pairs need to appear contiguously on the stack, but we
! 3932: * also need to have our data structure somewhere, and we don't know in
! 3933: * advance how many primes will turn up. The following discipline achieves
! 3934: * this: When ifac_decomp() is called, n should point at an object older
! 3935: * than the oldest small prime/exponent pair (auxdecomp0() guarantees
! 3936: * this easily since it mpdivis()es any divisors it discovers off its own
! 3937: * copy of the original N). We allocate sufficient space to accommodate
! 3938: * several pairs -- eleven pairs ought to fit in a space not much larger
! 3939: * than n itself -- before calling ifac_start(). If we manage to complete
! 3940: * the factorization before we run out of space, we free the data structure
! 3941: * and cull the excess reserved space before returning. When we do run out,
! 3942: * we have to leapfrog to generate more (guesstimating the requirements
! 3943: * from what is left in the partial factorization structure); room for
! 3944: * fresh pairs is allocated at the head of the stack, followed by an
! 3945: * ifac_realloc() to reconnect the data structure and move it out of the
! 3946: * way, followed by a few pointer tweaks to connect the new pairs space
! 3947: * to the old one.-- This whole affair translates into a surprisingly
! 3948: * compact little routine.
! 3949: */
! 3950:
! 3951: #define ifac_overshoot 64 /* lgefint(n)+64 words reserved */
! 3952: /* ifac_decomp_break:
! 3953: *
! 3954: * Find primary factors of n until ifac_break return true, or n is
! 3955: * factored if ifac_break is NULL.
! 3956: */
! 3957: /* ifac_break:
! 3958: *
! 3959: * state is for state management of the function, and depend of the
! 3960: * function. ifac_break is called initially in decomp_break with
! 3961: * here=NULL. This allows the function to see the new value of n.
! 3962: * return 1: stop factoring, 0 continue. If ifac_break is NULL,
! 3963: * assumed to always return 0. ifac_break must not let anything on the
! 3964: * stack. However data can be stored in state
! 3965: */
! 3966:
! 3967: long
! 3968: ifac_decomp_break(GEN n, long (*ifac_break)(GEN n,GEN pairs,GEN here,GEN state),
! 3969: GEN state, long hint)
! 3970: {
! 3971: long tf=lgefint(n), av=avma, lim=stack_lim(av,1);
! 3972: long nb=0;
! 3973: GEN part, here, workspc = new_chunk(tf + ifac_overshoot), pairs = (GEN)av;
! 3974: /* workspc will be doled out by us in pairs of smaller t_INTs */
! 3975: long tetpil = avma; /* remember head of workspc zone */
! 3976:
! 3977: if (!n || typ(n) != t_INT) err(typeer, "ifac_decomp");
! 3978: if (!signe(n) || tf < 3) err(talker, "factoring 0 in ifac_decomp");
! 3979:
! 3980: part = ifac_start(n, 0, hint);
! 3981: here = ifac_main(&part);
! 3982:
! 3983: while (here != gun)
! 3984: {
! 3985: long lf=lgefint((GEN)(*here));
! 3986: if (pairs - workspc < lf + 3) /* out of room, leapfrog */
! 3987: {
! 3988: /* the ifac_realloc() below will clear tetpil - avma words
! 3989: * on the stack, which should be about enough for the extra
! 3990: * primes we're going to see, and we'll want several more to
! 3991: * accommodate further exponents. In most cases, the lf + 3
! 3992: * below is pure paranoia, but the factor we're about to copy
! 3993: * might be the one sitting off the stack in the original n,
! 3994: * so let's play safe
! 3995: */
! 3996: workspc = new_chunk(lf + 3 + ifac_overshoot);
! 3997: ifac_realloc(&part, &here, 0);
! 3998: here = ifac_find(&part, &part);
! 3999: tetpil = (long)workspc;
! 4000: }
! 4001: /* room enough now */
! 4002: nb++;
! 4003: pairs -= lf;
! 4004: *pairs = evaltyp(t_INT) | evallg(lf);
! 4005: affii((GEN)(*here), pairs);
! 4006: pairs -= 3;
! 4007: *pairs = evaltyp(t_INT) | evallg(3);
! 4008: affii((GEN)(here[1]), pairs);
! 4009: if (ifac_break && (*ifac_break)(n,pairs,here,state))
! 4010: {
! 4011: if (DEBUGLEVEL >= 3)
! 4012: fprintferr("IFAC: (Partial fact.)Stop requested.\n");
! 4013: break;
! 4014: }
! 4015: here[2] = here[1] = *here = (long)NULL;
! 4016: here = ifac_main(&part);
! 4017: if (low_stack(lim, stack_lim(av,1)))
! 4018: {
! 4019: if(DEBUGMEM>1) err(warnmem,"[2] ifac_decomp");
! 4020: ifac_realloc(&part, &here, 0);
! 4021: part = gerepileupto(tetpil, part);
! 4022: }
! 4023: }
! 4024: avma = (long)pairs;
! 4025: if (DEBUGLEVEL >= 3)
! 4026: {
! 4027: fprintferr("IFAC: found %ld large prime (power) factor%s.\n",
! 4028: nb, (nb>1? "s": ""));
! 4029: flusherr();
! 4030: }
! 4031: return nb;
! 4032: }
! 4033:
! 4034: long
! 4035: ifac_decomp(GEN n, long hint)
! 4036: {
! 4037: return ifac_decomp_break(n, NULL, gzero, hint);
! 4038: }
! 4039:
! 4040: long
! 4041: ifac_moebius(GEN n, long hint)
! 4042: {
! 4043: long mu=1, av=avma, lim=stack_lim(av,1);
! 4044: GEN part = ifac_start(n, 1, hint);
! 4045: GEN here = ifac_main(&part);
! 4046:
! 4047: while (here != gun && here != gzero)
! 4048: {
! 4049: if (itos((GEN)(here[1])) > 1)
! 4050: { here = gzero; break; } /* shouldn't happen */
! 4051: mu = -mu;
! 4052: here[2] = here[1] = *here = (long)NULL;
! 4053: here = ifac_main(&part);
! 4054: if (low_stack(lim, stack_lim(av,1)))
! 4055: {
! 4056: if(DEBUGMEM>1) err(warnmem,"ifac_moebius");
! 4057: ifac_realloc(&part, &here, 0);
! 4058: part = gerepileupto(av, part);
! 4059: }
! 4060: }
! 4061: avma = av;
! 4062: return (here == gun ? mu : 0);
! 4063: }
! 4064:
! 4065: long
! 4066: ifac_issquarefree(GEN n, long hint)
! 4067: {
! 4068: long av=avma, lim=stack_lim(av,1);
! 4069: GEN part = ifac_start(n, 1, hint);
! 4070: GEN here = ifac_main(&part);
! 4071:
! 4072: while (here != gun && here != gzero)
! 4073: {
! 4074: if (itos((GEN)(here[1])) > 1)
! 4075: { here = gzero; break; } /* shouldn't happen */
! 4076: here[2] = here[1] = *here = (long)NULL;
! 4077: here = ifac_main(&part);
! 4078: if (low_stack(lim, stack_lim(av,1)))
! 4079: {
! 4080: if(DEBUGMEM>1) err(warnmem,"ifac_issquarefree");
! 4081: ifac_realloc(&part, &here, 0);
! 4082: part = gerepileupto(av, part);
! 4083: }
! 4084: }
! 4085: avma = av;
! 4086: return (here == gun ? 1 : 0);
! 4087: }
! 4088:
! 4089: long
! 4090: ifac_omega(GEN n, long hint)
! 4091: {
! 4092: long omega=0, av=avma, lim=stack_lim(av,1);
! 4093: GEN part = ifac_start(n, 0, hint);
! 4094: GEN here = ifac_main(&part);
! 4095:
! 4096: while (here != gun)
! 4097: {
! 4098: omega++;
! 4099: here[2] = here[1] = *here = (long)NULL;
! 4100: here = ifac_main(&part);
! 4101: if (low_stack(lim, stack_lim(av,1)))
! 4102: {
! 4103: if(DEBUGMEM>1) err(warnmem,"ifac_omega");
! 4104: ifac_realloc(&part, &here, 0);
! 4105: part = gerepileupto(av, part);
! 4106: }
! 4107: }
! 4108: avma = av;
! 4109: return omega;
! 4110: }
! 4111:
! 4112: long
! 4113: ifac_bigomega(GEN n, long hint)
! 4114: {
! 4115: long Omega=0, av=avma, lim=stack_lim(av,1);
! 4116: GEN part = ifac_start(n, 0, hint);
! 4117: GEN here = ifac_main(&part);
! 4118:
! 4119: while (here != gun)
! 4120: {
! 4121: Omega += itos((GEN)(here[1]));
! 4122: here[2] = here[1] = *here = (long)NULL;
! 4123: here = ifac_main(&part);
! 4124: if (low_stack(lim, stack_lim(av,1)))
! 4125: {
! 4126: if(DEBUGMEM>1) err(warnmem,"ifac_bigomega");
! 4127: ifac_realloc(&part, &here, 0);
! 4128: part = gerepileupto(av, part);
! 4129: }
! 4130: }
! 4131: avma = av;
! 4132: return Omega;
! 4133: }
! 4134:
! 4135: GEN
! 4136: ifac_totient(GEN n, long hint)
! 4137: {
! 4138: GEN res = cgeti(lgefint(n));
! 4139: long exponent, av=avma, tetpil, lim=stack_lim(av,1);
! 4140: GEN phi = gun;
! 4141: GEN part = ifac_start(n, 0, hint);
! 4142: GEN here = ifac_main(&part);
! 4143:
! 4144: while (here != gun)
! 4145: {
! 4146: phi = mulii(phi, addsi(-1, (GEN)(*here)));
! 4147: if (here[1] != un)
! 4148: {
! 4149: if (here[1] == deux)
! 4150: {
! 4151: phi = mulii(phi, (GEN)(*here));
! 4152: }
! 4153: else
! 4154: {
! 4155: exponent = itos((GEN)(here[1]));
! 4156: phi = mulii(phi, gpowgs((GEN)(*here), exponent-1));
! 4157: }
! 4158: }
! 4159: here[2] = here[1] = *here = (long)NULL;
! 4160: here = ifac_main(&part);
! 4161: if (low_stack(lim, stack_lim(av,1)))
! 4162: {
! 4163: GEN *gsav[2];
! 4164: if(DEBUGMEM>1) err(warnmem,"ifac_totient");
! 4165: tetpil = avma;
! 4166: ifac_realloc(&part, &here, 0);
! 4167: phi = icopy(phi);
! 4168: gsav[0] = φ gsav[1] = ∂
! 4169: gerepilemanysp(av, tetpil, gsav, 2);
! 4170: /* don't try to preserve here, safer to pick it up again
! 4171: * (and ifac_find does a lot of sanity checking at high
! 4172: * debuglevels)
! 4173: */
! 4174: here = ifac_find(&part, &part);
! 4175: }
! 4176: }
! 4177: affii(phi, res);
! 4178: avma = av;
! 4179: return res;
! 4180: }
! 4181:
! 4182: GEN
! 4183: ifac_numdiv(GEN n, long hint)
! 4184: {
! 4185: /* we don't preallocate since it's too hard to guess the right
! 4186: * size here
! 4187: */
! 4188: GEN res;
! 4189: long av=avma, tetpil, lim=stack_lim(av,1);
! 4190: GEN exponent, tau = gun;
! 4191: GEN part = ifac_start(n, 0, hint);
! 4192: GEN here = ifac_main(&part);
! 4193:
! 4194: while (here != gun)
! 4195: {
! 4196: exponent = (GEN)(here[1]);
! 4197: tau = mulii(tau, addsi(1, exponent));
! 4198: here[2] = here[1] = *here = (long)NULL;
! 4199: here = ifac_main(&part);
! 4200: if (low_stack(lim, stack_lim(av,1)))
! 4201: {
! 4202: GEN *gsav[2];
! 4203: if(DEBUGMEM>1) err(warnmem,"ifac_numdiv");
! 4204: tetpil = avma;
! 4205: ifac_realloc(&part, &here, 0);
! 4206: tau = icopy(tau);
! 4207: gsav[0] = τ gsav[1] = ∂
! 4208: gerepilemanysp(av, tetpil, gsav, 2);
! 4209: /* (see ifac_totient()) */
! 4210: here = ifac_find(&part, &part);
! 4211: }
! 4212: }
! 4213: tetpil = avma;
! 4214: res = icopy(tau);
! 4215: return gerepile(av, tetpil, res);
! 4216: }
! 4217:
! 4218: GEN
! 4219: ifac_sumdiv(GEN n, long hint)
! 4220: {
! 4221: /* don't preallocate */
! 4222: GEN res;
! 4223: long exponent, av=avma, tetpil, lim=stack_lim(av,1);
! 4224: GEN contrib, sigma = gun;
! 4225: GEN part = ifac_start(n, 0, hint);
! 4226: GEN here = ifac_main(&part);
! 4227:
! 4228: while (here != gun)
! 4229: {
! 4230: exponent = itos((GEN)(here[1]));
! 4231: contrib = addsi(1, (GEN)(*here));
! 4232: for (; exponent > 1; exponent--)
! 4233: contrib = addsi(1, mulii((GEN)(*here), contrib));
! 4234: sigma = mulii(sigma, contrib);
! 4235: here[2] = here[1] = *here = (long)NULL;
! 4236: here = ifac_main(&part);
! 4237: if (low_stack(lim, stack_lim(av,1)))
! 4238: {
! 4239: GEN *gsav[2];
! 4240: if(DEBUGMEM>1) err(warnmem,"ifac_sumdiv");
! 4241: tetpil = avma;
! 4242: ifac_realloc(&part, &here, 0);
! 4243: sigma = icopy(sigma);
! 4244: gsav[0] = σ gsav[1] = ∂
! 4245: gerepilemanysp(av, tetpil, gsav, 2);
! 4246: /* (see ifac_totient()) */
! 4247: here = ifac_find(&part, &part);
! 4248: }
! 4249: }
! 4250: tetpil = avma;
! 4251: res = icopy(sigma);
! 4252: return gerepile(av, tetpil, res);
! 4253: }
! 4254:
! 4255: /* k should be positive, and indeed it had better be > 1 (not checked).
! 4256: * The calling function knows what to do with the other cases.
! 4257: */
! 4258: GEN
! 4259: ifac_sumdivk(GEN n, long k, long hint)
! 4260: {
! 4261: /* don't preallocate */
! 4262: GEN res;
! 4263: long exponent, av=avma, tetpil, lim=stack_lim(av,1);
! 4264: GEN contrib, q, sigma = gun;
! 4265: GEN part = ifac_start(n, 0, hint);
! 4266: GEN here = ifac_main(&part);
! 4267:
! 4268: while (here != gun)
! 4269: {
! 4270: exponent = itos((GEN)(here[1]));
! 4271: q = gpowgs((GEN)(*here), k);
! 4272: contrib = addsi(1, q);
! 4273: for (; exponent > 1; exponent--)
! 4274: contrib = addsi(1, mulii(q, contrib));
! 4275: sigma = mulii(sigma, contrib);
! 4276: here[2] = here[1] = *here = (long)NULL;
! 4277: here = ifac_main(&part);
! 4278: if (low_stack(lim, stack_lim(av,1)))
! 4279: {
! 4280: GEN *gsav[2];
! 4281: if(DEBUGMEM>1) err(warnmem,"ifac_sumdivk");
! 4282: tetpil = avma;
! 4283: ifac_realloc(&part, &here, 0);
! 4284: sigma = icopy(sigma);
! 4285: gsav[0] = σ gsav[1] = ∂
! 4286: gerepilemanysp(av, tetpil, gsav, 2);
! 4287: /* (see ifac_totient()) */
! 4288: here = ifac_find(&part, &part);
! 4289: }
! 4290: }
! 4291: tetpil = avma;
! 4292: res = icopy(sigma);
! 4293: return gerepile(av, tetpil, res);
! 4294: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>