Annotation of OpenXM_contrib/pari-2.2/src/basemath/ifactor1.c, Revision 1.2
1.2 ! noro 1: /* $Id: ifactor1.c,v 1.38 2002/07/15 13:30:00 karim Exp $
1.1 noro 2:
3: Copyright (C) 2000 The PARI group.
4:
5: This file is part of the PARI/GP package.
6:
7: PARI/GP is free software; you can redistribute it and/or modify it under the
8: terms of the GNU General Public License as published by the Free Software
9: Foundation. It is distributed in the hope that it will be useful, but WITHOUT
10: ANY WARRANTY WHATSOEVER.
11:
12: Check the License for details. You should have received a copy of it, along
13: with the package; see the file 'COPYING'. If not, write to the Free Software
14: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
15:
16: /********************************************************************/
17: /** **/
18: /** INTEGER FACTORIZATION **/
19: /** **/
20: /********************************************************************/
21: #include "pari.h"
1.2 ! noro 22: extern GEN decomp_limit(GEN n, GEN limit);
! 23: extern int BSW_isprime(GEN x);
! 24: extern int BSW_isprime_small(GEN x);
! 25: extern ulong ucarrecomplet(ulong A);
! 26: extern GEN mpqs(GEN N);/* in src/modules/mpqs.c,
! 27: * returns a factor, a vector of factors, or NULL */
1.1 noro 28:
29: /*********************************************************************/
30: /** **/
31: /** PSEUDO PRIMALITY **/
32: /** **/
33: /*********************************************************************/
34: static GEN sqrt1, sqrt2, t1, t;
35: static long r1;
36:
37: /* The following two internal routines don't restore avma -- the caller
38: must do so at the end. */
39: static GEN
40: init_miller(GEN n)
41: {
42: if (signe(n) < 0) n = absi(n);
43: t=addsi(-1,n); r1=vali(t); t1 = shifti(t,-r1);
44: sqrt1=cgeti(lg(t)); sqrt1[1]=evalsigne(0)|evallgefint(2);
45: sqrt2=cgeti(lg(t)); sqrt2[1]=evalsigne(0)|evallgefint(2);
46: return n;
47: }
48:
49: /* is n strong pseudo-prime for base a ? `End matching' (check for square
50: * roots of -1) added by GN */
51: /* TODO: If ends do mismatch, then we have factored n, and this information
52: should somehow be made available to the factoring machinery. --GN */
53: static int
54: bad_for_base(GEN n, GEN a)
55: {
1.2 ! noro 56: long r;
! 57: gpmem_t av=avma, lim=stack_lim(av, 1);
1.1 noro 58: GEN c2, c = powmodulo(a,t1,n);
59:
60: if (!is_pm1(c) && !egalii(t,c)) /* go fishing for -1, not for 1 */
61: {
62: for (r=r1-1; r; r--) /* (this saves one squaring/reduction) */
63: {
64: c2=c; c=resii(sqri(c),n);
65: if (egalii(t,c)) break;
66: if (low_stack(lim, stack_lim(av,1)))
67: {
68: GEN *gsav[2]; gsav[0]=&c; gsav[1]=&c2;
69: if(DEBUGMEM>1) err(warnmem,"miller(rabin)");
70: gerepilemany(av, gsav, 2);
71: }
72: }
73: if (!r) return 1;
74: /* sqrt(-1) seen, compare or remember */
75: if (signe(sqrt1)) /* we saw one earlier: compare */
76: {
77: /* check if too many sqrt(-1)s mod n */
78: if (!egalii(c2,sqrt1) && !egalii(c2,sqrt2)) return 1;
79: }
80: else { affii(c2,sqrt1); affii(subii(n,c2),sqrt2); } /* remember */
81: }
82: return 0;
83: }
84:
85: /* Miller-Rabin test for k random bases */
86: long
87: millerrabin(GEN n, long k)
88: {
1.2 ! noro 89: long r, i;
! 90: gpmem_t av2, av = avma;
1.1 noro 91:
92: if (!signe(n)) return 0;
93: /* If |n| <= 3, check if n = +- 1 */
94: if (lgefint(n)==3 && (ulong)(n[2])<=3) return (n[2] != 1);
95:
96: if (!mod2(n)) return 0;
97: n = init_miller(n); av2=avma;
98: for (i=1; i<=k; i++)
99: {
100: do r = smodsi(mymyrand(),n); while (!r);
101: if (DEBUGLEVEL > 4)
102: fprintferr("Miller-Rabin: testing base %ld\n",
103: r);
104: if (bad_for_base(n, stoi(r))) { avma=av; return 0; }
105: avma=av2;
106: }
107: avma=av; return 1;
108: }
109:
110: /* As above for k bases taken in pr (i.e not random).
111: * We must have |n|>2 and 1<=k<=11 (not checked) or k in {16,17} to select
112: * some special sets of bases.
113: *
114: * By computations of Gerhard Jaeschke, `On strong pseudoprimes to several
115: * bases', Math.Comp. 61 (1993), 915--926 (see also Chris Caldwell's Prime
116: * Number Pages at http://www.utm.edu/research/primes/prove2.html), we have:
117: *
118: * k == 4 (bases 2,3,5,7) correctly detects all composites
119: * n < 118 670 087 467 == 172243 * 688969 with the single exception of
120: * n == 3 215 031 751 == 151 * 751 * 28351,
121: *
122: * k == 5 (bases 2,3,5,7,11) correctly detects all composites
123: * n < 2 152 302 898 747 == 6763 * 10627 * 29947,
124: *
125: * k == 6 (bases 2,3,...,13) correctly detects all composites
126: * n < 3 474 749 660 383 == 1303 * 16927 * 157543,
127: *
128: * k == 7 (bases 2,3,...,17) correctly detects all composites
129: * n < 341 550 071 728 321 == 10670053 * 32010157,
130: * and even this limiting value is caught by an end mismatch between bases
131: * 2 and 5 (or 5 and 17).
132: *
133: * Moreover, the four bases chosen at
134: *
135: * k == 16 (2,13,23,1662803) will correctly detect all composites up
136: * to at least 10^12, and the combination at
137: *
138: * k == 17 (31,73) detects most odd composites without prime factors > 100
139: * in the range n < 2^36 (with less than 250 exceptions, indeed with fewer
140: * than 1400 exceptions up to 2^42). --GN
141: * (DATA TO BE COMPLETED)
142: */
143: int /* no longer static -- needed in mpqs.c */
144: miller(GEN n, long k)
145: {
1.2 ! noro 146: long r, i;
! 147: gpmem_t av2, av = avma;
1.1 noro 148: static long pr[] =
149: { 0, 2,3,5,7,11,13,17,19,23,29, 31,73, 2,13,23,1662803UL, };
150: long *p;
151:
152: if (!mod2(n)) return 0;
153: if (k==16)
154: { /* use smaller (faster) bases if possible */
155: if (lgefint(n)==3 && (ulong)(n[2]) < 3215031751UL) p = pr; /* 2,3,5,7 */
156: else p = pr+13; /* 2,13,23,1662803 */
157: k=4;
158: }
159: else if (k==17)
160: {
161: if (lgefint(n)==3 && (ulong)(n[2]) < 1373653) p = pr; /* 2,3 */
162: else p = pr+11; /* 31,73 */
163: k=2;
164: }
165: else p = pr; /* 2,3,5,... */
166: n = init_miller(n); av2=avma;
167: for (i=1; i<=k; i++)
168: {
169: r = smodsi(p[i],n); if (!r) break;
170: if (bad_for_base(n, stoi(r))) { avma = av; return 0; }
171: avma=av2;
172: }
173: avma=av; return 1;
174: }
1.2 ! noro 175:
! 176: /* compute n-th term of Lucas sequence modulo N.
! 177: * v_{k+2} = P v_{k+1} - v_k, v_0 = 2, v_1 = P.
! 178: * Assume n > 0 */
! 179: static GEN
! 180: LucasMod(GEN n, long P, GEN N)
! 181: {
! 182: gpmem_t av = avma, lim = stack_lim(av, 1);
! 183: GEN nd = n+2;
! 184: long i, m = *nd, j = 1+bfffo((ulong)m);
! 185: GEN v = stoi(P), v1 = stoi(P*P - 2);
! 186:
! 187: m <<= j; j = BITS_IN_LONG - j;
! 188: for (i=lgefint(n)-2;;) /* cf. leftright_pow */
! 189: {
! 190: for (; j; m<<=1,j--)
! 191: { /* v = v_k, v1 = v_{k+1} */
! 192: if (m < 0)
! 193: { /* set v = v_{2k+1}, v1 = v_{2k+2} */
! 194: v = subis(mulii(v,v1), P);
! 195: v1= subis(sqri(v1), 2);
! 196: }
! 197: else
! 198: {/* set v = v_{2k}, v1 = v_{2k+1} */
! 199: v1= subis(mulii(v,v1), P);
! 200: v = subis(sqri(v), 2);
! 201: }
! 202: v = modii(v, N);
! 203: v1= modii(v1,N);
! 204: if (low_stack(lim,stack_lim(av,1)))
! 205: {
! 206: GEN *gptr[2]; gptr[0]=&v; gptr[1]=&v1;
! 207: if(DEBUGMEM>1) err(warnmem,"LucasMod");
! 208: gerepilemany(av,gptr,2);
! 209: }
! 210: }
! 211: if (--i == 0) return v;
! 212: j = BITS_IN_LONG;
! 213: m = *++nd;
! 214: }
! 215: }
! 216:
! 217: /* check that N not a square first (taken care of here, but inefficient)
! 218: * assume N > 3 */
! 219: static int
! 220: IsLucasPsP0(GEN N)
! 221: {
! 222: long b, i, v;
! 223: GEN N_2, m, z;
! 224:
! 225: for (b=3, i=0;; b+=2, i++)
! 226: {
! 227: if (i == 64 && carreparfait(N)) return 0; /* avoid oo loop if N = m^2 */
! 228: if (krosg(b*b - 4, N) < 0) break;
! 229: }
! 230:
! 231: m = addis(N,1); v = vali(m); m = shifti(m,-v);
! 232: z = LucasMod(m, b, N);
! 233: if (egalii(z, gdeux)) return 1;
! 234: N_2 = subis(N,2);
! 235: if (egalii(z, N_2)) return 1;
! 236: for (i=1; i<v; i++)
! 237: {
! 238: if (!signe(z)) return 1;
! 239: z = modii(subis(sqri(z), 2), N);
! 240: if (egalii(z, gdeux)) return 0;
! 241: }
! 242: return 0;
! 243: }
! 244:
! 245: long
! 246: BSW_psp(GEN N)
! 247: {
! 248: gpmem_t av = avma;
! 249: int k;
! 250: GEN T;
! 251:
! 252: if (typ(N) != t_INT) err(arither1);
! 253: if (signe(N) <= 0) return 0;
! 254: if (!is_bigint(N))
! 255: switch(itos(N))
! 256: {
! 257: case 1: return 0;
! 258: case 2:
! 259: case 3: return 1;
! 260: }
! 261: if (!mod2(N)) return 0;
! 262:
! 263: T = init_miller(N);
! 264: k = bad_for_base(T,gdeux);
! 265: avma = av;
! 266: k = (!k && IsLucasPsP0(N));
! 267: avma = av; return k;
! 268: }
! 269:
1.1 noro 270: /***********************************************************************/
271: /** **/
272: /** Pocklington-Lehmer **/
273: /** P-1 primality test **/
274: /** Crude implementation BA 2000Apr21 **/
275: /***********************************************************************/
276:
277: /*assume n>=2*/
278: static long pl831(GEN N, GEN p)
279: {
1.2 ! noro 280: gpmem_t ltop=avma, av;
1.1 noro 281: long a;
282: GEN Nmun,Nmunp;
283: Nmun=addis(N,-1);
284: Nmunp=divii(Nmun,p);
285: av=avma;
286: for(a=2;;a++)
287: {
288: GEN b;
289: b=powmodulo(stoi(a),Nmunp,N);
290: if (gcmp1(powmodulo(b,p,N)))
291: {
292: GEN g;
293: g=mppgcd(addis(b,-1),N);
1.2 ! noro 294: if (gcmp1(g)) { avma=ltop; return a; }
! 295: if (!gegal(g,N)) { avma=ltop; return 0; }
1.1 noro 296: }
1.2 ! noro 297: else { avma=ltop; return 0; }
1.1 noro 298: avma=av;
299: }
300: }
1.2 ! noro 301: /* Assume N is a strong BSW pseudoprime
! 302: *
1.1 noro 303: * flag 0: return gun (prime), gzero (composite)
304: * flag 1: return gzero (composite), gun (small prime), matrix (large prime)
305: *
1.2 ! noro 306: * The matrix has 3 columns, [a,b,c] with
1.1 noro 307: * a[i] prime factor of N-1,
308: * b[i] witness for a[i] as in pl831
309: * c[i] plisprime(a[i])
310: */
311: GEN
312: plisprime(GEN N, long flag)
313: {
1.2 ! noro 314: gpmem_t ltop = avma;
! 315: long i, l, t = typ(N);
1.1 noro 316: int eps;
1.2 ! noro 317: GEN C, F = NULL;
! 318:
! 319: if (t == t_VEC)
! 320: { /* [ N, [p1,...,pk] ], pi list of pseudoprime divisors of N */
! 321: F = (GEN)N[2];
! 322: N = (GEN)N[1]; t = typ(N);
! 323: }
! 324: if (t != t_INT) err(arither1);
! 325: if (DEBUGLEVEL>3) fprintferr("PL: proving primality of N = %Z\n", N);
! 326:
1.1 noro 327: eps = absi_cmp(N,gdeux);
328: if (eps<=0) return eps? gzero: gun;
1.2 ! noro 329:
1.1 noro 330: N = absi(N);
1.2 ! noro 331: if (!F)
! 332: {
! 333: F = (GEN)decomp_limit(addis(N,-1), racine(N))[1];
! 334: if (DEBUGLEVEL>3) fprintferr("PL: N-1 factored!\n");
! 335: }
! 336:
! 337: C = cgetg(4,t_MAT); l = lg(F);
! 338: C[1] = lgetg(l,t_COL);
! 339: C[2] = lgetg(l,t_COL);
! 340: C[3] = lgetg(l,t_COL);
! 341: for(i=1; i<l; i++)
! 342: {
! 343: GEN p = (GEN)F[i], r;
! 344: long witness = pl831(N,p);
! 345:
! 346: if (!witness) { avma = ltop; return gzero; }
! 347: mael(C,1,i) = licopy(p);
! 348: mael(C,2,i) = lstoi(witness);
! 349: if (!flag) r = BSW_isprime(p)? gun: gzero;
! 350: else
! 351: {
! 352: if (BSW_isprime_small(p)) r = gun;
! 353: else if (expi(p) > 250) r = isprimeAPRCL(p)? gdeux: gzero;
! 354: else r = plisprime(p,flag);
! 355: }
! 356: mael(C,3,i) = (long)r;
! 357: if (r == gzero) err(talker,"False prime number %Z in plisprime", p);
1.1 noro 358: }
1.2 ! noro 359: if (!flag) { avma = ltop; return gun; }
1.1 noro 360: return gerepileupto(ltop,C);
361: }
362:
363: /***********************************************************************/
364: /** **/
365: /** PRIMES IN SUCCESSION **/
366: /** (abstracted by GN 1998Aug21 mainly for use in ellfacteur() below) **/
367: /** **/
368: /***********************************************************************/
369:
370: /* map from prime residue classes mod 210 to their numbers in {0...47}.
371: * Subscripts into this array take the form ((k-1)%210)/2, ranging from
372: * 0 to 104. Unused entries are 128
373: */
374: #define NPRC 128 /* non-prime residue class */
375:
376: static
377: unsigned char prc210_no[] =
378: {
379: 0, NPRC, NPRC, NPRC, NPRC, 1, 2, NPRC, 3, 4, NPRC, /* 21 */
380: 5, NPRC, NPRC, 6, 7, NPRC, NPRC, 8, NPRC, 9, /* 41 */
381: 10, NPRC, 11, NPRC, NPRC, 12, NPRC, NPRC, 13, 14, NPRC, /* 63 */
382: NPRC, 15, NPRC, 16, 17, NPRC, NPRC, 18, NPRC, 19, /* 83 */
383: NPRC, NPRC, 20, NPRC, NPRC, NPRC, 21, NPRC, 22, 23, NPRC, /* 105 */
384: 24, 25, NPRC, 26, NPRC, NPRC, NPRC, 27, NPRC, NPRC, /* 125 */
385: 28, NPRC, 29, NPRC, NPRC, 30, 31, NPRC, 32, NPRC, NPRC, /* 147 */
386: 33, 34, NPRC, NPRC, 35, NPRC, NPRC, 36, NPRC, 37, /* 167 */
387: 38, NPRC, 39, NPRC, NPRC, 40, 41, NPRC, NPRC, 42, NPRC, /* 189 */
388: 43, 44, NPRC, 45, 46, NPRC, NPRC, NPRC, NPRC, 47, /* 209 */
389: };
390:
391: /* map from prime residue classes mod 210 (by number) to their smallest
392: * positive representatives
393: */
394: static
395: unsigned char prc210_rp[] =
396: {
397: 1, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79,
398: 83, 89, 97, 101, 103, 107, 109, 113, 121, 127, 131, 137, 139, 143, 149,
399: 151, 157, 163, 167, 169, 173, 179, 181, 187, 191, 193, 197, 199, 209,
400: };
401:
402: /* first differences of the preceding */
403: static
404: unsigned char prc210_d1[] =
405: {
406: 10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6,
407: 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6,
408: 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2, 10, 2,
409: };
410:
411: GEN
412: nextprime(GEN n)
413: {
1.2 ! noro 414: long rc, rc0, rcd, rcn;
! 415: gpmem_t av1, av2, av = avma;
1.1 noro 416:
417: if (typ(n) != t_INT) n=gceil(n); /* accept arguments in R --GN */
418: if (typ(n) != t_INT) err(arither1);
419: if (signe(n) <= 0) { avma=av; return gdeux; }
420: if (lgefint(n) <= 3)
421: { /* check if n <= 7 */
422: ulong k = n[2];
423: if (k <= 2) { avma=av; return gdeux; }
424: if (k == 3) { avma = av; return stoi(3); }
425: if (k <= 5) { avma = av; return stoi(5); }
426: if (k <= 7) { avma = av; return stoi(7); }
427: }
428: /* here n > 7 */
429: if (!(mod2(n))) n = addsi(1,n);
430: rc = rc0 = smodis(n, 210);
431: rcn = (long)(prc210_no[rc0>>1]);
432: /* find next prime residue class mod 210 */
433: while (rcn == NPRC)
434: {
435: rc += 2; /* cannot wrap since 209 is coprime */
436: rcn = (long)(prc210_no[rc>>1]);
437: }
438: if (rc > rc0) n = addsi(rc - rc0, n);
439: /* now find an actual prime */
440: av2 = av1 = avma;
441: for(;;)
442: {
1.2 ! noro 443: if (BSW_psp(n)) break;
1.1 noro 444: av1 = avma;
445: rcd = prc210_d1[rcn];
446: if (++rcn > 47) rcn = 0;
447: n = addsi(rcd,n);
448: }
449: if (av1!=av2) return gerepile(av,av1,n);
450: return (av1==av)? icopy(n): n;
451: }
452:
453: GEN
454: precprime(GEN n)
455: {
1.2 ! noro 456: long rc, rc0, rcd, rcn;
! 457: gpmem_t av1, av2, av = avma;
1.1 noro 458:
459: if (typ(n) != t_INT) n=gfloor(n); /* accept arguments in R --GN */
460: if (typ(n) != t_INT) err(arither1);
461: if (signe(n)<=0) { avma=av; return gzero; }
462: if (lgefint(n) <= 3)
463: { /* check if n <= 10 */
464: ulong k = n[2];
465: if (k <= 1) { avma=av; return gzero; }
466: if (k == 2) { avma=av; return gdeux; }
467: if (k <= 4) { avma=av; return stoi(3); }
468: if (k <= 6) { avma=av; return stoi(5); }
469: if (k <= 10) { avma=av; return stoi(7); }
470: }
471: /* here n >= 11 */
472: if (!(mod2(n))) n = addsi(-1,n);
473: rc = rc0 = smodis(n, 210);
474: rcn = (long)(prc210_no[rc0>>1]);
475: /* find last prime residue class mod 210 */
476: while (rcn == NPRC)
477: {
478: rc -= 2; /* cannot wrap since 1 is coprime */
479: rcn = (long)(prc210_no[rc>>1]);
480: }
481: if (rc < rc0) n = addsi(rc - rc0, n);
482: /* now find an actual prime */
483: av2 = av1 = avma;
484: for(;;)
485: {
1.2 ! noro 486: if (BSW_psp(n)) break;
1.1 noro 487: av1 = avma;
488: if (rcn == 0)
489: { rcd = 2; rcn = 47; }
490: else
491: rcd = prc210_d1[--rcn];
492: n = addsi(-rcd,n);
493: }
494: if (av1!=av2) return gerepile(av,av1,n);
495: return (av1==av)? icopy(n): n;
496: }
497:
498: /* find next single-word prime strictly larger than p. If **d is non-NULL,
499: * this will be p + *(*d)++, using the diffptr table. Otherwise imitate
500: * nextprime(). Apart from *d, caller must supply a long variable to which
501: * rcn points, initialized either to NPRC or to the correct residue class
502: * number for the current p; we'll use this to track the current prime
503: * residue class mod 210 once we're out of range of the diffptr table, and
504: * we'll update it before that if it isn't NPRC. *q is incremented when-
505: * ever q!=NULL and we wrap from 209 mod 210 to 1 mod 210; this makes sense
506: * only when *rcn already held the correct value. Caller must also supply
507: * the second argument for miller(). --GN1998Aug22
508: */
509: ulong
510: snextpr(ulong p, byteptr *d, long *rcn, long *q, long k)
511: {
512: static ulong pp[] =
1.2 ! noro 513: { evaltyp(t_INT)|_evallg(3), evalsigne(1)|evallgefint(3), 0 };
1.1 noro 514: static ulong *pp2 = pp + 2;
515: static GEN gp = (GEN)pp;
1.2 ! noro 516: long rcn0;
1.1 noro 517:
1.2 ! noro 518: if (**d)
1.1 noro 519: {
1.2 ! noro 520: byteptr dd = *d;
! 521: long d1 = 0;
! 522:
! 523: NEXT_PRIME_VIADIFF(d1,dd);
1.1 noro 524: if (*rcn != NPRC)
525: {
526: rcn0 = *rcn;
527: while (d1 > 0)
528: {
529: d1 -= prc210_d1[*rcn];
530: if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
531: }
532: if (d1 < 0)
533: {
534: fprintferr("snextpr: prime %lu wasn\'t %lu mod 210\n",
535: p, (ulong)prc210_rp[rcn0]);
536: err(bugparier, "[caller of] snextpr");
537: }
538: }
1.2 ! noro 539: NEXT_PRIME_VIADIFF(p,*d);
! 540: return p;
1.1 noro 541: }
542: /* we are beyond the diffptr table */
543: if (*rcn == NPRC) /* we need to initialize this now */
544: {
545: *rcn = prc210_no[(p % 210) >> 1];
546: if (*rcn == NPRC)
547: {
548: fprintferr("snextpr: %lu should have been prime but isn\'t\n", p);
549: err(bugparier, "[caller of] snextpr");
550: }
551: }
552: /* look for the next one */
553: *pp2 = p;
554: *pp2 += prc210_d1[*rcn];
555: if (++*rcn > 47) *rcn = 0;
556: while (!miller(gp, k))
557: {
558: *pp2 += prc210_d1[*rcn];
559: if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
560: if (*pp2 <= 11) /* wraparound mod 2^BITS_IN_LONG */
561: {
562: fprintferr("snextpr: integer wraparound after prime %lu\n", p);
563: err(bugparier, "[caller of] snextpr");
564: }
565: }
566: return *pp2;
567: }
568:
569:
570: /***********************************************************************/
571: /** **/
572: /** FACTORIZATION (ECM) **/
573: /** Integer factorization using the elliptic curves method (ECM). **/
574: /** ellfacteur() returns a non trivial factor of N, assuming N>0, **/
575: /** is composite, and has no prime divisor below 2^14 or so. **/
576: /** Extensively modified by GN Jul-Aug 1998, with much helpful **/
577: /** advice by Paul Zimmermann. Thanks also to Guillaume Hanrot **/
578: /** and Igor Schein for providing many CPU cycles whilst testing. **/
579: /** **/
580: /***********************************************************************/
581:
582: static GEN N, gl, *XAUX;
583: #define nbcmax 64 /* max number of simultaneous curves */
584: #define bstpmax 1024 /* max number of baby step table entries */
585:
586: /* addition/doubling/multiplication of a point on an `elliptic curve'
587: * mod N may result in one of three things: a new bona fide point,
588: * a point at infinity (betraying itself by a denominator divisible
589: * by N), or a point which is at infinity mod some nontrivial factor
590: * of N but finite mod some other factor (betraying itself by a denom-
591: * inator which has nontrivial gcd with N, and this is of course what
592: * we want).
593: */
594: /* (In the second case, addition/doubling will simply abort, copying one
595: * of the summands to the destination array of points unless they coincide.
596: * Multiplication will stop at some unpredictable intermediate stage: The
597: * destination will contain _some_ multiple of the input point, but not
598: * necessarily the desired one, which doesn't matter. As long as we're
599: * multiplying (B1 phase) we simply carry on with the next multiplier.
600: * During the B2 phase, the only additions are the giant steps, and the
601: * worst that can happen here is that we lose one residue class mod 210
602: * of prime multipliers on 4 of the curves, so again, we ignore the problem
603: * and just carry on.) */
604: /* The idea is: Select a handful of curves mod N and one point P on each of
605: * them. Try to compute, for each such point, the multiple [M]P = Q where
606: * M is the product of all powers <= B2 of primes <= nextprime(B1), for some
607: * suitably chosen B1 and B2. Then check whether multiplying Q by one of the
608: * primes < nextprime(B2) would betray a factor. This second stage proceeds
609: * by looking separately at the primes in each residue class mod 210, four
610: * curves at a time, and stepping additively to ever larger multipliers,
611: * by comparing X coordinates of points which we would need to add in order
612: * to reach another prime multiplier in the same residue class. `Comparing'
613: * means that we accumulate a product of differences of X coordinates, and
614: * from time to time take a gcd of this product with N.
615: */
616: /* Montgomery's trick of hiding the cost of computing inverses mod N at a
617: * price of three extra multiplications mod N, by working on up to 64 or
618: * even 128 points in parallel, is used heavily. --GN
619: */
620:
621: /* *** auxiliary functions for ellfacteur: *** */
622:
623: /* Parallel addition on nbc curves, assigning the result to locations at and
624: * following *X3, *Y3. Safe to be called with X3,Y3 equal to X2,Y2 (_not_
625: * to X1,Y1). It is also safe to overwrite Y2 with X3. (If Y coords of
626: * result not desired, set Y3=NULL.) If nbc1 < nbc, the first summand is
627: * assumed to hold only nbc1 distinct points, which are repeated as often
628: * as we need them (useful for adding one point on each of a few curves
629: * to several other points on the same curves).
630: * Return 0 when successful, 1 when we hit a denominator divisible by N,
631: * and 2 when gcd(denominator, N) is a nontrivial factor of N, which will
632: * be preserved in gl.
633: * We use more stack space than the old code did, and thus run a bit of a
634: * risk of overflowing it, but it's still bounded by a constant multiple
635: * of lgefint(N)*nbc, as it was in the old version --GN1998Jul02,Aug12
636: */
637: /* (Lessee: Second phase creates 12 items on the stack, per iteration,
638: * of which four are twice as long and one is thrice as long as N --
639: * makes 18 units per iteration. First phase creates 4 units. Total
640: * can be as large as about 4*nbcmax+18*8 units. And elladd2() is just
641: * as bad, and elldouble() comes to about 3*nbcmax+29*8 units. A few
642: * strategic garbage collections every 8 iterations should help when nbc
643: * is large...) --GN1998Aug23
644: */
645:
646: static int
647: elladd0(long nbc, long nbc1,
648: GEN *X1, GEN *Y1, GEN *X2, GEN *Y2, GEN *X3, GEN *Y3)
649: {
650: GEN lambda;
651: GEN W[2*nbcmax], *A=W+nbc; /* W[0],A[0] never used */
1.2 ! noro 652: long i;
! 653: gpmem_t av=avma, tetpil;
1.1 noro 654: ulong mask = ~0UL;
655:
656: /* actually, this is only ever called with nbc1==nbc or nbc1==4, so: */
657: if (nbc1 == 4) mask = 3;
658: else if (nbc1 < nbc) err(bugparier, "[caller of] elladd0");
659:
660: /* W[0] = gun; */
661: W[1] = /* A[0] =*/ subii(X1[0], X2[0]);
662: for (i=1; i<nbc; i++)
663: {
664: A[i] = subii(X1[i&mask], X2[i]); /* don't waste time reducing mod N here */
665: W[i+1] = modii(mulii(A[i], W[i]), N);
666: }
667: tetpil = avma;
668:
669: /* if gl != N we have a factor */
670: if (!invmod(W[nbc], N, &gl))
671: {
672: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
673: if (X2 != X3)
674: {
675: long k;
676: /* cannot add on one of the curves mod N: make sure X3 contains
677: * something useful before letting the caller proceed
678: */
679: for (k = 2*nbc; k--; ) affii(X2[k],X3[k]);
680: }
681: avma = av; return 1;
682: }
683:
684: while (i--) /* nbc times, actually */
685: {
686: lambda = modii(mulii(subii(Y1[i&mask], Y2[i]),
687: i?mulii(gl, W[i]):gl), N);
688: modiiz(subii(sqri(lambda), addii(X2[i], X1[i&mask])), N, X3[i]);
689: if (Y3)
690: modiiz(subii(mulii(lambda, subii(X1[i&mask], X3[i])),
691: Y1[i&mask]),
692: N, Y3[i]);
693: if (!i) break;
694: gl = modii(mulii(gl, A[i]), N);
695: if (!(i&7)) gl = gerepileupto(tetpil, gl);
696: }
697: avma=av; return 0;
698: }
699:
700: /* Shortcut variant, for use in cases where Y coordinates follow their
701: * corresponding X coordinates, and the first summand doesn't need to be
702: * repeated
703: */
704: static int
705: elladd(long nbc, GEN *X1, GEN *X2, GEN *X3)
706: {
707: return elladd0(nbc, nbc, X1, X1+nbc, X2, X2+nbc, X3, X3+nbc);
708: }
709: /* this could perhaps become a macro --GN */
710:
711: /* The next one is exactly the same except it does twice as many additions
712: * (and thus hides even more of the cost of the modular inverse); the net
713: * effect is the same as elladd(nbc,X1,X2,X3) followed by elladd(nbc,X4,X5,X6).
714: * Safe to have X2==X3 and/or X5==X6, and of course safe to have X1 or X2
715: * coincide with X4 or X5, in any order.
716: */
717:
718: static int
719: elladd2(long nbc, GEN *X1, GEN *X2, GEN *X3, GEN *X4, GEN *X5, GEN *X6)
720: {
721: GEN lambda, *Y1 = X1+nbc, *Y2 = X2+nbc, *Y3 = X3+nbc;
722: GEN *Y4 = X4+nbc, *Y5 = X5+nbc, *Y6 = X6+nbc;
723: GEN W[4*nbcmax], *A=W+2*nbc; /* W[0],A[0] never used */
1.2 ! noro 724: long i, j;
! 725: gpmem_t av=avma, tetpil;
1.1 noro 726: /* W[0] = gun; */
727: W[1] = /* A[0] =*/ subii(X1[0], X2[0]);
728: for (i=1; i<nbc; i++)
729: {
730: A[i] = subii(X1[i], X2[i]); /* don't waste time reducing mod N here */
731: W[i+1] = modii(mulii(A[i], W[i]), N);
732: }
733: for (j=0; j<nbc; i++,j++)
734: {
735: A[i] = subii(X4[j], X5[j]);
736: W[i+1] = modii(mulii(A[i], W[i]), N);
737: }
738: tetpil = avma;
739:
740: /* if gl != N we have a factor */
741: if (!invmod(W[2*nbc], N, &gl))
742: {
743: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
744: if (X2 != X3)
745: {
746: long k;
747: /* cannot add on one of the curves mod N: make sure X3 contains
748: * something useful before letting the caller proceed
749: */
750: for (k = 2*nbc; k--; ) affii(X2[k],X3[k]);
751: }
752: if (X5 != X6)
753: {
754: long k;
755: /* same for X6 */
756: for (k = 2*nbc; k--; ) affii(X5[k],X6[k]);
757: }
758: avma = av; return 1;
759: }
760:
761: while (j--) /* nbc times, actually */
762: {
763: i--;
764: lambda = modii(mulii(subii(Y4[j], Y5[j]),
765: mulii(gl, W[i])), N);
766: modiiz(subii(sqri(lambda), addii(X5[j], X4[j])), N, X6[j]);
767: modiiz(subii(mulii(lambda, subii(X4[j], X6[j])), Y4[j]), N, Y6[j]);
768: gl = modii(mulii(gl, A[i]), N);
769: if (!(i&7)) gl = gerepileupto(tetpil, gl);
770: }
771: while (i--) /* nbc times */
772: {
773: lambda = modii(mulii(subii(Y1[i], Y2[i]),
774: i?mulii(gl, W[i]):gl), N);
775: modiiz(subii(sqri(lambda), addii(X2[i], X1[i])), N, X3[i]);
776: modiiz(subii(mulii(lambda, subii(X1[i], X3[i])), Y1[i]), N, Y3[i]);
777: if (!i) break;
778: gl = modii(mulii(gl, A[i]), N);
779: if (!(i&7)) gl = gerepileupto(tetpil, gl);
780: }
781: avma=av; return 0;
782: }
783:
784: /* Parallel doubling on nbc curves, assigning the result to locations at
785: * and following *X2. Safe to be called with X2 equal to X1. Return
786: * value as for elladd() above. If we find a point at infinity mod N,
787: * and if X1 != X2, we copy the points at X1 to X2.
788: * Use fewer assignments than the old code. Strangely, whereas this gains
789: * about 3% on my P133 with elladd(), it makes hardly any difference here
790: * with elldouble() --GN
791: */
792: static int
793: elldouble(long nbc, GEN *X1, GEN *X2)
794: {
795: GEN lambda,v, *Y1 = X1+nbc, *Y2 = X2+nbc;
796: GEN W[nbcmax+1]; /* W[0] never used */
1.2 ! noro 797: long i;
! 798: gpmem_t av=avma, tetpil;
1.1 noro 799: /*W[0] = gun;*/ W[1] = Y1[0];
800: for (i=1; i<nbc; i++)
801: W[i+1] = modii(mulii(Y1[i], W[i]), N);
802: tetpil = avma;
803:
804: if (!invmod(W[nbc], N, &gl))
805: {
806: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
807: if (X1 != X2)
808: {
809: long k;
810: /* cannot double on one of the curves mod N: make sure X2 contains
811: * something useful before letting the caller proceed
812: */
813: for (k = 2*nbc; k--; ) affii(X1[k],X2[k]);
814: }
815: avma = av; return 1;
816: }
817:
818: while (i--) /* nbc times, actually */
819: {
820: lambda = modii(mulii(addsi(1, mulsi(3, sqri(X1[i]))),
821: i?mulii(gl,W[i]):gl), N);
822: if (signe(lambda)) /* half of zero is still zero */
823: lambda = shifti(mod2(lambda)? addii(lambda, N): lambda, -1);
824: v = modii(subii(sqri(lambda), shifti(X1[i],1)), N);
825: if (i) gl = modii(mulii(gl, Y1[i]), N);
826: modiiz(subii(mulii(lambda, subii(X1[i], v)), Y1[i]), N, Y2[i]);
827: affii(v, X2[i]);
828: if (!(i&7) && i) gl = gerepileupto(tetpil, gl);
829: }
830: avma = av; return 0;
831: }
832:
833: /* Parallel multiplication by an odd prime k on nbc curves, storing the
834: * result to locations at and following *X2. Safe to be called with X2
835: * equal to X1. Return values as for elladd() and elldouble().
836: * Uses (a simplified variant of) Peter Montgomery's PRAC (PRactical Addition
837: * Chain) algorithm; see ftp://ftp.cwi.nl/pub/pmontgom/Lucas.ps.gz .
838: * With thanks to Paul Zimmermann for the reference. --GN1998Aug13
839: */
840:
841: /* We use an array of GENs pointed at by XAUX as a scratchpad; this will
842: * have been set up by ellfacteur() (so we don't need to reinitialize it
843: * each time).
844: */
845:
846: static int
847: ellmult(long nbc, ulong k, GEN *X1, GEN *X2) /* k>2 prime, not checked */
848: {
1.2 ! noro 849: long i, d, e, e1, r;
! 850: gpmem_t av=avma, tetpil;
1.1 noro 851: int res;
852: GEN *A=X2, *B=XAUX, *S, *T=XAUX+2*nbc;
853:
854: for (i = 2*nbc; i--; ) { affii(X1[i],XAUX[i]); }
855: tetpil = avma;
856:
857: /* first doubling picks up X1; after this we'll be working in XAUX and
858: * X2 only, mostly via A and B and T
859: */
860: if ((res = elldouble(nbc, X1, X2)) != 0)
861: {
862: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
863: return res;
864: }
865:
866: /* split the work at the golden ratio */
867: r = (long)(k*0.61803398875 + .5);
868: d = k - r; e = r - d; /* NB d+e == r, so no danger of ofl below */
869:
870: while (d != e)
871: {
872:
873: /* apply one of the nine transformations from PM's Table 4. We first
874: * figure out which, and then go into an eight-way switch, because
875: * some of the transformations are similar enough to share code.
876: */
877: if (d <= e + (e>>2)) /* floor(1.25*e) */
878: {
879: if ((d+e)%3 == 0)
880: { i = 0; goto apply; } /* Table 4, rule 1 */
881: else if ((d-e)%6 == 0)
882: { i = 1; goto apply; } /* rule 2 */
883: /* else fall through */
884: }
885: if ((d+3)>>2 <= e) /* equiv to d <= 4*e but cannot ofl */
886: { i = 2; goto apply; } /* rule 3, the most common case */
887: if ((d&1)==(e&1))
888: { i = 1; goto apply; } /* rule 4, which does the same as rule 2 */
889: if (!(d&1))
890: { i = 3; goto apply; } /* rule 5 */
891: if (d%3 == 0)
892: { i = 4; goto apply; } /* rule 6 */
893: if ((d+e)%3 == 0)
894: { i = 5; goto apply; } /* rule 7 */
895: if ((d-e)%3 == 0)
896: { i = 6; goto apply; } /* rule 8 */
897: /* when we get here, e must be even, for otherwise one of rules 4,5
898: * would have applied
899: */
900: i = 7; /* rule 9 */
901:
902: apply:
903: switch(i) /* i takes values in {0,...,7} here */
904: {
905: case 0: /* rule 1 */
906: e1 = d - e; d = (d + e1)/3; e = (e - e1)/3;
907: if ((res = elladd(nbc, A, B, T)) != 0)
908: {
909: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
910: return res;
911: }
912: if ((res = elladd2(nbc, T, A, A, T, B, B)) != 0)
913: {
914: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
915: return res;
916: }
917: break; /* end of rule 1 */
918: case 1: /* rules 2 and 4, part 1 */
919: d -= e;
920: if ((res = elladd(nbc, A, B, B)) != 0)
921: {
922: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
923: return res;
924: }
925: /* FALL THROUGH */
926: case 3: /* rule 5, and 2nd part of rules 2 and 4 */
927: d >>= 1;
928: if ((res = elldouble(nbc, A, A)) != 0)
929: {
930: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
931: return res;
932: }
933: break; /* end of rules 2, 4, and 5 */
934: case 4: /* rule 6 */
935: d /= 3;
936: if ((res = elldouble(nbc, A, T)) != 0)
937: {
938: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
939: return res;
940: }
941: if ((res = elladd(nbc, T, A, A)) != 0)
942: {
943: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
944: return res;
945: }
946: /* FALL THROUGH */
947: case 2: /* rule 3, and 2nd part of rule 6 */
948: d -= e;
949: if ((res = elladd(nbc, A, B, B)) != 0)
950: {
951: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
952: return res;
953: }
954: break; /* end of rules 3 and 6 */
955: case 5: /* rule 7 */
956: d = (d - e - e)/3;
957: if ((res = elldouble(nbc, A, T)) != 0)
958: {
959: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
960: return res;
961: }
962: if ((res = elladd2(nbc, T, A, A, T, B, B)) != 0)
963: {
964: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
965: return res;
966: }
967: break; /* end of rule 7 */
968: case 6: /* rule 8 */
969: d = (d - e)/3;
970: if ((res = elladd(nbc, A, B, B)) != 0)
971: {
972: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
973: return res;
974: }
975: if ((res = elldouble(nbc, A, T)) != 0)
976: {
977: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
978: return res;
979: }
980: if ((res = elladd(nbc, T, A, A)) != 0)
981: {
982: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
983: return res;
984: }
985: break; /* end of rule 8 */
986: case 7: /* rule 9 */
987: e >>= 1;
988: if ((res = elldouble(nbc, B, B)) != 0)
989: {
990: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
991: return res;
992: }
993: break; /* end of rule 9 */
994: default: /* never reached */
995: break;
996: }
997: /* end of Table 4 processing */
998:
999: /* swap d <-> e and A <-> B if necessary */
1000: if (d < e) { r = d; d = e; e = r; S = A; A = B; B = S; }
1001: } /* while */
1002: if ((res = elladd(nbc, XAUX, X2, X2)) != 0)
1003: {
1004: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
1005: return res;
1006: }
1007: avma = av; return 0;
1008: }
1009:
1010: /* PRAC implementation notes - main changes against the paper version:
1011: * (1) The general function [m+n]P = f([m]P,[n]P,[m-n]P) collapses (for
1012: * m!=n) to an elladd() which does not depend on the third argument; and
1013: * thus all references to the third variable (C in the paper) can be elimi-
1014: * nated. (2) Since our multipliers are prime, the outer loop of the paper
1015: * version executes only once, and thus is invisible above. (3) The first
1016: * step in the inner loop of the paper version will always be rule 3, but
1017: * the addition requested by this rule amounts to a doubling, and it will
1018: * always be followed by a swap, so we have unrolled this first iteration.
1019: * (4) Some simplifications in rules 6 and 7 are possible given the above,
1020: * and we can save one addition in each of the two cases. NB one can show
1021: * that none of the other elladd()s in the loop can ever turn out to de-
1022: * generate into an elldouble. (5) I tried to optimize for rule 3, which
1023: * is used far more frequently than all others together, but it didn't
1024: * improve things, so I removed the nested tight loop again. --GN
1025: */
1026:
1027: /* The main loop body of ellfacteur() runs slightly _slower_ under PRAC than
1028: * under a straightforward left-shift binary multiplication algorithm when
1029: * N has <30 digits and B1 is small; PRAC wins when N and B1 get larger.
1030: * Weird. --GN
1031: */
1032:
1033: /* memory layout in ellfacteur(): We'll have a large-ish array of GEN
1034: * pointers, and one huge chunk of memory containing all the actual GEN
1035: * (t_INT) objects.
1036: * nbc will be held constant throughout the invocation.
1037: */
1038: /* The B1 stage of each iteration through the main loop needs little
1039: * space: enough for the X and Y coordinates of the current points,
1040: * and twice as much again as scratchpad for ellmult().
1041: */
1042: /* The B2 stage, starting from some current set of points Q, needs, in
1043: * succession:
1044: * - space for [2]Q, [4]Q, ..., [10]Q, and [p]Q for building the helix;
1045: * - space for 48*nbc X and Y coordinates to hold the helix. Now this
1046: * could re-use [2]Q,...,[8]Q, but only with difficulty, since we don't
1047: * know in advance which residue class mod 210 our p is going to be in.
1048: * It can and should re-use [p]Q, though;
1049: * - space for (temporarily [30]Q and then) [210]Q, [420]Q, and several
1050: * further doublings until the giant step multiplier is reached. This
1051: * _can_ re-use the remaining cells from above. The computation of [210]Q
1052: * will have been the last call to ellmult() within this iteration of the
1053: * main loop, so the scratchpad is now also free to be re-used. We also
1054: * compute [630]Q by a parallel addition; we'll need it later to get the
1055: * baby-step table bootstrapped a little faster.
1056: */
1057: /* Finally, for no more than 4 curves at a time, room for up to 1024 X
1058: * coordinates only (the Y coordinates needed whilst setting up this baby
1059: * step table are temporarily stored in the upper half, and overwritten
1060: * during the last series of additions).
1061: */
1062: /* Graphically: after end of B1 stage (X,Y are the coords of Q):
1063: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
1064: * | X Y | scratch | [2]Q| [4]Q| [6]Q| [8]Q|[10]Q| ... | ...
1065: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
1066: * *X *XAUX *XT *XD *XB
1067: *
1068: * [30]Q is computed from [10]Q. [210]Q can go into XY, etc:
1069: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
1070: * |[210]|[420]|[630]|[840]|[1680,3360,6720,...,2048*210] |bstp table...
1071: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
1072: * *X *XAUX *XT *XD [*XG, somewhere here] *XB .... *XH
1073: *
1074: * So we need (13 + 48) * 2 * nbc slots here, and another 4096 slots for
1075: * the baby step table (not all of which will be used when we start with a
1076: * small B1, but it's better to allocate and initialize ahead of time all
1077: * the slots that might be needed later).
1078: */
1079: /* Note on memory locality: During the B2 phase, accesses to the helix
1080: * (once it has been set up) will be clustered by curves (4 out of nbc at
1081: * a time). Accesses to the baby steps table will wander from one end of
1082: * the array to the other and back, one such cycle per giant step, and
1083: * during a full cycle we would expect on the order of 2E4 accesses when
1084: * using the largest giant step size. Thus we shouldn't be doing too bad
1085: * with respect to thrashing a (512KBy) L2 cache. However, we don't want
1086: * the baby step table to grow larger than this, even if it would reduce
1087: * the number of E.C. operations by a few more per cent for very large B2,
1088: * lest cache thrashing slow down everything disproportionally. --GN
1089: */
1090:
1091: /* parameters for miller() via snextpr(), for use by ellfacteur() */
1092: #define miller_k1 16 /* B1 phase, foolproof below 10^12 */
1093: #define miller_k2 1 /* B2 phase, not foolproof, much faster */
1094: /* (miller_k2 will let thousands of composites slip through, which doesn't
1095: * harm ECM, but ellmult() during the B1 phase should only be fed primes
1096: * which really are prime)
1097: */
1098: /* ellfacteur() has been re-tuned to be useful as a first stage before
1099: * MPQS, especially for _large_ arguments, when insist is false, and now
1100: * also for the case when insist is true, vaguely following suggestions
1101: * by Paul Zimmermann (see http://www.loria.fr/~zimmerma/ and especially
1102: * http://www.loria.fr/~zimmerma/records/ecmnet.html) of INRIA/LORIA.
1103: * --GN 1998Jul,Aug
1104: */
1105: GEN
1106: ellfacteur(GEN n, int insist)
1107: {
1108: static ulong TB1[] =
1109: {
1110: /* table revised, cf. below 1998Aug15 --GN */
1111: 142,172,208,252,305,370,450,545,661,801,972,1180,1430,
1112: 1735,2100,2550,3090,3745,4540,5505,6675,8090,9810,11900,
1113: 14420,17490,21200,25700,31160,37780UL,45810UL,55550UL,67350UL,
1114: 81660UL,99010UL,120050UL,145550UL,176475UL,213970UL,259430UL,
1115: 314550UL,381380UL,462415UL,560660UL,679780UL,824220UL,999340UL,
1116: 1211670UL,1469110UL,1781250UL,2159700UL,2618600UL,3175000UL,
1117: 3849600UL,4667500UL,5659200UL,6861600UL,8319500UL,10087100UL,
1118: 12230300UL,14828900UL,17979600UL,21799700UL,26431500UL,
1119: 32047300UL,38856400UL, /* 110 times that still fits into 32bits */
1120: #ifdef LONG_IS_64BIT
1121: 47112200UL,57122100UL,69258800UL,83974200UL,101816200UL,
1122: 123449000UL,149678200UL,181480300UL,220039400UL,266791100UL,
1123: 323476100UL,392204900UL,475536500UL,576573500UL,699077800UL,
1124: 847610500UL,1027701900UL,1246057200UL,1510806400UL,1831806700UL,
1125: 2221009800UL,2692906700UL,3265067200UL,3958794400UL,4799917500UL,
1126: /* the only reason to stop here is that I got bored (and that users
1127: * will get bored watching their 64bit machines churning on such large
1128: * numbers for month after month). Someone can extend this table when
1129: * the hardware has gotten 100 times faster than now --GN
1130: */
1131: #endif
1132: };
1133: static ulong TB1_for_stage[] =
1134: {
1135: /* table revised 1998Aug11 --GN. The idea is to start a little below
1136: * the optimal B1 for finding factors which would just have been missed
1137: * by pollardbrent(), and escalate gradually, changing curves suf-
1138: * ficiently frequently to give good coverage of the small factor
1139: * ranges. The table entries grow a bit faster than what Paul says
1140: * would be optimal, but having a single table instead of a 2D array
1141: * keeps the code simple
1142: */
1143: 500,520,560,620,700,800,900,1000,1150,1300,1450,1600,1800,2000,
1144: 2200,2450,2700,2950,3250,3600,4000,4400,4850,5300,5800,6400,
1145: 7100,7850,8700,9600,10600,11700,12900,14200,15700,17300,
1146: 19000,21000,23200,25500,28000,31000,34500UL,38500UL,43000UL,
1147: 48000UL,53800UL,60400UL,67750UL,76000UL,85300UL,95700UL,
1148: 107400UL,120500UL,135400UL,152000UL,170800UL,191800UL,215400UL,
1149: 241800UL,271400UL,304500UL,341500UL,383100UL,429700UL,481900UL,
1150: 540400UL,606000UL,679500UL,761800UL,854100UL,957500UL,1073500UL,
1151: };
1152: long nbc,nbc2,dsn,dsnmax,rep,spc,gse,gss,rcn,rcn0,bstp,bstp0;
1.2 ! noro 1153: long a, i, j, k, size = expi(n) + 1, tf = lgefint(n);
! 1154: gpmem_t av, av1, avtmp;
1.1 noro 1155: ulong B1,B2,B2_p,B2_rt,m,p,p0,p2,dp;
1156: GEN w,w0,x,*X,*XT,*XD,*XG,*YG,*XH,*XB,*XB2,*Xh,*Yh,*Xb, res = cgeti(tf);
1157: int rflag, use_clones = 0;
1158: byteptr d, d0;
1159:
1160: av = avma; /* taking res into account */
1161: N = n; /* make n known to auxiliary functions */
1162: /* determine where we'll start, how long we'll persist, and how many
1163: * curves we'll use in parallel
1164: */
1165: if (insist)
1166: {
1167: dsnmax = (size >> 2) - 10;
1168: if (dsnmax < 0) dsnmax = 0;
1169: #ifdef LONG_IS_64BIT
1170: else if (dsnmax > 90) dsnmax = 90;
1171: #else
1172: else if (dsnmax > 65) dsnmax = 65;
1173: #endif
1174: dsn = (size >> 3) - 5;
1175: if (dsn < 0) dsn = 0;
1176: else if (dsn > 47) dsn = 47;
1177: /* pick up the torch where non-insistent stage would have given up */
1178: nbc = dsn + (dsn >> 2) + 9; /* 8 or more curves in parallel */
1179: nbc &= ~3; /* nbc is always a multiple of 4 */
1180: if (nbc > nbcmax) nbc = nbcmax;
1181: a = 1 + (nbcmax<<7); /* seed for choice of curves */
1182: rep = 0; /* gcc -Wall */
1183: }
1184: else
1185: {
1186: dsn = (size - 140) >> 3;
1187: if (dsn > 12) dsn = 12;
1188: dsnmax = 72;
1189: if (dsn < 0) /* < 140 bits: decline the task */
1190: {
1191: #ifdef __EMX__
1192: /* MPQS's disk access under DOS/EMX would be abysmally slow, so... */
1193: dsn = 0;
1194: rep = 20;
1195: nbc = 8;
1196: #else
1197: if (DEBUGLEVEL >= 4)
1198: {
1199: fprintferr("ECM: number too small to justify this stage\n");
1200: flusherr();
1201: }
1202: avma = av; return NULL;
1203: #endif
1204: }
1205: else
1206: {
1207: rep = (size <= 248 ?
1208: (size <= 176 ? (size - 124) >> 4 : (size - 148) >> 3) :
1209: (size - 224) >> 1);
1210: nbc = ((size >> 3) << 2) - 80;
1211: if (nbc < 8) nbc = 8;
1212: else if (nbc > nbcmax) nbc = nbcmax;
1213: #ifdef __EMX__
1214: rep += 20;
1215: #endif
1216: }
1217:
1218: /* it may be convenient to use disjoint sets of curves for the non-insist
1219: * and insist phases; moreover, repeated non-insistent calls acting on
1220: * factors of the same original number should try to use fresh curves.
1221: * The following achieves this
1222: */
1223: a = 1 + (nbcmax<<3)*(size & 0xf);
1224: }
1225: if (dsn > dsnmax) dsn = dsnmax;
1226:
1227: if (DEBUGLEVEL >= 4)
1228: {
1229: (void) timer2(); /* clear timer */
1230: fprintferr("ECM: working on %ld curves at a time; initializing", nbc);
1231: if (!insist)
1232: {
1233: if (rep == 1)
1234: fprintferr(" for one round");
1235: else
1236: fprintferr(" for up to %ld rounds", rep);
1237: }
1238: fprintferr("...\n");
1239: }
1240:
1241: /* The auxiliary routines above need < (3*nbc+240)*tf words on the PARI
1242: * stack, in addition to the spc*(tf+1) words occupied by our main table.
1243: * If stack space is already tight, try the heap, using newbloc() and
1244: * killbloc()
1245: */
1246: nbc2 = nbc << 1;
1247: spc = (13 + 48) * nbc2 + bstpmax * 4;
1248: if ((long)((GEN)avma - (GEN)bot) < spc + 385 + (spc + 3*nbc + 240)*tf)
1249: {
1250: if (DEBUGLEVEL >= 5)
1251: {
1252: fprintferr("ECM: stack tight, using clone space on the heap\n");
1253: }
1254: use_clones = 1;
1255: x = newbloc(spc + 385);
1256: }
1257: else
1258: x = new_chunk(spc + 385);
1259: X = 1 + (GEN*)x; /* B1 phase: current point */
1260: XAUX = X + nbc2; /* scratchpad for ellmult() */
1261: XT = XAUX + nbc2; /* ditto, will later hold [3*210]Q */
1262: XD = XT + nbc2; /* room for various multiples */
1263: XB = XD + 20*nbc; /* start of baby steps table */
1264: XB2 = XB + 2 * bstpmax; /* middle of baby steps table */
1265: XH = XB2 + 2 * bstpmax; /* end of bstps table, start of helix */
1266: Xh = XH + 96*nbc; /* little helix, X coords */
1267: Yh = XH + 192; /* ditto, Y coords */
1268: /* XG will be set later, inside the main loop, since it depends on B2 */
1269:
1270: {
1271: long tw = evallg(tf) | evaltyp(t_INT);
1272:
1273: if (use_clones)
1274: w = newbloc(spc*tf);
1275: else
1276: w = new_chunk(spc*tf);
1277: w0 = w; /* remember this for later... */
1278: for (i = spc; i--; )
1279: {
1280: *w = tw; X[i] = w; w += tf; /* hack for: w = cgeti(tf) */
1281: }
1282: /* Xh range of 384 pointers not set; these will later duplicate the
1283: * pointers in the XH range, 4 curves at a time. Some of the cells
1284: * reserved here for the XB range will never be used, instead, we'll
1285: * warp the pointers to connect to (read-only) GENs in the X/XD range;
1286: * it would be complicated to skip them here to conserve merely a few
1287: * KBy of stack or heap space. --GN
1288: */
1289: }
1290:
1291: /* *** ECM MAIN LOOP *** */
1292: for(;;)
1293: {
1294: d = diffptr; rcn = NPRC; /* multipliers begin at the beginning */
1295:
1296: /* pick curves */
1297: for (i = nbc2; i--; ) affsi(a++, X[i]);
1298: /* pick bounds */
1299: B1 = insist ? TB1[dsn] : TB1_for_stage[dsn];
1300: B2 = 110*B1;
1301: B2_rt = (ulong)(sqrt((double)B2));
1302: /* pick giant step exponent and size.
1303: * With 32 baby steps, a giant step corresponds to 32*420 = 13440, appro-
1304: * priate for the smallest B2s. With 1024, a giant step will be 430080;
1305: * this will be appropriate for B1 >~ 42000, where 512 baby steps would
1306: * imply roughly the same number of E.C. additions.
1307: */
1308: gse = (B1 < 656 ?
1309: (B1 < 200 ? 5 : 6) :
1310: (B1 < 10500 ?
1311: (B1 < 2625 ? 7 : 8) :
1312: (B1 < 42000 ? 9 : 10)
1313: )
1314: );
1315: gss = 1UL << gse;
1316: XG = XT + gse*nbc2; /* will later hold [2^(gse+1)*210]Q */
1317: YG = XG + nbc;
1318:
1319: if (DEBUGLEVEL >= 4)
1320: {
1321: fprintferr("ECM: time = %6ld ms\nECM: dsn = %2ld,\tB1 = %4lu,",
1322: timer2(), dsn, B1);
1323: fprintferr("\tB2 = %6lu,\tgss = %4ld*420\n", B2, gss);
1324: flusherr();
1325: }
1.2 ! noro 1326: p = 0;
! 1327: NEXT_PRIME_VIADIFF(p,d);
1.1 noro 1328:
1329: /* ---B1 PHASE--- */
1330: /* treat p=2 separately */
1331: B2_p = B2 >> 1;
1332: for (m=1; m<=B2_p; m<<=1)
1333: {
1334: if ((rflag = elldouble(nbc, X, X)) > 1) goto fin;
1335: else if (rflag) break;
1336: }
1337:
1338: /* p=3,...,nextprime(B1) */
1339: while (p < B1 && p <= B2_rt)
1340: {
1341: p = snextpr(p, &d, &rcn, NULL, miller_k1);
1342: B2_p = B2/p; /* beware integer overflow on 32-bit CPUs */
1343: for (m=1; m<=B2_p; m*=p)
1344: {
1345: if ((rflag = ellmult(nbc, p, X, X)) > 1) goto fin;
1346: else if (rflag) break;
1347: }
1348: }
1349: /* primes p larger than sqrt(B2) can appear only to the 1st power */
1350: while (p < B1)
1351: {
1352: p = snextpr(p, &d, &rcn, NULL, miller_k1);
1353: if (ellmult(nbc, p, X, X) > 1) goto fin; /* p^2 > B2: no loop */
1354: }
1355:
1356: if (DEBUGLEVEL >= 4)
1357: {
1358: fprintferr("ECM: time = %6ld ms, B1 phase done, ", timer2());
1359: fprintferr("p = %lu, setting up for B2\n", p);
1360: }
1361:
1362: /* ---B2 PHASE--- */
1363: /* compute [2]Q,...,[10]Q, which we need to build the helix */
1364: if (elldouble(nbc, X, XD) > 1)
1365: goto fin; /* [2]Q */
1366: if (elldouble(nbc, XD, XD + nbc2) > 1)
1367: goto fin; /* [4]Q */
1368: if (elladd(nbc, XD, XD + nbc2, XD + (nbc<<2)) > 1)
1369: goto fin; /* [6]Q */
1370: if (elladd2(nbc,
1371: XD, XD + (nbc<<2), XT + (nbc<<3),
1372: XD + nbc2, XD + (nbc<<2), XD + (nbc<<3)) > 1)
1373: goto fin; /* [8]Q and [10]Q */
1374: if (DEBUGLEVEL >= 7)
1375: fprintferr("\t(got [2]Q...[10]Q)\n");
1376:
1377: /* get next prime (still using the foolproof test) */
1378: p = snextpr(p, &d, &rcn, NULL, miller_k1);
1379: /* make sure we have the residue class number (mod 210) */
1380: if (rcn == NPRC)
1381: {
1382: rcn = prc210_no[(p % 210) >> 1];
1383: if (rcn == NPRC)
1384: {
1385: fprintferr("ECM: %lu should have been prime but isn\'t\n", p);
1386: err(bugparier, "ellfacteur");
1387: }
1388: }
1389:
1390: /* compute [p]Q and put it into its place in the helix */
1391: if (ellmult(nbc, p, X, XH + rcn*nbc2) > 1) goto fin;
1392: if (DEBUGLEVEL >= 7)
1393: fprintferr("\t(got [p]Q, p = %lu = %lu mod 210)\n",
1394: p, (ulong)(prc210_rp[rcn]));
1395:
1396: /* save current p, d, and rcn; we'll need them more than once below */
1397: p0 = p;
1398: d0 = d;
1399: rcn0 = rcn; /* remember where the helix wraps */
1400: bstp0 = 0; /* p is at baby-step offset 0 from itself */
1401:
1402: /* fill up the helix, stepping forward through the prime residue classes
1403: * mod 210 until we're back at the r'class of p0. Keep updating p so
1404: * that we can print meaningful diagnostics if a factor shows up; but
1405: * don't bother checking which of these p's are in fact prime
1406: */
1407: for (i = 47; i; i--) /* 47 iterations */
1408: {
1409: p += (dp = (ulong)prc210_d1[rcn]);
1410: if (rcn == 47)
1411: { /* wrap mod 210 */
1412: if (elladd(nbc, XT + dp*nbc, XH + rcn*nbc2, XH) > 1)
1413: goto fin;
1414: rcn = 0;
1415: continue;
1416: }
1417: if (elladd(nbc, XT + dp*nbc, XH + rcn*nbc2, XH + rcn*nbc2 + nbc2) > 1)
1418: goto fin;
1419: rcn++;
1420: }
1421: if (DEBUGLEVEL >= 7)
1422: fprintferr("\t(got initial helix)\n");
1423:
1424: /* compute [210]Q etc, which will be needed for the baby step table */
1425: if (ellmult(nbc, 3, XD + (nbc<<3), X) > 1) goto fin;
1426: if (ellmult(nbc, 7, X, X) > 1) goto fin; /* [210]Q */
1427: /* this was the last call to ellmult() in the main loop body; may now
1428: * overwrite XAUX and slots XD and following
1429: */
1430: if (elldouble(nbc, X, XAUX) > 1) goto fin; /* [420]Q */
1431: if (elladd(nbc, X, XAUX, XT) > 1) goto fin; /* [630]Q */
1432: if (elladd(nbc, X, XT, XD) > 1) goto fin; /* [840]Q */
1433: for (i=1; i <= gse; i++) /* gse successive doublings */
1434: {
1435: if (elldouble(nbc, XT + i*nbc2, XD + i*nbc2) > 1) goto fin;
1436: }
1437: /* (the last iteration has initialized XG to [210*2^(gse+1)]Q) */
1438:
1439: if (DEBUGLEVEL >= 4)
1440: {
1441: fprintferr("ECM: time = %6ld ms, entering B2 phase, p = %lu\n",
1442: timer2(), p);
1443: }
1444:
1445: /* inner loop over small sets of 4 curves at a time */
1446: for (i = nbc - 4; i >= 0; i -= 4)
1447: {
1448: if (DEBUGLEVEL >= 6)
1449: fprintferr("ECM: finishing curves %ld...%ld\n", i, i+3);
1450: /* copy relevant pointers from XH to Xh. Recall memory layout in XH
1451: * is: nbc X coordinates followed by nbc Y coordinates for residue
1452: * class 1 mod 210, then the same for r.c. 11 mod 210, etc. Memory
1453: * layout for Xh is: four X coords for 1 mod 210, four for 11 mod 210,
1454: * etc, four for 209 mod 210, and then the corresponding Y coordinates
1455: * in the same order. This will allow us to do a giant step on Xh
1456: * using just three calls to elladd0() each acting on 64 points in
1457: * parallel
1458: */
1459: for (j = 48; j--; )
1460: {
1461: k = nbc2*j + i;
1462: m = j << 2; /* X coordinates */
1463: Xh[m] = XH[k]; Xh[m+1] = XH[k+1];
1464: Xh[m+2] = XH[k+2]; Xh[m+3] = XH[k+3];
1465: k += nbc; /* Y coordinates */
1466: Yh[m] = XH[k]; Yh[m+1] = XH[k+1];
1467: Yh[m+2] = XH[k+2]; Yh[m+3] = XH[k+3];
1468: }
1469: /* build baby step table of X coords of multiples of [210]Q. XB[4*j]
1470: * will point at X coords on four curves from [(j+1)*210]Q. Until
1471: * we're done, we need some Y coords as well, which we keep in the
1472: * second half of the table, overwriting them at the end when gse==10.
1473: * Those multiples which we already have (by 1,2,3,4,8,16,...,2^gse)
1474: * are entered simply by copying the pointers, ignoring the small
1475: * number of slots in w that were initially reserved for them.
1476: * Here are the initial entries...
1477: */
1478: for (Xb=XB,k=2,j=i; k--; Xb=XB2,j+=nbc) /* do first X, then Y coords */
1479: {
1480: Xb[0] = X[j]; Xb[1] = X[j+1]; /* [210]Q */
1481: Xb[2] = X[j+2]; Xb[3] = X[j+3];
1482: Xb[4] = XAUX[j]; Xb[5] = XAUX[j+1]; /* [420]Q */
1483: Xb[6] = XAUX[j+2]; Xb[7] = XAUX[j+3];
1484: Xb[8] = XT[j]; Xb[9] = XT[j+1]; /* [630]Q */
1485: Xb[10] = XT[j+2]; Xb[11] = XT[j+3];
1486: Xb += 4; /* this points at [420]Q */
1487: /* ... entries at powers of 2 times 210 .... */
1488: for (m = 2; m < (ulong)gse+k; m++) /* omit Y coords of [2^gse*210]Q */
1489: {
1490: long m2 = m*nbc2 + j;
1491: Xb += (2UL<<m); /* points now at [2^m*210]Q */
1492: Xb[0] = XAUX[m2]; Xb[1] = XAUX[m2+1];
1493: Xb[2] = XAUX[m2+2]; Xb[3] = XAUX[m2+3];
1494: }
1495: }
1496: if (DEBUGLEVEL >= 7)
1497: fprintferr("\t(extracted precomputed helix / baby step entries)\n");
1498: /* ... glue in between, up to 16*210 ... */
1499: if (elladd0(12, 4, /* 12 pts + (4 pts replicated thrice) */
1500: XB + 12, XB2 + 12,
1501: XB, XB2,
1502: XB + 16, XB2 + 16)
1503: > 1) goto fin; /* 4 + {1,2,3} = {5,6,7} */
1504: if (elladd0(28, 4, /* 28 pts + (4 pts replicated 7fold) */
1505: XB + 28, XB2 + 28,
1506: XB, XB2,
1507: XB + 32, XB2 + 32)
1508: > 1) goto fin; /* 8 + {1,...,7} = {9,...,15} */
1509: /* ... and the remainder of the lot */
1510: for (m = 5; m <= (ulong)gse; m++)
1511: {
1512: /* fill in from 2^(m-1)+1 to 2^m-1 in chunks of 64 and 60 points */
1513: ulong m2 = 2UL << m; /* will point at 2^(m-1)+1 */
1514: for (j = 0; (ulong)j < m2-64; j+=64) /* executed 0 times when m == 5 */
1515: {
1516: if (elladd0(64, 4,
1517: XB + m2 - 4, XB2 + m2 - 4,
1518: XB + j, XB2 + j,
1519: XB + m2 + j,
1520: (m<(ulong)gse ? XB2 + m2 + j : NULL))
1521: > 1) goto fin;
1522: } /* j == m2-64 here, 60 points left */
1523: if (elladd0(60, 4,
1524: XB + m2 - 4, XB2 + m2 - 4,
1525: XB + j, XB2 + j,
1526: XB + m2 + j,
1527: (m<(ulong)gse ? XB2 + m2 + j : NULL))
1528: > 1) goto fin;
1529: /* (when m==gse, drop Y coords of result, and when both equal 1024,
1530: * overwrite Y coords of second argument with X coords of result)
1531: */
1532: }
1533: if (DEBUGLEVEL >= 7)
1534: fprintferr("\t(baby step table complete)\n");
1535: /* initialize a few other things */
1536: bstp = bstp0;
1537: p = p0; d = d0; rcn = rcn0;
1538: gl = gun;
1539: av1 = avma;
1540: /* scratchspace for prod (x_i-x_j) */
1.2 ! noro 1541: avtmp = (gpmem_t)new_chunk(8 * lgefint(n));
1.1 noro 1542: /* the correct entry in XB to use depends on bstp and on where we are
1543: * on the helix. As we skip from prime to prime, bstp will be incre-
1544: * mented by snextpr() each time we wrap around through residue class
1545: * number 0 (1 mod 210), but the baby step should not be taken until
1546: * rcn>=rcn0 (i.e. until we pass again the residue class of p0).
1547: * The correct signed multiplier is thus k = bstp - (rcn < rcn0),
1548: * and the offset from XB is four times (|k| - 1). When k==0, we may
1549: * ignore the current prime (if it had led to a factorization, this
1550: * would have been noted during the last giant step, or -- when we
1551: * first get here -- whilst initializing the helix). When k > gss,
1552: * we must do a giant step and bump bstp back by -2*gss.
1553: * The gcd of the product of X coord differences against N is taken just
1554: * before we do a giant step.
1555: */
1556: /* loop over probable primes p0 < p <= nextprime(B2),
1557: * inserting giant steps as necessary
1558: */
1559: while (p < B2)
1560: {
1561: /* save current p for diagnostics */
1562: p2 = p;
1563: /* get next probable prime */
1564: p = snextpr(p, &d, &rcn, &bstp, miller_k2);
1565: /* work out the corresponding baby-step multiplier */
1566: k = bstp - (rcn < rcn0 ? 1 : 0);
1567: /* check whether it's giant-step time */
1568: if (k > gss)
1569: {
1570: /* take gcd */
1571: gl = mppgcd(gl, n);
1572: if (!is_pm1(gl) && !egalii(gl, n)) { p = p2; goto fin; }
1573: gl = gun;
1574: avma = av1;
1575: while (k > gss) /* hm, just how large are those prime gaps? */
1576: {
1577: /* giant step */
1578: if (DEBUGLEVEL >= 7)
1579: fprintferr("\t(giant step at p = %lu)\n", p);
1580: if (elladd0(64, 4,
1581: XG + i, YG + i,
1582: Xh, Yh, Xh, Yh) > 1) goto fin;
1583: if (elladd0(64, 4,
1584: XG + i, YG + i,
1585: Xh + 64, Yh + 64, Xh + 64, Yh + 64) > 1) goto fin;
1586: if (elladd0(64, 4,
1587: XG + i, YG + i,
1588: Xh + 128, Yh + 128, Xh + 128, Yh + 128)
1589: > 1) goto fin;
1590: bstp -= (gss << 1);
1591: /* recompute multiplier */
1592: k = bstp - (rcn < rcn0 ? 1 : 0);
1593: }
1594: }
1595: if (!k) continue; /* point of interest is already in Xh */
1596: if (k < 0) k = -k;
1597: m = ((ulong)k - 1) << 2;
1598: /* accumulate product of differences of X coordinates */
1599: j = rcn<<2;
1600: avma = avtmp; /* go to garbage zone */
1601: gl = modii(mulii(gl, subii(XB[m], Xh[j])), n);
1602: gl = modii(mulii(gl, subii(XB[m+1], Xh[j+1])), n);
1603: gl = modii(mulii(gl, subii(XB[m+2], Xh[j+2])), n);
1604: gl = mulii(gl, subii(XB[m+3], Xh[j+3]));
1605: avma = av1;
1606: gl = modii(gl, n);
1607: } /* loop over p */
1608: avma = av1;
1609: } /* for i (loop over sets of 4 curves) */
1610:
1611: /* continuation part of main loop */
1612:
1613: if (dsn < dsnmax)
1614: {
1615: dsn += insist ? 1 : 2;
1616: if (dsn > dsnmax) dsn = dsnmax;
1617: }
1618:
1619: if (!insist && !--rep)
1620: {
1621: if (DEBUGLEVEL >= 4)
1622: {
1623: fprintferr("ECM: time = %6ld ms,\tellfacteur giving up.\n",
1624: timer2());
1625: flusherr();
1626: }
1627: avma = av;
1628: if (use_clones) { gunclone(w0); gunclone(x); }
1629: return NULL;
1630: }
1631: }
1632: /* *** END OF ECM MAIN LOOP *** */
1633: fin:
1634: affii(gl, res);
1635:
1636: if (DEBUGLEVEL >= 4)
1637: {
1638: fprintferr("ECM: time = %6ld ms,\tp <= %6lu,\n\tfound factor = %Z\n",
1639: timer2(), p, res);
1640: flusherr();
1641: }
1642: avma=av;
1643: if (use_clones) { gunclone(w0); gunclone(x); }
1644: return res;
1645: }
1646:
1647: /***********************************************************************/
1648: /** **/
1649: /** FACTORIZATION (Pollard-Brent rho) **/
1650: /** pollardbrent() returns a nontrivial factor of n, assuming n is **/
1651: /** composite and has no small prime divisor, or NULL if going on **/
1652: /** would take more time than we want to spend. Sometimes it will **/
1653: /** find more than one factor, and return a structure suitable for **/
1654: /** interpretation by ifac_crack() below. GN1998Jun18-26 **/
1655: /** (Cf. Algorithm 8.5.2 in ACiCNT) **/
1656: /** **/
1657: /***********************************************************************/
1658:
1659: static void
1660: rho_dbg(long c, long msg_mask)
1661: {
1662: if (c & msg_mask) return;
1663: fprintferr("Rho: time = %6ld ms,\t%3ld round%s\n",
1664: timer2(), c, (c==1?"":"s"));
1665: flusherr();
1666: }
1667:
1668: /* Tuning parameter: for input up to 64 bits long, we must not spend more
1669: * than a very short time, for fear of slowing things down on average.
1670: * With the current tuning formula, increase our efforts somewhat at 49 bit
1671: * input (an extra round for each bit at first), and go up more and more
1672: * rapidly after we pass 80 bits.-- Changed this (again...) to adjust for
1673: * the presence of squfof, which will finish input up to 59 bits quickly.
1674: */
1675:
1676: #define tune_pb_min 14 /* even 15 seems too much. */
1677:
1678: /* We return NULL when we run out of time, or a single t_INT containing a
1679: * nontrivial factor of n, or a vector of t_INTs, each triple of successive
1680: * entries containing a factor, an exponent (equal to un), and a factor
1681: * class (NULL for unknown or zero for known composite), matching the
1682: * internal representation used by the ifac_*() routines below. Repeated
1683: * factors can arise and are legal; the caller will be sorting the factors
1684: * anyway.
1685: */
1686: GEN
1687: pollardbrent(GEN n)
1688: {
1689: long tf = lgefint(n), size = 0, delta, retries = 0, msg_mask;
1.2 ! noro 1690: long c0, c, k, k1, l;
! 1691: gpmem_t GGG, avP, avx, av = avma;
1.1 noro 1692: GEN x, x1, y, P, g, g1, res;
1693:
1694: if (DEBUGLEVEL >= 4) (void)timer2(); /* clear timer */
1695:
1696: if (tf >= 4)
1697: size = expi(n) + 1;
1698: else if (tf == 3) /* try to keep purify happy... */
1.2 ! noro 1699: size = BITS_IN_LONG - bfffo((ulong)n[2]);
1.1 noro 1700:
1701: if (size <= 28)
1702: c0 = 32; /* amounts very nearly to `insist'.
1703: * Now that we have squfof(), we don't insist
1704: * any more when input is 2^29 ... 2^32
1705: */
1706: else if (size <= 42)
1707: c0 = tune_pb_min;
1708: else if (size <= 59) /* match squfof() cutoff point */
1709: c0 = tune_pb_min + ((size - 42)<<1);
1710: else if (size <= 72)
1711: c0 = tune_pb_min + size - 24;
1712: else if (size <= 301)
1713: /* nonlinear increase in effort, kicking in around 80 bits */
1714: /* 301 gives 48121 + tune_pb_min */
1715: c0 = tune_pb_min + size - 60 +
1716: ((size-73)>>1)*((size-70)>>3)*((size-56)>>4);
1.2 ! noro 1717: else
1.1 noro 1718: c0 = 49152; /* ECM is faster when it'd take longer */
1719:
1720: c = c0 << 5; /* 32 iterations per round */
1721: msg_mask = (size >= 448? 0x1fff:
1722: (size >= 192? (256L<<((size-128)>>6))-1: 0xff));
1723: PB_RETRY:
1724: /* trick to make a `random' choice determined by n. Don't use x^2+0 or
1725: * x^2-2, ever. Don't use x^2-3 or x^2-7 with a starting value of 2.
1726: * x^2+4, x^2+9 are affine conjugate to x^2+1, so don't use them either.
1727: *
1728: * (the point being that when we get called again on a composite cofactor
1729: * of something we've already seen, we had better avoid the same delta)
1730: */
1731: switch ((size + retries) & 7)
1732: {
1733: case 0: delta= 1; break;
1734: case 1: delta= -1; break;
1735: case 2: delta= 3; break;
1736: case 3: delta= 5; break;
1737: case 4: delta= -5; break;
1738: case 5: delta= 7; break;
1739: case 6: delta= 11; break;
1740: /* case 7: */
1741: default: delta=-11; break;
1742: }
1743: if (DEBUGLEVEL >= 4)
1744: {
1745: if (!retries)
1746: {
1747: if (size < 1536)
1748: fprintferr("Rho: searching small factor of %ld-bit integer\n", size);
1749: else
1750: fprintferr("Rho: searching small factor of %ld-word integer\n", tf-2);
1751: }
1752: else
1753: fprintferr("Rho: restarting for remaining rounds...\n");
1754: fprintferr("Rho: using X^2%+1ld for up to %ld rounds of 32 iterations\n",
1755: delta, c >> 5);
1756: flusherr();
1757: }
1758: x=gdeux; P=gun; g1 = NULL; k = 1; l = 1;
1759: (void)new_chunk(10 + 6 * tf); /* enough for cgetg(10) + 3 divii */
1760: y = cgeti(tf); affsi(2, y);
1761: x1= cgeti(tf); affsi(2, x1);
1762: avx = avma;
1.2 ! noro 1763: avP = (gpmem_t)new_chunk(2 * tf); /* enough for x = addsi(tf+1) */
! 1764: GGG = (gpmem_t)new_chunk(4 * tf); /* enough for P = modii(2tf+1, tf) */
1.1 noro 1765:
1766: for (;;) /* terminated under the control of c */
1767: {
1768: /* use the polynomial x^2 + delta */
1769: #define one_iter() {\
1770: avma = GGG; x = resii(sqri(x), n); /* to garbage zone */\
1771: avma = avx; x = addsi(delta,x); /* erase garbage */\
1772: avma = GGG; P = mulii(P, subii(x1, x));\
1773: avma = avP; P = modii(P,n); }
1774:
1775: one_iter();
1776:
1777: if ((--c & 0x1f)==0) /* one round complete */
1778: {
1779: g = mppgcd(n, P);
1780: if (!is_pm1(g)) goto fin; /* caught something */
1781: if (c <= 0)
1782: { /* getting bored */
1783: if (DEBUGLEVEL >= 4)
1784: {
1785: fprintferr("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1786: timer2());
1787: flusherr();
1788: }
1789: avma=av; return NULL;
1790: }
1791: P = gun; /* not necessary, but saves 1 mulii/round */
1792: if (DEBUGLEVEL >= 4) rho_dbg(c0-(c>>5), msg_mask);
1793: affii(x,y);
1794: }
1795:
1796: if (--k) continue; /* normal end of loop body */
1797:
1798: if (c & 0x1f) /* otherwise, we already checked */
1799: {
1800: g = mppgcd(n, P);
1801: if (!is_pm1(g)) goto fin;
1802: P = gun;
1803: }
1804:
1805: /* Fast forward phase, doing l inner iterations without computing gcds.
1806: * Check first whether it would take us beyond the alloted time.
1807: * Fast forward rounds count only half (although they're taking
1808: * more like 2/3 the time of normal rounds). This to counteract the
1809: * nuisance that all c0 between 4096 and 6144 would act exactly as
1810: * 4096; with the halving trick only the range 4096..5120 collapses
1811: * (similarly for all other powers of two)
1812: */
1813: if ((c-=(l>>1)) <= 0)
1814: { /* got bored */
1815: if (DEBUGLEVEL >= 4)
1816: {
1817: fprintferr("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1818: timer2());
1819: flusherr();
1820: }
1821: avma=av; return NULL;
1822: }
1823: c &= ~0x1f; /* keep it on multiples of 32 */
1824:
1825: /* Fast forward loop */
1826: affii(x, x1); k = l; l <<= 1;
1827: /* don't show this for the first several (short) fast forward phases. */
1828: if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1829: {
1830: fprintferr("Rho: fast forward phase (%ld rounds of 64)...\n", l>>7);
1831: flusherr();
1832: }
1833: for (k1=k; k1; k1--) one_iter();
1834: if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1835: {
1836: fprintferr("Rho: time = %6ld ms,\t%3ld rounds, back to normal mode\n",
1837: timer2(), c0-(c>>5));
1838: flusherr();
1839: }
1840:
1841: affii(x,y);
1842: } /* forever */
1843:
1844: fin:
1845: /* An accumulated gcd was > 1 */
1846: /* if it isn't n, and looks prime, return it */
1847: if (!egalii(g,n))
1848: {
1849: if (miller(g,17))
1850: {
1851: if (DEBUGLEVEL >= 4)
1852: {
1853: rho_dbg(c0-(c>>5), 0);
1854: fprintferr("\tfound factor = %Z\n",g);
1855: flusherr();
1856: }
1857: avma=av; return icopy(g);
1858: }
1859: avma = avx; g1 = icopy(g); /* known composite, keep it safe */
1860: avx = avma;
1861: }
1862: else g1 = n; /* and work modulo g1 for backtracking */
1863:
1864: /* Here g1 is known composite */
1865: if (DEBUGLEVEL >= 4 && size > 192)
1866: {
1867: fprintferr("Rho: hang on a second, we got something here...\n");
1868: flusherr();
1869: }
1870: for(;;) /* backtrack until period recovered. Must terminate */
1871: {
1872: avma = GGG; y = resii(sqri(y), g1);
1873: avma = avx; y = addsi(delta,y);
1874: g = mppgcd(subii(x1, y), g1);
1875: if (!is_pm1(g)) break;
1876:
1877: if (DEBUGLEVEL >= 4 && (--c & 0x1f) == 0) rho_dbg(c0-(c>>5), msg_mask);
1878: }
1879:
1880: avma = av; /* safe */
1881: if (g1 == n || egalii(g,g1))
1882: {
1883: if (g1 == n && egalii(g,g1))
1884: { /* out of luck */
1885: if (DEBUGLEVEL >= 4)
1886: {
1887: rho_dbg(c0-(c>>5), 0);
1888: fprintferr("\tPollard-Brent failed.\n"); flusherr();
1889: }
1890: if (++retries >= 4) return NULL;
1891: goto PB_RETRY;
1892: }
1893: /* half lucky: we've split n, but g1 equals either g or n */
1894: if (DEBUGLEVEL >= 4)
1895: {
1896: rho_dbg(c0-(c>>5), 0);
1897: fprintferr("\tfound %sfactor = %Z\n",
1898: (g1!=n ? "composite " : ""), g);
1899: flusherr();
1900: }
1901: res = cgetg(7, t_VEC);
1902: res[1] = licopy(g); /* factor */
1903: res[2] = un; /* exponent 1 */
1904: res[3] = (g1!=n? zero: (long)NULL); /* known composite when g1!=n */
1905:
1906: res[4] = ldivii(n,g); /* cofactor */
1907: res[5] = un; /* exponent 1 */
1908: res[6] = (long)NULL; /* unknown */
1909: return res;
1910: }
1911: /* g < g1 < n : our lucky day -- we've split g1, too */
1912: res = cgetg(10, t_VEC);
1913: /* unknown status for all three factors */
1914: res[1] = licopy(g); res[2] = un; res[3] = (long)NULL;
1915: res[4] = ldivii(g1,g); res[5] = un; res[6] = (long)NULL;
1916: res[7] = ldivii(n,g1); res[8] = un; res[9] = (long)NULL;
1917: if (DEBUGLEVEL >= 4)
1918: {
1919: rho_dbg(c0-(c>>5), 0);
1920: fprintferr("\tfound factors = %Z, %Z,\n\tand %Z\n",
1921: res[1], res[4], res[7]);
1922: flusherr();
1923: }
1924: return res;
1925: }
1926:
1927: /***********************************************************************/
1928: /** **/
1929: /** FACTORIZATION (Shanks' SQUFOF) **/
1930: /** squfof() returns a nontrivial factor of n, assuming n is odd, **/
1931: /** composite, not a pure square, and has no small prime divisor, **/
1932: /** or NULL if it fails to find one. It works on two discriminants **/
1933: /** simultaneously (n and 5n for n=1(4), 3n and 4n for n=3(4)). **/
1934: /** The present implementation is limited to input <2^59, and will **/
1935: /** work most of the time in signed arithmetic on integers <2^31 in **/
1936: /** absolute size. Occasionally, it may find a factor which is a **/
1937: /** square.-- Since this will be used in the double-large-prime **/
1938: /** variation of MPQS, we provide a way of suppressing debugging **/
1939: /** output even at high debuglevels. GN2000Sep30-Oct01 **/
1940: /** (Cf. Algorithm 8.7.2 in ACiCNT) **/
1941: /** **/
1942: /***********************************************************************/
1943: static long squfof_ambig(long a, long B, long dd, GEN D, long *cntamb);
1944:
1945: #define SQUFOF_BLACKLIST_SZ 64
1946:
1947: GEN
1948: squfof(GEN n, long quiet)
1949: {
1950: long tf = lgefint(n), nm4, cnt = 0, cntamb;
1951: long a1, b1, c1, d1, dd1, L1, a2, b2, c2, d2, dd2, L2, a, q, c, qc, qcb;
1952: GEN D1, D2, Q, res;
1.2 ! noro 1953: gpmem_t av = avma;
1.1 noro 1954: static long blacklist1[SQUFOF_BLACKLIST_SZ], blacklist2[SQUFOF_BLACKLIST_SZ];
1955: long blp1 = 0, blp2 = 0;
1956: long mydebug = DEBUGLEVEL - quiet;
1957: int act1 = 1, act2 = 1;
1958:
1959: if (cmpis(n,5) <= 0) return NULL; /* input n <= 5 */
1960:
1961: #ifdef LONG_IS_64BIT
1.2 ! noro 1962: if (tf > 3 || (tf == 3 && bfffo((ulong)n[2]) < 5)) /* n too large */
1.1 noro 1963: return NULL;
1964: #else /* 32 bits */
1.2 ! noro 1965: if (tf > 4 || (tf == 4 && bfffo((ulong)n[2]) < 5)) /* n too large */
1.1 noro 1966: return NULL;
1967: #endif
1968: /* now we have 5 < n < 2^59 */
1969:
1970: nm4 = mod4(n);
1971: if (!(nm4 & 1)) return gdeux; /* n even */
1972:
1973: if (nm4 == 1)
1974: { /* case n = 1 (mod4): run one iteration on D1 = n, another on D2 = 5n */
1975: D1 = n; /* no need to copy */
1976: Q = racine(D1); d1 = itos(Q); L1 = itos(racine(Q));
1977: dd1 = (d1>>1) + (d1&1); /* rounding trick, see below */
1978: b1 = ((d1-1) & (~1UL)) + 1; /* largest odd number not exceeding d1 */
1979: c1 = itos(shifti(subii(D1, sqri(stoi(b1))), -2));
1980: if (c1 == 0) /* n was a square */
1981: {
1982: avma = av;
1983: res = cgetg(4, t_VEC);
1984: res[1] = lstoi(d1); /* factor */
1985: res[2] = deux; /* exponent 2 */
1986: res[3] = (long)NULL; /* unknown whether prime or composite */
1987: return res;
1988: }
1989: D2 = mulsi(5,n);
1990: Q = racine(D2); d2 = itos(Q); L2 = itos(racine(Q));
1991: dd2 = (d2>>1) + (d2&1);
1992: b2 = ((d2-1) & (~1UL)) + 1; /* b1, b2 will always stay odd */
1993: c2 = itos(shifti(subii(D2, sqri(stoi(b2))), -2));
1994: if (c2 == 0) /* 5n is a square, caller should avoid this */
1995: {
1996: avma = av;
1997: res = cgetg(4, t_VEC);
1998: res[1] = lstoi(d2/5); /* factor */
1999: res[2] = deux; /* exponent 2 */
2000: res[3] = (long)NULL; /* unknown whether prime or composite */
2001: return res;
2002: }
2003: }
2004: else
2005: { /* case n = 3 (mod4): run one iteration on D1 = 3n, another on D2 = 4n */
2006: D1 = mulsi(3,n);
2007: Q = racine(D1); d1 = itos(Q); L1 = itos(racine(Q));
2008: dd1 = (d1>>1) + (d1&1);
2009: b1 = ((d1-1) & (~1UL)) + 1; /* will always stay odd */
2010: c1 = itos(shifti(subii(D1, sqri(stoi(b1))), -2));
2011: if (c1 == 0) /* 3n is a square, caller should avoid this */
2012: {
2013: avma = av;
2014: res = cgetg(4, t_VEC);
2015: res[1] = lstoi(d1/3); /* factor */
2016: res[2] = deux; /* exponent 2 */
2017: res[3] = (long)NULL; /* unknown whether prime or composite */
2018: return res;
2019: }
2020: D2 = shifti(n,2);
2021: Q = racine(D2); d2 = itos(Q); L2 = itos(racine(Q));
2022: dd2 = d2>>1; /* no rounding trick here */
2023: b2 = (d2 & (~1UL)); /* largest even below d2, will stay even */
2024: c2 = itos(shifti(subii(D2, sqri(stoi(b2))), -2));
2025: /* c2 cannot vanish -- n = 3(mod 4) cannot be a square */
2026: }
2027: a1 = a2 = 1;
2028: /* This completes the setup of the two (identity) forms (a1,b1,-c1) and
2029: * (a2,b2,-c2).
2030: *
2031: * Attentive readers will notice that a1 and c1 represent the absolute
2032: * values of the a,c coefficients; we keep track of the sign separately,
2033: * in fact the sign info is contained in the rightmost bit of the iteration
2034: * counter cnt: when cnt is even, c is understood to be negative, else c
2035: * is positive and a < 0.
2036: *
2037: * The quantities dd1, dd2 are used to compute floor((d1+b1)/2) etc., with-
2038: * out overflowing the 31bit signed integer size limit, as dd1+floor(b1/2)
2039: * etc. This is the "rounding trick" alluded to above.
2040: *
2041: * L1, L2 are the limits for blacklisting small leading coefficients
2042: * on the principal cycle, to guarantee that when we find a square form,
2043: * its square root will belong to an ambiguous cycle (i.e. won't be an
2044: * earlier form on the principal cycle).
2045: *
2046: * When n = 3(mod 4), D2 = 12(mod 16), and b^2 is always 0 or 4 mod 16.
2047: * It follows that 4*a*c must be 4 or 8 mod 16, respectively, so at most
2048: * one of a,c can be divisible by 2 at most to the first power. This fact
2049: * is used a couple of times below.
2050: *
2051: * The flags act1, act2 remain true while the respective cycle is still
2052: * active; we drop them to false when we return to the identity form with-
2053: * out having found a square form (or when the blacklist overflows, which
2054: * shouldn't happen).
2055: */
2056:
2057: if (mydebug >= 4)
2058: {
2059: fprintferr("SQUFOF: entering main loop with forms\n"
2060: "\t(1, %ld, %ld) and (1, %ld, %ld)\n\tof discriminants\n"
2061: "\t%Z and %Z, respectively\n",
2062: b1, -c1, b2, -c2, D1, D2);
2063: flusherr();
2064: (void)timer2(); /* clear timer */
2065: }
2066:
2067: /* MAIN LOOP: walk around the principal cycle looking for a square form.
2068: * Blacklist small leading coefficients.
2069: *
2070: * The reduction operator can be computed entirely in 32-bit arithmetic:
2071: * Let q = floor(floor((d1+b1)/2)/c1) (when c1>dd1, q=1, which happens
2072: * often enough to special-case it). Then the new b1 = (q*c1-b1) + q*c1,
2073: * which can be computed without overflowing, and the new c1 equals
2074: * a1 - q*(q*c1-b1), where the righthand term is bounded by d1 in abs
2075: * size since both the old and the new a1 are positive and bounded by d1.
2076: */
2077: while (act1 + act2 > 0)
2078: {
2079: /* send first form through reduction operator if active */
2080: if (act1)
2081: {
2082: c = c1;
2083: if (c > dd1)
2084: q = 1;
2085: else
2086: q = (dd1 + (b1>>1)) / c;
2087: if (q == 1)
2088: {
2089: qcb = c - b1; b1 = c + qcb; c1 = a1 - qcb;
2090: }
2091: else
2092: {
2093: qc = q*c; qcb = qc - b1; b1 = qc + qcb; c1 = a1 - q*qcb;
2094: }
2095: a1 = c;
2096:
2097: if (a1 <= L1) /* blacklist this */
2098: {
2099: if (blp1 >= SQUFOF_BLACKLIST_SZ)
2100: /* blacklist overflows: shouldn't happen */
2101: act1 = 0; /* silently */
2102: else
2103: {
2104: if (mydebug >= 6)
2105: fprintferr("SQUFOF: blacklisting a = %ld on first cycle\n", a1);
2106: blacklist1[blp1++] = a1;
2107: }
2108: }
2109: }
2110:
2111: /* send second form through reduction operator if active */
2112: if (act2)
2113: {
2114: c = c2;
2115: if (c > dd2)
2116: q = 1;
2117: else
2118: q = (dd2 + (b2>>1)) / c;
2119: if (q == 1)
2120: {
2121: qcb = c - b2; b2 = c + qcb; c2 = a2 - qcb;
2122: }
2123: else
2124: {
2125: qc = q*c; qcb = qc - b2; b2 = qc + qcb; c2 = a2 - q*qcb;
2126: }
2127: a2 = c;
2128:
2129: if (a2 <= L2) /* blacklist this */
2130: {
2131: if (blp2 >= SQUFOF_BLACKLIST_SZ)
2132: /* blacklist overflows: shouldn't happen */
2133: act2 = 0; /* silently */
2134: else
2135: {
2136: if (mydebug >= 6)
2137: fprintferr("SQUFOF: blacklisting a = %ld on second cycle\n", a2);
2138: blacklist2[blp2++] = a2;
2139: }
2140: }
2141: }
2142:
2143: /* bump counter, loop if this is an odd iteration (i.e. if the real
2144: * leading coefficients are negative)
2145: */
2146: if (++cnt & 1) continue;
2147:
2148: /* second half of main loop entered only when the leading coefficients
2149: * are positive (i.e., during even-numbered iterations)
2150: */
2151:
2152: /* examine first form if active */
2153: if (act1 && a1 == 1) /* back to identity form */
2154: {
2155: act1 = 0; /* drop this discriminant */
2156: if (mydebug >= 4)
2157: {
2158: fprintferr("SQUFOF: first cycle exhausted after %ld iterations,\n"
2159: "\tdropping it\n",
2160: cnt);
2161: flusherr();
2162: }
2163: }
2164: if (act1)
2165: {
2166: if ((a = ucarrecomplet(a1)) != 0) /* square form? */
2167: {
2168: if (mydebug >= 4)
2169: {
2170: fprintferr("SQUFOF: square form (%ld^2, %ld, %ld) on first cycle\n"
2171: "\tafter %ld iterations, time = %ld ms\n",
2172: a, b1, -c1, cnt, timer2());
2173: /* flusherr delayed until we've dealt with it */
2174: }
2175: /* blacklisted? */
2176: if (a <= L1)
2177: {
2178: int j;
2179: for (j = 0; j < blp1; j++)
2180: if (a == blacklist1[j]) { a = 0; break; }
2181: }
2182: if (a > 0) /* not blacklisted */
2183: {
2184: /* imprimitive form? */
2185: q = cgcd(a, b1);
2186: if (nm4 == 3 && cgcd(q, 3) > 1) /* paranoia */
2187: {
2188: avma = av;
2189: /* we really possess q^2/3 here, but let the caller sort this
2190: * out. His fault for calling us with a multiple of 3. We
2191: * cannot claim (q/3)^2 as a known factor here since q might
2192: * equal 3, in which case 3 is the correct answer to return.
2193: */
2194: if (q == 3)
2195: {
2196: if (mydebug >= 4)
2197: {
2198: fprintferr("SQUFOF: found factor 3\n");
2199: flusherr();
2200: }
2201: return stoi(3);
2202: }
2203: else q /= 3; /* and fall through to the next conditional */
2204: }
2205: if (q > 1) /* q^2 divides D1 and, in fact, n */
2206: {
2207: avma = av;
2208: if (mydebug >= 4)
2209: {
2210: fprintferr("SQUFOF: found factor %ld^2\n", q);
2211: flusherr();
2212: }
2213: res = cgetg(4, t_VEC);
2214: res[1] = lstoi(q); /* factor */
2215: res[2] = deux; /* exponent 2 */
2216: res[3] = (long)NULL; /* unknown whether prime or composite */
2217: return res;
2218: }
2219:
2220: /* chase the inverse root form back along the ambiguous cycle */
2221: q = squfof_ambig(a, b1, dd1, D1, &cntamb);
2222: if (mydebug >= 6)
2223: fprintferr("SQUFOF: squfof_ambig returned %ld\n", q);
2224: if (nm4 == 3) q /= cgcd(q, 3);
2225:
2226: /* return if successful */
2227: if (q > 1)
2228: {
2229: avma = av;
2230: if (mydebug >= 4)
2231: {
2232: fprintferr("SQUFOF: found factor %ld from ambiguous form\n"
2233: "\tafter %ld steps on the ambiguous cycle, "
2234: "time = %ld ms\n",
2235: q, cntamb, timer2());
2236: flusherr();
2237: }
2238: res = stoi(q);
2239: return res;
2240: }
2241: else if (mydebug >= 4) /* nothing found */
2242: {
2243: fprintferr("SQUFOF: ...found nothing useful on the ambiguous "
2244: "cycle\n"
2245: "\tafter %ld steps there, time = %ld ms\n",
2246: cntamb, timer2());
2247: flusherr();
2248: }
2249: }
2250: else if (mydebug >= 4) /* blacklisted */
2251: {
2252: fprintferr("SQUFOF: ...but the root form seems to be on the "
2253: "principal cycle\n");
2254: flusherr();
2255: }
2256: }
2257: /* else proceed */
2258: }
2259:
2260: /* examine second form if active */
2261: if (act2 && a2 == 1) /* back to identity form */
2262: {
2263: act2 = 0; /* drop this discriminant */
2264: if (mydebug >= 4)
2265: {
2266: fprintferr("SQUFOF: second cycle exhausted after %ld iterations,\n"
2267: "\tdropping it\n",
2268: cnt);
2269: flusherr();
2270: }
2271: }
2272: if (act2)
2273: {
2274: if ((a = ucarrecomplet(a2)) != 0) /* square form? */
2275: {
2276: if (mydebug >= 4)
2277: {
2278: fprintferr("SQUFOF: square form (%ld^2, %ld, %ld) on second cycle\n"
2279: "\tafter %ld iterations, time = %ld ms\n",
2280: a, b2, -c2, cnt, timer2());
2281: flusherr();
2282: }
2283: /* blacklisted? */
2284: if (a <= L2)
2285: {
2286: int j;
2287: for (j = 0; j < blp2; j++)
2288: if (a == blacklist2[j]) { a = 0; break; }
2289: }
2290: if (a > 0) /* not blacklisted */
2291: {
2292: /* imprimitive form? */
2293: q = cgcd(a, b2);
2294: /* NB if b2 is even, a is odd, so the gcd is always odd */
2295: if (nm4 == 1 && cgcd(q, 5) > 1) /* paranoia */
2296: {
2297: avma = av;
2298: /* we really possess q^2/5 here, but let the caller sort this
2299: * out. His fault for calling us with a multiple of 5. We
2300: * cannot claim (q/5)^2 as a known factor here since q might
2301: * equal 5, in which case 5 is the correct answer to return.
2302: */
2303: if (q == 5)
2304: {
2305: if (mydebug >= 4)
2306: {
2307: fprintferr("SQUFOF: found factor 5\n");
2308: flusherr();
2309: }
2310: return stoi(5);
2311: }
2312: else q /= 5; /* and fall through to the next conditional */
2313: }
2314: if (q > 1) /* q^2 divides D2 */
2315: {
2316: avma = av;
2317: if (mydebug >= 4)
2318: {
2319: fprintferr("SQUFOF: found factor %ld^2\n", q);
2320: flusherr();
2321: }
2322: res = cgetg(4, t_VEC);
2323: res[1] = lstoi(q); /* factor */
2324: res[2] = deux; /* exponent 2 */
2325: res[3] = (long)NULL; /* unknown whether prime or composite */
2326: return res;
2327: }
2328:
2329: /* chase the inverse root form along the ambiguous cycle */
2330: q = squfof_ambig(a, b2, dd2, D2, &cntamb);
2331: if (mydebug >= 6)
2332: fprintferr("SQUFOF: squfof_ambig returned %ld\n", q);
2333: if (nm4 == 1) q /= cgcd(q, 5);
2334:
2335: /* return if successful */
2336: if (q > 1)
2337: {
2338: avma = av;
2339: if (mydebug >= 4)
2340: {
2341: fprintferr("SQUFOF: found factor %ld from ambiguous form\n"
2342: "\tafter %ld steps on the ambiguous cycle, "
2343: "time = %ld ms\n",
2344: q, cntamb, timer2());
2345: flusherr();
2346: }
2347: res = stoi(q);
2348: return res;
2349: }
2350: else if (mydebug >= 4) /* nothing found */
2351: {
2352: fprintferr("SQUFOF: ...found nothing useful on the ambiguous "
2353: "cycle\n"
2354: "\tafter %ld steps there, time = %ld ms\n",
2355: cntamb, timer2());
2356: flusherr();
2357: }
2358: }
2359: else if (mydebug >= 4) /* blacklisted */
2360: {
2361: fprintferr("SQUFOF: ...but the root form seems to be on the "
2362: "principal cycle\n");
2363: flusherr();
2364: }
2365: }
2366: /* else proceed */
2367: }
2368:
2369: } /* end main loop */
2370:
2371: /* when we get here, both discriminants have, alas, turned out to be
2372: * useless.
2373: */
2374: if (mydebug >= 4)
2375: {
2376: fprintferr("SQUFOF: giving up, time = %ld ms\n", timer2());
2377: flusherr();
2378: }
2379:
2380: avma = av;
2381: return NULL;
2382: }
2383:
2384: /* The following is invoked to walk back along the ambiguous cycle
2385: * until we hit an ambiguous form and thus the desired factor, which
2386: * it returns. If it fails for any reason, it returns 0. It doesn't
2387: * interfere with timing and diagnostics, which it leaves to squfof().
2388: *
2389: * Before we invoke this, we've found a form (A, B, -C) with A = a^2,
2390: * where a isn't blacklisted and where gcd(a, B) = 1. According to
2391: * ACiCANT, we should now proceed reducing the form (a, -B, -aC), but
2392: * it is easy to show that the first reduction step always sends this
2393: * to (-aC, B, a), and the next one, with q computed as usual from B
2394: * and a (occupying the c position), gives a reduced form, whose third
2395: * member is easiest to recover by going back to D. From this point
2396: * onwards, we're once again working with single-word numbers.
2397: * NB here there is no need to track signs, we just work with the abs
2398: * values of the coefficients.
2399: */
2400: static
2401: long
2402: squfof_ambig(long a, long B, long dd, GEN D, long *cntamb)
2403: {
1.2 ! noro 2404: long b, c, q, qc, qcb;
! 2405: gpmem_t av = avma;
1.1 noro 2406: long a0, b0, b1, c0;
2407:
2408: q = (dd + (B>>1)) / a; qc = q*a; qcb = qc - B;
2409: b = qc + qcb;
2410: c = itos(divis(shifti(subii(D, sqri(stoi(b))), -2), a));
2411: #ifdef DEBUG_SQUFOF
2412: fprintferr("SQUFOF: ambigous cycle of discriminant %Z\n", D);
2413: fprintferr("SQUFOF: Form on ambigous cycle (%ld, %ld, %ld)\n",
2414: a, b, c);
2415: #endif
2416:
2417: avma = av; /* no further stack operations follow */
2418: *cntamb = 0; /* count reduction steps on the cycle */
2419: a0 = a; b0 = b1 = b; /* end of loop detection and safeguard */
2420:
2421: for (;;) /* reduced cycles are finite */
2422: {
2423: /* reduction step */
2424: c0 = c;
2425: if (c0 > dd)
2426: q = 1;
2427: else
2428: q = (dd + (b>>1)) / c0;
2429: if (q == 1)
2430: {
2431: qcb = c0 - b; b = c0 + qcb; c = a - qcb;
2432: }
2433: else
2434: {
2435: qc = q*c0; qcb = qc - b; b = qc + qcb; c = a - q*qcb;
2436: }
2437: a = c0;
2438:
2439: (*cntamb)++;
2440:
2441: /* check whether we're done */
2442: if (b == b1) return (a&1 ? a : a>>1);
2443:
2444: /* safeguard against infinite loop: recognize when we've walked
2445: * around the entire cycle in vain. (I don't think this can
2446: * actually happen -- exercise. But better safe than sorry.)
2447: */
2448: if (b == b0 && a == a0) return 0;
2449:
2450: /* prepare for next iteration */
2451: b1 = b;
2452: }
2453: /* NOT REACHED */
2454: return 0;
2455: }
2456:
2457: /***********************************************************************/
2458: /** **/
2459: /** DETECTING ODD POWERS **/
2460: /** Factoring engines like MPQS which ultimately rely on computing **/
2461: /** gcd(N, x^2-y^2) to find a nontrivial factor of N are fundamen- **/
2462: /** tally incapable of splitting a proper power of an odd prime, **/
2463: /** because of the cyclicity of the prime residue class group. We **/
2464: /** already have a square-detection function carrecomplet(), which **/
2465: /** also returns the square root if appropriate. Here's an analogue **/
2466: /** for cubes, fifth and 7th powers. 11th powers are a non-issue so **/
2467: /** long as mpqs() gives up beyond 100 decimal digits (since ECM **/
2468: /** easily finds a 10-digit prime factor of a 100-digit number). **/
2469: /** GN1998Jun28 **/
2470: /** **/
2471: /***********************************************************************/
2472:
2473: /* Use a multistage sieve. First stages work mod 211, 209, 61, 203;
2474: * if the argument is larger than a word, we first reduce mod the product
2475: * of these and then take the remainder apart. Second stages use 117,
2476: * 31, 43, 71 in this order. Moduli which are no longer interesting are
2477: * skipped. Everything is encoded in a single table of 106 24-bit masks.
2478: * We only need the first half of the residues. Three bits per modulus
2479: * indicate which residues are 7th (bit 2), 5th (bit 1) powers or cubes
2480: * (bit 0); the eight moduli above are assigned right-to-left. The table
2481: * will err on the side of safety if one of the moduli divides the number
2482: * to be tested, but as this leads to inefficiency it should still be
2483: * avoided.
2484: */
2485:
2486: static ulong powersmod[106] = {
2487: 077777777ul, /* 0 */
2488: 077777777ul, /* 1 */
2489: 013562440ul, /* 2 */
2490: 012462540ul, /* 3 */
2491: 013562440ul, /* 4 */
2492: 052662441ul, /* 5 */
2493: 016663440ul, /* 6 */
2494: 016463450ul, /* 7 */
2495: 013573551ul, /* 8 */
2496: 012462540ul, /* 9 */
2497: 012462464ul, /* 10 */
2498: 013462771ul, /* 11 */
2499: 012466473ul, /* 12 */
2500: 012463641ul, /* 13 */
2501: 052463646ul, /* 14 */
2502: 012563446ul, /* 15 */
2503: 013762440ul, /* 16 */
2504: 052766440ul, /* 17 */
2505: 012772451ul, /* 18 */
2506: 012762454ul, /* 19 */
2507: 032763550ul, /* 20 */
2508: 013763664ul, /* 21 */
2509: 017763460ul, /* 22 */
2510: 037762565ul, /* 23 */
2511: 017762540ul, /* 24 */
2512: 057762441ul, /* 25 */
2513: 037772452ul, /* 26 */
2514: 017773551ul, /* 27 */
2515: 017767541ul, /* 28 */
2516: 017767640ul, /* 29 */
2517: 037766450ul, /* 30 */
2518: 017762752ul, /* 31 */
2519: 037762762ul, /* 32 */
2520: 017762742ul, /* 33 */
2521: 037763762ul, /* 34 */
2522: 017763740ul, /* 35 */
2523: 077763740ul, /* 36 */
2524: 077762750ul, /* 37 */
2525: 077762752ul, /* 38 */
2526: 077762750ul, /* 39 */
2527: 077762743ul, /* 40 */
2528: 077767740ul, /* 41 */
2529: 077763741ul, /* 42 */
2530: 077763762ul, /* 43 */
2531: 077772760ul, /* 44 */
2532: 077762770ul, /* 45 */
2533: 077766750ul, /* 46 */
2534: 077762740ul, /* 47 */
2535: 077763740ul, /* 48 */
2536: 077763750ul, /* 49 */
2537: 077763752ul, /* 50 */
2538: 077762740ul, /* 51 */
2539: 077762740ul, /* 52 */
2540: 077772740ul, /* 53 */
2541: 077762762ul, /* 54 */
2542: 077763765ul, /* 55 */
2543: 077763770ul, /* 56 */
2544: 077767750ul, /* 57 */
2545: 077766753ul, /* 58 */
2546: 077776740ul, /* 59 */
2547: 077772741ul, /* 60 */
2548: 077772744ul, /* 61 */
2549: 077773740ul, /* 62 */
2550: 077773743ul, /* 63 */
2551: 077773751ul, /* 64 */
2552: 077772771ul, /* 65 */
2553: 077772760ul, /* 66 */
2554: 077772763ul, /* 67 */
2555: 077772751ul, /* 68 */
2556: 077773750ul, /* 69 */
2557: 077777740ul, /* 70 */
2558: 077773745ul, /* 71 */
2559: 077772740ul, /* 72 */
2560: 077772742ul, /* 73 */
2561: 077772744ul, /* 74 */
2562: 077776750ul, /* 75 */
2563: 077773771ul, /* 76 */
2564: 077773774ul, /* 77 */
2565: 077773760ul, /* 78 */
2566: 077772741ul, /* 79 */
2567: 077772740ul, /* 80 */
2568: 077772740ul, /* 81 */
2569: 077772741ul, /* 82 */
2570: 077773754ul, /* 83 */
2571: 077773750ul, /* 84 */
2572: 077773740ul, /* 85 */
2573: 077776741ul, /* 86 */
2574: 077776771ul, /* 87 */
2575: 077776773ul, /* 88 */
2576: 077772761ul, /* 89 */
2577: 077773741ul, /* 90 */
2578: 077773740ul, /* 91 */
2579: 077773740ul, /* 92 */
2580: 077772740ul, /* 93 */
2581: 077772752ul, /* 94 */
2582: 077772750ul, /* 95 */
2583: 077772751ul, /* 96 */
2584: 077773741ul, /* 97 */
2585: 077773761ul, /* 98 */
2586: 077777760ul, /* 99 */
2587: 077772765ul, /* 100 */
2588: 077772742ul, /* 101 */
2589: 077777751ul, /* 102 */
2590: 077777750ul, /* 103 */
2591: 077777745ul, /* 104 */
2592: 077777770ul /* 105 */
2593: };
2594:
2595: /* Returns 3, 5, or 7 if x is a cube (but not a 5th or 7th power), a 5th
2596: * power (but not a 7th), or a 7th power, and in this case creates the
2597: * base on the stack and assigns its address to *pt. Otherwise returns 0.
2598: * x must be of type t_INT and nonzero; this is not checked. The *mask
2599: * argument tells us which things to check -- bit 0: 3rd, bit 1: 5th,
2600: * bit 2: 7th pwr; set a bit to have the corresponding power examined --
2601: * and is updated appropriately for a possible follow-up call
2602: */
2603:
2604: long /* no longer static -- used in mpqs.c */
2605: is_odd_power(GEN x, GEN *pt, long *mask)
2606: {
1.2 ! noro 2607: long lgx=lgefint(x), exponent=0, residue, resbyte;
! 2608: gpmem_t av=avma, tetpil;
1.1 noro 2609: GEN y;
2610:
2611: *mask &= 7; /* paranoia */
2612: if (!*mask) return 0; /* useful when running in a loop */
2613: if (signe(x) < 0) x=absi(x);
2614:
2615: if (DEBUGLEVEL >= 5)
2616: {
2617: fprintferr("OddPwrs: is %Z\n\t...a", x);
2618: if (*mask&1) fprintferr(" 3rd%s",
2619: (*mask==7?",":(*mask!=1?" or":"")));
2620: if (*mask&2) fprintferr(" 5th%s",
2621: (*mask==7?", or":(*mask&4?" or":"")));
2622: if (*mask&4) fprintferr(" 7th");
2623: fprintferr(" power?\n");
2624: }
2625: if (lgx > 3) residue = smodis(x, 211*209*61*203);
2626: else residue = x[2];
2627:
2628: resbyte=residue%211; if (resbyte > 105) resbyte = 211 - resbyte;
2629: *mask &= powersmod[resbyte];
2630: if (DEBUGLEVEL >= 5)
2631: {
2632: fprintferr("\tmodulo: resid. (remaining possibilities)\n");
2633: fprintferr("\t 211: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2634: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2635: }
2636: if (!*mask) { avma=av; return 0; }
2637:
2638: if (*mask & 3)
2639: {
2640: resbyte=residue%209; if (resbyte > 104) resbyte = 209 - resbyte;
2641: *mask &= (powersmod[resbyte] >> 3);
2642: if (DEBUGLEVEL >= 5)
2643: fprintferr("\t 209: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2644: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2645: if (!*mask) { avma=av; return 0; }
2646: }
2647: if (*mask & 3)
2648: {
2649: resbyte=residue%61; if (resbyte > 30) resbyte = 61 - resbyte;
2650: *mask &= (powersmod[resbyte] >> 6);
2651: if (DEBUGLEVEL >= 5)
2652: fprintferr("\t 61: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2653: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2654: if (!*mask) { avma=av; return 0; }
2655: }
2656: if (*mask & 5)
2657: {
2658: resbyte=residue%203; if (resbyte > 101) resbyte = 203 - resbyte;
2659: *mask &= (powersmod[resbyte] >> 9);
2660: if (DEBUGLEVEL >= 5)
2661: fprintferr("\t 203: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2662: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2663: if (!*mask) { avma=av; return 0; }
2664: }
2665:
2666: if (lgx > 3) residue = smodis(x, 117*31*43*71);
2667: else residue = x[2];
2668:
2669: if (*mask & 1)
2670: {
2671: resbyte=residue%117; if (resbyte > 58) resbyte = 117 - resbyte;
2672: *mask &= (powersmod[resbyte] >> 12);
2673: if (DEBUGLEVEL >= 5)
2674: fprintferr("\t 117: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2675: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2676: if (!*mask) { avma=av; return 0; }
2677: }
2678: if (*mask & 3)
2679: {
2680: resbyte=residue%31; if (resbyte > 15) resbyte = 31 - resbyte;
2681: *mask &= (powersmod[resbyte] >> 15);
2682: if (DEBUGLEVEL >= 5)
2683: fprintferr("\t 31: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2684: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2685: if (!*mask) { avma=av; return 0; }
2686: }
2687: if (*mask & 5)
2688: {
2689: resbyte=residue%43; if (resbyte > 21) resbyte = 43 - resbyte;
2690: *mask &= (powersmod[resbyte] >> 18);
2691: if (DEBUGLEVEL >= 5)
2692: fprintferr("\t 43: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2693: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2694: if (!*mask) { avma=av; return 0; }
2695: }
2696: if (*mask & 6)
2697: {
2698: resbyte=residue%71; if (resbyte > 35) resbyte = 71 - resbyte;
2699: *mask &= (powersmod[resbyte] >> 21);
2700: if (DEBUGLEVEL >= 5)
2701: fprintferr("\t 71: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2702: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2703: if (!*mask) { avma=av; return 0; }
2704: }
2705:
2706: /* priority to higher powers -- if we have a 21st, it'll be easier to
2707: * rediscover that its 7th root is a cube than that its cube root is
2708: * a 7th power
2709: */
2710: if ((resbyte = *mask & 4)) /* assignment */
2711: exponent = 7;
2712: else if ((resbyte = *mask & 2))
2713: exponent = 5;
2714: else
2715: { resbyte = 1; exponent = 3; }
2716: /* leave that mask bit on for the moment, we might need it for a
2717: * subsequent call
2718: */
2719:
2720: /* precision in the following is one extra significant word (overkill) */
2721: y=ground(gpow(x, ginv(stoi(exponent)), lgx));
2722: if (!egalii(gpowgs(y, exponent), x))
2723: {
2724: if (DEBUGLEVEL >= 5)
2725: {
2726: if (exponent == 3)
2727: fprintferr("\tBut it nevertheless wasn't a cube.\n");
2728: else
2729: fprintferr("\tBut it nevertheless wasn't a %ldth power.\n",
2730: exponent);
2731: }
2732: *mask &= ~resbyte; /* _now_ turn the bit off */
2733: avma=av; return 0;
2734: }
2735: /* caller (ifac_crack() below) will report the final result if it was
2736: * a pure power, so no further diagnostics here
2737: */
2738: tetpil=avma;
2739: if (!pt) { avma=av; return exponent; } /* this branch not used */
2740: *pt=gerepile(av,tetpil,icopy(y));
2741: return exponent;
2742: }
2743:
2744: /***********************************************************************/
2745: /** **/
2746: /** FACTORIZATION (master iteration) **/
2747: /** Driver for the various methods of finding large factors **/
2748: /** (after trial division has cast out the very small ones). **/
2749: /** GN1998Jun24--30 **/
2750: /** **/
2751: /***********************************************************************/
2752:
2753: /** Direct use:
2754: ** ifac_start() registers a number (without prime factors < 100)
2755: ** with the iterative factorizer, and also registers whether or
2756: ** not we should terminate early if we find that the number is
2757: ** not squarefree, and a hint about which method(s) to use. This
2758: ** must always be called first. The input _must_ have been checked
2759: ** to be composite by the caller. The routine immediately tries
2760: ** to decompose it nontrivially into a product of two factors,
2761: ** except in squarefreeness (`Moebius') mode.
2762: ** ifac_primary_factor() returns a prime divisor (not necessarily
2763: ** the smallest) and the corresponding exponent. */
2764:
2765: /** Encapsulated user interface:
2766: ** ifac_decomp() does the right thing for auxdecomp() (put a succession
2767: ** of prime divisor / exponent pairs onto the stack, not necessarily
2768: ** sorted, although in practice they will tend not to be too far from
2769: ** the correct order).
2770: **
2771: ** For each of the additive/multiplicative arithmetic functions, there is
2772: ** a `contributor' below, to be called on any large composite cofactor
2773: ** left over after trial division by small primes, whose result can then
2774: ** be added to or multiplied with whatever we already have:
2775: ** ifac_moebius() ifac_issquarefree() ifac_totient() ifac_omega()
2776: ** ifac_bigomega() ifac_numdiv() ifac_sumdiv() ifac_sumdivk() */
2777:
2778: /* We never test whether the input number is prime or composite, since
2779: * presumably it will have come out of the small factors finder stage
2780: * (which doesn't really exist yet but which will test the left-over
2781: * cofactor for primality once it does).
2782: */
2783: /* The data structure in which we preserve whatever we know at any given
2784: * time about our number N is kept on the PARI stack, and updated as needed.
2785: * This makes the machinery re-entrant (you can have more than one fac-
2786: * torization using ifac_start()/ifac_primary_factor() in progress simul-
2787: * taneously so long as you preserve the GEN across garbage collections),
2788: * and which avoids memory leaks when a lengthy factorization is interrupted.
2789: * We also make an effort to keep the whole affair connected, and the parent
2790: * object will always be older than its children. This may in rare cases
2791: * lead to some extra copying around, and knowing what is garbage at any
2792: * given time is not entirely trivial. See below for examples how to do
2793: * it right. (Connectedness can be destroyed if callers of ifac_main()
2794: * create other stuff on the stack in between calls. This is harmless
2795: * as long as ifac_realloc() is used to re-create a connected object at
2796: * the head of the stack just before collecting garbage.)
2797: */
2798: /* Note that a PARI integer can have hundreds of millions of distinct prime
2799: * factors larger than 2^16, given enough memory. And since there's no
2800: * guarantee that we will find factors in order of increasing size, we must
2801: * be prepared to drag a very large amount of data around (although this
2802: * will _very_ rarely happen for random input!). So we start with a small
2803: * structure and extend it when necessary.
2804: */
2805: /* The idea of data structure and algorithm is:
2806: * Let N0 be whatever is currently left of N after dividing off all the
2807: * prime powers we have already returned to the caller. Then we maintain
2808: * N0 as a product
2809: * (1) N0 = \prod_i P_i^{e_i} * \prod_j Q_j^{f_j} * \prod_k C_k^{g_k}
2810: * where the P_i and Q_j are distinct primes, each C_k is known composite,
2811: * none of the P_i divides any C_k, and we also know the total ordering
2812: * of all the P_i, Q_j and C_k (in particular, we will never try to divide
2813: * a C_k by a larger Q_j). Some of the C_k may have common factors, although
2814: * this will not often be the case.
2815: */
2816: /* Caveat implementor: Taking gcds among C_k's is very likely to cost at
2817: * least as much time as dividing off any primes as we find them, and book-
2818: * keeping would be a nightmare (since D=gcd(C_1,C_2) can still have common
2819: * factors with both C_1/D and C_2/D, and so on...).
2820: */
2821: /* At startup, we just initialize the structure to
2822: * (2) N = C_1^1 (composite).
2823: */
2824: /* Whenever ifac_primary_factor() or ifac_decomp() (or, mutatis mutandis,
2825: * one of the three arithmetic user interface routines) needs a primary
2826: * factor, and the smallest thing in our list is P_1, we return that and
2827: * its exponent, and remove it from our list.
2828: * (When nothing is left, we return a sentinel value -- gun. And in Moebius
2829: * mode, when we see something with exponent > 1, whether prime or composite,
2830: * we yell at our caller by returning gzero or 0, depending on the function).
2831: * In all other cases, ifac_main() iterates the following steps until we have
2832: * a P_1 in the smallest position.
2833: */
2834: /* When the smallest item is C_1 (as it is initially):
2835: * (3.1) Crack C_1 into a nontrivial product U_1 * U_2 by whatever method
2836: * comes to mind for this size. (U for `unknown'.) Cracking will detect
2837: * squares (and biquadrates etc), and it may detect odd powers, so we
2838: * might instead see a power of some U_1 here, or even something of the form
2839: * U_1^k*U_2^k. (Of course the exponent already attached to C_1 is taken
2840: * into account in the following.)
2841: * (3.2) If we have U_1*U_2, sort the two factors; convert to U_1^2 if they
2842: * happen to be equal (which they shouldn't -- squares should have been
2843: * caught at the preceding stage). Note that U_1 and (if it exists) U_2
2844: * are automatically smaller than anything else in our list.
2845: * (3.3) Check U_1 (and U_2) for primality, and flag them accordingly.
2846: * (3.4) Iterate.
2847: */
2848: /* When the smallest item is Q_1:
2849: * This is the potentially unpleasant case. The idea is to go through the
2850: * entire list and try to divide Q_1 off each of the current C_k's, which
2851: * will usually fail, but may succeed several times. When a division was
2852: * successful, the corresponding C_k is removed from our list, and the co-
2853: * factor becomes a U_l for the moment unless it is 1 (which happens when
2854: * C_k was a power of Q_1). When we're through we upgrade Q_1 to P_1 status,
2855: * and then do a primality check on each U_l and sort it back into the list
2856: * either as a Q_j or as a C_k. If during the insertion sort we discover
2857: * that some U_l equals some P_i or Q_j or C_k we already have, we just add
2858: * U_l's exponent to that of its twin. (The sorting should therefore happen
2859: * before the primality test).
2860: * Note that this may produce one or more elements smaller than the P_1
2861: * we just confirmed, so we may have to repeat the iteration.
2862: */
2863: /* There's a little trick that avoids some Q_1 instances. Just after we do
2864: * a sweep to classify all current unknowns as either composites or primes,
2865: * we do another downward sweep beginning with the largest current factor
2866: * and stopping just above the largest current composite. Every Q_j we
2867: * pass is turned into a P_i. (Different primes are automatically coprime
2868: * among each other, and primes tend not to divide smaller composites.)
2869: */
2870: /* (We have no use for comparing the square of a prime to N0. Normally
2871: * we will get called after casting out only the smallest primes, and
2872: * since we cannot guarantee that we see the large prime factors in as-
2873: * cending order, we cannot stop when we find one larger than sqrt(N0).)
2874: */
2875: /* Data structure: We keep everything in a single t_VEC of t_INTs. The
2876: * first component records whether we're doing full (NULL) or Moebius (un)
2877: * factorization; in the latter case many subroutines return a sentinel
2878: * value as soon as they spot an exponent > 1. The second component records
2879: * the hint from factorint()'s optional flag, for use by ifac_crack().
2880: * The remaining components (initially 15) are used in groups of three:
2881: * a GEN pointer at the t_INT value of the factor, a pointer at the t_INT
2882: * exponent (usually gun or gdeux so we don't clutter up the stack too
2883: * much), and another t_INT GEN pointer to record the class of the factor:
2884: * NULL for unknown, zero for known composite C_k, un for known prime Q_j
2885: * awaiting trial division, and deux for finished prime P_i.
2886: */
2887: /* When during the division stage we re-sort a C_k-turned-U_l to a lower
2888: * position, we rotate any intervening material upward towards its old
2889: * slot. When a C_k was divided down to 1, its slot is left empty at
2890: * first; similarly when the re-sorting detects a repeated factor.
2891: * After the sorting phase, we de-fragment the list and squeeze all the
2892: * occupied slots together to the high end, so that ifac_crack() has room
2893: * for new factors. When this doesn't suffice, we abandon the current
2894: * vector and allocate a somewhat larger one, defragmenting again during
2895: * copying.
2896: */
2897: /* (For internal use, note that all exponents will fit into C longs, given
2898: * PARI's lgefint field size. When we work with them, we sometimes read
2899: * out the GEN pointer, and sometimes do an itos, whatever is more con-
2900: * venient for the task at hand.)
2901: */
2902:
2903: /*** Overview and forward declarations: ***/
2904:
2905: /* The `*where' argument in the following points into *partial at the
2906: * first of the three fields of the first occupied slot. It's there
2907: * because the caller would already know where `here' is, so we don't
2908: * want to search for it again, although it wouldn't take much time.
2909: * On the other hand, we do not preserve this from one user-interface
2910: * call to the next.
2911: */
2912:
2913: static GEN
2914: ifac_find(GEN *partial, GEN *where);
2915: /* Return GEN pointing at the first nonempty slot strictly behind the
2916: * current *where, or NULL if such doesn't exist. Can be used to skip
2917: * a range of vacant slots, or to initialize *where in the first place
2918: * (pass partial in both args). Does not modify its argument pointers.
2919: */
2920:
2921: void
2922: ifac_realloc(GEN *partial, GEN *where, long new_lg);
2923: /* Move to a larger main vector, updating *where if it points into it.
2924: * Certainly updates *partial. Can be used as a specialized gcopy before
2925: * a gerepileupto()/gerepilemanysp() (pass 0 as the new length).
2926: * Normally, one would pass new_lg=1 to let this function guess the
2927: * new size. To be used sparingly.
2928: */
2929:
2930: static long
2931: ifac_crack(GEN *partial, GEN *where);
2932: /* Split the first (composite) entry. There _must_ already be room for
2933: * another factor below *where, and *where will be updated. Factor and
2934: * cofactor will be inserted in the correct order, updating *where, or
2935: * factor^k will be inserted if such should be the case (leaving *where
2936: * unchanged). The factor or factors will be set to unknown, and inherit
2937: * the exponent (or a multiple thereof) of its/their ancestor. Returns
2938: * number of factors written into the structure (normally 2, but 1 if a
2939: * factor equalled its cofactor, and may be more than 1 if a factoring
2940: * engine returned a vector of factors instead of a single factor). Can
2941: * reallocate the data structure in the vector-of-factors case (but not
2942: * in the more common single-factor case)
2943: */
2944:
2945: static long
2946: ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec);
2947: /* Gets called to complete ifac_crack()'s job when a factoring engine
2948: * splits the current factor into a product of three or more new factors.
2949: * Makes room for them if necessary, sorts them, gives them the right
2950: * exponents and class etc. Also returns the number of factors actually
2951: * written, which may be less than the number of components in facvec
2952: * if there are duplicates.--- Vectors of factors (cf pollardbrent()
2953: * above) actually contain `slots' of three GENs per factor with the
2954: * three fields being interpreted exactly as in our partial factorization
2955: * data structure. Thus `engines' can tell us what they already happen to
2956: * know about factors being prime or composite and/or appearing to a power
2957: * larger than the first
2958: */
2959:
2960: static long
2961: ifac_divide(GEN *partial, GEN *where);
2962: /* Divide all current composites by first (prime, class Q) entry, updating
2963: * its exponent, and turning it into a finished prime (class P). Return 1
2964: * if any such divisions succeeded (in Moebius mode, the update may then
2965: * not have been completed), or 0 if none of them succeeded. Doesn't
2966: * modify *where.
2967: */
2968:
2969: static long
2970: ifac_sort_one(GEN *partial, GEN *where, GEN washere);
2971: /* re-sort one (typically unknown) entry from washere to a new position,
2972: * rotating intervening entries upward to fill the vacant space. It may
2973: * happen (rarely) that the new position is the same as the old one, or
2974: * that the new value of the entry coincides with a value already occupying
2975: * a lower slot, in which latter case we just add exponents (and use the
2976: * `more known' class, and return 1 immediately when in Moebius mode).
2977: * The slots between *where and washere must be in sorted order, so a
2978: * sweep using this to re-sort several unknowns must proceed upward (see
2979: * ifac_resort() below). Return 1 if we see an exponent > 1 (in Moebius
2980: * mode without completing the update), 0 otherwise.
2981: */
2982:
2983: static long
2984: ifac_resort(GEN *partial, GEN *where);
2985: /* sort all current unknowns downward to where they belong. Sweeps
2986: * in the upward direction. Not needed after ifac_crack(), only when
2987: * ifac_divide() returned true. May update *where. Returns 1 when an
2988: * ifac_sort_one() call does so to indicate a repeated factor, or 0 if
2989: * any and all such calls returned 0
2990: */
2991:
2992: static void
2993: ifac_defrag(GEN *partial, GEN *where);
2994: /* defragment: collect and squeeze out any unoccupied slots above *where
2995: * during a downward sweep. Unoccupied slots arise when a composite factor
2996: * dissolves completely whilst dividing off a prime, or when ifac_resort()
2997: * spots a coincidence and merges two factors. *where will be updated
2998: */
2999:
3000: static void
3001: ifac_whoiswho(GEN *partial, GEN *where, long after_crack);
3002: /* determine primality or compositeness of all current unknowns, and set
3003: * class Q primes to finished (class P) if everything larger is already
3004: * known to be prime. When after_crack is nonnegative, only look at the
3005: * first after_crack things in the list (do nothing when it's zero)
3006: */
3007:
3008: static GEN
3009: ifac_main(GEN *partial);
3010: /* main loop: iterate until smallest entry is a finished prime; returns
3011: * a `where' pointer, or gun if nothing left, or gzero in Moebius mode if
3012: * we aren't squarefree
3013: */
3014:
3015: /* NB In the most common cases, control flows from the user interface to
3016: * ifac_main() and then to a succession of ifac_crack()s and ifac_divide()s,
3017: * with (typically) none of the latter finding anything.
3018: */
3019:
3020: /** user interface: **/
3021: /* return initial data structure, see ifac_crack() below for semantics
3022: * of the hint argument
3023: */
3024: GEN
3025: ifac_start(GEN n, long moebius, long hint);
3026:
3027: /* run main loop until primary factor is found, return the prime and
3028: * assign the exponent. If nothing left, return gun and set exponent
3029: * to 0; if in Moebius mode and a square factor is discovered, return
3030: * gzero and set exponent to 0
3031: */
3032: GEN
3033: ifac_primary_factor(GEN *partial, long *exponent);
3034:
3035: /* call ifac_start() and run main loop until factorization is complete,
3036: * accumulating prime / exponent pairs on the PARI stack to be picked up
3037: * by aux_end(). Return number of distinct primes found
3038: */
3039: long
3040: ifac_decomp(GEN n, long hint);
3041:
3042: /* completely encapsulated functions; these call ifac_start() themselves,
3043: * and ensure proper stack housekeeping etc. Call them on any large
3044: * composite left over after trial division, and multiply/add the result
3045: * onto whatever you already have from the small factors. Don't call
3046: * them on large primes; they will run into trouble
3047: */
3048: long
3049: ifac_moebius(GEN n, long hint);
3050:
3051: long
3052: ifac_issquarefree(GEN n, long hint);
3053:
3054: long
3055: ifac_omega(GEN n, long hint);
3056:
3057: long
3058: ifac_bigomega(GEN n, long hint);
3059:
3060: GEN
3061: ifac_totient(GEN n, long hint); /* for gp's eulerphi() */
3062:
3063: GEN
3064: ifac_numdiv(GEN n, long hint);
3065:
3066: GEN
3067: ifac_sumdiv(GEN n, long hint);
3068:
3069: GEN
3070: ifac_sumdivk(GEN n, long k, long hint);
3071:
3072: /*** implementation ***/
3073:
3074: #define ifac_initial_length 24 /* codeword, moebius flag, hint, 7 slots */
3075: /* (more than enough in most cases -- a 512-bit product of distinct 8-bit
3076: * primes needs at most 7 slots at a time)
3077: */
3078:
3079: GEN
3080: ifac_start(GEN n, long moebius, long hint)
3081: {
3082: GEN part, here;
3083:
3084: if (typ(n) != t_INT) err(typeer, "ifac_start");
3085: if (signe(n) == 0)
3086: err(talker, "factoring 0 in ifac_start");
3087:
3088: part = cgetg(ifac_initial_length, t_VEC);
3089: here = part + ifac_initial_length;
3090: part[1] = moebius? un : (long)NULL;
3091: switch(hint)
3092: {
3093: case 0:
3094: part[2] = zero; break;
3095: case 1:
3096: part[2] = un; break;
3097: case 2:
3098: part[2] = deux; break;
3099: default:
3100: part[2] = (long)stoi(hint);
3101: }
3102: if (isonstack(n))
3103: n = absi(n);
3104: /* make copy, because we'll later want to mpdivis() into it in place.
3105: * If it's not on stack, then we assume it is a clone made for us by
3106: * auxdecomp0(), and we assume the sign has already been set positive
3107: */
3108: /* fill first slot at the top end */
3109: *--here = zero; /* initially composite */
3110: *--here = un; /* initial exponent 1 */
3111: *--here = (long) n;
3112: /* and NULL out the remaining slots */
3113: while (here > part + 3) *--here = (long)NULL;
3114: return part;
3115: }
3116:
3117: static GEN
3118: ifac_find(GEN *partial, GEN *where)
3119: {
3120: long lgp = lg(*partial);
3121: GEN end = *partial + lgp;
3122: GEN scan = *where + 3;
3123:
3124: if (DEBUGLEVEL >= 5)
3125: {
3126: if (!*partial || typ(*partial) != t_VEC)
3127: err(typeer, "ifac_find");
3128: if (lg(*partial) < ifac_initial_length)
3129: err(talker, "partial impossibly short in ifac_find");
3130: if (!(*where) ||
3131: *where > *partial + lgp - 3 ||
3132: *where < *partial) /* sic */
3133: err(talker, "`*where\' out of bounds in ifac_find");
3134: }
3135: while (scan < end && !*scan) scan += 3;
3136: /* paranoia -- check completely NULLed ? nope -- we never inspect the
3137: * exponent field for deciding whether a slot is empty or occupied
3138: */
3139: if (scan < end)
3140: {
3141: if (DEBUGLEVEL >= 5)
3142: {
3143: if (!scan[1])
3144: err(talker, "factor has NULL exponent in ifac_find");
3145: }
3146: return scan;
3147: }
3148: return NULL;
3149: }
3150:
3151: /* simple defragmenter */
3152: static void
3153: ifac_defrag(GEN *partial, GEN *where)
3154: {
3155: long lgp = lg(*partial);
3156: GEN scan_new = *partial + lgp - 3, scan_old = scan_new;
3157:
3158: while (scan_old >= *where)
3159: {
3160: if (*scan_old) /* slot occupied? */
3161: {
3162: if (scan_old < scan_new)
3163: {
3164: scan_new[2] = scan_old[2];
3165: scan_new[1] = scan_old[1];
3166: *scan_new = *scan_old;
3167: }
3168: scan_new -= 3; /* point at next slot to be written */
3169: }
3170: scan_old -= 3;
3171: }
3172: scan_new += 3; /* back up to last slot written */
3173: *where = scan_new;
3174: while (scan_new > *partial + 3)
3175: *--scan_new = (long)NULL; /* erase junk */
3176: }
3177:
3178: /* and complex version combined with reallocation. If new_lg is 0, we
3179: * use the old length, so this acts just like gcopy except that the where
3180: * pointer is carried along; if it is 1, we make an educated guess.
3181: * Exception: If new_lg is 0, the vector is full to the brim, and the
3182: * first entry is composite, we make it longer to avoid being called again
3183: * a microsecond later (at significant cost).
3184: * It is safe to call this with NULL for the where argument; if it doesn't
3185: * point anywhere within the old structure, it will be left alone
3186: */
3187: void
3188: ifac_realloc(GEN *partial, GEN *where, long new_lg)
3189: {
3190: long old_lg = lg(*partial);
3191: GEN newpart, scan_new, scan_old;
3192:
3193: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
3194: {
3195: if (!*partial || typ(*partial) != t_VEC)
3196: err(typeer, "ifac_realloc");
3197: if (lg(*partial) < ifac_initial_length)
3198: err(talker, "partial impossibly short in ifac_realloc");
3199: }
3200:
3201: if (new_lg == 1)
3202: new_lg = 2*old_lg - 6; /* from 7 slots to 13 to 25... */
3203: else if (new_lg <= old_lg) /* includes case new_lg == 0 */
3204: {
3205: new_lg = old_lg;
3206: if ((*partial)[3] && /* structure full */
3207: ((*partial)[5]==zero || (*partial)[5]==(long)NULL))
3208: /* and first entry composite or unknown */
3209: new_lg += 6; /* give it a little more breathing space */
3210: }
3211: newpart = cgetg(new_lg, t_VEC);
3212: if (DEBUGMEM >= 3)
3213: {
3214: fprintferr("IFAC: new partial factorization structure (%ld slots)\n",
3215: (new_lg - 3)/3);
3216: flusherr();
3217: }
3218: newpart[1] = (*partial)[1]; /* moebius */
3219: newpart[2] = (*partial)[2]; /* hint */
3220: /* downward sweep through the old *partial, picking up where1 and carry-
3221: * ing it over if and when we pass it. (This will only be useful if
3222: * it pointed at a non-empty slot.) Factors are licopy()d so that we
3223: * again have a nice object (parent older than children, connected),
3224: * except the one factor that may still be living in a clone where n
3225: * originally was; exponents are similarly copied if they aren't global
3226: * constants; class-of-factor fields are always global constants so we
3227: * need only copy them as pointers. Caller may then do a gerepileupto()
3228: * or a gerepilemanysp()
3229: */
3230: scan_new = newpart + new_lg - 3;
3231: scan_old = *partial + old_lg - 3;
3232: for (; scan_old > *partial + 2; scan_old -= 3)
3233: {
3234: if (*where == scan_old) *where = scan_new;
3235: if (!*scan_old) continue; /* skip empty slots */
3236:
3237: *scan_new =
3238: isonstack((GEN)(*scan_old)) ?
3239: licopy((GEN)(*scan_old)) : *scan_old;
3240: scan_new[1] =
3241: isonstack((GEN)(scan_old[1])) ?
3242: licopy((GEN)(scan_old[1])) : scan_old[1];
3243: scan_new[2] = scan_old[2];
3244: scan_new -= 3;
3245: }
3246: scan_new += 3; /* back up to last slot written */
3247: while (scan_new > newpart + 3)
3248: *--scan_new = (long)NULL;
3249: *partial = newpart;
3250: }
3251:
3252: #define moebius_mode ((*partial)[1])
3253:
3254: /* Bubble-sort-of-thing sort. Won't be exercised frequently,
3255: * so this is ok.
3256: */
3257: static long
3258: ifac_sort_one(GEN *partial, GEN *where, GEN washere)
3259: {
3260: GEN scan = washere - 3;
3261: GEN value, exponent, class0, class1;
3262: long cmp_res;
3263:
3264: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
3265: {
3266: long lgp;
3267: if (!*partial || typ(*partial) != t_VEC)
3268: err(typeer, "ifac_sort_one");
3269: if ((lgp = lg(*partial)) < ifac_initial_length)
3270: err(talker, "partial impossibly short in ifac_sort_one");
3271: if (!(*where) ||
3272: *where < *partial + 3 ||
3273: *where > *partial + lgp - 3)
3274: err(talker, "`*where\' out of bounds in ifac_sort_one");
3275: if (!washere ||
3276: washere < *where ||
3277: washere > *partial + lgp - 3)
3278: err(talker, "`washere\' out of bounds in ifac_sort_one");
3279: }
3280: value = (GEN)(*washere);
3281: exponent = (GEN)(washere[1]);
3282: if (exponent != gun && moebius_mode && cmpsi(1,exponent) < 0)
3283: return 1; /* should have been detected by caller */
3284: class0 = (GEN)(washere[2]);
3285:
3286: if (scan < *where) return 0; /* nothing to do, washere==*where */
3287:
3288: cmp_res = -1; /* sentinel */
3289: while (scan >= *where) /* therefore at least once */
3290: {
3291: if (*scan) /* current slot nonempty */
3292: {
3293: /* check against where */
3294: cmp_res = cmpii(value, (GEN)(*scan));
3295: if (cmp_res >= 0) break; /* have found where to stop */
3296: }
3297: /* copy current slot upward by one position and move pointers down */
3298: scan[5] = scan[2];
3299: scan[4] = scan[1];
3300: scan[3] = *scan;
3301: scan -= 3;
3302: }
3303: scan += 3;
3304: /* at this point there are the following possibilities:
3305: * (*) cmp_res == -1. Either value is less than that at *where, or for
3306: * some reason *where was pointing at one or more vacant slots and any
3307: * factors we saw en route were larger than value. At any rate,
3308: * scan == *where now, and scan is pointing at an empty slot, into
3309: * which we'll stash our entry.
3310: * (*) cmp_res == 0. The entry at scan-3 is the one, we compare class0
3311: * fields and add exponents, and put it all into the vacated scan slot,
3312: * NULLing the one at scan-3 (and possibly updating *where).
3313: * (*) cmp_res == 1. The slot at scan is the one to store our entry
3314: * into.
3315: */
3316: if (cmp_res != 0)
3317: {
3318: if (cmp_res < 0 && scan != *where)
3319: err(talker, "misaligned partial detected in ifac_sort_one");
3320: *scan = (long)value;
3321: scan[1] = (long)exponent;
3322: scan[2] = (long)class0;
3323: return 0;
3324: }
3325: /* case cmp_res == 0: repeated factor detected */
3326: if (DEBUGLEVEL >= 4)
3327: {
3328: fprintferr("IFAC: repeated factor %Z\n\tdetected in ifac_sort_one\n",
3329: value);
3330: flusherr();
3331: }
3332: if (moebius_mode) return 1; /* not squarefree */
3333: /* if old class0 was composite and new is prime, or vice versa,
3334: * complain (and if one class0 was unknown and the other wasn't,
3335: * use the known one)
3336: */
3337: class1 = (GEN)(scan[-1]);
3338: if (class0) /* should never be used */
3339: {
3340: if(class1)
3341: {
3342: if (class0 == gzero && class1 != gzero)
3343: err(talker, "composite equals prime in ifac_sort_one");
3344: else if (class0 != gzero && class1 == gzero)
3345: err(talker, "prime equals composite in ifac_sort_one");
3346: else if (class0 == gdeux) /* should happen even less */
3347: scan[2] = (long)class0; /* use it */
3348: }
3349: else /* shouldn't happen either */
3350: scan[2] = (long)class0; /* use it */
3351: }
3352: /* else stay with the existing known class0 */
3353: scan[2] = (long)class1;
3354: /* in any case, add exponents */
3355: if (scan[-2] == un && exponent == gun)
3356: scan[1] = deux;
3357: else
3358: scan[1] = laddii((GEN)(scan[-2]), exponent);
3359: /* move the value over */
3360: *scan = scan[-3];
3361: /* null out the vacated slot below */
3362: *--scan = (long)NULL;
3363: *--scan = (long)NULL;
3364: *--scan = (long)NULL;
3365: /* finally, see whether *where should be pulled in */
3366: if (scan == *where) *where += 3;
3367: return 0;
3368: }
3369:
3370: /* the following loop around the former doesn't need to check moebius_mode
3371: * because ifac_sort_one() never returns 1 in normal mode
3372: */
3373: static long
3374: ifac_resort(GEN *partial, GEN *where)
3375: {
3376: long lgp = lg(*partial), res = 0;
3377: GEN scan = *where;
3378:
3379: for (; scan < *partial + lgp; scan += 3)
3380: {
3381: if (*scan && /* slot occupied */
3382: !scan[2]) /* with an unknown */
3383: {
3384: res |= ifac_sort_one(partial, where, scan);
3385: if (res) return res; /* early exit */
3386: }
3387: }
3388: return res;
3389: }
3390:
3391: /* sweep downward so we can with luck turn some Qs into Ps */
3392: static void
3393: ifac_whoiswho(GEN *partial, GEN *where, long after_crack)
3394: {
3395: long lgp = lg(*partial), larger_compos = 0;
3396: GEN scan, scan_end = *partial + lgp - 3;
3397:
3398: if (DEBUGLEVEL >= 5)
3399: {
3400: if (!*partial || typ(*partial) != t_VEC)
3401: err(typeer, "ifac_whoiswho");
3402: if (lg(*partial) < ifac_initial_length)
3403: err(talker, "partial impossibly short in ifac_whoiswho");
3404: if (!(*where) ||
3405: *where > scan_end ||
3406: *where < *partial + 3)
3407: err(talker, "`*where\' out of bounds in ifac_whoiswho");
3408: }
3409:
3410: if (after_crack == 0) return;
3411: if (after_crack > 0)
3412: {
3413: larger_compos = 1; /* disable Q-to-P trick */
3414: scan = *where + 3*(after_crack - 1);
3415: /* check at most after_crack entries */
3416: if (scan > scan_end) /* ooops... */
3417: {
3418: err(warner, "avoiding nonexistent factors in ifac_whoiswho");
3419: scan = scan_end;
3420: }
3421: }
3422: else { larger_compos = 0; scan = scan_end; }
3423:
3424: for (; scan >= *where; scan -= 3)
3425: {
3426: if (scan[2]) /* known class of factor */
3427: {
3428: if (scan[2] == zero) larger_compos = 1;
3429: else if (!larger_compos && scan[2] == un)
3430: {
3431: if (DEBUGLEVEL >= 3)
3432: {
3433: fprintferr("IFAC: factor %Z\n\tis prime (no larger composite)\n",
3434: **where);
3435: fprintferr("IFAC: prime %Z\n\tappears with exponent = %ld\n",
3436: **where, itos((GEN)(*where)[1]));
3437: }
3438: scan[2] = deux;
3439: } /* no else case */
3440: continue;
3441: }
3442: scan[2] =
1.2 ! noro 3443: (BSW_psp((GEN)(*scan)) ?
1.1 noro 3444: (larger_compos ? un : deux) : /* un- or finished prime */
3445: zero); /* composite */
3446:
3447: if (scan[2] == zero) larger_compos = 1;
3448: if (DEBUGLEVEL >= 3)
3449: {
3450: fprintferr("IFAC: factor %Z\n\tis %s\n", *scan,
3451: (scan[2] == zero ? "composite" : "prime"));
3452: }
3453: }
3454: }
3455:
3456: /* Here we normally do not check that the first entry is a not-finished
3457: * prime. Stack management: we may allocate a new exponent
3458: */
3459: static long
3460: ifac_divide(GEN *partial, GEN *where)
3461: {
3462: long lgp = lg(*partial);
3463: GEN scan = *where + 3;
3464: long res = 0, exponent, newexp, otherexp;
3465:
3466: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
3467: {
3468: if (!*partial || typ(*partial) != t_VEC)
3469: err(typeer, "ifac_divide");
3470: if (lg(*partial) < ifac_initial_length)
3471: err(talker, "partial impossibly short in ifac_divide");
3472: if (!(*where) ||
3473: *where > *partial + lgp - 3 ||
3474: *where < *partial + 3)
3475: err(talker, "`*where\' out of bounds in ifac_divide");
3476: if ((*where)[2] != un)
3477: err(talker, "division by composite or finished prime in ifac_divide");
3478: }
3479: if (!(**where)) /* always test just this one */
3480: err(talker, "division by nothing in ifac_divide");
3481:
3482: newexp = exponent = itos((GEN)((*where)[1]));
3483: if (exponent > 1 && moebius_mode) return 1;
3484: /* should've been caught by caller already */
3485:
3486: /* go for it */
3487: for (; scan < *partial + lgp; scan += 3)
3488: {
3489: if (scan[2] != zero) continue; /* the other thing ain't composite */
3490: otherexp = 0;
3491: /* let mpdivis divide the other factor in place to keep stack clutter
3492: minimal */
3493: while (mpdivis((GEN)(*scan), (GEN)(**where), (GEN)(*scan)))
3494: {
3495: if (moebius_mode) return 1; /* immediately */
3496: if (!otherexp) otherexp = itos((GEN)(scan[1]));
3497: newexp += otherexp;
3498: }
3499: if (newexp > exponent) /* did anything happen? */
3500: {
3501: (*where)[1] = (newexp == 2 ? deux : (long)(stoi(newexp)));
3502: exponent = newexp;
3503: if (is_pm1((GEN)(*scan))) /* factor dissolved completely */
3504: {
3505: *scan = scan[1] = (long)NULL;
3506: if (DEBUGLEVEL >= 4)
3507: fprintferr("IFAC: a factor was a power of another prime factor\n");
3508: }
3509: else if (DEBUGLEVEL >= 4)
3510: {
3511: fprintferr("IFAC: a factor was divisible by another prime factor,\n");
3512: fprintferr("\tleaving a cofactor = %Z\n", *scan);
3513: }
3514: scan[2] = (long)NULL; /* at any rate it's Unknown now */
3515: res = 1;
3516: if (DEBUGLEVEL >= 5)
3517: {
3518: fprintferr("IFAC: prime %Z\n\tappears at least to the power %ld\n",
3519: **where, newexp);
3520: }
3521: }
3522: } /* for */
3523: (*where)[2] = deux; /* make it a finished prime */
3524: if (DEBUGLEVEL >= 3)
3525: {
3526: fprintferr("IFAC: prime %Z\n\tappears with exponent = %ld\n",
3527: **where, newexp);
3528: }
3529: return res;
3530: }
3531:
3532:
3533: /* The following takes the place of 2.0.9.alpha's find_factor(). */
3534:
3535: /* The meaning of the hint changes against 2.0.9.alpha to:
3536: * hint == 0 : Use our own strategy, and this should be the default
3537: * hint & 1 : Avoid mpqs(), use ellfacteur() after pollardbrent()
3538: * hint & 2 : Avoid first-stage ellfacteur() in favour of mpqs()
3539: * (which may still fall back to ellfacteur() if mpqs() is not installed
3540: * or gives up)
3541: * hint & 4 : Avoid even the pollardbrent() and squfof() stages
3542: * hint & 8 : Avoid final ellfacteur(); this may `declare' a composite
3543: * to be prime.
3544: */
3545:
3546: /* stack housekeeping: this routine may create one or more objects (a new
3547: * factor, or possibly several, and perhaps one or more new exponents > 2)
3548: * Added squfof --GN2000Oct01
3549: */
3550: static long
3551: ifac_crack(GEN *partial, GEN *where)
3552: {
1.2 ! noro 3553: long hint, cmp_res, exp1 = 1, exp2 = 1;
! 3554: gpmem_t av;
1.1 noro 3555: GEN factor = NULL, exponent;
3556:
3557: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
3558: {
3559: long lgp;
3560: if (!*partial || typ(*partial) != t_VEC)
3561: err(typeer, "ifac_crack");
3562: if ((lgp = lg(*partial)) < ifac_initial_length)
3563: err(talker, "partial impossibly short in ifac_crack");
3564: if (!(*where) ||
3565: *where < *partial + 6 || /* sic -- caller must realloc first */
3566: *where > *partial + lgp - 3)
3567: err(talker, "`*where\' out of bounds in ifac_crack");
3568: if (!(**where) || typ((GEN)(**where)) != t_INT)
3569: err(typeer, "ifac_crack");
3570: if ((*where)[2] != zero)
3571: err(talker, "operand not known composite in ifac_crack");
3572: }
3573: hint = itos((GEN)((*partial)[2])) & 15;
3574: exponent = (GEN)((*where)[1]);
3575:
3576: if (DEBUGLEVEL >= 3)
3577: fprintferr("IFAC: cracking composite\n\t%Z\n", **where);
3578:
3579: /* crack squares. Quite fast due to the initial square residue test */
3580: if (DEBUGLEVEL >= 4)
3581: fprintferr("IFAC: checking for pure square\n");
3582: av = avma;
3583: while (carrecomplet((GEN)(**where), &factor))
3584: {
3585: if (DEBUGLEVEL >= 4)
3586: fprintferr("IFAC: found %Z =\n\t%Z ^2\n", **where, factor);
3587: affii(factor, (GEN)(**where)); avma = av; factor = NULL;
3588: if (exponent == gun)
3589: (*where)[1] = deux;
3590: else if (exponent == gdeux)
3591: { (*where)[1] = (long)stoi(4); av = avma; }
3592: else
3593: { affii(shifti(exponent, 1), (GEN)((*where)[1])); avma = av; }
3594: exponent = (GEN)((*where)[1]);
3595: if (moebius_mode) return 0; /* no need to carry on... */
3596: exp1 = 2;
3597: } /* while carrecomplet */
3598:
3599: /* check whether our composite hasn't become prime */
1.2 ! noro 3600: if (exp1 > 1 && hint != 15 && BSW_psp((GEN)(**where)))
1.1 noro 3601: {
3602: (*where)[2] = un;
3603: if (DEBUGLEVEL >= 4)
3604: {
3605: fprintferr("IFAC: factor %Z\n\tis prime\n",**where);
3606: flusherr();
3607: }
3608: return 0; /* bypass subsequent ifac_whoiswho() call */
3609: }
3610: /* still composite -- carry on */
3611:
3612: /* MPQS cannot factor prime powers; check for cubes/5th/7th powers.
3613: * Do this even if MPQS is blocked by hint -- it still serves a useful
3614: * purpose in bounded factorization
3615: */
3616: {
3617: long mask = 7;
3618: if (DEBUGLEVEL == 4)
3619: fprintferr("IFAC: checking for odd power\n");
3620: /* (At debug levels > 4, is_odd_power() itself prints something more
3621: * informative)
3622: */
3623: av = avma;
3624: while ((exp1 = /* assignment */
3625: is_odd_power((GEN)(**where), &factor, &mask)))
3626: {
3627: if (exp2 == 1) exp2 = exp1; /* remember this after the loop */
3628: if (DEBUGLEVEL >= 4)
3629: fprintferr("IFAC: found %Z =\n\t%Z ^%ld\n", **where, factor, exp1);
3630: affii(factor, (GEN)(**where)); avma = av; factor = NULL;
3631: if (exponent == gun)
3632: { (*where)[1] = (long)stoi(exp1); av = avma; }
3633: else if (exponent == gdeux)
3634: { (*where)[1] = (long)stoi(exp1<<1); av = avma; }
3635: else
3636: { affii(mulsi(exp1, exponent), (GEN)((*where)[1])); avma = av; }
3637: exponent = (GEN)((*where)[1]);
3638: if (moebius_mode) return 0; /* no need to carry on... */
3639: } /* while is_odd_power */
3640:
1.2 ! noro 3641: if (exp2 > 1 && hint != 15 && BSW_psp((GEN)(**where)))
1.1 noro 3642: { /* Something nice has happened and our composite has become prime */
3643: (*where)[2] = un;
3644: if (DEBUGLEVEL >= 4)
3645: {
3646: fprintferr("IFAC: factor %Z\n\tis prime\n", **where);
3647: flusherr();
3648: }
3649: return 0; /* bypass subsequent ifac_whoiswho() call */
3650: }
3651: } /* odd power stage */
3652:
3653: /* pollardbrent() Rho usually gets a first chance */
3654: if (!(hint & 4))
3655: {
3656: if (DEBUGLEVEL >= 4)
3657: fprintferr("IFAC: trying Pollard-Brent rho method first\n");
3658: factor = pollardbrent((GEN)(**where));
3659: } /* Rho stage */
3660:
3661: /* Shanks' squfof() is next. It will pass up the chance silently when
3662: * the input number is too large. We put this under the same governing
3663: * bit of the hint parameter, for no very good reason other than avoiding
3664: * a proliferation of further meaningful bits in all the wrong order.
3665: */
3666: if (!factor && !(hint & 4))
3667: {
3668: if (DEBUGLEVEL >= 4)
3669: fprintferr("IFAC: trying Shanks' SQUFOF, will fail silently if input\n"
3670: " is too large for it.\n");
3671: factor = squfof((GEN)(**where), 0); /* allow squfof's own diagnostics */
3672: }
3673:
3674: /* if this didn't work, try one of our high-power beasties */
3675: if (!factor && !(hint & 2))
3676: {
3677: if (DEBUGLEVEL >= 4)
3678: fprintferr("IFAC: trying Lenstra-Montgomery ECM\n");
3679: factor = ellfacteur((GEN)(**where), 0); /* do not insist */
3680: } /* First ECM stage */
3681:
3682: if (!factor && !(hint & 1))
3683: {
3684: if (DEBUGLEVEL >= 4)
3685: fprintferr("IFAC: trying Multi-Polynomial Quadratic Sieve\n");
3686: factor = mpqs((GEN)(**where));
3687: } /* MPQS stage */
3688:
3689: if (!factor)
3690: {
3691: if (!(hint & 8)) /* still no luck? force it */
3692: {
3693: if (DEBUGLEVEL >= 4)
3694: fprintferr("IFAC: forcing ECM, may take some time\n");
3695: factor = ellfacteur((GEN)(**where), 1);
3696: } /* final ECM stage, guaranteed to succeed */
3697: else /* limited factorization */
3698: {
3699: if (DEBUGLEVEL >= 2)
3700: {
3701: err(warner, "IFAC: unfactored composite declared prime");
3702: /* don't print it out at level 3 or above, where it would appear
3703: * several times before and after this message already
3704: */
3705: if (DEBUGLEVEL == 2)
3706: {
3707: fprintferr("\t%Z\n",**where);
3708: flusherr();
3709: }
3710: }
3711: (*where)[2] = un; /* might as well trial-divide by it... */
3712: return 1;
3713: }
3714: } /* Final ECM stage */
3715:
3716: if (DEBUGLEVEL >= 1)
3717: {
3718: if (!factor) /* never reached */
3719: err(talker, "all available factoring methods failed in ifac_crack");
3720: }
3721: if (typ(factor) == t_VEC) /* delegate this case */
3722: return ifac_insert_multiplet(partial, where, factor);
3723:
3724: else if (typ(factor) != t_INT)
3725: {
3726: fprintferr("IFAC: factorizer returned strange object to ifac_crack\n");
3727: outerr(factor);
3728: err(bugparier, "factoring");
3729: }
3730:
3731: /* got single integer back: work out the cofactor (in place) */
3732: if (!mpdivis((GEN)(**where), factor, (GEN)(**where)))
3733: {
3734: fprintferr("IFAC: factoring %Z\n", **where);
3735: fprintferr("\tyielded `factor\' %Z\n\twhich isn't!\n", factor);
3736: err(bugparier, "factoring");
3737: }
3738:
3739: /* the factoring engines report the factor found when DEBUGLEVEL is
3740: * large enough; let's tell about the cofactor
3741: */
3742: if (DEBUGLEVEL >= 4)
3743: fprintferr("IFAC: cofactor = %Z\n", **where);
3744:
3745: /* ok, now `factor' is one factor and **where is the other, find out
3746: * which is larger
3747: */
3748: cmp_res = cmpii(factor, (GEN)(**where));
3749: if (cmp_res < 0) /* common case */
3750: {
3751: (*where)[2] = (long)NULL; /* mark cofactor `unknown' */
3752: (*where)[-1] = (long)NULL; /* mark factor `unknown' */
3753: (*where)[-2] =
3754: isonstack(exponent) ? licopy(exponent) : (long)exponent;
3755: *where -= 3;
3756: **where = (long)factor;
3757: return 2;
3758: }
3759: else if (cmp_res == 0) /* hep, split a square in the middle */
3760: {
3761: err(warner,
3762: "square not found by carrecomplet, ifac_crack recovering");
3763: cgiv(factor);
3764: (*where)[2] = (long)NULL; /* mark the sqrt `unknown' */
3765: if (exponent == gun) /* double the exponent */
3766: (*where)[1] = deux;
3767: else if (exponent == gdeux)
3768: (*where)[1] = (long)stoi(4); /* make a new one */
3769: else /* overwrite old exponent */
3770: {
3771: av = avma;
3772: affii(shifti(exponent, 1), (GEN)((*where)[1]));
3773: avma = av;
3774: /* leave *where unchanged */
3775: }
3776: if (moebius_mode) return 0;
3777: return 1;
3778: }
3779: else /* factor > cofactor, rearrange */
3780: {
3781: (*where)[2] = (long)NULL; /* mark factor `unknown' */
3782: (*where)[-1] = (long)NULL; /* mark cofactor `unknown' */
3783: (*where)[-2] =
3784: isonstack(exponent) ? licopy(exponent) : (long)exponent;
3785: *where -= 3;
3786: **where = (*where)[3]; /* move cofactor pointer to lowest slot */
3787: (*where)[3] = (long)factor; /* save factor */
3788: return 2;
3789: }
3790: }
3791:
3792: /* the following doesn't collect garbage; caller's caller should do it
3793: * (which means ifac_main()). No diagnostics either, the factoring engine
3794: * should have printed what it found when DEBUGLEVEL>=4 or so. Note facvec
3795: * contains slots of three components per factor; repeated factors are
3796: * expressly allowed (and their classes shouldn't contradict each other
3797: * whereas their exponents will be added up)
3798: */
3799: static long
3800: ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec)
3801: {
3802: long j,k=1, lfv=lg(facvec)-1, nf=lfv/3, room=(long)(*where-*partial);
3803: /* one of the factors will go into the *where slot, so room is now
3804: * 3 times the number of slots we can use
3805: */
3806: long needroom = lfv - room;
3807: GEN sorted, auxvec = cgetg(nf+1, t_VEC), factor;
3808: long exponent = itos((GEN)((*where)[1])); /* the old exponent */
3809: GEN newexp;
3810:
3811: if (DEBUGLEVEL >= 5)
3812: fprintferr("IFAC: incorporating set of %ld factor(s)%s\n",
3813: nf, (DEBUGLEVEL >=6 ? "..." : ""));
3814: /* fixed: squfof may return a single, squared, factor as a set
3815: * --GN2000Oct01
3816: */
3817: if (needroom > 0)
3818: ifac_realloc(partial, where, lg(*partial) + needroom + 3);
3819: /* one extra slot for paranoia, errm, future use */
3820:
3821: /* create sort permutation from the values of the factors */
3822: for (j=nf; j; j--) auxvec[j] = facvec[3*j-2]; /* just the pointers */
3823: sorted = sindexsort(auxvec);
3824: /* and readjust the result for the triple spacing */
3825: for (j=nf; j; j--) sorted[j] = 3*sorted[j]-2;
3826: if (DEBUGLEVEL >= 6)
3827: fprintferr("\tsorted them...\n");
3828:
3829: /* store factors, beginning at *where, and catching any duplicates */
3830: **where = facvec[sorted[nf]];
3831: if ((newexp = (GEN)(facvec[sorted[nf]+1])) != gun) /* new exponent > 1 */
3832: {
3833: if (exponent == 1)
3834: (*where)[1] = isonstack(newexp) ? licopy(newexp) : (long)newexp;
3835: else
3836: (*where)[1] = lmulsi(exponent, newexp);
3837: } /* if new exponent is 1, the old exponent already in place will do */
3838: (*where)[2] = facvec[sorted[nf]+2]; /* copy class */
3839: if (DEBUGLEVEL >= 6)
3840: fprintferr("\tstored (largest) factor no. %ld...\n", nf);
3841:
3842: for (j=nf-1; j; j--)
3843: {
3844: factor = (GEN)(facvec[sorted[j]]);
3845: if (egalii(factor, (GEN)(**where)))
3846: {
3847: if (DEBUGLEVEL >= 6)
3848: fprintferr("\tfactor no. %ld is a duplicate%s\n",
3849: j, (j>1 ? "..." : ""));
3850: /* update exponent, ignore class which would already have been set,
3851: * and then forget current factor
3852: */
3853: if ((newexp = (GEN)(facvec[sorted[j]+1])) != gun) /* new exp > 1 */
3854: { /* now we have at least 3 */
3855: (*where)[1] = laddii((GEN)((*where)[1]),
3856: mulsi(exponent, newexp));
3857: }
3858: else
3859: {
3860: if ((*where)[1] == un && exponent == 1)
3861: (*where)[1] = deux;
3862: else
3863: (*where)[1] = laddsi(exponent, (GEN)((*where)[1]));
3864: /* not safe to add 1 in place -- that might overwrite gdeux,
3865: * with `interesting' consequences
3866: */
3867: }
3868: if (moebius_mode) return 0; /* stop now, but with exponent updated */
3869: continue;
3870: }
3871: (*where)[-1] = facvec[sorted[j]+2]; /* class as given */
3872: if ((newexp = (GEN)(facvec[sorted[j]+1])) != gun) /* new exp > 1 */
3873: {
3874: if (exponent == 1 && newexp == gdeux)
3875: (*where)[-2] = deux;
3876: else /* exponent*newexp > 2 */
3877: (*where)[-2] = lmulsi(exponent, newexp);
3878: }
3879: else
3880: {
3881: (*where)[-2] = (exponent == 1 ? un :
3882: (exponent == 2 ? deux :
3883: (long)stoi(exponent))); /* inherit parent's exponent */
3884: }
3885: (*where)[-3] = isonstack(factor) ? licopy(factor) : (long)factor;
3886: /* keep components younger than *partial */
3887: *where -= 3;
3888: k++;
3889: if (DEBUGLEVEL >= 6)
3890: fprintferr("\tfactor no. %ld was unique%s\n",
3891: j, (j>1 ? " (so far)..." : ""));
3892: }
3893: /* make the `sorted' object safe for garbage collection (probably not a
3894: * problem, since it should be in the garbage zone from everybody's
3895: * perspective, but it's easy to do it)
3896: */
3897: *sorted = evaltyp(t_INT) | evallg(nf+1);
3898: return k;
3899: }
3900:
3901: static GEN
3902: ifac_main(GEN *partial)
3903: {
3904: /* leave the basic error checking to ifac_find() */
3905: GEN here = ifac_find(partial, partial);
3906: long res, nf;
3907:
3908: /* if nothing left, return gun */
3909: if (!here) return gun;
3910:
3911: /* if we are in Moebius mode and have already detected a repeated factor,
3912: * stop right here. Shouldn't really happen
3913: */
3914: if (moebius_mode && here[1] != un)
3915: {
3916: if (DEBUGLEVEL >= 3)
3917: {
3918: fprintferr("IFAC: main loop: repeated old factor\n\t%Z\n", *here);
3919: flusherr();
3920: }
3921: return gzero;
3922: }
3923:
3924: /* loop until first entry is a finished prime. May involve reallocations
3925: * and thus updates of *partial
3926: */
3927: while (here[2] != deux)
3928: {
3929: /* if it's unknown, something has gone wrong; try to recover */
3930: if (!(here[2]))
3931: {
3932: err(warner, "IFAC: unknown factor seen in main loop");
3933: res = ifac_resort(partial, &here);
3934: if (res) return gzero; /* can only happen in Moebius mode */
3935: ifac_whoiswho(partial, &here, -1);
3936: /* defrag for good measure */
3937: ifac_defrag(partial, &here);
3938: continue;
3939: }
3940: /* if it's composite, crack it */
3941: if (here[2] == zero)
3942: {
3943: /* make sure there's room for another factor */
3944: if (here < *partial + 6)
3945: { /* try defrag first */
3946: ifac_defrag(partial, &here);
3947: if (here < *partial + 6) /* no luck */
3948: {
3949: ifac_realloc(partial, &here, 1); /* guaranteed to work */
3950: /* Unfortunately, we can't do a garbage collection here since we
3951: * know too little about where in the stack the old components
3952: * were.
3953: */
3954: }
3955: }
3956: nf = ifac_crack(partial, &here);
3957: if (moebius_mode && here[1] != un) /* that was a power */
3958: {
3959: if (DEBUGLEVEL >= 3)
3960: {
3961: fprintferr("IFAC: main loop: repeated new factor\n\t%Z\n", *here);
3962: flusherr();
3963: }
3964: return gzero;
3965: }
3966: /* deal with the new unknowns. No resort, since ifac_crack will
3967: * already have sorted them
3968: */
3969: ifac_whoiswho(partial, &here, nf);
3970: continue;
3971: }
3972: /* if it's prime but not yet finished, finish it */
3973: if (here[2] == un)
3974: {
3975: res = ifac_divide(partial, &here);
3976: if (res)
3977: {
3978: if (moebius_mode)
3979: {
3980: if (DEBUGLEVEL >= 3)
3981: {
3982: fprintferr("IFAC: main loop: another factor was divisible by\n");
3983: fprintferr("\t%Z\n", *here); flusherr();
3984: }
3985: return gzero;
3986: }
3987: ifac_defrag(partial, &here);
3988: (void)(ifac_resort(partial, &here)); /* sort new cofactors down */
3989: /* it doesn't matter right now whether this finds a repeated factor,
3990: * since we never get to this point in Moebius mode
3991: */
3992: ifac_defrag(partial, &here); /* resort may have created new gaps */
3993: ifac_whoiswho(partial, &here, -1);
3994: }
3995: continue;
3996: }
3997: /* there are no other cases, never reached */
3998: err(talker, "non-existent factor class in ifac_main");
3999: } /* while */
4000: if (moebius_mode && here[1] != un)
4001: {
4002: if (DEBUGLEVEL >= 3)
4003: {
4004: fprintferr("IFAC: after main loop: repeated old factor\n\t%Z\n", *here);
4005: flusherr();
4006: }
4007: return gzero; /* just a safety net */
4008: }
4009: if (DEBUGLEVEL >= 4)
4010: {
4011: long nf = (*partial + lg(*partial) - here - 3)/3;
4012: if (nf)
4013: fprintferr("IFAC: main loop: %ld factor%s left\n",
4014: nf, (nf>1 ? "s" : ""));
4015: else
4016: fprintferr("IFAC: main loop: this was the last factor\n");
4017: flusherr();
4018: }
4019: return here;
4020: }
4021:
4022: /* Caller of the following should worry about stack management, it makes
4023: * a rather shameless mess :^)
4024: */
4025: GEN
4026: ifac_primary_factor(GEN *partial, long *exponent)
4027: {
4028: GEN here = ifac_main(partial);
4029: GEN res;
4030:
4031: if (here == gun) { *exponent = 0; return gun; }
4032: else if (here == gzero) { *exponent = 0; return gzero; }
4033:
4034: res = icopy((GEN)(*here));
4035: *exponent = itos((GEN)(here[1]));
4036: here[2] = here[1] = *here = (long)NULL;
4037: return res;
4038: }
4039:
4040: /* encapsulated routines */
4041:
4042: /* prime/exponent pairs need to appear contiguously on the stack, but we
4043: * also need to have our data structure somewhere, and we don't know in
4044: * advance how many primes will turn up. The following discipline achieves
4045: * this: When ifac_decomp() is called, n should point at an object older
4046: * than the oldest small prime/exponent pair (auxdecomp0() guarantees
4047: * this easily since it mpdivis()es any divisors it discovers off its own
4048: * copy of the original N). We allocate sufficient space to accommodate
4049: * several pairs -- eleven pairs ought to fit in a space not much larger
4050: * than n itself -- before calling ifac_start(). If we manage to complete
4051: * the factorization before we run out of space, we free the data structure
4052: * and cull the excess reserved space before returning. When we do run out,
4053: * we have to leapfrog to generate more (guesstimating the requirements
4054: * from what is left in the partial factorization structure); room for
4055: * fresh pairs is allocated at the head of the stack, followed by an
4056: * ifac_realloc() to reconnect the data structure and move it out of the
4057: * way, followed by a few pointer tweaks to connect the new pairs space
4058: * to the old one.-- This whole affair translates into a surprisingly
4059: * compact little routine.
4060: */
4061:
4062: #define ifac_overshoot 64 /* lgefint(n)+64 words reserved */
1.2 ! noro 4063: /* ifac_decomp_break:
1.1 noro 4064: *
4065: * Find primary factors of n until ifac_break return true, or n is
4066: * factored if ifac_break is NULL.
4067: */
4068: /* ifac_break:
4069: *
4070: * state is for state management of the function, and depend of the
4071: * function. ifac_break is called initially in decomp_break with
4072: * here=NULL. This allows the function to see the new value of n.
4073: * return 1: stop factoring, 0 continue. If ifac_break is NULL,
4074: * assumed to always return 0. ifac_break must not let anything on the
4075: * stack. However data can be stored in state
4076: */
4077:
4078: long
4079: ifac_decomp_break(GEN n, long (*ifac_break)(GEN n,GEN pairs,GEN here,GEN state),
4080: GEN state, long hint)
4081: {
1.2 ! noro 4082: long tf=lgefint(n);
! 4083: gpmem_t av=avma, lim=stack_lim(av, 1);
1.1 noro 4084: long nb=0;
4085: GEN part, here, workspc = new_chunk(tf + ifac_overshoot), pairs = (GEN)av;
4086: /* workspc will be doled out by us in pairs of smaller t_INTs */
1.2 ! noro 4087: gpmem_t tetpil = avma; /* remember head of workspc zone */;
1.1 noro 4088:
4089: if (!n || typ(n) != t_INT) err(typeer, "ifac_decomp");
4090: if (!signe(n) || tf < 3) err(talker, "factoring 0 in ifac_decomp");
4091:
4092: part = ifac_start(n, 0, hint);
4093: here = ifac_main(&part);
4094:
4095: while (here != gun)
4096: {
4097: long lf=lgefint((GEN)(*here));
4098: if (pairs - workspc < lf + 3) /* out of room, leapfrog */
4099: {
4100: /* the ifac_realloc() below will clear tetpil - avma words
4101: * on the stack, which should be about enough for the extra
4102: * primes we're going to see, and we'll want several more to
4103: * accommodate further exponents. In most cases, the lf + 3
4104: * below is pure paranoia, but the factor we're about to copy
4105: * might be the one sitting off the stack in the original n,
4106: * so let's play safe
4107: */
4108: workspc = new_chunk(lf + 3 + ifac_overshoot);
4109: ifac_realloc(&part, &here, 0);
4110: here = ifac_find(&part, &part);
1.2 ! noro 4111: tetpil = (gpmem_t)workspc;
1.1 noro 4112: }
4113: /* room enough now */
4114: nb++;
4115: pairs -= lf;
4116: *pairs = evaltyp(t_INT) | evallg(lf);
4117: affii((GEN)(*here), pairs);
4118: pairs -= 3;
4119: *pairs = evaltyp(t_INT) | evallg(3);
4120: affii((GEN)(here[1]), pairs);
4121: if (ifac_break && (*ifac_break)(n,pairs,here,state))
4122: {
4123: if (DEBUGLEVEL >= 3)
4124: fprintferr("IFAC: (Partial fact.)Stop requested.\n");
4125: break;
4126: }
4127: here[2] = here[1] = *here = (long)NULL;
4128: here = ifac_main(&part);
4129: if (low_stack(lim, stack_lim(av,1)))
4130: {
4131: if(DEBUGMEM>1) err(warnmem,"[2] ifac_decomp");
4132: ifac_realloc(&part, &here, 0);
4133: part = gerepileupto(tetpil, part);
4134: }
4135: }
1.2 ! noro 4136: avma = (gpmem_t)pairs;
1.1 noro 4137: if (DEBUGLEVEL >= 3)
4138: {
4139: fprintferr("IFAC: found %ld large prime (power) factor%s.\n",
4140: nb, (nb>1? "s": ""));
4141: flusherr();
4142: }
4143: return nb;
4144: }
4145:
4146: long
4147: ifac_decomp(GEN n, long hint)
4148: {
4149: return ifac_decomp_break(n, NULL, gzero, hint);
4150: }
4151:
4152: long
4153: ifac_moebius(GEN n, long hint)
4154: {
1.2 ! noro 4155: long mu=1;
! 4156: gpmem_t av=avma, lim=stack_lim(av, 1);
1.1 noro 4157: GEN part = ifac_start(n, 1, hint);
4158: GEN here = ifac_main(&part);
4159:
4160: while (here != gun && here != gzero)
4161: {
4162: if (itos((GEN)(here[1])) > 1)
4163: { here = gzero; break; } /* shouldn't happen */
4164: mu = -mu;
4165: here[2] = here[1] = *here = (long)NULL;
4166: here = ifac_main(&part);
4167: if (low_stack(lim, stack_lim(av,1)))
4168: {
4169: if(DEBUGMEM>1) err(warnmem,"ifac_moebius");
4170: ifac_realloc(&part, &here, 0);
4171: part = gerepileupto(av, part);
4172: }
4173: }
4174: avma = av;
4175: return (here == gun ? mu : 0);
4176: }
4177:
4178: long
4179: ifac_issquarefree(GEN n, long hint)
4180: {
1.2 ! noro 4181: gpmem_t av=avma, lim=stack_lim(av, 1);
1.1 noro 4182: GEN part = ifac_start(n, 1, hint);
4183: GEN here = ifac_main(&part);
4184:
4185: while (here != gun && here != gzero)
4186: {
4187: if (itos((GEN)(here[1])) > 1)
4188: { here = gzero; break; } /* shouldn't happen */
4189: here[2] = here[1] = *here = (long)NULL;
4190: here = ifac_main(&part);
4191: if (low_stack(lim, stack_lim(av,1)))
4192: {
4193: if(DEBUGMEM>1) err(warnmem,"ifac_issquarefree");
4194: ifac_realloc(&part, &here, 0);
4195: part = gerepileupto(av, part);
4196: }
4197: }
4198: avma = av;
4199: return (here == gun ? 1 : 0);
4200: }
4201:
4202: long
4203: ifac_omega(GEN n, long hint)
4204: {
1.2 ! noro 4205: long omega=0;
! 4206: gpmem_t av=avma, lim=stack_lim(av, 1);
1.1 noro 4207: GEN part = ifac_start(n, 0, hint);
4208: GEN here = ifac_main(&part);
4209:
4210: while (here != gun)
4211: {
4212: omega++;
4213: here[2] = here[1] = *here = (long)NULL;
4214: here = ifac_main(&part);
4215: if (low_stack(lim, stack_lim(av,1)))
4216: {
4217: if(DEBUGMEM>1) err(warnmem,"ifac_omega");
4218: ifac_realloc(&part, &here, 0);
4219: part = gerepileupto(av, part);
4220: }
4221: }
4222: avma = av;
4223: return omega;
4224: }
4225:
4226: long
4227: ifac_bigomega(GEN n, long hint)
4228: {
1.2 ! noro 4229: long Omega=0;
! 4230: gpmem_t av=avma, lim=stack_lim(av, 1);
1.1 noro 4231: GEN part = ifac_start(n, 0, hint);
4232: GEN here = ifac_main(&part);
4233:
4234: while (here != gun)
4235: {
4236: Omega += itos((GEN)(here[1]));
4237: here[2] = here[1] = *here = (long)NULL;
4238: here = ifac_main(&part);
4239: if (low_stack(lim, stack_lim(av,1)))
4240: {
4241: if(DEBUGMEM>1) err(warnmem,"ifac_bigomega");
4242: ifac_realloc(&part, &here, 0);
4243: part = gerepileupto(av, part);
4244: }
4245: }
4246: avma = av;
4247: return Omega;
4248: }
4249:
4250: GEN
4251: ifac_totient(GEN n, long hint)
4252: {
4253: GEN res = cgeti(lgefint(n));
1.2 ! noro 4254: long exponent;
! 4255: gpmem_t av=avma, tetpil, lim=stack_lim(av, 1);
1.1 noro 4256: GEN phi = gun;
4257: GEN part = ifac_start(n, 0, hint);
4258: GEN here = ifac_main(&part);
4259:
4260: while (here != gun)
4261: {
4262: phi = mulii(phi, addsi(-1, (GEN)(*here)));
4263: if (here[1] != un)
4264: {
4265: if (here[1] == deux)
4266: {
4267: phi = mulii(phi, (GEN)(*here));
4268: }
4269: else
4270: {
4271: exponent = itos((GEN)(here[1]));
4272: phi = mulii(phi, gpowgs((GEN)(*here), exponent-1));
4273: }
4274: }
4275: here[2] = here[1] = *here = (long)NULL;
4276: here = ifac_main(&part);
4277: if (low_stack(lim, stack_lim(av,1)))
4278: {
4279: GEN *gsav[2];
4280: if(DEBUGMEM>1) err(warnmem,"ifac_totient");
4281: tetpil = avma;
4282: ifac_realloc(&part, &here, 0);
4283: phi = icopy(phi);
4284: gsav[0] = φ gsav[1] = ∂
4285: gerepilemanysp(av, tetpil, gsav, 2);
4286: /* don't try to preserve here, safer to pick it up again
4287: * (and ifac_find does a lot of sanity checking at high
4288: * debuglevels)
4289: */
4290: here = ifac_find(&part, &part);
4291: }
4292: }
4293: affii(phi, res);
4294: avma = av;
4295: return res;
4296: }
4297:
4298: GEN
4299: ifac_numdiv(GEN n, long hint)
4300: {
4301: /* we don't preallocate since it's too hard to guess the right
4302: * size here
4303: */
4304: GEN res;
1.2 ! noro 4305: gpmem_t av=avma, tetpil, lim=stack_lim(av, 1);
1.1 noro 4306: GEN exponent, tau = gun;
4307: GEN part = ifac_start(n, 0, hint);
4308: GEN here = ifac_main(&part);
4309:
4310: while (here != gun)
4311: {
4312: exponent = (GEN)(here[1]);
4313: tau = mulii(tau, addsi(1, exponent));
4314: here[2] = here[1] = *here = (long)NULL;
4315: here = ifac_main(&part);
4316: if (low_stack(lim, stack_lim(av,1)))
4317: {
4318: GEN *gsav[2];
4319: if(DEBUGMEM>1) err(warnmem,"ifac_numdiv");
4320: tetpil = avma;
4321: ifac_realloc(&part, &here, 0);
4322: tau = icopy(tau);
4323: gsav[0] = τ gsav[1] = ∂
4324: gerepilemanysp(av, tetpil, gsav, 2);
4325: /* (see ifac_totient()) */
4326: here = ifac_find(&part, &part);
4327: }
4328: }
4329: tetpil = avma;
4330: res = icopy(tau);
4331: return gerepile(av, tetpil, res);
4332: }
4333:
4334: GEN
4335: ifac_sumdiv(GEN n, long hint)
4336: {
4337: /* don't preallocate */
4338: GEN res;
1.2 ! noro 4339: long exponent;
! 4340: gpmem_t av=avma, tetpil, lim=stack_lim(av, 1);
1.1 noro 4341: GEN contrib, sigma = gun;
4342: GEN part = ifac_start(n, 0, hint);
4343: GEN here = ifac_main(&part);
4344:
4345: while (here != gun)
4346: {
4347: exponent = itos((GEN)(here[1]));
4348: contrib = addsi(1, (GEN)(*here));
4349: for (; exponent > 1; exponent--)
4350: contrib = addsi(1, mulii((GEN)(*here), contrib));
4351: sigma = mulii(sigma, contrib);
4352: here[2] = here[1] = *here = (long)NULL;
4353: here = ifac_main(&part);
4354: if (low_stack(lim, stack_lim(av,1)))
4355: {
4356: GEN *gsav[2];
4357: if(DEBUGMEM>1) err(warnmem,"ifac_sumdiv");
4358: tetpil = avma;
4359: ifac_realloc(&part, &here, 0);
4360: sigma = icopy(sigma);
4361: gsav[0] = σ gsav[1] = ∂
4362: gerepilemanysp(av, tetpil, gsav, 2);
4363: /* (see ifac_totient()) */
4364: here = ifac_find(&part, &part);
4365: }
4366: }
4367: tetpil = avma;
4368: res = icopy(sigma);
4369: return gerepile(av, tetpil, res);
4370: }
4371:
4372: /* k should be positive, and indeed it had better be > 1 (not checked).
4373: * The calling function knows what to do with the other cases.
4374: */
4375: GEN
4376: ifac_sumdivk(GEN n, long k, long hint)
4377: {
4378: /* don't preallocate */
4379: GEN res;
1.2 ! noro 4380: long exponent;
! 4381: gpmem_t av=avma, tetpil, lim=stack_lim(av, 1);
1.1 noro 4382: GEN contrib, q, sigma = gun;
4383: GEN part = ifac_start(n, 0, hint);
4384: GEN here = ifac_main(&part);
4385:
4386: while (here != gun)
4387: {
4388: exponent = itos((GEN)(here[1]));
4389: q = gpowgs((GEN)(*here), k);
4390: contrib = addsi(1, q);
4391: for (; exponent > 1; exponent--)
4392: contrib = addsi(1, mulii(q, contrib));
4393: sigma = mulii(sigma, contrib);
4394: here[2] = here[1] = *here = (long)NULL;
4395: here = ifac_main(&part);
4396: if (low_stack(lim, stack_lim(av,1)))
4397: {
4398: GEN *gsav[2];
4399: if(DEBUGMEM>1) err(warnmem,"ifac_sumdivk");
4400: tetpil = avma;
4401: ifac_realloc(&part, &here, 0);
4402: sigma = icopy(sigma);
4403: gsav[0] = σ gsav[1] = ∂
4404: gerepilemanysp(av, tetpil, gsav, 2);
4405: /* (see ifac_totient()) */
4406: here = ifac_find(&part, &part);
4407: }
4408: }
4409: tetpil = avma;
4410: res = icopy(sigma);
4411: return gerepile(av, tetpil, res);
4412: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>