Annotation of OpenXM_contrib/pari-2.2/src/basemath/ifactor1.c, Revision 1.1.1.1
1.1 noro 1: /* $Id: ifactor1.c,v 1.23 2001/05/23 18:42:21 karim Exp $
2:
3: Copyright (C) 2000 The PARI group.
4:
5: This file is part of the PARI/GP package.
6:
7: PARI/GP is free software; you can redistribute it and/or modify it under the
8: terms of the GNU General Public License as published by the Free Software
9: Foundation. It is distributed in the hope that it will be useful, but WITHOUT
10: ANY WARRANTY WHATSOEVER.
11:
12: Check the License for details. You should have received a copy of it, along
13: with the package; see the file 'COPYING'. If not, write to the Free Software
14: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
15:
16: /********************************************************************/
17: /** **/
18: /** INTEGER FACTORIZATION **/
19: /** **/
20: /********************************************************************/
21: #include "pari.h"
22:
23: /*********************************************************************/
24: /** **/
25: /** PSEUDO PRIMALITY **/
26: /** **/
27: /*********************************************************************/
28: static GEN sqrt1, sqrt2, t1, t;
29: static long r1;
30:
31: /* The following two internal routines don't restore avma -- the caller
32: must do so at the end. */
33: static GEN
34: init_miller(GEN n)
35: {
36: if (signe(n) < 0) n = absi(n);
37: t=addsi(-1,n); r1=vali(t); t1 = shifti(t,-r1);
38: sqrt1=cgeti(lg(t)); sqrt1[1]=evalsigne(0)|evallgefint(2);
39: sqrt2=cgeti(lg(t)); sqrt2[1]=evalsigne(0)|evallgefint(2);
40: return n;
41: }
42:
43: /* is n strong pseudo-prime for base a ? `End matching' (check for square
44: * roots of -1) added by GN */
45: /* TODO: If ends do mismatch, then we have factored n, and this information
46: should somehow be made available to the factoring machinery. --GN */
47: static int
48: bad_for_base(GEN n, GEN a)
49: {
50: long r, av=avma, lim=stack_lim(av,1);
51: GEN c2, c = powmodulo(a,t1,n);
52:
53: if (!is_pm1(c) && !egalii(t,c)) /* go fishing for -1, not for 1 */
54: {
55: for (r=r1-1; r; r--) /* (this saves one squaring/reduction) */
56: {
57: c2=c; c=resii(sqri(c),n);
58: if (egalii(t,c)) break;
59: if (low_stack(lim, stack_lim(av,1)))
60: {
61: GEN *gsav[2]; gsav[0]=&c; gsav[1]=&c2;
62: if(DEBUGMEM>1) err(warnmem,"miller(rabin)");
63: gerepilemany(av, gsav, 2);
64: }
65: }
66: if (!r) return 1;
67: /* sqrt(-1) seen, compare or remember */
68: if (signe(sqrt1)) /* we saw one earlier: compare */
69: {
70: /* check if too many sqrt(-1)s mod n */
71: if (!egalii(c2,sqrt1) && !egalii(c2,sqrt2)) return 1;
72: }
73: else { affii(c2,sqrt1); affii(subii(n,c2),sqrt2); } /* remember */
74: }
75: return 0;
76: }
77:
78: /* Miller-Rabin test for k random bases */
79: long
80: millerrabin(GEN n, long k)
81: {
82: long r,i,av2, av = avma;
83:
84: if (!signe(n)) return 0;
85: /* If |n| <= 3, check if n = +- 1 */
86: if (lgefint(n)==3 && (ulong)(n[2])<=3) return (n[2] != 1);
87:
88: if (!mod2(n)) return 0;
89: n = init_miller(n); av2=avma;
90: for (i=1; i<=k; i++)
91: {
92: do r = smodsi(mymyrand(),n); while (!r);
93: if (DEBUGLEVEL > 4)
94: fprintferr("Miller-Rabin: testing base %ld\n",
95: r);
96: if (bad_for_base(n, stoi(r))) { avma=av; return 0; }
97: avma=av2;
98: }
99: avma=av; return 1;
100: }
101:
102: /* As above for k bases taken in pr (i.e not random).
103: * We must have |n|>2 and 1<=k<=11 (not checked) or k in {16,17} to select
104: * some special sets of bases.
105: *
106: * By computations of Gerhard Jaeschke, `On strong pseudoprimes to several
107: * bases', Math.Comp. 61 (1993), 915--926 (see also Chris Caldwell's Prime
108: * Number Pages at http://www.utm.edu/research/primes/prove2.html), we have:
109: *
110: * k == 4 (bases 2,3,5,7) correctly detects all composites
111: * n < 118 670 087 467 == 172243 * 688969 with the single exception of
112: * n == 3 215 031 751 == 151 * 751 * 28351,
113: *
114: * k == 5 (bases 2,3,5,7,11) correctly detects all composites
115: * n < 2 152 302 898 747 == 6763 * 10627 * 29947,
116: *
117: * k == 6 (bases 2,3,...,13) correctly detects all composites
118: * n < 3 474 749 660 383 == 1303 * 16927 * 157543,
119: *
120: * k == 7 (bases 2,3,...,17) correctly detects all composites
121: * n < 341 550 071 728 321 == 10670053 * 32010157,
122: * and even this limiting value is caught by an end mismatch between bases
123: * 2 and 5 (or 5 and 17).
124: *
125: * Moreover, the four bases chosen at
126: *
127: * k == 16 (2,13,23,1662803) will correctly detect all composites up
128: * to at least 10^12, and the combination at
129: *
130: * k == 17 (31,73) detects most odd composites without prime factors > 100
131: * in the range n < 2^36 (with less than 250 exceptions, indeed with fewer
132: * than 1400 exceptions up to 2^42). --GN
133: * (DATA TO BE COMPLETED)
134: */
135: int /* no longer static -- needed in mpqs.c */
136: miller(GEN n, long k)
137: {
138: long r,i,av2, av = avma;
139: static long pr[] =
140: { 0, 2,3,5,7,11,13,17,19,23,29, 31,73, 2,13,23,1662803UL, };
141: long *p;
142:
143: if (!mod2(n)) return 0;
144: if (k==16)
145: { /* use smaller (faster) bases if possible */
146: if (lgefint(n)==3 && (ulong)(n[2]) < 3215031751UL) p = pr; /* 2,3,5,7 */
147: else p = pr+13; /* 2,13,23,1662803 */
148: k=4;
149: }
150: else if (k==17)
151: {
152: if (lgefint(n)==3 && (ulong)(n[2]) < 1373653) p = pr; /* 2,3 */
153: else p = pr+11; /* 31,73 */
154: k=2;
155: }
156: else p = pr; /* 2,3,5,... */
157: n = init_miller(n); av2=avma;
158: for (i=1; i<=k; i++)
159: {
160: r = smodsi(p[i],n); if (!r) break;
161: if (bad_for_base(n, stoi(r))) { avma = av; return 0; }
162: avma=av2;
163: }
164: avma=av; return 1;
165: }
166: /***********************************************************************/
167: /** **/
168: /** Pocklington-Lehmer **/
169: /** P-1 primality test **/
170: /** Crude implementation BA 2000Apr21 **/
171: /***********************************************************************/
172:
173: /*assume n>=2*/
174: static long pl831(GEN N, GEN p)
175: {
176: ulong ltop=avma,av;
177: long a;
178: GEN Nmun,Nmunp;
179: Nmun=addis(N,-1);
180: Nmunp=divii(Nmun,p);
181: av=avma;
182: for(a=2;;a++)
183: {
184: GEN b;
185: b=powmodulo(stoi(a),Nmunp,N);
186: if (gcmp1(powmodulo(b,p,N)))
187: {
188: GEN g;
189: g=mppgcd(addis(b,-1),N);
190: if (gcmp1(g))
191: {
192: avma=ltop;
193: return a;
194: }
195: if (!gegal(g,N))
196: {
197: avma=ltop;
198: return 0;
199: }
200: }
201: else
202: {
203: avma=ltop;
204: return 0;
205: }
206: avma=av;
207: }
208: }
209: /*
210: * flag 0: return gun (prime), gzero (composite)
211: * flag 1: return gzero (composite), gun (small prime), matrix (large prime)
212: *
213: * The matrix has 3 columns, [a,b,c] with
214: * a[i] prime factor of N-1,
215: * b[i] witness for a[i] as in pl831
216: * c[i] plisprime(a[i])
217: */
218: extern GEN decomp_limit(GEN n, GEN limit);
219: GEN
220: plisprime(GEN N, long flag)
221: {
222: ulong ltop=avma;
223: long i;
224: int eps;
225: GEN C,F;
226: if ( typ(N) != t_INT ) err(arither1);
227: eps = absi_cmp(N,gdeux);
228: if (eps<=0) return eps? gzero: gun;
229: N = absi(N);
230: /* Use Jaeschke results. See above */
231: if (miller(N,7))
232: { /* compare to 341550071728321 */
233: if (cmpii(N, u2toi(0x136a3, 0x52b2c8c1)) < 0) { avma=ltop; return gun; }
234: }
235: else { avma=ltop; return gzero; }
236: F=(GEN)decomp_limit(addis(N,-1),racine(N))[1];
237: if (DEBUGLEVEL>=3) fprintferr("P.L.:factor O.K.\n");
238: C=cgetg(4,t_MAT);
239: C[1]=lgetg(lg(F),t_COL);
240: C[2]=lgetg(lg(F),t_COL);
241: C[3]=lgetg(lg(F),t_COL);
242: for(i=1;i<lg(F);i++)
243: {
244: long witness;
245: GEN p;
246: p=(GEN)F[i];
247: witness=pl831(N,p);
248: if (!witness)
249: {
250: avma=ltop;
251: return gzero;
252: }
253: mael(C,1,i)=lcopy(p);
254: mael(C,2,i)=lstoi(witness);
255: mael(C,3,i)=(long)plisprime(p,flag);
256: if (gmael(C,3,i)==gzero)
257: err(talker,"Sorry false prime number %Z in plisprime",p);
258: }
259: if (!flag) { avma=ltop; return gun; }
260: return gerepileupto(ltop,C);
261: }
262:
263: /***********************************************************************/
264: /** **/
265: /** PRIMES IN SUCCESSION **/
266: /** (abstracted by GN 1998Aug21 mainly for use in ellfacteur() below) **/
267: /** **/
268: /***********************************************************************/
269:
270: /* map from prime residue classes mod 210 to their numbers in {0...47}.
271: * Subscripts into this array take the form ((k-1)%210)/2, ranging from
272: * 0 to 104. Unused entries are 128
273: */
274: #define NPRC 128 /* non-prime residue class */
275:
276: static
277: unsigned char prc210_no[] =
278: {
279: 0, NPRC, NPRC, NPRC, NPRC, 1, 2, NPRC, 3, 4, NPRC, /* 21 */
280: 5, NPRC, NPRC, 6, 7, NPRC, NPRC, 8, NPRC, 9, /* 41 */
281: 10, NPRC, 11, NPRC, NPRC, 12, NPRC, NPRC, 13, 14, NPRC, /* 63 */
282: NPRC, 15, NPRC, 16, 17, NPRC, NPRC, 18, NPRC, 19, /* 83 */
283: NPRC, NPRC, 20, NPRC, NPRC, NPRC, 21, NPRC, 22, 23, NPRC, /* 105 */
284: 24, 25, NPRC, 26, NPRC, NPRC, NPRC, 27, NPRC, NPRC, /* 125 */
285: 28, NPRC, 29, NPRC, NPRC, 30, 31, NPRC, 32, NPRC, NPRC, /* 147 */
286: 33, 34, NPRC, NPRC, 35, NPRC, NPRC, 36, NPRC, 37, /* 167 */
287: 38, NPRC, 39, NPRC, NPRC, 40, 41, NPRC, NPRC, 42, NPRC, /* 189 */
288: 43, 44, NPRC, 45, 46, NPRC, NPRC, NPRC, NPRC, 47, /* 209 */
289: };
290:
291: /* map from prime residue classes mod 210 (by number) to their smallest
292: * positive representatives
293: */
294: static
295: unsigned char prc210_rp[] =
296: {
297: 1, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79,
298: 83, 89, 97, 101, 103, 107, 109, 113, 121, 127, 131, 137, 139, 143, 149,
299: 151, 157, 163, 167, 169, 173, 179, 181, 187, 191, 193, 197, 199, 209,
300: };
301:
302: /* first differences of the preceding */
303: static
304: unsigned char prc210_d1[] =
305: {
306: 10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6,
307: 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6,
308: 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2, 10, 2,
309: };
310:
311: GEN
312: nextprime(GEN n)
313: {
314: long rc,rc0,rcd,rcn,av1,av2, av = avma;
315:
316: if (typ(n) != t_INT) n=gceil(n); /* accept arguments in R --GN */
317: if (typ(n) != t_INT) err(arither1);
318: if (signe(n) <= 0) { avma=av; return gdeux; }
319: if (lgefint(n) <= 3)
320: { /* check if n <= 7 */
321: ulong k = n[2];
322: if (k <= 2) { avma=av; return gdeux; }
323: if (k == 3) { avma = av; return stoi(3); }
324: if (k <= 5) { avma = av; return stoi(5); }
325: if (k <= 7) { avma = av; return stoi(7); }
326: }
327: /* here n > 7 */
328: if (!(mod2(n))) n = addsi(1,n);
329: rc = rc0 = smodis(n, 210);
330: rcn = (long)(prc210_no[rc0>>1]);
331: /* find next prime residue class mod 210 */
332: while (rcn == NPRC)
333: {
334: rc += 2; /* cannot wrap since 209 is coprime */
335: rcn = (long)(prc210_no[rc>>1]);
336: }
337: if (rc > rc0) n = addsi(rc - rc0, n);
338: /* now find an actual prime */
339: av2 = av1 = avma;
340: for(;;)
341: {
342: if (miller(n,10)) break;
343: av1 = avma;
344: rcd = prc210_d1[rcn];
345: if (++rcn > 47) rcn = 0;
346: n = addsi(rcd,n);
347: }
348: if (av1!=av2) return gerepile(av,av1,n);
349: return (av1==av)? icopy(n): n;
350: }
351:
352: GEN
353: precprime(GEN n)
354: {
355: long rc,rc0,rcd,rcn,av1,av2, av = avma;
356:
357: if (typ(n) != t_INT) n=gfloor(n); /* accept arguments in R --GN */
358: if (typ(n) != t_INT) err(arither1);
359: if (signe(n)<=0) { avma=av; return gzero; }
360: if (lgefint(n) <= 3)
361: { /* check if n <= 10 */
362: ulong k = n[2];
363: if (k <= 1) { avma=av; return gzero; }
364: if (k == 2) { avma=av; return gdeux; }
365: if (k <= 4) { avma=av; return stoi(3); }
366: if (k <= 6) { avma=av; return stoi(5); }
367: if (k <= 10) { avma=av; return stoi(7); }
368: }
369: /* here n >= 11 */
370: if (!(mod2(n))) n = addsi(-1,n);
371: rc = rc0 = smodis(n, 210);
372: rcn = (long)(prc210_no[rc0>>1]);
373: /* find last prime residue class mod 210 */
374: while (rcn == NPRC)
375: {
376: rc -= 2; /* cannot wrap since 1 is coprime */
377: rcn = (long)(prc210_no[rc>>1]);
378: }
379: if (rc < rc0) n = addsi(rc - rc0, n);
380: /* now find an actual prime */
381: av2 = av1 = avma;
382: for(;;)
383: {
384: if (miller(n,10)) break;
385: av1 = avma;
386: if (rcn == 0)
387: { rcd = 2; rcn = 47; }
388: else
389: rcd = prc210_d1[--rcn];
390: n = addsi(-rcd,n);
391: }
392: if (av1!=av2) return gerepile(av,av1,n);
393: return (av1==av)? icopy(n): n;
394: }
395:
396: /* find next single-word prime strictly larger than p. If **d is non-NULL,
397: * this will be p + *(*d)++, using the diffptr table. Otherwise imitate
398: * nextprime(). Apart from *d, caller must supply a long variable to which
399: * rcn points, initialized either to NPRC or to the correct residue class
400: * number for the current p; we'll use this to track the current prime
401: * residue class mod 210 once we're out of range of the diffptr table, and
402: * we'll update it before that if it isn't NPRC. *q is incremented when-
403: * ever q!=NULL and we wrap from 209 mod 210 to 1 mod 210; this makes sense
404: * only when *rcn already held the correct value. Caller must also supply
405: * the second argument for miller(). --GN1998Aug22
406: */
407: ulong
408: snextpr(ulong p, byteptr *d, long *rcn, long *q, long k)
409: {
410: static ulong pp[] =
411: { evaltyp(t_INT)|m_evallg(3), evalsigne(1)|evallgefint(3), 0 };
412: static ulong *pp2 = pp + 2;
413: static GEN gp = (GEN)pp;
414: long d1 = **d, rcn0;
415:
416: if (d1)
417: {
418: if (*rcn != NPRC)
419: {
420: rcn0 = *rcn;
421: while (d1 > 0)
422: {
423: d1 -= prc210_d1[*rcn];
424: if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
425: }
426: if (d1 < 0)
427: {
428: fprintferr("snextpr: prime %lu wasn\'t %lu mod 210\n",
429: p, (ulong)prc210_rp[rcn0]);
430: err(bugparier, "[caller of] snextpr");
431: }
432: }
433: return p + *(*d)++;
434: }
435: /* we are beyond the diffptr table */
436: if (*rcn == NPRC) /* we need to initialize this now */
437: {
438: *rcn = prc210_no[(p % 210) >> 1];
439: if (*rcn == NPRC)
440: {
441: fprintferr("snextpr: %lu should have been prime but isn\'t\n", p);
442: err(bugparier, "[caller of] snextpr");
443: }
444: }
445: /* look for the next one */
446: *pp2 = p;
447: *pp2 += prc210_d1[*rcn];
448: if (++*rcn > 47) *rcn = 0;
449: while (!miller(gp, k))
450: {
451: *pp2 += prc210_d1[*rcn];
452: if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
453: if (*pp2 <= 11) /* wraparound mod 2^BITS_IN_LONG */
454: {
455: fprintferr("snextpr: integer wraparound after prime %lu\n", p);
456: err(bugparier, "[caller of] snextpr");
457: }
458: }
459: return *pp2;
460: }
461:
462:
463: /***********************************************************************/
464: /** **/
465: /** FACTORIZATION (ECM) **/
466: /** Integer factorization using the elliptic curves method (ECM). **/
467: /** ellfacteur() returns a non trivial factor of N, assuming N>0, **/
468: /** is composite, and has no prime divisor below 2^14 or so. **/
469: /** Extensively modified by GN Jul-Aug 1998, with much helpful **/
470: /** advice by Paul Zimmermann. Thanks also to Guillaume Hanrot **/
471: /** and Igor Schein for providing many CPU cycles whilst testing. **/
472: /** **/
473: /***********************************************************************/
474:
475: static GEN N, gl, *XAUX;
476: #define nbcmax 64 /* max number of simultaneous curves */
477: #define bstpmax 1024 /* max number of baby step table entries */
478:
479: /* addition/doubling/multiplication of a point on an `elliptic curve'
480: * mod N may result in one of three things: a new bona fide point,
481: * a point at infinity (betraying itself by a denominator divisible
482: * by N), or a point which is at infinity mod some nontrivial factor
483: * of N but finite mod some other factor (betraying itself by a denom-
484: * inator which has nontrivial gcd with N, and this is of course what
485: * we want).
486: */
487: /* (In the second case, addition/doubling will simply abort, copying one
488: * of the summands to the destination array of points unless they coincide.
489: * Multiplication will stop at some unpredictable intermediate stage: The
490: * destination will contain _some_ multiple of the input point, but not
491: * necessarily the desired one, which doesn't matter. As long as we're
492: * multiplying (B1 phase) we simply carry on with the next multiplier.
493: * During the B2 phase, the only additions are the giant steps, and the
494: * worst that can happen here is that we lose one residue class mod 210
495: * of prime multipliers on 4 of the curves, so again, we ignore the problem
496: * and just carry on.) */
497: /* The idea is: Select a handful of curves mod N and one point P on each of
498: * them. Try to compute, for each such point, the multiple [M]P = Q where
499: * M is the product of all powers <= B2 of primes <= nextprime(B1), for some
500: * suitably chosen B1 and B2. Then check whether multiplying Q by one of the
501: * primes < nextprime(B2) would betray a factor. This second stage proceeds
502: * by looking separately at the primes in each residue class mod 210, four
503: * curves at a time, and stepping additively to ever larger multipliers,
504: * by comparing X coordinates of points which we would need to add in order
505: * to reach another prime multiplier in the same residue class. `Comparing'
506: * means that we accumulate a product of differences of X coordinates, and
507: * from time to time take a gcd of this product with N.
508: */
509: /* Montgomery's trick of hiding the cost of computing inverses mod N at a
510: * price of three extra multiplications mod N, by working on up to 64 or
511: * even 128 points in parallel, is used heavily. --GN
512: */
513:
514: /* *** auxiliary functions for ellfacteur: *** */
515:
516: /* Parallel addition on nbc curves, assigning the result to locations at and
517: * following *X3, *Y3. Safe to be called with X3,Y3 equal to X2,Y2 (_not_
518: * to X1,Y1). It is also safe to overwrite Y2 with X3. (If Y coords of
519: * result not desired, set Y3=NULL.) If nbc1 < nbc, the first summand is
520: * assumed to hold only nbc1 distinct points, which are repeated as often
521: * as we need them (useful for adding one point on each of a few curves
522: * to several other points on the same curves).
523: * Return 0 when successful, 1 when we hit a denominator divisible by N,
524: * and 2 when gcd(denominator, N) is a nontrivial factor of N, which will
525: * be preserved in gl.
526: * We use more stack space than the old code did, and thus run a bit of a
527: * risk of overflowing it, but it's still bounded by a constant multiple
528: * of lgefint(N)*nbc, as it was in the old version --GN1998Jul02,Aug12
529: */
530: /* (Lessee: Second phase creates 12 items on the stack, per iteration,
531: * of which four are twice as long and one is thrice as long as N --
532: * makes 18 units per iteration. First phase creates 4 units. Total
533: * can be as large as about 4*nbcmax+18*8 units. And elladd2() is just
534: * as bad, and elldouble() comes to about 3*nbcmax+29*8 units. A few
535: * strategic garbage collections every 8 iterations should help when nbc
536: * is large...) --GN1998Aug23
537: */
538:
539: static int
540: elladd0(long nbc, long nbc1,
541: GEN *X1, GEN *Y1, GEN *X2, GEN *Y2, GEN *X3, GEN *Y3)
542: {
543: GEN lambda;
544: GEN W[2*nbcmax], *A=W+nbc; /* W[0],A[0] never used */
545: long i, av=avma, tetpil;
546: ulong mask = ~0UL;
547:
548: /* actually, this is only ever called with nbc1==nbc or nbc1==4, so: */
549: if (nbc1 == 4) mask = 3;
550: else if (nbc1 < nbc) err(bugparier, "[caller of] elladd0");
551:
552: /* W[0] = gun; */
553: W[1] = /* A[0] =*/ subii(X1[0], X2[0]);
554: for (i=1; i<nbc; i++)
555: {
556: A[i] = subii(X1[i&mask], X2[i]); /* don't waste time reducing mod N here */
557: W[i+1] = modii(mulii(A[i], W[i]), N);
558: }
559: tetpil = avma;
560:
561: /* if gl != N we have a factor */
562: if (!invmod(W[nbc], N, &gl))
563: {
564: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
565: if (X2 != X3)
566: {
567: long k;
568: /* cannot add on one of the curves mod N: make sure X3 contains
569: * something useful before letting the caller proceed
570: */
571: for (k = 2*nbc; k--; ) affii(X2[k],X3[k]);
572: }
573: avma = av; return 1;
574: }
575:
576: while (i--) /* nbc times, actually */
577: {
578: lambda = modii(mulii(subii(Y1[i&mask], Y2[i]),
579: i?mulii(gl, W[i]):gl), N);
580: modiiz(subii(sqri(lambda), addii(X2[i], X1[i&mask])), N, X3[i]);
581: if (Y3)
582: modiiz(subii(mulii(lambda, subii(X1[i&mask], X3[i])),
583: Y1[i&mask]),
584: N, Y3[i]);
585: if (!i) break;
586: gl = modii(mulii(gl, A[i]), N);
587: if (!(i&7)) gl = gerepileupto(tetpil, gl);
588: }
589: avma=av; return 0;
590: }
591:
592: /* Shortcut variant, for use in cases where Y coordinates follow their
593: * corresponding X coordinates, and the first summand doesn't need to be
594: * repeated
595: */
596: static int
597: elladd(long nbc, GEN *X1, GEN *X2, GEN *X3)
598: {
599: return elladd0(nbc, nbc, X1, X1+nbc, X2, X2+nbc, X3, X3+nbc);
600: }
601: /* this could perhaps become a macro --GN */
602:
603: /* The next one is exactly the same except it does twice as many additions
604: * (and thus hides even more of the cost of the modular inverse); the net
605: * effect is the same as elladd(nbc,X1,X2,X3) followed by elladd(nbc,X4,X5,X6).
606: * Safe to have X2==X3 and/or X5==X6, and of course safe to have X1 or X2
607: * coincide with X4 or X5, in any order.
608: */
609:
610: static int
611: elladd2(long nbc, GEN *X1, GEN *X2, GEN *X3, GEN *X4, GEN *X5, GEN *X6)
612: {
613: GEN lambda, *Y1 = X1+nbc, *Y2 = X2+nbc, *Y3 = X3+nbc;
614: GEN *Y4 = X4+nbc, *Y5 = X5+nbc, *Y6 = X6+nbc;
615: GEN W[4*nbcmax], *A=W+2*nbc; /* W[0],A[0] never used */
616: long i,j, av=avma, tetpil;
617: /* W[0] = gun; */
618: W[1] = /* A[0] =*/ subii(X1[0], X2[0]);
619: for (i=1; i<nbc; i++)
620: {
621: A[i] = subii(X1[i], X2[i]); /* don't waste time reducing mod N here */
622: W[i+1] = modii(mulii(A[i], W[i]), N);
623: }
624: for (j=0; j<nbc; i++,j++)
625: {
626: A[i] = subii(X4[j], X5[j]);
627: W[i+1] = modii(mulii(A[i], W[i]), N);
628: }
629: tetpil = avma;
630:
631: /* if gl != N we have a factor */
632: if (!invmod(W[2*nbc], N, &gl))
633: {
634: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
635: if (X2 != X3)
636: {
637: long k;
638: /* cannot add on one of the curves mod N: make sure X3 contains
639: * something useful before letting the caller proceed
640: */
641: for (k = 2*nbc; k--; ) affii(X2[k],X3[k]);
642: }
643: if (X5 != X6)
644: {
645: long k;
646: /* same for X6 */
647: for (k = 2*nbc; k--; ) affii(X5[k],X6[k]);
648: }
649: avma = av; return 1;
650: }
651:
652: while (j--) /* nbc times, actually */
653: {
654: i--;
655: lambda = modii(mulii(subii(Y4[j], Y5[j]),
656: mulii(gl, W[i])), N);
657: modiiz(subii(sqri(lambda), addii(X5[j], X4[j])), N, X6[j]);
658: modiiz(subii(mulii(lambda, subii(X4[j], X6[j])), Y4[j]), N, Y6[j]);
659: gl = modii(mulii(gl, A[i]), N);
660: if (!(i&7)) gl = gerepileupto(tetpil, gl);
661: }
662: while (i--) /* nbc times */
663: {
664: lambda = modii(mulii(subii(Y1[i], Y2[i]),
665: i?mulii(gl, W[i]):gl), N);
666: modiiz(subii(sqri(lambda), addii(X2[i], X1[i])), N, X3[i]);
667: modiiz(subii(mulii(lambda, subii(X1[i], X3[i])), Y1[i]), N, Y3[i]);
668: if (!i) break;
669: gl = modii(mulii(gl, A[i]), N);
670: if (!(i&7)) gl = gerepileupto(tetpil, gl);
671: }
672: avma=av; return 0;
673: }
674:
675: /* Parallel doubling on nbc curves, assigning the result to locations at
676: * and following *X2. Safe to be called with X2 equal to X1. Return
677: * value as for elladd() above. If we find a point at infinity mod N,
678: * and if X1 != X2, we copy the points at X1 to X2.
679: * Use fewer assignments than the old code. Strangely, whereas this gains
680: * about 3% on my P133 with elladd(), it makes hardly any difference here
681: * with elldouble() --GN
682: */
683: static int
684: elldouble(long nbc, GEN *X1, GEN *X2)
685: {
686: GEN lambda,v, *Y1 = X1+nbc, *Y2 = X2+nbc;
687: GEN W[nbcmax+1]; /* W[0] never used */
688: long i, av=avma, tetpil;
689: /*W[0] = gun;*/ W[1] = Y1[0];
690: for (i=1; i<nbc; i++)
691: W[i+1] = modii(mulii(Y1[i], W[i]), N);
692: tetpil = avma;
693:
694: if (!invmod(W[nbc], N, &gl))
695: {
696: if (!egalii(N,gl)) { gl = gerepile(av,tetpil,gl); return 2; }
697: if (X1 != X2)
698: {
699: long k;
700: /* cannot double on one of the curves mod N: make sure X2 contains
701: * something useful before letting the caller proceed
702: */
703: for (k = 2*nbc; k--; ) affii(X1[k],X2[k]);
704: }
705: avma = av; return 1;
706: }
707:
708: while (i--) /* nbc times, actually */
709: {
710: lambda = modii(mulii(addsi(1, mulsi(3, sqri(X1[i]))),
711: i?mulii(gl,W[i]):gl), N);
712: if (signe(lambda)) /* half of zero is still zero */
713: lambda = shifti(mod2(lambda)? addii(lambda, N): lambda, -1);
714: v = modii(subii(sqri(lambda), shifti(X1[i],1)), N);
715: if (i) gl = modii(mulii(gl, Y1[i]), N);
716: modiiz(subii(mulii(lambda, subii(X1[i], v)), Y1[i]), N, Y2[i]);
717: affii(v, X2[i]);
718: if (!(i&7) && i) gl = gerepileupto(tetpil, gl);
719: }
720: avma = av; return 0;
721: }
722:
723: /* Parallel multiplication by an odd prime k on nbc curves, storing the
724: * result to locations at and following *X2. Safe to be called with X2
725: * equal to X1. Return values as for elladd() and elldouble().
726: * Uses (a simplified variant of) Peter Montgomery's PRAC (PRactical Addition
727: * Chain) algorithm; see ftp://ftp.cwi.nl/pub/pmontgom/Lucas.ps.gz .
728: * With thanks to Paul Zimmermann for the reference. --GN1998Aug13
729: */
730:
731: /* We use an array of GENs pointed at by XAUX as a scratchpad; this will
732: * have been set up by ellfacteur() (so we don't need to reinitialize it
733: * each time).
734: */
735:
736: static int
737: ellmult(long nbc, ulong k, GEN *X1, GEN *X2) /* k>2 prime, not checked */
738: {
739: long i,d,e,e1,r,av=avma,tetpil;
740: int res;
741: GEN *A=X2, *B=XAUX, *S, *T=XAUX+2*nbc;
742:
743: for (i = 2*nbc; i--; ) { affii(X1[i],XAUX[i]); }
744: tetpil = avma;
745:
746: /* first doubling picks up X1; after this we'll be working in XAUX and
747: * X2 only, mostly via A and B and T
748: */
749: if ((res = elldouble(nbc, X1, X2)) != 0)
750: {
751: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
752: return res;
753: }
754:
755: /* split the work at the golden ratio */
756: r = (long)(k*0.61803398875 + .5);
757: d = k - r; e = r - d; /* NB d+e == r, so no danger of ofl below */
758:
759: while (d != e)
760: {
761:
762: /* apply one of the nine transformations from PM's Table 4. We first
763: * figure out which, and then go into an eight-way switch, because
764: * some of the transformations are similar enough to share code.
765: */
766: if (d <= e + (e>>2)) /* floor(1.25*e) */
767: {
768: if ((d+e)%3 == 0)
769: { i = 0; goto apply; } /* Table 4, rule 1 */
770: else if ((d-e)%6 == 0)
771: { i = 1; goto apply; } /* rule 2 */
772: /* else fall through */
773: }
774: if ((d+3)>>2 <= e) /* equiv to d <= 4*e but cannot ofl */
775: { i = 2; goto apply; } /* rule 3, the most common case */
776: if ((d&1)==(e&1))
777: { i = 1; goto apply; } /* rule 4, which does the same as rule 2 */
778: if (!(d&1))
779: { i = 3; goto apply; } /* rule 5 */
780: if (d%3 == 0)
781: { i = 4; goto apply; } /* rule 6 */
782: if ((d+e)%3 == 0)
783: { i = 5; goto apply; } /* rule 7 */
784: if ((d-e)%3 == 0)
785: { i = 6; goto apply; } /* rule 8 */
786: /* when we get here, e must be even, for otherwise one of rules 4,5
787: * would have applied
788: */
789: i = 7; /* rule 9 */
790:
791: apply:
792: switch(i) /* i takes values in {0,...,7} here */
793: {
794: case 0: /* rule 1 */
795: e1 = d - e; d = (d + e1)/3; e = (e - e1)/3;
796: if ((res = elladd(nbc, A, B, T)) != 0)
797: {
798: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
799: return res;
800: }
801: if ((res = elladd2(nbc, T, A, A, T, B, B)) != 0)
802: {
803: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
804: return res;
805: }
806: break; /* end of rule 1 */
807: case 1: /* rules 2 and 4, part 1 */
808: d -= e;
809: if ((res = elladd(nbc, A, B, B)) != 0)
810: {
811: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
812: return res;
813: }
814: /* FALL THROUGH */
815: case 3: /* rule 5, and 2nd part of rules 2 and 4 */
816: d >>= 1;
817: if ((res = elldouble(nbc, A, A)) != 0)
818: {
819: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
820: return res;
821: }
822: break; /* end of rules 2, 4, and 5 */
823: case 4: /* rule 6 */
824: d /= 3;
825: if ((res = elldouble(nbc, A, T)) != 0)
826: {
827: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
828: return res;
829: }
830: if ((res = elladd(nbc, T, A, A)) != 0)
831: {
832: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
833: return res;
834: }
835: /* FALL THROUGH */
836: case 2: /* rule 3, and 2nd part of rule 6 */
837: d -= e;
838: if ((res = elladd(nbc, A, B, B)) != 0)
839: {
840: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
841: return res;
842: }
843: break; /* end of rules 3 and 6 */
844: case 5: /* rule 7 */
845: d = (d - e - e)/3;
846: if ((res = elldouble(nbc, A, T)) != 0)
847: {
848: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
849: return res;
850: }
851: if ((res = elladd2(nbc, T, A, A, T, B, B)) != 0)
852: {
853: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
854: return res;
855: }
856: break; /* end of rule 7 */
857: case 6: /* rule 8 */
858: d = (d - e)/3;
859: if ((res = elladd(nbc, A, B, B)) != 0)
860: {
861: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
862: return res;
863: }
864: if ((res = elldouble(nbc, A, T)) != 0)
865: {
866: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
867: return res;
868: }
869: if ((res = elladd(nbc, T, A, A)) != 0)
870: {
871: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
872: return res;
873: }
874: break; /* end of rule 8 */
875: case 7: /* rule 9 */
876: e >>= 1;
877: if ((res = elldouble(nbc, B, B)) != 0)
878: {
879: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
880: return res;
881: }
882: break; /* end of rule 9 */
883: default: /* never reached */
884: break;
885: }
886: /* end of Table 4 processing */
887:
888: /* swap d <-> e and A <-> B if necessary */
889: if (d < e) { r = d; d = e; e = r; S = A; A = B; B = S; }
890: } /* while */
891: if ((res = elladd(nbc, XAUX, X2, X2)) != 0)
892: {
893: if (res > 1) { gl = gerepile(av,tetpil,gl); } else avma = av;
894: return res;
895: }
896: avma = av; return 0;
897: }
898:
899: /* PRAC implementation notes - main changes against the paper version:
900: * (1) The general function [m+n]P = f([m]P,[n]P,[m-n]P) collapses (for
901: * m!=n) to an elladd() which does not depend on the third argument; and
902: * thus all references to the third variable (C in the paper) can be elimi-
903: * nated. (2) Since our multipliers are prime, the outer loop of the paper
904: * version executes only once, and thus is invisible above. (3) The first
905: * step in the inner loop of the paper version will always be rule 3, but
906: * the addition requested by this rule amounts to a doubling, and it will
907: * always be followed by a swap, so we have unrolled this first iteration.
908: * (4) Some simplifications in rules 6 and 7 are possible given the above,
909: * and we can save one addition in each of the two cases. NB one can show
910: * that none of the other elladd()s in the loop can ever turn out to de-
911: * generate into an elldouble. (5) I tried to optimize for rule 3, which
912: * is used far more frequently than all others together, but it didn't
913: * improve things, so I removed the nested tight loop again. --GN
914: */
915:
916: /* The main loop body of ellfacteur() runs slightly _slower_ under PRAC than
917: * under a straightforward left-shift binary multiplication algorithm when
918: * N has <30 digits and B1 is small; PRAC wins when N and B1 get larger.
919: * Weird. --GN
920: */
921:
922: /* memory layout in ellfacteur(): We'll have a large-ish array of GEN
923: * pointers, and one huge chunk of memory containing all the actual GEN
924: * (t_INT) objects.
925: * nbc will be held constant throughout the invocation.
926: */
927: /* The B1 stage of each iteration through the main loop needs little
928: * space: enough for the X and Y coordinates of the current points,
929: * and twice as much again as scratchpad for ellmult().
930: */
931: /* The B2 stage, starting from some current set of points Q, needs, in
932: * succession:
933: * - space for [2]Q, [4]Q, ..., [10]Q, and [p]Q for building the helix;
934: * - space for 48*nbc X and Y coordinates to hold the helix. Now this
935: * could re-use [2]Q,...,[8]Q, but only with difficulty, since we don't
936: * know in advance which residue class mod 210 our p is going to be in.
937: * It can and should re-use [p]Q, though;
938: * - space for (temporarily [30]Q and then) [210]Q, [420]Q, and several
939: * further doublings until the giant step multiplier is reached. This
940: * _can_ re-use the remaining cells from above. The computation of [210]Q
941: * will have been the last call to ellmult() within this iteration of the
942: * main loop, so the scratchpad is now also free to be re-used. We also
943: * compute [630]Q by a parallel addition; we'll need it later to get the
944: * baby-step table bootstrapped a little faster.
945: */
946: /* Finally, for no more than 4 curves at a time, room for up to 1024 X
947: * coordinates only (the Y coordinates needed whilst setting up this baby
948: * step table are temporarily stored in the upper half, and overwritten
949: * during the last series of additions).
950: */
951: /* Graphically: after end of B1 stage (X,Y are the coords of Q):
952: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
953: * | X Y | scratch | [2]Q| [4]Q| [6]Q| [8]Q|[10]Q| ... | ...
954: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
955: * *X *XAUX *XT *XD *XB
956: *
957: * [30]Q is computed from [10]Q. [210]Q can go into XY, etc:
958: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
959: * |[210]|[420]|[630]|[840]|[1680,3360,6720,...,2048*210] |bstp table...
960: * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
961: * *X *XAUX *XT *XD [*XG, somewhere here] *XB .... *XH
962: *
963: * So we need (13 + 48) * 2 * nbc slots here, and another 4096 slots for
964: * the baby step table (not all of which will be used when we start with a
965: * small B1, but it's better to allocate and initialize ahead of time all
966: * the slots that might be needed later).
967: */
968: /* Note on memory locality: During the B2 phase, accesses to the helix
969: * (once it has been set up) will be clustered by curves (4 out of nbc at
970: * a time). Accesses to the baby steps table will wander from one end of
971: * the array to the other and back, one such cycle per giant step, and
972: * during a full cycle we would expect on the order of 2E4 accesses when
973: * using the largest giant step size. Thus we shouldn't be doing too bad
974: * with respect to thrashing a (512KBy) L2 cache. However, we don't want
975: * the baby step table to grow larger than this, even if it would reduce
976: * the number of E.C. operations by a few more per cent for very large B2,
977: * lest cache thrashing slow down everything disproportionally. --GN
978: */
979:
980: /* parameters for miller() via snextpr(), for use by ellfacteur() */
981: #define miller_k1 16 /* B1 phase, foolproof below 10^12 */
982: #define miller_k2 1 /* B2 phase, not foolproof, much faster */
983: /* (miller_k2 will let thousands of composites slip through, which doesn't
984: * harm ECM, but ellmult() during the B1 phase should only be fed primes
985: * which really are prime)
986: */
987: /* ellfacteur() has been re-tuned to be useful as a first stage before
988: * MPQS, especially for _large_ arguments, when insist is false, and now
989: * also for the case when insist is true, vaguely following suggestions
990: * by Paul Zimmermann (see http://www.loria.fr/~zimmerma/ and especially
991: * http://www.loria.fr/~zimmerma/records/ecmnet.html) of INRIA/LORIA.
992: * --GN 1998Jul,Aug
993: */
994: GEN
995: ellfacteur(GEN n, int insist)
996: {
997: static ulong TB1[] =
998: {
999: /* table revised, cf. below 1998Aug15 --GN */
1000: 142,172,208,252,305,370,450,545,661,801,972,1180,1430,
1001: 1735,2100,2550,3090,3745,4540,5505,6675,8090,9810,11900,
1002: 14420,17490,21200,25700,31160,37780UL,45810UL,55550UL,67350UL,
1003: 81660UL,99010UL,120050UL,145550UL,176475UL,213970UL,259430UL,
1004: 314550UL,381380UL,462415UL,560660UL,679780UL,824220UL,999340UL,
1005: 1211670UL,1469110UL,1781250UL,2159700UL,2618600UL,3175000UL,
1006: 3849600UL,4667500UL,5659200UL,6861600UL,8319500UL,10087100UL,
1007: 12230300UL,14828900UL,17979600UL,21799700UL,26431500UL,
1008: 32047300UL,38856400UL, /* 110 times that still fits into 32bits */
1009: #ifdef LONG_IS_64BIT
1010: 47112200UL,57122100UL,69258800UL,83974200UL,101816200UL,
1011: 123449000UL,149678200UL,181480300UL,220039400UL,266791100UL,
1012: 323476100UL,392204900UL,475536500UL,576573500UL,699077800UL,
1013: 847610500UL,1027701900UL,1246057200UL,1510806400UL,1831806700UL,
1014: 2221009800UL,2692906700UL,3265067200UL,3958794400UL,4799917500UL,
1015: /* the only reason to stop here is that I got bored (and that users
1016: * will get bored watching their 64bit machines churning on such large
1017: * numbers for month after month). Someone can extend this table when
1018: * the hardware has gotten 100 times faster than now --GN
1019: */
1020: #endif
1021: };
1022: static ulong TB1_for_stage[] =
1023: {
1024: /* table revised 1998Aug11 --GN. The idea is to start a little below
1025: * the optimal B1 for finding factors which would just have been missed
1026: * by pollardbrent(), and escalate gradually, changing curves suf-
1027: * ficiently frequently to give good coverage of the small factor
1028: * ranges. The table entries grow a bit faster than what Paul says
1029: * would be optimal, but having a single table instead of a 2D array
1030: * keeps the code simple
1031: */
1032: 500,520,560,620,700,800,900,1000,1150,1300,1450,1600,1800,2000,
1033: 2200,2450,2700,2950,3250,3600,4000,4400,4850,5300,5800,6400,
1034: 7100,7850,8700,9600,10600,11700,12900,14200,15700,17300,
1035: 19000,21000,23200,25500,28000,31000,34500UL,38500UL,43000UL,
1036: 48000UL,53800UL,60400UL,67750UL,76000UL,85300UL,95700UL,
1037: 107400UL,120500UL,135400UL,152000UL,170800UL,191800UL,215400UL,
1038: 241800UL,271400UL,304500UL,341500UL,383100UL,429700UL,481900UL,
1039: 540400UL,606000UL,679500UL,761800UL,854100UL,957500UL,1073500UL,
1040: };
1041: long nbc,nbc2,dsn,dsnmax,rep,spc,gse,gss,rcn,rcn0,bstp,bstp0;
1042: long a,i,j,k, av,av1,avtmp, size = expi(n) + 1, tf = lgefint(n);
1043: ulong B1,B2,B2_p,B2_rt,m,p,p0,p2,dp;
1044: GEN w,w0,x,*X,*XT,*XD,*XG,*YG,*XH,*XB,*XB2,*Xh,*Yh,*Xb, res = cgeti(tf);
1045: int rflag, use_clones = 0;
1046: byteptr d, d0;
1047:
1048: av = avma; /* taking res into account */
1049: N = n; /* make n known to auxiliary functions */
1050: /* determine where we'll start, how long we'll persist, and how many
1051: * curves we'll use in parallel
1052: */
1053: if (insist)
1054: {
1055: dsnmax = (size >> 2) - 10;
1056: if (dsnmax < 0) dsnmax = 0;
1057: #ifdef LONG_IS_64BIT
1058: else if (dsnmax > 90) dsnmax = 90;
1059: #else
1060: else if (dsnmax > 65) dsnmax = 65;
1061: #endif
1062: dsn = (size >> 3) - 5;
1063: if (dsn < 0) dsn = 0;
1064: else if (dsn > 47) dsn = 47;
1065: /* pick up the torch where non-insistent stage would have given up */
1066: nbc = dsn + (dsn >> 2) + 9; /* 8 or more curves in parallel */
1067: nbc &= ~3; /* nbc is always a multiple of 4 */
1068: if (nbc > nbcmax) nbc = nbcmax;
1069: a = 1 + (nbcmax<<7); /* seed for choice of curves */
1070: rep = 0; /* gcc -Wall */
1071: }
1072: else
1073: {
1074: dsn = (size - 140) >> 3;
1075: if (dsn > 12) dsn = 12;
1076: dsnmax = 72;
1077: if (dsn < 0) /* < 140 bits: decline the task */
1078: {
1079: #ifdef __EMX__
1080: /* MPQS's disk access under DOS/EMX would be abysmally slow, so... */
1081: dsn = 0;
1082: rep = 20;
1083: nbc = 8;
1084: #else
1085: if (DEBUGLEVEL >= 4)
1086: {
1087: fprintferr("ECM: number too small to justify this stage\n");
1088: flusherr();
1089: }
1090: avma = av; return NULL;
1091: #endif
1092: }
1093: else
1094: {
1095: rep = (size <= 248 ?
1096: (size <= 176 ? (size - 124) >> 4 : (size - 148) >> 3) :
1097: (size - 224) >> 1);
1098: nbc = ((size >> 3) << 2) - 80;
1099: if (nbc < 8) nbc = 8;
1100: else if (nbc > nbcmax) nbc = nbcmax;
1101: #ifdef __EMX__
1102: rep += 20;
1103: #endif
1104: }
1105:
1106: /* it may be convenient to use disjoint sets of curves for the non-insist
1107: * and insist phases; moreover, repeated non-insistent calls acting on
1108: * factors of the same original number should try to use fresh curves.
1109: * The following achieves this
1110: */
1111: a = 1 + (nbcmax<<3)*(size & 0xf);
1112: }
1113: if (dsn > dsnmax) dsn = dsnmax;
1114:
1115: if (DEBUGLEVEL >= 4)
1116: {
1117: (void) timer2(); /* clear timer */
1118: fprintferr("ECM: working on %ld curves at a time; initializing", nbc);
1119: if (!insist)
1120: {
1121: if (rep == 1)
1122: fprintferr(" for one round");
1123: else
1124: fprintferr(" for up to %ld rounds", rep);
1125: }
1126: fprintferr("...\n");
1127: }
1128:
1129: /* The auxiliary routines above need < (3*nbc+240)*tf words on the PARI
1130: * stack, in addition to the spc*(tf+1) words occupied by our main table.
1131: * If stack space is already tight, try the heap, using newbloc() and
1132: * killbloc()
1133: */
1134: nbc2 = nbc << 1;
1135: spc = (13 + 48) * nbc2 + bstpmax * 4;
1136: if ((long)((GEN)avma - (GEN)bot) < spc + 385 + (spc + 3*nbc + 240)*tf)
1137: {
1138: if (DEBUGLEVEL >= 5)
1139: {
1140: fprintferr("ECM: stack tight, using clone space on the heap\n");
1141: }
1142: use_clones = 1;
1143: x = newbloc(spc + 385);
1144: }
1145: else
1146: x = new_chunk(spc + 385);
1147: X = 1 + (GEN*)x; /* B1 phase: current point */
1148: XAUX = X + nbc2; /* scratchpad for ellmult() */
1149: XT = XAUX + nbc2; /* ditto, will later hold [3*210]Q */
1150: XD = XT + nbc2; /* room for various multiples */
1151: XB = XD + 20*nbc; /* start of baby steps table */
1152: XB2 = XB + 2 * bstpmax; /* middle of baby steps table */
1153: XH = XB2 + 2 * bstpmax; /* end of bstps table, start of helix */
1154: Xh = XH + 96*nbc; /* little helix, X coords */
1155: Yh = XH + 192; /* ditto, Y coords */
1156: /* XG will be set later, inside the main loop, since it depends on B2 */
1157:
1158: {
1159: long tw = evallg(tf) | evaltyp(t_INT);
1160:
1161: if (use_clones)
1162: w = newbloc(spc*tf);
1163: else
1164: w = new_chunk(spc*tf);
1165: w0 = w; /* remember this for later... */
1166: for (i = spc; i--; )
1167: {
1168: *w = tw; X[i] = w; w += tf; /* hack for: w = cgeti(tf) */
1169: }
1170: /* Xh range of 384 pointers not set; these will later duplicate the
1171: * pointers in the XH range, 4 curves at a time. Some of the cells
1172: * reserved here for the XB range will never be used, instead, we'll
1173: * warp the pointers to connect to (read-only) GENs in the X/XD range;
1174: * it would be complicated to skip them here to conserve merely a few
1175: * KBy of stack or heap space. --GN
1176: */
1177: }
1178:
1179: /* *** ECM MAIN LOOP *** */
1180: for(;;)
1181: {
1182: d = diffptr; rcn = NPRC; /* multipliers begin at the beginning */
1183:
1184: /* pick curves */
1185: for (i = nbc2; i--; ) affsi(a++, X[i]);
1186: /* pick bounds */
1187: B1 = insist ? TB1[dsn] : TB1_for_stage[dsn];
1188: B2 = 110*B1;
1189: B2_rt = (ulong)(sqrt((double)B2));
1190: /* pick giant step exponent and size.
1191: * With 32 baby steps, a giant step corresponds to 32*420 = 13440, appro-
1192: * priate for the smallest B2s. With 1024, a giant step will be 430080;
1193: * this will be appropriate for B1 >~ 42000, where 512 baby steps would
1194: * imply roughly the same number of E.C. additions.
1195: */
1196: gse = (B1 < 656 ?
1197: (B1 < 200 ? 5 : 6) :
1198: (B1 < 10500 ?
1199: (B1 < 2625 ? 7 : 8) :
1200: (B1 < 42000 ? 9 : 10)
1201: )
1202: );
1203: gss = 1UL << gse;
1204: XG = XT + gse*nbc2; /* will later hold [2^(gse+1)*210]Q */
1205: YG = XG + nbc;
1206:
1207: if (DEBUGLEVEL >= 4)
1208: {
1209: fprintferr("ECM: time = %6ld ms\nECM: dsn = %2ld,\tB1 = %4lu,",
1210: timer2(), dsn, B1);
1211: fprintferr("\tB2 = %6lu,\tgss = %4ld*420\n", B2, gss);
1212: flusherr();
1213: }
1214: p = *d++;
1215:
1216: /* ---B1 PHASE--- */
1217: /* treat p=2 separately */
1218: B2_p = B2 >> 1;
1219: for (m=1; m<=B2_p; m<<=1)
1220: {
1221: if ((rflag = elldouble(nbc, X, X)) > 1) goto fin;
1222: else if (rflag) break;
1223: }
1224:
1225: /* p=3,...,nextprime(B1) */
1226: while (p < B1 && p <= B2_rt)
1227: {
1228: p = snextpr(p, &d, &rcn, NULL, miller_k1);
1229: B2_p = B2/p; /* beware integer overflow on 32-bit CPUs */
1230: for (m=1; m<=B2_p; m*=p)
1231: {
1232: if ((rflag = ellmult(nbc, p, X, X)) > 1) goto fin;
1233: else if (rflag) break;
1234: }
1235: }
1236: /* primes p larger than sqrt(B2) can appear only to the 1st power */
1237: while (p < B1)
1238: {
1239: p = snextpr(p, &d, &rcn, NULL, miller_k1);
1240: if (ellmult(nbc, p, X, X) > 1) goto fin; /* p^2 > B2: no loop */
1241: }
1242:
1243: if (DEBUGLEVEL >= 4)
1244: {
1245: fprintferr("ECM: time = %6ld ms, B1 phase done, ", timer2());
1246: fprintferr("p = %lu, setting up for B2\n", p);
1247: }
1248:
1249: /* ---B2 PHASE--- */
1250: /* compute [2]Q,...,[10]Q, which we need to build the helix */
1251: if (elldouble(nbc, X, XD) > 1)
1252: goto fin; /* [2]Q */
1253: if (elldouble(nbc, XD, XD + nbc2) > 1)
1254: goto fin; /* [4]Q */
1255: if (elladd(nbc, XD, XD + nbc2, XD + (nbc<<2)) > 1)
1256: goto fin; /* [6]Q */
1257: if (elladd2(nbc,
1258: XD, XD + (nbc<<2), XT + (nbc<<3),
1259: XD + nbc2, XD + (nbc<<2), XD + (nbc<<3)) > 1)
1260: goto fin; /* [8]Q and [10]Q */
1261: if (DEBUGLEVEL >= 7)
1262: fprintferr("\t(got [2]Q...[10]Q)\n");
1263:
1264: /* get next prime (still using the foolproof test) */
1265: p = snextpr(p, &d, &rcn, NULL, miller_k1);
1266: /* make sure we have the residue class number (mod 210) */
1267: if (rcn == NPRC)
1268: {
1269: rcn = prc210_no[(p % 210) >> 1];
1270: if (rcn == NPRC)
1271: {
1272: fprintferr("ECM: %lu should have been prime but isn\'t\n", p);
1273: err(bugparier, "ellfacteur");
1274: }
1275: }
1276:
1277: /* compute [p]Q and put it into its place in the helix */
1278: if (ellmult(nbc, p, X, XH + rcn*nbc2) > 1) goto fin;
1279: if (DEBUGLEVEL >= 7)
1280: fprintferr("\t(got [p]Q, p = %lu = %lu mod 210)\n",
1281: p, (ulong)(prc210_rp[rcn]));
1282:
1283: /* save current p, d, and rcn; we'll need them more than once below */
1284: p0 = p;
1285: d0 = d;
1286: rcn0 = rcn; /* remember where the helix wraps */
1287: bstp0 = 0; /* p is at baby-step offset 0 from itself */
1288:
1289: /* fill up the helix, stepping forward through the prime residue classes
1290: * mod 210 until we're back at the r'class of p0. Keep updating p so
1291: * that we can print meaningful diagnostics if a factor shows up; but
1292: * don't bother checking which of these p's are in fact prime
1293: */
1294: for (i = 47; i; i--) /* 47 iterations */
1295: {
1296: p += (dp = (ulong)prc210_d1[rcn]);
1297: if (rcn == 47)
1298: { /* wrap mod 210 */
1299: if (elladd(nbc, XT + dp*nbc, XH + rcn*nbc2, XH) > 1)
1300: goto fin;
1301: rcn = 0;
1302: continue;
1303: }
1304: if (elladd(nbc, XT + dp*nbc, XH + rcn*nbc2, XH + rcn*nbc2 + nbc2) > 1)
1305: goto fin;
1306: rcn++;
1307: }
1308: if (DEBUGLEVEL >= 7)
1309: fprintferr("\t(got initial helix)\n");
1310:
1311: /* compute [210]Q etc, which will be needed for the baby step table */
1312: if (ellmult(nbc, 3, XD + (nbc<<3), X) > 1) goto fin;
1313: if (ellmult(nbc, 7, X, X) > 1) goto fin; /* [210]Q */
1314: /* this was the last call to ellmult() in the main loop body; may now
1315: * overwrite XAUX and slots XD and following
1316: */
1317: if (elldouble(nbc, X, XAUX) > 1) goto fin; /* [420]Q */
1318: if (elladd(nbc, X, XAUX, XT) > 1) goto fin; /* [630]Q */
1319: if (elladd(nbc, X, XT, XD) > 1) goto fin; /* [840]Q */
1320: for (i=1; i <= gse; i++) /* gse successive doublings */
1321: {
1322: if (elldouble(nbc, XT + i*nbc2, XD + i*nbc2) > 1) goto fin;
1323: }
1324: /* (the last iteration has initialized XG to [210*2^(gse+1)]Q) */
1325:
1326: if (DEBUGLEVEL >= 4)
1327: {
1328: fprintferr("ECM: time = %6ld ms, entering B2 phase, p = %lu\n",
1329: timer2(), p);
1330: }
1331:
1332: /* inner loop over small sets of 4 curves at a time */
1333: for (i = nbc - 4; i >= 0; i -= 4)
1334: {
1335: if (DEBUGLEVEL >= 6)
1336: fprintferr("ECM: finishing curves %ld...%ld\n", i, i+3);
1337: /* copy relevant pointers from XH to Xh. Recall memory layout in XH
1338: * is: nbc X coordinates followed by nbc Y coordinates for residue
1339: * class 1 mod 210, then the same for r.c. 11 mod 210, etc. Memory
1340: * layout for Xh is: four X coords for 1 mod 210, four for 11 mod 210,
1341: * etc, four for 209 mod 210, and then the corresponding Y coordinates
1342: * in the same order. This will allow us to do a giant step on Xh
1343: * using just three calls to elladd0() each acting on 64 points in
1344: * parallel
1345: */
1346: for (j = 48; j--; )
1347: {
1348: k = nbc2*j + i;
1349: m = j << 2; /* X coordinates */
1350: Xh[m] = XH[k]; Xh[m+1] = XH[k+1];
1351: Xh[m+2] = XH[k+2]; Xh[m+3] = XH[k+3];
1352: k += nbc; /* Y coordinates */
1353: Yh[m] = XH[k]; Yh[m+1] = XH[k+1];
1354: Yh[m+2] = XH[k+2]; Yh[m+3] = XH[k+3];
1355: }
1356: /* build baby step table of X coords of multiples of [210]Q. XB[4*j]
1357: * will point at X coords on four curves from [(j+1)*210]Q. Until
1358: * we're done, we need some Y coords as well, which we keep in the
1359: * second half of the table, overwriting them at the end when gse==10.
1360: * Those multiples which we already have (by 1,2,3,4,8,16,...,2^gse)
1361: * are entered simply by copying the pointers, ignoring the small
1362: * number of slots in w that were initially reserved for them.
1363: * Here are the initial entries...
1364: */
1365: for (Xb=XB,k=2,j=i; k--; Xb=XB2,j+=nbc) /* do first X, then Y coords */
1366: {
1367: Xb[0] = X[j]; Xb[1] = X[j+1]; /* [210]Q */
1368: Xb[2] = X[j+2]; Xb[3] = X[j+3];
1369: Xb[4] = XAUX[j]; Xb[5] = XAUX[j+1]; /* [420]Q */
1370: Xb[6] = XAUX[j+2]; Xb[7] = XAUX[j+3];
1371: Xb[8] = XT[j]; Xb[9] = XT[j+1]; /* [630]Q */
1372: Xb[10] = XT[j+2]; Xb[11] = XT[j+3];
1373: Xb += 4; /* this points at [420]Q */
1374: /* ... entries at powers of 2 times 210 .... */
1375: for (m = 2; m < (ulong)gse+k; m++) /* omit Y coords of [2^gse*210]Q */
1376: {
1377: long m2 = m*nbc2 + j;
1378: Xb += (2UL<<m); /* points now at [2^m*210]Q */
1379: Xb[0] = XAUX[m2]; Xb[1] = XAUX[m2+1];
1380: Xb[2] = XAUX[m2+2]; Xb[3] = XAUX[m2+3];
1381: }
1382: }
1383: if (DEBUGLEVEL >= 7)
1384: fprintferr("\t(extracted precomputed helix / baby step entries)\n");
1385: /* ... glue in between, up to 16*210 ... */
1386: if (elladd0(12, 4, /* 12 pts + (4 pts replicated thrice) */
1387: XB + 12, XB2 + 12,
1388: XB, XB2,
1389: XB + 16, XB2 + 16)
1390: > 1) goto fin; /* 4 + {1,2,3} = {5,6,7} */
1391: if (elladd0(28, 4, /* 28 pts + (4 pts replicated 7fold) */
1392: XB + 28, XB2 + 28,
1393: XB, XB2,
1394: XB + 32, XB2 + 32)
1395: > 1) goto fin; /* 8 + {1,...,7} = {9,...,15} */
1396: /* ... and the remainder of the lot */
1397: for (m = 5; m <= (ulong)gse; m++)
1398: {
1399: /* fill in from 2^(m-1)+1 to 2^m-1 in chunks of 64 and 60 points */
1400: ulong m2 = 2UL << m; /* will point at 2^(m-1)+1 */
1401: for (j = 0; (ulong)j < m2-64; j+=64) /* executed 0 times when m == 5 */
1402: {
1403: if (elladd0(64, 4,
1404: XB + m2 - 4, XB2 + m2 - 4,
1405: XB + j, XB2 + j,
1406: XB + m2 + j,
1407: (m<(ulong)gse ? XB2 + m2 + j : NULL))
1408: > 1) goto fin;
1409: } /* j == m2-64 here, 60 points left */
1410: if (elladd0(60, 4,
1411: XB + m2 - 4, XB2 + m2 - 4,
1412: XB + j, XB2 + j,
1413: XB + m2 + j,
1414: (m<(ulong)gse ? XB2 + m2 + j : NULL))
1415: > 1) goto fin;
1416: /* (when m==gse, drop Y coords of result, and when both equal 1024,
1417: * overwrite Y coords of second argument with X coords of result)
1418: */
1419: }
1420: if (DEBUGLEVEL >= 7)
1421: fprintferr("\t(baby step table complete)\n");
1422: /* initialize a few other things */
1423: bstp = bstp0;
1424: p = p0; d = d0; rcn = rcn0;
1425: gl = gun;
1426: av1 = avma;
1427: /* scratchspace for prod (x_i-x_j) */
1428: avtmp = (long)new_chunk(8 * lgefint(n));
1429: /* the correct entry in XB to use depends on bstp and on where we are
1430: * on the helix. As we skip from prime to prime, bstp will be incre-
1431: * mented by snextpr() each time we wrap around through residue class
1432: * number 0 (1 mod 210), but the baby step should not be taken until
1433: * rcn>=rcn0 (i.e. until we pass again the residue class of p0).
1434: * The correct signed multiplier is thus k = bstp - (rcn < rcn0),
1435: * and the offset from XB is four times (|k| - 1). When k==0, we may
1436: * ignore the current prime (if it had led to a factorization, this
1437: * would have been noted during the last giant step, or -- when we
1438: * first get here -- whilst initializing the helix). When k > gss,
1439: * we must do a giant step and bump bstp back by -2*gss.
1440: * The gcd of the product of X coord differences against N is taken just
1441: * before we do a giant step.
1442: */
1443: /* loop over probable primes p0 < p <= nextprime(B2),
1444: * inserting giant steps as necessary
1445: */
1446: while (p < B2)
1447: {
1448: /* save current p for diagnostics */
1449: p2 = p;
1450: /* get next probable prime */
1451: p = snextpr(p, &d, &rcn, &bstp, miller_k2);
1452: /* work out the corresponding baby-step multiplier */
1453: k = bstp - (rcn < rcn0 ? 1 : 0);
1454: /* check whether it's giant-step time */
1455: if (k > gss)
1456: {
1457: /* take gcd */
1458: gl = mppgcd(gl, n);
1459: if (!is_pm1(gl) && !egalii(gl, n)) { p = p2; goto fin; }
1460: gl = gun;
1461: avma = av1;
1462: while (k > gss) /* hm, just how large are those prime gaps? */
1463: {
1464: /* giant step */
1465: if (DEBUGLEVEL >= 7)
1466: fprintferr("\t(giant step at p = %lu)\n", p);
1467: if (elladd0(64, 4,
1468: XG + i, YG + i,
1469: Xh, Yh, Xh, Yh) > 1) goto fin;
1470: if (elladd0(64, 4,
1471: XG + i, YG + i,
1472: Xh + 64, Yh + 64, Xh + 64, Yh + 64) > 1) goto fin;
1473: if (elladd0(64, 4,
1474: XG + i, YG + i,
1475: Xh + 128, Yh + 128, Xh + 128, Yh + 128)
1476: > 1) goto fin;
1477: bstp -= (gss << 1);
1478: /* recompute multiplier */
1479: k = bstp - (rcn < rcn0 ? 1 : 0);
1480: }
1481: }
1482: if (!k) continue; /* point of interest is already in Xh */
1483: if (k < 0) k = -k;
1484: m = ((ulong)k - 1) << 2;
1485: /* accumulate product of differences of X coordinates */
1486: j = rcn<<2;
1487: avma = avtmp; /* go to garbage zone */
1488: gl = modii(mulii(gl, subii(XB[m], Xh[j])), n);
1489: gl = modii(mulii(gl, subii(XB[m+1], Xh[j+1])), n);
1490: gl = modii(mulii(gl, subii(XB[m+2], Xh[j+2])), n);
1491: gl = mulii(gl, subii(XB[m+3], Xh[j+3]));
1492: avma = av1;
1493: gl = modii(gl, n);
1494: } /* loop over p */
1495: avma = av1;
1496: } /* for i (loop over sets of 4 curves) */
1497:
1498: /* continuation part of main loop */
1499:
1500: if (dsn < dsnmax)
1501: {
1502: dsn += insist ? 1 : 2;
1503: if (dsn > dsnmax) dsn = dsnmax;
1504: }
1505:
1506: if (!insist && !--rep)
1507: {
1508: if (DEBUGLEVEL >= 4)
1509: {
1510: fprintferr("ECM: time = %6ld ms,\tellfacteur giving up.\n",
1511: timer2());
1512: flusherr();
1513: }
1514: avma = av;
1515: if (use_clones) { gunclone(w0); gunclone(x); }
1516: return NULL;
1517: }
1518: }
1519: /* *** END OF ECM MAIN LOOP *** */
1520: fin:
1521: affii(gl, res);
1522:
1523: if (DEBUGLEVEL >= 4)
1524: {
1525: fprintferr("ECM: time = %6ld ms,\tp <= %6lu,\n\tfound factor = %Z\n",
1526: timer2(), p, res);
1527: flusherr();
1528: }
1529: avma=av;
1530: if (use_clones) { gunclone(w0); gunclone(x); }
1531: return res;
1532: }
1533:
1534: /***********************************************************************/
1535: /** **/
1536: /** FACTORIZATION (Pollard-Brent rho) **/
1537: /** pollardbrent() returns a nontrivial factor of n, assuming n is **/
1538: /** composite and has no small prime divisor, or NULL if going on **/
1539: /** would take more time than we want to spend. Sometimes it will **/
1540: /** find more than one factor, and return a structure suitable for **/
1541: /** interpretation by ifac_crack() below. GN1998Jun18-26 **/
1542: /** (Cf. Algorithm 8.5.2 in ACiCNT) **/
1543: /** **/
1544: /***********************************************************************/
1545:
1546: static void
1547: rho_dbg(long c, long msg_mask)
1548: {
1549: if (c & msg_mask) return;
1550: fprintferr("Rho: time = %6ld ms,\t%3ld round%s\n",
1551: timer2(), c, (c==1?"":"s"));
1552: flusherr();
1553: }
1554:
1555: /* Tuning parameter: for input up to 64 bits long, we must not spend more
1556: * than a very short time, for fear of slowing things down on average.
1557: * With the current tuning formula, increase our efforts somewhat at 49 bit
1558: * input (an extra round for each bit at first), and go up more and more
1559: * rapidly after we pass 80 bits.-- Changed this (again...) to adjust for
1560: * the presence of squfof, which will finish input up to 59 bits quickly.
1561: */
1562:
1563: #define tune_pb_min 14 /* even 15 seems too much. */
1564:
1565: /* We return NULL when we run out of time, or a single t_INT containing a
1566: * nontrivial factor of n, or a vector of t_INTs, each triple of successive
1567: * entries containing a factor, an exponent (equal to un), and a factor
1568: * class (NULL for unknown or zero for known composite), matching the
1569: * internal representation used by the ifac_*() routines below. Repeated
1570: * factors can arise and are legal; the caller will be sorting the factors
1571: * anyway.
1572: */
1573: GEN
1574: pollardbrent(GEN n)
1575: {
1576: long tf = lgefint(n), size = 0, delta, retries = 0, msg_mask;
1577: long c0, c, k, k1, l, avP, avx, GGG, av = avma;
1578: GEN x, x1, y, P, g, g1, res;
1579:
1580: if (DEBUGLEVEL >= 4) (void)timer2(); /* clear timer */
1581:
1582: if (tf >= 4)
1583: size = expi(n) + 1;
1584: else if (tf == 3) /* try to keep purify happy... */
1585: size = BITS_IN_LONG - bfffo(n[2]);
1586:
1587: if (size <= 28)
1588: c0 = 32; /* amounts very nearly to `insist'.
1589: * Now that we have squfof(), we don't insist
1590: * any more when input is 2^29 ... 2^32
1591: */
1592: else if (size <= 42)
1593: c0 = tune_pb_min;
1594: else if (size <= 59) /* match squfof() cutoff point */
1595: c0 = tune_pb_min + ((size - 42)<<1);
1596: else if (size <= 72)
1597: c0 = tune_pb_min + size - 24;
1598: else if (size <= 301)
1599: /* nonlinear increase in effort, kicking in around 80 bits */
1600: /* 301 gives 48121 + tune_pb_min */
1601: c0 = tune_pb_min + size - 60 +
1602: ((size-73)>>1)*((size-70)>>3)*((size-56)>>4);
1603: else
1604: c0 = 49152; /* ECM is faster when it'd take longer */
1605:
1606: c = c0 << 5; /* 32 iterations per round */
1607: msg_mask = (size >= 448? 0x1fff:
1608: (size >= 192? (256L<<((size-128)>>6))-1: 0xff));
1609: PB_RETRY:
1610: /* trick to make a `random' choice determined by n. Don't use x^2+0 or
1611: * x^2-2, ever. Don't use x^2-3 or x^2-7 with a starting value of 2.
1612: * x^2+4, x^2+9 are affine conjugate to x^2+1, so don't use them either.
1613: *
1614: * (the point being that when we get called again on a composite cofactor
1615: * of something we've already seen, we had better avoid the same delta)
1616: */
1617: switch ((size + retries) & 7)
1618: {
1619: case 0: delta= 1; break;
1620: case 1: delta= -1; break;
1621: case 2: delta= 3; break;
1622: case 3: delta= 5; break;
1623: case 4: delta= -5; break;
1624: case 5: delta= 7; break;
1625: case 6: delta= 11; break;
1626: /* case 7: */
1627: default: delta=-11; break;
1628: }
1629: if (DEBUGLEVEL >= 4)
1630: {
1631: if (!retries)
1632: {
1633: if (size < 1536)
1634: fprintferr("Rho: searching small factor of %ld-bit integer\n", size);
1635: else
1636: fprintferr("Rho: searching small factor of %ld-word integer\n", tf-2);
1637: }
1638: else
1639: fprintferr("Rho: restarting for remaining rounds...\n");
1640: fprintferr("Rho: using X^2%+1ld for up to %ld rounds of 32 iterations\n",
1641: delta, c >> 5);
1642: flusherr();
1643: }
1644: x=gdeux; P=gun; g1 = NULL; k = 1; l = 1;
1645: (void)new_chunk(10 + 6 * tf); /* enough for cgetg(10) + 3 divii */
1646: y = cgeti(tf); affsi(2, y);
1647: x1= cgeti(tf); affsi(2, x1);
1648: avx = avma;
1649: avP = (long)new_chunk(2 * tf); /* enough for x = addsi(tf+1) */
1650: GGG = (long)new_chunk(4 * tf); /* enough for P = modii(2tf+1, tf) */
1651:
1652: for (;;) /* terminated under the control of c */
1653: {
1654: /* use the polynomial x^2 + delta */
1655: #define one_iter() {\
1656: avma = GGG; x = resii(sqri(x), n); /* to garbage zone */\
1657: avma = avx; x = addsi(delta,x); /* erase garbage */\
1658: avma = GGG; P = mulii(P, subii(x1, x));\
1659: avma = avP; P = modii(P,n); }
1660:
1661: one_iter();
1662:
1663: if ((--c & 0x1f)==0) /* one round complete */
1664: {
1665: g = mppgcd(n, P);
1666: if (!is_pm1(g)) goto fin; /* caught something */
1667: if (c <= 0)
1668: { /* getting bored */
1669: if (DEBUGLEVEL >= 4)
1670: {
1671: fprintferr("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1672: timer2());
1673: flusherr();
1674: }
1675: avma=av; return NULL;
1676: }
1677: P = gun; /* not necessary, but saves 1 mulii/round */
1678: if (DEBUGLEVEL >= 4) rho_dbg(c0-(c>>5), msg_mask);
1679: affii(x,y);
1680: }
1681:
1682: if (--k) continue; /* normal end of loop body */
1683:
1684: if (c & 0x1f) /* otherwise, we already checked */
1685: {
1686: g = mppgcd(n, P);
1687: if (!is_pm1(g)) goto fin;
1688: P = gun;
1689: }
1690:
1691: /* Fast forward phase, doing l inner iterations without computing gcds.
1692: * Check first whether it would take us beyond the alloted time.
1693: * Fast forward rounds count only half (although they're taking
1694: * more like 2/3 the time of normal rounds). This to counteract the
1695: * nuisance that all c0 between 4096 and 6144 would act exactly as
1696: * 4096; with the halving trick only the range 4096..5120 collapses
1697: * (similarly for all other powers of two)
1698: */
1699: if ((c-=(l>>1)) <= 0)
1700: { /* got bored */
1701: if (DEBUGLEVEL >= 4)
1702: {
1703: fprintferr("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1704: timer2());
1705: flusherr();
1706: }
1707: avma=av; return NULL;
1708: }
1709: c &= ~0x1f; /* keep it on multiples of 32 */
1710:
1711: /* Fast forward loop */
1712: affii(x, x1); k = l; l <<= 1;
1713: /* don't show this for the first several (short) fast forward phases. */
1714: if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1715: {
1716: fprintferr("Rho: fast forward phase (%ld rounds of 64)...\n", l>>7);
1717: flusherr();
1718: }
1719: for (k1=k; k1; k1--) one_iter();
1720: if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1721: {
1722: fprintferr("Rho: time = %6ld ms,\t%3ld rounds, back to normal mode\n",
1723: timer2(), c0-(c>>5));
1724: flusherr();
1725: }
1726:
1727: affii(x,y);
1728: } /* forever */
1729:
1730: fin:
1731: /* An accumulated gcd was > 1 */
1732: /* if it isn't n, and looks prime, return it */
1733: if (!egalii(g,n))
1734: {
1735: if (miller(g,17))
1736: {
1737: if (DEBUGLEVEL >= 4)
1738: {
1739: rho_dbg(c0-(c>>5), 0);
1740: fprintferr("\tfound factor = %Z\n",g);
1741: flusherr();
1742: }
1743: avma=av; return icopy(g);
1744: }
1745: avma = avx; g1 = icopy(g); /* known composite, keep it safe */
1746: avx = avma;
1747: }
1748: else g1 = n; /* and work modulo g1 for backtracking */
1749:
1750: /* Here g1 is known composite */
1751: if (DEBUGLEVEL >= 4 && size > 192)
1752: {
1753: fprintferr("Rho: hang on a second, we got something here...\n");
1754: flusherr();
1755: }
1756: for(;;) /* backtrack until period recovered. Must terminate */
1757: {
1758: avma = GGG; y = resii(sqri(y), g1);
1759: avma = avx; y = addsi(delta,y);
1760: g = mppgcd(subii(x1, y), g1);
1761: if (!is_pm1(g)) break;
1762:
1763: if (DEBUGLEVEL >= 4 && (--c & 0x1f) == 0) rho_dbg(c0-(c>>5), msg_mask);
1764: }
1765:
1766: avma = av; /* safe */
1767: if (g1 == n || egalii(g,g1))
1768: {
1769: if (g1 == n && egalii(g,g1))
1770: { /* out of luck */
1771: if (DEBUGLEVEL >= 4)
1772: {
1773: rho_dbg(c0-(c>>5), 0);
1774: fprintferr("\tPollard-Brent failed.\n"); flusherr();
1775: }
1776: if (++retries >= 4) return NULL;
1777: goto PB_RETRY;
1778: }
1779: /* half lucky: we've split n, but g1 equals either g or n */
1780: if (DEBUGLEVEL >= 4)
1781: {
1782: rho_dbg(c0-(c>>5), 0);
1783: fprintferr("\tfound %sfactor = %Z\n",
1784: (g1!=n ? "composite " : ""), g);
1785: flusherr();
1786: }
1787: res = cgetg(7, t_VEC);
1788: res[1] = licopy(g); /* factor */
1789: res[2] = un; /* exponent 1 */
1790: res[3] = (g1!=n? zero: (long)NULL); /* known composite when g1!=n */
1791:
1792: res[4] = ldivii(n,g); /* cofactor */
1793: res[5] = un; /* exponent 1 */
1794: res[6] = (long)NULL; /* unknown */
1795: return res;
1796: }
1797: /* g < g1 < n : our lucky day -- we've split g1, too */
1798: res = cgetg(10, t_VEC);
1799: /* unknown status for all three factors */
1800: res[1] = licopy(g); res[2] = un; res[3] = (long)NULL;
1801: res[4] = ldivii(g1,g); res[5] = un; res[6] = (long)NULL;
1802: res[7] = ldivii(n,g1); res[8] = un; res[9] = (long)NULL;
1803: if (DEBUGLEVEL >= 4)
1804: {
1805: rho_dbg(c0-(c>>5), 0);
1806: fprintferr("\tfound factors = %Z, %Z,\n\tand %Z\n",
1807: res[1], res[4], res[7]);
1808: flusherr();
1809: }
1810: return res;
1811: }
1812:
1813: /***********************************************************************/
1814: /** **/
1815: /** FACTORIZATION (Shanks' SQUFOF) **/
1816: /** squfof() returns a nontrivial factor of n, assuming n is odd, **/
1817: /** composite, not a pure square, and has no small prime divisor, **/
1818: /** or NULL if it fails to find one. It works on two discriminants **/
1819: /** simultaneously (n and 5n for n=1(4), 3n and 4n for n=3(4)). **/
1820: /** The present implementation is limited to input <2^59, and will **/
1821: /** work most of the time in signed arithmetic on integers <2^31 in **/
1822: /** absolute size. Occasionally, it may find a factor which is a **/
1823: /** square.-- Since this will be used in the double-large-prime **/
1824: /** variation of MPQS, we provide a way of suppressing debugging **/
1825: /** output even at high debuglevels. GN2000Sep30-Oct01 **/
1826: /** (Cf. Algorithm 8.7.2 in ACiCNT) **/
1827: /** **/
1828: /***********************************************************************/
1829: extern ulong ucarrecomplet(ulong A);
1830: static long squfof_ambig(long a, long B, long dd, GEN D, long *cntamb);
1831:
1832: #define SQUFOF_BLACKLIST_SZ 64
1833:
1834: GEN
1835: squfof(GEN n, long quiet)
1836: {
1837: long tf = lgefint(n), nm4, cnt = 0, cntamb;
1838: long a1, b1, c1, d1, dd1, L1, a2, b2, c2, d2, dd2, L2, a, q, c, qc, qcb;
1839: GEN D1, D2, Q, res;
1840: long av = avma;
1841: static long blacklist1[SQUFOF_BLACKLIST_SZ], blacklist2[SQUFOF_BLACKLIST_SZ];
1842: long blp1 = 0, blp2 = 0;
1843: long mydebug = DEBUGLEVEL - quiet;
1844: int act1 = 1, act2 = 1;
1845:
1846: if (cmpis(n,5) <= 0) return NULL; /* input n <= 5 */
1847:
1848: #ifdef LONG_IS_64BIT
1849: if (tf > 3 || (tf == 3 && bfffo(n[2]) < 5)) /* n too large */
1850: return NULL;
1851: #else /* 32 bits */
1852: if (tf > 4 || (tf == 4 && bfffo(n[2]) < 5)) /* n too large */
1853: return NULL;
1854: #endif
1855: /* now we have 5 < n < 2^59 */
1856:
1857: nm4 = mod4(n);
1858: if (!(nm4 & 1)) return gdeux; /* n even */
1859:
1860: if (nm4 == 1)
1861: { /* case n = 1 (mod4): run one iteration on D1 = n, another on D2 = 5n */
1862: D1 = n; /* no need to copy */
1863: Q = racine(D1); d1 = itos(Q); L1 = itos(racine(Q));
1864: dd1 = (d1>>1) + (d1&1); /* rounding trick, see below */
1865: b1 = ((d1-1) & (~1UL)) + 1; /* largest odd number not exceeding d1 */
1866: c1 = itos(shifti(subii(D1, sqri(stoi(b1))), -2));
1867: if (c1 == 0) /* n was a square */
1868: {
1869: avma = av;
1870: res = cgetg(4, t_VEC);
1871: res[1] = lstoi(d1); /* factor */
1872: res[2] = deux; /* exponent 2 */
1873: res[3] = (long)NULL; /* unknown whether prime or composite */
1874: return res;
1875: }
1876: D2 = mulsi(5,n);
1877: Q = racine(D2); d2 = itos(Q); L2 = itos(racine(Q));
1878: dd2 = (d2>>1) + (d2&1);
1879: b2 = ((d2-1) & (~1UL)) + 1; /* b1, b2 will always stay odd */
1880: c2 = itos(shifti(subii(D2, sqri(stoi(b2))), -2));
1881: if (c2 == 0) /* 5n is a square, caller should avoid this */
1882: {
1883: avma = av;
1884: res = cgetg(4, t_VEC);
1885: res[1] = lstoi(d2/5); /* factor */
1886: res[2] = deux; /* exponent 2 */
1887: res[3] = (long)NULL; /* unknown whether prime or composite */
1888: return res;
1889: }
1890: }
1891: else
1892: { /* case n = 3 (mod4): run one iteration on D1 = 3n, another on D2 = 4n */
1893: D1 = mulsi(3,n);
1894: Q = racine(D1); d1 = itos(Q); L1 = itos(racine(Q));
1895: dd1 = (d1>>1) + (d1&1);
1896: b1 = ((d1-1) & (~1UL)) + 1; /* will always stay odd */
1897: c1 = itos(shifti(subii(D1, sqri(stoi(b1))), -2));
1898: if (c1 == 0) /* 3n is a square, caller should avoid this */
1899: {
1900: avma = av;
1901: res = cgetg(4, t_VEC);
1902: res[1] = lstoi(d1/3); /* factor */
1903: res[2] = deux; /* exponent 2 */
1904: res[3] = (long)NULL; /* unknown whether prime or composite */
1905: return res;
1906: }
1907: D2 = shifti(n,2);
1908: Q = racine(D2); d2 = itos(Q); L2 = itos(racine(Q));
1909: dd2 = d2>>1; /* no rounding trick here */
1910: b2 = (d2 & (~1UL)); /* largest even below d2, will stay even */
1911: c2 = itos(shifti(subii(D2, sqri(stoi(b2))), -2));
1912: /* c2 cannot vanish -- n = 3(mod 4) cannot be a square */
1913: }
1914: a1 = a2 = 1;
1915: /* This completes the setup of the two (identity) forms (a1,b1,-c1) and
1916: * (a2,b2,-c2).
1917: *
1918: * Attentive readers will notice that a1 and c1 represent the absolute
1919: * values of the a,c coefficients; we keep track of the sign separately,
1920: * in fact the sign info is contained in the rightmost bit of the iteration
1921: * counter cnt: when cnt is even, c is understood to be negative, else c
1922: * is positive and a < 0.
1923: *
1924: * The quantities dd1, dd2 are used to compute floor((d1+b1)/2) etc., with-
1925: * out overflowing the 31bit signed integer size limit, as dd1+floor(b1/2)
1926: * etc. This is the "rounding trick" alluded to above.
1927: *
1928: * L1, L2 are the limits for blacklisting small leading coefficients
1929: * on the principal cycle, to guarantee that when we find a square form,
1930: * its square root will belong to an ambiguous cycle (i.e. won't be an
1931: * earlier form on the principal cycle).
1932: *
1933: * When n = 3(mod 4), D2 = 12(mod 16), and b^2 is always 0 or 4 mod 16.
1934: * It follows that 4*a*c must be 4 or 8 mod 16, respectively, so at most
1935: * one of a,c can be divisible by 2 at most to the first power. This fact
1936: * is used a couple of times below.
1937: *
1938: * The flags act1, act2 remain true while the respective cycle is still
1939: * active; we drop them to false when we return to the identity form with-
1940: * out having found a square form (or when the blacklist overflows, which
1941: * shouldn't happen).
1942: */
1943:
1944: if (mydebug >= 4)
1945: {
1946: fprintferr("SQUFOF: entering main loop with forms\n"
1947: "\t(1, %ld, %ld) and (1, %ld, %ld)\n\tof discriminants\n"
1948: "\t%Z and %Z, respectively\n",
1949: b1, -c1, b2, -c2, D1, D2);
1950: flusherr();
1951: (void)timer2(); /* clear timer */
1952: }
1953:
1954: /* MAIN LOOP: walk around the principal cycle looking for a square form.
1955: * Blacklist small leading coefficients.
1956: *
1957: * The reduction operator can be computed entirely in 32-bit arithmetic:
1958: * Let q = floor(floor((d1+b1)/2)/c1) (when c1>dd1, q=1, which happens
1959: * often enough to special-case it). Then the new b1 = (q*c1-b1) + q*c1,
1960: * which can be computed without overflowing, and the new c1 equals
1961: * a1 - q*(q*c1-b1), where the righthand term is bounded by d1 in abs
1962: * size since both the old and the new a1 are positive and bounded by d1.
1963: */
1964: while (act1 + act2 > 0)
1965: {
1966: /* send first form through reduction operator if active */
1967: if (act1)
1968: {
1969: c = c1;
1970: if (c > dd1)
1971: q = 1;
1972: else
1973: q = (dd1 + (b1>>1)) / c;
1974: if (q == 1)
1975: {
1976: qcb = c - b1; b1 = c + qcb; c1 = a1 - qcb;
1977: }
1978: else
1979: {
1980: qc = q*c; qcb = qc - b1; b1 = qc + qcb; c1 = a1 - q*qcb;
1981: }
1982: a1 = c;
1983:
1984: if (a1 <= L1) /* blacklist this */
1985: {
1986: if (blp1 >= SQUFOF_BLACKLIST_SZ)
1987: /* blacklist overflows: shouldn't happen */
1988: act1 = 0; /* silently */
1989: else
1990: {
1991: if (mydebug >= 6)
1992: fprintferr("SQUFOF: blacklisting a = %ld on first cycle\n", a1);
1993: blacklist1[blp1++] = a1;
1994: }
1995: }
1996: }
1997:
1998: /* send second form through reduction operator if active */
1999: if (act2)
2000: {
2001: c = c2;
2002: if (c > dd2)
2003: q = 1;
2004: else
2005: q = (dd2 + (b2>>1)) / c;
2006: if (q == 1)
2007: {
2008: qcb = c - b2; b2 = c + qcb; c2 = a2 - qcb;
2009: }
2010: else
2011: {
2012: qc = q*c; qcb = qc - b2; b2 = qc + qcb; c2 = a2 - q*qcb;
2013: }
2014: a2 = c;
2015:
2016: if (a2 <= L2) /* blacklist this */
2017: {
2018: if (blp2 >= SQUFOF_BLACKLIST_SZ)
2019: /* blacklist overflows: shouldn't happen */
2020: act2 = 0; /* silently */
2021: else
2022: {
2023: if (mydebug >= 6)
2024: fprintferr("SQUFOF: blacklisting a = %ld on second cycle\n", a2);
2025: blacklist2[blp2++] = a2;
2026: }
2027: }
2028: }
2029:
2030: /* bump counter, loop if this is an odd iteration (i.e. if the real
2031: * leading coefficients are negative)
2032: */
2033: if (++cnt & 1) continue;
2034:
2035: /* second half of main loop entered only when the leading coefficients
2036: * are positive (i.e., during even-numbered iterations)
2037: */
2038:
2039: /* examine first form if active */
2040: if (act1 && a1 == 1) /* back to identity form */
2041: {
2042: act1 = 0; /* drop this discriminant */
2043: if (mydebug >= 4)
2044: {
2045: fprintferr("SQUFOF: first cycle exhausted after %ld iterations,\n"
2046: "\tdropping it\n",
2047: cnt);
2048: flusherr();
2049: }
2050: }
2051: if (act1)
2052: {
2053: if ((a = ucarrecomplet(a1)) != 0) /* square form? */
2054: {
2055: if (mydebug >= 4)
2056: {
2057: fprintferr("SQUFOF: square form (%ld^2, %ld, %ld) on first cycle\n"
2058: "\tafter %ld iterations, time = %ld ms\n",
2059: a, b1, -c1, cnt, timer2());
2060: /* flusherr delayed until we've dealt with it */
2061: }
2062: /* blacklisted? */
2063: if (a <= L1)
2064: {
2065: int j;
2066: for (j = 0; j < blp1; j++)
2067: if (a == blacklist1[j]) { a = 0; break; }
2068: }
2069: if (a > 0) /* not blacklisted */
2070: {
2071: /* imprimitive form? */
2072: q = cgcd(a, b1);
2073: if (nm4 == 3 && cgcd(q, 3) > 1) /* paranoia */
2074: {
2075: avma = av;
2076: /* we really possess q^2/3 here, but let the caller sort this
2077: * out. His fault for calling us with a multiple of 3. We
2078: * cannot claim (q/3)^2 as a known factor here since q might
2079: * equal 3, in which case 3 is the correct answer to return.
2080: */
2081: if (q == 3)
2082: {
2083: if (mydebug >= 4)
2084: {
2085: fprintferr("SQUFOF: found factor 3\n");
2086: flusherr();
2087: }
2088: return stoi(3);
2089: }
2090: else q /= 3; /* and fall through to the next conditional */
2091: }
2092: if (q > 1) /* q^2 divides D1 and, in fact, n */
2093: {
2094: avma = av;
2095: if (mydebug >= 4)
2096: {
2097: fprintferr("SQUFOF: found factor %ld^2\n", q);
2098: flusherr();
2099: }
2100: res = cgetg(4, t_VEC);
2101: res[1] = lstoi(q); /* factor */
2102: res[2] = deux; /* exponent 2 */
2103: res[3] = (long)NULL; /* unknown whether prime or composite */
2104: return res;
2105: }
2106:
2107: /* chase the inverse root form back along the ambiguous cycle */
2108: q = squfof_ambig(a, b1, dd1, D1, &cntamb);
2109: if (mydebug >= 6)
2110: fprintferr("SQUFOF: squfof_ambig returned %ld\n", q);
2111: if (nm4 == 3) q /= cgcd(q, 3);
2112:
2113: /* return if successful */
2114: if (q > 1)
2115: {
2116: avma = av;
2117: if (mydebug >= 4)
2118: {
2119: fprintferr("SQUFOF: found factor %ld from ambiguous form\n"
2120: "\tafter %ld steps on the ambiguous cycle, "
2121: "time = %ld ms\n",
2122: q, cntamb, timer2());
2123: flusherr();
2124: }
2125: res = stoi(q);
2126: return res;
2127: }
2128: else if (mydebug >= 4) /* nothing found */
2129: {
2130: fprintferr("SQUFOF: ...found nothing useful on the ambiguous "
2131: "cycle\n"
2132: "\tafter %ld steps there, time = %ld ms\n",
2133: cntamb, timer2());
2134: flusherr();
2135: }
2136: }
2137: else if (mydebug >= 4) /* blacklisted */
2138: {
2139: fprintferr("SQUFOF: ...but the root form seems to be on the "
2140: "principal cycle\n");
2141: flusherr();
2142: }
2143: }
2144: /* else proceed */
2145: }
2146:
2147: /* examine second form if active */
2148: if (act2 && a2 == 1) /* back to identity form */
2149: {
2150: act2 = 0; /* drop this discriminant */
2151: if (mydebug >= 4)
2152: {
2153: fprintferr("SQUFOF: second cycle exhausted after %ld iterations,\n"
2154: "\tdropping it\n",
2155: cnt);
2156: flusherr();
2157: }
2158: }
2159: if (act2)
2160: {
2161: if ((a = ucarrecomplet(a2)) != 0) /* square form? */
2162: {
2163: if (mydebug >= 4)
2164: {
2165: fprintferr("SQUFOF: square form (%ld^2, %ld, %ld) on second cycle\n"
2166: "\tafter %ld iterations, time = %ld ms\n",
2167: a, b2, -c2, cnt, timer2());
2168: flusherr();
2169: }
2170: /* blacklisted? */
2171: if (a <= L2)
2172: {
2173: int j;
2174: for (j = 0; j < blp2; j++)
2175: if (a == blacklist2[j]) { a = 0; break; }
2176: }
2177: if (a > 0) /* not blacklisted */
2178: {
2179: /* imprimitive form? */
2180: q = cgcd(a, b2);
2181: /* NB if b2 is even, a is odd, so the gcd is always odd */
2182: if (nm4 == 1 && cgcd(q, 5) > 1) /* paranoia */
2183: {
2184: avma = av;
2185: /* we really possess q^2/5 here, but let the caller sort this
2186: * out. His fault for calling us with a multiple of 5. We
2187: * cannot claim (q/5)^2 as a known factor here since q might
2188: * equal 5, in which case 5 is the correct answer to return.
2189: */
2190: if (q == 5)
2191: {
2192: if (mydebug >= 4)
2193: {
2194: fprintferr("SQUFOF: found factor 5\n");
2195: flusherr();
2196: }
2197: return stoi(5);
2198: }
2199: else q /= 5; /* and fall through to the next conditional */
2200: }
2201: if (q > 1) /* q^2 divides D2 */
2202: {
2203: avma = av;
2204: if (mydebug >= 4)
2205: {
2206: fprintferr("SQUFOF: found factor %ld^2\n", q);
2207: flusherr();
2208: }
2209: res = cgetg(4, t_VEC);
2210: res[1] = lstoi(q); /* factor */
2211: res[2] = deux; /* exponent 2 */
2212: res[3] = (long)NULL; /* unknown whether prime or composite */
2213: return res;
2214: }
2215:
2216: /* chase the inverse root form along the ambiguous cycle */
2217: q = squfof_ambig(a, b2, dd2, D2, &cntamb);
2218: if (mydebug >= 6)
2219: fprintferr("SQUFOF: squfof_ambig returned %ld\n", q);
2220: if (nm4 == 1) q /= cgcd(q, 5);
2221:
2222: /* return if successful */
2223: if (q > 1)
2224: {
2225: avma = av;
2226: if (mydebug >= 4)
2227: {
2228: fprintferr("SQUFOF: found factor %ld from ambiguous form\n"
2229: "\tafter %ld steps on the ambiguous cycle, "
2230: "time = %ld ms\n",
2231: q, cntamb, timer2());
2232: flusherr();
2233: }
2234: res = stoi(q);
2235: return res;
2236: }
2237: else if (mydebug >= 4) /* nothing found */
2238: {
2239: fprintferr("SQUFOF: ...found nothing useful on the ambiguous "
2240: "cycle\n"
2241: "\tafter %ld steps there, time = %ld ms\n",
2242: cntamb, timer2());
2243: flusherr();
2244: }
2245: }
2246: else if (mydebug >= 4) /* blacklisted */
2247: {
2248: fprintferr("SQUFOF: ...but the root form seems to be on the "
2249: "principal cycle\n");
2250: flusherr();
2251: }
2252: }
2253: /* else proceed */
2254: }
2255:
2256: } /* end main loop */
2257:
2258: /* when we get here, both discriminants have, alas, turned out to be
2259: * useless.
2260: */
2261: if (mydebug >= 4)
2262: {
2263: fprintferr("SQUFOF: giving up, time = %ld ms\n", timer2());
2264: flusherr();
2265: }
2266:
2267: avma = av;
2268: return NULL;
2269: }
2270:
2271: /* The following is invoked to walk back along the ambiguous cycle
2272: * until we hit an ambiguous form and thus the desired factor, which
2273: * it returns. If it fails for any reason, it returns 0. It doesn't
2274: * interfere with timing and diagnostics, which it leaves to squfof().
2275: *
2276: * Before we invoke this, we've found a form (A, B, -C) with A = a^2,
2277: * where a isn't blacklisted and where gcd(a, B) = 1. According to
2278: * ACiCANT, we should now proceed reducing the form (a, -B, -aC), but
2279: * it is easy to show that the first reduction step always sends this
2280: * to (-aC, B, a), and the next one, with q computed as usual from B
2281: * and a (occupying the c position), gives a reduced form, whose third
2282: * member is easiest to recover by going back to D. From this point
2283: * onwards, we're once again working with single-word numbers.
2284: * NB here there is no need to track signs, we just work with the abs
2285: * values of the coefficients.
2286: */
2287: static
2288: long
2289: squfof_ambig(long a, long B, long dd, GEN D, long *cntamb)
2290: {
2291: long b, c, q, qc, qcb, av = avma;
2292: long a0, b0, b1, c0;
2293:
2294: q = (dd + (B>>1)) / a; qc = q*a; qcb = qc - B;
2295: b = qc + qcb;
2296: c = itos(divis(shifti(subii(D, sqri(stoi(b))), -2), a));
2297: #ifdef DEBUG_SQUFOF
2298: fprintferr("SQUFOF: ambigous cycle of discriminant %Z\n", D);
2299: fprintferr("SQUFOF: Form on ambigous cycle (%ld, %ld, %ld)\n",
2300: a, b, c);
2301: #endif
2302:
2303: avma = av; /* no further stack operations follow */
2304: *cntamb = 0; /* count reduction steps on the cycle */
2305: a0 = a; b0 = b1 = b; /* end of loop detection and safeguard */
2306:
2307: for (;;) /* reduced cycles are finite */
2308: {
2309: /* reduction step */
2310: c0 = c;
2311: if (c0 > dd)
2312: q = 1;
2313: else
2314: q = (dd + (b>>1)) / c0;
2315: if (q == 1)
2316: {
2317: qcb = c0 - b; b = c0 + qcb; c = a - qcb;
2318: }
2319: else
2320: {
2321: qc = q*c0; qcb = qc - b; b = qc + qcb; c = a - q*qcb;
2322: }
2323: a = c0;
2324:
2325: (*cntamb)++;
2326:
2327: /* check whether we're done */
2328: if (b == b1) return (a&1 ? a : a>>1);
2329:
2330: /* safeguard against infinite loop: recognize when we've walked
2331: * around the entire cycle in vain. (I don't think this can
2332: * actually happen -- exercise. But better safe than sorry.)
2333: */
2334: if (b == b0 && a == a0) return 0;
2335:
2336: /* prepare for next iteration */
2337: b1 = b;
2338: }
2339: /* NOT REACHED */
2340: return 0;
2341: }
2342:
2343: /***********************************************************************/
2344: /** **/
2345: /** DETECTING ODD POWERS **/
2346: /** Factoring engines like MPQS which ultimately rely on computing **/
2347: /** gcd(N, x^2-y^2) to find a nontrivial factor of N are fundamen- **/
2348: /** tally incapable of splitting a proper power of an odd prime, **/
2349: /** because of the cyclicity of the prime residue class group. We **/
2350: /** already have a square-detection function carrecomplet(), which **/
2351: /** also returns the square root if appropriate. Here's an analogue **/
2352: /** for cubes, fifth and 7th powers. 11th powers are a non-issue so **/
2353: /** long as mpqs() gives up beyond 100 decimal digits (since ECM **/
2354: /** easily finds a 10-digit prime factor of a 100-digit number). **/
2355: /** GN1998Jun28 **/
2356: /** **/
2357: /***********************************************************************/
2358:
2359: /* Use a multistage sieve. First stages work mod 211, 209, 61, 203;
2360: * if the argument is larger than a word, we first reduce mod the product
2361: * of these and then take the remainder apart. Second stages use 117,
2362: * 31, 43, 71 in this order. Moduli which are no longer interesting are
2363: * skipped. Everything is encoded in a single table of 106 24-bit masks.
2364: * We only need the first half of the residues. Three bits per modulus
2365: * indicate which residues are 7th (bit 2), 5th (bit 1) powers or cubes
2366: * (bit 0); the eight moduli above are assigned right-to-left. The table
2367: * will err on the side of safety if one of the moduli divides the number
2368: * to be tested, but as this leads to inefficiency it should still be
2369: * avoided.
2370: */
2371:
2372: static ulong powersmod[106] = {
2373: 077777777ul, /* 0 */
2374: 077777777ul, /* 1 */
2375: 013562440ul, /* 2 */
2376: 012462540ul, /* 3 */
2377: 013562440ul, /* 4 */
2378: 052662441ul, /* 5 */
2379: 016663440ul, /* 6 */
2380: 016463450ul, /* 7 */
2381: 013573551ul, /* 8 */
2382: 012462540ul, /* 9 */
2383: 012462464ul, /* 10 */
2384: 013462771ul, /* 11 */
2385: 012466473ul, /* 12 */
2386: 012463641ul, /* 13 */
2387: 052463646ul, /* 14 */
2388: 012563446ul, /* 15 */
2389: 013762440ul, /* 16 */
2390: 052766440ul, /* 17 */
2391: 012772451ul, /* 18 */
2392: 012762454ul, /* 19 */
2393: 032763550ul, /* 20 */
2394: 013763664ul, /* 21 */
2395: 017763460ul, /* 22 */
2396: 037762565ul, /* 23 */
2397: 017762540ul, /* 24 */
2398: 057762441ul, /* 25 */
2399: 037772452ul, /* 26 */
2400: 017773551ul, /* 27 */
2401: 017767541ul, /* 28 */
2402: 017767640ul, /* 29 */
2403: 037766450ul, /* 30 */
2404: 017762752ul, /* 31 */
2405: 037762762ul, /* 32 */
2406: 017762742ul, /* 33 */
2407: 037763762ul, /* 34 */
2408: 017763740ul, /* 35 */
2409: 077763740ul, /* 36 */
2410: 077762750ul, /* 37 */
2411: 077762752ul, /* 38 */
2412: 077762750ul, /* 39 */
2413: 077762743ul, /* 40 */
2414: 077767740ul, /* 41 */
2415: 077763741ul, /* 42 */
2416: 077763762ul, /* 43 */
2417: 077772760ul, /* 44 */
2418: 077762770ul, /* 45 */
2419: 077766750ul, /* 46 */
2420: 077762740ul, /* 47 */
2421: 077763740ul, /* 48 */
2422: 077763750ul, /* 49 */
2423: 077763752ul, /* 50 */
2424: 077762740ul, /* 51 */
2425: 077762740ul, /* 52 */
2426: 077772740ul, /* 53 */
2427: 077762762ul, /* 54 */
2428: 077763765ul, /* 55 */
2429: 077763770ul, /* 56 */
2430: 077767750ul, /* 57 */
2431: 077766753ul, /* 58 */
2432: 077776740ul, /* 59 */
2433: 077772741ul, /* 60 */
2434: 077772744ul, /* 61 */
2435: 077773740ul, /* 62 */
2436: 077773743ul, /* 63 */
2437: 077773751ul, /* 64 */
2438: 077772771ul, /* 65 */
2439: 077772760ul, /* 66 */
2440: 077772763ul, /* 67 */
2441: 077772751ul, /* 68 */
2442: 077773750ul, /* 69 */
2443: 077777740ul, /* 70 */
2444: 077773745ul, /* 71 */
2445: 077772740ul, /* 72 */
2446: 077772742ul, /* 73 */
2447: 077772744ul, /* 74 */
2448: 077776750ul, /* 75 */
2449: 077773771ul, /* 76 */
2450: 077773774ul, /* 77 */
2451: 077773760ul, /* 78 */
2452: 077772741ul, /* 79 */
2453: 077772740ul, /* 80 */
2454: 077772740ul, /* 81 */
2455: 077772741ul, /* 82 */
2456: 077773754ul, /* 83 */
2457: 077773750ul, /* 84 */
2458: 077773740ul, /* 85 */
2459: 077776741ul, /* 86 */
2460: 077776771ul, /* 87 */
2461: 077776773ul, /* 88 */
2462: 077772761ul, /* 89 */
2463: 077773741ul, /* 90 */
2464: 077773740ul, /* 91 */
2465: 077773740ul, /* 92 */
2466: 077772740ul, /* 93 */
2467: 077772752ul, /* 94 */
2468: 077772750ul, /* 95 */
2469: 077772751ul, /* 96 */
2470: 077773741ul, /* 97 */
2471: 077773761ul, /* 98 */
2472: 077777760ul, /* 99 */
2473: 077772765ul, /* 100 */
2474: 077772742ul, /* 101 */
2475: 077777751ul, /* 102 */
2476: 077777750ul, /* 103 */
2477: 077777745ul, /* 104 */
2478: 077777770ul /* 105 */
2479: };
2480:
2481: /* Returns 3, 5, or 7 if x is a cube (but not a 5th or 7th power), a 5th
2482: * power (but not a 7th), or a 7th power, and in this case creates the
2483: * base on the stack and assigns its address to *pt. Otherwise returns 0.
2484: * x must be of type t_INT and nonzero; this is not checked. The *mask
2485: * argument tells us which things to check -- bit 0: 3rd, bit 1: 5th,
2486: * bit 2: 7th pwr; set a bit to have the corresponding power examined --
2487: * and is updated appropriately for a possible follow-up call
2488: */
2489:
2490: long /* no longer static -- used in mpqs.c */
2491: is_odd_power(GEN x, GEN *pt, long *mask)
2492: {
2493: long av=avma, tetpil, lgx=lgefint(x), exponent=0, residue, resbyte;
2494: GEN y;
2495:
2496: *mask &= 7; /* paranoia */
2497: if (!*mask) return 0; /* useful when running in a loop */
2498: if (signe(x) < 0) x=absi(x);
2499:
2500: if (DEBUGLEVEL >= 5)
2501: {
2502: fprintferr("OddPwrs: is %Z\n\t...a", x);
2503: if (*mask&1) fprintferr(" 3rd%s",
2504: (*mask==7?",":(*mask!=1?" or":"")));
2505: if (*mask&2) fprintferr(" 5th%s",
2506: (*mask==7?", or":(*mask&4?" or":"")));
2507: if (*mask&4) fprintferr(" 7th");
2508: fprintferr(" power?\n");
2509: }
2510: if (lgx > 3) residue = smodis(x, 211*209*61*203);
2511: else residue = x[2];
2512:
2513: resbyte=residue%211; if (resbyte > 105) resbyte = 211 - resbyte;
2514: *mask &= powersmod[resbyte];
2515: if (DEBUGLEVEL >= 5)
2516: {
2517: fprintferr("\tmodulo: resid. (remaining possibilities)\n");
2518: fprintferr("\t 211: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2519: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2520: }
2521: if (!*mask) { avma=av; return 0; }
2522:
2523: if (*mask & 3)
2524: {
2525: resbyte=residue%209; if (resbyte > 104) resbyte = 209 - resbyte;
2526: *mask &= (powersmod[resbyte] >> 3);
2527: if (DEBUGLEVEL >= 5)
2528: fprintferr("\t 209: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2529: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2530: if (!*mask) { avma=av; return 0; }
2531: }
2532: if (*mask & 3)
2533: {
2534: resbyte=residue%61; if (resbyte > 30) resbyte = 61 - resbyte;
2535: *mask &= (powersmod[resbyte] >> 6);
2536: if (DEBUGLEVEL >= 5)
2537: fprintferr("\t 61: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2538: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2539: if (!*mask) { avma=av; return 0; }
2540: }
2541: if (*mask & 5)
2542: {
2543: resbyte=residue%203; if (resbyte > 101) resbyte = 203 - resbyte;
2544: *mask &= (powersmod[resbyte] >> 9);
2545: if (DEBUGLEVEL >= 5)
2546: fprintferr("\t 203: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2547: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2548: if (!*mask) { avma=av; return 0; }
2549: }
2550:
2551: if (lgx > 3) residue = smodis(x, 117*31*43*71);
2552: else residue = x[2];
2553:
2554: if (*mask & 1)
2555: {
2556: resbyte=residue%117; if (resbyte > 58) resbyte = 117 - resbyte;
2557: *mask &= (powersmod[resbyte] >> 12);
2558: if (DEBUGLEVEL >= 5)
2559: fprintferr("\t 117: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2560: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2561: if (!*mask) { avma=av; return 0; }
2562: }
2563: if (*mask & 3)
2564: {
2565: resbyte=residue%31; if (resbyte > 15) resbyte = 31 - resbyte;
2566: *mask &= (powersmod[resbyte] >> 15);
2567: if (DEBUGLEVEL >= 5)
2568: fprintferr("\t 31: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2569: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2570: if (!*mask) { avma=av; return 0; }
2571: }
2572: if (*mask & 5)
2573: {
2574: resbyte=residue%43; if (resbyte > 21) resbyte = 43 - resbyte;
2575: *mask &= (powersmod[resbyte] >> 18);
2576: if (DEBUGLEVEL >= 5)
2577: fprintferr("\t 43: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2578: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2579: if (!*mask) { avma=av; return 0; }
2580: }
2581: if (*mask & 6)
2582: {
2583: resbyte=residue%71; if (resbyte > 35) resbyte = 71 - resbyte;
2584: *mask &= (powersmod[resbyte] >> 21);
2585: if (DEBUGLEVEL >= 5)
2586: fprintferr("\t 71: %3ld (3rd %ld, 5th %ld, 7th %ld)\n",
2587: resbyte, *mask&1, (*mask>>1)&1, (*mask>>2)&1);
2588: if (!*mask) { avma=av; return 0; }
2589: }
2590:
2591: /* priority to higher powers -- if we have a 21st, it'll be easier to
2592: * rediscover that its 7th root is a cube than that its cube root is
2593: * a 7th power
2594: */
2595: if ((resbyte = *mask & 4)) /* assignment */
2596: exponent = 7;
2597: else if ((resbyte = *mask & 2))
2598: exponent = 5;
2599: else
2600: { resbyte = 1; exponent = 3; }
2601: /* leave that mask bit on for the moment, we might need it for a
2602: * subsequent call
2603: */
2604:
2605: /* precision in the following is one extra significant word (overkill) */
2606: y=ground(gpow(x, ginv(stoi(exponent)), lgx));
2607: if (!egalii(gpowgs(y, exponent), x))
2608: {
2609: if (DEBUGLEVEL >= 5)
2610: {
2611: if (exponent == 3)
2612: fprintferr("\tBut it nevertheless wasn't a cube.\n");
2613: else
2614: fprintferr("\tBut it nevertheless wasn't a %ldth power.\n",
2615: exponent);
2616: }
2617: *mask &= ~resbyte; /* _now_ turn the bit off */
2618: avma=av; return 0;
2619: }
2620: /* caller (ifac_crack() below) will report the final result if it was
2621: * a pure power, so no further diagnostics here
2622: */
2623: tetpil=avma;
2624: if (!pt) { avma=av; return exponent; } /* this branch not used */
2625: *pt=gerepile(av,tetpil,icopy(y));
2626: return exponent;
2627: }
2628:
2629: /***********************************************************************/
2630: /** **/
2631: /** FACTORIZATION (master iteration) **/
2632: /** Driver for the various methods of finding large factors **/
2633: /** (after trial division has cast out the very small ones). **/
2634: /** GN1998Jun24--30 **/
2635: /** **/
2636: /***********************************************************************/
2637:
2638: /** Direct use:
2639: ** ifac_start() registers a number (without prime factors < 100)
2640: ** with the iterative factorizer, and also registers whether or
2641: ** not we should terminate early if we find that the number is
2642: ** not squarefree, and a hint about which method(s) to use. This
2643: ** must always be called first. The input _must_ have been checked
2644: ** to be composite by the caller. The routine immediately tries
2645: ** to decompose it nontrivially into a product of two factors,
2646: ** except in squarefreeness (`Moebius') mode.
2647: ** ifac_primary_factor() returns a prime divisor (not necessarily
2648: ** the smallest) and the corresponding exponent. */
2649:
2650: /** Encapsulated user interface:
2651: ** ifac_decomp() does the right thing for auxdecomp() (put a succession
2652: ** of prime divisor / exponent pairs onto the stack, not necessarily
2653: ** sorted, although in practice they will tend not to be too far from
2654: ** the correct order).
2655: **
2656: ** For each of the additive/multiplicative arithmetic functions, there is
2657: ** a `contributor' below, to be called on any large composite cofactor
2658: ** left over after trial division by small primes, whose result can then
2659: ** be added to or multiplied with whatever we already have:
2660: ** ifac_moebius() ifac_issquarefree() ifac_totient() ifac_omega()
2661: ** ifac_bigomega() ifac_numdiv() ifac_sumdiv() ifac_sumdivk() */
2662:
2663: /* We never test whether the input number is prime or composite, since
2664: * presumably it will have come out of the small factors finder stage
2665: * (which doesn't really exist yet but which will test the left-over
2666: * cofactor for primality once it does).
2667: */
2668: /* The data structure in which we preserve whatever we know at any given
2669: * time about our number N is kept on the PARI stack, and updated as needed.
2670: * This makes the machinery re-entrant (you can have more than one fac-
2671: * torization using ifac_start()/ifac_primary_factor() in progress simul-
2672: * taneously so long as you preserve the GEN across garbage collections),
2673: * and which avoids memory leaks when a lengthy factorization is interrupted.
2674: * We also make an effort to keep the whole affair connected, and the parent
2675: * object will always be older than its children. This may in rare cases
2676: * lead to some extra copying around, and knowing what is garbage at any
2677: * given time is not entirely trivial. See below for examples how to do
2678: * it right. (Connectedness can be destroyed if callers of ifac_main()
2679: * create other stuff on the stack in between calls. This is harmless
2680: * as long as ifac_realloc() is used to re-create a connected object at
2681: * the head of the stack just before collecting garbage.)
2682: */
2683: /* Note that a PARI integer can have hundreds of millions of distinct prime
2684: * factors larger than 2^16, given enough memory. And since there's no
2685: * guarantee that we will find factors in order of increasing size, we must
2686: * be prepared to drag a very large amount of data around (although this
2687: * will _very_ rarely happen for random input!). So we start with a small
2688: * structure and extend it when necessary.
2689: */
2690: /* The idea of data structure and algorithm is:
2691: * Let N0 be whatever is currently left of N after dividing off all the
2692: * prime powers we have already returned to the caller. Then we maintain
2693: * N0 as a product
2694: * (1) N0 = \prod_i P_i^{e_i} * \prod_j Q_j^{f_j} * \prod_k C_k^{g_k}
2695: * where the P_i and Q_j are distinct primes, each C_k is known composite,
2696: * none of the P_i divides any C_k, and we also know the total ordering
2697: * of all the P_i, Q_j and C_k (in particular, we will never try to divide
2698: * a C_k by a larger Q_j). Some of the C_k may have common factors, although
2699: * this will not often be the case.
2700: */
2701: /* Caveat implementor: Taking gcds among C_k's is very likely to cost at
2702: * least as much time as dividing off any primes as we find them, and book-
2703: * keeping would be a nightmare (since D=gcd(C_1,C_2) can still have common
2704: * factors with both C_1/D and C_2/D, and so on...).
2705: */
2706: /* At startup, we just initialize the structure to
2707: * (2) N = C_1^1 (composite).
2708: */
2709: /* Whenever ifac_primary_factor() or ifac_decomp() (or, mutatis mutandis,
2710: * one of the three arithmetic user interface routines) needs a primary
2711: * factor, and the smallest thing in our list is P_1, we return that and
2712: * its exponent, and remove it from our list.
2713: * (When nothing is left, we return a sentinel value -- gun. And in Moebius
2714: * mode, when we see something with exponent > 1, whether prime or composite,
2715: * we yell at our caller by returning gzero or 0, depending on the function).
2716: * In all other cases, ifac_main() iterates the following steps until we have
2717: * a P_1 in the smallest position.
2718: */
2719: /* When the smallest item is C_1 (as it is initially):
2720: * (3.1) Crack C_1 into a nontrivial product U_1 * U_2 by whatever method
2721: * comes to mind for this size. (U for `unknown'.) Cracking will detect
2722: * squares (and biquadrates etc), and it may detect odd powers, so we
2723: * might instead see a power of some U_1 here, or even something of the form
2724: * U_1^k*U_2^k. (Of course the exponent already attached to C_1 is taken
2725: * into account in the following.)
2726: * (3.2) If we have U_1*U_2, sort the two factors; convert to U_1^2 if they
2727: * happen to be equal (which they shouldn't -- squares should have been
2728: * caught at the preceding stage). Note that U_1 and (if it exists) U_2
2729: * are automatically smaller than anything else in our list.
2730: * (3.3) Check U_1 (and U_2) for primality, and flag them accordingly.
2731: * (3.4) Iterate.
2732: */
2733: /* When the smallest item is Q_1:
2734: * This is the potentially unpleasant case. The idea is to go through the
2735: * entire list and try to divide Q_1 off each of the current C_k's, which
2736: * will usually fail, but may succeed several times. When a division was
2737: * successful, the corresponding C_k is removed from our list, and the co-
2738: * factor becomes a U_l for the moment unless it is 1 (which happens when
2739: * C_k was a power of Q_1). When we're through we upgrade Q_1 to P_1 status,
2740: * and then do a primality check on each U_l and sort it back into the list
2741: * either as a Q_j or as a C_k. If during the insertion sort we discover
2742: * that some U_l equals some P_i or Q_j or C_k we already have, we just add
2743: * U_l's exponent to that of its twin. (The sorting should therefore happen
2744: * before the primality test).
2745: * Note that this may produce one or more elements smaller than the P_1
2746: * we just confirmed, so we may have to repeat the iteration.
2747: */
2748: /* There's a little trick that avoids some Q_1 instances. Just after we do
2749: * a sweep to classify all current unknowns as either composites or primes,
2750: * we do another downward sweep beginning with the largest current factor
2751: * and stopping just above the largest current composite. Every Q_j we
2752: * pass is turned into a P_i. (Different primes are automatically coprime
2753: * among each other, and primes tend not to divide smaller composites.)
2754: */
2755: /* (We have no use for comparing the square of a prime to N0. Normally
2756: * we will get called after casting out only the smallest primes, and
2757: * since we cannot guarantee that we see the large prime factors in as-
2758: * cending order, we cannot stop when we find one larger than sqrt(N0).)
2759: */
2760: /* Data structure: We keep everything in a single t_VEC of t_INTs. The
2761: * first component records whether we're doing full (NULL) or Moebius (un)
2762: * factorization; in the latter case many subroutines return a sentinel
2763: * value as soon as they spot an exponent > 1. The second component records
2764: * the hint from factorint()'s optional flag, for use by ifac_crack().
2765: * The remaining components (initially 15) are used in groups of three:
2766: * a GEN pointer at the t_INT value of the factor, a pointer at the t_INT
2767: * exponent (usually gun or gdeux so we don't clutter up the stack too
2768: * much), and another t_INT GEN pointer to record the class of the factor:
2769: * NULL for unknown, zero for known composite C_k, un for known prime Q_j
2770: * awaiting trial division, and deux for finished prime P_i.
2771: */
2772: /* When during the division stage we re-sort a C_k-turned-U_l to a lower
2773: * position, we rotate any intervening material upward towards its old
2774: * slot. When a C_k was divided down to 1, its slot is left empty at
2775: * first; similarly when the re-sorting detects a repeated factor.
2776: * After the sorting phase, we de-fragment the list and squeeze all the
2777: * occupied slots together to the high end, so that ifac_crack() has room
2778: * for new factors. When this doesn't suffice, we abandon the current
2779: * vector and allocate a somewhat larger one, defragmenting again during
2780: * copying.
2781: */
2782: /* (For internal use, note that all exponents will fit into C longs, given
2783: * PARI's lgefint field size. When we work with them, we sometimes read
2784: * out the GEN pointer, and sometimes do an itos, whatever is more con-
2785: * venient for the task at hand.)
2786: */
2787:
2788: /*** Overview and forward declarations: ***/
2789:
2790: /* The `*where' argument in the following points into *partial at the
2791: * first of the three fields of the first occupied slot. It's there
2792: * because the caller would already know where `here' is, so we don't
2793: * want to search for it again, although it wouldn't take much time.
2794: * On the other hand, we do not preserve this from one user-interface
2795: * call to the next.
2796: */
2797:
2798: static GEN
2799: ifac_find(GEN *partial, GEN *where);
2800: /* Return GEN pointing at the first nonempty slot strictly behind the
2801: * current *where, or NULL if such doesn't exist. Can be used to skip
2802: * a range of vacant slots, or to initialize *where in the first place
2803: * (pass partial in both args). Does not modify its argument pointers.
2804: */
2805:
2806: void
2807: ifac_realloc(GEN *partial, GEN *where, long new_lg);
2808: /* Move to a larger main vector, updating *where if it points into it.
2809: * Certainly updates *partial. Can be used as a specialized gcopy before
2810: * a gerepileupto()/gerepilemanysp() (pass 0 as the new length).
2811: * Normally, one would pass new_lg=1 to let this function guess the
2812: * new size. To be used sparingly.
2813: */
2814:
2815: static long
2816: ifac_crack(GEN *partial, GEN *where);
2817: /* Split the first (composite) entry. There _must_ already be room for
2818: * another factor below *where, and *where will be updated. Factor and
2819: * cofactor will be inserted in the correct order, updating *where, or
2820: * factor^k will be inserted if such should be the case (leaving *where
2821: * unchanged). The factor or factors will be set to unknown, and inherit
2822: * the exponent (or a multiple thereof) of its/their ancestor. Returns
2823: * number of factors written into the structure (normally 2, but 1 if a
2824: * factor equalled its cofactor, and may be more than 1 if a factoring
2825: * engine returned a vector of factors instead of a single factor). Can
2826: * reallocate the data structure in the vector-of-factors case (but not
2827: * in the more common single-factor case)
2828: */
2829:
2830: static long
2831: ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec);
2832: /* Gets called to complete ifac_crack()'s job when a factoring engine
2833: * splits the current factor into a product of three or more new factors.
2834: * Makes room for them if necessary, sorts them, gives them the right
2835: * exponents and class etc. Also returns the number of factors actually
2836: * written, which may be less than the number of components in facvec
2837: * if there are duplicates.--- Vectors of factors (cf pollardbrent()
2838: * above) actually contain `slots' of three GENs per factor with the
2839: * three fields being interpreted exactly as in our partial factorization
2840: * data structure. Thus `engines' can tell us what they already happen to
2841: * know about factors being prime or composite and/or appearing to a power
2842: * larger than the first
2843: */
2844:
2845: static long
2846: ifac_divide(GEN *partial, GEN *where);
2847: /* Divide all current composites by first (prime, class Q) entry, updating
2848: * its exponent, and turning it into a finished prime (class P). Return 1
2849: * if any such divisions succeeded (in Moebius mode, the update may then
2850: * not have been completed), or 0 if none of them succeeded. Doesn't
2851: * modify *where.
2852: */
2853:
2854: static long
2855: ifac_sort_one(GEN *partial, GEN *where, GEN washere);
2856: /* re-sort one (typically unknown) entry from washere to a new position,
2857: * rotating intervening entries upward to fill the vacant space. It may
2858: * happen (rarely) that the new position is the same as the old one, or
2859: * that the new value of the entry coincides with a value already occupying
2860: * a lower slot, in which latter case we just add exponents (and use the
2861: * `more known' class, and return 1 immediately when in Moebius mode).
2862: * The slots between *where and washere must be in sorted order, so a
2863: * sweep using this to re-sort several unknowns must proceed upward (see
2864: * ifac_resort() below). Return 1 if we see an exponent > 1 (in Moebius
2865: * mode without completing the update), 0 otherwise.
2866: */
2867:
2868: static long
2869: ifac_resort(GEN *partial, GEN *where);
2870: /* sort all current unknowns downward to where they belong. Sweeps
2871: * in the upward direction. Not needed after ifac_crack(), only when
2872: * ifac_divide() returned true. May update *where. Returns 1 when an
2873: * ifac_sort_one() call does so to indicate a repeated factor, or 0 if
2874: * any and all such calls returned 0
2875: */
2876:
2877: static void
2878: ifac_defrag(GEN *partial, GEN *where);
2879: /* defragment: collect and squeeze out any unoccupied slots above *where
2880: * during a downward sweep. Unoccupied slots arise when a composite factor
2881: * dissolves completely whilst dividing off a prime, or when ifac_resort()
2882: * spots a coincidence and merges two factors. *where will be updated
2883: */
2884:
2885: static void
2886: ifac_whoiswho(GEN *partial, GEN *where, long after_crack);
2887: /* determine primality or compositeness of all current unknowns, and set
2888: * class Q primes to finished (class P) if everything larger is already
2889: * known to be prime. When after_crack is nonnegative, only look at the
2890: * first after_crack things in the list (do nothing when it's zero)
2891: */
2892:
2893: static GEN
2894: ifac_main(GEN *partial);
2895: /* main loop: iterate until smallest entry is a finished prime; returns
2896: * a `where' pointer, or gun if nothing left, or gzero in Moebius mode if
2897: * we aren't squarefree
2898: */
2899:
2900: /* NB In the most common cases, control flows from the user interface to
2901: * ifac_main() and then to a succession of ifac_crack()s and ifac_divide()s,
2902: * with (typically) none of the latter finding anything.
2903: */
2904:
2905: /** user interface: **/
2906: /* return initial data structure, see ifac_crack() below for semantics
2907: * of the hint argument
2908: */
2909: GEN
2910: ifac_start(GEN n, long moebius, long hint);
2911:
2912: /* run main loop until primary factor is found, return the prime and
2913: * assign the exponent. If nothing left, return gun and set exponent
2914: * to 0; if in Moebius mode and a square factor is discovered, return
2915: * gzero and set exponent to 0
2916: */
2917: GEN
2918: ifac_primary_factor(GEN *partial, long *exponent);
2919:
2920: /* call ifac_start() and run main loop until factorization is complete,
2921: * accumulating prime / exponent pairs on the PARI stack to be picked up
2922: * by aux_end(). Return number of distinct primes found
2923: */
2924: long
2925: ifac_decomp(GEN n, long hint);
2926:
2927: /* completely encapsulated functions; these call ifac_start() themselves,
2928: * and ensure proper stack housekeeping etc. Call them on any large
2929: * composite left over after trial division, and multiply/add the result
2930: * onto whatever you already have from the small factors. Don't call
2931: * them on large primes; they will run into trouble
2932: */
2933: long
2934: ifac_moebius(GEN n, long hint);
2935:
2936: long
2937: ifac_issquarefree(GEN n, long hint);
2938:
2939: long
2940: ifac_omega(GEN n, long hint);
2941:
2942: long
2943: ifac_bigomega(GEN n, long hint);
2944:
2945: GEN
2946: ifac_totient(GEN n, long hint); /* for gp's eulerphi() */
2947:
2948: GEN
2949: ifac_numdiv(GEN n, long hint);
2950:
2951: GEN
2952: ifac_sumdiv(GEN n, long hint);
2953:
2954: GEN
2955: ifac_sumdivk(GEN n, long k, long hint);
2956:
2957: /*** implementation ***/
2958:
2959: #define ifac_initial_length 24 /* codeword, moebius flag, hint, 7 slots */
2960: /* (more than enough in most cases -- a 512-bit product of distinct 8-bit
2961: * primes needs at most 7 slots at a time)
2962: */
2963:
2964: GEN
2965: ifac_start(GEN n, long moebius, long hint)
2966: {
2967: GEN part, here;
2968:
2969: if (typ(n) != t_INT) err(typeer, "ifac_start");
2970: if (signe(n) == 0)
2971: err(talker, "factoring 0 in ifac_start");
2972:
2973: part = cgetg(ifac_initial_length, t_VEC);
2974: here = part + ifac_initial_length;
2975: part[1] = moebius? un : (long)NULL;
2976: switch(hint)
2977: {
2978: case 0:
2979: part[2] = zero; break;
2980: case 1:
2981: part[2] = un; break;
2982: case 2:
2983: part[2] = deux; break;
2984: default:
2985: part[2] = (long)stoi(hint);
2986: }
2987: if (isonstack(n))
2988: n = absi(n);
2989: /* make copy, because we'll later want to mpdivis() into it in place.
2990: * If it's not on stack, then we assume it is a clone made for us by
2991: * auxdecomp0(), and we assume the sign has already been set positive
2992: */
2993: /* fill first slot at the top end */
2994: *--here = zero; /* initially composite */
2995: *--here = un; /* initial exponent 1 */
2996: *--here = (long) n;
2997: /* and NULL out the remaining slots */
2998: while (here > part + 3) *--here = (long)NULL;
2999: return part;
3000: }
3001:
3002: static GEN
3003: ifac_find(GEN *partial, GEN *where)
3004: {
3005: long lgp = lg(*partial);
3006: GEN end = *partial + lgp;
3007: GEN scan = *where + 3;
3008:
3009: if (DEBUGLEVEL >= 5)
3010: {
3011: if (!*partial || typ(*partial) != t_VEC)
3012: err(typeer, "ifac_find");
3013: if (lg(*partial) < ifac_initial_length)
3014: err(talker, "partial impossibly short in ifac_find");
3015: if (!(*where) ||
3016: *where > *partial + lgp - 3 ||
3017: *where < *partial) /* sic */
3018: err(talker, "`*where\' out of bounds in ifac_find");
3019: }
3020: while (scan < end && !*scan) scan += 3;
3021: /* paranoia -- check completely NULLed ? nope -- we never inspect the
3022: * exponent field for deciding whether a slot is empty or occupied
3023: */
3024: if (scan < end)
3025: {
3026: if (DEBUGLEVEL >= 5)
3027: {
3028: if (!scan[1])
3029: err(talker, "factor has NULL exponent in ifac_find");
3030: }
3031: return scan;
3032: }
3033: return NULL;
3034: }
3035:
3036: /* simple defragmenter */
3037: static void
3038: ifac_defrag(GEN *partial, GEN *where)
3039: {
3040: long lgp = lg(*partial);
3041: GEN scan_new = *partial + lgp - 3, scan_old = scan_new;
3042:
3043: while (scan_old >= *where)
3044: {
3045: if (*scan_old) /* slot occupied? */
3046: {
3047: if (scan_old < scan_new)
3048: {
3049: scan_new[2] = scan_old[2];
3050: scan_new[1] = scan_old[1];
3051: *scan_new = *scan_old;
3052: }
3053: scan_new -= 3; /* point at next slot to be written */
3054: }
3055: scan_old -= 3;
3056: }
3057: scan_new += 3; /* back up to last slot written */
3058: *where = scan_new;
3059: while (scan_new > *partial + 3)
3060: *--scan_new = (long)NULL; /* erase junk */
3061: }
3062:
3063: /* and complex version combined with reallocation. If new_lg is 0, we
3064: * use the old length, so this acts just like gcopy except that the where
3065: * pointer is carried along; if it is 1, we make an educated guess.
3066: * Exception: If new_lg is 0, the vector is full to the brim, and the
3067: * first entry is composite, we make it longer to avoid being called again
3068: * a microsecond later (at significant cost).
3069: * It is safe to call this with NULL for the where argument; if it doesn't
3070: * point anywhere within the old structure, it will be left alone
3071: */
3072: void
3073: ifac_realloc(GEN *partial, GEN *where, long new_lg)
3074: {
3075: long old_lg = lg(*partial);
3076: GEN newpart, scan_new, scan_old;
3077:
3078: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
3079: {
3080: if (!*partial || typ(*partial) != t_VEC)
3081: err(typeer, "ifac_realloc");
3082: if (lg(*partial) < ifac_initial_length)
3083: err(talker, "partial impossibly short in ifac_realloc");
3084: }
3085:
3086: if (new_lg == 1)
3087: new_lg = 2*old_lg - 6; /* from 7 slots to 13 to 25... */
3088: else if (new_lg <= old_lg) /* includes case new_lg == 0 */
3089: {
3090: new_lg = old_lg;
3091: if ((*partial)[3] && /* structure full */
3092: ((*partial)[5]==zero || (*partial)[5]==(long)NULL))
3093: /* and first entry composite or unknown */
3094: new_lg += 6; /* give it a little more breathing space */
3095: }
3096: newpart = cgetg(new_lg, t_VEC);
3097: if (DEBUGMEM >= 3)
3098: {
3099: fprintferr("IFAC: new partial factorization structure (%ld slots)\n",
3100: (new_lg - 3)/3);
3101: flusherr();
3102: }
3103: newpart[1] = (*partial)[1]; /* moebius */
3104: newpart[2] = (*partial)[2]; /* hint */
3105: /* downward sweep through the old *partial, picking up where1 and carry-
3106: * ing it over if and when we pass it. (This will only be useful if
3107: * it pointed at a non-empty slot.) Factors are licopy()d so that we
3108: * again have a nice object (parent older than children, connected),
3109: * except the one factor that may still be living in a clone where n
3110: * originally was; exponents are similarly copied if they aren't global
3111: * constants; class-of-factor fields are always global constants so we
3112: * need only copy them as pointers. Caller may then do a gerepileupto()
3113: * or a gerepilemanysp()
3114: */
3115: scan_new = newpart + new_lg - 3;
3116: scan_old = *partial + old_lg - 3;
3117: for (; scan_old > *partial + 2; scan_old -= 3)
3118: {
3119: if (*where == scan_old) *where = scan_new;
3120: if (!*scan_old) continue; /* skip empty slots */
3121:
3122: *scan_new =
3123: isonstack((GEN)(*scan_old)) ?
3124: licopy((GEN)(*scan_old)) : *scan_old;
3125: scan_new[1] =
3126: isonstack((GEN)(scan_old[1])) ?
3127: licopy((GEN)(scan_old[1])) : scan_old[1];
3128: scan_new[2] = scan_old[2];
3129: scan_new -= 3;
3130: }
3131: scan_new += 3; /* back up to last slot written */
3132: while (scan_new > newpart + 3)
3133: *--scan_new = (long)NULL;
3134: *partial = newpart;
3135: }
3136:
3137: #define moebius_mode ((*partial)[1])
3138:
3139: /* Bubble-sort-of-thing sort. Won't be exercised frequently,
3140: * so this is ok.
3141: */
3142: static long
3143: ifac_sort_one(GEN *partial, GEN *where, GEN washere)
3144: {
3145: GEN scan = washere - 3;
3146: GEN value, exponent, class0, class1;
3147: long cmp_res;
3148:
3149: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
3150: {
3151: long lgp;
3152: if (!*partial || typ(*partial) != t_VEC)
3153: err(typeer, "ifac_sort_one");
3154: if ((lgp = lg(*partial)) < ifac_initial_length)
3155: err(talker, "partial impossibly short in ifac_sort_one");
3156: if (!(*where) ||
3157: *where < *partial + 3 ||
3158: *where > *partial + lgp - 3)
3159: err(talker, "`*where\' out of bounds in ifac_sort_one");
3160: if (!washere ||
3161: washere < *where ||
3162: washere > *partial + lgp - 3)
3163: err(talker, "`washere\' out of bounds in ifac_sort_one");
3164: }
3165: value = (GEN)(*washere);
3166: exponent = (GEN)(washere[1]);
3167: if (exponent != gun && moebius_mode && cmpsi(1,exponent) < 0)
3168: return 1; /* should have been detected by caller */
3169: class0 = (GEN)(washere[2]);
3170:
3171: if (scan < *where) return 0; /* nothing to do, washere==*where */
3172:
3173: cmp_res = -1; /* sentinel */
3174: while (scan >= *where) /* therefore at least once */
3175: {
3176: if (*scan) /* current slot nonempty */
3177: {
3178: /* check against where */
3179: cmp_res = cmpii(value, (GEN)(*scan));
3180: if (cmp_res >= 0) break; /* have found where to stop */
3181: }
3182: /* copy current slot upward by one position and move pointers down */
3183: scan[5] = scan[2];
3184: scan[4] = scan[1];
3185: scan[3] = *scan;
3186: scan -= 3;
3187: }
3188: scan += 3;
3189: /* at this point there are the following possibilities:
3190: * (*) cmp_res == -1. Either value is less than that at *where, or for
3191: * some reason *where was pointing at one or more vacant slots and any
3192: * factors we saw en route were larger than value. At any rate,
3193: * scan == *where now, and scan is pointing at an empty slot, into
3194: * which we'll stash our entry.
3195: * (*) cmp_res == 0. The entry at scan-3 is the one, we compare class0
3196: * fields and add exponents, and put it all into the vacated scan slot,
3197: * NULLing the one at scan-3 (and possibly updating *where).
3198: * (*) cmp_res == 1. The slot at scan is the one to store our entry
3199: * into.
3200: */
3201: if (cmp_res != 0)
3202: {
3203: if (cmp_res < 0 && scan != *where)
3204: err(talker, "misaligned partial detected in ifac_sort_one");
3205: *scan = (long)value;
3206: scan[1] = (long)exponent;
3207: scan[2] = (long)class0;
3208: return 0;
3209: }
3210: /* case cmp_res == 0: repeated factor detected */
3211: if (DEBUGLEVEL >= 4)
3212: {
3213: fprintferr("IFAC: repeated factor %Z\n\tdetected in ifac_sort_one\n",
3214: value);
3215: flusherr();
3216: }
3217: if (moebius_mode) return 1; /* not squarefree */
3218: /* if old class0 was composite and new is prime, or vice versa,
3219: * complain (and if one class0 was unknown and the other wasn't,
3220: * use the known one)
3221: */
3222: class1 = (GEN)(scan[-1]);
3223: if (class0) /* should never be used */
3224: {
3225: if(class1)
3226: {
3227: if (class0 == gzero && class1 != gzero)
3228: err(talker, "composite equals prime in ifac_sort_one");
3229: else if (class0 != gzero && class1 == gzero)
3230: err(talker, "prime equals composite in ifac_sort_one");
3231: else if (class0 == gdeux) /* should happen even less */
3232: scan[2] = (long)class0; /* use it */
3233: }
3234: else /* shouldn't happen either */
3235: scan[2] = (long)class0; /* use it */
3236: }
3237: /* else stay with the existing known class0 */
3238: scan[2] = (long)class1;
3239: /* in any case, add exponents */
3240: if (scan[-2] == un && exponent == gun)
3241: scan[1] = deux;
3242: else
3243: scan[1] = laddii((GEN)(scan[-2]), exponent);
3244: /* move the value over */
3245: *scan = scan[-3];
3246: /* null out the vacated slot below */
3247: *--scan = (long)NULL;
3248: *--scan = (long)NULL;
3249: *--scan = (long)NULL;
3250: /* finally, see whether *where should be pulled in */
3251: if (scan == *where) *where += 3;
3252: return 0;
3253: }
3254:
3255: /* the following loop around the former doesn't need to check moebius_mode
3256: * because ifac_sort_one() never returns 1 in normal mode
3257: */
3258: static long
3259: ifac_resort(GEN *partial, GEN *where)
3260: {
3261: long lgp = lg(*partial), res = 0;
3262: GEN scan = *where;
3263:
3264: for (; scan < *partial + lgp; scan += 3)
3265: {
3266: if (*scan && /* slot occupied */
3267: !scan[2]) /* with an unknown */
3268: {
3269: res |= ifac_sort_one(partial, where, scan);
3270: if (res) return res; /* early exit */
3271: }
3272: }
3273: return res;
3274: }
3275:
3276: /* sweep downward so we can with luck turn some Qs into Ps */
3277: static void
3278: ifac_whoiswho(GEN *partial, GEN *where, long after_crack)
3279: {
3280: long lgp = lg(*partial), larger_compos = 0;
3281: GEN scan, scan_end = *partial + lgp - 3;
3282:
3283: if (DEBUGLEVEL >= 5)
3284: {
3285: if (!*partial || typ(*partial) != t_VEC)
3286: err(typeer, "ifac_whoiswho");
3287: if (lg(*partial) < ifac_initial_length)
3288: err(talker, "partial impossibly short in ifac_whoiswho");
3289: if (!(*where) ||
3290: *where > scan_end ||
3291: *where < *partial + 3)
3292: err(talker, "`*where\' out of bounds in ifac_whoiswho");
3293: }
3294:
3295: if (after_crack == 0) return;
3296: if (after_crack > 0)
3297: {
3298: larger_compos = 1; /* disable Q-to-P trick */
3299: scan = *where + 3*(after_crack - 1);
3300: /* check at most after_crack entries */
3301: if (scan > scan_end) /* ooops... */
3302: {
3303: err(warner, "avoiding nonexistent factors in ifac_whoiswho");
3304: scan = scan_end;
3305: }
3306: }
3307: else { larger_compos = 0; scan = scan_end; }
3308:
3309: for (; scan >= *where; scan -= 3)
3310: {
3311: if (scan[2]) /* known class of factor */
3312: {
3313: if (scan[2] == zero) larger_compos = 1;
3314: else if (!larger_compos && scan[2] == un)
3315: {
3316: if (DEBUGLEVEL >= 3)
3317: {
3318: fprintferr("IFAC: factor %Z\n\tis prime (no larger composite)\n",
3319: **where);
3320: fprintferr("IFAC: prime %Z\n\tappears with exponent = %ld\n",
3321: **where, itos((GEN)(*where)[1]));
3322: }
3323: scan[2] = deux;
3324: } /* no else case */
3325: continue;
3326: }
3327: scan[2] =
3328: (isprime((GEN)(*scan)) ?
3329: (larger_compos ? un : deux) : /* un- or finished prime */
3330: zero); /* composite */
3331:
3332: if (scan[2] == zero) larger_compos = 1;
3333: if (DEBUGLEVEL >= 3)
3334: {
3335: fprintferr("IFAC: factor %Z\n\tis %s\n", *scan,
3336: (scan[2] == zero ? "composite" : "prime"));
3337: }
3338: }
3339: }
3340:
3341: /* Here we normally do not check that the first entry is a not-finished
3342: * prime. Stack management: we may allocate a new exponent
3343: */
3344: static long
3345: ifac_divide(GEN *partial, GEN *where)
3346: {
3347: long lgp = lg(*partial);
3348: GEN scan = *where + 3;
3349: long res = 0, exponent, newexp, otherexp;
3350:
3351: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
3352: {
3353: if (!*partial || typ(*partial) != t_VEC)
3354: err(typeer, "ifac_divide");
3355: if (lg(*partial) < ifac_initial_length)
3356: err(talker, "partial impossibly short in ifac_divide");
3357: if (!(*where) ||
3358: *where > *partial + lgp - 3 ||
3359: *where < *partial + 3)
3360: err(talker, "`*where\' out of bounds in ifac_divide");
3361: if ((*where)[2] != un)
3362: err(talker, "division by composite or finished prime in ifac_divide");
3363: }
3364: if (!(**where)) /* always test just this one */
3365: err(talker, "division by nothing in ifac_divide");
3366:
3367: newexp = exponent = itos((GEN)((*where)[1]));
3368: if (exponent > 1 && moebius_mode) return 1;
3369: /* should've been caught by caller already */
3370:
3371: /* go for it */
3372: for (; scan < *partial + lgp; scan += 3)
3373: {
3374: if (scan[2] != zero) continue; /* the other thing ain't composite */
3375: otherexp = 0;
3376: /* let mpdivis divide the other factor in place to keep stack clutter
3377: minimal */
3378: while (mpdivis((GEN)(*scan), (GEN)(**where), (GEN)(*scan)))
3379: {
3380: if (moebius_mode) return 1; /* immediately */
3381: if (!otherexp) otherexp = itos((GEN)(scan[1]));
3382: newexp += otherexp;
3383: }
3384: if (newexp > exponent) /* did anything happen? */
3385: {
3386: (*where)[1] = (newexp == 2 ? deux : (long)(stoi(newexp)));
3387: exponent = newexp;
3388: if (is_pm1((GEN)(*scan))) /* factor dissolved completely */
3389: {
3390: *scan = scan[1] = (long)NULL;
3391: if (DEBUGLEVEL >= 4)
3392: fprintferr("IFAC: a factor was a power of another prime factor\n");
3393: }
3394: else if (DEBUGLEVEL >= 4)
3395: {
3396: fprintferr("IFAC: a factor was divisible by another prime factor,\n");
3397: fprintferr("\tleaving a cofactor = %Z\n", *scan);
3398: }
3399: scan[2] = (long)NULL; /* at any rate it's Unknown now */
3400: res = 1;
3401: if (DEBUGLEVEL >= 5)
3402: {
3403: fprintferr("IFAC: prime %Z\n\tappears at least to the power %ld\n",
3404: **where, newexp);
3405: }
3406: }
3407: } /* for */
3408: (*where)[2] = deux; /* make it a finished prime */
3409: if (DEBUGLEVEL >= 3)
3410: {
3411: fprintferr("IFAC: prime %Z\n\tappears with exponent = %ld\n",
3412: **where, newexp);
3413: }
3414: return res;
3415: }
3416:
3417:
3418: GEN mpqs(GEN N); /* in src/modules/mpqs.c, maybe a dummy,
3419: * returns a factor, or a vector of factors,
3420: * or NULL
3421: */
3422:
3423: /* The following takes the place of 2.0.9.alpha's find_factor(). */
3424:
3425: /* The meaning of the hint changes against 2.0.9.alpha to:
3426: * hint == 0 : Use our own strategy, and this should be the default
3427: * hint & 1 : Avoid mpqs(), use ellfacteur() after pollardbrent()
3428: * hint & 2 : Avoid first-stage ellfacteur() in favour of mpqs()
3429: * (which may still fall back to ellfacteur() if mpqs() is not installed
3430: * or gives up)
3431: * hint & 4 : Avoid even the pollardbrent() and squfof() stages
3432: * hint & 8 : Avoid final ellfacteur(); this may `declare' a composite
3433: * to be prime.
3434: */
3435:
3436: /* stack housekeeping: this routine may create one or more objects (a new
3437: * factor, or possibly several, and perhaps one or more new exponents > 2)
3438: * Added squfof --GN2000Oct01
3439: */
3440: static long
3441: ifac_crack(GEN *partial, GEN *where)
3442: {
3443: long hint, cmp_res, exp1 = 1, exp2 = 1, av;
3444: GEN factor = NULL, exponent;
3445:
3446: if (DEBUGLEVEL >= 5) /* none of these should ever happen */
3447: {
3448: long lgp;
3449: if (!*partial || typ(*partial) != t_VEC)
3450: err(typeer, "ifac_crack");
3451: if ((lgp = lg(*partial)) < ifac_initial_length)
3452: err(talker, "partial impossibly short in ifac_crack");
3453: if (!(*where) ||
3454: *where < *partial + 6 || /* sic -- caller must realloc first */
3455: *where > *partial + lgp - 3)
3456: err(talker, "`*where\' out of bounds in ifac_crack");
3457: if (!(**where) || typ((GEN)(**where)) != t_INT)
3458: err(typeer, "ifac_crack");
3459: if ((*where)[2] != zero)
3460: err(talker, "operand not known composite in ifac_crack");
3461: }
3462: hint = itos((GEN)((*partial)[2])) & 15;
3463: exponent = (GEN)((*where)[1]);
3464:
3465: if (DEBUGLEVEL >= 3)
3466: fprintferr("IFAC: cracking composite\n\t%Z\n", **where);
3467:
3468: /* crack squares. Quite fast due to the initial square residue test */
3469: if (DEBUGLEVEL >= 4)
3470: fprintferr("IFAC: checking for pure square\n");
3471: av = avma;
3472: while (carrecomplet((GEN)(**where), &factor))
3473: {
3474: if (DEBUGLEVEL >= 4)
3475: fprintferr("IFAC: found %Z =\n\t%Z ^2\n", **where, factor);
3476: affii(factor, (GEN)(**where)); avma = av; factor = NULL;
3477: if (exponent == gun)
3478: (*where)[1] = deux;
3479: else if (exponent == gdeux)
3480: { (*where)[1] = (long)stoi(4); av = avma; }
3481: else
3482: { affii(shifti(exponent, 1), (GEN)((*where)[1])); avma = av; }
3483: exponent = (GEN)((*where)[1]);
3484: if (moebius_mode) return 0; /* no need to carry on... */
3485: exp1 = 2;
3486: } /* while carrecomplet */
3487:
3488: /* check whether our composite hasn't become prime */
3489: if (exp1 > 1 && hint != 15 && isprime((GEN)(**where)))
3490: {
3491: (*where)[2] = un;
3492: if (DEBUGLEVEL >= 4)
3493: {
3494: fprintferr("IFAC: factor %Z\n\tis prime\n",**where);
3495: flusherr();
3496: }
3497: return 0; /* bypass subsequent ifac_whoiswho() call */
3498: }
3499: /* still composite -- carry on */
3500:
3501: /* MPQS cannot factor prime powers; check for cubes/5th/7th powers.
3502: * Do this even if MPQS is blocked by hint -- it still serves a useful
3503: * purpose in bounded factorization
3504: */
3505: {
3506: long mask = 7;
3507: if (DEBUGLEVEL == 4)
3508: fprintferr("IFAC: checking for odd power\n");
3509: /* (At debug levels > 4, is_odd_power() itself prints something more
3510: * informative)
3511: */
3512: av = avma;
3513: while ((exp1 = /* assignment */
3514: is_odd_power((GEN)(**where), &factor, &mask)))
3515: {
3516: if (exp2 == 1) exp2 = exp1; /* remember this after the loop */
3517: if (DEBUGLEVEL >= 4)
3518: fprintferr("IFAC: found %Z =\n\t%Z ^%ld\n", **where, factor, exp1);
3519: affii(factor, (GEN)(**where)); avma = av; factor = NULL;
3520: if (exponent == gun)
3521: { (*where)[1] = (long)stoi(exp1); av = avma; }
3522: else if (exponent == gdeux)
3523: { (*where)[1] = (long)stoi(exp1<<1); av = avma; }
3524: else
3525: { affii(mulsi(exp1, exponent), (GEN)((*where)[1])); avma = av; }
3526: exponent = (GEN)((*where)[1]);
3527: if (moebius_mode) return 0; /* no need to carry on... */
3528: } /* while is_odd_power */
3529:
3530: if (exp2 > 1 && hint != 15 && isprime((GEN)(**where)))
3531: { /* Something nice has happened and our composite has become prime */
3532: (*where)[2] = un;
3533: if (DEBUGLEVEL >= 4)
3534: {
3535: fprintferr("IFAC: factor %Z\n\tis prime\n", **where);
3536: flusherr();
3537: }
3538: return 0; /* bypass subsequent ifac_whoiswho() call */
3539: }
3540: } /* odd power stage */
3541:
3542: /* pollardbrent() Rho usually gets a first chance */
3543: if (!(hint & 4))
3544: {
3545: if (DEBUGLEVEL >= 4)
3546: fprintferr("IFAC: trying Pollard-Brent rho method first\n");
3547: factor = pollardbrent((GEN)(**where));
3548: } /* Rho stage */
3549:
3550: /* Shanks' squfof() is next. It will pass up the chance silently when
3551: * the input number is too large. We put this under the same governing
3552: * bit of the hint parameter, for no very good reason other than avoiding
3553: * a proliferation of further meaningful bits in all the wrong order.
3554: */
3555: if (!factor && !(hint & 4))
3556: {
3557: if (DEBUGLEVEL >= 4)
3558: fprintferr("IFAC: trying Shanks' SQUFOF, will fail silently if input\n"
3559: " is too large for it.\n");
3560: factor = squfof((GEN)(**where), 0); /* allow squfof's own diagnostics */
3561: }
3562:
3563: /* if this didn't work, try one of our high-power beasties */
3564: if (!factor && !(hint & 2))
3565: {
3566: if (DEBUGLEVEL >= 4)
3567: fprintferr("IFAC: trying Lenstra-Montgomery ECM\n");
3568: factor = ellfacteur((GEN)(**where), 0); /* do not insist */
3569: } /* First ECM stage */
3570:
3571: if (!factor && !(hint & 1))
3572: {
3573: if (DEBUGLEVEL >= 4)
3574: fprintferr("IFAC: trying Multi-Polynomial Quadratic Sieve\n");
3575: factor = mpqs((GEN)(**where));
3576: } /* MPQS stage */
3577:
3578: if (!factor)
3579: {
3580: if (!(hint & 8)) /* still no luck? force it */
3581: {
3582: if (DEBUGLEVEL >= 4)
3583: fprintferr("IFAC: forcing ECM, may take some time\n");
3584: factor = ellfacteur((GEN)(**where), 1);
3585: } /* final ECM stage, guaranteed to succeed */
3586: else /* limited factorization */
3587: {
3588: if (DEBUGLEVEL >= 2)
3589: {
3590: err(warner, "IFAC: unfactored composite declared prime");
3591: /* don't print it out at level 3 or above, where it would appear
3592: * several times before and after this message already
3593: */
3594: if (DEBUGLEVEL == 2)
3595: {
3596: fprintferr("\t%Z\n",**where);
3597: flusherr();
3598: }
3599: }
3600: (*where)[2] = un; /* might as well trial-divide by it... */
3601: return 1;
3602: }
3603: } /* Final ECM stage */
3604:
3605: if (DEBUGLEVEL >= 1)
3606: {
3607: if (!factor) /* never reached */
3608: err(talker, "all available factoring methods failed in ifac_crack");
3609: }
3610: if (typ(factor) == t_VEC) /* delegate this case */
3611: return ifac_insert_multiplet(partial, where, factor);
3612:
3613: else if (typ(factor) != t_INT)
3614: {
3615: fprintferr("IFAC: factorizer returned strange object to ifac_crack\n");
3616: outerr(factor);
3617: err(bugparier, "factoring");
3618: }
3619:
3620: /* got single integer back: work out the cofactor (in place) */
3621: if (!mpdivis((GEN)(**where), factor, (GEN)(**where)))
3622: {
3623: fprintferr("IFAC: factoring %Z\n", **where);
3624: fprintferr("\tyielded `factor\' %Z\n\twhich isn't!\n", factor);
3625: err(bugparier, "factoring");
3626: }
3627:
3628: /* the factoring engines report the factor found when DEBUGLEVEL is
3629: * large enough; let's tell about the cofactor
3630: */
3631: if (DEBUGLEVEL >= 4)
3632: fprintferr("IFAC: cofactor = %Z\n", **where);
3633:
3634: /* ok, now `factor' is one factor and **where is the other, find out
3635: * which is larger
3636: */
3637: cmp_res = cmpii(factor, (GEN)(**where));
3638: if (cmp_res < 0) /* common case */
3639: {
3640: (*where)[2] = (long)NULL; /* mark cofactor `unknown' */
3641: (*where)[-1] = (long)NULL; /* mark factor `unknown' */
3642: (*where)[-2] =
3643: isonstack(exponent) ? licopy(exponent) : (long)exponent;
3644: *where -= 3;
3645: **where = (long)factor;
3646: return 2;
3647: }
3648: else if (cmp_res == 0) /* hep, split a square in the middle */
3649: {
3650: err(warner,
3651: "square not found by carrecomplet, ifac_crack recovering");
3652: cgiv(factor);
3653: (*where)[2] = (long)NULL; /* mark the sqrt `unknown' */
3654: if (exponent == gun) /* double the exponent */
3655: (*where)[1] = deux;
3656: else if (exponent == gdeux)
3657: (*where)[1] = (long)stoi(4); /* make a new one */
3658: else /* overwrite old exponent */
3659: {
3660: av = avma;
3661: affii(shifti(exponent, 1), (GEN)((*where)[1]));
3662: avma = av;
3663: /* leave *where unchanged */
3664: }
3665: if (moebius_mode) return 0;
3666: return 1;
3667: }
3668: else /* factor > cofactor, rearrange */
3669: {
3670: (*where)[2] = (long)NULL; /* mark factor `unknown' */
3671: (*where)[-1] = (long)NULL; /* mark cofactor `unknown' */
3672: (*where)[-2] =
3673: isonstack(exponent) ? licopy(exponent) : (long)exponent;
3674: *where -= 3;
3675: **where = (*where)[3]; /* move cofactor pointer to lowest slot */
3676: (*where)[3] = (long)factor; /* save factor */
3677: return 2;
3678: }
3679: }
3680:
3681: /* the following doesn't collect garbage; caller's caller should do it
3682: * (which means ifac_main()). No diagnostics either, the factoring engine
3683: * should have printed what it found when DEBUGLEVEL>=4 or so. Note facvec
3684: * contains slots of three components per factor; repeated factors are
3685: * expressly allowed (and their classes shouldn't contradict each other
3686: * whereas their exponents will be added up)
3687: */
3688: static long
3689: ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec)
3690: {
3691: long j,k=1, lfv=lg(facvec)-1, nf=lfv/3, room=(long)(*where-*partial);
3692: /* one of the factors will go into the *where slot, so room is now
3693: * 3 times the number of slots we can use
3694: */
3695: long needroom = lfv - room;
3696: GEN sorted, auxvec = cgetg(nf+1, t_VEC), factor;
3697: long exponent = itos((GEN)((*where)[1])); /* the old exponent */
3698: GEN newexp;
3699:
3700: if (DEBUGLEVEL >= 5)
3701: fprintferr("IFAC: incorporating set of %ld factor(s)%s\n",
3702: nf, (DEBUGLEVEL >=6 ? "..." : ""));
3703: /* fixed: squfof may return a single, squared, factor as a set
3704: * --GN2000Oct01
3705: */
3706: if (needroom > 0)
3707: ifac_realloc(partial, where, lg(*partial) + needroom + 3);
3708: /* one extra slot for paranoia, errm, future use */
3709:
3710: /* create sort permutation from the values of the factors */
3711: for (j=nf; j; j--) auxvec[j] = facvec[3*j-2]; /* just the pointers */
3712: sorted = sindexsort(auxvec);
3713: /* and readjust the result for the triple spacing */
3714: for (j=nf; j; j--) sorted[j] = 3*sorted[j]-2;
3715: if (DEBUGLEVEL >= 6)
3716: fprintferr("\tsorted them...\n");
3717:
3718: /* store factors, beginning at *where, and catching any duplicates */
3719: **where = facvec[sorted[nf]];
3720: if ((newexp = (GEN)(facvec[sorted[nf]+1])) != gun) /* new exponent > 1 */
3721: {
3722: if (exponent == 1)
3723: (*where)[1] = isonstack(newexp) ? licopy(newexp) : (long)newexp;
3724: else
3725: (*where)[1] = lmulsi(exponent, newexp);
3726: } /* if new exponent is 1, the old exponent already in place will do */
3727: (*where)[2] = facvec[sorted[nf]+2]; /* copy class */
3728: if (DEBUGLEVEL >= 6)
3729: fprintferr("\tstored (largest) factor no. %ld...\n", nf);
3730:
3731: for (j=nf-1; j; j--)
3732: {
3733: factor = (GEN)(facvec[sorted[j]]);
3734: if (egalii(factor, (GEN)(**where)))
3735: {
3736: if (DEBUGLEVEL >= 6)
3737: fprintferr("\tfactor no. %ld is a duplicate%s\n",
3738: j, (j>1 ? "..." : ""));
3739: /* update exponent, ignore class which would already have been set,
3740: * and then forget current factor
3741: */
3742: if ((newexp = (GEN)(facvec[sorted[j]+1])) != gun) /* new exp > 1 */
3743: { /* now we have at least 3 */
3744: (*where)[1] = laddii((GEN)((*where)[1]),
3745: mulsi(exponent, newexp));
3746: }
3747: else
3748: {
3749: if ((*where)[1] == un && exponent == 1)
3750: (*where)[1] = deux;
3751: else
3752: (*where)[1] = laddsi(exponent, (GEN)((*where)[1]));
3753: /* not safe to add 1 in place -- that might overwrite gdeux,
3754: * with `interesting' consequences
3755: */
3756: }
3757: if (moebius_mode) return 0; /* stop now, but with exponent updated */
3758: continue;
3759: }
3760: (*where)[-1] = facvec[sorted[j]+2]; /* class as given */
3761: if ((newexp = (GEN)(facvec[sorted[j]+1])) != gun) /* new exp > 1 */
3762: {
3763: if (exponent == 1 && newexp == gdeux)
3764: (*where)[-2] = deux;
3765: else /* exponent*newexp > 2 */
3766: (*where)[-2] = lmulsi(exponent, newexp);
3767: }
3768: else
3769: {
3770: (*where)[-2] = (exponent == 1 ? un :
3771: (exponent == 2 ? deux :
3772: (long)stoi(exponent))); /* inherit parent's exponent */
3773: }
3774: (*where)[-3] = isonstack(factor) ? licopy(factor) : (long)factor;
3775: /* keep components younger than *partial */
3776: *where -= 3;
3777: k++;
3778: if (DEBUGLEVEL >= 6)
3779: fprintferr("\tfactor no. %ld was unique%s\n",
3780: j, (j>1 ? " (so far)..." : ""));
3781: }
3782: /* make the `sorted' object safe for garbage collection (probably not a
3783: * problem, since it should be in the garbage zone from everybody's
3784: * perspective, but it's easy to do it)
3785: */
3786: *sorted = evaltyp(t_INT) | evallg(nf+1);
3787: return k;
3788: }
3789:
3790: static GEN
3791: ifac_main(GEN *partial)
3792: {
3793: /* leave the basic error checking to ifac_find() */
3794: GEN here = ifac_find(partial, partial);
3795: long res, nf;
3796:
3797: /* if nothing left, return gun */
3798: if (!here) return gun;
3799:
3800: /* if we are in Moebius mode and have already detected a repeated factor,
3801: * stop right here. Shouldn't really happen
3802: */
3803: if (moebius_mode && here[1] != un)
3804: {
3805: if (DEBUGLEVEL >= 3)
3806: {
3807: fprintferr("IFAC: main loop: repeated old factor\n\t%Z\n", *here);
3808: flusherr();
3809: }
3810: return gzero;
3811: }
3812:
3813: /* loop until first entry is a finished prime. May involve reallocations
3814: * and thus updates of *partial
3815: */
3816: while (here[2] != deux)
3817: {
3818: /* if it's unknown, something has gone wrong; try to recover */
3819: if (!(here[2]))
3820: {
3821: err(warner, "IFAC: unknown factor seen in main loop");
3822: res = ifac_resort(partial, &here);
3823: if (res) return gzero; /* can only happen in Moebius mode */
3824: ifac_whoiswho(partial, &here, -1);
3825: /* defrag for good measure */
3826: ifac_defrag(partial, &here);
3827: continue;
3828: }
3829: /* if it's composite, crack it */
3830: if (here[2] == zero)
3831: {
3832: /* make sure there's room for another factor */
3833: if (here < *partial + 6)
3834: { /* try defrag first */
3835: ifac_defrag(partial, &here);
3836: if (here < *partial + 6) /* no luck */
3837: {
3838: ifac_realloc(partial, &here, 1); /* guaranteed to work */
3839: /* Unfortunately, we can't do a garbage collection here since we
3840: * know too little about where in the stack the old components
3841: * were.
3842: */
3843: }
3844: }
3845: nf = ifac_crack(partial, &here);
3846: if (moebius_mode && here[1] != un) /* that was a power */
3847: {
3848: if (DEBUGLEVEL >= 3)
3849: {
3850: fprintferr("IFAC: main loop: repeated new factor\n\t%Z\n", *here);
3851: flusherr();
3852: }
3853: return gzero;
3854: }
3855: /* deal with the new unknowns. No resort, since ifac_crack will
3856: * already have sorted them
3857: */
3858: ifac_whoiswho(partial, &here, nf);
3859: continue;
3860: }
3861: /* if it's prime but not yet finished, finish it */
3862: if (here[2] == un)
3863: {
3864: res = ifac_divide(partial, &here);
3865: if (res)
3866: {
3867: if (moebius_mode)
3868: {
3869: if (DEBUGLEVEL >= 3)
3870: {
3871: fprintferr("IFAC: main loop: another factor was divisible by\n");
3872: fprintferr("\t%Z\n", *here); flusherr();
3873: }
3874: return gzero;
3875: }
3876: ifac_defrag(partial, &here);
3877: (void)(ifac_resort(partial, &here)); /* sort new cofactors down */
3878: /* it doesn't matter right now whether this finds a repeated factor,
3879: * since we never get to this point in Moebius mode
3880: */
3881: ifac_defrag(partial, &here); /* resort may have created new gaps */
3882: ifac_whoiswho(partial, &here, -1);
3883: }
3884: continue;
3885: }
3886: /* there are no other cases, never reached */
3887: err(talker, "non-existent factor class in ifac_main");
3888: } /* while */
3889: if (moebius_mode && here[1] != un)
3890: {
3891: if (DEBUGLEVEL >= 3)
3892: {
3893: fprintferr("IFAC: after main loop: repeated old factor\n\t%Z\n", *here);
3894: flusherr();
3895: }
3896: return gzero; /* just a safety net */
3897: }
3898: if (DEBUGLEVEL >= 4)
3899: {
3900: long nf = (*partial + lg(*partial) - here - 3)/3;
3901: if (nf)
3902: fprintferr("IFAC: main loop: %ld factor%s left\n",
3903: nf, (nf>1 ? "s" : ""));
3904: else
3905: fprintferr("IFAC: main loop: this was the last factor\n");
3906: flusherr();
3907: }
3908: return here;
3909: }
3910:
3911: /* Caller of the following should worry about stack management, it makes
3912: * a rather shameless mess :^)
3913: */
3914: GEN
3915: ifac_primary_factor(GEN *partial, long *exponent)
3916: {
3917: GEN here = ifac_main(partial);
3918: GEN res;
3919:
3920: if (here == gun) { *exponent = 0; return gun; }
3921: else if (here == gzero) { *exponent = 0; return gzero; }
3922:
3923: res = icopy((GEN)(*here));
3924: *exponent = itos((GEN)(here[1]));
3925: here[2] = here[1] = *here = (long)NULL;
3926: return res;
3927: }
3928:
3929: /* encapsulated routines */
3930:
3931: /* prime/exponent pairs need to appear contiguously on the stack, but we
3932: * also need to have our data structure somewhere, and we don't know in
3933: * advance how many primes will turn up. The following discipline achieves
3934: * this: When ifac_decomp() is called, n should point at an object older
3935: * than the oldest small prime/exponent pair (auxdecomp0() guarantees
3936: * this easily since it mpdivis()es any divisors it discovers off its own
3937: * copy of the original N). We allocate sufficient space to accommodate
3938: * several pairs -- eleven pairs ought to fit in a space not much larger
3939: * than n itself -- before calling ifac_start(). If we manage to complete
3940: * the factorization before we run out of space, we free the data structure
3941: * and cull the excess reserved space before returning. When we do run out,
3942: * we have to leapfrog to generate more (guesstimating the requirements
3943: * from what is left in the partial factorization structure); room for
3944: * fresh pairs is allocated at the head of the stack, followed by an
3945: * ifac_realloc() to reconnect the data structure and move it out of the
3946: * way, followed by a few pointer tweaks to connect the new pairs space
3947: * to the old one.-- This whole affair translates into a surprisingly
3948: * compact little routine.
3949: */
3950:
3951: #define ifac_overshoot 64 /* lgefint(n)+64 words reserved */
3952: /* ifac_decomp_break:
3953: *
3954: * Find primary factors of n until ifac_break return true, or n is
3955: * factored if ifac_break is NULL.
3956: */
3957: /* ifac_break:
3958: *
3959: * state is for state management of the function, and depend of the
3960: * function. ifac_break is called initially in decomp_break with
3961: * here=NULL. This allows the function to see the new value of n.
3962: * return 1: stop factoring, 0 continue. If ifac_break is NULL,
3963: * assumed to always return 0. ifac_break must not let anything on the
3964: * stack. However data can be stored in state
3965: */
3966:
3967: long
3968: ifac_decomp_break(GEN n, long (*ifac_break)(GEN n,GEN pairs,GEN here,GEN state),
3969: GEN state, long hint)
3970: {
3971: long tf=lgefint(n), av=avma, lim=stack_lim(av,1);
3972: long nb=0;
3973: GEN part, here, workspc = new_chunk(tf + ifac_overshoot), pairs = (GEN)av;
3974: /* workspc will be doled out by us in pairs of smaller t_INTs */
3975: long tetpil = avma; /* remember head of workspc zone */
3976:
3977: if (!n || typ(n) != t_INT) err(typeer, "ifac_decomp");
3978: if (!signe(n) || tf < 3) err(talker, "factoring 0 in ifac_decomp");
3979:
3980: part = ifac_start(n, 0, hint);
3981: here = ifac_main(&part);
3982:
3983: while (here != gun)
3984: {
3985: long lf=lgefint((GEN)(*here));
3986: if (pairs - workspc < lf + 3) /* out of room, leapfrog */
3987: {
3988: /* the ifac_realloc() below will clear tetpil - avma words
3989: * on the stack, which should be about enough for the extra
3990: * primes we're going to see, and we'll want several more to
3991: * accommodate further exponents. In most cases, the lf + 3
3992: * below is pure paranoia, but the factor we're about to copy
3993: * might be the one sitting off the stack in the original n,
3994: * so let's play safe
3995: */
3996: workspc = new_chunk(lf + 3 + ifac_overshoot);
3997: ifac_realloc(&part, &here, 0);
3998: here = ifac_find(&part, &part);
3999: tetpil = (long)workspc;
4000: }
4001: /* room enough now */
4002: nb++;
4003: pairs -= lf;
4004: *pairs = evaltyp(t_INT) | evallg(lf);
4005: affii((GEN)(*here), pairs);
4006: pairs -= 3;
4007: *pairs = evaltyp(t_INT) | evallg(3);
4008: affii((GEN)(here[1]), pairs);
4009: if (ifac_break && (*ifac_break)(n,pairs,here,state))
4010: {
4011: if (DEBUGLEVEL >= 3)
4012: fprintferr("IFAC: (Partial fact.)Stop requested.\n");
4013: break;
4014: }
4015: here[2] = here[1] = *here = (long)NULL;
4016: here = ifac_main(&part);
4017: if (low_stack(lim, stack_lim(av,1)))
4018: {
4019: if(DEBUGMEM>1) err(warnmem,"[2] ifac_decomp");
4020: ifac_realloc(&part, &here, 0);
4021: part = gerepileupto(tetpil, part);
4022: }
4023: }
4024: avma = (long)pairs;
4025: if (DEBUGLEVEL >= 3)
4026: {
4027: fprintferr("IFAC: found %ld large prime (power) factor%s.\n",
4028: nb, (nb>1? "s": ""));
4029: flusherr();
4030: }
4031: return nb;
4032: }
4033:
4034: long
4035: ifac_decomp(GEN n, long hint)
4036: {
4037: return ifac_decomp_break(n, NULL, gzero, hint);
4038: }
4039:
4040: long
4041: ifac_moebius(GEN n, long hint)
4042: {
4043: long mu=1, av=avma, lim=stack_lim(av,1);
4044: GEN part = ifac_start(n, 1, hint);
4045: GEN here = ifac_main(&part);
4046:
4047: while (here != gun && here != gzero)
4048: {
4049: if (itos((GEN)(here[1])) > 1)
4050: { here = gzero; break; } /* shouldn't happen */
4051: mu = -mu;
4052: here[2] = here[1] = *here = (long)NULL;
4053: here = ifac_main(&part);
4054: if (low_stack(lim, stack_lim(av,1)))
4055: {
4056: if(DEBUGMEM>1) err(warnmem,"ifac_moebius");
4057: ifac_realloc(&part, &here, 0);
4058: part = gerepileupto(av, part);
4059: }
4060: }
4061: avma = av;
4062: return (here == gun ? mu : 0);
4063: }
4064:
4065: long
4066: ifac_issquarefree(GEN n, long hint)
4067: {
4068: long av=avma, lim=stack_lim(av,1);
4069: GEN part = ifac_start(n, 1, hint);
4070: GEN here = ifac_main(&part);
4071:
4072: while (here != gun && here != gzero)
4073: {
4074: if (itos((GEN)(here[1])) > 1)
4075: { here = gzero; break; } /* shouldn't happen */
4076: here[2] = here[1] = *here = (long)NULL;
4077: here = ifac_main(&part);
4078: if (low_stack(lim, stack_lim(av,1)))
4079: {
4080: if(DEBUGMEM>1) err(warnmem,"ifac_issquarefree");
4081: ifac_realloc(&part, &here, 0);
4082: part = gerepileupto(av, part);
4083: }
4084: }
4085: avma = av;
4086: return (here == gun ? 1 : 0);
4087: }
4088:
4089: long
4090: ifac_omega(GEN n, long hint)
4091: {
4092: long omega=0, av=avma, lim=stack_lim(av,1);
4093: GEN part = ifac_start(n, 0, hint);
4094: GEN here = ifac_main(&part);
4095:
4096: while (here != gun)
4097: {
4098: omega++;
4099: here[2] = here[1] = *here = (long)NULL;
4100: here = ifac_main(&part);
4101: if (low_stack(lim, stack_lim(av,1)))
4102: {
4103: if(DEBUGMEM>1) err(warnmem,"ifac_omega");
4104: ifac_realloc(&part, &here, 0);
4105: part = gerepileupto(av, part);
4106: }
4107: }
4108: avma = av;
4109: return omega;
4110: }
4111:
4112: long
4113: ifac_bigomega(GEN n, long hint)
4114: {
4115: long Omega=0, av=avma, lim=stack_lim(av,1);
4116: GEN part = ifac_start(n, 0, hint);
4117: GEN here = ifac_main(&part);
4118:
4119: while (here != gun)
4120: {
4121: Omega += itos((GEN)(here[1]));
4122: here[2] = here[1] = *here = (long)NULL;
4123: here = ifac_main(&part);
4124: if (low_stack(lim, stack_lim(av,1)))
4125: {
4126: if(DEBUGMEM>1) err(warnmem,"ifac_bigomega");
4127: ifac_realloc(&part, &here, 0);
4128: part = gerepileupto(av, part);
4129: }
4130: }
4131: avma = av;
4132: return Omega;
4133: }
4134:
4135: GEN
4136: ifac_totient(GEN n, long hint)
4137: {
4138: GEN res = cgeti(lgefint(n));
4139: long exponent, av=avma, tetpil, lim=stack_lim(av,1);
4140: GEN phi = gun;
4141: GEN part = ifac_start(n, 0, hint);
4142: GEN here = ifac_main(&part);
4143:
4144: while (here != gun)
4145: {
4146: phi = mulii(phi, addsi(-1, (GEN)(*here)));
4147: if (here[1] != un)
4148: {
4149: if (here[1] == deux)
4150: {
4151: phi = mulii(phi, (GEN)(*here));
4152: }
4153: else
4154: {
4155: exponent = itos((GEN)(here[1]));
4156: phi = mulii(phi, gpowgs((GEN)(*here), exponent-1));
4157: }
4158: }
4159: here[2] = here[1] = *here = (long)NULL;
4160: here = ifac_main(&part);
4161: if (low_stack(lim, stack_lim(av,1)))
4162: {
4163: GEN *gsav[2];
4164: if(DEBUGMEM>1) err(warnmem,"ifac_totient");
4165: tetpil = avma;
4166: ifac_realloc(&part, &here, 0);
4167: phi = icopy(phi);
4168: gsav[0] = φ gsav[1] = ∂
4169: gerepilemanysp(av, tetpil, gsav, 2);
4170: /* don't try to preserve here, safer to pick it up again
4171: * (and ifac_find does a lot of sanity checking at high
4172: * debuglevels)
4173: */
4174: here = ifac_find(&part, &part);
4175: }
4176: }
4177: affii(phi, res);
4178: avma = av;
4179: return res;
4180: }
4181:
4182: GEN
4183: ifac_numdiv(GEN n, long hint)
4184: {
4185: /* we don't preallocate since it's too hard to guess the right
4186: * size here
4187: */
4188: GEN res;
4189: long av=avma, tetpil, lim=stack_lim(av,1);
4190: GEN exponent, tau = gun;
4191: GEN part = ifac_start(n, 0, hint);
4192: GEN here = ifac_main(&part);
4193:
4194: while (here != gun)
4195: {
4196: exponent = (GEN)(here[1]);
4197: tau = mulii(tau, addsi(1, exponent));
4198: here[2] = here[1] = *here = (long)NULL;
4199: here = ifac_main(&part);
4200: if (low_stack(lim, stack_lim(av,1)))
4201: {
4202: GEN *gsav[2];
4203: if(DEBUGMEM>1) err(warnmem,"ifac_numdiv");
4204: tetpil = avma;
4205: ifac_realloc(&part, &here, 0);
4206: tau = icopy(tau);
4207: gsav[0] = τ gsav[1] = ∂
4208: gerepilemanysp(av, tetpil, gsav, 2);
4209: /* (see ifac_totient()) */
4210: here = ifac_find(&part, &part);
4211: }
4212: }
4213: tetpil = avma;
4214: res = icopy(tau);
4215: return gerepile(av, tetpil, res);
4216: }
4217:
4218: GEN
4219: ifac_sumdiv(GEN n, long hint)
4220: {
4221: /* don't preallocate */
4222: GEN res;
4223: long exponent, av=avma, tetpil, lim=stack_lim(av,1);
4224: GEN contrib, sigma = gun;
4225: GEN part = ifac_start(n, 0, hint);
4226: GEN here = ifac_main(&part);
4227:
4228: while (here != gun)
4229: {
4230: exponent = itos((GEN)(here[1]));
4231: contrib = addsi(1, (GEN)(*here));
4232: for (; exponent > 1; exponent--)
4233: contrib = addsi(1, mulii((GEN)(*here), contrib));
4234: sigma = mulii(sigma, contrib);
4235: here[2] = here[1] = *here = (long)NULL;
4236: here = ifac_main(&part);
4237: if (low_stack(lim, stack_lim(av,1)))
4238: {
4239: GEN *gsav[2];
4240: if(DEBUGMEM>1) err(warnmem,"ifac_sumdiv");
4241: tetpil = avma;
4242: ifac_realloc(&part, &here, 0);
4243: sigma = icopy(sigma);
4244: gsav[0] = σ gsav[1] = ∂
4245: gerepilemanysp(av, tetpil, gsav, 2);
4246: /* (see ifac_totient()) */
4247: here = ifac_find(&part, &part);
4248: }
4249: }
4250: tetpil = avma;
4251: res = icopy(sigma);
4252: return gerepile(av, tetpil, res);
4253: }
4254:
4255: /* k should be positive, and indeed it had better be > 1 (not checked).
4256: * The calling function knows what to do with the other cases.
4257: */
4258: GEN
4259: ifac_sumdivk(GEN n, long k, long hint)
4260: {
4261: /* don't preallocate */
4262: GEN res;
4263: long exponent, av=avma, tetpil, lim=stack_lim(av,1);
4264: GEN contrib, q, sigma = gun;
4265: GEN part = ifac_start(n, 0, hint);
4266: GEN here = ifac_main(&part);
4267:
4268: while (here != gun)
4269: {
4270: exponent = itos((GEN)(here[1]));
4271: q = gpowgs((GEN)(*here), k);
4272: contrib = addsi(1, q);
4273: for (; exponent > 1; exponent--)
4274: contrib = addsi(1, mulii(q, contrib));
4275: sigma = mulii(sigma, contrib);
4276: here[2] = here[1] = *here = (long)NULL;
4277: here = ifac_main(&part);
4278: if (low_stack(lim, stack_lim(av,1)))
4279: {
4280: GEN *gsav[2];
4281: if(DEBUGMEM>1) err(warnmem,"ifac_sumdivk");
4282: tetpil = avma;
4283: ifac_realloc(&part, &here, 0);
4284: sigma = icopy(sigma);
4285: gsav[0] = σ gsav[1] = ∂
4286: gerepilemanysp(av, tetpil, gsav, 2);
4287: /* (see ifac_totient()) */
4288: here = ifac_find(&part, &part);
4289: }
4290: }
4291: tetpil = avma;
4292: res = icopy(sigma);
4293: return gerepile(av, tetpil, res);
4294: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>