Annotation of OpenXM_contrib/gmp/mpn/generic/fib2_ui.c, Revision 1.1.1.1
1.1 ohara 1: /* mpn_fib2_ui -- calculate Fibonacci numbers.
2:
3: Copyright 2001, 2002 Free Software Foundation, Inc.
4:
5: This file is part of the GNU MP Library.
6:
7: The GNU MP Library is free software; you can redistribute it and/or modify
8: it under the terms of the GNU Lesser General Public License as published by
9: the Free Software Foundation; either version 2.1 of the License, or (at your
10: option) any later version.
11:
12: The GNU MP Library is distributed in the hope that it will be useful, but
13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15: License for more details.
16:
17: You should have received a copy of the GNU Lesser General Public License
18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: MA 02111-1307, USA. */
21:
22: #include <stdio.h>
23: #include "gmp.h"
24: #include "gmp-impl.h"
25: #include "longlong.h"
26:
27:
28: /* change this to "#define TRACE(x) x" for diagnostics */
29: #define TRACE(x)
30:
31:
32: /* The following table was generated by code at the end of this file. */
33:
34: const mp_limb_t
35: __gmp_fib_table[FIB_TABLE_LIMIT+2] = {
36:
37: #if GMP_NUMB_BITS >= 4
38: CNST_LIMB (0x1), /* -1 */
39: CNST_LIMB (0x0), /* 0 */
40: CNST_LIMB (0x1), /* 1 */
41: CNST_LIMB (0x1), /* 2 */
42: CNST_LIMB (0x2), /* 3 */
43: CNST_LIMB (0x3), /* 4 */
44: CNST_LIMB (0x5), /* 5 */
45: CNST_LIMB (0x8), /* 6 */
46: CNST_LIMB (0xD), /* 7 */
47: #endif
48: #if GMP_NUMB_BITS >= 8
49: CNST_LIMB (0x15), /* 8 */
50: CNST_LIMB (0x22), /* 9 */
51: CNST_LIMB (0x37), /* 10 */
52: CNST_LIMB (0x59), /* 11 */
53: CNST_LIMB (0x90), /* 12 */
54: CNST_LIMB (0xE9), /* 13 */
55: #endif
56: #if GMP_NUMB_BITS >= 16
57: CNST_LIMB (0x179), /* 14 */
58: CNST_LIMB (0x262), /* 15 */
59: CNST_LIMB (0x3DB), /* 16 */
60: CNST_LIMB (0x63D), /* 17 */
61: CNST_LIMB (0xA18), /* 18 */
62: CNST_LIMB (0x1055), /* 19 */
63: CNST_LIMB (0x1A6D), /* 20 */
64: CNST_LIMB (0x2AC2), /* 21 */
65: CNST_LIMB (0x452F), /* 22 */
66: CNST_LIMB (0x6FF1), /* 23 */
67: CNST_LIMB (0xB520), /* 24 */
68: #endif
69: #if GMP_NUMB_BITS >= 32
70: CNST_LIMB (0x12511), /* 25 */
71: CNST_LIMB (0x1DA31), /* 26 */
72: CNST_LIMB (0x2FF42), /* 27 */
73: CNST_LIMB (0x4D973), /* 28 */
74: CNST_LIMB (0x7D8B5), /* 29 */
75: CNST_LIMB (0xCB228), /* 30 */
76: CNST_LIMB (0x148ADD), /* 31 */
77: CNST_LIMB (0x213D05), /* 32 */
78: CNST_LIMB (0x35C7E2), /* 33 */
79: CNST_LIMB (0x5704E7), /* 34 */
80: CNST_LIMB (0x8CCCC9), /* 35 */
81: CNST_LIMB (0xE3D1B0), /* 36 */
82: CNST_LIMB (0x1709E79), /* 37 */
83: CNST_LIMB (0x2547029), /* 38 */
84: CNST_LIMB (0x3C50EA2), /* 39 */
85: CNST_LIMB (0x6197ECB), /* 40 */
86: CNST_LIMB (0x9DE8D6D), /* 41 */
87: CNST_LIMB (0xFF80C38), /* 42 */
88: CNST_LIMB (0x19D699A5), /* 43 */
89: CNST_LIMB (0x29CEA5DD), /* 44 */
90: CNST_LIMB (0x43A53F82), /* 45 */
91: CNST_LIMB (0x6D73E55F), /* 46 */
92: CNST_LIMB (0xB11924E1), /* 47 */
93: #endif
94: #if GMP_NUMB_BITS >= 64
95: CNST_LIMB (0x11E8D0A40), /* 48 */
96: CNST_LIMB (0x1CFA62F21), /* 49 */
97: CNST_LIMB (0x2EE333961), /* 50 */
98: CNST_LIMB (0x4BDD96882), /* 51 */
99: CNST_LIMB (0x7AC0CA1E3), /* 52 */
100: CNST_LIMB (0xC69E60A65), /* 53 */
101: CNST_LIMB (0x1415F2AC48), /* 54 */
102: CNST_LIMB (0x207FD8B6AD), /* 55 */
103: CNST_LIMB (0x3495CB62F5), /* 56 */
104: CNST_LIMB (0x5515A419A2), /* 57 */
105: CNST_LIMB (0x89AB6F7C97), /* 58 */
106: CNST_LIMB (0xDEC1139639), /* 59 */
107: CNST_LIMB (0x1686C8312D0), /* 60 */
108: CNST_LIMB (0x2472D96A909), /* 61 */
109: CNST_LIMB (0x3AF9A19BBD9), /* 62 */
110: CNST_LIMB (0x5F6C7B064E2), /* 63 */
111: CNST_LIMB (0x9A661CA20BB), /* 64 */
112: CNST_LIMB (0xF9D297A859D), /* 65 */
113: CNST_LIMB (0x19438B44A658), /* 66 */
114: CNST_LIMB (0x28E0B4BF2BF5), /* 67 */
115: CNST_LIMB (0x42244003D24D), /* 68 */
116: CNST_LIMB (0x6B04F4C2FE42), /* 69 */
117: CNST_LIMB (0xAD2934C6D08F), /* 70 */
118: CNST_LIMB (0x1182E2989CED1), /* 71 */
119: CNST_LIMB (0x1C5575E509F60), /* 72 */
120: CNST_LIMB (0x2DD8587DA6E31), /* 73 */
121: CNST_LIMB (0x4A2DCE62B0D91), /* 74 */
122: CNST_LIMB (0x780626E057BC2), /* 75 */
123: CNST_LIMB (0xC233F54308953), /* 76 */
124: CNST_LIMB (0x13A3A1C2360515), /* 77 */
125: CNST_LIMB (0x1FC6E116668E68), /* 78 */
126: CNST_LIMB (0x336A82D89C937D), /* 79 */
127: CNST_LIMB (0x533163EF0321E5), /* 80 */
128: CNST_LIMB (0x869BE6C79FB562), /* 81 */
129: CNST_LIMB (0xD9CD4AB6A2D747), /* 82 */
130: CNST_LIMB (0x16069317E428CA9), /* 83 */
131: CNST_LIMB (0x23A367C34E563F0), /* 84 */
132: CNST_LIMB (0x39A9FADB327F099), /* 85 */
133: CNST_LIMB (0x5D4D629E80D5489), /* 86 */
134: CNST_LIMB (0x96F75D79B354522), /* 87 */
135: CNST_LIMB (0xF444C01834299AB), /* 88 */
136: CNST_LIMB (0x18B3C1D91E77DECD), /* 89 */
137: CNST_LIMB (0x27F80DDAA1BA7878), /* 90 */
138: CNST_LIMB (0x40ABCFB3C0325745), /* 91 */
139: CNST_LIMB (0x68A3DD8E61ECCFBD), /* 92 */
140: CNST_LIMB (0xA94FAD42221F2702), /* 93 */
141: #endif
142: };
143:
144:
145: /* Store F[n] at fp and F[n-1] at f1p. fp and f1p should have room for
146: MPN_FIB2_SIZE(n) limbs.
147:
148: The return value is the actual number of limbs stored, this will be at
149: least 1. fp[size-1] will be non-zero, except when n==0, in which case
150: fp[0] is 0 and f1p[0] is 1. f1p[size-1] can be zero, since F[n-1]<F[n]
151: (for n>0).
152:
153: Notes:
154:
155: In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
156: low limb.
157:
158: In F[2k+1] with k odd, -2 is applied to the low limb of 4*F[k]^2 -
159: F[k-1]^2. This F[2k+1] is an F[4m+3] and such numbers are congruent to
160: 1, 2 or 5 mod 8, which means no underflow reaching it with a -2 (since
161: that would leave 6 or 7 mod 8).
162:
163: This property of F[4m+3] can be verified by induction on F[4m+3] =
164: 7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence
165: identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j.
166:
167: Enhancements:
168:
169: If there was an mpn_addlshift, it'd be possible to eliminate the yp
170: temporary, using xp=F[k]^2, fp=F[k-1]^2, f1p=xp+fp, fp+=4*fp, fp-=f1p,
171: fp+=2*(-1)^n, etc. */
172:
173: mp_size_t
174: mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
175: {
176: mp_ptr xp, yp;
177: mp_size_t size;
178: unsigned long nfirst, mask;
179: TMP_DECL (marker);
180:
181: TRACE (printf ("mpn_fib2_ui n=%lu\n", n));
182:
183: ASSERT (! MPN_OVERLAP_P (fp, MPN_FIB2_SIZE(n), f1p, MPN_FIB2_SIZE(n)));
184:
185: /* Take a starting pair from the table. */
186: mask = 1;
187: for (nfirst = n; nfirst > FIB_TABLE_LIMIT; nfirst /= 2)
188: mask <<= 1;
189: TRACE (printf ("nfirst=%lu mask=0x%lX\n", nfirst, mask));
190:
191: f1p[0] = FIB_TABLE ((int) nfirst - 1);
192: fp[0] = FIB_TABLE (nfirst);
193: size = 1;
194:
195: /* Skip to the end if the table lookup gives the final answer. */
196: if (mask != 1)
197: {
198: mp_size_t alloc;
199:
200: TMP_MARK (marker);
201: alloc = MPN_FIB2_SIZE (n);
202: TMP_ALLOC_LIMBS_2 (xp,alloc, yp,alloc);
203:
204: do
205: {
206: mp_limb_t c;
207:
208: /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
209: n&mask upwards.
210:
211: The next bit of n is n&(mask>>1) and we'll double to the pair
212: fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
213: that bit is 0 or 1 respectively. */
214:
215: TRACE (printf ("k=%lu mask=0x%lX size=%ld alloc=%ld\n",
216: n >> refmpn_count_trailing_zeros(mask),
217: mask, size, alloc);
218: mpn_trace ("fp ", fp, size);
219: mpn_trace ("f1p", f1p, size));
220:
221: /* fp normalized, f1p at most one high zero */
222: ASSERT (fp[size-1] != 0);
223: ASSERT (f1p[size-1] != 0 || f1p[size-2] != 0);
224:
225: /* f1p[size-1] might be zero, but this occurs rarely, so it's not
226: worth bothering checking for it */
227: ASSERT (alloc >= 2*size);
228: mpn_sqr_n (xp, fp, size);
229: mpn_sqr_n (yp, f1p, size);
230: size *= 2;
231:
232: /* Shrink if possible. Since fp was normalized there'll be at
233: most one high zero on xp (and if there is then there's one on
234: yp too). */
235: ASSERT (xp[size-1] != 0 || yp[size-1] == 0);
236: size -= (xp[size-1] == 0);
237: ASSERT (xp[size-1] != 0); /* only one xp high zero */
238:
239: /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
240: n&mask is the low bit of our implied k. */
241: c = mpn_lshift (fp, xp, size, 2);
242: fp[0] |= (n & mask ? 0 : 2); /* possible +2 */
243: c -= mpn_sub_n (fp, fp, yp, size);
244: ASSERT (n & (mask << 1) ? fp[0] != 0 && fp[0] != 1 : 1);
245: fp[0] -= (n & mask ? 2 : 0); /* possible -2 */
246: ASSERT (alloc >= size+1);
247: xp[size] = 0;
248: yp[size] = 0;
249: fp[size] = c;
250: size += (c != 0);
251:
252: /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2.
253: F[2k-1]<F[2k+1] so no carry out of "size" limbs. */
254: ASSERT_NOCARRY (mpn_add_n (f1p, xp, yp, size));
255:
256: /* now n&mask is the new bit of n being considered */
257: mask >>= 1;
258:
259: /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
260: F[2k+1] and F[2k-1]. */
261: ASSERT_NOCARRY (mpn_sub_n ((n & mask ? f1p : fp), fp, f1p, size));
262:
263: /* Can have a high zero after replacing F[2k+1] with F[2k].
264: f1p will have a high zero if fp does. */
265: ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
266: size -= (fp[size-1] == 0);
267: }
268: while (mask != 1);
269:
270: TMP_FREE (marker);
271: }
272:
273: TRACE (printf ("done size=%ld\n", size);
274: mpn_trace ("fp ", fp, size);
275: mpn_trace ("f1p", f1p, size));
276:
277: return size;
278: }
279:
280:
281:
282:
283:
284: /* ------------------------------------------------------------------------- */
285:
286: #if GENERATE_FIB_TABLE
287: /* Generate the tables of fibonacci data. This doesn't depend on the limb
288: size of the host, and doesn't need mpz_fib_ui working.
289:
290: The bit sizes in the table[] below will get specific setups so that a
291: build with GMP_NUMB_BITS equal to one of those values has as much data in
292: __gmp_fib_table as will fit that number of bits.
293:
294: A build with GMP_NUMB_BITS equal to some other value will effectively
295: fall back to the previous set of generated data. For instance if 8 and
296: 16 bits have been generated, but a build with 13 bits is done then
297: __gmp_fib_table will only contain 8 bit values, whereas it could probably
298: fit a few more. Everything still works, it's just that the table scheme
299: is not fully exploited. */
300:
301: int
302: main (void)
303: {
304: static struct {
305: int bits;
306: int fib_limit;
307: int luc_limit;
308: } table[] = {
309: { 4 },
310: { 8 },
311: { 16 },
312: { 32 },
313: { 64 },
314: };
315:
316: int i, t;
317: mpz_t f[500];
318: mpz_t l;
319:
320: mpz_init (l);
321: mpz_init_set_si (f[0], 1L); /* F[-1] */
322: mpz_init_set_si (f[1], 0L); /* F[0] */
323: for (i = 2; i < numberof(f); i++)
324: {
325: mpz_init (f[i]);
326: mpz_add (f[i], f[i-1], f[i-2]);
327: }
328:
329: for (i = 1; i < numberof (f); i++)
330: {
331: /* L[n] = F[n]+2*F[n-1] */
332: mpz_add (l, f[i], f[i-1]);
333: mpz_add (l, l, f[i-1]);
334:
335: for (t = 0; t < numberof (table); t++)
336: {
337: if (mpz_sizeinbase (f[i], 2) <= table[t].bits)
338: table[t].fib_limit = i-1;
339: if (mpz_sizeinbase (l, 2) <= table[t].bits)
340: table[t].luc_limit = i-1;
341: }
342: }
343: if (table[t].fib_limit == numberof (f) + 1)
344: {
345: printf ("Oops, need bigger f[] array\n");
346: abort ();
347: }
348:
349: for (t = numberof (table) - 1; t >= 0; t--)
350: {
351: printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
352: printf ("#define FIB_TABLE_LIMIT %d\n", table[t].fib_limit);
353: printf ("#define FIB_TABLE_LUCNUM_LIMIT %d\n", table[t].luc_limit);
354: if (t != 0)
355: printf ("#else\n");
356: }
357: for (t = 0; t < numberof (table); t++)
358: printf ("#endif /* %d */\n", table[t].bits);
359: printf ("\n");
360: printf ("\n");
361:
362: printf ("const mp_limb_t\n");
363: printf ("__gmp_fib_table[FIB_TABLE_LIMIT+2] = {\n");
364: printf ("\n");
365: t = 0;
366: i = 0;
367: printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
368: for (;;)
369: {
370: gmp_printf (" CNST_LIMB (0x%ZX), /* %d */\n", f[i], i-1);
371:
372: if (i-1 == table[t].fib_limit)
373: {
374: printf ("#endif\n");
375: do
376: {
377: t++;
378: if (t >= numberof (table))
379: goto done;
380: }
381: while (i-1 == table[t].fib_limit);
382: printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
383: }
384: i++;
385: }
386: done:
387: printf ("};\n");
388:
389: return 0;
390: }
391:
392: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>