/* mpn_fib2_ui -- calculate Fibonacci numbers. Copyright 2001, 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU MP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include "gmp.h" #include "gmp-impl.h" #include "longlong.h" /* change this to "#define TRACE(x) x" for diagnostics */ #define TRACE(x) /* The following table was generated by code at the end of this file. */ const mp_limb_t __gmp_fib_table[FIB_TABLE_LIMIT+2] = { #if GMP_NUMB_BITS >= 4 CNST_LIMB (0x1), /* -1 */ CNST_LIMB (0x0), /* 0 */ CNST_LIMB (0x1), /* 1 */ CNST_LIMB (0x1), /* 2 */ CNST_LIMB (0x2), /* 3 */ CNST_LIMB (0x3), /* 4 */ CNST_LIMB (0x5), /* 5 */ CNST_LIMB (0x8), /* 6 */ CNST_LIMB (0xD), /* 7 */ #endif #if GMP_NUMB_BITS >= 8 CNST_LIMB (0x15), /* 8 */ CNST_LIMB (0x22), /* 9 */ CNST_LIMB (0x37), /* 10 */ CNST_LIMB (0x59), /* 11 */ CNST_LIMB (0x90), /* 12 */ CNST_LIMB (0xE9), /* 13 */ #endif #if GMP_NUMB_BITS >= 16 CNST_LIMB (0x179), /* 14 */ CNST_LIMB (0x262), /* 15 */ CNST_LIMB (0x3DB), /* 16 */ CNST_LIMB (0x63D), /* 17 */ CNST_LIMB (0xA18), /* 18 */ CNST_LIMB (0x1055), /* 19 */ CNST_LIMB (0x1A6D), /* 20 */ CNST_LIMB (0x2AC2), /* 21 */ CNST_LIMB (0x452F), /* 22 */ CNST_LIMB (0x6FF1), /* 23 */ CNST_LIMB (0xB520), /* 24 */ #endif #if GMP_NUMB_BITS >= 32 CNST_LIMB (0x12511), /* 25 */ CNST_LIMB (0x1DA31), /* 26 */ CNST_LIMB (0x2FF42), /* 27 */ CNST_LIMB (0x4D973), /* 28 */ CNST_LIMB (0x7D8B5), /* 29 */ CNST_LIMB (0xCB228), /* 30 */ CNST_LIMB (0x148ADD), /* 31 */ CNST_LIMB (0x213D05), /* 32 */ CNST_LIMB (0x35C7E2), /* 33 */ CNST_LIMB (0x5704E7), /* 34 */ CNST_LIMB (0x8CCCC9), /* 35 */ CNST_LIMB (0xE3D1B0), /* 36 */ CNST_LIMB (0x1709E79), /* 37 */ CNST_LIMB (0x2547029), /* 38 */ CNST_LIMB (0x3C50EA2), /* 39 */ CNST_LIMB (0x6197ECB), /* 40 */ CNST_LIMB (0x9DE8D6D), /* 41 */ CNST_LIMB (0xFF80C38), /* 42 */ CNST_LIMB (0x19D699A5), /* 43 */ CNST_LIMB (0x29CEA5DD), /* 44 */ CNST_LIMB (0x43A53F82), /* 45 */ CNST_LIMB (0x6D73E55F), /* 46 */ CNST_LIMB (0xB11924E1), /* 47 */ #endif #if GMP_NUMB_BITS >= 64 CNST_LIMB (0x11E8D0A40), /* 48 */ CNST_LIMB (0x1CFA62F21), /* 49 */ CNST_LIMB (0x2EE333961), /* 50 */ CNST_LIMB (0x4BDD96882), /* 51 */ CNST_LIMB (0x7AC0CA1E3), /* 52 */ CNST_LIMB (0xC69E60A65), /* 53 */ CNST_LIMB (0x1415F2AC48), /* 54 */ CNST_LIMB (0x207FD8B6AD), /* 55 */ CNST_LIMB (0x3495CB62F5), /* 56 */ CNST_LIMB (0x5515A419A2), /* 57 */ CNST_LIMB (0x89AB6F7C97), /* 58 */ CNST_LIMB (0xDEC1139639), /* 59 */ CNST_LIMB (0x1686C8312D0), /* 60 */ CNST_LIMB (0x2472D96A909), /* 61 */ CNST_LIMB (0x3AF9A19BBD9), /* 62 */ CNST_LIMB (0x5F6C7B064E2), /* 63 */ CNST_LIMB (0x9A661CA20BB), /* 64 */ CNST_LIMB (0xF9D297A859D), /* 65 */ CNST_LIMB (0x19438B44A658), /* 66 */ CNST_LIMB (0x28E0B4BF2BF5), /* 67 */ CNST_LIMB (0x42244003D24D), /* 68 */ CNST_LIMB (0x6B04F4C2FE42), /* 69 */ CNST_LIMB (0xAD2934C6D08F), /* 70 */ CNST_LIMB (0x1182E2989CED1), /* 71 */ CNST_LIMB (0x1C5575E509F60), /* 72 */ CNST_LIMB (0x2DD8587DA6E31), /* 73 */ CNST_LIMB (0x4A2DCE62B0D91), /* 74 */ CNST_LIMB (0x780626E057BC2), /* 75 */ CNST_LIMB (0xC233F54308953), /* 76 */ CNST_LIMB (0x13A3A1C2360515), /* 77 */ CNST_LIMB (0x1FC6E116668E68), /* 78 */ CNST_LIMB (0x336A82D89C937D), /* 79 */ CNST_LIMB (0x533163EF0321E5), /* 80 */ CNST_LIMB (0x869BE6C79FB562), /* 81 */ CNST_LIMB (0xD9CD4AB6A2D747), /* 82 */ CNST_LIMB (0x16069317E428CA9), /* 83 */ CNST_LIMB (0x23A367C34E563F0), /* 84 */ CNST_LIMB (0x39A9FADB327F099), /* 85 */ CNST_LIMB (0x5D4D629E80D5489), /* 86 */ CNST_LIMB (0x96F75D79B354522), /* 87 */ CNST_LIMB (0xF444C01834299AB), /* 88 */ CNST_LIMB (0x18B3C1D91E77DECD), /* 89 */ CNST_LIMB (0x27F80DDAA1BA7878), /* 90 */ CNST_LIMB (0x40ABCFB3C0325745), /* 91 */ CNST_LIMB (0x68A3DD8E61ECCFBD), /* 92 */ CNST_LIMB (0xA94FAD42221F2702), /* 93 */ #endif }; /* Store F[n] at fp and F[n-1] at f1p. fp and f1p should have room for MPN_FIB2_SIZE(n) limbs. The return value is the actual number of limbs stored, this will be at least 1. fp[size-1] will be non-zero, except when n==0, in which case fp[0] is 0 and f1p[0] is 1. f1p[size-1] can be zero, since F[n-1]0). Notes: In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the low limb. In F[2k+1] with k odd, -2 is applied to the low limb of 4*F[k]^2 - F[k-1]^2. This F[2k+1] is an F[4m+3] and such numbers are congruent to 1, 2 or 5 mod 8, which means no underflow reaching it with a -2 (since that would leave 6 or 7 mod 8). This property of F[4m+3] can be verified by induction on F[4m+3] = 7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j. Enhancements: If there was an mpn_addlshift, it'd be possible to eliminate the yp temporary, using xp=F[k]^2, fp=F[k-1]^2, f1p=xp+fp, fp+=4*fp, fp-=f1p, fp+=2*(-1)^n, etc. */ mp_size_t mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n) { mp_ptr xp, yp; mp_size_t size; unsigned long nfirst, mask; TMP_DECL (marker); TRACE (printf ("mpn_fib2_ui n=%lu\n", n)); ASSERT (! MPN_OVERLAP_P (fp, MPN_FIB2_SIZE(n), f1p, MPN_FIB2_SIZE(n))); /* Take a starting pair from the table. */ mask = 1; for (nfirst = n; nfirst > FIB_TABLE_LIMIT; nfirst /= 2) mask <<= 1; TRACE (printf ("nfirst=%lu mask=0x%lX\n", nfirst, mask)); f1p[0] = FIB_TABLE ((int) nfirst - 1); fp[0] = FIB_TABLE (nfirst); size = 1; /* Skip to the end if the table lookup gives the final answer. */ if (mask != 1) { mp_size_t alloc; TMP_MARK (marker); alloc = MPN_FIB2_SIZE (n); TMP_ALLOC_LIMBS_2 (xp,alloc, yp,alloc); do { mp_limb_t c; /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from n&mask upwards. The next bit of n is n&(mask>>1) and we'll double to the pair fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as that bit is 0 or 1 respectively. */ TRACE (printf ("k=%lu mask=0x%lX size=%ld alloc=%ld\n", n >> refmpn_count_trailing_zeros(mask), mask, size, alloc); mpn_trace ("fp ", fp, size); mpn_trace ("f1p", f1p, size)); /* fp normalized, f1p at most one high zero */ ASSERT (fp[size-1] != 0); ASSERT (f1p[size-1] != 0 || f1p[size-2] != 0); /* f1p[size-1] might be zero, but this occurs rarely, so it's not worth bothering checking for it */ ASSERT (alloc >= 2*size); mpn_sqr_n (xp, fp, size); mpn_sqr_n (yp, f1p, size); size *= 2; /* Shrink if possible. Since fp was normalized there'll be at most one high zero on xp (and if there is then there's one on yp too). */ ASSERT (xp[size-1] != 0 || yp[size-1] == 0); size -= (xp[size-1] == 0); ASSERT (xp[size-1] != 0); /* only one xp high zero */ /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k. n&mask is the low bit of our implied k. */ c = mpn_lshift (fp, xp, size, 2); fp[0] |= (n & mask ? 0 : 2); /* possible +2 */ c -= mpn_sub_n (fp, fp, yp, size); ASSERT (n & (mask << 1) ? fp[0] != 0 && fp[0] != 1 : 1); fp[0] -= (n & mask ? 2 : 0); /* possible -2 */ ASSERT (alloc >= size+1); xp[size] = 0; yp[size] = 0; fp[size] = c; size += (c != 0); /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2. F[2k-1]>= 1; /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of F[2k+1] and F[2k-1]. */ ASSERT_NOCARRY (mpn_sub_n ((n & mask ? f1p : fp), fp, f1p, size)); /* Can have a high zero after replacing F[2k+1] with F[2k]. f1p will have a high zero if fp does. */ ASSERT (fp[size-1] != 0 || f1p[size-1] == 0); size -= (fp[size-1] == 0); } while (mask != 1); TMP_FREE (marker); } TRACE (printf ("done size=%ld\n", size); mpn_trace ("fp ", fp, size); mpn_trace ("f1p", f1p, size)); return size; } /* ------------------------------------------------------------------------- */ #if GENERATE_FIB_TABLE /* Generate the tables of fibonacci data. This doesn't depend on the limb size of the host, and doesn't need mpz_fib_ui working. The bit sizes in the table[] below will get specific setups so that a build with GMP_NUMB_BITS equal to one of those values has as much data in __gmp_fib_table as will fit that number of bits. A build with GMP_NUMB_BITS equal to some other value will effectively fall back to the previous set of generated data. For instance if 8 and 16 bits have been generated, but a build with 13 bits is done then __gmp_fib_table will only contain 8 bit values, whereas it could probably fit a few more. Everything still works, it's just that the table scheme is not fully exploited. */ int main (void) { static struct { int bits; int fib_limit; int luc_limit; } table[] = { { 4 }, { 8 }, { 16 }, { 32 }, { 64 }, }; int i, t; mpz_t f[500]; mpz_t l; mpz_init (l); mpz_init_set_si (f[0], 1L); /* F[-1] */ mpz_init_set_si (f[1], 0L); /* F[0] */ for (i = 2; i < numberof(f); i++) { mpz_init (f[i]); mpz_add (f[i], f[i-1], f[i-2]); } for (i = 1; i < numberof (f); i++) { /* L[n] = F[n]+2*F[n-1] */ mpz_add (l, f[i], f[i-1]); mpz_add (l, l, f[i-1]); for (t = 0; t < numberof (table); t++) { if (mpz_sizeinbase (f[i], 2) <= table[t].bits) table[t].fib_limit = i-1; if (mpz_sizeinbase (l, 2) <= table[t].bits) table[t].luc_limit = i-1; } } if (table[t].fib_limit == numberof (f) + 1) { printf ("Oops, need bigger f[] array\n"); abort (); } for (t = numberof (table) - 1; t >= 0; t--) { printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits); printf ("#define FIB_TABLE_LIMIT %d\n", table[t].fib_limit); printf ("#define FIB_TABLE_LUCNUM_LIMIT %d\n", table[t].luc_limit); if (t != 0) printf ("#else\n"); } for (t = 0; t < numberof (table); t++) printf ("#endif /* %d */\n", table[t].bits); printf ("\n"); printf ("\n"); printf ("const mp_limb_t\n"); printf ("__gmp_fib_table[FIB_TABLE_LIMIT+2] = {\n"); printf ("\n"); t = 0; i = 0; printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits); for (;;) { gmp_printf (" CNST_LIMB (0x%ZX), /* %d */\n", f[i], i-1); if (i-1 == table[t].fib_limit) { printf ("#endif\n"); do { t++; if (t >= numberof (table)) goto done; } while (i-1 == table[t].fib_limit); printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits); } i++; } done: printf ("};\n"); return 0; } #endif