Annotation of OpenXM_contrib/gmp/mpn/generic/fib2_ui.c, Revision 1.1
1.1 ! ohara 1: /* mpn_fib2_ui -- calculate Fibonacci numbers.
! 2:
! 3: Copyright 2001, 2002 Free Software Foundation, Inc.
! 4:
! 5: This file is part of the GNU MP Library.
! 6:
! 7: The GNU MP Library is free software; you can redistribute it and/or modify
! 8: it under the terms of the GNU Lesser General Public License as published by
! 9: the Free Software Foundation; either version 2.1 of the License, or (at your
! 10: option) any later version.
! 11:
! 12: The GNU MP Library is distributed in the hope that it will be useful, but
! 13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 15: License for more details.
! 16:
! 17: You should have received a copy of the GNU Lesser General Public License
! 18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: MA 02111-1307, USA. */
! 21:
! 22: #include <stdio.h>
! 23: #include "gmp.h"
! 24: #include "gmp-impl.h"
! 25: #include "longlong.h"
! 26:
! 27:
! 28: /* change this to "#define TRACE(x) x" for diagnostics */
! 29: #define TRACE(x)
! 30:
! 31:
! 32: /* The following table was generated by code at the end of this file. */
! 33:
! 34: const mp_limb_t
! 35: __gmp_fib_table[FIB_TABLE_LIMIT+2] = {
! 36:
! 37: #if GMP_NUMB_BITS >= 4
! 38: CNST_LIMB (0x1), /* -1 */
! 39: CNST_LIMB (0x0), /* 0 */
! 40: CNST_LIMB (0x1), /* 1 */
! 41: CNST_LIMB (0x1), /* 2 */
! 42: CNST_LIMB (0x2), /* 3 */
! 43: CNST_LIMB (0x3), /* 4 */
! 44: CNST_LIMB (0x5), /* 5 */
! 45: CNST_LIMB (0x8), /* 6 */
! 46: CNST_LIMB (0xD), /* 7 */
! 47: #endif
! 48: #if GMP_NUMB_BITS >= 8
! 49: CNST_LIMB (0x15), /* 8 */
! 50: CNST_LIMB (0x22), /* 9 */
! 51: CNST_LIMB (0x37), /* 10 */
! 52: CNST_LIMB (0x59), /* 11 */
! 53: CNST_LIMB (0x90), /* 12 */
! 54: CNST_LIMB (0xE9), /* 13 */
! 55: #endif
! 56: #if GMP_NUMB_BITS >= 16
! 57: CNST_LIMB (0x179), /* 14 */
! 58: CNST_LIMB (0x262), /* 15 */
! 59: CNST_LIMB (0x3DB), /* 16 */
! 60: CNST_LIMB (0x63D), /* 17 */
! 61: CNST_LIMB (0xA18), /* 18 */
! 62: CNST_LIMB (0x1055), /* 19 */
! 63: CNST_LIMB (0x1A6D), /* 20 */
! 64: CNST_LIMB (0x2AC2), /* 21 */
! 65: CNST_LIMB (0x452F), /* 22 */
! 66: CNST_LIMB (0x6FF1), /* 23 */
! 67: CNST_LIMB (0xB520), /* 24 */
! 68: #endif
! 69: #if GMP_NUMB_BITS >= 32
! 70: CNST_LIMB (0x12511), /* 25 */
! 71: CNST_LIMB (0x1DA31), /* 26 */
! 72: CNST_LIMB (0x2FF42), /* 27 */
! 73: CNST_LIMB (0x4D973), /* 28 */
! 74: CNST_LIMB (0x7D8B5), /* 29 */
! 75: CNST_LIMB (0xCB228), /* 30 */
! 76: CNST_LIMB (0x148ADD), /* 31 */
! 77: CNST_LIMB (0x213D05), /* 32 */
! 78: CNST_LIMB (0x35C7E2), /* 33 */
! 79: CNST_LIMB (0x5704E7), /* 34 */
! 80: CNST_LIMB (0x8CCCC9), /* 35 */
! 81: CNST_LIMB (0xE3D1B0), /* 36 */
! 82: CNST_LIMB (0x1709E79), /* 37 */
! 83: CNST_LIMB (0x2547029), /* 38 */
! 84: CNST_LIMB (0x3C50EA2), /* 39 */
! 85: CNST_LIMB (0x6197ECB), /* 40 */
! 86: CNST_LIMB (0x9DE8D6D), /* 41 */
! 87: CNST_LIMB (0xFF80C38), /* 42 */
! 88: CNST_LIMB (0x19D699A5), /* 43 */
! 89: CNST_LIMB (0x29CEA5DD), /* 44 */
! 90: CNST_LIMB (0x43A53F82), /* 45 */
! 91: CNST_LIMB (0x6D73E55F), /* 46 */
! 92: CNST_LIMB (0xB11924E1), /* 47 */
! 93: #endif
! 94: #if GMP_NUMB_BITS >= 64
! 95: CNST_LIMB (0x11E8D0A40), /* 48 */
! 96: CNST_LIMB (0x1CFA62F21), /* 49 */
! 97: CNST_LIMB (0x2EE333961), /* 50 */
! 98: CNST_LIMB (0x4BDD96882), /* 51 */
! 99: CNST_LIMB (0x7AC0CA1E3), /* 52 */
! 100: CNST_LIMB (0xC69E60A65), /* 53 */
! 101: CNST_LIMB (0x1415F2AC48), /* 54 */
! 102: CNST_LIMB (0x207FD8B6AD), /* 55 */
! 103: CNST_LIMB (0x3495CB62F5), /* 56 */
! 104: CNST_LIMB (0x5515A419A2), /* 57 */
! 105: CNST_LIMB (0x89AB6F7C97), /* 58 */
! 106: CNST_LIMB (0xDEC1139639), /* 59 */
! 107: CNST_LIMB (0x1686C8312D0), /* 60 */
! 108: CNST_LIMB (0x2472D96A909), /* 61 */
! 109: CNST_LIMB (0x3AF9A19BBD9), /* 62 */
! 110: CNST_LIMB (0x5F6C7B064E2), /* 63 */
! 111: CNST_LIMB (0x9A661CA20BB), /* 64 */
! 112: CNST_LIMB (0xF9D297A859D), /* 65 */
! 113: CNST_LIMB (0x19438B44A658), /* 66 */
! 114: CNST_LIMB (0x28E0B4BF2BF5), /* 67 */
! 115: CNST_LIMB (0x42244003D24D), /* 68 */
! 116: CNST_LIMB (0x6B04F4C2FE42), /* 69 */
! 117: CNST_LIMB (0xAD2934C6D08F), /* 70 */
! 118: CNST_LIMB (0x1182E2989CED1), /* 71 */
! 119: CNST_LIMB (0x1C5575E509F60), /* 72 */
! 120: CNST_LIMB (0x2DD8587DA6E31), /* 73 */
! 121: CNST_LIMB (0x4A2DCE62B0D91), /* 74 */
! 122: CNST_LIMB (0x780626E057BC2), /* 75 */
! 123: CNST_LIMB (0xC233F54308953), /* 76 */
! 124: CNST_LIMB (0x13A3A1C2360515), /* 77 */
! 125: CNST_LIMB (0x1FC6E116668E68), /* 78 */
! 126: CNST_LIMB (0x336A82D89C937D), /* 79 */
! 127: CNST_LIMB (0x533163EF0321E5), /* 80 */
! 128: CNST_LIMB (0x869BE6C79FB562), /* 81 */
! 129: CNST_LIMB (0xD9CD4AB6A2D747), /* 82 */
! 130: CNST_LIMB (0x16069317E428CA9), /* 83 */
! 131: CNST_LIMB (0x23A367C34E563F0), /* 84 */
! 132: CNST_LIMB (0x39A9FADB327F099), /* 85 */
! 133: CNST_LIMB (0x5D4D629E80D5489), /* 86 */
! 134: CNST_LIMB (0x96F75D79B354522), /* 87 */
! 135: CNST_LIMB (0xF444C01834299AB), /* 88 */
! 136: CNST_LIMB (0x18B3C1D91E77DECD), /* 89 */
! 137: CNST_LIMB (0x27F80DDAA1BA7878), /* 90 */
! 138: CNST_LIMB (0x40ABCFB3C0325745), /* 91 */
! 139: CNST_LIMB (0x68A3DD8E61ECCFBD), /* 92 */
! 140: CNST_LIMB (0xA94FAD42221F2702), /* 93 */
! 141: #endif
! 142: };
! 143:
! 144:
! 145: /* Store F[n] at fp and F[n-1] at f1p. fp and f1p should have room for
! 146: MPN_FIB2_SIZE(n) limbs.
! 147:
! 148: The return value is the actual number of limbs stored, this will be at
! 149: least 1. fp[size-1] will be non-zero, except when n==0, in which case
! 150: fp[0] is 0 and f1p[0] is 1. f1p[size-1] can be zero, since F[n-1]<F[n]
! 151: (for n>0).
! 152:
! 153: Notes:
! 154:
! 155: In F[2k+1] with k even, +2 is applied to 4*F[k]^2 just by ORing into the
! 156: low limb.
! 157:
! 158: In F[2k+1] with k odd, -2 is applied to the low limb of 4*F[k]^2 -
! 159: F[k-1]^2. This F[2k+1] is an F[4m+3] and such numbers are congruent to
! 160: 1, 2 or 5 mod 8, which means no underflow reaching it with a -2 (since
! 161: that would leave 6 or 7 mod 8).
! 162:
! 163: This property of F[4m+3] can be verified by induction on F[4m+3] =
! 164: 7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence
! 165: identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j.
! 166:
! 167: Enhancements:
! 168:
! 169: If there was an mpn_addlshift, it'd be possible to eliminate the yp
! 170: temporary, using xp=F[k]^2, fp=F[k-1]^2, f1p=xp+fp, fp+=4*fp, fp-=f1p,
! 171: fp+=2*(-1)^n, etc. */
! 172:
! 173: mp_size_t
! 174: mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
! 175: {
! 176: mp_ptr xp, yp;
! 177: mp_size_t size;
! 178: unsigned long nfirst, mask;
! 179: TMP_DECL (marker);
! 180:
! 181: TRACE (printf ("mpn_fib2_ui n=%lu\n", n));
! 182:
! 183: ASSERT (! MPN_OVERLAP_P (fp, MPN_FIB2_SIZE(n), f1p, MPN_FIB2_SIZE(n)));
! 184:
! 185: /* Take a starting pair from the table. */
! 186: mask = 1;
! 187: for (nfirst = n; nfirst > FIB_TABLE_LIMIT; nfirst /= 2)
! 188: mask <<= 1;
! 189: TRACE (printf ("nfirst=%lu mask=0x%lX\n", nfirst, mask));
! 190:
! 191: f1p[0] = FIB_TABLE ((int) nfirst - 1);
! 192: fp[0] = FIB_TABLE (nfirst);
! 193: size = 1;
! 194:
! 195: /* Skip to the end if the table lookup gives the final answer. */
! 196: if (mask != 1)
! 197: {
! 198: mp_size_t alloc;
! 199:
! 200: TMP_MARK (marker);
! 201: alloc = MPN_FIB2_SIZE (n);
! 202: TMP_ALLOC_LIMBS_2 (xp,alloc, yp,alloc);
! 203:
! 204: do
! 205: {
! 206: mp_limb_t c;
! 207:
! 208: /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
! 209: n&mask upwards.
! 210:
! 211: The next bit of n is n&(mask>>1) and we'll double to the pair
! 212: fp==F[2k],f1p==F[2k-1] or fp==F[2k+1],f1p==F[2k], according as
! 213: that bit is 0 or 1 respectively. */
! 214:
! 215: TRACE (printf ("k=%lu mask=0x%lX size=%ld alloc=%ld\n",
! 216: n >> refmpn_count_trailing_zeros(mask),
! 217: mask, size, alloc);
! 218: mpn_trace ("fp ", fp, size);
! 219: mpn_trace ("f1p", f1p, size));
! 220:
! 221: /* fp normalized, f1p at most one high zero */
! 222: ASSERT (fp[size-1] != 0);
! 223: ASSERT (f1p[size-1] != 0 || f1p[size-2] != 0);
! 224:
! 225: /* f1p[size-1] might be zero, but this occurs rarely, so it's not
! 226: worth bothering checking for it */
! 227: ASSERT (alloc >= 2*size);
! 228: mpn_sqr_n (xp, fp, size);
! 229: mpn_sqr_n (yp, f1p, size);
! 230: size *= 2;
! 231:
! 232: /* Shrink if possible. Since fp was normalized there'll be at
! 233: most one high zero on xp (and if there is then there's one on
! 234: yp too). */
! 235: ASSERT (xp[size-1] != 0 || yp[size-1] == 0);
! 236: size -= (xp[size-1] == 0);
! 237: ASSERT (xp[size-1] != 0); /* only one xp high zero */
! 238:
! 239: /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
! 240: n&mask is the low bit of our implied k. */
! 241: c = mpn_lshift (fp, xp, size, 2);
! 242: fp[0] |= (n & mask ? 0 : 2); /* possible +2 */
! 243: c -= mpn_sub_n (fp, fp, yp, size);
! 244: ASSERT (n & (mask << 1) ? fp[0] != 0 && fp[0] != 1 : 1);
! 245: fp[0] -= (n & mask ? 2 : 0); /* possible -2 */
! 246: ASSERT (alloc >= size+1);
! 247: xp[size] = 0;
! 248: yp[size] = 0;
! 249: fp[size] = c;
! 250: size += (c != 0);
! 251:
! 252: /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2.
! 253: F[2k-1]<F[2k+1] so no carry out of "size" limbs. */
! 254: ASSERT_NOCARRY (mpn_add_n (f1p, xp, yp, size));
! 255:
! 256: /* now n&mask is the new bit of n being considered */
! 257: mask >>= 1;
! 258:
! 259: /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
! 260: F[2k+1] and F[2k-1]. */
! 261: ASSERT_NOCARRY (mpn_sub_n ((n & mask ? f1p : fp), fp, f1p, size));
! 262:
! 263: /* Can have a high zero after replacing F[2k+1] with F[2k].
! 264: f1p will have a high zero if fp does. */
! 265: ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
! 266: size -= (fp[size-1] == 0);
! 267: }
! 268: while (mask != 1);
! 269:
! 270: TMP_FREE (marker);
! 271: }
! 272:
! 273: TRACE (printf ("done size=%ld\n", size);
! 274: mpn_trace ("fp ", fp, size);
! 275: mpn_trace ("f1p", f1p, size));
! 276:
! 277: return size;
! 278: }
! 279:
! 280:
! 281:
! 282:
! 283:
! 284: /* ------------------------------------------------------------------------- */
! 285:
! 286: #if GENERATE_FIB_TABLE
! 287: /* Generate the tables of fibonacci data. This doesn't depend on the limb
! 288: size of the host, and doesn't need mpz_fib_ui working.
! 289:
! 290: The bit sizes in the table[] below will get specific setups so that a
! 291: build with GMP_NUMB_BITS equal to one of those values has as much data in
! 292: __gmp_fib_table as will fit that number of bits.
! 293:
! 294: A build with GMP_NUMB_BITS equal to some other value will effectively
! 295: fall back to the previous set of generated data. For instance if 8 and
! 296: 16 bits have been generated, but a build with 13 bits is done then
! 297: __gmp_fib_table will only contain 8 bit values, whereas it could probably
! 298: fit a few more. Everything still works, it's just that the table scheme
! 299: is not fully exploited. */
! 300:
! 301: int
! 302: main (void)
! 303: {
! 304: static struct {
! 305: int bits;
! 306: int fib_limit;
! 307: int luc_limit;
! 308: } table[] = {
! 309: { 4 },
! 310: { 8 },
! 311: { 16 },
! 312: { 32 },
! 313: { 64 },
! 314: };
! 315:
! 316: int i, t;
! 317: mpz_t f[500];
! 318: mpz_t l;
! 319:
! 320: mpz_init (l);
! 321: mpz_init_set_si (f[0], 1L); /* F[-1] */
! 322: mpz_init_set_si (f[1], 0L); /* F[0] */
! 323: for (i = 2; i < numberof(f); i++)
! 324: {
! 325: mpz_init (f[i]);
! 326: mpz_add (f[i], f[i-1], f[i-2]);
! 327: }
! 328:
! 329: for (i = 1; i < numberof (f); i++)
! 330: {
! 331: /* L[n] = F[n]+2*F[n-1] */
! 332: mpz_add (l, f[i], f[i-1]);
! 333: mpz_add (l, l, f[i-1]);
! 334:
! 335: for (t = 0; t < numberof (table); t++)
! 336: {
! 337: if (mpz_sizeinbase (f[i], 2) <= table[t].bits)
! 338: table[t].fib_limit = i-1;
! 339: if (mpz_sizeinbase (l, 2) <= table[t].bits)
! 340: table[t].luc_limit = i-1;
! 341: }
! 342: }
! 343: if (table[t].fib_limit == numberof (f) + 1)
! 344: {
! 345: printf ("Oops, need bigger f[] array\n");
! 346: abort ();
! 347: }
! 348:
! 349: for (t = numberof (table) - 1; t >= 0; t--)
! 350: {
! 351: printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
! 352: printf ("#define FIB_TABLE_LIMIT %d\n", table[t].fib_limit);
! 353: printf ("#define FIB_TABLE_LUCNUM_LIMIT %d\n", table[t].luc_limit);
! 354: if (t != 0)
! 355: printf ("#else\n");
! 356: }
! 357: for (t = 0; t < numberof (table); t++)
! 358: printf ("#endif /* %d */\n", table[t].bits);
! 359: printf ("\n");
! 360: printf ("\n");
! 361:
! 362: printf ("const mp_limb_t\n");
! 363: printf ("__gmp_fib_table[FIB_TABLE_LIMIT+2] = {\n");
! 364: printf ("\n");
! 365: t = 0;
! 366: i = 0;
! 367: printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
! 368: for (;;)
! 369: {
! 370: gmp_printf (" CNST_LIMB (0x%ZX), /* %d */\n", f[i], i-1);
! 371:
! 372: if (i-1 == table[t].fib_limit)
! 373: {
! 374: printf ("#endif\n");
! 375: do
! 376: {
! 377: t++;
! 378: if (t >= numberof (table))
! 379: goto done;
! 380: }
! 381: while (i-1 == table[t].fib_limit);
! 382: printf ("#if GMP_NUMB_BITS >= %d\n", table[t].bits);
! 383: }
! 384: i++;
! 385: }
! 386: done:
! 387: printf ("};\n");
! 388:
! 389: return 0;
! 390: }
! 391:
! 392: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>