=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpz/Attic/fib_ui.c,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.2 diff -u -p -r1.1.1.1 -r1.1.1.2 --- OpenXM_contrib/gmp/mpz/Attic/fib_ui.c 2000/09/09 14:12:51 1.1.1.1 +++ OpenXM_contrib/gmp/mpz/Attic/fib_ui.c 2003/08/25 16:06:33 1.1.1.2 @@ -1,6 +1,6 @@ -/* mpz_fib_ui(result, n) -- Set RESULT to the Nth Fibonacci number. +/* mpz_fib_ui -- calculate Fibonacci numbers. -Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. +Copyright 2000, 2001 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -19,147 +19,128 @@ along with the GNU MP Library; see the file COPYING.LI the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include #include "gmp.h" #include "gmp-impl.h" +#include "longlong.h" -/* This is fast, but could be made somewhat faster and neater. - The timing is somewhat fluctuating for even/odd sizes because - of the extra hair used to save variables and operations. Here - are a few things one might want to address: - 1. Avoid using 4 intermediate variables in mpz_fib_bigcase. - 2. Call mpn functions directly. Straightforward for these functions. - 3. Merge the three functions into one. -Said by Kevin: - Consider using the Lucas numbers L[n] as an auxiliary sequence, making - it possible to do the "doubling" operation in mpz_fib_bigcase with two - squares rather than two multiplies. The formulas are a little more - complicated, something like the following (untested). +/* change to "#define TRACE(x) x" to get some traces */ +#define TRACE(x) - F[2n] = ((F[n]+L[n])^2 - 6*F[n]^2 - 4*(-1)^n) / 2 - L[2n] = 5*F[n]^2 + 2*(-1)^n - F[2n+1] = (F[2n] + L[2n]) / 2 - L[2n+1] = (5*F[2n] + L[2n]) / 2 +/* In the F[2k+1] below for k odd, the -2 won't give a borrow from the low + limb because the result F[2k+1] is an F[4m+3] and such numbers are always + == 1, 2 or 5 mod 8, whereas an underflow would leave 6 or 7. (This is + the same as in mpn_fib2_ui.) - The Lucas number that comes for free here could even be returned. + In the F[2k+1] for k even, the +2 won't give a carry out of the low limb + in normal circumstances. This is an F[4m+1] and we claim that F[3*2^b+1] + == 1 mod 2^b is the first F[4m+1] congruent to 0 or 1 mod 2^b, and hence + if n < 2^BITS_PER_MP_LIMB then F[n] cannot have a low limb of 0 or 1. No + proof for this claim, but it's been verified up to b==32 and has such a + nice pattern it must be true :-). Of interest is that F[3*2^b] == 0 mod + 2^(b+1) seems to hold too. - Maybe there's formulas with two squares using just F[n], but I don't - know of any. -*/ + When n >= 2^BITS_PER_MP_LIMB, which can arise in test setups with a small + limb, then the low limb of F[4m+1] can certainly be 1, and an mpn_add_1 + must be used. */ -/* Determine the needed storage for Fib(n). */ -#define FIB_SIZE(n) (((mp_size_t) ((n)*0.695)) / BITS_PER_MP_LIMB + 2) - -static void mpz_fib_bigcase _PROTO ((mpz_t, mpz_t, unsigned long int)); -static void mpz_fib_basecase _PROTO ((mpz_t, mpz_t, unsigned long int)); - - -#ifndef FIB_THRESHOLD -#define FIB_THRESHOLD 60 -#endif - void -#if __STDC__ -mpz_fib_ui (mpz_t r, unsigned long int n) -#else -mpz_fib_ui (r, n) - mpz_t r; - unsigned long int n; -#endif +mpz_fib_ui (mpz_ptr fn, unsigned long n) { - if (n == 0) - mpz_set_ui (r, 0); - else + mp_ptr fp, xp, yp; + mp_size_t size, xalloc; + unsigned long n2; + mp_limb_t c, c2; + TMP_DECL (marker); + + if (n <= FIB_TABLE_LIMIT) { - mpz_t t1; - mpz_init (t1); - if (n < FIB_THRESHOLD) - mpz_fib_basecase (t1, r, n); - else - mpz_fib_bigcase (t1, r, n); - mpz_clear (t1); + PTR(fn)[0] = FIB_TABLE (n); + SIZ(fn) = (n != 0); /* F[0]==0, others are !=0 */ + return; } -} -static void -#if __STDC__ -mpz_fib_basecase (mpz_t t1, mpz_t t2, unsigned long int n) -#else -mpz_fib_basecase (t1, t2, n) - mpz_t t1; - mpz_t t2; - unsigned long int n; -#endif -{ - unsigned long int m, i; + n2 = n/2; + xalloc = MPN_FIB2_SIZE (n2) + 1; + MPZ_REALLOC (fn, 2*xalloc+1); + fp = PTR (fn); - mpz_set_ui (t1, 0); - mpz_set_ui (t2, 1); - m = n/2; - for (i = 0; i < m; i++) + TMP_MARK (marker); + TMP_ALLOC_LIMBS_2 (xp,xalloc, yp,xalloc); + size = mpn_fib2_ui (xp, yp, n2); + + TRACE (printf ("mpz_fib_ui last step n=%lu size=%ld bit=%lu\n", + n >> 1, size, n&1); + mpn_trace ("xp", xp, size); + mpn_trace ("yp", yp, size)); + + if (n & 1) { - mpz_add (t1, t1, t2); - mpz_add (t2, t1, t2); - } - if ((n & 1) == 0) - { - mpz_sub (t1, t2, t1); - mpz_sub (t2, t2, t1); /* trick: recover t1 value just overwritten */ - } -} + /* F[2k+1] = (2F[k]+F[k-1])*(2F[k]-F[k-1]) + 2*(-1)^k */ + mp_size_t xsize, ysize; -static void -#if __STDC__ -mpz_fib_bigcase (mpz_t t1, mpz_t t2, unsigned long int n) +#if HAVE_NATIVE_mpn_addsub_n + xp[size] = mpn_lshift (xp, xp, size, 1); + yp[size] = 0; + ASSERT_NOCARRY (mpn_addsub_n (xp, yp, xp, yp, size+1)); + xsize = size + (xp[size] != 0); + ysize = size + (yp[size] != 0); #else -mpz_fib_bigcase (t1, t2, n) - mpz_t t1; - mpz_t t2; - unsigned long int n; + c2 = mpn_lshift (fp, xp, size, 1); + c = c2 + mpn_add_n (xp, fp, yp, size); + xp[size] = c; + xsize = size + (c != 0); + c2 -= mpn_sub_n (yp, fp, yp, size); + yp[size] = c2; + ASSERT (c2 <= 1); + ysize = size + c2; #endif -{ - unsigned long int n2; - int ni, i; - mpz_t x1, x2, u1, u2; - ni = 0; - for (n2 = n; n2 >= FIB_THRESHOLD; n2 /= 2) - ni++; + size = xsize + ysize; + c = mpn_mul (fp, xp, xsize, yp, ysize); - mpz_fib_basecase (t1, t2, n2); - - mpz_init (x1); - mpz_init (x2); - mpz_init (u1); - mpz_init (u2); - - for (i = ni - 1; i >= 0; i--) +#if BITS_PER_MP_LIMB >= BITS_PER_ULONG + /* no overflow, see comments above */ + ASSERT (n & 2 ? fp[0] >= 2 : fp[0] <= MP_LIMB_T_MAX-2); + fp[0] += (n & 2 ? -CNST_LIMB(2) : CNST_LIMB(2)); +#else + /* this code only for testing with small limbs, limb= 2); + fp[0] -= 2; + } + else + { + ASSERT (c != MP_LIMB_T_MAX); /* because it's the high of a mul */ + c += mpn_add_1 (fp, fp, size-1, CNST_LIMB(2)); + fp[size-1] = c; + } +#endif + } + else { - mpz_mul_2exp (x1, t1, 1); - mpz_mul_2exp (x2, t2, 1); + /* F[2k] = F[k]*(F[k]+2F[k-1]) */ - mpz_add (x1, x1, t2); - mpz_sub (x2, x2, t1); + mp_size_t xsize, ysize; + c = mpn_lshift (yp, yp, size, 1); + c += mpn_add_n (yp, yp, xp, size); + yp[size] = c; + xsize = size; + ysize = size + (c != 0); + size += ysize; + c = mpn_mul (fp, yp, ysize, xp, xsize); + } - mpz_mul (u1, t2, x1); - mpz_mul (u2, t1, x2); + /* one or two high zeros */ + size -= (c == 0); + size -= (fp[size-1] == 0); + SIZ(fn) = size; - if (((n >> i) & 1) == 0) - { - mpz_sub (t1, u1, u2); - mpz_set (t2, u1); - } - else - { - mpz_set (t1, u1); - mpz_mul_2exp (t2, u1, 1); - mpz_sub (t2, t2, u2); - } - } + TRACE (printf ("done special, size=%ld\n", size); + mpn_trace ("fp ", fp, size)); - mpz_clear (x1); - mpz_clear (x2); - mpz_clear (u1); - mpz_clear (u2); + TMP_FREE (marker); }