=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpz/Attic/fac_ui.c,v retrieving revision 1.1.1.2 retrieving revision 1.1.1.3 diff -u -p -r1.1.1.2 -r1.1.1.3 --- OpenXM_contrib/gmp/mpz/Attic/fac_ui.c 2000/09/09 14:12:50 1.1.1.2 +++ OpenXM_contrib/gmp/mpz/Attic/fac_ui.c 2003/08/25 16:06:33 1.1.1.3 @@ -1,6 +1,7 @@ /* mpz_fac_ui(result, n) -- Set RESULT to N!. -Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc. +Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation, +Inc. This file is part of the GNU MP Library. @@ -19,31 +20,78 @@ along with the GNU MP Library; see the file COPYING.LI the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#ifdef DBG -#include -#endif - #include "gmp.h" #include "gmp-impl.h" #include "longlong.h" + +/* Enhancements: + + Data tables could be used for results up to 3 or 4 limbs to avoid + fiddling around with small quantities. + + The product accumulation might be worth splitting out into something that + could be used elsewhere too. + + The tree of partial products should be done with TMP_ALLOC, not mpz_init. + It should be possible to know a maximum size at each level. + + Factors of two could be stripped from k to save some multiplying (but not + very much). The same could be done with factors of 3, perhaps. + + The prime factorization of n! is easy to determine, it might be worth + using that rather than a simple 1 to n. The powering of primes could do + some squaring instead of multiplying. There's probably other ways to use + some squaring too. */ + + +/* for single non-zero limb */ +#define MPZ_SET_1_NZ(z,n) \ + do { \ + mpz_ptr __z = (z); \ + ASSERT ((n) != 0); \ + PTR(__z)[0] = (n); \ + SIZ(__z) = 1; \ + } while (0) + +/* for single non-zero limb */ +#define MPZ_INIT_SET_1_NZ(z,n) \ + do { \ + mpz_ptr __iz = (z); \ + ALLOC(__iz) = 1; \ + PTR(__iz) = __GMP_ALLOCATE_FUNC_LIMBS (1); \ + MPZ_SET_1_NZ (__iz, n); \ + } while (0) + +/* for src>0 and n>0 */ +#define MPZ_MUL_1_POS(dst,src,n) \ + do { \ + mpz_ptr __dst = (dst); \ + mpz_srcptr __src = (src); \ + mp_size_t __size = SIZ(__src); \ + mp_ptr __dst_p; \ + mp_limb_t __c; \ + \ + ASSERT (__size > 0); \ + ASSERT ((n) != 0); \ + \ + MPZ_REALLOC (__dst, __size+1); \ + __dst_p = PTR(__dst); \ + \ + __c = mpn_mul_1 (__dst_p, PTR(__src), __size, n); \ + __dst_p[__size] = __c; \ + SIZ(__dst) = __size + (__c != 0); \ + \ + } while (0) + + void -#if __STDC__ mpz_fac_ui (mpz_ptr result, unsigned long int n) -#else -mpz_fac_ui (result, n) - mpz_ptr result; - unsigned long int n; -#endif { #if SIMPLE_FAC - /* Be silly. Just multiply the numbers in ascending order. O(n**2). */ - unsigned long int k; - mpz_set_ui (result, 1L); - for (k = 2; k <= n; k++) mpz_mul_ui (result, result, k); #else @@ -54,8 +102,8 @@ mpz_fac_ui (result, n) as possible. When the operands have about the same size, mpn_mul becomes faster. */ - unsigned long int p, k; - mp_limb_t p1, p0; + unsigned long k; + mp_limb_t p, p1, p0; /* Stack of partial products, used to make the computation balanced (i.e. make the sizes of the multiplication operands equal). The @@ -76,17 +124,26 @@ mpz_fac_ui (result, n) See below. */ unsigned int tree_cnt; + /* for testing with small limbs */ + if (MP_LIMB_T_MAX < ULONG_MAX) + ASSERT_ALWAYS (n <= MP_LIMB_T_MAX); + top = top_limit_so_far = -1; tree_cnt = 0; p = 1; for (k = 2; k <= n; k++) { /* Multiply the partial product in P with K. */ - umul_ppmm (p1, p0, (mp_limb_t) p, (mp_limb_t) k); + umul_ppmm (p1, p0, p, (mp_limb_t) k); +#if GMP_NAIL_BITS == 0 +#define OVERFLOW (p1 != 0) +#else +#define OVERFLOW ((p1 | (p0 >> GMP_NUMB_BITS)) != 0) +#endif /* Did we get overflow into the high limb, i.e. is the partial product now more than one limb? */ - if (p1 != 0) + if OVERFLOW { tree_cnt++; @@ -98,7 +155,7 @@ mpz_fac_ui (result, n) one-limb partial products), which means that we have a single-limb product on the top of MP_STACK. */ - mpz_mul_ui (mp_stack[top], mp_stack[top], p); + MPZ_MUL_1_POS (mp_stack[top], mp_stack[top], p); /* If TREE_CNT is divisable by 4, 8,..., we have two similar-sized partial products with 2, 4,... limbs at @@ -123,11 +180,11 @@ mpz_fac_ui (result, n) abort(); /* The stack is now bigger than ever, initialize the top element. */ - mpz_init_set_ui (mp_stack[top], p); + MPZ_INIT_SET_1_NZ (mp_stack[top], p); top_limit_so_far++; } else - mpz_set_ui (mp_stack[top], p); + MPZ_SET_1_NZ (mp_stack[top], p); } /* We ignored the last result from umul_ppmm. Put K in P as the @@ -137,16 +194,16 @@ mpz_fac_ui (result, n) else /* We didn't get overflow in umul_ppmm. Put p0 in P and try with one more value of K. */ - p = p0; /* bogus if long != mp_limb_t */ + p = p0; } /* We have partial products in mp_stack[0..top], in descending order. We also have a small partial product in p. Their product is the final result. */ if (top < 0) - mpz_set_ui (result, p); + MPZ_SET_1_NZ (result, p); else - mpz_mul_ui (result, mp_stack[top--], p); + MPZ_MUL_1_POS (result, mp_stack[top--], p); while (top >= 0) mpz_mul (result, result, mp_stack[top--]);