=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/generic/Attic/mul_basecase.c,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.2 diff -u -p -r1.1.1.1 -r1.1.1.2 --- OpenXM_contrib/gmp/mpn/generic/Attic/mul_basecase.c 2000/09/09 14:12:26 1.1.1.1 +++ OpenXM_contrib/gmp/mpn/generic/Attic/mul_basecase.c 2003/08/25 16:06:20 1.1.1.2 @@ -5,7 +5,7 @@ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. -Copyright (C) 1991, 1992, 1993, 1994, 1996, 1997, 2000 Free Software +Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -28,60 +28,95 @@ MA 02111-1307, USA. */ #include "gmp.h" #include "gmp-impl.h" -/* Handle simple cases with traditional multiplication. - This is the most critical code of multiplication. All multiplies rely on - this, both small and huge. Small ones arrive here immediately, huge ones - arrive here as this is the base case for Karatsuba's recursive algorithm. */ +/* Multiply {up,usize} by {vp,vsize} and write the result to + {prodp,usize+vsize}. Must have usize>=vsize. + Note that prodp gets usize+vsize limbs stored, even if the actual result + only needs usize+vsize-1. + + There's no good reason to call here with vsize>=MUL_KARATSUBA_THRESHOLD. + Currently this is allowed, but it might not be in the future. + + This is the most critical code for multiplication. All multiplies rely + on this, both small and huge. Small ones arrive here immediately, huge + ones arrive here as this is the base case for Karatsuba's recursive + algorithm. */ + void -#if __STDC__ -mpn_mul_basecase (mp_ptr prodp, - mp_srcptr up, mp_size_t usize, - mp_srcptr vp, mp_size_t vsize) -#else -mpn_mul_basecase (prodp, up, usize, vp, vsize) - mp_ptr prodp; - mp_srcptr up; - mp_size_t usize; - mp_srcptr vp; - mp_size_t vsize; -#endif +mpn_mul_basecase (mp_ptr rp, + mp_srcptr up, mp_size_t un, + mp_srcptr vp, mp_size_t vn) { - /* We first multiply by the low order one or two limbs, as the result can - be stored, not added, to PROD. We also avoid a loop for zeroing this - way. */ + ASSERT (un >= vn); + ASSERT (vn >= 1); + ASSERT (! MPN_OVERLAP_P (rp, un+vn, up, un)); + ASSERT (! MPN_OVERLAP_P (rp, un+vn, vp, vn)); + + /* We first multiply by the low order limb (or depending on optional function + availability, limbs). This result can be stored, not added, to rp. We + also avoid a loop for zeroing this way. */ + #if HAVE_NATIVE_mpn_mul_2 - if (vsize >= 2) + if (vn >= 2) { - prodp[usize + 1] = mpn_mul_2 (prodp, up, usize, vp[0], vp[1]); - prodp += 2, vp += 2, vsize -= 2; + rp[un + 1] = mpn_mul_2 (rp, up, un, vp); + rp += 2, vp += 2, vn -= 2; } else { - prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]); + rp[un] = mpn_mul_1 (rp, up, un, vp[0]); return; } #else - prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]); - prodp += 1, vp += 1, vsize -= 1; + rp[un] = mpn_mul_1 (rp, up, un, vp[0]); + rp += 1, vp += 1, vn -= 1; #endif -#if HAVE_NATIVE_mpn_addmul_2 - while (vsize >= 2) + /* Now accumulate the product of up[] and the next low-order limb (or + depending on optional function availability, limbs) from vp[0]. */ + +#define MAX_LEFT MP_SIZE_T_MAX + +#if HAVE_NATIVE_mpn_addmul_4 + while (vn >= 4) { - prodp[usize + 1] = mpn_addmul_2 (prodp, up, usize, vp[0], vp[1]); - prodp += 2, vp += 2, vsize -= 2; + rp[un + 4 - 1] = mpn_addmul_4 (rp, up, un, vp); + rp += 4, vp += 4, vn -= 4; } - if (vsize != 0) - prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]); -#else - /* For each iteration in the loop, multiply U with one limb from V, and - add the result to PROD. */ - while (vsize != 0) +#undef MAX_LEFT +#define MAX_LEFT 3 +#endif + +#if HAVE_NATIVE_mpn_addmul_3 + while (vn >= 3) { - prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]); - prodp += 1, vp += 1, vsize -= 1; + rp[un + 3 - 1] = mpn_addmul_3 (rp, up, un, vp); + rp += 3, vp += 3, vn -= 3; + if (MAX_LEFT - 3 <= 3) + break; } +#undef MAX_LEFT +#define MAX_LEFT 2 #endif + +#if HAVE_NATIVE_mpn_addmul_2 + while (vn >= 2) + { + rp[un + 2 - 1] = mpn_addmul_2 (rp, up, un, vp); + rp += 2, vp += 2, vn -= 2; + if (MAX_LEFT - 2 <= 2) + break; + } +#undef MAX_LEFT +#define MAX_LEFT 1 +#endif + + while (vn >= 1) + { + rp[un] = mpn_addmul_1 (rp, up, un, vp[0]); + rp += 1, vp += 1, vn -= 1; + if (MAX_LEFT - 1 <= 1) + break; + } }