=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/generic/Attic/mul.c,v retrieving revision 1.1.1.2 retrieving revision 1.1.1.3 diff -u -p -r1.1.1.2 -r1.1.1.3 --- OpenXM_contrib/gmp/mpn/generic/Attic/mul.c 2000/09/09 14:12:26 1.1.1.2 +++ OpenXM_contrib/gmp/mpn/generic/Attic/mul.c 2003/08/25 16:06:20 1.1.1.3 @@ -6,7 +6,7 @@ THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. -Copyright (C) 1991, 1993, 1994, 1996, 1997, 1999, 2000 Free Software +Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -42,43 +42,48 @@ MA 02111-1307, USA. */ the multiplier and the multiplicand. */ void -#if __STDC__ mpn_sqr_n (mp_ptr prodp, - mp_srcptr up, mp_size_t un) -#else -mpn_sqr_n (prodp, up, un) - mp_ptr prodp; - mp_srcptr up; - mp_size_t un; -#endif + mp_srcptr up, mp_size_t un) { - if (un < KARATSUBA_SQR_THRESHOLD) + ASSERT (un >= 1); + ASSERT (! MPN_OVERLAP_P (prodp, 2*un, up, un)); + + /* FIXME: Can this be removed? */ + if (un == 0) + return; + + if (BELOW_THRESHOLD (un, SQR_BASECASE_THRESHOLD)) + { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */ + mpn_mul_basecase (prodp, up, un, up, un); + } + else if (BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD)) { /* plain schoolbook multiplication */ - if (un == 0) - return; mpn_sqr_basecase (prodp, up, un); } - else if (un < TOOM3_SQR_THRESHOLD) + else if (BELOW_THRESHOLD (un, SQR_TOOM3_THRESHOLD)) { /* karatsuba multiplication */ mp_ptr tspace; TMP_DECL (marker); TMP_MARK (marker); - tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB); + tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (un)); mpn_kara_sqr_n (prodp, up, un, tspace); TMP_FREE (marker); } #if WANT_FFT || TUNE_PROGRAM_BUILD - else if (un < FFT_SQR_THRESHOLD) + else if (BELOW_THRESHOLD (un, SQR_FFT_THRESHOLD)) #else else #endif - { /* toom3 multiplication */ - mp_ptr tspace; - TMP_DECL (marker); - TMP_MARK (marker); - tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB); + { /* Toom3 multiplication. + Use workspace from the heap, as stack may be limited. Since n is + at least MUL_TOOM3_THRESHOLD, the multiplication will take much + longer than malloc()/free(). */ + mp_ptr tspace; + mp_size_t tsize; + tsize = MPN_TOOM3_SQR_N_TSIZE (un); + tspace = __GMP_ALLOCATE_FUNC_LIMBS (tsize); mpn_toom3_sqr_n (prodp, up, un, tspace); - TMP_FREE (marker); + __GMP_FREE_FUNC_LIMBS (tspace, tsize); } #if WANT_FFT || TUNE_PROGRAM_BUILD else @@ -90,29 +95,25 @@ mpn_sqr_n (prodp, up, un) } mp_limb_t -#if __STDC__ mpn_mul (mp_ptr prodp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn) -#else -mpn_mul (prodp, up, un, vp, vn) - mp_ptr prodp; - mp_srcptr up; - mp_size_t un; - mp_srcptr vp; - mp_size_t vn; -#endif { mp_size_t l; mp_limb_t c; + ASSERT (un >= vn); + ASSERT (vn >= 1); + ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un)); + ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn)); + if (up == vp && un == vn) { mpn_sqr_n (prodp, up, un); return prodp[2 * un - 1]; } - if (vn < KARATSUBA_MUL_THRESHOLD) + if (vn < MUL_KARATSUBA_THRESHOLD) { /* long multiplication */ mpn_mul_basecase (prodp, up, un, vp, vn); return prodp[un + vn - 1]; @@ -130,20 +131,20 @@ mpn_mul (prodp, up, un, vp, vn) up += vn; un -= vn; - if (un < vn) + if (un < vn) { /* Swap u's and v's. */ - MPN_SRCPTR_SWAP (up,un, vp,vn); + MPN_SRCPTR_SWAP (up,un, vp,vn); } - ws = (mp_ptr) TMP_ALLOC (((vn >= KARATSUBA_MUL_THRESHOLD ? vn : un) + vn) + ws = (mp_ptr) TMP_ALLOC (((vn >= MUL_KARATSUBA_THRESHOLD ? vn : un) + vn) * BYTES_PER_MP_LIMB); t = 0; - while (vn >= KARATSUBA_MUL_THRESHOLD) + while (vn >= MUL_KARATSUBA_THRESHOLD) { mpn_mul_n (ws, up, vp, vn); - if (l <= 2*vn) + if (l <= 2*vn) { t += mpn_add_n (prodp, prodp, ws, l); if (l != 2*vn) @@ -161,22 +162,22 @@ mpn_mul (prodp, up, un, vp, vn) l -= vn; up += vn; un -= vn; - if (un < vn) + if (un < vn) { /* Swap u's and v's. */ - MPN_SRCPTR_SWAP (up,un, vp,vn); + MPN_SRCPTR_SWAP (up,un, vp,vn); } } - if (vn) + if (vn != 0) { mpn_mul_basecase (ws, up, un, vp, vn); - if (l <= un + vn) + if (l <= un + vn) { t += mpn_add_n (prodp, prodp, ws, l); if (l != un + vn) t = mpn_add_1 (prodp + l, ws + l, un + vn - l, t); - } + } else { c = mpn_add_n (prodp, prodp, ws, un + vn); @@ -184,7 +185,7 @@ mpn_mul (prodp, up, un, vp, vn) } } - TMP_FREE (marker); - } + TMP_FREE (marker); + } return prodp[un + vn - 1]; }