version 1.1.1.2, 2000/09/09 14:12:26 |
version 1.1.1.3, 2003/08/25 16:06:20 |
|
|
THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. |
|
|
|
|
Copyright (C) 1991, 1993, 1994, 1996, 1997, 1999, 2000 Free Software |
Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002 Free Software |
Foundation, Inc. |
Foundation, Inc. |
|
|
This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
Line 42 MA 02111-1307, USA. */ |
|
Line 42 MA 02111-1307, USA. */ |
|
the multiplier and the multiplicand. */ |
the multiplier and the multiplicand. */ |
|
|
void |
void |
#if __STDC__ |
|
mpn_sqr_n (mp_ptr prodp, |
mpn_sqr_n (mp_ptr prodp, |
mp_srcptr up, mp_size_t un) |
mp_srcptr up, mp_size_t un) |
#else |
|
mpn_sqr_n (prodp, up, un) |
|
mp_ptr prodp; |
|
mp_srcptr up; |
|
mp_size_t un; |
|
#endif |
|
{ |
{ |
if (un < KARATSUBA_SQR_THRESHOLD) |
ASSERT (un >= 1); |
|
ASSERT (! MPN_OVERLAP_P (prodp, 2*un, up, un)); |
|
|
|
/* FIXME: Can this be removed? */ |
|
if (un == 0) |
|
return; |
|
|
|
if (BELOW_THRESHOLD (un, SQR_BASECASE_THRESHOLD)) |
|
{ /* mul_basecase is faster than sqr_basecase on small sizes sometimes */ |
|
mpn_mul_basecase (prodp, up, un, up, un); |
|
} |
|
else if (BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD)) |
{ /* plain schoolbook multiplication */ |
{ /* plain schoolbook multiplication */ |
if (un == 0) |
|
return; |
|
mpn_sqr_basecase (prodp, up, un); |
mpn_sqr_basecase (prodp, up, un); |
} |
} |
else if (un < TOOM3_SQR_THRESHOLD) |
else if (BELOW_THRESHOLD (un, SQR_TOOM3_THRESHOLD)) |
{ /* karatsuba multiplication */ |
{ /* karatsuba multiplication */ |
mp_ptr tspace; |
mp_ptr tspace; |
TMP_DECL (marker); |
TMP_DECL (marker); |
TMP_MARK (marker); |
TMP_MARK (marker); |
tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB); |
tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (un)); |
mpn_kara_sqr_n (prodp, up, un, tspace); |
mpn_kara_sqr_n (prodp, up, un, tspace); |
TMP_FREE (marker); |
TMP_FREE (marker); |
} |
} |
#if WANT_FFT || TUNE_PROGRAM_BUILD |
#if WANT_FFT || TUNE_PROGRAM_BUILD |
else if (un < FFT_SQR_THRESHOLD) |
else if (BELOW_THRESHOLD (un, SQR_FFT_THRESHOLD)) |
#else |
#else |
else |
else |
#endif |
#endif |
{ /* toom3 multiplication */ |
{ /* Toom3 multiplication. |
mp_ptr tspace; |
Use workspace from the heap, as stack may be limited. Since n is |
TMP_DECL (marker); |
at least MUL_TOOM3_THRESHOLD, the multiplication will take much |
TMP_MARK (marker); |
longer than malloc()/free(). */ |
tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB); |
mp_ptr tspace; |
|
mp_size_t tsize; |
|
tsize = MPN_TOOM3_SQR_N_TSIZE (un); |
|
tspace = __GMP_ALLOCATE_FUNC_LIMBS (tsize); |
mpn_toom3_sqr_n (prodp, up, un, tspace); |
mpn_toom3_sqr_n (prodp, up, un, tspace); |
TMP_FREE (marker); |
__GMP_FREE_FUNC_LIMBS (tspace, tsize); |
} |
} |
#if WANT_FFT || TUNE_PROGRAM_BUILD |
#if WANT_FFT || TUNE_PROGRAM_BUILD |
else |
else |
Line 90 mpn_sqr_n (prodp, up, un) |
|
Line 95 mpn_sqr_n (prodp, up, un) |
|
} |
} |
|
|
mp_limb_t |
mp_limb_t |
#if __STDC__ |
|
mpn_mul (mp_ptr prodp, |
mpn_mul (mp_ptr prodp, |
mp_srcptr up, mp_size_t un, |
mp_srcptr up, mp_size_t un, |
mp_srcptr vp, mp_size_t vn) |
mp_srcptr vp, mp_size_t vn) |
#else |
|
mpn_mul (prodp, up, un, vp, vn) |
|
mp_ptr prodp; |
|
mp_srcptr up; |
|
mp_size_t un; |
|
mp_srcptr vp; |
|
mp_size_t vn; |
|
#endif |
|
{ |
{ |
mp_size_t l; |
mp_size_t l; |
mp_limb_t c; |
mp_limb_t c; |
|
|
|
ASSERT (un >= vn); |
|
ASSERT (vn >= 1); |
|
ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un)); |
|
ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn)); |
|
|
if (up == vp && un == vn) |
if (up == vp && un == vn) |
{ |
{ |
mpn_sqr_n (prodp, up, un); |
mpn_sqr_n (prodp, up, un); |
return prodp[2 * un - 1]; |
return prodp[2 * un - 1]; |
} |
} |
|
|
if (vn < KARATSUBA_MUL_THRESHOLD) |
if (vn < MUL_KARATSUBA_THRESHOLD) |
{ /* long multiplication */ |
{ /* long multiplication */ |
mpn_mul_basecase (prodp, up, un, vp, vn); |
mpn_mul_basecase (prodp, up, un, vp, vn); |
return prodp[un + vn - 1]; |
return prodp[un + vn - 1]; |
Line 130 mpn_mul (prodp, up, un, vp, vn) |
|
Line 131 mpn_mul (prodp, up, un, vp, vn) |
|
up += vn; |
up += vn; |
un -= vn; |
un -= vn; |
|
|
if (un < vn) |
if (un < vn) |
{ |
{ |
/* Swap u's and v's. */ |
/* Swap u's and v's. */ |
MPN_SRCPTR_SWAP (up,un, vp,vn); |
MPN_SRCPTR_SWAP (up,un, vp,vn); |
} |
} |
|
|
ws = (mp_ptr) TMP_ALLOC (((vn >= KARATSUBA_MUL_THRESHOLD ? vn : un) + vn) |
ws = (mp_ptr) TMP_ALLOC (((vn >= MUL_KARATSUBA_THRESHOLD ? vn : un) + vn) |
* BYTES_PER_MP_LIMB); |
* BYTES_PER_MP_LIMB); |
|
|
t = 0; |
t = 0; |
while (vn >= KARATSUBA_MUL_THRESHOLD) |
while (vn >= MUL_KARATSUBA_THRESHOLD) |
{ |
{ |
mpn_mul_n (ws, up, vp, vn); |
mpn_mul_n (ws, up, vp, vn); |
if (l <= 2*vn) |
if (l <= 2*vn) |
{ |
{ |
t += mpn_add_n (prodp, prodp, ws, l); |
t += mpn_add_n (prodp, prodp, ws, l); |
if (l != 2*vn) |
if (l != 2*vn) |
Line 161 mpn_mul (prodp, up, un, vp, vn) |
|
Line 162 mpn_mul (prodp, up, un, vp, vn) |
|
l -= vn; |
l -= vn; |
up += vn; |
up += vn; |
un -= vn; |
un -= vn; |
if (un < vn) |
if (un < vn) |
{ |
{ |
/* Swap u's and v's. */ |
/* Swap u's and v's. */ |
MPN_SRCPTR_SWAP (up,un, vp,vn); |
MPN_SRCPTR_SWAP (up,un, vp,vn); |
} |
} |
} |
} |
|
|
if (vn) |
if (vn != 0) |
{ |
{ |
mpn_mul_basecase (ws, up, un, vp, vn); |
mpn_mul_basecase (ws, up, un, vp, vn); |
if (l <= un + vn) |
if (l <= un + vn) |
{ |
{ |
t += mpn_add_n (prodp, prodp, ws, l); |
t += mpn_add_n (prodp, prodp, ws, l); |
if (l != un + vn) |
if (l != un + vn) |
t = mpn_add_1 (prodp + l, ws + l, un + vn - l, t); |
t = mpn_add_1 (prodp + l, ws + l, un + vn - l, t); |
} |
} |
else |
else |
{ |
{ |
c = mpn_add_n (prodp, prodp, ws, un + vn); |
c = mpn_add_n (prodp, prodp, ws, un + vn); |
Line 184 mpn_mul (prodp, up, un, vp, vn) |
|
Line 185 mpn_mul (prodp, up, un, vp, vn) |
|
} |
} |
} |
} |
|
|
TMP_FREE (marker); |
TMP_FREE (marker); |
} |
} |
return prodp[un + vn - 1]; |
return prodp[un + vn - 1]; |
} |
} |