Annotation of OpenXM_contrib/gmp/mpn/generic/mul.c, Revision 1.1.1.3
1.1 maekawa 1: /* mpn_mul -- Multiply two natural numbers.
2:
1.1.1.2 maekawa 3: THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul)
4: ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH
5: THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED
6: THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
7:
8:
1.1.1.3 ! ohara 9: Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002 Free Software
1.1.1.2 maekawa 10: Foundation, Inc.
1.1 maekawa 11:
12: This file is part of the GNU MP Library.
13:
14: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 maekawa 15: it under the terms of the GNU Lesser General Public License as published by
16: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 17: option) any later version.
18:
19: The GNU MP Library is distributed in the hope that it will be useful, but
20: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 maekawa 21: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 22: License for more details.
23:
1.1.1.2 maekawa 24: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 25: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
26: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27: MA 02111-1307, USA. */
28:
29: #include "gmp.h"
30: #include "gmp-impl.h"
31:
1.1.1.2 maekawa 32: /* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
33: (pointed to by VP, with VN limbs), and store the result at PRODP. The
34: result is UN + VN limbs. Return the most significant limb of the result.
1.1 maekawa 35:
1.1.1.2 maekawa 36: NOTE: The space pointed to by PRODP is overwritten before finished with U
37: and V, so overlap is an error.
1.1 maekawa 38:
39: Argument constraints:
1.1.1.2 maekawa 40: 1. UN >= VN.
41: 2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
42: the multiplier and the multiplicand. */
43:
44: void
45: mpn_sqr_n (mp_ptr prodp,
1.1.1.3 ! ohara 46: mp_srcptr up, mp_size_t un)
1.1.1.2 maekawa 47: {
1.1.1.3 ! ohara 48: ASSERT (un >= 1);
! 49: ASSERT (! MPN_OVERLAP_P (prodp, 2*un, up, un));
! 50:
! 51: /* FIXME: Can this be removed? */
! 52: if (un == 0)
! 53: return;
! 54:
! 55: if (BELOW_THRESHOLD (un, SQR_BASECASE_THRESHOLD))
! 56: { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
! 57: mpn_mul_basecase (prodp, up, un, up, un);
! 58: }
! 59: else if (BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD))
1.1.1.2 maekawa 60: { /* plain schoolbook multiplication */
61: mpn_sqr_basecase (prodp, up, un);
62: }
1.1.1.3 ! ohara 63: else if (BELOW_THRESHOLD (un, SQR_TOOM3_THRESHOLD))
1.1.1.2 maekawa 64: { /* karatsuba multiplication */
65: mp_ptr tspace;
66: TMP_DECL (marker);
67: TMP_MARK (marker);
1.1.1.3 ! ohara 68: tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (un));
1.1.1.2 maekawa 69: mpn_kara_sqr_n (prodp, up, un, tspace);
70: TMP_FREE (marker);
71: }
72: #if WANT_FFT || TUNE_PROGRAM_BUILD
1.1.1.3 ! ohara 73: else if (BELOW_THRESHOLD (un, SQR_FFT_THRESHOLD))
1.1.1.2 maekawa 74: #else
75: else
76: #endif
1.1.1.3 ! ohara 77: { /* Toom3 multiplication.
! 78: Use workspace from the heap, as stack may be limited. Since n is
! 79: at least MUL_TOOM3_THRESHOLD, the multiplication will take much
! 80: longer than malloc()/free(). */
! 81: mp_ptr tspace;
! 82: mp_size_t tsize;
! 83: tsize = MPN_TOOM3_SQR_N_TSIZE (un);
! 84: tspace = __GMP_ALLOCATE_FUNC_LIMBS (tsize);
1.1.1.2 maekawa 85: mpn_toom3_sqr_n (prodp, up, un, tspace);
1.1.1.3 ! ohara 86: __GMP_FREE_FUNC_LIMBS (tspace, tsize);
1.1.1.2 maekawa 87: }
88: #if WANT_FFT || TUNE_PROGRAM_BUILD
89: else
90: {
91: /* schoenhage multiplication */
92: mpn_mul_fft_full (prodp, up, un, up, un);
93: }
1.1 maekawa 94: #endif
1.1.1.2 maekawa 95: }
1.1 maekawa 96:
97: mp_limb_t
98: mpn_mul (mp_ptr prodp,
1.1.1.2 maekawa 99: mp_srcptr up, mp_size_t un,
100: mp_srcptr vp, mp_size_t vn)
1.1 maekawa 101: {
1.1.1.2 maekawa 102: mp_size_t l;
103: mp_limb_t c;
1.1 maekawa 104:
1.1.1.3 ! ohara 105: ASSERT (un >= vn);
! 106: ASSERT (vn >= 1);
! 107: ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));
! 108: ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));
! 109:
1.1.1.2 maekawa 110: if (up == vp && un == vn)
1.1 maekawa 111: {
1.1.1.2 maekawa 112: mpn_sqr_n (prodp, up, un);
113: return prodp[2 * un - 1];
114: }
115:
1.1.1.3 ! ohara 116: if (vn < MUL_KARATSUBA_THRESHOLD)
1.1.1.2 maekawa 117: { /* long multiplication */
118: mpn_mul_basecase (prodp, up, un, vp, vn);
119: return prodp[un + vn - 1];
120: }
121:
122: mpn_mul_n (prodp, up, vp, vn);
123: if (un != vn)
124: { mp_limb_t t;
125: mp_ptr ws;
126: TMP_DECL (marker);
127: TMP_MARK (marker);
128:
129: prodp += vn;
130: l = vn;
131: up += vn;
132: un -= vn;
1.1 maekawa 133:
1.1.1.3 ! ohara 134: if (un < vn)
1.1 maekawa 135: {
1.1.1.2 maekawa 136: /* Swap u's and v's. */
1.1.1.3 ! ohara 137: MPN_SRCPTR_SWAP (up,un, vp,vn);
1.1 maekawa 138: }
139:
1.1.1.3 ! ohara 140: ws = (mp_ptr) TMP_ALLOC (((vn >= MUL_KARATSUBA_THRESHOLD ? vn : un) + vn)
1.1.1.2 maekawa 141: * BYTES_PER_MP_LIMB);
1.1 maekawa 142:
1.1.1.2 maekawa 143: t = 0;
1.1.1.3 ! ohara 144: while (vn >= MUL_KARATSUBA_THRESHOLD)
1.1 maekawa 145: {
1.1.1.2 maekawa 146: mpn_mul_n (ws, up, vp, vn);
1.1.1.3 ! ohara 147: if (l <= 2*vn)
1.1 maekawa 148: {
1.1.1.2 maekawa 149: t += mpn_add_n (prodp, prodp, ws, l);
150: if (l != 2*vn)
151: {
152: t = mpn_add_1 (prodp + l, ws + l, 2*vn - l, t);
153: l = 2*vn;
154: }
1.1 maekawa 155: }
156: else
1.1.1.2 maekawa 157: {
158: c = mpn_add_n (prodp, prodp, ws, 2*vn);
159: t += mpn_add_1 (prodp + 2*vn, prodp + 2*vn, l - 2*vn, c);
160: }
161: prodp += vn;
162: l -= vn;
163: up += vn;
164: un -= vn;
1.1.1.3 ! ohara 165: if (un < vn)
1.1.1.2 maekawa 166: {
167: /* Swap u's and v's. */
1.1.1.3 ! ohara 168: MPN_SRCPTR_SWAP (up,un, vp,vn);
1.1.1.2 maekawa 169: }
1.1 maekawa 170: }
171:
1.1.1.3 ! ohara 172: if (vn != 0)
1.1 maekawa 173: {
1.1.1.2 maekawa 174: mpn_mul_basecase (ws, up, un, vp, vn);
1.1.1.3 ! ohara 175: if (l <= un + vn)
1.1.1.2 maekawa 176: {
177: t += mpn_add_n (prodp, prodp, ws, l);
178: if (l != un + vn)
179: t = mpn_add_1 (prodp + l, ws + l, un + vn - l, t);
1.1.1.3 ! ohara 180: }
1.1.1.2 maekawa 181: else
182: {
183: c = mpn_add_n (prodp, prodp, ws, un + vn);
184: t += mpn_add_1 (prodp + un + vn, prodp + un + vn, l - un - vn, c);
185: }
1.1 maekawa 186: }
187:
1.1.1.3 ! ohara 188: TMP_FREE (marker);
! 189: }
1.1.1.2 maekawa 190: return prodp[un + vn - 1];
1.1 maekawa 191: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>