OpenXM_contrib/gmp/mpn/generic/mul_n.c, Revision 1.1.1.1
/* mpn_mul_n -- Multiply two natural numbers of length n.

Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
License for more details.

You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.  */

#include "gmp.h"
#include "gmp-impl.h"

/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
   both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
   always stored, so the most significant limb of the product ends up in
   PRODP[2 * SIZE - 1] (these functions return no value).

   Argument constraints:
   1. PRODP != UP and PRODP != VP, i.e. the destination
      must be distinct from the multiplier and the multiplicand.  */

/* If KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 32
#endif

/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
#if KARATSUBA_THRESHOLD < 2
#undef KARATSUBA_THRESHOLD
#define KARATSUBA_THRESHOLD 2
#endif
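
/* The crossover point is machine dependent and can be overridden when
   compiling; for example (the value 16 is a hypothetical illustration,
   not a measured recommendation):

       cc -DKARATSUBA_THRESHOLD=16 -c mul_n.c

   The clamp above then still guarantees that the recursion in impn_mul_n
   and impn_sqr_n bottoms out in a basecase of at least 2 limbs.  */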

/* Handle simple cases with traditional multiplication.

   This is the most critical code of multiplication.  All multiplies rely
   on this, both small and huge.  Small ones arrive here immediately.  Huge
   ones arrive here as this is the base case for Karatsuba's recursive
   algorithm below.  */

void
#if __STDC__
impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
impn_mul_n_basecase (prodp, up, vp, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
#endif
{
  mp_size_t i;
  mp_limb_t cy_limb;
  mp_limb_t v_limb;

  /* Multiply by the first limb of V separately, as the result can be
     stored (not added) in PROD.  We also avoid a loop for zeroing.  */
  v_limb = vp[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* For each remaining limb of V, multiply all of U by that limb and
     add the product into PROD at the proper offset.  */
  for (i = 1; i < size; i++)
    {
      v_limb = vp[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}
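
/* Worked example (a sketch using the toy base B = 10 instead of
   2**BITS_PER_MP_LIMB, so limbs are decimal digits): for u = {4, 3}
   (i.e. 34) and v = {6, 2} (i.e. 26), the first pass stores
   34 * 6 = 204 = {4, 0, 2}; the loop then adds 34 * 2 = 68 one limb
   higher, giving 204 + 680 = 884 = {4, 8, 8}, i.e. 34 * 26.  */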

void
#if __STDC__
impn_mul_n (mp_ptr prodp,
            mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
impn_mul_n (prodp, up, vp, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, and the code below doesn't handle that.
         Multiply the least significant (size - 1) limbs with a recursive
         call, and handle the most significant limb of S1 and S2
         separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
         code below behave as if the size were even, and let it check for
         odd size in the end.  I.e., in essence move this code to the end.
         Doing so would save us a recursive call, and potentially make the
         stack grow a lot less.  */

      mp_size_t esize = size - 1;       /* even size */
      mp_limb_t cy_limb;

      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);

      prodp[esize + size] = cy_limb;
    }
  else
    {
      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.

         Split U in two pieces, U1 and U0, such that
         U = U0 + U1*(B**n),
         and V in V1 and V0, such that
         V = V0 + V1*(B**n).

         UV is then computed recursively using the identity

           UV = (B**(2n) + B**n)*U1*V1 + B**n*(U1-U0)*(V0-V1)
                + (B**n + 1)*U0*V0

         where B = 2**BITS_PER_MP_LIMB.  */
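
      /* A numeric check of the identity, with the hypothetical toy values
         B = 10 and n = 1 (B is really 2**BITS_PER_MP_LIMB): for U = 34,
         V = 26 we get U1 = 3, U0 = 4, V1 = 2, V0 = 6, and
         (100 + 10)*3*2 + 10*(3-4)*(6-2) + (10 + 1)*4*6
           = 660 - 40 + 264 = 884 = 34 * 26.  */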

      mp_size_t hsize = size >> 1;
      mp_limb_t cy;
      int negflg;

      /*** Product H.    ________________  ________________
                        |_____U1 x V1____||____U0 x V0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);

      /*** Product M.    ________________
                        |_(U1-U0)(V0-V1)_|  */
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
        {
          mpn_sub_n (prodp, up + hsize, up, hsize);
          negflg = 0;
        }
      else
        {
          mpn_sub_n (prodp, up, up + hsize, hsize);
          negflg = 1;
        }
      if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
        {
          mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
          negflg ^= 1;
        }
      else
        {
          mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
          /* No change of NEGFLG.  */
        }
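      /* Here prodp[0 .. hsize-1] holds |U1-U0| and prodp[hsize .. size-1]
         holds |V1-V0|.  NEGFLG is nonzero exactly when the true middle
         term (U1-U0)*(V0-V1) is negative; it decides below whether the
         recursively computed M is added or subtracted.  */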
      /* Read temporary operands from low part of PROD.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M (if NEGFLG, M is a negative number).  */
      if (negflg)
        cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      else
        cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L.    ________________  ________________
                        |________________||____U0 x V0_____|  */
      /* Read operands directly from UP and VP.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
        mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      MPN_COPY (prodp, tspace, hsize);
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
        mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
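
/* Scratch usage note: the recursion above assumes TSPACE holds 2*SIZE
   limbs.  That bound sustains itself: at each even-sized level the low
   SIZE limbs of TSPACE receive a half-sized product while the high SIZE
   limbs are passed down as scratch, and a half-sized call needs exactly
   2*(SIZE/2) = SIZE limbs.  This matches the 2 * size * BYTES_PER_MP_LIMB
   allocated by mpn_mul_n below.  */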

void
#if __STDC__
impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
#else
impn_sqr_n_basecase (prodp, up, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t size;
#endif
{
  mp_size_t i;
  mp_limb_t cy_limb;
  mp_limb_t v_limb;

  /* Multiply by the first limb of U separately, as the result can be
     stored (not added) in PROD.  We also avoid a loop for zeroing.  */
  v_limb = up[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* For each remaining limb of U, multiply all of U by that limb and
     add the product into PROD at the proper offset.  */
  for (i = 1; i < size; i++)
    {
      v_limb = up[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}

void
#if __STDC__
impn_sqr_n (mp_ptr prodp,
            mp_srcptr up, mp_size_t size, mp_ptr tspace)
#else
impn_sqr_n (prodp, up, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, and the code below doesn't handle that.
         Square the least significant (size - 1) limbs with a recursive
         call, and handle the most significant limb of S1 separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
         code below behave as if the size were even, and let it check for
         odd size in the end.  I.e., in essence move this code to the end.
         Doing so would save us a recursive call, and potentially make the
         stack grow a lot less.  */

      mp_size_t esize = size - 1;       /* even size */
      mp_limb_t cy_limb;

      MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);

      prodp[esize + size] = cy_limb;
    }
  else
    {
      mp_size_t hsize = size >> 1;
      mp_limb_t cy;

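      /* The code below is the Karatsuba multiplication above specialized
         to V = U.  With the same notation (a sketch; B = 10, n = 1 are
         hypothetical toy values), the identity becomes

           UU = (B**(2n) + B**n)*U1*U1 - B**n*(U1-U0)**2 + (B**n + 1)*U0*U0

         so the middle term is never positive and no sign flag is needed:
         for U = 34, 110*3*3 - 10*(3-4)**2 + 11*4*4 = 990 - 10 + 176
         = 1156 = 34 * 34.  */
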
      /*** Product H.    ________________  ________________
                        |_____U1 x U1____||____U0 x U0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);

      /*** Product M.    ________________
                        |_(U1-U0)(U0-U1)_|  */
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
        {
          mpn_sub_n (prodp, up + hsize, up, hsize);
        }
      else
        {
          mpn_sub_n (prodp, up, up + hsize, hsize);
        }

      /* Read temporary operands from low part of PROD.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Subtract product M (for squaring it is never positive).  */
      cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L.    ________________  ________________
                        |________________||____U0 x U0_____|  */
      /* Read operands directly from UP.
         Put result in low part of TSPACE using upper part of TSPACE
         as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
        mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      MPN_COPY (prodp, tspace, hsize);
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
        mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}

/* This should be made into an inline function in gmp.h.  */
inline void
#if __STDC__
mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
mpn_mul_n (prodp, up, vp, size)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
#endif
{
  TMP_DECL (marker);
  TMP_MARK (marker);
  if (up == vp)
    {
      if (size < KARATSUBA_THRESHOLD)
        {
          impn_sqr_n_basecase (prodp, up, size);
        }
      else
        {
          mp_ptr tspace;
          tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
          impn_sqr_n (prodp, up, size, tspace);
        }
    }
  else
    {
      if (size < KARATSUBA_THRESHOLD)
        {
          impn_mul_n_basecase (prodp, up, vp, size);
        }
      else
        {
          mp_ptr tspace;
          tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
          impn_mul_n (prodp, up, vp, size, tspace);
        }
    }
  TMP_FREE (marker);
}
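
/* A minimal usage sketch, not part of the library: it assumes the
   "gmp.h" / "gmp-impl.h" environment included at the top of this file,
   and is left disabled since this file must not define main.  */
#if 0
#include <stdio.h>

int
main ()
{
  mp_limb_t u[4] = {1, 2, 3, 4};        /* least significant limb first */
  mp_limb_t v[4] = {5, 6, 7, 8};
  mp_limb_t prod[8];                    /* room for 2 * SIZE limbs */
  mp_size_t i;

  mpn_mul_n (prod, u, v, (mp_size_t) 4);   /* prod overlaps neither input */
  for (i = 7; i >= 0; i--)
    printf ("%lX ", (unsigned long) prod[i]);   /* most significant first */
  printf ("\n");
  return 0;
}
#endif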