Annotation of OpenXM_contrib/gmp/mpn/generic/mul_n.c, Revision 1.1
1.1 ! maekawa 1: /* mpn_mul_n -- Multiply two natural numbers of length n.
! 2:
! 3: Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
! 4:
! 5: This file is part of the GNU MP Library.
! 6:
! 7: The GNU MP Library is free software; you can redistribute it and/or modify
! 8: it under the terms of the GNU Library General Public License as published by
! 9: the Free Software Foundation; either version 2 of the License, or (at your
! 10: option) any later version.
! 11:
! 12: The GNU MP Library is distributed in the hope that it will be useful, but
! 13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! 15: License for more details.
! 16:
! 17: You should have received a copy of the GNU Library General Public License
! 18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: MA 02111-1307, USA. */
! 21:
! 22: #include "gmp.h"
! 23: #include "gmp-impl.h"
! 24:
! 25: /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
! 26: both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
! 27: always stored. Return the most significant limb.
! 28:
! 29: Argument constraints:
! 30: 1. PRODP != UP and PRODP != VP, i.e. the destination
! 31: must be distinct from the multiplier and the multiplicand. */
! 32:
! 33: /* If KARATSUBA_THRESHOLD is not already defined, define it to a
! 34: value which is good on most machines. */
! 35: #ifndef KARATSUBA_THRESHOLD
! 36: #define KARATSUBA_THRESHOLD 32
! 37: #endif
! 38:
! 39: /* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
! 40: #if KARATSUBA_THRESHOLD < 2
! 41: #undef KARATSUBA_THRESHOLD
! 42: #define KARATSUBA_THRESHOLD 2
! 43: #endif
! 44:
! 45: /* Handle simple cases with traditional multiplication.
! 46:
! 47: This is the most critical code of multiplication. All multiplies rely
! 48: on this, both small and huge. Small ones arrive here immediately. Huge
! 49: ones arrive here as this is the base case for Karatsuba's recursive
! 50: algorithm below. */
! 51:
! 52: void
! 53: #if __STDC__
! 54: impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
! 55: #else
! 56: impn_mul_n_basecase (prodp, up, vp, size)
! 57: mp_ptr prodp;
! 58: mp_srcptr up;
! 59: mp_srcptr vp;
! 60: mp_size_t size;
! 61: #endif
! 62: {
! 63: mp_size_t i;
! 64: mp_limb_t cy_limb;
! 65: mp_limb_t v_limb;
! 66:
! 67: /* Multiply by the first limb in V separately, as the result can be
! 68: stored (not added) to PROD. We also avoid a loop for zeroing. */
! 69: v_limb = vp[0];
! 70: if (v_limb <= 1)
! 71: {
! 72: if (v_limb == 1)
! 73: MPN_COPY (prodp, up, size);
! 74: else
! 75: MPN_ZERO (prodp, size);
! 76: cy_limb = 0;
! 77: }
! 78: else
! 79: cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
! 80:
! 81: prodp[size] = cy_limb;
! 82: prodp++;
! 83:
! 84: /* For each iteration in the outer loop, multiply one limb from
! 85: U with one limb from V, and add it to PROD. */
! 86: for (i = 1; i < size; i++)
! 87: {
! 88: v_limb = vp[i];
! 89: if (v_limb <= 1)
! 90: {
! 91: cy_limb = 0;
! 92: if (v_limb == 1)
! 93: cy_limb = mpn_add_n (prodp, prodp, up, size);
! 94: }
! 95: else
! 96: cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
! 97:
! 98: prodp[size] = cy_limb;
! 99: prodp++;
! 100: }
! 101: }
! 102:
! 103: void
! 104: #if __STDC__
! 105: impn_mul_n (mp_ptr prodp,
! 106: mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
! 107: #else
! 108: impn_mul_n (prodp, up, vp, size, tspace)
! 109: mp_ptr prodp;
! 110: mp_srcptr up;
! 111: mp_srcptr vp;
! 112: mp_size_t size;
! 113: mp_ptr tspace;
! 114: #endif
! 115: {
! 116: if ((size & 1) != 0)
! 117: {
! 118: /* The size is odd, the code code below doesn't handle that.
! 119: Multiply the least significant (size - 1) limbs with a recursive
! 120: call, and handle the most significant limb of S1 and S2
! 121: separately. */
! 122: /* A slightly faster way to do this would be to make the Karatsuba
! 123: code below behave as if the size were even, and let it check for
! 124: odd size in the end. I.e., in essence move this code to the end.
! 125: Doing so would save us a recursive call, and potentially make the
! 126: stack grow a lot less. */
! 127:
! 128: mp_size_t esize = size - 1; /* even size */
! 129: mp_limb_t cy_limb;
! 130:
! 131: MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
! 132: cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
! 133: prodp[esize + esize] = cy_limb;
! 134: cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
! 135:
! 136: prodp[esize + size] = cy_limb;
! 137: }
! 138: else
! 139: {
! 140: /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
! 141:
! 142: Split U in two pieces, U1 and U0, such that
! 143: U = U0 + U1*(B**n),
! 144: and V in V1 and V0, such that
! 145: V = V0 + V1*(B**n).
! 146:
! 147: UV is then computed recursively using the identity
! 148:
! 149: 2n n n n
! 150: UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V
! 151: 1 1 1 0 0 1 0 0
! 152:
! 153: Where B = 2**BITS_PER_MP_LIMB. */
! 154:
! 155: mp_size_t hsize = size >> 1;
! 156: mp_limb_t cy;
! 157: int negflg;
! 158:
! 159: /*** Product H. ________________ ________________
! 160: |_____U1 x V1____||____U0 x V0_____| */
! 161: /* Put result in upper part of PROD and pass low part of TSPACE
! 162: as new TSPACE. */
! 163: MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
! 164:
! 165: /*** Product M. ________________
! 166: |_(U1-U0)(V0-V1)_| */
! 167: if (mpn_cmp (up + hsize, up, hsize) >= 0)
! 168: {
! 169: mpn_sub_n (prodp, up + hsize, up, hsize);
! 170: negflg = 0;
! 171: }
! 172: else
! 173: {
! 174: mpn_sub_n (prodp, up, up + hsize, hsize);
! 175: negflg = 1;
! 176: }
! 177: if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
! 178: {
! 179: mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
! 180: negflg ^= 1;
! 181: }
! 182: else
! 183: {
! 184: mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
! 185: /* No change of NEGFLG. */
! 186: }
! 187: /* Read temporary operands from low part of PROD.
! 188: Put result in low part of TSPACE using upper part of TSPACE
! 189: as new TSPACE. */
! 190: MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
! 191:
! 192: /*** Add/copy product H. */
! 193: MPN_COPY (prodp + hsize, prodp + size, hsize);
! 194: cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
! 195:
! 196: /*** Add product M (if NEGFLG M is a negative number). */
! 197: if (negflg)
! 198: cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
! 199: else
! 200: cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
! 201:
! 202: /*** Product L. ________________ ________________
! 203: |________________||____U0 x V0_____| */
! 204: /* Read temporary operands from low part of PROD.
! 205: Put result in low part of TSPACE using upper part of TSPACE
! 206: as new TSPACE. */
! 207: MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
! 208:
! 209: /*** Add/copy Product L (twice). */
! 210:
! 211: cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
! 212: if (cy)
! 213: mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
! 214:
! 215: MPN_COPY (prodp, tspace, hsize);
! 216: cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
! 217: if (cy)
! 218: mpn_add_1 (prodp + size, prodp + size, size, 1);
! 219: }
! 220: }
! 221:
! 222: void
! 223: #if __STDC__
! 224: impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
! 225: #else
! 226: impn_sqr_n_basecase (prodp, up, size)
! 227: mp_ptr prodp;
! 228: mp_srcptr up;
! 229: mp_size_t size;
! 230: #endif
! 231: {
! 232: mp_size_t i;
! 233: mp_limb_t cy_limb;
! 234: mp_limb_t v_limb;
! 235:
! 236: /* Multiply by the first limb in V separately, as the result can be
! 237: stored (not added) to PROD. We also avoid a loop for zeroing. */
! 238: v_limb = up[0];
! 239: if (v_limb <= 1)
! 240: {
! 241: if (v_limb == 1)
! 242: MPN_COPY (prodp, up, size);
! 243: else
! 244: MPN_ZERO (prodp, size);
! 245: cy_limb = 0;
! 246: }
! 247: else
! 248: cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
! 249:
! 250: prodp[size] = cy_limb;
! 251: prodp++;
! 252:
! 253: /* For each iteration in the outer loop, multiply one limb from
! 254: U with one limb from V, and add it to PROD. */
! 255: for (i = 1; i < size; i++)
! 256: {
! 257: v_limb = up[i];
! 258: if (v_limb <= 1)
! 259: {
! 260: cy_limb = 0;
! 261: if (v_limb == 1)
! 262: cy_limb = mpn_add_n (prodp, prodp, up, size);
! 263: }
! 264: else
! 265: cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
! 266:
! 267: prodp[size] = cy_limb;
! 268: prodp++;
! 269: }
! 270: }
! 271:
! 272: void
! 273: #if __STDC__
! 274: impn_sqr_n (mp_ptr prodp,
! 275: mp_srcptr up, mp_size_t size, mp_ptr tspace)
! 276: #else
! 277: impn_sqr_n (prodp, up, size, tspace)
! 278: mp_ptr prodp;
! 279: mp_srcptr up;
! 280: mp_size_t size;
! 281: mp_ptr tspace;
! 282: #endif
! 283: {
! 284: if ((size & 1) != 0)
! 285: {
! 286: /* The size is odd, the code code below doesn't handle that.
! 287: Multiply the least significant (size - 1) limbs with a recursive
! 288: call, and handle the most significant limb of S1 and S2
! 289: separately. */
! 290: /* A slightly faster way to do this would be to make the Karatsuba
! 291: code below behave as if the size were even, and let it check for
! 292: odd size in the end. I.e., in essence move this code to the end.
! 293: Doing so would save us a recursive call, and potentially make the
! 294: stack grow a lot less. */
! 295:
! 296: mp_size_t esize = size - 1; /* even size */
! 297: mp_limb_t cy_limb;
! 298:
! 299: MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
! 300: cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
! 301: prodp[esize + esize] = cy_limb;
! 302: cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
! 303:
! 304: prodp[esize + size] = cy_limb;
! 305: }
! 306: else
! 307: {
! 308: mp_size_t hsize = size >> 1;
! 309: mp_limb_t cy;
! 310:
! 311: /*** Product H. ________________ ________________
! 312: |_____U1 x U1____||____U0 x U0_____| */
! 313: /* Put result in upper part of PROD and pass low part of TSPACE
! 314: as new TSPACE. */
! 315: MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
! 316:
! 317: /*** Product M. ________________
! 318: |_(U1-U0)(U0-U1)_| */
! 319: if (mpn_cmp (up + hsize, up, hsize) >= 0)
! 320: {
! 321: mpn_sub_n (prodp, up + hsize, up, hsize);
! 322: }
! 323: else
! 324: {
! 325: mpn_sub_n (prodp, up, up + hsize, hsize);
! 326: }
! 327:
! 328: /* Read temporary operands from low part of PROD.
! 329: Put result in low part of TSPACE using upper part of TSPACE
! 330: as new TSPACE. */
! 331: MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
! 332:
! 333: /*** Add/copy product H. */
! 334: MPN_COPY (prodp + hsize, prodp + size, hsize);
! 335: cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
! 336:
! 337: /*** Add product M (if NEGFLG M is a negative number). */
! 338: cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
! 339:
! 340: /*** Product L. ________________ ________________
! 341: |________________||____U0 x U0_____| */
! 342: /* Read temporary operands from low part of PROD.
! 343: Put result in low part of TSPACE using upper part of TSPACE
! 344: as new TSPACE. */
! 345: MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
! 346:
! 347: /*** Add/copy Product L (twice). */
! 348:
! 349: cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
! 350: if (cy)
! 351: mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
! 352:
! 353: MPN_COPY (prodp, tspace, hsize);
! 354: cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
! 355: if (cy)
! 356: mpn_add_1 (prodp + size, prodp + size, size, 1);
! 357: }
! 358: }
! 359:
! 360: /* This should be made into an inline function in gmp.h. */
! 361: inline void
! 362: #if __STDC__
! 363: mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
! 364: #else
! 365: mpn_mul_n (prodp, up, vp, size)
! 366: mp_ptr prodp;
! 367: mp_srcptr up;
! 368: mp_srcptr vp;
! 369: mp_size_t size;
! 370: #endif
! 371: {
! 372: TMP_DECL (marker);
! 373: TMP_MARK (marker);
! 374: if (up == vp)
! 375: {
! 376: if (size < KARATSUBA_THRESHOLD)
! 377: {
! 378: impn_sqr_n_basecase (prodp, up, size);
! 379: }
! 380: else
! 381: {
! 382: mp_ptr tspace;
! 383: tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
! 384: impn_sqr_n (prodp, up, size, tspace);
! 385: }
! 386: }
! 387: else
! 388: {
! 389: if (size < KARATSUBA_THRESHOLD)
! 390: {
! 391: impn_mul_n_basecase (prodp, up, vp, size);
! 392: }
! 393: else
! 394: {
! 395: mp_ptr tspace;
! 396: tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
! 397: impn_mul_n (prodp, up, vp, size, tspace);
! 398: }
! 399: }
! 400: TMP_FREE (marker);
! 401: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>