OpenXM_contrib/gmp/mpn/generic/mul_n.c - annotate

Return to mul_n.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / generic
Annotation of OpenXM_contrib/gmp/mpn/generic/mul_n.c, Revision 1.1.1.1

1.1       maekawa     1: /* mpn_mul_n -- Multiply two natural numbers of length n.
                      2:
                      3: Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
                      4:
                      5: This file is part of the GNU MP Library.
                      6:
                      7: The GNU MP Library is free software; you can redistribute it and/or modify
                      8: it under the terms of the GNU Library General Public License as published by
                      9: the Free Software Foundation; either version 2 of the License, or (at your
                     10: option) any later version.
                     11:
                     12: The GNU MP Library is distributed in the hope that it will be useful, but
                     13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
                     15: License for more details.
                     16:
                     17: You should have received a copy of the GNU Library General Public License
                     18: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: MA 02111-1307, USA. */
                     21:
                     22: #include "gmp.h"
                     23: #include "gmp-impl.h"
                     24:
                     25: /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
                     26:    both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
                     27:    always stored.  Return the most significant limb.
                     28:
                     29:    Argument constraints:
                     30:    1. PRODP != UP and PRODP != VP, i.e. the destination
                     31:       must be distinct from the multiplier and the multiplicand.  */
                     32:
                     33: /* If KARATSUBA_THRESHOLD is not already defined, define it to a
                     34:    value which is good on most machines.  */
                     35: #ifndef KARATSUBA_THRESHOLD
                     36: #define KARATSUBA_THRESHOLD 32
                     37: #endif
                     38:
                     39: /* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
                     40: #if KARATSUBA_THRESHOLD < 2
                     41: #undef KARATSUBA_THRESHOLD
                     42: #define KARATSUBA_THRESHOLD 2
                     43: #endif
                     44:
                     45: /* Handle simple cases with traditional multiplication.
                     46:
                     47:    This is the most critical code of multiplication.  All multiplies rely
                     48:    on this, both small and huge.  Small ones arrive here immediately.  Huge
                     49:    ones arrive here as this is the base case for Karatsuba's recursive
                     50:    algorithm below.  */
                     51:
                     52: void
                     53: #if __STDC__
                     54: impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
                     55: #else
                     56: impn_mul_n_basecase (prodp, up, vp, size)
                     57:      mp_ptr prodp;
                     58:      mp_srcptr up;
                     59:      mp_srcptr vp;
                     60:      mp_size_t size;
                     61: #endif
                     62: {
                     63:   mp_size_t i;
                     64:   mp_limb_t cy_limb;
                     65:   mp_limb_t v_limb;
                     66:
                     67:   /* Multiply by the first limb in V separately, as the result can be
                     68:      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
                     69:   v_limb = vp[0];
                     70:   if (v_limb <= 1)
                     71:     {
                     72:       if (v_limb == 1)
                     73:        MPN_COPY (prodp, up, size);
                     74:       else
                     75:        MPN_ZERO (prodp, size);
                     76:       cy_limb = 0;
                     77:     }
                     78:   else
                     79:     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
                     80:
                     81:   prodp[size] = cy_limb;
                     82:   prodp++;
                     83:
                     84:   /* For each iteration in the outer loop, multiply one limb from
                     85:      U with one limb from V, and add it to PROD.  */
                     86:   for (i = 1; i < size; i++)
                     87:     {
                     88:       v_limb = vp[i];
                     89:       if (v_limb <= 1)
                     90:        {
                     91:          cy_limb = 0;
                     92:          if (v_limb == 1)
                     93:            cy_limb = mpn_add_n (prodp, prodp, up, size);
                     94:        }
                     95:       else
                     96:        cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
                     97:
                     98:       prodp[size] = cy_limb;
                     99:       prodp++;
                    100:     }
                    101: }
                    102:
                    103: void
                    104: #if __STDC__
                    105: impn_mul_n (mp_ptr prodp,
                    106:             mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
                    107: #else
                    108: impn_mul_n (prodp, up, vp, size, tspace)
                    109:      mp_ptr prodp;
                    110:      mp_srcptr up;
                    111:      mp_srcptr vp;
                    112:      mp_size_t size;
                    113:      mp_ptr tspace;
                    114: #endif
                    115: {
                    116:   if ((size & 1) != 0)
                    117:     {
                    118:       /* The size is odd, the code code below doesn't handle that.
                    119:         Multiply the least significant (size - 1) limbs with a recursive
                    120:         call, and handle the most significant limb of S1 and S2
                    121:         separately.  */
                    122:       /* A slightly faster way to do this would be to make the Karatsuba
                    123:         code below behave as if the size were even, and let it check for
                    124:         odd size in the end.  I.e., in essence move this code to the end.
                    125:         Doing so would save us a recursive call, and potentially make the
                    126:         stack grow a lot less.  */
                    127:
                    128:       mp_size_t esize = size - 1;      /* even size */
                    129:       mp_limb_t cy_limb;
                    130:
                    131:       MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
                    132:       cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
                    133:       prodp[esize + esize] = cy_limb;
                    134:       cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
                    135:
                    136:       prodp[esize + size] = cy_limb;
                    137:     }
                    138:   else
                    139:     {
                    140:       /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
                    141:
                    142:         Split U in two pieces, U1 and U0, such that
                    143:         U = U0 + U1*(B**n),
                    144:         and V in V1 and V0, such that
                    145:         V = V0 + V1*(B**n).
                    146:
                    147:         UV is then computed recursively using the identity
                    148:
                    149:                2n   n          n                     n
                    150:         UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
                    151:                        1 1        1  0   0  1              0 0
                    152:
                    153:         Where B = 2**BITS_PER_MP_LIMB.  */
                    154:
                    155:       mp_size_t hsize = size >> 1;
                    156:       mp_limb_t cy;
                    157:       int negflg;
                    158:
                    159:       /*** Product H.   ________________  ________________
                    160:                        |_____U1 x V1____||____U0 x V0_____|  */
                    161:       /* Put result in upper part of PROD and pass low part of TSPACE
                    162:         as new TSPACE.  */
                    163:       MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
                    164:
                    165:       /*** Product M.   ________________
                    166:                        |_(U1-U0)(V0-V1)_|  */
                    167:       if (mpn_cmp (up + hsize, up, hsize) >= 0)
                    168:        {
                    169:          mpn_sub_n (prodp, up + hsize, up, hsize);
                    170:          negflg = 0;
                    171:        }
                    172:       else
                    173:        {
                    174:          mpn_sub_n (prodp, up, up + hsize, hsize);
                    175:          negflg = 1;
                    176:        }
                    177:       if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
                    178:        {
                    179:          mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
                    180:          negflg ^= 1;
                    181:        }
                    182:       else
                    183:        {
                    184:          mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
                    185:          /* No change of NEGFLG.  */
                    186:        }
                    187:       /* Read temporary operands from low part of PROD.
                    188:         Put result in low part of TSPACE using upper part of TSPACE
                    189:         as new TSPACE.  */
                    190:       MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
                    191:
                    192:       /*** Add/copy product H.  */
                    193:       MPN_COPY (prodp + hsize, prodp + size, hsize);
                    194:       cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
                    195:
                    196:       /*** Add product M (if NEGFLG M is a negative number).  */
                    197:       if (negflg)
                    198:        cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
                    199:       else
                    200:        cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
                    201:
                    202:       /*** Product L.   ________________  ________________
                    203:                        |________________||____U0 x V0_____|  */
                    204:       /* Read temporary operands from low part of PROD.
                    205:         Put result in low part of TSPACE using upper part of TSPACE
                    206:         as new TSPACE.  */
                    207:       MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
                    208:
                    209:       /*** Add/copy Product L (twice).  */
                    210:
                    211:       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
                    212:       if (cy)
                    213:        mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
                    214:
                    215:       MPN_COPY (prodp, tspace, hsize);
                    216:       cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
                    217:       if (cy)
                    218:        mpn_add_1 (prodp + size, prodp + size, size, 1);
                    219:     }
                    220: }
                    221:
                    222: void
                    223: #if __STDC__
                    224: impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
                    225: #else
                    226: impn_sqr_n_basecase (prodp, up, size)
                    227:      mp_ptr prodp;
                    228:      mp_srcptr up;
                    229:      mp_size_t size;
                    230: #endif
                    231: {
                    232:   mp_size_t i;
                    233:   mp_limb_t cy_limb;
                    234:   mp_limb_t v_limb;
                    235:
                    236:   /* Multiply by the first limb in V separately, as the result can be
                    237:      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
                    238:   v_limb = up[0];
                    239:   if (v_limb <= 1)
                    240:     {
                    241:       if (v_limb == 1)
                    242:        MPN_COPY (prodp, up, size);
                    243:       else
                    244:        MPN_ZERO (prodp, size);
                    245:       cy_limb = 0;
                    246:     }
                    247:   else
                    248:     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
                    249:
                    250:   prodp[size] = cy_limb;
                    251:   prodp++;
                    252:
                    253:   /* For each iteration in the outer loop, multiply one limb from
                    254:      U with one limb from V, and add it to PROD.  */
                    255:   for (i = 1; i < size; i++)
                    256:     {
                    257:       v_limb = up[i];
                    258:       if (v_limb <= 1)
                    259:        {
                    260:          cy_limb = 0;
                    261:          if (v_limb == 1)
                    262:            cy_limb = mpn_add_n (prodp, prodp, up, size);
                    263:        }
                    264:       else
                    265:        cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
                    266:
                    267:       prodp[size] = cy_limb;
                    268:       prodp++;
                    269:     }
                    270: }
                    271:
                    272: void
                    273: #if __STDC__
                    274: impn_sqr_n (mp_ptr prodp,
                    275:             mp_srcptr up, mp_size_t size, mp_ptr tspace)
                    276: #else
                    277: impn_sqr_n (prodp, up, size, tspace)
                    278:      mp_ptr prodp;
                    279:      mp_srcptr up;
                    280:      mp_size_t size;
                    281:      mp_ptr tspace;
                    282: #endif
                    283: {
                    284:   if ((size & 1) != 0)
                    285:     {
                    286:       /* The size is odd, the code code below doesn't handle that.
                    287:         Multiply the least significant (size - 1) limbs with a recursive
                    288:         call, and handle the most significant limb of S1 and S2
                    289:         separately.  */
                    290:       /* A slightly faster way to do this would be to make the Karatsuba
                    291:         code below behave as if the size were even, and let it check for
                    292:         odd size in the end.  I.e., in essence move this code to the end.
                    293:         Doing so would save us a recursive call, and potentially make the
                    294:         stack grow a lot less.  */
                    295:
                    296:       mp_size_t esize = size - 1;      /* even size */
                    297:       mp_limb_t cy_limb;
                    298:
                    299:       MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
                    300:       cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
                    301:       prodp[esize + esize] = cy_limb;
                    302:       cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
                    303:
                    304:       prodp[esize + size] = cy_limb;
                    305:     }
                    306:   else
                    307:     {
                    308:       mp_size_t hsize = size >> 1;
                    309:       mp_limb_t cy;
                    310:
                    311:       /*** Product H.   ________________  ________________
                    312:                        |_____U1 x U1____||____U0 x U0_____|  */
                    313:       /* Put result in upper part of PROD and pass low part of TSPACE
                    314:         as new TSPACE.  */
                    315:       MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
                    316:
                    317:       /*** Product M.   ________________
                    318:                        |_(U1-U0)(U0-U1)_|  */
                    319:       if (mpn_cmp (up + hsize, up, hsize) >= 0)
                    320:        {
                    321:          mpn_sub_n (prodp, up + hsize, up, hsize);
                    322:        }
                    323:       else
                    324:        {
                    325:          mpn_sub_n (prodp, up, up + hsize, hsize);
                    326:        }
                    327:
                    328:       /* Read temporary operands from low part of PROD.
                    329:         Put result in low part of TSPACE using upper part of TSPACE
                    330:         as new TSPACE.  */
                    331:       MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
                    332:
                    333:       /*** Add/copy product H.  */
                    334:       MPN_COPY (prodp + hsize, prodp + size, hsize);
                    335:       cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
                    336:
                    337:       /*** Add product M (if NEGFLG M is a negative number).  */
                    338:       cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
                    339:
                    340:       /*** Product L.   ________________  ________________
                    341:                        |________________||____U0 x U0_____|  */
                    342:       /* Read temporary operands from low part of PROD.
                    343:         Put result in low part of TSPACE using upper part of TSPACE
                    344:         as new TSPACE.  */
                    345:       MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
                    346:
                    347:       /*** Add/copy Product L (twice).  */
                    348:
                    349:       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
                    350:       if (cy)
                    351:        mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
                    352:
                    353:       MPN_COPY (prodp, tspace, hsize);
                    354:       cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
                    355:       if (cy)
                    356:        mpn_add_1 (prodp + size, prodp + size, size, 1);
                    357:     }
                    358: }
                    359:
                    360: /* This should be made into an inline function in gmp.h.  */
                    361: inline void
                    362: #if __STDC__
                    363: mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
                    364: #else
                    365: mpn_mul_n (prodp, up, vp, size)
                    366:      mp_ptr prodp;
                    367:      mp_srcptr up;
                    368:      mp_srcptr vp;
                    369:      mp_size_t size;
                    370: #endif
                    371: {
                    372:   TMP_DECL (marker);
                    373:   TMP_MARK (marker);
                    374:   if (up == vp)
                    375:     {
                    376:       if (size < KARATSUBA_THRESHOLD)
                    377:        {
                    378:          impn_sqr_n_basecase (prodp, up, size);
                    379:        }
                    380:       else
                    381:        {
                    382:          mp_ptr tspace;
                    383:          tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
                    384:          impn_sqr_n (prodp, up, size, tspace);
                    385:        }
                    386:     }
                    387:   else
                    388:     {
                    389:       if (size < KARATSUBA_THRESHOLD)
                    390:        {
                    391:          impn_mul_n_basecase (prodp, up, vp, size);
                    392:        }
                    393:       else
                    394:        {
                    395:          mp_ptr tspace;
                    396:          tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
                    397:          impn_mul_n (prodp, up, vp, size, tspace);
                    398:        }
                    399:     }
                    400:   TMP_FREE (marker);
                    401: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>