[BACK]Return to mul.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / generic

Annotation of OpenXM_contrib/gmp/mpn/generic/mul.c, Revision 1.1.1.3

1.1       maekawa     1: /* mpn_mul -- Multiply two natural numbers.
                      2:
1.1.1.2   maekawa     3:    THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul)
                      4:    ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY SAFE TO REACH
                      5:    THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED
                      6:    THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
                      7:
                      8:
1.1.1.3 ! ohara       9: Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002 Free Software
1.1.1.2   maekawa    10: Foundation, Inc.
1.1       maekawa    11:
                     12: This file is part of the GNU MP Library.
                     13:
                     14: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2   maekawa    15: it under the terms of the GNU Lesser General Public License as published by
                     16: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1       maekawa    17: option) any later version.
                     18:
                     19: The GNU MP Library is distributed in the hope that it will be useful, but
                     20: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2   maekawa    21: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1.1       maekawa    22: License for more details.
                     23:
1.1.1.2   maekawa    24: You should have received a copy of the GNU Lesser General Public License
1.1       maekawa    25: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     26: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     27: MA 02111-1307, USA. */
                     28:
                     29: #include "gmp.h"
                     30: #include "gmp-impl.h"
                     31:
1.1.1.2   maekawa    32: /* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
                     33:    (pointed to by VP, with VN limbs), and store the result at PRODP.  The
                     34:    result is UN + VN limbs.  Return the most significant limb of the result.
1.1       maekawa    35:
1.1.1.2   maekawa    36:    NOTE: The space pointed to by PRODP is overwritten before finished with U
                     37:    and V, so overlap is an error.
1.1       maekawa    38:
                     39:    Argument constraints:
1.1.1.2   maekawa    40:    1. UN >= VN.
                     41:    2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
                     42:       the multiplier and the multiplicand.  */
                     43:
                     44: void
                     45: mpn_sqr_n (mp_ptr prodp,
1.1.1.3 ! ohara      46:           mp_srcptr up, mp_size_t un)
1.1.1.2   maekawa    47: {
1.1.1.3 ! ohara      48:   ASSERT (un >= 1);
        !            49:   ASSERT (! MPN_OVERLAP_P (prodp, 2*un, up, un));
        !            50:
        !            51:   /* FIXME: Can this be removed? */
        !            52:   if (un == 0)
        !            53:     return;
        !            54:
        !            55:   if (BELOW_THRESHOLD (un, SQR_BASECASE_THRESHOLD))
        !            56:     { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
        !            57:       mpn_mul_basecase (prodp, up, un, up, un);
        !            58:     }
        !            59:   else if (BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD))
1.1.1.2   maekawa    60:     { /* plain schoolbook multiplication */
                     61:       mpn_sqr_basecase (prodp, up, un);
                     62:     }
1.1.1.3 ! ohara      63:   else if (BELOW_THRESHOLD (un, SQR_TOOM3_THRESHOLD))
1.1.1.2   maekawa    64:     { /* karatsuba multiplication */
                     65:       mp_ptr tspace;
                     66:       TMP_DECL (marker);
                     67:       TMP_MARK (marker);
1.1.1.3 ! ohara      68:       tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (un));
1.1.1.2   maekawa    69:       mpn_kara_sqr_n (prodp, up, un, tspace);
                     70:       TMP_FREE (marker);
                     71:     }
                     72: #if WANT_FFT || TUNE_PROGRAM_BUILD
1.1.1.3 ! ohara      73:   else if (BELOW_THRESHOLD (un, SQR_FFT_THRESHOLD))
1.1.1.2   maekawa    74: #else
                     75:   else
                     76: #endif
1.1.1.3 ! ohara      77:     { /* Toom3 multiplication.
        !            78:         Use workspace from the heap, as stack may be limited.  Since n is
        !            79:         at least MUL_TOOM3_THRESHOLD, the multiplication will take much
        !            80:         longer than malloc()/free().  */
        !            81:       mp_ptr     tspace;
        !            82:       mp_size_t  tsize;
        !            83:       tsize = MPN_TOOM3_SQR_N_TSIZE (un);
        !            84:       tspace = __GMP_ALLOCATE_FUNC_LIMBS (tsize);
1.1.1.2   maekawa    85:       mpn_toom3_sqr_n (prodp, up, un, tspace);
1.1.1.3 ! ohara      86:       __GMP_FREE_FUNC_LIMBS (tspace, tsize);
1.1.1.2   maekawa    87:     }
                     88: #if WANT_FFT || TUNE_PROGRAM_BUILD
                     89:   else
                     90:     {
                     91:       /* schoenhage multiplication */
                     92:       mpn_mul_fft_full (prodp, up, un, up, un);
                     93:     }
1.1       maekawa    94: #endif
1.1.1.2   maekawa    95: }
1.1       maekawa    96:
                     97: mp_limb_t
                     98: mpn_mul (mp_ptr prodp,
1.1.1.2   maekawa    99:         mp_srcptr up, mp_size_t un,
                    100:         mp_srcptr vp, mp_size_t vn)
1.1       maekawa   101: {
1.1.1.2   maekawa   102:   mp_size_t l;
                    103:   mp_limb_t c;
1.1       maekawa   104:
1.1.1.3 ! ohara     105:   ASSERT (un >= vn);
        !           106:   ASSERT (vn >= 1);
        !           107:   ASSERT (! MPN_OVERLAP_P (prodp, un+vn, up, un));
        !           108:   ASSERT (! MPN_OVERLAP_P (prodp, un+vn, vp, vn));
        !           109:
1.1.1.2   maekawa   110:   if (up == vp && un == vn)
1.1       maekawa   111:     {
1.1.1.2   maekawa   112:       mpn_sqr_n (prodp, up, un);
                    113:       return prodp[2 * un - 1];
                    114:     }
                    115:
1.1.1.3 ! ohara     116:   if (vn < MUL_KARATSUBA_THRESHOLD)
1.1.1.2   maekawa   117:     { /* long multiplication */
                    118:       mpn_mul_basecase (prodp, up, un, vp, vn);
                    119:       return prodp[un + vn - 1];
                    120:     }
                    121:
                    122:   mpn_mul_n (prodp, up, vp, vn);
                    123:   if (un != vn)
                    124:     { mp_limb_t t;
                    125:       mp_ptr ws;
                    126:       TMP_DECL (marker);
                    127:       TMP_MARK (marker);
                    128:
                    129:       prodp += vn;
                    130:       l = vn;
                    131:       up += vn;
                    132:       un -= vn;
1.1       maekawa   133:
1.1.1.3 ! ohara     134:       if (un < vn)
1.1       maekawa   135:        {
1.1.1.2   maekawa   136:          /* Swap u's and v's. */
1.1.1.3 ! ohara     137:          MPN_SRCPTR_SWAP (up,un, vp,vn);
1.1       maekawa   138:        }
                    139:
1.1.1.3 ! ohara     140:       ws = (mp_ptr) TMP_ALLOC (((vn >= MUL_KARATSUBA_THRESHOLD ? vn : un) + vn)
1.1.1.2   maekawa   141:                               * BYTES_PER_MP_LIMB);
1.1       maekawa   142:
1.1.1.2   maekawa   143:       t = 0;
1.1.1.3 ! ohara     144:       while (vn >= MUL_KARATSUBA_THRESHOLD)
1.1       maekawa   145:        {
1.1.1.2   maekawa   146:          mpn_mul_n (ws, up, vp, vn);
1.1.1.3 ! ohara     147:          if (l <= 2*vn)
1.1       maekawa   148:            {
1.1.1.2   maekawa   149:              t += mpn_add_n (prodp, prodp, ws, l);
                    150:              if (l != 2*vn)
                    151:                {
                    152:                  t = mpn_add_1 (prodp + l, ws + l, 2*vn - l, t);
                    153:                  l = 2*vn;
                    154:                }
1.1       maekawa   155:            }
                    156:          else
1.1.1.2   maekawa   157:            {
                    158:              c = mpn_add_n (prodp, prodp, ws, 2*vn);
                    159:              t += mpn_add_1 (prodp + 2*vn, prodp + 2*vn, l - 2*vn, c);
                    160:            }
                    161:          prodp += vn;
                    162:          l -= vn;
                    163:          up += vn;
                    164:          un -= vn;
1.1.1.3 ! ohara     165:          if (un < vn)
1.1.1.2   maekawa   166:            {
                    167:              /* Swap u's and v's. */
1.1.1.3 ! ohara     168:              MPN_SRCPTR_SWAP (up,un, vp,vn);
1.1.1.2   maekawa   169:            }
1.1       maekawa   170:        }
                    171:
1.1.1.3 ! ohara     172:       if (vn != 0)
1.1       maekawa   173:        {
1.1.1.2   maekawa   174:          mpn_mul_basecase (ws, up, un, vp, vn);
1.1.1.3 ! ohara     175:          if (l <= un + vn)
1.1.1.2   maekawa   176:            {
                    177:              t += mpn_add_n (prodp, prodp, ws, l);
                    178:              if (l != un + vn)
                    179:                t = mpn_add_1 (prodp + l, ws + l, un + vn - l, t);
1.1.1.3 ! ohara     180:            }
1.1.1.2   maekawa   181:          else
                    182:            {
                    183:              c = mpn_add_n (prodp, prodp, ws, un + vn);
                    184:              t += mpn_add_1 (prodp + un + vn, prodp + un + vn, l - un - vn, c);
                    185:            }
1.1       maekawa   186:        }
                    187:
1.1.1.3 ! ohara     188:       TMP_FREE (marker);
        !           189:     }
1.1.1.2   maekawa   190:   return prodp[un + vn - 1];
1.1       maekawa   191: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>