[BACK]Return to addsub_n.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / generic

Annotation of OpenXM_contrib/gmp/mpn/generic/addsub_n.c, Revision 1.1

1.1     ! maekawa     1: /* mpn_addsub_n -- Add and Subtract two limb vectors of equal, non-zero length.
        !             2:
        !             3: Copyright (C) 1999, 2000 Free Software Foundation, Inc.
        !             4:
        !             5: This file is part of the GNU MP Library.
        !             6:
        !             7: The GNU MP Library is free software; you can redistribute it and/or modify
        !             8: it under the terms of the GNU Lesser General Public License as published by
        !             9: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            10: option) any later version.
        !            11:
        !            12: The GNU MP Library is distributed in the hope that it will be useful, but
        !            13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            15: License for more details.
        !            16:
        !            17: You should have received a copy of the GNU Lesser General Public License
        !            18: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            20: MA 02111-1307, USA. */
        !            21:
        !            22: #include "gmp.h"
        !            23: #include "gmp-impl.h"
        !            24:
        !            25: #ifndef L1_CACHE_SIZE
        !            26: #define L1_CACHE_SIZE 8192     /* only 68040 has less than this */
        !            27: #endif
        !            28:
        !            29: #define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
        !            30:
        !            31:
        !            32: /* mpn_addsub_n.
        !            33:    r1[] = s1[] + s2[]
        !            34:    r2[] = s1[] - s2[]
        !            35:    All operands have n limbs.
        !            36:    In-place operations allowed.  */
        !            37: mp_limb_t
        !            38: #if __STDC__
        !            39: mpn_addsub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
        !            40: #else
        !            41: mpn_addsub_n (r1p, r2p, s1p, s2p, n)
        !            42:      mp_ptr r1p, r2p;
        !            43:      mp_srcptr s1p, s2p;
        !            44:      mp_size_t n;
        !            45: #endif
        !            46: {
        !            47:   mp_limb_t acyn, acyo;                /* carry for add */
        !            48:   mp_limb_t scyn, scyo;                /* carry for subtract */
        !            49:   mp_size_t off;               /* offset in operands */
        !            50:   mp_size_t this_n;            /* size of current chunk */
        !            51:
        !            52:   /* We alternatingly add and subtract in chunks that fit into the (L1)
        !            53:      cache.  Since the chunks are several hundred limbs, the function call
        !            54:      overhead is insignificant, but we get much better locality.  */
        !            55:
        !            56:   /* We have three variant of the inner loop, the proper loop is chosen
        !            57:      depending on whether r1 or r2 are the same operand as s1 or s2.  */
        !            58:
        !            59:   if (r1p != s1p && r1p != s2p)
        !            60:     {
        !            61:       /* r1 is not identical to either input operand.  We can therefore write
        !            62:         to r1 directly, without using temporary storage.  */
        !            63:       acyo = 0;
        !            64:       scyo = 0;
        !            65:       for (off = 0; off < n; off += PART_SIZE)
        !            66:        {
        !            67:          this_n = MIN (n - off, PART_SIZE);
        !            68: #if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
        !            69:          acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
        !            70: #else
        !            71:          acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
        !            72:          acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
        !            73: #endif
        !            74: #if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
        !            75:          scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
        !            76: #else
        !            77:          scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
        !            78:          scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
        !            79: #endif
        !            80:        }
        !            81:     }
        !            82:   else if (r2p != s1p && r2p != s2p)
        !            83:     {
        !            84:       /* r2 is not identical to either input operand.  We can therefore write
        !            85:         to r2 directly, without using temporary storage.  */
        !            86:       acyo = 0;
        !            87:       scyo = 0;
        !            88:       for (off = 0; off < n; off += PART_SIZE)
        !            89:        {
        !            90:          this_n = MIN (n - off, PART_SIZE);
        !            91: #if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
        !            92:          scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
        !            93: #else
        !            94:          scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
        !            95:          scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
        !            96: #endif
        !            97: #if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
        !            98:          acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
        !            99: #else
        !           100:          acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
        !           101:          acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
        !           102: #endif
        !           103:        }
        !           104:     }
        !           105:   else
        !           106:     {
        !           107:       /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2=s2 or vice versa)
        !           108:         Need temporary storage.  */
        !           109:       mp_limb_t tp[PART_SIZE];
        !           110:       acyo = 0;
        !           111:       scyo = 0;
        !           112:       for (off = 0; off < n; off += PART_SIZE)
        !           113:        {
        !           114:          this_n = MIN (n - off, PART_SIZE);
        !           115: #if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
        !           116:          acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
        !           117: #else
        !           118:          acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
        !           119:          acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
        !           120: #endif
        !           121: #if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
        !           122:          scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
        !           123: #else
        !           124:          scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
        !           125:          scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
        !           126: #endif
        !           127:          MPN_COPY (r1p + off, tp, this_n);
        !           128:        }
        !           129:     }
        !           130:
        !           131:   return 2 * acyo + scyo;
        !           132: }
        !           133:
        !           134: #ifdef MAIN
        !           135: #include <stdlib.h>
        !           136: #include <stdio.h>
        !           137: #include "timing.h"
        !           138:
        !           139: long cputime ();
        !           140:
        !           141: int
        !           142: main (int argc, char **argv)
        !           143: {
        !           144:   mp_ptr r1p, r2p, s1p, s2p;
        !           145:   double t;
        !           146:   mp_size_t n;
        !           147:
        !           148:   n = strtol (argv[1], 0, 0);
        !           149:
        !           150:   r1p = malloc (n * BYTES_PER_MP_LIMB);
        !           151:   r2p = malloc (n * BYTES_PER_MP_LIMB);
        !           152:   s1p = malloc (n * BYTES_PER_MP_LIMB);
        !           153:   s2p = malloc (n * BYTES_PER_MP_LIMB);
        !           154:   TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
        !           155:   printf ("              separate add and sub: %.3f\n", t);
        !           156:   TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n));
        !           157:   printf ("combined addsub separate variables: %.3f\n", t);
        !           158:   TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
        !           159:   printf ("        combined addsub r1 overlap: %.3f\n", t);
        !           160:   TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
        !           161:   printf ("        combined addsub r2 overlap: %.3f\n", t);
        !           162:   TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n));
        !           163:   printf ("          combined addsub in-place: %.3f\n", t);
        !           164:
        !           165:   return 0;
        !           166: }
        !           167: #endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>