Annotation of OpenXM_contrib/gmp/mpn/generic/addsub_n.c, Revision 1.1
1.1 ! maekawa 1: /* mpn_addsub_n -- Add and Subtract two limb vectors of equal, non-zero length.
! 2:
! 3: Copyright (C) 1999, 2000 Free Software Foundation, Inc.
! 4:
! 5: This file is part of the GNU MP Library.
! 6:
! 7: The GNU MP Library is free software; you can redistribute it and/or modify
! 8: it under the terms of the GNU Lesser General Public License as published by
! 9: the Free Software Foundation; either version 2.1 of the License, or (at your
! 10: option) any later version.
! 11:
! 12: The GNU MP Library is distributed in the hope that it will be useful, but
! 13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 15: License for more details.
! 16:
! 17: You should have received a copy of the GNU Lesser General Public License
! 18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: MA 02111-1307, USA. */
! 21:
! 22: #include "gmp.h"
! 23: #include "gmp-impl.h"
! 24:
/* Size of the L1 cache used for sizing chunks, in bytes (it is divided by
   BYTES_PER_MP_LIMB below).  A definition made before this point takes
   precedence over the default.  */
#ifndef L1_CACHE_SIZE
#define L1_CACHE_SIZE 8192 /* only 68040 has less than this */
#endif

/* Number of limbs processed per chunk by mpn_addsub_n, and the size of its
   temporary buffer: the cache size expressed in limbs, divided by 6.
   NOTE(review): the divisor 6 presumably leaves room for every operand
   chunk touched in one pass to stay cache-resident -- confirm.  */
#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
! 30:
! 31:
! 32: /* mpn_addsub_n.
! 33: r1[] = s1[] + s2[]
! 34: r2[] = s1[] - s2[]
! 35: All operands have n limbs.
! 36: In-place operations allowed. */
! 37: mp_limb_t
! 38: #if __STDC__
! 39: mpn_addsub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
! 40: #else
! 41: mpn_addsub_n (r1p, r2p, s1p, s2p, n)
! 42: mp_ptr r1p, r2p;
! 43: mp_srcptr s1p, s2p;
! 44: mp_size_t n;
! 45: #endif
! 46: {
! 47: mp_limb_t acyn, acyo; /* carry for add */
! 48: mp_limb_t scyn, scyo; /* carry for subtract */
! 49: mp_size_t off; /* offset in operands */
! 50: mp_size_t this_n; /* size of current chunk */
! 51:
! 52: /* We alternatingly add and subtract in chunks that fit into the (L1)
! 53: cache. Since the chunks are several hundred limbs, the function call
! 54: overhead is insignificant, but we get much better locality. */
! 55:
! 56: /* We have three variant of the inner loop, the proper loop is chosen
! 57: depending on whether r1 or r2 are the same operand as s1 or s2. */
! 58:
! 59: if (r1p != s1p && r1p != s2p)
! 60: {
! 61: /* r1 is not identical to either input operand. We can therefore write
! 62: to r1 directly, without using temporary storage. */
! 63: acyo = 0;
! 64: scyo = 0;
! 65: for (off = 0; off < n; off += PART_SIZE)
! 66: {
! 67: this_n = MIN (n - off, PART_SIZE);
! 68: #if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
! 69: acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
! 70: #else
! 71: acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
! 72: acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
! 73: #endif
! 74: #if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
! 75: scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
! 76: #else
! 77: scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
! 78: scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
! 79: #endif
! 80: }
! 81: }
! 82: else if (r2p != s1p && r2p != s2p)
! 83: {
! 84: /* r2 is not identical to either input operand. We can therefore write
! 85: to r2 directly, without using temporary storage. */
! 86: acyo = 0;
! 87: scyo = 0;
! 88: for (off = 0; off < n; off += PART_SIZE)
! 89: {
! 90: this_n = MIN (n - off, PART_SIZE);
! 91: #if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
! 92: scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
! 93: #else
! 94: scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
! 95: scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
! 96: #endif
! 97: #if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
! 98: acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
! 99: #else
! 100: acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
! 101: acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
! 102: #endif
! 103: }
! 104: }
! 105: else
! 106: {
! 107: /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2=s2 or vice versa)
! 108: Need temporary storage. */
! 109: mp_limb_t tp[PART_SIZE];
! 110: acyo = 0;
! 111: scyo = 0;
! 112: for (off = 0; off < n; off += PART_SIZE)
! 113: {
! 114: this_n = MIN (n - off, PART_SIZE);
! 115: #if HAVE_NATIVE_mpn_add_nc || !HAVE_NATIVE_mpn_add_n
! 116: acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
! 117: #else
! 118: acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
! 119: acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
! 120: #endif
! 121: #if HAVE_NATIVE_mpn_sub_nc || !HAVE_NATIVE_mpn_sub_n
! 122: scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
! 123: #else
! 124: scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
! 125: scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
! 126: #endif
! 127: MPN_COPY (r1p + off, tp, this_n);
! 128: }
! 129: }
! 130:
! 131: return 2 * acyo + scyo;
! 132: }
! 133:
! 134: #ifdef MAIN
! 135: #include <stdlib.h>
! 136: #include <stdio.h>
! 137: #include "timing.h"
! 138:
! 139: long cputime ();
! 140:
! 141: int
! 142: main (int argc, char **argv)
! 143: {
! 144: mp_ptr r1p, r2p, s1p, s2p;
! 145: double t;
! 146: mp_size_t n;
! 147:
! 148: n = strtol (argv[1], 0, 0);
! 149:
! 150: r1p = malloc (n * BYTES_PER_MP_LIMB);
! 151: r2p = malloc (n * BYTES_PER_MP_LIMB);
! 152: s1p = malloc (n * BYTES_PER_MP_LIMB);
! 153: s2p = malloc (n * BYTES_PER_MP_LIMB);
! 154: TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
! 155: printf (" separate add and sub: %.3f\n", t);
! 156: TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n));
! 157: printf ("combined addsub separate variables: %.3f\n", t);
! 158: TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
! 159: printf (" combined addsub r1 overlap: %.3f\n", t);
! 160: TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
! 161: printf (" combined addsub r2 overlap: %.3f\n", t);
! 162: TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n));
! 163: printf (" combined addsub in-place: %.3f\n", t);
! 164:
! 165: return 0;
! 166: }
! 167: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>