Annotation of OpenXM_contrib/gmp/tune/speed-ext.c, Revision 1.1
1.1 ! maekawa 1: /* An example of extending the speed program to measure routines not in GMP. */
! 2:
! 3: /*
! 4: Copyright (C) 1999, 2000 Free Software Foundation, Inc.
! 5:
! 6: This file is part of the GNU MP Library.
! 7:
! 8: The GNU MP Library is free software; you can redistribute it and/or modify
! 9: it under the terms of the GNU Lesser General Public License as published by
! 10: the Free Software Foundation; either version 2.1 of the License, or (at your
! 11: option) any later version.
! 12:
! 13: The GNU MP Library is distributed in the hope that it will be useful, but
! 14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: License for more details.
! 17:
! 18: You should have received a copy of the GNU Lesser General Public License
! 19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: MA 02111-1307, USA.
! 22: */
! 23:
! 24:
! 25: /* The extension here is three versions of an mpn arithmetic mean. These
! 26: aren't meant to be particularly useful, just examples.
! 27:
! 28: You can run something like the following to compare their speeds.
! 29:
! 30: ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
! 31:
! 32: On RISC chips, mean_open() might be fastest if the compiler is doing a
! 33: good job. On the register starved x86s, mean_calls will be fastest.
! 34:
! 35:
! 36: Notes:
! 37:
! 38: SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
! 39: by speed.c in useful places. SPEED_EXTRA_PROTOS goes after the header
! 40: files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
! 41:
! 42: The advantage of this #include "speed.c" scheme is that there's no
! 43: editing of a copy of that file, and new features in new versions of it
! 44: will be immediately available.
! 45:
! 46: In a real program the routines mean_calls() etc would probably be in
! 47: separate C or assembler source files, and just the measuring
! 48: speed_mean_calls() etc would be here. Linking against other libraries
! 49: for things to measure is perfectly possible too.
! 50:
! 51: When attempting to compare two versions of the same named routine, say
! 52: like the generic and assembler versions of mpn_add_n(), creative use of
! 53: cc -D or #define is suggested, so one or both can be renamed and linked
! 54: into the same program. It'll be much easier to compare them side by side
! 55: than with separate programs for each.
! 56:
! 57: common.c has notes on writing speed measuring routines.
! 58:
! 59: Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
! 60: not using libtool) to get common.o and other objects needed by speed.c. */
! 61:
! 62:
/* Hooks picked up by speed.c when it is #included below.

   SPEED_EXTRA_PROTOS supplies prototypes for the measuring routines defined
   later in this file.  The trailing semicolon is part of the macro because
   the macro is expanded as-is.  */
#define SPEED_EXTRA_PROTOS                                      \
  double speed_mean_calls (struct speed_params *params);       \
  double speed_mean_open (struct speed_params *params);        \
  double speed_mean_open2 (struct speed_params *params);

/* SPEED_EXTRA_ROUTINES supplies extra { "name", function } entries for the
   array of available routines; each entry ends with a comma so the list can
   be dropped straight into an initializer.  */
#define SPEED_EXTRA_ROUTINES                    \
  { "mean_calls",  speed_mean_calls  },         \
  { "mean_open",   speed_mean_open   },         \
  { "mean_open2",  speed_mean_open2  },
! 72:
! 73: #include "speed.c"
! 74:
! 75:
! 76: /* A straightforward implementation calling mpn subroutines.
! 77:
! 78: wp,size is set to (xp,size + yp,size) / 2. The return value is the
! 79: remainder from the division. The other versions are the same. */
! 80:
! 81: mp_limb_t
! 82: mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
! 83: {
! 84: mp_limb_t c, ret;
! 85:
! 86: ASSERT (size >= 1);
! 87:
! 88: c = mpn_add_n (wp, xp, yp, size);
! 89: ret = mpn_rshift (wp, wp, size, 1) >> (BITS_PER_MP_LIMB-1);
! 90: wp[size-1] |= (c << (BITS_PER_MP_LIMB-1));
! 91: return ret;
! 92: }
! 93:
! 94:
! 95: /* An open-coded version, making one pass over the data. The right shift is
! 96: done as the added limbs are produced. The addition code follows
! 97: mpn/generic/add_n.c. */
! 98:
! 99: mp_limb_t
! 100: mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
! 101: {
! 102: mp_limb_t w, wprev, x, y, c, ret;
! 103: mp_size_t i;
! 104:
! 105: ASSERT (size >= 1);
! 106:
! 107: x = xp[0];
! 108: y = yp[0];
! 109:
! 110: wprev = x + y;
! 111: c = (wprev < x);
! 112: ret = (wprev & 1);
! 113:
! 114: #define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (BITS_PER_MP_LIMB-1)))
! 115:
! 116: for (i = 1; i < size; i++)
! 117: {
! 118: x = xp[i];
! 119: y = yp[i];
! 120:
! 121: w = x + c;
! 122: c = (w < x);
! 123: w += y;
! 124: c += (w < y);
! 125:
! 126: wp[i-1] = RSHIFT (w, wprev);
! 127: wprev = w;
! 128: }
! 129:
! 130: wp[i-1] = RSHIFT (c, wprev);
! 131:
! 132: return ret;
! 133: }
! 134:
! 135:
! 136: /* Another one-pass version, but right shifting the source limbs rather than
! 137: the result limbs. There's not much chance of this being better than the
! 138: above, but it's an alternative at least. */
! 139:
! 140: mp_limb_t
! 141: mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
! 142: {
! 143: mp_limb_t w, x, y, xnext, ynext, c, ret;
! 144: mp_size_t i;
! 145:
! 146: ASSERT (size >= 1);
! 147:
! 148: x = xp[0];
! 149: y = yp[0];
! 150:
! 151: /* ret is the low bit of x+y, c is the carry out of that low bit add */
! 152: ret = (x ^ y) & 1;
! 153: c = (x & y) & 1;
! 154:
! 155: for (i = 0; i < size-1; i++)
! 156: {
! 157: xnext = xp[i+1];
! 158: ynext = yp[i+1];
! 159: x = RSHIFT (xnext, x);
! 160: y = RSHIFT (ynext, y);
! 161:
! 162: w = x + c;
! 163: c = (w < x);
! 164: w += y;
! 165: c += (w < y);
! 166: wp[i] = w;
! 167:
! 168: x = xnext;
! 169: y = ynext;
! 170: }
! 171:
! 172: wp[i] = (x >> 1) + (y >> 1) + c;
! 173:
! 174: return ret;
! 175: }
! 176:
! 177:
/* Body shared by the speed measuring routines below, which differ only in
   the function they run.  Actually this macro is the same as
   SPEED_ROUTINE_MPN_BINARY_N.

   Expects a "struct speed_params *s" in scope.  mean_fun is called s->reps
   times on s->xp and s->yp with a temporary destination of s->size limbs,
   and the enclosing function returns the value of speed_endtime().  */

#define SPEED_ROUTINE_MEAN(mean_fun)                            \
  {                                                             \
    unsigned  reps_left;                                        \
    mp_ptr    wp;                                               \
    double    elapsed;                                          \
    TMP_DECL (marker);                                          \
                                                                \
    SPEED_RESTRICT_COND (s->size >= 1);                         \
                                                                \
    TMP_MARK (marker);                                          \
    wp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_wp);          \
                                                                \
    speed_operand_src (s, s->xp, s->size);                      \
    speed_operand_src (s, s->yp, s->size);                      \
    speed_operand_dst (s, wp, s->size);                         \
    speed_cache_fill (s);                                       \
                                                                \
    speed_starttime ();                                         \
    reps_left = s->reps;                                        \
    do                                                          \
      {                                                         \
        mean_fun (wp, s->xp, s->yp, s->size);                   \
        reps_left--;                                            \
      }                                                         \
    while (reps_left != 0);                                     \
    elapsed = speed_endtime ();                                 \
                                                                \
    TMP_FREE (marker);                                          \
    return elapsed;                                             \
  }
! 209:
! 210: double
! 211: speed_mean_calls (struct speed_params *s)
! 212: {
! 213: SPEED_ROUTINE_MEAN (mean_calls);
! 214: }
! 215:
! 216: double
! 217: speed_mean_open (struct speed_params *s)
! 218: {
! 219: SPEED_ROUTINE_MEAN (mean_open);
! 220: }
! 221:
! 222: double
! 223: speed_mean_open2 (struct speed_params *s)
! 224: {
! 225: SPEED_ROUTINE_MEAN (mean_open2);
! 226: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>