Annotation of OpenXM_contrib/gmp/tune/speed-ext.c, Revision 1.1.1.2
1.1 maekawa 1: /* An example of extending the speed program to measure routines not in GMP. */
2:
3: /*
1.1.1.2 ! ohara 4: Copyright 1999, 2000 Free Software Foundation, Inc.
1.1 maekawa 5:
6: This file is part of the GNU MP Library.
7:
8: The GNU MP Library is free software; you can redistribute it and/or modify
9: it under the terms of the GNU Lesser General Public License as published by
10: the Free Software Foundation; either version 2.1 of the License, or (at your
11: option) any later version.
12:
13: The GNU MP Library is distributed in the hope that it will be useful, but
14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: License for more details.
17:
18: You should have received a copy of the GNU Lesser General Public License
19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: MA 02111-1307, USA.
22: */
23:
24:
25: /* The extension here is three versions of an mpn arithmetic mean. These
26: aren't meant to be particularly useful, just examples.
27:
28: You can run something like the following to compare their speeds.
29:
30: ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
31:
32: On RISC chips, mean_open() might be fastest if the compiler is doing a
33: good job. On the register starved x86s, mean_calls will be fastest.
34:
35:
36: Notes:
37:
38: SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
39: by speed.c in useful places. SPEED_EXTRA_PROTOS goes after the header
40: files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
41:
42: The advantage of this #include "speed.c" scheme is that there's no
43: editing of a copy of that file, and new features in new versions of it
44: will be immediately available.
45:
46: In a real program the routines mean_calls() etc would probably be in
47: separate C or assembler source files, and just the measuring
48: speed_mean_calls() etc would be here. Linking against other libraries
49: for things to measure is perfectly possible too.
50:
51: When attempting to compare two versions of the same named routine, say
52: like the generic and assembler versions of mpn_add_n(), creative use of
53: cc -D or #define is suggested, so one or both can be renamed and linked
54: into the same program. It'll be much easier to compare them side by side
55: than with separate programs for each.
56:
57: common.c has notes on writing speed measuring routines.
58:
59: Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
60: not using libtool) to get common.o and other objects needed by speed.c. */
61:
62:
/* Prototypes for the three measuring functions defined in this file.  Per
   the notes above, speed.c expands this macro after its header files.  */
#define SPEED_EXTRA_PROTOS                              \
  double speed_mean_calls (struct speed_params *s);     \
  double speed_mean_open (struct speed_params *s);      \
  double speed_mean_open2 (struct speed_params *s);
67:
/* Extra entries for the array of available routines in speed.c, one per
   measuring function: the name as given on the command line, then the
   function that measures it.  */
#define SPEED_EXTRA_ROUTINES                    \
  { "mean_calls",  speed_mean_calls  },         \
  { "mean_open",   speed_mean_open   },         \
  { "mean_open2",  speed_mean_open2  },
72:
73: #include "speed.c"
74:
75:
76: /* A straightforward implementation calling mpn subroutines.
77:
78: wp,size is set to (xp,size + yp,size) / 2. The return value is the
79: remainder from the division. The other versions are the same. */
80:
81: mp_limb_t
82: mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
83: {
84: mp_limb_t c, ret;
85:
86: ASSERT (size >= 1);
87:
88: c = mpn_add_n (wp, xp, yp, size);
89: ret = mpn_rshift (wp, wp, size, 1) >> (BITS_PER_MP_LIMB-1);
90: wp[size-1] |= (c << (BITS_PER_MP_LIMB-1));
91: return ret;
92: }
93:
94:
95: /* An open-coded version, making one pass over the data. The right shift is
96: done as the added limbs are produced. The addition code follows
97: mpn/generic/add_n.c. */
98:
99: mp_limb_t
100: mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
101: {
102: mp_limb_t w, wprev, x, y, c, ret;
103: mp_size_t i;
104:
105: ASSERT (size >= 1);
106:
107: x = xp[0];
108: y = yp[0];
109:
110: wprev = x + y;
111: c = (wprev < x);
112: ret = (wprev & 1);
113:
114: #define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (BITS_PER_MP_LIMB-1)))
115:
116: for (i = 1; i < size; i++)
117: {
118: x = xp[i];
119: y = yp[i];
120:
121: w = x + c;
122: c = (w < x);
123: w += y;
124: c += (w < y);
125:
126: wp[i-1] = RSHIFT (w, wprev);
127: wprev = w;
128: }
129:
130: wp[i-1] = RSHIFT (c, wprev);
131:
132: return ret;
133: }
134:
135:
136: /* Another one-pass version, but right shifting the source limbs rather than
137: the result limbs. There's not much chance of this being better than the
138: above, but it's an alternative at least. */
139:
140: mp_limb_t
141: mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
142: {
143: mp_limb_t w, x, y, xnext, ynext, c, ret;
144: mp_size_t i;
145:
146: ASSERT (size >= 1);
147:
148: x = xp[0];
149: y = yp[0];
150:
151: /* ret is the low bit of x+y, c is the carry out of that low bit add */
152: ret = (x ^ y) & 1;
153: c = (x & y) & 1;
154:
155: for (i = 0; i < size-1; i++)
156: {
157: xnext = xp[i+1];
158: ynext = yp[i+1];
159: x = RSHIFT (xnext, x);
160: y = RSHIFT (ynext, y);
161:
162: w = x + c;
163: c = (w < x);
164: w += y;
165: c += (w < y);
166: wp[i] = w;
167:
168: x = xnext;
169: y = ynext;
170: }
171:
172: wp[i] = (x >> 1) + (y >> 1) + c;
173:
174: return ret;
175: }
176:
177:
/* Body shared by the speed measuring functions, which differ only in the
   routine they run; mean_fun names that routine.  This is in fact the same
   as SPEED_ROUTINE_MPN_BINARY_N.

   The expansion expects a `struct speed_params *s' in scope.  It allocates
   a temporary destination of s->size limbs, calls mean_fun s->reps times
   on {s->xp,s->size} and {s->yp,s->size} between speed_starttime() and
   speed_endtime(), and returns the value speed_endtime() gave.  The loop
   is a do-while, so mean_fun is always called at least once.  */

#define SPEED_ROUTINE_MEAN(mean_fun)                            \
  {                                                             \
    unsigned  reps;                                             \
    mp_ptr    dst;                                              \
    double    elapsed;                                          \
    TMP_DECL (marker);                                          \
                                                                \
    SPEED_RESTRICT_COND (s->size >= 1);                         \
                                                                \
    TMP_MARK (marker);                                          \
    dst = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_wp);         \
                                                                \
    speed_operand_src (s, s->xp, s->size);                      \
    speed_operand_src (s, s->yp, s->size);                      \
    speed_operand_dst (s, dst, s->size);                        \
    speed_cache_fill (s);                                       \
                                                                \
    speed_starttime ();                                         \
    reps = s->reps;                                             \
    do                                                          \
      mean_fun (dst, s->xp, s->yp, s->size);                    \
    while (--reps != 0);                                        \
    elapsed = speed_endtime ();                                 \
                                                                \
    TMP_FREE (marker);                                          \
    return elapsed;                                             \
  }
209:
210: double
211: speed_mean_calls (struct speed_params *s)
212: {
213: SPEED_ROUTINE_MEAN (mean_calls);
214: }
215:
216: double
217: speed_mean_open (struct speed_params *s)
218: {
219: SPEED_ROUTINE_MEAN (mean_open);
220: }
221:
222: double
223: speed_mean_open2 (struct speed_params *s)
224: {
225: SPEED_ROUTINE_MEAN (mean_open2);
226: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>