Annotation of OpenXM_contrib/gmp/mpn/tests/addmul_1.c, Revision 1.1.1.1
#include <stdio.h>
#include <stdlib.h>
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
5:
6: #ifndef USG
7: #include <sys/time.h>
8: #include <sys/resource.h>
9:
/* Return the user CPU time consumed so far by the calling process, in
   milliseconds, as reported by getrusage.  Returns 0 if getrusage fails,
   so that an unfilled `struct rusage' is never read.  */
unsigned long
cputime (void)
{
  struct rusage rus;

  /* Ask for the usage of the calling process itself.  */
  if (getrusage (RUSAGE_SELF, &rus) != 0)
    return 0;

  /* Seconds and microseconds are both scaled to milliseconds.  */
  return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
}
18: #else
19: #include <time.h>
20:
/* Fallback for systems whose <time.h> does not provide CLOCKS_PER_SEC.  */
#ifndef CLOCKS_PER_SEC
#define CLOCKS_PER_SEC 1000000
#endif

/* Convert a clock () tick count CL to milliseconds.

   For fine-grained clocks (at least 10000 ticks per second) the tick count
   is divided by the number of ticks per millisecond, which avoids
   multiplying CL by 1000 first.  For coarse clocks the multiplication is
   done first so that precision is not lost.

   The choice is made in an ordinary C expression instead of with `#if',
   because CLOCKS_PER_SEC is only guaranteed to be an expression of type
   clock_t; where it contains a cast it cannot be evaluated by the
   preprocessor.  Only the selected arm of the conditional is evaluated.  */
#define CLOCK_TO_MILLISEC(cl) \
  ((CLOCKS_PER_SEC) >= 10000 \
   ? (cl) / ((CLOCKS_PER_SEC) / 1000) \
   : (cl) * 1000 / (CLOCKS_PER_SEC))
30:
/* Return the processor time used so far by the program, in milliseconds,
   derived from clock ().  Returns 0 when clock () reports that the
   processor time is not available.  */
unsigned long
cputime (void)
{
  clock_t ticks = clock ();

  /* clock () signals failure with (clock_t) -1; converting that value
     would yield a meaningless, very large time.  */
  if (ticks == (clock_t) -1)
    return 0;

  return CLOCK_TO_MILLISEC (ticks);
}
36: #endif
37:
38: #define M * 1000000
39:
40: #ifndef CLOCK
41: #if defined (__m88k__)
42: #define CLOCK 20 M
43: #elif defined (__i386__)
44: #define CLOCK (16.666667 M)
45: #elif defined (__m68k__)
46: #define CLOCK (20 M)
47: #elif defined (_IBMR2)
48: #define CLOCK (25 M)
49: #elif defined (__sparc__)
50: #define CLOCK (20 M)
51: #elif defined (__sun__)
52: #define CLOCK (20 M)
53: #elif defined (__mips)
54: #define CLOCK (40 M)
55: #elif defined (__hppa__)
56: #define CLOCK (50 M)
57: #elif defined (__alpha)
58: #define CLOCK (133 M)
59: #else
60: #error "Don't know CLOCK of your machine"
61: #endif
62: #endif
63:
/* Benchmark sizing, each overridable from the compiler command line:
     OPS    approximate number of limb operations per timed run,
     SIZE   operand length in limbs,
     TIMES  number of calls per timed run.
   When TIMES is not given it is derived from OPS and SIZE; when it is
   given, OPS is recomputed from SIZE and TIMES instead.  The derived
   macros are fully parenthesized so they expand safely inside larger
   expressions and with expression-valued overrides.  */
#ifndef OPS
#define OPS 20000000
#endif
#ifndef SIZE
#define SIZE 496
#endif
#ifndef TIMES
#define TIMES ((OPS) / (SIZE))
#else
#undef OPS
#define OPS ((SIZE) * (TIMES))
#endif
76:
77: mp_limb_t
78: refmpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
79: register mp_ptr res_ptr;
80: register mp_srcptr s1_ptr;
81: mp_size_t s1_size;
82: register mp_limb_t s2_limb;
83: {
84: register mp_limb_t cy_limb;
85: register mp_size_t j;
86: register mp_limb_t prod_high, prod_low;
87: register mp_limb_t x;
88:
89: /* The loop counter and index J goes from -SIZE to -1. This way
90: the loop becomes faster. */
91: j = -s1_size;
92:
93: /* Offset the base pointers to compensate for the negative indices. */
94: res_ptr -= j;
95: s1_ptr -= j;
96:
97: cy_limb = 0;
98: do
99: {
100: umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
101:
102: prod_low += cy_limb;
103: cy_limb = (prod_low < cy_limb) + prod_high;
104:
105: x = res_ptr[j];
106: prod_low = x + prod_low;
107: cy_limb += (prod_low < x);
108: res_ptr[j] = prod_low;
109: }
110: while (++j != 0);
111:
112: return cy_limb;
113: }
114:
115: main (argc, argv)
116: int argc;
117: char **argv;
118: {
119: mp_limb_t s1[SIZE];
120: mp_limb_t dx[SIZE+2];
121: mp_limb_t dy[SIZE+2];
122: mp_limb_t cyx, cyy;
123: int i;
124: long t0, t;
125: int test;
126: mp_limb_t xlimb;
127: mp_size_t size;
128: double cyc;
129:
130: for (test = 0; ; test++)
131: {
132: #ifdef RANDOM
133: size = (random () % SIZE + 1);
134: #else
135: size = SIZE;
136: #endif
137:
138: mpn_random2 (s1, size);
139: mpn_random2 (dy+1, size);
140:
141: if (random () % 0x100 == 0)
142: xlimb = 0;
143: else
144: mpn_random2 (&xlimb, 1);
145:
146: dy[size+1] = 0x12345678;
147: dy[0] = 0x87654321;
148:
149: #if defined (PRINT) || defined (XPRINT)
150: printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb);
151: #endif
152: #ifdef PRINT
153: mpn_print (dy+1, size);
154: mpn_print (s1, size);
155: #endif
156:
157: MPN_COPY (dx, dy, size+2);
158: t0 = cputime();
159: for (i = 0; i < TIMES; i++)
160: cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb);
161: t = cputime() - t0;
162: #if TIMES != 1
163: cyc = ((double) t * CLOCK) / (OPS * 1000.0);
164: printf ("refmpn_addmul_1: %5ldms (%.2f cycles/limb) [%.2f Gb/s]\n",
165: t,
166: cyc,
167: CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB);
168: #endif
169:
170: MPN_COPY (dx, dy, size+2);
171: t0 = cputime();
172: for (i = 0; i < TIMES; i++)
173: cyy = mpn_addmul_1 (dx+1, s1, size, xlimb);
174: t = cputime() - t0;
175: #if TIMES != 1
176: cyc = ((double) t * CLOCK) / (OPS * 1000.0);
177: printf ("mpn_addmul_1: %5ldms (%.2f cycles/limb) [%.2f Gb/s]\n",
178: t,
179: cyc,
180: CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB);
181: #endif
182:
183: MPN_COPY (dx, dy, size+2);
184: cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb);
185: cyy = mpn_addmul_1 (dy+1, s1, size, xlimb);
186:
187: #ifdef PRINT
188: printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
189: mpn_print (dx+1, size);
190: printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
191: mpn_print (dy+1, size);
192: #endif
193:
194: #ifndef NOCHECK
195: if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
196: || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
197: {
198: #ifndef PRINT
199: printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
200: mpn_print (dx+1, size);
201: printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
202: mpn_print (dy+1, size);
203: #endif
204: abort();
205: }
206: #endif
207: }
208: }
209:
210: mpn_print (mp_ptr p, mp_size_t size)
211: {
212: mp_size_t i;
213:
214: for (i = size - 1; i >= 0; i--)
215: {
216: printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
217: #ifdef SPACE
218: if (i != 0)
219: printf (" ");
220: #endif
221: }
222: puts ("");
223: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>