Annotation of OpenXM_contrib/gmp/tune/speed.c, Revision 1.1.1.1
1.1 maekawa 1: /* Speed measuring program. */
2:
3: /*
4: Copyright (C) 1999, 2000 Free Software Foundation, Inc.
5:
6: This file is part of the GNU MP Library.
7:
8: The GNU MP Library is free software; you can redistribute it and/or modify
9: it under the terms of the GNU Lesser General Public License as published by
10: the Free Software Foundation; either version 2.1 of the License, or (at your
11: option) any later version.
12:
13: The GNU MP Library is distributed in the hope that it will be useful, but
14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: License for more details.
17:
18: You should have received a copy of the GNU Lesser General Public License
19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: MA 02111-1307, USA.
22: */
23:
24: /* Usage message is in the code below, run with no arguments to print it.
25: See README for interesting applications.
26:
27: To add a new routine foo(), create a speed_foo() function in the style of
28: the existing ones and add an entry in the routine[] array. Put FLAG_R if
29: speed_foo() wants an "r" parameter.
30:
31: The routines don't have help messages or descriptions, but most have
32: suggestive names. See the source code for full details.
33:
34: */
35:
36: #include "config.h"
37:
38: #if HAVE_GETOPT_H
39: #include <getopt.h> /* for getopt_long() */
40: #endif
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>          /* for time() */
#include <unistd.h>        /* for getpid() */
#include <sys/time.h>      /* for struct timeval for sys/resource.h */
#include <sys/resource.h>  /* for getrusage() */
48:
49: #include "gmp.h"
50: #include "gmp-impl.h"
51:
52: #include "speed.h"
53:
54: #if !HAVE_DECL_OPTARG
55: extern char *optarg;
56: extern int optind, opterr;
57: #endif
58:
59: #if !HAVE_STRTOUL
60: #define strtoul(p,e,b) (unsigned long) strtol(p,e,b)
61: #endif
62:
63: #ifdef SPEED_EXTRA_PROTOS
64: SPEED_EXTRA_PROTOS
65: #endif
66:
67:
/* Store the limb value N into each of the SIZE limbs starting at PTR.
   Every argument is parenthesised so arbitrary expressions can be passed,
   and the loop counter has a name unlikely to clash with an identifier
   appearing in the caller's arguments.  SIZE and N are evaluated once per
   iteration, so they should be free of side effects.  */
#define MPN_FILL(ptr, size, n)                                          \
  do {                                                                  \
    mp_size_t  mpn_fill_i_;                                             \
    for (mpn_fill_i_ = 0; mpn_fill_i_ < (size); mpn_fill_i_++)          \
      (ptr)[mpn_fill_i_] = (n);                                         \
  } while (0)
74:
75: #define CMP_ABSOLUTE 1
76: #define CMP_RATIO 2
77: #define CMP_DIFFERENCE 3
78: #define CMP_DIFFPREV 4
79: int option_cmp = CMP_ABSOLUTE;
80:
81: #define UNIT_SECONDS 1
82: #define UNIT_CYCLES 2
83: #define UNIT_CYCLESPERLIMB 3
84: int option_unit = UNIT_SECONDS;
85:
86: #define DATA_RANDOM 1
87: #define DATA_RANDOM2 2
88: #define DATA_ZEROS 3
89: #define DATA_FFS 4
90: #define DATA_2FD 5
91: int option_data = DATA_RANDOM;
92:
93: int option_square = 0;
94: double option_factor = 0.0;
95: mp_size_t option_step = 1;
96: int option_gnuplot = 0;
97: char *option_gnuplot_basename;
98: struct size_array_t {
99: mp_size_t start, end;
100: } *size_array = NULL;
101: mp_size_t size_num = 0;
102: mp_size_t size_allocnum = 0;
103: int option_resource_usage = 0;
104: long option_seed = 123456789;
105:
106: struct speed_params sp;
107:
108: #define COLUMN_WIDTH 13 /* for the free-form output */
109:
110: #define FLAG_R (1<<0)
111: #define FLAG_R_OPTIONAL (1<<1)
112: #define FLAG_RSIZE (1<<2)
113:
114: const struct routine_t {
115: /* constants */
116: const char *name;
117: speed_function_t fun;
118: int flag;
119:
120: } routine[] = {
121:
122: { "noop", speed_noop },
123: { "noop_wxs", speed_noop_wxs },
124: { "noop_wxys", speed_noop_wxys },
125:
126: { "mpn_add_n", speed_mpn_add_n },
127: { "mpn_sub_n", speed_mpn_sub_n },
128: { "mpn_add_n_self", speed_mpn_add_n_self },
129: { "mpn_add_n_inplace", speed_mpn_add_n_inplace },
130:
131: { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R },
132: { "mpn_submul_1", speed_mpn_submul_1, FLAG_R },
133: { "mpn_mul_1", speed_mpn_mul_1, FLAG_R },
134:
135: { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R },
136: { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R },
137: #if HAVE_NATIVE_mpn_divrem_1c
138: { "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R },
139: { "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R },
140: #endif
141: { "mpn_mod_1", speed_mpn_mod_1, FLAG_R },
142: #if HAVE_NATIVE_mpn_mod_1c
143: { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R },
144: #endif
145:
146: { "mpn_divrem_2", speed_mpn_divrem_2, },
147: { "mpn_divexact_by3", speed_mpn_divexact_by3 },
148:
149: { "mpn_bz_divrem_n", speed_mpn_bz_divrem_n },
150: { "mpn_bz_divrem_sb", speed_mpn_bz_divrem_sb },
151: { "mpn_bz_tdiv_qr", speed_mpn_bz_tdiv_qr },
152:
153: { "mpn_lshift", speed_mpn_lshift, FLAG_R },
154: { "mpn_rshift", speed_mpn_rshift, FLAG_R },
155:
156: { "mpn_and_n", speed_mpn_and_n },
157: { "mpn_andn_n", speed_mpn_andn_n },
158: { "mpn_nand_n", speed_mpn_nand_n },
159: { "mpn_ior_n", speed_mpn_ior_n },
160: { "mpn_iorn_n", speed_mpn_iorn_n },
161: { "mpn_nior_n", speed_mpn_nior_n },
162: { "mpn_xor_n", speed_mpn_xor_n },
163: { "mpn_xnor_n", speed_mpn_xnor_n },
164:
165: { "mpn_popcount", speed_mpn_popcount },
166: { "mpn_hamdist", speed_mpn_hamdist },
167:
168: { "mpn_gcdext", speed_mpn_gcdext },
169: { "mpn_gcd", speed_mpn_gcd },
170: { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
171:
172: { "mpn_jacobi_base", speed_mpn_jacobi_base },
173:
174: { "mpn_mul_basecase", speed_mpn_mul_basecase, FLAG_R_OPTIONAL },
175: { "mpn_sqr_basecase", speed_mpn_sqr_basecase },
176:
177: { "mpn_mul_n", speed_mpn_mul_n },
178: { "mpn_sqr_n", speed_mpn_sqr_n },
179:
180: { "mpn_kara_mul_n", speed_mpn_kara_mul_n },
181: { "mpn_kara_sqr_n", speed_mpn_kara_sqr_n },
182: { "mpn_toom3_mul_n", speed_mpn_toom3_mul_n },
183: { "mpn_toom3_sqr_n", speed_mpn_toom3_sqr_n },
184: { "mpn_mul_fft_full", speed_mpn_mul_fft_full },
185: { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr },
186:
187: { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL },
188: { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
189:
190: { "mpz_add", speed_mpz_add },
191: { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_R_OPTIONAL },
192: { "mpz_fac_ui", speed_mpz_fac_ui },
193: { "mpz_fib_ui", speed_mpz_fib_ui },
194: { "mpz_powm", speed_mpz_powm },
195:
196: { "MPN_COPY", speed_MPN_COPY },
197: { "MPN_COPY_INCR", speed_MPN_COPY_INCR },
198: { "MPN_COPY_DECR", speed_MPN_COPY_DECR },
199: { "memcpy", speed_memcpy },
200:
201: { "modlimb_invert", speed_modlimb_invert },
202:
203: { "malloc_free", speed_malloc_free },
204: { "malloc_realloc_free", speed_malloc_realloc_free },
205: { "mp_allocate_free", speed_mp_allocate_free },
206: { "mp_allocate_reallocate_free", speed_mp_allocate_reallocate_free },
207: { "mpz_init_clear", speed_mpz_init_clear },
208: { "mpq_init_clear", speed_mpq_init_clear },
209: { "mpf_init_clear", speed_mpf_init_clear },
210: { "mpz_init_realloc_clear", speed_mpz_init_realloc_clear },
211:
212: { "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL },
213: #if HAVE_NATIVE_mpn_umul_ppmm
214: { "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
215: #endif
216:
217: { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL },
218: { "udiv_qrnnd_preinv", speed_udiv_qrnnd_preinv, FLAG_R_OPTIONAL },
219: { "udiv_qrnnd_preinv2norm", speed_udiv_qrnnd_preinv2norm, FLAG_R_OPTIONAL },
220: #if HAVE_NATIVE_mpn_udiv_qrnnd
221: { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL },
222: #endif
223:
224: #ifdef SPEED_EXTRA_ROUTINES
225: SPEED_EXTRA_ROUTINES
226: #endif
227: };
228:
229:
/* One routine selected on the command line, with its measurement state.
   "r" is a long so it holds the full value returned by r_string();
   an int would truncate wide values such as those from "64bits".  */
struct choice_t {
  const struct routine_t  *p;         /* entry in routine[] */
  long                    r;          /* the ".r" parameter, 0 if none */
  double                  time;       /* latest result, as displayed */
  int                     no_time;    /* non-zero if no result available */
  double                  prev_time;  /* raw time at the previous size */
  const char              *name;      /* as given on the command line */
};
struct choice_t  *choice;
int  num_choices = 0;
240:
241:
242: void
243: data_fill (mp_ptr ptr, mp_size_t size)
244: {
245: switch (option_data) {
246: case DATA_RANDOM:
247: mpn_random (ptr, size);
248: break;
249: case DATA_RANDOM2:
250: mpn_random2 (ptr, size);
251: break;
252: case DATA_ZEROS:
253: MPN_ZERO (ptr, size);
254: break;
255: case DATA_FFS:
256: MPN_FILL (ptr, size, MP_LIMB_T_MAX);
257: break;
258: case DATA_2FD:
259: MPN_FILL (ptr, size, MP_LIMB_T_MAX);
260: ptr[0] -= 2;
261: break;
262: default:
263: abort();
264: /*NOTREACHED*/
265: }
266: }
267:
268: /* The code here handling the various combinations of output options isn't
269: too attractive, but it works and is fairly clean. */
270:
/* Divisor applied to a time to get "cycles per limb" for a size n:
   n*n when option_square is 1, n*(n+1)/2 when it is 2, otherwise n.
   The products are formed in double so that large sizes cannot overflow
   the integer type of n; the result is only ever used in floating point
   arithmetic.  */
#define SIZE_TO_DIVISOR(n)                              \
  (option_square == 1 ? (double) (n) * (n)              \
   : option_square == 2 ? (double) (n) * ((n)+1) / 2    \
   : (double) (n))
275:
276: void
277: run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
278: {
279: const char *first_open_fastest, *first_open_notfastest, *first_close;
280: int i, fastest;
281: double fastest_time;
282: TMP_DECL (marker);
283:
284: TMP_MARK (marker);
285: sp.xp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_xp);
286: sp.yp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_yp);
287:
288: data_fill (s->xp, s->size);
289: data_fill (s->yp, s->size);
290:
291: if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
292: {
293: first_open_fastest = "(#";
294: first_open_notfastest = " (";
295: first_close = ")";
296: }
297: else
298: {
299: first_open_fastest = "#";
300: first_open_notfastest = " ";
301: first_close = "";
302: }
303:
304: fastest = -1;
305: fastest_time = -1.0;
306: for (i = 0; i < num_choices; i++)
307: {
308: s->r = choice[i].r;
309: choice[i].time = speed_measure (choice[i].p->fun, s);
310: choice[i].no_time = (choice[i].time == -1.0);
311:
312:
313: /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
314: is before any differences. */
315: {
316: double t;
317: t = choice[i].time;
318: if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
319: {
320: if (choice[i].prev_time == -1.0)
321: choice[i].no_time = 1;
322: else
323: choice[i].time = choice[i].time - choice[i].prev_time;
324: }
325: choice[i].prev_time = t;
326: }
327:
328: if (choice[i].no_time)
329: continue;
330:
331: /* Look for the fastest after CMP_DIFFPREV has been applied, but
332: before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown
333: if there's more than one routine. */
334: if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
335: {
336: fastest = i;
337: fastest_time = choice[i].time;
338: }
339:
340: if (option_cmp == CMP_DIFFPREV)
341: {
342: /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
343: if (option_unit == UNIT_CYCLES)
344: choice[i].time /= speed_cycletime;
345: else if (option_unit == UNIT_CYCLESPERLIMB)
346: {
347: if (prev_size == -1)
348: choice[i].time /= speed_cycletime;
349: else
350: choice[i].time /= (speed_cycletime
351: * (SIZE_TO_DIVISOR(s->size)
352: - SIZE_TO_DIVISOR(prev_size)));
353: }
354: }
355: else
356: {
357: if (option_unit == UNIT_CYCLES)
358: choice[i].time /= speed_cycletime;
359: else if (option_unit == UNIT_CYCLESPERLIMB)
360: choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
361:
362: if (option_cmp == CMP_RATIO && i > 0)
363: {
364: /* A ratio isn't affected by the units chosen. */
365: if (choice[0].no_time || choice[0].time == 0.0)
366: choice[i].no_time = 1;
367: else
368: choice[i].time /= choice[0].time;
369: }
370: else if (option_cmp == CMP_DIFFERENCE && i > 0)
371: {
372: if (choice[0].no_time)
373: {
374: choice[i].no_time = 1;
375: continue;
376: }
377: choice[i].time -= choice[0].time;
378: }
379: }
380: }
381:
382: if (option_gnuplot)
383: {
384: /* In CMP_DIFFPREV, don't print anything for the first size, start
385: with the second where an actual difference is available.
386:
387: In CMP_RATIO, print the first column as 1.0.
388:
389: The 9 decimals printed is much more than the expected precision of
390: the measurements actually. */
391:
392: if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
393: {
394: fprintf (fp, "%-6ld ", s->size);
395: for (i = 0; i < num_choices; i++)
396: fprintf (fp, " %.9e",
397: choice[i].no_time ? 0.0
398: : (option_cmp == CMP_RATIO && i == 0) ? 1.0
399: : choice[i].time);
400: fprintf (fp, "\n");
401: }
402: }
403: else
404: {
405: fprintf (fp, "%-6ld ", s->size);
406: for (i = 0; i < num_choices; i++)
407: {
408: char buf[128];
409: int decimals;
410:
411: if (choice[i].no_time)
412: decimals = 0, choice[i].time = 0.0;
413: else if (option_unit == UNIT_CYCLESPERLIMB
414: || (option_cmp == CMP_RATIO && i > 0))
415: decimals = 4;
416: else if (option_unit == UNIT_CYCLES)
417: decimals = 2;
418: else
419: decimals = 9;
420:
421: sprintf (buf, "%s%.*f%s",
422: i == fastest ? first_open_fastest : first_open_notfastest,
423: decimals, choice[i].time, first_close);
424: fprintf (fp, " %*s", COLUMN_WIDTH, buf);
425: }
426: fprintf (fp, "\n");
427: }
428:
429: TMP_FREE (marker);
430: }
431:
432: void
433: run_all (FILE *fp)
434: {
435: mp_size_t prev_size;
436: int i;
437: TMP_DECL (marker);
438:
439: TMP_MARK (marker);
440: sp.xp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_xp);
441: sp.yp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_yp);
442:
443: data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
444: data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
445:
446: for (i = 0; i < size_num; i++)
447: {
448: sp.size = size_array[i].start;
449: prev_size = -1;
450: for (;;)
451: {
452: mp_size_t step;
453:
454: if (option_data == DATA_2FD && sp.size >= 2)
455: sp.xp[sp.size-1] = 2;
456:
457: run_one (fp, &sp, prev_size);
458: prev_size = sp.size;
459:
460: if (option_data == DATA_2FD && sp.size >= 2)
461: sp.xp[sp.size-1] = MP_LIMB_T_MAX;
462:
463: if (option_factor != 0.0)
464: {
465: step = (mp_size_t) (sp.size * option_factor - sp.size);
466: if (step < 1)
467: step = 1;
468: }
469: else
470: step = 1;
471: if (step < option_step)
472: step = option_step;
473:
474: sp.size += step;
475: if (sp.size > size_array[i].end)
476: break;
477: }
478: }
479:
480: TMP_FREE (marker);
481: }
482:
483:
/* Open "filename" for writing and return the stream.  On failure a
   message is printed to stderr and the program exits with status 1, so
   the return value is never NULL.  */
FILE *
fopen_for_write (const char *filename)
{
  FILE  *stream = fopen (filename, "w");

  if (stream != NULL)
    return stream;

  fprintf (stderr, "Cannot create %s\n", filename);
  exit(1);
}
495:
/* Close a stream that was being written.  If the stream already had its
   error indicator set, or the close itself fails, report "filename" on
   stderr and exit with status 1.  The stream is closed in either case.  */
void
fclose_written (FILE *fp, const char *filename)
{
  int  had_error = ferror (fp);
  int  close_result = fclose (fp);

  if (had_error == 0 && close_result == 0)
    return;

  fprintf (stderr, "Error writing %s\n", filename);
  exit(1);
}
510:
511:
512: void
513: run_gnuplot (void)
514: {
515: char *plot_filename;
516: char *data_filename;
517: FILE *fp;
518: int i;
519:
520: plot_filename = (char *) (*_mp_allocate_func)
521: (strlen (option_gnuplot_basename) + 20);
522: data_filename = (char *) (*_mp_allocate_func)
523: (strlen (option_gnuplot_basename) + 20);
524:
525: sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
526: sprintf (data_filename, "%s.data", option_gnuplot_basename);
527:
528: fp = fopen_for_write (plot_filename);
529:
530: /* Putting the key at the top left is usually good, and you can change it
531: interactively if it's not. */
532: fprintf (fp, "set key left\n");
533:
534: /* designed to make it possible to see crossovers easily */
535: fprintf (fp, "set data style linespoints\n");
536:
537: fprintf (fp, "plot ");
538: for (i = 0; i < num_choices; i++)
539: {
540: fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
541: fprintf (fp, " title \"%s\"", choice[i].name);
542:
543: if (i != num_choices-1)
544: fprintf (fp, ", \\");
545: fprintf (fp, "\n");
546: }
547:
548: fprintf (fp, "load \"-\"\n");
549: fclose_written (fp, plot_filename);
550:
551: fp = fopen_for_write (data_filename);
552:
553: /* Unbuffered so you can see where the program was up to if it crashes or
554: you kill it. */
555: setbuf (fp, NULL);
556:
557: run_all (fp);
558: fclose_written (fp, data_filename);
559: }
560:
561:
562: /* Return a long with n many one bits (starting from the least significant) */
563: #define LONG_ONES(n) \
564: ((n) == BITS_PER_LONGINT ? -1L : (n) == 0 ? 0L : (1L << (n)) - 1)
565:
566: long
567: r_string (const char *s)
568: {
569: const char *s_orig = s;
570: long n;
571:
572: if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
573: n = strtoul (s+2, (char **) &s, 16);
574: else
575: n = strtol (s, (char **) &s, 10);
576:
577: if (strcmp (s, "bits") == 0)
578: {
579: mp_limb_t l;
580: if (n > BITS_PER_LONGINT)
581: {
582: fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
583: n, BITS_PER_LONGINT);
584: exit (1);
585: }
586: mpn_random (&l, 1);
587: return (l | (1 << (n-1))) & LONG_ONES(n);
588: }
589: else if (strcmp (s, "ones") == 0)
590: {
591: if (n > BITS_PER_LONGINT)
592: {
593: fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
594: n, BITS_PER_LONGINT);
595: exit (1);
596: }
597: return LONG_ONES (n);
598: }
599: else if (*s != '\0')
600: {
601: fprintf (stderr, "invalid r parameter: %s\n", s_orig);
602: exit (1);
603: }
604:
605: return n;
606: }
607:
608:
609: void
610: routine_find (struct choice_t *c, const char *s)
611: {
612: int i;
613: size_t nlen;
614:
615: for (i = 0; i < numberof (routine); i++)
616: {
617: nlen = strlen (routine[i].name);
618: if (memcmp (s, routine[i].name, nlen) != 0)
619: continue;
620:
621: if (s[nlen] == '.')
622: {
623: /* match, with a .r parameter */
624:
625: if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
626: {
627: fprintf (stderr, "Choice %s bad: doesn't take a \".<r>\" paramater\n", s);
628: exit (1);
629: }
630:
631: c->p = &routine[i];
632: c->r = r_string (s + nlen + 1);
633: c->name = s;
634: return;
635: }
636:
637: if (s[nlen] == '\0')
638: {
639: /* match, with no parameter */
640:
641: if (routine[i].flag & FLAG_R)
642: {
643: fprintf (stderr, "Choice %s bad: needs a \".<r>\" paramater\n", s);
644: exit (1);
645: }
646:
647: c->p = &routine[i];
648: c->r = 0;
649: c->name = s;
650: return;
651: }
652: }
653:
654: fprintf (stderr, "Choice %s unrecognised\n", s);
655: exit (1);
656: }
657:
658:
659: void
660: usage (void)
661: {
662: int i;
663:
664: printf ("\
665: Usage: speed [-options] -s size <routine>...\n\
666: Measure the speed of some routines.\n\
667: Times are in seconds, accuracy is shown.\n\
668: \n\
669: -p num set precision as number of time units each routine must run\n\
670: -s size[-end][,size[-end]]... sizes to measure\n\
671: single sizes or ranges, sep with comma or use multiple -s\n\
672: -t step step through sizes by given amount\n\
673: -f factor step through sizes by given factor (eg. 1.05)\n\
674: -r show times as ratios of the first routine\n\
675: -d show times as difference from the first routine\n\
676: -D show times as difference from previous size shown\n\
677: -c show times in CPU cycles\n\
678: -C show times in cycles per limb\n\
679: -u print resource usage (memory) at end\n\
680: -P name output plot files \"name.gnuplot\" and \"name.data\"\n\
681: -a <type> use given data: random(default), random2, zeros, ffs\n\
682: -x, -y, -w, -W <align> specify data alignments, sources and dests\n\
683: -o addrs print addresses of data blocks\n\
684: \n\
685: If both -t and -f are used, it means step by the factor or the step, whichever\n\
686: is greater.\n\
687: If both -C and -D are used, it means cycles per however many limbs between a\n\
688: size and the previous size.\n\
689: \n\
690: After running with -P, plots can be viewed with Gnuplot or Quickplot.\n\
691: \"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n\
692: a log/log plot).\n\
693: \"quickplot -s name.data\" (has interactive zooming, and note -s is important\n\
694: when viewing more than one routine, it means same axis scales for all data).\n\
695: \n\
696: The available routines are as follows.\n\
697: \n\
698: ");
699:
700: for (i = 0; i < numberof (routine); i++)
701: {
702: if (routine[i].flag & FLAG_R)
703: printf ("\t%s.r\n", routine[i].name);
704: else if (routine[i].flag & FLAG_R_OPTIONAL)
705: printf ("\t%s (optional .r)\n", routine[i].name);
706: else
707: printf ("\t%s\n", routine[i].name);
708: }
709:
710: printf ("\n\
711: Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n\
712: r should be in decimal, or use 0xN for hexadecimal.\n\
713: Special forms for r are Nbits for a random N bit number, and Nones for N one\n\
714: bits.\n\
715: \n\
716: Times for sizes out of the range accepted by a routine are shown as 0.\n\
717: The fastest routine at each size is marked with a # (free form output only).\n\
718: \n\
719: %s\
720: \n\
721: Gnuplot home page http://www.cs.dartmouth.edu/gnuplot_info.html\n\
722: Quickplot home page http://www.kachinatech.com/~quickplot\n\
723: ", speed_time_string);
724: }
725:
726: int
727: main (int argc, char *argv[])
728: {
729: #define OPTION_ALIGN 1000
730: #if HAVE_GETOPT_LONG
731: static const struct option longopts[] = {
732: { "align", required_argument, NULL, OPTION_ALIGN },
733: { "align-x", required_argument, NULL, 'x' },
734: { "align-y", required_argument, NULL, 'y' },
735: { "align-w", required_argument, NULL, 'w' },
736: { "align-w2", required_argument, NULL, 'W' },
737: { "data", required_argument, NULL, 'a' },
738: { "cycles", no_argument, NULL, 'c' },
739: { "cycles-per-limb", no_argument, NULL, 'C' },
740: { "diff", no_argument, NULL, 'd' },
741: { "diff-prev", no_argument, NULL, 'D' },
742: { "difference", no_argument, NULL, 'd' },
743: { "difference-prev", no_argument, NULL, 'D' },
744: { "factor", required_argument, NULL, 'f' },
745: { "plot", no_argument, NULL, 'P' },
746: { "precision", required_argument, NULL, 'p' },
747: { "ratio", no_argument, NULL, 'r' },
748: { "randomize", no_argument, NULL, 'R' },
749: { "sizes", required_argument, NULL, 's' },
750: { "step", required_argument, NULL, 't' },
751: { "resources", required_argument, NULL, 'u' },
752: { "uncached", no_argument, NULL, 'z' },
753: { NULL }
754: };
755: #endif
756:
757: int i;
758: int opt;
759:
760: /* Unbuffered so output goes straight out when directed to a pipe or file
761: and isn't lost if you kill the program half way. */
762: setbuf (stdout, NULL);
763:
764: #define OPTSTRING "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"
765: #if HAVE_GETOPT_LONG
766: while ((opt = getopt_long(argc, argv, OPTSTRING, longopts, NULL))
767: != EOF)
768: #else
769: while ((opt = getopt(argc, argv, OPTSTRING)) != EOF)
770: #endif
771: {
772: switch (opt) {
773: case 'a':
774: if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM;
775: else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
776: else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
777: else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
778: else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
779: else
780: {
781: fprintf (stderr, "unrecognised data option: %s\n", optarg);
782: exit (1);
783: }
784: break;
785: case 'C':
786: if (option_unit != UNIT_SECONDS) goto bad_unit;
787: option_unit = UNIT_CYCLESPERLIMB;
788: break;
789: case 'c':
790: if (option_unit != UNIT_SECONDS)
791: {
792: bad_unit:
793: fprintf (stderr, "cannot use more than one of -c, -C\n");
794: exit (1);
795: }
796: option_unit = UNIT_CYCLES;
797: break;
798: case 'D':
799: if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
800: option_cmp = CMP_DIFFPREV;
801: break;
802: case 'd':
803: if (option_cmp != CMP_ABSOLUTE)
804: {
805: bad_cmp:
806: fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
807: exit (1);
808: }
809: option_cmp = CMP_DIFFERENCE;
810: break;
811: case 'E':
812: option_square = 1;
813: break;
814: case 'F':
815: option_square = 2;
816: break;
817: case 'f':
818: option_factor = atof (optarg);
819: if (option_factor <= 1.0)
820: {
821: fprintf (stderr, "-f factor must be > 1.0\n");
822: exit (1);
823: }
824: break;
825: case 'o':
826: speed_option_set (optarg);
827: break;
828: case 'P':
829: option_gnuplot = 1;
830: option_gnuplot_basename = optarg;
831: break;
832: case 'p':
833: speed_precision = atoi (optarg);
834: break;
835: case 'R':
836: option_seed = time (NULL);
837: break;
838: case 'r':
839: if (option_cmp != CMP_ABSOLUTE)
840: goto bad_cmp;
841: option_cmp = CMP_RATIO;
842: break;
843: case 's':
844: {
845: char *s;
846: for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
847: {
848: if (size_num == size_allocnum)
849: {
850: size_array = (struct size_array_t *)
851: _mp_allocate_or_reallocate
852: (size_array,
853: size_allocnum * sizeof(size_array[0]),
854: (size_allocnum+10) * sizeof(size_array[0]));
855: size_allocnum += 10;
856: }
857: if (sscanf (s, "%ld-%ld",
858: &size_array[size_num].start,
859: &size_array[size_num].end) != 2)
860: {
861: size_array[size_num].start = size_array[size_num].end
862: = atol (s);
863: }
864:
865: if (size_array[size_num].start < 1
866: || size_array[size_num].end < 1
867: || size_array[size_num].start > size_array[size_num].end)
868: {
869: fprintf (stderr, "invalid size parameter: %s\n", s);
870: exit (1);
871: }
872:
873: size_num++;
874: }
875: }
876: break;
877: case 't':
878: option_step = atol (optarg);
879: if (option_step < 1)
880: {
881: fprintf (stderr, "-t step must be >= 1\n");
882: exit (1);
883: }
884: break;
885: case 'u':
886: option_resource_usage = 1;
887: break;
888: case 'z':
889: sp.cache = 1;
890: break;
891: case OPTION_ALIGN:
892: abort();
893: case 'x':
894: sp.align_xp = atol (optarg);
895: break;
896: case 'y':
897: sp.align_yp = atol (optarg);
898: break;
899: case 'w':
900: sp.align_wp = atol (optarg);
901: break;
902: case 'W':
903: sp.align_wp2 = atol (optarg);
904: break;
905: case '?':
906: exit(1);
907: }
908: }
909:
910: if (optind >= argc)
911: {
912: usage ();
913: exit (1);
914: }
915:
916: if (size_num == 0)
917: {
918: fprintf (stderr, "-s <size> must be specified\n");
919: exit (1);
920: }
921:
922: srand (option_seed);
923: srandom (option_seed);
924: srand48 (option_seed);
925:
926: choice = (struct choice_t *) (*_mp_allocate_func)
927: ((argc - optind) * sizeof(choice[0]));
928: for ( ; optind < argc; optind++)
929: {
930: struct choice_t c;
931: routine_find (&c, argv[optind]);
932: choice[num_choices] = c;
933: num_choices++;
934: }
935:
936: if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
937: num_choices < 2)
938: {
939: fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
940: }
941:
942: speed_time_init ();
943:
944: if ((option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
945: && speed_cycletime == 1.0)
946: {
947: fprintf (stderr, "Times in cycles requested, but CPU frequency unknown.\n");
948: fprintf (stderr, "Use environment variable GMP_CPU_FREQUENCY in Hertz, eg. 450e6\n");
949: exit (1);
950: }
951:
952: if (option_gnuplot)
953: {
954: run_gnuplot ();
955: }
956: else
957: {
958: if (option_unit == UNIT_SECONDS)
959: printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
960: else
961: printf ("overhead %.2f cycles",
962: speed_measure (speed_noop, NULL) / speed_cycletime);
963: printf (", precision %d units of %.2e secs, cycle %.1e\n",
964: speed_precision, speed_unittime, speed_cycletime);
965:
966: printf (" ");
967: for (i = 0; i < num_choices; i++)
968: printf (" %*s", COLUMN_WIDTH, choice[i].name);
969: printf ("\n");
970:
971: run_all (stdout);
972: }
973:
974: if (option_resource_usage)
975: {
976: #if defined(linux)
977: /* This is Linux kernel specific. */
978: char buf[128];
979: sprintf (buf, "cat /proc/%d/status", getpid());
980: system (buf);
981:
982: #else
983: /* This doesn't give data sizes on Linux 2.0.36, only utime. */
984: struct rusage r;
985: if (getrusage (RUSAGE_SELF, &r) != 0)
986: perror ("getrusage");
987: else
988: printf ("utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
989: r.ru_utime.tv_sec, r.ru_utime.tv_usec,
990: r.ru_idrss, r.ru_isrss, r.ru_ixrss);
991: #endif
992: }
993:
994: return 0;
995: }
996:
997:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>