Annotation of OpenXM_contrib/gmp/tune/speed.c, Revision 1.1
1.1 ! maekawa 1: /* Speed measuring program. */
! 2:
! 3: /*
! 4: Copyright (C) 1999, 2000 Free Software Foundation, Inc.
! 5:
! 6: This file is part of the GNU MP Library.
! 7:
! 8: The GNU MP Library is free software; you can redistribute it and/or modify
! 9: it under the terms of the GNU Lesser General Public License as published by
! 10: the Free Software Foundation; either version 2.1 of the License, or (at your
! 11: option) any later version.
! 12:
! 13: The GNU MP Library is distributed in the hope that it will be useful, but
! 14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: License for more details.
! 17:
! 18: You should have received a copy of the GNU Lesser General Public License
! 19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: MA 02111-1307, USA.
! 22: */
! 23:
! 24: /* Usage message is in the code below, run with no arguments to print it.
! 25: See README for interesting applications.
! 26:
! 27: To add a new routine foo(), create a speed_foo() function in the style of
! 28: the existing ones and add an entry in the routine[] array. Put FLAG_R if
! 29: speed_foo() wants an "r" parameter.
! 30:
! 31: The routines don't have help messages or descriptions, but most have
! 32: suggestive names. See the source code for full details.
! 33:
! 34: */
! 35:
! 36: #include "config.h"
! 37:
! 38: #if HAVE_GETOPT_H
! 39: #include <getopt.h> /* for getopt_long() */
! 40: #endif
! 41: #include <limits.h>
! 42: #include <stdio.h>
! 43: #include <stdlib.h>
! 44: #include <string.h>
! 45: #include <unistd.h> /* for getpid() */
! 46: #include <sys/time.h> /* for struct timeval for sys/resource.h */
! 47: #include <sys/resource.h> /* for getrusage() */
! 48:
! 49: #include "gmp.h"
! 50: #include "gmp-impl.h"
! 51:
! 52: #include "speed.h"
! 53:
! 54: #if !HAVE_DECL_OPTARG
! 55: extern char *optarg;
! 56: extern int optind, opterr;
! 57: #endif
! 58:
! 59: #if !HAVE_STRTOUL
! 60: #define strtoul(p,e,b) (unsigned long) strtol(p,e,b)
! 61: #endif
! 62:
! 63: #ifdef SPEED_EXTRA_PROTOS
! 64: SPEED_EXTRA_PROTOS
! 65: #endif
! 66:
! 67:
/* Store the value n into each of the size limbs starting at ptr.

   All arguments are parenthesized so arbitrary expressions (eg. "p + 1")
   can be passed, and the loop counter has a private name so it can't
   capture a caller's variable called "i".  The size argument is evaluated
   only once; n is evaluated once per limb stored.  */
#define MPN_FILL(ptr, size, n)                                          \
  do {                                                                  \
    mp_size_t  mpn_fill_i_;                                             \
    mp_size_t  mpn_fill_size_ = (size);                                 \
    for (mpn_fill_i_ = 0; mpn_fill_i_ < mpn_fill_size_; mpn_fill_i_++)  \
      (ptr)[mpn_fill_i_] = (n);                                         \
  } while (0)
! 74:
/* How each measured time is presented: as is, as a ratio to the first
   routine, as a difference from the first routine, or as a difference
   from the same routine at the previous size.  */
enum {
  CMP_ABSOLUTE   = 1,
  CMP_RATIO      = 2,
  CMP_DIFFERENCE = 3,
  CMP_DIFFPREV   = 4
};
int  option_cmp = CMP_ABSOLUTE;

/* Units in which times are printed.  */
enum {
  UNIT_SECONDS       = 1,
  UNIT_CYCLES        = 2,
  UNIT_CYCLESPERLIMB = 3
};
int  option_unit = UNIT_SECONDS;

/* Pattern used by data_fill() for the operand limbs.  */
enum {
  DATA_RANDOM  = 1,
  DATA_RANDOM2 = 2,
  DATA_ZEROS   = 3,
  DATA_FFS     = 4,
  DATA_2FD     = 5
};
int  option_data = DATA_RANDOM;
! 92:
! 93: int option_square = 0;
! 94: double option_factor = 0.0;
! 95: mp_size_t option_step = 1;
! 96: int option_gnuplot = 0;
! 97: char *option_gnuplot_basename;
! 98: struct size_array_t {
! 99: mp_size_t start, end;
! 100: } *size_array = NULL;
! 101: mp_size_t size_num = 0;
! 102: mp_size_t size_allocnum = 0;
! 103: int option_resource_usage = 0;
! 104: long option_seed = 123456789;
! 105:
! 106: struct speed_params sp;
! 107:
! 108: #define COLUMN_WIDTH 13 /* for the free-form output */
! 109:
! 110: #define FLAG_R (1<<0)
! 111: #define FLAG_R_OPTIONAL (1<<1)
! 112: #define FLAG_RSIZE (1<<2)
! 113:
! 114: const struct routine_t {
! 115: /* constants */
! 116: const char *name;
! 117: speed_function_t fun;
! 118: int flag;
! 119:
! 120: } routine[] = {
! 121:
! 122: { "noop", speed_noop },
! 123: { "noop_wxs", speed_noop_wxs },
! 124: { "noop_wxys", speed_noop_wxys },
! 125:
! 126: { "mpn_add_n", speed_mpn_add_n },
! 127: { "mpn_sub_n", speed_mpn_sub_n },
! 128: { "mpn_add_n_self", speed_mpn_add_n_self },
! 129: { "mpn_add_n_inplace", speed_mpn_add_n_inplace },
! 130:
! 131: { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R },
! 132: { "mpn_submul_1", speed_mpn_submul_1, FLAG_R },
! 133: { "mpn_mul_1", speed_mpn_mul_1, FLAG_R },
! 134:
! 135: { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R },
! 136: { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R },
! 137: #if HAVE_NATIVE_mpn_divrem_1c
! 138: { "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R },
! 139: { "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R },
! 140: #endif
! 141: { "mpn_mod_1", speed_mpn_mod_1, FLAG_R },
! 142: #if HAVE_NATIVE_mpn_mod_1c
! 143: { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R },
! 144: #endif
! 145:
! 146: { "mpn_divrem_2", speed_mpn_divrem_2, },
! 147: { "mpn_divexact_by3", speed_mpn_divexact_by3 },
! 148:
! 149: { "mpn_bz_divrem_n", speed_mpn_bz_divrem_n },
! 150: { "mpn_bz_divrem_sb", speed_mpn_bz_divrem_sb },
! 151: { "mpn_bz_tdiv_qr", speed_mpn_bz_tdiv_qr },
! 152:
! 153: { "mpn_lshift", speed_mpn_lshift, FLAG_R },
! 154: { "mpn_rshift", speed_mpn_rshift, FLAG_R },
! 155:
! 156: { "mpn_and_n", speed_mpn_and_n },
! 157: { "mpn_andn_n", speed_mpn_andn_n },
! 158: { "mpn_nand_n", speed_mpn_nand_n },
! 159: { "mpn_ior_n", speed_mpn_ior_n },
! 160: { "mpn_iorn_n", speed_mpn_iorn_n },
! 161: { "mpn_nior_n", speed_mpn_nior_n },
! 162: { "mpn_xor_n", speed_mpn_xor_n },
! 163: { "mpn_xnor_n", speed_mpn_xnor_n },
! 164:
! 165: { "mpn_popcount", speed_mpn_popcount },
! 166: { "mpn_hamdist", speed_mpn_hamdist },
! 167:
! 168: { "mpn_gcdext", speed_mpn_gcdext },
! 169: { "mpn_gcd", speed_mpn_gcd },
! 170: { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
! 171:
! 172: { "mpn_jacobi_base", speed_mpn_jacobi_base },
! 173:
! 174: { "mpn_mul_basecase", speed_mpn_mul_basecase, FLAG_R_OPTIONAL },
! 175: { "mpn_sqr_basecase", speed_mpn_sqr_basecase },
! 176:
! 177: { "mpn_mul_n", speed_mpn_mul_n },
! 178: { "mpn_sqr_n", speed_mpn_sqr_n },
! 179:
! 180: { "mpn_kara_mul_n", speed_mpn_kara_mul_n },
! 181: { "mpn_kara_sqr_n", speed_mpn_kara_sqr_n },
! 182: { "mpn_toom3_mul_n", speed_mpn_toom3_mul_n },
! 183: { "mpn_toom3_sqr_n", speed_mpn_toom3_sqr_n },
! 184: { "mpn_mul_fft_full", speed_mpn_mul_fft_full },
! 185: { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr },
! 186:
! 187: { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL },
! 188: { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
! 189:
! 190: { "mpz_add", speed_mpz_add },
! 191: { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_R_OPTIONAL },
! 192: { "mpz_fac_ui", speed_mpz_fac_ui },
! 193: { "mpz_fib_ui", speed_mpz_fib_ui },
! 194: { "mpz_powm", speed_mpz_powm },
! 195:
! 196: { "MPN_COPY", speed_MPN_COPY },
! 197: { "MPN_COPY_INCR", speed_MPN_COPY_INCR },
! 198: { "MPN_COPY_DECR", speed_MPN_COPY_DECR },
! 199: { "memcpy", speed_memcpy },
! 200:
! 201: { "modlimb_invert", speed_modlimb_invert },
! 202:
! 203: { "malloc_free", speed_malloc_free },
! 204: { "malloc_realloc_free", speed_malloc_realloc_free },
! 205: { "mp_allocate_free", speed_mp_allocate_free },
! 206: { "mp_allocate_reallocate_free", speed_mp_allocate_reallocate_free },
! 207: { "mpz_init_clear", speed_mpz_init_clear },
! 208: { "mpq_init_clear", speed_mpq_init_clear },
! 209: { "mpf_init_clear", speed_mpf_init_clear },
! 210: { "mpz_init_realloc_clear", speed_mpz_init_realloc_clear },
! 211:
! 212: { "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL },
! 213: #if HAVE_NATIVE_mpn_umul_ppmm
! 214: { "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
! 215: #endif
! 216:
! 217: { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL },
! 218: { "udiv_qrnnd_preinv", speed_udiv_qrnnd_preinv, FLAG_R_OPTIONAL },
! 219: { "udiv_qrnnd_preinv2norm", speed_udiv_qrnnd_preinv2norm, FLAG_R_OPTIONAL },
! 220: #if HAVE_NATIVE_mpn_udiv_qrnnd
! 221: { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL },
! 222: #endif
! 223:
! 224: #ifdef SPEED_EXTRA_ROUTINES
! 225: SPEED_EXTRA_ROUTINES
! 226: #endif
! 227: };
! 228:
! 229:
/* One routine requested on the command line, together with its most
   recent measurement.  */
struct choice_t {
  const struct routine_t  *p;         /* entry in routine[] */
  int                     r;          /* the ".r" parameter, 0 if none */
  double                  time;       /* value to print for the current size */
  int                     no_time;    /* non-zero if there's nothing to print */
  double                  prev_time;  /* raw time at the previous size */
  const char              *name;      /* name as given by the user */
};

/* The requested routines, num_choices of them.  */
struct choice_t  *choice;
int              num_choices = 0;
! 240:
! 241:
! 242: void
! 243: data_fill (mp_ptr ptr, mp_size_t size)
! 244: {
! 245: switch (option_data) {
! 246: case DATA_RANDOM:
! 247: mpn_random (ptr, size);
! 248: break;
! 249: case DATA_RANDOM2:
! 250: mpn_random2 (ptr, size);
! 251: break;
! 252: case DATA_ZEROS:
! 253: MPN_ZERO (ptr, size);
! 254: break;
! 255: case DATA_FFS:
! 256: MPN_FILL (ptr, size, MP_LIMB_T_MAX);
! 257: break;
! 258: case DATA_2FD:
! 259: MPN_FILL (ptr, size, MP_LIMB_T_MAX);
! 260: ptr[0] -= 2;
! 261: break;
! 262: default:
! 263: abort();
! 264: /*NOTREACHED*/
! 265: }
! 266: }
! 267:
! 268: /* The code here handling the various combinations of output options isn't
! 269: too attractive, but it works and is fairly clean. */
! 270:
/* Divisor applied to a time when converting to cycles per limb: n itself
   normally, n*n when option_square is 1, or n*(n+1)/2 when option_square
   is 2.  */
#define SIZE_TO_DIVISOR(n)                      \
  (option_square == 2 ? (n)*((n)+1)/2           \
   : option_square == 1 ? (n)*(n)               \
   : (n))
! 275:
! 276: void
! 277: run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
! 278: {
! 279: const char *first_open_fastest, *first_open_notfastest, *first_close;
! 280: int i, fastest;
! 281: double fastest_time;
! 282: TMP_DECL (marker);
! 283:
! 284: TMP_MARK (marker);
! 285: sp.xp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_xp);
! 286: sp.yp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_yp);
! 287:
! 288: data_fill (s->xp, s->size);
! 289: data_fill (s->yp, s->size);
! 290:
! 291: if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
! 292: {
! 293: first_open_fastest = "(#";
! 294: first_open_notfastest = " (";
! 295: first_close = ")";
! 296: }
! 297: else
! 298: {
! 299: first_open_fastest = "#";
! 300: first_open_notfastest = " ";
! 301: first_close = "";
! 302: }
! 303:
! 304: fastest = -1;
! 305: fastest_time = -1.0;
! 306: for (i = 0; i < num_choices; i++)
! 307: {
! 308: s->r = choice[i].r;
! 309: choice[i].time = speed_measure (choice[i].p->fun, s);
! 310: choice[i].no_time = (choice[i].time == -1.0);
! 311:
! 312:
! 313: /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
! 314: is before any differences. */
! 315: {
! 316: double t;
! 317: t = choice[i].time;
! 318: if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
! 319: {
! 320: if (choice[i].prev_time == -1.0)
! 321: choice[i].no_time = 1;
! 322: else
! 323: choice[i].time = choice[i].time - choice[i].prev_time;
! 324: }
! 325: choice[i].prev_time = t;
! 326: }
! 327:
! 328: if (choice[i].no_time)
! 329: continue;
! 330:
! 331: /* Look for the fastest after CMP_DIFFPREV has been applied, but
! 332: before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown
! 333: if there's more than one routine. */
! 334: if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
! 335: {
! 336: fastest = i;
! 337: fastest_time = choice[i].time;
! 338: }
! 339:
! 340: if (option_cmp == CMP_DIFFPREV)
! 341: {
! 342: /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
! 343: if (option_unit == UNIT_CYCLES)
! 344: choice[i].time /= speed_cycletime;
! 345: else if (option_unit == UNIT_CYCLESPERLIMB)
! 346: {
! 347: if (prev_size == -1)
! 348: choice[i].time /= speed_cycletime;
! 349: else
! 350: choice[i].time /= (speed_cycletime
! 351: * (SIZE_TO_DIVISOR(s->size)
! 352: - SIZE_TO_DIVISOR(prev_size)));
! 353: }
! 354: }
! 355: else
! 356: {
! 357: if (option_unit == UNIT_CYCLES)
! 358: choice[i].time /= speed_cycletime;
! 359: else if (option_unit == UNIT_CYCLESPERLIMB)
! 360: choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
! 361:
! 362: if (option_cmp == CMP_RATIO && i > 0)
! 363: {
! 364: /* A ratio isn't affected by the units chosen. */
! 365: if (choice[0].no_time || choice[0].time == 0.0)
! 366: choice[i].no_time = 1;
! 367: else
! 368: choice[i].time /= choice[0].time;
! 369: }
! 370: else if (option_cmp == CMP_DIFFERENCE && i > 0)
! 371: {
! 372: if (choice[0].no_time)
! 373: {
! 374: choice[i].no_time = 1;
! 375: continue;
! 376: }
! 377: choice[i].time -= choice[0].time;
! 378: }
! 379: }
! 380: }
! 381:
! 382: if (option_gnuplot)
! 383: {
! 384: /* In CMP_DIFFPREV, don't print anything for the first size, start
! 385: with the second where an actual difference is available.
! 386:
! 387: In CMP_RATIO, print the first column as 1.0.
! 388:
! 389: The 9 decimals printed is much more than the expected precision of
! 390: the measurements actually. */
! 391:
! 392: if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
! 393: {
! 394: fprintf (fp, "%-6ld ", s->size);
! 395: for (i = 0; i < num_choices; i++)
! 396: fprintf (fp, " %.9e",
! 397: choice[i].no_time ? 0.0
! 398: : (option_cmp == CMP_RATIO && i == 0) ? 1.0
! 399: : choice[i].time);
! 400: fprintf (fp, "\n");
! 401: }
! 402: }
! 403: else
! 404: {
! 405: fprintf (fp, "%-6ld ", s->size);
! 406: for (i = 0; i < num_choices; i++)
! 407: {
! 408: char buf[128];
! 409: int decimals;
! 410:
! 411: if (choice[i].no_time)
! 412: decimals = 0, choice[i].time = 0.0;
! 413: else if (option_unit == UNIT_CYCLESPERLIMB
! 414: || (option_cmp == CMP_RATIO && i > 0))
! 415: decimals = 4;
! 416: else if (option_unit == UNIT_CYCLES)
! 417: decimals = 2;
! 418: else
! 419: decimals = 9;
! 420:
! 421: sprintf (buf, "%s%.*f%s",
! 422: i == fastest ? first_open_fastest : first_open_notfastest,
! 423: decimals, choice[i].time, first_close);
! 424: fprintf (fp, " %*s", COLUMN_WIDTH, buf);
! 425: }
! 426: fprintf (fp, "\n");
! 427: }
! 428:
! 429: TMP_FREE (marker);
! 430: }
! 431:
! 432: void
! 433: run_all (FILE *fp)
! 434: {
! 435: mp_size_t prev_size;
! 436: int i;
! 437: TMP_DECL (marker);
! 438:
! 439: TMP_MARK (marker);
! 440: sp.xp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_xp);
! 441: sp.yp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_yp);
! 442:
! 443: data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
! 444: data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
! 445:
! 446: for (i = 0; i < size_num; i++)
! 447: {
! 448: sp.size = size_array[i].start;
! 449: prev_size = -1;
! 450: for (;;)
! 451: {
! 452: mp_size_t step;
! 453:
! 454: if (option_data == DATA_2FD && sp.size >= 2)
! 455: sp.xp[sp.size-1] = 2;
! 456:
! 457: run_one (fp, &sp, prev_size);
! 458: prev_size = sp.size;
! 459:
! 460: if (option_data == DATA_2FD && sp.size >= 2)
! 461: sp.xp[sp.size-1] = MP_LIMB_T_MAX;
! 462:
! 463: if (option_factor != 0.0)
! 464: {
! 465: step = (mp_size_t) (sp.size * option_factor - sp.size);
! 466: if (step < 1)
! 467: step = 1;
! 468: }
! 469: else
! 470: step = 1;
! 471: if (step < option_step)
! 472: step = option_step;
! 473:
! 474: sp.size += step;
! 475: if (sp.size > size_array[i].end)
! 476: break;
! 477: }
! 478: }
! 479:
! 480: TMP_FREE (marker);
! 481: }
! 482:
! 483:
/* Open filename for writing and return the stream.  If it can't be
   opened a message is printed to stderr and the program exits with
   status 1, so the return is never NULL.  */
FILE *
fopen_for_write (const char *filename)
{
  FILE  *stream = fopen (filename, "w");

  if (stream != NULL)
    return stream;

  fprintf (stderr, "Cannot create %s\n", filename);
  exit(1);
}
! 495:
/* Close fp, which was being written as filename.  If the stream's error
   indicator is set or the close itself fails, print a message to stderr
   and exit with status 1.  */
void
fclose_written (FILE *fp, const char *filename)
{
  int  failed = (ferror (fp) != 0);

  /* the close is always attempted, even if an error is already noted */
  if (fclose (fp) != 0)
    failed = 1;

  if (! failed)
    return;

  fprintf (stderr, "Error writing %s\n", filename);
  exit(1);
}
! 510:
! 511:
! 512: void
! 513: run_gnuplot (void)
! 514: {
! 515: char *plot_filename;
! 516: char *data_filename;
! 517: FILE *fp;
! 518: int i;
! 519:
! 520: plot_filename = (char *) (*_mp_allocate_func)
! 521: (strlen (option_gnuplot_basename) + 20);
! 522: data_filename = (char *) (*_mp_allocate_func)
! 523: (strlen (option_gnuplot_basename) + 20);
! 524:
! 525: sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
! 526: sprintf (data_filename, "%s.data", option_gnuplot_basename);
! 527:
! 528: fp = fopen_for_write (plot_filename);
! 529:
! 530: /* Putting the key at the top left is usually good, and you can change it
! 531: interactively if it's not. */
! 532: fprintf (fp, "set key left\n");
! 533:
! 534: /* designed to make it possible to see crossovers easily */
! 535: fprintf (fp, "set data style linespoints\n");
! 536:
! 537: fprintf (fp, "plot ");
! 538: for (i = 0; i < num_choices; i++)
! 539: {
! 540: fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
! 541: fprintf (fp, " title \"%s\"", choice[i].name);
! 542:
! 543: if (i != num_choices-1)
! 544: fprintf (fp, ", \\");
! 545: fprintf (fp, "\n");
! 546: }
! 547:
! 548: fprintf (fp, "load \"-\"\n");
! 549: fclose_written (fp, plot_filename);
! 550:
! 551: fp = fopen_for_write (data_filename);
! 552:
! 553: /* Unbuffered so you can see where the program was up to if it crashes or
! 554: you kill it. */
! 555: setbuf (fp, NULL);
! 556:
! 557: run_all (fp);
! 558: fclose_written (fp, data_filename);
! 559: }
! 560:
! 561:
! 562: /* Return a long with n many one bits (starting from the least significant) */
! 563: #define LONG_ONES(n) \
! 564: ((n) == BITS_PER_LONGINT ? -1L : (n) == 0 ? 0L : (1L << (n)) - 1)
! 565:
! 566: long
! 567: r_string (const char *s)
! 568: {
! 569: const char *s_orig = s;
! 570: long n;
! 571:
! 572: if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
! 573: n = strtoul (s+2, (char **) &s, 16);
! 574: else
! 575: n = strtol (s, (char **) &s, 10);
! 576:
! 577: if (strcmp (s, "bits") == 0)
! 578: {
! 579: mp_limb_t l;
! 580: if (n > BITS_PER_LONGINT)
! 581: {
! 582: fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
! 583: n, BITS_PER_LONGINT);
! 584: exit (1);
! 585: }
! 586: mpn_random (&l, 1);
! 587: return (l | (1 << (n-1))) & LONG_ONES(n);
! 588: }
! 589: else if (strcmp (s, "ones") == 0)
! 590: {
! 591: if (n > BITS_PER_LONGINT)
! 592: {
! 593: fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
! 594: n, BITS_PER_LONGINT);
! 595: exit (1);
! 596: }
! 597: return LONG_ONES (n);
! 598: }
! 599: else if (*s != '\0')
! 600: {
! 601: fprintf (stderr, "invalid r parameter: %s\n", s_orig);
! 602: exit (1);
! 603: }
! 604:
! 605: return n;
! 606: }
! 607:
! 608:
! 609: void
! 610: routine_find (struct choice_t *c, const char *s)
! 611: {
! 612: int i;
! 613: size_t nlen;
! 614:
! 615: for (i = 0; i < numberof (routine); i++)
! 616: {
! 617: nlen = strlen (routine[i].name);
! 618: if (memcmp (s, routine[i].name, nlen) != 0)
! 619: continue;
! 620:
! 621: if (s[nlen] == '.')
! 622: {
! 623: /* match, with a .r parameter */
! 624:
! 625: if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
! 626: {
! 627: fprintf (stderr, "Choice %s bad: doesn't take a \".<r>\" paramater\n", s);
! 628: exit (1);
! 629: }
! 630:
! 631: c->p = &routine[i];
! 632: c->r = r_string (s + nlen + 1);
! 633: c->name = s;
! 634: return;
! 635: }
! 636:
! 637: if (s[nlen] == '\0')
! 638: {
! 639: /* match, with no parameter */
! 640:
! 641: if (routine[i].flag & FLAG_R)
! 642: {
! 643: fprintf (stderr, "Choice %s bad: needs a \".<r>\" paramater\n", s);
! 644: exit (1);
! 645: }
! 646:
! 647: c->p = &routine[i];
! 648: c->r = 0;
! 649: c->name = s;
! 650: return;
! 651: }
! 652: }
! 653:
! 654: fprintf (stderr, "Choice %s unrecognised\n", s);
! 655: exit (1);
! 656: }
! 657:
! 658:
! 659: void
! 660: usage (void)
! 661: {
! 662: int i;
! 663:
! 664: printf ("\
! 665: Usage: speed [-options] -s size <routine>...\n\
! 666: Measure the speed of some routines.\n\
! 667: Times are in seconds, accuracy is shown.\n\
! 668: \n\
! 669: -p num set precision as number of time units each routine must run\n\
! 670: -s size[-end][,size[-end]]... sizes to measure\n\
! 671: single sizes or ranges, sep with comma or use multiple -s\n\
! 672: -t step step through sizes by given amount\n\
! 673: -f factor step through sizes by given factor (eg. 1.05)\n\
! 674: -r show times as ratios of the first routine\n\
! 675: -d show times as difference from the first routine\n\
! 676: -D show times as difference from previous size shown\n\
! 677: -c show times in CPU cycles\n\
! 678: -C show times in cycles per limb\n\
! 679: -u print resource usage (memory) at end\n\
! 680: -P name output plot files \"name.gnuplot\" and \"name.data\"\n\
! 681: -a <type> use given data: random(default), random2, zeros, ffs\n\
! 682: -x, -y, -w, -W <align> specify data alignments, sources and dests\n\
! 683: -o addrs print addresses of data blocks\n\
! 684: \n\
! 685: If both -t and -f are used, it means step by the factor or the step, whichever\n\
! 686: is greater.\n\
! 687: If both -C and -D are used, it means cycles per however many limbs between a\n\
! 688: size and the previous size.\n\
! 689: \n\
! 690: After running with -P, plots can be viewed with Gnuplot or Quickplot.\n\
! 691: \"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n\
! 692: a log/log plot).\n\
! 693: \"quickplot -s name.data\" (has interactive zooming, and note -s is important\n\
! 694: when viewing more than one routine, it means same axis scales for all data).\n\
! 695: \n\
! 696: The available routines are as follows.\n\
! 697: \n\
! 698: ");
! 699:
! 700: for (i = 0; i < numberof (routine); i++)
! 701: {
! 702: if (routine[i].flag & FLAG_R)
! 703: printf ("\t%s.r\n", routine[i].name);
! 704: else if (routine[i].flag & FLAG_R_OPTIONAL)
! 705: printf ("\t%s (optional .r)\n", routine[i].name);
! 706: else
! 707: printf ("\t%s\n", routine[i].name);
! 708: }
! 709:
! 710: printf ("\n\
! 711: Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n\
! 712: r should be in decimal, or use 0xN for hexadecimal.\n\
! 713: Special forms for r are Nbits for a random N bit number, and Nones for N one\n\
! 714: bits.\n\
! 715: \n\
! 716: Times for sizes out of the range accepted by a routine are shown as 0.\n\
! 717: The fastest routine at each size is marked with a # (free form output only).\n\
! 718: \n\
! 719: %s\
! 720: \n\
! 721: Gnuplot home page http://www.cs.dartmouth.edu/gnuplot_info.html\n\
! 722: Quickplot home page http://www.kachinatech.com/~quickplot\n\
! 723: ", speed_time_string);
! 724: }
! 725:
! 726: int
! 727: main (int argc, char *argv[])
! 728: {
! 729: #define OPTION_ALIGN 1000
! 730: #if HAVE_GETOPT_LONG
! 731: static const struct option longopts[] = {
! 732: { "align", required_argument, NULL, OPTION_ALIGN },
! 733: { "align-x", required_argument, NULL, 'x' },
! 734: { "align-y", required_argument, NULL, 'y' },
! 735: { "align-w", required_argument, NULL, 'w' },
! 736: { "align-w2", required_argument, NULL, 'W' },
! 737: { "data", required_argument, NULL, 'a' },
! 738: { "cycles", no_argument, NULL, 'c' },
! 739: { "cycles-per-limb", no_argument, NULL, 'C' },
! 740: { "diff", no_argument, NULL, 'd' },
! 741: { "diff-prev", no_argument, NULL, 'D' },
! 742: { "difference", no_argument, NULL, 'd' },
! 743: { "difference-prev", no_argument, NULL, 'D' },
! 744: { "factor", required_argument, NULL, 'f' },
! 745: { "plot", no_argument, NULL, 'P' },
! 746: { "precision", required_argument, NULL, 'p' },
! 747: { "ratio", no_argument, NULL, 'r' },
! 748: { "randomize", no_argument, NULL, 'R' },
! 749: { "sizes", required_argument, NULL, 's' },
! 750: { "step", required_argument, NULL, 't' },
! 751: { "resources", required_argument, NULL, 'u' },
! 752: { "uncached", no_argument, NULL, 'z' },
! 753: { NULL }
! 754: };
! 755: #endif
! 756:
! 757: int i;
! 758: int opt;
! 759:
! 760: /* Unbuffered so output goes straight out when directed to a pipe or file
! 761: and isn't lost if you kill the program half way. */
! 762: setbuf (stdout, NULL);
! 763:
! 764: #define OPTSTRING "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"
! 765: #if HAVE_GETOPT_LONG
! 766: while ((opt = getopt_long(argc, argv, OPTSTRING, longopts, NULL))
! 767: != EOF)
! 768: #else
! 769: while ((opt = getopt(argc, argv, OPTSTRING)) != EOF)
! 770: #endif
! 771: {
! 772: switch (opt) {
! 773: case 'a':
! 774: if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM;
! 775: else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
! 776: else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
! 777: else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
! 778: else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
! 779: else
! 780: {
! 781: fprintf (stderr, "unrecognised data option: %s\n", optarg);
! 782: exit (1);
! 783: }
! 784: break;
! 785: case 'C':
! 786: if (option_unit != UNIT_SECONDS) goto bad_unit;
! 787: option_unit = UNIT_CYCLESPERLIMB;
! 788: break;
! 789: case 'c':
! 790: if (option_unit != UNIT_SECONDS)
! 791: {
! 792: bad_unit:
! 793: fprintf (stderr, "cannot use more than one of -c, -C\n");
! 794: exit (1);
! 795: }
! 796: option_unit = UNIT_CYCLES;
! 797: break;
! 798: case 'D':
! 799: if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
! 800: option_cmp = CMP_DIFFPREV;
! 801: break;
! 802: case 'd':
! 803: if (option_cmp != CMP_ABSOLUTE)
! 804: {
! 805: bad_cmp:
! 806: fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
! 807: exit (1);
! 808: }
! 809: option_cmp = CMP_DIFFERENCE;
! 810: break;
! 811: case 'E':
! 812: option_square = 1;
! 813: break;
! 814: case 'F':
! 815: option_square = 2;
! 816: break;
! 817: case 'f':
! 818: option_factor = atof (optarg);
! 819: if (option_factor <= 1.0)
! 820: {
! 821: fprintf (stderr, "-f factor must be > 1.0\n");
! 822: exit (1);
! 823: }
! 824: break;
! 825: case 'o':
! 826: speed_option_set (optarg);
! 827: break;
! 828: case 'P':
! 829: option_gnuplot = 1;
! 830: option_gnuplot_basename = optarg;
! 831: break;
! 832: case 'p':
! 833: speed_precision = atoi (optarg);
! 834: break;
! 835: case 'R':
! 836: option_seed = time (NULL);
! 837: break;
! 838: case 'r':
! 839: if (option_cmp != CMP_ABSOLUTE)
! 840: goto bad_cmp;
! 841: option_cmp = CMP_RATIO;
! 842: break;
! 843: case 's':
! 844: {
! 845: char *s;
! 846: for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
! 847: {
! 848: if (size_num == size_allocnum)
! 849: {
! 850: size_array = (struct size_array_t *)
! 851: _mp_allocate_or_reallocate
! 852: (size_array,
! 853: size_allocnum * sizeof(size_array[0]),
! 854: (size_allocnum+10) * sizeof(size_array[0]));
! 855: size_allocnum += 10;
! 856: }
! 857: if (sscanf (s, "%ld-%ld",
! 858: &size_array[size_num].start,
! 859: &size_array[size_num].end) != 2)
! 860: {
! 861: size_array[size_num].start = size_array[size_num].end
! 862: = atol (s);
! 863: }
! 864:
! 865: if (size_array[size_num].start < 1
! 866: || size_array[size_num].end < 1
! 867: || size_array[size_num].start > size_array[size_num].end)
! 868: {
! 869: fprintf (stderr, "invalid size parameter: %s\n", s);
! 870: exit (1);
! 871: }
! 872:
! 873: size_num++;
! 874: }
! 875: }
! 876: break;
! 877: case 't':
! 878: option_step = atol (optarg);
! 879: if (option_step < 1)
! 880: {
! 881: fprintf (stderr, "-t step must be >= 1\n");
! 882: exit (1);
! 883: }
! 884: break;
! 885: case 'u':
! 886: option_resource_usage = 1;
! 887: break;
! 888: case 'z':
! 889: sp.cache = 1;
! 890: break;
! 891: case OPTION_ALIGN:
! 892: abort();
! 893: case 'x':
! 894: sp.align_xp = atol (optarg);
! 895: break;
! 896: case 'y':
! 897: sp.align_yp = atol (optarg);
! 898: break;
! 899: case 'w':
! 900: sp.align_wp = atol (optarg);
! 901: break;
! 902: case 'W':
! 903: sp.align_wp2 = atol (optarg);
! 904: break;
! 905: case '?':
! 906: exit(1);
! 907: }
! 908: }
! 909:
! 910: if (optind >= argc)
! 911: {
! 912: usage ();
! 913: exit (1);
! 914: }
! 915:
! 916: if (size_num == 0)
! 917: {
! 918: fprintf (stderr, "-s <size> must be specified\n");
! 919: exit (1);
! 920: }
! 921:
! 922: srand (option_seed);
! 923: srandom (option_seed);
! 924: srand48 (option_seed);
! 925:
! 926: choice = (struct choice_t *) (*_mp_allocate_func)
! 927: ((argc - optind) * sizeof(choice[0]));
! 928: for ( ; optind < argc; optind++)
! 929: {
! 930: struct choice_t c;
! 931: routine_find (&c, argv[optind]);
! 932: choice[num_choices] = c;
! 933: num_choices++;
! 934: }
! 935:
! 936: if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
! 937: num_choices < 2)
! 938: {
! 939: fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
! 940: }
! 941:
! 942: speed_time_init ();
! 943:
! 944: if ((option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
! 945: && speed_cycletime == 1.0)
! 946: {
! 947: fprintf (stderr, "Times in cycles requested, but CPU frequency unknown.\n");
! 948: fprintf (stderr, "Use environment variable GMP_CPU_FREQUENCY in Hertz, eg. 450e6\n");
! 949: exit (1);
! 950: }
! 951:
! 952: if (option_gnuplot)
! 953: {
! 954: run_gnuplot ();
! 955: }
! 956: else
! 957: {
! 958: if (option_unit == UNIT_SECONDS)
! 959: printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
! 960: else
! 961: printf ("overhead %.2f cycles",
! 962: speed_measure (speed_noop, NULL) / speed_cycletime);
! 963: printf (", precision %d units of %.2e secs, cycle %.1e\n",
! 964: speed_precision, speed_unittime, speed_cycletime);
! 965:
! 966: printf (" ");
! 967: for (i = 0; i < num_choices; i++)
! 968: printf (" %*s", COLUMN_WIDTH, choice[i].name);
! 969: printf ("\n");
! 970:
! 971: run_all (stdout);
! 972: }
! 973:
! 974: if (option_resource_usage)
! 975: {
! 976: #if defined(linux)
! 977: /* This is Linux kernel specific. */
! 978: char buf[128];
! 979: sprintf (buf, "cat /proc/%d/status", getpid());
! 980: system (buf);
! 981:
! 982: #else
! 983: /* This doesn't give data sizes on Linux 2.0.36, only utime. */
! 984: struct rusage r;
! 985: if (getrusage (RUSAGE_SELF, &r) != 0)
! 986: perror ("getrusage");
! 987: else
! 988: printf ("utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
! 989: r.ru_utime.tv_sec, r.ru_utime.tv_usec,
! 990: r.ru_idrss, r.ru_isrss, r.ru_ixrss);
! 991: #endif
! 992: }
! 993:
! 994: return 0;
! 995: }
! 996:
! 997:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>