[BACK]Return to speed.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / tune

Annotation of OpenXM_contrib/gmp/tune/speed.c, Revision 1.1.1.2

1.1.1.2 ! ohara       1: /* Speed measuring program.
1.1       maekawa     2:
1.1.1.2 ! ohara       3: Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
1.1       maekawa     4:
                      5: This file is part of the GNU MP Library.
                      6:
                      7: The GNU MP Library is free software; you can redistribute it and/or modify
                      8: it under the terms of the GNU Lesser General Public License as published by
                      9: the Free Software Foundation; either version 2.1 of the License, or (at your
                     10: option) any later version.
                     11:
                     12: The GNU MP Library is distributed in the hope that it will be useful, but
                     13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     15: License for more details.
                     16:
                     17: You should have received a copy of the GNU Lesser General Public License
                     18: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
1.1.1.2 ! ohara      20: MA 02111-1307, USA. */
1.1       maekawa    21:
                     22: /* Usage message is in the code below, run with no arguments to print it.
                     23:    See README for interesting applications.
                     24:
                     25:    To add a new routine foo(), create a speed_foo() function in the style of
                     26:    the existing ones and add an entry in the routine[] array.  Put FLAG_R if
                     27:    speed_foo() wants an "r" parameter.
                     28:
                     29:    The routines don't have help messages or descriptions, but most have
                     30:    suggestive names.  See the source code for full details.
                     31:
                     32: */
                     33:
                     34: #include "config.h"
                     35:
                     36: #include <limits.h>
                     37: #include <stdio.h>
                     38: #include <stdlib.h>
                     39: #include <string.h>
1.1.1.2 ! ohara      40:
        !            41: #if HAVE_UNISTD_H
        !            42: #include <unistd.h>  /* for getpid, R_OK */
        !            43: #endif
        !            44:
        !            45: #if TIME_WITH_SYS_TIME
        !            46: # include <sys/time.h>  /* for struct timeval */
        !            47: # include <time.h>
        !            48: #else
        !            49: # if HAVE_SYS_TIME_H
        !            50: #  include <sys/time.h>
        !            51: # else
        !            52: #  include <time.h>
        !            53: # endif
        !            54: #endif
        !            55:
        !            56: #if HAVE_SYS_RESOURCE_H
1.1       maekawa    57: #include <sys/resource.h>  /* for getrusage() */
1.1.1.2 ! ohara      58: #endif
        !            59:
1.1       maekawa    60:
                     61: #include "gmp.h"
                     62: #include "gmp-impl.h"
1.1.1.2 ! ohara      63: #include "longlong.h"  /* for the benefit of speed-many.c */
        !            64: #include "tests.h"
1.1       maekawa    65: #include "speed.h"
                     66:
1.1.1.2 ! ohara      67:
1.1       maekawa    68: #if !HAVE_DECL_OPTARG
                     69: extern char *optarg;
                     70: extern int optind, opterr;
                     71: #endif
                     72:
                     73: #if !HAVE_STRTOUL
                     74: #define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)
                     75: #endif
                     76:
                     77: #ifdef SPEED_EXTRA_PROTOS
                     78: SPEED_EXTRA_PROTOS
                     79: #endif
1.1.1.2 ! ohara      80: #ifdef SPEED_EXTRA_PROTOS2
        !            81: SPEED_EXTRA_PROTOS2
        !            82: #endif
1.1       maekawa    83:
                     84:
1.1.1.2 ! ohara      85: #define MPN_FILL(ptr, size, n)          \
        !            86:   do {                                  \
        !            87:     mp_size_t __i;                      \
        !            88:     ASSERT ((size) >= 0);               \
        !            89:     for (__i = 0; __i < (size); __i++)  \
        !            90:       (ptr)[__i] = (n);                 \
1.1       maekawa    91:   } while (0)
                     92:
1.1.1.2 ! ohara      93:
        !            94: #if BITS_PER_MP_LIMB == 32
        !            95: #define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
        !            96: #endif
        !            97: #if BITS_PER_MP_LIMB == 64
        !            98: #define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
        !            99: #endif
        !           100:
        !           101:
1.1       maekawa   102: #define CMP_ABSOLUTE     1
                    103: #define CMP_RATIO        2
                    104: #define CMP_DIFFERENCE   3
                    105: #define CMP_DIFFPREV     4
                    106: int  option_cmp = CMP_ABSOLUTE;
                    107:
                    108: #define UNIT_SECONDS        1
                    109: #define UNIT_CYCLES         2
                    110: #define UNIT_CYCLESPERLIMB  3
                    111: int  option_unit = UNIT_SECONDS;
                    112:
                    113: #define DATA_RANDOM   1
                    114: #define DATA_RANDOM2  2
                    115: #define DATA_ZEROS    3
1.1.1.2 ! ohara     116: #define DATA_AAS      4
        !           117: #define DATA_FFS      5
        !           118: #define DATA_2FD      6
1.1       maekawa   119: int  option_data = DATA_RANDOM;
                    120:
                    121: int        option_square = 0;
                    122: double     option_factor = 0.0;
                    123: mp_size_t  option_step = 1;
                    124: int        option_gnuplot = 0;
                    125: char      *option_gnuplot_basename;
                    126: struct size_array_t {
                    127:   mp_size_t start, end;
                    128: } *size_array = NULL;
                    129: mp_size_t  size_num = 0;
                    130: mp_size_t  size_allocnum = 0;
                    131: int        option_resource_usage = 0;
                    132: long       option_seed = 123456789;
                    133:
                    134: struct speed_params  sp;
                    135:
                    136: #define COLUMN_WIDTH  13  /* for the free-form output */
                    137:
1.1.1.2 ! ohara     138: #define FLAG_R            (1<<0)  /* require ".r" */
        !           139: #define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
1.1       maekawa   140: #define FLAG_RSIZE        (1<<2)
1.1.1.2 ! ohara     141: #define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */
1.1       maekawa   142:
                    143: const struct routine_t {
                    144:   /* constants */
                    145:   const char        *name;
                    146:   speed_function_t  fun;
                    147:   int               flag;
                    148:
                    149: } routine[] = {
                    150:
                    151:   { "noop",              speed_noop                 },
                    152:   { "noop_wxs",          speed_noop_wxs             },
                    153:   { "noop_wxys",         speed_noop_wxys            },
                    154:
1.1.1.2 ! ohara     155:   { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
        !           156:   { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },
1.1       maekawa   157:
                    158:   { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
                    159:   { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
                    160:   { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
1.1.1.2 ! ohara     161:   { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
        !           162: #if HAVE_NATIVE_mpn_mul_2
        !           163:   { "mpn_mul_2",         speed_mpn_mul_2,     FLAG_R },
        !           164: #endif
1.1       maekawa   165:
                    166:   { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
                    167:   { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
                    168: #if HAVE_NATIVE_mpn_divrem_1c
                    169:   { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
                    170:   { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
                    171: #endif
                    172:   { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R },
                    173: #if HAVE_NATIVE_mpn_mod_1c
                    174:   { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R },
                    175: #endif
1.1.1.2 ! ohara     176:   { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
        !           177:   { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
        !           178:   { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },
        !           179:
        !           180:   { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
        !           181:   { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
        !           182:   { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
        !           183:   { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
        !           184:   { "mpn_mod_1_div",     speed_mpn_mod_1_div,     FLAG_R },
        !           185:   { "mpn_mod_1_inv",     speed_mpn_mod_1_inv,     FLAG_R },
1.1       maekawa   186:
                    187:   { "mpn_divrem_2",      speed_mpn_divrem_2,        },
1.1.1.2 ! ohara     188:   { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
        !           189:   { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },
        !           190:
        !           191:   { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
        !           192:   { "mpn_divexact_by3",  speed_mpn_divexact_by3          },
        !           193:
        !           194: #if HAVE_NATIVE_mpn_modexact_1c_odd
        !           195:   { "mpn_modexact_1_odd",  speed_mpn_modexact_1_odd,  FLAG_R },
        !           196: #endif
        !           197:   { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },
1.1       maekawa   198:
1.1.1.2 ! ohara     199:   { "mpn_dc_tdiv_qr",       speed_mpn_dc_tdiv_qr       },
        !           200:   { "mpn_dc_divrem_n",      speed_mpn_dc_divrem_n      },
        !           201:   { "mpn_dc_divrem_sb",     speed_mpn_dc_divrem_sb     },
        !           202:   { "mpn_dc_divrem_sb_div", speed_mpn_dc_divrem_sb_div },
        !           203:   { "mpn_dc_divrem_sb_inv", speed_mpn_dc_divrem_sb_inv },
        !           204:
        !           205:   { "mpn_sb_divrem_m3",     speed_mpn_sb_divrem_m3     },
        !           206:   { "mpn_sb_divrem_m3_div", speed_mpn_sb_divrem_m3_div },
        !           207:   { "mpn_sb_divrem_m3_inv", speed_mpn_sb_divrem_m3_inv },
1.1       maekawa   208:
                    209:   { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
                    210:   { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },
                    211:
1.1.1.2 ! ohara     212:   { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
        !           213:   { "mpn_andn_n",        speed_mpn_andn_n, FLAG_R_OPTIONAL },
        !           214:   { "mpn_nand_n",        speed_mpn_nand_n, FLAG_R_OPTIONAL },
        !           215:   { "mpn_ior_n",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },
        !           216:   { "mpn_iorn_n",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },
        !           217:   { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
        !           218:   { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
        !           219:   { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
        !           220:   { "mpn_com_n",         speed_mpn_com_n            },
1.1       maekawa   221:
                    222:   { "mpn_popcount",      speed_mpn_popcount         },
                    223:   { "mpn_hamdist",       speed_mpn_hamdist          },
                    224:
1.1.1.2 ! ohara     225:   { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
        !           226:   { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
1.1       maekawa   227:
1.1.1.2 ! ohara     228:   { "mpn_gcd",           speed_mpn_gcd                    },
        !           229:   { "mpn_gcd_binary",    speed_mpn_gcd_binary             },
        !           230:   { "find_a",            speed_find_a,        FLAG_NODATA },
        !           231: #if HAVE_NATIVE_mpn_gcd_finda
        !           232:   { "mpn_gcd_finda",     speed_mpn_gcd_finda, FLAG_NODATA },
        !           233: #endif
        !           234:
        !           235:   { "mpn_gcdext",            speed_mpn_gcdext            },
        !           236:   { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
        !           237:   { "mpn_gcdext_double",     speed_mpn_gcdext_double     },
        !           238:   { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
        !           239:   { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
        !           240:
        !           241:   { "mpz_jacobi",        speed_mpz_jacobi           },
1.1       maekawa   242:   { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
1.1.1.2 ! ohara     243:   { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
        !           244:   { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
        !           245:   { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
1.1       maekawa   246:
                    247:   { "mpn_mul_basecase",  speed_mpn_mul_basecase, FLAG_R_OPTIONAL },
                    248:   { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
1.1.1.2 ! ohara     249: #if HAVE_NATIVE_mpn_sqr_diagonal
        !           250:   { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
        !           251: #endif
1.1       maekawa   252:
                    253:   { "mpn_mul_n",         speed_mpn_mul_n            },
                    254:   { "mpn_sqr_n",         speed_mpn_sqr_n            },
                    255:
                    256:   { "mpn_kara_mul_n",    speed_mpn_kara_mul_n       },
                    257:   { "mpn_kara_sqr_n",    speed_mpn_kara_sqr_n       },
                    258:   { "mpn_toom3_mul_n",   speed_mpn_toom3_mul_n      },
                    259:   { "mpn_toom3_sqr_n",   speed_mpn_toom3_sqr_n      },
1.1.1.2 ! ohara     260:   { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
        !           261:   { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
1.1       maekawa   262:
                    263:   { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
                    264:   { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
                    265:
1.1.1.2 ! ohara     266:   { "mpn_toom3_mul_n_mpn",   speed_mpn_toom3_mul_n_mpn   },
        !           267:   { "mpn_toom3_mul_n_open",  speed_mpn_toom3_mul_n_open  },
        !           268:   { "mpn_toom3_sqr_n_mpn",   speed_mpn_toom3_sqr_n_mpn   },
        !           269:   { "mpn_toom3_sqr_n_open",  speed_mpn_toom3_sqr_n_open  },
        !           270:
        !           271:   { "mpn_get_str",       speed_mpn_get_str,  FLAG_R_OPTIONAL },
        !           272:
        !           273:   { "mpn_set_str",          speed_mpn_set_str,          FLAG_R_OPTIONAL },
        !           274:   { "mpn_set_str_basecase", speed_mpn_set_str_basecase, FLAG_R_OPTIONAL },
        !           275:   { "mpn_set_str_subquad",  speed_mpn_set_str_subquad,  FLAG_R_OPTIONAL },
        !           276:
        !           277:   { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
        !           278:
        !           279:   { "mpn_fib2_ui",       speed_mpn_fib2_ui,    FLAG_NODATA },
        !           280:   { "mpz_fib_ui",        speed_mpz_fib_ui,     FLAG_NODATA },
        !           281:   { "mpz_fib2_ui",       speed_mpz_fib2_ui,    FLAG_NODATA },
        !           282:   { "mpz_lucnum_ui",     speed_mpz_lucnum_ui,  FLAG_NODATA },
        !           283:   { "mpz_lucnum2_ui",    speed_mpz_lucnum2_ui, FLAG_NODATA },
        !           284:
1.1       maekawa   285:   { "mpz_add",           speed_mpz_add              },
1.1.1.2 ! ohara     286:   { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
        !           287:   { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
1.1       maekawa   288:   { "mpz_powm",          speed_mpz_powm             },
1.1.1.2 ! ohara     289:   { "mpz_powm_mod",      speed_mpz_powm_mod         },
        !           290:   { "mpz_powm_redc",     speed_mpz_powm_redc        },
        !           291:   { "mpz_powm_ui",       speed_mpz_powm_ui          },
        !           292:
        !           293:   { "mpz_mod",           speed_mpz_mod              },
        !           294:   { "redc",              speed_redc                 },
1.1       maekawa   295:
                    296:   { "MPN_COPY",          speed_MPN_COPY             },
                    297:   { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
                    298:   { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
                    299:   { "memcpy",            speed_memcpy               },
1.1.1.2 ! ohara     300: #if HAVE_NATIVE_mpn_copyi
        !           301:   { "mpn_copyi",         speed_mpn_copyi            },
        !           302: #endif
        !           303: #if HAVE_NATIVE_mpn_copyd
        !           304:   { "mpn_copyd",         speed_mpn_copyd            },
        !           305: #endif
1.1       maekawa   306:
1.1.1.2 ! ohara     307:   { "MPN_ZERO",          speed_MPN_ZERO             },
1.1       maekawa   308:
1.1.1.2 ! ohara     309:   { "modlimb_invert",       speed_modlimb_invert,       FLAG_NODATA },
        !           310:   { "modlimb_invert_mul1",  speed_modlimb_invert_mul1,  FLAG_NODATA },
        !           311:   { "modlimb_invert_loop",  speed_modlimb_invert_loop,  FLAG_NODATA },
        !           312:   { "modlimb_invert_cond",  speed_modlimb_invert_cond,  FLAG_NODATA },
        !           313:   { "modlimb_invert_arith", speed_modlimb_invert_arith, FLAG_NODATA },
        !           314:
        !           315:   { "malloc_free",                  speed_malloc_free                  },
        !           316:   { "malloc_realloc_free",          speed_malloc_realloc_free          },
        !           317:   { "gmp_allocate_free",            speed_gmp_allocate_free            },
        !           318:   { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
        !           319:   { "mpz_init_clear",               speed_mpz_init_clear               },
        !           320:   { "mpq_init_clear",               speed_mpq_init_clear               },
        !           321:   { "mpf_init_clear",               speed_mpf_init_clear               },
        !           322:   { "mpz_init_realloc_clear",       speed_mpz_init_realloc_clear       },
1.1       maekawa   323:
                    324:   { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
                    325: #if HAVE_NATIVE_mpn_umul_ppmm
                    326:   { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
                    327: #endif
                    328:
1.1.1.2 ! ohara     329:   { "count_leading_zeros",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },
        !           330:   { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
        !           331:
1.1       maekawa   332:   { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
                    333:   { "udiv_qrnnd_preinv",      speed_udiv_qrnnd_preinv,      FLAG_R_OPTIONAL },
                    334:   { "udiv_qrnnd_preinv2norm", speed_udiv_qrnnd_preinv2norm, FLAG_R_OPTIONAL },
1.1.1.2 ! ohara     335:   { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
1.1       maekawa   336: #if HAVE_NATIVE_mpn_udiv_qrnnd
                    337:   { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
                    338: #endif
1.1.1.2 ! ohara     339:   { "invert_limb",            speed_invert_limb,            FLAG_R_OPTIONAL },
        !           340:
        !           341:   { "operator_div",           speed_operator_div,           FLAG_R_OPTIONAL },
        !           342:   { "operator_mod",           speed_operator_mod,           FLAG_R_OPTIONAL },
1.1       maekawa   343:
                    344: #ifdef SPEED_EXTRA_ROUTINES
                    345:   SPEED_EXTRA_ROUTINES
                    346: #endif
1.1.1.2 ! ohara     347: #ifdef SPEED_EXTRA_ROUTINES2
        !           348:   SPEED_EXTRA_ROUTINES2
        !           349: #endif
1.1       maekawa   350: };
                    351:
                    352:
                    353: struct choice_t {
                    354:   const struct routine_t  *p;
1.1.1.2 ! ohara     355:   mp_limb_t               r;
        !           356:   double                  scale;
1.1       maekawa   357:   double                  time;
                    358:   int                     no_time;
                    359:   double                  prev_time;
                    360:   const char              *name;
                    361: };
                    362: struct choice_t  *choice;
                    363: int  num_choices = 0;
                    364:
                    365:
                    366: void
                    367: data_fill (mp_ptr ptr, mp_size_t size)
                    368: {
                    369:   switch (option_data) {
                    370:   case DATA_RANDOM:
                    371:     mpn_random (ptr, size);
                    372:     break;
                    373:   case DATA_RANDOM2:
                    374:     mpn_random2 (ptr, size);
                    375:     break;
                    376:   case DATA_ZEROS:
                    377:     MPN_ZERO (ptr, size);
                    378:     break;
1.1.1.2 ! ohara     379:   case DATA_AAS:
        !           380:     MPN_FILL (ptr, size, GMP_NUMB_0xAA);
        !           381:     break;
1.1       maekawa   382:   case DATA_FFS:
1.1.1.2 ! ohara     383:     MPN_FILL (ptr, size, GMP_NUMB_MAX);
1.1       maekawa   384:     break;
                    385:   case DATA_2FD:
1.1.1.2 ! ohara     386:     MPN_FILL (ptr, size, GMP_NUMB_MAX);
1.1       maekawa   387:     ptr[0] -= 2;
                    388:     break;
                    389:   default:
                    390:     abort();
                    391:     /*NOTREACHED*/
                    392:   }
                    393: }
                    394:
                    395: /* The code here handling the various combinations of output options isn't
                    396:    too attractive, but it works and is fairly clean.  */
                    397:
                    398: #define SIZE_TO_DIVISOR(n)              \
                    399:   (option_square == 1 ? (n)*(n)         \
                    400:   : option_square == 2 ? (n)*((n)+1)/2  \
                    401:   : (n))
                    402:
                    403: void
                    404: run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
                    405: {
                    406:   const char  *first_open_fastest, *first_open_notfastest, *first_close;
1.1.1.2 ! ohara     407:   int         i, fastest, want_data;
1.1       maekawa   408:   double      fastest_time;
                    409:   TMP_DECL (marker);
                    410:
                    411:   TMP_MARK (marker);
                    412:
1.1.1.2 ! ohara     413:   /* allocate data, unless all routines are NODATA */
        !           414:   want_data = 0;
        !           415:   for (i = 0; i < num_choices; i++)
        !           416:     want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
        !           417:
        !           418:   if (want_data)
        !           419:     {
        !           420:       sp.xp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_xp);
        !           421:       sp.yp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_yp);
        !           422:
        !           423:       data_fill (s->xp, s->size);
        !           424:       data_fill (s->yp, s->size);
        !           425:     }
        !           426:   else
        !           427:     {
        !           428:       sp.xp = NULL;
        !           429:       sp.yp = NULL;
        !           430:     }
1.1       maekawa   431:
                    432:   if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
                    433:     {
                    434:       first_open_fastest = "(#";
                    435:       first_open_notfastest = " (";
                    436:       first_close = ")";
                    437:     }
                    438:   else
                    439:     {
                    440:       first_open_fastest = "#";
                    441:       first_open_notfastest = " ";
                    442:       first_close = "";
                    443:     }
                    444:
                    445:   fastest = -1;
                    446:   fastest_time = -1.0;
                    447:   for (i = 0; i < num_choices; i++)
                    448:     {
                    449:       s->r = choice[i].r;
                    450:       choice[i].time = speed_measure (choice[i].p->fun, s);
                    451:       choice[i].no_time = (choice[i].time == -1.0);
1.1.1.2 ! ohara     452:       choice[i].time *= choice[i].scale;
1.1       maekawa   453:
                    454:       /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
                    455:          is before any differences.  */
                    456:       {
                    457:         double     t;
                    458:         t = choice[i].time;
                    459:         if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
                    460:           {
                    461:             if (choice[i].prev_time == -1.0)
                    462:               choice[i].no_time = 1;
                    463:             else
                    464:               choice[i].time = choice[i].time - choice[i].prev_time;
                    465:           }
                    466:         choice[i].prev_time = t;
                    467:       }
                    468:
                    469:       if (choice[i].no_time)
                    470:         continue;
                    471:
                    472:       /* Look for the fastest after CMP_DIFFPREV has been applied, but
                    473:          before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
                    474:          if there's more than one routine.  */
                    475:       if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
                    476:         {
                    477:           fastest = i;
                    478:           fastest_time = choice[i].time;
                    479:         }
                    480:
                    481:       if (option_cmp == CMP_DIFFPREV)
                    482:         {
                    483:           /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
                    484:           if (option_unit == UNIT_CYCLES)
                    485:             choice[i].time /= speed_cycletime;
                    486:           else if (option_unit == UNIT_CYCLESPERLIMB)
                    487:             {
                    488:               if (prev_size == -1)
                    489:                 choice[i].time /= speed_cycletime;
                    490:               else
                    491:                 choice[i].time /=  (speed_cycletime
                    492:                                     * (SIZE_TO_DIVISOR(s->size)
                    493:                                        - SIZE_TO_DIVISOR(prev_size)));
                    494:             }
                    495:         }
                    496:       else
                    497:         {
                    498:           if (option_unit == UNIT_CYCLES)
                    499:             choice[i].time /= speed_cycletime;
                    500:           else if (option_unit == UNIT_CYCLESPERLIMB)
                    501:             choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
                    502:
                    503:           if (option_cmp == CMP_RATIO && i > 0)
                    504:             {
                    505:               /* A ratio isn't affected by the units chosen. */
                    506:               if (choice[0].no_time || choice[0].time == 0.0)
                    507:                 choice[i].no_time = 1;
                    508:               else
                    509:                 choice[i].time /= choice[0].time;
                    510:             }
                    511:           else if (option_cmp == CMP_DIFFERENCE && i > 0)
                    512:             {
                    513:               if (choice[0].no_time)
                    514:                 {
                    515:                   choice[i].no_time = 1;
                    516:                   continue;
                    517:                 }
                    518:               choice[i].time -= choice[0].time;
                    519:             }
                    520:         }
                    521:     }
                    522:
                    523:   if (option_gnuplot)
                    524:     {
                    525:       /* In CMP_DIFFPREV, don't print anything for the first size, start
                    526:          with the second where an actual difference is available.
                    527:
                    528:          In CMP_RATIO, print the first column as 1.0.
                    529:
                    530:          The 9 decimals printed is much more than the expected precision of
                    531:          the measurements actually. */
                    532:
                    533:       if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
                    534:         {
                    535:           fprintf (fp, "%-6ld ", s->size);
                    536:           for (i = 0; i < num_choices; i++)
                    537:             fprintf (fp, "  %.9e",
                    538:                      choice[i].no_time ? 0.0
                    539:                      : (option_cmp == CMP_RATIO && i == 0) ? 1.0
                    540:                      : choice[i].time);
                    541:           fprintf (fp, "\n");
                    542:         }
                    543:     }
                    544:   else
                    545:     {
                    546:       fprintf (fp, "%-6ld ", s->size);
                    547:       for (i = 0; i < num_choices; i++)
                    548:         {
                    549:           char  buf[128];
                    550:           int   decimals;
                    551:
                    552:           if (choice[i].no_time)
                    553:             decimals = 0, choice[i].time = 0.0;
                    554:           else if (option_unit == UNIT_CYCLESPERLIMB
                    555:                    || (option_cmp == CMP_RATIO && i > 0))
                    556:             decimals = 4;
                    557:           else if (option_unit == UNIT_CYCLES)
                    558:             decimals = 2;
                    559:           else
                    560:             decimals = 9;
                    561:
                    562:           sprintf (buf, "%s%.*f%s",
                    563:                    i == fastest ? first_open_fastest : first_open_notfastest,
                    564:                    decimals, choice[i].time, first_close);
                    565:           fprintf (fp, " %*s", COLUMN_WIDTH, buf);
                    566:         }
                    567:       fprintf (fp, "\n");
                    568:     }
                    569:
                    570:   TMP_FREE (marker);
                    571: }
                    572:
                    573: void
                    574: run_all (FILE *fp)
                    575: {
                    576:   mp_size_t  prev_size;
                    577:   int        i;
                    578:   TMP_DECL (marker);
                    579:
                    580:   TMP_MARK (marker);
                    581:   sp.xp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_xp);
                    582:   sp.yp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_yp);
                    583:
                    584:   data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
                    585:   data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
                    586:
                    587:   for (i = 0; i < size_num; i++)
                    588:     {
                    589:       sp.size = size_array[i].start;
                    590:       prev_size = -1;
                    591:       for (;;)
                    592:         {
                    593:           mp_size_t  step;
                    594:
                    595:           if (option_data == DATA_2FD && sp.size >= 2)
                    596:             sp.xp[sp.size-1] = 2;
                    597:
                    598:           run_one (fp, &sp, prev_size);
                    599:           prev_size = sp.size;
                    600:
                    601:           if (option_data == DATA_2FD && sp.size >= 2)
                    602:             sp.xp[sp.size-1] = MP_LIMB_T_MAX;
                    603:
                    604:           if (option_factor != 0.0)
                    605:             {
                    606:               step = (mp_size_t) (sp.size * option_factor - sp.size);
                    607:               if (step < 1)
                    608:                 step = 1;
                    609:             }
                    610:           else
                    611:             step = 1;
                    612:           if (step < option_step)
                    613:             step = option_step;
                    614:
                    615:           sp.size += step;
                    616:           if (sp.size > size_array[i].end)
                    617:             break;
                    618:         }
                    619:     }
                    620:
                    621:   TMP_FREE (marker);
                    622: }
                    623:
                    624:
                    625: FILE *
                    626: fopen_for_write (const char *filename)
                    627: {
                    628:   FILE  *fp;
                    629:   if ((fp = fopen (filename, "w")) == NULL)
                    630:     {
                    631:       fprintf (stderr, "Cannot create %s\n", filename);
                    632:       exit(1);
                    633:     }
                    634:   return fp;
                    635: }
                    636:
                    637: void
                    638: fclose_written (FILE *fp, const char *filename)
                    639: {
                    640:   int  err;
                    641:
                    642:   err = ferror (fp);
                    643:   err |= fclose (fp);
                    644:
                    645:   if (err)
                    646:     {
                    647:       fprintf (stderr, "Error writing %s\n", filename);
                    648:       exit(1);
                    649:     }
                    650: }
                    651:
                    652:
                    653: void
1.1.1.2 ! ohara     654: run_gnuplot (int argc, char *argv[])
1.1       maekawa   655: {
                    656:   char  *plot_filename;
                    657:   char  *data_filename;
                    658:   FILE  *fp;
                    659:   int   i;
                    660:
1.1.1.2 ! ohara     661:   plot_filename = (char *) (*__gmp_allocate_func)
1.1       maekawa   662:     (strlen (option_gnuplot_basename) + 20);
1.1.1.2 ! ohara     663:   data_filename = (char *) (*__gmp_allocate_func)
1.1       maekawa   664:     (strlen (option_gnuplot_basename) + 20);
                    665:
                    666:   sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
                    667:   sprintf (data_filename, "%s.data",    option_gnuplot_basename);
                    668:
                    669:   fp = fopen_for_write (plot_filename);
                    670:
1.1.1.2 ! ohara     671:   fprintf (fp, "# Generated with:\n");
        !           672:   fprintf (fp, "#");
        !           673:   for (i = 0; i < argc; i++)
        !           674:     fprintf (fp, " %s", argv[i]);
        !           675:   fprintf (fp, "\n");
        !           676:   fprintf (fp, "\n");
        !           677:
1.1       maekawa   678:   /* Putting the key at the top left is usually good, and you can change it
                    679:      interactively if it's not. */
                    680:   fprintf (fp, "set key left\n");
                    681:
                    682:   /* designed to make it possible to see crossovers easily */
                    683:   fprintf (fp, "set data style linespoints\n");
                    684:
                    685:   fprintf (fp, "plot ");
                    686:   for (i = 0; i < num_choices; i++)
                    687:     {
                    688:       fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
                    689:       fprintf (fp, " title \"%s\"", choice[i].name);
                    690:
                    691:       if (i != num_choices-1)
                    692:         fprintf (fp, ", \\");
                    693:       fprintf (fp, "\n");
                    694:     }
                    695:
                    696:   fprintf (fp, "load \"-\"\n");
                    697:   fclose_written (fp, plot_filename);
                    698:
                    699:   fp = fopen_for_write (data_filename);
                    700:
                    701:   /* Unbuffered so you can see where the program was up to if it crashes or
                    702:      you kill it. */
                    703:   setbuf (fp, NULL);
                    704:
                    705:   run_all (fp);
                    706:   fclose_written (fp, data_filename);
                    707: }
                    708:
                    709:
1.1.1.2 ! ohara     710: /* Return a limb with n many one bits (starting from the least significant) */
1.1       maekawa   711:
1.1.1.2 ! ohara     712: #define LIMB_ONES(n) \
        !           713:   ((n) == BITS_PER_MP_LIMB ? MP_LIMB_T_MAX      \
        !           714:     : (n) == 0 ? CNST_LIMB(0)                   \
        !           715:     : (CNST_LIMB(1) << (n)) - 1)
        !           716:
        !           717: mp_limb_t
1.1       maekawa   718: r_string (const char *s)
                    719: {
                    720:   const char  *s_orig = s;
1.1.1.2 ! ohara     721:   long        n;
        !           722:
        !           723:   if (strcmp (s, "aas") == 0)
        !           724:     return GMP_NUMB_0xAA;
        !           725:
        !           726:   {
        !           727:     mpz_t      z;
        !           728:     mp_limb_t  l;
        !           729:     int        set, siz;
        !           730:
        !           731:     mpz_init (z);
        !           732:     set = mpz_set_str (z, s, 0);
        !           733:     siz = SIZ(z);
        !           734:     l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
        !           735:     mpz_clear (z);
        !           736:     if (set == 0)
        !           737:       {
        !           738:         if (siz > 1 || siz < -1)
        !           739:           printf ("Warning, r parameter %s truncated to %d bits\n",
        !           740:                   s_orig, BITS_PER_MP_LIMB);
        !           741:         return l;
        !           742:       }
        !           743:   }
1.1       maekawa   744:
                    745:   if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
                    746:     n = strtoul (s+2, (char **) &s, 16);
                    747:   else
                    748:     n = strtol (s, (char **) &s, 10);
                    749:
                    750:   if (strcmp (s, "bits") == 0)
                    751:     {
                    752:       mp_limb_t  l;
1.1.1.2 ! ohara     753:       if (n > BITS_PER_MP_LIMB)
1.1       maekawa   754:         {
                    755:           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
1.1.1.2 ! ohara     756:                    n, BITS_PER_MP_LIMB);
1.1       maekawa   757:           exit (1);
                    758:         }
                    759:       mpn_random (&l, 1);
1.1.1.2 ! ohara     760:       return (l | (1 << (n-1))) & LIMB_ONES(n);
1.1       maekawa   761:     }
                    762:   else  if (strcmp (s, "ones") == 0)
                    763:     {
1.1.1.2 ! ohara     764:       if (n > BITS_PER_MP_LIMB)
1.1       maekawa   765:         {
                    766:           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
1.1.1.2 ! ohara     767:                    n, BITS_PER_MP_LIMB);
1.1       maekawa   768:           exit (1);
                    769:         }
1.1.1.2 ! ohara     770:       return LIMB_ONES (n);
1.1       maekawa   771:     }
                    772:   else if (*s != '\0')
                    773:     {
                    774:       fprintf (stderr, "invalid r parameter: %s\n", s_orig);
                    775:       exit (1);
                    776:     }
                    777:
                    778:   return n;
                    779: }
                    780:
                    781:
                    782: void
1.1.1.2 ! ohara     783: routine_find (struct choice_t *c, const char *s_orig)
1.1       maekawa   784: {
1.1.1.2 ! ohara     785:   const char  *s;
1.1       maekawa   786:   int     i;
                    787:   size_t  nlen;
                    788:
1.1.1.2 ! ohara     789:   c->name = s_orig;
        !           790:   s = strchr (s_orig, '*');
        !           791:   if (s != NULL)
        !           792:     {
        !           793:       c->scale = atof(s_orig);
        !           794:       s++;
        !           795:     }
        !           796:   else
        !           797:     {
        !           798:       c->scale = 1.0;
        !           799:       s = s_orig;
        !           800:     }
        !           801:
1.1       maekawa   802:   for (i = 0; i < numberof (routine); i++)
                    803:     {
                    804:       nlen = strlen (routine[i].name);
                    805:       if (memcmp (s, routine[i].name, nlen) != 0)
                    806:         continue;
                    807:
                    808:       if (s[nlen] == '.')
                    809:         {
                    810:           /* match, with a .r parameter */
                    811:
                    812:           if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
                    813:             {
1.1.1.2 ! ohara     814:               fprintf (stderr,
        !           815:                        "Choice %s bad: doesn't take a \".<r>\" parameter\n",
        !           816:                        s_orig);
1.1       maekawa   817:               exit (1);
                    818:             }
                    819:
                    820:           c->p = &routine[i];
                    821:           c->r = r_string (s + nlen + 1);
                    822:           return;
                    823:         }
                    824:
                    825:       if (s[nlen] == '\0')
                    826:         {
                    827:           /* match, with no parameter */
                    828:
                    829:           if (routine[i].flag & FLAG_R)
                    830:             {
1.1.1.2 ! ohara     831:               fprintf (stderr,
        !           832:                        "Choice %s bad: needs a \".<r>\" parameter\n",
        !           833:                        s_orig);
1.1       maekawa   834:               exit (1);
                    835:             }
                    836:
                    837:           c->p = &routine[i];
                    838:           c->r = 0;
                    839:           return;
                    840:         }
                    841:     }
                    842:
1.1.1.2 ! ohara     843:   fprintf (stderr, "Choice %s unrecognised\n", s_orig);
1.1       maekawa   844:   exit (1);
                    845: }
                    846:
                    847:
                    848: void
                    849: usage (void)
                    850: {
                    851:   int  i;
                    852:
1.1.1.2 ! ohara     853:   speed_time_init ();
        !           854:
1.1       maekawa   855:   printf ("\
                    856: Usage: speed [-options] -s size <routine>...\n\
                    857: Measure the speed of some routines.\n\
                    858: Times are in seconds, accuracy is shown.\n\
                    859: \n\
                    860:    -p num     set precision as number of time units each routine must run\n\
                    861:    -s size[-end][,size[-end]]...   sizes to measure\n\
                    862:               single sizes or ranges, sep with comma or use multiple -s\n\
                    863:    -t step    step through sizes by given amount\n\
                    864:    -f factor  step through sizes by given factor (eg. 1.05)\n\
                    865:    -r         show times as ratios of the first routine\n\
                    866:    -d         show times as difference from the first routine\n\
                    867:    -D         show times as difference from previous size shown\n\
                    868:    -c         show times in CPU cycles\n\
                    869:    -C         show times in cycles per limb\n\
                    870:    -u         print resource usage (memory) at end\n\
                    871:    -P name    output plot files \"name.gnuplot\" and \"name.data\"\n\
1.1.1.2 ! ohara     872:    -a <type>  use given data: random(default), random2, zeros, aas, ffs, 2fd\n\
1.1       maekawa   873:    -x, -y, -w, -W <align>  specify data alignments, sources and dests\n\
                    874:    -o addrs   print addresses of data blocks\n\
                    875: \n\
                    876: If both -t and -f are used, it means step by the factor or the step, whichever\n\
                    877: is greater.\n\
                    878: If both -C and -D are used, it means cycles per however many limbs between a\n\
                    879: size and the previous size.\n\
                    880: \n\
                    881: After running with -P, plots can be viewed with Gnuplot or Quickplot.\n\
                    882: \"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n\
                    883: a log/log plot).\n\
                    884: \"quickplot -s name.data\" (has interactive zooming, and note -s is important\n\
                    885: when viewing more than one routine, it means same axis scales for all data).\n\
                    886: \n\
                    887: The available routines are as follows.\n\
                    888: \n\
                    889: ");
                    890:
                    891:   for (i = 0; i < numberof (routine); i++)
                    892:     {
                    893:       if (routine[i].flag & FLAG_R)
                    894:         printf ("\t%s.r\n", routine[i].name);
                    895:       else if (routine[i].flag & FLAG_R_OPTIONAL)
                    896:         printf ("\t%s (optional .r)\n", routine[i].name);
                    897:       else
                    898:         printf ("\t%s\n", routine[i].name);
                    899:     }
                    900:
                    901:   printf ("\n\
                    902: Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n\
                    903: r should be in decimal, or use 0xN for hexadecimal.\n\
1.1.1.2 ! ohara     904: \n\
        !           905: Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n\
        !           906: N one bits, or \"aas\" for 0xAA..AA.\n\
1.1       maekawa   907: \n\
                    908: Times for sizes out of the range accepted by a routine are shown as 0.\n\
                    909: The fastest routine at each size is marked with a # (free form output only).\n\
                    910: \n\
                    911: %s\
                    912: \n\
                    913: Gnuplot home page http://www.cs.dartmouth.edu/gnuplot_info.html\n\
                    914: Quickplot home page http://www.kachinatech.com/~quickplot\n\
                    915: ", speed_time_string);
                    916: }
                    917:
                    918: int
                    919: main (int argc, char *argv[])
                    920: {
                    921:   int  i;
                    922:   int  opt;
                    923:
                    924:   /* Unbuffered so output goes straight out when directed to a pipe or file
1.1.1.2 ! ohara     925:      and isn't lost on killing the program half way.  */
1.1       maekawa   926:   setbuf (stdout, NULL);
                    927:
1.1.1.2 ! ohara     928:   for (;;)
        !           929:     {
        !           930:       opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
        !           931:       if (opt == EOF)
        !           932:         break;
        !           933:
        !           934:       switch (opt) {
        !           935:       case 'a':
        !           936:         if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
        !           937:         else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
        !           938:         else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
        !           939:         else if (strcmp (optarg, "aas") == 0)     option_data = DATA_AAS;
        !           940:         else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
        !           941:         else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
        !           942:         else
1.1       maekawa   943:           {
1.1.1.2 ! ohara     944:             fprintf (stderr, "unrecognised data option: %s\n", optarg);
        !           945:             exit (1);
        !           946:           }
        !           947:         break;
        !           948:       case 'C':
        !           949:         if (option_unit  != UNIT_SECONDS) goto bad_unit;
        !           950:         option_unit = UNIT_CYCLESPERLIMB;
        !           951:         break;
        !           952:       case 'c':
        !           953:         if (option_unit != UNIT_SECONDS)
        !           954:           {
        !           955:           bad_unit:
        !           956:             fprintf (stderr, "cannot use more than one of -c, -C\n");
        !           957:             exit (1);
        !           958:           }
        !           959:         option_unit = UNIT_CYCLES;
        !           960:         break;
        !           961:       case 'D':
        !           962:         if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
        !           963:         option_cmp = CMP_DIFFPREV;
        !           964:         break;
        !           965:       case 'd':
        !           966:         if (option_cmp != CMP_ABSOLUTE)
        !           967:           {
        !           968:           bad_cmp:
        !           969:             fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
        !           970:             exit (1);
1.1       maekawa   971:           }
1.1.1.2 ! ohara     972:         option_cmp = CMP_DIFFERENCE;
        !           973:         break;
        !           974:       case 'E':
        !           975:         option_square = 1;
        !           976:         break;
        !           977:       case 'F':
        !           978:         option_square = 2;
        !           979:         break;
        !           980:       case 'f':
        !           981:         option_factor = atof (optarg);
        !           982:         if (option_factor <= 1.0)
        !           983:           {
        !           984:             fprintf (stderr, "-f factor must be > 1.0\n");
        !           985:             exit (1);
        !           986:           }
        !           987:         break;
        !           988:       case 'o':
        !           989:         speed_option_set (optarg);
        !           990:         break;
        !           991:       case 'P':
        !           992:         option_gnuplot = 1;
        !           993:         option_gnuplot_basename = optarg;
        !           994:         break;
        !           995:       case 'p':
        !           996:         speed_precision = atoi (optarg);
        !           997:         break;
        !           998:       case 'R':
        !           999:         option_seed = time (NULL);
        !          1000:         break;
        !          1001:       case 'r':
        !          1002:         if (option_cmp != CMP_ABSOLUTE)
        !          1003:           goto bad_cmp;
        !          1004:         option_cmp = CMP_RATIO;
        !          1005:         break;
        !          1006:       case 's':
        !          1007:         {
        !          1008:           char  *s;
        !          1009:           for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
1.1       maekawa  1010:             {
1.1.1.2 ! ohara    1011:               if (size_num == size_allocnum)
        !          1012:                 {
        !          1013:                   size_array = (struct size_array_t *)
        !          1014:                     __gmp_allocate_or_reallocate
        !          1015:                     (size_array,
        !          1016:                      size_allocnum * sizeof(size_array[0]),
        !          1017:                      (size_allocnum+10) * sizeof(size_array[0]));
        !          1018:                   size_allocnum += 10;
        !          1019:                 }
        !          1020:               if (sscanf (s, "%ld-%ld",
        !          1021:                           &size_array[size_num].start,
        !          1022:                           &size_array[size_num].end) != 2)
        !          1023:                 {
        !          1024:                   size_array[size_num].start = size_array[size_num].end
        !          1025:                     = atol (s);
        !          1026:                 }
        !          1027:
        !          1028:               if (size_array[size_num].start < 0
        !          1029:                   || size_array[size_num].end < 0
        !          1030:                   || size_array[size_num].start > size_array[size_num].end)
        !          1031:                 {
        !          1032:                   fprintf (stderr, "invalid size parameter: %s\n", s);
        !          1033:                   exit (1);
        !          1034:                 }
        !          1035:
        !          1036:               size_num++;
1.1       maekawa  1037:             }
                   1038:         }
1.1.1.2 ! ohara    1039:         break;
        !          1040:       case 't':
        !          1041:         option_step = atol (optarg);
        !          1042:         if (option_step < 1)
        !          1043:           {
        !          1044:             fprintf (stderr, "-t step must be >= 1\n");
        !          1045:             exit (1);
        !          1046:           }
        !          1047:         break;
        !          1048:       case 'u':
        !          1049:         option_resource_usage = 1;
        !          1050:         break;
        !          1051:       case 'z':
        !          1052:         sp.cache = 1;
        !          1053:         break;
        !          1054:       case 'x':
        !          1055:         sp.align_xp = atol (optarg);
        !          1056:         break;
        !          1057:       case 'y':
        !          1058:         sp.align_yp = atol (optarg);
        !          1059:         break;
        !          1060:       case 'w':
        !          1061:         sp.align_wp = atol (optarg);
        !          1062:         break;
        !          1063:       case 'W':
        !          1064:         sp.align_wp2 = atol (optarg);
        !          1065:         break;
        !          1066:       case '?':
        !          1067:         exit(1);
1.1       maekawa  1068:       }
1.1.1.2 ! ohara    1069:     }
1.1       maekawa  1070:
                   1071:   if (optind >= argc)
                   1072:     {
                   1073:       usage ();
                   1074:       exit (1);
                   1075:     }
                   1076:
                   1077:   if (size_num == 0)
                   1078:     {
                   1079:       fprintf (stderr, "-s <size> must be specified\n");
                   1080:       exit (1);
                   1081:     }
                   1082:
1.1.1.2 ! ohara    1083:   gmp_randseed_ui (RANDS, option_seed);
1.1       maekawa  1084:
1.1.1.2 ! ohara    1085:   choice = (struct choice_t *) (*__gmp_allocate_func)
1.1       maekawa  1086:     ((argc - optind) * sizeof(choice[0]));
                   1087:   for ( ; optind < argc; optind++)
                   1088:     {
                   1089:       struct choice_t  c;
                   1090:       routine_find (&c, argv[optind]);
                   1091:       choice[num_choices] = c;
                   1092:       num_choices++;
                   1093:     }
                   1094:
                   1095:   if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
                   1096:       num_choices < 2)
                   1097:     {
                   1098:       fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
                   1099:     }
                   1100:
                   1101:   speed_time_init ();
1.1.1.2 ! ohara    1102:   if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
        !          1103:     speed_cycletime_need_cycles ();
        !          1104:   else
        !          1105:     speed_cycletime_need_seconds ();
1.1       maekawa  1106:
                   1107:   if (option_gnuplot)
                   1108:     {
1.1.1.2 ! ohara    1109:       run_gnuplot (argc, argv);
1.1       maekawa  1110:     }
                   1111:   else
                   1112:     {
                   1113:       if (option_unit == UNIT_SECONDS)
                   1114:         printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
                   1115:       else
                   1116:         printf ("overhead %.2f cycles",
                   1117:                 speed_measure (speed_noop, NULL) / speed_cycletime);
1.1.1.2 ! ohara    1118:       printf (", precision %d units of %.2e secs",
        !          1119:               speed_precision, speed_unittime);
        !          1120:
        !          1121:       if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
        !          1122:         printf (", CPU freq unknown\n");
        !          1123:       else
        !          1124:         printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
1.1       maekawa  1125:
                   1126:       printf ("       ");
                   1127:       for (i = 0; i < num_choices; i++)
                   1128:         printf (" %*s", COLUMN_WIDTH, choice[i].name);
                   1129:       printf ("\n");
                   1130:
                   1131:       run_all (stdout);
                   1132:     }
                   1133:
                   1134:   if (option_resource_usage)
                   1135:     {
1.1.1.2 ! ohara    1136: #if HAVE_GETRUSAGE
        !          1137:       {
        !          1138:         /* This doesn't give data sizes on linux 2.0.x, only utime. */
        !          1139:         struct rusage  r;
        !          1140:         if (getrusage (RUSAGE_SELF, &r) != 0)
        !          1141:           perror ("getrusage");
        !          1142:         else
        !          1143:           printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
        !          1144:                   r.ru_utime.tv_sec, r.ru_utime.tv_usec,
        !          1145:                   r.ru_idrss, r.ru_isrss, r.ru_ixrss);
        !          1146:       }
1.1       maekawa  1147: #else
1.1.1.2 ! ohara    1148:       printf ("getrusage() not available\n");
1.1       maekawa  1149: #endif
1.1.1.2 ! ohara    1150:
        !          1151:       /* Linux kernel. */
        !          1152:       {
        !          1153:         char  buf[128];
        !          1154:         sprintf (buf, "/proc/%d/status", getpid());
        !          1155:         if (access (buf, R_OK) == 0)
        !          1156:           {
        !          1157:             sprintf (buf, "cat /proc/%d/status", getpid());
        !          1158:             system (buf);
        !          1159:           }
        !          1160:
        !          1161:       }
1.1       maekawa  1162:     }
                   1163:
                   1164:   return 0;
                   1165: }
                   1166:
                   1167:

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>