[BACK]Return to speed.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / tune

Annotation of OpenXM_contrib/gmp/tune/speed.c, Revision 1.1.1.1

1.1       maekawa     1: /* Speed measuring program. */
                      2:
                      3: /*
                      4: Copyright (C) 1999, 2000 Free Software Foundation, Inc.
                      5:
                      6: This file is part of the GNU MP Library.
                      7:
                      8: The GNU MP Library is free software; you can redistribute it and/or modify
                      9: it under the terms of the GNU Lesser General Public License as published by
                     10: the Free Software Foundation; either version 2.1 of the License, or (at your
                     11: option) any later version.
                     12:
                     13: The GNU MP Library is distributed in the hope that it will be useful, but
                     14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     16: License for more details.
                     17:
                     18: You should have received a copy of the GNU Lesser General Public License
                     19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: MA 02111-1307, USA.
                     22: */
                     23:
                     24: /* Usage message is in the code below, run with no arguments to print it.
                     25:    See README for interesting applications.
                     26:
                     27:    To add a new routine foo(), create a speed_foo() function in the style of
                     28:    the existing ones and add an entry in the routine[] array.  Put FLAG_R if
                     29:    speed_foo() wants an "r" parameter.
                     30:
                     31:    The routines don't have help messages or descriptions, but most have
                     32:    suggestive names.  See the source code for full details.
                     33:
                     34: */
                     35:
                     36: #include "config.h"
                     37:
                     38: #if HAVE_GETOPT_H
                     39: #include <getopt.h>  /* for getopt_long() */
                     40: #endif
                     41: #include <limits.h>
                     42: #include <stdio.h>
                     43: #include <stdlib.h>
                     44: #include <string.h>
                     45: #include <unistd.h> /* for getpid() */
                     46: #include <sys/time.h>  /* for struct timeval for sys/resource.h */
                     47: #include <sys/resource.h>  /* for getrusage() */
                     48:
                     49: #include "gmp.h"
                     50: #include "gmp-impl.h"
                     51:
                     52: #include "speed.h"
                     53:
                     54: #if !HAVE_DECL_OPTARG
                     55: extern char *optarg;
                     56: extern int optind, opterr;
                     57: #endif
                     58:
                     59: #if !HAVE_STRTOUL
                     60: #define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)
                     61: #endif
                     62:
                     63: #ifdef SPEED_EXTRA_PROTOS
                     64: SPEED_EXTRA_PROTOS
                     65: #endif
                     66:
                     67:
                     /* Store the limb value N into each of the SIZE limbs starting at PTR.

                        Every argument is parenthesized so that expressions such as "p + 1" or
                        "a + b" can be passed.  The loop index has a name unlikely to appear in
                        an argument, so an "i" used by the caller is not captured by the macro's
                        own variable.  PTR and N are evaluated once per limb and SIZE once per
                        loop test, so arguments should be free of side effects.  */
                     #define MPN_FILL(ptr, size, n)                                  \
                       do {                                                          \
                         mp_size_t  mpn_fill_i;                                      \
                         for (mpn_fill_i = 0; mpn_fill_i < (size); mpn_fill_i++)     \
                           (ptr)[mpn_fill_i] = (n);                                  \
                       } while (0)
                     74:
                     75: #define CMP_ABSOLUTE     1
                     76: #define CMP_RATIO        2
                     77: #define CMP_DIFFERENCE   3
                     78: #define CMP_DIFFPREV     4
                     79: int  option_cmp = CMP_ABSOLUTE;
                     80:
                     81: #define UNIT_SECONDS        1
                     82: #define UNIT_CYCLES         2
                     83: #define UNIT_CYCLESPERLIMB  3
                     84: int  option_unit = UNIT_SECONDS;
                     85:
                     86: #define DATA_RANDOM   1
                     87: #define DATA_RANDOM2  2
                     88: #define DATA_ZEROS    3
                     89: #define DATA_FFS      4
                     90: #define DATA_2FD      5
                     91: int  option_data = DATA_RANDOM;
                     92:
                     93: int        option_square = 0;
                     94: double     option_factor = 0.0;
                     95: mp_size_t  option_step = 1;
                     96: int        option_gnuplot = 0;
                     97: char      *option_gnuplot_basename;
                     98: struct size_array_t {
                     99:   mp_size_t start, end;
                    100: } *size_array = NULL;
                    101: mp_size_t  size_num = 0;
                    102: mp_size_t  size_allocnum = 0;
                    103: int        option_resource_usage = 0;
                    104: long       option_seed = 123456789;
                    105:
                    106: struct speed_params  sp;
                    107:
                    108: #define COLUMN_WIDTH  13  /* for the free-form output */
                    109:
                    110: #define FLAG_R            (1<<0)
                    111: #define FLAG_R_OPTIONAL   (1<<1)
                    112: #define FLAG_RSIZE        (1<<2)
                    113:
                    114: const struct routine_t {
                    115:   /* constants */
                    116:   const char        *name;
                    117:   speed_function_t  fun;
                    118:   int               flag;
                    119:
                    120: } routine[] = {
                    121:
                    122:   { "noop",              speed_noop                 },
                    123:   { "noop_wxs",          speed_noop_wxs             },
                    124:   { "noop_wxys",         speed_noop_wxys            },
                    125:
                    126:   { "mpn_add_n",         speed_mpn_add_n            },
                    127:   { "mpn_sub_n",         speed_mpn_sub_n            },
                    128:   { "mpn_add_n_self",    speed_mpn_add_n_self       },
                    129:   { "mpn_add_n_inplace", speed_mpn_add_n_inplace    },
                    130:
                    131:   { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
                    132:   { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
                    133:   { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
                    134:
                    135:   { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
                    136:   { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
                    137: #if HAVE_NATIVE_mpn_divrem_1c
                    138:   { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
                    139:   { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
                    140: #endif
                    141:   { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R },
                    142: #if HAVE_NATIVE_mpn_mod_1c
                    143:   { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R },
                    144: #endif
                    145:
                    146:   { "mpn_divrem_2",      speed_mpn_divrem_2,        },
                    147:   { "mpn_divexact_by3",  speed_mpn_divexact_by3     },
                    148:
                    149:   { "mpn_bz_divrem_n",   speed_mpn_bz_divrem_n      },
                    150:   { "mpn_bz_divrem_sb",  speed_mpn_bz_divrem_sb     },
                    151:   { "mpn_bz_tdiv_qr",    speed_mpn_bz_tdiv_qr       },
                    152:
                    153:   { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
                    154:   { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },
                    155:
                    156:   { "mpn_and_n",         speed_mpn_and_n            },
                    157:   { "mpn_andn_n",        speed_mpn_andn_n           },
                    158:   { "mpn_nand_n",        speed_mpn_nand_n           },
                    159:   { "mpn_ior_n",         speed_mpn_ior_n            },
                    160:   { "mpn_iorn_n",        speed_mpn_iorn_n           },
                    161:   { "mpn_nior_n",        speed_mpn_nior_n           },
                    162:   { "mpn_xor_n",         speed_mpn_xor_n            },
                    163:   { "mpn_xnor_n",        speed_mpn_xnor_n           },
                    164:
                    165:   { "mpn_popcount",      speed_mpn_popcount         },
                    166:   { "mpn_hamdist",       speed_mpn_hamdist          },
                    167:
                    168:   { "mpn_gcdext",        speed_mpn_gcdext           },
                    169:   { "mpn_gcd",           speed_mpn_gcd              },
                    170:   { "mpn_gcd_1",         speed_mpn_gcd_1, FLAG_R_OPTIONAL },
                    171:
                    172:   { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
                    173:
                    174:   { "mpn_mul_basecase",  speed_mpn_mul_basecase, FLAG_R_OPTIONAL },
                    175:   { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
                    176:
                    177:   { "mpn_mul_n",         speed_mpn_mul_n            },
                    178:   { "mpn_sqr_n",         speed_mpn_sqr_n            },
                    179:
                    180:   { "mpn_kara_mul_n",    speed_mpn_kara_mul_n       },
                    181:   { "mpn_kara_sqr_n",    speed_mpn_kara_sqr_n       },
                    182:   { "mpn_toom3_mul_n",   speed_mpn_toom3_mul_n      },
                    183:   { "mpn_toom3_sqr_n",   speed_mpn_toom3_sqr_n      },
                    184:   { "mpn_mul_fft_full",     speed_mpn_mul_fft_full     },
                    185:   { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr },
                    186:
                    187:   { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
                    188:   { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
                    189:
                    190:   { "mpz_add",           speed_mpz_add              },
                    191:   { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_R_OPTIONAL },
                    192:   { "mpz_fac_ui",        speed_mpz_fac_ui           },
                    193:   { "mpz_fib_ui",        speed_mpz_fib_ui           },
                    194:   { "mpz_powm",          speed_mpz_powm             },
                    195:
                    196:   { "MPN_COPY",          speed_MPN_COPY             },
                    197:   { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
                    198:   { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
                    199:   { "memcpy",            speed_memcpy               },
                    200:
                    201:   { "modlimb_invert",    speed_modlimb_invert       },
                    202:
                    203:   { "malloc_free",                 speed_malloc_free                 },
                    204:   { "malloc_realloc_free",         speed_malloc_realloc_free         },
                    205:   { "mp_allocate_free",            speed_mp_allocate_free            },
                    206:   { "mp_allocate_reallocate_free", speed_mp_allocate_reallocate_free },
                    207:   { "mpz_init_clear",              speed_mpz_init_clear              },
                    208:   { "mpq_init_clear",              speed_mpq_init_clear              },
                    209:   { "mpf_init_clear",              speed_mpf_init_clear              },
                    210:   { "mpz_init_realloc_clear",      speed_mpz_init_realloc_clear      },
                    211:
                    212:   { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
                    213: #if HAVE_NATIVE_mpn_umul_ppmm
                    214:   { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
                    215: #endif
                    216:
                    217:   { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
                    218:   { "udiv_qrnnd_preinv",      speed_udiv_qrnnd_preinv,      FLAG_R_OPTIONAL },
                    219:   { "udiv_qrnnd_preinv2norm", speed_udiv_qrnnd_preinv2norm, FLAG_R_OPTIONAL },
                    220: #if HAVE_NATIVE_mpn_udiv_qrnnd
                    221:   { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
                    222: #endif
                    223:
                    224: #ifdef SPEED_EXTRA_ROUTINES
                    225:   SPEED_EXTRA_ROUTINES
                    226: #endif
                    227: };
                    228:
                    229:
                    /* One routine selected for measurement, with the state run_one() keeps
                       for it between sizes.  */
                    struct choice_t {
                      const struct routine_t  *p;         /* entry in routine[] */
                      int                     r;          /* "r" parameter, copied to s->r
                                                             before each measurement */
                      double                  time;       /* latest time, adjusted in place by
                                                             run_one() for units and compare
                                                             mode */
                      int                     no_time;    /* non-zero when there is no result
                                                             to show for this size */
                      double                  prev_time;  /* unadjusted time of the previous
                                                             size, for CMP_DIFFPREV */
                      const char              *name;      /* used as the gnuplot title */
                    };

                    /* The selected routines: choice[0] to choice[num_choices-1].  */
                    struct choice_t  *choice = NULL;
                    int  num_choices = 0;
                    240:
                    241:
                    242: void
                    243: data_fill (mp_ptr ptr, mp_size_t size)
                    244: {
                    245:   switch (option_data) {
                    246:   case DATA_RANDOM:
                    247:     mpn_random (ptr, size);
                    248:     break;
                    249:   case DATA_RANDOM2:
                    250:     mpn_random2 (ptr, size);
                    251:     break;
                    252:   case DATA_ZEROS:
                    253:     MPN_ZERO (ptr, size);
                    254:     break;
                    255:   case DATA_FFS:
                    256:     MPN_FILL (ptr, size, MP_LIMB_T_MAX);
                    257:     break;
                    258:   case DATA_2FD:
                    259:     MPN_FILL (ptr, size, MP_LIMB_T_MAX);
                    260:     ptr[0] -= 2;
                    261:     break;
                    262:   default:
                    263:     abort();
                    264:     /*NOTREACHED*/
                    265:   }
                    266: }
                    267:
                    268: /* The code here handling the various combinations of output options isn't
                    269:    too attractive, but it works and is fairly clean.  */
                    270:
                    /* Divisor applied to a time at size n for UNIT_CYCLESPERLIMB, selected
                       by option_square:
                         1          n*n
                         2          n*(n+1)/2
                         otherwise  n
                       The arithmetic is done in the type of n, and n is evaluated more than
                       once.  */
                    #define SIZE_TO_DIVISOR(n)                              \
                      ((option_square == 1) ? ((n) * (n))                   \
                       : (option_square == 2) ? ((n) * ((n) + 1) / 2)       \
                       : (n))
                    275:
                    276: void
                    277: run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
                    278: {
                    279:   const char  *first_open_fastest, *first_open_notfastest, *first_close;
                    280:   int         i, fastest;
                    281:   double      fastest_time;
                    282:   TMP_DECL (marker);
                    283:
                    284:   TMP_MARK (marker);
                    285:   sp.xp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_xp);
                    286:   sp.yp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_yp);
                    287:
                    288:   data_fill (s->xp, s->size);
                    289:   data_fill (s->yp, s->size);
                    290:
                    291:   if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
                    292:     {
                    293:       first_open_fastest = "(#";
                    294:       first_open_notfastest = " (";
                    295:       first_close = ")";
                    296:     }
                    297:   else
                    298:     {
                    299:       first_open_fastest = "#";
                    300:       first_open_notfastest = " ";
                    301:       first_close = "";
                    302:     }
                    303:
                    304:   fastest = -1;
                    305:   fastest_time = -1.0;
                    306:   for (i = 0; i < num_choices; i++)
                    307:     {
                    308:       s->r = choice[i].r;
                    309:       choice[i].time = speed_measure (choice[i].p->fun, s);
                    310:       choice[i].no_time = (choice[i].time == -1.0);
                    311:
                    312:
                    313:       /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
                    314:          is before any differences.  */
                    315:       {
                    316:         double     t;
                    317:         t = choice[i].time;
                    318:         if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
                    319:           {
                    320:             if (choice[i].prev_time == -1.0)
                    321:               choice[i].no_time = 1;
                    322:             else
                    323:               choice[i].time = choice[i].time - choice[i].prev_time;
                    324:           }
                    325:         choice[i].prev_time = t;
                    326:       }
                    327:
                    328:       if (choice[i].no_time)
                    329:         continue;
                    330:
                    331:       /* Look for the fastest after CMP_DIFFPREV has been applied, but
                    332:          before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
                    333:          if there's more than one routine.  */
                    334:       if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
                    335:         {
                    336:           fastest = i;
                    337:           fastest_time = choice[i].time;
                    338:         }
                    339:
                    340:       if (option_cmp == CMP_DIFFPREV)
                    341:         {
                    342:           /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
                    343:           if (option_unit == UNIT_CYCLES)
                    344:             choice[i].time /= speed_cycletime;
                    345:           else if (option_unit == UNIT_CYCLESPERLIMB)
                    346:             {
                    347:               if (prev_size == -1)
                    348:                 choice[i].time /= speed_cycletime;
                    349:               else
                    350:                 choice[i].time /=  (speed_cycletime
                    351:                                     * (SIZE_TO_DIVISOR(s->size)
                    352:                                        - SIZE_TO_DIVISOR(prev_size)));
                    353:             }
                    354:         }
                    355:       else
                    356:         {
                    357:           if (option_unit == UNIT_CYCLES)
                    358:             choice[i].time /= speed_cycletime;
                    359:           else if (option_unit == UNIT_CYCLESPERLIMB)
                    360:             choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
                    361:
                    362:           if (option_cmp == CMP_RATIO && i > 0)
                    363:             {
                    364:               /* A ratio isn't affected by the units chosen. */
                    365:               if (choice[0].no_time || choice[0].time == 0.0)
                    366:                 choice[i].no_time = 1;
                    367:               else
                    368:                 choice[i].time /= choice[0].time;
                    369:             }
                    370:           else if (option_cmp == CMP_DIFFERENCE && i > 0)
                    371:             {
                    372:               if (choice[0].no_time)
                    373:                 {
                    374:                   choice[i].no_time = 1;
                    375:                   continue;
                    376:                 }
                    377:               choice[i].time -= choice[0].time;
                    378:             }
                    379:         }
                    380:     }
                    381:
                    382:   if (option_gnuplot)
                    383:     {
                    384:       /* In CMP_DIFFPREV, don't print anything for the first size, start
                    385:          with the second where an actual difference is available.
                    386:
                    387:          In CMP_RATIO, print the first column as 1.0.
                    388:
                    389:          The 9 decimals printed is much more than the expected precision of
                    390:          the measurements actually. */
                    391:
                    392:       if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
                    393:         {
                    394:           fprintf (fp, "%-6ld ", s->size);
                    395:           for (i = 0; i < num_choices; i++)
                    396:             fprintf (fp, "  %.9e",
                    397:                      choice[i].no_time ? 0.0
                    398:                      : (option_cmp == CMP_RATIO && i == 0) ? 1.0
                    399:                      : choice[i].time);
                    400:           fprintf (fp, "\n");
                    401:         }
                    402:     }
                    403:   else
                    404:     {
                    405:       fprintf (fp, "%-6ld ", s->size);
                    406:       for (i = 0; i < num_choices; i++)
                    407:         {
                    408:           char  buf[128];
                    409:           int   decimals;
                    410:
                    411:           if (choice[i].no_time)
                    412:             decimals = 0, choice[i].time = 0.0;
                    413:           else if (option_unit == UNIT_CYCLESPERLIMB
                    414:                    || (option_cmp == CMP_RATIO && i > 0))
                    415:             decimals = 4;
                    416:           else if (option_unit == UNIT_CYCLES)
                    417:             decimals = 2;
                    418:           else
                    419:             decimals = 9;
                    420:
                    421:           sprintf (buf, "%s%.*f%s",
                    422:                    i == fastest ? first_open_fastest : first_open_notfastest,
                    423:                    decimals, choice[i].time, first_close);
                    424:           fprintf (fp, " %*s", COLUMN_WIDTH, buf);
                    425:         }
                    426:       fprintf (fp, "\n");
                    427:     }
                    428:
                    429:   TMP_FREE (marker);
                    430: }
                    431:
                    432: void
                    433: run_all (FILE *fp)
                    434: {
                    435:   mp_size_t  prev_size;
                    436:   int        i;
                    437:   TMP_DECL (marker);
                    438:
                    439:   TMP_MARK (marker);
                    440:   sp.xp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_xp);
                    441:   sp.yp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_yp);
                    442:
                    443:   data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
                    444:   data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
                    445:
                    446:   for (i = 0; i < size_num; i++)
                    447:     {
                    448:       sp.size = size_array[i].start;
                    449:       prev_size = -1;
                    450:       for (;;)
                    451:         {
                    452:           mp_size_t  step;
                    453:
                    454:           if (option_data == DATA_2FD && sp.size >= 2)
                    455:             sp.xp[sp.size-1] = 2;
                    456:
                    457:           run_one (fp, &sp, prev_size);
                    458:           prev_size = sp.size;
                    459:
                    460:           if (option_data == DATA_2FD && sp.size >= 2)
                    461:             sp.xp[sp.size-1] = MP_LIMB_T_MAX;
                    462:
                    463:           if (option_factor != 0.0)
                    464:             {
                    465:               step = (mp_size_t) (sp.size * option_factor - sp.size);
                    466:               if (step < 1)
                    467:                 step = 1;
                    468:             }
                    469:           else
                    470:             step = 1;
                    471:           if (step < option_step)
                    472:             step = option_step;
                    473:
                    474:           sp.size += step;
                    475:           if (sp.size > size_array[i].end)
                    476:             break;
                    477:         }
                    478:     }
                    479:
                    480:   TMP_FREE (marker);
                    481: }
                    482:
                    483:
                    /* Open FILENAME for writing ("w" mode) and return the stream.  On failure
                       a message is printed to stderr and the program exits with status 1, so
                       the return value is never NULL.  */
                    FILE *
                    fopen_for_write (const char *filename)
                    {
                      FILE  *out = fopen (filename, "w");

                      if (out == NULL)
                        {
                          fprintf (stderr, "Cannot create %s\n", filename);
                          exit (1);
                        }

                      return out;
                    }
                    495:
                    /* Close FP, a stream that was being written to FILENAME.  If the stream
                       has its error indicator set, or the close itself fails, a message is
                       printed to stderr and the program exits with status 1.  */
                    void
                    fclose_written (FILE *fp, const char *filename)
                    {
                      /* the error indicator has to be read while the stream is still open */
                      int  write_failed = ferror (fp);
                      int  close_failed = fclose (fp);

                      if (write_failed == 0 && close_failed == 0)
                        return;

                      fprintf (stderr, "Error writing %s\n", filename);
                      exit (1);
                    }
                    510:
                    511:
                    512: void
                    513: run_gnuplot (void)
                    514: {
                    515:   char  *plot_filename;
                    516:   char  *data_filename;
                    517:   FILE  *fp;
                    518:   int   i;
                    519:
                    520:   plot_filename = (char *) (*_mp_allocate_func)
                    521:     (strlen (option_gnuplot_basename) + 20);
                    522:   data_filename = (char *) (*_mp_allocate_func)
                    523:     (strlen (option_gnuplot_basename) + 20);
                    524:
                    525:   sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
                    526:   sprintf (data_filename, "%s.data",    option_gnuplot_basename);
                    527:
                    528:   fp = fopen_for_write (plot_filename);
                    529:
                    530:   /* Putting the key at the top left is usually good, and you can change it
                    531:      interactively if it's not. */
                    532:   fprintf (fp, "set key left\n");
                    533:
                    534:   /* designed to make it possible to see crossovers easily */
                    535:   fprintf (fp, "set data style linespoints\n");
                    536:
                    537:   fprintf (fp, "plot ");
                    538:   for (i = 0; i < num_choices; i++)
                    539:     {
                    540:       fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
                    541:       fprintf (fp, " title \"%s\"", choice[i].name);
                    542:
                    543:       if (i != num_choices-1)
                    544:         fprintf (fp, ", \\");
                    545:       fprintf (fp, "\n");
                    546:     }
                    547:
                    548:   fprintf (fp, "load \"-\"\n");
                    549:   fclose_written (fp, plot_filename);
                    550:
                    551:   fp = fopen_for_write (data_filename);
                    552:
                    553:   /* Unbuffered so you can see where the program was up to if it crashes or
                    554:      you kill it. */
                    555:   setbuf (fp, NULL);
                    556:
                    557:   run_all (fp);
                    558:   fclose_written (fp, data_filename);
                    559: }
                    560:
                    561:
                    /* Return a long with n many one bits (starting from the least significant).
                       n must be in the range 0 to BITS_PER_LONGINT inclusive, and is evaluated
                       more than once.

                       The shift is done on an unsigned long, since 1L << (BITS_PER_LONGINT-1)
                       would overflow a signed long.  */
                    #define LONG_ONES(n)                            \
                      ((n) == BITS_PER_LONGINT ? -1L                \
                       : (n) == 0 ? 0L                              \
                       : (long) ((1UL << (n)) - 1))
                    565:
                    566: long
                    567: r_string (const char *s)
                    568: {
                    569:   const char  *s_orig = s;
                    570:   long  n;
                    571:
                    572:   if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
                    573:     n = strtoul (s+2, (char **) &s, 16);
                    574:   else
                    575:     n = strtol (s, (char **) &s, 10);
                    576:
                    577:   if (strcmp (s, "bits") == 0)
                    578:     {
                    579:       mp_limb_t  l;
                    580:       if (n > BITS_PER_LONGINT)
                    581:         {
                    582:           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
                    583:                    n, BITS_PER_LONGINT);
                    584:           exit (1);
                    585:         }
                    586:       mpn_random (&l, 1);
                    587:       return (l | (1 << (n-1))) & LONG_ONES(n);
                    588:     }
                    589:   else  if (strcmp (s, "ones") == 0)
                    590:     {
                    591:       if (n > BITS_PER_LONGINT)
                    592:         {
                    593:           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
                    594:                    n, BITS_PER_LONGINT);
                    595:           exit (1);
                    596:         }
                    597:       return LONG_ONES (n);
                    598:     }
                    599:   else if (*s != '\0')
                    600:     {
                    601:       fprintf (stderr, "invalid r parameter: %s\n", s_orig);
                    602:       exit (1);
                    603:     }
                    604:
                    605:   return n;
                    606: }
                    607:
                    608:
                    609: void
                    610: routine_find (struct choice_t *c, const char *s)
                    611: {
                    612:   int     i;
                    613:   size_t  nlen;
                    614:
                    615:   for (i = 0; i < numberof (routine); i++)
                    616:     {
                    617:       nlen = strlen (routine[i].name);
                    618:       if (memcmp (s, routine[i].name, nlen) != 0)
                    619:         continue;
                    620:
                    621:       if (s[nlen] == '.')
                    622:         {
                    623:           /* match, with a .r parameter */
                    624:
                    625:           if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
                    626:             {
                    627:               fprintf (stderr, "Choice %s bad: doesn't take a \".<r>\" paramater\n", s);
                    628:               exit (1);
                    629:             }
                    630:
                    631:           c->p = &routine[i];
                    632:           c->r = r_string (s + nlen + 1);
                    633:           c->name = s;
                    634:           return;
                    635:         }
                    636:
                    637:       if (s[nlen] == '\0')
                    638:         {
                    639:           /* match, with no parameter */
                    640:
                    641:           if (routine[i].flag & FLAG_R)
                    642:             {
                    643:               fprintf (stderr, "Choice %s bad: needs a \".<r>\" paramater\n", s);
                    644:               exit (1);
                    645:             }
                    646:
                    647:           c->p = &routine[i];
                    648:           c->r = 0;
                    649:           c->name = s;
                    650:           return;
                    651:         }
                    652:     }
                    653:
                    654:   fprintf (stderr, "Choice %s unrecognised\n", s);
                    655:   exit (1);
                    656: }
                    657:
                    658:
                    659: void
                    660: usage (void)
                    661: {
                    662:   int  i;
                    663:
                    664:   printf ("\
                    665: Usage: speed [-options] -s size <routine>...\n\
                    666: Measure the speed of some routines.\n\
                    667: Times are in seconds, accuracy is shown.\n\
                    668: \n\
                    669:    -p num     set precision as number of time units each routine must run\n\
                    670:    -s size[-end][,size[-end]]...   sizes to measure\n\
                    671:               single sizes or ranges, sep with comma or use multiple -s\n\
                    672:    -t step    step through sizes by given amount\n\
                    673:    -f factor  step through sizes by given factor (eg. 1.05)\n\
                    674:    -r         show times as ratios of the first routine\n\
                    675:    -d         show times as difference from the first routine\n\
                    676:    -D         show times as difference from previous size shown\n\
                    677:    -c         show times in CPU cycles\n\
                    678:    -C         show times in cycles per limb\n\
                    679:    -u         print resource usage (memory) at end\n\
                    680:    -P name    output plot files \"name.gnuplot\" and \"name.data\"\n\
                    681:    -a <type>  use given data: random(default), random2, zeros, ffs\n\
                    682:    -x, -y, -w, -W <align>  specify data alignments, sources and dests\n\
                    683:    -o addrs   print addresses of data blocks\n\
                    684: \n\
                    685: If both -t and -f are used, it means step by the factor or the step, whichever\n\
                    686: is greater.\n\
                    687: If both -C and -D are used, it means cycles per however many limbs between a\n\
                    688: size and the previous size.\n\
                    689: \n\
                    690: After running with -P, plots can be viewed with Gnuplot or Quickplot.\n\
                    691: \"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n\
                    692: a log/log plot).\n\
                    693: \"quickplot -s name.data\" (has interactive zooming, and note -s is important\n\
                    694: when viewing more than one routine, it means same axis scales for all data).\n\
                    695: \n\
                    696: The available routines are as follows.\n\
                    697: \n\
                    698: ");
                    699:
                    700:   for (i = 0; i < numberof (routine); i++)
                    701:     {
                    702:       if (routine[i].flag & FLAG_R)
                    703:         printf ("\t%s.r\n", routine[i].name);
                    704:       else if (routine[i].flag & FLAG_R_OPTIONAL)
                    705:         printf ("\t%s (optional .r)\n", routine[i].name);
                    706:       else
                    707:         printf ("\t%s\n", routine[i].name);
                    708:     }
                    709:
                    710:   printf ("\n\
                    711: Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n\
                    712: r should be in decimal, or use 0xN for hexadecimal.\n\
                    713: Special forms for r are Nbits for a random N bit number, and Nones for N one\n\
                    714: bits.\n\
                    715: \n\
                    716: Times for sizes out of the range accepted by a routine are shown as 0.\n\
                    717: The fastest routine at each size is marked with a # (free form output only).\n\
                    718: \n\
                    719: %s\
                    720: \n\
                    721: Gnuplot home page http://www.cs.dartmouth.edu/gnuplot_info.html\n\
                    722: Quickplot home page http://www.kachinatech.com/~quickplot\n\
                    723: ", speed_time_string);
                    724: }
                    725:
                    726: int
                    727: main (int argc, char *argv[])
                    728: {
                    729: #define OPTION_ALIGN     1000
                    730: #if HAVE_GETOPT_LONG
                    731:   static const struct option  longopts[] = {
                    732:     { "align",           required_argument, NULL, OPTION_ALIGN    },
                    733:     { "align-x",         required_argument, NULL, 'x' },
                    734:     { "align-y",         required_argument, NULL, 'y' },
                    735:     { "align-w",         required_argument, NULL, 'w' },
                    736:     { "align-w2",        required_argument, NULL, 'W' },
                    737:     { "data",            required_argument, NULL, 'a' },
                    738:     { "cycles",          no_argument,       NULL, 'c' },
                    739:     { "cycles-per-limb", no_argument,       NULL, 'C' },
                    740:     { "diff",            no_argument,       NULL, 'd' },
                    741:     { "diff-prev",       no_argument,       NULL, 'D' },
                    742:     { "difference",      no_argument,       NULL, 'd' },
                    743:     { "difference-prev", no_argument,       NULL, 'D' },
                    744:     { "factor",          required_argument, NULL, 'f' },
                    745:     { "plot",            no_argument,       NULL, 'P' },
                    746:     { "precision",       required_argument, NULL, 'p' },
                    747:     { "ratio",           no_argument,       NULL, 'r' },
                    748:     { "randomize",       no_argument,       NULL, 'R' },
                    749:     { "sizes",           required_argument, NULL, 's' },
                    750:     { "step",            required_argument, NULL, 't' },
                    751:     { "resources",       required_argument, NULL, 'u' },
                    752:     { "uncached",        no_argument,       NULL, 'z' },
                    753:     { NULL }
                    754:   };
                    755: #endif
                    756:
                    757:   int  i;
                    758:   int  opt;
                    759:
                    760:   /* Unbuffered so output goes straight out when directed to a pipe or file
                    761:      and isn't lost if you kill the program half way.  */
                    762:   setbuf (stdout, NULL);
                    763:
                    764: #define OPTSTRING  "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"
                    765: #if HAVE_GETOPT_LONG
                    766:   while ((opt = getopt_long(argc, argv, OPTSTRING, longopts, NULL))
                    767:          != EOF)
                    768: #else
                    769:     while ((opt = getopt(argc, argv, OPTSTRING)) != EOF)
                    770: #endif
                    771:       {
                    772:         switch (opt) {
                    773:         case 'a':
                    774:           if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
                    775:           else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
                    776:           else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
                    777:           else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
                    778:           else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
                    779:           else
                    780:             {
                    781:               fprintf (stderr, "unrecognised data option: %s\n", optarg);
                    782:               exit (1);
                    783:             }
                    784:           break;
                    785:         case 'C':
                    786:           if (option_unit  != UNIT_SECONDS) goto bad_unit;
                    787:           option_unit = UNIT_CYCLESPERLIMB;
                    788:           break;
                    789:         case 'c':
                    790:           if (option_unit != UNIT_SECONDS)
                    791:             {
                    792:             bad_unit:
                    793:               fprintf (stderr, "cannot use more than one of -c, -C\n");
                    794:               exit (1);
                    795:             }
                    796:           option_unit = UNIT_CYCLES;
                    797:           break;
                    798:         case 'D':
                    799:           if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
                    800:           option_cmp = CMP_DIFFPREV;
                    801:           break;
                    802:         case 'd':
                    803:           if (option_cmp != CMP_ABSOLUTE)
                    804:             {
                    805:             bad_cmp:
                    806:               fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
                    807:               exit (1);
                    808:             }
                    809:           option_cmp = CMP_DIFFERENCE;
                    810:           break;
                    811:         case 'E':
                    812:           option_square = 1;
                    813:           break;
                    814:         case 'F':
                    815:           option_square = 2;
                    816:           break;
                    817:         case 'f':
                    818:           option_factor = atof (optarg);
                    819:           if (option_factor <= 1.0)
                    820:             {
                    821:               fprintf (stderr, "-f factor must be > 1.0\n");
                    822:               exit (1);
                    823:             }
                    824:           break;
                    825:         case 'o':
                    826:           speed_option_set (optarg);
                    827:           break;
                    828:         case 'P':
                    829:           option_gnuplot = 1;
                    830:           option_gnuplot_basename = optarg;
                    831:           break;
                    832:         case 'p':
                    833:           speed_precision = atoi (optarg);
                    834:           break;
                    835:         case 'R':
                    836:           option_seed = time (NULL);
                    837:           break;
                    838:         case 'r':
                    839:           if (option_cmp != CMP_ABSOLUTE)
                    840:             goto bad_cmp;
                    841:           option_cmp = CMP_RATIO;
                    842:           break;
                    843:         case 's':
                    844:           {
                    845:             char  *s;
                    846:             for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
                    847:               {
                    848:                 if (size_num == size_allocnum)
                    849:                   {
                    850:                     size_array = (struct size_array_t *)
                    851:                       _mp_allocate_or_reallocate
                    852:                       (size_array,
                    853:                        size_allocnum * sizeof(size_array[0]),
                    854:                        (size_allocnum+10) * sizeof(size_array[0]));
                    855:                     size_allocnum += 10;
                    856:                   }
                    857:                 if (sscanf (s, "%ld-%ld",
                    858:                             &size_array[size_num].start,
                    859:                             &size_array[size_num].end) != 2)
                    860:                   {
                    861:                     size_array[size_num].start = size_array[size_num].end
                    862:                       = atol (s);
                    863:                   }
                    864:
                    865:                 if (size_array[size_num].start < 1
                    866:                     || size_array[size_num].end < 1
                    867:                     || size_array[size_num].start > size_array[size_num].end)
                    868:                   {
                    869:                     fprintf (stderr, "invalid size parameter: %s\n", s);
                    870:                     exit (1);
                    871:                   }
                    872:
                    873:                 size_num++;
                    874:               }
                    875:           }
                    876:           break;
                    877:         case 't':
                    878:           option_step = atol (optarg);
                    879:           if (option_step < 1)
                    880:             {
                    881:               fprintf (stderr, "-t step must be >= 1\n");
                    882:               exit (1);
                    883:             }
                    884:           break;
                    885:         case 'u':
                    886:           option_resource_usage = 1;
                    887:           break;
                    888:         case 'z':
                    889:           sp.cache = 1;
                    890:           break;
                    891:         case OPTION_ALIGN:
                    892:           abort();
                    893:         case 'x':
                    894:           sp.align_xp = atol (optarg);
                    895:           break;
                    896:         case 'y':
                    897:           sp.align_yp = atol (optarg);
                    898:           break;
                    899:         case 'w':
                    900:           sp.align_wp = atol (optarg);
                    901:           break;
                    902:         case 'W':
                    903:           sp.align_wp2 = atol (optarg);
                    904:           break;
                    905:         case '?':
                    906:           exit(1);
                    907:         }
                    908:       }
                    909:
                    910:   if (optind >= argc)
                    911:     {
                    912:       usage ();
                    913:       exit (1);
                    914:     }
                    915:
                    916:   if (size_num == 0)
                    917:     {
                    918:       fprintf (stderr, "-s <size> must be specified\n");
                    919:       exit (1);
                    920:     }
                    921:
                    922:   srand (option_seed);
                    923:   srandom (option_seed);
                    924:   srand48 (option_seed);
                    925:
                    926:   choice = (struct choice_t *) (*_mp_allocate_func)
                    927:     ((argc - optind) * sizeof(choice[0]));
                    928:   for ( ; optind < argc; optind++)
                    929:     {
                    930:       struct choice_t  c;
                    931:       routine_find (&c, argv[optind]);
                    932:       choice[num_choices] = c;
                    933:       num_choices++;
                    934:     }
                    935:
                    936:   if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
                    937:       num_choices < 2)
                    938:     {
                    939:       fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
                    940:     }
                    941:
                    942:   speed_time_init ();
                    943:
                    944:   if ((option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
                    945:       && speed_cycletime == 1.0)
                    946:     {
                    947:       fprintf (stderr, "Times in cycles requested, but CPU frequency unknown.\n");
                    948:       fprintf (stderr, "Use environment variable GMP_CPU_FREQUENCY in Hertz, eg. 450e6\n");
                    949:       exit (1);
                    950:     }
                    951:
                    952:   if (option_gnuplot)
                    953:     {
                    954:       run_gnuplot ();
                    955:     }
                    956:   else
                    957:     {
                    958:       if (option_unit == UNIT_SECONDS)
                    959:         printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
                    960:       else
                    961:         printf ("overhead %.2f cycles",
                    962:                 speed_measure (speed_noop, NULL) / speed_cycletime);
                    963:       printf (", precision %d units of %.2e secs, cycle %.1e\n",
                    964:               speed_precision, speed_unittime, speed_cycletime);
                    965:
                    966:       printf ("       ");
                    967:       for (i = 0; i < num_choices; i++)
                    968:         printf (" %*s", COLUMN_WIDTH, choice[i].name);
                    969:       printf ("\n");
                    970:
                    971:       run_all (stdout);
                    972:     }
                    973:
                    974:   if (option_resource_usage)
                    975:     {
                    976: #if defined(linux)
                    977:       /* This is Linux kernel specific. */
                    978:       char  buf[128];
                    979:       sprintf (buf, "cat /proc/%d/status", getpid());
                    980:       system (buf);
                    981:
                    982: #else
                    983:       /* This doesn't give data sizes on Linux 2.0.36, only utime. */
                    984:       struct rusage  r;
                    985:       if (getrusage (RUSAGE_SELF, &r) != 0)
                    986:         perror ("getrusage");
                    987:       else
                    988:         printf ("utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
                    989:                 r.ru_utime.tv_sec, r.ru_utime.tv_usec,
                    990:                 r.ru_idrss, r.ru_isrss, r.ru_ixrss);
                    991: #endif
                    992:     }
                    993:
                    994:   return 0;
                    995: }
                    996:
                    997:

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>