[BACK]Return to speed.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / tune

Annotation of OpenXM_contrib/gmp/tune/speed.c, Revision 1.1

1.1     ! maekawa     1: /* Speed measuring program. */
        !             2:
        !             3: /*
        !             4: Copyright (C) 1999, 2000 Free Software Foundation, Inc.
        !             5:
        !             6: This file is part of the GNU MP Library.
        !             7:
        !             8: The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: it under the terms of the GNU Lesser General Public License as published by
        !            10: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            11: option) any later version.
        !            12:
        !            13: The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            16: License for more details.
        !            17:
        !            18: You should have received a copy of the GNU Lesser General Public License
        !            19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: MA 02111-1307, USA.
        !            22: */
        !            23:
        !            24: /* Usage message is in the code below, run with no arguments to print it.
        !            25:    See README for interesting applications.
        !            26:
        !            27:    To add a new routine foo(), create a speed_foo() function in the style of
        !            28:    the existing ones and add an entry in the routine[] array.  Put FLAG_R if
        !            29:    speed_foo() wants an "r" parameter.
        !            30:
        !            31:    The routines don't have help messages or descriptions, but most have
        !            32:    suggestive names.  See the source code for full details.
        !            33:
        !            34: */
        !            35:
        !            36: #include "config.h"
        !            37:
        !            38: #if HAVE_GETOPT_H
        !            39: #include <getopt.h>  /* for getopt_long() */
        !            40: #endif
        !            41: #include <limits.h>
        !            42: #include <stdio.h>
        !            43: #include <stdlib.h>
        !            44: #include <string.h>
        !            45: #include <unistd.h> /* for getpid() */
        !            46: #include <sys/time.h>  /* for struct timeval for sys/resource.h */
        !            47: #include <sys/resource.h>  /* for getrusage() */
        !            48:
        !            49: #include "gmp.h"
        !            50: #include "gmp-impl.h"
        !            51:
        !            52: #include "speed.h"
        !            53:
        !            54: #if !HAVE_DECL_OPTARG
        !            55: extern char *optarg;
        !            56: extern int optind, opterr;
        !            57: #endif
        !            58:
        !            59: #if !HAVE_STRTOUL
        !            60: #define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)
        !            61: #endif
        !            62:
        !            63: #ifdef SPEED_EXTRA_PROTOS
        !            64: SPEED_EXTRA_PROTOS
        !            65: #endif
        !            66:
        !            67:
        !            68: #define MPN_FILL(ptr, size, n)                  \
        !            69:   do {                                          \
        !            70:     mp_size_t  i;                               \
        !            71:     for (i = 0; i < size; i++)                  \
        !            72:       ptr[i] = n;                               \
        !            73:   } while (0)
        !            74:
        !            75: #define CMP_ABSOLUTE     1
        !            76: #define CMP_RATIO        2
        !            77: #define CMP_DIFFERENCE   3
        !            78: #define CMP_DIFFPREV     4
        !            79: int  option_cmp = CMP_ABSOLUTE;
        !            80:
        !            81: #define UNIT_SECONDS        1
        !            82: #define UNIT_CYCLES         2
        !            83: #define UNIT_CYCLESPERLIMB  3
        !            84: int  option_unit = UNIT_SECONDS;
        !            85:
        !            86: #define DATA_RANDOM   1
        !            87: #define DATA_RANDOM2  2
        !            88: #define DATA_ZEROS    3
        !            89: #define DATA_FFS      4
        !            90: #define DATA_2FD      5
        !            91: int  option_data = DATA_RANDOM;
        !            92:
        !            93: int        option_square = 0;
        !            94: double     option_factor = 0.0;
        !            95: mp_size_t  option_step = 1;
        !            96: int        option_gnuplot = 0;
        !            97: char      *option_gnuplot_basename;
        !            98: struct size_array_t {
        !            99:   mp_size_t start, end;
        !           100: } *size_array = NULL;
        !           101: mp_size_t  size_num = 0;
        !           102: mp_size_t  size_allocnum = 0;
        !           103: int        option_resource_usage = 0;
        !           104: long       option_seed = 123456789;
        !           105:
        !           106: struct speed_params  sp;
        !           107:
        !           108: #define COLUMN_WIDTH  13  /* for the free-form output */
        !           109:
        !           110: #define FLAG_R            (1<<0)
        !           111: #define FLAG_R_OPTIONAL   (1<<1)
        !           112: #define FLAG_RSIZE        (1<<2)
        !           113:
        !           114: const struct routine_t {
        !           115:   /* constants */
        !           116:   const char        *name;
        !           117:   speed_function_t  fun;
        !           118:   int               flag;
        !           119:
        !           120: } routine[] = {
        !           121:
        !           122:   { "noop",              speed_noop                 },
        !           123:   { "noop_wxs",          speed_noop_wxs             },
        !           124:   { "noop_wxys",         speed_noop_wxys            },
        !           125:
        !           126:   { "mpn_add_n",         speed_mpn_add_n            },
        !           127:   { "mpn_sub_n",         speed_mpn_sub_n            },
        !           128:   { "mpn_add_n_self",    speed_mpn_add_n_self       },
        !           129:   { "mpn_add_n_inplace", speed_mpn_add_n_inplace    },
        !           130:
        !           131:   { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
        !           132:   { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
        !           133:   { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
        !           134:
        !           135:   { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
        !           136:   { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
        !           137: #if HAVE_NATIVE_mpn_divrem_1c
        !           138:   { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
        !           139:   { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
        !           140: #endif
        !           141:   { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R },
        !           142: #if HAVE_NATIVE_mpn_mod_1c
        !           143:   { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R },
        !           144: #endif
        !           145:
        !           146:   { "mpn_divrem_2",      speed_mpn_divrem_2,        },
        !           147:   { "mpn_divexact_by3",  speed_mpn_divexact_by3     },
        !           148:
        !           149:   { "mpn_bz_divrem_n",   speed_mpn_bz_divrem_n      },
        !           150:   { "mpn_bz_divrem_sb",  speed_mpn_bz_divrem_sb     },
        !           151:   { "mpn_bz_tdiv_qr",    speed_mpn_bz_tdiv_qr       },
        !           152:
        !           153:   { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
        !           154:   { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },
        !           155:
        !           156:   { "mpn_and_n",         speed_mpn_and_n            },
        !           157:   { "mpn_andn_n",        speed_mpn_andn_n           },
        !           158:   { "mpn_nand_n",        speed_mpn_nand_n           },
        !           159:   { "mpn_ior_n",         speed_mpn_ior_n            },
        !           160:   { "mpn_iorn_n",        speed_mpn_iorn_n           },
        !           161:   { "mpn_nior_n",        speed_mpn_nior_n           },
        !           162:   { "mpn_xor_n",         speed_mpn_xor_n            },
        !           163:   { "mpn_xnor_n",        speed_mpn_xnor_n           },
        !           164:
        !           165:   { "mpn_popcount",      speed_mpn_popcount         },
        !           166:   { "mpn_hamdist",       speed_mpn_hamdist          },
        !           167:
        !           168:   { "mpn_gcdext",        speed_mpn_gcdext           },
        !           169:   { "mpn_gcd",           speed_mpn_gcd              },
        !           170:   { "mpn_gcd_1",         speed_mpn_gcd_1, FLAG_R_OPTIONAL },
        !           171:
        !           172:   { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
        !           173:
        !           174:   { "mpn_mul_basecase",  speed_mpn_mul_basecase, FLAG_R_OPTIONAL },
        !           175:   { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
        !           176:
        !           177:   { "mpn_mul_n",         speed_mpn_mul_n            },
        !           178:   { "mpn_sqr_n",         speed_mpn_sqr_n            },
        !           179:
        !           180:   { "mpn_kara_mul_n",    speed_mpn_kara_mul_n       },
        !           181:   { "mpn_kara_sqr_n",    speed_mpn_kara_sqr_n       },
        !           182:   { "mpn_toom3_mul_n",   speed_mpn_toom3_mul_n      },
        !           183:   { "mpn_toom3_sqr_n",   speed_mpn_toom3_sqr_n      },
        !           184:   { "mpn_mul_fft_full",     speed_mpn_mul_fft_full     },
        !           185:   { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr },
        !           186:
        !           187:   { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
        !           188:   { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
        !           189:
        !           190:   { "mpz_add",           speed_mpz_add              },
        !           191:   { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_R_OPTIONAL },
        !           192:   { "mpz_fac_ui",        speed_mpz_fac_ui           },
        !           193:   { "mpz_fib_ui",        speed_mpz_fib_ui           },
        !           194:   { "mpz_powm",          speed_mpz_powm             },
        !           195:
        !           196:   { "MPN_COPY",          speed_MPN_COPY             },
        !           197:   { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
        !           198:   { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
        !           199:   { "memcpy",            speed_memcpy               },
        !           200:
        !           201:   { "modlimb_invert",    speed_modlimb_invert       },
        !           202:
        !           203:   { "malloc_free",                 speed_malloc_free                 },
        !           204:   { "malloc_realloc_free",         speed_malloc_realloc_free         },
        !           205:   { "mp_allocate_free",            speed_mp_allocate_free            },
        !           206:   { "mp_allocate_reallocate_free", speed_mp_allocate_reallocate_free },
        !           207:   { "mpz_init_clear",              speed_mpz_init_clear              },
        !           208:   { "mpq_init_clear",              speed_mpq_init_clear              },
        !           209:   { "mpf_init_clear",              speed_mpf_init_clear              },
        !           210:   { "mpz_init_realloc_clear",      speed_mpz_init_realloc_clear      },
        !           211:
        !           212:   { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
        !           213: #if HAVE_NATIVE_mpn_umul_ppmm
        !           214:   { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
        !           215: #endif
        !           216:
        !           217:   { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
        !           218:   { "udiv_qrnnd_preinv",      speed_udiv_qrnnd_preinv,      FLAG_R_OPTIONAL },
        !           219:   { "udiv_qrnnd_preinv2norm", speed_udiv_qrnnd_preinv2norm, FLAG_R_OPTIONAL },
        !           220: #if HAVE_NATIVE_mpn_udiv_qrnnd
        !           221:   { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
        !           222: #endif
        !           223:
        !           224: #ifdef SPEED_EXTRA_ROUTINES
        !           225:   SPEED_EXTRA_ROUTINES
        !           226: #endif
        !           227: };
        !           228:
        !           229:
        !           230: struct choice_t {
        !           231:   const struct routine_t  *p;
        !           232:   int                     r;
        !           233:   double                  time;
        !           234:   int                     no_time;
        !           235:   double                  prev_time;
        !           236:   const char              *name;
        !           237: };
        !           238: struct choice_t  *choice;
        !           239: int  num_choices = 0;
        !           240:
        !           241:
        !           242: void
        !           243: data_fill (mp_ptr ptr, mp_size_t size)
        !           244: {
        !           245:   switch (option_data) {
        !           246:   case DATA_RANDOM:
        !           247:     mpn_random (ptr, size);
        !           248:     break;
        !           249:   case DATA_RANDOM2:
        !           250:     mpn_random2 (ptr, size);
        !           251:     break;
        !           252:   case DATA_ZEROS:
        !           253:     MPN_ZERO (ptr, size);
        !           254:     break;
        !           255:   case DATA_FFS:
        !           256:     MPN_FILL (ptr, size, MP_LIMB_T_MAX);
        !           257:     break;
        !           258:   case DATA_2FD:
        !           259:     MPN_FILL (ptr, size, MP_LIMB_T_MAX);
        !           260:     ptr[0] -= 2;
        !           261:     break;
        !           262:   default:
        !           263:     abort();
        !           264:     /*NOTREACHED*/
        !           265:   }
        !           266: }
        !           267:
        !           268: /* The code here handling the various combinations of output options isn't
        !           269:    too attractive, but it works and is fairly clean.  */
        !           270:
        !           271: #define SIZE_TO_DIVISOR(n)              \
        !           272:   (option_square == 1 ? (n)*(n)         \
        !           273:   : option_square == 2 ? (n)*((n)+1)/2  \
        !           274:   : (n))
        !           275:
        !           276: void
        !           277: run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
        !           278: {
        !           279:   const char  *first_open_fastest, *first_open_notfastest, *first_close;
        !           280:   int         i, fastest;
        !           281:   double      fastest_time;
        !           282:   TMP_DECL (marker);
        !           283:
        !           284:   TMP_MARK (marker);
        !           285:   sp.xp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_xp);
        !           286:   sp.yp = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_yp);
        !           287:
        !           288:   data_fill (s->xp, s->size);
        !           289:   data_fill (s->yp, s->size);
        !           290:
        !           291:   if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
        !           292:     {
        !           293:       first_open_fastest = "(#";
        !           294:       first_open_notfastest = " (";
        !           295:       first_close = ")";
        !           296:     }
        !           297:   else
        !           298:     {
        !           299:       first_open_fastest = "#";
        !           300:       first_open_notfastest = " ";
        !           301:       first_close = "";
        !           302:     }
        !           303:
        !           304:   fastest = -1;
        !           305:   fastest_time = -1.0;
        !           306:   for (i = 0; i < num_choices; i++)
        !           307:     {
        !           308:       s->r = choice[i].r;
        !           309:       choice[i].time = speed_measure (choice[i].p->fun, s);
        !           310:       choice[i].no_time = (choice[i].time == -1.0);
        !           311:
        !           312:
        !           313:       /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
        !           314:          is before any differences.  */
        !           315:       {
        !           316:         double     t;
        !           317:         t = choice[i].time;
        !           318:         if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
        !           319:           {
        !           320:             if (choice[i].prev_time == -1.0)
        !           321:               choice[i].no_time = 1;
        !           322:             else
        !           323:               choice[i].time = choice[i].time - choice[i].prev_time;
        !           324:           }
        !           325:         choice[i].prev_time = t;
        !           326:       }
        !           327:
        !           328:       if (choice[i].no_time)
        !           329:         continue;
        !           330:
        !           331:       /* Look for the fastest after CMP_DIFFPREV has been applied, but
        !           332:          before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
        !           333:          if there's more than one routine.  */
        !           334:       if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
        !           335:         {
        !           336:           fastest = i;
        !           337:           fastest_time = choice[i].time;
        !           338:         }
        !           339:
        !           340:       if (option_cmp == CMP_DIFFPREV)
        !           341:         {
        !           342:           /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
        !           343:           if (option_unit == UNIT_CYCLES)
        !           344:             choice[i].time /= speed_cycletime;
        !           345:           else if (option_unit == UNIT_CYCLESPERLIMB)
        !           346:             {
        !           347:               if (prev_size == -1)
        !           348:                 choice[i].time /= speed_cycletime;
        !           349:               else
        !           350:                 choice[i].time /=  (speed_cycletime
        !           351:                                     * (SIZE_TO_DIVISOR(s->size)
        !           352:                                        - SIZE_TO_DIVISOR(prev_size)));
        !           353:             }
        !           354:         }
        !           355:       else
        !           356:         {
        !           357:           if (option_unit == UNIT_CYCLES)
        !           358:             choice[i].time /= speed_cycletime;
        !           359:           else if (option_unit == UNIT_CYCLESPERLIMB)
        !           360:             choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
        !           361:
        !           362:           if (option_cmp == CMP_RATIO && i > 0)
        !           363:             {
        !           364:               /* A ratio isn't affected by the units chosen. */
        !           365:               if (choice[0].no_time || choice[0].time == 0.0)
        !           366:                 choice[i].no_time = 1;
        !           367:               else
        !           368:                 choice[i].time /= choice[0].time;
        !           369:             }
        !           370:           else if (option_cmp == CMP_DIFFERENCE && i > 0)
        !           371:             {
        !           372:               if (choice[0].no_time)
        !           373:                 {
        !           374:                   choice[i].no_time = 1;
        !           375:                   continue;
        !           376:                 }
        !           377:               choice[i].time -= choice[0].time;
        !           378:             }
        !           379:         }
        !           380:     }
        !           381:
        !           382:   if (option_gnuplot)
        !           383:     {
        !           384:       /* In CMP_DIFFPREV, don't print anything for the first size, start
        !           385:          with the second where an actual difference is available.
        !           386:
        !           387:          In CMP_RATIO, print the first column as 1.0.
        !           388:
        !           389:          The 9 decimals printed is much more than the expected precision of
        !           390:          the measurements actually. */
        !           391:
        !           392:       if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
        !           393:         {
        !           394:           fprintf (fp, "%-6ld ", s->size);
        !           395:           for (i = 0; i < num_choices; i++)
        !           396:             fprintf (fp, "  %.9e",
        !           397:                      choice[i].no_time ? 0.0
        !           398:                      : (option_cmp == CMP_RATIO && i == 0) ? 1.0
        !           399:                      : choice[i].time);
        !           400:           fprintf (fp, "\n");
        !           401:         }
        !           402:     }
        !           403:   else
        !           404:     {
        !           405:       fprintf (fp, "%-6ld ", s->size);
        !           406:       for (i = 0; i < num_choices; i++)
        !           407:         {
        !           408:           char  buf[128];
        !           409:           int   decimals;
        !           410:
        !           411:           if (choice[i].no_time)
        !           412:             decimals = 0, choice[i].time = 0.0;
        !           413:           else if (option_unit == UNIT_CYCLESPERLIMB
        !           414:                    || (option_cmp == CMP_RATIO && i > 0))
        !           415:             decimals = 4;
        !           416:           else if (option_unit == UNIT_CYCLES)
        !           417:             decimals = 2;
        !           418:           else
        !           419:             decimals = 9;
        !           420:
        !           421:           sprintf (buf, "%s%.*f%s",
        !           422:                    i == fastest ? first_open_fastest : first_open_notfastest,
        !           423:                    decimals, choice[i].time, first_close);
        !           424:           fprintf (fp, " %*s", COLUMN_WIDTH, buf);
        !           425:         }
        !           426:       fprintf (fp, "\n");
        !           427:     }
        !           428:
        !           429:   TMP_FREE (marker);
        !           430: }
        !           431:
        !           432: void
        !           433: run_all (FILE *fp)
        !           434: {
        !           435:   mp_size_t  prev_size;
        !           436:   int        i;
        !           437:   TMP_DECL (marker);
        !           438:
        !           439:   TMP_MARK (marker);
        !           440:   sp.xp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_xp);
        !           441:   sp.yp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, sp.align_yp);
        !           442:
        !           443:   data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
        !           444:   data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
        !           445:
        !           446:   for (i = 0; i < size_num; i++)
        !           447:     {
        !           448:       sp.size = size_array[i].start;
        !           449:       prev_size = -1;
        !           450:       for (;;)
        !           451:         {
        !           452:           mp_size_t  step;
        !           453:
        !           454:           if (option_data == DATA_2FD && sp.size >= 2)
        !           455:             sp.xp[sp.size-1] = 2;
        !           456:
        !           457:           run_one (fp, &sp, prev_size);
        !           458:           prev_size = sp.size;
        !           459:
        !           460:           if (option_data == DATA_2FD && sp.size >= 2)
        !           461:             sp.xp[sp.size-1] = MP_LIMB_T_MAX;
        !           462:
        !           463:           if (option_factor != 0.0)
        !           464:             {
        !           465:               step = (mp_size_t) (sp.size * option_factor - sp.size);
        !           466:               if (step < 1)
        !           467:                 step = 1;
        !           468:             }
        !           469:           else
        !           470:             step = 1;
        !           471:           if (step < option_step)
        !           472:             step = option_step;
        !           473:
        !           474:           sp.size += step;
        !           475:           if (sp.size > size_array[i].end)
        !           476:             break;
        !           477:         }
        !           478:     }
        !           479:
        !           480:   TMP_FREE (marker);
        !           481: }
        !           482:
        !           483:
        !           484: FILE *
        !           485: fopen_for_write (const char *filename)
        !           486: {
        !           487:   FILE  *fp;
        !           488:   if ((fp = fopen (filename, "w")) == NULL)
        !           489:     {
        !           490:       fprintf (stderr, "Cannot create %s\n", filename);
        !           491:       exit(1);
        !           492:     }
        !           493:   return fp;
        !           494: }
        !           495:
        !           496: void
        !           497: fclose_written (FILE *fp, const char *filename)
        !           498: {
        !           499:   int  err;
        !           500:
        !           501:   err = ferror (fp);
        !           502:   err |= fclose (fp);
        !           503:
        !           504:   if (err)
        !           505:     {
        !           506:       fprintf (stderr, "Error writing %s\n", filename);
        !           507:       exit(1);
        !           508:     }
        !           509: }
        !           510:
        !           511:
        !           512: void
        !           513: run_gnuplot (void)
        !           514: {
        !           515:   char  *plot_filename;
        !           516:   char  *data_filename;
        !           517:   FILE  *fp;
        !           518:   int   i;
        !           519:
        !           520:   plot_filename = (char *) (*_mp_allocate_func)
        !           521:     (strlen (option_gnuplot_basename) + 20);
        !           522:   data_filename = (char *) (*_mp_allocate_func)
        !           523:     (strlen (option_gnuplot_basename) + 20);
        !           524:
        !           525:   sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
        !           526:   sprintf (data_filename, "%s.data",    option_gnuplot_basename);
        !           527:
        !           528:   fp = fopen_for_write (plot_filename);
        !           529:
        !           530:   /* Putting the key at the top left is usually good, and you can change it
        !           531:      interactively if it's not. */
        !           532:   fprintf (fp, "set key left\n");
        !           533:
        !           534:   /* designed to make it possible to see crossovers easily */
        !           535:   fprintf (fp, "set data style linespoints\n");
        !           536:
        !           537:   fprintf (fp, "plot ");
        !           538:   for (i = 0; i < num_choices; i++)
        !           539:     {
        !           540:       fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
        !           541:       fprintf (fp, " title \"%s\"", choice[i].name);
        !           542:
        !           543:       if (i != num_choices-1)
        !           544:         fprintf (fp, ", \\");
        !           545:       fprintf (fp, "\n");
        !           546:     }
        !           547:
        !           548:   fprintf (fp, "load \"-\"\n");
        !           549:   fclose_written (fp, plot_filename);
        !           550:
        !           551:   fp = fopen_for_write (data_filename);
        !           552:
        !           553:   /* Unbuffered so you can see where the program was up to if it crashes or
        !           554:      you kill it. */
        !           555:   setbuf (fp, NULL);
        !           556:
        !           557:   run_all (fp);
        !           558:   fclose_written (fp, data_filename);
        !           559: }
        !           560:
        !           561:
        !           562: /* Return a long with n many one bits (starting from the least significant) */
        !           563: #define LONG_ONES(n) \
        !           564:   ((n) == BITS_PER_LONGINT ? -1L : (n) == 0 ? 0L : (1L << (n)) - 1)
        !           565:
        !           566: long
        !           567: r_string (const char *s)
        !           568: {
        !           569:   const char  *s_orig = s;
        !           570:   long  n;
        !           571:
        !           572:   if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
        !           573:     n = strtoul (s+2, (char **) &s, 16);
        !           574:   else
        !           575:     n = strtol (s, (char **) &s, 10);
        !           576:
        !           577:   if (strcmp (s, "bits") == 0)
        !           578:     {
        !           579:       mp_limb_t  l;
        !           580:       if (n > BITS_PER_LONGINT)
        !           581:         {
        !           582:           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
        !           583:                    n, BITS_PER_LONGINT);
        !           584:           exit (1);
        !           585:         }
        !           586:       mpn_random (&l, 1);
        !           587:       return (l | (1 << (n-1))) & LONG_ONES(n);
        !           588:     }
        !           589:   else  if (strcmp (s, "ones") == 0)
        !           590:     {
        !           591:       if (n > BITS_PER_LONGINT)
        !           592:         {
        !           593:           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
        !           594:                    n, BITS_PER_LONGINT);
        !           595:           exit (1);
        !           596:         }
        !           597:       return LONG_ONES (n);
        !           598:     }
        !           599:   else if (*s != '\0')
        !           600:     {
        !           601:       fprintf (stderr, "invalid r parameter: %s\n", s_orig);
        !           602:       exit (1);
        !           603:     }
        !           604:
        !           605:   return n;
        !           606: }
        !           607:
        !           608:
        !           609: void
        !           610: routine_find (struct choice_t *c, const char *s)
        !           611: {
        !           612:   int     i;
        !           613:   size_t  nlen;
        !           614:
        !           615:   for (i = 0; i < numberof (routine); i++)
        !           616:     {
        !           617:       nlen = strlen (routine[i].name);
        !           618:       if (memcmp (s, routine[i].name, nlen) != 0)
        !           619:         continue;
        !           620:
        !           621:       if (s[nlen] == '.')
        !           622:         {
        !           623:           /* match, with a .r parameter */
        !           624:
        !           625:           if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
        !           626:             {
        !           627:               fprintf (stderr, "Choice %s bad: doesn't take a \".<r>\" paramater\n", s);
        !           628:               exit (1);
        !           629:             }
        !           630:
        !           631:           c->p = &routine[i];
        !           632:           c->r = r_string (s + nlen + 1);
        !           633:           c->name = s;
        !           634:           return;
        !           635:         }
        !           636:
        !           637:       if (s[nlen] == '\0')
        !           638:         {
        !           639:           /* match, with no parameter */
        !           640:
        !           641:           if (routine[i].flag & FLAG_R)
        !           642:             {
        !           643:               fprintf (stderr, "Choice %s bad: needs a \".<r>\" paramater\n", s);
        !           644:               exit (1);
        !           645:             }
        !           646:
        !           647:           c->p = &routine[i];
        !           648:           c->r = 0;
        !           649:           c->name = s;
        !           650:           return;
        !           651:         }
        !           652:     }
        !           653:
        !           654:   fprintf (stderr, "Choice %s unrecognised\n", s);
        !           655:   exit (1);
        !           656: }
        !           657:
        !           658:
        !           659: void
        !           660: usage (void)
        !           661: {
        !           662:   int  i;
        !           663:
        !           664:   printf ("\
        !           665: Usage: speed [-options] -s size <routine>...\n\
        !           666: Measure the speed of some routines.\n\
        !           667: Times are in seconds, accuracy is shown.\n\
        !           668: \n\
        !           669:    -p num     set precision as number of time units each routine must run\n\
        !           670:    -s size[-end][,size[-end]]...   sizes to measure\n\
        !           671:               single sizes or ranges, sep with comma or use multiple -s\n\
        !           672:    -t step    step through sizes by given amount\n\
        !           673:    -f factor  step through sizes by given factor (eg. 1.05)\n\
        !           674:    -r         show times as ratios of the first routine\n\
        !           675:    -d         show times as difference from the first routine\n\
        !           676:    -D         show times as difference from previous size shown\n\
        !           677:    -c         show times in CPU cycles\n\
        !           678:    -C         show times in cycles per limb\n\
        !           679:    -u         print resource usage (memory) at end\n\
        !           680:    -P name    output plot files \"name.gnuplot\" and \"name.data\"\n\
        !           681:    -a <type>  use given data: random(default), random2, zeros, ffs\n\
        !           682:    -x, -y, -w, -W <align>  specify data alignments, sources and dests\n\
        !           683:    -o addrs   print addresses of data blocks\n\
        !           684: \n\
        !           685: If both -t and -f are used, it means step by the factor or the step, whichever\n\
        !           686: is greater.\n\
        !           687: If both -C and -D are used, it means cycles per however many limbs between a\n\
        !           688: size and the previous size.\n\
        !           689: \n\
        !           690: After running with -P, plots can be viewed with Gnuplot or Quickplot.\n\
        !           691: \"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n\
        !           692: a log/log plot).\n\
        !           693: \"quickplot -s name.data\" (has interactive zooming, and note -s is important\n\
        !           694: when viewing more than one routine, it means same axis scales for all data).\n\
        !           695: \n\
        !           696: The available routines are as follows.\n\
        !           697: \n\
        !           698: ");
        !           699:
        !           700:   for (i = 0; i < numberof (routine); i++)
        !           701:     {
        !           702:       if (routine[i].flag & FLAG_R)
        !           703:         printf ("\t%s.r\n", routine[i].name);
        !           704:       else if (routine[i].flag & FLAG_R_OPTIONAL)
        !           705:         printf ("\t%s (optional .r)\n", routine[i].name);
        !           706:       else
        !           707:         printf ("\t%s\n", routine[i].name);
        !           708:     }
        !           709:
        !           710:   printf ("\n\
        !           711: Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n\
        !           712: r should be in decimal, or use 0xN for hexadecimal.\n\
        !           713: Special forms for r are Nbits for a random N bit number, and Nones for N one\n\
        !           714: bits.\n\
        !           715: \n\
        !           716: Times for sizes out of the range accepted by a routine are shown as 0.\n\
        !           717: The fastest routine at each size is marked with a # (free form output only).\n\
        !           718: \n\
        !           719: %s\
        !           720: \n\
        !           721: Gnuplot home page http://www.cs.dartmouth.edu/gnuplot_info.html\n\
        !           722: Quickplot home page http://www.kachinatech.com/~quickplot\n\
        !           723: ", speed_time_string);
        !           724: }
        !           725:
        !           726: int
        !           727: main (int argc, char *argv[])
        !           728: {
        !           729: #define OPTION_ALIGN     1000
        !           730: #if HAVE_GETOPT_LONG
        !           731:   static const struct option  longopts[] = {
        !           732:     { "align",           required_argument, NULL, OPTION_ALIGN    },
        !           733:     { "align-x",         required_argument, NULL, 'x' },
        !           734:     { "align-y",         required_argument, NULL, 'y' },
        !           735:     { "align-w",         required_argument, NULL, 'w' },
        !           736:     { "align-w2",        required_argument, NULL, 'W' },
        !           737:     { "data",            required_argument, NULL, 'a' },
        !           738:     { "cycles",          no_argument,       NULL, 'c' },
        !           739:     { "cycles-per-limb", no_argument,       NULL, 'C' },
        !           740:     { "diff",            no_argument,       NULL, 'd' },
        !           741:     { "diff-prev",       no_argument,       NULL, 'D' },
        !           742:     { "difference",      no_argument,       NULL, 'd' },
        !           743:     { "difference-prev", no_argument,       NULL, 'D' },
        !           744:     { "factor",          required_argument, NULL, 'f' },
        !           745:     { "plot",            no_argument,       NULL, 'P' },
        !           746:     { "precision",       required_argument, NULL, 'p' },
        !           747:     { "ratio",           no_argument,       NULL, 'r' },
        !           748:     { "randomize",       no_argument,       NULL, 'R' },
        !           749:     { "sizes",           required_argument, NULL, 's' },
        !           750:     { "step",            required_argument, NULL, 't' },
        !           751:     { "resources",       required_argument, NULL, 'u' },
        !           752:     { "uncached",        no_argument,       NULL, 'z' },
        !           753:     { NULL }
        !           754:   };
        !           755: #endif
        !           756:
        !           757:   int  i;
        !           758:   int  opt;
        !           759:
        !           760:   /* Unbuffered so output goes straight out when directed to a pipe or file
        !           761:      and isn't lost if you kill the program half way.  */
        !           762:   setbuf (stdout, NULL);
        !           763:
        !           764: #define OPTSTRING  "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"
        !           765: #if HAVE_GETOPT_LONG
        !           766:   while ((opt = getopt_long(argc, argv, OPTSTRING, longopts, NULL))
        !           767:          != EOF)
        !           768: #else
        !           769:     while ((opt = getopt(argc, argv, OPTSTRING)) != EOF)
        !           770: #endif
        !           771:       {
        !           772:         switch (opt) {
        !           773:         case 'a':
        !           774:           if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
        !           775:           else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
        !           776:           else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
        !           777:           else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
        !           778:           else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
        !           779:           else
        !           780:             {
        !           781:               fprintf (stderr, "unrecognised data option: %s\n", optarg);
        !           782:               exit (1);
        !           783:             }
        !           784:           break;
        !           785:         case 'C':
        !           786:           if (option_unit  != UNIT_SECONDS) goto bad_unit;
        !           787:           option_unit = UNIT_CYCLESPERLIMB;
        !           788:           break;
        !           789:         case 'c':
        !           790:           if (option_unit != UNIT_SECONDS)
        !           791:             {
        !           792:             bad_unit:
        !           793:               fprintf (stderr, "cannot use more than one of -c, -C\n");
        !           794:               exit (1);
        !           795:             }
        !           796:           option_unit = UNIT_CYCLES;
        !           797:           break;
        !           798:         case 'D':
        !           799:           if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
        !           800:           option_cmp = CMP_DIFFPREV;
        !           801:           break;
        !           802:         case 'd':
        !           803:           if (option_cmp != CMP_ABSOLUTE)
        !           804:             {
        !           805:             bad_cmp:
        !           806:               fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
        !           807:               exit (1);
        !           808:             }
        !           809:           option_cmp = CMP_DIFFERENCE;
        !           810:           break;
        !           811:         case 'E':
        !           812:           option_square = 1;
        !           813:           break;
        !           814:         case 'F':
        !           815:           option_square = 2;
        !           816:           break;
        !           817:         case 'f':
        !           818:           option_factor = atof (optarg);
        !           819:           if (option_factor <= 1.0)
        !           820:             {
        !           821:               fprintf (stderr, "-f factor must be > 1.0\n");
        !           822:               exit (1);
        !           823:             }
        !           824:           break;
        !           825:         case 'o':
        !           826:           speed_option_set (optarg);
        !           827:           break;
        !           828:         case 'P':
        !           829:           option_gnuplot = 1;
        !           830:           option_gnuplot_basename = optarg;
        !           831:           break;
        !           832:         case 'p':
        !           833:           speed_precision = atoi (optarg);
        !           834:           break;
        !           835:         case 'R':
        !           836:           option_seed = time (NULL);
        !           837:           break;
        !           838:         case 'r':
        !           839:           if (option_cmp != CMP_ABSOLUTE)
        !           840:             goto bad_cmp;
        !           841:           option_cmp = CMP_RATIO;
        !           842:           break;
        !           843:         case 's':
        !           844:           {
        !           845:             char  *s;
        !           846:             for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
        !           847:               {
        !           848:                 if (size_num == size_allocnum)
        !           849:                   {
        !           850:                     size_array = (struct size_array_t *)
        !           851:                       _mp_allocate_or_reallocate
        !           852:                       (size_array,
        !           853:                        size_allocnum * sizeof(size_array[0]),
        !           854:                        (size_allocnum+10) * sizeof(size_array[0]));
        !           855:                     size_allocnum += 10;
        !           856:                   }
        !           857:                 if (sscanf (s, "%ld-%ld",
        !           858:                             &size_array[size_num].start,
        !           859:                             &size_array[size_num].end) != 2)
        !           860:                   {
        !           861:                     size_array[size_num].start = size_array[size_num].end
        !           862:                       = atol (s);
        !           863:                   }
        !           864:
        !           865:                 if (size_array[size_num].start < 1
        !           866:                     || size_array[size_num].end < 1
        !           867:                     || size_array[size_num].start > size_array[size_num].end)
        !           868:                   {
        !           869:                     fprintf (stderr, "invalid size parameter: %s\n", s);
        !           870:                     exit (1);
        !           871:                   }
        !           872:
        !           873:                 size_num++;
        !           874:               }
        !           875:           }
        !           876:           break;
        !           877:         case 't':
        !           878:           option_step = atol (optarg);
        !           879:           if (option_step < 1)
        !           880:             {
        !           881:               fprintf (stderr, "-t step must be >= 1\n");
        !           882:               exit (1);
        !           883:             }
        !           884:           break;
        !           885:         case 'u':
        !           886:           option_resource_usage = 1;
        !           887:           break;
        !           888:         case 'z':
        !           889:           sp.cache = 1;
        !           890:           break;
        !           891:         case OPTION_ALIGN:
        !           892:           abort();
        !           893:         case 'x':
        !           894:           sp.align_xp = atol (optarg);
        !           895:           break;
        !           896:         case 'y':
        !           897:           sp.align_yp = atol (optarg);
        !           898:           break;
        !           899:         case 'w':
        !           900:           sp.align_wp = atol (optarg);
        !           901:           break;
        !           902:         case 'W':
        !           903:           sp.align_wp2 = atol (optarg);
        !           904:           break;
        !           905:         case '?':
        !           906:           exit(1);
        !           907:         }
        !           908:       }
        !           909:
        !           910:   if (optind >= argc)
        !           911:     {
        !           912:       usage ();
        !           913:       exit (1);
        !           914:     }
        !           915:
        !           916:   if (size_num == 0)
        !           917:     {
        !           918:       fprintf (stderr, "-s <size> must be specified\n");
        !           919:       exit (1);
        !           920:     }
        !           921:
        !           922:   srand (option_seed);
        !           923:   srandom (option_seed);
        !           924:   srand48 (option_seed);
        !           925:
        !           926:   choice = (struct choice_t *) (*_mp_allocate_func)
        !           927:     ((argc - optind) * sizeof(choice[0]));
        !           928:   for ( ; optind < argc; optind++)
        !           929:     {
        !           930:       struct choice_t  c;
        !           931:       routine_find (&c, argv[optind]);
        !           932:       choice[num_choices] = c;
        !           933:       num_choices++;
        !           934:     }
        !           935:
        !           936:   if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
        !           937:       num_choices < 2)
        !           938:     {
        !           939:       fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
        !           940:     }
        !           941:
        !           942:   speed_time_init ();
        !           943:
        !           944:   if ((option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
        !           945:       && speed_cycletime == 1.0)
        !           946:     {
        !           947:       fprintf (stderr, "Times in cycles requested, but CPU frequency unknown.\n");
        !           948:       fprintf (stderr, "Use environment variable GMP_CPU_FREQUENCY in Hertz, eg. 450e6\n");
        !           949:       exit (1);
        !           950:     }
        !           951:
        !           952:   if (option_gnuplot)
        !           953:     {
        !           954:       run_gnuplot ();
        !           955:     }
        !           956:   else
        !           957:     {
        !           958:       if (option_unit == UNIT_SECONDS)
        !           959:         printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
        !           960:       else
        !           961:         printf ("overhead %.2f cycles",
        !           962:                 speed_measure (speed_noop, NULL) / speed_cycletime);
        !           963:       printf (", precision %d units of %.2e secs, cycle %.1e\n",
        !           964:               speed_precision, speed_unittime, speed_cycletime);
        !           965:
        !           966:       printf ("       ");
        !           967:       for (i = 0; i < num_choices; i++)
        !           968:         printf (" %*s", COLUMN_WIDTH, choice[i].name);
        !           969:       printf ("\n");
        !           970:
        !           971:       run_all (stdout);
        !           972:     }
        !           973:
        !           974:   if (option_resource_usage)
        !           975:     {
        !           976: #if defined(linux)
        !           977:       /* This is Linux kernel specific. */
        !           978:       char  buf[128];
        !           979:       sprintf (buf, "cat /proc/%d/status", getpid());
        !           980:       system (buf);
        !           981:
        !           982: #else
        !           983:       /* This doesn't give data sizes on Linux 2.0.36, only utime. */
        !           984:       struct rusage  r;
        !           985:       if (getrusage (RUSAGE_SELF, &r) != 0)
        !           986:         perror ("getrusage");
        !           987:       else
        !           988:         printf ("utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
        !           989:                 r.ru_utime.tv_sec, r.ru_utime.tv_usec,
        !           990:                 r.ru_idrss, r.ru_isrss, r.ru_ixrss);
        !           991: #endif
        !           992:     }
        !           993:
        !           994:   return 0;
        !           995: }
        !           996:
        !           997:

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>