[BACK]Return to tuneup.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / tune

Annotation of OpenXM_contrib/gmp/tune/tuneup.c, Revision 1.1.1.1

1.1       maekawa     1: /* Create tuned thresholds for various algorithms. */
                      2:
                      3: /*
                      4: Copyright (C) 1999, 2000 Free Software Foundation, Inc.
                      5:
                      6: This file is part of the GNU MP Library.
                      7:
                      8: The GNU MP Library is free software; you can redistribute it and/or modify
                      9: it under the terms of the GNU Lesser General Public License as published by
                     10: the Free Software Foundation; either version 2.1 of the License, or (at your
                     11: option) any later version.
                     12:
                     13: The GNU MP Library is distributed in the hope that it will be useful, but
                     14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     16: License for more details.
                     17:
                     18: You should have received a copy of the GNU Lesser General Public License
                     19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: MA 02111-1307, USA.
                     22: */
                     23:
                     24: /* Usage: tune [-t] [-t] [-p precision]
                     25:
                     26:    -t turns on some diagnostic traces, a second -t turns on more traces.
                     27:
                     28:    The thresholds are determined as follows.  A crossover may not be a
                     29:    single size but rather a range where it oscillates between method A or
                     30:    method B faster.  If the threshold is set making B used where A is faster
                     31:    (or vice versa) that's bad.  Badness is the percentage time lost and
                     32:    total badness is the sum of this over all sizes measured.  The threshold
                     33:    is set to minimize total badness.
                     34:
                     35:    Suppose, as sizes increase, method B becomes faster than method A.  The
                     36:    effect of the rule is that, as you look at increasing sizes, isolated
                     37:    points where B is faster are ignored, but when it's consistently faster,
                     38:    or faster on balance, then the threshold is set there.  The same result
                     39:    is obtained thinking in the other direction of A becoming faster at
                     40:    smaller sizes.
                     41:
                     42:    In practice the thresholds tend to be chosen to bring on the next
                     43:    algorithm fairly quickly.
                     44:
                     45:    This rule is attractive because it's got a basis in reason and is fairly
                     46:    easy to implement, but no work has been done to actually compare it in
                     47:    absolute terms to other possibilities.
                     48:
                     49:    Sometimes running the program twice produces slightly different results.
                     50:    This is probably because there's so little separating algorithms near
                     51:    their crossover, and on that basis it should make little or no difference
                     52:    to the final speed of the relevant routines, but nothing has been done to
                     53:    check that carefully.
                     54:
                     55:    Limitations:
                     56:
                     57:    The FFTs aren't subject to the same badness rule as the other thresholds,
                     58:    so each k is probably being brought on a touch early.  This isn't likely
                     59:    to make a difference, and the simpler probing means fewer tests.
                     60:
                     61: */
                     62:
                     63: #define TUNE_PROGRAM_BUILD  1
                     64:
                     65: #include <math.h>
                     66: #include <stdio.h>
                     67: #include <stdlib.h>
                     68: #include <time.h>
                     69: #include <unistd.h>
                     70:
                     71: #include "gmp.h"
                     72: #include "gmp-impl.h"
                     73:
                     74: #include "speed.h"
                     75: #include "sqr_basecase.h"
                     76:
                     77: #if !HAVE_DECL_OPTARG
                     78: extern char *optarg;
                     79: extern int optind, opterr;
                     80: #endif
                     81:
                     82:
                     83: #define MAX_SIZE        1000  /* limbs */
                     84: #define STEP_FACTOR     0.01  /* how much to step sizes by (rounded down) */
                     85: #define MAX_TABLE       2     /* threshold entries */
                     86:
                     87:
                     88: #if WANT_FFT
                     89: mp_size_t  option_fft_max_size = 50000;  /* limbs */
                     90: #else
                     91: mp_size_t  option_fft_max_size = 0;
                     92: #endif
                     93: int        option_trace = 0;
                     94: int        option_fft_trace = 0;
                     95: struct speed_params  s;
                     96:
                     97: struct dat_t {
                     98:   mp_size_t  size;
                     99:   double     d;
                    100: } *dat = NULL;
                    101: int  ndat = 0;
                    102: int  allocdat = 0;
                    103:
                    104:
                    105: /* Each "_threshold" array must be 1 bigger than the number of thresholds
                    106:    being tuned in a set, because one() stores an value in the entry above
                    107:    the one it's determining. */
                    108:
                    109: mp_size_t  mul_threshold[MAX_TABLE+1] = { MP_SIZE_T_MAX };
                    110: mp_size_t  fft_modf_mul_threshold = MP_SIZE_T_MAX;
                    111: mp_size_t  sqr_threshold[MAX_TABLE+1] = { MP_SIZE_T_MAX };
                    112: mp_size_t  fft_modf_sqr_threshold = MP_SIZE_T_MAX;
                    113: mp_size_t  bz_threshold[2] = { MP_SIZE_T_MAX };
                    114: mp_size_t  fib_threshold[2] = { MP_SIZE_T_MAX };
                    115: mp_size_t  powm_threshold[2] = { MP_SIZE_T_MAX };
                    116: mp_size_t  gcd_accel_threshold[2] = { MP_SIZE_T_MAX };
                    117: mp_size_t  gcdext_threshold[2] = { MP_SIZE_T_MAX };
                    118:
                    119:
                    120: #ifndef KARATSUBA_SQR_MAX
                    121: #define KARATSUBA_SQR_MAX  0 /* meaning no limit */
                    122: #endif
                    123:
                    124: struct param_t {
                    125:   const char  *name[MAX_TABLE];
                    126:   int         stop_since_change;
                    127:   mp_size_t   min_size;
                    128:   mp_size_t   max_size[MAX_TABLE];
                    129: };
                    130:
                    131:
                    132: /* Add an entry to the end of the dat[] array, reallocing to make it bigger
                    133:    if necessary.  */
                    134: void
                    135: add_dat (mp_size_t size, double d)
                    136: {
                    137: #define ALLOCDAT_STEP  500
                    138:
                    139:   ASSERT_ALWAYS (ndat <= allocdat);
                    140:
                    141:   if (ndat == allocdat)
                    142:     {
                    143:       dat = (struct dat_t *) _mp_allocate_or_reallocate
                    144:         (dat, allocdat * sizeof(dat[0]),
                    145:          (allocdat+ALLOCDAT_STEP) * sizeof(dat[0]));
                    146:       allocdat += ALLOCDAT_STEP;
                    147:     }
                    148:
                    149:   dat[ndat].size = size;
                    150:   dat[ndat].d = d;
                    151:   ndat++;
                    152: }
                    153:
                    154:
                    155: /* Return the threshold size based on the data accumulated. */
                    156: mp_size_t
                    157: analyze_dat (int i, int final)
                    158: {
                    159:   double  x, min_x;
                    160:   int     j, min_j;
                    161:
                    162:   /* If the threshold is set at dat[0].size, any positive values are bad. */
                    163:   x = 0.0;
                    164:   for (j = 0; j < ndat; j++)
                    165:     if (dat[j].d > 0.0)
                    166:       x += dat[j].d;
                    167:
                    168:   if (option_trace >= 2 && final)
                    169:     {
                    170:       printf ("\n");
                    171:       printf ("x is the sum of the badness from setting thresh at given size\n");
                    172:       printf ("  (minimum x is sought)\n");
                    173:       printf ("i=%d size=%ld  first x=%.4f\n", i, dat[j].size, x);
                    174:     }
                    175:
                    176:   min_x = x;
                    177:   min_j = 0;
                    178:
                    179:
                    180:   /* When stepping to the next dat[j].size, positive values are no longer
                    181:      bad (so subtracted), negative values become bad (so add the absolute
                    182:      value, meaning subtract). */
                    183:   for (j = 0; j < ndat; x -= dat[j].d, j++)
                    184:     {
                    185:       if (option_trace >= 2 && final)
                    186:         printf ("i=%d size=%ld  x=%.4f\n", i, dat[j].size, x);
                    187:
                    188:       if (x < min_x)
                    189:         {
                    190:           min_x = x;
                    191:           min_j = j;
                    192:         }
                    193:     }
                    194:
                    195:   return min_j;
                    196: }
                    197:
                    198:
                    199: double
                    200: tuneup_measure (speed_function_t fun, struct speed_params *s)
                    201: {
                    202:   static mp_ptr  xp, yp;
                    203:   double   t;
                    204:   TMP_DECL (marker);
                    205:
                    206:   TMP_MARK (marker);
                    207:   s->xp = SPEED_TMP_ALLOC_LIMBS (s->size, 0);
                    208:   s->yp = SPEED_TMP_ALLOC_LIMBS (s->size, 0);
                    209:
                    210:   mpn_random (s->xp, s->size);
                    211:   mpn_random (s->yp, s->size);
                    212:
                    213:   t = speed_measure (fun, s);
                    214:
                    215:   TMP_FREE (marker);
                    216:   return t;
                    217: }
                    218:
                    219:
                    220: void
                    221: print_define (const char *name, mp_size_t value)
                    222: {
                    223:   printf ("#ifndef %s\n", name);
                    224:   printf ("#define %-23s  ", name);
                    225:   if (value == MP_SIZE_T_MAX)
                    226:     printf ("MP_SIZE_T_MAX\n");
                    227:   else
                    228:     printf ("%5ld\n", value);
                    229:   printf ("#endif\n");
                    230: }
                    231:
                    232:
                    233: /* table[i+1] needs to be set to a sensible value when testing method i+1
                    234:    because mpn_mul_n uses TOOM3_MUL_THRESHOLD to size the temporary
                    235:    workspace for mpn_kara_mul_n. */
                    236:
                    237: void
                    238: one (speed_function_t function, mp_size_t table[], size_t max_table,
                    239:      struct param_t *param)
                    240: {
                    241:   static struct param_t  dummy;
                    242:   int  i;
                    243:
                    244:   if (param == NULL)  param = &dummy;
                    245:
                    246: #define DEFAULT(x,n)  if (param->x == 0)  param->x = (n);
                    247:
                    248:   DEFAULT (stop_since_change, 80);
                    249:   DEFAULT (min_size, 10);
                    250:   for (i = 0; i < numberof (param->max_size); i++)
                    251:     DEFAULT (max_size[i], MAX_SIZE);
                    252:
                    253:   s.size = param->min_size;
                    254:
                    255:   for (i = 0; i < max_table && s.size < MAX_SIZE; i++)
                    256:     {
                    257:       int  since_positive, since_thresh_change;
                    258:       int  thresh_idx, new_thresh_idx;
                    259:
                    260:       ndat = 0;
                    261:       since_positive = 0;
                    262:       since_thresh_change = 0;
                    263:       thresh_idx = 0;
                    264:
                    265:       if (option_trace >= 2)
                    266:         {
                    267:           printf ("             algorithm-A  algorithm-B   ratio  possible\n");
                    268:           printf ("              (seconds)    (seconds)    diff    thresh\n");
                    269:         }
                    270:
                    271:       for ( ; s.size < MAX_SIZE;
                    272:             s.size += MAX ((mp_size_t) floor (s.size * STEP_FACTOR), 1))
                    273:         {
                    274:           double   ti, tiplus1, d;
                    275:
                    276:           /* If there's a size limit and it's reached then it should still
                    277:              be sensible to analyze the data since we want the threshold put
                    278:              either at or near the limit.  */
                    279:           if (s.size >= param->max_size[i])
                    280:             {
                    281:               if (option_trace)
                    282:                 printf ("Reached maximum size (%ld) without otherwise stopping\n",
                    283:                         param->max_size[i]);
                    284:               break;
                    285:             }
                    286:
                    287:           /*
                    288:             FIXME: check minimum size requirements are met, possibly by just
                    289:             checking for the -1 returns from the speed functions.
                    290:             if (s.size < MPN_TOOM_TABLE_TO_MINSIZE (i))
                    291:             continue;
                    292:           */
                    293:
                    294:           /* using method i at this size */
                    295:           table[i] = s.size+1;
                    296:           table[i+1] = MAX_SIZE;
                    297:           ti = tuneup_measure (function, &s);
                    298:           if (ti == -1.0)
                    299:             abort ();
                    300:
                    301:           /* using method i+1 at this size */
                    302:           table[i] = s.size;
                    303:           table[i+1] = s.size+1;
                    304:           tiplus1 = tuneup_measure (function, &s);
                    305:           if (tiplus1 == -1.0)
                    306:             abort ();
                    307:
                    308:           /* Calculate the fraction by which the one or the other routine is
                    309:              slower.  */
                    310:           if (tiplus1 >= ti)
                    311:             d = (tiplus1 - ti) / tiplus1;  /* negative */
                    312:           else
                    313:             d = (tiplus1 - ti) / ti;       /* positive */
                    314:
                    315:           add_dat (s.size, d);
                    316:
                    317:           new_thresh_idx = analyze_dat (i, 0);
                    318:
                    319:
                    320:           if (option_trace >= 2)
                    321:             printf ("i=%d size=%ld  %.9f  %.9f  % .4f %c  %d\n",
                    322:                     i, s.size, ti, tiplus1, d,
                    323:                     ti > tiplus1 ? '#' : ' ',
                    324:                     dat[new_thresh_idx].size);
                    325:
                    326:           /* Stop if the last time method i was faster was more than a
                    327:              certain number of measurements ago.  */
                    328: #define STOP_SINCE_POSITIVE  200
                    329:           if (d >= 0)
                    330:             since_positive = 0;
                    331:           else
                    332:             if (++since_positive > STOP_SINCE_POSITIVE)
                    333:               {
                    334:                 if (option_trace >= 1)
                    335:                   printf ("i=%d stopped due to since_positive (%d)\n",
                    336:                           i, STOP_SINCE_POSITIVE);
                    337:                 break;
                    338:               }
                    339:
                    340:           /* Stop if method i has become slower by a certain factor. */
                    341: #define STOP_FACTOR   1.2
                    342:           if (ti >= tiplus1 * STOP_FACTOR)
                    343:             {
                    344:               if (option_trace >= 1)
                    345:                 printf ("i=%d stopped due to ti >= tiplus1 * factor (%.1f)\n",
                    346:                         i, STOP_FACTOR);
                    347:               break;
                    348:             }
                    349:
                    350:           /* Stop if the threshold implied hasn't changed in a certain
                    351:              number of measurements.  (It's this condition that ususally
                    352:              stops the loop.) */
                    353:           if (thresh_idx != new_thresh_idx)
                    354:             since_thresh_change = 0, thresh_idx = new_thresh_idx;
                    355:           else
                    356:             if (++since_thresh_change > param->stop_since_change)
                    357:               {
                    358:                 if (option_trace >= 1)
                    359:                   printf ("i=%d stopped due to since_thresh_change (%d)\n",
                    360:                           i, param->stop_since_change);
                    361:                 break;
                    362:               }
                    363:
                    364:           /* Stop if the threshold implied is more than a certain number of
                    365:              measurements ago.  */
                    366: #define STOP_SINCE_AFTER   500
                    367:           if (ndat - thresh_idx > STOP_SINCE_AFTER)
                    368:             {
                    369:               if (option_trace >= 1)
                    370:                 printf ("i=%d stopped due to ndat - thresh_idx > amount (%d)\n",
                    371:                         i, STOP_SINCE_AFTER);
                    372:               break;
                    373:             }
                    374:         }
                    375:
                    376:       /* Stop when the size limit is reached before the end of the
                    377:          crossover, without a specified param->max_size[i]. */
                    378:       if (s.size >= MAX_SIZE)
                    379:         {
                    380:           fprintf (stderr, "%s\n", param->name[i]);
                    381:           fprintf (stderr, "i=%d sizes %ld to %ld total %d measurements\n",
                    382:                    i, dat[0].size, dat[ndat-1].size, ndat);
                    383:           fprintf (stderr, "    max size reached before end of crossover\n");
                    384:           break;
                    385:         }
                    386:
                    387:       if (option_trace >= 1)
                    388:         printf ("i=%d sizes %ld to %ld total %d measurements\n",
                    389:                 i, dat[0].size, dat[ndat-1].size, ndat);
                    390:
                    391:       if (ndat == 0)
                    392:         break;
                    393:
                    394:       table[i] = dat[analyze_dat (i, 1)].size;
                    395:
                    396:       print_define (param->name[i], table[i]);
                    397:
                    398:       /* Look for the next threshold starting from the current one, but back
                    399:          a bit. */
                    400:       s.size = table[i]+1;
                    401:     }
                    402: }
                    403:
                    404:
                    405: /* Special probing for the fft thresholds.  The size restrictions on the
                    406:    FFTs mean the graph of time vs size has a step effect.  See this for
                    407:    example using
                    408:
                    409:        ./speed -s 4096-16384 -t 128 -P foo mpn_mul_fft.8 mpn_mul_fft.9
                    410:        gnuplot foo.gnuplot
                    411:
                    412:    The current approach is to compare routines at the midpoint of relevant
                    413:    steps.  Arguably a more sophisticated system of threshold data is wanted
                    414:    if this step effect remains. */
                    415:
                    416: struct fft_param_t {
                    417:   const char        *table_name;
                    418:   const char        *threshold_name;
                    419:   const char        *modf_threshold_name;
                    420:   mp_size_t         *p_threshold;
                    421:   mp_size_t         *p_modf_threshold;
                    422:   mp_size_t         first_size;
                    423:   mp_size_t         max_size;
                    424:   speed_function_t  function;
                    425:   speed_function_t  mul_function;
                    426:   mp_size_t         sqr;
                    427: };
                    428:
                    429: /* mpn_mul_fft requires pl a multiple of 2^k limbs, but with
                    430:    N=pl*BIT_PER_MP_LIMB it internally also pads out so N/2^k is a multiple
                    431:    of 2^(k-1) bits. */
                    432:
                    433: mp_size_t
                    434: fft_step_size (int k)
                    435: {
                    436:   if (2*k-1 > BITS_PER_INT)
                    437:     {
                    438:       printf ("Can't handle k=%d\n", k);
                    439:       abort ();
                    440:     }
                    441:   return (1<<k) * (MAX (1<<(k-1), BITS_PER_MP_LIMB)) / BITS_PER_MP_LIMB;
                    442: }
                    443:
                    444: mp_size_t
                    445: fft_next_size (mp_size_t pl, int k)
                    446: {
                    447:   mp_size_t  m = fft_step_size (k);
                    448:
                    449: /*    printf ("[k=%d %ld] %ld ->", k, m, pl); */
                    450:
                    451:   if (pl == 0 || (pl & (m-1)) != 0)
                    452:     pl = (pl | (m-1)) + 1;
                    453:
                    454: /*    printf (" %ld\n", pl); */
                    455:   return pl;
                    456: }
                    457:
                    458: void
                    459: fft (struct fft_param_t *p)
                    460: {
                    461:   mp_size_t  size;
                    462:   int        i, k;
                    463:
                    464:   for (i = 0; i < numberof (mpn_fft_table[p->sqr]); i++)
                    465:     mpn_fft_table[p->sqr][i] = MP_SIZE_T_MAX;
                    466:
                    467:   *p->p_threshold = MP_SIZE_T_MAX;
                    468:   *p->p_modf_threshold = MP_SIZE_T_MAX;
                    469:
                    470:   option_trace = MAX (option_trace, option_fft_trace);
                    471:
                    472:   printf ("#ifndef %s\n", p->table_name);
                    473:   printf ("#define %s  {", p->table_name);
                    474:   if (option_trace >= 2)
                    475:     printf ("\n");
                    476:
                    477:   k = FFT_FIRST_K;
                    478:   size = p->first_size;
                    479:   for (;;)
                    480:     {
                    481:       double  tk, tk1;
                    482:
                    483:       size = fft_next_size (size+1, k+1);
                    484:
                    485:       if (size >= p->max_size)
                    486:         break;
                    487:       if (k >= FFT_FIRST_K + numberof (mpn_fft_table[p->sqr]))
                    488:         break;
                    489:
                    490:       usleep(10000);
                    491:
                    492:       /* compare k to k+1 in the middle of the current k+1 step */
                    493:       s.size = size + fft_step_size (k+1) / 2;
                    494:       s.r = k;
                    495:       tk = tuneup_measure (p->function, &s);
                    496:       if (tk == -1.0)
                    497:         abort ();
                    498:
                    499:       usleep(10000);
                    500:
                    501:       s.r = k+1;
                    502:       tk1 = tuneup_measure (p->function, &s);
                    503:       if (tk1 == -1.0)
                    504:         abort ();
                    505:
                    506:       if (option_trace >= 2)
                    507:         printf ("at %ld   size=%ld  k=%d  %.9lf   k=%d %.9lf\n",
                    508:                 size, s.size, k, tk, k+1, tk1);
                    509:
                    510:       /* declare the k+1 threshold as soon as it's faster at its midpoint */
                    511:       if (tk1 < tk)
                    512:         {
                    513:           mpn_fft_table[p->sqr][k-FFT_FIRST_K] = s.size;
                    514:           printf (" %ld,", s.size);
                    515:           if (option_trace >= 2) printf ("\n");
                    516:           k++;
                    517:         }
                    518:     }
                    519:
                    520:   mpn_fft_table[p->sqr][k-FFT_FIRST_K] = 0;
                    521:   printf (" 0 }\n");
                    522:   printf ("#endif\n");
                    523:
                    524:
                    525:   size = p->first_size;
                    526:
                    527:   /* Declare an FFT faster than a plain toom3 etc multiplication found as
                    528:      soon as one faster measurement obtained.  A multiplication in the
                    529:      middle of the FFT step is tested.  */
                    530:   for (;;)
                    531:     {
                    532:       int     modf = (*p->p_modf_threshold == MP_SIZE_T_MAX);
                    533:       double  tk, tm;
                    534:
                    535:       /* k=7 should be the first FFT which can beat toom3 on a full
                    536:          multiply, so jump to that threshold and save some probing after the
                    537:          modf threshold is found.  */
                    538:       if (!modf && size < mpn_fft_table[p->sqr][2])
                    539:         {
                    540:           size = mpn_fft_table[p->sqr][2];
                    541:           if (option_trace >= 2)
                    542:             printf ("jump to size=%ld\n", size);
                    543:         }
                    544:
                    545:       size = fft_next_size (size+1, mpn_fft_best_k (size, p->sqr));
                    546:       k = mpn_fft_best_k (size, p->sqr);
                    547:
                    548:       if (size >= p->max_size)
                    549:         break;
                    550:
                    551:       usleep(10000);
                    552:
                    553:       s.size = size + fft_step_size (k) / 2;
                    554:       s.r = k;
                    555:       tk = tuneup_measure (p->function, &s);
                    556:       if (tk == -1.0)
                    557:         abort ();
                    558:
                    559:       usleep(10000);
                    560:
                    561:       if (!modf)  s.size /= 2;
                    562:       tm = tuneup_measure (p->mul_function, &s);
                    563:       if (tm == -1.0)
                    564:         abort ();
                    565:
                    566:       if (option_trace >= 2)
                    567:         printf ("at %ld   size=%ld   k=%d  %.9lf   size=%ld %s mul %.9lf\n",
                    568:                 size,
                    569:                 size + fft_step_size (k) / 2, k, tk,
                    570:                 s.size, modf ? "modf" : "full", tm);
                    571:
                    572:       if (tk < tm)
                    573:         {
                    574:           if (modf)
                    575:             {
                    576:               *p->p_modf_threshold = s.size;
                    577:               print_define (p->modf_threshold_name, *p->p_modf_threshold);
                    578:             }
                    579:           else
                    580:             {
                    581:               *p->p_threshold = s.size;
                    582:               print_define (p->threshold_name,      *p->p_threshold);
                    583:               break;
                    584:             }
                    585:         }
                    586:     }
                    587:
                    588: }
                    589:
                    590: /* Run the whole tuning sequence.  Allocates the operand blocks in the
                            shared s, reports the timing setup on stderr, prints a dated header
                            on stdout, then calls one () for each plain threshold and, when
                            option_fft_max_size is non-zero, fft () for the FFT multiply and
                            square thresholds.  Takes no arguments and returns nothing.  */
                    591: void
                    592: all (void)
                    593: {
                    594:   TMP_DECL (marker);
                    595:
                           /* Operand blocks handed to the measuring code through s.  They are
                              allocated after TMP_MARK and covered by the TMP_FREE at the end.  */
                    596:   TMP_MARK (marker);
                    597:   s.xp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, 0);
                    598:   s.yp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, 0);
                    599:
                           /* Configuration details go to stderr, leaving stdout for the
                              generated output.  */
                    600:   speed_time_init ();
                    601:   fprintf (stderr, "speed_precision %d, speed_unittime %.2e\n",
                    602:            speed_precision, speed_unittime);
                    603:   fprintf (stderr, "MAX_SIZE %ld, fft_max_size %ld, STEP_FACTOR %.3f\n",
                    604:            MAX_SIZE, option_fft_max_size, STEP_FACTOR);
                    605:   fprintf (stderr, "\n");
                    606:
                           /* Date-stamp the output using the local time.  tm_year counts from
                              1900 and tm_mon from 0, hence the adjustments.  */
                    607:   {
                    608:     struct tm  *tp;
                    609:     time_t     t;
                    610:     time (&t);
                    611:     tp = localtime (&t);
                    612:     printf ("/* Generated by tuneup.c, %d-%02d-%02d. */\n\n",
                    613:             tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
                    614:   }
                    615:
                           /* Each parameter block below is static so that every field not
                              assigned explicitly starts out as zero.  */
                    616:   {
                    617:     static struct param_t  param;
                    618:     param.name[0] = "KARATSUBA_MUL_THRESHOLD";
                    619:     param.name[1] = "TOOM3_MUL_THRESHOLD";
                    620:     param.max_size[1] = TOOM3_MUL_THRESHOLD_LIMIT;
                    621:     one (speed_mpn_mul_n, mul_threshold, numberof(mul_threshold)-1, &param);
                    622:   }
                    623:   printf("\n");
                    624:
                    625:   {
                    626:     static struct param_t  param;
                    627:     param.name[0] = "KARATSUBA_SQR_THRESHOLD";
                    628:     param.name[1] = "TOOM3_SQR_THRESHOLD";
                    629:     param.max_size[0] = KARATSUBA_SQR_MAX;
                    630:     one (speed_mpn_sqr_n, sqr_threshold, numberof(sqr_threshold)-1, &param);
                    631:   }
                    632:   printf("\n");
                    633:
                    634:   {
                    635:     static struct param_t  param;
                    636:     param.name[0] = "BZ_THRESHOLD";
                    637:     one (speed_mpn_bz_tdiv_qr, bz_threshold, 1, &param);
                    638:   }
                    639:   printf("\n");
                    640:
                    641:   {
                    642:     static struct param_t  param;
                    643:     param.name[0] = "FIB_THRESHOLD";
                    644:     one (speed_mpz_fib_ui, fib_threshold, 1, &param);
                    645:   }
                    646:   printf("\n");
                    647:
                    648:   /* mpz_powm becomes slow before long, so stop soon after the determined
                    649:      threshold stops changing. */
                    650:   {
                    651:     static struct param_t  param;
                    652:     param.name[0] = "POWM_THRESHOLD";
                    653:     param.stop_since_change = 15;
                    654:     one (speed_mpz_powm, powm_threshold, 1, &param);
                    655:   }
                    656:   printf("\n");
                    657:
                           /* The two gcd thresholds are printed as one group, with no blank
                              line between them.  */
                    658:   {
                    659:     static struct param_t  param;
                    660:     param.name[0] = "GCD_ACCEL_THRESHOLD";
                    661:     param.min_size = 1;
                    662:     one (speed_mpn_gcd, gcd_accel_threshold, 1, &param);
                    663:   }
                    664:   {
                    665:     static struct param_t  param;
                    666:     param.name[0] = "GCDEXT_THRESHOLD";
                    667:     param.min_size = 1;
                    668:     param.max_size[0] = 200;
                    669:     one (speed_mpn_gcdext, gcdext_threshold, 1, &param);
                    670:   }
                    671:   printf("\n");
                    672:
                           /* An option_fft_max_size of 0 skips the FFT tuning altogether.  */
                    673:   if (option_fft_max_size != 0)
                    674:     {
                               /* FFT multiply.  */
                    675:       {
                    676:         static struct fft_param_t  param;
                    677:         param.table_name          = "FFT_MUL_TABLE";
                    678:         param.threshold_name      = "FFT_MUL_THRESHOLD";
                    679:         param.p_threshold         = &FFT_MUL_THRESHOLD;
                    680:         param.modf_threshold_name = "FFT_MODF_MUL_THRESHOLD";
                    681:         param.p_modf_threshold    = &FFT_MODF_MUL_THRESHOLD;
                    682:         param.first_size          = TOOM3_MUL_THRESHOLD / 2;
                    683:         param.max_size            = option_fft_max_size;
                    684:         param.function            = speed_mpn_mul_fft;
                    685:         param.mul_function        = speed_mpn_mul_n;
                    686:         param.sqr = 0;
                    687:         fft (&param);
                    688:       }
                    689:       printf("\n");
                               /* FFT square.  */
                    690:       {
                    691:         static struct fft_param_t  param;
                    692:         param.table_name          = "FFT_SQR_TABLE";
                    693:         param.threshold_name      = "FFT_SQR_THRESHOLD";
                    694:         param.p_threshold         = &FFT_SQR_THRESHOLD;
                    695:         param.modf_threshold_name = "FFT_MODF_SQR_THRESHOLD";
                    696:         param.p_modf_threshold    = &FFT_MODF_SQR_THRESHOLD;
                    697:         param.first_size          = TOOM3_SQR_THRESHOLD / 2;
                    698:         param.max_size            = option_fft_max_size;
                    699:         param.function            = speed_mpn_mul_fft_sqr;
                    700:         param.mul_function        = speed_mpn_sqr_n;
                                 /* Squaring run, so sqr is 1 to distinguish it from the multiply
                                    run above (sqr 0).  NOTE(review): sqr is consumed inside
                                    fft (); confirm there that 1 selects the squaring data.  */
                    701:         param.sqr = 1;
                    702:         fft (&param);
                    703:       }
                    704:       printf ("\n");
                    705:     }
                    706:
                    707:   TMP_FREE (marker);
                    708: }
                    709:
                    710: /* Program entry point.  Parses the command line and then runs all ().
                            Options:
                              -f ARG   if ARG starts with 't', set option_fft_trace to 2;
                                       otherwise set option_fft_max_size to atol (ARG)
                              -o ARG   pass ARG to speed_option_set ()
                              -p ARG   set speed_precision to atoi (ARG)
                              -t       increment option_trace, may be given more than once
                            A '?' result from getopt (unrecognised option or missing option
                            argument) exits with status 1.  Returns 0 once all () has
                            returned.  */
                    711: int
                    712: main (int argc, char *argv[])
                    713: {
                    714:   int  opt;
                    715:
                    716:   /* Unbuffered so if output is redirected to a file it isn't lost if the
                    717:      program is killed part way through.  */
                    718:   setbuf (stdout, NULL);
                    719:   setbuf (stderr, NULL);
                    720:
                           /* getopt is specified to return -1 at the end of the options.  EOF
                              is only guaranteed to be some negative value, so compare against
                              -1 rather than EOF.  */
                    721:   while ((opt = getopt(argc, argv, "f:o:p:t")) != -1)
                    722:     {
                    723:       switch (opt) {
                    724:       case 'f':
                               /* "-f t..." selects FFT tracing, anything else is taken as the
                                  maximum FFT size.  */
                    725:         if (optarg[0] == 't')
                    726:           option_fft_trace = 2;
                    727:         else
                    728:           option_fft_max_size = atol (optarg);
                    729:         break;
                    730:       case 'o':
                    731:         speed_option_set (optarg);
                    732:         break;
                    733:       case 'p':
                               /* NOTE(review): atoi (and atol above) yield 0 for non-numeric
                                  text, which is accepted here without complaint.  */
                    734:         speed_precision = atoi (optarg);
                    735:         break;
                    736:       case 't':
                    737:         option_trace++;
                    738:         break;
                    739:       case '?':
                    740:         exit(1);
                    741:       }
                    742:     }
                    743:
                    744:   all ();
                    745:   return 0;
                    746: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>