[BACK]Return to tuneup.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / tune

Annotation of OpenXM_contrib/gmp/tune/tuneup.c, Revision 1.1

1.1     ! maekawa     1: /* Create tuned thresholds for various algorithms. */
        !             2:
        !             3: /*
        !             4: Copyright (C) 1999, 2000 Free Software Foundation, Inc.
        !             5:
        !             6: This file is part of the GNU MP Library.
        !             7:
        !             8: The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: it under the terms of the GNU Lesser General Public License as published by
        !            10: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            11: option) any later version.
        !            12:
        !            13: The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            16: License for more details.
        !            17:
        !            18: You should have received a copy of the GNU Lesser General Public License
        !            19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: MA 02111-1307, USA.
        !            22: */
        !            23:
        !            24: /* Usage: tune [-t] [-t] [-p precision]
        !            25:
        !            26:    -t turns on some diagnostic traces, a second -t turns on more traces.
        !            27:
        !            28:    The thresholds are determined as follows.  A crossover may not be a
        !            29:    single size but rather a range where it oscillates between method A or
        !            30:    method B faster.  If the threshold is set making B used where A is faster
        !            31:    (or vice versa) that's bad.  Badness is the percentage time lost and
        !            32:    total badness is the sum of this over all sizes measured.  The threshold
        !            33:    is set to minimize total badness.
        !            34:
        !            35:    Suppose, as sizes increase, method B becomes faster than method A.  The
        !            36:    effect of the rule is that, as you look at increasing sizes, isolated
        !            37:    points where B is faster are ignored, but when it's consistently faster,
        !            38:    or faster on balance, then the threshold is set there.  The same result
        !            39:    is obtained thinking in the other direction of A becoming faster at
        !            40:    smaller sizes.
        !            41:
        !            42:    In practice the thresholds tend to be chosen to bring on the next
        !            43:    algorithm fairly quickly.
        !            44:
        !            45:    This rule is attractive because it's got a basis in reason and is fairly
        !            46:    easy to implement, but no work has been done to actually compare it in
        !            47:    absolute terms to other possibilities.
        !            48:
        !            49:    Sometimes running the program twice produces slightly different results.
        !            50:    This is probably because there's so little separating algorithms near
        !            51:    their crossover, and on that basis it should make little or no difference
        !            52:    to the final speed of the relevant routines, but nothing has been done to
        !            53:    check that carefully.
        !            54:
        !            55:    Limitations:
        !            56:
        !            57:    The FFTs aren't subject to the same badness rule as the other thresholds,
        !            58:    so each k is probably being brought on a touch early.  This isn't likely
        !            59:    to make a difference, and the simpler probing means fewer tests.
        !            60:
        !            61: */
        !            62:
        !            63: #define TUNE_PROGRAM_BUILD  1
        !            64:
        !            65: #include <math.h>
        !            66: #include <stdio.h>
        !            67: #include <stdlib.h>
        !            68: #include <time.h>
        !            69: #include <unistd.h>
        !            70:
        !            71: #include "gmp.h"
        !            72: #include "gmp-impl.h"
        !            73:
        !            74: #include "speed.h"
        !            75: #include "sqr_basecase.h"
        !            76:
        !            77: #if !HAVE_DECL_OPTARG
        !            78: extern char *optarg;
        !            79: extern int optind, opterr;
        !            80: #endif
        !            81:
        !            82:
        !            83: #define MAX_SIZE        1000  /* limbs */
        !            84: #define STEP_FACTOR     0.01  /* how much to step sizes by (rounded down) */
        !            85: #define MAX_TABLE       2     /* threshold entries */
        !            86:
        !            87:
        !            88: #if WANT_FFT
        !            89: mp_size_t  option_fft_max_size = 50000;  /* limbs */
        !            90: #else
        !            91: mp_size_t  option_fft_max_size = 0;
        !            92: #endif
        !            93: int        option_trace = 0;
        !            94: int        option_fft_trace = 0;
        !            95: struct speed_params  s;
        !            96:
        !            97: struct dat_t {
        !            98:   mp_size_t  size;
        !            99:   double     d;
        !           100: } *dat = NULL;
        !           101: int  ndat = 0;
        !           102: int  allocdat = 0;
        !           103:
        !           104:
        !           105: /* Each "_threshold" array must be 1 bigger than the number of thresholds
        !           106:    being tuned in a set, because one() stores an value in the entry above
        !           107:    the one it's determining. */
        !           108:
        !           109: mp_size_t  mul_threshold[MAX_TABLE+1] = { MP_SIZE_T_MAX };
        !           110: mp_size_t  fft_modf_mul_threshold = MP_SIZE_T_MAX;
        !           111: mp_size_t  sqr_threshold[MAX_TABLE+1] = { MP_SIZE_T_MAX };
        !           112: mp_size_t  fft_modf_sqr_threshold = MP_SIZE_T_MAX;
        !           113: mp_size_t  bz_threshold[2] = { MP_SIZE_T_MAX };
        !           114: mp_size_t  fib_threshold[2] = { MP_SIZE_T_MAX };
        !           115: mp_size_t  powm_threshold[2] = { MP_SIZE_T_MAX };
        !           116: mp_size_t  gcd_accel_threshold[2] = { MP_SIZE_T_MAX };
        !           117: mp_size_t  gcdext_threshold[2] = { MP_SIZE_T_MAX };
        !           118:
        !           119:
        !           120: #ifndef KARATSUBA_SQR_MAX
        !           121: #define KARATSUBA_SQR_MAX  0 /* meaning no limit */
        !           122: #endif
        !           123:
        !           124: struct param_t {
        !           125:   const char  *name[MAX_TABLE];
        !           126:   int         stop_since_change;
        !           127:   mp_size_t   min_size;
        !           128:   mp_size_t   max_size[MAX_TABLE];
        !           129: };
        !           130:
        !           131:
        !           132: /* Add an entry to the end of the dat[] array, reallocing to make it bigger
        !           133:    if necessary.  */
        !           134: void
        !           135: add_dat (mp_size_t size, double d)
        !           136: {
        !           137: #define ALLOCDAT_STEP  500
        !           138:
        !           139:   ASSERT_ALWAYS (ndat <= allocdat);
        !           140:
        !           141:   if (ndat == allocdat)
        !           142:     {
        !           143:       dat = (struct dat_t *) _mp_allocate_or_reallocate
        !           144:         (dat, allocdat * sizeof(dat[0]),
        !           145:          (allocdat+ALLOCDAT_STEP) * sizeof(dat[0]));
        !           146:       allocdat += ALLOCDAT_STEP;
        !           147:     }
        !           148:
        !           149:   dat[ndat].size = size;
        !           150:   dat[ndat].d = d;
        !           151:   ndat++;
        !           152: }
        !           153:
        !           154:
        !           155: /* Return the threshold size based on the data accumulated. */
        !           156: mp_size_t
        !           157: analyze_dat (int i, int final)
        !           158: {
        !           159:   double  x, min_x;
        !           160:   int     j, min_j;
        !           161:
        !           162:   /* If the threshold is set at dat[0].size, any positive values are bad. */
        !           163:   x = 0.0;
        !           164:   for (j = 0; j < ndat; j++)
        !           165:     if (dat[j].d > 0.0)
        !           166:       x += dat[j].d;
        !           167:
        !           168:   if (option_trace >= 2 && final)
        !           169:     {
        !           170:       printf ("\n");
        !           171:       printf ("x is the sum of the badness from setting thresh at given size\n");
        !           172:       printf ("  (minimum x is sought)\n");
        !           173:       printf ("i=%d size=%ld  first x=%.4f\n", i, dat[j].size, x);
        !           174:     }
        !           175:
        !           176:   min_x = x;
        !           177:   min_j = 0;
        !           178:
        !           179:
        !           180:   /* When stepping to the next dat[j].size, positive values are no longer
        !           181:      bad (so subtracted), negative values become bad (so add the absolute
        !           182:      value, meaning subtract). */
        !           183:   for (j = 0; j < ndat; x -= dat[j].d, j++)
        !           184:     {
        !           185:       if (option_trace >= 2 && final)
        !           186:         printf ("i=%d size=%ld  x=%.4f\n", i, dat[j].size, x);
        !           187:
        !           188:       if (x < min_x)
        !           189:         {
        !           190:           min_x = x;
        !           191:           min_j = j;
        !           192:         }
        !           193:     }
        !           194:
        !           195:   return min_j;
        !           196: }
        !           197:
        !           198:
        !           199: double
        !           200: tuneup_measure (speed_function_t fun, struct speed_params *s)
        !           201: {
        !           202:   static mp_ptr  xp, yp;
        !           203:   double   t;
        !           204:   TMP_DECL (marker);
        !           205:
        !           206:   TMP_MARK (marker);
        !           207:   s->xp = SPEED_TMP_ALLOC_LIMBS (s->size, 0);
        !           208:   s->yp = SPEED_TMP_ALLOC_LIMBS (s->size, 0);
        !           209:
        !           210:   mpn_random (s->xp, s->size);
        !           211:   mpn_random (s->yp, s->size);
        !           212:
        !           213:   t = speed_measure (fun, s);
        !           214:
        !           215:   TMP_FREE (marker);
        !           216:   return t;
        !           217: }
        !           218:
        !           219:
        !           220: void
        !           221: print_define (const char *name, mp_size_t value)
        !           222: {
        !           223:   printf ("#ifndef %s\n", name);
        !           224:   printf ("#define %-23s  ", name);
        !           225:   if (value == MP_SIZE_T_MAX)
        !           226:     printf ("MP_SIZE_T_MAX\n");
        !           227:   else
        !           228:     printf ("%5ld\n", value);
        !           229:   printf ("#endif\n");
        !           230: }
        !           231:
        !           232:
        !           233: /* table[i+1] needs to be set to a sensible value when testing method i+1
        !           234:    because mpn_mul_n uses TOOM3_MUL_THRESHOLD to size the temporary
        !           235:    workspace for mpn_kara_mul_n. */
        !           236:
        !           237: void
        !           238: one (speed_function_t function, mp_size_t table[], size_t max_table,
        !           239:      struct param_t *param)
        !           240: {
        !           241:   static struct param_t  dummy;
        !           242:   int  i;
        !           243:
        !           244:   if (param == NULL)  param = &dummy;
        !           245:
        !           246: #define DEFAULT(x,n)  if (param->x == 0)  param->x = (n);
        !           247:
        !           248:   DEFAULT (stop_since_change, 80);
        !           249:   DEFAULT (min_size, 10);
        !           250:   for (i = 0; i < numberof (param->max_size); i++)
        !           251:     DEFAULT (max_size[i], MAX_SIZE);
        !           252:
        !           253:   s.size = param->min_size;
        !           254:
        !           255:   for (i = 0; i < max_table && s.size < MAX_SIZE; i++)
        !           256:     {
        !           257:       int  since_positive, since_thresh_change;
        !           258:       int  thresh_idx, new_thresh_idx;
        !           259:
        !           260:       ndat = 0;
        !           261:       since_positive = 0;
        !           262:       since_thresh_change = 0;
        !           263:       thresh_idx = 0;
        !           264:
        !           265:       if (option_trace >= 2)
        !           266:         {
        !           267:           printf ("             algorithm-A  algorithm-B   ratio  possible\n");
        !           268:           printf ("              (seconds)    (seconds)    diff    thresh\n");
        !           269:         }
        !           270:
        !           271:       for ( ; s.size < MAX_SIZE;
        !           272:             s.size += MAX ((mp_size_t) floor (s.size * STEP_FACTOR), 1))
        !           273:         {
        !           274:           double   ti, tiplus1, d;
        !           275:
        !           276:           /* If there's a size limit and it's reached then it should still
        !           277:              be sensible to analyze the data since we want the threshold put
        !           278:              either at or near the limit.  */
        !           279:           if (s.size >= param->max_size[i])
        !           280:             {
        !           281:               if (option_trace)
        !           282:                 printf ("Reached maximum size (%ld) without otherwise stopping\n",
        !           283:                         param->max_size[i]);
        !           284:               break;
        !           285:             }
        !           286:
        !           287:           /*
        !           288:             FIXME: check minimum size requirements are met, possibly by just
        !           289:             checking for the -1 returns from the speed functions.
        !           290:             if (s.size < MPN_TOOM_TABLE_TO_MINSIZE (i))
        !           291:             continue;
        !           292:           */
        !           293:
        !           294:           /* using method i at this size */
        !           295:           table[i] = s.size+1;
        !           296:           table[i+1] = MAX_SIZE;
        !           297:           ti = tuneup_measure (function, &s);
        !           298:           if (ti == -1.0)
        !           299:             abort ();
        !           300:
        !           301:           /* using method i+1 at this size */
        !           302:           table[i] = s.size;
        !           303:           table[i+1] = s.size+1;
        !           304:           tiplus1 = tuneup_measure (function, &s);
        !           305:           if (tiplus1 == -1.0)
        !           306:             abort ();
        !           307:
        !           308:           /* Calculate the fraction by which the one or the other routine is
        !           309:              slower.  */
        !           310:           if (tiplus1 >= ti)
        !           311:             d = (tiplus1 - ti) / tiplus1;  /* negative */
        !           312:           else
        !           313:             d = (tiplus1 - ti) / ti;       /* positive */
        !           314:
        !           315:           add_dat (s.size, d);
        !           316:
        !           317:           new_thresh_idx = analyze_dat (i, 0);
        !           318:
        !           319:
        !           320:           if (option_trace >= 2)
        !           321:             printf ("i=%d size=%ld  %.9f  %.9f  % .4f %c  %d\n",
        !           322:                     i, s.size, ti, tiplus1, d,
        !           323:                     ti > tiplus1 ? '#' : ' ',
        !           324:                     dat[new_thresh_idx].size);
        !           325:
        !           326:           /* Stop if the last time method i was faster was more than a
        !           327:              certain number of measurements ago.  */
        !           328: #define STOP_SINCE_POSITIVE  200
        !           329:           if (d >= 0)
        !           330:             since_positive = 0;
        !           331:           else
        !           332:             if (++since_positive > STOP_SINCE_POSITIVE)
        !           333:               {
        !           334:                 if (option_trace >= 1)
        !           335:                   printf ("i=%d stopped due to since_positive (%d)\n",
        !           336:                           i, STOP_SINCE_POSITIVE);
        !           337:                 break;
        !           338:               }
        !           339:
        !           340:           /* Stop if method i has become slower by a certain factor. */
        !           341: #define STOP_FACTOR   1.2
        !           342:           if (ti >= tiplus1 * STOP_FACTOR)
        !           343:             {
        !           344:               if (option_trace >= 1)
        !           345:                 printf ("i=%d stopped due to ti >= tiplus1 * factor (%.1f)\n",
        !           346:                         i, STOP_FACTOR);
        !           347:               break;
        !           348:             }
        !           349:
        !           350:           /* Stop if the threshold implied hasn't changed in a certain
        !           351:              number of measurements.  (It's this condition that ususally
        !           352:              stops the loop.) */
        !           353:           if (thresh_idx != new_thresh_idx)
        !           354:             since_thresh_change = 0, thresh_idx = new_thresh_idx;
        !           355:           else
        !           356:             if (++since_thresh_change > param->stop_since_change)
        !           357:               {
        !           358:                 if (option_trace >= 1)
        !           359:                   printf ("i=%d stopped due to since_thresh_change (%d)\n",
        !           360:                           i, param->stop_since_change);
        !           361:                 break;
        !           362:               }
        !           363:
        !           364:           /* Stop if the threshold implied is more than a certain number of
        !           365:              measurements ago.  */
        !           366: #define STOP_SINCE_AFTER   500
        !           367:           if (ndat - thresh_idx > STOP_SINCE_AFTER)
        !           368:             {
        !           369:               if (option_trace >= 1)
        !           370:                 printf ("i=%d stopped due to ndat - thresh_idx > amount (%d)\n",
        !           371:                         i, STOP_SINCE_AFTER);
        !           372:               break;
        !           373:             }
        !           374:         }
        !           375:
        !           376:       /* Stop when the size limit is reached before the end of the
        !           377:          crossover, without a specified param->max_size[i]. */
        !           378:       if (s.size >= MAX_SIZE)
        !           379:         {
        !           380:           fprintf (stderr, "%s\n", param->name[i]);
        !           381:           fprintf (stderr, "i=%d sizes %ld to %ld total %d measurements\n",
        !           382:                    i, dat[0].size, dat[ndat-1].size, ndat);
        !           383:           fprintf (stderr, "    max size reached before end of crossover\n");
        !           384:           break;
        !           385:         }
        !           386:
        !           387:       if (option_trace >= 1)
        !           388:         printf ("i=%d sizes %ld to %ld total %d measurements\n",
        !           389:                 i, dat[0].size, dat[ndat-1].size, ndat);
        !           390:
        !           391:       if (ndat == 0)
        !           392:         break;
        !           393:
        !           394:       table[i] = dat[analyze_dat (i, 1)].size;
        !           395:
        !           396:       print_define (param->name[i], table[i]);
        !           397:
        !           398:       /* Look for the next threshold starting from the current one, but back
        !           399:          a bit. */
        !           400:       s.size = table[i]+1;
        !           401:     }
        !           402: }
        !           403:
        !           404:
        !           405: /* Special probing for the fft thresholds.  The size restrictions on the
        !           406:    FFTs mean the graph of time vs size has a step effect.  See this for
        !           407:    example using
        !           408:
        !           409:        ./speed -s 4096-16384 -t 128 -P foo mpn_mul_fft.8 mpn_mul_fft.9
        !           410:        gnuplot foo.gnuplot
        !           411:
        !           412:    The current approach is to compare routines at the midpoint of relevant
        !           413:    steps.  Arguably a more sophisticated system of threshold data is wanted
        !           414:    if this step effect remains. */
        !           415:
        !           416: struct fft_param_t {
        !           417:   const char        *table_name;
        !           418:   const char        *threshold_name;
        !           419:   const char        *modf_threshold_name;
        !           420:   mp_size_t         *p_threshold;
        !           421:   mp_size_t         *p_modf_threshold;
        !           422:   mp_size_t         first_size;
        !           423:   mp_size_t         max_size;
        !           424:   speed_function_t  function;
        !           425:   speed_function_t  mul_function;
        !           426:   mp_size_t         sqr;
        !           427: };
        !           428:
        !           429: /* mpn_mul_fft requires pl a multiple of 2^k limbs, but with
        !           430:    N=pl*BIT_PER_MP_LIMB it internally also pads out so N/2^k is a multiple
        !           431:    of 2^(k-1) bits. */
        !           432:
        !           433: mp_size_t
        !           434: fft_step_size (int k)
        !           435: {
        !           436:   if (2*k-1 > BITS_PER_INT)
        !           437:     {
        !           438:       printf ("Can't handle k=%d\n", k);
        !           439:       abort ();
        !           440:     }
        !           441:   return (1<<k) * (MAX (1<<(k-1), BITS_PER_MP_LIMB)) / BITS_PER_MP_LIMB;
        !           442: }
        !           443:
        !           444: mp_size_t
        !           445: fft_next_size (mp_size_t pl, int k)
        !           446: {
        !           447:   mp_size_t  m = fft_step_size (k);
        !           448:
        !           449: /*    printf ("[k=%d %ld] %ld ->", k, m, pl); */
        !           450:
        !           451:   if (pl == 0 || (pl & (m-1)) != 0)
        !           452:     pl = (pl | (m-1)) + 1;
        !           453:
        !           454: /*    printf (" %ld\n", pl); */
        !           455:   return pl;
        !           456: }
        !           457:
        !           458: void
        !           459: fft (struct fft_param_t *p)
        !           460: {
        !           461:   mp_size_t  size;
        !           462:   int        i, k;
        !           463:
        !           464:   for (i = 0; i < numberof (mpn_fft_table[p->sqr]); i++)
        !           465:     mpn_fft_table[p->sqr][i] = MP_SIZE_T_MAX;
        !           466:
        !           467:   *p->p_threshold = MP_SIZE_T_MAX;
        !           468:   *p->p_modf_threshold = MP_SIZE_T_MAX;
        !           469:
        !           470:   option_trace = MAX (option_trace, option_fft_trace);
        !           471:
        !           472:   printf ("#ifndef %s\n", p->table_name);
        !           473:   printf ("#define %s  {", p->table_name);
        !           474:   if (option_trace >= 2)
        !           475:     printf ("\n");
        !           476:
        !           477:   k = FFT_FIRST_K;
        !           478:   size = p->first_size;
        !           479:   for (;;)
        !           480:     {
        !           481:       double  tk, tk1;
        !           482:
        !           483:       size = fft_next_size (size+1, k+1);
        !           484:
        !           485:       if (size >= p->max_size)
        !           486:         break;
        !           487:       if (k >= FFT_FIRST_K + numberof (mpn_fft_table[p->sqr]))
        !           488:         break;
        !           489:
        !           490:       usleep(10000);
        !           491:
        !           492:       /* compare k to k+1 in the middle of the current k+1 step */
        !           493:       s.size = size + fft_step_size (k+1) / 2;
        !           494:       s.r = k;
        !           495:       tk = tuneup_measure (p->function, &s);
        !           496:       if (tk == -1.0)
        !           497:         abort ();
        !           498:
        !           499:       usleep(10000);
        !           500:
        !           501:       s.r = k+1;
        !           502:       tk1 = tuneup_measure (p->function, &s);
        !           503:       if (tk1 == -1.0)
        !           504:         abort ();
        !           505:
        !           506:       if (option_trace >= 2)
        !           507:         printf ("at %ld   size=%ld  k=%d  %.9lf   k=%d %.9lf\n",
        !           508:                 size, s.size, k, tk, k+1, tk1);
        !           509:
        !           510:       /* declare the k+1 threshold as soon as it's faster at its midpoint */
        !           511:       if (tk1 < tk)
        !           512:         {
        !           513:           mpn_fft_table[p->sqr][k-FFT_FIRST_K] = s.size;
        !           514:           printf (" %ld,", s.size);
        !           515:           if (option_trace >= 2) printf ("\n");
        !           516:           k++;
        !           517:         }
        !           518:     }
        !           519:
        !           520:   mpn_fft_table[p->sqr][k-FFT_FIRST_K] = 0;
        !           521:   printf (" 0 }\n");
        !           522:   printf ("#endif\n");
        !           523:
        !           524:
        !           525:   size = p->first_size;
        !           526:
        !           527:   /* Declare an FFT faster than a plain toom3 etc multiplication found as
        !           528:      soon as one faster measurement obtained.  A multiplication in the
        !           529:      middle of the FFT step is tested.  */
        !           530:   for (;;)
        !           531:     {
        !           532:       int     modf = (*p->p_modf_threshold == MP_SIZE_T_MAX);
        !           533:       double  tk, tm;
        !           534:
        !           535:       /* k=7 should be the first FFT which can beat toom3 on a full
        !           536:          multiply, so jump to that threshold and save some probing after the
        !           537:          modf threshold is found.  */
        !           538:       if (!modf && size < mpn_fft_table[p->sqr][2])
        !           539:         {
        !           540:           size = mpn_fft_table[p->sqr][2];
        !           541:           if (option_trace >= 2)
        !           542:             printf ("jump to size=%ld\n", size);
        !           543:         }
        !           544:
        !           545:       size = fft_next_size (size+1, mpn_fft_best_k (size, p->sqr));
        !           546:       k = mpn_fft_best_k (size, p->sqr);
        !           547:
        !           548:       if (size >= p->max_size)
        !           549:         break;
        !           550:
        !           551:       usleep(10000);
        !           552:
        !           553:       s.size = size + fft_step_size (k) / 2;
        !           554:       s.r = k;
        !           555:       tk = tuneup_measure (p->function, &s);
        !           556:       if (tk == -1.0)
        !           557:         abort ();
        !           558:
        !           559:       usleep(10000);
        !           560:
        !           561:       if (!modf)  s.size /= 2;
        !           562:       tm = tuneup_measure (p->mul_function, &s);
        !           563:       if (tm == -1.0)
        !           564:         abort ();
        !           565:
        !           566:       if (option_trace >= 2)
        !           567:         printf ("at %ld   size=%ld   k=%d  %.9lf   size=%ld %s mul %.9lf\n",
        !           568:                 size,
        !           569:                 size + fft_step_size (k) / 2, k, tk,
        !           570:                 s.size, modf ? "modf" : "full", tm);
        !           571:
        !           572:       if (tk < tm)
        !           573:         {
        !           574:           if (modf)
        !           575:             {
        !           576:               *p->p_modf_threshold = s.size;
        !           577:               print_define (p->modf_threshold_name, *p->p_modf_threshold);
        !           578:             }
        !           579:           else
        !           580:             {
        !           581:               *p->p_threshold = s.size;
        !           582:               print_define (p->threshold_name,      *p->p_threshold);
        !           583:               break;
        !           584:             }
        !           585:         }
        !           586:     }
        !           587:
        !           588: }
        !           589:
        !           590:
        !           591: void
        !           592: all (void)
        !           593: {
        !           594:   TMP_DECL (marker);
        !           595:
        !           596:   TMP_MARK (marker);
        !           597:   s.xp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, 0);
        !           598:   s.yp_block = SPEED_TMP_ALLOC_LIMBS (SPEED_BLOCK_SIZE, 0);
        !           599:
        !           600:   speed_time_init ();
        !           601:   fprintf (stderr, "speed_precision %d, speed_unittime %.2e\n",
        !           602:            speed_precision, speed_unittime);
        !           603:   fprintf (stderr, "MAX_SIZE %ld, fft_max_size %ld, STEP_FACTOR %.3f\n",
        !           604:            MAX_SIZE, option_fft_max_size, STEP_FACTOR);
        !           605:   fprintf (stderr, "\n");
        !           606:
        !           607:   {
        !           608:     struct tm  *tp;
        !           609:     time_t     t;
        !           610:     time (&t);
        !           611:     tp = localtime (&t);
        !           612:     printf ("/* Generated by tuneup.c, %d-%02d-%02d. */\n\n",
        !           613:             tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday);
        !           614:   }
        !           615:
        !           616:   {
        !           617:     static struct param_t  param;
        !           618:     param.name[0] = "KARATSUBA_MUL_THRESHOLD";
        !           619:     param.name[1] = "TOOM3_MUL_THRESHOLD";
        !           620:     param.max_size[1] = TOOM3_MUL_THRESHOLD_LIMIT;
        !           621:     one (speed_mpn_mul_n, mul_threshold, numberof(mul_threshold)-1, &param);
        !           622:   }
        !           623:   printf("\n");
        !           624:
        !           625:   {
        !           626:     static struct param_t  param;
        !           627:     param.name[0] = "KARATSUBA_SQR_THRESHOLD";
        !           628:     param.name[1] = "TOOM3_SQR_THRESHOLD";
        !           629:     param.max_size[0] = KARATSUBA_SQR_MAX;
        !           630:     one (speed_mpn_sqr_n, sqr_threshold, numberof(sqr_threshold)-1, &param);
        !           631:   }
        !           632:   printf("\n");
        !           633:
        !           634:   {
        !           635:     static struct param_t  param;
        !           636:     param.name[0] = "BZ_THRESHOLD";
        !           637:     one (speed_mpn_bz_tdiv_qr, bz_threshold, 1, &param);
        !           638:   }
        !           639:   printf("\n");
        !           640:
        !           641:   {
        !           642:     static struct param_t  param;
        !           643:     param.name[0] = "FIB_THRESHOLD";
        !           644:     one (speed_mpz_fib_ui, fib_threshold, 1, &param);
        !           645:   }
        !           646:   printf("\n");
        !           647:
        !           648:   /* mpz_powm becomes slow before long, so stop soon after the determined
        !           649:      threshold stops changing. */
        !           650:   {
        !           651:     static struct param_t  param;
        !           652:     param.name[0] = "POWM_THRESHOLD";
        !           653:     param.stop_since_change = 15;
        !           654:     one (speed_mpz_powm, powm_threshold, 1, &param);
        !           655:   }
        !           656:   printf("\n");
        !           657:
        !           658:   {
        !           659:     static struct param_t  param;
        !           660:     param.name[0] = "GCD_ACCEL_THRESHOLD";
        !           661:     param.min_size = 1;
        !           662:     one (speed_mpn_gcd, gcd_accel_threshold, 1, &param);
        !           663:   }
        !           664:   {
        !           665:     static struct param_t  param;
        !           666:     param.name[0] = "GCDEXT_THRESHOLD";
        !           667:     param.min_size = 1;
        !           668:     param.max_size[0] = 200;
        !           669:     one (speed_mpn_gcdext, gcdext_threshold, 1, &param);
        !           670:   }
        !           671:   printf("\n");
        !           672:
        !           673:   if (option_fft_max_size != 0)
        !           674:     {
        !           675:       {
        !           676:         static struct fft_param_t  param;
        !           677:         param.table_name          = "FFT_MUL_TABLE";
        !           678:         param.threshold_name      = "FFT_MUL_THRESHOLD";
        !           679:         param.p_threshold         = &FFT_MUL_THRESHOLD;
        !           680:         param.modf_threshold_name = "FFT_MODF_MUL_THRESHOLD";
        !           681:         param.p_modf_threshold    = &FFT_MODF_MUL_THRESHOLD;
        !           682:         param.first_size          = TOOM3_MUL_THRESHOLD / 2;
        !           683:         param.max_size            = option_fft_max_size;
        !           684:         param.function            = speed_mpn_mul_fft;
        !           685:         param.mul_function        = speed_mpn_mul_n;
        !           686:         param.sqr = 0;
        !           687:         fft (&param);
        !           688:       }
        !           689:       printf("\n");
        !           690:       {
        !           691:         static struct fft_param_t  param;
        !           692:         param.table_name          = "FFT_SQR_TABLE";
        !           693:         param.threshold_name      = "FFT_SQR_THRESHOLD";
        !           694:         param.p_threshold         = &FFT_SQR_THRESHOLD;
        !           695:         param.modf_threshold_name = "FFT_MODF_SQR_THRESHOLD";
        !           696:         param.p_modf_threshold    = &FFT_MODF_SQR_THRESHOLD;
        !           697:         param.first_size          = TOOM3_SQR_THRESHOLD / 2;
        !           698:         param.max_size            = option_fft_max_size;
        !           699:         param.function            = speed_mpn_mul_fft_sqr;
        !           700:         param.mul_function        = speed_mpn_sqr_n;
        !           701:         param.sqr = 0;
        !           702:         fft (&param);
        !           703:       }
        !           704:       printf ("\n");
        !           705:     }
        !           706:
        !           707:   TMP_FREE (marker);
        !           708: }
        !           709:
        !           710:
        !           711: int
        !           712: main (int argc, char *argv[])
        !           713: {
        !           714:   int  opt;
        !           715:
        !           716:   /* Unbuffered so if output is redirected to a file it isn't lost if the
        !           717:      program is killed part way through.  */
        !           718:   setbuf (stdout, NULL);
        !           719:   setbuf (stderr, NULL);
        !           720:
        !           721:   while ((opt = getopt(argc, argv, "f:o:p:t")) != EOF)
        !           722:     {
        !           723:       switch (opt) {
        !           724:       case 'f':
        !           725:         if (optarg[0] == 't')
        !           726:           option_fft_trace = 2;
        !           727:         else
        !           728:           option_fft_max_size = atol (optarg);
        !           729:         break;
        !           730:       case 'o':
        !           731:         speed_option_set (optarg);
        !           732:         break;
        !           733:       case 'p':
        !           734:         speed_precision = atoi (optarg);
        !           735:         break;
        !           736:       case 't':
        !           737:         option_trace++;
        !           738:         break;
        !           739:       case '?':
        !           740:         exit(1);
        !           741:       }
        !           742:     }
        !           743:
        !           744:   all ();
        !           745:   return 0;
        !           746: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>