[BACK]Return to tune.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / pari-2.2 / src / test

Annotation of OpenXM_contrib/pari-2.2/src/test/tune.c, Revision 1.1

1.1     ! noro        1: /* $Id: tune.c,v 1.7 2002/06/09 00:36:54 karim Exp $
        !             2:
        !             3: Copyright (C) 2001  The PARI group.
        !             4:
        !             5: This file is part of the PARI/GP package.
        !             6:
        !             7: PARI/GP is free software; you can redistribute it and/or modify it under the
        !             8: terms of the GNU General Public License as published by the Free Software
        !             9: Foundation. It is distributed in the hope that it will be useful, but WITHOUT
        !            10: ANY WARRANTY WHATSOEVER.
        !            11:
        !            12: Check the License for details. You should have received a copy of it, along
        !            13: with the package; see the file 'COPYING'. If not, write to the Free Software
        !            14: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
        !            15:
        !            16: /* This file is a quick hack adapted from gmp-4.0 tuning utilities
        !            17:  * (T. Granlund et al.)
        !            18:  *
        !            19:  * (GMU MP Library is Copyright Free Software Foundation, Inc.) */
        !            20: #include <pari.h>
        !            21:
        !            22: /* you might wish to tweak the Makefile so as to use GMP cycle counting
        !            23:  * functions (look for GMP in Oxxx/Makefile) */
        !            24:
        !            25: #define numberof(x) (sizeof(x) / sizeof((x)[0]))
        !            26:
        !            27: static int option_trace = 0;
        !            28:
        !            29: typedef struct {
        !            30:   ulong reps, size, type;
        !            31:   GEN x, y;
        !            32: } speed_param;
        !            33:
        !            34: typedef double (*speed_function_t)(ANYARG);
        !            35:
        !            36: #define MAX_TABLE 2
        !            37: #define MAX_SIZE 1000
        !            38:
        !            39: typedef struct {
        !            40:   const char        *name[MAX_TABLE];
        !            41:   speed_function_t  fun1;
        !            42:   speed_function_t  fun2;
        !            43:   double            step_factor;    /* how much to step sizes (rounded down) */
        !            44:   double            function_fudge; /* multiplier for "function" speeds */
        !            45:   int               stop_since_change;
        !            46:   double            stop_factor;
        !            47:   long              min_size[MAX_TABLE];
        !            48:   long              max_size[MAX_TABLE];
        !            49:   int               type; /* t_INT or t_REAL */
        !            50: } tune_param;
        !            51:
        !            52: /* ========================================================== */
        !            53: #ifdef GMP_TIMER
        !            54: /* needed to link with gmp-4.0/tune/{time,freq}.o */
        !            55: int speed_option_verbose = 0;
        !            56: extern double speed_unittime;
        !            57: extern int    speed_precision;
        !            58: void speed_starttime(void);
        !            59: double speed_endtime(void);
        !            60: #else
        !            61: static pari_timer __T;
        !            62: static double speed_unittime = 1e-4;
        !            63: static int    speed_precision= 1000;
        !            64: static void speed_starttime() { (void)TIMER(&__T); }
        !            65: static double speed_endtime() { return (double)TIMER(&__T)/1000.; }
        !            66: #endif
        !            67:
        !            68: /* ========================================================== */
        !            69:
        !            70: /* random int, n words */
        !            71: static GEN
        !            72: rand_INT(long n)
        !            73: {
        !            74:   gpmem_t av = avma;
        !            75:   GEN x, N = shifti(gun, n*BITS_IN_LONG);
        !            76:   if (n <= 0) err(talker,"n too small in rand_INT (%ld)", n);
        !            77:   do
        !            78:     x = genrand(N);
        !            79:   while (lgefint(x) != n+2);
        !            80:   return gerepileuptoint(av, x);
        !            81: }
        !            82:
        !            83: static GEN
        !            84: rand_REAL(long n)
        !            85: {
        !            86:   GEN r = cgetr(n+2);
        !            87:   gpmem_t av = avma;
        !            88:   GEN x = rand_INT(n);
        !            89:   affir(x,r); avma = av; return r;
        !            90: }
        !            91:
        !            92: static GEN
        !            93: rand_g(long n, long type)
        !            94: {
        !            95:   return (type == t_INT)? rand_INT(n): rand_REAL(n);
        !            96: }
        !            97:
        !            98: /* ========================================================== */
        !            99:
        !           100: #define TIME_FUN(call) {\
        !           101:   {                                            \
        !           102:     gpmem_t av = avma;                         \
        !           103:     int i;                                     \
        !           104:     speed_starttime();                         \
        !           105:     i = (s)->reps;                             \
        !           106:     do { call; avma = av; } while (--i != 0);  \
        !           107:   }                                            \
        !           108:   return speed_endtime();                      \
        !           109: }
        !           110:
        !           111: extern void setsqri(long a);
        !           112: extern void setmuli(long a);
        !           113: #define disable_Karatsuba(s) (setmuli(lg(s->x)), setsqri(lg(s->x)))
        !           114: #define  enable_Karatsuba(s) (setmuli(lg(s->x)-2), setsqri(lg(s->x)-2))
        !           115: #define MULFUN(call, s) {\
        !           116:   disable_Karatsuba(s);  \
        !           117:   TIME_FUN(call(s->x, s->y)); }
        !           118:
        !           119: #define SQRFUN(call, s) {\
        !           120:   disable_Karatsuba(s);  \
        !           121:   TIME_FUN(call(s->x)); }
        !           122:
        !           123: #define KARAMULFUN(call, s) {\
        !           124:   enable_Karatsuba(s);       \
        !           125:   TIME_FUN(call(s->x, s->y)); }
        !           126:
        !           127: #define KARASQRFUN(call, s) {\
        !           128:   enable_Karatsuba(s);       \
        !           129:   TIME_FUN(call(s->x)); }
        !           130:
        !           131: static double speed_mulii(speed_param *s) { MULFUN(mulii, s); }
        !           132: static double speed_sqri (speed_param *s) { SQRFUN( sqri, s); }
        !           133: static double speed_karamulii(speed_param *s) { KARAMULFUN(mulii, s); }
        !           134: static double speed_karasqri (speed_param *s) { KARASQRFUN( sqri, s); }
        !           135:
        !           136: extern  GEN init_remainder(GEN M);
        !           137: extern ulong invrev(ulong b);
        !           138: extern GEN red_montgomery(GEN T, GEN N, ulong inv);
        !           139: extern GEN resiimul(GEN x, GEN sy);
        !           140:
        !           141: #define INIT_RED(s, op)                                 \
        !           142:   long i, lx = lg(s->x);                                \
        !           143:   op = cgeti(2*lx - 2);                                 \
        !           144:   op[1] = evallgefint(2*lx - 2) | evalsigne(1);         \
        !           145:   for (i=2; i<lx; i++) op[i]      = s->x[i];            \
        !           146:   for (i=2; i<lx; i++) op[lx-2+i] = s->x[i];            \
        !           147:   modBIL(s->y) |= 1; /* make sure modulus is odd */
        !           148:
        !           149: static double speed_redc(speed_param *s) {
        !           150:   ulong inv;
        !           151:   GEN op;
        !           152:   INIT_RED(s, op);
        !           153:   inv = (ulong) -invrev(modBIL(s->y));
        !           154:   TIME_FUN( red_montgomery(op, s->y, inv) );
        !           155: };
        !           156: static double speed_modii(speed_param *s) {
        !           157:   GEN op;
        !           158:   INIT_RED(s, op);
        !           159:   TIME_FUN( resii(op, s->y) );
        !           160: };
        !           161: static double speed_resiimul(speed_param *s) {
        !           162:   GEN op, sM;
        !           163:   INIT_RED(s, op);
        !           164:   sM = init_remainder(s->y);
        !           165:   TIME_FUN( resiimul(op, sM) );
        !           166: }
        !           167:
        !           168: /* ========================================================== */
        !           169:
        !           170: int
        !           171: double_cmp_ptr(double *x, double *y) { return *x - *y; }
        !           172:
        !           173: double
        !           174: time_fun(speed_function_t fun, speed_param *s)
        !           175: {
        !           176: #define TOLERANCE 1.005 /* 0.5% */
        !           177:   gpmem_t av = avma;
        !           178:   double t[30];
        !           179:   long i,j,e;
        !           180:
        !           181:
        !           182:   s->x = rand_g(s->size, s->type);
        !           183:   s->y = rand_g(s->size, s->type);
        !           184:   s->reps = 1;
        !           185:   for (i = 0; i < numberof(t); i++)
        !           186:   {
        !           187:     for (;;)
        !           188:     {
        !           189:       double reps_d;
        !           190:       t[i] = fun(s);
        !           191:
        !           192:       if (!t[i]) { s->reps *= 10; continue; }
        !           193:
        !           194:       if (t[i] >= speed_unittime * speed_precision) break;
        !           195:
        !           196:
        !           197:       /* go to a value of reps to make t[i] >= precision */
        !           198:       reps_d = ceil (1.1 * s->reps
        !           199:                      * speed_unittime * speed_precision
        !           200:                      / max(t[i], speed_unittime));
        !           201:       if (reps_d > 2e9 || reps_d < 1.0)
        !           202:         err(talker, "Fatal error: new reps bad: %.2f\n", reps_d);
        !           203:
        !           204:       s->reps = (ulong)reps_d;
        !           205:     }
        !           206:
        !           207:     t[i] /= s->reps;
        !           208:
        !           209:     /* require 3 values within TOLERANCE when >= 2 secs, 4 when below */
        !           210:     e = (t[0] >= 2.0)? 3: 4;
        !           211:
        !           212:    /* Look for e many t[]'s within TOLERANCE of each other to consider a
        !           213:       valid measurement.  Return smallest among them.  */
        !           214:     if (i >= e)
        !           215:     {
        !           216:       qsort (t, i+1, sizeof(t[0]), (QSCOMP)double_cmp_ptr);
        !           217:       for (j = e-1; j < i; j++)
        !           218:         if (t[j] <= t[j-e+1] * TOLERANCE) { avma = av; return t[j-e+1]; }
        !           219:     }
        !           220:   }
        !           221:   err(talker,"couldn't measure time");
        !           222:   return -1.0; /* not reached */
        !           223: }
        !           224:
        !           225: struct dat_t {
        !           226:   long size;
        !           227:   double d;
        !           228: } *dat = NULL;
        !           229: int ndat = 0;
        !           230: int allocdat = 0;
        !           231:
        !           232: void
        !           233: add_dat(long size, double d)
        !           234: {
        !           235:   if (ndat == allocdat)
        !           236:   {
        !           237:     allocdat += max(allocdat, 100);
        !           238:     dat = (struct dat_t*) gprealloc((void*)dat, allocdat * sizeof(dat[0]));
        !           239:   }
        !           240:   dat[ndat].size = size;
        !           241:   dat[ndat].d    = d;
        !           242:   ndat++;
        !           243: }
        !           244:
        !           245: long
        !           246: analyze_dat(int final)
        !           247: {
        !           248:   double  x, min_x;
        !           249:   int     j, min_j;
        !           250:
        !           251:   /* If the threshold is set at dat[0].size, any positive values are bad. */
        !           252:   x = 0.0;
        !           253:   for (j = 0; j < ndat; j++)
        !           254:     if (dat[j].d > 0.0) x += dat[j].d;
        !           255:
        !           256:   if (final && option_trace >= 3)
        !           257:   {
        !           258:     printf("\n");
        !           259:     printf("x is the sum of the badness from setting thresh at given size\n");
        !           260:     printf("  (minimum x is sought)\n");
        !           261:     printf("size=%ld  first x=%.4f\n", dat[j].size, x);
        !           262:   }
        !           263:
        !           264:   min_x = x;
        !           265:   min_j = 0;
        !           266:
        !           267:   /* When stepping to the next dat[j].size, positive values are no longer
        !           268:      bad (so subtracted), negative values become bad (so add the absolute
        !           269:      value, meaning subtract). */
        !           270:   for (j = 0; j < ndat; j++)
        !           271:   {
        !           272:     if (final && option_trace >= 3)
        !           273:       printf ("size=%ld  x=%.4f\n", dat[j].size, x);
        !           274:
        !           275:     if (x < min_x) { min_x = x; min_j = j; }
        !           276:     x -= dat[j].d;
        !           277:   }
        !           278:
        !           279:   return min_j;
        !           280: }
        !           281:
        !           282: void
        !           283: print_define(const char *name, long value)
        !           284: {
        !           285:   printf("#define %-25s  %5ld\n\n", name, value);
        !           286: }
        !           287:
        !           288: double
        !           289: one(tune_param *param)
        !           290: {
        !           291:   int  since_positive, since_thresh_change;
        !           292:   int  thresh_idx, new_thresh_idx;
        !           293:   speed_param s;
        !           294:
        !           295: #define DEFAULT(x,n)  if (! (param->x))  param->x = (n);
        !           296:   DEFAULT (function_fudge, 1.0);
        !           297:   DEFAULT (fun2, param->fun1);
        !           298:   DEFAULT (step_factor, 0.01);  /* small steps by default */
        !           299:   DEFAULT (stop_since_change, 80);
        !           300:   DEFAULT (stop_factor, 1.2);
        !           301:   DEFAULT (type, t_INT);
        !           302:
        !           303:   s.type = param->type;
        !           304:   s.size = param->min_size[0];
        !           305:   ndat = 0;
        !           306:   since_positive = 0;
        !           307:   since_thresh_change = 0;
        !           308:   thresh_idx = 0;
        !           309:   if (option_trace >= 1)
        !           310:     printf("Setting %s...\n", param->name[0]);
        !           311:   if (option_trace >= 2)
        !           312:   {
        !           313:     printf("             algorithm-A  algorithm-B   ratio  possible\n");
        !           314:     printf("              (seconds)    (seconds)    diff    thresh\n");
        !           315:   }
        !           316:
        !           317:   for (;
        !           318:         s.size < MAX_SIZE;
        !           319:         s.size += max((long)floor(s.size * param->step_factor), 1))
        !           320:   {
        !           321:     double t1,t2,d;
        !           322:     t1 = time_fun(param->fun1, &s);
        !           323:     t2 = time_fun(param->fun2, &s);
        !           324:     if (t2 >= t1)
        !           325:       d = (t2 - t1) / t2; /* <= 0 */
        !           326:     else
        !           327:       d = (t2 - t1) / t1; /* > 0  */
        !           328:
        !           329:     add_dat(s.size, d);
        !           330:     new_thresh_idx = analyze_dat(0);
        !           331:
        !           332:     if (option_trace >= 2)
        !           333:     {
        !           334:       printf ("size =%3ld    %.9f  %.9f  % .4f %c  %ld\n",
        !           335:                s.size, t1, t2, d,
        !           336:                t1 > t2 ? '#' : ' ', dat[new_thresh_idx].size);
        !           337:     }
        !           338:
        !           339:     /* Stop if the last time method 1 was faster was more than a
        !           340:         certain number of measurements ago.  */
        !           341: #define STOP_SINCE_POSITIVE 100
        !           342:     if (d >= 0)
        !           343:       since_positive = 0;
        !           344:     else
        !           345:       if (++since_positive > STOP_SINCE_POSITIVE)
        !           346:       {
        !           347:         if (option_trace >= 1)
        !           348:           printf ("stopped due to since_positive (%d)\n",
        !           349:                   STOP_SINCE_POSITIVE);
        !           350:       }
        !           351:     /* Stop if method 1 has become slower by a certain factor. */
        !           352:     if (t1 >= t2 * param->stop_factor)
        !           353:     {
        !           354:       if (option_trace >= 1)
        !           355:         printf ("stopped due to t1 >= t2 * factor (%.1f)\n",
        !           356:                 param->stop_factor);
        !           357:       break;
        !           358:     }
        !           359:     /* Stop if the threshold implied hasn't changed in a certain
        !           360:        number of measurements. */
        !           361:     if (thresh_idx != new_thresh_idx)
        !           362:       since_thresh_change = 0, thresh_idx = new_thresh_idx;
        !           363:     else
        !           364:       if (++since_thresh_change > param->stop_since_change)
        !           365:       {
        !           366:         if (option_trace >= 1)
        !           367:           printf ("stopped due to since_thresh_change (%d)\n",
        !           368:                   param->stop_since_change);
        !           369:         break;
        !           370:       }
        !           371:   }
        !           372:   thresh_idx = dat[analyze_dat(1)].size;
        !           373:   print_define(param->name[0], thresh_idx);
        !           374:   return thresh_idx;
        !           375: }
        !           376:
        !           377: void error(int argc/* ignored */, char **argv)
        !           378: {
        !           379:   (void)argv;
        !           380:   err(talker, "usage: %s [-t] [-t] [-u unittime]", argv[0]);
        !           381: }
        !           382:
        !           383: int
        !           384: main(int argc, char **argv)
        !           385: {
        !           386:   int i, MONTGOMERY_LIMIT;
        !           387:   pari_init(4000000, 2);
        !           388:   for (i = 1; i < argc; i++)
        !           389:   {
        !           390:     char *s = argv[i];
        !           391:     if (*s++ != '-') error(argc,argv);
        !           392:     switch(*s) {
        !           393:       case 't': option_trace++; continue;
        !           394:       case 'u': s++;
        !           395:         if (!*s)
        !           396:         {
        !           397:           if (++i == argc) error(argc,argv);
        !           398:           s = argv[i];
        !           399:         }
        !           400:         speed_unittime = atof(s);
        !           401:         break;
        !           402:       default: error(argc,argv);
        !           403:     }
        !           404:   }
        !           405:
        !           406:   { static tune_param param;
        !           407:     param.name[0] = "KARATSUBA_MULI_LIMIT";
        !           408:     param.min_size[0] = 4;
        !           409:     param.fun1 = &speed_mulii;
        !           410:     param.fun2 = &speed_karamulii;
        !           411:     (void)one(&param);
        !           412:   }
        !           413:   { static tune_param param;
        !           414:     param.name[0] = "KARATSUBA_SQRI_LIMIT";
        !           415:     param.min_size[0] = 4;
        !           416:     param.fun1 = (speed_function_t)&speed_sqri;
        !           417:     param.fun2 = (speed_function_t)&speed_karasqri;
        !           418:     (void)one(&param);
        !           419:   }
        !           420:   { static tune_param param;
        !           421:     param.name[0] = "MONTGOMERY_LIMIT";
        !           422:     param.min_size[0] = 1;
        !           423:     param.fun1 = &speed_redc;
        !           424:     param.fun2 = &speed_modii;
        !           425:     MONTGOMERY_LIMIT = one(&param);
        !           426:   }
        !           427:   { static tune_param param;
        !           428:     param.name[0] = "RESIIMUL_LIMIT";
        !           429:     param.min_size[0] = MONTGOMERY_LIMIT;
        !           430:     param.fun1 = &speed_modii;
        !           431:     param.fun2 = &speed_resiimul;
        !           432:     (void)one(&param);
        !           433:   }
        !           434:
        !           435:   if (dat) free((void*)dat);
        !           436:   return 1;
        !           437: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>