Annotation of OpenXM_contrib/pari-2.2/src/test/tune.c, Revision 1.1
1.1 ! noro 1: /* $Id: tune.c,v 1.7 2002/06/09 00:36:54 karim Exp $
! 2:
! 3: Copyright (C) 2001 The PARI group.
! 4:
! 5: This file is part of the PARI/GP package.
! 6:
! 7: PARI/GP is free software; you can redistribute it and/or modify it under the
! 8: terms of the GNU General Public License as published by the Free Software
! 9: Foundation. It is distributed in the hope that it will be useful, but WITHOUT
! 10: ANY WARRANTY WHATSOEVER.
! 11:
! 12: Check the License for details. You should have received a copy of it, along
! 13: with the package; see the file 'COPYING'. If not, write to the Free Software
! 14: Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
! 15:
! 16: /* This file is a quick hack adapted from gmp-4.0 tuning utilities
! 17: * (T. Granlund et al.)
! 18: *
! 19: * (GMU MP Library is Copyright Free Software Foundation, Inc.) */
! 20: #include <pari.h>
! 21:
! 22: /* you might wish to tweak the Makefile so as to use GMP cycle counting
! 23: * functions (look for GMP in Oxxx/Makefile) */
! 24:
! 25: #define numberof(x) (sizeof(x) / sizeof((x)[0]))
! 26:
! 27: static int option_trace = 0;
! 28:
! 29: typedef struct {
! 30: ulong reps, size, type;
! 31: GEN x, y;
! 32: } speed_param;
! 33:
! 34: typedef double (*speed_function_t)(ANYARG);
! 35:
! 36: #define MAX_TABLE 2
! 37: #define MAX_SIZE 1000
! 38:
! 39: typedef struct {
! 40: const char *name[MAX_TABLE];
! 41: speed_function_t fun1;
! 42: speed_function_t fun2;
! 43: double step_factor; /* how much to step sizes (rounded down) */
! 44: double function_fudge; /* multiplier for "function" speeds */
! 45: int stop_since_change;
! 46: double stop_factor;
! 47: long min_size[MAX_TABLE];
! 48: long max_size[MAX_TABLE];
! 49: int type; /* t_INT or t_REAL */
! 50: } tune_param;
! 51:
! 52: /* ========================================================== */
! 53: #ifdef GMP_TIMER
! 54: /* needed to link with gmp-4.0/tune/{time,freq}.o */
! 55: int speed_option_verbose = 0;
! 56: extern double speed_unittime;
! 57: extern int speed_precision;
! 58: void speed_starttime(void);
! 59: double speed_endtime(void);
! 60: #else
! 61: static pari_timer __T;
! 62: static double speed_unittime = 1e-4;
! 63: static int speed_precision= 1000;
! 64: static void speed_starttime() { (void)TIMER(&__T); }
! 65: static double speed_endtime() { return (double)TIMER(&__T)/1000.; }
! 66: #endif
! 67:
! 68: /* ========================================================== */
! 69:
! 70: /* random int, n words */
! 71: static GEN
! 72: rand_INT(long n)
! 73: {
! 74: gpmem_t av = avma;
! 75: GEN x, N = shifti(gun, n*BITS_IN_LONG);
! 76: if (n <= 0) err(talker,"n too small in rand_INT (%ld)", n);
! 77: do
! 78: x = genrand(N);
! 79: while (lgefint(x) != n+2);
! 80: return gerepileuptoint(av, x);
! 81: }
! 82:
! 83: static GEN
! 84: rand_REAL(long n)
! 85: {
! 86: GEN r = cgetr(n+2);
! 87: gpmem_t av = avma;
! 88: GEN x = rand_INT(n);
! 89: affir(x,r); avma = av; return r;
! 90: }
! 91:
! 92: static GEN
! 93: rand_g(long n, long type)
! 94: {
! 95: return (type == t_INT)? rand_INT(n): rand_REAL(n);
! 96: }
! 97:
! 98: /* ========================================================== */
! 99:
! 100: #define TIME_FUN(call) {\
! 101: { \
! 102: gpmem_t av = avma; \
! 103: int i; \
! 104: speed_starttime(); \
! 105: i = (s)->reps; \
! 106: do { call; avma = av; } while (--i != 0); \
! 107: } \
! 108: return speed_endtime(); \
! 109: }
! 110:
! 111: extern void setsqri(long a);
! 112: extern void setmuli(long a);
! 113: #define disable_Karatsuba(s) (setmuli(lg(s->x)), setsqri(lg(s->x)))
! 114: #define enable_Karatsuba(s) (setmuli(lg(s->x)-2), setsqri(lg(s->x)-2))
! 115: #define MULFUN(call, s) {\
! 116: disable_Karatsuba(s); \
! 117: TIME_FUN(call(s->x, s->y)); }
! 118:
! 119: #define SQRFUN(call, s) {\
! 120: disable_Karatsuba(s); \
! 121: TIME_FUN(call(s->x)); }
! 122:
! 123: #define KARAMULFUN(call, s) {\
! 124: enable_Karatsuba(s); \
! 125: TIME_FUN(call(s->x, s->y)); }
! 126:
! 127: #define KARASQRFUN(call, s) {\
! 128: enable_Karatsuba(s); \
! 129: TIME_FUN(call(s->x)); }
! 130:
! 131: static double speed_mulii(speed_param *s) { MULFUN(mulii, s); }
! 132: static double speed_sqri (speed_param *s) { SQRFUN( sqri, s); }
! 133: static double speed_karamulii(speed_param *s) { KARAMULFUN(mulii, s); }
! 134: static double speed_karasqri (speed_param *s) { KARASQRFUN( sqri, s); }
! 135:
! 136: extern GEN init_remainder(GEN M);
! 137: extern ulong invrev(ulong b);
! 138: extern GEN red_montgomery(GEN T, GEN N, ulong inv);
! 139: extern GEN resiimul(GEN x, GEN sy);
! 140:
! 141: #define INIT_RED(s, op) \
! 142: long i, lx = lg(s->x); \
! 143: op = cgeti(2*lx - 2); \
! 144: op[1] = evallgefint(2*lx - 2) | evalsigne(1); \
! 145: for (i=2; i<lx; i++) op[i] = s->x[i]; \
! 146: for (i=2; i<lx; i++) op[lx-2+i] = s->x[i]; \
! 147: modBIL(s->y) |= 1; /* make sure modulus is odd */
! 148:
! 149: static double speed_redc(speed_param *s) {
! 150: ulong inv;
! 151: GEN op;
! 152: INIT_RED(s, op);
! 153: inv = (ulong) -invrev(modBIL(s->y));
! 154: TIME_FUN( red_montgomery(op, s->y, inv) );
! 155: };
! 156: static double speed_modii(speed_param *s) {
! 157: GEN op;
! 158: INIT_RED(s, op);
! 159: TIME_FUN( resii(op, s->y) );
! 160: };
! 161: static double speed_resiimul(speed_param *s) {
! 162: GEN op, sM;
! 163: INIT_RED(s, op);
! 164: sM = init_remainder(s->y);
! 165: TIME_FUN( resiimul(op, sM) );
! 166: }
! 167:
! 168: /* ========================================================== */
! 169:
! 170: int
! 171: double_cmp_ptr(double *x, double *y) { return *x - *y; }
! 172:
! 173: double
! 174: time_fun(speed_function_t fun, speed_param *s)
! 175: {
! 176: #define TOLERANCE 1.005 /* 0.5% */
! 177: gpmem_t av = avma;
! 178: double t[30];
! 179: long i,j,e;
! 180:
! 181:
! 182: s->x = rand_g(s->size, s->type);
! 183: s->y = rand_g(s->size, s->type);
! 184: s->reps = 1;
! 185: for (i = 0; i < numberof(t); i++)
! 186: {
! 187: for (;;)
! 188: {
! 189: double reps_d;
! 190: t[i] = fun(s);
! 191:
! 192: if (!t[i]) { s->reps *= 10; continue; }
! 193:
! 194: if (t[i] >= speed_unittime * speed_precision) break;
! 195:
! 196:
! 197: /* go to a value of reps to make t[i] >= precision */
! 198: reps_d = ceil (1.1 * s->reps
! 199: * speed_unittime * speed_precision
! 200: / max(t[i], speed_unittime));
! 201: if (reps_d > 2e9 || reps_d < 1.0)
! 202: err(talker, "Fatal error: new reps bad: %.2f\n", reps_d);
! 203:
! 204: s->reps = (ulong)reps_d;
! 205: }
! 206:
! 207: t[i] /= s->reps;
! 208:
! 209: /* require 3 values within TOLERANCE when >= 2 secs, 4 when below */
! 210: e = (t[0] >= 2.0)? 3: 4;
! 211:
! 212: /* Look for e many t[]'s within TOLERANCE of each other to consider a
! 213: valid measurement. Return smallest among them. */
! 214: if (i >= e)
! 215: {
! 216: qsort (t, i+1, sizeof(t[0]), (QSCOMP)double_cmp_ptr);
! 217: for (j = e-1; j < i; j++)
! 218: if (t[j] <= t[j-e+1] * TOLERANCE) { avma = av; return t[j-e+1]; }
! 219: }
! 220: }
! 221: err(talker,"couldn't measure time");
! 222: return -1.0; /* not reached */
! 223: }
! 224:
! 225: struct dat_t {
! 226: long size;
! 227: double d;
! 228: } *dat = NULL;
! 229: int ndat = 0;
! 230: int allocdat = 0;
! 231:
! 232: void
! 233: add_dat(long size, double d)
! 234: {
! 235: if (ndat == allocdat)
! 236: {
! 237: allocdat += max(allocdat, 100);
! 238: dat = (struct dat_t*) gprealloc((void*)dat, allocdat * sizeof(dat[0]));
! 239: }
! 240: dat[ndat].size = size;
! 241: dat[ndat].d = d;
! 242: ndat++;
! 243: }
! 244:
! 245: long
! 246: analyze_dat(int final)
! 247: {
! 248: double x, min_x;
! 249: int j, min_j;
! 250:
! 251: /* If the threshold is set at dat[0].size, any positive values are bad. */
! 252: x = 0.0;
! 253: for (j = 0; j < ndat; j++)
! 254: if (dat[j].d > 0.0) x += dat[j].d;
! 255:
! 256: if (final && option_trace >= 3)
! 257: {
! 258: printf("\n");
! 259: printf("x is the sum of the badness from setting thresh at given size\n");
! 260: printf(" (minimum x is sought)\n");
! 261: printf("size=%ld first x=%.4f\n", dat[j].size, x);
! 262: }
! 263:
! 264: min_x = x;
! 265: min_j = 0;
! 266:
! 267: /* When stepping to the next dat[j].size, positive values are no longer
! 268: bad (so subtracted), negative values become bad (so add the absolute
! 269: value, meaning subtract). */
! 270: for (j = 0; j < ndat; j++)
! 271: {
! 272: if (final && option_trace >= 3)
! 273: printf ("size=%ld x=%.4f\n", dat[j].size, x);
! 274:
! 275: if (x < min_x) { min_x = x; min_j = j; }
! 276: x -= dat[j].d;
! 277: }
! 278:
! 279: return min_j;
! 280: }
! 281:
! 282: void
! 283: print_define(const char *name, long value)
! 284: {
! 285: printf("#define %-25s %5ld\n\n", name, value);
! 286: }
! 287:
! 288: double
! 289: one(tune_param *param)
! 290: {
! 291: int since_positive, since_thresh_change;
! 292: int thresh_idx, new_thresh_idx;
! 293: speed_param s;
! 294:
! 295: #define DEFAULT(x,n) if (! (param->x)) param->x = (n);
! 296: DEFAULT (function_fudge, 1.0);
! 297: DEFAULT (fun2, param->fun1);
! 298: DEFAULT (step_factor, 0.01); /* small steps by default */
! 299: DEFAULT (stop_since_change, 80);
! 300: DEFAULT (stop_factor, 1.2);
! 301: DEFAULT (type, t_INT);
! 302:
! 303: s.type = param->type;
! 304: s.size = param->min_size[0];
! 305: ndat = 0;
! 306: since_positive = 0;
! 307: since_thresh_change = 0;
! 308: thresh_idx = 0;
! 309: if (option_trace >= 1)
! 310: printf("Setting %s...\n", param->name[0]);
! 311: if (option_trace >= 2)
! 312: {
! 313: printf(" algorithm-A algorithm-B ratio possible\n");
! 314: printf(" (seconds) (seconds) diff thresh\n");
! 315: }
! 316:
! 317: for (;
! 318: s.size < MAX_SIZE;
! 319: s.size += max((long)floor(s.size * param->step_factor), 1))
! 320: {
! 321: double t1,t2,d;
! 322: t1 = time_fun(param->fun1, &s);
! 323: t2 = time_fun(param->fun2, &s);
! 324: if (t2 >= t1)
! 325: d = (t2 - t1) / t2; /* <= 0 */
! 326: else
! 327: d = (t2 - t1) / t1; /* > 0 */
! 328:
! 329: add_dat(s.size, d);
! 330: new_thresh_idx = analyze_dat(0);
! 331:
! 332: if (option_trace >= 2)
! 333: {
! 334: printf ("size =%3ld %.9f %.9f % .4f %c %ld\n",
! 335: s.size, t1, t2, d,
! 336: t1 > t2 ? '#' : ' ', dat[new_thresh_idx].size);
! 337: }
! 338:
! 339: /* Stop if the last time method 1 was faster was more than a
! 340: certain number of measurements ago. */
! 341: #define STOP_SINCE_POSITIVE 100
! 342: if (d >= 0)
! 343: since_positive = 0;
! 344: else
! 345: if (++since_positive > STOP_SINCE_POSITIVE)
! 346: {
! 347: if (option_trace >= 1)
! 348: printf ("stopped due to since_positive (%d)\n",
! 349: STOP_SINCE_POSITIVE);
! 350: }
! 351: /* Stop if method 1 has become slower by a certain factor. */
! 352: if (t1 >= t2 * param->stop_factor)
! 353: {
! 354: if (option_trace >= 1)
! 355: printf ("stopped due to t1 >= t2 * factor (%.1f)\n",
! 356: param->stop_factor);
! 357: break;
! 358: }
! 359: /* Stop if the threshold implied hasn't changed in a certain
! 360: number of measurements. */
! 361: if (thresh_idx != new_thresh_idx)
! 362: since_thresh_change = 0, thresh_idx = new_thresh_idx;
! 363: else
! 364: if (++since_thresh_change > param->stop_since_change)
! 365: {
! 366: if (option_trace >= 1)
! 367: printf ("stopped due to since_thresh_change (%d)\n",
! 368: param->stop_since_change);
! 369: break;
! 370: }
! 371: }
! 372: thresh_idx = dat[analyze_dat(1)].size;
! 373: print_define(param->name[0], thresh_idx);
! 374: return thresh_idx;
! 375: }
! 376:
! 377: void error(int argc/* ignored */, char **argv)
! 378: {
! 379: (void)argv;
! 380: err(talker, "usage: %s [-t] [-t] [-u unittime]", argv[0]);
! 381: }
! 382:
! 383: int
! 384: main(int argc, char **argv)
! 385: {
! 386: int i, MONTGOMERY_LIMIT;
! 387: pari_init(4000000, 2);
! 388: for (i = 1; i < argc; i++)
! 389: {
! 390: char *s = argv[i];
! 391: if (*s++ != '-') error(argc,argv);
! 392: switch(*s) {
! 393: case 't': option_trace++; continue;
! 394: case 'u': s++;
! 395: if (!*s)
! 396: {
! 397: if (++i == argc) error(argc,argv);
! 398: s = argv[i];
! 399: }
! 400: speed_unittime = atof(s);
! 401: break;
! 402: default: error(argc,argv);
! 403: }
! 404: }
! 405:
! 406: { static tune_param param;
! 407: param.name[0] = "KARATSUBA_MULI_LIMIT";
! 408: param.min_size[0] = 4;
! 409: param.fun1 = &speed_mulii;
! 410: param.fun2 = &speed_karamulii;
! 411: (void)one(¶m);
! 412: }
! 413: { static tune_param param;
! 414: param.name[0] = "KARATSUBA_SQRI_LIMIT";
! 415: param.min_size[0] = 4;
! 416: param.fun1 = (speed_function_t)&speed_sqri;
! 417: param.fun2 = (speed_function_t)&speed_karasqri;
! 418: (void)one(¶m);
! 419: }
! 420: { static tune_param param;
! 421: param.name[0] = "MONTGOMERY_LIMIT";
! 422: param.min_size[0] = 1;
! 423: param.fun1 = &speed_redc;
! 424: param.fun2 = &speed_modii;
! 425: MONTGOMERY_LIMIT = one(¶m);
! 426: }
! 427: { static tune_param param;
! 428: param.name[0] = "RESIIMUL_LIMIT";
! 429: param.min_size[0] = MONTGOMERY_LIMIT;
! 430: param.fun1 = &speed_modii;
! 431: param.fun2 = &speed_resiimul;
! 432: (void)one(¶m);
! 433: }
! 434:
! 435: if (dat) free((void*)dat);
! 436: return 1;
! 437: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>