Annotation of OpenXM_contrib/gmp/tune/time.c, Revision 1.1.1.2
1.1.1.2 ! ohara 1: /* Time routines for speed measurements.
1.1 maekawa 2:
1.1.1.2 ! ohara 3: Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
1.1 maekawa 4:
5: This file is part of the GNU MP Library.
6:
7: The GNU MP Library is free software; you can redistribute it and/or modify
8: it under the terms of the GNU Lesser General Public License as published by
9: the Free Software Foundation; either version 2.1 of the License, or (at your
10: option) any later version.
11:
12: The GNU MP Library is distributed in the hope that it will be useful, but
13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15: License for more details.
16:
17: You should have received a copy of the GNU Lesser General Public License
18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
1.1.1.2 ! ohara 20: MA 02111-1307, USA. */
! 21:
1.1 maekawa 22:
23: /* speed_time_init() - initialize timing things. speed_starttime() calls
24: this if it hasn't been done yet, so you only need to call this explicitly
25: if you want to use the global variables before the first measurement.
26:
27: speed_starttime() - start a time measurement.
28:
1.1.1.2 ! ohara 29: speed_endtime() - end a time measurement, return time taken (seconds or
! 30: cycles).
! 31:
! 32: speed_time_string - a string describing the time method in use.
1.1 maekawa 33:
34: speed_unittime - global variable with the unit of time measurement
1.1.1.2 ! ohara 35: accuracy (seconds or cycles).
1.1 maekawa 36:
37: speed_precision - global variable which is the intended accuracy of time
38: measurements. speed_measure() for instance runs target routines with
1.1.1.2 ! ohara 39: enough repetitions so it takes at least speed_unittime*speed_precision.
! 40: A program can provide an option so the user can set this, otherwise it
! 41: gets a default based on the measuring method chosen.
1.1 maekawa 42:
43: speed_cycletime - the time in seconds for each CPU cycle, for example on
1.1.1.2 ! ohara 44: a 100 MHz CPU this would be 1.0e-8. If the CPU frequency is unknown this
! 45: is 0.0 if the time base is in seconds, or 1.0 if it's in cycles.
1.1 maekawa 46:
47:
1.1.1.2 ! ohara 48: speed_endtime() and speed_unittime are normally in seconds, but if a
! 49: cycle counter is being used to measure and the CPU frequency is unknown,
! 50: then speed_endtime() returns cycles and speed_cycletime and
! 51: speed_unittime are 1.0.
! 52:
! 53: Notice that speed_unittime*speed_precision is the target duration for
! 54: speed_endtime(), irrespective of whether that's in seconds or cycles.
! 55:
! 56: Call speed_cycletime_need_seconds() to demand that speed_endtime() is in
! 57: seconds and not perhaps in cycles.
! 58:
! 59: Call speed_cycletime_need_cycles() to demand that speed_cycletime is
! 60: non-zero, so that speed_endtime()/speed_cycletime will work to give times
! 61: in cycles.
! 62:
! 63:
! 64: Notes:
! 65:
! 66: Various combinations of cycle counter, read_real_time(), getrusage(),
! 67: gettimeofday() and times() can arise, according to which are available
! 68: and their precision.
1.1 maekawa 69:
70:
1.1.1.2 ! ohara 71: Allowing speed_endtime() to return either seconds or cycles is only a
! 72: slight complication and makes it possible for the speed program to do
! 73: some sensible things without demanding the CPU frequency. If seconds are
! 74: being measured then it can always print seconds, and if cycles are being
! 75: measured then it can always print them without needing to know how long
! 76: they are. Also the tune program doesn't care at all what the units are.
1.1 maekawa 77:
1.1.1.2 ! ohara 78: GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
! 79: fail. This will be needed if times in seconds are wanted but a cycle
! 80: counter is being used, or if times in cycles are wanted but getrusage or
! 81: another seconds based timer is in use.
1.1 maekawa 82:
1.1.1.2 ! ohara 83: If the measuring method uses a cycle counter but supplements it with
! 84: getrusage or the like, then knowing the CPU frequency is mandatory since
! 85: the code compares values from the two.
1.1 maekawa 86:
87:
1.1.1.2 ! ohara 88: Solaris gethrtime() seems no more than a slow way to access the Sparc V9
! 89: cycle counter. gethrvtime() seems to be relevant only to LWP, it doesn't
! 90: for instance give nanosecond virtual time. So neither of these are used.
! 91:
! 92:
! 93: Bugs:
! 94:
! 95: getrusage_microseconds_p is fundamentally flawed, getrusage and
! 96: gettimeofday can have resolutions other than clock ticks or microseconds,
! 97: for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
! 98:
! 99: Enhancements:
! 100:
! 101: The SGI hardware counter has 64 bits on some machines, which could be
! 102: used when available. But perhaps 32 bits is enough range, and then rely
! 103: on the getrusage supplement.
! 104:
! 105: Maybe getrusage (or times) should be used as a supplement for any
! 106: wall-clock measuring method. Currently a wall clock with a good range
! 107: (eg. a 64-bit cycle counter) is used without a supplement.
! 108:
! 109: On PowerPC the timebase registers could be used, but would have to do
! 110: something to find out the speed. On 6xx chips it's normally 1/4 bus
! 111: speed, on 4xx chips it's either that or an external clock. Measuring
! 112: against gettimeofday might be ok. */
! 113:
! 114:
! 115: #include "config.h"
! 116:
! 117: #include <errno.h>
! 118: #include <setjmp.h>
! 119: #include <signal.h>
! 120: #include <stddef.h>
1.1 maekawa 121: #include <stdio.h>
1.1.1.2 ! ohara 122: #include <string.h>
! 123: #include <stdlib.h> /* for getenv() */
! 124:
! 125: #if HAVE_FCNTL_H
! 126: #include <fcntl.h> /* for open() */
! 127: #endif
! 128:
! 129: #if HAVE_STDINT_H
! 130: #include <stdint.h> /* for uint64_t */
! 131: #endif
! 132:
1.1 maekawa 133: #if HAVE_UNISTD_H
1.1.1.2 ! ohara 134: #include <unistd.h> /* for sysconf() */
1.1 maekawa 135: #endif
136:
137: #include <sys/types.h>
1.1.1.2 ! ohara 138:
! 139: #if TIME_WITH_SYS_TIME
! 140: # include <sys/time.h> /* for struct timeval */
! 141: # include <time.h>
! 142: #else
! 143: # if HAVE_SYS_TIME_H
! 144: # include <sys/time.h>
! 145: # else
! 146: # include <time.h>
! 147: # endif
! 148: #endif
! 149:
! 150: #if HAVE_SYS_MMAN_H
! 151: #include <sys/mman.h> /* for mmap() */
! 152: #endif
! 153:
! 154: #if HAVE_SYS_RESOURCE_H
! 155: #include <sys/resource.h> /* for struct rusage */
! 156: #endif
! 157:
! 158: #if HAVE_SYS_SYSSGI_H
! 159: #include <sys/syssgi.h> /* for syssgi() */
! 160: #endif
! 161:
! 162: #if HAVE_SYS_SYSTEMCFG_H
! 163: #include <sys/systemcfg.h> /* for RTC_POWER on AIX */
! 164: #endif
! 165:
! 166: #if HAVE_SYS_TIMES_H
! 167: #include <sys/times.h> /* for times() and struct tms */
1.1 maekawa 168: #endif
169:
170: #include "gmp.h"
171: #include "gmp-impl.h"
172:
173: #include "speed.h"
174:
175:
1.1.1.2 ! ohara 176:
/* Description of the measuring method in use; written by
   speed_time_init().  */
! 177: char speed_time_string[256];
/* Zero means "not set yet"; speed_time_init() then fills in a default
   that depends on the measuring method chosen.  */
! 178: int speed_precision = 0;
/* Unit of time measurement accuracy (seconds or cycles); set by
   speed_time_init().  */
! 179: double speed_unittime;
/* Time in seconds for each CPU cycle; 0.0 here until something establishes
   it.  */
! 180: double speed_cycletime = 0.0;
! 181:
! 182:
! 183: /* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
! 184: native cc */
/* (INT_MAX + 1) * 2 as a double -- presumably meant as UINT_MAX + 1.  */
! 185: #define M_2POWU (((double) INT_MAX + 1.0) * 2.0)
! 186:
/* 2^32 and 2^64 as doubles.  */
! 187: #define M_2POW32 4294967296.0
! 188: #define M_2POW64 (M_2POW32 * M_2POW32)
! 189:
! 190:
! 191: /* Conditionals for the time functions available are done with normal C
! 192: code, which is a lot easier than wildly nested preprocessor directives.
! 193:
! 194: The choice of what to use is partly made at run-time, according to
! 195: whether the cycle counter works and the measured accuracy of getrusage
! 196: and gettimeofday.
! 197:
! 198: A routine that's not available won't be getting called, but is an abort()
! 199: to be sure it isn't called mistakenly.
! 200:
! 201: It can be assumed that if a function exists then its data type will, but
! 202: if the function doesn't then the data type might or might not exist, so
! 203: the type can't be used unconditionally. The "struct_rusage" etc macros
! 204: provide dummies when the respective function doesn't exist. */
! 205:
! 206:
/* have_cycles takes the value of HAVE_SPEED_CYCLECOUNTER when that is
   non-zero, otherwise 0.  speed_time_init() treats a value of 1 as a
   counter with 32 bits of range and anything else as 64 bits.  Without a
   cycle counter, speed_cyclecounter() becomes an ASSERT_FAIL so a mistaken
   call is caught.  */
1.1 maekawa 207: #if HAVE_SPEED_CYCLECOUNTER
1.1.1.2 ! ohara 208: static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
1.1 maekawa 209: #else
1.1.1.2 ! ohara 210: static const int have_cycles = 0;
! 211: #define speed_cyclecounter(p) ASSERT_FAIL (speed_cyclecounter not available)
1.1 maekawa 212: #endif
213:
1.1.1.2 ! ohara 214: /* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
! 215: microseconds. Same #ifdefs here as in longlong.h. */
! 216: #if defined (__GNUC__) && ! defined (NO_ASM) \
! 217: && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
! 218: static const int have_stck = 1;
! 219: static const int use_stck = 1; /* always use when available */
! 220: typedef uint64_t stck_t; /* gcc for s390 is quite new, always has uint64_t */
/* Store the current "stck" timestamp into the lvalue TIMESTAMP.  */
! 221: #define STCK(timestamp) \
! 222: do { \
! 223: asm ("stck %0" : "=m" (timestamp)); \
! 224: } while (0)
! 225: #else
/* No "stck" instruction: stck_t is a placeholder type and STCK() is an
   ASSERT_FAIL.  */
! 226: static const int have_stck = 0;
! 227: static const int use_stck = 0;
! 228: typedef unsigned long stck_t; /* dummy */
! 229: #define STCK(timestamp) ASSERT_FAIL (stck instruction not available)
! 230: #endif
! 231: #define STCK_PERIOD (1.0 / 4096e6) /* 2^-12 microseconds */
1.1 maekawa 232:
1.1.1.2 ! ohara 233: /* Unicos 10.X has syssgi(), but not mmap(). */
/* The SGI counter method needs both syssgi() and mmap(), see
   sgi_works_p().  */
! 234: #if HAVE_SYSSGI && HAVE_MMAP
! 235: static const int have_sgi = 1;
! 236: #else
! 237: static const int have_sgi = 0;
! 238: #endif
1.1 maekawa 239:
/* read_real_time() availability.  When it's missing, the functions become
   ASSERT_FAILs and stand-ins are supplied for the RTC_ constants and
   timebasestruct_t, so code using them still compiles.  */
1.1.1.2 ! ohara 240: #if HAVE_READ_REAL_TIME
! 241: static const int have_rrt = 1;
! 242: #else
! 243: static const int have_rrt = 0;
! 244: #define read_real_time(t,s) ASSERT_FAIL (read_real_time not available)
! 245: #define time_base_to_time(t,s) ASSERT_FAIL (time_base_to_time not available)
! 246: #define RTC_POWER 1
! 247: #define RTC_POWER_PC 2
! 248: #define timebasestruct_t struct timebasestruct_dummy
! 249: struct timebasestruct_dummy {
! 250: int flag;
! 251: unsigned int tb_high;
! 252: unsigned int tb_low;
! 253: };
! 254: #endif
1.1 maekawa 255:
/* For each of clock_gettime(), getrusage(), gettimeofday() and times():
   a have_foo flag saying whether the function exists, and a struct_foo
   macro naming either the real data type or a dummy.  A missing function
   is replaced by an ASSERT_FAIL macro.  */
1.1.1.2 ! ohara 256: #if HAVE_CLOCK_GETTIME
! 257: static const int have_cgt = 1;
! 258: #define struct_timespec struct timespec
! 259: #else
! 260: static const int have_cgt = 0;
! 261: #define struct_timespec struct timespec_dummy
/* these two are used in expressions, hence the comma and -1 value */
! 262: #define clock_gettime(id,ts) (ASSERT_FAIL (clock_gettime not available), -1)
! 263: #define clock_getres(id,ts) (ASSERT_FAIL (clock_getres not available), -1)
! 264: #endif
! 265:
! 266: #if HAVE_GETRUSAGE
! 267: static const int have_grus = 1;
! 268: #define struct_rusage struct rusage
! 269: #else
! 270: static const int have_grus = 0;
! 271: #define getrusage(n,ru) ASSERT_FAIL (getrusage not available)
! 272: #define struct_rusage struct rusage_dummy
! 273: #endif
! 274:
! 275: #if HAVE_GETTIMEOFDAY
! 276: static const int have_gtod = 1;
! 277: #define struct_timeval struct timeval
! 278: #else
! 279: static const int have_gtod = 0;
! 280: #define gettimeofday(tv,tz) ASSERT_FAIL (gettimeofday not available)
! 281: #define struct_timeval struct timeval_dummy
! 282: #endif
! 283:
! 284: #if HAVE_TIMES
! 285: static const int have_times = 1;
! 286: #define struct_tms struct tms
! 287: #else
! 288: static const int have_times = 0;
! 289: #define times(tms) ASSERT_FAIL (times not available)
! 290: #define struct_tms struct tms_dummy
! 291: #endif
! 292:
/* Dummy types behind the struct_tms, struct_timeval, struct_rusage and
   struct_timespec macros, carrying just the fields this file accesses.  */
! 293: struct tms_dummy {
! 294: long tms_utime;
! 295: };
! 296: struct timeval_dummy {
! 297: long tv_sec;
! 298: long tv_usec;
! 299: };
! 300: struct rusage_dummy {
! 301: struct_timeval ru_utime;
! 302: };
! 303: struct timespec_dummy {
! 304: long tv_sec;
! 305: long tv_nsec;
! 306: };
! 307:
/* Which methods are in use, chosen at run time by speed_time_init().
   More than one can be set when a method is supplemented by another.  */
! 308: static int use_cycles;
! 309: static int use_sgi;
! 310: static int use_rrt;
! 311: static int use_cgt;
! 312: static int use_gtod;
! 313: static int use_grus;
! 314: static int use_times;
/* Non-zero when speed_starttime() should first burn CPU up to a clock tick
   boundary.  */
! 315: static int use_tick_boundary;
! 316:
/* Start-of-measurement readings, one per method.  */
! 317: static unsigned start_cycles[2];
! 318: static stck_t start_stck;
! 319: static unsigned start_sgi;
! 320: static timebasestruct_t start_rrt;
! 321: static struct_timespec start_cgt;
! 322: static struct_rusage start_grus;
! 323: static struct_timeval start_gtod;
! 324: static struct_tms start_times;
! 325:
/* cycles_limit is set by speed_time_init() from the cycle counter range;
   the 1e100 default is effectively "no limit".  */
! 326: static double cycles_limit = 1e100;
/* Resolution of each method, in seconds.  */
! 327: static double sgi_unittime;
! 328: static double cgt_unittime;
! 329: static double grus_unittime;
! 330: static double gtod_unittime;
! 331: static double times_unittime;
! 332:
! 333: /* for RTC_POWER format, ie. seconds and nanoseconds */
! 334: #define TIMEBASESTRUCT_SECS(t) ((t)->tb_high + (t)->tb_low * 1e-9)
! 335:
! 336:
/* Return a string representing a time in seconds, nicely formatted.
   Eg. "10.25ms".

   T is scaled to ns, us, ms or s and printed with about 4 significant
   figures.  The string lives in a static buffer which the next call
   overwrites.

   Magnitudes too large for fixed point output to fit the buffer are printed
   in "%g" style instead, so the buffer can't be overrun.  */
char *
unittime_string (double t)
{
  static char  buf[128];

  const char  *unit;
  int         prec;

  /* choose units and scale */
  if (t < 1e-6)
    t *= 1e9, unit = "ns";
  else if (t < 1e-3)
    t *= 1e6, unit = "us";
  else if (t < 1.0)
    t *= 1e3, unit = "ms";
  else
    unit = "s";

  /* want 4 significant figures */
  if (t < 1.0)
    prec = 4;
  else if (t < 10.0)
    prec = 3;
  else if (t < 100.0)
    prec = 2;
  else
    prec = 1;

  /* "%f" prints every integer digit, which for a big enough value would run
     past the end of buf, so use exponent notation out there */
  if (t >= 1e15 || t <= -1e15)
    sprintf (buf, "%.4g%s", t, unit);
  else
    sprintf (buf, "%.*f%s", prec, t, unit);
  return buf;
}
370:
371:
1.1.1.2 ! ohara 372: static jmp_buf cycles_works_buf;
1.1 maekawa 373:
1.1.1.2 ! ohara 374: static RETSIGTYPE
! 375: cycles_works_handler (int sig)
! 376: {
! 377: longjmp (cycles_works_buf, 1);
! 378: }
1.1 maekawa 379:
380: int
1.1.1.2 ! ohara 381: cycles_works_p (void)
1.1 maekawa 382: {
1.1.1.2 ! ohara 383: static int result = -1;
! 384: RETSIGTYPE (*old_handler) _PROTO ((int));
! 385: unsigned cycles[2];
! 386:
! 387: /* suppress a warning about cycles[] unused */
! 388: cycles[0] = 0;
! 389:
! 390: if (result != -1)
! 391: goto done;
! 392:
! 393: #ifdef SIGILL
! 394: old_handler = signal (SIGILL, cycles_works_handler);
! 395: if (old_handler == SIG_ERR)
! 396: {
! 397: if (speed_option_verbose)
! 398: printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
! 399: goto yes;
! 400: }
! 401: if (setjmp (cycles_works_buf))
1.1 maekawa 402: {
1.1.1.2 ! ohara 403: if (speed_option_verbose)
! 404: printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
! 405: result = 0;
! 406: goto done;
1.1 maekawa 407: }
1.1.1.2 ! ohara 408: speed_cyclecounter (cycles);
! 409: signal (SIGILL, old_handler);
! 410: if (speed_option_verbose)
! 411: printf ("cycles_works_p(): speed_cyclecounter() works\n");
! 412: #else
1.1 maekawa 413:
1.1.1.2 ! ohara 414: if (speed_option_verbose)
! 415: printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
1.1 maekawa 416: #endif
417:
1.1.1.2 ! ohara 418: yes:
! 419: result = 1;
1.1 maekawa 420:
1.1.1.2 ! ohara 421: done:
! 422: return result;
! 423: }
1.1 maekawa 424:
425:
/* Clock ticks per second.  sysconf(_SC_CLK_TCK) is asked first where
   sysconf exists, with the CLK_TCK macro as the fallback.  If neither gives
   an answer the program aborts.  The value is looked up once and then
   remembered.  */
long
clk_tck (void)
{
  static long  ticks = -1L;

  if (ticks == -1L)
    {
#if HAVE_SYSCONF
      ticks = sysconf (_SC_CLK_TCK);
      if (ticks != -1L)
        {
          if (speed_option_verbose)
            printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", ticks);
          return ticks;
        }

      fprintf (stderr,
               "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
#endif

#ifdef CLK_TCK
      ticks = CLK_TCK;
      if (speed_option_verbose)
        printf ("CLK_TCK is %ld per second\n", ticks);
#else
      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
      abort ();
#endif
    }

  return ticks;
}
458:
459:
1.1.1.2 ! ohara 460: /* If two times can be observed less than half a clock tick apart, then
! 461: assume "get" is microsecond accurate.
! 462:
! 463: Two times only 1 microsecond apart are not believed, since some kernels
! 464: take it upon themselves to ensure gettimeofday doesn't return the same
! 465: value twice, for the benefit of applications using it for a timestamp.
! 466: This is obviously very stupid given the speed of CPUs these days.
! 467:
! 468: Making "reps" calls to noop_1() is designed to waste some CPU, with a
! 469: view to getting measurements 2 microseconds (or more) apart. "reps" is
! 470: increased progressively until such a period is seen.
! 471:
! 472: The outer loop "attempts" are just to allow for any random nonsense or
! 473: system load upsetting the measurements (ie. making two successive calls
! 474: to "get" come out as a longer interval than normal).
! 475:
! 476: Bugs:
! 477:
! 478: The assumption that any interval less than a half tick implies
! 479: microsecond resolution is obviously fairly rash, the true resolution
! 480: could be anything between a microsecond and that half tick. Perhaps
! 481: something special would have to be done on a system where this is the
! 482: case, since there's no obvious reliable way to detect it
! 483: automatically. */
! 484:
/* MICROSECONDS_P expands to a complete function body, braces and "return"
   included.

     name   string naming the time source, for the verbose messages
     type   type of the variables holding a start and an end reading
     get    called as get(var) to take a reading into var
     sec    sec(var) gives the seconds part of a reading
     usec   usec(var) gives the microseconds part of a reading

   The body returns 1 if an interval of at least 2 microseconds but under
   half a clock tick was seen within 5 attempts, otherwise 0.  The answer is
   kept in a static so the measuring is only done on the first call.  */
! 485: #define MICROSECONDS_P(name, type, get, sec, usec) \
! 486: { \
! 487: static int result = -1; \
! 488: type st, et; \
! 489: long dt, half_tick; \
! 490: unsigned attempt, reps, i, j; \
! 491: \
! 492: if (result != -1) \
! 493: return result; \
! 494: \
! 495: result = 0; \
! 496: half_tick = (1000000L / clk_tck ()) / 2; \
! 497: \
! 498: for (attempt = 0; attempt < 5; attempt++) \
! 499: { \
! 500: reps = 0; \
! 501: for (;;) \
! 502: { \
! 503: get (st); \
! 504: for (i = 0; i < reps; i++) \
! 505: for (j = 0; j < 100; j++) \
! 506: noop_1 (CNST_LIMB(0)); \
! 507: get (et); \
! 508: \
! 509: dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st); \
! 510: \
! 511: if (speed_option_verbose >= 2) \
! 512: printf ("%s attempt=%u, reps=%u, dt=%ld\n", \
! 513: name, attempt, reps, dt); \
! 514: \
! 515: if (dt >= 2) \
! 516: break; \
! 517: \
! 518: reps = (reps == 0 ? 1 : 2*reps); \
! 519: if (reps == 0) \
! 520: break; /* uint overflow, not normal */ \
! 521: } \
! 522: \
! 523: if (dt < half_tick) \
! 524: { \
! 525: result = 1; \
! 526: break; \
! 527: } \
! 528: } \
! 529: \
! 530: if (speed_option_verbose) \
! 531: { \
! 532: if (result) \
! 533: printf ("%s is microsecond accurate\n", name); \
! 534: else \
! 535: printf ("%s is only %s clock tick accurate\n", \
! 536: name, unittime_string (1.0/clk_tck())); \
! 537: } \
! 538: return result; \
! 539: }
! 540:
1.1 maekawa 541:
/* Return 1 if gettimeofday() looks microsecond accurate, or 0 if it looks
   only clock tick accurate, as judged by MICROSECONDS_P.  The answer is
   determined on the first call and cached.  */
542: int
1.1.1.2 ! ohara 543: gettimeofday_microseconds_p (void)
1.1 maekawa 544: {
/* reading and field accessors handed to MICROSECONDS_P */
1.1.1.2 ! ohara 545: #define call_gettimeofday(t) gettimeofday (&(t), NULL)
! 546: #define timeval_tv_sec(t) ((t).tv_sec)
! 547: #define timeval_tv_usec(t) ((t).tv_usec)
! 548: MICROSECONDS_P ("gettimeofday", struct_timeval,
! 549: call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
1.1 maekawa 550: }
551:
/* Return 1 if the ru_utime from getrusage() looks microsecond accurate, or
   0 if it looks only clock tick accurate, as judged by MICROSECONDS_P.  The
   answer is determined on the first call and cached.  */
1.1.1.2 ! ohara 552: int
! 553: getrusage_microseconds_p (void)
! 554: {
/* reading and field accessors handed to MICROSECONDS_P */
! 555: #define call_getrusage(t) getrusage (0, &(t))
! 556: #define rusage_tv_sec(t) ((t).ru_utime.tv_sec)
! 557: #define rusage_tv_usec(t) ((t).ru_utime.tv_usec)
! 558: MICROSECONDS_P ("getrusage", struct_rusage,
! 559: call_getrusage, rusage_tv_sec, rusage_tv_usec);
! 560: }
! 561:
! 562:
! 563: /* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
! 564: of glibc (some time post 2.2).
1.1 maekawa 565:
1.1.1.2 ! ohara 566: CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
! 567: defined, but returning -1 for an error). */
! 568:
/* Pick the clock ID for clock_gettime: CLOCK_PROCESS_CPUTIME_ID if defined,
   otherwise CLOCK_VIRTUAL if defined.  */
! 569: #ifdef CLOCK_PROCESS_CPUTIME_ID
! 570: # define CGT_ID CLOCK_PROCESS_CPUTIME_ID
! 571: #else
! 572: # ifdef CLOCK_VIRTUAL
! 573: # define CGT_ID CLOCK_VIRTUAL
! 574: # endif
! 575: #endif
/* HAVE_CGT_ID says whether an ID was found.  When none was, CGT_ID is an
   ASSERT_FAIL expression so code mentioning it still compiles.  */
! 576: #ifdef CGT_ID
! 577: # define HAVE_CGT_ID 1
! 578: #else
! 579: # define HAVE_CGT_ID 0
! 580: # define CGT_ID (ASSERT_FAIL (CGT_ID not determined), -1)
! 581: #endif
1.1 maekawa 582:
583: int
1.1.1.2 ! ohara 584: cgt_works_p (void)
1.1 maekawa 585: {
1.1.1.2 ! ohara 586: static int result = -1;
! 587: struct_timespec unit;
1.1 maekawa 588:
1.1.1.2 ! ohara 589: if (! have_cgt)
! 590: return 0;
! 591:
! 592: if (! HAVE_CGT_ID)
1.1 maekawa 593: {
1.1.1.2 ! ohara 594: if (speed_option_verbose)
! 595: printf ("clock_gettime don't know what ID to use\n");
! 596: result = 0;
! 597: return result;
! 598: }
1.1 maekawa 599:
1.1.1.2 ! ohara 600: if (result != -1)
! 601: return result;
1.1 maekawa 602:
1.1.1.2 ! ohara 603: /* trial run to see if it works */
! 604: if (clock_gettime (CGT_ID, &unit) != 0)
! 605: {
! 606: if (speed_option_verbose)
! 607: printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
! 608: result = 0;
! 609: return result;
! 610: }
! 611:
! 612: /* get the resolution */
! 613: if (clock_getres (CGT_ID, &unit) != 0)
! 614: {
! 615: if (speed_option_verbose)
! 616: printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
! 617: result = 0;
! 618: return result;
1.1 maekawa 619: }
620:
1.1.1.2 ! ohara 621: cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
! 622: printf ("clock_gettime is %s accurate\n",
! 623: unittime_string (cgt_unittime));
! 624: result = 1;
! 625: return result;
1.1 maekawa 626: }
627:
628:
/* Address of the least significant 4 bytes of the memory mapped SGI cycle
   counter, set by a successful sgi_works_p().  */
volatile unsigned  *sgi_addr;

/* Return 1 if the syssgi() cycle counter is available and has been mapped
   into memory (sgi_addr and sgi_unittime are then set), or 0 if not.
   Always 0 when syssgi() or mmap() don't exist.  The answer is determined
   on the first call and cached.

   The /dev/mmem descriptor is only needed to establish the mapping, so it's
   closed again whether or not mmap() succeeds.  */
int
sgi_works_p (void)
{
#if HAVE_SYSSGI && HAVE_MMAP
  static int  result = -1;

  size_t          pagesize, offset;
  __psunsigned_t  phys, physpage;
  void            *virtpage;
  unsigned        period_picoseconds;
  int             size, fd;

  if (result != -1)
    return result;

  phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
  if (phys == (__psunsigned_t) -1)
    {
      /* ENODEV is the error when a counter is not available */
      if (speed_option_verbose)
        printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
      result = 0;
      return result;
    }
  sgi_unittime = period_picoseconds * 1e-12;

  /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
     Challenge/ONYX hardware has a 64 bit counter, but there seems no
     obvious way to identify that without SGI_CYCLECNTR_SIZE.  */
#ifdef SGI_CYCLECNTR_SIZE
  size = syssgi (SGI_CYCLECNTR_SIZE);
  if (size == -1)
    {
      if (speed_option_verbose)
        {
          printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
          printf ("    will assume size==32\n");
        }
      size = 32;
    }
#else
  size = 32;
#endif

  /* size is in bits */
  if (size < 32)
    {
      printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
      result = 0;
      return result;
    }

  pagesize = getpagesize();
  offset = (size_t) phys & (pagesize-1);
  physpage = phys - offset;

  /* shouldn't cross over a page boundary */
  ASSERT_ALWAYS (offset + size/8 <= pagesize);

  fd = open("/dev/mmem", O_RDONLY);
  if (fd == -1)
    {
      if (speed_option_verbose)
        printf ("open /dev/mmem: %s\n", strerror (errno));
      result = 0;
      return result;
    }

  virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
  if (virtpage == (void *) -1)
    {
      /* report before close(), which could disturb errno */
      if (speed_option_verbose)
        printf ("mmap /dev/mmem: %s\n", strerror (errno));
      close (fd);
      result = 0;
      return result;
    }

  /* a mapping stays valid after its descriptor is closed */
  close (fd);

  /* address of least significant 4 bytes, knowing mips is big endian */
  sgi_addr = (unsigned *) ((char *) virtpage + offset
                           + size/8 - sizeof(unsigned));
  result = 1;
  return result;

#else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
  return 0;
#endif
}
717:
718:
1.1.1.2 ! ohara 719: #define DEFAULT(var,n) \
! 720: do { \
! 721: if (! (var)) \
! 722: (var) = (n); \
! 723: } while (0)
1.1 maekawa 724:
725: void
726: speed_time_init (void)
727: {
1.1.1.2 ! ohara 728: double supplement_unittime = 0.0;
! 729:
! 730: static int speed_time_initialized = 0;
1.1 maekawa 731: if (speed_time_initialized)
732: return;
733: speed_time_initialized = 1;
734:
1.1.1.2 ! ohara 735: speed_cycletime_init ();
1.1 maekawa 736:
1.1.1.2 ! ohara 737: if (have_cycles && cycles_works_p ())
! 738: {
! 739: use_cycles = 1;
! 740: DEFAULT (speed_cycletime, 1.0);
! 741: speed_unittime = speed_cycletime;
! 742: DEFAULT (speed_precision, 10000);
! 743: strcpy (speed_time_string, "CPU cycle counter");
! 744:
! 745: /* only used if a supplementary method is chosen below */
! 746: cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
! 747: * speed_cycletime;
1.1 maekawa 748:
1.1.1.2 ! ohara 749: if (have_grus && getrusage_microseconds_p())
! 750: {
! 751: /* this is a good combination */
! 752: use_grus = 1;
! 753: supplement_unittime = grus_unittime = 1.0e-6;
! 754: strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
! 755: }
! 756: else if (have_cycles == 1)
! 757: {
! 758: /* When speed_cyclecounter has a limited range, look for something
! 759: to supplement it. */
! 760: if (have_gtod && gettimeofday_microseconds_p())
! 761: {
! 762: use_gtod = 1;
! 763: supplement_unittime = gtod_unittime = 1.0e-6;
! 764: strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
! 765: }
! 766: else if (have_grus)
! 767: {
! 768: use_grus = 1;
! 769: supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
! 770: sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
! 771: }
! 772: else if (have_times)
! 773: {
! 774: use_times = 1;
! 775: supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
! 776: sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
! 777: }
! 778: else if (have_gtod)
! 779: {
! 780: use_gtod = 1;
! 781: supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
! 782: sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
! 783: }
! 784: else
! 785: {
! 786: fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
! 787: fprintf (stderr, " Wraparounds may produce bad results on long measurements.\n");
! 788: }
! 789: }
1.1 maekawa 790:
1.1.1.2 ! ohara 791: if (use_grus || use_times || use_gtod)
! 792: {
! 793: /* must know cycle period to compare cycles to other measuring
! 794: (via cycles_limit) */
! 795: speed_cycletime_need_seconds ();
1.1 maekawa 796:
1.1.1.2 ! ohara 797: if (speed_precision * supplement_unittime > cycles_limit)
! 798: {
! 799: fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
! 800: fprintf (stderr, " cycle counter and limited precision supplemental method\n");
! 801: fprintf (stderr, " (%s)\n", speed_time_string);
! 802: }
! 803: }
! 804: }
! 805: else if (have_stck)
! 806: {
! 807: strcpy (speed_time_string, "STCK timestamp");
! 808: /* stck is in units of 2^-12 microseconds, which is very likely higher
! 809: resolution than a cpu cycle */
! 810: if (speed_cycletime == 0.0)
! 811: speed_cycletime_fail
! 812: ("Need to know CPU frequency for effective stck unit");
! 813: speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
! 814: DEFAULT (speed_precision, 10000);
! 815: }
! 816: else if (have_sgi && sgi_works_p ())
! 817: {
! 818: use_sgi = 1;
! 819: DEFAULT (speed_precision, 10000);
! 820: speed_unittime = sgi_unittime;
! 821: sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
! 822: unittime_string (speed_unittime));
! 823: /* supplemented with getrusage, which we assume to have 1ms resolution */
! 824: use_grus = 1;
! 825: supplement_unittime = 1e-3;
! 826: }
! 827: else if (have_rrt)
! 828: {
! 829: timebasestruct_t t;
! 830: use_rrt = 1;
! 831: DEFAULT (speed_precision, 10000);
! 832: read_real_time (&t, sizeof(t));
! 833: switch (t.flag) {
! 834: case RTC_POWER:
! 835: /* FIXME: What's the actual RTC resolution? */
! 836: speed_unittime = 1e-7;
! 837: strcpy (speed_time_string, "read_real_time() power nanoseconds");
! 838: break;
! 839: case RTC_POWER_PC:
! 840: t.tb_high = 1;
! 841: t.tb_low = 0;
! 842: time_base_to_time (&t, sizeof(t));
! 843: speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
! 844: sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
! 845: unittime_string (speed_unittime));
! 846: break;
! 847: default:
! 848: fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
! 849: t.flag);
! 850: abort ();
! 851: }
! 852: }
! 853: else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
! 854: {
! 855: /* use clock_gettime if microsecond or better resolution */
! 856: choose_cgt:
! 857: use_cgt = 1;
! 858: speed_unittime = cgt_unittime;
! 859: DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
! 860: strcpy (speed_time_string, "microsecond accurate getrusage()");
! 861: }
! 862: else if (have_grus && getrusage_microseconds_p())
! 863: {
! 864: use_grus = 1;
! 865: speed_unittime = grus_unittime = 1.0e-6;
! 866: DEFAULT (speed_precision, 1000);
! 867: strcpy (speed_time_string, "microsecond accurate getrusage()");
! 868: }
! 869: else if (have_gtod && gettimeofday_microseconds_p())
! 870: {
! 871: use_gtod = 1;
! 872: speed_unittime = gtod_unittime = 1.0e-6;
! 873: DEFAULT (speed_precision, 1000);
! 874: strcpy (speed_time_string, "microsecond accurate gettimeofday()");
! 875: }
! 876: else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
! 877: {
! 878: /* use clock_gettime if 1 tick or better resolution */
! 879: goto choose_cgt;
! 880: }
! 881: else if (have_times)
! 882: {
! 883: use_times = 1;
! 884: use_tick_boundary = 1;
! 885: speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
! 886: DEFAULT (speed_precision, 200);
! 887: sprintf (speed_time_string, "%s clock tick times()",
! 888: unittime_string (speed_unittime));
! 889: }
! 890: else if (have_grus)
! 891: {
! 892: use_grus = 1;
! 893: use_tick_boundary = 1;
! 894: speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
! 895: DEFAULT (speed_precision, 200);
! 896: sprintf (speed_time_string, "%s clock tick getrusage()\n",
! 897: unittime_string (speed_unittime));
! 898: }
! 899: else if (have_gtod)
! 900: {
! 901: use_gtod = 1;
! 902: use_tick_boundary = 1;
! 903: speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
! 904: DEFAULT (speed_precision, 200);
! 905: sprintf (speed_time_string, "%s clock tick gettimeofday()",
! 906: unittime_string (speed_unittime));
! 907: }
! 908: else
! 909: {
! 910: fprintf (stderr, "No time measuring method available\n");
! 911: fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
! 912: abort ();
! 913: }
1.1 maekawa 914:
1.1.1.2 ! ohara 915: if (speed_option_verbose)
! 916: {
! 917: printf ("speed_time_init: %s\n", speed_time_string);
! 918: printf (" speed_precision %d\n", speed_precision);
! 919: printf (" speed_unittime %.2g\n", speed_unittime);
! 920: if (supplement_unittime)
! 921: printf (" supplement_unittime %.2g\n", supplement_unittime);
! 922: printf (" use_tick_boundary %d\n", use_tick_boundary);
! 923: if (have_cycles)
! 924: printf (" cycles_limit %.2g seconds\n", cycles_limit);
! 925: }
1.1 maekawa 926: }
927:
928:
929:
1.1.1.2 ! ohara 930: /* Burn up CPU until a clock tick boundary, for greater accuracy. Set the
! 931: corresponding "start_foo" appropriately too. */
1.1 maekawa 932:
1.1.1.2 ! ohara 933: void
! 934: grus_tick_boundary (void)
! 935: {
! 936: struct_rusage prev;
! 937: getrusage (0, &prev);
! 938: do {
! 939: getrusage (0, &start_grus);
! 940: } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
! 941: }
1.1 maekawa 942:
943: void
1.1.1.2 ! ohara 944: gtod_tick_boundary (void)
1.1 maekawa 945: {
1.1.1.2 ! ohara 946: struct_timeval prev;
! 947: gettimeofday (&prev, NULL);
! 948: do {
! 949: gettimeofday (&start_gtod, NULL);
! 950: } while (start_gtod.tv_usec == prev.tv_usec);
! 951: }
1.1 maekawa 952:
1.1.1.2 ! ohara 953: void
! 954: times_tick_boundary (void)
! 955: {
! 956: struct_tms prev;
! 957: times (&prev);
! 958: do
! 959: times (&start_times);
! 960: while (start_times.tms_utime == prev.tms_utime);
1.1 maekawa 961: }
962:
1.1.1.2 ! ohara 963:
! 964: /* "have_" values are tested to let unused code go dead. */
! 965:
1.1 maekawa 966: void
967: speed_starttime (void)
968: {
1.1.1.2 ! ohara 969: speed_time_init ();
1.1 maekawa 970:
1.1.1.2 ! ohara 971: if (have_grus && use_grus)
! 972: {
! 973: if (use_tick_boundary)
! 974: grus_tick_boundary ();
! 975: else
! 976: getrusage (0, &start_grus);
! 977: }
1.1 maekawa 978:
1.1.1.2 ! ohara 979: if (have_gtod && use_gtod)
! 980: {
! 981: if (use_tick_boundary)
! 982: gtod_tick_boundary ();
! 983: else
! 984: gettimeofday (&start_gtod, NULL);
! 985: }
1.1 maekawa 986:
1.1.1.2 ! ohara 987: if (have_times && use_times)
! 988: {
! 989: if (use_tick_boundary)
! 990: times_tick_boundary ();
! 991: else
! 992: times (&start_times);
! 993: }
1.1 maekawa 994:
1.1.1.2 ! ohara 995: if (have_cgt && use_cgt)
! 996: clock_gettime (CGT_ID, &start_cgt);
1.1 maekawa 997:
1.1.1.2 ! ohara 998: if (have_rrt && use_rrt)
! 999: read_real_time (&start_rrt, sizeof(start_rrt));
1.1 maekawa 1000:
1.1.1.2 ! ohara 1001: if (have_sgi && use_sgi)
! 1002: start_sgi = *sgi_addr;
1.1 maekawa 1003:
1.1.1.2 ! ohara 1004: if (have_stck && use_stck)
! 1005: STCK (start_stck);
1.1 maekawa 1006:
1.1.1.2 ! ohara 1007: /* Cycles sampled last for maximum accuracy. */
! 1008: if (have_cycles && use_cycles)
! 1009: speed_cyclecounter (start_cycles);
! 1010: }
1.1 maekawa 1011:
1012:
1.1.1.2 ! ohara 1013: /* Calculate the difference between two cycle counter samples, as a "double"
! 1014: counter of cycles.
1.1 maekawa 1015:
1.1.1.2 ! ohara 1016: The start and end values are allowed to cancel in integers in case the
! 1017: counter values are bigger than the 53 bits that normally fit in a double.
1.1 maekawa 1018:
1.1.1.2 ! ohara 1019: This works even if speed_cyclecounter() puts a value bigger than 32-bits
! 1020: in the low word (the high word always gets a 2**32 multiplier though). */
! 1021:
! 1022: double
! 1023: speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
1.1 maekawa 1024: {
1.1.1.2 ! ohara 1025: unsigned d;
! 1026: double t;
1.1 maekawa 1027:
1.1.1.2 ! ohara 1028: if (have_cycles == 1)
! 1029: {
! 1030: t = (end[0] - start[0]);
! 1031: }
! 1032: else
! 1033: {
! 1034: d = end[0] - start[0];
! 1035: t = d - (d > end[0] ? M_2POWU : 0.0);
! 1036: t += (end[1] - start[1]) * M_2POW32;
! 1037: }
! 1038: return t;
1.1 maekawa 1039: }
1040:
1.1.1.2 ! ohara 1041:
/* Calculate the difference between "start" and "end" using fields "sec" and
   "psec", where each "psec" is a "punit" of a second.

   This macro expands to a complete function body: it expects pointer
   parameters named "end" and "start" to be in scope and finishes with a
   "return" of the difference as a double, in seconds.

   The seconds parts are allowed to cancel before being combined with the
   psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
   double.

   Total time is only calculated in a "double" since an integer count of
   psecs might overflow.  2^32 microseconds is only a bit over an hour, or
   2^32 nanoseconds only about 4 seconds.

   The casts to "long" are for the benefit of timebasestruct_t, where the
   fields are only "unsigned int", but we want a signed difference.

   "punit" is parenthesized in the expansion so that an expression argument
   (eg. "1.0/1e6") combines correctly with the multiplication.  */

#define DIFF_SECS_ROUTINE(sec, psec, punit)                     \
  {                                                             \
    long  sec_diff, psec_diff;                                  \
    sec_diff = (long) end->sec - (long) start->sec;             \
    psec_diff = (long) end->psec - (long) start->psec;          \
    return (double) sec_diff + (punit) * (double) psec_diff;    \
  }
! 1063:
! 1064: double
! 1065: timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
1.1 maekawa 1066: {
1.1.1.2 ! ohara 1067: DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
! 1068: }
1.1 maekawa 1069:
1.1.1.2 ! ohara 1070: double
! 1071: rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
! 1072: {
! 1073: DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
1.1 maekawa 1074: }
1075:
1076: double
1.1.1.2 ! ohara 1077: timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
1.1 maekawa 1078: {
1.1.1.2 ! ohara 1079: DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
! 1080: }
1.1 maekawa 1081:
1.1.1.2 ! ohara 1082: /* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */
! 1083: double
! 1084: timebasestruct_diff_secs (const timebasestruct_t *end,
! 1085: const timebasestruct_t *start)
! 1086: {
! 1087: DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
1.1 maekawa 1088: }
1089:
1090:
1.1.1.2 ! ohara 1091: double
! 1092: speed_endtime (void)
! 1093: {
! 1094: #define END_USE(name,value) \
! 1095: do { \
! 1096: if (speed_option_verbose >= 3) \
! 1097: printf ("speed_endtime(): used %s\n", name); \
! 1098: result = value; \
! 1099: goto done; \
! 1100: } while (0)
! 1101:
! 1102: #define END_ENOUGH(name,value) \
! 1103: do { \
! 1104: if (speed_option_verbose >= 3) \
! 1105: printf ("speed_endtime(): %s gives enough precision\n", name); \
! 1106: result = value; \
! 1107: goto done; \
! 1108: } while (0)
! 1109:
! 1110: #define END_EXCEED(name,value) \
! 1111: do { \
! 1112: if (speed_option_verbose >= 3) \
! 1113: printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
! 1114: name); \
! 1115: result = value; \
! 1116: goto done; \
! 1117: } while (0)
! 1118:
! 1119: unsigned end_cycles[2];
! 1120: stck_t end_stck;
! 1121: unsigned end_sgi;
! 1122: timebasestruct_t end_rrt;
! 1123: struct_timespec end_cgt;
! 1124: struct_timeval end_gtod;
! 1125: struct_rusage end_grus;
! 1126: struct_tms end_times;
! 1127: double t_gtod, t_grus, t_times, t_cgt;
! 1128: double t_rrt, t_sgi, t_stck, t_cycles;
! 1129: double result;
! 1130:
! 1131: /* Cycles sampled first for maximum accuracy.
! 1132: "have_" values tested to let unused code go dead. */
! 1133:
! 1134: if (have_cycles && use_cycles) speed_cyclecounter (end_cycles);
! 1135: if (have_stck && use_stck) STCK (end_stck);
! 1136: if (have_sgi && use_sgi) end_sgi = *sgi_addr;
! 1137: if (have_rrt && use_rrt) read_real_time (&end_rrt, sizeof(end_rrt));
! 1138: if (have_cgt && use_cgt) clock_gettime (CGT_ID, &end_cgt);
! 1139: if (have_gtod && use_gtod) gettimeofday (&end_gtod, NULL);
! 1140: if (have_grus && use_grus) getrusage (0, &end_grus);
! 1141: if (have_times && use_times) times (&end_times);
1.1 maekawa 1142:
1.1.1.2 ! ohara 1143: result = -1.0;
1.1 maekawa 1144:
1.1.1.2 ! ohara 1145: if (speed_option_verbose >= 4)
! 1146: {
! 1147: printf ("speed_endtime():\n");
! 1148: if (use_cycles)
! 1149: printf (" cycles 0x%X,0x%X -> 0x%X,0x%X\n",
! 1150: start_cycles[1], start_cycles[0],
! 1151: end_cycles[1], end_cycles[0]);
! 1152:
! 1153: if (use_stck)
! 1154: printf (" stck 0x%lX -> 0x%lX\n", start_stck, end_stck);
! 1155:
! 1156: if (use_sgi)
! 1157: printf (" sgi 0x%X -> 0x%X\n", start_sgi, end_sgi);
! 1158:
! 1159: if (use_rrt)
! 1160: printf (" read_real_time (%d)%u,%u -> (%d)%u,%u\n",
! 1161: start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
! 1162: end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
! 1163:
! 1164: if (use_cgt)
! 1165: printf (" clock_gettime %ld.%09ld -> %ld.%09ld\n",
! 1166: start_cgt.tv_sec, start_cgt.tv_nsec,
! 1167: end_cgt.tv_sec, end_cgt.tv_nsec);
! 1168:
! 1169: if (use_gtod)
! 1170: printf (" gettimeofday %ld.%06ld -> %ld.%06ld\n",
! 1171: start_gtod.tv_sec, start_gtod.tv_usec,
! 1172: end_gtod.tv_sec, end_gtod.tv_usec);
! 1173:
! 1174: if (use_grus)
! 1175: printf (" getrusage %ld.%06ld -> %ld.%06ld\n",
! 1176: start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec,
! 1177: end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec);
! 1178:
! 1179: if (use_times)
! 1180: printf (" times %ld -> %ld\n",
! 1181: start_times.tms_utime, end_times.tms_utime);
! 1182: }
1.1 maekawa 1183:
1.1.1.2 ! ohara 1184: if (use_rrt)
! 1185: {
! 1186: time_base_to_time (&start_rrt, sizeof(start_rrt));
! 1187: time_base_to_time (&end_rrt, sizeof(end_rrt));
! 1188: t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
! 1189: END_USE ("read_real_time()", t_rrt);
! 1190: }
1.1 maekawa 1191:
1.1.1.2 ! ohara 1192: if (use_cgt)
! 1193: {
! 1194: t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
! 1195: END_USE ("clock_gettime()", t_cgt);
! 1196: }
1.1 maekawa 1197:
1.1.1.2 ! ohara 1198: if (use_grus)
! 1199: {
! 1200: t_grus = rusage_diff_secs (&end_grus, &start_grus);
1.1 maekawa 1201:
1.1.1.2 ! ohara 1202: /* Use getrusage() if the cycle counter limit would be exceeded, or if
! 1203: it provides enough accuracy already. */
! 1204: if (use_cycles)
! 1205: {
! 1206: if (t_grus >= speed_precision*grus_unittime)
! 1207: END_ENOUGH ("getrusage()", t_grus);
! 1208: if (t_grus >= cycles_limit)
! 1209: END_EXCEED ("getrusage()", t_grus);
! 1210: }
! 1211: }
1.1 maekawa 1212:
1.1.1.2 ! ohara 1213: if (use_times)
! 1214: {
! 1215: t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
1.1 maekawa 1216:
1.1.1.2 ! ohara 1217: /* Use times() if the cycle counter limit would be exceeded, or if
! 1218: it provides enough accuracy already. */
! 1219: if (use_cycles)
! 1220: {
! 1221: if (t_times >= speed_precision*times_unittime)
! 1222: END_ENOUGH ("times()", t_times);
! 1223: if (t_times >= cycles_limit)
! 1224: END_EXCEED ("times()", t_times);
! 1225: }
! 1226: }
1.1 maekawa 1227:
1.1.1.2 ! ohara 1228: if (use_gtod)
! 1229: {
! 1230: t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
1.1 maekawa 1231:
1.1.1.2 ! ohara 1232: /* Use gettimeofday() if it measured a value bigger than the cycle
! 1233: counter can handle. */
! 1234: if (use_cycles)
! 1235: {
! 1236: if (t_gtod >= cycles_limit)
! 1237: END_EXCEED ("gettimeofday()", t_gtod);
! 1238: }
! 1239: }
! 1240:
! 1241: if (use_stck)
! 1242: {
! 1243: t_stck = (end_stck - start_stck) * STCK_PERIOD;
! 1244: END_USE ("stck", t_stck);
! 1245: }
1.1 maekawa 1246:
1.1.1.2 ! ohara 1247: if (use_sgi)
1.1 maekawa 1248: {
1.1.1.2 ! ohara 1249: t_sgi = (end_sgi - start_sgi) * sgi_unittime;
! 1250: END_USE ("SGI hardware counter", t_sgi);
1.1 maekawa 1251: }
1252:
1.1.1.2 ! ohara 1253: if (use_cycles)
! 1254: {
! 1255: t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
! 1256: * speed_cycletime;
! 1257: END_USE ("cycle counter", t_cycles);
! 1258: }
1.1 maekawa 1259:
1.1.1.2 ! ohara 1260: if (use_grus && getrusage_microseconds_p())
! 1261: END_USE ("getrusage()", t_grus);
1.1 maekawa 1262:
1.1.1.2 ! ohara 1263: if (use_gtod && gettimeofday_microseconds_p())
! 1264: END_USE ("gettimeofday()", t_gtod);
1.1 maekawa 1265:
1.1.1.2 ! ohara 1266: if (use_times) END_USE ("times()", t_times);
! 1267: if (use_grus) END_USE ("getrusage()", t_grus);
! 1268: if (use_gtod) END_USE ("gettimeofday()", t_gtod);
1.1 maekawa 1269:
1.1.1.2 ! ohara 1270: fprintf (stderr, "speed_endtime(): oops, no time method available\n");
! 1271: abort ();
! 1272:
! 1273: done:
! 1274: if (result < 0.0)
! 1275: {
! 1276: fprintf (stderr,
! 1277: "speed_endtime(): fatal error: negative time measured: %.9f\n",
! 1278: result);
! 1279: abort ();
! 1280: }
! 1281: return result;
! 1282: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>