[BACK]Return to time.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / tune

Annotation of OpenXM_contrib/gmp/tune/time.c, Revision 1.1.1.2

1.1.1.2 ! ohara       1: /* Time routines for speed measurements.
1.1       maekawa     2:
1.1.1.2 ! ohara       3: Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
1.1       maekawa     4:
                      5: This file is part of the GNU MP Library.
                      6:
                      7: The GNU MP Library is free software; you can redistribute it and/or modify
                      8: it under the terms of the GNU Lesser General Public License as published by
                      9: the Free Software Foundation; either version 2.1 of the License, or (at your
                     10: option) any later version.
                     11:
                     12: The GNU MP Library is distributed in the hope that it will be useful, but
                     13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     15: License for more details.
                     16:
                     17: You should have received a copy of the GNU Lesser General Public License
                     18: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
1.1.1.2 ! ohara      20: MA 02111-1307, USA. */
        !            21:
1.1       maekawa    22:
                     23: /* speed_time_init() - initialize timing things.  speed_starttime() calls
                     24:    this if it hasn't been done yet, so you only need to call this explicitly
                     25:    if you want to use the global variables before the first measurement.
                     26:
                     27:    speed_starttime() - start a time measurement.
                     28:
1.1.1.2 ! ohara      29:    speed_endtime() - end a time measurement, return time taken (seconds or
        !            30:    cycles).
        !            31:
        !            32:    speed_time_string - a string describing the time method in use.
1.1       maekawa    33:
                     34:    speed_unittime - global variable with the unit of time measurement
1.1.1.2 ! ohara      35:    accuracy (seconds or cycles).
1.1       maekawa    36:
                     37:    speed_precision - global variable which is the intended accuracy of time
                     38:    measurements.  speed_measure() for instance runs target routines with
1.1.1.2 ! ohara      39:    enough repetitions so it takes at least speed_unittime*speed_precision.
        !            40:    A program can provide an option so the user can set this, otherwise it
        !            41:    gets a default based on the measuring method chosen.
1.1       maekawa    42:
                     43:    speed_cycletime - the time in seconds for each CPU cycle, for example on
1.1.1.2 ! ohara      44:    a 100 MHz CPU this would be 1.0e-8.  If the CPU frequency is unknown this
        !            45:    is 0.0 if the time base is in seconds, or 1.0 if it's in cycles.
1.1       maekawa    46:
                     47:
1.1.1.2 ! ohara      48:    speed_endtime() and speed_unittime are normally in seconds, but if a
        !            49:    cycle counter is being used to measure and the CPU frequency is unknown,
        !            50:    then speed_endtime() returns cycles and speed_cycletime and
        !            51:    speed_unittime are 1.0.
        !            52:
        !            53:    Notice that speed_unittime*speed_precision is the target duration for
        !            54:    speed_endtime(), irrespective of whether that's in seconds or cycles.
        !            55:
        !            56:    Call speed_cycletime_need_seconds() to demand that speed_endtime() is in
        !            57:    seconds and not perhaps in cycles.
        !            58:
        !            59:    Call speed_cycletime_need_cycles() to demand that speed_cycletime is
        !            60:    non-zero, so that speed_endtime()/speed_cycletime will work to give times
        !            61:    in cycles.
        !            62:
        !            63:
        !            64:    Notes:
        !            65:
        !            66:    Various combinations of cycle counter, read_real_time(), getrusage(),
        !            67:    gettimeofday() and times() can arise, according to which are available
        !            68:    and their precision.
1.1       maekawa    69:
                     70:
1.1.1.2 ! ohara      71:    Allowing speed_endtime() to return either seconds or cycles is only a
        !            72:    slight complication and makes it possible for the speed program to do
        !            73:    some sensible things without demanding the CPU frequency.  If seconds are
        !            74:    being measured then it can always print seconds, and if cycles are being
        !            75:    measured then it can always print them without needing to know how long
        !            76:    they are.  Also the tune program doesn't care at all what the units are.
1.1       maekawa    77:
1.1.1.2 ! ohara      78:    GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
        !            79:    fail.  This will be needed if times in seconds are wanted but a cycle
        !            80:    counter is being used, or if times in cycles are wanted but getrusage or
        !            81:    another seconds based timer is in use.
1.1       maekawa    82:
1.1.1.2 ! ohara      83:    If the measuring method uses a cycle counter but supplements it with
        !            84:    getrusage or the like, then knowing the CPU frequency is mandatory since
        !            85:    the code compares values from the two.
1.1       maekawa    86:
                     87:
1.1.1.2 ! ohara      88:    Solaris gethrtime() seems no more than a slow way to access the Sparc V9
        !            89:    cycle counter.  gethrvtime() seems to be relevant only to LWP, it doesn't
        !            90:    for instance give nanosecond virtual time.  So neither of these are used.
        !            91:
        !            92:
        !            93:    Bugs:
        !            94:
        !            95:    getrusage_microseconds_p is fundamentally flawed, getrusage and
        !            96:    gettimeofday can have resolutions other than clock ticks or microseconds,
        !            97:    for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
        !            98:
        !            99:    Enhancements:
        !           100:
        !           101:    The SGI hardware counter has 64 bits on some machines, which could be
        !           102:    used when available.  But perhaps 32 bits is enough range, and then rely
        !           103:    on the getrusage supplement.
        !           104:
        !           105:    Maybe getrusage (or times) should be used as a supplement for any
        !           106:    wall-clock measuring method.  Currently a wall clock with a good range
        !           107:    (eg. a 64-bit cycle counter) is used without a supplement.
        !           108:
        !           109:    On PowerPC the timebase registers could be used, but would have to do
        !           110:    something to find out the speed.  On 6xx chips it's normally 1/4 bus
        !           111:    speed, on 4xx chips it's either that or an external clock.  Measuring
        !           112:    against gettimeofday might be ok.  */
        !           113:
        !           114:
        !           115: #include "config.h"
        !           116:
        !           117: #include <errno.h>
        !           118: #include <setjmp.h>
        !           119: #include <signal.h>
        !           120: #include <stddef.h>
1.1       maekawa   121: #include <stdio.h>
1.1.1.2 ! ohara     122: #include <string.h>
        !           123: #include <stdlib.h> /* for getenv() */
        !           124:
        !           125: #if HAVE_FCNTL_H
        !           126: #include <fcntl.h>  /* for open() */
        !           127: #endif
        !           128:
        !           129: #if HAVE_STDINT_H
        !           130: #include <stdint.h> /* for uint64_t */
        !           131: #endif
        !           132:
1.1       maekawa   133: #if HAVE_UNISTD_H
1.1.1.2 ! ohara     134: #include <unistd.h> /* for sysconf() */
1.1       maekawa   135: #endif
                    136:
                    137: #include <sys/types.h>
1.1.1.2 ! ohara     138:
        !           139: #if TIME_WITH_SYS_TIME
        !           140: # include <sys/time.h>  /* for struct timeval */
        !           141: # include <time.h>
        !           142: #else
        !           143: # if HAVE_SYS_TIME_H
        !           144: #  include <sys/time.h>
        !           145: # else
        !           146: #  include <time.h>
        !           147: # endif
        !           148: #endif
        !           149:
        !           150: #if HAVE_SYS_MMAN_H
        !           151: #include <sys/mman.h>      /* for mmap() */
        !           152: #endif
        !           153:
        !           154: #if HAVE_SYS_RESOURCE_H
        !           155: #include <sys/resource.h>  /* for struct rusage */
        !           156: #endif
        !           157:
        !           158: #if HAVE_SYS_SYSSGI_H
        !           159: #include <sys/syssgi.h>    /* for syssgi() */
        !           160: #endif
        !           161:
        !           162: #if HAVE_SYS_SYSTEMCFG_H
        !           163: #include <sys/systemcfg.h> /* for RTC_POWER on AIX */
        !           164: #endif
        !           165:
        !           166: #if HAVE_SYS_TIMES_H
        !           167: #include <sys/times.h>  /* for times() and struct tms */
1.1       maekawa   168: #endif
                    169:
                    170: #include "gmp.h"
                    171: #include "gmp-impl.h"
                    172:
                    173: #include "speed.h"
                    174:
                    175:
1.1.1.2 ! ohara     176:
        !           177: char    speed_time_string[256];  /* text describing the time method in use */
        !           178: int     speed_precision = 0;     /* intended measurement accuracy; 0 here, see top-of-file comment */
        !           179: double  speed_unittime;          /* unit of time measurement accuracy, in seconds or cycles */
        !           180: double  speed_cycletime = 0.0;   /* seconds per CPU cycle; starts out as 0.0 */
        !           181:
        !           182:
        !           183: /* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
        !           184:    native cc */
        !           185: #define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)  /* 2*(INT_MAX+1), computed entirely in double */
        !           186:
        !           187: #define M_2POW32  4294967296.0  /* 2^32 as a double */
        !           188: #define M_2POW64  (M_2POW32 * M_2POW32)  /* 2^64 as a double */
        !           189:
        !           190:
        !           191: /* Conditionals for the time functions available are done with normal C
        !           192:    code, which is a lot easier than wildly nested preprocessor directives.
        !           193:
        !           194:    The choice of what to use is partly made at run-time, according to
        !           195:    whether the cycle counter works and the measured accuracy of getrusage
        !           196:    and gettimeofday.
        !           197:
        !           198:    A routine that's not available won't be getting called, but is an abort()
        !           199:    to be sure it isn't called mistakenly.
        !           200:
        !           201:    It can be assumed that if a function exists then its data type will, but
        !           202:    if the function doesn't then the data type might or might not exist, so
        !           203:    the type can't be used unconditionally.  The "struct_rusage" etc macros
        !           204:    provide dummies when the respective function doesn't exist. */
        !           205:
        !           206:
1.1       maekawa   207: #if HAVE_SPEED_CYCLECOUNTER
1.1.1.2 ! ohara     208: static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;  /* keeps the configured value, not just 1 */
1.1       maekawa   209: #else  /* ! HAVE_SPEED_CYCLECOUNTER */
1.1.1.2 ! ohara     210: static const int have_cycles = 0;
        !           211: #define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
1.1       maekawa   212: #endif  /* HAVE_SPEED_CYCLECOUNTER */
                    213:
1.1.1.2 ! ohara     214: /* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
        !           215:    microseconds.  Same #ifdefs here as in longlong.h.  */
        !           216: #if defined (__GNUC__) && ! defined (NO_ASM)                            \
        !           217:   && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
        !           218: static const int  have_stck = 1;
        !           219: static const int  use_stck = 1;  /* always use when available */
        !           220: typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
        !           221: #define STCK(timestamp)                 \
        !           222:   do {                                  \
        !           223:     asm ("stck %0" : "=m" (timestamp)); \
        !           224:   } while (0)
        !           225: #else  /* no stck instruction */
        !           226: static const int  have_stck = 0;
        !           227: static const int  use_stck = 0;
        !           228: typedef unsigned long  stck_t;   /* dummy */
        !           229: #define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
        !           230: #endif  /* stck */
        !           231: #define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */
1.1       maekawa   232:
1.1.1.2 ! ohara     233: /* Unicos 10.X has syssgi(), but not mmap(). */
        !           234: #if HAVE_SYSSGI && HAVE_MMAP
        !           235: static const int  have_sgi = 1;
        !           236: #else  /* ! (HAVE_SYSSGI && HAVE_MMAP) */
        !           237: static const int  have_sgi = 0;
        !           238: #endif  /* HAVE_SYSSGI && HAVE_MMAP */
1.1       maekawa   239:
1.1.1.2 ! ohara     240: #if HAVE_READ_REAL_TIME
        !           241: static const int have_rrt = 1;
        !           242: #else  /* ! HAVE_READ_REAL_TIME: supply stand-ins so later code still compiles */
        !           243: static const int have_rrt = 0;
        !           244: #define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
        !           245: #define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
        !           246: #define RTC_POWER     1
        !           247: #define RTC_POWER_PC  2
        !           248: #define timebasestruct_t   struct timebasestruct_dummy
        !           249: struct timebasestruct_dummy {
        !           250:   int             flag;
        !           251:   unsigned int    tb_high;
        !           252:   unsigned int    tb_low;
        !           253: };
        !           254: #endif  /* HAVE_READ_REAL_TIME */
1.1       maekawa   255:
1.1.1.2 ! ohara     256: #if HAVE_CLOCK_GETTIME
        !           257: static const int have_cgt = 1;
        !           258: #define struct_timespec  struct timespec
        !           259: #else  /* ! HAVE_CLOCK_GETTIME */
        !           260: static const int have_cgt = 0;
        !           261: #define struct_timespec       struct timespec_dummy
        !           262: #define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
        !           263: #define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
        !           264: #endif  /* HAVE_CLOCK_GETTIME */
        !           265:
        !           266: #if HAVE_GETRUSAGE
        !           267: static const int have_grus = 1;
        !           268: #define struct_rusage   struct rusage
        !           269: #else  /* ! HAVE_GETRUSAGE */
        !           270: static const int have_grus = 0;
        !           271: #define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
        !           272: #define struct_rusage    struct rusage_dummy
        !           273: #endif  /* HAVE_GETRUSAGE */
        !           274:
        !           275: #if HAVE_GETTIMEOFDAY
        !           276: static const int have_gtod = 1;
        !           277: #define struct_timeval   struct timeval
        !           278: #else  /* ! HAVE_GETTIMEOFDAY */
        !           279: static const int have_gtod = 0;
        !           280: #define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
        !           281: #define struct_timeval   struct timeval_dummy
        !           282: #endif  /* HAVE_GETTIMEOFDAY */
        !           283:
        !           284: #if HAVE_TIMES
        !           285: static const int have_times = 1;
        !           286: #define struct_tms   struct tms
        !           287: #else  /* ! HAVE_TIMES */
        !           288: static const int have_times = 0;
        !           289: #define times(tms)   ASSERT_FAIL (times not available)
        !           290: #define struct_tms   struct tms_dummy
        !           291: #endif  /* HAVE_TIMES */
        !           292:
        !           293: struct tms_dummy {  /* what struct_tms names when times() is unavailable */
        !           294:   long  tms_utime;
        !           295: };
        !           296: struct timeval_dummy {  /* what struct_timeval names when gettimeofday() is unavailable */
        !           297:   long  tv_sec;
        !           298:   long  tv_usec;
        !           299: };
        !           300: struct rusage_dummy {  /* what struct_rusage names when getrusage() is unavailable */
        !           301:   struct_timeval ru_utime;
        !           302: };
        !           303: struct timespec_dummy {  /* what struct_timespec names when clock_gettime() is unavailable */
        !           304:   long  tv_sec;
        !           305:   long  tv_nsec;
        !           306: };
        !           307:
        !           308: static int  use_cycles;  /* use_*: not assigned in this part of the file -- presumably the run-time method choice */
        !           309: static int  use_sgi;
        !           310: static int  use_rrt;
        !           311: static int  use_cgt;
        !           312: static int  use_gtod;
        !           313: static int  use_grus;
        !           314: static int  use_times;
        !           315: static int  use_tick_boundary;
        !           316:
        !           317: static unsigned         start_cycles[2];  /* start_*: one slot per method -- presumably filled by speed_starttime(), TODO confirm */
        !           318: static stck_t           start_stck;
        !           319: static unsigned         start_sgi;
        !           320: static timebasestruct_t start_rrt;
        !           321: static struct_timespec  start_cgt;
        !           322: static struct_rusage    start_grus;
        !           323: static struct_timeval   start_gtod;
        !           324: static struct_tms       start_times;
        !           325:
        !           326: static double  cycles_limit = 1e100;  /* huge default -- presumably "no limit" until set elsewhere, TODO confirm */
        !           327: static double  sgi_unittime;  /* *_unittime: not assigned in this part of the file -- presumably per-method resolution */
        !           328: static double  cgt_unittime;
        !           329: static double  grus_unittime;
        !           330: static double  gtod_unittime;
        !           331: static double  times_unittime;
        !           332:
        !           333: /* for RTC_POWER format, ie. seconds and nanoseconds */
        !           334: #define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)  /* t is a pointer; result is a double in seconds */
        !           335:
        !           336:
        !           337: /* Return a string representing a time in seconds, nicely formatted.
        !           338:    Eg. "10.25ms".  */
        !           339: char *
        !           340: unittime_string (double t)
1.1       maekawa   341: {
1.1.1.2 ! ohara     342:   static char  buf[128];
        !           343:
        !           344:   const char  *unit;
        !           345:   int         prec;
1.1       maekawa   346:
1.1.1.2 ! ohara     347:   /* choose units and scale */
        !           348:   if (t < 1e-6)
        !           349:     t *= 1e9, unit = "ns";
        !           350:   else if (t < 1e-3)
        !           351:     t *= 1e6, unit = "us";
        !           352:   else if (t < 1.0)
        !           353:     t *= 1e3, unit = "ms";
        !           354:   else
        !           355:     unit = "s";
        !           356:
        !           357:   /* want 4 significant figures */
        !           358:   if (t < 1.0)
        !           359:     prec = 4;
        !           360:   else if (t < 10.0)
        !           361:     prec = 3;
        !           362:   else if (t < 100.0)
        !           363:     prec = 2;
        !           364:   else
        !           365:     prec = 1;
1.1       maekawa   366:
1.1.1.2 ! ohara     367:   sprintf (buf, "%.*f%s", prec, t, unit);
        !           368:   return buf;
1.1       maekawa   369: }
                    370:
                    371:
1.1.1.2 ! ohara     372: static jmp_buf  cycles_works_buf;
1.1       maekawa   373:
1.1.1.2 ! ohara     374: static RETSIGTYPE
        !           375: cycles_works_handler (int sig)
        !           376: {
        !           377:   longjmp (cycles_works_buf, 1);
        !           378: }
1.1       maekawa   379:
                    380: int
1.1.1.2 ! ohara     381: cycles_works_p (void)
1.1       maekawa   382: {
1.1.1.2 ! ohara     383:   static int  result = -1;
        !           384:   RETSIGTYPE (*old_handler) _PROTO ((int));
        !           385:   unsigned  cycles[2];
        !           386:
        !           387:   /* suppress a warning about cycles[] unused */
        !           388:   cycles[0] = 0;
        !           389:
        !           390:   if (result != -1)
        !           391:     goto done;
        !           392:
        !           393: #ifdef SIGILL
        !           394:   old_handler = signal (SIGILL, cycles_works_handler);
        !           395:   if (old_handler == SIG_ERR)
        !           396:     {
        !           397:       if (speed_option_verbose)
        !           398:         printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
        !           399:       goto yes;
        !           400:     }
        !           401:   if (setjmp (cycles_works_buf))
1.1       maekawa   402:     {
1.1.1.2 ! ohara     403:       if (speed_option_verbose)
        !           404:         printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
        !           405:       result = 0;
        !           406:       goto done;
1.1       maekawa   407:     }
1.1.1.2 ! ohara     408:   speed_cyclecounter (cycles);
        !           409:   signal (SIGILL, old_handler);
        !           410:   if (speed_option_verbose)
        !           411:     printf ("cycles_works_p(): speed_cyclecounter() works\n");
        !           412: #else
1.1       maekawa   413:
1.1.1.2 ! ohara     414:   if (speed_option_verbose)
        !           415:     printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
1.1       maekawa   416: #endif
                    417:
1.1.1.2 ! ohara     418:  yes:
        !           419:   result = 1;
1.1       maekawa   420:
1.1.1.2 ! ohara     421:  done:
        !           422:   return result;
        !           423: }
1.1       maekawa   424:
                    425:
/* Return the number of clock ticks per second.

   When sysconf is available, sysconf(_SC_CLK_TCK) is tried first.  If that
   gives -1, or there's no sysconf, the compile-time CLK_TCK is used
   instead, and if CLK_TCK isn't defined either the program aborts.

   The value is remembered in a static, so the lookup and any verbose
   message happen only on the first call.  */
long
clk_tck (void)
{
  static long  ticks_per_sec = -1L;   /* -1 means not yet determined */

  if (ticks_per_sec == -1L)
    {
#if HAVE_SYSCONF
      ticks_per_sec = sysconf (_SC_CLK_TCK);
      if (ticks_per_sec != -1L)
        {
          if (speed_option_verbose)
            printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", ticks_per_sec);
          return ticks_per_sec;
        }

      fprintf (stderr,
               "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
#endif

#ifdef CLK_TCK
      ticks_per_sec = CLK_TCK;
      if (speed_option_verbose)
        printf ("CLK_TCK is %ld per second\n", ticks_per_sec);
#else
      fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
      abort ();
#endif
    }

  return ticks_per_sec;
}
                    458:
                    459:
1.1.1.2 ! ohara     460: /* If two times can be observed less than half a clock tick apart, then
        !           461:    assume "get" is microsecond accurate.
        !           462:
        !           463:    Two times only 1 microsecond apart are not believed, since some kernels
        !           464:    take it upon themselves to ensure gettimeofday doesn't return the same
        !           465:    value twice, for the benefit of applications using it for a timestamp.
        !           466:    This is obviously very stupid given the speed of CPUs these days.
        !           467:
        !           468:    Making "reps" calls to noop_1() is designed to waste some CPU, with a
        !           469:    view to getting measurements 2 microseconds (or more) apart.  "reps" is
        !           470:    increased progressively until such a period is seen.
        !           471:
        !           472:    The outer loop "attempts" are just to allow for any random nonsense or
        !           473:    system load upsetting the measurements (ie. making two successive calls
        !           474:    to "get" come out as a longer interval than normal).
        !           475:
        !           476:    Bugs:
        !           477:
        !           478:    The assumption that any interval less than a half tick implies
        !           479:    microsecond resolution is obviously fairly rash, the true resolution
        !           480:    could be anything between a microsecond and that half tick.  Perhaps
        !           481:    something special would have to be done on a system where this is the
        !           482:    case, since there's no obvious reliable way to detect it
        !           483:    automatically.  */
        !           484:
        !           485: #define MICROSECONDS_P(name, type, get, sec, usec)                      \
        !           486:   {                                                                     \
        !           487:     static int  result = -1;                                            \
        !           488:     type      st, et;                                                   \
        !           489:     long      dt, half_tick;                                            \
        !           490:     unsigned  attempt, reps, i, j;                                      \
        !           491:                                                                         \
        !           492:     if (result != -1)                                                   \
        !           493:       return result;                                                    \
        !           494:                                                                         \
        !           495:     result = 0;                                                         \
        !           496:     half_tick = (1000000L / clk_tck ()) / 2;                            \
        !           497:                                                                         \
        !           498:     for (attempt = 0; attempt < 5; attempt++)                           \
        !           499:       {                                                                 \
        !           500:         reps = 0;                                                       \
        !           501:         for (;;)                                                        \
        !           502:           {                                                             \
        !           503:             get (st);                                                   \
        !           504:             for (i = 0; i < reps; i++)                                  \
        !           505:               for (j = 0; j < 100; j++)                                 \
        !           506:                 noop_1 (CNST_LIMB(0));                                  \
        !           507:             get (et);                                                   \
        !           508:                                                                         \
        !           509:             dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st);        \
        !           510:                                                                         \
        !           511:             if (speed_option_verbose >= 2)                              \
        !           512:               printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
        !           513:                       name, attempt, reps, dt);                         \
        !           514:                                                                         \
        !           515:             if (dt >= 2)                                                \
        !           516:               break;                                                    \
        !           517:                                                                         \
        !           518:             reps = (reps == 0 ? 1 : 2*reps);                            \
        !           519:             if (reps == 0)                                              \
        !           520:               break;  /* uint overflow, not normal */                   \
        !           521:           }                                                             \
        !           522:                                                                         \
        !           523:         if (dt < half_tick)                                             \
        !           524:           {                                                             \
        !           525:             result = 1;                                                 \
        !           526:             break;                                                      \
        !           527:           }                                                             \
        !           528:       }                                                                 \
        !           529:                                                                         \
        !           530:     if (speed_option_verbose)                                           \
        !           531:       {                                                                 \
        !           532:         if (result)                                                     \
        !           533:           printf ("%s is microsecond accurate\n", name);                \
        !           534:         else                                                            \
        !           535:           printf ("%s is only %s clock tick accurate\n",                \
        !           536:                   name, unittime_string (1.0/clk_tck()));               \
        !           537:       }                                                                 \
        !           538:     return result;                                                      \
        !           539:   }
        !           540:
1.1       maekawa   541: /* Apply the MICROSECONDS_P test to gettimeofday(), reading tv_sec and tv_usec; the macro expansion supplies the return. */
                    542: int
1.1.1.2 ! ohara     543: gettimeofday_microseconds_p (void)
1.1       maekawa   544: {
1.1.1.2 ! ohara     545: #define call_gettimeofday(t)   gettimeofday (&(t), NULL)
        !           546: #define timeval_tv_sec(t)      ((t).tv_sec)
        !           547: #define timeval_tv_usec(t)     ((t).tv_usec)
        !           548:   MICROSECONDS_P ("gettimeofday", struct_timeval,
        !           549:                   call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
1.1       maekawa   550: }
                    551: /* Apply the MICROSECONDS_P test to getrusage (0, ...), reading ru_utime.tv_sec and ru_utime.tv_usec. */
1.1.1.2 ! ohara     552: int
        !           553: getrusage_microseconds_p (void)
        !           554: {
        !           555: #define call_getrusage(t)   getrusage (0, &(t))
        !           556: #define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
        !           557: #define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
        !           558:   MICROSECONDS_P ("getrusage", struct_rusage,
        !           559:                   call_getrusage, rusage_tv_sec, rusage_tv_usec);
        !           560: }
        !           561:
        !           562:
        !           563: /* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
        !           564:    of glibc (some time post 2.2).
1.1       maekawa   565:
1.1.1.2 ! ohara     566:    CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
        !           567:    defined, but returning -1 for an error).  */
        !           568: /* Select CGT_ID; HAVE_CGT_ID is 1 when an id was found, otherwise 0 and CGT_ID expands to an ASSERT_FAIL expression. */
        !           569: #ifdef CLOCK_PROCESS_CPUTIME_ID
        !           570: # define CGT_ID        CLOCK_PROCESS_CPUTIME_ID
        !           571: #else
        !           572: # ifdef CLOCK_VIRTUAL
        !           573: #  define CGT_ID       CLOCK_VIRTUAL
        !           574: # endif
        !           575: #endif
        !           576: #ifdef CGT_ID
        !           577: # define HAVE_CGT_ID  1
        !           578: #else
        !           579: # define HAVE_CGT_ID  0
        !           580: # define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
        !           581: #endif
1.1       maekawa   582: /* Return 1 if clock_gettime (CGT_ID) is usable, else 0.  On success cgt_unittime is set from clock_getres. */
                    583: int
1.1.1.2 ! ohara     584: cgt_works_p (void)
1.1       maekawa   585: {
1.1.1.2 ! ohara     586:   static int  result = -1;
        !           587:   struct_timespec  unit;
1.1       maekawa   588: /* result caches the answer across calls: -1 not yet tested, 0 unusable, 1 usable */
1.1.1.2 ! ohara     589:   if (! have_cgt)
        !           590:     return 0;
        !           591:
        !           592:   if (! HAVE_CGT_ID)
1.1       maekawa   593:     {
1.1.1.2 ! ohara     594:       if (speed_option_verbose)
        !           595:         printf ("clock_gettime don't know what ID to use\n");
        !           596:       result = 0;
        !           597:       return result;
        !           598:     }
1.1       maekawa   599:
1.1.1.2 ! ohara     600:   if (result != -1)
        !           601:     return result;
1.1       maekawa   602:
1.1.1.2 ! ohara     603:   /* trial run to see if it works */
        !           604:   if (clock_gettime (CGT_ID, &unit) != 0)
        !           605:     {
        !           606:       if (speed_option_verbose)
        !           607:         printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
        !           608:       result = 0;
        !           609:       return result;
        !           610:     }
        !           611:
        !           612:   /* get the resolution */
        !           613:   if (clock_getres (CGT_ID, &unit) != 0)
        !           614:     {
        !           615:       if (speed_option_verbose)
        !           616:         printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
        !           617:       result = 0;
        !           618:       return result;
1.1       maekawa   619:     }
                    620:
1.1.1.2 ! ohara     621:   cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
        !           622:   if (speed_option_verbose)  /* report only when verbose, like the other messages here */
        !           623:     printf ("clock_gettime is %s accurate\n", unittime_string (cgt_unittime));
        !           624:   result = 1;
        !           625:   return result;
1.1       maekawa   626: }
                    627:
                    628: /* Address of the least significant 4 bytes of the mapped SGI cycle counter; set by sgi_works_p(). */
1.1.1.2 ! ohara     629: volatile unsigned  *sgi_addr;
1.1       maekawa   630: /* Return 1 and point sgi_addr at the mmap()ed SGI cycle counter if it can be set up, else 0.  Sets sgi_unittime. */
1.1.1.2 ! ohara     631: int
        !           632: sgi_works_p (void)
        !           633: {
        !           634: #if HAVE_SYSSGI && HAVE_MMAP
        !           635:   static int  result = -1;
1.1       maekawa   636:
1.1.1.2 ! ohara     637:   size_t          pagesize, offset;
        !           638:   __psunsigned_t  phys, physpage;
        !           639:   void            *virtpage;
        !           640:   unsigned        period_picoseconds;
        !           641:   int             size, fd;
1.1       maekawa   642: /* result caches the answer across calls: -1 not yet tested, 0 unusable, 1 usable */
1.1.1.2 ! ohara     643:   if (result != -1)
        !           644:     return result;
1.1       maekawa   645:
1.1.1.2 ! ohara     646:   phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
        !           647:   if (phys == (__psunsigned_t) -1)
        !           648:     {
        !           649:       /* ENODEV is the error when a counter is not available */
        !           650:       if (speed_option_verbose)
        !           651:         printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
        !           652:       result = 0;
        !           653:       return result;
        !           654:     }
        !           655:   sgi_unittime = period_picoseconds * 1e-12;
1.1       maekawa   656:
1.1.1.2 ! ohara     657:   /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
        !           658:      Challenge/ONYX hardware has a 64 bit byte counter, but there seems no
        !           659:      obvious way to identify that without SGI_CYCLECNTR_SIZE.  */
        !           660: #ifdef SGI_CYCLECNTR_SIZE
        !           661:   size = syssgi (SGI_CYCLECNTR_SIZE);
        !           662:   if (size == -1)
        !           663:     {
        !           664:       if (speed_option_verbose)
        !           665:         {
        !           666:           printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
        !           667:           printf ("    will assume size==32\n");
        !           668:         }
        !           669:       size = 32;
        !           670:     }
        !           671: #else
        !           672:   size = 32;
1.1       maekawa   673: #endif
                    674: /* size is a bit count from here on (32 or 64 expected), hence size/8 for bytes below */
1.1.1.2 ! ohara     675:   if (size < 32)
        !           676:     {
        !           677:       printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
        !           678:       result = 0;
        !           679:       return result;
        !           680:     }
1.1       maekawa   681:
1.1.1.2 ! ohara     682:   pagesize = getpagesize();
        !           683:   offset = (size_t) phys & (pagesize-1);
        !           684:   physpage = phys - offset;
1.1       maekawa   685:
1.1.1.2 ! ohara     686:   /* shouldn't cross over a page boundary */
        !           687:   ASSERT_ALWAYS (offset + size/8 <= pagesize);
1.1       maekawa   688:
1.1.1.2 ! ohara     689:   fd = open("/dev/mmem", O_RDONLY);
        !           690:   if (fd == -1)
        !           691:     {
        !           692:       if (speed_option_verbose)
        !           693:         printf ("open /dev/mmem: %s\n", strerror (errno));
        !           694:       result = 0;
        !           695:       return result;
        !           696:     }
1.1       maekawa   697: /* NOTE(review): fd is never closed below, on either the mmap failure or the success path */
1.1.1.2 ! ohara     698:   virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
        !           699:   if (virtpage == (void *) -1)
        !           700:     {
        !           701:       if (speed_option_verbose)
        !           702:         printf ("mmap /dev/mmem: %s\n", strerror (errno));
        !           703:       result = 0;
        !           704:       return result;
        !           705:     }
1.1       maekawa   706:
1.1.1.2 ! ohara     707:   /* address of least significant 4 bytes, knowing mips is big endian */
        !           708:   sgi_addr = (unsigned *) ((char *) virtpage + offset
        !           709:                            + size/8 - sizeof(unsigned));
        !           710:   result = 1;
        !           711:   return result;
1.1       maekawa   712:
1.1.1.2 ! ohara     713: #else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
1.1       maekawa   714:   return 0;
1.1.1.2 ! ohara     715: #endif
1.1       maekawa   716: }
                    717:
                    718: /* Assign n to var only when var currently tests as zero, so a value that is already set is kept. */
1.1.1.2 ! ohara     719: #define DEFAULT(var,n)  \
        !           720:   do {                  \
        !           721:     if (! (var))        \
        !           722:       (var) = (n);      \
        !           723:   } while (0)
1.1       maekawa   724: /* Choose the timing method once: sets the use_ flags, speed_unittime, a default speed_precision and speed_time_string. */
                    725: void
                    726: speed_time_init (void)
                    727: {
1.1.1.2 ! ohara     728:   double supplement_unittime = 0.0;
        !           729:
        !           730:   static int  speed_time_initialized = 0;
1.1       maekawa   731:   if (speed_time_initialized)
                    732:     return;
                    733:   speed_time_initialized = 1;
                    734:
1.1.1.2 ! ohara     735:   speed_cycletime_init ();
1.1       maekawa   736:
1.1.1.2 ! ohara     737:   if (have_cycles && cycles_works_p ())
        !           738:     {
        !           739:       use_cycles = 1;
        !           740:       DEFAULT (speed_cycletime, 1.0);
        !           741:       speed_unittime = speed_cycletime;
        !           742:       DEFAULT (speed_precision, 10000);
        !           743:       strcpy (speed_time_string, "CPU cycle counter");
        !           744:
        !           745:       /* only used if a supplementary method is chosen below */
        !           746:       cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
        !           747:         * speed_cycletime;
1.1       maekawa   748:
1.1.1.2 ! ohara     749:       if (have_grus && getrusage_microseconds_p())
        !           750:         {
        !           751:           /* this is a good combination */
        !           752:           use_grus = 1;
        !           753:           supplement_unittime = grus_unittime = 1.0e-6;
        !           754:           strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
        !           755:         }
        !           756:       else if (have_cycles == 1)
        !           757:         {
        !           758:           /* When speed_cyclecounter has a limited range, look for something
        !           759:              to supplement it. */
        !           760:           if (have_gtod && gettimeofday_microseconds_p())
        !           761:             {
        !           762:               use_gtod = 1;
        !           763:               supplement_unittime = gtod_unittime = 1.0e-6;
        !           764:               strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
        !           765:             }
        !           766:           else if (have_grus)
        !           767:             {
        !           768:               use_grus = 1;
        !           769:               supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
        !           770:               sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
        !           771:             }
        !           772:           else if (have_times)
        !           773:             {
        !           774:               use_times = 1;
        !           775:               supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
        !           776:               sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
        !           777:             }
        !           778:           else if (have_gtod)
        !           779:             {
        !           780:               use_gtod = 1;
        !           781:               supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
        !           782:               sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
        !           783:             }
        !           784:           else
        !           785:             {
        !           786:               fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
        !           787:               fprintf (stderr, "    Wraparounds may produce bad results on long measurements.\n");
        !           788:             }
        !           789:         }
1.1       maekawa   790:
1.1.1.2 ! ohara     791:       if (use_grus || use_times || use_gtod)
        !           792:         {
        !           793:           /* must know cycle period to compare cycles to other measuring
        !           794:              (via cycles_limit) */
        !           795:           speed_cycletime_need_seconds ();
1.1       maekawa   796:
1.1.1.2 ! ohara     797:           if (speed_precision * supplement_unittime > cycles_limit)
        !           798:             {
        !           799:               fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
        !           800:               fprintf (stderr, "    cycle counter and limited precision supplemental method\n");
        !           801:               fprintf (stderr, "    (%s)\n", speed_time_string);
        !           802:             }
        !           803:         }
        !           804:     }
        !           805:   else if (have_stck)
        !           806:     {
        !           807:       strcpy (speed_time_string, "STCK timestamp");
        !           808:       /* stck is in units of 2^-12 microseconds, which is very likely higher
        !           809:          resolution than a cpu cycle */
        !           810:       if (speed_cycletime == 0.0)
        !           811:         speed_cycletime_fail
        !           812:           ("Need to know CPU frequency for effective stck unit");
        !           813:       speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
        !           814:       DEFAULT (speed_precision, 10000);
        !           815:     }
        !           816:   else if (have_sgi && sgi_works_p ())
        !           817:     {
        !           818:       use_sgi = 1;
        !           819:       DEFAULT (speed_precision, 10000);
        !           820:       speed_unittime = sgi_unittime;
        !           821:       sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
        !           822:                unittime_string (speed_unittime));
        !           823:       /* supplemented with getrusage, which we assume to have 1ms resolution */
        !           824:       use_grus = 1;
        !           825:       supplement_unittime = 1e-3;
        !           826:     }
        !           827:   else if (have_rrt)
        !           828:     {
        !           829:       timebasestruct_t  t;
        !           830:       use_rrt = 1;
        !           831:       DEFAULT (speed_precision, 10000);
        !           832:       read_real_time (&t, sizeof(t));
        !           833:       switch (t.flag) {
        !           834:       case RTC_POWER:
        !           835:         /* FIXME: What's the actual RTC resolution? */
        !           836:         speed_unittime = 1e-7;
        !           837:         strcpy (speed_time_string, "read_real_time() power nanoseconds");
        !           838:         break;
        !           839:       case RTC_POWER_PC:
        !           840:         t.tb_high = 1;
        !           841:         t.tb_low = 0;
        !           842:         time_base_to_time (&t, sizeof(t));
        !           843:         speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
        !           844:         sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
        !           845:                  unittime_string (speed_unittime));
        !           846:         break;
        !           847:       default:
        !           848:         fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
        !           849:                  t.flag);
        !           850:         abort ();
        !           851:       }
        !           852:     }
        !           853:   else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
        !           854:     {
        !           855:       /* use clock_gettime if microsecond or better resolution */
        !           856:     choose_cgt:
        !           857:       use_cgt = 1;
        !           858:       speed_unittime = cgt_unittime;
        !           859:       DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
        !           860:       sprintf (speed_time_string, "%s accurate clock_gettime()", unittime_string (cgt_unittime));
        !           861:     }
        !           862:   else if (have_grus && getrusage_microseconds_p())
        !           863:     {
        !           864:       use_grus = 1;
        !           865:       speed_unittime = grus_unittime = 1.0e-6;
        !           866:       DEFAULT (speed_precision, 1000);
        !           867:       strcpy (speed_time_string, "microsecond accurate getrusage()");
        !           868:     }
        !           869:   else if (have_gtod && gettimeofday_microseconds_p())
        !           870:     {
        !           871:       use_gtod = 1;
        !           872:       speed_unittime = gtod_unittime = 1.0e-6;
        !           873:       DEFAULT (speed_precision, 1000);
        !           874:       strcpy (speed_time_string, "microsecond accurate gettimeofday()");
        !           875:     }
        !           876:   else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
        !           877:     {
        !           878:       /* use clock_gettime if 1 tick or better resolution */
        !           879:       goto choose_cgt;
        !           880:     }
        !           881:   else if (have_times)
        !           882:     {
        !           883:       use_times = 1;
        !           884:       use_tick_boundary = 1;
        !           885:       speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
        !           886:       DEFAULT (speed_precision, 200);
        !           887:       sprintf (speed_time_string, "%s clock tick times()",
        !           888:                unittime_string (speed_unittime));
        !           889:     }
        !           890:   else if (have_grus)
        !           891:     {
        !           892:       use_grus = 1;
        !           893:       use_tick_boundary = 1;
        !           894:       speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
        !           895:       DEFAULT (speed_precision, 200);
        !           896:       sprintf (speed_time_string, "%s clock tick getrusage()",
        !           897:                unittime_string (speed_unittime));
        !           898:     }
        !           899:   else if (have_gtod)
        !           900:     {
        !           901:       use_gtod = 1;
        !           902:       use_tick_boundary = 1;
        !           903:       speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
        !           904:       DEFAULT (speed_precision, 200);
        !           905:       sprintf (speed_time_string, "%s clock tick gettimeofday()",
        !           906:                unittime_string (speed_unittime));
        !           907:     }
        !           908:   else
        !           909:     {
        !           910:       fprintf (stderr, "No time measuring method available\n");
        !           911:       fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
        !           912:       abort ();
        !           913:     }
1.1       maekawa   914:
1.1.1.2 ! ohara     915:   if (speed_option_verbose)
        !           916:     {
        !           917:       printf ("speed_time_init: %s\n", speed_time_string);
        !           918:       printf ("    speed_precision     %d\n", speed_precision);
        !           919:       printf ("    speed_unittime      %.2g\n", speed_unittime);
        !           920:       if (supplement_unittime)
        !           921:         printf ("    supplement_unittime %.2g\n", supplement_unittime);
        !           922:       printf ("    use_tick_boundary   %d\n", use_tick_boundary);
        !           923:       if (have_cycles)
        !           924:         printf ("    cycles_limit        %.2g seconds\n", cycles_limit);
        !           925:     }
1.1       maekawa   926: }
                    927:
                    928:
                    929:
1.1.1.2 ! ohara     930: /* Burn up CPU until a clock tick boundary, for greater accuracy.  Set the
        !           931:    corresponding "start_foo" appropriately too. */
1.1       maekawa   932: /* Spin on getrusage() until ru_utime.tv_usec differs from the first sample; start_grus holds the changed sample. */
1.1.1.2 ! ohara     933: void
        !           934: grus_tick_boundary (void)
        !           935: {
        !           936:   struct_rusage  prev;
        !           937:   getrusage (0, &prev);
        !           938:   do {
        !           939:     getrusage (0, &start_grus);
        !           940:   } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
        !           941: }
1.1       maekawa   942: /* Spin on gettimeofday() until tv_usec differs from the first sample; start_gtod holds the changed sample. */
                    943: void
1.1.1.2 ! ohara     944: gtod_tick_boundary (void)
1.1       maekawa   945: {
1.1.1.2 ! ohara     946:   struct_timeval  prev;
        !           947:   gettimeofday (&prev, NULL);
        !           948:   do {
        !           949:     gettimeofday (&start_gtod, NULL);
        !           950:   } while (start_gtod.tv_usec == prev.tv_usec);
        !           951: }
1.1       maekawa   952: /* Spin on times() until tms_utime differs from the first sample; start_times holds the changed sample. */
1.1.1.2 ! ohara     953: void
        !           954: times_tick_boundary (void)
        !           955: {
        !           956:   struct_tms  prev;
        !           957:   times (&prev);
        !           958:   do
        !           959:     times (&start_times);
        !           960:   while (start_times.tms_utime == prev.tms_utime);
1.1       maekawa   961: }
                    962:
1.1.1.2 ! ohara     963:
        !           964: /* "have_" values are tested to let unused code go dead.  */
        !           965: /* Run speed_time_init(), then record a start sample for every method whose use_ flag is set. */
1.1       maekawa   966: void
                    967: speed_starttime (void)
                    968: {
1.1.1.2 ! ohara     969:   speed_time_init ();
1.1       maekawa   970:
1.1.1.2 ! ohara     971:   if (have_grus && use_grus)
        !           972:     {
        !           973:       if (use_tick_boundary)
        !           974:         grus_tick_boundary ();
        !           975:       else
        !           976:         getrusage (0, &start_grus);
        !           977:     }
1.1       maekawa   978:
1.1.1.2 ! ohara     979:   if (have_gtod && use_gtod)
        !           980:     {
        !           981:       if (use_tick_boundary)
        !           982:         gtod_tick_boundary ();
        !           983:       else
        !           984:         gettimeofday (&start_gtod, NULL);
        !           985:     }
1.1       maekawa   986:
1.1.1.2 ! ohara     987:   if (have_times && use_times)
        !           988:     {
        !           989:       if (use_tick_boundary)
        !           990:         times_tick_boundary ();
        !           991:       else
        !           992:         times (&start_times);
        !           993:     }
1.1       maekawa   994:
1.1.1.2 ! ohara     995:   if (have_cgt && use_cgt)
        !           996:     clock_gettime (CGT_ID, &start_cgt);
1.1       maekawa   997:
1.1.1.2 ! ohara     998:   if (have_rrt && use_rrt)
        !           999:     read_real_time (&start_rrt, sizeof(start_rrt));
1.1       maekawa  1000:
1.1.1.2 ! ohara    1001:   if (have_sgi && use_sgi)
        !          1002:     start_sgi = *sgi_addr;
1.1       maekawa  1003:
1.1.1.2 ! ohara    1004:   if (have_stck && use_stck)
        !          1005:     STCK (start_stck);
1.1       maekawa  1006:
1.1.1.2 ! ohara    1007:   /* Cycles sampled last for maximum accuracy. */
        !          1008:   if (have_cycles && use_cycles)
        !          1009:     speed_cyclecounter (start_cycles);
        !          1010: }
1.1       maekawa  1011:
                   1012:
1.1.1.2 ! ohara    1013: /* Calculate the difference between two cycle counter samples, as a "double"
        !          1014:    counter of cycles.
1.1       maekawa  1015:
1.1.1.2 ! ohara    1016:    The start and end values are allowed to cancel in integers in case the
        !          1017:    counter values are bigger than the 53 bits that normally fit in a double.
1.1       maekawa  1018:
1.1.1.2 ! ohara    1019:    This works even if speed_cyclecounter() puts a value bigger than 32-bits
        !          1020:    in the low word (the high word always gets a 2**32 multiplier though). */
        !          1021: /* In the two-word case, d > end[0] means the unsigned low-word subtraction wrapped, so M_2POWU is taken off. */
        !          1022: double
        !          1023: speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
1.1       maekawa  1024: {
1.1.1.2 ! ohara    1025:   unsigned  d;
        !          1026:   double    t;
1.1       maekawa  1027:
1.1.1.2 ! ohara    1028:   if (have_cycles == 1)
        !          1029:     {
        !          1030:       t = (end[0] - start[0]);
        !          1031:     }
        !          1032:   else
        !          1033:     {
        !          1034:       d = end[0] - start[0];
        !          1035:       t = d - (d > end[0] ? M_2POWU : 0.0);
        !          1036:       t += (end[1] - start[1]) * M_2POW32;
        !          1037:     }
        !          1038:   return t;
1.1       maekawa  1039: }
                   1040:
1.1.1.2 ! ohara    1041:
        !          1042: /* Calculate the difference between "start" and "end" using fields "sec" and
        !          1043:    "psec", where each "psec" is a "punit" of a second.
        !          1044:
        !          1045:    The seconds parts are allowed to cancel before being combined with the
        !          1046:    psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
        !          1047:    double.
        !          1048:
        !          1049:    Total time is only calculated in a "double" since an integer count of
        !          1050:    psecs might overflow.  2^32 microseconds is only a bit over an hour, or
        !          1051:    2^32 nanoseconds only about 4 seconds.
        !          1052:
        !          1053:    The casts to "long" are for the benefit of timebasestruct_t, where the
        !          1054:    fields are only "unsigned int", but we want a signed difference.  */
        !          1055: /* Expands to a complete function body; the enclosing function must have pointer parameters named end and start. */
        !          1056: #define DIFF_SECS_ROUTINE(sec, psec, punit)                     \
        !          1057:   {                                                             \
        !          1058:     long  sec_diff, psec_diff;                                  \
        !          1059:     sec_diff = (long) end->sec - (long) start->sec;             \
        !          1060:     psec_diff = (long) end->psec - (long) start->psec;          \
        !          1061:     return (double) sec_diff + punit * (double) psec_diff;      \
        !          1062:   }
        !          1063: /* Return end minus start in seconds, from the tv_sec and tv_usec (1e-6 second) fields. */
        !          1064: double
        !          1065: timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
1.1       maekawa  1066: {
1.1.1.2 ! ohara    1067:   DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
        !          1068: }
1.1       maekawa  1069: /* Return end minus start in seconds, from the ru_utime.tv_sec and ru_utime.tv_usec (1e-6 second) fields. */
1.1.1.2 ! ohara    1070: double
        !          1071: rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
        !          1072: {
        !          1073:   DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
1.1       maekawa  1074: }
                   1075: /* Return end minus start in seconds, from the tv_sec and tv_nsec (1e-9 second) fields. */
                   1076: double
1.1.1.2 ! ohara    1077: timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
1.1       maekawa  1078: {
1.1.1.2 ! ohara    1079:   DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
        !          1080: }
1.1       maekawa  1081:
1.1.1.2 ! ohara    1082: /* This is for use after time_base_to_time, ie. with tb_high read as seconds and tb_low as nanoseconds (1e-9). */
        !          1083: double
        !          1084: timebasestruct_diff_secs (const timebasestruct_t *end,
        !          1085:                           const timebasestruct_t *start)
        !          1086: {
        !          1087:   DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
1.1       maekawa  1088: }
                   1089:
                   1090:
1.1.1.2 ! ohara    1091: double
        !          1092: speed_endtime (void)
        !          1093: {
        !          1094: #define END_USE(name,value)                             \
        !          1095:   do {                                                  \
        !          1096:     if (speed_option_verbose >= 3)                      \
        !          1097:       printf ("speed_endtime(): used %s\n", name);      \
        !          1098:     result = value;                                     \
        !          1099:     goto done;                                          \
        !          1100:   } while (0)
        !          1101:
        !          1102: #define END_ENOUGH(name,value)                                          \
        !          1103:   do {                                                                  \
        !          1104:     if (speed_option_verbose >= 3)                                      \
        !          1105:       printf ("speed_endtime(): %s gives enough precision\n", name);    \
        !          1106:     result = value;                                                     \
        !          1107:     goto done;                                                          \
        !          1108:   } while (0)
        !          1109:
        !          1110: #define END_EXCEED(name,value)                                            \
        !          1111:   do {                                                                    \
        !          1112:     if (speed_option_verbose >= 3)                                        \
        !          1113:       printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
        !          1114:               name);                                                      \
        !          1115:     result = value;                                                       \
        !          1116:     goto done;                                                            \
        !          1117:   } while (0)
        !          1118:
        !          1119:   unsigned          end_cycles[2];
        !          1120:   stck_t            end_stck;
        !          1121:   unsigned          end_sgi;
        !          1122:   timebasestruct_t  end_rrt;
        !          1123:   struct_timespec   end_cgt;
        !          1124:   struct_timeval    end_gtod;
        !          1125:   struct_rusage     end_grus;
        !          1126:   struct_tms        end_times;
        !          1127:   double            t_gtod, t_grus, t_times, t_cgt;
        !          1128:   double            t_rrt, t_sgi, t_stck, t_cycles;
        !          1129:   double            result;
        !          1130:
        !          1131:   /* Cycles sampled first for maximum accuracy.
        !          1132:      "have_" values tested to let unused code go dead.  */
        !          1133:
        !          1134:   if (have_cycles && use_cycles)  speed_cyclecounter (end_cycles);
        !          1135:   if (have_stck   && use_stck)    STCK (end_stck);
        !          1136:   if (have_sgi    && use_sgi)     end_sgi = *sgi_addr;
        !          1137:   if (have_rrt    && use_rrt)     read_real_time (&end_rrt, sizeof(end_rrt));
        !          1138:   if (have_cgt    && use_cgt)     clock_gettime (CGT_ID, &end_cgt);
        !          1139:   if (have_gtod   && use_gtod)    gettimeofday (&end_gtod, NULL);
        !          1140:   if (have_grus   && use_grus)    getrusage (0, &end_grus);
        !          1141:   if (have_times  && use_times)   times (&end_times);
1.1       maekawa  1142:
1.1.1.2 ! ohara    1143:   result = -1.0;
1.1       maekawa  1144:
1.1.1.2 ! ohara    1145:   if (speed_option_verbose >= 4)
        !          1146:     {
        !          1147:       printf ("speed_endtime():\n");
        !          1148:       if (use_cycles)
        !          1149:         printf ("   cycles  0x%X,0x%X -> 0x%X,0x%X\n",
        !          1150:                 start_cycles[1], start_cycles[0],
        !          1151:                 end_cycles[1], end_cycles[0]);
        !          1152:
        !          1153:       if (use_stck)
        !          1154:         printf ("   stck  0x%lX -> 0x%lX\n", start_stck, end_stck);
        !          1155:
        !          1156:       if (use_sgi)
        !          1157:         printf ("   sgi  0x%X -> 0x%X\n", start_sgi, end_sgi);
        !          1158:
        !          1159:       if (use_rrt)
        !          1160:         printf ("   read_real_time  (%d)%u,%u -> (%d)%u,%u\n",
        !          1161:                 start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
        !          1162:                 end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);
        !          1163:
        !          1164:       if (use_cgt)
        !          1165:         printf ("   clock_gettime  %ld.%09ld -> %ld.%09ld\n",
        !          1166:                 start_cgt.tv_sec, start_cgt.tv_nsec,
        !          1167:                 end_cgt.tv_sec, end_cgt.tv_nsec);
        !          1168:
        !          1169:       if (use_gtod)
        !          1170:         printf ("   gettimeofday  %ld.%06ld -> %ld.%06ld\n",
        !          1171:                 start_gtod.tv_sec, start_gtod.tv_usec,
        !          1172:                 end_gtod.tv_sec, end_gtod.tv_usec);
        !          1173:
        !          1174:       if (use_grus)
        !          1175:         printf ("   getrusage  %ld.%06ld -> %ld.%06ld\n",
        !          1176:                 start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec,
        !          1177:                 end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec);
        !          1178:
        !          1179:       if (use_times)
        !          1180:         printf ("   times  %ld -> %ld\n",
        !          1181:                 start_times.tms_utime, end_times.tms_utime);
        !          1182:     }
1.1       maekawa  1183:
1.1.1.2 ! ohara    1184:   if (use_rrt)
        !          1185:     {
        !          1186:       time_base_to_time (&start_rrt, sizeof(start_rrt));
        !          1187:       time_base_to_time (&end_rrt, sizeof(end_rrt));
        !          1188:       t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
        !          1189:       END_USE ("read_real_time()", t_rrt);
        !          1190:     }
1.1       maekawa  1191:
1.1.1.2 ! ohara    1192:   if (use_cgt)
        !          1193:     {
        !          1194:       t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
        !          1195:       END_USE ("clock_gettime()", t_cgt);
        !          1196:     }
1.1       maekawa  1197:
1.1.1.2 ! ohara    1198:   if (use_grus)
        !          1199:     {
        !          1200:       t_grus = rusage_diff_secs (&end_grus, &start_grus);
1.1       maekawa  1201:
1.1.1.2 ! ohara    1202:       /* Use getrusage() if the cycle counter limit would be exceeded, or if
        !          1203:          it provides enough accuracy already. */
        !          1204:       if (use_cycles)
        !          1205:         {
        !          1206:           if (t_grus >= speed_precision*grus_unittime)
        !          1207:             END_ENOUGH ("getrusage()", t_grus);
        !          1208:           if (t_grus >= cycles_limit)
        !          1209:             END_EXCEED ("getrusage()", t_grus);
        !          1210:         }
        !          1211:     }
1.1       maekawa  1212:
1.1.1.2 ! ohara    1213:   if (use_times)
        !          1214:     {
        !          1215:       t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;
1.1       maekawa  1216:
1.1.1.2 ! ohara    1217:       /* Use times() if the cycle counter limit would be exceeded, or if
        !          1218:          it provides enough accuracy already. */
        !          1219:       if (use_cycles)
        !          1220:         {
        !          1221:           if (t_times >= speed_precision*times_unittime)
        !          1222:             END_ENOUGH ("times()", t_times);
        !          1223:           if (t_times >= cycles_limit)
        !          1224:             END_EXCEED ("times()", t_times);
        !          1225:         }
        !          1226:     }
1.1       maekawa  1227:
1.1.1.2 ! ohara    1228:   if (use_gtod)
        !          1229:     {
        !          1230:       t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);
1.1       maekawa  1231:
1.1.1.2 ! ohara    1232:       /* Use gettimeofday() if it measured a value bigger than the cycle
        !          1233:          counter can handle.  */
        !          1234:       if (use_cycles)
        !          1235:         {
        !          1236:           if (t_gtod >= cycles_limit)
        !          1237:             END_EXCEED ("gettimeofday()", t_gtod);
        !          1238:         }
        !          1239:     }
        !          1240:
        !          1241:   if (use_stck)
        !          1242:     {
        !          1243:       t_stck = (end_stck - start_stck) * STCK_PERIOD;
        !          1244:       END_USE ("stck", t_stck);
        !          1245:     }
1.1       maekawa  1246:
1.1.1.2 ! ohara    1247:   if (use_sgi)
1.1       maekawa  1248:     {
1.1.1.2 ! ohara    1249:       t_sgi = (end_sgi - start_sgi) * sgi_unittime;
        !          1250:       END_USE ("SGI hardware counter", t_sgi);
1.1       maekawa  1251:     }
                   1252:
1.1.1.2 ! ohara    1253:   if (use_cycles)
        !          1254:     {
        !          1255:       t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
        !          1256:         * speed_cycletime;
        !          1257:       END_USE ("cycle counter", t_cycles);
        !          1258:     }
1.1       maekawa  1259:
1.1.1.2 ! ohara    1260:   if (use_grus && getrusage_microseconds_p())
        !          1261:     END_USE ("getrusage()", t_grus);
1.1       maekawa  1262:
1.1.1.2 ! ohara    1263:   if (use_gtod && gettimeofday_microseconds_p())
        !          1264:     END_USE ("gettimeofday()", t_gtod);
1.1       maekawa  1265:
1.1.1.2 ! ohara    1266:   if (use_times)  END_USE ("times()",        t_times);
        !          1267:   if (use_grus)   END_USE ("getrusage()",    t_grus);
        !          1268:   if (use_gtod)   END_USE ("gettimeofday()", t_gtod);
1.1       maekawa  1269:
1.1.1.2 ! ohara    1270:   fprintf (stderr, "speed_endtime(): oops, no time method available\n");
        !          1271:   abort ();
        !          1272:
        !          1273:  done:
        !          1274:   if (result < 0.0)
        !          1275:     {
        !          1276:       fprintf (stderr,
        !          1277:                "speed_endtime(): fatal error: negative time measured: %.9f\n",
        !          1278:                result);
        !          1279:       abort ();
        !          1280:     }
        !          1281:   return result;
        !          1282: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>