[BACK]Return to speed-ext.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / tune

Annotation of OpenXM_contrib/gmp/tune/speed-ext.c, Revision 1.1.1.1

1.1       maekawa     1: /* An example of extending the speed program to measure routines not in GMP. */
                      2:
                      3: /*
                      4: Copyright (C) 1999, 2000 Free Software Foundation, Inc.
                      5:
                      6: This file is part of the GNU MP Library.
                      7:
                      8: The GNU MP Library is free software; you can redistribute it and/or modify
                      9: it under the terms of the GNU Lesser General Public License as published by
                     10: the Free Software Foundation; either version 2.1 of the License, or (at your
                     11: option) any later version.
                     12:
                     13: The GNU MP Library is distributed in the hope that it will be useful, but
                     14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     16: License for more details.
                     17:
                     18: You should have received a copy of the GNU Lesser General Public License
                     19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: MA 02111-1307, USA.
                     22: */
                     23:
                     24:
                     25: /* The extension here is three versions of an mpn arithmetic mean.  These
                     26:    aren't meant to be particularly useful, just examples.
                     27:
                     28:    You can run something like the following to compare their speeds.
                     29:
                     30:            ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
                     31:
                     32:    On RISC chips, mean_open() might be fastest if the compiler is doing a
                     33:    good job.  On the register starved x86s, mean_calls will be fastest.
                     34:
                     35:
                     36:    Notes:
                     37:
                     38:    SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
                     39:    by speed.c in useful places.  SPEED_EXTRA_PROTOS goes after the header
                     40:    files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
                     41:
                     42:    The advantage of this #include "speed.c" scheme is that there's no
                     43:    editing of a copy of that file, and new features in new versions of it
                     44:    will be immediately available.
                     45:
                     46:    In a real program the routines mean_calls() etc would probably be in
                     47:    separate C or assembler source files, and just the measuring
                     48:    speed_mean_calls() etc would be here.  Linking against other libraries
                     49:    for things to measure is perfectly possible too.
                     50:
                     51:    When attempting to compare two versions of the same named routine, say
                     52:    like the generic and assembler versions of mpn_add_n(), creative use of
                     53:    cc -D or #define is suggested, so one or both can be renamed and linked
                     54:    into the same program.  It'll be much easier to compare them side by side
                     55:    than with separate programs for each.
                     56:
                     57:    common.c has notes on writing speed measuring routines.
                     58:
                     59:    Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
                     60:    not using libtool) to get common.o and other objects needed by speed.c.  */
                     61:
                     62:
                     /* Prototypes of the three measuring routines defined at the end of this
                        file.  According to the notes at the top of the file, speed.c expands
                        this macro after its header files, so the names are declared before
                        SPEED_EXTRA_ROUTINES refers to them.  */
                     #define SPEED_EXTRA_PROTOS \
                       double speed_mean_calls (struct speed_params *s); \
                       double speed_mean_open (struct speed_params *s); \
                       double speed_mean_open2 (struct speed_params *s);
                     67:
                     /* Extra entries for speed.c's array of available routines: each pairs the
                        name given on the command line (see the example invocation in the notes
                        at the top of the file) with its measuring function.  The trailing
                        comma is part of the macro.  */
                     #define SPEED_EXTRA_ROUTINES \
                       { "mean_calls", speed_mean_calls }, \
                       { "mean_open", speed_mean_open }, \
                       { "mean_open2", speed_mean_open2 },
                     72:
                     73: #include "speed.c"
                     74:
                     75:
                     76: /* A straightforward implementation calling mpn subroutines.
                     77:
                     78:    wp,size is set to (xp,size + yp,size) / 2.  The return value is the
                     79:    remainder from the division.  The other versions are the same.  */
                     80:
                     81: mp_limb_t
                     82: mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
                     83: {
                     84:   mp_limb_t  c, ret;
                     85:
                     86:   ASSERT (size >= 1);
                     87:
                     88:   c = mpn_add_n (wp, xp, yp, size);
                     89:   ret = mpn_rshift (wp, wp, size, 1) >> (BITS_PER_MP_LIMB-1);
                     90:   wp[size-1] |= (c << (BITS_PER_MP_LIMB-1));
                     91:   return ret;
                     92: }
                     93:
                     94:
                     95: /* An open-coded version, making one pass over the data.  The right shift is
                     96:    done as the added limbs are produced.  The addition code follows
                     97:    mpn/generic/add_n.c. */
                     98:
                     99: mp_limb_t
                    100: mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
                    101: {
                    102:   mp_limb_t  w, wprev, x, y, c, ret;
                    103:   mp_size_t  i;
                    104:
                    105:   ASSERT (size >= 1);
                    106:
                    107:   x = xp[0];
                    108:   y = yp[0];
                    109:
                    110:   wprev = x + y;
                    111:   c = (wprev < x);
                    112:   ret = (wprev & 1);
                    113:
                    114: #define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (BITS_PER_MP_LIMB-1)))
                    115:
                    116:   for (i = 1; i < size; i++)
                    117:     {
                    118:       x = xp[i];
                    119:       y = yp[i];
                    120:
                    121:       w = x + c;
                    122:       c = (w < x);
                    123:       w += y;
                    124:       c += (w < y);
                    125:
                    126:       wp[i-1] = RSHIFT (w, wprev);
                    127:       wprev = w;
                    128:     }
                    129:
                    130:   wp[i-1] = RSHIFT (c, wprev);
                    131:
                    132:   return ret;
                    133: }
                    134:
                    135:
                    136: /* Another one-pass version, but right shifting the source limbs rather than
                    137:    the result limbs.  There's not much chance of this being better than the
                    138:    above, but it's an alternative at least. */
                    139:
                    140: mp_limb_t
                    141: mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
                    142: {
                    143:   mp_limb_t  w, x, y, xnext, ynext, c, ret;
                    144:   mp_size_t  i;
                    145:
                    146:   ASSERT (size >= 1);
                    147:
                    148:   x = xp[0];
                    149:   y = yp[0];
                    150:
                    151:   /* ret is the low bit of x+y, c is the carry out of that low bit add */
                    152:   ret = (x ^ y) & 1;
                    153:   c   = (x & y) & 1;
                    154:
                    155:   for (i = 0; i < size-1; i++)
                    156:     {
                    157:       xnext = xp[i+1];
                    158:       ynext = yp[i+1];
                    159:       x = RSHIFT (xnext, x);
                    160:       y = RSHIFT (ynext, y);
                    161:
                    162:       w = x + c;
                    163:       c = (w < x);
                    164:       w += y;
                    165:       c += (w < y);
                    166:       wp[i] = w;
                    167:
                    168:       x = xnext;
                    169:       y = ynext;
                    170:     }
                    171:
                    172:   wp[i] = (x >> 1) + (y >> 1) + c;
                    173:
                    174:   return ret;
                    175: }
                    176:
                    177:
                    /* Shared body of the three measuring routines, which differ only in the
                       function they time.  (This macro is in fact the same as
                       SPEED_ROUTINE_MPN_BINARY_N.)

                       The expansion is a complete braced block ending in a return of the
                       double obtained from speed_endtime(), so it is meant to be the whole
                       body of a function with a "struct speed_params *s" parameter.  mean_fun
                       is called s->reps times on {s->xp,s->size} and {s->yp,s->size}, writing
                       to a temporary destination of s->size limbs.

                       TMP_DECL is deliberately kept as the last declaration, ahead of any
                       statement.  The repetition count is only tested after each call, so
                       mean_fun always runs at least once.  */

                    #define SPEED_ROUTINE_MEAN(mean_fun)                      \
                      {                                                       \
                        mp_ptr    dst;                                        \
                        unsigned  remaining;                                  \
                        double    elapsed;                                    \
                        TMP_DECL (marker);                                    \
                                                                              \
                        SPEED_RESTRICT_COND (s->size >= 1);                   \
                                                                              \
                        TMP_MARK (marker);                                    \
                        dst = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_wp);   \
                                                                              \
                        speed_operand_src (s, s->xp, s->size);                \
                        speed_operand_src (s, s->yp, s->size);                \
                        speed_operand_dst (s, dst, s->size);                  \
                        speed_cache_fill (s);                                 \
                                                                              \
                        speed_starttime ();                                   \
                        remaining = s->reps;                                  \
                        do                                                    \
                          {                                                   \
                            mean_fun (dst, s->xp, s->yp, s->size);            \
                          }                                                   \
                        while (--remaining != 0);                             \
                        elapsed = speed_endtime ();                           \
                                                                              \
                        TMP_FREE (marker);                                    \
                        return elapsed;                                       \
                      }
                    209:
                    210: double
                    211: speed_mean_calls (struct speed_params *s)
                    212: {
                    213:   SPEED_ROUTINE_MEAN (mean_calls);
                    214: }
                    215:
                    216: double
                    217: speed_mean_open (struct speed_params *s)
                    218: {
                    219:   SPEED_ROUTINE_MEAN (mean_open);
                    220: }
                    221:
                    222: double
                    223: speed_mean_open2 (struct speed_params *s)
                    224: {
                    225:   SPEED_ROUTINE_MEAN (mean_open2);
                    226: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>