[BACK]Return to speed-ext.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / tune

Annotation of OpenXM_contrib/gmp/tune/speed-ext.c, Revision 1.1

1.1     ! maekawa     1: /* An example of extending the speed program to measure routines not in GMP. */
        !             2:
        !             3: /*
        !             4: Copyright (C) 1999, 2000 Free Software Foundation, Inc.
        !             5:
        !             6: This file is part of the GNU MP Library.
        !             7:
        !             8: The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: it under the terms of the GNU Lesser General Public License as published by
        !            10: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            11: option) any later version.
        !            12:
        !            13: The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            16: License for more details.
        !            17:
        !            18: You should have received a copy of the GNU Lesser General Public License
        !            19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: MA 02111-1307, USA.
        !            22: */
        !            23:
        !            24:
        !            25: /* The extension here is three versions of an mpn arithmetic mean.  These
        !            26:    aren't meant to be particularly useful, just examples.
        !            27:
        !            28:    You can run something like the following to compare their speeds.
        !            29:
        !            30:            ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
        !            31:
        !            32:    On RISC chips, mean_open() might be fastest if the compiler is doing a
        !            33:    good job.  On the register-starved x86s, mean_calls() will be fastest.
        !            34:
        !            35:
        !            36:    Notes:
        !            37:
        !            38:    SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
        !            39:    by speed.c in useful places.  SPEED_EXTRA_PROTOS goes after the header
        !            40:    files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
        !            41:
        !            42:    The advantage of this #include "speed.c" scheme is that there's no
        !            43:    editing of a copy of that file, and new features in new versions of it
        !            44:    will be immediately available.
        !            45:
        !            46:    In a real program the routines mean_calls() etc would probably be in
        !            47:    separate C or assembler source files, and just the measuring
        !            48:    speed_mean_calls() etc would be here.  Linking against other libraries
        !            49:    for things to measure is perfectly possible too.
        !            50:
        !            51:    When comparing two versions of a routine that share the same name, for
        !            52:    example the generic and assembler versions of mpn_add_n(), creative use of
        !            53:    cc -D or #define is suggested, so one or both can be renamed and linked
        !            54:    into the same program.  It'll be much easier to compare them side by side
        !            55:    than with separate programs for each.
        !            56:
        !            57:    common.c has notes on writing speed measuring routines.
        !            58:
        !            59:    Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
        !            60:    not using libtool) to get common.o and other objects needed by speed.c.  */
        !            61:
        !            62:
        !            63: #define SPEED_EXTRA_PROTOS                              \
        !            64:   double speed_mean_calls (struct speed_params *s);     \
        !            65:   double speed_mean_open  (struct speed_params *s);     \
        !            66:   double speed_mean_open2 (struct speed_params *s);
        !            67:
        !            68: #define SPEED_EXTRA_ROUTINES            \
        !            69:   { "mean_calls",  speed_mean_calls  }, \
        !            70:   { "mean_open",   speed_mean_open   }, \
        !            71:   { "mean_open2",  speed_mean_open2  },
        !            72:
        !            73: #include "speed.c"
        !            74:
        !            75:
        !            76: /* A straightforward implementation calling mpn subroutines.
        !            77:
        !            78:    wp,size is set to (xp,size + yp,size) / 2.  The return value is the
        !            79:    remainder from the division.  The other versions are the same.  */
        !            80:
        !            81: mp_limb_t
        !            82: mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
        !            83: {
        !            84:   mp_limb_t  c, ret;
        !            85:
        !            86:   ASSERT (size >= 1);
        !            87:
        !            88:   c = mpn_add_n (wp, xp, yp, size);
        !            89:   ret = mpn_rshift (wp, wp, size, 1) >> (BITS_PER_MP_LIMB-1);
        !            90:   wp[size-1] |= (c << (BITS_PER_MP_LIMB-1));
        !            91:   return ret;
        !            92: }
        !            93:
        !            94:
        !            95: /* An open-coded version, making one pass over the data.  The right shift is
        !            96:    done as the added limbs are produced.  The addition code follows
        !            97:    mpn/generic/add_n.c. */
        !            98:
        !            99: mp_limb_t
        !           100: mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
        !           101: {
        !           102:   mp_limb_t  w, wprev, x, y, c, ret;
        !           103:   mp_size_t  i;
        !           104:
        !           105:   ASSERT (size >= 1);
        !           106:
        !           107:   x = xp[0];
        !           108:   y = yp[0];
        !           109:
        !           110:   wprev = x + y;
        !           111:   c = (wprev < x);
        !           112:   ret = (wprev & 1);
        !           113:
        !           114: #define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (BITS_PER_MP_LIMB-1)))
        !           115:
        !           116:   for (i = 1; i < size; i++)
        !           117:     {
        !           118:       x = xp[i];
        !           119:       y = yp[i];
        !           120:
        !           121:       w = x + c;
        !           122:       c = (w < x);
        !           123:       w += y;
        !           124:       c += (w < y);
        !           125:
        !           126:       wp[i-1] = RSHIFT (w, wprev);
        !           127:       wprev = w;
        !           128:     }
        !           129:
        !           130:   wp[i-1] = RSHIFT (c, wprev);
        !           131:
        !           132:   return ret;
        !           133: }
        !           134:
        !           135:
        !           136: /* Another one-pass version, but right shifting the source limbs rather than
        !           137:    the result limbs.  There's not much chance of this being better than the
        !           138:    above, but it's an alternative at least. */
        !           139:
        !           140: mp_limb_t
        !           141: mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
        !           142: {
        !           143:   mp_limb_t  w, x, y, xnext, ynext, c, ret;
        !           144:   mp_size_t  i;
        !           145:
        !           146:   ASSERT (size >= 1);
        !           147:
        !           148:   x = xp[0];
        !           149:   y = yp[0];
        !           150:
        !           151:   /* ret is the low bit of x+y, c is the carry out of that low bit add */
        !           152:   ret = (x ^ y) & 1;
        !           153:   c   = (x & y) & 1;
        !           154:
        !           155:   for (i = 0; i < size-1; i++)
        !           156:     {
        !           157:       xnext = xp[i+1];
        !           158:       ynext = yp[i+1];
        !           159:       x = RSHIFT (xnext, x);
        !           160:       y = RSHIFT (ynext, y);
        !           161:
        !           162:       w = x + c;
        !           163:       c = (w < x);
        !           164:       w += y;
        !           165:       c += (w < y);
        !           166:       wp[i] = w;
        !           167:
        !           168:       x = xnext;
        !           169:       y = ynext;
        !           170:     }
        !           171:
        !           172:   wp[i] = (x >> 1) + (y >> 1) + c;
        !           173:
        !           174:   return ret;
        !           175: }
        !           176:
        !           177:
        /* Body shared by the three speed_mean_*() measuring functions, which
           differ only in the routine they time.  This is the same as
           SPEED_ROUTINE_MPN_BINARY_N.

           The macro expands to a complete compound statement and expects a
           "struct speed_params *s" in scope.  It allocates a destination of
           s->size limbs, registers s->xp and s->yp as sources and the
           destination as output, then calls mean_fun s->reps times between
           speed_starttime() and speed_endtime() and returns the value
           speed_endtime() gave.  Sizes below 1 are handed to
           SPEED_RESTRICT_COND.  */

        #define SPEED_ROUTINE_MEAN(mean_fun)                            \
          {                                                             \
            unsigned  remaining;                                        \
            mp_ptr    dst;                                              \
            double    elapsed;                                          \
            TMP_DECL (marker);                                          \
                                                                        \
            SPEED_RESTRICT_COND (s->size >= 1);                         \
                                                                        \
            TMP_MARK (marker);                                          \
            dst = SPEED_TMP_ALLOC_LIMBS (s->size, s->align_wp);         \
                                                                        \
            speed_operand_src (s, s->xp, s->size);                      \
            speed_operand_src (s, s->yp, s->size);                      \
            speed_operand_dst (s, dst, s->size);                        \
            speed_cache_fill (s);                                       \
                                                                        \
            speed_starttime ();                                         \
            remaining = s->reps;                                        \
            do                                                          \
              {                                                         \
                mean_fun (dst, s->xp, s->yp, s->size);                  \
              }                                                         \
            while (--remaining != 0);                                   \
            elapsed = speed_endtime ();                                 \
                                                                        \
            TMP_FREE (marker);                                          \
            return elapsed;                                             \
          }
        !           209:
        !           210: double
        !           211: speed_mean_calls (struct speed_params *s)
        !           212: {
        !           213:   SPEED_ROUTINE_MEAN (mean_calls);
        !           214: }
        !           215:
        !           216: double
        !           217: speed_mean_open (struct speed_params *s)
        !           218: {
        !           219:   SPEED_ROUTINE_MEAN (mean_open);
        !           220: }
        !           221:
        !           222: double
        !           223: speed_mean_open2 (struct speed_params *s)
        !           224: {
        !           225:   SPEED_ROUTINE_MEAN (mean_open2);
        !           226: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>