[BACK]Return to try.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / tests

Annotation of OpenXM_contrib/gmp/mpn/tests/try.c, Revision 1.1.1.1

1.1       maekawa     1: /* Run some tests on various mpn routines.
                      2:
                      3:    THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT.  IT'S ALMOST CERTAIN TO
                      4:    BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.  */
                      5:
                      6: /*
                      7: Copyright (C) 2000 Free Software Foundation, Inc.
                      8:
                      9: This file is part of the GNU MP Library.
                     10:
                     11: The GNU MP Library is free software; you can redistribute it and/or modify
                     12: it under the terms of the GNU Lesser General Public License as published by
                     13: the Free Software Foundation; either version 2.1 of the License, or (at your
                     14: option) any later version.
                     15:
                     16: The GNU MP Library is distributed in the hope that it will be useful, but
                     17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     18: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     19: License for more details.
                     20:
                     21: You should have received a copy of the GNU Lesser General Public License
                     22: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     24: MA 02111-1307, USA.
                     25: */
                     26:
                     27:
                     28: /* Usage: try [options] <function>...
                     29:
                     30:    For example, "./try mpn_add_n" to run tests of that function.
                     31:
                     32:    Combinations of alignments and overlaps are tested, with redzones above
                     33:    or below the destinations, and with the sources write-protected.
                     34:
                     35:    The number of tests performed becomes ridiculously large with all the
                     36:    combinations, and for that reason this can't be a part of a "make check",
                     37:    it's meant only for development.  The code isn't very pretty either.
                     38:
                     39:    During development it can help to disable the redzones, since seeing the
                     40:    rest of the destination written can show where the wrong part is, or if
                     41:    the dst pointers are off by 1 or whatever.  The magic DEADVAL initial
                     42:    fill (see below) will show locations never written.
                     43:
                     44:    The -s option can be used to test only certain size operands, which is
                     45:    useful if some new code doesn't yet support say sizes less than the
                     46:    unrolling, or whatever.
                     47:
                     48:    When a problem occurs it'll of course be necessary to run the program
                     49:    under gdb to find out quite where, how and why it's going wrong.  Disable
                     50:    the spinner with the -W option when doing this, or single stepping won't
                     51:    work.  Using -1 to run with simple data can be useful.
                     52:
                     53:    New functions to test can be added by defining a TRY_TYPE_, adding an
                     54:    entry to try_array[] and adding a call to the call() function (if the
                     55:    type isn't already supported).  Extra TRY_TYPE_ bits can be easily added
                     56:    if necessary.
                     57:
                     58:
                     59:    Future:
                     60:
                     61:    Automatically detect gdb and disable the spinner timer (use -W for now).
                     62:
                     63:    Make a way to re-run a failing case in the debugger.  Have an option to
                     64:    snapshot each test case before it's run so the data is available if a
                     65:    segv occurs.  (This should be more reliable than the current print_all()
                     66:    in the signal handler.)
                     67:
                     68:    When alignment means a dst isn't hard against the redzone, check the
                     69:    space in between remains unchanged.
                     70:
                     71:    See if the 80x86 debug registers can do redzones on byte boundaries.
                     72:
                     73:    When a source overlaps a destination, don't run both s[i].high 0 and 1,
                     74:    as s[i].high has no effect.  Maybe encode s[i].high into overlap->s[i].
                     75:
                     76:    When partial overlaps aren't done, don't loop over source alignments
                     77:    during overlaps.
                     78:
                     79: */
                     80:
                     81:
                     82: /* always do assertion checking */
                     83: #define WANT_ASSERT 1
                     84:
                     85: #include "config.h"
                     86:
                     87: #if HAVE_GETOPT_H
                     88: #include <getopt.h>  /* for getopt_long() */
                     89: #endif
                     90: #include <limits.h>
                     91: #include <signal.h>
                     92: #include <stdio.h>
                     93: #include <stdlib.h>
                     94: #include <string.h>
                     95: #include <time.h>
                     96: #include <unistd.h>
                     97: #include <sys/mman.h>
                     98:
                     99: #include "gmp.h"
                    100: #include "gmp-impl.h"
                    101:
                    102: #include "ref.h"
                    103: #include "try.h"
                    104:
                    105: #if HAVE_SPA_EXTRAS
                    106: #include "spa-out.asm.h"
                    107: #endif
                    108:
                    109: #if !HAVE_DECL_OPTARG
                    110: extern char *optarg;
                    111: extern int optind, opterr;
                    112: #endif
                    113:
                    114:
                    115: #define DEFAULT_REPETITIONS  10
                    116:
                    117: int  option_repetitions = DEFAULT_REPETITIONS;
                    118: int  option_spinner = 1;
                    119: int  option_redzones = 1;
                    120: int  option_firstsize = 0;
                    121: int  option_lastsize = 500;
                    122: int  option_firstsize2 = 0;
                    123:
                    124: #define ALIGNMENTS          4
                    125: #define OVERLAPS            4
                    126: #define CARRY_RANDOMS       5
                    127: #define MULTIPLIER_RANDOMS  5
                    128: #define DIVISOR_RANDOMS     5
                    129: #define XSIZE_COUNT         4
                    130:
                    131: int  option_print = 0;
                    132:
                    133: #define DATA_TRAND  0
                    134: #define DATA_ZEROS  1
                    135: #define DATA_SEQ    2
                    136: #define DATA_FFS    3
                    137: #define DATA_2FD    4
                    138: int  option_data = DATA_TRAND;
                    139:
                    140:
                    141: mp_size_t  pagesize;
                    142: #define PAGESIZE_LIMBS  (pagesize / BYTES_PER_MP_LIMB)
                    143:
                    144: /* must be a multiple of the page size */
                    145: #define REDZONE_BYTES   (pagesize * 16)
                    146: #define REDZONE_LIMBS   (REDZONE_BYTES / BYTES_PER_MP_LIMB)
                    147:
                    148:
                    149: #define MAX3(x,y,z)   (MAX (x, MAX (y, z)))
                    150:
                    151: #if BITS_PER_MP_LIMB == 32
                    152: #define DEADVAL  CNST_LIMB(0xDEADBEEF)
                    153: #else
                    154: #define DEADVAL  CNST_LIMB(0xDEADBEEFBADDCAFE)
                    155: #endif
                    156:
                    157:
                    158: #define TRY_RETVAL           (1<<0)
                    159: #define TRY_SIZE2            (1<<1)
                    160: #define TRY_SHIFT            (1<<2)
                    161: #define TRY_CARRYBIT         (1<<3)
                    162: #define TRY_CARRY3           (1<<4)
                    163: #define TRY_CARRY4           (1<<4)
                    164: #define TRY_CARRYLIMB        (1<<5)
                    165: #define TRY_MULTIPLIER       (1<<6)
                    166: #define TRY_DIVISOR          (1<<7)
                    167: #define TRY_DOUBLE_DST       (1<<8)
                    168: #define TRY_DST0_INIT        (1<<9)
                    169: #define TRY_XSIZE            (1<<10)
                    170: #define TRY_SIZE_ZERO        (1<<11)
                    171: #define TRY_DST_SIZE_RETVAL  (1<<12)
                    172: #define TRY_SRC1_GCDDATA     (1<<13)
                    173:
                    174: #define TRY_OVERLAP_LOW_TO_HIGH  (1<<15) /* Default is allow full overlap. */
                    175: #define TRY_OVERLAP_HIGH_TO_LOW  (1<<16)
                    176: #define TRY_OVERLAP_NONE         (1<<17)
                    177: #define TRY_OVERLAP_NOTSRCS      (1<<18)
                    178:
                    179: #define TRY_SRC0        (1<<20)
                    180: #define TRY_SRC1        (TRY_SRC0 << 1)
                    181:
                    182: #define TRY_DST0        (1<<24)
                    183: #define TRY_DST1        (TRY_DST0 << 1)
                    184:
                    185:
                    186: #define TRY_SRC(n)      (TRY_SRC0 << (n))
                    187: #define TRY_DST(n)      (TRY_DST0 << (n))
                    188:
                    189: #define TRY_CARRYANY  (TRY_CARRYBIT | TRY_CARRY3 | TRY_CARRY4 | TRY_CARRYLIMB)
                    190:
                    191:
                    192: #define TRY_TYPE_AORS_N      (TRY_RETVAL | TRY_DST0 | TRY_SRC0 | TRY_SRC1)
                    193: #define TRY_TYPE_AORS_NC     (TRY_TYPE_AORS_N | TRY_CARRYBIT)
                    194:
                    195: #define TRY_TYPE_AORSMUL_1   (TRY_TYPE_MUL_1  | TRY_DST0_INIT)
                    196: #define TRY_TYPE_AORSMUL_1C  (TRY_TYPE_MUL_1C | TRY_DST0_INIT)
                    197:
                    198: #define TRY_TYPE_LOGOPS_N    (TRY_DST0 | TRY_SRC0 | TRY_SRC1)
                    199:
                    200: #define TRY_TYPE_ADDSUB_N \
                    201:   (TRY_RETVAL | TRY_DST0 | TRY_DST1 | TRY_SRC0 | TRY_SRC1)
                    202: #define TRY_TYPE_ADDSUB_NC \
                    203:   (TRY_TYPE_ADDSUB_N | TRY_CARRY4)
                    204:
                    205: #define TRY_TYPE_COPYI \
                    206:   (TRY_DST0 | TRY_SRC0 | TRY_OVERLAP_LOW_TO_HIGH | TRY_SIZE_ZERO)
                    207: #define TRY_TYPE_COPYD \
                    208:   (TRY_DST0 | TRY_SRC0 | TRY_OVERLAP_HIGH_TO_LOW | TRY_SIZE_ZERO)
                    209: #define TRY_TYPE_COM_N   (TRY_DST0 | TRY_SRC0)
                    210:
                    211: #define TRY_TYPE_MOD_1   (TRY_RETVAL | TRY_SRC0 | TRY_DIVISOR | TRY_SIZE_ZERO)
                    212: #define TRY_TYPE_MOD_1C       (TRY_TYPE_MOD_1 | TRY_CARRYLIMB)
                    213: #define TRY_TYPE_DIVMOD_1     (TRY_TYPE_MOD_1  | TRY_DST0)
                    214: #define TRY_TYPE_DIVMOD_1C    (TRY_TYPE_MOD_1C | TRY_DST0)
                    215: #define TRY_TYPE_DIVREM_1     (TRY_TYPE_DIVMOD_1  | TRY_XSIZE)
                    216: #define TRY_TYPE_DIVREM_1C    (TRY_TYPE_DIVMOD_1C | TRY_XSIZE)
                    217: #define TRY_TYPE_MOD_1_RSHIFT (TRY_RETVAL | TRY_SRC0 | TRY_SHIFT | TRY_DIVISOR)
                    218:
                    219: #define TRY_TYPE_DIVEXACT_BY3   (TRY_RETVAL | TRY_DST0 | TRY_SRC0)
                    220: #define TRY_TYPE_DIVEXACT_BY3C  (TRY_TYPE_DIVEXACT_BY3 | TRY_CARRY3)
                    221:
                    222: #define TRY_TYPE_GCD_1   (TRY_RETVAL | TRY_SRC0 | TRY_DIVISOR)
                    223: #define TRY_TYPE_GCD                                            \
                    224:   (TRY_RETVAL | TRY_DST0 | TRY_SRC0 | TRY_SRC1 | TRY_SIZE2      \
                    225:    | TRY_DST_SIZE_RETVAL | TRY_OVERLAP_NOTSRCS | TRY_SRC1_GCDDATA)
                    226:
                    227: #define TRY_TYPE_MUL_1   (TRY_RETVAL | TRY_DST0 | TRY_SRC0 | TRY_MULTIPLIER)
                    228: #define TRY_TYPE_MUL_1C  (TRY_TYPE_MUL_1 | TRY_CARRYLIMB)
                    229:
                    230: #define TRY_TYPE_MUL_BASECASE \
                    231:   (TRY_DST0 | TRY_SRC0 | TRY_SRC1 | TRY_SIZE2 | TRY_OVERLAP_NONE)
                    232: #define TRY_TYPE_MUL_N \
                    233:   (TRY_DST0 | TRY_SRC0 | TRY_SRC1 | TRY_DOUBLE_DST | TRY_OVERLAP_NONE)
                    234: #define TRY_TYPE_SQR \
                    235:   (TRY_DST0 | TRY_SRC0 | TRY_DOUBLE_DST | TRY_OVERLAP_NONE)
                    236:
                    237: #define TRY_TYPE_RSHIFT \
                    238:   (TRY_RETVAL | TRY_DST0 | TRY_SRC0 | TRY_SHIFT | TRY_OVERLAP_LOW_TO_HIGH)
                    239: #define TRY_TYPE_LSHIFT \
                    240:   (TRY_RETVAL | TRY_DST0 | TRY_SRC0 | TRY_SHIFT | TRY_OVERLAP_HIGH_TO_LOW)
                    241:
                    242: #define TRY_TYPE_POPCOUNT   (TRY_RETVAL | TRY_SRC0 | TRY_SIZE_ZERO)
                    243: #define TRY_TYPE_HAMDIST    (TRY_TYPE_POPCOUNT | TRY_SRC1)
                    244:
                    245:
                     246: /* The following may be macros when there's no native version, so wrap them
                     247:    in real functions whose addresses can be put in try_array[]. */
                    248:
                    249: void
                    250: MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    251: { MPN_COPY_INCR (rp, sp, size); }
                    252:
                    253: void
                    254: MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    255: { MPN_COPY_DECR (rp, sp, size); }
                    256:
                    257: void
                    258: mpn_com_n_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    259: { mpn_com_n (rp, sp, size); }
                    260:
                    261: void
                    262: mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
                    263: { mpn_and_n (rp, s1, s2, size); }
                    264:
                    265: void
                    266: mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
                    267: { mpn_andn_n (rp, s1, s2, size); }
                    268:
                    269: void
                    270: mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
                    271: { mpn_nand_n (rp, s1, s2, size); }
                    272:
                    273: void
                    274: mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
                    275: { mpn_ior_n (rp, s1, s2, size); }
                    276:
                    277: void
                    278: mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
                    279: { mpn_iorn_n (rp, s1, s2, size); }
                    280:
                    281: void
                    282: mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
                    283: { mpn_nior_n (rp, s1, s2, size); }
                    284:
                    285: void
                    286: mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
                    287: { mpn_xor_n (rp, s1, s2, size); }
                    288:
                    289: void
                    290: mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
                    291: { mpn_xnor_n (rp, s1, s2, size); }
                    292:
                    293: void
                    294: mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    295: { mpn_divexact_by3 (rp, sp, size); }
                    296:
                    297:
                    298: struct try_t {
                    299:   struct try_one_t {
                    300:     tryfun_t    function;
                    301:     const char  *name;
                    302:   } ref, fun;
                    303:   int         flag;
                    304:   mp_size_t   minsize;
                    305: };
                    306:
                    307: #if HAVE_STRINGIZE
                    308: #define TRY(fun)        { (tryfun_t) fun,       #fun }
                    309: #define TRY_FUNFUN(fun) { (tryfun_t) fun##_fun, #fun }
                    310: #else
                    311: #define TRY(fun)        { (tryfun_t) fun,         "fun" }
                    312: #define TRY_FUNFUN(fun) { (tryfun_t) fun/**/_fun, "fun" }
                    313: #endif
                    314:
                    315: struct try_t try_array[] = {
                    316:   { TRY(refmpn_add_n),     TRY(mpn_add_n),     TRY_TYPE_AORS_N  },
                    317:   { TRY(refmpn_sub_n),     TRY(mpn_sub_n),     TRY_TYPE_AORS_N  },
                    318: #if HAVE_NATIVE_mpn_add_nc
                    319:   { TRY(refmpn_add_nc),    TRY(mpn_add_nc),    TRY_TYPE_AORS_NC },
                    320: #endif
                    321: #if HAVE_NATIVE_mpn_sub_nc
                    322:   { TRY(refmpn_sub_nc),    TRY(mpn_sub_nc),    TRY_TYPE_AORS_NC },
                    323: #endif
                    324:
                    325:   { TRY(refmpn_addmul_1),  TRY(mpn_addmul_1),  TRY_TYPE_AORSMUL_1  },
                    326:   { TRY(refmpn_submul_1),  TRY(mpn_submul_1),  TRY_TYPE_AORSMUL_1  },
                    327: #if HAVE_NATIVE_mpn_addmul_1c
                    328:   { TRY(refmpn_addmul_1c), TRY(mpn_addmul_1c), TRY_TYPE_AORSMUL_1C },
                    329: #endif
                    330: #if HAVE_NATIVE_mpn_submul_1c
                    331:   { TRY(refmpn_submul_1c), TRY(mpn_submul_1c), TRY_TYPE_AORSMUL_1C },
                    332: #endif
                    333:
                    334:   { TRY(refmpn_com_n),    TRY_FUNFUN(mpn_com_n),  TRY_TYPE_COM_N },
                    335:
                    336:   { TRY(refmpn_copyi),    TRY_FUNFUN(MPN_COPY_INCR), TRY_TYPE_COPYI },
                    337:   { TRY(refmpn_copyd),    TRY_FUNFUN(MPN_COPY_DECR), TRY_TYPE_COPYD },
                    338:
                    339:   { TRY(refmpn_and_n),    TRY_FUNFUN(mpn_and_n),  TRY_TYPE_LOGOPS_N },
                    340:   { TRY(refmpn_andn_n),   TRY_FUNFUN(mpn_andn_n), TRY_TYPE_LOGOPS_N },
                    341:   { TRY(refmpn_nand_n),   TRY_FUNFUN(mpn_nand_n), TRY_TYPE_LOGOPS_N },
                    342:   { TRY(refmpn_ior_n),    TRY_FUNFUN(mpn_ior_n),  TRY_TYPE_LOGOPS_N },
                    343:   { TRY(refmpn_iorn_n),   TRY_FUNFUN(mpn_iorn_n), TRY_TYPE_LOGOPS_N },
                    344:   { TRY(refmpn_nior_n),   TRY_FUNFUN(mpn_nior_n), TRY_TYPE_LOGOPS_N },
                    345:   { TRY(refmpn_xor_n),    TRY_FUNFUN(mpn_xor_n),  TRY_TYPE_LOGOPS_N },
                    346:   { TRY(refmpn_xnor_n),   TRY_FUNFUN(mpn_xnor_n), TRY_TYPE_LOGOPS_N },
                    347:
                    348:   { TRY(refmpn_divrem_1),     TRY(mpn_divrem_1),     TRY_TYPE_DIVREM_1 },
                    349:   { TRY(refmpn_mod_1),        TRY(mpn_mod_1),        TRY_TYPE_MOD_1 },
                    350:   { TRY(refmpn_mod_1_rshift), TRY(mpn_mod_1_rshift), TRY_TYPE_MOD_1_RSHIFT },
                    351: #if HAVE_NATIVE_mpn_divrem_1c
                    352:   { TRY(refmpn_divrem_1c),    TRY(mpn_divrem_1c),    TRY_TYPE_DIVREM_1C },
                    353: #endif
                    354: #if HAVE_NATIVE_mpn_mod_1c
                    355:   { TRY(refmpn_mod_1c),       TRY(mpn_mod_1c),       TRY_TYPE_MOD_1C },
                    356: #endif
                    357:   { TRY(refmpn_divexact_by3), TRY_FUNFUN(mpn_divexact_by3),
                    358:                                                      TRY_TYPE_DIVEXACT_BY3 },
                    359:   { TRY(refmpn_divexact_by3c),TRY(mpn_divexact_by3c),TRY_TYPE_DIVEXACT_BY3C },
                    360:
                    361:   { TRY(refmpn_mul_1),    TRY(mpn_mul_1),      TRY_TYPE_MUL_1 },
                    362: #if HAVE_NATIVE_mpn_mul_1c
                    363:   { TRY(refmpn_mul_1c),   TRY(mpn_mul_1c),     TRY_TYPE_MUL_1C },
                    364: #endif
                    365:
                    366:   { TRY(refmpn_rshift),   TRY(mpn_rshift),     TRY_TYPE_RSHIFT },
                    367:   { TRY(refmpn_lshift),   TRY(mpn_lshift),     TRY_TYPE_LSHIFT },
                    368:
                    369:
                    370:   { TRY(refmpn_mul_basecase), TRY(mpn_mul_basecase), TRY_TYPE_MUL_BASECASE },
                    371:   { TRY(refmpn_sqr),          TRY(mpn_sqr_basecase), TRY_TYPE_SQR },
                    372:
                    373:   { TRY(refmpn_mul_basecase), TRY(mpn_mul),    TRY_TYPE_MUL_BASECASE },
                    374:   { TRY(refmpn_mul_n),        TRY(mpn_mul_n),  TRY_TYPE_MUL_N },
                    375:   { TRY(refmpn_sqr),          TRY(mpn_sqr_n),  TRY_TYPE_SQR },
                    376:
                    377:   { TRY(refmpn_gcd_1),    TRY(mpn_gcd_1),      TRY_TYPE_GCD_1 },
                    378:   { TRY(refmpn_gcd),      TRY(mpn_gcd),        TRY_TYPE_GCD   },
                    379:
                    380:   { TRY(refmpn_popcount), TRY(mpn_popcount),   TRY_TYPE_POPCOUNT },
                    381:   { TRY(refmpn_hamdist),  TRY(mpn_hamdist),    TRY_TYPE_HAMDIST },
                    382:
                    383: #if 0
                    384:   /* need wrapper functions since they take workspace arguments */
                    385:   { TRY(refmpn_mul_n), TRY_FUNFUN(mpn_kara_mul_n),  TRY_TYPE_MUL_N,
                    386:                                                       MPN_KARA_MINSIZE},
                    387:   { TRY(refmpn_sqr),   TRY_FUNFUN(mpn_kara_sqr_n),  TRY_TYPE_SQR,
                    388:                                                       MPN_KARA_MINSIZE},
                    389:   { TRY(refmpn_mul_n), TRY_FUNFUN(mpn_toom3_mul_n), TRY_TYPE_MUL_N,
                    390:                                                       MPN_TOOM3_MINSIZE},
                    391:   { TRY(refmpn_sqr),   TRY_FUNFUN(mpn_toom3_sqr_n), TRY_TYPE_SQR,
                    392:                                                       MPN_TOOM3_MINSIZE },
                    393: #endif
                    394:
                    395: #if HAVE_SPA_EXTRAS
                    396: #include "spa-out.t-table.i"
                    397: #endif
                    398: };
                    399:
                    400: struct try_t *tr = &try_array[0];
                    401:
                    402:
                     403: struct region_t {
                     404:   mp_ptr     ptr;    /* first usable limb, just above the low redzone */
                     405:   mp_size_t  size;   /* usable limbs, a multiple of PAGESIZE_LIMBS */
                     406: };
                    407:
                    408:
                    409: #define TRAP_NOWHERE 0
                    410: #define TRAP_REF     1
                    411: #define TRAP_FUN     2
                    412: #define TRAP_SETUPS  3
                    413: int trap_location = TRAP_NOWHERE;
                    414:
                    415:
                    416: /* Find least significant limb position where p1,size and p2,size differ.  */
                    417: mp_size_t
                    418: mpn_diff_lowest (mp_srcptr p1, mp_srcptr p2, mp_size_t size)
                    419: {
                    420:   mp_size_t  i;
                    421:
                    422:   for (i = 0; i < size; i++)
                    423:     if (p1[i] != p2[i])
                    424:       return i;
                    425:
                    426:   /* no differences */
                    427:   return -1;
                    428: }
                    429:
                    430:
                    431: /* Find most significant limb position where p1,size and p2,size differ.  */
                    432: mp_size_t
                    433: mpn_diff_highest (mp_srcptr p1, mp_srcptr p2, mp_size_t size)
                    434: {
                    435:   mp_size_t  i;
                    436:
                    437:   for (i = size-1; i >= 0; i--)
                    438:     if (p1[i] != p2[i])
                    439:       return i;
                    440:
                    441:   /* no differences */
                    442:   return -1;
                    443: }
                    444:
                    445:
                    446: /* Return p advanced to the next multiple of "align" bytes.  "align" must be
                    447:    a power of 2.  Care is taken not to assume sizeof(int)==sizeof(pointer).  */
                    448: void *
                    449: align_pointer (void *p, size_t align)
                    450: {
                    451:   unsigned  d;
                    452:   d = ((unsigned) p) & (align-1);
                    453:   d = (d != 0 ? align-d : 0);
                    454:   return (void *) (((char *) p) + d);
                    455: }
                    456:
                     457: /* malloc n limbs starting on an m-byte boundary (m must be a power of 2) */
                     458: mp_ptr
                     459: malloc_limbs_aligned (size_t n, size_t m)
                     460: {
                     461:   return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
                     462: }
                    463:
                    464: void
                    465: mprotect_maybe (void *addr, size_t len, int prot)
                    466: {
                    467:   if (!option_redzones)
                    468:     return;
                    469:
                    470:   if (mprotect (addr, len, prot) != 0)
                    471:     {
                    472:       fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X\n", addr, len, prot);
                    473:       exit (1);
                    474:     }
                    475: }
                    476:
                    477: /* round "a" up to a multiple of "m" */
                    478: size_t
                    479: round_up_multiple (size_t a, size_t m)
                    480: {
                    481:   unsigned long  r;
                    482:
                    483:   r = a % m;
                    484:   if (r == 0)
                    485:     return a;
                    486:   else
                    487:     return a + (m - r);
                    488: }
                    489:
                     490: void
                     491: malloc_region (struct region_t *r, mp_size_t n)
                     492: {
                     493:   mp_ptr  p;
                     494:   /* redzones are whole pages, so a page must hold a whole number of limbs */
                     495:   ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0);
                     496:   /* usable part is n rounded up to whole pages, with a redzone on each side */
                     497:   r->size = round_up_multiple (n, PAGESIZE_LIMBS);
                     498:   p = malloc_limbs_aligned (r->size + REDZONE_LIMBS*2, pagesize);
                     499:   mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
                     500:   /* usable limbs start just above the low redzone */
                     501:   r->ptr = p + REDZONE_LIMBS;
                     502:   mprotect_maybe (r->ptr + r->size, REDZONE_BYTES, PROT_NONE);
                     503: }
                    504:
                    505: void
                    506: mprotect_region (const struct region_t *r, int prot)
                    507: {
                    508:   mprotect_maybe (r->ptr, r->size, prot);
                    509: }
                    510:
                    511:
                    512: #define NUM_SOURCES  2
                    513: #define NUM_DESTS    2
                    514:
                    515: struct source_t {
                    516:   struct region_t  region;
                    517:   int        high;
                    518:   mp_size_t  align;
                    519:   mp_ptr     p;
                    520: };
                    521:
                    522: struct source_t  s[NUM_SOURCES];
                    523:
                    524: struct dest_t {
                    525:   int        high;
                    526:   mp_size_t  align;
                    527: };
                    528:
                    529: struct dest_t  d[NUM_SOURCES];
                    530:
                    531: struct source_each_t {
                    532:   mp_ptr     p;
                    533: };
                    534:
                    535: struct dest_each_t {
                    536:   struct region_t  region;
                    537:   mp_ptr     p;
                    538: };
                    539:
                    540: mp_size_t  size;
                    541: mp_size_t  size2;
                    542: mp_size_t  dsize;
                    543: unsigned long   shift;
                    544:
                    545: struct each_t {
                    546:   const char  *name;
                    547:   struct dest_each_t    d[numberof(d)];
                    548:   struct source_each_t  s[numberof(s)];
                    549:   mp_limb_t  retval;
                    550: };
                    551:
                    552: struct each_t  ref = { "Ref" };
                    553: struct each_t  fun = { "Fun" };
                    554:
                    555: #define SRC_SIZE(n) \
                    556:   ((n) == 1 && (tr->flag & (TRY_SIZE2|TRY_XSIZE)) ? size2 : size)
                    557:
                    558:
                    559: /* First four entries must be 0,1,2,3 for TRY_CARRYBIT, TRY_CARRY3, and
                    560:    TRY_CARRY4 */
                    561: mp_limb_t  carry_array[] = {
                    562:   0, 1, 2, 3,
                    563:   4,
                    564:   (mp_limb_t) 1 << 8,
                    565:   (mp_limb_t) 1 << 16,
                    566:   (mp_limb_t) -1
                    567: };
                    568: mp_limb_t  carry;
                    569: int        carry_index;
                    570:
                    571: #define CARRY_COUNT                                                     \
                    572:   ((tr->flag & TRY_CARRYBIT) ? 2                                        \
                    573:    : (tr->flag & TRY_CARRY3) ? 3                                        \
                    574:    : (tr->flag & TRY_CARRY4) ? 4                                        \
                    575:    : (tr->flag & TRY_CARRYLIMB) ? numberof(carry_array) + CARRY_RANDOMS \
                    576:    : 1)
                    577:
                    578: #define MPN_RANDOM_ALT(index,dst,size) \
                    579:   (((index) & 1) ? mpn_random (dst, size) : mpn_random2 (dst, size))
                    580:
                         /* Loop header stepping the global carry through CARRY_COUNT values:
                            first the carry_array entries in order, then single random limbs from
                            MPN_RANDOM_ALT.  The assignment is made inside the loop condition,
                            ahead of the bound test, so carry is also assigned once on the final
                            (failing) test.  */
                    581: /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
                    582:    the same type */
                    583: #define CARRY_ITERATION                                                 \
                    584:   for (carry_index = 0;                                                 \
                    585:        (carry_index < numberof (carry_array)                            \
                    586:         ? (carry = carry_array[carry_index])                            \
                    587:         : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)),    \
                    588:        carry_index < CARRY_COUNT;                                       \
                    589:        carry_index++)
                    590:
                    591:
                         /* Fixed multiplier values that MULTIPLIER_ITERATION tries before moving
                            on to random ones. */
                    592: mp_limb_t  multiplier_array[] = {
                    593:   0, 1, 2, 3,
                    594:   (mp_limb_t) 1 << 8,
                    595:   (mp_limb_t) 1 << 16,
                    596:   (mp_limb_t) -3,
                    597:   (mp_limb_t) -2,
                    598:   (mp_limb_t) -1,
                    599: };
                         /* Multiplier for the current test and its loop index. */
                    600: mp_limb_t  multiplier;
                    601: int        multiplier_index;
                    602:
                         /* Fixed divisor values that DIVISOR_ITERATION tries before moving on to
                            random ones.  Unlike multiplier_array there is no zero entry. */
                    603: mp_limb_t  divisor_array[] = {
                    604:   1, 2, 3,
                    605:   (mp_limb_t) 1 << 8,
                    606:   (mp_limb_t) 1 << 16,
                    607:   (mp_limb_t) -3,
                    608:   (mp_limb_t) -2,
                    609:   (mp_limb_t) -1,
                    610: };
                    611:
                         /* Divisor for the current test and its loop index. */
                    612: mp_limb_t  divisor;
                    613: int        divisor_index;
                    614:
                         /* Generic loop header: var takes each element of array in turn and then
                            single random limbs from MPN_RANDOM_ALT, for index values 0 up to
                            limit-1.  As in CARRY_ITERATION the assignment sits in the loop
                            condition ahead of the bound test.  The randoms and cond parameters are
                            accepted but do not appear in the expansion.
                            NOTE(review): CAST_TO_VOID is defined outside this section; if it
                            really produces a void expression the two arms of the "?:" would not
                            have the same type, contrary to the comment below -- confirm.  */
                    615: /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
                    616:    the same type */
                    617: #define ARRAY_ITERATION(var, index, limit, array, randoms, cond)        \
                    618:   for (index = 0;                                                       \
                    619:        (index < numberof (array)                                        \
                    620:         ? CAST_TO_VOID (var = array[index])                             \
                    621:         : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)),            \
                    622:        index < limit;                                                   \
                    623:        index++)
                    624:
                         /* Number of multipliers to try: every multiplier_array entry plus
                            MULTIPLIER_RANDOMS random values when the routine has TRY_MULTIPLIER,
                            otherwise a single pass.  */
                    625: #define MULTIPLIER_COUNT                                \
                    626:   ((tr->flag & TRY_MULTIPLIER)                          \
                    627:     ? numberof (multiplier_array) + MULTIPLIER_RANDOMS  \
                    628:     : 1)
                    629:
                         /* Loop header stepping the global multiplier through MULTIPLIER_COUNT
                            values.  */
                    630: #define MULTIPLIER_ITERATION                                            \
                    631:   ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT,       \
                    632:                   multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)
                    633:
                         /* Number of divisors to try: every divisor_array entry plus
                            DIVISOR_RANDOMS random values when the routine has TRY_DIVISOR,
                            otherwise a single pass.  */
                    634: #define DIVISOR_COUNT                           \
                    635:   ((tr->flag & TRY_DIVISOR)                    \
                    636:    ? numberof (divisor_array) + DIVISOR_RANDOMS \
                    637:    : 1)
                    638:
                         /* Loop header stepping the global divisor through DIVISOR_COUNT values.  */
                    639: #define DIVISOR_ITERATION                                               \
                    640:   ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
                    641:                   DIVISOR_RANDOMS, TRY_DIVISOR)
                    642:
                    643:
                    644: /* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
                    645:    d[0] or d[1] respectively, -1 means a separate (write-protected)
                    646:    location. */
                         /* The order of the entries matters: OVERLAP_COUNT selects a leading
                            prefix of the table (1, 2, 3, 4 or all 9 entries) according to the
                            routine's flags.  */
                    647:
                    648: struct overlap_t {
                    649:   int  s[NUM_SOURCES];
                    650: } overlap_array[] = {
                    651:   { { -1, -1 } },
                    652:   { {  0, -1 } },
                    653:   { { -1,  0 } },
                    654:   { {  0,  0 } },
                    655:   { {  1, -1 } },
                    656:   { { -1,  1 } },
                    657:   { {  1,  1 } },
                    658:   { {  0,  1 } },
                    659:   { {  1,  0 } },
                    660: };
                    661:
                         /* Current overlap_array entry and the end of the prefix being iterated,
                            both set by OVERLAP_ITERATION. */
                    662: struct overlap_t  *overlap, *overlap_limit;
                    663:
                         /* Number of leading overlap_array entries to use for the current
                            routine.  1 is the no-overlap entry only; 2 adds s[0] on d[0]; 3 adds
                            s[1] on d[0]; 4 adds both sources on d[0]; 9 is the whole table,
                            including the d[1] overlaps.  The flag tests are made in the order
                            written, so the first one that matches decides.  */
                    664: #define OVERLAP_COUNT                   \
                    665:   (tr->flag & TRY_OVERLAP_NONE ? 1      \
                    666:    : tr->flag & TRY_OVERLAP_NOTSRCS ? 3 \
                    667:    : tr->flag & TRY_DST1 ? 9            \
                    668:    : tr->flag & TRY_SRC1 ? 4            \
                    669:    : tr->flag & TRY_DST0 ? 2            \
                    670:    : 1)
                    671:
                         /* Loop header walking the global overlap pointer over the first
                            OVERLAP_COUNT entries of overlap_array.  */
                    672: #define OVERLAP_ITERATION                               \
                    673:   for (overlap = &overlap_array[0],                     \
                    674:     overlap_limit = &overlap_array[OVERLAP_COUNT];      \
                    675:     overlap < overlap_limit;                            \
                    676:     overlap++)
                    677:
                    678:
                         /* Number of random generators selectable through t_rand, and the current
                            selection (0 = mpn_random, 1 = mpn_random2, see t_random).  */
                    679: #define T_RAND_COUNT  2
                    680: int  t_rand;
                    681:
                    682: void
                    683: t_random (mp_ptr ptr, mp_size_t n)
                    684: {
                    685:   if (size == 0)
                    686:     return;
                    687:
                    688:   switch (option_data) {
                    689:   case DATA_TRAND:
                    690:     switch (t_rand) {
                    691:     case 0: mpn_random (ptr, n); break;
                    692:     case 1: mpn_random2 (ptr, n); break;
                    693:     default: abort();
                    694:     }
                    695:     break;
                    696:   case DATA_SEQ:
                    697:     {
                    698:       static mp_limb_t  counter = 0;
                    699:       mp_size_t  i;
                    700:       for (i = 0; i < n; i++)
                    701:         ptr[i] = ++counter;
                    702:     }
                    703:     break;
                    704:   case DATA_ZEROS:
                    705:     refmpn_fill (ptr, n, (mp_limb_t) 0);
                    706:     break;
                    707:   case DATA_FFS:
                    708:     refmpn_fill (ptr, n, (mp_limb_t) -1);
                    709:     break;
                    710:   case DATA_2FD:
                    711:     /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
                    712:        inducing the q1_ff special case in the mul-by-inverse part of some
                    713:        versions of divrem_1 and mod_1. */
                    714:     refmpn_fill (ptr, n, (mp_limb_t) -1);
                    715:     ptr[n-1] = 2;
                    716:     ptr[0] -= 2;
                    717:     break;
                    718:
                    719:   default:
                    720:     abort();
                    721:   }
                    722: }
                         /* Loop header running t_rand over each of the T_RAND_COUNT generator
                            selections.  */
                    723: #define T_RAND_ITERATION \
                    724:   for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)
                    725:
                    726:
                    727: void
                    728: print_each (const struct each_t *e)
                    729: {
                    730:   int  i;
                    731:
                    732:   printf ("%s %s\n", e->name, e == &ref ? tr->ref.name : tr->fun.name);
                    733:   if (tr->flag & TRY_RETVAL)
                    734:     printf ("   retval %08lX\n", e->retval);
                    735:
                    736:   for (i = 0; i < numberof (e->d); i++)
                    737:     {
                    738:       if (tr->flag & TRY_DST(i))
                    739:         {
                    740:           mpn_tracen ("   d[%d]", i, e->d[i].p, dsize);
                    741:           printf ("        located %p\n", e->d[i].p);
                    742:         }
                    743:     }
                    744:
                    745:   for (i = 0; i < numberof (e->s); i++)
                    746:     if (tr->flag & TRY_SRC(i))
                    747:       printf ("   s[%d] located %p\n", i, e->s[i].p);
                    748: }
                    749:
                    750: void
                    751: print_all (void)
                    752: {
                    753:   int  i;
                    754:
                    755:   printf ("\n");
                    756:   printf ("size  %ld\n", size);
                    757:   if (tr->flag & (TRY_SIZE2|TRY_XSIZE))
                    758:     printf ("size2 %ld\n", size2);
                    759:   if (dsize != size)
                    760:     printf ("dsize %ld\n", dsize);
                    761:   if (tr->flag & TRY_MULTIPLIER)
                    762:     printf ("   multiplier 0x%lX\n", multiplier);
                    763:   if (tr->flag & TRY_DIVISOR)
                    764:     printf ("   divisor 0x%lX\n", divisor);
                    765:   if (tr->flag & TRY_SHIFT)
                    766:     printf ("   shift %lu\n", shift);
                    767:   if (tr->flag & TRY_CARRYANY)
                    768:     printf ("   carry %lX\n", carry);
                    769:
                    770:   for (i = 0; i < numberof (d); i++)
                    771:     if (tr->flag & TRY_DST(i))
                    772:       printf ("   d[%d] %s, align %ld\n",
                    773:               i, d[i].high ? "high" : "low", d[i].align);
                    774:
                    775:   for (i = 0; i < numberof (s); i++)
                    776:     {
                    777:       if (tr->flag & TRY_SRC(i))
                    778:         {
                    779:           printf ("   s[%d] %s, align %ld, ",
                    780:                   i, s[i].high ? "high" : "low", s[i].align);
                    781:           switch (overlap->s[i]) {
                    782:           case -1:
                    783:             printf ("no overlap\n");
                    784:             break;
                    785:           default:
                    786:             printf ("==d[%d]%s\n",
                    787:                     overlap->s[i],
                    788:                     tr->flag & TRY_OVERLAP_LOW_TO_HIGH ? "+a"
                    789:                     : tr->flag & TRY_OVERLAP_HIGH_TO_LOW ? "-a"
                    790:                     : "");
                    791:             break;
                    792:           }
                    793:           mpn_tracen ("   s[%d]", i, s[i].p, SRC_SIZE(i));
                    794:         }
                    795:     }
                    796:
                    797:   if (tr->flag & TRY_DST0_INIT)
                    798:     mpn_trace ("   d[0]", s[1].region.ptr, size);
                    799:
                    800:   print_each (&ref);
                    801:   print_each (&fun);
                    802: }
                    803:
                         /* Check the fun results against the ref results.  A difference in return
                            value (when the routine has TRY_RETVAL), a failed
                            CALLING_CONVENTIONS_CHECK, or a difference in any destination in use
                            counts as an error; on error the whole test is printed with
                            print_all() and the program aborts.  */
                    804: void
                    805: compare (void)
                    806: {
                    807:   int  error = 0;
                    808:   int  i;
                    809:
                    810:   if ((tr->flag & TRY_RETVAL) && ref.retval != fun.retval)
                    811:     {
                    812:       printf ("Different return values\n");
                    813:       error = 1;
                    814:     }
                    815:
                    816:   if (! CALLING_CONVENTIONS_CHECK ())
                    817:     error = 1;
                    818:
                         /* For these routines the reference return value is the result length,
                            so only that many limbs are compared (and later printed).  */
                    819:   if (tr->flag & TRY_DST_SIZE_RETVAL)
                    820:     dsize = ref.retval;
                    821:
                    822:   for (i = 0; i < numberof (ref.d); i++)
                    823:     {
                    824:       if (!(tr->flag & TRY_DST(i)))
                    825:         continue;
                    826:
                    827:       if (dsize != 0 && refmpn_cmp (ref.d[i].p, fun.d[i].p, dsize) != 0)
                    828:         {
                    829:           printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
                    830:                   i,
                    831:                   mpn_diff_lowest (ref.d[i].p, fun.d[i].p, dsize),
                    832:                   mpn_diff_highest (ref.d[i].p, fun.d[i].p, dsize));
                    833:           error = 1;
                    834:         }
                    835:     }
                    836:
                    837:   if (error)
                    838:     {
                    839:       print_all();
                    840:       abort();
                    841:     }
                    842: }
                    843:
                         /* Run function on the operands set up in e, storing its return value in
                            e->retval for the routine types that have one.  The switch is on the
                            whole of tr->flag, so the flags must equal one of the TRY_TYPE_*
                            values exactly; an unrecognised value is reported and the program
                            aborts.  Each case supplies the argument list for that routine type,
                            drawing on the globals size, size2, carry, divisor, multiplier and
                            shift, and makes the call through the CALLING_CONVENTIONS wrapper.  */
                    844: void
                    845: call (struct each_t *e, tryfun_t function)
                    846: {
                    847:   switch (tr->flag) {
                    848:   case TRY_TYPE_AORS_N:
                    849:     e->retval = CALLING_CONVENTIONS (function)
                    850:       (e->d[0].p, e->s[0].p, e->s[1].p, size);
                    851:     break;
                    852:   case TRY_TYPE_AORS_NC:
                    853:     e->retval = CALLING_CONVENTIONS (function)
                    854:       (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
                    855:     break;
                    856:
                    857:   case TRY_TYPE_LOGOPS_N:
                    858:     CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
                    859:     break;
                    860:
                    861:   case TRY_TYPE_ADDSUB_N:
                    862:     e->retval = CALLING_CONVENTIONS (function)
                    863:       (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
                    864:     break;
                    865:   case TRY_TYPE_ADDSUB_NC:
                    866:     e->retval = CALLING_CONVENTIONS (function)
                    867:       (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
                    868:     break;
                    869:
                    870:   case TRY_TYPE_COPYI:
                    871:   case TRY_TYPE_COPYD:
                    872:   case TRY_TYPE_COM_N:
                    873:     CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
                    874:     break;
                    875:
                    876:   case TRY_TYPE_DIVEXACT_BY3:
                    877:     e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
                    878:     break;
                    879:   case TRY_TYPE_DIVEXACT_BY3C:
                    880:     e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
                    881:                                                 carry);
                    882:     break;
                    883:
                    884:   case TRY_TYPE_DIVMOD_1:
                    885:     e->retval = CALLING_CONVENTIONS (function)
                    886:       (e->d[0].p, e->s[0].p, size, divisor);
                    887:     break;
                    888:   case TRY_TYPE_DIVMOD_1C:
                    889:     e->retval = CALLING_CONVENTIONS (function)
                    890:       (e->d[0].p, e->s[0].p, size, divisor, carry);
                    891:     break;
                    892:   case TRY_TYPE_DIVREM_1:
                    893:     e->retval = CALLING_CONVENTIONS (function)
                    894:       (e->d[0].p, size2, e->s[0].p, size, divisor);
                    895:     break;
                    896:   case TRY_TYPE_DIVREM_1C:
                    897:     e->retval = CALLING_CONVENTIONS (function)
                    898:       (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
                    899:     break;
                    900:   case TRY_TYPE_MOD_1:
                    901:     e->retval = CALLING_CONVENTIONS (function)
                    902:       (e->s[0].p, size, divisor);
                    903:     break;
                    904:   case TRY_TYPE_MOD_1C:
                    905:     e->retval = CALLING_CONVENTIONS (function)
                    906:       (e->s[0].p, size, divisor, carry);
                    907:     break;
                    908:   case TRY_TYPE_MOD_1_RSHIFT:
                    909:     e->retval = CALLING_CONVENTIONS (function)
                    910:       (e->s[0].p, size, shift, divisor);
                    911:     break;
                    912:
                    913:   case TRY_TYPE_GCD_1:
                    914:     /* Must have a non-zero src, but this probably isn't the best way to do
                    915:        it. */
                    916:     if (refmpn_zero_p (e->s[0].p, size))
                    917:       e->retval = 0;
                    918:     else
                    919:       e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
                    920:     break;
                    921:
                    922:   case TRY_TYPE_GCD:
                    923:     /* Sources are destroyed, so they're saved and replaced, but a general
                    924:        approach to this might be better.  Note that it's still e->s[0].p and
                    925:        e->s[1].p that are passed, to get the desired alignments. */
                    926:     {
                    927:       mp_ptr  s0 = refmpn_malloc_limbs (size);
                    928:       mp_ptr  s1 = refmpn_malloc_limbs (size2);
                    929:       refmpn_copyi (s0, e->s[0].p, size);
                    930:       refmpn_copyi (s1, e->s[1].p, size2);
                    931:
                         /* The source regions are made writable for the call and are not
                            set back to read-only here; try_one() re-protects them the next
                            time it fills the sources.  */
                    932:       mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
                    933:       mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
                    934:       e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
                    935:                                                   e->s[0].p, size,
                    936:                                                   e->s[1].p, size2);
                    937:       refmpn_copyi (e->s[0].p, s0, size);
                    938:       refmpn_copyi (e->s[1].p, s1, size2);
                    939:       free (s0);
                    940:       free (s1);
                    941:     }
                    942:     break;
                    943:
                    944:   case TRY_TYPE_MUL_1:
                    945:   case TRY_TYPE_AORSMUL_1:
                    946:     e->retval = CALLING_CONVENTIONS (function)
                    947:       (e->d[0].p, e->s[0].p, size, multiplier);
                    948:     break;
                    949:   case TRY_TYPE_MUL_1C:
                    950:   case TRY_TYPE_AORSMUL_1C:
                    951:     /* TRY_TYPE_AORSMUL_1C same */
                    952:     e->retval = CALLING_CONVENTIONS (function)
                    953:       (e->d[0].p, e->s[0].p, size, multiplier, carry);
                    954:     break;
                    955:
                    956:   case TRY_TYPE_MUL_BASECASE:
                    957:     CALLING_CONVENTIONS (function)
                    958:       (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
                    959:     break;
                    960:   case TRY_TYPE_MUL_N:
                    961:     CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
                    962:     break;
                    963:   case TRY_TYPE_SQR:
                    964:     CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
                    965:     break;
                    966:
                    967:   case TRY_TYPE_LSHIFT:
                    968:   case TRY_TYPE_RSHIFT:
                    969:     e->retval = CALLING_CONVENTIONS (function)
                    970:       (e->d[0].p, e->s[0].p, size, shift);
                    971:     break;
                    972:
                    973:   case TRY_TYPE_POPCOUNT:
                    974:     e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
                    975:     break;
                    976:   case TRY_TYPE_HAMDIST:
                    977:     e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, e->s[1].p, size);
                    978:     break;
                    979:
                    980:   default:
                    981:     printf ("Unknown routine type 0x%X\n", tr->flag);
                    982:     abort ();
                    983:     break;
                    984:   }
                    985: }
                    986:
                    987:
                         /* Establish the e->d[].p and e->s[].p operand pointers for one
                            implementation (ref or fun), from the current high/low and alignment
                            choices in the global d[] and s[] and the current overlap entry.  Also
                            sets the global dsize: 2*size for TRY_DOUBLE_DST, size+size2 for
                            TRY_SIZE2 or TRY_XSIZE, and size otherwise.  Aborts if the overlap
                            entry holds anything other than -1, 0 or 1.  */
                    988: void
                    989: pointer_setup (struct each_t *e)
                    990: {
                    991:   int  i, j;
                    992:
                    993:   if (tr->flag & TRY_DOUBLE_DST)
                    994:     dsize = 2*size;
                    995:   else if (tr->flag & (TRY_SIZE2|TRY_XSIZE))
                    996:     dsize = size+size2;
                    997:   else
                    998:     dsize = size;
                    999:
                   1000:   /* establish e->d[].p destinations */
                   1001:   for (i = 0; i < numberof (e->d); i++)
                   1002:     {
                   1003:       mp_size_t  offset = 0;
                   1004:
                   1005:       /* possible room for overlapping sources */
                         /* offset becomes the largest alignment among the sources that
                            overlap d[i] */
                   1006:       for (j = 0; j < numberof (overlap->s); j++)
                   1007:         if (overlap->s[j] == i)
                   1008:           offset = MAX (offset, s[j].align);
                   1009:
                   1010:       if (d[i].high)
                   1011:         {
                         /* high: d[i] ends d[i].align limbs short of the end of its
                            region, moved a further offset limbs down when sources are to
                            sit above it (TRY_OVERLAP_LOW_TO_HIGH) */
                   1012:           e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
                   1013:             - dsize - d[i].align;
                   1014:           if (tr->flag & TRY_OVERLAP_LOW_TO_HIGH)
                   1015:             e->d[i].p -= offset;
                   1016:         }
                   1017:       else
                   1018:         {
                         /* low: d[i] starts d[i].align limbs into its region, moved a
                            further offset limbs up when sources are to sit below it
                            (TRY_OVERLAP_HIGH_TO_LOW) */
                   1019:           e->d[i].p = e->d[i].region.ptr + d[i].align;
                   1020:           if (tr->flag & TRY_OVERLAP_HIGH_TO_LOW)
                   1021:             e->d[i].p += offset;
                   1022:         }
                   1023:     }
                   1024:
                   1025:   /* establish e->s[].p sources */
                   1026:   for (i = 0; i < numberof (s); i++)
                   1027:     {
                   1028:       int  o = overlap->s[i];
                   1029:       switch (o) {
                   1030:       case -1:
                   1031:         /* no overlap */
                         /* use the shared source location chosen in try_one() */
                   1032:         e->s[i].p = s[i].p;
                   1033:         break;
                   1034:       case 0:
                   1035:       case 1:
                   1036:         /* overlap with d[o] */
                         /* placed s[i].align limbs below or above d[o] for the two
                            directional overlap flags, size2 limbs above it for TRY_XSIZE,
                            and exactly on it otherwise */
                   1037:         if (tr->flag & TRY_OVERLAP_HIGH_TO_LOW)
                   1038:           e->s[i].p = e->d[o].p - s[i].align;
                   1039:         else if (tr->flag & TRY_OVERLAP_LOW_TO_HIGH)
                   1040:           e->s[i].p = e->d[o].p + s[i].align;
                   1041:         else if (tr->flag & TRY_XSIZE)
                   1042:           e->s[i].p = e->d[o].p + size2;
                   1043:         else
                   1044:           e->s[i].p = e->d[o].p;
                   1045:         break;
                   1046:       default:
                   1047:         abort();
                   1048:         break;
                   1049:       }
                   1050:     }
                   1051: }
                   1052:
                   1053:
                         /* Run one test with the current settings of the global iteration
                            variables: place the operands, fill destinations and sources, call the
                            reference routine and then the function under test, and compare()
                            their results.  trap_location is updated at each stage; it is
                            presumably read by a fault handler defined elsewhere in the file to
                            report where a trap occurred.  */
                   1054: void
                   1055: try_one (void)
                   1056: {
                   1057:   int  i;
                   1058:
                   1059:   if (option_spinner)
                   1060:     spinner();
                   1061:   spinner_count++;
                   1062:
                   1063:   trap_location = TRAP_SETUPS;
                   1064:
                         /* Shared source locations: a "high" source ends s[i].align limbs short
                            of the end of its region, a "low" one starts s[i].align limbs in.  */
                   1065:   for (i = 0; i < numberof (s); i++)
                   1066:     {
                   1067:       if (s[i].high)
                   1068:         s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
                   1069:       else
                   1070:         s[i].p = s[i].region.ptr + s[i].align;
                   1071:     }
                   1072:
                   1073:   pointer_setup (&ref);
                   1074:   pointer_setup (&fun);
                   1075:
                         /* Initial destination contents.  With TRY_DST0_INIT, d[0] gets the
                            same generated data for ref and fun, built in s[1].region; all
                            other destinations in use are filled with DEADVAL.  */
                   1076:   if (tr->flag & TRY_DST0_INIT)
                   1077:     {
                   1078:       t_random (s[1].region.ptr, dsize);
                   1079:       MPN_COPY (fun.d[0].p, s[1].region.ptr, dsize);
                   1080:       MPN_COPY (ref.d[0].p, s[1].region.ptr, dsize);
                   1081:     }
                   1082:   else if (tr->flag & TRY_DST0)
                   1083:     {
                   1084:       refmpn_fill (ref.d[0].p, dsize, DEADVAL);
                   1085:       refmpn_fill (fun.d[0].p, dsize, DEADVAL);
                   1086:     }
                   1087:   for (i = 1; i < numberof (d); i++)
                   1088:     {
                   1089:       if (!(tr->flag & TRY_DST(i)))
                   1090:         continue;
                   1091:
                   1092:       refmpn_fill (ref.d[i].p, dsize, DEADVAL);
                   1093:       refmpn_fill (fun.d[i].p, dsize, DEADVAL);
                   1094:     }
                   1095:
                         /* Distinct placeholder return values, presumably so that a routine
                            which fails to set its return value shows up as a mismatch in
                            compare().  */
                   1096:   ref.retval = 0x04152637;
                   1097:   fun.retval = 0x8C9DAEBF;
                   1098:
                         /* Fill each source in use.  Its region is made writable only while the
                            data is generated and is read-only again afterwards.  */
                   1099:   for (i = 0; i < numberof (s); i++)
                   1100:     {
                   1101:       if (!(tr->flag & TRY_SRC(i)))
                   1102:         continue;
                   1103:
                   1104:       mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
                   1105:       t_random (s[i].p, SRC_SIZE(i));
                   1106:
                   1107:       if (tr->flag & TRY_SRC1_GCDDATA)
                   1108:         {
                   1109:           /* s[1] no more bits than s[0] */
                   1110:           if (i == 1 && size2 == size)
                   1111:             s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
                   1112:
                   1113:           /* normalized */
                         /* a zero high limb is bumped to 1 */
                   1114:           s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
                   1115:
                   1116:           /* odd */
                   1117:           s[i].p[0] |= 1;
                   1118:         }
                   1119:
                   1120:       mprotect_region (&s[i].region, PROT_READ);
                   1121:
                         /* When the source overlaps a destination, pointer_setup() has put
                            ref.s[i].p somewhere other than s[i].p, so the data is copied to
                            the overlapped location of both ref and fun.  */
                   1122:       if (ref.s[i].p != s[i].p)
                   1123:         {
                   1124:           MPN_COPY (ref.s[i].p, s[i].p, SRC_SIZE(i));
                   1125:           MPN_COPY (fun.s[i].p, s[i].p, SRC_SIZE(i));
                   1126:         }
                   1127:     }
                   1128:
                   1129:   /* special requirement of divmod_1c,divrem_1c,mod_1c */
                         /* the carry-in is reduced below the divisor */
                   1130:   if (tr->flag == TRY_TYPE_DIVMOD_1C
                   1131:       || tr->flag == TRY_TYPE_DIVREM_1C
                   1132:       || tr->flag == TRY_TYPE_MOD_1C)
                   1133:     carry %= divisor;
                   1134:
                   1135:   if (option_print)
                   1136:     print_all();
                   1137:
                   1138:   trap_location = TRAP_REF;
                   1139:   call (&ref, tr->ref.function);
                   1140:   trap_location = TRAP_FUN;
                   1141:   call (&fun, tr->fun.function);
                   1142:   trap_location = TRAP_NOWHERE;
                   1143:
                   1144:   compare ();
                   1145: }
                   1146:
                   1147:
                         /* Loop header stepping the global size up to option_lastsize inclusive.
                            The start is the largest of option_firstsize, the routine's minsize,
                            and 0 or 1 according to whether the routine has TRY_SIZE_ZERO.  */
                   1148: #define SIZE_ITERATION                                          \
                   1149:   for (size = MAX3 (option_firstsize,                           \
                   1150:                     tr->minsize,                                \
                   1151:                     (tr->flag & TRY_SIZE_ZERO) ? 0 : 1);        \
                   1152:        size <= option_lastsize;                                 \
                   1153:        size++)
                   1154:
                         /* First size2 value: option_firstsize2 when that is non-zero, otherwise
                            1 for TRY_SIZE2 routines and 0 for all others.
                            NOTE(review): for a routine with neither TRY_SIZE2 nor TRY_XSIZE,
                            SIZE2_LAST is 0, so a non-zero option_firstsize2 makes
                            SIZE2_ITERATION run no iterations at all -- confirm this is
                            intended.  */
                   1155: #define SIZE2_FIRST                             \
                   1156:   (option_firstsize2 != 0 ? option_firstsize2   \
                   1157:    : tr->flag & TRY_SIZE2 ? 1                   \
                   1158:    : tr->flag & TRY_XSIZE ? 0                   \
                   1159:    : 0)
                         /* Last size2 value, inclusive: the current size for TRY_SIZE2,
                            XSIZE_COUNT-1 for TRY_XSIZE, and 0 otherwise.  */
                   1160: #define SIZE2_LAST                              \
                   1161:   (tr->flag & TRY_SIZE2 ? size                  \
                   1162:    : tr->flag & TRY_XSIZE ? XSIZE_COUNT-1       \
                   1163:    : 0)
                   1164:
                         /* Loop header stepping the global size2 from SIZE2_FIRST to SIZE2_LAST
                            inclusive.  */
                   1165: #define SIZE2_ITERATION \
                   1166:   for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)
                   1167:
                         /* Number of alignments to try for an operand: ALIGNMENTS when cond is
                            non-zero (the operand is in use), otherwise just one.
                            ALIGN_ITERATION steps w[n].align over 0 .. ALIGN_COUNT(cond)-1.  */
                   1168: #define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)
                   1169: #define ALIGN_ITERATION(w,n,cond) \
                   1170:   for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)
                   1171:
                         /* High/low placement of an operand.  HIGH_LIMIT is 1 when cond is
                            non-zero (the operand is in use) and 0 otherwise, HIGH_COUNT is the
                            resulting number of placements (2 or 1), and HIGH_ITERATION steps
                            w[n].high over 0 .. HIGH_LIMIT(cond) inclusive.  */
                   1172: #define HIGH_LIMIT(cond)  ((cond) != 0)
                   1173: #define HIGH_COUNT(cond)  (HIGH_LIMIT (cond) + 1)
                   1174: #define HIGH_ITERATION(w,n,cond) \
                   1175:   for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++)
                   1176:
                         /* Largest shift count to try: BITS_PER_MP_LIMB-1 for routines with
                            TRY_SHIFT, otherwise 1 so that SHIFT_ITERATION makes a single pass.
                            SHIFT_ITERATION steps the global shift over 1 .. SHIFT_LIMIT
                            inclusive.  */
                   1177: #define SHIFT_LIMIT \
                   1178:   ((unsigned long) ((tr->flag & TRY_SHIFT) ? BITS_PER_MP_LIMB-1 : 1))
                   1179:
                   1180: #define SHIFT_ITERATION \
                   1181:   for (shift = 1; shift <= SHIFT_LIMIT; shift++)
                   1182:
                   1183:
                   1184: void
                   1185: try_many (void)
                   1186: {
                   1187:   int   i;
                   1188:
                   1189:   {
                   1190:     unsigned long  total = 1;
                   1191:
                   1192:     total *= option_repetitions;
                   1193:     total *= option_lastsize;
                   1194:     if (tr->flag & TRY_SIZE2) total *= (option_lastsize+1)/2;
                   1195:     if (tr->flag & TRY_XSIZE) total *= XSIZE_COUNT;
                   1196:
                   1197:     total *= SHIFT_LIMIT;
                   1198:     total *= MULTIPLIER_COUNT;
                   1199:     total *= DIVISOR_COUNT;
                   1200:     total *= CARRY_COUNT;
                   1201:     total *= T_RAND_COUNT;
                   1202:
                   1203:     total *= HIGH_COUNT (tr->flag & TRY_DST0);
                   1204:     total *= HIGH_COUNT (tr->flag & TRY_DST1);
                   1205:     total *= HIGH_COUNT (tr->flag & TRY_SRC0);
                   1206:     total *= HIGH_COUNT (tr->flag & TRY_SRC1);
                   1207:
                   1208:     total *= ALIGN_COUNT (tr->flag & TRY_DST0);
                   1209:     total *= ALIGN_COUNT (tr->flag & TRY_DST1);
                   1210:     total *= ALIGN_COUNT (tr->flag & TRY_SRC0);
                   1211:     total *= ALIGN_COUNT (tr->flag & TRY_SRC1);
                   1212:
                   1213:     total *= OVERLAP_COUNT;
                   1214:
                   1215:     printf ("%s %lu\n", tr->fun.name, total);
                   1216:   }
                   1217:
                   1218:   spinner_count = 0;
                   1219:
                   1220:   for (i = 0; i < option_repetitions; i++)
                   1221:     SIZE_ITERATION
                   1222:       SIZE2_ITERATION
                   1223:
                   1224:       SHIFT_ITERATION
                   1225:       MULTIPLIER_ITERATION
                   1226:       DIVISOR_ITERATION
                   1227:       CARRY_ITERATION /* must be after divisor */
                   1228:       T_RAND_ITERATION
                   1229:
                   1230:       HIGH_ITERATION(d,0, tr->flag & TRY_DST0)
                   1231:       HIGH_ITERATION(d,1, tr->flag & TRY_DST1)
                   1232:       HIGH_ITERATION(s,0, tr->flag & TRY_SRC0)
                   1233:       HIGH_ITERATION(s,1, tr->flag & TRY_SRC1)
                   1234:
                   1235:       ALIGN_ITERATION(d,0, tr->flag & TRY_DST0)
                   1236:       ALIGN_ITERATION(d,1, tr->flag & TRY_DST1)
                   1237:       ALIGN_ITERATION(s,0, tr->flag & TRY_SRC0)
                   1238:       ALIGN_ITERATION(s,1, tr->flag & TRY_SRC1)
                   1239:
                   1240:       OVERLAP_ITERATION
                   1241:       try_one();
                   1242:
                   1243:   printf("\n");
                   1244: }
                   1245:
                   1246:
                   1247: /* Usually print_all() doesn't show much, but it might give a hint as to
                   1248:    where the function was up to when it died. */
                   1249: void
                   1250: trap (int sig)
                   1251: {
                   1252:   const char *name = "noname";
                   1253:
                   1254:   switch (sig) {
                   1255:   case SIGILL:  name = "SIGILL";  break;
                   1256: #ifdef SIGBUS
                   1257:   case SIGBUS:  name = "SIGBUS";  break;
                   1258: #endif
                   1259:   case SIGSEGV: name = "SIGSEGV"; break;
                   1260:   case SIGFPE:  name = "SIGFPE";  break;
                   1261:   }
                   1262:
                   1263:   printf ("\n\nSIGNAL TRAP: %s\n", name);
                   1264:
                   1265:   switch (trap_location) {
                   1266:   case TRAP_REF:
                   1267:     printf ("  in reference function: %s\n", tr->ref.name);
                   1268:     break;
                   1269:   case TRAP_FUN:
                   1270:     printf ("  in test function: %s\n", tr->fun.name);
                   1271:     print_all ();
                   1272:     break;
                   1273:   case TRAP_SETUPS:
                   1274:     printf ("  in parameter setups\n");
                   1275:     print_all ();
                   1276:     break;
                   1277:   default:
                   1278:     printf ("  somewhere unknown\n");
                   1279:     break;
                   1280:   }
                   1281:   exit (1);
                   1282: }
                   1283:
                   1284:
                   1285: void
                   1286: try_init (void)
                   1287: {
                   1288: #if HAVE_GETPAGESIZE
                   1289:   /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
                   1290:      know _SC_PAGESIZE. */
                   1291:   pagesize = getpagesize ();
                   1292: #else
                   1293: #if HAVE_SYSCONF
                   1294:   if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
                   1295:     {
                   1296:       /* According to the linux man page, sysconf doesn't set errno */
                   1297:       fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
                   1298:       exit (1);
                   1299:     }
                   1300: #else
                   1301: Error, error, cannot get page size
                   1302: #endif
                   1303: #endif
                   1304:
                   1305:   printf ("pagesize is 0x%lX bytes\n", pagesize);
                   1306:
                   1307:   signal (SIGILL,  trap);
                   1308: #ifdef SIGBUS
                   1309:   signal (SIGBUS,  trap);
                   1310: #endif
                   1311:   signal (SIGSEGV, trap);
                   1312:   signal (SIGFPE,  trap);
                   1313:
                   1314:   {
                   1315:     int  i;
                   1316:
                   1317:     for (i = 0; i < numberof (s); i++)
                   1318:       {
                   1319:         malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
                   1320:         printf ("s[%d] %p to %p (0x%lX bytes)\n",
                   1321:                 i, s[i].region.ptr,
                   1322:                 s[i].region.ptr + s[i].region.size,
                   1323:                 s[i].region.size * BYTES_PER_MP_LIMB);
                   1324:       }
                   1325:
                   1326: #define INIT_EACH(e,es)                                                 \
                   1327:     for (i = 0; i < numberof (e.d); i++)                                \
                   1328:       {                                                                 \
                   1329:         malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
                   1330:         printf ("%s d[%d] %p to %p (0x%lX bytes)\n",                    \
                   1331:                 es, i, e.d[i].region.ptr,                               \
                   1332:                 e.d[i].region.ptr + e.d[i].region.size,                 \
                   1333:                 e.d[i].region.size * BYTES_PER_MP_LIMB);                \
                   1334:       }
                   1335:
                   1336:     INIT_EACH(ref, "ref");
                   1337:     INIT_EACH(fun, "fun");
                   1338:   }
                   1339: }
                   1340:
                   /* Match "str" against "pattern", returning 1 on a match and 0 otherwise.

                      A "*" as the first character of the pattern makes the rest of the
                      pattern match the end of str.  Failing that, a "*" as the last character
                      makes the rest match the start of str.  A pattern with neither must
                      equal str exactly.  Only one wildcard is recognised, so in "*foo*" the
                      trailing "*" is an ordinary character. */
                   int
                   strmatch_wild (const char *pattern, const char *str)
                   {
                     size_t  plen, slen;

                     /* wildcard at start */
                     if (pattern[0] == '*')
                       {
                         pattern++;
                         plen = strlen (pattern);
                         slen = strlen (str);
                         return (plen == 0
                                 || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));
                       }

                     /* wildcard at end; strncmp rather than memcmp because str can be shorter
                        than the prefix, and the comparison must stop at its terminator
                        instead of reading on past the end of the string */
                     plen = strlen (pattern);
                     if (plen >= 1 && pattern[plen-1] == '*')
                       return (strncmp (pattern, str, plen-1) == 0);

                     /* no wildcards */
                     return (strcmp (pattern, str) == 0);
                   }
                   1364:
                   1365: void
                   1366: try_name (const char *name)
                   1367: {
                   1368:   int  found = 0;
                   1369:   int  i;
                   1370:
                   1371:   for (i = 0; i < numberof (try_array); i++)
                   1372:     {
                   1373:       if (strmatch_wild (name, try_array[i].fun.name))
                   1374:         {
                   1375:           tr = &try_array[i];
                   1376:           try_many ();
                   1377:           found = 1;
                   1378:         }
                   1379:     }
                   1380:
                   1381:   if (!found)
                   1382:     {
                   1383:       printf ("%s unknown\n", name);
                   1384:       /* exit (1); */
                   1385:     }
                   1386: }
                   1387:
                   1388:
                   1389: void
                   1390: usage (const char *prog)
                   1391: {
                   1392:   int  col = 0;
                   1393:   int  i;
                   1394:
                   1395:   printf ("Usage: %s [options] function...\n\
                   1396:     -1        use limb data 1,2,3,etc\n\
                   1397:     -9        use limb data all 0xFF..FFs\n\
                   1398:     -a zeros  use limb data all zeros\n\
                   1399:     -a ffs    use limb data all 0xFF..FFs (same as -9)\n\
                   1400:     -a 2fd    use data 0x2FFF...FFFD\n\
                   1401:     -p        print each case tried (try this if seg faulting)\n\
                   1402:     -R        seed random numbers from time()\n\
                   1403:     -r reps   set repetitions (default %d)\n\
                   1404:     -S seed   randomize from given seed\n\
                   1405:     -s size   starting size to test\n\
                   1406:     -s s1-s2  range of sizes to test\n\
                   1407:     -W        don't show the spinner (use this in gdb)\n\
                   1408:     -z        disable mprotect() redzones\n\
                   1409: Default data is mpn_random() and mpn_random2().\n\
                   1410: \n\
                   1411: Functions that can be tested:\n\
                   1412: ", prog, DEFAULT_REPETITIONS);
                   1413:
                   1414:   for (i = 0; i < numberof (try_array); i++)
                   1415:     {
                   1416:       if (col + 1 + strlen (try_array[i].fun.name) > 79)
                   1417:         {
                   1418:           printf ("\n");
                   1419:           col = 0;
                   1420:         }
                   1421:       printf (" %s", try_array[i].fun.name);
                   1422:       col += 1 + strlen (try_array[i].fun.name);
                   1423:     }
                   1424:   printf ("\n");
                   1425:
                   1426:   exit(1);
                   1427: }
                   1428:
                   1429:
                   1430: int
                   1431: main (int argc, char *argv[])
                   1432: {
                   1433:   int  i;
                   1434:
                   1435:   /* unbuffered output */
                   1436:   setbuf (stdout, NULL);
                   1437:   setbuf (stderr, NULL);
                   1438:
                   1439:   /* always trace in hex, upper-case so can paste into bc */
                   1440:   mp_trace_base = -16;
                   1441:
                   1442:   {
                   1443:     unsigned  seed = 123;
                   1444:     int   opt;
                   1445:
                   1446:     while ((opt = getopt(argc, argv, "19a:pRr:S:s:Wz")) != EOF)
                   1447:       {
                   1448:         switch (opt) {
                   1449:         case '1':
                   1450:           /* use limb data values 1, 2, 3, ... etc */
                   1451:           option_data = DATA_SEQ;
                   1452:           break;
                   1453:         case '9':
                   1454:           /* use limb data values 0xFFF...FFF always */
                   1455:           option_data = DATA_FFS;
                   1456:           break;
                   1457:         case 'a':
                   1458:           if (strcmp (optarg, "zeros") == 0)     option_data = DATA_ZEROS;
                   1459:           else if (strcmp (optarg, "seq") == 0)  option_data = DATA_SEQ;
                   1460:           else if (strcmp (optarg, "ffs") == 0)  option_data = DATA_FFS;
                   1461:           else if (strcmp (optarg, "2fd") == 0)  option_data = DATA_2FD;
                   1462:           else
                   1463:             {
                   1464:               fprintf (stderr, "unrecognised data option: %s\n", optarg);
                   1465:               exit (1);
                   1466:             }
                   1467:           break;
                   1468:         case 'p':
                   1469:           option_print = 1;
                   1470:           break;
                   1471:         case 'R':
                   1472:           /* randomize */
                   1473:          seed = time (NULL);
                   1474:           break;
                   1475:         case 'r':
                   1476:          option_repetitions = atoi (optarg);
                   1477:           break;
                   1478:         case 's':
                   1479:           {
                   1480:             char  *p;
                   1481:             option_firstsize = atoi (optarg);
                   1482:             if ((p = strchr (optarg, '-')) != NULL)
                   1483:               option_lastsize = atoi (p+1);
                   1484:           }
                   1485:           break;
                   1486:         case 'S':
                   1487:           /* -S <size> sets the starting size for the second of a two size
                   1488:              routine (like mpn_mul_basecase) */
                   1489:          option_firstsize2 = atoi (optarg);
                   1490:           break;
                   1491:         case 'W':
                   1492:           /* use this when running in the debugger */
                   1493:           option_spinner = 0;
                   1494:           break;
                   1495:         case 'z':
                   1496:           /* disable redzones */
                   1497:           option_redzones = 0;
                   1498:           break;
                   1499:         case '?':
                   1500:           usage (argv[0]);
                   1501:           break;
                   1502:         }
                   1503:       }
                   1504:
                   1505:     srandom (seed);
                   1506:   }
                   1507:
                   1508:   try_init();
                   1509:
                   1510:   if (argc <= optind)
                   1511:     usage (argv[0]);
                   1512:
                   1513:   for (i = optind; i < argc; i++)
                   1514:     try_name (argv[i]);
                   1515:
                   1516:   return 0;
                   1517: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>