Annotation of OpenXM_contrib/gmp/tests/devel/try.c, Revision 1.1
1.1 ! ohara 1: /* Run some tests on various mpn routines.
! 2:
! 3: THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO
! 4: BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
! 5:
! 6: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
! 7:
! 8: This file is part of the GNU MP Library.
! 9:
! 10: The GNU MP Library is free software; you can redistribute it and/or modify
! 11: it under the terms of the GNU Lesser General Public License as published by
! 12: the Free Software Foundation; either version 2.1 of the License, or (at your
! 13: option) any later version.
! 14:
! 15: The GNU MP Library is distributed in the hope that it will be useful, but
! 16: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 17: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 18: License for more details.
! 19:
! 20: You should have received a copy of the GNU Lesser General Public License
! 21: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 22: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 23: MA 02111-1307, USA. */
! 24:
! 25:
! 26: /* Usage: try [options] <function>...
! 27:
! 28: For example, "./try mpn_add_n" to run tests of that function.
! 29:
! 30: Combinations of alignments and overlaps are tested, with redzones above
! 31: or below the destinations, and with the sources write-protected.
! 32:
! 33: The number of tests performed becomes ridiculously large with all the
! 34: combinations, and for that reason this can't be a part of a "make check",
! 35: it's meant only for development. The code isn't very pretty either.
! 36:
! 37: During development it can help to disable the redzones, since seeing the
! 38: rest of the destination written can show where the wrong part is, or if
! 39: the dst pointers are off by 1 or whatever. The magic DEADVAL initial
! 40: fill (see below) will show locations never written.
! 41:
! 42: The -s option can be used to test only certain size operands, which is
! 43: useful if some new code doesn't yet support say sizes less than the
! 44: unrolling, or whatever.
! 45:
! 46: When a problem occurs it'll of course be necessary to run the program
! 47: under gdb to find out quite where, how and why it's going wrong. Disable
! 48: the spinner with the -W option when doing this, or single stepping won't
! 49: work. Using the "-1" option to run with simple data can be useful.
! 50:
! 51: New functions to test can be added in choice_array[]. If a new TYPE is
! 52: required then add it to the existing constants, set up its parameters in
! 53: param_init(), and add it to the call() function. Extra parameter fields
! 54: can be added if necessary, or further interpretations given to existing
! 55: fields.
! 56:
! 57:
! 58: Bugs:
! 59:
! 60: umul_ppmm support is not very good, lots of source data is generated
! 61: whereas only two limbs are needed.
! 62:
! 63:
! 64: Future:
! 65:
! 66: Make a little scheme for interpreting the "SIZE" selections uniformly.
! 67:
! 68: Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
! 69: source limbs. Possibly increase the default repetitions in that case.
! 70:
! 71: Automatically detect gdb and disable the spinner (use -W for now).
! 72:
! 73: Make a way to re-run a failing case in the debugger. Have an option to
! 74: snapshot each test case before it's run so the data is available if a
! 75: segv occurs. (This should be more reliable than the current print_all()
! 76: in the signal handler.)
! 77:
! 78: When alignment means a dst isn't hard against the redzone, check the
! 79: space in between remains unchanged.
! 80:
! 81: See if the 80x86 debug registers can do redzones on byte boundaries.
! 82:
! 83: When a source overlaps a destination, don't run both s[i].high 0 and 1,
! 84: as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i].
! 85:
! 86: When partial overlaps aren't done, don't loop over source alignments
! 87: during overlaps.
! 88:
! 89: Try to make the looping code a bit less horrible. Right now it's pretty
! 90: hard to see what iterations are actually done.
! 91:
! 92: When there's no overlap, run with both src>dst and src<dst. A subtle
! 93: calling-conventions violation occurred in a P6 copy which depended on the
! 94: relative location of src and dst.
! 95:
! 96: */
! 97:
! 98:
! 99: /* always do assertion checking */
! 100: #define WANT_ASSERT 1
! 101:
! 102: #include "config.h"
! 103:
! 104: #include <errno.h>
! 105: #include <limits.h>
! 106: #include <signal.h>
! 107: #include <stdio.h>
! 108: #include <stdlib.h>
! 109: #include <string.h>
! 110: #include <time.h>
! 111:
! 112: #if HAVE_UNISTD_H
! 113: #include <unistd.h>
! 114: #endif
! 115:
! 116: #if HAVE_SYS_MMAN_H
! 117: #include <sys/mman.h>
! 118: #endif
! 119:
! 120: #include "gmp.h"
! 121: #include "gmp-impl.h"
! 122: #include "longlong.h"
! 123: #include "tests.h"
! 124:
! 125:
! 126: #if WANT_ASSERT
! 127: #define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0)
! 128: #else
! 129: #define ASSERT_CARRY(expr) (expr)
! 130: #endif
! 131:
! 132:
! 133: #if !HAVE_DECL_OPTARG
! 134: extern char *optarg;
! 135: extern int optind, opterr;
! 136: #endif
! 137:
! 138: /* Rumour has it some systems lack a define of PROT_NONE. */
! 139: #ifndef PROT_NONE
! 140: #define PROT_NONE 0
! 141: #endif
! 142:
! 143: /* Dummy defines for when mprotect doesn't exist. */
! 144: #ifndef PROT_READ
! 145: #define PROT_READ 0
! 146: #endif
! 147: #ifndef PROT_WRITE
! 148: #define PROT_WRITE 0
! 149: #endif
! 150:
! 151: #ifdef EXTRA_PROTOS
! 152: EXTRA_PROTOS
! 153: #endif
! 154: #ifdef EXTRA_PROTOS2
! 155: EXTRA_PROTOS2
! 156: #endif
! 157:
! 158:
! 159: #define DEFAULT_REPETITIONS 10
! 160:
! 161: int option_repetitions = DEFAULT_REPETITIONS;
! 162: int option_spinner = 1;
! 163: int option_redzones = 1;
! 164: int option_firstsize = 0;
! 165: int option_lastsize = 500;
! 166: int option_firstsize2 = 0;
! 167:
! 168: #define ALIGNMENTS 4
! 169: #define OVERLAPS 4
! 170: #define CARRY_RANDOMS 5
! 171: #define MULTIPLIER_RANDOMS 5
! 172: #define DIVISOR_RANDOMS 5
! 173: #define FRACTION_COUNT 4
! 174:
! 175: int option_print = 0;
! 176:
! 177: #define DATA_TRAND 0
! 178: #define DATA_ZEROS 1
! 179: #define DATA_SEQ 2
! 180: #define DATA_FFS 3
! 181: #define DATA_2FD 4
! 182: int option_data = DATA_TRAND;
! 183:
! 184:
! 185: mp_size_t pagesize;
! 186: #define PAGESIZE_LIMBS (pagesize / BYTES_PER_MP_LIMB)
! 187:
! 188: /* must be a multiple of the page size */
! 189: #define REDZONE_BYTES (pagesize * 16)
! 190: #define REDZONE_LIMBS (REDZONE_BYTES / BYTES_PER_MP_LIMB)
! 191:
! 192:
! 193: #define MAX3(x,y,z) (MAX (x, MAX (y, z)))
! 194:
! 195: #if BITS_PER_MP_LIMB == 32
! 196: #define DEADVAL CNST_LIMB(0xDEADBEEF)
! 197: #else
! 198: #define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE)
! 199: #endif
! 200:
! 201:
! 202: struct region_t {
! 203: mp_ptr ptr;
! 204: mp_size_t size;
! 205: };
! 206:
! 207:
! 208: #define TRAP_NOWHERE 0
! 209: #define TRAP_REF 1
! 210: #define TRAP_FUN 2
! 211: #define TRAP_SETUPS 3
! 212: int trap_location = TRAP_NOWHERE;
! 213:
! 214:
! 215: #define NUM_SOURCES 2
! 216: #define NUM_DESTS 2
! 217:
! 218: struct source_t {
! 219: struct region_t region;
! 220: int high;
! 221: mp_size_t align;
! 222: mp_ptr p;
! 223: };
! 224:
! 225: struct source_t s[NUM_SOURCES];
! 226:
! 227: struct dest_t {
! 228: int high;
! 229: mp_size_t align;
! 230: mp_size_t size;
! 231: };
! 232:
! 233: struct dest_t d[NUM_DESTS];
! 234:
! 235: struct source_each_t {
! 236: mp_ptr p;
! 237: };
! 238:
! 239: struct dest_each_t {
! 240: struct region_t region;
! 241: mp_ptr p;
! 242: };
! 243:
! 244: mp_size_t size;
! 245: mp_size_t size2;
! 246: unsigned long shift;
! 247: mp_limb_t carry;
! 248: mp_limb_t divisor;
! 249: mp_limb_t multiplier;
! 250:
! 251: struct each_t {
! 252: const char *name;
! 253: struct dest_each_t d[NUM_DESTS];
! 254: struct source_each_t s[NUM_SOURCES];
! 255: mp_limb_t retval;
! 256: };
! 257:
! 258: struct each_t ref = { "Ref" };
! 259: struct each_t fun = { "Fun" };
! 260:
! 261: #define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size)
! 262:
! 263: void validate_fail _PROTO ((void));
! 264:
! 265:
! 266: #if HAVE_TRY_NEW_C
! 267: #include "try-new.c"
! 268: #endif
! 269:
! 270:
! 271: typedef mp_limb_t (*tryfun_t) _PROTO ((ANYARGS));
! 272:
! 273: struct try_t {
! 274: char retval;
! 275:
! 276: char src[2];
! 277: char dst[2];
! 278:
! 279: #define SIZE_YES 1
! 280: #define SIZE_ALLOW_ZERO 2
! 281: #define SIZE_1 3 /* 1 limb */
! 282: #define SIZE_2 4 /* 2 limbs */
! 283: #define SIZE_3 5 /* 3 limbs */
! 284: #define SIZE_FRACTION 6 /* size2 is fraction for divrem etc */
! 285: #define SIZE_SIZE2 7
! 286: #define SIZE_PLUS_1 8
! 287: #define SIZE_SUM 9
! 288: #define SIZE_DIFF 10
! 289: #define SIZE_DIFF_PLUS_1 11
! 290: #define SIZE_RETVAL 12
! 291: #define SIZE_CEIL_HALF 13
! 292: #define SIZE_GET_STR 14
! 293: char size;
! 294: char size2;
! 295: char dst_size[2];
! 296:
! 297: char dst_bytes[2];
! 298:
! 299: char dst0_from_src1;
! 300:
! 301: #define CARRY_BIT 1 /* single bit 0 or 1 */
! 302: #define CARRY_3 2 /* 0, 1, 2 */
! 303: #define CARRY_4 3 /* 0 to 3 */
! 304: #define CARRY_LIMB 4 /* any limb value */
! 305: #define CARRY_DIVISOR 5 /* carry<divisor */
! 306: char carry;
! 307:
! 308: /* a fudge to tell the output when to print negatives */
! 309: char carry_sign;
! 310:
! 311: char multiplier;
! 312: char shift;
! 313:
! 314: #define DIVISOR_LIMB 1
! 315: #define DIVISOR_NORM 2
! 316: #define DIVISOR_ODD 3
! 317: char divisor;
! 318:
! 319: #define DATA_NON_ZERO 1
! 320: #define DATA_GCD 2
! 321: #define DATA_SRC1_ODD 3
! 322: #define DATA_SRC1_HIGHBIT 4
! 323: #define DATA_MULTIPLE_DIVISOR 5
! 324: #define DATA_UDIV_QRNND 6
! 325: char data;
! 326:
! 327: /* Default is allow full overlap. */
! 328: #define OVERLAP_NONE 1
! 329: #define OVERLAP_LOW_TO_HIGH 2
! 330: #define OVERLAP_HIGH_TO_LOW 3
! 331: #define OVERLAP_NOT_SRCS 4
! 332: #define OVERLAP_NOT_SRC2 8
! 333: char overlap;
! 334:
! 335: tryfun_t reference;
! 336: const char *reference_name;
! 337:
! 338: void (*validate) _PROTO ((void));
! 339: const char *validate_name;
! 340: };
! 341:
! 342: struct try_t *tr;
! 343:
! 344:
! 345: void
! 346: validate_mod_34lsub1 (void)
! 347: {
! 348: #define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (BITS_PER_MP_LIMB / 4))) - 1)
! 349:
! 350: mp_srcptr ptr = s[0].p;
! 351: int error = 0;
! 352: mp_limb_t got, got_mod, want, want_mod;
! 353:
! 354: ASSERT (size >= 1);
! 355:
! 356: got = fun.retval;
! 357: got_mod = got % CNST_34LSUB1;
! 358:
! 359: want = refmpn_mod_34lsub1 (ptr, size);
! 360: want_mod = want % CNST_34LSUB1;
! 361:
! 362: if (got_mod != want_mod)
! 363: {
! 364: printf ("got 0x%lX reduced from 0x%lX\n", got_mod, got);
! 365: printf ("want 0x%lX reduced from 0x%lX\n", want_mod, want);
! 366: error = 1;
! 367: }
! 368:
! 369: if (error)
! 370: validate_fail ();
! 371: }
! 372:
! 373: void
! 374: validate_divexact_1 (void)
! 375: {
! 376: mp_srcptr src = s[0].p;
! 377: mp_srcptr dst = fun.d[0].p;
! 378: int error = 0;
! 379:
! 380: ASSERT (size >= 1);
! 381:
! 382: {
! 383: mp_ptr tp = refmpn_malloc_limbs (size);
! 384: mp_limb_t rem;
! 385:
! 386: rem = refmpn_divrem_1 (tp, 0, src, size, divisor);
! 387: if (rem != 0)
! 388: {
! 389: printf ("Remainder a%%d == 0x%lX, mpn_divexact_1 undefined\n", rem);
! 390: error = 1;
! 391: }
! 392: if (! refmpn_equal_anynail (tp, dst, size))
! 393: {
! 394: printf ("Quotient a/d wrong\n");
! 395: mpn_trace ("fun ", dst, size);
! 396: mpn_trace ("want", tp, size);
! 397: error = 1;
! 398: }
! 399: free (tp);
! 400: }
! 401:
! 402: if (error)
! 403: validate_fail ();
! 404: }
! 405:
! 406:
! 407: void
! 408: validate_modexact_1c_odd (void)
! 409: {
! 410: mp_srcptr ptr = s[0].p;
! 411: mp_limb_t r = fun.retval;
! 412: int error = 0;
! 413:
! 414: ASSERT (size >= 1);
! 415: ASSERT (divisor & 1);
! 416:
! 417: if (carry < divisor)
! 418: {
! 419: if (! (r < divisor))
! 420: {
! 421: printf ("Don't have r < divisor\n");
! 422: error = 1;
! 423: }
! 424: }
! 425: else /* carry >= divisor */
! 426: {
! 427: if (! (r <= divisor))
! 428: {
! 429: printf ("Don't have r <= divisor\n");
! 430: error = 1;
! 431: }
! 432: }
! 433:
! 434: {
! 435: mp_limb_t c = carry % divisor;
! 436: mp_ptr tp = refmpn_malloc_limbs (size+1);
! 437: mp_size_t k;
! 438:
! 439: for (k = size-1; k <= size; k++)
! 440: {
! 441: /* set {tp,size+1} to r*b^k + a - c */
! 442: refmpn_copyi (tp, ptr, size);
! 443: tp[size] = 0;
! 444: ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
! 445: if (refmpn_sub_1 (tp, tp, size+1, c))
! 446: ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));
! 447:
! 448: if (refmpn_mod_1 (tp, size+1, divisor) == 0)
! 449: goto good_remainder;
! 450: }
! 451: printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
! 452: error = 1;
! 453:
! 454: good_remainder:
! 455: free (tp);
! 456: }
! 457:
! 458: if (error)
! 459: validate_fail ();
! 460: }
! 461:
! 462: void
! 463: validate_modexact_1_odd (void)
! 464: {
! 465: carry = 0;
! 466: validate_modexact_1c_odd ();
! 467: }
! 468:
! 469:
! 470: void
! 471: validate_sqrtrem (void)
! 472: {
! 473: mp_srcptr orig_ptr = s[0].p;
! 474: mp_size_t orig_size = size;
! 475: mp_size_t root_size = (size+1)/2;
! 476: mp_srcptr root_ptr = fun.d[0].p;
! 477: mp_size_t rem_size = fun.retval;
! 478: mp_srcptr rem_ptr = fun.d[1].p;
! 479: mp_size_t prod_size = 2*root_size;
! 480: mp_ptr p;
! 481: int error = 0;
! 482:
! 483: if (rem_size < 0 || rem_size > size)
! 484: {
! 485: printf ("Bad remainder size retval %ld\n", rem_size);
! 486: validate_fail ();
! 487: }
! 488:
! 489: p = refmpn_malloc_limbs (prod_size);
! 490:
! 491: p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
! 492: if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
! 493: {
! 494: printf ("Remainder bigger than 2*root\n");
! 495: error = 1;
! 496: }
! 497:
! 498: refmpn_sqr (p, root_ptr, root_size);
! 499: if (rem_size != 0)
! 500: refmpn_add (p, p, prod_size, rem_ptr, rem_size);
! 501: if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
! 502: {
! 503: printf ("root^2+rem != original\n");
! 504: mpn_trace ("prod", p, prod_size);
! 505: error = 1;
! 506: }
! 507: free (p);
! 508:
! 509: if (error)
! 510: validate_fail ();
! 511: }
! 512:
! 513:
! 514: /* These types are indexes into the param[] array and are arbitrary so long
! 515: as they're all distinct and within the size of param[]. Renumber
! 516: whenever necessary or desired. */
! 517:
! 518: #define TYPE_ADD 1
! 519: #define TYPE_ADD_N 2
! 520: #define TYPE_ADD_NC 3
! 521: #define TYPE_SUB 4
! 522: #define TYPE_SUB_N 5
! 523: #define TYPE_SUB_NC 6
! 524:
! 525: #define TYPE_MUL_1 7
! 526: #define TYPE_MUL_1C 8
! 527:
! 528: #define TYPE_MUL_2 9
! 529:
! 530: #define TYPE_ADDMUL_1 10
! 531: #define TYPE_ADDMUL_1C 11
! 532: #define TYPE_SUBMUL_1 12
! 533: #define TYPE_SUBMUL_1C 13
! 534:
! 535: #define TYPE_ADDSUB_N 14
! 536: #define TYPE_ADDSUB_NC 15
! 537:
! 538: #define TYPE_RSHIFT 16
! 539: #define TYPE_LSHIFT 17
! 540:
! 541: #define TYPE_COPY 20
! 542: #define TYPE_COPYI 21
! 543: #define TYPE_COPYD 22
! 544: #define TYPE_COM_N 23
! 545:
! 546: #define TYPE_MOD_1 25
! 547: #define TYPE_MOD_1C 26
! 548: #define TYPE_DIVMOD_1 27
! 549: #define TYPE_DIVMOD_1C 28
! 550: #define TYPE_DIVREM_1 29
! 551: #define TYPE_DIVREM_1C 30
! 552: #define TYPE_PREINV_DIVREM_1 31
! 553: #define TYPE_PREINV_MOD_1 32
! 554: #define TYPE_MOD_34LSUB1 33
! 555: #define TYPE_UDIV_QRNND 34
! 556:
! 557: #define TYPE_DIVEXACT_1 35
! 558: #define TYPE_DIVEXACT_BY3 36
! 559: #define TYPE_DIVEXACT_BY3C 37
! 560:
! 561: #define TYPE_MODEXACT_1_ODD 38
! 562: #define TYPE_MODEXACT_1C_ODD 39
! 563:
! 564: #define TYPE_GCD 40
! 565: #define TYPE_GCD_1 41
! 566: #define TYPE_GCD_FINDA 42
! 567: #define TYPE_MPZ_JACOBI 43
! 568: #define TYPE_MPZ_KRONECKER 44
! 569: #define TYPE_MPZ_KRONECKER_UI 45
! 570: #define TYPE_MPZ_KRONECKER_SI 46
! 571: #define TYPE_MPZ_UI_KRONECKER 47
! 572: #define TYPE_MPZ_SI_KRONECKER 48
! 573:
! 574: #define TYPE_AND_N 50
! 575: #define TYPE_NAND_N 51
! 576: #define TYPE_ANDN_N 52
! 577: #define TYPE_IOR_N 53
! 578: #define TYPE_IORN_N 54
! 579: #define TYPE_NIOR_N 55
! 580: #define TYPE_XOR_N 56
! 581: #define TYPE_XNOR_N 57
! 582:
! 583: #define TYPE_POPCOUNT 58
! 584: #define TYPE_HAMDIST 59
! 585:
! 586: #define TYPE_MUL_BASECASE 60
! 587: #define TYPE_MUL_N 61
! 588: #define TYPE_SQR 62
! 589: #define TYPE_UMUL_PPMM 63
! 590: #define TYPE_UMUL_PPMM_R 64
! 591:
! 592: #define TYPE_SB_DIVREM_MN 70
! 593: #define TYPE_TDIV_QR 71
! 594:
! 595: #define TYPE_SQRTREM 80
! 596: #define TYPE_ZERO 81
! 597: #define TYPE_GET_STR 82
! 598:
! 599: #define TYPE_EXTRA 90
! 600:
! 601: struct try_t param[150];
! 602:
! 603:
! 604: void
! 605: param_init (void)
! 606: {
! 607: struct try_t *p;
! 608:
! 609: #define COPY(index) memcpy (p, ¶m[index], sizeof (*p))
! 610:
! 611: #if HAVE_STRINGIZE
! 612: #define REFERENCE(fun) \
! 613: p->reference = (tryfun_t) fun; \
! 614: p->reference_name = #fun
! 615: #define VALIDATE(fun) \
! 616: p->validate = fun; \
! 617: p->validate_name = #fun
! 618: #else
! 619: #define REFERENCE(fun) \
! 620: p->reference = (tryfun_t) fun; \
! 621: p->reference_name = "fun"
! 622: #define VALIDATE(fun) \
! 623: p->validate = fun; \
! 624: p->validate_name = "fun"
! 625: #endif
! 626:
! 627:
! 628: p = ¶m[TYPE_ADD_N];
! 629: p->retval = 1;
! 630: p->dst[0] = 1;
! 631: p->src[0] = 1;
! 632: p->src[1] = 1;
! 633: REFERENCE (refmpn_add_n);
! 634:
! 635: p = ¶m[TYPE_ADD_NC];
! 636: COPY (TYPE_ADD_N);
! 637: p->carry = CARRY_BIT;
! 638: REFERENCE (refmpn_add_nc);
! 639:
! 640: p = ¶m[TYPE_SUB_N];
! 641: COPY (TYPE_ADD_N);
! 642: REFERENCE (refmpn_sub_n);
! 643:
! 644: p = ¶m[TYPE_SUB_NC];
! 645: COPY (TYPE_ADD_NC);
! 646: REFERENCE (refmpn_sub_nc);
! 647:
! 648: p = ¶m[TYPE_ADD];
! 649: COPY (TYPE_ADD_N);
! 650: p->size = SIZE_ALLOW_ZERO;
! 651: p->size2 = 1;
! 652: REFERENCE (refmpn_add);
! 653:
! 654: p = ¶m[TYPE_SUB];
! 655: COPY (TYPE_ADD);
! 656: REFERENCE (refmpn_sub);
! 657:
! 658:
! 659: p = ¶m[TYPE_MUL_1];
! 660: p->retval = 1;
! 661: p->dst[0] = 1;
! 662: p->src[0] = 1;
! 663: p->multiplier = 1;
! 664: p->overlap = OVERLAP_LOW_TO_HIGH;
! 665: REFERENCE (refmpn_mul_1);
! 666:
! 667: p = ¶m[TYPE_MUL_1C];
! 668: COPY (TYPE_MUL_1);
! 669: p->carry = CARRY_LIMB;
! 670: REFERENCE (refmpn_mul_1c);
! 671:
! 672:
! 673: p = ¶m[TYPE_MUL_2];
! 674: p->retval = 1;
! 675: p->dst[0] = 1;
! 676: p->dst_size[0] = SIZE_PLUS_1;
! 677: p->src[0] = 1;
! 678: p->src[1] = 1;
! 679: p->size2 = SIZE_2;
! 680: p->overlap = OVERLAP_NOT_SRC2;
! 681: REFERENCE (refmpn_mul_2);
! 682:
! 683:
! 684: p = ¶m[TYPE_ADDMUL_1];
! 685: p->retval = 1;
! 686: p->dst[0] = 1;
! 687: p->src[0] = 1;
! 688: p->multiplier = 1;
! 689: p->dst0_from_src1 = 1;
! 690: REFERENCE (refmpn_addmul_1);
! 691:
! 692: p = ¶m[TYPE_ADDMUL_1C];
! 693: COPY (TYPE_ADDMUL_1);
! 694: p->carry = CARRY_LIMB;
! 695: REFERENCE (refmpn_addmul_1c);
! 696:
! 697: p = ¶m[TYPE_SUBMUL_1];
! 698: COPY (TYPE_ADDMUL_1);
! 699: REFERENCE (refmpn_submul_1);
! 700:
! 701: p = ¶m[TYPE_SUBMUL_1C];
! 702: COPY (TYPE_ADDMUL_1C);
! 703: REFERENCE (refmpn_submul_1c);
! 704:
! 705:
! 706: p = ¶m[TYPE_AND_N];
! 707: p->dst[0] = 1;
! 708: p->src[0] = 1;
! 709: p->src[1] = 1;
! 710: REFERENCE (refmpn_and_n);
! 711:
! 712: p = ¶m[TYPE_ANDN_N];
! 713: COPY (TYPE_AND_N);
! 714: REFERENCE (refmpn_andn_n);
! 715:
! 716: p = ¶m[TYPE_NAND_N];
! 717: COPY (TYPE_AND_N);
! 718: REFERENCE (refmpn_nand_n);
! 719:
! 720: p = ¶m[TYPE_IOR_N];
! 721: COPY (TYPE_AND_N);
! 722: REFERENCE (refmpn_ior_n);
! 723:
! 724: p = ¶m[TYPE_IORN_N];
! 725: COPY (TYPE_AND_N);
! 726: REFERENCE (refmpn_iorn_n);
! 727:
! 728: p = ¶m[TYPE_NIOR_N];
! 729: COPY (TYPE_AND_N);
! 730: REFERENCE (refmpn_nior_n);
! 731:
! 732: p = ¶m[TYPE_XOR_N];
! 733: COPY (TYPE_AND_N);
! 734: REFERENCE (refmpn_xor_n);
! 735:
! 736: p = ¶m[TYPE_XNOR_N];
! 737: COPY (TYPE_AND_N);
! 738: REFERENCE (refmpn_xnor_n);
! 739:
! 740:
! 741: p = ¶m[TYPE_ADDSUB_N];
! 742: p->retval = 1;
! 743: p->dst[0] = 1;
! 744: p->dst[1] = 1;
! 745: p->src[0] = 1;
! 746: p->src[1] = 1;
! 747: REFERENCE (refmpn_addsub_n);
! 748:
! 749: p = ¶m[TYPE_ADDSUB_NC];
! 750: COPY (TYPE_ADDSUB_N);
! 751: p->carry = CARRY_4;
! 752: REFERENCE (refmpn_addsub_nc);
! 753:
! 754:
! 755: p = ¶m[TYPE_COPY];
! 756: p->dst[0] = 1;
! 757: p->src[0] = 1;
! 758: p->overlap = OVERLAP_NONE;
! 759: p->size = SIZE_ALLOW_ZERO;
! 760: REFERENCE (refmpn_copy);
! 761:
! 762: p = ¶m[TYPE_COPYI];
! 763: p->dst[0] = 1;
! 764: p->src[0] = 1;
! 765: p->overlap = OVERLAP_LOW_TO_HIGH;
! 766: p->size = SIZE_ALLOW_ZERO;
! 767: REFERENCE (refmpn_copyi);
! 768:
! 769: p = ¶m[TYPE_COPYD];
! 770: p->dst[0] = 1;
! 771: p->src[0] = 1;
! 772: p->overlap = OVERLAP_HIGH_TO_LOW;
! 773: p->size = SIZE_ALLOW_ZERO;
! 774: REFERENCE (refmpn_copyd);
! 775:
! 776: p = ¶m[TYPE_COM_N];
! 777: p->dst[0] = 1;
! 778: p->src[0] = 1;
! 779: REFERENCE (refmpn_com_n);
! 780:
! 781:
! 782: p = ¶m[TYPE_MOD_1];
! 783: p->retval = 1;
! 784: p->src[0] = 1;
! 785: p->size = SIZE_ALLOW_ZERO;
! 786: p->divisor = DIVISOR_LIMB;
! 787: REFERENCE (refmpn_mod_1);
! 788:
! 789: p = ¶m[TYPE_MOD_1C];
! 790: COPY (TYPE_MOD_1);
! 791: p->carry = CARRY_DIVISOR;
! 792: REFERENCE (refmpn_mod_1c);
! 793:
! 794: p = ¶m[TYPE_DIVMOD_1];
! 795: COPY (TYPE_MOD_1);
! 796: p->dst[0] = 1;
! 797: REFERENCE (refmpn_divmod_1);
! 798:
! 799: p = ¶m[TYPE_DIVMOD_1C];
! 800: COPY (TYPE_DIVMOD_1);
! 801: p->carry = CARRY_DIVISOR;
! 802: REFERENCE (refmpn_divmod_1c);
! 803:
! 804: p = ¶m[TYPE_DIVREM_1];
! 805: COPY (TYPE_DIVMOD_1);
! 806: p->size2 = SIZE_FRACTION;
! 807: p->dst_size[0] = SIZE_SUM;
! 808: REFERENCE (refmpn_divrem_1);
! 809:
! 810: p = ¶m[TYPE_DIVREM_1C];
! 811: COPY (TYPE_DIVREM_1);
! 812: p->carry = CARRY_DIVISOR;
! 813: REFERENCE (refmpn_divrem_1c);
! 814:
! 815: p = ¶m[TYPE_PREINV_DIVREM_1];
! 816: COPY (TYPE_DIVREM_1);
! 817: p->size = SIZE_YES; /* ie. no size==0 */
! 818: REFERENCE (refmpn_preinv_divrem_1);
! 819:
! 820: p = ¶m[TYPE_PREINV_MOD_1];
! 821: p->retval = 1;
! 822: p->src[0] = 1;
! 823: p->divisor = DIVISOR_NORM;
! 824: REFERENCE (refmpn_preinv_mod_1);
! 825:
! 826: p = ¶m[TYPE_MOD_34LSUB1];
! 827: p->retval = 1;
! 828: p->src[0] = 1;
! 829: VALIDATE (validate_mod_34lsub1);
! 830:
! 831: p = ¶m[TYPE_UDIV_QRNND];
! 832: p->retval = 1;
! 833: p->src[0] = 1;
! 834: p->dst[0] = 1;
! 835: p->dst_size[0] = SIZE_1;
! 836: p->divisor = DIVISOR_LIMB;
! 837: p->data = DATA_UDIV_QRNND;
! 838: p->overlap = OVERLAP_NONE;
! 839: REFERENCE (refmpn_udiv_qrnnd);
! 840:
! 841:
! 842: p = ¶m[TYPE_DIVEXACT_1];
! 843: p->dst[0] = 1;
! 844: p->src[0] = 1;
! 845: p->divisor = 1;
! 846: p->data = DATA_MULTIPLE_DIVISOR;
! 847: VALIDATE (validate_divexact_1);
! 848: REFERENCE (refmpn_divmod_1);
! 849:
! 850:
! 851: p = ¶m[TYPE_DIVEXACT_BY3];
! 852: p->retval = 1;
! 853: p->dst[0] = 1;
! 854: p->src[0] = 1;
! 855: REFERENCE (refmpn_divexact_by3);
! 856:
! 857: p = ¶m[TYPE_DIVEXACT_BY3C];
! 858: COPY (TYPE_DIVEXACT_BY3);
! 859: p->carry = CARRY_3;
! 860: REFERENCE (refmpn_divexact_by3c);
! 861:
! 862:
! 863: p = ¶m[TYPE_MODEXACT_1_ODD];
! 864: p->retval = 1;
! 865: p->src[0] = 1;
! 866: p->divisor = DIVISOR_ODD;
! 867: VALIDATE (validate_modexact_1_odd);
! 868:
! 869: p = ¶m[TYPE_MODEXACT_1C_ODD];
! 870: COPY (TYPE_MODEXACT_1_ODD);
! 871: p->carry = CARRY_LIMB;
! 872: VALIDATE (validate_modexact_1c_odd);
! 873:
! 874:
! 875: p = ¶m[TYPE_GCD_1];
! 876: p->retval = 1;
! 877: p->src[0] = 1;
! 878: p->data = DATA_NON_ZERO;
! 879: p->divisor = DIVISOR_LIMB;
! 880: REFERENCE (refmpn_gcd_1);
! 881:
! 882: p = ¶m[TYPE_GCD];
! 883: p->retval = 1;
! 884: p->dst[0] = 1;
! 885: p->src[0] = 1;
! 886: p->src[1] = 1;
! 887: p->size2 = 1;
! 888: p->dst_size[0] = SIZE_RETVAL;
! 889: p->overlap = OVERLAP_NOT_SRCS;
! 890: p->data = DATA_GCD;
! 891: REFERENCE (refmpn_gcd);
! 892:
! 893: /* FIXME: size==2 */
! 894: p = ¶m[TYPE_GCD_FINDA];
! 895: p->retval = 1;
! 896: p->src[0] = 1;
! 897: REFERENCE (refmpn_gcd_finda);
! 898:
! 899:
! 900: p = ¶m[TYPE_MPZ_JACOBI];
! 901: p->retval = 1;
! 902: p->src[0] = 1;
! 903: p->size = SIZE_ALLOW_ZERO;
! 904: p->src[1] = 1;
! 905: p->size2 = 1;
! 906: p->carry = CARRY_4;
! 907: p->carry_sign = 1;
! 908: REFERENCE (refmpz_jacobi);
! 909:
! 910: p = ¶m[TYPE_MPZ_KRONECKER];
! 911: COPY (TYPE_MPZ_JACOBI);
! 912: REFERENCE (refmpz_kronecker);
! 913:
! 914:
! 915: p = ¶m[TYPE_MPZ_KRONECKER_UI];
! 916: p->retval = 1;
! 917: p->src[0] = 1;
! 918: p->size = SIZE_ALLOW_ZERO;
! 919: p->multiplier = 1;
! 920: p->carry = CARRY_BIT;
! 921: REFERENCE (refmpz_kronecker_ui);
! 922:
! 923: p = ¶m[TYPE_MPZ_KRONECKER_SI];
! 924: COPY (TYPE_MPZ_KRONECKER_UI);
! 925: REFERENCE (refmpz_kronecker_si);
! 926:
! 927: p = ¶m[TYPE_MPZ_UI_KRONECKER];
! 928: COPY (TYPE_MPZ_KRONECKER_UI);
! 929: REFERENCE (refmpz_ui_kronecker);
! 930:
! 931: p = ¶m[TYPE_MPZ_SI_KRONECKER];
! 932: COPY (TYPE_MPZ_KRONECKER_UI);
! 933: REFERENCE (refmpz_si_kronecker);
! 934:
! 935:
! 936: p = ¶m[TYPE_SQR];
! 937: p->dst[0] = 1;
! 938: p->src[0] = 1;
! 939: p->dst_size[0] = SIZE_SUM;
! 940: p->overlap = OVERLAP_NONE;
! 941: REFERENCE (refmpn_sqr);
! 942:
! 943: p = ¶m[TYPE_MUL_N];
! 944: COPY (TYPE_SQR);
! 945: p->src[1] = 1;
! 946: REFERENCE (refmpn_mul_n);
! 947:
! 948: p = ¶m[TYPE_MUL_BASECASE];
! 949: COPY (TYPE_MUL_N);
! 950: p->size2 = 1;
! 951: REFERENCE (refmpn_mul_basecase);
! 952:
! 953: p = ¶m[TYPE_UMUL_PPMM];
! 954: p->retval = 1;
! 955: p->src[0] = 1;
! 956: p->dst[0] = 1;
! 957: p->dst_size[0] = SIZE_1;
! 958: p->overlap = OVERLAP_NONE;
! 959: REFERENCE (refmpn_umul_ppmm);
! 960:
! 961: p = ¶m[TYPE_UMUL_PPMM_R];
! 962: COPY (TYPE_UMUL_PPMM);
! 963: REFERENCE (refmpn_umul_ppmm_r);
! 964:
! 965:
! 966: p = ¶m[TYPE_RSHIFT];
! 967: p->retval = 1;
! 968: p->dst[0] = 1;
! 969: p->src[0] = 1;
! 970: p->shift = 1;
! 971: p->overlap = OVERLAP_LOW_TO_HIGH;
! 972: REFERENCE (refmpn_rshift);
! 973:
! 974: p = ¶m[TYPE_LSHIFT];
! 975: COPY (TYPE_RSHIFT);
! 976: p->overlap = OVERLAP_HIGH_TO_LOW;
! 977: REFERENCE (refmpn_lshift);
! 978:
! 979:
! 980: p = ¶m[TYPE_POPCOUNT];
! 981: p->retval = 1;
! 982: p->src[0] = 1;
! 983: REFERENCE (refmpn_popcount);
! 984:
! 985: p = ¶m[TYPE_HAMDIST];
! 986: COPY (TYPE_POPCOUNT);
! 987: p->src[1] = 1;
! 988: REFERENCE (refmpn_hamdist);
! 989:
! 990:
! 991: p = ¶m[TYPE_SB_DIVREM_MN];
! 992: p->retval = 1;
! 993: p->dst[0] = 1;
! 994: p->dst[1] = 1;
! 995: p->src[0] = 1;
! 996: p->src[1] = 1;
! 997: p->data = DATA_SRC1_HIGHBIT;
! 998: p->size2 = 1;
! 999: p->dst_size[0] = SIZE_DIFF;
! 1000: p->overlap = OVERLAP_NONE;
! 1001: REFERENCE (refmpn_sb_divrem_mn);
! 1002:
! 1003: p = ¶m[TYPE_TDIV_QR];
! 1004: p->dst[0] = 1;
! 1005: p->dst[1] = 1;
! 1006: p->src[0] = 1;
! 1007: p->src[1] = 1;
! 1008: p->size2 = 1;
! 1009: p->dst_size[0] = SIZE_DIFF_PLUS_1;
! 1010: p->dst_size[1] = SIZE_SIZE2;
! 1011: p->overlap = OVERLAP_NONE;
! 1012: REFERENCE (refmpn_tdiv_qr);
! 1013:
! 1014: p = ¶m[TYPE_SQRTREM];
! 1015: p->retval = 1;
! 1016: p->dst[0] = 1;
! 1017: p->dst[1] = 1;
! 1018: p->src[0] = 1;
! 1019: p->dst_size[0] = SIZE_CEIL_HALF;
! 1020: p->dst_size[1] = SIZE_RETVAL;
! 1021: p->overlap = OVERLAP_NONE;
! 1022: VALIDATE (validate_sqrtrem);
! 1023:
! 1024: p = ¶m[TYPE_ZERO];
! 1025: p->dst[0] = 1;
! 1026: p->size = SIZE_ALLOW_ZERO;
! 1027: REFERENCE (refmpn_zero);
! 1028:
! 1029: p = ¶m[TYPE_GET_STR];
! 1030: p->src[0] = 1;
! 1031: p->size = SIZE_ALLOW_ZERO;
! 1032: p->dst[0] = 1;
! 1033: p->dst[1] = 1;
! 1034: p->dst_size[0] = SIZE_GET_STR;
! 1035: p->dst_bytes[0] = 1;
! 1036: p->overlap = OVERLAP_NONE;
! 1037: REFERENCE (refmpn_get_str);
! 1038:
! 1039: #ifdef EXTRA_PARAM_INIT
! 1040: EXTRA_PARAM_INIT
! 1041: #endif
! 1042: }
! 1043:
! 1044:
! 1045: /* The following are macros if there are no native versions, so wrap them in
! 1046: functions that can be in choice_array[]. */
! 1047:
! 1048: void
! 1049: MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 1050: { MPN_COPY (rp, sp, size); }
! 1051:
! 1052: void
! 1053: MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 1054: { MPN_COPY_INCR (rp, sp, size); }
! 1055:
! 1056: void
! 1057: MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 1058: { MPN_COPY_DECR (rp, sp, size); }
! 1059:
! 1060: void
! 1061: __GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 1062: { __GMPN_COPY (rp, sp, size); }
! 1063:
#ifdef __GMPN_COPY_INCR
/* Function wrapper around __GMPN_COPY_INCR, only built when that macro is
   defined.  */
void
__GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  __GMPN_COPY_INCR (rp, sp, size);
}
#endif
! 1069:
! 1070: void
! 1071: mpn_com_n_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 1072: { mpn_com_n (rp, sp, size); }
! 1073:
! 1074: void
! 1075: mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
! 1076: { mpn_and_n (rp, s1, s2, size); }
! 1077:
! 1078: void
! 1079: mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
! 1080: { mpn_andn_n (rp, s1, s2, size); }
! 1081:
! 1082: void
! 1083: mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
! 1084: { mpn_nand_n (rp, s1, s2, size); }
! 1085:
! 1086: void
! 1087: mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
! 1088: { mpn_ior_n (rp, s1, s2, size); }
! 1089:
! 1090: void
! 1091: mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
! 1092: { mpn_iorn_n (rp, s1, s2, size); }
! 1093:
! 1094: void
! 1095: mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
! 1096: { mpn_nior_n (rp, s1, s2, size); }
! 1097:
! 1098: void
! 1099: mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
! 1100: { mpn_xor_n (rp, s1, s2, size); }
! 1101:
! 1102: void
! 1103: mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
! 1104: { mpn_xnor_n (rp, s1, s2, size); }
! 1105:
! 1106: mp_limb_t
! 1107: udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
! 1108: {
! 1109: mp_limb_t q;
! 1110: udiv_qrnnd (q, *remptr, n1, n0, d);
! 1111: return q;
! 1112: }
! 1113:
! 1114: void
! 1115: mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 1116: { mpn_divexact_by3 (rp, sp, size); }
! 1117:
! 1118: void
! 1119: mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
! 1120: { mpn_modexact_1_odd (ptr, size, divisor); }
! 1121:
! 1122: void
! 1123: mpn_kara_mul_n_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
! 1124: {
! 1125: mp_ptr tspace;
! 1126: TMP_DECL (marker);
! 1127: TMP_MARK (marker);
! 1128: tspace = TMP_ALLOC_LIMBS (MPN_KARA_MUL_N_TSIZE (size));
! 1129: mpn_kara_mul_n (dst, src1, src2, size, tspace);
! 1130: }
! 1131: void
! 1132: mpn_kara_sqr_n_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
! 1133: {
! 1134: mp_ptr tspace;
! 1135: TMP_DECL (marker);
! 1136: TMP_MARK (marker);
! 1137: tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (size));
! 1138: mpn_kara_sqr_n (dst, src, size, tspace);
! 1139: TMP_FREE (marker);
! 1140: }
! 1141: void
! 1142: mpn_toom3_mul_n_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
! 1143: {
! 1144: mp_ptr tspace;
! 1145: TMP_DECL (marker);
! 1146: TMP_MARK (marker);
! 1147: tspace = TMP_ALLOC_LIMBS (MPN_TOOM3_MUL_N_TSIZE (size));
! 1148: mpn_toom3_mul_n (dst, src1, src2, size, tspace);
! 1149: }
! 1150: void
! 1151: mpn_toom3_sqr_n_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
! 1152: {
! 1153: mp_ptr tspace;
! 1154: TMP_DECL (marker);
! 1155: TMP_MARK (marker);
! 1156: tspace = TMP_ALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (size));
! 1157: mpn_toom3_sqr_n (dst, src, size, tspace);
! 1158: TMP_FREE (marker);
! 1159: }
! 1160: mp_limb_t
! 1161: umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
! 1162: {
! 1163: mp_limb_t high;
! 1164: umul_ppmm (high, *lowptr, m1, m2);
! 1165: return high;
! 1166: }
! 1167: mp_limb_t
! 1168: mpn_umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
! 1169: {
! 1170: mp_limb_t high;
! 1171: umul_ppmm (high, *lowptr, m1, m2);
! 1172: return high;
! 1173: }
! 1174:
! 1175: void
! 1176: MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
! 1177: { MPN_ZERO (ptr, size); }
! 1178:
! 1179:
/* Describes one routine that can be selected for testing.  The field order
   matters: choice_array fills these in positionally.  */
! 1180: struct choice_t {
! 1181: const char *name; /* printed by print_each to identify the routine */
! 1182: tryfun_t function; /* routine under test, handed to call() by try_one */
! 1183: int type; /* TYPE_xxx code; call() switches on it to choose the argument list */
! 1184: mp_size_t minsize; /* lower bound on tested sizes, see SIZE_ITERATION and SIZE2_FIRST */
! 1185: };
! 1186:
/* TRY(fun) supplies the first two initializers of a struct choice_t: the
   routine's name as a string and the routine itself cast to tryfun_t.
   TRY_FUNFUN(fun) gives the same name string but points at the wrapper
   whose name is fun with "_fun" appended.

   The #else forms are used when HAVE_STRINGIZE is not set.
   NOTE(review): they look like they rely on a pre-ANSI preprocessor
   substituting macro arguments inside string literals and joining tokens
   across an empty comment -- confirm before depending on them.  */
! 1187: #if HAVE_STRINGIZE
! 1188: #define TRY(fun) #fun, (tryfun_t) fun
! 1189: #define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun
! 1190: #else
! 1191: #define TRY(fun) "fun", (tryfun_t) fun
! 1192: #define TRY_FUNFUN(fun) "fun", (tryfun_t) fun/**/_fun
! 1193: #endif
! 1194:
/* Table of the routines that can be tested.  Each entry gives the name and
   function pointer (via TRY or TRY_FUNFUN), the TYPE_xxx code that call()
   uses to choose the argument list, and optionally a minimum size.  Entries
   that leave the minimum size out get 0 from the usual C rule for missing
   initializers.  Entries inside #if / #ifdef are only present when the
   corresponding macro is set.  EXTRA_ROUTINES, when defined, is expanded at
   the end of the table.  */
! 1195: const struct choice_t choice_array[] = {
! 1196: { TRY(mpn_add), TYPE_ADD },
! 1197: { TRY(mpn_sub), TYPE_SUB },
! 1198:
! 1199: { TRY(mpn_add_n), TYPE_ADD_N },
! 1200: { TRY(mpn_sub_n), TYPE_SUB_N },
! 1201:
! 1202: #if HAVE_NATIVE_mpn_add_nc
! 1203: { TRY(mpn_add_nc), TYPE_ADD_NC },
! 1204: #endif
! 1205: #if HAVE_NATIVE_mpn_sub_nc
! 1206: { TRY(mpn_sub_nc), TYPE_SUB_NC },
! 1207: #endif
! 1208:
! 1209: { TRY(mpn_addmul_1), TYPE_ADDMUL_1 },
! 1210: { TRY(mpn_submul_1), TYPE_SUBMUL_1 },
! 1211: #if HAVE_NATIVE_mpn_addmul_1c
! 1212: { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
! 1213: #endif
! 1214: #if HAVE_NATIVE_mpn_submul_1c
! 1215: { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
! 1216: #endif
! 1217:
! 1218: { TRY_FUNFUN(mpn_com_n), TYPE_COM_N },
! 1219:
! 1220: { TRY_FUNFUN(MPN_COPY), TYPE_COPY },
! 1221: { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
! 1222: { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },
! 1223:
! 1224: { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY },
! 1225: #ifdef __GMPN_COPY_INCR
! 1226: { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
! 1227: #endif
! 1228:
! 1229: { TRY_FUNFUN(mpn_and_n), TYPE_AND_N },
! 1230: { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
! 1231: { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
! 1232: { TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N },
! 1233: { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
! 1234: { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
! 1235: { TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N },
! 1236: { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },
! 1237:
! 1238: { TRY(mpn_divrem_1), TYPE_DIVREM_1 },
! 1239: #if USE_PREINV_DIVREM_1
! 1240: { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
! 1241: #endif
! 1242: { TRY(mpn_mod_1), TYPE_MOD_1 },
! 1243: { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
! 1244: #if HAVE_NATIVE_mpn_divrem_1c
! 1245: { TRY(mpn_divrem_1c), TYPE_DIVREM_1C },
! 1246: #endif
! 1247: #if HAVE_NATIVE_mpn_mod_1c
! 1248: { TRY(mpn_mod_1c), TYPE_MOD_1C },
! 1249: #endif
! 1250: { TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 },
! 1251: { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
! 1252:
! 1253: { TRY(mpn_divexact_1), TYPE_DIVEXACT_1 },
! 1254: { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
! 1255: { TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C },
! 1256:
! 1257: { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
! 1258: { TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD },
! 1259:
! 1260:
! 1261: { TRY(mpn_sb_divrem_mn), TYPE_SB_DIVREM_MN, 3},
! 1262: { TRY(mpn_tdiv_qr), TYPE_TDIV_QR },
! 1263:
! 1264: { TRY(mpn_mul_1), TYPE_MUL_1 },
! 1265: #if HAVE_NATIVE_mpn_mul_1c
! 1266: { TRY(mpn_mul_1c), TYPE_MUL_1C },
! 1267: #endif
! 1268: #if HAVE_NATIVE_mpn_mul_2
! 1269: { TRY(mpn_mul_2), TYPE_MUL_2 },
! 1270: #endif
! 1271:
! 1272: { TRY(mpn_rshift), TYPE_RSHIFT },
! 1273: { TRY(mpn_lshift), TYPE_LSHIFT },
! 1274:
! 1275:
! 1276: { TRY(mpn_mul_basecase), TYPE_MUL_BASECASE },
! 1277: { TRY(mpn_sqr_basecase), TYPE_SQR },
! 1278:
/* mpn_mul is called with the same argument list as mpn_mul_basecase, hence
   the shared type code.  */
! 1279: { TRY(mpn_mul), TYPE_MUL_BASECASE },
! 1280: { TRY(mpn_mul_n), TYPE_MUL_N },
! 1281: { TRY(mpn_sqr_n), TYPE_SQR },
! 1282:
! 1283: { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
! 1284:
! 1285: { TRY_FUNFUN(mpn_kara_mul_n), TYPE_MUL_N, MPN_KARA_MUL_N_MINSIZE },
! 1286: { TRY_FUNFUN(mpn_kara_sqr_n), TYPE_SQR, MPN_KARA_SQR_N_MINSIZE },
! 1287: { TRY_FUNFUN(mpn_toom3_mul_n), TYPE_MUL_N, MPN_TOOM3_MUL_N_MINSIZE },
! 1288: { TRY_FUNFUN(mpn_toom3_sqr_n), TYPE_SQR, MPN_TOOM3_SQR_N_MINSIZE },
! 1289:
! 1290: { TRY(mpn_gcd_1), TYPE_GCD_1 },
! 1291: { TRY(mpn_gcd), TYPE_GCD },
! 1292: #if HAVE_NATIVE_mpn_gcd_finda
! 1293: { TRY(mpn_gcd_finda), TYPE_GCD_FINDA },
! 1294: #endif
! 1295: { TRY(mpz_jacobi), TYPE_MPZ_JACOBI },
! 1296: { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
! 1297: { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
! 1298: { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
! 1299: { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },
! 1300:
! 1301: { TRY(mpn_popcount), TYPE_POPCOUNT },
! 1302: { TRY(mpn_hamdist), TYPE_HAMDIST },
! 1303:
! 1304: { TRY(mpn_sqrtrem), TYPE_SQRTREM },
! 1305:
! 1306: { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },
! 1307:
! 1308: { TRY(mpn_get_str), TYPE_GET_STR },
! 1309:
! 1310: #ifdef EXTRA_ROUTINES
! 1311: EXTRA_ROUTINES
! 1312: #endif
! 1313: };
! 1314:
/* The table entry being tested.  call() reads its type, print_each() its
   name, try_one() its function and the size iteration macros its
   minsize.  */
! 1315: const struct choice_t *choice = NULL;
! 1316:
! 1317:
! 1318: void
! 1319: mprotect_maybe (void *addr, size_t len, int prot)
! 1320: {
! 1321: if (!option_redzones)
! 1322: return;
! 1323:
! 1324: #if HAVE_MPROTECT
! 1325: if (mprotect (addr, len, prot) != 0)
! 1326: {
! 1327: fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X\n", addr, len, prot);
! 1328: exit (1);
! 1329: }
! 1330: #else
! 1331: {
! 1332: static int warned = 0;
! 1333: if (!warned)
! 1334: {
! 1335: fprintf (stderr,
! 1336: "mprotect not available, bounds testing not performed\n");
! 1337: warned = 1;
! 1338: }
! 1339: }
! 1340: #endif
! 1341: }
! 1342:
/* Round "a" up to a multiple of "m" and return the result; "a" is returned
   unchanged when it is already a multiple.  "m" must be non-zero.

   The remainder is kept in a size_t, the same type as the operands, rather
   than an unsigned long which can be narrower than size_t on some
   platforms.  */
size_t
round_up_multiple (size_t a, size_t m)
{
  size_t  r;

  r = a % m;
  if (r == 0)
    return a;
  else
    return a + (m - r);
}
! 1355:
! 1356:
! 1357: /* On some systems it seems that only an mmap'ed region can be mprotect'ed,
! 1358: for instance HP-UX 10.
! 1359:
! 1360: mmap will almost certainly return a pointer already aligned to a page
! 1361: boundary, but it's easy enough to share the alignment handling with the
! 1362: malloc case. */
! 1363:
! 1364: void
! 1365: malloc_region (struct region_t *r, mp_size_t n)
! 1366: {
! 1367: mp_ptr p;
! 1368: size_t nbytes;
! 1369:
! 1370: ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0);
! 1371:
! 1372: n = round_up_multiple (n, PAGESIZE_LIMBS);
! 1373: r->size = n;
! 1374:
! 1375: nbytes = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize;
! 1376:
! 1377: #if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
! 1378: #define MAP_ANON MAP_ANONYMOUS
! 1379: #endif
! 1380:
! 1381: #if HAVE_MMAP && defined (MAP_ANON)
! 1382: /* note must pass fd=-1 for MAP_ANON on BSD */
! 1383: p = mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
! 1384: if (p == (void *) -1)
! 1385: {
! 1386: fprintf (stderr, "Cannot mmap %#x anon bytes\n", nbytes);
! 1387: exit (1);
! 1388: }
! 1389: #else
! 1390: p = malloc (nbytes);
! 1391: ASSERT_ALWAYS (p != NULL);
! 1392: #endif
! 1393:
! 1394: p = align_pointer (p, pagesize);
! 1395:
! 1396: mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
! 1397: p += REDZONE_LIMBS;
! 1398: r->ptr = p;
! 1399:
! 1400: mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);
! 1401: }
! 1402:
! 1403: void
! 1404: mprotect_region (const struct region_t *r, int prot)
! 1405: {
! 1406: mprotect_maybe (r->ptr, r->size, prot);
! 1407: }
! 1408:
! 1409:
! 1410: /* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,
! 1411: and CARRY_4 */
! 1412: mp_limb_t carry_array[] = {
! 1413: 0, 1, 2, 3,
! 1414: 4,
! 1415: CNST_LIMB(1) << 8,
! 1416: CNST_LIMB(1) << 16,
! 1417: GMP_NUMB_MAX
! 1418: };
/* Position within the carry values, stepped by CARRY_ITERATION.  */
! 1419: int carry_index;
! 1420:
/* Number of carry values to try for the current tr->carry setting: 2, 3 or
   4 for CARRY_BIT, CARRY_3 and CARRY_4 (the leading entries of
   carry_array); every carry_array entry plus CARRY_RANDOMS random values
   for CARRY_LIMB and CARRY_DIVISOR; otherwise just 1.  */
! 1421: #define CARRY_COUNT \
! 1422: ((tr->carry == CARRY_BIT) ? 2 \
! 1423: : tr->carry == CARRY_3 ? 3 \
! 1424: : tr->carry == CARRY_4 ? 4 \
! 1425: : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR) \
! 1426: ? numberof(carry_array) + CARRY_RANDOMS \
! 1427: : 1)
! 1428:
/* Fill {dst,size} with refmpn_random when index is odd and with
   refmpn_random2 when it is even.  */
! 1429: #define MPN_RANDOM_ALT(index,dst,size) \
! 1430: (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))
! 1431:
/* Loop header stepping carry_index from 0 while it is below CARRY_COUNT.
   Each time the condition is evaluated, carry is first loaded from
   carry_array, or generated randomly once the index is past the end of the
   array, and then reduced modulo divisor when tr->carry is
   CARRY_DIVISOR.  */
! 1432: /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
! 1433: the same type */
! 1434: #define CARRY_ITERATION \
! 1435: for (carry_index = 0; \
! 1436: (carry_index < numberof (carry_array) \
! 1437: ? (carry = carry_array[carry_index]) \
! 1438: : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)), \
! 1439: (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0), \
! 1440: carry_index < CARRY_COUNT; \
! 1441: carry_index++)
! 1442:
! 1443:
/* Fixed multiplier values tried before any random ones.  */
! 1444: mp_limb_t multiplier_array[] = {
! 1445: 0, 1, 2, 3,
! 1446: CNST_LIMB(1) << 8,
! 1447: CNST_LIMB(1) << 16,
! 1448: GMP_NUMB_MAX - 2,
! 1449: GMP_NUMB_MAX - 1,
! 1450: GMP_NUMB_MAX
! 1451: };
! 1452: int multiplier_index;
! 1453:
/* Fixed divisor values tried before any random ones; zero is not among
   them.  */
! 1454: mp_limb_t divisor_array[] = {
! 1455: 1, 2, 3,
! 1456: CNST_LIMB(1) << 8,
! 1457: CNST_LIMB(1) << 16,
! 1458: GMP_NUMB_HIGHBIT,
! 1459: GMP_NUMB_HIGHBIT + 1,
! 1460: GMP_NUMB_MAX - 2,
! 1461: GMP_NUMB_MAX - 1,
! 1462: GMP_NUMB_MAX
! 1463: };
! 1464:
! 1465: int divisor_index;
! 1466:
/* Loop header stepping "index" from 0 while it is below "limit".  Each time
   the condition is evaluated, "var" is first loaded from "array", or
   generated randomly once the index is past the end of the array.  The
   "randoms" and "cond" parameters do not appear in the expansion.  */
! 1467: /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
! 1468: the same type */
! 1469: #define ARRAY_ITERATION(var, index, limit, array, randoms, cond) \
! 1470: for (index = 0; \
! 1471: (index < numberof (array) \
! 1472: ? (var = array[index]) \
! 1473: : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)), \
! 1474: index < limit; \
! 1475: index++)
! 1476:
/* All of multiplier_array plus MULTIPLIER_RANDOMS random values when the
   routine takes a multiplier (tr->multiplier), otherwise a single pass.  */
! 1477: #define MULTIPLIER_COUNT \
! 1478: (tr->multiplier \
! 1479: ? numberof (multiplier_array) + MULTIPLIER_RANDOMS \
! 1480: : 1)
! 1481:
! 1482: #define MULTIPLIER_ITERATION \
! 1483: ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT, \
! 1484: multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)
! 1485:
/* All of divisor_array plus DIVISOR_RANDOMS random values when the routine
   takes a divisor (tr->divisor), otherwise a single pass.  */
! 1486: #define DIVISOR_COUNT \
! 1487: (tr->divisor \
! 1488: ? numberof (divisor_array) + DIVISOR_RANDOMS \
! 1489: : 1)
! 1490:
! 1491: #define DIVISOR_ITERATION \
! 1492: ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
! 1493: DIVISOR_RANDOMS, TRY_DIVISOR)
! 1494:
! 1495:
! 1496: /* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
! 1497: d[0] or d[1] respectively, -1 means a separate (write-protected)
! 1498: location. */
! 1499:
/* The order of the entries matters: OVERLAP_COUNT selects a leading prefix
   of this array, so the combinations usable by more restricted routines
   come first.  */
! 1500: struct overlap_t {
! 1501: int s[NUM_SOURCES];
! 1502: } overlap_array[] = {
! 1503: { { -1, -1 } },
! 1504: { { 0, -1 } },
! 1505: { { -1, 0 } },
! 1506: { { 0, 0 } },
! 1507: { { 1, -1 } },
! 1508: { { -1, 1 } },
! 1509: { { 1, 1 } },
! 1510: { { 0, 1 } },
! 1511: { { 1, 0 } },
! 1512: };
! 1513:
/* overlap is the combination in use; overlap_limit is the end of the prefix
   being iterated.  Both are set by OVERLAP_ITERATION.  */
! 1514: struct overlap_t *overlap, *overlap_limit;
! 1515:
/* How many leading overlap_array entries apply to the current routine.  The
   tr->overlap flags are tested first; failing those, the count follows from
   which destinations and sources the routine has.  */
! 1516: #define OVERLAP_COUNT \
! 1517: (tr->overlap & OVERLAP_NONE ? 1 \
! 1518: : tr->overlap & OVERLAP_NOT_SRCS ? 3 \
! 1519: : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
! 1520: : tr->dst[1] ? 9 \
! 1521: : tr->src[1] ? 4 \
! 1522: : tr->dst[0] ? 2 \
! 1523: : 1)
! 1524:
/* Loop header walking overlap through the first OVERLAP_COUNT entries of
   overlap_array.  */
! 1525: #define OVERLAP_ITERATION \
! 1526: for (overlap = &overlap_array[0], \
! 1527: overlap_limit = &overlap_array[OVERLAP_COUNT]; \
! 1528: overlap < overlap_limit; \
! 1529: overlap++)
! 1530:
! 1531:
! 1532: int base = 10;
! 1533:
! 1534: #define T_RAND_COUNT 2
! 1535: int t_rand;
! 1536:
! 1537: void
! 1538: t_random (mp_ptr ptr, mp_size_t n)
! 1539: {
! 1540: if (n == 0)
! 1541: return;
! 1542:
! 1543: switch (option_data) {
! 1544: case DATA_TRAND:
! 1545: switch (t_rand) {
! 1546: case 0: refmpn_random (ptr, n); break;
! 1547: case 1: refmpn_random2 (ptr, n); break;
! 1548: default: abort();
! 1549: }
! 1550: break;
! 1551: case DATA_SEQ:
! 1552: {
! 1553: static mp_limb_t counter = 0;
! 1554: mp_size_t i;
! 1555: for (i = 0; i < n; i++)
! 1556: ptr[i] = ++counter;
! 1557: }
! 1558: break;
! 1559: case DATA_ZEROS:
! 1560: refmpn_zero (ptr, n);
! 1561: break;
! 1562: case DATA_FFS:
! 1563: refmpn_fill (ptr, n, GMP_NUMB_MAX);
! 1564: break;
! 1565: case DATA_2FD:
! 1566: /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
! 1567: inducing the q1_ff special case in the mul-by-inverse part of some
! 1568: versions of divrem_1 and mod_1. */
! 1569: refmpn_fill (ptr, n, (mp_limb_t) -1);
! 1570: ptr[n-1] = 2;
! 1571: ptr[0] -= 2;
! 1572: break;
! 1573:
! 1574: default:
! 1575: abort();
! 1576: }
! 1577: }
! 1578: #define T_RAND_ITERATION \
! 1579: for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)
! 1580:
! 1581:
! 1582: void
! 1583: print_each (const struct each_t *e)
! 1584: {
! 1585: int i;
! 1586:
! 1587: printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
! 1588: if (tr->retval)
! 1589: mpn_trace (" retval", &e->retval, 1);
! 1590:
! 1591: for (i = 0; i < NUM_DESTS; i++)
! 1592: {
! 1593: if (tr->dst[i])
! 1594: {
! 1595: if (tr->dst_bytes[i])
! 1596: byte_tracen (" d[%d]", i, e->d[i].p, d[i].size);
! 1597: else
! 1598: mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size);
! 1599: printf (" located %p\n", e->d[i].p);
! 1600: }
! 1601: }
! 1602:
! 1603: for (i = 0; i < NUM_SOURCES; i++)
! 1604: if (tr->src[i])
! 1605: printf (" s[%d] located %p\n", i, e->s[i].p);
! 1606: }
! 1607:
! 1608:
! 1609: void
! 1610: print_all (void)
! 1611: {
! 1612: int i;
! 1613:
! 1614: printf ("\n");
! 1615: printf ("size %ld\n", size);
! 1616: if (tr->size2)
! 1617: printf ("size2 %ld\n", size2);
! 1618:
! 1619: for (i = 0; i < NUM_DESTS; i++)
! 1620: if (d[i].size != size)
! 1621: printf ("d[%d].size %ld\n", i, d[i].size);
! 1622:
! 1623: if (tr->multiplier)
! 1624: mpn_trace (" multiplier", &multiplier, 1);
! 1625: if (tr->divisor)
! 1626: mpn_trace (" divisor", &divisor, 1);
! 1627: if (tr->shift)
! 1628: printf (" shift %lu\n", shift);
! 1629: if (tr->carry)
! 1630: mpn_trace (" carry", &carry, 1);
! 1631:
! 1632: for (i = 0; i < NUM_DESTS; i++)
! 1633: if (tr->dst[i])
! 1634: printf (" d[%d] %s, align %ld, size %ld\n",
! 1635: i, d[i].high ? "high" : "low", d[i].align, d[i].size);
! 1636:
! 1637: for (i = 0; i < NUM_SOURCES; i++)
! 1638: {
! 1639: if (tr->src[i])
! 1640: {
! 1641: printf (" s[%d] %s, align %ld, ",
! 1642: i, s[i].high ? "high" : "low", s[i].align);
! 1643: switch (overlap->s[i]) {
! 1644: case -1:
! 1645: printf ("no overlap\n");
! 1646: break;
! 1647: default:
! 1648: printf ("==d[%d]%s\n",
! 1649: overlap->s[i],
! 1650: tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
! 1651: : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
! 1652: : "");
! 1653: break;
! 1654: }
! 1655: printf (" s[%d]=", i);
! 1656: if (tr->carry_sign && (carry & (1 << i)))
! 1657: printf ("-");
! 1658: mpn_trace (NULL, s[i].p, SRC_SIZE(i));
! 1659: }
! 1660: }
! 1661:
! 1662: if (tr->dst0_from_src1)
! 1663: mpn_trace (" d[0]", s[1].region.ptr, size);
! 1664:
! 1665: if (tr->reference)
! 1666: print_each (&ref);
! 1667: print_each (&fun);
! 1668: }
! 1669:
! 1670: void
! 1671: compare (void)
! 1672: {
! 1673: int error = 0;
! 1674: int i;
! 1675:
! 1676: if (tr->retval && ref.retval != fun.retval)
! 1677: {
! 1678: printf ("Different return values (%lu, %lu)\n",
! 1679: ref.retval, fun.retval);
! 1680: error = 1;
! 1681: }
! 1682:
! 1683: for (i = 0; i < NUM_DESTS; i++)
! 1684: {
! 1685: switch (tr->dst_size[0]) {
! 1686: case SIZE_RETVAL:
! 1687: d[i].size = ref.retval;
! 1688: break;
! 1689: }
! 1690: }
! 1691:
! 1692: for (i = 0; i < NUM_DESTS; i++)
! 1693: {
! 1694: if (! tr->dst[i])
! 1695: continue;
! 1696:
! 1697: if (tr->dst_bytes[i])
! 1698: {
! 1699: if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
! 1700: {
! 1701: printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
! 1702: i,
! 1703: byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
! 1704: byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
! 1705: error = 1;
! 1706: }
! 1707: }
! 1708: else
! 1709: {
! 1710: if (d[i].size != 0
! 1711: && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
! 1712: {
! 1713: printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
! 1714: i,
! 1715: mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
! 1716: mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
! 1717: error = 1;
! 1718: }
! 1719: }
! 1720: }
! 1721:
! 1722: if (error)
! 1723: {
! 1724: print_all();
! 1725: abort();
! 1726: }
! 1727: }
! 1728:
! 1729:
! 1730: /* The functions are cast if the return value should be a long rather than
! 1731: the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This
! 1732: might not be enough if some actual calling conventions checking is
! 1733: implemented on a long long limb system. */
! 1734:
/* Invoke FUNCTION with the argument list that goes with choice->type.
   Operands come from E (destinations e->d[].p, sources e->s[].p) and from
   the current test parameters (size, size2, carry, multiplier, divisor,
   shift, base).  For routine types that return something the result is
   stored in e->retval; the other types leave e->retval untouched.  An
   unknown type is reported and aborts.  EXTRA_CALL, when defined, is
   expanded inside the switch, so it can contribute further cases.  */
! 1735: void
! 1736: call (struct each_t *e, tryfun_t function)
! 1737: {
! 1738: switch (choice->type) {
! 1739: case TYPE_ADD:
! 1740: case TYPE_SUB:
! 1741: e->retval = CALLING_CONVENTIONS (function)
! 1742: (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
! 1743: break;
! 1744:
! 1745: case TYPE_ADD_N:
! 1746: case TYPE_SUB_N:
! 1747: e->retval = CALLING_CONVENTIONS (function)
! 1748: (e->d[0].p, e->s[0].p, e->s[1].p, size);
! 1749: break;
! 1750: case TYPE_ADD_NC:
! 1751: case TYPE_SUB_NC:
! 1752: e->retval = CALLING_CONVENTIONS (function)
! 1753: (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
! 1754: break;
! 1755:
! 1756: case TYPE_MUL_1:
! 1757: case TYPE_ADDMUL_1:
! 1758: case TYPE_SUBMUL_1:
! 1759: e->retval = CALLING_CONVENTIONS (function)
! 1760: (e->d[0].p, e->s[0].p, size, multiplier);
! 1761: break;
! 1762: case TYPE_MUL_1C:
! 1763: case TYPE_ADDMUL_1C:
! 1764: case TYPE_SUBMUL_1C:
! 1765: e->retval = CALLING_CONVENTIONS (function)
! 1766: (e->d[0].p, e->s[0].p, size, multiplier, carry);
! 1767: break;
! 1768:
! 1769: case TYPE_MUL_2:
! 1770: e->retval = CALLING_CONVENTIONS (function)
! 1771: (e->d[0].p, e->s[0].p, size, e->s[1].p);
! 1772: break;
! 1773:
/* The logical operations have no return value to record.  */
! 1774: case TYPE_AND_N:
! 1775: case TYPE_ANDN_N:
! 1776: case TYPE_NAND_N:
! 1777: case TYPE_IOR_N:
! 1778: case TYPE_IORN_N:
! 1779: case TYPE_NIOR_N:
! 1780: case TYPE_XOR_N:
! 1781: case TYPE_XNOR_N:
! 1782: CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
! 1783: break;
! 1784:
! 1785: case TYPE_ADDSUB_N:
! 1786: e->retval = CALLING_CONVENTIONS (function)
! 1787: (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
! 1788: break;
! 1789: case TYPE_ADDSUB_NC:
! 1790: e->retval = CALLING_CONVENTIONS (function)
! 1791: (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
! 1792: break;
! 1793:
! 1794: case TYPE_COPY:
! 1795: case TYPE_COPYI:
! 1796: case TYPE_COPYD:
! 1797: case TYPE_COM_N:
! 1798: CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
! 1799: break;
! 1800:
! 1801:
! 1802: case TYPE_DIVEXACT_BY3:
! 1803: e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
! 1804: break;
! 1805: case TYPE_DIVEXACT_BY3C:
! 1806: e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
! 1807: carry);
! 1808: break;
! 1809:
! 1810:
! 1811: case TYPE_DIVMOD_1:
! 1812: case TYPE_DIVEXACT_1:
! 1813: e->retval = CALLING_CONVENTIONS (function)
! 1814: (e->d[0].p, e->s[0].p, size, divisor);
! 1815: break;
! 1816: case TYPE_DIVMOD_1C:
! 1817: e->retval = CALLING_CONVENTIONS (function)
! 1818: (e->d[0].p, e->s[0].p, size, divisor, carry);
! 1819: break;
/* For the divrem types size2 is passed as the second argument, ahead of the
   source operand.  */
! 1820: case TYPE_DIVREM_1:
! 1821: e->retval = CALLING_CONVENTIONS (function)
! 1822: (e->d[0].p, size2, e->s[0].p, size, divisor);
! 1823: break;
! 1824: case TYPE_DIVREM_1C:
! 1825: e->retval = CALLING_CONVENTIONS (function)
! 1826: (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
! 1827: break;
! 1828: case TYPE_PREINV_DIVREM_1:
! 1829: {
/* This local "shift" is the leading zero count of divisor, not the global
   shift test parameter; the inverse is taken of the divisor shifted left by
   that count.  */
! 1830: mp_limb_t dinv;
! 1831: unsigned shift;
! 1832: shift = refmpn_count_leading_zeros (divisor);
! 1833: dinv = refmpn_invert_limb (divisor << shift);
! 1834: e->retval = CALLING_CONVENTIONS (function)
! 1835: (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
! 1836: }
! 1837: break;
! 1838: case TYPE_MOD_1:
! 1839: case TYPE_MODEXACT_1_ODD:
! 1840: e->retval = CALLING_CONVENTIONS (function)
! 1841: (e->s[0].p, size, divisor);
! 1842: break;
! 1843: case TYPE_MOD_1C:
! 1844: case TYPE_MODEXACT_1C_ODD:
! 1845: e->retval = CALLING_CONVENTIONS (function)
! 1846: (e->s[0].p, size, divisor, carry);
! 1847: break;
! 1848: case TYPE_PREINV_MOD_1:
! 1849: e->retval = CALLING_CONVENTIONS (function)
! 1850: (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
! 1851: break;
! 1852: case TYPE_MOD_34LSUB1:
! 1853: e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
! 1854: break;
/* The two limbs of s[0] are passed by value, limb 1 before limb 0.  */
! 1855: case TYPE_UDIV_QRNND:
! 1856: e->retval = CALLING_CONVENTIONS (function)
! 1857: (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
! 1858: break;
! 1859:
/* d[1] is loaded with a copy of s[0] before the call and its limbs from
   size2 upwards are zeroed afterwards; d[0] is pre-filled with a marker
   value.  */
! 1860: case TYPE_SB_DIVREM_MN:
! 1861: refmpn_copyi (e->d[1].p, e->s[0].p, size); /* dividend */
! 1862: refmpn_fill (e->d[0].p, size-size2, 0x98765432); /* quotient */
! 1863: e->retval = CALLING_CONVENTIONS (function)
! 1864: (e->d[0].p, e->d[1].p, size, e->s[1].p, size2);
! 1865: refmpn_zero (e->d[1].p+size2, size-size2); /* excess over remainder */
! 1866: break;
! 1867: case TYPE_TDIV_QR:
! 1868: CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
! 1869: e->s[0].p, size, e->s[1].p, size2);
! 1870: break;
! 1871:
! 1872: case TYPE_GCD_1:
! 1873: /* Must have a non-zero src, but this probably isn't the best way to do
! 1874: it. */
! 1875: if (refmpn_zero_p (e->s[0].p, size))
! 1876: e->retval = 0;
! 1877: else
! 1878: e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
! 1879: break;
! 1880:
! 1881: case TYPE_GCD:
! 1882: /* Sources are destroyed, so they're saved and replaced, but a general
! 1883: approach to this might be better. Note that it's still e->s[0].p and
! 1884: e->s[1].p that are passed, to get the desired alignments. */
! 1885: {
! 1886: mp_ptr s0 = refmpn_malloc_limbs (size);
! 1887: mp_ptr s1 = refmpn_malloc_limbs (size2);
! 1888: refmpn_copyi (s0, e->s[0].p, size);
! 1889: refmpn_copyi (s1, e->s[1].p, size2);
! 1890:
/* The source regions are made writable for the call and are not switched
   back to read-only here.  */
! 1891: mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
! 1892: mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
! 1893: e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
! 1894: e->s[0].p, size,
! 1895: e->s[1].p, size2);
! 1896: refmpn_copyi (e->s[0].p, s0, size);
! 1897: refmpn_copyi (e->s[1].p, s1, size2);
! 1898: free (s0);
! 1899: free (s1);
! 1900: }
! 1901: break;
! 1902:
! 1903: case TYPE_GCD_FINDA:
! 1904: {
! 1905: /* FIXME: do this with a flag */
/* Both c[0] and c[1] are taken from limb 0 of s[0], each bumped to 1 if it
   would be zero.  NOTE(review): c[1] reading p[0] rather than p[1] may be
   unintended -- confirm.  */
! 1906: mp_limb_t c[2];
! 1907: c[0] = e->s[0].p[0];
! 1908: c[0] += (c[0] == 0);
! 1909: c[1] = e->s[0].p[0];
! 1910: c[1] += (c[1] == 0);
! 1911: e->retval = CALLING_CONVENTIONS (function) (c);
! 1912: }
! 1913: break;
! 1914:
/* For the mpz based types an mpz_t is built directly on the source limbs.
   In this first case bits 0 and 1 of carry select negative signs for a and
   b respectively.  */
! 1915: case TYPE_MPZ_JACOBI:
! 1916: case TYPE_MPZ_KRONECKER:
! 1917: {
! 1918: mpz_t a, b;
! 1919: PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
! 1920: PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
! 1921: e->retval = CALLING_CONVENTIONS (function) (a, b);
! 1922: }
! 1923: break;
/* In the four single-mpz cases below a non-zero carry makes the mpz operand
   negative, and multiplier supplies the plain integer operand.  */
! 1924: case TYPE_MPZ_KRONECKER_UI:
! 1925: {
! 1926: mpz_t a;
! 1927: PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
! 1928: e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
! 1929: }
! 1930: break;
! 1931: case TYPE_MPZ_KRONECKER_SI:
! 1932: {
! 1933: mpz_t a;
! 1934: PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
! 1935: e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
! 1936: }
! 1937: break;
! 1938: case TYPE_MPZ_UI_KRONECKER:
! 1939: {
! 1940: mpz_t b;
! 1941: PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
! 1942: e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
! 1943: }
! 1944: break;
! 1945: case TYPE_MPZ_SI_KRONECKER:
! 1946: {
! 1947: mpz_t b;
! 1948: PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
! 1949: e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
! 1950: }
! 1951: break;
! 1952:
! 1953: case TYPE_MUL_BASECASE:
! 1954: CALLING_CONVENTIONS (function)
! 1955: (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
! 1956: break;
! 1957: case TYPE_MUL_N:
! 1958: CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
! 1959: break;
! 1960: case TYPE_SQR:
! 1961: CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
! 1962: break;
! 1963:
/* TYPE_UMUL_PPMM passes the destination pointer first, TYPE_UMUL_PPMM_R
   passes it last.  */
! 1964: case TYPE_UMUL_PPMM:
! 1965: e->retval = CALLING_CONVENTIONS (function)
! 1966: (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
! 1967: break;
! 1968: case TYPE_UMUL_PPMM_R:
! 1969: e->retval = CALLING_CONVENTIONS (function)
! 1970: (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
! 1971: break;
! 1972:
! 1973: case TYPE_LSHIFT:
! 1974: case TYPE_RSHIFT:
! 1975: e->retval = CALLING_CONVENTIONS (function)
! 1976: (e->d[0].p, e->s[0].p, size, shift);
! 1977: break;
! 1978:
/* These are the casts described in the comment ahead of this function: the
   routine is called through a pointer type returning unsigned long or
   long.  */
! 1979: case TYPE_POPCOUNT:
! 1980: e->retval = (* (unsigned long (*)(ANYARGS))
! 1981: CALLING_CONVENTIONS (function)) (e->s[0].p, size);
! 1982: break;
! 1983: case TYPE_HAMDIST:
! 1984: e->retval = (* (unsigned long (*)(ANYARGS))
! 1985: CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
! 1986: break;
! 1987:
! 1988: case TYPE_SQRTREM:
! 1989: e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
! 1990: (e->d[0].p, e->d[1].p, e->s[0].p, size);
! 1991: break;
! 1992:
! 1993: case TYPE_ZERO:
! 1994: CALLING_CONVENTIONS (function) (e->d[0].p, size);
! 1995: break;
! 1996:
! 1997: case TYPE_GET_STR:
! 1998: {
/* The string is written to whichever end of the d[0] byte area d[0].high
   selects, and the unused remainder of the area is filled with 0xBA.  */
! 1999: size_t sizeinbase, fill;
! 2000: char *dst;
! 2001: MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
! 2002: ASSERT_ALWAYS (sizeinbase <= d[0].size);
! 2003: fill = d[0].size - sizeinbase;
! 2004: if (d[0].high)
! 2005: {
! 2006: memset (e->d[0].p, 0xBA, fill);
! 2007: dst = (char *) e->d[0].p + fill;
! 2008: }
! 2009: else
! 2010: {
! 2011: dst = (char *) e->d[0].p;
! 2012: memset (dst + sizeinbase, 0xBA, fill);
! 2013: }
/* A power of 2 base is given the source directly; any other base is given a
   copy of the source placed in d[1].  */
! 2014: if (POW2_P (base))
! 2015: {
! 2016: e->retval = CALLING_CONVENTIONS (function) (dst, base,
! 2017: e->s[0].p, size);
! 2018: }
! 2019: else
! 2020: {
! 2021: refmpn_copy (e->d[1].p, e->s[0].p, size);
! 2022: e->retval = CALLING_CONVENTIONS (function) (dst, base,
! 2023: e->d[1].p, size);
! 2024: }
! 2025: refmpn_zero (e->d[1].p, size); /* clobbered or unused */
! 2026: }
! 2027: break;
! 2028:
! 2029: #ifdef EXTRA_CALL
! 2030: EXTRA_CALL
! 2031: #endif
! 2032:
! 2033: default:
! 2034: printf ("Unknown routine type %d\n", choice->type);
! 2035: abort ();
! 2036: break;
! 2037: }
! 2038: }
! 2039:
! 2040:
! 2041: void
! 2042: pointer_setup (struct each_t *e)
! 2043: {
! 2044: int i, j;
! 2045:
! 2046: for (i = 0; i < NUM_DESTS; i++)
! 2047: {
! 2048: switch (tr->dst_size[i]) {
! 2049: case 0:
! 2050: case SIZE_RETVAL: /* will be adjusted later */
! 2051: d[i].size = size;
! 2052: break;
! 2053:
! 2054: case SIZE_1:
! 2055: d[i].size = 1;
! 2056: break;
! 2057: case SIZE_2:
! 2058: d[i].size = 2;
! 2059: break;
! 2060: case SIZE_3:
! 2061: d[i].size = 3;
! 2062: break;
! 2063:
! 2064: case SIZE_PLUS_1:
! 2065: d[i].size = size+1;
! 2066: break;
! 2067:
! 2068: case SIZE_SUM:
! 2069: if (tr->size2)
! 2070: d[i].size = size + size2;
! 2071: else
! 2072: d[i].size = 2*size;
! 2073: break;
! 2074:
! 2075: case SIZE_SIZE2:
! 2076: d[i].size = size2;
! 2077: break;
! 2078:
! 2079: case SIZE_DIFF:
! 2080: d[i].size = size - size2;
! 2081: break;
! 2082:
! 2083: case SIZE_DIFF_PLUS_1:
! 2084: d[i].size = size - size2 + 1;
! 2085: break;
! 2086:
! 2087: case SIZE_CEIL_HALF:
! 2088: d[i].size = (size+1)/2;
! 2089: break;
! 2090:
! 2091: case SIZE_GET_STR:
! 2092: {
! 2093: mp_limb_t ff = GMP_NUMB_MAX;
! 2094: MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
! 2095: }
! 2096: break;
! 2097:
! 2098: default:
! 2099: printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
! 2100: abort ();
! 2101: }
! 2102: }
! 2103:
! 2104: /* establish e->d[].p destinations */
! 2105: for (i = 0; i < NUM_DESTS; i++)
! 2106: {
! 2107: mp_size_t offset = 0;
! 2108:
! 2109: /* possible room for overlapping sources */
! 2110: for (j = 0; j < numberof (overlap->s); j++)
! 2111: if (overlap->s[j] == i)
! 2112: offset = MAX (offset, s[j].align);
! 2113:
! 2114: if (d[i].high)
! 2115: {
! 2116: if (tr->dst_bytes[i])
! 2117: {
! 2118: e->d[i].p = (mp_ptr)
! 2119: ((char *) (e->d[i].region.ptr + e->d[i].region.size)
! 2120: - d[i].size - d[i].align);
! 2121: }
! 2122: else
! 2123: {
! 2124: e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
! 2125: - d[i].size - d[i].align;
! 2126: if (tr->overlap == OVERLAP_LOW_TO_HIGH)
! 2127: e->d[i].p -= offset;
! 2128: }
! 2129: }
! 2130: else
! 2131: {
! 2132: if (tr->dst_bytes[i])
! 2133: {
! 2134: e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
! 2135: }
! 2136: else
! 2137: {
! 2138: e->d[i].p = e->d[i].region.ptr + d[i].align;
! 2139: if (tr->overlap == OVERLAP_HIGH_TO_LOW)
! 2140: e->d[i].p += offset;
! 2141: }
! 2142: }
! 2143: }
! 2144:
! 2145: /* establish e->s[].p sources */
! 2146: for (i = 0; i < NUM_SOURCES; i++)
! 2147: {
! 2148: int o = overlap->s[i];
! 2149: switch (o) {
! 2150: case -1:
! 2151: /* no overlap */
! 2152: e->s[i].p = s[i].p;
! 2153: break;
! 2154: case 0:
! 2155: case 1:
! 2156: /* overlap with d[o] */
! 2157: if (tr->overlap == OVERLAP_HIGH_TO_LOW)
! 2158: e->s[i].p = e->d[o].p - s[i].align;
! 2159: else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
! 2160: e->s[i].p = e->d[o].p + s[i].align;
! 2161: else if (tr->size2 == SIZE_FRACTION)
! 2162: e->s[i].p = e->d[o].p + size2;
! 2163: else
! 2164: e->s[i].p = e->d[o].p;
! 2165: break;
! 2166: default:
! 2167: abort();
! 2168: break;
! 2169: }
! 2170: }
! 2171: }
! 2172:
! 2173:
! 2174: void
! 2175: validate_fail (void)
! 2176: {
! 2177: if (tr->reference)
! 2178: {
! 2179: trap_location = TRAP_REF;
! 2180: call (&ref, tr->reference);
! 2181: trap_location = TRAP_NOWHERE;
! 2182: }
! 2183:
! 2184: print_all();
! 2185: abort();
! 2186: }
! 2187:
! 2188:
/* Run one test with the current parameters (size, size2, alignments,
   overlap, carry, multiplier, divisor, and so on).

   The steps are: adjust the divisor as the routine type requires, place the
   sources and destinations, fill the destinations with marker data, fill
   the sources with test data and write-protect them, then call the
   routines.  If the type has a validate function only the routine under
   test is called and the validate function checks it; otherwise the
   reference and the routine under test are both called and compare()
   checks them against each other.  trap_location records which phase is
   running.  */
! 2189: void
! 2190: try_one (void)
! 2191: {
! 2192: int i;
! 2193:
! 2194: if (option_spinner)
! 2195: spinner();
! 2196: spinner_count++;
! 2197:
! 2198: trap_location = TRAP_SETUPS;
! 2199:
/* Force the high bit, or the low bit, of the divisor when the routine type
   asks for that.  */
! 2200: if (tr->divisor == DIVISOR_NORM)
! 2201: divisor |= GMP_NUMB_HIGHBIT;
! 2202: if (tr->divisor == DIVISOR_ODD)
! 2203: divisor |= 1;
! 2204:
/* Separate source locations: at the top of the region, below the alignment
   gap, or at the bottom, above it.  */
! 2205: for (i = 0; i < NUM_SOURCES; i++)
! 2206: {
! 2207: if (s[i].high)
! 2208: s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
! 2209: else
! 2210: s[i].p = s[i].region.ptr + s[i].align;
! 2211: }
! 2212:
! 2213: pointer_setup (&ref);
! 2214: pointer_setup (&fun);
! 2215:
/* Initial destination contents.  With dst0_from_src1, d[0] starts as test
   data that is generated into the s[1] region and copied to both
   destinations; otherwise destinations get a marker fill, 0xBA bytes for
   byte destinations and DEADVAL limbs for the rest.  */
! 2216: for (i = 0; i < NUM_DESTS; i++)
! 2217: {
! 2218: if (! tr->dst[i])
! 2219: continue;
! 2220:
! 2221: if (tr->dst0_from_src1 && i==0)
! 2222: {
! 2223: t_random (s[1].region.ptr, d[0].size);
! 2224: MPN_COPY (fun.d[0].p, s[1].region.ptr, d[0].size);
! 2225: MPN_COPY (ref.d[0].p, s[1].region.ptr, d[0].size);
! 2226: }
! 2227: else if (tr->dst_bytes[i])
! 2228: {
! 2229: memset (ref.d[i].p, 0xBA, d[i].size);
! 2230: memset (fun.d[i].p, 0xBA, d[i].size);
! 2231: }
! 2232: else
! 2233: {
! 2234: refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
! 2235: refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
! 2236: }
! 2237: }
! 2238:
/* The two return values start out different from each other, presumably so
   that a routine which fails to set one shows up in compare().  */
! 2239: ref.retval = 0x04152637;
! 2240: fun.retval = 0x8C9DAEBF;
! 2241:
/* Source data: each source region is made writable, filled, adjusted to
   satisfy the routine's tr->data requirement, and then made read-only.  */
! 2242: for (i = 0; i < NUM_SOURCES; i++)
! 2243: {
! 2244: if (! tr->src[i])
! 2245: continue;
! 2246:
! 2247: mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
! 2248: t_random (s[i].p, SRC_SIZE(i));
! 2249:
! 2250: switch (tr->data) {
! 2251: case DATA_NON_ZERO:
! 2252: if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
! 2253: s[i].p[0] = 1;
! 2254: break;
! 2255:
! 2256: case DATA_MULTIPLE_DIVISOR:
! 2257: /* same number of low zero bits as divisor */
! 2258: s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
/* subtracting the remainder leaves an exact multiple of divisor */
! 2259: refmpn_sub_1 (s[i].p, s[i].p, size,
! 2260: refmpn_mod_1 (s[i].p, size, divisor));
! 2261: break;
! 2262:
! 2263: case DATA_GCD:
! 2264: /* s[1] no more bits than s[0] */
! 2265: if (i == 1 && size2 == size)
! 2266: s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
! 2267:
! 2268: /* high limb non-zero */
! 2269: s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
! 2270:
! 2271: /* odd */
! 2272: s[i].p[0] |= 1;
! 2273: break;
! 2274:
! 2275: case DATA_SRC1_ODD:
! 2276: if (i == 1)
! 2277: s[i].p[0] |= 1;
! 2278: break;
! 2279:
! 2280: case DATA_SRC1_HIGHBIT:
! 2281: if (i == 1)
! 2282: {
! 2283: if (tr->size2)
! 2284: s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
! 2285: else
! 2286: s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
! 2287: }
! 2288: break;
! 2289:
! 2290: case DATA_UDIV_QRNND:
/* limb 1 is passed to the routine as n1 (see TYPE_UDIV_QRNND in call) and
   is reduced below the divisor here */
! 2291: s[i].p[1] %= divisor;
! 2292: break;
! 2293: }
! 2294:
! 2295: mprotect_region (&s[i].region, PROT_READ);
! 2296:
/* When the source overlaps a destination, pointer_setup has pointed
   ref.s[i].p and fun.s[i].p into the destination areas, so the data is
   copied there.  */
! 2297: if (ref.s[i].p != s[i].p)
! 2298: {
! 2299: refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
! 2300: refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
! 2301: }
! 2302: }
! 2303:
! 2304: if (option_print)
! 2305: print_all();
! 2306:
! 2307: if (tr->validate != NULL)
! 2308: {
/* validated types: only the routine under test is called here */
! 2309: trap_location = TRAP_FUN;
! 2310: call (&fun, choice->function);
! 2311: trap_location = TRAP_NOWHERE;
! 2312:
! 2313: if (! CALLING_CONVENTIONS_CHECK ())
! 2314: {
! 2315: print_all();
! 2316: abort();
! 2317: }
! 2318:
! 2319: (*tr->validate) ();
! 2320: }
! 2321: else
! 2322: {
/* reference first, then the routine under test, then compare the two */
! 2323: trap_location = TRAP_REF;
! 2324: call (&ref, tr->reference);
! 2325: trap_location = TRAP_FUN;
! 2326: call (&fun, choice->function);
! 2327: trap_location = TRAP_NOWHERE;
! 2328:
! 2329: if (! CALLING_CONVENTIONS_CHECK ())
! 2330: {
! 2331: print_all();
! 2332: abort();
! 2333: }
! 2334:
! 2335: compare ();
! 2336: }
! 2337: }
! 2338:
! 2339:
/* Loop header over the main operand size.  The starting value is the
   largest of option_firstsize, choice->minsize and either 0 (when tr->size
   is SIZE_ALLOW_ZERO) or 1; the loop runs up to and including
   option_lastsize.  */
#define SIZE_ITERATION                                          \
  for (size = MAX3 (option_firstsize,                           \
                    choice->minsize,                            \
                    (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1);     \
       size <= option_lastsize;                                 \
       size++)

/* First value of size2: fixed at 2 for SIZE_2, 0 for SIZE_FRACTION,
   otherwise when tr->size2 is set the larger of choice->minsize and
   option_firstsize2 (1 if that option is 0), and 0 when tr->size2 is
   unset.  */
#define SIZE2_FIRST                                     \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? 0                     \
   : tr->size2 ?                                        \
   MAX (choice->minsize, (option_firstsize2 != 0        \
                          ? option_firstsize2 : 1))     \
   : 0)

/* Last value of size2 (inclusive): 2 for SIZE_2, FRACTION_COUNT-1 for
   SIZE_FRACTION, the current size when tr->size2 is otherwise set, and 0
   when it is unset.  */
#define SIZE2_LAST                                      \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1      \
   : tr->size2 ? size                                   \
   : 0)

/* Loop header over size2, from SIZE2_FIRST to SIZE2_LAST inclusive.
   SIZE2_LAST can refer to size, so this belongs inside SIZE_ITERATION.  */
#define SIZE2_ITERATION \
  for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)

/* Number of alignments to try for an operand: ALIGNMENTS when cond is
   true, otherwise just one.  */
#define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)
/* Loop header stepping w[n].align over 0 .. ALIGN_COUNT(cond)-1.  */
#define ALIGN_ITERATION(w,n,cond) \
  for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)

/* Largest value of the "high" flag to try: 1 when cond is non-zero, else
   0.  HIGH_COUNT is the corresponding number of iterations.  */
#define HIGH_LIMIT(cond)  ((cond) != 0)
#define HIGH_COUNT(cond)  (HIGH_LIMIT (cond) + 1)
/* Loop header stepping w[n].high over 0 .. HIGH_LIMIT(cond) inclusive.  */
#define HIGH_ITERATION(w,n,cond) \
  for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++)

/* Largest shift count to try: GMP_NUMB_BITS-1 when tr->shift is set,
   otherwise 1 so the shift loop runs exactly once.  */
#define SHIFT_LIMIT                             \
  ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))

/* Loop header over shift, from 1 to SHIFT_LIMIT inclusive.  */
#define SHIFT_ITERATION \
  for (shift = 1; shift <= SHIFT_LIMIT; shift++)
! 2378:
! 2379:
! 2380: void
! 2381: try_many (void)
! 2382: {
! 2383: int i;
! 2384:
! 2385: {
! 2386: unsigned long total = 1;
! 2387:
! 2388: total *= option_repetitions;
! 2389: total *= option_lastsize;
! 2390: if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
! 2391: else if (tr->size2) total *= (option_lastsize+1)/2;
! 2392:
! 2393: total *= SHIFT_LIMIT;
! 2394: total *= MULTIPLIER_COUNT;
! 2395: total *= DIVISOR_COUNT;
! 2396: total *= CARRY_COUNT;
! 2397: total *= T_RAND_COUNT;
! 2398:
! 2399: total *= HIGH_COUNT (tr->dst[0]);
! 2400: total *= HIGH_COUNT (tr->dst[1]);
! 2401: total *= HIGH_COUNT (tr->src[0]);
! 2402: total *= HIGH_COUNT (tr->src[1]);
! 2403:
! 2404: total *= ALIGN_COUNT (tr->dst[0]);
! 2405: total *= ALIGN_COUNT (tr->dst[1]);
! 2406: total *= ALIGN_COUNT (tr->src[0]);
! 2407: total *= ALIGN_COUNT (tr->src[1]);
! 2408:
! 2409: total *= OVERLAP_COUNT;
! 2410:
! 2411: printf ("%s %lu\n", choice->name, total);
! 2412: }
! 2413:
! 2414: spinner_count = 0;
! 2415:
! 2416: for (i = 0; i < option_repetitions; i++)
! 2417: SIZE_ITERATION
! 2418: SIZE2_ITERATION
! 2419:
! 2420: SHIFT_ITERATION
! 2421: MULTIPLIER_ITERATION
! 2422: DIVISOR_ITERATION
! 2423: CARRY_ITERATION /* must be after divisor */
! 2424: T_RAND_ITERATION
! 2425:
! 2426: HIGH_ITERATION(d,0, tr->dst[0])
! 2427: HIGH_ITERATION(d,1, tr->dst[1])
! 2428: HIGH_ITERATION(s,0, tr->src[0])
! 2429: HIGH_ITERATION(s,1, tr->src[1])
! 2430:
! 2431: ALIGN_ITERATION(d,0, tr->dst[0])
! 2432: ALIGN_ITERATION(d,1, tr->dst[1])
! 2433: ALIGN_ITERATION(s,0, tr->src[0])
! 2434: ALIGN_ITERATION(s,1, tr->src[1])
! 2435:
! 2436: OVERLAP_ITERATION
! 2437: try_one();
! 2438:
! 2439: printf("\n");
! 2440: }
! 2441:
! 2442:
! 2443: /* Usually print_all() doesn't show much, but it might give a hint as to
! 2444: where the function was up to when it died. */
! 2445: void
! 2446: trap (int sig)
! 2447: {
! 2448: const char *name = "noname";
! 2449:
! 2450: switch (sig) {
! 2451: case SIGILL: name = "SIGILL"; break;
! 2452: #ifdef SIGBUS
! 2453: case SIGBUS: name = "SIGBUS"; break;
! 2454: #endif
! 2455: case SIGSEGV: name = "SIGSEGV"; break;
! 2456: case SIGFPE: name = "SIGFPE"; break;
! 2457: }
! 2458:
! 2459: printf ("\n\nSIGNAL TRAP: %s\n", name);
! 2460:
! 2461: switch (trap_location) {
! 2462: case TRAP_REF:
! 2463: printf (" in reference function: %s\n", tr->reference_name);
! 2464: break;
! 2465: case TRAP_FUN:
! 2466: printf (" in test function: %s\n", choice->name);
! 2467: print_all ();
! 2468: break;
! 2469: case TRAP_SETUPS:
! 2470: printf (" in parameter setups\n");
! 2471: print_all ();
! 2472: break;
! 2473: default:
! 2474: printf (" somewhere unknown\n");
! 2475: break;
! 2476: }
! 2477: exit (1);
! 2478: }
! 2479:
! 2480:
! 2481: void
! 2482: try_init (void)
! 2483: {
! 2484: #if HAVE_GETPAGESIZE
! 2485: /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
! 2486: know _SC_PAGESIZE. */
! 2487: pagesize = getpagesize ();
! 2488: #else
! 2489: #if HAVE_SYSCONF
! 2490: if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
! 2491: {
! 2492: /* According to the linux man page, sysconf doesn't set errno */
! 2493: fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
! 2494: exit (1);
! 2495: }
! 2496: #else
! 2497: Error, error, cannot get page size
! 2498: #endif
! 2499: #endif
! 2500:
! 2501: printf ("pagesize is 0x%lX bytes\n", pagesize);
! 2502:
! 2503: signal (SIGILL, trap);
! 2504: #ifdef SIGBUS
! 2505: signal (SIGBUS, trap);
! 2506: #endif
! 2507: signal (SIGSEGV, trap);
! 2508: signal (SIGFPE, trap);
! 2509:
! 2510: {
! 2511: int i;
! 2512:
! 2513: for (i = 0; i < NUM_SOURCES; i++)
! 2514: {
! 2515: malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
! 2516: printf ("s[%d] %p to %p (0x%lX bytes)\n",
! 2517: i, s[i].region.ptr,
! 2518: s[i].region.ptr + s[i].region.size,
! 2519: s[i].region.size * BYTES_PER_MP_LIMB);
! 2520: }
! 2521:
! 2522: #define INIT_EACH(e,es) \
! 2523: for (i = 0; i < NUM_DESTS; i++) \
! 2524: { \
! 2525: malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
! 2526: printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \
! 2527: es, i, e.d[i].region.ptr, \
! 2528: e.d[i].region.ptr + e.d[i].region.size, \
! 2529: e.d[i].region.size * BYTES_PER_MP_LIMB); \
! 2530: }
! 2531:
! 2532: INIT_EACH(ref, "ref");
! 2533: INIT_EACH(fun, "fun");
! 2534: }
! 2535: }
! 2536:
/* Match str against pattern, where pattern may have a single '*' wildcard
   either at its start or at its end.

   "*xyz" matches any str ending in "xyz", "xyz*" matches any str starting
   with "xyz", and a pattern without a wildcard in one of those positions
   must equal str exactly.  A leading '*' is checked first, so a pattern
   like "*x*" is treated as a suffix match on "x*".

   Returns 1 on a match and 0 otherwise.  */
int
strmatch_wild (const char *pattern, const char *str)
{
  size_t  plen, slen;

  /* wildcard at start */
  if (pattern[0] == '*')
    {
      pattern++;
      plen = strlen (pattern);
      slen = strlen (str);
      return (plen == 0
              || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0));
    }

  /* wildcard at end */
  plen = strlen (pattern);
  if (plen >= 1 && pattern[plen-1] == '*')
    {
      /* strncmp rather than memcmp, since str can be shorter than the
         prefix and the comparison must stop at its terminator instead of
         reading beyond it.  */
      return (strncmp (pattern, str, plen-1) == 0);
    }

  /* no wildcards */
  return (strcmp (pattern, str) == 0);
}
! 2560:
! 2561: void
! 2562: try_name (const char *name)
! 2563: {
! 2564: int found = 0;
! 2565: int i;
! 2566:
! 2567: for (i = 0; i < numberof (choice_array); i++)
! 2568: {
! 2569: if (strmatch_wild (name, choice_array[i].name))
! 2570: {
! 2571: choice = &choice_array[i];
! 2572: tr = ¶m[choice->type];
! 2573: try_many ();
! 2574: found = 1;
! 2575: }
! 2576: }
! 2577:
! 2578: if (!found)
! 2579: {
! 2580: printf ("%s unknown\n", name);
! 2581: /* exit (1); */
! 2582: }
! 2583: }
! 2584:
! 2585:
! 2586: void
! 2587: usage (const char *prog)
! 2588: {
! 2589: int col = 0;
! 2590: int i;
! 2591:
! 2592: printf ("Usage: %s [options] function...\n\
! 2593: -1 use limb data 1,2,3,etc\n\
! 2594: -9 use limb data all 0xFF..FFs\n\
! 2595: -a zeros use limb data all zeros\n\
! 2596: -a ffs use limb data all 0xFF..FFs (same as -9)\n\
! 2597: -a 2fd use data 0x2FFF...FFFD\n\
! 2598: -p print each case tried (try this if seg faulting)\n\
! 2599: -R seed random numbers from time()\n\
! 2600: -r reps set repetitions (default %d)\n\
! 2601: -s size starting size to test\n\
! 2602: -S size2 starting size2 to test\n\
! 2603: -s s1-s2 range of sizes to test\n\
! 2604: -W don't show the spinner (use this in gdb)\n\
! 2605: -z disable mprotect() redzones\n\
! 2606: Default data is refmpn_random() and refmpn_random2().\n\
! 2607: \n\
! 2608: Functions that can be tested:\n\
! 2609: ", prog, DEFAULT_REPETITIONS);
! 2610:
! 2611: for (i = 0; i < numberof (choice_array); i++)
! 2612: {
! 2613: if (col + 1 + strlen (choice_array[i].name) > 79)
! 2614: {
! 2615: printf ("\n");
! 2616: col = 0;
! 2617: }
! 2618: printf (" %s", choice_array[i].name);
! 2619: col += 1 + strlen (choice_array[i].name);
! 2620: }
! 2621: printf ("\n");
! 2622:
! 2623: exit(1);
! 2624: }
! 2625:
! 2626:
! 2627: int
! 2628: main (int argc, char *argv[])
! 2629: {
! 2630: int i;
! 2631:
! 2632: /* unbuffered output */
! 2633: setbuf (stdout, NULL);
! 2634: setbuf (stderr, NULL);
! 2635:
! 2636: /* default trace in hex, and in upper-case so can paste into bc */
! 2637: mp_trace_base = -16;
! 2638:
! 2639: param_init ();
! 2640:
! 2641: {
! 2642: unsigned seed = 123;
! 2643: int opt;
! 2644:
! 2645: while ((opt = getopt(argc, argv, "19a:b:pRr:S:s:Wz")) != EOF)
! 2646: {
! 2647: switch (opt) {
! 2648: case '1':
! 2649: /* use limb data values 1, 2, 3, ... etc */
! 2650: option_data = DATA_SEQ;
! 2651: break;
! 2652: case '9':
! 2653: /* use limb data values 0xFFF...FFF always */
! 2654: option_data = DATA_FFS;
! 2655: break;
! 2656: case 'a':
! 2657: if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS;
! 2658: else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ;
! 2659: else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS;
! 2660: else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD;
! 2661: else
! 2662: {
! 2663: fprintf (stderr, "unrecognised data option: %s\n", optarg);
! 2664: exit (1);
! 2665: }
! 2666: break;
! 2667: case 'b':
! 2668: mp_trace_base = atoi (optarg);
! 2669: break;
! 2670: case 'p':
! 2671: option_print = 1;
! 2672: break;
! 2673: case 'R':
! 2674: /* randomize */
! 2675: seed = time (NULL);
! 2676: break;
! 2677: case 'r':
! 2678: option_repetitions = atoi (optarg);
! 2679: break;
! 2680: case 's':
! 2681: {
! 2682: char *p;
! 2683: option_firstsize = atoi (optarg);
! 2684: if ((p = strchr (optarg, '-')) != NULL)
! 2685: option_lastsize = atoi (p+1);
! 2686: }
! 2687: break;
! 2688: case 'S':
! 2689: /* -S <size> sets the starting size for the second of a two size
! 2690: routine (like mpn_mul_basecase) */
! 2691: option_firstsize2 = atoi (optarg);
! 2692: break;
! 2693: case 'W':
! 2694: /* use this when running in the debugger */
! 2695: option_spinner = 0;
! 2696: break;
! 2697: case 'z':
! 2698: /* disable redzones */
! 2699: option_redzones = 0;
! 2700: break;
! 2701: case '?':
! 2702: usage (argv[0]);
! 2703: break;
! 2704: }
! 2705: }
! 2706:
! 2707: gmp_randseed_ui (RANDS, seed);
! 2708: }
! 2709:
! 2710: try_init();
! 2711:
! 2712: if (argc <= optind)
! 2713: usage (argv[0]);
! 2714:
! 2715: for (i = optind; i < argc; i++)
! 2716: try_name (argv[i]);
! 2717:
! 2718: return 0;
! 2719: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>