Annotation of OpenXM_contrib/gmp/gmp-impl.h, Revision 1.1.1.4
1.1 maekawa 1: /* Include file for internal GNU MP types and definitions.
2:
1.1.1.2 maekawa 3: THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
4: BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
5:
1.1.1.4 ! ohara 6: Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002 Free
! 7: Software Foundation, Inc.
1.1 maekawa 8:
9: This file is part of the GNU MP Library.
10:
11: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 maekawa 12: it under the terms of the GNU Lesser General Public License as published by
13: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 14: option) any later version.
15:
16: The GNU MP Library is distributed in the hope that it will be useful, but
17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 maekawa 18: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 19: License for more details.
20:
1.1.1.2 maekawa 21: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 22: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
24: MA 02111-1307, USA. */
25:
1.1.1.4 ! ohara 26:
! 27: #ifndef __GMP_IMPL_H__
! 28: #define __GMP_IMPL_H__
! 29:
! 30: /* On Cray vector systems "short" and "unsigned short" might not be the same
! 31: number of bits, making the SHRT_MAX defaults below fail. (This depends
! 32: on compiler options.) Instead use limits.h. */
! 33: #if defined _CRAY
! 34: #include <limits.h>
! 35: #endif
! 36:
! 37: #if ! __GMP_WITHIN_CONFIGURE
1.1.1.2 maekawa 38: #include "config.h"
39: #include "gmp-mparam.h"
1.1.1.4 ! ohara 40: #endif
1.1.1.2 maekawa 41:
1.1.1.4 ! ohara 42: #ifdef __cplusplus
! 43: #include <string>
1.1 maekawa 44: #endif
45:
1.1.1.4 ! ohara 46: /* Might search and replace _PROTO to __GMP_PROTO internally one day, to
! 47: avoid two names for one thing, but no hurry for that. */
! 48: #define _PROTO(x) __GMP_PROTO(x)
! 49:
! 50: /* The following tries to get a good version of alloca. The tests are
! 51: adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
! 52: Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
! 53: be setup appropriately.
! 54:
! 55: ifndef alloca - a cpp define might already exist.
! 56: glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
! 57: HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.
! 58:
! 59: GCC __builtin_alloca - preferred whenever available.
! 60:
! 61: _AIX pragma - IBM compilers need a #pragma in "each module that needs to
! 62: use alloca". Pragma indented to protect pre-ANSI cpp's. _IBMR2 was
! 63: used in past versions of GMP, retained still in case it matters.
! 64:
! 65: The autoconf manual says this pragma needs to be at the start of a C
! 66: file, apart from comments and preprocessor directives. Is that true?
! 67: xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
! 68: from gmp.h.
! 69: */
! 70:
! 71: #ifndef alloca
! 72: # ifdef __GNUC__
! 73: # define alloca __builtin_alloca
! 74: # else
! 75: # ifdef __DECC
! 76: # define alloca(x) __ALLOCA(x)
! 77: # else
! 78: # ifdef _MSC_VER
! 79: # include <malloc.h>
! 80: # define alloca _alloca
! 81: # else
! 82: # if HAVE_ALLOCA_H
! 83: # include <alloca.h>
! 84: # else
! 85: # if defined (_AIX) || defined (_IBMR2)
! 86: #pragma alloca
! 87: # else
! 88: char *alloca ();
! 89: # endif
! 90: # endif
! 91: # endif
! 92: # endif
! 93: # endif
1.1 maekawa 94: #endif
1.1.1.4 ! ohara 95:
! 96:
! 97: /* const and signed must match __gmp_const and __gmp_signed, so follow the
! 98: decision made for those in gmp.h. */
! 99: #if ! __GMP_HAVE_CONST
! 100: #define const /* empty */
! 101: #define signed /* empty */
1.1 maekawa 102: #endif
1.1.1.4 ! ohara 103:
! 104:
! 105: /* "const" basically means a function does nothing but examine its arguments
! 106: and give a return value, it doesn't read or write any memory (neither
! 107: global nor pointed to by arguments), and has no other side-effects. This
! 108: is more restrictive than "pure". See info node "(gcc)Function
! 109: Attributes". */
! 110: #if HAVE_ATTRIBUTE_CONST
! 111: #define ATTRIBUTE_CONST __attribute__ ((const))
! 112: #else
! 113: #define ATTRIBUTE_CONST
1.1 maekawa 114: #endif
1.1.1.4 ! ohara 115:
! 116: #if HAVE_ATTRIBUTE_NORETURN
! 117: #define ATTRIBUTE_NORETURN __attribute__ ((noreturn))
! 118: #else
! 119: #define ATTRIBUTE_NORETURN
1.1 maekawa 120: #endif
121:
1.1.1.4 ! ohara 122: /* "malloc" means a function behaves like malloc in that the pointer it
! 123: returns doesn't alias anything. */
! 124: #if HAVE_ATTRIBUTE_MALLOC
! 125: #define ATTRIBUTE_MALLOC __attribute__ ((malloc))
! 126: #else
! 127: #define ATTRIBUTE_MALLOC
1.1.1.2 maekawa 128: #endif
129:
1.1.1.4 ! ohara 130: #ifdef _CRAY
! 131: #define CRAY_Pragma(str) _Pragma (str)
1.1 maekawa 132: #else
1.1.1.4 ! ohara 133: #define CRAY_Pragma(str)
! 134: #endif
! 135:
! 136:
! 137: #if ! HAVE_STRCHR
! 138: #define strchr(s,c) index(s,c)
! 139: #endif
! 140:
! 141: #if ! HAVE_MEMSET
! 142: #define memset(p, c, n) \
! 143: do { \
! 144: ASSERT (n >= 0); \
! 145: int __i; \
! 146: for (__i = 0; __i < (n); __i++) \
! 147: (p)[__i] = (c); \
! 148: } while (0)
! 149: #endif
! 150:
! 151: /* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
! 152: mode. Falling back to a memcpy will give maximum portability, since it
! 153: works no matter whether va_list is a pointer, struct or array. */
! 154: #if ! defined (va_copy) && defined (__va_copy)
! 155: #define va_copy(dst,src) __va_copy(dst,src)
! 156: #endif
! 157: #if ! defined (va_copy)
! 158: #define va_copy(dst,src) \
! 159: do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
! 160: #endif
! 161:
! 162:
! 163: #if defined (__cplusplus)
! 164: extern "C" {
! 165: #endif
! 166:
! 167:
! 168: /* Usage: TMP_DECL (marker);
! 169: TMP_MARK (marker);
! 170: ptr = TMP_ALLOC (bytes);
! 171: TMP_FREE (marker);
! 172:
! 173: TMP_DECL just declares a variable, but might be empty and so must be last
! 174: in a list of variables. TMP_MARK must be done before any TMP_ALLOC.
! 175: TMP_ALLOC(0) is not allowed. TMP_FREE doesn't need to be done if a
! 176: TMP_MARK was made, but then no TMP_ALLOCs.
! 177:
! 178: The name "marker" isn't used by the malloc-reentrant and debug methods,
! 179: instead they hardcode a name which TMP_ALLOC will know. For that reason
! 180: two TMP_DECLs are not allowed, unless one is in a nested "{ }" block, and
! 181: in that case TMP_MARK, TMP_ALLOC and TMP_FREE will refer to the TMP_DECL
! 182: which is in scope, irrespective of the marker name given. */
! 183:
! 184: /* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
! 185: __gmp_allocate_func doesn't already determine it. Currently TMP_ALLOC
! 186: isn't used for "double"s, so that's not in the union. */
! 187: union tmp_align_t {
! 188: mp_limb_t l;
! 189: char *p;
! 190: };
! 191: #define __TMP_ALIGN sizeof (union tmp_align_t)
! 192:
! 193: /* Return "a" rounded upwards to a multiple of "m", if it isn't already.
! 194: "a" must be an unsigned type. */
! 195: #define ROUND_UP_MULTIPLE(a,m) ((a) + (-(a))%(m))
! 196:
! 197: #if WANT_TMP_ALLOCA
! 198: /* Each TMP_ALLOC is simply an alloca(), and nothing else is needed.
! 199: This is the preferred method. */
1.1 maekawa 200: #define TMP_DECL(m)
201: #define TMP_ALLOC(x) alloca(x)
202: #define TMP_MARK(m)
203: #define TMP_FREE(m)
204: #endif
205:
1.1.1.4 ! ohara 206: #if WANT_TMP_REENTRANT
! 207: /* See tal-reent.c for some comments. */
! 208: struct tmp_reentrant_t {
! 209: struct tmp_reentrant_t *next;
! 210: size_t size; /* bytes, including header */
! 211: };
! 212: void *__gmp_tmp_reentrant_alloc _PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
! 213: void __gmp_tmp_reentrant_free _PROTO ((struct tmp_reentrant_t *));
! 214: #define TMP_DECL(marker) struct tmp_reentrant_t *__tmp_marker
! 215: /* don't demand NULL, just cast a zero */
! 216: #define TMP_MARK(marker) \
! 217: do { __tmp_marker = (struct tmp_reentrant_t *) 0; } while (0)
! 218: #define TMP_ALLOC(size) __gmp_tmp_reentrant_alloc (&__tmp_marker, size)
! 219: #define TMP_FREE(marker) __gmp_tmp_reentrant_free (__tmp_marker)
! 220: #endif
! 221:
! 222: #if WANT_TMP_NOTREENTRANT
! 223: /* See tal-notreent.c for some comments. */
! 224: struct tmp_marker
! 225: {
! 226: struct tmp_stack *which_chunk;
! 227: void *alloc_point;
! 228: };
! 229: typedef struct tmp_marker tmp_marker;
! 230: void *__gmp_tmp_alloc _PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
! 231: void __gmp_tmp_mark _PROTO ((tmp_marker *));
! 232: void __gmp_tmp_free _PROTO ((tmp_marker *));
! 233: #define TMP_DECL(marker) tmp_marker marker
! 234: /* gcc recognises "(-(8*n))%8" or the like is always zero, which means the
! 235: rounding up is a noop for allocs of whole limbs. */
! 236: #define TMP_ALLOC(size) \
! 237: __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (size), __TMP_ALIGN))
! 238: #define TMP_MARK(marker) __gmp_tmp_mark (&marker)
! 239: #define TMP_FREE(marker) __gmp_tmp_free (&marker)
! 240: #endif
! 241:
! 242: #if WANT_TMP_DEBUG
! 243: /* See tal-debug.c for some comments. */
! 244: struct tmp_debug_t {
! 245: struct tmp_debug_entry_t *list;
! 246: const char *file;
! 247: int line;
! 248: };
! 249: struct tmp_debug_entry_t {
! 250: struct tmp_debug_entry_t *next;
! 251: char *block;
! 252: size_t size;
! 253: };
! 254: void __gmp_tmp_debug_mark _PROTO ((const char *, int, struct tmp_debug_t **,
! 255: struct tmp_debug_t *,
! 256: const char *, const char *));
! 257: void *__gmp_tmp_debug_alloc _PROTO ((const char *, int, int,
! 258: struct tmp_debug_t **, const char *,
! 259: size_t)) ATTRIBUTE_MALLOC;
! 260: void __gmp_tmp_debug_free _PROTO ((const char *, int, int,
! 261: struct tmp_debug_t **,
! 262: const char *, const char *));
! 263: #if HAVE_STRINGIZE
! 264: #define TMP_DECL(marker) TMP_DECL_NAME(marker, #marker)
! 265: #define TMP_MARK(marker) TMP_MARK_NAME(marker, #marker)
! 266: #define TMP_FREE(marker) TMP_FREE_NAME(marker, #marker)
! 267: #else
! 268: #define TMP_DECL(marker) TMP_DECL_NAME(marker, "marker")
! 269: #define TMP_MARK(marker) TMP_MARK_NAME(marker, "marker")
! 270: #define TMP_FREE(marker) TMP_FREE_NAME(marker, "marker")
! 271: #endif
! 272: /* The marker variable is designed to provoke an uninitialized variable
! 273: warning from the compiler if TMP_FREE is used without a TMP_MARK.
! 274: __tmp_marker_inscope does the same for TMP_ALLOC. Runtime tests pick
! 275: these things up too. */
! 276: #define TMP_DECL_NAME(marker, marker_name) \
! 277: int marker; \
! 278: int __tmp_marker_inscope; \
! 279: const char *__tmp_marker_name = marker_name; \
! 280: struct tmp_debug_t __tmp_marker_struct; \
! 281: /* don't demand NULL, just cast a zero */ \
! 282: struct tmp_debug_t *__tmp_marker = (struct tmp_debug_t *) 0
! 283: #define TMP_MARK_NAME(marker, marker_name) \
! 284: do { \
! 285: marker = 1; \
! 286: __tmp_marker_inscope = 1; \
! 287: __gmp_tmp_debug_mark (ASSERT_FILE, ASSERT_LINE, \
! 288: &__tmp_marker, &__tmp_marker_struct, \
! 289: __tmp_marker_name, marker_name); \
! 290: } while (0)
! 291: #define TMP_ALLOC(size) \
! 292: __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE, \
! 293: __tmp_marker_inscope, \
! 294: &__tmp_marker, __tmp_marker_name, size)
! 295: #define TMP_FREE_NAME(marker, marker_name) \
! 296: do { \
! 297: __gmp_tmp_debug_free (ASSERT_FILE, ASSERT_LINE, \
! 298: marker, &__tmp_marker, \
! 299: __tmp_marker_name, marker_name); \
! 300: } while (0)
! 301: #endif
! 302:
! 303:
1.1.1.2 maekawa 304: /* Allocating various types. */
305: #define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
306: #define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)
307: #define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr)
308:
1.1.1.4 ! ohara 309: /* It's more efficient to allocate one block than two. This is certainly
! 310: true of the malloc methods, but it can even be true of alloca if that
! 311: involves copying a chunk of stack (various RISCs), or a call to a stack
! 312: bounds check (mingw). In any case, when debugging keep separate blocks
! 313: so a redzoning malloc debugger can protect each individually. */
! 314: #if WANT_TMP_DEBUG
! 315: #define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize) \
! 316: do { \
! 317: (xp) = TMP_ALLOC_LIMBS (xsize); \
! 318: (yp) = TMP_ALLOC_LIMBS (ysize); \
! 319: } while (0)
! 320: #else
! 321: #define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize) \
! 322: do { \
! 323: (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize)); \
! 324: (yp) = (xp) + (xsize); \
! 325: } while (0)
1.1 maekawa 326: #endif
327:
1.1.1.4 ! ohara 328:
! 329: /* From gmp.h, nicer names for internal use. */
! 330: #define MPN_CMP(result, xp, yp, size) __GMPN_CMP(result, xp, yp, size)
! 331:
! 332: #define ABS(x) ((x) >= 0 ? (x) : -(x))
1.1 maekawa 333: #define MIN(l,o) ((l) < (o) ? (l) : (o))
334: #define MAX(h,i) ((h) > (i) ? (h) : (i))
1.1.1.2 maekawa 335: #define numberof(x) (sizeof (x) / sizeof ((x)[0]))
1.1 maekawa 336:
337: /* Field access macros. */
338: #define SIZ(x) ((x)->_mp_size)
339: #define ABSIZ(x) ABS (SIZ (x))
340: #define PTR(x) ((x)->_mp_d)
1.1.1.2 maekawa 341: #define LIMBS(x) ((x)->_mp_d)
1.1 maekawa 342: #define EXP(x) ((x)->_mp_exp)
343: #define PREC(x) ((x)->_mp_prec)
344: #define ALLOC(x) ((x)->_mp_alloc)
345:
1.1.1.4 ! ohara 346: /* n-1 inverts any low zeros and the lowest one bit. If n&(n-1) leaves zero
! 347: then that lowest one bit must have been the only bit set. n==0 will
! 348: return true though, so avoid that. */
! 349: #define POW2_P(n) (((n) & ((n) - 1)) == 0)
! 350:
! 351:
! 352: /* Might be already defined by gmp-mparam.h, otherwise use what's in gmp.h. */
! 353: #ifndef BITS_PER_MP_LIMB
! 354: #define BITS_PER_MP_LIMB __GMP_BITS_PER_MP_LIMB
! 355: #endif
1.1.1.2 maekawa 356:
357:
1.1.1.4 ! ohara 358: /* The "short" defines are a bit different because shorts are promoted to
! 359: ints by ~ or >> etc. */
1.1.1.2 maekawa 360:
361: #ifndef ULONG_MAX
1.1.1.4 ! ohara 362: #define ULONG_MAX __GMP_ULONG_MAX
! 363: #endif
! 364: #ifndef UINT_MAX
! 365: #define UINT_MAX __GMP_UINT_MAX
! 366: #endif
! 367: #ifndef USHRT_MAX
! 368: #define USHRT_MAX __GMP_USHRT_MAX
! 369: #endif
! 370: #define MP_LIMB_T_MAX (~ (mp_limb_t) 0)
! 371:
! 372: /* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
! 373: unsigned on a K&R compiler. In particular the HP-UX 10 bundled K&R cc
! 374: treats the plain decimal values in <limits.h> as signed. */
! 375: #define ULONG_HIGHBIT (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
! 376: #define UINT_HIGHBIT (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
! 377: #define USHRT_HIGHBIT ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
! 378: #define GMP_LIMB_HIGHBIT (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
! 379:
! 380: #ifndef LONG_MIN
! 381: #define LONG_MIN ((long) ULONG_HIGHBIT)
1.1.1.2 maekawa 382: #endif
383: #ifndef LONG_MAX
1.1.1.4 ! ohara 384: #define LONG_MAX (-(LONG_MIN+1))
! 385: #endif
! 386:
! 387: #ifndef INT_MIN
! 388: #define INT_MIN ((int) UINT_HIGHBIT)
! 389: #endif
! 390: #ifndef INT_MAX
! 391: #define INT_MAX (-(INT_MIN+1))
1.1.1.2 maekawa 392: #endif
393:
1.1.1.4 ! ohara 394: #ifndef SHRT_MIN
! 395: #define SHRT_MIN ((short) USHRT_HIGHBIT)
1.1.1.2 maekawa 396: #endif
1.1.1.4 ! ohara 397: #ifndef SHRT_MAX
! 398: #define SHRT_MAX ((short) (-(SHRT_MIN+1)))
1.1.1.2 maekawa 399: #endif
400:
1.1.1.4 ! ohara 401: #if __GMP_MP_SIZE_T_INT
! 402: #define MP_SIZE_T_MAX INT_MAX
! 403: #define MP_SIZE_T_MIN INT_MIN
! 404: #else
! 405: #define MP_SIZE_T_MAX LONG_MAX
! 406: #define MP_SIZE_T_MIN LONG_MIN
! 407: #endif
! 408:
! 409: #define LONG_HIGHBIT LONG_MIN
! 410: #define INT_HIGHBIT INT_MIN
! 411: #define SHRT_HIGHBIT SHRT_MIN
! 412:
! 413:
! 414: #define GMP_NUMB_HIGHBIT (CNST_LIMB(1) << (GMP_NUMB_BITS-1))
! 415:
! 416: #if GMP_NAIL_BITS == 0
! 417: #define GMP_NAIL_LOWBIT CNST_LIMB(0)
! 418: #else
! 419: #define GMP_NAIL_LOWBIT (CNST_LIMB(1) << GMP_NUMB_BITS)
! 420: #endif
! 421:
! 422: #if GMP_NAIL_BITS != 0
! 423: /* Set various *_THRESHOLD values to be used for nails. Thus we avoid using
! 424: code that has not yet been qualified. */
! 425:
! 426: #undef DIV_SB_PREINV_THRESHOLD
! 427: #undef DIV_DC_THRESHOLD
! 428: #undef POWM_THRESHOLD
! 429: #define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX
! 430: #define DIV_DC_THRESHOLD 50
! 431: #define POWM_THRESHOLD 0
! 432:
! 433: #undef GCD_ACCEL_THRESHOLD
! 434: #undef GCDEXT_THRESHOLD
! 435: #define GCD_ACCEL_THRESHOLD 3
! 436: #define GCDEXT_THRESHOLD 20
! 437:
! 438: #undef DIVREM_1_NORM_THRESHOLD
! 439: #undef DIVREM_1_UNNORM_THRESHOLD
! 440: #undef MOD_1_NORM_THRESHOLD
! 441: #undef MOD_1_UNNORM_THRESHOLD
! 442: #undef USE_PREINV_DIVREM_1
! 443: #undef USE_PREINV_MOD_1
! 444: #undef DIVREM_2_THRESHOLD
! 445: #undef DIVEXACT_1_THRESHOLD
! 446: #undef MODEXACT_1_ODD_THRESHOLD
! 447: #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* preinv always */
! 448: #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* always */
! 449: #define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* always */
! 450: #define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* always */
! 451: #define USE_PREINV_DIVREM_1 0 /* preinv never */
! 452: #define USE_PREINV_MOD_1 0 /* preinv never */
! 453: #define DIVREM_2_THRESHOLD MP_SIZE_T_MAX /* preinv never */
! 454: #define DIVEXACT_1_THRESHOLD MP_SIZE_T_MAX /* always */
! 455: #define MODEXACT_1_ODD_THRESHOLD MP_SIZE_T_MAX /* always */
! 456:
! 457: #undef GET_STR_DC_THRESHOLD
! 458: #undef GET_STR_PRECOMPUTE_THRESHOLD
! 459: #undef SET_STR_THRESHOLD
! 460: #define GET_STR_DC_THRESHOLD 22
! 461: #define GET_STR_PRECOMPUTE_THRESHOLD 42
! 462: #define SET_STR_THRESHOLD 3259
! 463:
! 464: #undef MUL_FFT_TABLE
! 465: #undef MUL_FFT_MODF_THRESHOLD
! 466: #undef MUL_FFT_THRESHOLD
! 467: #define MUL_FFT_TABLE { 400, 928, 1856, 3840, 7168, 20480, 0 }
! 468: #define MUL_FFT_MODF_THRESHOLD 416
! 469: #define MUL_FFT_THRESHOLD MP_SIZE_T_MAX
! 470:
! 471: #undef SQR_FFT_TABLE
! 472: #undef SQR_FFT_MODF_THRESHOLD
! 473: #undef SQR_FFT_THRESHOLD
! 474: #define SQR_FFT_TABLE { 400, 992, 1984, 3840, 9216, 20480, 0 }
! 475: #define SQR_FFT_MODF_THRESHOLD 416
! 476: #define SQR_FFT_THRESHOLD MP_SIZE_T_MAX
! 477:
! 478: #endif
1.1.1.2 maekawa 479:
480: /* Swap macros. */
481:
482: #define MP_LIMB_T_SWAP(x, y) \
483: do { \
484: mp_limb_t __mp_limb_t_swap__tmp = (x); \
485: (x) = (y); \
486: (y) = __mp_limb_t_swap__tmp; \
487: } while (0)
488: #define MP_SIZE_T_SWAP(x, y) \
489: do { \
490: mp_size_t __mp_size_t_swap__tmp = (x); \
491: (x) = (y); \
492: (y) = __mp_size_t_swap__tmp; \
493: } while (0)
494:
495: #define MP_PTR_SWAP(x, y) \
496: do { \
497: mp_ptr __mp_ptr_swap__tmp = (x); \
498: (x) = (y); \
499: (y) = __mp_ptr_swap__tmp; \
500: } while (0)
501: #define MP_SRCPTR_SWAP(x, y) \
502: do { \
503: mp_srcptr __mp_srcptr_swap__tmp = (x); \
504: (x) = (y); \
505: (y) = __mp_srcptr_swap__tmp; \
506: } while (0)
507:
508: #define MPN_PTR_SWAP(xp,xs, yp,ys) \
509: do { \
510: MP_PTR_SWAP (xp, yp); \
511: MP_SIZE_T_SWAP (xs, ys); \
512: } while(0)
513: #define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
514: do { \
515: MP_SRCPTR_SWAP (xp, yp); \
516: MP_SIZE_T_SWAP (xs, ys); \
517: } while(0)
518:
519: #define MPZ_PTR_SWAP(x, y) \
520: do { \
521: mpz_ptr __mpz_ptr_swap__tmp = (x); \
522: (x) = (y); \
523: (y) = __mpz_ptr_swap__tmp; \
524: } while (0)
525: #define MPZ_SRCPTR_SWAP(x, y) \
526: do { \
527: mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
528: (x) = (y); \
529: (y) = __mpz_srcptr_swap__tmp; \
530: } while (0)
531:
532:
1.1.1.4 ! ohara 533: void *__gmp_default_allocate _PROTO ((size_t));
! 534: void *__gmp_default_reallocate _PROTO ((void *, size_t, size_t));
! 535: void __gmp_default_free _PROTO ((void *, size_t));
! 536:
! 537: #define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
! 538: ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
! 539: #define __GMP_ALLOCATE_FUNC_LIMBS(n) __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
! 540:
! 541: #define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
! 542: ((type *) (*__gmp_reallocate_func) \
! 543: (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
! 544: #define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
! 545: __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
1.1.1.2 maekawa 546:
1.1.1.4 ! ohara 547: #define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
! 548: #define __GMP_FREE_FUNC_LIMBS(p,n) __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
1.1.1.2 maekawa 549:
1.1.1.4 ! ohara 550: #define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize) \
! 551: do { \
! 552: if ((oldsize) != (newsize)) \
! 553: (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
! 554: } while (0)
! 555:
! 556: #define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type) \
! 557: do { \
! 558: if ((oldsize) != (newsize)) \
! 559: (ptr) = (type *) (*__gmp_reallocate_func) \
! 560: (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type)); \
! 561: } while (0)
1.1.1.2 maekawa 562:
563:
1.1.1.4 ! ohara 564: /* Dummy for non-gcc, code involving it will go dead. */
! 565: #ifndef __GNUC__
! 566: #define __builtin_constant_p(x) 0
! 567: #endif
1.1.1.2 maekawa 568:
1.1 maekawa 569:
1.1.1.4 ! ohara 570: /* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
! 571: stack usage is compatible. __attribute__ ((regparm (N))) helps by
! 572: putting leading parameters in registers, avoiding extra stack.
1.1.1.2 maekawa 573:
1.1.1.4 ! ohara 574: regparm cannot be used with calls going through the PLT, because the
! 575: binding code there may clobber the registers (%eax, %edx, %ecx) used for
! 576: the regparm parameters. Calls to local (ie. static) functions could
! 577: still use this, if we cared to differentiate locals and globals. */
1.1 maekawa 578:
1.1.1.4 ! ohara 579: #if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC)
! 580: #define USE_LEADING_REGPARM 1
1.1 maekawa 581: #else
1.1.1.4 ! ohara 582: #define USE_LEADING_REGPARM 0
! 583: #endif
1.1 maekawa 584:
1.1.1.4 ! ohara 585: /* Macros for altering parameter order according to regparm usage. */
! 586: #if USE_LEADING_REGPARM
! 587: #define REGPARM_2_1(a,b,x) x,a,b
! 588: #define REGPARM_3_1(a,b,c,x) x,a,b,c
! 589: #define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
! 590: #else
! 591: #define REGPARM_2_1(a,b,x) a,b,x
! 592: #define REGPARM_3_1(a,b,c,x) a,b,c,x
! 593: #define REGPARM_ATTR(n)
1.1.1.2 maekawa 594: #endif
1.1 maekawa 595:
1.1.1.4 ! ohara 596:
! 597: /* ASM_L gives a local label for a gcc asm block, for use when temporary
! 598: local labels like "1:" might not be available, which is the case for
! 599: instance on the x86s (the SCO assembler doesn't support them).
! 600:
! 601: The label generated is made unique by including "%=" which is a unique
! 602: number for each insn. This ensures the same name can be used in multiple
! 603: asm blocks, perhaps via a macro. Since jumps between asm blocks are not
! 604: allowed there's no need for a label to be usable outside a single
! 605: block. */
! 606:
! 607: #define ASM_L(name) LSYM_PREFIX "asm_%=_" #name
! 608:
! 609:
! 610: #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
! 611: #if 0
! 612: /* FIXME: Check that these actually improve things.
! 613: FIXME: Need a cld after each std.
! 614: FIXME: Can't have inputs in clobbered registers, must describe them as
! 615: dummy outputs, and add volatile. */
1.1.1.2 maekawa 616: #define MPN_COPY_INCR(DST, SRC, N) \
617: __asm__ ("cld\n\trep\n\tmovsl" : : \
618: "D" (DST), "S" (SRC), "c" (N) : \
619: "cx", "di", "si", "memory")
620: #define MPN_COPY_DECR(DST, SRC, N) \
621: __asm__ ("std\n\trep\n\tmovsl" : : \
622: "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
623: "cx", "di", "si", "memory")
624: #endif
625: #endif
1.1 maekawa 626:
1.1.1.4 ! ohara 627:
! 628: void __gmpz_aorsmul_1 _PROTO ((REGPARM_3_1 (mpz_ptr w, mpz_srcptr u, mp_limb_t v, mp_size_t sub))) REGPARM_ATTR(1);
! 629: #define mpz_aorsmul_1(w,u,v,sub) __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
! 630:
! 631: #define mpz_n_pow_ui __gmpz_n_pow_ui
! 632: void mpz_n_pow_ui _PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));
! 633:
! 634:
! 635: #define mpn_add_nc __MPN(add_nc)
! 636: __GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
! 637:
! 638: #define mpn_addmul_1c __MPN(addmul_1c)
! 639: __GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
! 640:
! 641: #define mpn_addsub_n __MPN(addsub_n)
! 642: __GMP_DECLSPEC mp_limb_t mpn_addsub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
! 643:
! 644: #define mpn_addsub_nc __MPN(addsub_nc)
! 645: __GMP_DECLSPEC mp_limb_t mpn_addsub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
! 646:
! 647: #define mpn_divrem_1c __MPN(divrem_1c)
! 648: __GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
! 649:
! 650: #define mpn_dump __MPN(dump)
! 651: __GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));
! 652:
! 653: #define mpn_fib2_ui __MPN(fib2_ui)
! 654: mp_size_t mpn_fib2_ui _PROTO ((mp_ptr, mp_ptr, unsigned long));
1.1 maekawa 655:
1.1.1.2 maekawa 656: /* Remap names of internal mpn functions. */
657: #define __clz_tab __MPN(clz_tab)
658: #define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
659:
1.1.1.4 ! ohara 660: #define mpn_gcd_finda __MPN(gcd_finda)
! 661: mp_limb_t mpn_gcd_finda _PROTO((const mp_limb_t cp[2])) __GMP_ATTRIBUTE_PURE;
! 662:
! 663: #define mpn_jacobi_base __MPN(jacobi_base)
! 664: int mpn_jacobi_base _PROTO ((mp_limb_t a, mp_limb_t b, int result_bit1)) ATTRIBUTE_CONST;
! 665:
! 666: #define mpn_mod_1c __MPN(mod_1c)
! 667: __GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
! 668:
! 669: #define mpn_mul_1c __MPN(mul_1c)
! 670: __GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
! 671:
! 672: #define mpn_mul_2 __MPN(mul_2)
! 673: mp_limb_t mpn_mul_2 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
! 674:
! 675: #define mpn_mul_basecase __MPN(mul_basecase)
! 676: __GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
! 677:
! 678: #define mpn_sqr_n __MPN(sqr_n)
! 679: __GMP_DECLSPEC void mpn_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
! 680:
! 681: #define mpn_sqr_basecase __MPN(sqr_basecase)
! 682: __GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
! 683:
! 684: #define mpn_sub_nc __MPN(sub_nc)
! 685: __GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
! 686:
! 687: #define mpn_submul_1c __MPN(submul_1c)
! 688: __GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
! 689:
! 690:
! 691: typedef __gmp_randstate_struct *gmp_randstate_ptr;
! 692:
! 693: #define _gmp_rand __gmp_rand
! 694: __GMP_DECLSPEC void _gmp_rand _PROTO ((mp_ptr, gmp_randstate_t, unsigned long int));
! 695:
! 696:
! 697: /* __gmp_rands is the global state for the old-style random functions, and
! 698: is also used in the test programs (hence the __GMP_DECLSPEC).
! 699:
! 700: There's no seeding here, so mpz_random etc will generate the same
! 701: sequence every time. This is not unlike the C library random functions
! 702: if you don't seed them, so perhaps it's acceptable. Digging up a seed
! 703: from /dev/random or the like would work on many systems, but might
! 704: encourage a false confidence, since it'd be pretty much impossible to do
! 705: something that would work reliably everywhere. In any case the new style
! 706: functions are recommended to applications which care about randomness, so
! 707: the old functions aren't too important. */
! 708:
! 709: __GMP_DECLSPEC extern char __gmp_rands_initialized;
! 710: __GMP_DECLSPEC extern gmp_randstate_t __gmp_rands;
! 711:
! 712: #define RANDS \
! 713: ((__gmp_rands_initialized ? 0 \
! 714: : (__gmp_rands_initialized = 1, \
! 715: gmp_randinit_default (__gmp_rands), 0)), \
! 716: __gmp_rands)
! 717:
! 718: /* this is used by the test programs, to free memory */
! 719: #define RANDS_CLEAR() \
! 720: do { \
! 721: if (__gmp_rands_initialized) \
! 722: { \
! 723: __gmp_rands_initialized = 0; \
! 724: gmp_randclear (__gmp_rands); \
! 725: } \
! 726: } while (0)
! 727:
! 728:
! 729: /* kara uses n+1 limbs of temporary space and then recurses with the
! 730: balance, so need (n+1) + (ceil(n/2)+1) + (ceil(n/4)+1) + ... */
! 731: #define MPN_KARA_MUL_N_TSIZE(n) (2*((n)+BITS_PER_MP_LIMB))
! 732: #define MPN_KARA_SQR_N_TSIZE(n) (2*((n)+BITS_PER_MP_LIMB))
! 733:
! 734: /* toom3 uses 4*(ceil(n/3)) of temporary space and then recurses with the
! 735: balance either into itself or kara. The following might be
! 736: overestimates. */
! 737: #define MPN_TOOM3_MUL_N_TSIZE(n) (2*(n) + 3*BITS_PER_MP_LIMB)
! 738: #define MPN_TOOM3_SQR_N_TSIZE(n) (2*(n) + 3*BITS_PER_MP_LIMB)
! 739:
! 740: /* need 2 so that n2>=1 */
! 741: #define MPN_KARA_MUL_N_MINSIZE 2
! 742: #define MPN_KARA_SQR_N_MINSIZE 2
! 743:
! 744: /* Need l>=1, ls>=1, and 2*ls > l (the latter for the tD MPN_INCR_U) */
! 745: #define MPN_TOOM3_MUL_N_MINSIZE 11
! 746: #define MPN_TOOM3_SQR_N_MINSIZE 11
! 747:
! 748: #define mpn_sqr_diagonal __MPN(sqr_diagonal)
! 749: void mpn_sqr_diagonal _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1.1.1.2 maekawa 750:
751: #define mpn_kara_mul_n __MPN(kara_mul_n)
752: void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
753:
754: #define mpn_kara_sqr_n __MPN(kara_sqr_n)
755: void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
756:
757: #define mpn_toom3_mul_n __MPN(toom3_mul_n)
758: void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));
759:
760: #define mpn_toom3_sqr_n __MPN(toom3_sqr_n)
761: void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
762:
1.1.1.4 ! ohara 763:
! 764: #define mpn_fft_best_k __MPN(fft_best_k)
! 765: int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr)) ATTRIBUTE_CONST;
1.1.1.2 maekawa 766:
767: #define mpn_mul_fft __MPN(mul_fft)
768: void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
769: mp_srcptr n, mp_size_t nl,
770: mp_srcptr m, mp_size_t ml,
771: int k));
772:
773: #define mpn_mul_fft_full __MPN(mul_fft_full)
774: void mpn_mul_fft_full _PROTO ((mp_ptr op,
775: mp_srcptr n, mp_size_t nl,
776: mp_srcptr m, mp_size_t ml));
777:
1.1.1.4 ! ohara 778: #define mpn_fft_next_size __MPN(fft_next_size)
! 779: mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k)) ATTRIBUTE_CONST;
! 780:
! 781: #define mpn_sb_divrem_mn __MPN(sb_divrem_mn)
! 782: mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t,
! 783: mp_srcptr, mp_size_t));
! 784:
! 785: #define mpn_dc_divrem_n __MPN(dc_divrem_n)
! 786: mp_limb_t mpn_dc_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
1.1.1.2 maekawa 787:
1.1.1.4 ! ohara 788: /* #define mpn_tdiv_q __MPN(tdiv_q) */
1.1.1.2 maekawa 789: /* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */
790:
1.1.1.4 ! ohara 791: #define mpz_divexact_gcd __gmpz_divexact_gcd
! 792: void mpz_divexact_gcd _PROTO ((mpz_ptr q, mpz_srcptr a, mpz_srcptr d));
! 793:
! 794: #define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
! 795: #ifdef _GMP_H_HAVE_FILE
! 796: size_t mpz_inp_str_nowhite _PROTO ((mpz_ptr x, FILE *stream, int base, int c, size_t nread));
! 797: #endif
! 798:
! 799: #define mpn_divisible_p __MPN(divisible_p)
! 800: int mpn_divisible_p _PROTO ((mp_srcptr ap, mp_size_t asize,
! 801: mp_srcptr dp, mp_size_t dsize)) __GMP_ATTRIBUTE_PURE;
! 802:
! 803: #define mpn_rootrem __gmpn_rootrem
! 804: mp_size_t mpn_rootrem _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
! 805:
! 806:
! 807: /* from gmp.h */
! 808: #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
! 809: #define MPN_COPY_INCR(dst, src, size) \
! 810: do { \
! 811: ASSERT ((size) >= 0); \
! 812: ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \
! 813: __GMPN_COPY_INCR (dst, src, size); \
! 814: } while (0)
! 815: #endif
! 816:
! 817: #if defined (_CRAY)
! 818: #define MPN_COPY_INCR(dst, src, n) \
1.1 maekawa 819: do { \
1.1.1.4 ! ohara 820: int __i; /* Faster on some Crays with plain int */ \
! 821: _Pragma ("_CRI ivdep"); \
! 822: for (__i = 0; __i < (n); __i++) \
! 823: (dst)[__i] = (src)[__i]; \
1.1 maekawa 824: } while (0)
1.1.1.2 maekawa 825: #endif
1.1.1.4 ! ohara 826:
! 827: #define mpn_copyi __MPN(copyi)
! 828: void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
! 829:
! 830: #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
! 831: #define MPN_COPY_INCR(dst, src, size) \
! 832: do { \
! 833: ASSERT ((size) >= 0); \
! 834: ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \
! 835: mpn_copyi (dst, src, size); \
! 836: } while (0)
1.1.1.2 maekawa 837: #endif
838:
1.1.1.4 ! ohara 839: /* Copy N limbs from SRC to DST incrementing, N==0 allowed. */
! 840: #if ! defined (MPN_COPY_INCR)
! 841: #define MPN_COPY_INCR(dst, src, n) \
! 842: do { \
! 843: ASSERT ((n) >= 0); \
! 844: ASSERT (MPN_SAME_OR_INCR_P (dst, src, n)); \
! 845: if ((n) != 0) \
! 846: { \
! 847: mp_size_t __n = (n) - 1; \
! 848: mp_ptr __dst = (dst); \
! 849: mp_srcptr __src = (src); \
! 850: mp_limb_t __x; \
! 851: __x = *__src++; \
! 852: if (__n != 0) \
! 853: { \
! 854: do \
! 855: { \
! 856: *__dst++ = __x; \
! 857: __x = *__src++; \
! 858: } \
! 859: while (--__n); \
! 860: } \
! 861: *__dst++ = __x; \
! 862: } \
! 863: } while (0)
1.1.1.2 maekawa 864: #endif
865:
1.1.1.4 ! ohara 866:
! 867: /* As per __GMPN_COPY_INCR in gmp.h. */
! 868: #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
! 869: #define MPN_COPY_DECR(dst, src, size) \
! 870: do { \
! 871: ASSERT ((size) >= 0); \
! 872: ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \
! 873: if ((size) != 0) \
! 874: { \
! 875: mp_ptr __dst = (dst) + (size); \
! 876: mp_srcptr __src = (src) + (size); \
! 877: mp_size_t __size = (size); \
! 878: do \
! 879: *--__dst = *--__src; \
! 880: while (--__size != 0); \
! 881: } \
! 882: } while (0)
! 883: #endif
! 884:
! 885: #if defined (_CRAY)
! 886: #define MPN_COPY_DECR(dst, src, n) \
1.1 maekawa 887: do { \
1.1.1.4 ! ohara 888: int __i; /* Faster on some Crays with plain int */ \
! 889: _Pragma ("_CRI ivdep"); \
! 890: for (__i = (n) - 1; __i >= 0; __i--) \
! 891: (dst)[__i] = (src)[__i]; \
1.1 maekawa 892: } while (0)
1.1.1.2 maekawa 893: #endif
1.1.1.4 ! ohara 894:
! 895: #define mpn_copyd __MPN(copyd)
! 896: void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
! 897:
! 898: #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
! 899: #define MPN_COPY_DECR(dst, src, size) \
! 900: do { \
! 901: ASSERT ((size) >= 0); \
! 902: ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \
! 903: mpn_copyd (dst, src, size); \
! 904: } while (0)
1.1.1.2 maekawa 905: #endif
906:
1.1.1.4 ! ohara 907: /* Copy N limbs from SRC to DST decrementing, N==0 allowed. */
! 908: #if ! defined (MPN_COPY_DECR)
! 909: #define MPN_COPY_DECR(dst, src, n) \
! 910: do { \
! 911: ASSERT ((n) >= 0); \
! 912: ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \
! 913: if ((n) != 0) \
! 914: { \
! 915: mp_size_t __n = (n) - 1; \
! 916: mp_ptr __dst = (dst) + __n; \
! 917: mp_srcptr __src = (src) + __n; \
! 918: mp_limb_t __x; \
! 919: __x = *__src--; \
! 920: if (__n != 0) \
! 921: { \
! 922: do \
! 923: { \
! 924: *__dst-- = __x; \
! 925: __x = *__src--; \
! 926: } \
! 927: while (--__n); \
! 928: } \
! 929: *__dst-- = __x; \
! 930: } \
! 931: } while (0)
1.1.1.2 maekawa 932: #endif
933:
1.1.1.4 ! ohara 934:
1.1.1.2 maekawa 935: #ifndef MPN_COPY
1.1.1.4 ! ohara 936: #define MPN_COPY(d,s,n) \
! 937: do { \
! 938: ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \
! 939: MPN_COPY_INCR (d, s, n); \
! 940: } while (0)
! 941: #endif
! 942:
! 943:
! 944: /* Set {dst,size} to the limbs of {src,size} in reverse order. */
! 945: #define MPN_REVERSE(dst, src, size) \
! 946: do { \
! 947: mp_ptr __dst = (dst); \
! 948: mp_size_t __size = (size); \
! 949: mp_srcptr __src = (src) + __size - 1; \
! 950: mp_size_t __i; \
! 951: ASSERT ((size) >= 0); \
! 952: ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
! 953: CRAY_Pragma ("_CRI ivdep"); \
! 954: for (__i = 0; __i < __size; __i++) \
! 955: { \
! 956: *__dst = *__src; \
! 957: __dst++; \
! 958: __src--; \
! 959: } \
! 960: } while (0)
! 961:
! 962:
! 963: /* Zero n limbs at dst.
! 964:
! 965: For power and powerpc we want an inline stu/bdnz loop for zeroing. On
! 966: ppc630 for instance this is optimal since it can sustain only 1 store per
! 967: cycle.
! 968:
! 969: gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
! 970: "for" loop in the generic code below can become stu/bdnz. The do/while
! 971: here helps it get to that. The same caveat about plain -mpowerpc64 mode
! 972: applies here as to __GMPN_COPY_INCR in gmp.h.
! 973:
! 974: xlc 3.1 already generates stu/bdnz from the generic C, and does so from
! 975: this loop too.
! 976:
! 977: Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
! 978: at a time. MPN_ZERO isn't all that important in GMP, so it might be more
! 979: trouble than it's worth to do the same, though perhaps a call to memset
! 980: would be good when on a GNU system. */
! 981:
! 982: #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
! 983: #define MPN_ZERO(dst, n) \
! 984: do { \
! 985: ASSERT ((n) >= 0); \
! 986: if ((n) != 0) \
! 987: { \
! 988: mp_ptr __dst = (dst) - 1; \
! 989: mp_size_t __n = (n); \
! 990: do \
! 991: *++__dst = 0; \
! 992: while (--__n); \
! 993: } \
! 994: } while (0)
1.1.1.2 maekawa 995: #endif
1.1 maekawa 996:
1.1.1.2 maekawa 997: #ifndef MPN_ZERO
1.1.1.4 ! ohara 998: #define MPN_ZERO(dst, n) \
! 999: do { \
! 1000: ASSERT ((n) >= 0); \
! 1001: if ((n) != 0) \
! 1002: { \
! 1003: mp_ptr __dst = (dst); \
! 1004: mp_size_t __n = (n); \
! 1005: do \
! 1006: *__dst++ = 0; \
! 1007: while (--__n); \
! 1008: } \
1.1 maekawa 1009: } while (0)
1.1.1.2 maekawa 1010: #endif
1.1 maekawa 1011:
1.1.1.4 ! ohara 1012:
! 1013: /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
! 1014: start up and would need to strip a lot of zeros before it'd be faster
! 1015: than a simple cmpl loop. Here are some times in cycles for
! 1016: std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
! 1017: low zeros).
! 1018:
! 1019: std cld
! 1020: P5 18 16
! 1021: P6 46 38
! 1022: K6 36 13
! 1023: K7 21 20
! 1024: */
1.1.1.2 maekawa 1025: #ifndef MPN_NORMALIZE
1.1 maekawa 1026: #define MPN_NORMALIZE(DST, NLIMBS) \
1027: do { \
1028: while (NLIMBS > 0) \
1029: { \
1030: if ((DST)[(NLIMBS) - 1] != 0) \
1031: break; \
1032: NLIMBS--; \
1033: } \
1034: } while (0)
1.1.1.2 maekawa 1035: #endif
1036: #ifndef MPN_NORMALIZE_NOT_ZERO
1.1.1.4 ! ohara 1037: #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
! 1038: do { \
! 1039: ASSERT ((NLIMBS) >= 1); \
! 1040: while (1) \
! 1041: { \
! 1042: if ((DST)[(NLIMBS) - 1] != 0) \
! 1043: break; \
! 1044: NLIMBS--; \
! 1045: } \
1.1 maekawa 1046: } while (0)
1.1.1.2 maekawa 1047: #endif
1.1 maekawa 1048:
1.1.1.4 ! ohara 1049: /* Strip least significant zero limbs from {ptr,size} by incrementing ptr
! 1050: and decrementing size. low should be ptr[0], and will be the new ptr[0]
! 1051: on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and
! 1052: somewhere a non-zero limb. */
! 1053: #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \
! 1054: do { \
! 1055: ASSERT ((size) >= 1); \
! 1056: ASSERT ((low) == (ptr)[0]); \
! 1057: \
! 1058: while ((low) == 0) \
! 1059: { \
! 1060: (size)--; \
! 1061: ASSERT ((size) >= 1); \
! 1062: (ptr)++; \
! 1063: (low) = *(ptr); \
! 1064: } \
! 1065: } while (0)
1.1.1.2 maekawa 1066:
1067: /* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
1068: temporary variable; it will be automatically cleared out at function
1069: return. We use __x here to make it possible to accept both mpz_ptr and
1070: mpz_t arguments. */
1.1.1.4 ! ohara 1071: #define MPZ_TMP_INIT(X, NLIMBS) \
! 1072: do { \
! 1073: mpz_ptr __x = (X); \
! 1074: ASSERT ((NLIMBS) >= 1); \
! 1075: __x->_mp_alloc = (NLIMBS); \
! 1076: __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB); \
1.1 maekawa 1077: } while (0)
1078:
1.1.1.4 ! ohara 1079: /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
! 1080: #define MPZ_REALLOC(z,n) ((n) > ALLOC(z) ? _mpz_realloc(z,n) : PTR(z))
! 1081:
! 1082: #define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1)
! 1083:
! 1084:
! 1085: /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
! 1086: f1p.
! 1087:
! 1088: From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
! 1089: nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the
! 1090: number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
! 1091:
! 1092: The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
! 1093: without good floating point. There's +2 for rounding up, and a further
! 1094: +2 since at the last step x limbs are doubled into a 2x+1 limb region
! 1095: whereas the actual F[2k] value might be only 2x-1 limbs.
! 1096:
! 1097: Note that a division is done first, since on a 32-bit system it's at
! 1098: least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be
! 1099: about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
! 1100: whatever a multiply of two 190Mbyte numbers takes.)
! 1101:
! 1102: Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
! 1103: worked into the multiplier. */
! 1104:
! 1105: #define MPN_FIB2_SIZE(n) \
! 1106: ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
1.1.1.2 maekawa 1107:
1.1.1.4 ! ohara 1108:
! 1109: /* FIB_TABLE(n) returns the Fibonacci number F[n]. Must have n in the range
! 1110: -1 <= n <= FIB_TABLE_LIMIT.
! 1111:
! 1112: FIB_TABLE_LUCNUM_LIMIT is the largest n for which L[n] = F[n] + 2*F[n-1]
! 1113: fits in a limb.
! 1114:
! 1115: This data generated by code at the end of mpn/generic/fib2_ui.c. */
! 1116:
! 1117: extern const mp_limb_t __gmp_fib_table[];
! 1118: #define FIB_TABLE(n) (__gmp_fib_table[(n)+1])
! 1119:
! 1120: #if GMP_NUMB_BITS >= 64
! 1121: #define FIB_TABLE_LIMIT 93
! 1122: #define FIB_TABLE_LUCNUM_LIMIT 92
! 1123: #else
! 1124: #if GMP_NUMB_BITS >= 32
! 1125: #define FIB_TABLE_LIMIT 47
! 1126: #define FIB_TABLE_LUCNUM_LIMIT 46
! 1127: #else
! 1128: #if GMP_NUMB_BITS >= 16
! 1129: #define FIB_TABLE_LIMIT 24
! 1130: #define FIB_TABLE_LUCNUM_LIMIT 23
! 1131: #else
! 1132: #if GMP_NUMB_BITS >= 8
! 1133: #define FIB_TABLE_LIMIT 13
! 1134: #define FIB_TABLE_LUCNUM_LIMIT 11
! 1135: #else
! 1136: #if GMP_NUMB_BITS >= 4
! 1137: #define FIB_TABLE_LIMIT 7
! 1138: #define FIB_TABLE_LUCNUM_LIMIT 5
! 1139: #endif /* 4 */
! 1140: #endif /* 8 */
! 1141: #endif /* 16 */
! 1142: #endif /* 32 */
! 1143: #endif /* 64 */
! 1144:
! 1145:
! 1146: /* For a threshold between algorithms A and B, size>=thresh is where B
! 1147: should be used. Special value MP_SIZE_T_MAX means only ever use A, or
! 1148: value 0 means only ever use B. The tests for these special values will
! 1149: be compile-time constants, so the compiler should be able to eliminate
! 1150: the code for the unwanted algorithm. */
! 1151:
! 1152: #define ABOVE_THRESHOLD(size,thresh) \
! 1153: ((thresh) == 0 \
! 1154: || ((thresh) != MP_SIZE_T_MAX \
! 1155: && (size) >= (thresh)))
! 1156: #define BELOW_THRESHOLD(size,thresh) (! ABOVE_THRESHOLD (size, thresh))
! 1157:
! 1158:
! 1159: /* If MUL_KARATSUBA_THRESHOLD is not already defined, define it to a
1.1.1.2 maekawa 1160: value which is good on most machines. */
1.1.1.4 ! ohara 1161: #ifndef MUL_KARATSUBA_THRESHOLD
! 1162: #define MUL_KARATSUBA_THRESHOLD 32
1.1.1.2 maekawa 1163: #endif
1164:
1.1.1.4 ! ohara 1165: /* If MUL_TOOM3_THRESHOLD is not already defined, define it to a
1.1.1.2 maekawa 1166: value which is good on most machines. */
1.1.1.4 ! ohara 1167: #ifndef MUL_TOOM3_THRESHOLD
! 1168: #define MUL_TOOM3_THRESHOLD 256
! 1169: #endif
! 1170:
! 1171: /* This is the threshold at which mpn_sqr_basecase should take over from
! 1172: mpn_mul_basecase in mpn_sqr_n. Default is to use mpn_sqr_basecase
! 1173: always.
! 1174:
! 1175: If it turns out that mpn_kara_sqr_n becomes faster than mpn_mul_basecase
! 1176: before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the
! 1177: karatsuba threshold and SQR_KARATSUBA_THRESHOLD is 0. This oddity arises
! 1178: more or less because SQR_KARATSUBA_THRESHOLD represents the size up to
! 1179: which mpn_sqr_basecase should be used, and that may be never. */
! 1180:
! 1181: #ifndef SQR_BASECASE_THRESHOLD
! 1182: #define SQR_BASECASE_THRESHOLD 0
1.1.1.2 maekawa 1183: #endif
1184:
1.1.1.4 ! ohara 1185: #ifndef SQR_KARATSUBA_THRESHOLD
! 1186: #define SQR_KARATSUBA_THRESHOLD (2*MUL_KARATSUBA_THRESHOLD)
1.1.1.2 maekawa 1187: #endif
1188:
1.1.1.4 ! ohara 1189: #ifndef SQR_TOOM3_THRESHOLD
! 1190: #define SQR_TOOM3_THRESHOLD (2*MUL_TOOM3_THRESHOLD)
1.1.1.2 maekawa 1191: #endif
1192:
1193: /* First k to use for an FFT modF multiply. A modF FFT is an order
1194: log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
1195: whereas k=4 is 1.33 which is faster than toom3 at 1.485. */
1196: #define FFT_FIRST_K 4
1197:
1198: /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
1.1.1.4 ! ohara 1199: #ifndef MUL_FFT_MODF_THRESHOLD
! 1200: #define MUL_FFT_MODF_THRESHOLD (MUL_TOOM3_THRESHOLD * 3)
1.1.1.2 maekawa 1201: #endif
1.1.1.4 ! ohara 1202: #ifndef SQR_FFT_MODF_THRESHOLD
! 1203: #define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3)
1.1.1.2 maekawa 1204: #endif
1205:
1206: /* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
1207: will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
1208: NxN->2N multiply and not recursing into itself is an order
1209: log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
1210: is the first better than toom3. */
1.1.1.4 ! ohara 1211: #ifndef MUL_FFT_THRESHOLD
! 1212: #define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10)
1.1.1.2 maekawa 1213: #endif
1.1.1.4 ! ohara 1214: #ifndef SQR_FFT_THRESHOLD
! 1215: #define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10)
1.1.1.2 maekawa 1216: #endif
1217:
1218: /* Table of thresholds for successive modF FFT "k"s. The first entry is
1219: where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
1220: etc. See mpn_fft_best_k(). */
1.1.1.4 ! ohara 1221: #ifndef MUL_FFT_TABLE
! 1222: #define MUL_FFT_TABLE \
! 1223: { MUL_TOOM3_THRESHOLD * 4, /* k=5 */ \
! 1224: MUL_TOOM3_THRESHOLD * 8, /* k=6 */ \
! 1225: MUL_TOOM3_THRESHOLD * 16, /* k=7 */ \
! 1226: MUL_TOOM3_THRESHOLD * 32, /* k=8 */ \
! 1227: MUL_TOOM3_THRESHOLD * 96, /* k=9 */ \
! 1228: MUL_TOOM3_THRESHOLD * 288, /* k=10 */ \
1.1.1.2 maekawa 1229: 0 }
1230: #endif
1.1.1.4 ! ohara 1231: #ifndef SQR_FFT_TABLE
! 1232: #define SQR_FFT_TABLE \
! 1233: { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \
! 1234: SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \
! 1235: SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \
! 1236: SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \
! 1237: SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \
! 1238: SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \
1.1.1.2 maekawa 1239: 0 }
1240: #endif
1241:
1242: #ifndef FFT_TABLE_ATTRS
1243: #define FFT_TABLE_ATTRS static const
1244: #endif
1245:
1246: #define MPN_FFT_TABLE_SIZE 16
1247:
1248:
1.1.1.4 ! ohara 1249: /* mpn_dc_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
! 1250: div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
! 1251: i.e. n/2 >= MUL_KARATSUBA_THRESHOLD
! 1252:
! 1253: Measured values are between 2 and 4 times MUL_KARATSUBA_THRESHOLD, so go
! 1254: for 3 as an average. */
! 1255:
! 1256: #ifndef DIV_DC_THRESHOLD
! 1257: #define DIV_DC_THRESHOLD (3 * MUL_KARATSUBA_THRESHOLD)
! 1258: #endif
! 1259:
! 1260:
1.1.1.2 maekawa 1261: /* Return non-zero if xp,xsize and yp,ysize overlap.
1262: If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
1263: overlap. If both these are false, there's an overlap. */
1264: #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
1265: ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
1.1.1.4 ! ohara 1266: #define MEM_OVERLAP_P(xp, xsize, yp, ysize) \
! 1267: ( (char *) (xp) + (xsize) > (char *) (yp) \
! 1268: && (char *) (yp) + (ysize) > (char *) (xp))
! 1269:
! 1270: /* Return non-zero if xp,xsize and yp,ysize are either identical or not
! 1271: overlapping. Return zero if they're partially overlapping. */
! 1272: #define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \
! 1273: MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
! 1274: #define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \
! 1275: ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
! 1276:
! 1277: /* Return non-zero if dst,dsize and src,ssize are either identical or
! 1278: overlapping in a way suitable for an incrementing/decrementing algorithm.
! 1279: Return zero if they're partially overlapping in an unsuitable fashion. */
! 1280: #define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \
! 1281: ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
! 1282: #define MPN_SAME_OR_INCR_P(dst, src, size) \
! 1283: MPN_SAME_OR_INCR2_P(dst, size, src, size)
! 1284: #define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \
! 1285: ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
! 1286: #define MPN_SAME_OR_DECR_P(dst, src, size) \
! 1287: MPN_SAME_OR_DECR2_P(dst, size, src, size)
1.1.1.2 maekawa 1288:
1289:
1290: /* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
1291: ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
1292: does it always. Generally assertions are meant for development, but
1293: might help when looking for a problem later too.
1294:
1.1.1.4 ! ohara 1295: Note that strings shouldn't be used within the ASSERT expression,
! 1296: eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
! 1297: used in the !HAVE_STRINGIZE case (ie. K&R). */
1.1.1.2 maekawa 1298:
1299: #ifdef __LINE__
1300: #define ASSERT_LINE __LINE__
1301: #else
1302: #define ASSERT_LINE -1
1303: #endif
1304:
1305: #ifdef __FILE__
1306: #define ASSERT_FILE __FILE__
1307: #else
1308: #define ASSERT_FILE ""
1309: #endif
1310:
1.1.1.4 ! ohara 1311: void __gmp_assert_header _PROTO ((const char *filename, int linenum));
! 1312: __GMP_DECLSPEC void __gmp_assert_fail _PROTO ((const char *filename, int linenum, const char *expr)) ATTRIBUTE_NORETURN;
1.1.1.2 maekawa 1313:
1314: #if HAVE_STRINGIZE
1315: #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
1316: #else
1317: #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
1318: #endif
1319:
1.1.1.4 ! ohara 1320: #define ASSERT_ALWAYS(expr) \
! 1321: do { \
! 1322: if (!(expr)) \
! 1323: ASSERT_FAIL (expr); \
! 1324: } while (0)
! 1325:
! 1326: #if WANT_ASSERT
! 1327: #define ASSERT(expr) ASSERT_ALWAYS (expr)
1.1.1.2 maekawa 1328: #else
1.1.1.4 ! ohara 1329: #define ASSERT(expr) do {} while (0)
1.1.1.2 maekawa 1330: #endif
1331:
1332:
1.1.1.4 ! ohara 1333: /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
! 1334: that it's zero. In both cases if assertion checking is disabled the
! 1335: expression is still evaluated. These macros are meant for use with
! 1336: routines like mpn_add_n() where the return value represents a carry or
! 1337: whatever that should or shouldn't occur in some context. For example,
! 1338: ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
1.1.1.2 maekawa 1339: #if WANT_ASSERT
1.1.1.4 ! ohara 1340: #define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0)
1.1.1.2 maekawa 1341: #define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
1342: #else
1.1.1.4 ! ohara 1343: #define ASSERT_CARRY(expr) (expr)
1.1.1.2 maekawa 1344: #define ASSERT_NOCARRY(expr) (expr)
1345: #endif
1346:
1347:
1.1.1.4 ! ohara 1348: /* ASSERT_CODE includes code when assertion checking is wanted. This is the
! 1349: same as writing "#if WANT_ASSERT", but more compact. */
! 1350: #if WANT_ASSERT
! 1351: #define ASSERT_CODE(expr) expr
! 1352: #else
! 1353: #define ASSERT_CODE(expr)
! 1354: #endif
! 1355:
! 1356:
! 1357: /* Test that an mpq_t is in fully canonical form. This can be used as
! 1358: protection on routines like mpq_equal which give wrong results on
! 1359: non-canonical inputs. */
! 1360: #if WANT_ASSERT
! 1361: #define ASSERT_MPQ_CANONICAL(q) \
! 1362: do { \
! 1363: ASSERT (q->_mp_den._mp_size > 0); \
! 1364: if (q->_mp_num._mp_size == 0) \
! 1365: { \
! 1366: /* zero should be 0/1 */ \
! 1367: ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \
! 1368: } \
! 1369: else \
! 1370: { \
! 1371: /* no common factors */ \
! 1372: mpz_t __g; \
! 1373: mpz_init (__g); \
! 1374: mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \
! 1375: ASSERT (mpz_cmp_ui (__g, 1) == 0); \
! 1376: mpz_clear (__g); \
! 1377: } \
! 1378: } while (0)
! 1379: #else
! 1380: #define ASSERT_MPQ_CANONICAL(q) do {} while (0)
! 1381: #endif
! 1382:
! 1383: /* Check that the nail parts are zero. */
! 1384: #define ASSERT_ALWAYS_LIMB(limb) \
! 1385: do { \
! 1386: mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \
! 1387: ASSERT_ALWAYS (__nail == 0); \
! 1388: } while (0)
! 1389: #define ASSERT_ALWAYS_MPN(ptr, size) \
! 1390: do { \
! 1391: /* let whole loop go dead when no nails */ \
! 1392: if (GMP_NAIL_BITS != 0) \
! 1393: { \
! 1394: mp_size_t __i; \
! 1395: for (__i = 0; __i < (size); __i++) \
! 1396: ASSERT_ALWAYS_LIMB ((ptr)[__i]); \
! 1397: } \
! 1398: } while (0)
! 1399: #if WANT_ASSERT
! 1400: #define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb)
! 1401: #define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size)
! 1402: #else
! 1403: #define ASSERT_LIMB(limb) do {} while (0)
! 1404: #define ASSERT_MPN(ptr, size) do {} while (0)
! 1405: #endif
! 1406:
! 1407:
! 1408: /* Assert that an mpn region {ptr,size} is zero, or non-zero.
! 1409: size==0 is allowed, and in that case {ptr,size} considered to be zero. */
! 1410: #if WANT_ASSERT
! 1411: #define ASSERT_MPN_ZERO_P(ptr,size) \
! 1412: do { \
! 1413: mp_size_t __i; \
! 1414: ASSERT ((size) >= 0); \
! 1415: for (__i = 0; __i < (size); __i++) \
! 1416: ASSERT ((ptr)[__i] == 0); \
! 1417: } while (0)
! 1418: #define ASSERT_MPN_NONZERO_P(ptr,size) \
! 1419: do { \
! 1420: mp_size_t __i; \
! 1421: int __nonzero = 0; \
! 1422: ASSERT ((size) >= 0); \
! 1423: for (__i = 0; __i < (size); __i++) \
! 1424: if ((ptr)[__i] != 0) \
! 1425: { \
! 1426: __nonzero = 1; \
! 1427: break; \
! 1428: } \
! 1429: ASSERT (__nonzero); \
! 1430: } while (0)
! 1431: #else
! 1432: #define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0)
! 1433: #define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0)
! 1434: #endif
! 1435:
! 1436:
1.1.1.2 maekawa 1437: #if HAVE_NATIVE_mpn_com_n
1438: #define mpn_com_n __MPN(com_n)
1439: void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1440: #else
1.1.1.4 ! ohara 1441: #define mpn_com_n(d,s,n) \
! 1442: do { \
! 1443: mp_ptr __d = (d); \
! 1444: mp_srcptr __s = (s); \
! 1445: mp_size_t __n = (n); \
! 1446: ASSERT (__n >= 1); \
! 1447: ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \
! 1448: do \
! 1449: *__d++ = (~ *__s++) & GMP_NUMB_MASK; \
! 1450: while (--__n); \
! 1451: } while (0)
! 1452: #endif
! 1453:
! 1454: #define MPN_LOGOPS_N_INLINE(d, s1, s2, n, operation) \
! 1455: do { \
! 1456: mp_ptr __d = (d); \
! 1457: mp_srcptr __s1 = (s1); \
! 1458: mp_srcptr __s2 = (s2); \
! 1459: mp_size_t __n = (n); \
! 1460: ASSERT (__n >= 1); \
! 1461: ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s1, __n)); \
! 1462: ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s2, __n)); \
! 1463: do \
! 1464: operation; \
! 1465: while (--__n); \
! 1466: } while (0)
1.1.1.2 maekawa 1467:
1468: #if HAVE_NATIVE_mpn_and_n
1469: #define mpn_and_n __MPN(and_n)
1470: void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1471: #else
1.1.1.4 ! ohara 1472: #define mpn_and_n(d, s1, s2, n) \
! 1473: MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & *__s2++)
1.1.1.2 maekawa 1474: #endif
1475:
1476: #if HAVE_NATIVE_mpn_andn_n
1477: #define mpn_andn_n __MPN(andn_n)
1478: void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1479: #else
1.1.1.4 ! ohara 1480: #define mpn_andn_n(d, s1, s2, n) \
! 1481: MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & ~*__s2++)
1.1.1.2 maekawa 1482: #endif
1483:
1484: #if HAVE_NATIVE_mpn_nand_n
1485: #define mpn_nand_n __MPN(nand_n)
1486: void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1487: #else
1.1.1.4 ! ohara 1488: #define mpn_nand_n(d, s1, s2, n) \
! 1489: MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~ (*__s1++ & *__s2++))
1.1.1.2 maekawa 1490: #endif
1491:
1492: #if HAVE_NATIVE_mpn_ior_n
1493: #define mpn_ior_n __MPN(ior_n)
1494: void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1495: #else
1.1.1.4 ! ohara 1496: #define mpn_ior_n(d, s1, s2, n) \
! 1497: MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | *__s2++)
1.1.1.2 maekawa 1498: #endif
1499:
1500: #if HAVE_NATIVE_mpn_iorn_n
1501: #define mpn_iorn_n __MPN(iorn_n)
1502: void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1503: #else
1.1.1.4 ! ohara 1504: #define mpn_iorn_n(d, s1, s2, n) \
! 1505: MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | ~*__s2++)
1.1.1.2 maekawa 1506: #endif
1507:
1508: #if HAVE_NATIVE_mpn_nior_n
1509: #define mpn_nior_n __MPN(nior_n)
1510: void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1511: #else
1.1.1.4 ! ohara 1512: #define mpn_nior_n(d, s1, s2, n) \
! 1513: MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~ (*__s1++ | *__s2++))
1.1.1.2 maekawa 1514: #endif
1515:
1516: #if HAVE_NATIVE_mpn_xor_n
1517: #define mpn_xor_n __MPN(xor_n)
1518: void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1519: #else
1.1.1.4 ! ohara 1520: #define mpn_xor_n(d, s1, s2, n) \
! 1521: MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ ^ *__s2++)
1.1.1.2 maekawa 1522: #endif
1523:
1524: #if HAVE_NATIVE_mpn_xnor_n
1525: #define mpn_xnor_n __MPN(xnor_n)
1526: void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1527: #else
1.1.1.4 ! ohara 1528: #define mpn_xnor_n(d, s1, s2, n) \
! 1529: MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~ (*__s1++ ^ *__s2++))
! 1530: #endif
! 1531:
! 1532:
! 1533: /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
! 1534: subtract. */
! 1535: #if GMP_NAIL_BITS == 0
! 1536: #define SUBC_LIMB(cout, w, x, y) \
! 1537: do { \
! 1538: mp_limb_t __x = (x); \
! 1539: mp_limb_t __y = (y); \
! 1540: mp_limb_t __w = __x - __y; \
! 1541: (w) = __w; \
! 1542: (cout) = __w > __x; \
! 1543: } while (0)
! 1544: #else
! 1545: #define SUBC_LIMB(cout, w, x, y) \
! 1546: do { \
! 1547: mp_limb_t __w = (x) - (y); \
! 1548: (w) = __w & GMP_NUMB_MASK; \
! 1549: (cout) = __w >> (GMP_LIMB_BITS-1); \
! 1550: } while (0)
! 1551: #endif
! 1552:
! 1553:
! 1554: /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
! 1555: expecting no carry (or borrow) from that.
! 1556:
! 1557: The size parameter is only for the benefit of assertion checking. In a
! 1558: normal build it's unused and the carry/borrow is just propagated as far
! 1559: as it needs to go.
! 1560:
! 1561: On random data, usually only one or two limbs of {ptr,size} get updated,
! 1562: so there's no need for any sophisticated looping, just something compact
! 1563: and sensible.
! 1564:
! 1565: FIXME: Do the generic MPN_{INCR,DECR}_U with a block of code like
! 1566: mpn_incr_u but with the assertions built in, rather than the separate
! 1567: add_1 and sub_1 when assertion checking.
! 1568:
! 1569: FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
! 1570: declaring their operand sizes, then remove the former. This is purely
! 1571: for the benefit of assertion checking. */
! 1572:
! 1573: #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0 \
! 1574: && BITS_PER_MP_LIMB == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
! 1575: /* Better flags handling than the generic C gives on i386, saving a few
! 1576: bytes of code and maybe a cycle or two. */
! 1577:
! 1578: #define MPN_IORD_U(ptr, incr, aors) \
! 1579: do { \
! 1580: mp_ptr __ptr_dummy; \
! 1581: if (__builtin_constant_p (incr) && (incr) == 1) \
! 1582: { \
! 1583: __asm__ __volatile__ \
! 1584: ("\n" ASM_L(top) ":\n" \
! 1585: "\t" aors " $1, (%0)\n" \
! 1586: "\tleal 4(%0),%0\n" \
! 1587: "\tjc " ASM_L(top) \
! 1588: : "=r" (__ptr_dummy) \
! 1589: : "0" (ptr) \
! 1590: : "memory"); \
! 1591: } \
! 1592: else \
! 1593: { \
! 1594: __asm__ __volatile__ \
! 1595: ( aors " %2,(%0)\n" \
! 1596: "\tjnc " ASM_L(done) "\n" \
! 1597: ASM_L(top) ":\n" \
! 1598: "\t" aors " $1,4(%0)\n" \
! 1599: "\tleal 4(%0),%0\n" \
! 1600: "\tjc " ASM_L(top) "\n" \
! 1601: ASM_L(done) ":\n" \
! 1602: : "=r" (__ptr_dummy) \
! 1603: : "0" (ptr), \
! 1604: "ri" (incr) \
! 1605: : "memory"); \
! 1606: } \
! 1607: } while (0)
! 1608:
! 1609: #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl")
! 1610: #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl")
! 1611: #define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr)
! 1612: #define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr)
! 1613: #endif
! 1614:
! 1615: #if GMP_NAIL_BITS == 0
! 1616: #ifndef mpn_incr_u
! 1617: #define mpn_incr_u(p,incr) \
! 1618: do { \
! 1619: mp_limb_t __x; \
! 1620: mp_ptr __p = (p); \
! 1621: if (__builtin_constant_p (incr) && (incr) == 1) \
! 1622: { \
! 1623: while (++(*(__p++)) == 0) \
! 1624: ; \
! 1625: } \
! 1626: else \
! 1627: { \
! 1628: __x = *__p + (incr); \
! 1629: *__p = __x; \
! 1630: if (__x < (incr)) \
! 1631: while (++(*(++__p)) == 0) \
! 1632: ; \
! 1633: } \
! 1634: } while (0)
! 1635: #endif
! 1636: #ifndef mpn_decr_u
! 1637: #define mpn_decr_u(p,incr) \
! 1638: do { \
! 1639: mp_limb_t __x; \
! 1640: mp_ptr __p = (p); \
! 1641: if (__builtin_constant_p (incr) && (incr) == 1) \
! 1642: { \
! 1643: while ((*(__p++))-- == 0) \
! 1644: ; \
! 1645: } \
! 1646: else \
! 1647: { \
! 1648: __x = *__p; \
! 1649: *__p = __x - (incr); \
! 1650: if (__x < (incr)) \
! 1651: while ((*(++__p))-- == 0) \
! 1652: ; \
! 1653: } \
! 1654: } while (0)
! 1655: #endif
! 1656: #endif
! 1657:
! 1658: #if GMP_NAIL_BITS >= 1
! 1659: #ifndef mpn_incr_u
! 1660: #define mpn_incr_u(p,incr) \
! 1661: do { \
! 1662: mp_limb_t __x; \
! 1663: mp_ptr __p = (p); \
! 1664: if (__builtin_constant_p (incr) && (incr) == 1) \
! 1665: { \
! 1666: do \
! 1667: { \
! 1668: __x = (*__p + 1) & GMP_NUMB_MASK; \
! 1669: *__p++ = __x; \
! 1670: } \
! 1671: while (__x == 0); \
! 1672: } \
! 1673: else \
! 1674: { \
! 1675: __x = (*__p + (incr)); \
! 1676: *__p++ = __x & GMP_NUMB_MASK; \
! 1677: if (__x >> GMP_NUMB_BITS != 0) \
! 1678: { \
! 1679: do \
! 1680: { \
! 1681: __x = (*__p + 1) & GMP_NUMB_MASK; \
! 1682: *__p++ = __x; \
! 1683: } \
! 1684: while (__x == 0); \
! 1685: } \
! 1686: } \
! 1687: } while (0)
! 1688: #endif
! 1689: #ifndef mpn_decr_u
! 1690: #define mpn_decr_u(p,incr) \
! 1691: do { \
! 1692: mp_limb_t __x; \
! 1693: mp_ptr __p = (p); \
! 1694: if (__builtin_constant_p (incr) && (incr) == 1) \
! 1695: { \
! 1696: do \
! 1697: { \
! 1698: __x = *__p; \
! 1699: *__p++ = (__x - 1) & GMP_NUMB_MASK; \
! 1700: } \
! 1701: while (__x == 0); \
! 1702: } \
! 1703: else \
! 1704: { \
! 1705: __x = *__p - (incr); \
! 1706: *__p++ = __x & GMP_NUMB_MASK; \
! 1707: if (__x >> GMP_NUMB_BITS != 0) \
! 1708: { \
! 1709: do \
! 1710: { \
! 1711: __x = *__p; \
! 1712: *__p++ = (__x - 1) & GMP_NUMB_MASK; \
! 1713: } \
! 1714: while (__x == 0); \
! 1715: } \
! 1716: } \
! 1717: } while (0)
! 1718: #endif
! 1719: #endif
! 1720:
/* MPN_INCR_U(ptr,size,n): add the single limb N to the SIZE-limb number at
   PTR.  With WANT_ASSERT this runs mpn_add_1 on exactly SIZE limbs and
   asserts that no carry comes out the top; without it, it maps onto the
   open-ended mpn_incr_u, which propagates the carry with no size bound.  */
! 1721: #ifndef MPN_INCR_U
! 1722: #if WANT_ASSERT
! 1723: #define MPN_INCR_U(ptr, size, n) \
! 1724: do { \
! 1725: ASSERT ((size) >= 1); \
! 1726: ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \
! 1727: } while (0)
! 1728: #else
! 1729: #define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n)
! 1730: #endif
! 1731: #endif
! 1732:
/* MPN_DECR_U(ptr,size,n): subtract the single limb N from the SIZE-limb
   number at PTR.  With WANT_ASSERT this runs mpn_sub_1 on exactly SIZE
   limbs and asserts no borrow comes out the top; without it, it maps onto
   the open-ended mpn_decr_u, which propagates the borrow with no bound.  */
! 1733: #ifndef MPN_DECR_U
! 1734: #if WANT_ASSERT
! 1735: #define MPN_DECR_U(ptr, size, n) \
! 1736: do { \
! 1737: ASSERT ((size) >= 1); \
! 1738: ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \
! 1739: } while (0)
! 1740: #else
! 1741: #define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n)
! 1742: #endif
1.1.1.2 maekawa 1743: #endif
1.1 maekawa 1744:
1.1.1.4 ! ohara 1745:
/* NOTE(review): one instance of this struct per base lives in the
   mp_bases[] table declared just below; chars_per_bit_exactly is what
   MPN_SIZEINBASE (further down in this file) uses to bound digit counts
   for non-power-of-2 bases.  */
1.1 maekawa 1746: /* Structure for conversion between internal binary format and
1747: strings in base 2..36. */
1748: struct bases
1749: {
1750: /* Number of digits in the conversion base that always fits in an mp_limb_t.
1751: For example, for base 10 on a machine where a mp_limb_t has 32 bits this
1752: is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
1753: int chars_per_limb;
1754:
1755: /* log(2)/log(conversion_base) */
1.1.1.2 maekawa 1756: double chars_per_bit_exactly;
1.1 maekawa 1757:
1758: /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
1759: factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
1760: i.e. the number of bits used to represent each digit in the base. */
1761: mp_limb_t big_base;
1762:
1763: /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
1764: fixed-point number. Instead of dividing by big_base an application can
1765: choose to multiply by big_base_inverted. */
1766: mp_limb_t big_base_inverted;
1767: };
1768:
1.1.1.4 ! ohara 1769: #define mp_bases __MPN(bases)
! 1770: #define __mp_bases __MPN(bases)
! 1771: __GMP_DECLSPEC extern const struct bases mp_bases[257];
! 1772:
! 1773: /* mp_bases[10] values, generated by mpn/mp_bases.c */
! 1774: #if GMP_NUMB_BITS == 4
! 1775: #define MP_BASES_CHARS_PER_LIMB_10 1
! 1776: #define MP_BASES_BIG_BASE_10 CNST_LIMB(0xa)
! 1777: #define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x9)
! 1778: #define MP_BASES_NORMALIZATION_STEPS_10 0
! 1779: #endif
! 1780: #if GMP_NUMB_BITS == 8
! 1781: #define MP_BASES_CHARS_PER_LIMB_10 2
! 1782: #define MP_BASES_BIG_BASE_10 CNST_LIMB(0x64)
! 1783: #define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x47)
! 1784: #define MP_BASES_NORMALIZATION_STEPS_10 1
! 1785: #endif
! 1786: #if GMP_NUMB_BITS == 16
! 1787: #define MP_BASES_CHARS_PER_LIMB_10 4
! 1788: #define MP_BASES_BIG_BASE_10 CNST_LIMB(0x2710)
! 1789: #define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0xa36e)
! 1790: #define MP_BASES_NORMALIZATION_STEPS_10 2
! 1791: #endif
! 1792: #if GMP_NUMB_BITS == 28
! 1793: #define MP_BASES_CHARS_PER_LIMB_10 8
! 1794: #define MP_BASES_BIG_BASE_10 CNST_LIMB(0x5f5e100)
! 1795: #define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x5798ee23)
! 1796: #define MP_BASES_NORMALIZATION_STEPS_10 5
! 1797: #endif
! 1798: #if GMP_NUMB_BITS == 30
! 1799: #define MP_BASES_CHARS_PER_LIMB_10 9
! 1800: #define MP_BASES_BIG_BASE_10 CNST_LIMB(0x3b9aca00)
! 1801: #define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x12e0be82)
! 1802: #define MP_BASES_NORMALIZATION_STEPS_10 2
! 1803: #endif
! 1804: #if GMP_NUMB_BITS == 32
! 1805: #define MP_BASES_CHARS_PER_LIMB_10 9
! 1806: #define MP_BASES_BIG_BASE_10 CNST_LIMB(0x3b9aca00)
! 1807: #define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x12e0be82)
! 1808: #define MP_BASES_NORMALIZATION_STEPS_10 2
! 1809: #endif
! 1810: #if GMP_NUMB_BITS == 60
! 1811: #define MP_BASES_CHARS_PER_LIMB_10 18
! 1812: #define MP_BASES_BIG_BASE_10 CNST_LIMB(0xde0b6b3a7640000)
! 1813: #define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x2725dd1d243aba0e)
! 1814: #define MP_BASES_NORMALIZATION_STEPS_10 4
! 1815: #endif
! 1816: #if GMP_NUMB_BITS == 62
! 1817: #define MP_BASES_CHARS_PER_LIMB_10 18
! 1818: #define MP_BASES_BIG_BASE_10 CNST_LIMB(0xde0b6b3a7640000)
! 1819: #define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x2725dd1d243aba0e)
! 1820: #define MP_BASES_NORMALIZATION_STEPS_10 4
! 1821: #endif
! 1822: #if GMP_NUMB_BITS == 64
! 1823: #define MP_BASES_CHARS_PER_LIMB_10 19
! 1824: #define MP_BASES_BIG_BASE_10 CNST_LIMB(0x8ac7230489e80000)
! 1825: #define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0xd83c94fb6d2ac34a)
! 1826: #define MP_BASES_NORMALIZATION_STEPS_10 0
! 1827: #endif
! 1828:
! 1829:
! 1830: /* For power of 2 bases this is exact. For other bases the result is either
! 1831: exact or one too big.
! 1832:
! 1833: To be exact always it'd be necessary to examine all the limbs of the
! 1834: operand, since numbers like 100..000 and 99...999 generally differ only
! 1835: in the lowest limb. It'd be possible to examine just a couple of high
! 1836: limbs to increase the probability of being exact, but that doesn't seem
! 1837: worth bothering with. */
! 1838:
! 1839: #define MPN_SIZEINBASE(result, ptr, size, base) \
! 1840: do { \
! 1841: int __lb_base, __cnt; \
! 1842: mp_size_t __totbits; \
! 1843: \
! 1844: ASSERT ((size) >= 0); \
! 1845: ASSERT ((base) >= 2); \
! 1846: ASSERT ((base) < numberof (mp_bases)); \
! 1847: \
! 1848: /* Special case for X == 0. */ \
! 1849: if ((size) == 0) \
! 1850: (result) = 1; \
! 1851: else \
! 1852: { \
! 1853: /* Calculate the total number of significant bits of X. */ \
! 1854: count_leading_zeros (__cnt, (ptr)[(size)-1]); \
! 1855: __totbits = (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \
! 1856: \
! 1857: if (POW2_P (base)) \
! 1858: { \
! 1859: __lb_base = mp_bases[base].big_base; \
! 1860: (result) = (__totbits + __lb_base - 1) / __lb_base; \
! 1861: } \
! 1862: else \
! 1863: (result) = (size_t) \
! 1864: (__totbits * mp_bases[base].chars_per_bit_exactly) + 1; \
! 1865: } \
! 1866: } while (0)
! 1867:
! 1868: /* eliminate mp_bases lookups for base==16 */
! 1869: #define MPN_SIZEINBASE_16(result, ptr, size) \
! 1870: do { \
! 1871: int __cnt; \
! 1872: mp_size_t __totbits; \
! 1873: \
! 1874: ASSERT ((size) >= 0); \
! 1875: \
! 1876: /* Special case for X == 0. */ \
! 1877: if ((size) == 0) \
! 1878: (result) = 1; \
! 1879: else \
! 1880: { \
! 1881: /* Calculate the total number of significant bits of X. */ \
! 1882: count_leading_zeros (__cnt, (ptr)[(size)-1]); \
! 1883: __totbits = (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \
! 1884: (result) = (__totbits + 4 - 1) / 4; \
! 1885: } \
! 1886: } while (0)
! 1887:
1.1 maekawa 1888:
1.1.1.4 ! ohara 1889: #if HAVE_HOST_CPU_FAMILY_x86
1.1.1.2 maekawa 1890: #define TARGET_REGISTER_STARVED 1
1891: #else
1892: #define TARGET_REGISTER_STARVED 0
1893: #endif
1894:
1895: /* Use a library function for invert_limb, if available. */
1896: #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
1897: #define mpn_invert_limb __MPN(invert_limb)
1.1.1.4 ! ohara 1898: mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
! 1899: #define invert_limb(invxl,xl) (invxl = mpn_invert_limb (xl))
1.1.1.2 maekawa 1900: #endif
1901:
1902: #ifndef invert_limb
/* Generic invert_limb(invxl,xl): compute the reciprocal limb used by
   udiv_qrnnd_preinv below, i.e. the largest limb not larger than
   (2**(2*BITS_PER_MP_LIMB))/XL - (2**BITS_PER_MP_LIMB), via a single
   udiv_qrnnd of the two-limb value (-XL, 0) by XL.  Assumes XL is
   normalized (most significant bit set) as the udiv_qrnnd_preinv comment
   below requires -- TODO confirm for any other callers.  When XL<<1 == 0,
   the ASSERT leaves only XL == 2**(BITS_PER_MP_LIMB-1) possible, whose
   reciprocal is all ones, so the division is skipped.
   FIX(review): the macro parameter is now parenthesized at its use sites
   ((xl) << 1, -(xl)) so that expression arguments such as a+b parse
   correctly; previously "xl << 1" would bind wrongly for such arguments.  */
1.1.1.4 ! ohara  1903: #define invert_limb(invxl,xl) \
! 1904: do { \
! 1905: mp_limb_t dummy; \
! 1906: ASSERT ((xl) != 0); \
! 1907: if ((xl) << 1 == 0) \
! 1908: (invxl) = ~(mp_limb_t) 0; \
! 1909: else \
! 1910: udiv_qrnnd (invxl, dummy, -(xl), 0, xl); \
1.1.1.2 maekawa 1911: } while (0)
1912: #endif
1913:
1.1 maekawa 1914: /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
1915: limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
1916: If this would yield overflow, DI should be the largest possible number
1917: (i.e., only ones). For correct operation, the most significant bit of D
1918: has to be set. Put the quotient in Q and the remainder in R. */
1.1.1.4 ! ohara 1919: #define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
! 1920: do { \
! 1921: mp_limb_t _q, _ql, _r; \
! 1922: mp_limb_t _xh, _xl; \
! 1923: ASSERT ((d) != 0); \
! 1924: umul_ppmm (_q, _ql, (nh), (di)); \
! 1925: _q += (nh); /* DI is 2**BITS_PER_MP_LIMB too small */ \
! 1926: umul_ppmm (_xh, _xl, _q, (d)); \
! 1927: sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
! 1928: if (_xh != 0) \
! 1929: { \
! 1930: sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
! 1931: _q += 1; \
! 1932: if (_xh != 0) \
! 1933: { \
! 1934: _r -= (d); \
! 1935: _q += 1; \
! 1936: } \
! 1937: } \
! 1938: if (_r >= (d)) \
! 1939: { \
! 1940: _r -= (d); \
! 1941: _q += 1; \
! 1942: } \
! 1943: (r) = _r; \
! 1944: (q) = _q; \
1.1 maekawa 1945: } while (0)
1946: /* Like udiv_qrnnd_preinv, but for any value D.  DNORM is D shifted left
1947: so that its most significant bit is set.  LGUP is ceil(log2(D)).
   FIX(review): the high-part expression previously read
   "(nl) >> 1 >> (l - 1)" using an undeclared name "l"; the macro's
   parameter list has no "l", only LGUP, so it now reads
   "(nl) >> 1 >> ((lgup) - 1)".  The double shift keeps each shift count
   strictly below BITS_PER_MP_LIMB when lgup == BITS_PER_MP_LIMB (a single
   full-width shift would be undefined behavior in C).  */
1948: #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
1949: do { \
1.1.1.2 maekawa 1950: mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
1.1 maekawa 1951: mp_limb_t _xh, _xl; \
1.1.1.2 maekawa 1952: _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> ((lgup) - 1));\
1953: _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup)); \
1954: _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
1955: _nadj = _n10 + (_n1 & (dnorm)); \
1956: umul_ppmm (_xh, _xl, di, _n2 - _n1); \
1957: add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
1958: _q1 = ~(_n2 + _xh); \
1959: umul_ppmm (_xh, _xl, _q1, d); \
1.1 maekawa 1960: add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
1961: _xh -= (d); \
1962: (r) = _xl + ((d) & _xh); \
1.1.1.2 maekawa 1963: (q) = _xh - _q1; \
1.1 maekawa 1964: } while (0)
1965: /* Exactly like udiv_qrnnd_preinv, but branch-free. It is not clear which
1966: version to use. */
1967: #define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
1968: do { \
1.1.1.2 maekawa 1969: mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
1.1 maekawa 1970: mp_limb_t _xh, _xl; \
1.1.1.2 maekawa 1971: _n2 = (nh); \
1972: _n10 = (nl); \
1973: _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
1974: _nadj = _n10 + (_n1 & (d)); \
1975: umul_ppmm (_xh, _xl, di, _n2 - _n1); \
1976: add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
1977: _q1 = ~(_n2 + _xh); \
1978: umul_ppmm (_xh, _xl, _q1, d); \
1.1 maekawa 1979: add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
1980: _xh -= (d); \
1981: (r) = _xl + ((d) & _xh); \
1.1.1.2 maekawa 1982: (q) = _xh - _q1; \
1.1 maekawa 1983: } while (0)
1984:
1.1.1.2 maekawa 1985:
1.1.1.4 ! ohara 1986: #define mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
! 1987: mp_limb_t mpn_preinv_divrem_1 _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
! 1988:
! 1989:
! 1990: /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to
! 1991: the plain mpn_divrem_1. Likewise USE_PREINV_MOD_1 chooses between
! 1992: mpn_preinv_mod_1 and plain mpn_mod_1. The default for both is yes, since
! 1993: the few CISC chips where preinv is not good have defines saying so. */
! 1994: #ifndef USE_PREINV_DIVREM_1
! 1995: #define USE_PREINV_DIVREM_1 1
! 1996: #endif
! 1997: #ifndef USE_PREINV_MOD_1
! 1998: #define USE_PREINV_MOD_1 1
! 1999: #endif
! 2000:
! 2001: #if USE_PREINV_DIVREM_1
! 2002: #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
! 2003: mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
! 2004: #else
! 2005: #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
! 2006: mpn_divrem_1 (qp, xsize, ap, size, d)
! 2007: #endif
! 2008:
! 2009: #if USE_PREINV_MOD_1
! 2010: #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
! 2011: mpn_preinv_mod_1 (src, size, divisor, inverse)
! 2012: #else
! 2013: #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
! 2014: mpn_mod_1 (src, size, divisor)
! 2015: #endif
! 2016:
! 2017:
! 2018: #define mpn_mod_34lsub1 __MPN(mod_34lsub1)
! 2019: mp_limb_t mpn_mod_34lsub1 _PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
! 2020:
! 2021:
! 2022: /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
! 2023: plain mpn_divrem_1. Likewise MODEXACT_1_ODD_THRESHOLD for
! 2024: mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and
! 2025: modexact are faster at all sizes, so the defaults are 0. Those CPUs
! 2026: where this is not right have a tuned threshold. */
! 2027: #ifndef DIVEXACT_1_THRESHOLD
! 2028: #define DIVEXACT_1_THRESHOLD 0
! 2029: #endif
! 2030: #ifndef MODEXACT_1_ODD_THRESHOLD
! 2031: #define MODEXACT_1_ODD_THRESHOLD 0
! 2032: #endif
! 2033:
! 2034: #define mpn_divexact_1 __MPN(divexact_1)
! 2035: void mpn_divexact_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
! 2036:
! 2037: #define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor) \
! 2038: do { \
! 2039: if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD)) \
! 2040: ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
! 2041: else \
! 2042: { \
! 2043: ASSERT (mpn_mod_1 (src, size, divisor) == 0); \
! 2044: mpn_divexact_1 (dst, src, size, divisor); \
! 2045: } \
! 2046: } while (0)
! 2047:
! 2048: #define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
! 2049: mp_limb_t mpn_modexact_1c_odd _PROTO ((mp_srcptr src, mp_size_t size,
! 2050: mp_limb_t divisor, mp_limb_t c)) __GMP_ATTRIBUTE_PURE;
! 2051:
! 2052: #if HAVE_NATIVE_mpn_modexact_1_odd
! 2053: #define mpn_modexact_1_odd __MPN(modexact_1_odd)
! 2054: mp_limb_t mpn_modexact_1_odd _PROTO ((mp_srcptr src, mp_size_t size,
! 2055: mp_limb_t divisor)) __GMP_ATTRIBUTE_PURE;
! 2056: #else
! 2057: #define mpn_modexact_1_odd(src,size,divisor) \
! 2058: mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
! 2059: #endif
! 2060:
! 2061: #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \
! 2062: (ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD) \
! 2063: ? mpn_modexact_1_odd (src, size, divisor) \
! 2064: : mpn_mod_1 (src, size, divisor))
! 2065:
! 2066:
! 2067: /* modlimb_invert() sets inv to the multiplicative inverse of n modulo
! 2068: 2^BITS_PER_MP_LIMB, ie. satisfying inv*n == 1 mod 2^BITS_PER_MP_LIMB.
! 2069: n must be odd (otherwise such an inverse doesn't exist).
1.1.1.2 maekawa 2070:
2071: This is not to be confused with invert_limb(), which is completely
2072: different.
2073:
2074: The table lookup gives an inverse with the low 8 bits valid, and each
1.1.1.4 ! ohara 2075: multiply step doubles the number of bits. See Jebelean "An algorithm for
! 2076: exact division" end of section 4 (reference in gmp.texi).
! 2077:
! 2078: Possible enhancement: Could use UHWtype until the last step, if half-size
! 2079: multiplies are faster (might help under _LONG_LONG_LIMB).
! 2080:
! 2081: Alternative: As noted in Granlund and Montgomery "Division by Invariant
! 2082: Integers using Multiplication" (reference in gmp.texi), n itself gives a
! 2083: 3-bit inverse immediately, and could be used instead of a table lookup.
! 2084: A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
! 2085: bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */
1.1.1.2 maekawa 2086:
2087: #define modlimb_invert_table __gmp_modlimb_invert_table
1.1.1.4 ! ohara 2088: __GMP_DECLSPEC extern const unsigned char modlimb_invert_table[128];
1.1.1.2 maekawa 2089:
1.1.1.4 ! ohara 2090: #if BITS_PER_MP_LIMB <= 8
! 2091: #define modlimb_invert(inv,n) \
! 2092: do { \
! 2093: mp_limb_t __n = (n); \
! 2094: mp_limb_t __inv; \
! 2095: ASSERT ((__n & 1) == 1); \
! 2096: __inv = modlimb_invert_table[(__n/2) & 0x7F]; /* 8 */ \
! 2097: ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
! 2098: (inv) = __inv & GMP_NUMB_MASK; \
! 2099: } while (0)
! 2100: #else
! 2101: #if BITS_PER_MP_LIMB <= 16
! 2102: #define modlimb_invert(inv,n) \
! 2103: do { \
! 2104: mp_limb_t __n = (n); \
! 2105: mp_limb_t __inv; \
! 2106: ASSERT ((__n & 1) == 1); \
! 2107: __inv = modlimb_invert_table[(__n/2) & 0x7F]; /* 8 */ \
! 2108: __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
! 2109: ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
! 2110: (inv) = __inv & GMP_NUMB_MASK; \
! 2111: } while (0)
! 2112: #else
1.1.1.2 maekawa 2113: #if BITS_PER_MP_LIMB <= 32
2114: #define modlimb_invert(inv,n) \
2115: do { \
2116: mp_limb_t __n = (n); \
2117: mp_limb_t __inv; \
2118: ASSERT ((__n & 1) == 1); \
1.1.1.4 ! ohara 2119: __inv = modlimb_invert_table[(__n/2) & 0x7F]; /* 8 */ \
! 2120: __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
! 2121: __inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \
! 2122: ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
! 2123: (inv) = __inv & GMP_NUMB_MASK; \
1.1.1.2 maekawa 2124: } while (0)
1.1.1.4 ! ohara 2125: #else
! 2126: #if BITS_PER_MP_LIMB <= 64
1.1.1.2 maekawa 2127: #define modlimb_invert(inv,n) \
2128: do { \
2129: mp_limb_t __n = (n); \
2130: mp_limb_t __inv; \
2131: ASSERT ((__n & 1) == 1); \
1.1.1.4 ! ohara 2132: __inv = modlimb_invert_table[(__n/2) & 0x7F]; /* 8 */ \
! 2133: __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
! 2134: __inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \
! 2135: __inv = 2 * __inv - __inv * __inv * __n; /* 64 */ \
! 2136: ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
! 2137: (inv) = __inv & GMP_NUMB_MASK; \
! 2138: } while (0)
! 2139: #endif /* 64 */
! 2140: #endif /* 32 */
! 2141: #endif /* 16 */
! 2142: #endif /* 8 */
! 2143:
! 2144:
! 2145: /* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB.
! 2146: 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */
! 2147: #define MODLIMB_INVERSE_3 ((GMP_NUMB_MAX / 3) * 2 + 1)
! 2148:
! 2149:
! 2150: /* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs.
! 2151:
! 2152: It's not clear whether this is the best way to do this calculation.
! 2153: Anything congruent to -a would be fine for the one limb congruence
! 2154: tests. */
! 2155:
! 2156: #define NEG_MOD(r, a, d) \
! 2157: do { \
! 2158: ASSERT ((d) != 0); \
! 2159: ASSERT_LIMB (a); \
! 2160: ASSERT_LIMB (d); \
! 2161: \
! 2162: if ((a) <= (d)) \
! 2163: { \
! 2164: /* small a is reasonably likely */ \
! 2165: (r) = (d) - (a); \
! 2166: } \
! 2167: else \
! 2168: { \
! 2169: unsigned __twos; \
! 2170: mp_limb_t __dnorm; \
! 2171: count_leading_zeros (__twos, d); \
! 2172: __twos -= GMP_NAIL_BITS; \
! 2173: __dnorm = (d) << __twos; \
! 2174: (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \
! 2175: } \
! 2176: \
! 2177: ASSERT_LIMB (r); \
! 2178: } while (0)
! 2179:
! 2180: /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
! 2181: #define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1)
! 2182:
! 2183:
! 2184: /* Set "p" to 1 if there's an odd number of 1 bits in "n", or to 0 if
! 2185: there's an even number. */
! 2186:
! 2187: #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
! 2188: #define ULONG_PARITY(p, n) \
! 2189: do { \
! 2190: char __p; \
! 2191: unsigned long __n = (n); \
! 2192: __n ^= (__n >> 16); \
! 2193: asm ("xorb %h1, %b1\n" \
! 2194: "setpo %0\n" \
! 2195: : "=qm" (__p), "=q" (__n) \
! 2196: : "1" (__n)); \
! 2197: (p) = __p; \
! 2198: } while (0)
! 2199: #else
! 2200: #define ULONG_PARITY(p, n) \
! 2201: do { \
! 2202: unsigned long __n = (n); \
! 2203: int __p = 0; \
! 2204: do \
! 2205: { \
! 2206: __p ^= 0x96696996L >> (__n & 0x1F); \
! 2207: __n >>= 5; \
! 2208: } \
! 2209: while (__n != 0); \
! 2210: \
! 2211: (p) = __p; \
! 2212: } while (0)
! 2213: #endif
! 2214:
! 2215:
! 2216: /* bswap is available on i486 and up and is fast. A combination rorw $8 /
! 2217: roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
! 2218: kernel with xchgb instead of rorw), but this is not done here, because
! 2219: i386 means generic x86 and mixing word and dword operations will cause
! 2220: partial register stalls on P6 chips. */
! 2221: #if defined (__GNUC__) && ! defined (NO_ASM) \
! 2222: && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \
! 2223: && BITS_PER_MP_LIMB == 32
! 2224: #define BSWAP_LIMB(dst, src) \
! 2225: do { \
! 2226: asm ("bswap %0" : "=r" (dst) : "0" (src)); \
! 2227: } while (0)
! 2228: #endif /* x86 */
! 2229:
! 2230: #if defined (__GNUC__) && ! defined (NO_ASM) \
! 2231: && defined (__ia64) && BITS_PER_MP_LIMB == 64
! 2232: #define BSWAP_LIMB(dst, src) \
! 2233: do { \
! 2234: asm ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \
! 2235: } while (0)
! 2236: #endif
! 2237:
! 2238: #if ! defined (BSWAP_LIMB)
! 2239: #if BITS_PER_MP_LIMB == 8
! 2240: #define BSWAP_LIMB(dst, src) \
! 2241: do { (dst) = (src); } while (0)
! 2242: #endif
! 2243: #if BITS_PER_MP_LIMB == 16
! 2244: #define BSWAP_LIMB(dst, src) \
! 2245: do { \
! 2246: (dst) = ((src) << 8) + ((src) >> 8); \
! 2247: } while (0)
! 2248: #endif
! 2249: #if BITS_PER_MP_LIMB == 32
! 2250: #define BSWAP_LIMB(dst, src) \
! 2251: do { \
! 2252: (dst) = \
! 2253: ((src) << 24) \
! 2254: + (((src) & 0xFF00) << 8) \
! 2255: + (((src) >> 8) & 0xFF00) \
! 2256: + ((src) >> 24); \
! 2257: } while (0)
! 2258: #endif
! 2259: #if BITS_PER_MP_LIMB == 64
! 2260: #define BSWAP_LIMB(dst, src) \
! 2261: do { \
! 2262: (dst) = \
! 2263: ((src) << 56) \
! 2264: + (((src) & 0xFF00) << 40) \
! 2265: + (((src) & 0xFF0000) << 24) \
! 2266: + (((src) & 0xFF000000) << 8) \
! 2267: + (((src) >> 8) & 0xFF000000) \
! 2268: + (((src) >> 24) & 0xFF0000) \
! 2269: + (((src) >> 40) & 0xFF00) \
! 2270: + ((src) >> 56); \
! 2271: } while (0)
! 2272: #endif
! 2273: #endif
! 2274:
! 2275:
! 2276: /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
! 2277: those we know are fast. */
! 2278: #if defined (__GNUC__) && ! defined (NO_ASM) \
! 2279: && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN \
! 2280: && (HAVE_HOST_CPU_powerpc604 \
! 2281: || HAVE_HOST_CPU_powerpc604e \
! 2282: || HAVE_HOST_CPU_powerpc750 \
! 2283: || HAVE_HOST_CPU_powerpc7400)
! 2284: #define BSWAP_LIMB_FETCH(limb, src) \
! 2285: do { \
! 2286: mp_srcptr __blf_src = (src); \
! 2287: mp_limb_t __limb; \
! 2288: __asm__ ("lwbrx %0, 0, %1" \
! 2289: : "=r" (__limb) \
! 2290: : "r" (__blf_src), \
! 2291: "m" (*__blf_src)); \
! 2292: (limb) = __limb; \
! 2293: } while (0)
! 2294: #endif
! 2295:
! 2296: #if ! defined (BSWAP_LIMB_FETCH)
! 2297: #define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src))
! 2298: #endif
! 2299:
! 2300:
! 2301: /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
! 2302: know are fast. FIXME: Is this necessary? */
! 2303: #if defined (__GNUC__) && ! defined (NO_ASM) \
! 2304: && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN \
! 2305: && (HAVE_HOST_CPU_powerpc604 \
! 2306: || HAVE_HOST_CPU_powerpc604e \
! 2307: || HAVE_HOST_CPU_powerpc750 \
! 2308: || HAVE_HOST_CPU_powerpc7400)
! 2309: #define BSWAP_LIMB_STORE(dst, limb) \
! 2310: do { \
! 2311: mp_ptr __dst = (dst); \
! 2312: mp_limb_t __limb = (limb); \
! 2313: __asm__ ("stwbrx %1, 0, %2" \
! 2314: : "=m" (*__dst) \
! 2315: : "r" (__limb), \
! 2316: "r" (__dst)); \
! 2317: } while (0)
! 2318: #endif
! 2319:
! 2320: #if ! defined (BSWAP_LIMB_STORE)
! 2321: #define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb)
! 2322: #endif
! 2323:
! 2324:
! 2325: /* Byte swap limbs from {src,size} and store at {dst,size}. */
! 2326: #define MPN_BSWAP(dst, src, size) \
! 2327: do { \
! 2328: mp_ptr __dst = (dst); \
! 2329: mp_srcptr __src = (src); \
! 2330: mp_size_t __size = (size); \
! 2331: mp_size_t __i; \
! 2332: ASSERT ((size) >= 0); \
! 2333: ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \
! 2334: CRAY_Pragma ("_CRI ivdep"); \
! 2335: for (__i = 0; __i < __size; __i++) \
! 2336: { \
! 2337: BSWAP_LIMB_FETCH (*__dst, __src); \
! 2338: __dst++; \
! 2339: __src++; \
! 2340: } \
! 2341: } while (0)
! 2342:
! 2343: /* Byte swap limbs from {dst,size} and store in reverse order at {src,size}. */
! 2344: #define MPN_BSWAP_REVERSE(dst, src, size) \
! 2345: do { \
! 2346: mp_ptr __dst = (dst); \
! 2347: mp_size_t __size = (size); \
! 2348: mp_srcptr __src = (src) + __size - 1; \
! 2349: mp_size_t __i; \
! 2350: ASSERT ((size) >= 0); \
! 2351: ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
! 2352: CRAY_Pragma ("_CRI ivdep"); \
! 2353: for (__i = 0; __i < __size; __i++) \
! 2354: { \
! 2355: BSWAP_LIMB_FETCH (*__dst, __src); \
! 2356: __dst++; \
! 2357: __src--; \
! 2358: } \
! 2359: } while (0)
! 2360:
! 2361:
! 2362: /* No processor claiming to be SPARC v9 compliant seems to
! 2363: implement the POPC instruction. Disable pattern for now. */
! 2364: #if 0
! 2365: #if defined __GNUC__ && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
! 2366: #define popc_limb(result, input) \
! 2367: do { \
! 2368: DItype __res; \
! 2369: asm ("popc %1,%0" : "=r" (result) : "rI" (input)); \
! 2370: } while (0)
! 2371: #endif
! 2372: #endif
! 2373:
! 2374: /* Cool population count of an mp_limb_t.
! 2375: You have to figure out how this works, We won't tell you!
! 2376:
! 2377: The constants could also be expressed as:
! 2378: 0xAA... = [2^(N+1) / 3] = [(2^N-1)/3*2]
! 2379: 0x33... = [2^N / 5] = [(2^N-1)/5]
! 2380: 0x0f... = [2^N / 17] = [(2^N-1)/17]
! 2381: (N is BITS_PER_MP_LIMB, [] denotes truncation.) */
! 2382:
/* How it actually works (classic parallel/SWAR popcount):
   step 1 (mask 0xAA..): "__x -= (__x & 0xAA..) >> 1" turns every 2-bit
   field into the count of bits set in that field;
   step 2 (mask 0x33..): adds adjacent 2-bit counts into 4-bit fields;
   step 3 (mask 0x0f..): adds adjacent 4-bit counts into byte fields;
   the remaining unmasked shift-and-adds fold all the byte counts into
   the low byte, and the final "& 0xff" extracts the total -- the count
   is at most 64 here, so one byte cannot overflow.  */
! 2383: #if ! defined (popc_limb) && BITS_PER_MP_LIMB == 64
! 2384: #define popc_limb(result, input) \
! 2385: do { \
! 2386: mp_limb_t __x = (input); \
! 2387: __x -= (__x & CNST_LIMB(0xaaaaaaaaaaaaaaaa)) >> 1; \
! 2388: __x = ((__x >> 2) & CNST_LIMB(0x3333333333333333)) \
! 2389: + (__x & CNST_LIMB(0x3333333333333333)); \
! 2390: __x = ((__x >> 4) + __x) & CNST_LIMB(0x0f0f0f0f0f0f0f0f); \
! 2391: __x = ((__x >> 8) + __x); \
! 2392: __x = ((__x >> 16) + __x); \
! 2393: __x = ((__x >> 32) + __x) & 0xff; \
! 2394: (result) = __x; \
! 2395: } while (0)
! 2396: #endif
! 2397: #if ! defined (popc_limb) && BITS_PER_MP_LIMB == 32
! 2398: #define popc_limb(result, input) \
! 2399: do { \
! 2400: mp_limb_t __x = (input); \
! 2401: __x -= (__x & 0xaaaaaaaaL) >> 1; \
! 2402: __x = ((__x >> 2) & 0x33333333L) + (__x & 0x33333333L); \
! 2403: __x = ((__x >> 4) + __x) & 0x0f0f0f0fL; \
! 2404: __x = ((__x >> 8) + __x); \
! 2405: __x = ((__x >> 16) + __x) & 0xff; \
! 2406: (result) = __x; \
! 2407: } while (0)
! 2408: #endif
! 2409: #if ! defined (popc_limb) && BITS_PER_MP_LIMB == 16
! 2410: #define popc_limb(result, input) \
! 2411: do { \
! 2412: mp_limb_t __x = (input); \
! 2413: __x -= (__x & 0xaaaa) >> 1; \
! 2414: __x = ((__x >> 2) & 0x3333) + (__x & 0x3333); \
! 2415: __x = ((__x >> 4) + __x) & 0x0f0f; \
! 2416: __x = ((__x >> 8) + __x) & 0xff; \
! 2417: (result) = __x; \
! 2418: } while (0)
! 2419: #endif
! 2420: #if ! defined (popc_limb) && BITS_PER_MP_LIMB == 8
! 2421: #define popc_limb(result, input) \
! 2422: do { \
! 2423: mp_limb_t __x = (input); \
! 2424: __x -= (__x & 0xaa) >> 1; \
! 2425: __x = ((__x >> 2) & 0x33) + (__x & 0x33); \
! 2426: __x = ((__x >> 4) + __x) & 0xf; \
! 2427: (result) = __x; \
! 2428: } while (0)
! 2429: #endif
! 2430: #if ! defined (popc_limb) && BITS_PER_MP_LIMB == 4
! 2431: #define popc_limb(result, input) \
! 2432: do { \
! 2433: mp_limb_t __x = (input); \
! 2434: __x = (__x & 1) + ((__x >> 1) & 1) + ((__x >> 2) & 1) + ((__x >> 3) & 1); \
! 2435: (result) = __x; \
1.1.1.2 maekawa 2436: } while (0)
2437: #endif
2438:
2439:
1.1 maekawa 2440: /* Define stuff for longlong.h. */
1.1.1.4 ! ohara 2441: #if HAVE_ATTRIBUTE_MODE
1.1 maekawa 2442: typedef unsigned int UQItype __attribute__ ((mode (QI)));
1.1.1.2 maekawa 2443: typedef int SItype __attribute__ ((mode (SI)));
1.1 maekawa 2444: typedef unsigned int USItype __attribute__ ((mode (SI)));
2445: typedef int DItype __attribute__ ((mode (DI)));
2446: typedef unsigned int UDItype __attribute__ ((mode (DI)));
2447: #else
2448: typedef unsigned char UQItype;
1.1.1.2 maekawa 2449: typedef long SItype;
1.1 maekawa 2450: typedef unsigned long USItype;
1.1.1.4 ! ohara 2451: #if HAVE_LONG_LONG
1.1.1.2 maekawa 2452: typedef long long int DItype;
2453: typedef unsigned long long int UDItype;
2454: #else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
2455: typedef long int DItype;
2456: typedef unsigned long int UDItype;
2457: #endif
1.1 maekawa 2458: #endif
2459:
2460: typedef mp_limb_t UWtype;
2461: typedef unsigned int UHWtype;
2462: #define W_TYPE_SIZE BITS_PER_MP_LIMB
2463:
2464: /* Define ieee_double_extract and _GMP_IEEE_FLOATS. */
2465:
1.1.1.2 maekawa 2466: #if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__)))
2467: /* Special case for little endian ARM since floats remain in big-endian. */
2468: #define _GMP_IEEE_FLOATS 1
2469: union ieee_double_extract
2470: {
2471: struct
2472: {
2473: unsigned int manh:20;
2474: unsigned int exp:11;
2475: unsigned int sig:1;
2476: unsigned int manl:32;
2477: } s;
2478: double d;
2479: };
2480: #else
1.1 maekawa 2481: #if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__) \
2482: || defined (__alpha) \
2483: || defined (__clipper__) \
2484: || defined (__cris) \
2485: || defined (__i386__) \
1.1.1.4 ! ohara 2486: || defined (__x86_64__) \
1.1 maekawa 2487: || defined (__i860__) \
2488: || defined (__i960__) \
1.1.1.4 ! ohara 2489: || defined (__ia64) && ! defined (__hpux) \
1.1 maekawa 2490: || defined (MIPSEL) || defined (_MIPSEL) \
2491: || defined (__ns32000__) \
2492: || defined (__WINNT) || defined (_WIN32)
2493: #define _GMP_IEEE_FLOATS 1
2494: union ieee_double_extract
2495: {
2496: struct
2497: {
2498: unsigned int manl:32;
2499: unsigned int manh:20;
2500: unsigned int exp:11;
2501: unsigned int sig:1;
2502: } s;
2503: double d;
2504: };
2505: #else /* Need this as an #else since the tests aren't made exclusive. */
1.1.1.4 ! ohara 2506: #if defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\
! 2507: || defined(mc68020)
! 2508: #define _GMP_IEEE_FLOATS 1
! 2509: union ieee_double_extract
! 2510: {
! 2511: struct
! 2512: {
! 2513: /* "int" might be only 16 bits, so use "long" */
! 2514: unsigned long sig:1;
! 2515: unsigned long exp:11;
! 2516: unsigned long manh:20;
! 2517: unsigned long manl:32;
! 2518: } s;
! 2519: double d;
! 2520: };
! 2521: #else
1.1.1.2 maekawa 2522: #if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__) \
1.1 maekawa 2523: || defined (__a29k__) || defined (_AM29K) \
2524: || defined (__arm__) \
2525: || (defined (__convex__) && defined (_IEEE_FLOAT_)) \
1.1.1.4 ! ohara 2526: || defined (_CRAYMPP) || defined (_CRAYIEEE) \
1.1 maekawa 2527: || defined (__i370__) || defined (__mvs__) \
2528: || defined (__m88000__) \
2529: || defined (MIPSEB) || defined (_MIPSEB) \
1.1.1.2 maekawa 2530: || defined (__hppa) || defined (__hppa__) \
1.1 maekawa 2531: || defined (__pyr__) \
2532: || defined (__ibm032__) \
2533: || defined (_IBMR2) || defined (_ARCH_PPC) \
2534: || defined (__sh__) \
1.1.1.4 ! ohara 2535: || defined (__sparc) || defined (sparc) || defined (__sparc__) \
! 2536: || defined (__sparc_v9) || defined (__sparc_v9__) \
1.1 maekawa 2537: || defined (__we32k__)
2538: #define _GMP_IEEE_FLOATS 1
2539: union ieee_double_extract
2540: {
2541: struct
2542: {
2543: unsigned int sig:1;
2544: unsigned int exp:11;
2545: unsigned int manh:20;
2546: unsigned int manl:32;
2547: } s;
2548: double d;
2549: };
2550: #endif
2551: #endif
1.1.1.2 maekawa 2552: #endif
1.1 maekawa 2553: #endif
2554:
1.1.1.4 ! ohara  2555: /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
! 2556: that don't convert ulong->double correctly (eg. SunOS 4 native cc). */
! 2557: #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
! 2558: /* Maximum number of limbs it will take to store any `double'.
! 2559: We assume doubles have 53 mantissa bits. */
/* NOTE(review): the trailing "+ 1" presumably allows for the 53-bit
   mantissa straddling a limb boundary after alignment shifting -- confirm
   against __gmp_extract_double (declared below).  */
! 2560: #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS + 1)
! 2561:
! 2562: double __gmp_scale2 _PROTO ((double, int)) ATTRIBUTE_CONST;
1.1.1.2 maekawa 2563: int __gmp_extract_double _PROTO ((mp_ptr, double));
2564:
/* presumably used by the exception code to provoke a deliberate fault
   (eg. a divide by zero) -- confirm in errno.c */
extern int __gmp_junk;
extern const int __gmp_0;

/* Fatal error entry points; ATTRIBUTE_NORETURN, so none of these return. */
void __gmp_exception _PROTO ((int)) ATTRIBUTE_NORETURN;
void __gmp_divide_by_zero _PROTO ((void)) ATTRIBUTE_NORETURN;
void __gmp_sqrt_of_negative _PROTO ((void)) ATTRIBUTE_NORETURN;
#define GMP_ERROR(code) __gmp_exception (code)
#define DIVIDE_BY_ZERO __gmp_divide_by_zero ()
#define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative ()
1.1.1.2 maekawa 2573:
/* CNST_LIMB(C) attaches the suffix needed to give the integer constant C
   the full width of an mp_limb_t: LL when limbs are "long long", plain L
   otherwise.  Token pasting is used where the preprocessor supports it;
   the old comment-splice trick is the pre-ANSI fallback.  */
#if defined _LONG_LONG_LIMB
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##LL)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
#endif
#else /* not _LONG_LONG_LIMB */
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##L)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/L)
#endif
#endif /* _LONG_LONG_LIMB */
2587:
/* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
/* PP is a product of small odd primes fitting in one limb (the factors are
   listed beside each value).  PP_INVERTED is presumably a precomputed
   inverse of PP for preinverted division -- only provided for the 32 and
   64 bit limb sizes; confirm against invert_limb usage.  PP_FIRST_OMITTED
   is the first odd prime NOT included in PP.  */
#if GMP_NUMB_BITS == 2
#define PP 0x3                                  /* 3 */
#define PP_FIRST_OMITTED 5
#endif
#if GMP_NUMB_BITS == 4
#define PP 0xF                                  /* 3 x 5 */
#define PP_FIRST_OMITTED 7
#endif
#if GMP_NUMB_BITS == 8
#define PP 0x69                                 /* 3 x 5 x 7 */
#define PP_FIRST_OMITTED 11
#endif
#if GMP_NUMB_BITS == 16
#define PP 0x3AA7                               /* 3 x 5 x 7 x 11 x 13 */
#define PP_FIRST_OMITTED 17
#endif
#if GMP_NUMB_BITS == 32
#define PP 0xC0CFD797L                          /* 3 x 5 x 7 x 11 x ... x 29 */
#define PP_INVERTED 0x53E5645CL
#define PP_FIRST_OMITTED 31
#endif
#if GMP_NUMB_BITS == 64
#define PP CNST_LIMB(0xE221F97C30E94E1D)        /* 3 x 5 x 7 x 11 x ... x 53 */
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
#define PP_FIRST_OMITTED 59
#endif
/* Default when no product is defined for this limb size: omit nothing
   beyond 2, ie. start trial division at 3.  */
#ifndef PP_FIRST_OMITTED
#define PP_FIRST_OMITTED 3
#endif
! 2618:
1.1.1.2 maekawa 2619:
2620:
/* BIT1 means a result value in bit 1 (second least significant bit), with a
   zero bit representing +1 and a one bit representing -1.  Bits other than
   bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
   used to ensure the expressions are "int"s even if a and/or b might be
   other types.

   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
   and their speed is important.  Expressions are used rather than
   conditionals to accumulate sign changes, which effectively means XORs
   instead of conditional JUMPs. */

/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))

/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_U0(a)   ((a) == 1)

/* (a/0), with a given by low and size;
   is 1 if a=+/-1, 0 otherwise */
#define JACOBI_LS0(alow,asize) \
  (((asize) == 1 || (asize) == -1) && (alow) == 1)

/* (a/0), with a an mpz_t;
   fetch of low limb always valid, even if size is zero */
#define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))

/* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
#define JACOBI_0U(b)   ((b) == 1)

/* (0/b), with b signed; is 1 if b=+/-1, 0 otherwise
   (comment fixed: the == -1 test shows b is signed here) */
#define JACOBI_0S(b)   ((b) == 1 || (b) == -1)

/* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
#define JACOBI_0LS(blow,bsize) \
  (((bsize) == 1 || (bsize) == -1) && (blow) == 1)

/* Convert a bit1 to +1 or -1. */
#define JACOBI_BIT1_TO_PN(result_bit1) \
  (1 - ((int) (result_bit1) & 2))

/* (2/b), with b unsigned and odd;
   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
   hence obtained from (b>>1)^b */
#define JACOBI_TWO_U_BIT1(b) \
  ((int) (((b) >> 1) ^ (b)))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U_BIT1(twos, b) \
  ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U(twos, b) \
  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))

/* (-1/b), with b odd (signed or unsigned);
   is (-1)^((b-1)/2) */
#define JACOBI_N1B_BIT1(b) \
  ((int) (b))

/* (a/b) effect due to sign of a: signed/unsigned, b odd;
   is (-1/b) if a<0, or +1 if a>=0 */
#define JACOBI_ASGN_SU_BIT1(a, b) \
  ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))

/* (a/b) effect due to sign of b: signed/signed;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SS_BIT1(a, b) \
  ((((a)<0) & ((b)<0)) << 1)

/* (a/b) effect due to sign of b: signed/mpz;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SZ_BIT1(a, b) \
  JACOBI_BSGN_SS_BIT1 (a, SIZ(b))

/* (a/b) effect due to sign of b: mpz/signed;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_ZS_BIT1(a, b) \
  JACOBI_BSGN_SZ_BIT1 (b, a)

/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
   is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   valid. */
#define JACOBI_RECIP_UU_BIT1(a, b) \
  ((int) ((a) & (b)))
! 2707:
! 2708:
/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
   modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
   unsigned.  modexact_1_odd effectively calculates -a mod b, and
   result_bit1 is adjusted for the factor of -1.

   The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
   sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
   factor to introduce into result_bit1, so for that case use mpn_mod_1
   unconditionally.

   FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
   for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
   or not skip a divide step, or something. */

#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
  do {                                                                     \
    mp_srcptr  __a_ptr = (a_ptr);                                          \
    mp_size_t  __a_size = (a_size);                                        \
    mp_limb_t  __b = (b);                                                  \
                                                                           \
    ASSERT (__a_size >= 1);                                                \
    ASSERT (__b & 1);                                                      \
                                                                           \
    /* GMP_NUMB_BITS%2 is a compile-time constant, so one branch of this   \
       "if" is dead code on any given build.  */                           \
    if ((GMP_NUMB_BITS % 2) != 0                                           \
        || BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD))           \
      {                                                                    \
        (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);                      \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        /* modexact gives -a mod b; fold the (-1/b) factor into the        \
           accumulated jacobi sign.  */                                    \
        (result_bit1) ^= JACOBI_N1B_BIT1 (__b);                            \
        (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);             \
      }                                                                    \
  } while (0)
! 2743:
! 2744:
! 2745: /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
! 2746: limb and adds an extra limb. __GMPF_PREC_TO_BITS drops that extra limb,
! 2747: hence giving back the user's size in bits rounded up. Notice that
! 2748: converting prec->bits->prec gives an unchanged value. */
! 2749: #define __GMPF_BITS_TO_PREC(n) \
! 2750: ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
! 2751: #define __GMPF_PREC_TO_BITS(n) \
! 2752: ((unsigned long) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
! 2753:
! 2754: extern mp_size_t __gmp_default_fp_limb_precision;
! 2755:
! 2756:
/* Set n to the number of significant digits an mpf of the given _mp_prec
   field, in the given base.  This is a rounded up value, designed to ensure
   there's enough digits to reproduce all the guaranteed part of the value.

   There are prec many limbs, but the high might be only "1" so forget it
   and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
   further +1 is because the limbs usually won't fall on digit boundaries.

   FIXME: If base is a power of 2 and the bits per digit divides
   BITS_PER_MP_LIMB then the +2 is unnecessary.  This happens always for
   base==2, and in base==16 with the current 32 or 64 bit limb sizes. */

#define MPF_SIGNIFICANT_DIGITS(n, base, prec)                           \
  do {                                                                  \
    /* (base) parenthesized in the ASSERT too, matching its other uses  \
       below, so expression arguments evaluate safely.  */              \
    ASSERT ((base) >= 2 && (base) < numberof (mp_bases));               \
    (n) = 2 + (size_t) ((((prec) - 1) * BITS_PER_MP_LIMB)               \
                        * mp_bases[(base)].chars_per_bit_exactly);      \
  } while (0)
! 2775:
! 2776:
/* Conversion style selectors for the "conv" field below. */
#define DOPRNT_CONV_FIXED        1
#define DOPRNT_CONV_SCIENTIFIC   2
#define DOPRNT_CONV_GENERAL      3

/* Field justification selectors for the "justify" field below. */
#define DOPRNT_JUSTIFY_NONE      0
#define DOPRNT_JUSTIFY_LEFT      1
#define DOPRNT_JUSTIFY_RIGHT     2
#define DOPRNT_JUSTIFY_INTERNAL  3

/* Base-indicator selectors for the "showbase" field below. */
#define DOPRNT_SHOWBASE_YES      1
#define DOPRNT_SHOWBASE_NO       2
#define DOPRNT_SHOWBASE_NONZERO  3

/* One parsed conversion specification, consumed by the __gmp_doprnt
   routines (presumably filled in from a printf-style format string --
   see doprnt.c).  */
struct doprnt_params_t {
  int         base;          /* negative for upper case */
  int         conv;          /* choices above */
  const char  *expfmt;       /* exponent format */
  int         exptimes4;     /* exponent multiply by 4 */
  char        fill;          /* character */
  int         justify;       /* choices above */
  int         prec;          /* prec field, or -1 for all digits */
  int         showbase;      /* choices above */
  int         showpoint;     /* if radix point always shown */
  int         showtrailing;  /* if trailing zeros wanted */
  char        sign;          /* '+', ' ', or '\0' */
  int         width;         /* width field */
};
! 2804:
! 2805: #if _GMP_H_HAVE_VA_LIST
! 2806:
/* Output callback types for __gmp_doprnt; each takes the destination state
   as an opaque "data" pointer and (per DOPRNT_ACCUMULATE below) returns a
   byte count or -1 on error.  */
typedef int (*doprnt_format_t) _PROTO ((void *data, const char *fmt, va_list ap));
typedef int (*doprnt_memory_t) _PROTO ((void *data, const char *str, size_t len));
typedef int (*doprnt_reps_t)   _PROTO ((void *data, int c, int reps));
typedef int (*doprnt_final_t)  _PROTO ((void *data));

/* One complete set of output callbacks. */
struct doprnt_funs_t {
  doprnt_format_t  format;
  doprnt_memory_t  memory;
  doprnt_reps_t    reps;
  doprnt_final_t   final;   /* NULL if not required */
};

/* Predefined callback sets for the supported output destinations. */
extern const struct doprnt_funs_t  __gmp_fprintf_funs;
extern const struct doprnt_funs_t  __gmp_sprintf_funs;
extern const struct doprnt_funs_t  __gmp_snprintf_funs;
extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
extern const struct doprnt_funs_t  __gmp_ostream_funs;
! 2824:
/* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
   "size" of these have been written.  "alloc > size" is maintained, so
   there's room to store a '\0' at the end.  "result" is where the
   application wants the final block pointer.  */
struct gmp_asprintf_t {
  char    **result;   /* where the final buffer pointer is delivered */
  char    *buf;       /* current output buffer */
  size_t  size;       /* bytes written so far */
  size_t  alloc;      /* bytes allocated, invariant alloc > size */
};

/* Initialize d for an asprintf-style run delivering to *output. */
#define GMP_ASPRINTF_T_INIT(d, output)                          \
  do {                                                          \
    (d).result = (output);                                      \
    (d).alloc = 256;                                            \
    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);      \
    (d).size = 0;                                               \
  } while (0)
! 2843:
/* If a realloc is necessary, use twice the size actually required, so as to
   avoid repeated small reallocs.  */
#define GMP_ASPRINTF_T_NEED(d, n)                                       \
  do {                                                                  \
    size_t  alloc, newsize, newalloc;                                   \
    ASSERT ((d)->alloc >= (d)->size + 1);                               \
                                                                        \
    alloc = (d)->alloc;                                                 \
    newsize = (d)->size + (n);                                          \
    /* "<=" not "<": must keep alloc strictly greater than size+n, so   \
       the terminating '\0' always fits (see gmp_asprintf_t above).  */ \
    if (alloc <= newsize)                                               \
      {                                                                 \
        newalloc = 2*newsize;                                           \
        (d)->alloc = newalloc;                                          \
        (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,                \
                                               alloc, newalloc, char);  \
      }                                                                 \
  } while (0)
! 2861:
/* doprnt_funs_t-compatible callbacks writing into a gmp_asprintf_t. */
__GMP_DECLSPEC int __gmp_asprintf_memory _PROTO ((struct gmp_asprintf_t *d, const char *str, size_t len));
__GMP_DECLSPEC int __gmp_asprintf_reps _PROTO ((struct gmp_asprintf_t *d, int c, int reps));
__GMP_DECLSPEC int __gmp_asprintf_final _PROTO ((struct gmp_asprintf_t *d));

/* buf is where to write the next output, and size is how much space is left
   there.  If the application passed size==0 then that's what we'll have
   here, and nothing at all should be written.  */
struct gmp_snprintf_t {
  char    *buf;
  size_t  size;
};
! 2873:
/* Add the bytes printed by the call to the total retval, or bail out on an
   error.

   NOTE: these macros expand references to local names "retval", "funs" and
   "data", and jump to a label "error" -- all of which must exist in the
   enclosing function (the __gmp_doprnt routines).  */
#define DOPRNT_ACCUMULATE(call) \
  do {                          \
    int  __ret;                 \
    __ret = call;               \
    if (__ret == -1)            \
      goto error;               \
    retval += __ret;            \
  } while (0)
#define DOPRNT_ACCUMULATE_FUN(fun, params)      \
  do {                                          \
    ASSERT ((fun) != NULL);                     \
    DOPRNT_ACCUMULATE ((*(fun)) params);        \
  } while (0)

#define DOPRNT_FORMAT(fmt, ap)                          \
  DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
#define DOPRNT_MEMORY(ptr, len)                         \
  DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
#define DOPRNT_REPS(c, n)                               \
  DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))

#define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))

/* Same, but skip the callback (and its NULL assert) on an empty count. */
#define DOPRNT_REPS_MAYBE(c, n)         \
  do {                                  \
    if ((n) != 0)                       \
      DOPRNT_REPS (c, n);               \
  } while (0)
#define DOPRNT_MEMORY_MAYBE(ptr, len)   \
  do {                                  \
    if ((len) != 0)                     \
      DOPRNT_MEMORY (ptr, len);         \
  } while (0)
! 2909:
! 2910: __GMP_DECLSPEC int __gmp_doprnt _PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
! 2911: __GMP_DECLSPEC int __gmp_doprnt_integer _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
! 2912: __GMP_DECLSPEC int __gmp_doprnt_mpf _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, mpf_srcptr));
! 2913: /* why no __GMP_DECLSPEC here??? */
! 2914: int __gmp_replacement_vsnprintf _PROTO ((char *, size_t, const char *, va_list));
! 2915: #endif /* _GMP_H_HAVE_VA_LIST */
! 2916:
! 2917:
/* Input callback types for __gmp_doscan.  The signatures mirror the stdio
   family: scan is fscanf/sscanf-like (int result, varargs), step advances
   the opaque source by an int amount, get/unget are fgetc/ungetc-shaped --
   presumably exactly those semantics; confirm in doscan.c.  */
typedef int (*gmp_doscan_scan_t)  _PROTO ((void *, const char *, ...));
typedef void *(*gmp_doscan_step_t) _PROTO ((void *, int));
typedef int (*gmp_doscan_get_t)   _PROTO ((void *));
typedef int (*gmp_doscan_unget_t) _PROTO ((int, void *));

/* One complete set of input callbacks. */
struct gmp_doscan_funs_t {
  gmp_doscan_scan_t   scan;
  gmp_doscan_step_t   step;
  gmp_doscan_get_t    get;
  gmp_doscan_unget_t  unget;
};
extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;
! 2931:
! 2932: #if _GMP_H_HAVE_VA_LIST
! 2933: int __gmp_doscan _PROTO ((const struct gmp_doscan_funs_t *, void *,
! 2934: const char *, va_list));
! 2935: #endif
1.1.1.2 maekawa 2936:
2937:
/* For testing and debugging.  */

/* Check mpz_t canonical-form invariants: no high zero limb, allocation at
   least covering the size, and readable limb data.  */
#define MPZ_CHECK_FORMAT(z)                                     \
  do {                                                          \
    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);   \
    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));                       \
    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));                       \
  } while (0)

/* Check mpq_t canonical form: both parts canonical mpz's, denominator
   positive, zero stored as 0/1, and num/den in lowest terms.  */
#define MPQ_CHECK_FORMAT(q)                             \
  do {                                                  \
    MPZ_CHECK_FORMAT (mpq_numref (q));                  \
    MPZ_CHECK_FORMAT (mpq_denref (q));                  \
    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);            \
                                                        \
    if (SIZ(mpq_numref(q)) == 0)                        \
      {                                                 \
        /* should have zero as 0/1 */                   \
        ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1          \
                       && PTR(mpq_denref(q))[0] == 1);  \
      }                                                 \
    else                                                \
      {                                                 \
        /* should have no common factors */             \
        mpz_t  g;                                       \
        mpz_init (g);                                   \
        mpz_gcd (g, mpq_numref(q), mpq_denref(q));      \
        ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);         \
        mpz_clear (g);                                  \
      }                                                 \
  } while (0)

/* Check mpf_t invariants: precision at least the 53-bit minimum, size
   within prec+1, zero exponent on zero, and no high zero limb.  */
#define MPF_CHECK_FORMAT(f)                             \
  do {                                                  \
    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);              \
    if (SIZ(f) == 0)                                    \
      ASSERT_ALWAYS (EXP(f) == 0);                      \
    if (SIZ(f) != 0)                                    \
      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);        \
  } while (0)

/* Shrink the recorded allocation to the current size, so the next store
   that needs more space is forced through the realloc path (test aid).  */
#define MPZ_PROVOKE_REALLOC(z)                          \
  do { ALLOC(z) = ABSIZ(z); } while (0)
2982:
2983:
#if TUNE_PROGRAM_BUILD
/* Some extras wanted when recompiling some .c files for use by the tune
   program.  Not part of a normal build.

   The scheme: the compile-time threshold constants from gmp-mparam.h are
   #undef'ed and redefined to refer to variables the tune program can set
   at run time while timing the routines.  */

extern mp_size_t  mul_threshold[];
extern mp_size_t  fft_modf_mul_threshold;
extern mp_size_t  sqr_threshold[];
extern mp_size_t  fft_modf_sqr_threshold;
extern mp_size_t  sb_preinv_threshold[];
extern mp_size_t  dc_threshold[];
extern mp_size_t  powm_threshold[];
extern mp_size_t  gcd_accel_threshold[];
extern mp_size_t  gcdext_threshold[];
extern mp_size_t  divrem_1_norm_threshold[];
extern mp_size_t  divrem_1_unnorm_threshold[];
extern mp_size_t  divrem_2_threshold[];
extern mp_size_t  mod_1_norm_threshold[];
extern mp_size_t  mod_1_unnorm_threshold[];
extern mp_size_t  get_str_basecase_threshold[];
extern mp_size_t  get_str_precompute_threshold[];

#undef MUL_KARATSUBA_THRESHOLD
#undef MUL_TOOM3_THRESHOLD
#undef MUL_FFT_TABLE
#undef MUL_FFT_THRESHOLD
#undef MUL_FFT_MODF_THRESHOLD
#undef SQR_BASECASE_THRESHOLD
#undef SQR_KARATSUBA_THRESHOLD
#undef SQR_TOOM3_THRESHOLD
#undef SQR_FFT_TABLE
#undef SQR_FFT_THRESHOLD
#undef SQR_FFT_MODF_THRESHOLD
#undef DIV_DC_THRESHOLD
#undef POWM_THRESHOLD
#undef GCD_ACCEL_THRESHOLD
#undef GCDEXT_THRESHOLD
#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef GET_STR_DC_THRESHOLD
#undef GET_STR_PRECOMPUTE_THRESHOLD

#define MUL_KARATSUBA_THRESHOLD      mul_threshold[0]
#define MUL_TOOM3_THRESHOLD          mul_threshold[1]
#define MUL_FFT_TABLE                { 0 }
#define MUL_FFT_THRESHOLD            mul_threshold[2]
#define MUL_FFT_MODF_THRESHOLD       fft_modf_mul_threshold
#define SQR_BASECASE_THRESHOLD       sqr_threshold[0]
#define SQR_KARATSUBA_THRESHOLD      sqr_threshold[1]
#define SQR_TOOM3_THRESHOLD          sqr_threshold[2]
#define SQR_FFT_TABLE                { 0 }
#define SQR_FFT_THRESHOLD            sqr_threshold[3]
#define SQR_FFT_MODF_THRESHOLD       fft_modf_sqr_threshold
#define DIV_DC_THRESHOLD             dc_threshold[0]
#define POWM_THRESHOLD               powm_threshold[0]
#define GCD_ACCEL_THRESHOLD          gcd_accel_threshold[0]
#define GCDEXT_THRESHOLD             gcdext_threshold[0]
#define DIVREM_1_NORM_THRESHOLD      divrem_1_norm_threshold[0]
#define DIVREM_1_UNNORM_THRESHOLD    divrem_1_unnorm_threshold[0]
#define MOD_1_NORM_THRESHOLD         mod_1_norm_threshold[0]
#define MOD_1_UNNORM_THRESHOLD       mod_1_unnorm_threshold[0]
#define GET_STR_DC_THRESHOLD         get_str_basecase_threshold[0]
#define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold[0]

#if ! UDIV_PREINV_ALWAYS
#undef DIV_SB_PREINV_THRESHOLD
#undef DIVREM_2_THRESHOLD
#define DIV_SB_PREINV_THRESHOLD      sb_preinv_threshold[0]
#define DIVREM_2_THRESHOLD           divrem_2_threshold[0]
#endif

/* Sizes the tune program tests up to, used in a couple of recompilations. */
#define SQR_KARATSUBA_MAX_GENERIC  200
#define MUL_TOOM3_THRESHOLD_LIMIT  700
#define GET_STR_THRESHOLD_LIMIT    500

#undef  FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];

#if TUNE_PROGRAM_BUILD_SQR
#undef SQR_KARATSUBA_THRESHOLD
#define SQR_KARATSUBA_THRESHOLD  SQR_KARATSUBA_MAX_GENERIC
#endif

#endif /* TUNE_PROGRAM_BUILD */
3071:
3072: #if defined (__cplusplus)
3073: }
3074: #endif
1.1.1.4 ! ohara 3075:
! 3076:
! 3077: #ifdef __cplusplus
! 3078:
! 3079: /* A little helper for a null-terminated __gmp_allocate_func string.
! 3080: The destructor ensures it's freed even if an exception is thrown. */
! 3081: class gmp_allocated_string {
! 3082: public:
! 3083: char *str;
! 3084: gmp_allocated_string(char *arg) { str = arg; }
! 3085: ~gmp_allocated_string() { (*__gmp_free_func) (str, strlen(str)+1); }
! 3086: };
! 3087:
/* Support routines for the C++ stream operators -- they take and return
   std:: stream types (presumably implemented alongside the other C++
   interface sources; confirm their exact contracts at the definitions). */
int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
/* Callback set writing via __gmp_asprintf but with no "format" member. */
extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;
! 3093:
! 3094: #endif /* __cplusplus */
! 3095:
! 3096: #endif /* __GMP_IMPL_H__ */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>