Annotation of OpenXM_contrib/gmp/gmp-impl.h, Revision 1.1.1.2
1.1 maekawa 1: /* Include file for internal GNU MP types and definitions.
2:
1.1.1.2 ! maekawa 3: THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
! 4: BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
! 5:
! 6: Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
! 7: Foundation, Inc.
1.1 maekawa 8:
9: This file is part of the GNU MP Library.
10:
11: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 ! maekawa 12: it under the terms of the GNU Lesser General Public License as published by
! 13: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 14: option) any later version.
15:
16: The GNU MP Library is distributed in the hope that it will be useful, but
17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 ! maekawa 18: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 19: License for more details.
20:
1.1.1.2 ! maekawa 21: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 22: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
24: MA 02111-1307, USA. */
25:
1.1.1.2 ! maekawa 26: #include "config.h"
! 27: #include "gmp-mparam.h"
! 28: /* #include "longlong.h" */
! 29:
1.1 maekawa 30: /* When using gcc, make sure to use its builtin alloca. */
31: #if ! defined (alloca) && defined (__GNUC__)
32: #define alloca __builtin_alloca
33: #define HAVE_ALLOCA
34: #endif
35:
36: /* When using cc, do whatever necessary to allow use of alloca. For many
37: machines, this means including alloca.h. IBM's compilers need a #pragma
38: in "each module that needs to use alloca". */
39: #if ! defined (alloca)
40: /* We need lots of variants for MIPS, to cover all versions and perversions
41: of OSes for MIPS. */
42: #if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
43: || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
44: || defined (__alpha) || defined (__sparc) || defined (sparc) \
45: || defined (__ksr__)
46: #include <alloca.h>
47: #define HAVE_ALLOCA
48: #endif
49: #if defined (_IBMR2)
50: #pragma alloca
51: #define HAVE_ALLOCA
52: #endif
53: #if defined (__DECC)
54: #define alloca(x) __ALLOCA(x)
55: #define HAVE_ALLOCA
56: #endif
57: #endif
58:
1.1.1.2 ! maekawa 59: #if defined (alloca)
! 60: #define HAVE_ALLOCA
! 61: #endif
! 62:
1.1 maekawa 63: #if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
64: #include "stack-alloc.h"
65: #else
66: #define TMP_DECL(m)
67: #define TMP_ALLOC(x) alloca(x)
68: #define TMP_MARK(m)
69: #define TMP_FREE(m)
70: #endif
71:
1.1.1.2 ! maekawa 72: /* Allocating various types. */
! 73: #define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
! 74: #define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)
! 75: #define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr)
! 76:
1.1 maekawa 77:
1.1.1.2 ! maekawa 78: #if ! defined (__GNUC__) /* FIXME: Test for C++ compilers here,
! 79: __DECC understands __inline */
1.1 maekawa 80: #define inline /* Empty */
81: #endif
82:
83: #define ABS(x) (x >= 0 ? x : -x)
84: #define MIN(l,o) ((l) < (o) ? (l) : (o))
85: #define MAX(h,i) ((h) > (i) ? (h) : (i))
1.1.1.2 ! maekawa 86: #define numberof(x) (sizeof (x) / sizeof ((x)[0]))
1.1 maekawa 87:
88: /* Field access macros. */
89: #define SIZ(x) ((x)->_mp_size)
90: #define ABSIZ(x) ABS (SIZ (x))
91: #define PTR(x) ((x)->_mp_d)
1.1.1.2 ! maekawa 92: #define LIMBS(x) ((x)->_mp_d)
1.1 maekawa 93: #define EXP(x) ((x)->_mp_exp)
94: #define PREC(x) ((x)->_mp_prec)
95: #define ALLOC(x) ((x)->_mp_alloc)
96:
1.1.1.2 ! maekawa 97: /* Extra casts because shorts are promoted to ints by "~" and "<<". "-1"
! 98: rather than "1" in SIGNED_TYPE_MIN avoids warnings from some compilers
! 99: about arithmetic overflow. */
! 100: #define UNSIGNED_TYPE_MAX(type) ((type) ~ (type) 0)
! 101: #define UNSIGNED_TYPE_HIGHBIT(type) ((type) ~ (UNSIGNED_TYPE_MAX(type) >> 1))
! 102: #define SIGNED_TYPE_MIN(type) (((type) -1) << (8*sizeof(type)-1))
! 103: #define SIGNED_TYPE_MAX(type) ((type) ~ SIGNED_TYPE_MIN(type))
! 104: #define SIGNED_TYPE_HIGHBIT(type) SIGNED_TYPE_MIN(type)
! 105:
! 106: #define MP_LIMB_T_MAX UNSIGNED_TYPE_MAX (mp_limb_t)
! 107: #define MP_LIMB_T_HIGHBIT UNSIGNED_TYPE_HIGHBIT (mp_limb_t)
! 108:
! 109: #define MP_SIZE_T_MAX SIGNED_TYPE_MAX (mp_size_t)
! 110:
! 111: #ifndef ULONG_MAX
! 112: #define ULONG_MAX UNSIGNED_TYPE_MAX (unsigned long)
! 113: #endif
! 114: #define ULONG_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned long)
! 115: #define LONG_HIGHBIT SIGNED_TYPE_HIGHBIT (long)
! 116: #ifndef LONG_MAX
! 117: #define LONG_MAX SIGNED_TYPE_MAX (long)
! 118: #endif
! 119:
! 120: #ifndef USHORT_MAX
! 121: #define USHORT_MAX UNSIGNED_TYPE_MAX (unsigned short)
! 122: #endif
! 123: #define USHORT_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned short)
! 124: #define SHORT_HIGHBIT SIGNED_TYPE_HIGHBIT (short)
! 125: #ifndef SHORT_MAX
! 126: #define SHORT_MAX SIGNED_TYPE_MAX (short)
! 127: #endif
! 128:
! 129:
! 130: /* Swap macros. */
! 131:
! 132: #define MP_LIMB_T_SWAP(x, y) \
! 133: do { \
! 134: mp_limb_t __mp_limb_t_swap__tmp = (x); \
! 135: (x) = (y); \
! 136: (y) = __mp_limb_t_swap__tmp; \
! 137: } while (0)
! 138: #define MP_SIZE_T_SWAP(x, y) \
! 139: do { \
! 140: mp_size_t __mp_size_t_swap__tmp = (x); \
! 141: (x) = (y); \
! 142: (y) = __mp_size_t_swap__tmp; \
! 143: } while (0)
! 144:
! 145: #define MP_PTR_SWAP(x, y) \
! 146: do { \
! 147: mp_ptr __mp_ptr_swap__tmp = (x); \
! 148: (x) = (y); \
! 149: (y) = __mp_ptr_swap__tmp; \
! 150: } while (0)
! 151: #define MP_SRCPTR_SWAP(x, y) \
! 152: do { \
! 153: mp_srcptr __mp_srcptr_swap__tmp = (x); \
! 154: (x) = (y); \
! 155: (y) = __mp_srcptr_swap__tmp; \
! 156: } while (0)
! 157:
! 158: #define MPN_PTR_SWAP(xp,xs, yp,ys) \
! 159: do { \
! 160: MP_PTR_SWAP (xp, yp); \
! 161: MP_SIZE_T_SWAP (xs, ys); \
! 162: } while(0)
! 163: #define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
! 164: do { \
! 165: MP_SRCPTR_SWAP (xp, yp); \
! 166: MP_SIZE_T_SWAP (xs, ys); \
! 167: } while(0)
! 168:
! 169: #define MPZ_PTR_SWAP(x, y) \
! 170: do { \
! 171: mpz_ptr __mpz_ptr_swap__tmp = (x); \
! 172: (x) = (y); \
! 173: (y) = __mpz_ptr_swap__tmp; \
! 174: } while (0)
! 175: #define MPZ_SRCPTR_SWAP(x, y) \
! 176: do { \
! 177: mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
! 178: (x) = (y); \
! 179: (y) = __mpz_srcptr_swap__tmp; \
! 180: } while (0)
! 181:
! 182:
! 183: #if defined (__cplusplus)
! 184: extern "C" {
! 185: #endif
! 186:
! 187: /* FIXME: These are purely internal, so do a search and replace to change
! 188: them to __gmp forms, rather than using these macros. */
! 189: #define _mp_allocate_func __gmp_allocate_func
! 190: #define _mp_reallocate_func __gmp_reallocate_func
! 191: #define _mp_free_func __gmp_free_func
! 192: #define _mp_default_allocate __gmp_default_allocate
! 193: #define _mp_default_reallocate __gmp_default_reallocate
! 194: #define _mp_default_free __gmp_default_free
! 195:
! 196: extern void * (*_mp_allocate_func) _PROTO ((size_t));
! 197: extern void * (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t));
! 198: extern void (*_mp_free_func) _PROTO ((void *, size_t));
! 199:
! 200: void *_mp_default_allocate _PROTO ((size_t));
! 201: void *_mp_default_reallocate _PROTO ((void *, size_t, size_t));
! 202: void _mp_default_free _PROTO ((void *, size_t));
! 203:
! 204: #define _MP_ALLOCATE_FUNC_TYPE(n,type) \
! 205: ((type *) (*_mp_allocate_func) ((n) * sizeof (type)))
! 206: #define _MP_ALLOCATE_FUNC_LIMBS(n) _MP_ALLOCATE_FUNC_TYPE(n,mp_limb_t)
! 207:
! 208: #define _MP_FREE_FUNC_TYPE(p,n,type) (*_mp_free_func) (p, (n) * sizeof (type))
! 209: #define _MP_FREE_FUNC_LIMBS(p,n) _MP_FREE_FUNC_TYPE(p,n,mp_limb_t)
1.1 maekawa 210:
1.1.1.2 ! maekawa 211:
! 212: #if (__STDC__-0) || defined (__cplusplus)
1.1 maekawa 213:
214: #else
215:
216: #define const /* Empty */
217: #define signed /* Empty */
218:
1.1.1.2 ! maekawa 219: #endif
1.1 maekawa 220:
1.1.1.2 ! maekawa 221: #if defined (__GNUC__) && defined (__i386__)
! 222: #if 0 /* check that these actually improve things */
! 223: #define MPN_COPY_INCR(DST, SRC, N) \
! 224: __asm__ ("cld\n\trep\n\tmovsl" : : \
! 225: "D" (DST), "S" (SRC), "c" (N) : \
! 226: "cx", "di", "si", "memory")
! 227: #define MPN_COPY_DECR(DST, SRC, N) \
! 228: __asm__ ("std\n\trep\n\tmovsl" : : \
! 229: "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
! 230: "cx", "di", "si", "memory")
! 231: #define MPN_NORMALIZE_NOT_ZERO(P, N) \
! 232: do { \
! 233: __asm__ ("std\n\trepe\n\tscasl" : "=c" (N) : \
! 234: "a" (0), "D" ((P) + (N) - 1), "0" (N) : \
! 235: "cx", "di"); \
! 236: (N)++; \
! 237: } while (0)
! 238: #endif
! 239: #endif
1.1 maekawa 240:
1.1.1.2 ! maekawa 241: #if HAVE_NATIVE_mpn_copyi
! 242: #define mpn_copyi __MPN(copyi)
! 243: void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1.1 maekawa 244: #endif
245:
1.1.1.2 ! maekawa 246: /* Remap names of internal mpn functions. */
! 247: #define __clz_tab __MPN(clz_tab)
! 248: #define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
! 249: #define mpn_reciprocal __MPN(reciprocal)
! 250:
! 251: #define mpn_sb_divrem_mn __MPN(sb_divrem_mn)
! 252: #define mpn_bz_divrem_n __MPN(bz_divrem_n)
! 253: #define mpn_tdiv_qr __MPN(tdiv_qr)
! 254: /* #define mpn_tdiv_q __MPN(tdiv_q) */
! 255:
! 256: #define mpn_kara_mul_n __MPN(kara_mul_n)
! 257: void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
! 258:
! 259: #define mpn_kara_sqr_n __MPN(kara_sqr_n)
! 260: void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
! 261:
! 262: #define mpn_toom3_mul_n __MPN(toom3_mul_n)
! 263: void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));
! 264:
! 265: #define mpn_toom3_sqr_n __MPN(toom3_sqr_n)
! 266: void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
! 267:
! 268: #define mpn_fft_best_k __MPN(fft_best_k)
! 269: int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr));
! 270:
! 271: #define mpn_mul_fft __MPN(mul_fft)
! 272: void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
! 273: mp_srcptr n, mp_size_t nl,
! 274: mp_srcptr m, mp_size_t ml,
! 275: int k));
! 276:
! 277: #define mpn_mul_fft_full __MPN(mul_fft_full)
! 278: void mpn_mul_fft_full _PROTO ((mp_ptr op,
! 279: mp_srcptr n, mp_size_t nl,
! 280: mp_srcptr m, mp_size_t ml));
! 281:
! 282: #define mpn_fft_next_size __MPN(fft_next_size)
! 283: mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k));
! 284:
! 285: mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
! 286: mp_limb_t mpn_bz_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
! 287: void mpn_tdiv_qr _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
! 288: /* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */
! 289:
! 290: /* Copy NLIMBS *limbs* from SRC to DST, NLIMBS==0 allowed. */
! 291: #ifndef MPN_COPY_INCR
! 292: #if HAVE_NATIVE_mpn_copyi
! 293: #define MPN_COPY_INCR(DST, SRC, NLIMBS) mpn_copyi (DST, SRC, NLIMBS)
! 294: #else
1.1 maekawa 295: #define MPN_COPY_INCR(DST, SRC, NLIMBS) \
296: do { \
297: mp_size_t __i; \
298: for (__i = 0; __i < (NLIMBS); __i++) \
299: (DST)[__i] = (SRC)[__i]; \
300: } while (0)
1.1.1.2 ! maekawa 301: #endif
! 302: #endif
! 303:
! 304: #if HAVE_NATIVE_mpn_copyd
! 305: #define mpn_copyd __MPN(copyd)
! 306: void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
! 307: #endif
! 308:
! 309: /* NLIMBS==0 allowed */
! 310: #ifndef MPN_COPY_DECR
! 311: #if HAVE_NATIVE_mpn_copyd
! 312: #define MPN_COPY_DECR(DST, SRC, NLIMBS) mpn_copyd (DST, SRC, NLIMBS)
! 313: #else
1.1 maekawa 314: #define MPN_COPY_DECR(DST, SRC, NLIMBS) \
315: do { \
316: mp_size_t __i; \
317: for (__i = (NLIMBS) - 1; __i >= 0; __i--) \
318: (DST)[__i] = (SRC)[__i]; \
319: } while (0)
1.1.1.2 ! maekawa 320: #endif
! 321: #endif
! 322:
! 323: /* Define MPN_COPY for vector computers. Since #pragma cannot be in a macro,
! 324: rely on function inlining. */
! 325: #if defined (_CRAY) || defined (__uxp__)
! 326: static inline void
! 327: _MPN_COPY (d, s, n) mp_ptr d; mp_srcptr s; mp_size_t n;
! 328: {
! 329: int i; /* Faster for Cray with plain int */
! 330: #pragma _CRI ivdep /* Cray PVP systems */
! 331: #pragma loop noalias d,s /* Fujitsu VPP systems */
! 332: for (i = 0; i < n; i++)
! 333: d[i] = s[i];
! 334: }
! 335: #define MPN_COPY _MPN_COPY
! 336: #endif
! 337:
! 338: #ifndef MPN_COPY
1.1 maekawa 339: #define MPN_COPY MPN_COPY_INCR
1.1.1.2 ! maekawa 340: #endif
1.1 maekawa 341:
342: /* Zero NLIMBS *limbs* AT DST. */
1.1.1.2 ! maekawa 343: #ifndef MPN_ZERO
1.1 maekawa 344: #define MPN_ZERO(DST, NLIMBS) \
345: do { \
346: mp_size_t __i; \
347: for (__i = 0; __i < (NLIMBS); __i++) \
348: (DST)[__i] = 0; \
349: } while (0)
1.1.1.2 ! maekawa 350: #endif
1.1 maekawa 351:
1.1.1.2 ! maekawa 352: #ifndef MPN_NORMALIZE
1.1 maekawa 353: #define MPN_NORMALIZE(DST, NLIMBS) \
354: do { \
355: while (NLIMBS > 0) \
356: { \
357: if ((DST)[(NLIMBS) - 1] != 0) \
358: break; \
359: NLIMBS--; \
360: } \
361: } while (0)
1.1.1.2 ! maekawa 362: #endif
! 363: #ifndef MPN_NORMALIZE_NOT_ZERO
1.1 maekawa 364: #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
365: do { \
366: while (1) \
367: { \
368: if ((DST)[(NLIMBS) - 1] != 0) \
369: break; \
370: NLIMBS--; \
371: } \
372: } while (0)
1.1.1.2 ! maekawa 373: #endif
1.1 maekawa 374:
1.1.1.2 ! maekawa 375: /* Strip least significant zero limbs from ptr,size by incrementing ptr and
! 376: decrementing size. The number in ptr,size must be non-zero, ie. size!=0
! 377: and somewhere a non-zero limb. */
! 378: #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size) \
! 379: do \
! 380: { \
! 381: ASSERT ((size) != 0); \
! 382: while ((ptr)[0] == 0) \
! 383: { \
! 384: (ptr)++; \
! 385: (size)--; \
! 386: ASSERT (size >= 0); \
! 387: } \
! 388: } \
! 389: while (0)
! 390:
! 391: /* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
! 392: temporary variable; it will be automatically cleared out at function
! 393: return. We use __x here to make it possible to accept both mpz_ptr and
! 394: mpz_t arguments. */
1.1 maekawa 395: #define MPZ_TMP_INIT(X, NLIMBS) \
396: do { \
397: mpz_ptr __x = (X); \
398: __x->_mp_alloc = (NLIMBS); \
399: __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB); \
400: } while (0)
401:
1.1.1.2 ! maekawa 402: /* Realloc for an mpz_t WHAT if it has less thann NEEDED limbs. */
! 403: #define MPZ_REALLOC(what,needed) \
! 404: do { \
! 405: if ((needed) > ALLOC (what)) \
! 406: _mpz_realloc (what, needed); \
! 407: } while (0)
! 408:
! 409: /* If KARATSUBA_MUL_THRESHOLD is not already defined, define it to a
! 410: value which is good on most machines. */
! 411: #ifndef KARATSUBA_MUL_THRESHOLD
! 412: #define KARATSUBA_MUL_THRESHOLD 32
! 413: #endif
! 414:
! 415: /* If TOOM3_MUL_THRESHOLD is not already defined, define it to a
! 416: value which is good on most machines. */
! 417: #ifndef TOOM3_MUL_THRESHOLD
! 418: #define TOOM3_MUL_THRESHOLD 256
! 419: #endif
! 420:
! 421: #ifndef KARATSUBA_SQR_THRESHOLD
! 422: #define KARATSUBA_SQR_THRESHOLD (2*KARATSUBA_MUL_THRESHOLD)
! 423: #endif
! 424:
! 425: #ifndef TOOM3_SQR_THRESHOLD
! 426: #define TOOM3_SQR_THRESHOLD (2*TOOM3_MUL_THRESHOLD)
! 427: #endif
! 428:
! 429: /* First k to use for an FFT modF multiply. A modF FFT is an order
! 430: log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
! 431: whereas k=4 is 1.33 which is faster than toom3 at 1.485. */
! 432: #define FFT_FIRST_K 4
! 433:
! 434: /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
! 435: #ifndef FFT_MODF_MUL_THRESHOLD
! 436: #define FFT_MODF_MUL_THRESHOLD (TOOM3_MUL_THRESHOLD * 3)
! 437: #endif
! 438: #ifndef FFT_MODF_SQR_THRESHOLD
! 439: #define FFT_MODF_SQR_THRESHOLD (TOOM3_SQR_THRESHOLD * 3)
! 440: #endif
! 441:
! 442: /* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
! 443: will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
! 444: NxN->2N multiply and not recursing into itself is an order
! 445: log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
! 446: is the first better than toom3. */
! 447: #ifndef FFT_MUL_THRESHOLD
! 448: #define FFT_MUL_THRESHOLD (FFT_MODF_MUL_THRESHOLD * 10)
! 449: #endif
! 450: #ifndef FFT_SQR_THRESHOLD
! 451: #define FFT_SQR_THRESHOLD (FFT_MODF_SQR_THRESHOLD * 10)
! 452: #endif
! 453:
! 454: /* Table of thresholds for successive modF FFT "k"s. The first entry is
! 455: where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
! 456: etc. See mpn_fft_best_k(). */
! 457: #ifndef FFT_MUL_TABLE
! 458: #define FFT_MUL_TABLE \
! 459: { TOOM3_MUL_THRESHOLD * 4, /* k=5 */ \
! 460: TOOM3_MUL_THRESHOLD * 8, /* k=6 */ \
! 461: TOOM3_MUL_THRESHOLD * 16, /* k=7 */ \
! 462: TOOM3_MUL_THRESHOLD * 32, /* k=8 */ \
! 463: TOOM3_MUL_THRESHOLD * 96, /* k=9 */ \
! 464: TOOM3_MUL_THRESHOLD * 288, /* k=10 */ \
! 465: 0 }
! 466: #endif
! 467: #ifndef FFT_SQR_TABLE
! 468: #define FFT_SQR_TABLE \
! 469: { TOOM3_SQR_THRESHOLD * 4, /* k=5 */ \
! 470: TOOM3_SQR_THRESHOLD * 8, /* k=6 */ \
! 471: TOOM3_SQR_THRESHOLD * 16, /* k=7 */ \
! 472: TOOM3_SQR_THRESHOLD * 32, /* k=8 */ \
! 473: TOOM3_SQR_THRESHOLD * 96, /* k=9 */ \
! 474: TOOM3_SQR_THRESHOLD * 288, /* k=10 */ \
! 475: 0 }
! 476: #endif
! 477:
! 478: #ifndef FFT_TABLE_ATTRS
! 479: #define FFT_TABLE_ATTRS static const
! 480: #endif
! 481:
! 482: #define MPN_FFT_TABLE_SIZE 16
! 483:
! 484:
! 485: /* Return non-zero if xp,xsize and yp,ysize overlap.
! 486: If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
! 487: overlap. If both these are false, there's an overlap. */
! 488: #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
! 489: ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
! 490:
! 491:
! 492: /* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
! 493: ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
! 494: does it always. Generally assertions are meant for development, but
! 495: might help when looking for a problem later too.
! 496:
! 497: ASSERT_NOCARRY() uses ASSERT() to check the expression is zero, but if
! 498: assertion checking is disabled, the expression is still evaluated. This
! 499: is meant for use with routines like mpn_add_n() where the return value
! 500: represents a carry or whatever that shouldn't occur. For example,
! 501: ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
! 502:
! 503: #ifdef __LINE__
! 504: #define ASSERT_LINE __LINE__
! 505: #else
! 506: #define ASSERT_LINE -1
! 507: #endif
! 508:
! 509: #ifdef __FILE__
! 510: #define ASSERT_FILE __FILE__
! 511: #else
! 512: #define ASSERT_FILE ""
! 513: #endif
! 514:
! 515: int __gmp_assert_fail _PROTO((const char *filename, int linenum,
! 516: const char *expr));
! 517:
! 518: #if HAVE_STRINGIZE
! 519: #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
! 520: #else
! 521: #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
! 522: #endif
! 523:
! 524: #if HAVE_VOID
! 525: #define CAST_TO_VOID (void)
! 526: #else
! 527: #define CAST_TO_VOID
! 528: #endif
! 529:
! 530: #define ASSERT_ALWAYS(expr) ((expr) ? 0 : ASSERT_FAIL (expr))
! 531:
! 532: #if WANT_ASSERT
! 533: #define ASSERT(expr) ASSERT_ALWAYS (expr)
! 534: #define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
! 535:
! 536: #else
! 537: #define ASSERT(expr) (CAST_TO_VOID 0)
! 538: #define ASSERT_NOCARRY(expr) (expr)
! 539: #endif
! 540:
! 541:
! 542: #if HAVE_NATIVE_mpn_com_n
! 543: #define mpn_com_n __MPN(com_n)
! 544: void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
! 545: #else
! 546: #define mpn_com_n(d,s,n) \
! 547: do \
! 548: { \
! 549: mp_ptr __d = (d); \
! 550: mp_srcptr __s = (s); \
! 551: mp_size_t __n = (n); \
! 552: do \
! 553: *__d++ = *__s++; \
! 554: while (--__n); \
! 555: } \
! 556: while (0)
! 557: #endif
! 558:
! 559: #define MPN_LOGOPS_N_INLINE(d,s1,s2,n,dop,op,s2op) \
! 560: do \
! 561: { \
! 562: mp_ptr __d = (d); \
! 563: mp_srcptr __s1 = (s1); \
! 564: mp_srcptr __s2 = (s2); \
! 565: mp_size_t __n = (n); \
! 566: do \
! 567: *__d++ = dop (*__s1++ op s2op *__s2++); \
! 568: while (--__n); \
! 569: } \
! 570: while (0)
! 571:
! 572: #if HAVE_NATIVE_mpn_and_n
! 573: #define mpn_and_n __MPN(and_n)
! 574: void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
! 575: #else
! 576: #define mpn_and_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&, )
! 577: #endif
! 578:
! 579: #if HAVE_NATIVE_mpn_andn_n
! 580: #define mpn_andn_n __MPN(andn_n)
! 581: void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
! 582: #else
! 583: #define mpn_andn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&,~)
! 584: #endif
! 585:
! 586: #if HAVE_NATIVE_mpn_nand_n
! 587: #define mpn_nand_n __MPN(nand_n)
! 588: void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
! 589: #else
! 590: #define mpn_nand_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,&, )
! 591: #endif
! 592:
! 593: #if HAVE_NATIVE_mpn_ior_n
! 594: #define mpn_ior_n __MPN(ior_n)
! 595: void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
! 596: #else
! 597: #define mpn_ior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|, )
! 598: #endif
! 599:
! 600: #if HAVE_NATIVE_mpn_iorn_n
! 601: #define mpn_iorn_n __MPN(iorn_n)
! 602: void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
! 603: #else
! 604: #define mpn_iorn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|,~)
! 605: #endif
! 606:
! 607: #if HAVE_NATIVE_mpn_nior_n
! 608: #define mpn_nior_n __MPN(nior_n)
! 609: void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
! 610: #else
! 611: #define mpn_nior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,|, )
! 612: #endif
! 613:
! 614: #if HAVE_NATIVE_mpn_xor_n
! 615: #define mpn_xor_n __MPN(xor_n)
! 616: void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
! 617: #else
! 618: #define mpn_xor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,^, )
! 619: #endif
! 620:
! 621: #if HAVE_NATIVE_mpn_xnor_n
! 622: #define mpn_xnor_n __MPN(xnor_n)
! 623: void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
! 624: #else
! 625: #define mpn_xnor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,^, )
! 626: #endif
1.1 maekawa 627:
628: /* Structure for conversion between internal binary format and
629: strings in base 2..36. */
630: struct bases
631: {
632: /* Number of digits in the conversion base that always fits in an mp_limb_t.
633: For example, for base 10 on a machine where a mp_limb_t has 32 bits this
634: is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
635: int chars_per_limb;
636:
637: /* log(2)/log(conversion_base) */
1.1.1.2 ! maekawa 638: double chars_per_bit_exactly;
1.1 maekawa 639:
640: /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
641: factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
642: i.e. the number of bits used to represent each digit in the base. */
643: mp_limb_t big_base;
644:
645: /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
646: fixed-point number. Instead of dividing by big_base an application can
647: choose to multiply by big_base_inverted. */
648: mp_limb_t big_base_inverted;
649: };
650:
1.1.1.2 ! maekawa 651: #define __mp_bases __MPN(mp_bases)
1.1 maekawa 652: extern const struct bases __mp_bases[];
653: extern mp_size_t __gmp_default_fp_limb_precision;
654:
1.1.1.2 ! maekawa 655: #if defined (__i386__)
! 656: #define TARGET_REGISTER_STARVED 1
! 657: #else
! 658: #define TARGET_REGISTER_STARVED 0
! 659: #endif
! 660:
! 661: /* Use a library function for invert_limb, if available. */
! 662: #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
! 663: #define mpn_invert_limb __MPN(invert_limb)
! 664: mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t));
! 665: #define invert_limb(invxl,xl) (invxl = __MPN(invert_limb) (xl))
! 666: #endif
! 667:
! 668: #ifndef invert_limb
! 669: #define invert_limb(invxl,xl) \
! 670: do { \
! 671: mp_limb_t dummy; \
! 672: if (xl << 1 == 0) \
! 673: invxl = ~(mp_limb_t) 0; \
! 674: else \
! 675: udiv_qrnnd (invxl, dummy, -xl, 0, xl); \
! 676: } while (0)
! 677: #endif
! 678:
1.1 maekawa 679: /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
680: limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
681: If this would yield overflow, DI should be the largest possible number
682: (i.e., only ones). For correct operation, the most significant bit of D
683: has to be set. Put the quotient in Q and the remainder in R. */
684: #define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
685: do { \
686: mp_limb_t _q, _ql, _r; \
687: mp_limb_t _xh, _xl; \
688: umul_ppmm (_q, _ql, (nh), (di)); \
689: _q += (nh); /* DI is 2**BITS_PER_MP_LIMB too small */\
690: umul_ppmm (_xh, _xl, _q, (d)); \
691: sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
692: if (_xh != 0) \
693: { \
694: sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
695: _q += 1; \
696: if (_xh != 0) \
697: { \
698: sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
699: _q += 1; \
700: } \
701: } \
702: if (_r >= (d)) \
703: { \
704: _r -= (d); \
705: _q += 1; \
706: } \
707: (r) = _r; \
708: (q) = _q; \
709: } while (0)
710: /* Like udiv_qrnnd_preinv, but for for any value D. DNORM is D shifted left
711: so that its most significant bit is set. LGUP is ceil(log2(D)). */
712: #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
713: do { \
1.1.1.2 ! maekawa 714: mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
1.1 maekawa 715: mp_limb_t _xh, _xl; \
1.1.1.2 ! maekawa 716: _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> (l - 1));\
! 717: _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup)); \
! 718: _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
! 719: _nadj = _n10 + (_n1 & (dnorm)); \
! 720: umul_ppmm (_xh, _xl, di, _n2 - _n1); \
! 721: add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
! 722: _q1 = ~(_n2 + _xh); \
! 723: umul_ppmm (_xh, _xl, _q1, d); \
1.1 maekawa 724: add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
725: _xh -= (d); \
726: (r) = _xl + ((d) & _xh); \
1.1.1.2 ! maekawa 727: (q) = _xh - _q1; \
1.1 maekawa 728: } while (0)
729: /* Exactly like udiv_qrnnd_preinv, but branch-free. It is not clear which
730: version to use. */
731: #define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
732: do { \
1.1.1.2 ! maekawa 733: mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
1.1 maekawa 734: mp_limb_t _xh, _xl; \
1.1.1.2 ! maekawa 735: _n2 = (nh); \
! 736: _n10 = (nl); \
! 737: _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
! 738: _nadj = _n10 + (_n1 & (d)); \
! 739: umul_ppmm (_xh, _xl, di, _n2 - _n1); \
! 740: add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
! 741: _q1 = ~(_n2 + _xh); \
! 742: umul_ppmm (_xh, _xl, _q1, d); \
1.1 maekawa 743: add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
744: _xh -= (d); \
745: (r) = _xl + ((d) & _xh); \
1.1.1.2 ! maekawa 746: (q) = _xh - _q1; \
1.1 maekawa 747: } while (0)
748:
1.1.1.2 ! maekawa 749:
! 750: /* modlimb_invert() sets "inv" to the multiplicative inverse of "n" modulo
! 751: 2^BITS_PER_MP_LIMB, ie. so that inv*n == 1 mod 2^BITS_PER_MP_LIMB.
! 752: "n" must be odd (otherwise such an inverse doesn't exist).
! 753:
! 754: This is not to be confused with invert_limb(), which is completely
! 755: different.
! 756:
! 757: The table lookup gives an inverse with the low 8 bits valid, and each
! 758: multiply step doubles the number of bits. See Jebelean's exact division
! 759: paper, end of section 4 (reference in gmp.texi). */
! 760:
! 761: #define modlimb_invert_table __gmp_modlimb_invert_table
! 762: extern const unsigned char modlimb_invert_table[128];
! 763:
! 764: #if BITS_PER_MP_LIMB <= 32
! 765: #define modlimb_invert(inv,n) \
! 766: do { \
! 767: mp_limb_t __n = (n); \
! 768: mp_limb_t __inv; \
! 769: ASSERT ((__n & 1) == 1); \
! 770: __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
! 771: __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
! 772: __inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \
! 773: ASSERT (__inv * __n == 1); \
! 774: (inv) = __inv; \
! 775: } while (0)
! 776: #endif
! 777:
! 778: #if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
! 779: #define modlimb_invert(inv,n) \
! 780: do { \
! 781: mp_limb_t __n = (n); \
! 782: mp_limb_t __inv; \
! 783: ASSERT ((__n & 1) == 1); \
! 784: __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
! 785: __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
! 786: __inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \
! 787: __inv = 2 * __inv - __inv * __inv * __n; /* 64 */ \
! 788: ASSERT (__inv * __n == 1); \
! 789: (inv) = __inv; \
! 790: } while (0)
! 791: #endif
! 792:
! 793:
! 794: /* The `mode' attribute was introduced in GCC 2.2, but we can only distinguish
! 795: between GCC 2 releases from 2.5, since __GNUC_MINOR__ wasn't introduced
! 796: until then. */
! 797: #if (__GNUC__ - 0 > 2 || defined (__GNUC_MINOR__)) && ! defined (__APPLE_CC__)
1.1 maekawa 798: /* Define stuff for longlong.h. */
799: typedef unsigned int UQItype __attribute__ ((mode (QI)));
1.1.1.2 ! maekawa 800: typedef int SItype __attribute__ ((mode (SI)));
1.1 maekawa 801: typedef unsigned int USItype __attribute__ ((mode (SI)));
802: typedef int DItype __attribute__ ((mode (DI)));
803: typedef unsigned int UDItype __attribute__ ((mode (DI)));
804: #else
805: typedef unsigned char UQItype;
1.1.1.2 ! maekawa 806: typedef long SItype;
1.1 maekawa 807: typedef unsigned long USItype;
1.1.1.2 ! maekawa 808: #if defined _LONGLONG || defined _LONG_LONG_LIMB
! 809: typedef long long int DItype;
! 810: typedef unsigned long long int UDItype;
! 811: #else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
! 812: typedef long int DItype;
! 813: typedef unsigned long int UDItype;
! 814: #endif
1.1 maekawa 815: #endif
816:
817: typedef mp_limb_t UWtype;
818: typedef unsigned int UHWtype;
819: #define W_TYPE_SIZE BITS_PER_MP_LIMB
820:
821: /* Define ieee_double_extract and _GMP_IEEE_FLOATS. */
822:
1.1.1.2 ! maekawa 823: #if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__)))
! 824: /* Special case for little endian ARM since floats remain in big-endian. */
! 825: #define _GMP_IEEE_FLOATS 1
! 826: union ieee_double_extract
! 827: {
! 828: struct
! 829: {
! 830: unsigned int manh:20;
! 831: unsigned int exp:11;
! 832: unsigned int sig:1;
! 833: unsigned int manl:32;
! 834: } s;
! 835: double d;
! 836: };
! 837: #else
1.1 maekawa 838: #if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__) \
839: || defined (__alpha) \
840: || defined (__clipper__) \
841: || defined (__cris) \
842: || defined (__i386__) \
843: || defined (__i860__) \
844: || defined (__i960__) \
845: || defined (MIPSEL) || defined (_MIPSEL) \
846: || defined (__ns32000__) \
847: || defined (__WINNT) || defined (_WIN32)
848: #define _GMP_IEEE_FLOATS 1
849: union ieee_double_extract
850: {
851: struct
852: {
853: unsigned int manl:32;
854: unsigned int manh:20;
855: unsigned int exp:11;
856: unsigned int sig:1;
857: } s;
858: double d;
859: };
860: #else /* Need this as an #else since the tests aren't made exclusive. */
1.1.1.2 ! maekawa 861: #if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__) \
1.1 maekawa 862: || defined (__a29k__) || defined (_AM29K) \
863: || defined (__arm__) \
864: || (defined (__convex__) && defined (_IEEE_FLOAT_)) \
1.1.1.2 ! maekawa 865: || defined (_CRAYMPP) \
1.1 maekawa 866: || defined (__i370__) || defined (__mvs__) \
1.1.1.2 ! maekawa 867: || defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\
1.1 maekawa 868: || defined(mc68020) \
869: || defined (__m88000__) \
870: || defined (MIPSEB) || defined (_MIPSEB) \
1.1.1.2 ! maekawa 871: || defined (__hppa) || defined (__hppa__) \
1.1 maekawa 872: || defined (__pyr__) \
873: || defined (__ibm032__) \
874: || defined (_IBMR2) || defined (_ARCH_PPC) \
875: || defined (__sh__) \
876: || defined (__sparc) || defined (sparc) \
877: || defined (__we32k__)
878: #define _GMP_IEEE_FLOATS 1
879: union ieee_double_extract
880: {
881: struct
882: {
883: unsigned int sig:1;
884: unsigned int exp:11;
885: unsigned int manh:20;
886: unsigned int manl:32;
887: } s;
888: double d;
889: };
890: #endif
891: #endif
1.1.1.2 ! maekawa 892: #endif
1.1 maekawa 893:
1.1.1.2 ! maekawa 894: /* Using "(2.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)))" doesn't work on
! 895: SunOS 4.1.4 native /usr/ucb/cc (K&R), it comes out as -4294967296.0,
! 896: presumably due to treating the mp_limb_t constant as signed rather than
! 897: unsigned. */
! 898: #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 2)))
1.1 maekawa 899: #if BITS_PER_MP_LIMB == 64
900: #define LIMBS_PER_DOUBLE 2
901: #else
902: #define LIMBS_PER_DOUBLE 3
903: #endif
904:
905: double __gmp_scale2 _PROTO ((double, int));
1.1.1.2 ! maekawa 906: int __gmp_extract_double _PROTO ((mp_ptr, double));
! 907:
! 908: extern int __gmp_junk;
! 909: extern const int __gmp_0;
! 910: #define GMP_ERROR(code) (gmp_errno |= (code), __gmp_junk = 10/__gmp_0)
! 911: #define DIVIDE_BY_ZERO GMP_ERROR(GMP_ERROR_DIVISION_BY_ZERO)
! 912: #define SQRT_OF_NEGATIVE GMP_ERROR(GMP_ERROR_SQRT_OF_NEGATIVE)
! 913:
! 914: #if defined _LONG_LONG_LIMB
! 915: #if defined (__STDC__)
! 916: #define CNST_LIMB(C) C##LL
! 917: #else
! 918: #define CNST_LIMB(C) C/**/LL
! 919: #endif
! 920: #else /* not _LONG_LONG_LIMB */
! 921: #if defined (__STDC__)
! 922: #define CNST_LIMB(C) C##L
! 923: #else
! 924: #define CNST_LIMB(C) C/**/L
! 925: #endif
! 926: #endif /* _LONG_LONG_LIMB */
! 927:
! 928: /*** Stuff used by mpn/generic/prefsqr.c and mpn/generic/next_prime.c ***/
! 929: #if BITS_PER_MP_LIMB == 32
! 930: #define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x 13 x ... x 29 */
! 931: #define PP_INVERTED 0x53E5645CL
! 932: #define PP_MAXPRIME 29
! 933: #define PP_MASK 0x208A28A8L
! 934: #endif
! 935:
! 936: #if BITS_PER_MP_LIMB == 64
! 937: #define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x 13 x ... x 53 */
! 938: #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
! 939: #define PP_MAXPRIME 53
! 940: #define PP_MASK CNST_LIMB(0x208A20A08A28A8)
! 941: #endif
! 942:
! 943:
! 944: /* BIT1 means a result value in bit 1 (second least significant bit), with a
! 945: zero bit representing +1 and a one bit representing -1. Bits other than
! 946: bit 1 are garbage.
! 947:
! 948: JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
! 949: and their speed is important. Expressions are used rather than
! 950: conditionals to accumulate sign changes, which effectively means XORs
! 951: instead of conditional JUMPs. */
! 952:
! 953: /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
! 954: #define JACOBI_S0(a) \
! 955: (((a) == 1) | ((a) == -1))
! 956:
! 957: /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
! 958: #define JACOBI_U0(a) \
! 959: ((a) == 1)
! 960:
! 961: /* (a/0), with a an mpz_t; is 1 if a=+/-1, 0 otherwise
! 962: An mpz_t always has at least one limb of allocated space, so the fetch of
! 963: the low limb is valid. */
! 964: #define JACOBI_Z0(a) \
! 965: (((SIZ(a) == 1) | (SIZ(a) == -1)) & (PTR(a)[0] == 1))
! 966:
! 967: /* Convert a bit1 to +1 or -1. */
! 968: #define JACOBI_BIT1_TO_PN(result_bit1) \
! 969: (1 - ((result_bit1) & 2))
! 970:
! 971: /* (2/b), with b unsigned and odd;
! 972: is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
! 973: hence obtained from (b>>1)^b */
! 974: #define JACOBI_TWO_U_BIT1(b) \
! 975: (ASSERT (b & 1), (((b) >> 1) ^ (b)))
! 976:
! 977: /* (2/b)^twos, with b unsigned and odd */
! 978: #define JACOBI_TWOS_U_BIT1(twos, b) \
! 979: (((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
! 980:
! 981: /* (2/b)^twos, with b unsigned and odd */
! 982: #define JACOBI_TWOS_U(twos, b) \
! 983: (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
! 984:
! 985: /* (a/b) effect due to sign of a: signed/unsigned, b odd;
! 986: is (-1)^((b-1)/2) if a<0, or +1 if a>=0 */
! 987: #define JACOBI_ASGN_SU_BIT1(a, b) \
! 988: ((((a) < 0) << 1) & (b))
! 989:
! 990: /* (a/b) effect due to sign of b: signed/mpz;
! 991: is -1 if a and b both negative, +1 otherwise */
! 992: #define JACOBI_BSGN_SZ_BIT1(a, b) \
! 993: ((((a) < 0) & (SIZ(b) < 0)) << 1)
! 994:
! 995: /* (a/b) effect due to sign of b: mpz/signed */
! 996: #define JACOBI_BSGN_ZS_BIT1(a, b) \
! 997: JACOBI_BSGN_SZ_BIT1(b, a)
! 998:
! 999: /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd.
! 1000: Is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4 or -1 if
! 1001: both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd
! 1002: because this is used in a couple of places with only bit 1 of a or b
! 1003: valid. */
! 1004: #define JACOBI_RECIP_UU_BIT1(a, b) \
! 1005: ((a) & (b))
! 1006:
! 1007:
! 1008: /* For testing and debugging. */
! 1009: #define MPZ_CHECK_FORMAT(z) \
! 1010: (ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0), \
! 1011: ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)))
! 1012: #define MPZ_PROVOKE_REALLOC(z) \
! 1013: do { ALLOC(z) = ABSIZ(z); } while (0)
! 1014:
! 1015:
! 1016: #if TUNE_PROGRAM_BUILD
! 1017: /* Some extras wanted when recompiling some .c files for use by the tune
! 1018: program. Not part of a normal build. */
! 1019:
! 1020: extern mp_size_t mul_threshold[];
! 1021: extern mp_size_t fft_modf_mul_threshold;
! 1022: extern mp_size_t sqr_threshold[];
! 1023: extern mp_size_t fft_modf_sqr_threshold;
! 1024: extern mp_size_t bz_threshold[];
! 1025: extern mp_size_t fib_threshold[];
! 1026: extern mp_size_t powm_threshold[];
! 1027: extern mp_size_t gcd_accel_threshold[];
! 1028: extern mp_size_t gcdext_threshold[];
! 1029:
! 1030: #undef KARATSUBA_MUL_THRESHOLD
! 1031: #undef TOOM3_MUL_THRESHOLD
! 1032: #undef FFT_MUL_TABLE
! 1033: #undef FFT_MUL_THRESHOLD
! 1034: #undef FFT_MODF_MUL_THRESHOLD
! 1035: #undef KARATSUBA_SQR_THRESHOLD
! 1036: #undef TOOM3_SQR_THRESHOLD
! 1037: #undef FFT_SQR_TABLE
! 1038: #undef FFT_SQR_THRESHOLD
! 1039: #undef FFT_MODF_SQR_THRESHOLD
! 1040: #undef BZ_THRESHOLD
! 1041: #undef FIB_THRESHOLD
! 1042: #undef POWM_THRESHOLD
! 1043: #undef GCD_ACCEL_THRESHOLD
! 1044: #undef GCDEXT_THRESHOLD
! 1045:
! 1046: #define KARATSUBA_MUL_THRESHOLD mul_threshold[0]
! 1047: #define TOOM3_MUL_THRESHOLD mul_threshold[1]
! 1048: #define FFT_MUL_TABLE 0
! 1049: #define FFT_MUL_THRESHOLD mul_threshold[2]
! 1050: #define FFT_MODF_MUL_THRESHOLD fft_modf_mul_threshold
! 1051: #define KARATSUBA_SQR_THRESHOLD sqr_threshold[0]
! 1052: #define TOOM3_SQR_THRESHOLD sqr_threshold[1]
! 1053: #define FFT_SQR_TABLE 0
! 1054: #define FFT_SQR_THRESHOLD sqr_threshold[2]
! 1055: #define FFT_MODF_SQR_THRESHOLD fft_modf_sqr_threshold
! 1056: #define BZ_THRESHOLD bz_threshold[0]
! 1057: #define FIB_THRESHOLD fib_threshold[0]
! 1058: #define POWM_THRESHOLD powm_threshold[0]
! 1059: #define GCD_ACCEL_THRESHOLD gcd_accel_threshold[0]
! 1060: #define GCDEXT_THRESHOLD gcdext_threshold[0]
! 1061:
! 1062: #define TOOM3_MUL_THRESHOLD_LIMIT 700
! 1063:
! 1064: #undef FFT_TABLE_ATTRS
! 1065: #define FFT_TABLE_ATTRS
! 1066: extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
! 1067:
! 1068: #endif /* TUNE_PROGRAM_BUILD */
! 1069:
! 1070: #if defined (__cplusplus)
! 1071: }
! 1072: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>