OpenXM_contrib/gmp/gmp-impl.h, Revision 1.1.1.3
/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include "config.h"
#include "gmp-mparam.h"
/* #include "longlong.h" */

/* When using gcc, make sure to use its builtin alloca.  */
#if ! defined (alloca) && defined (__GNUC__)
#define alloca __builtin_alloca
#define HAVE_ALLOCA
#endif

/* When using cc, do whatever is necessary to allow use of alloca.  For many
   machines, this means including alloca.h.  IBM's compilers need a #pragma
   in "each module that needs to use alloca".  */
#if ! defined (alloca)
/* We need lots of variants for MIPS, to cover all versions and perversions
   of OSes for MIPS.  */
#if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
 || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
 || defined (__alpha) || defined (__sparc) || defined (sparc) \
 || defined (__ksr__)
#include <alloca.h>
#define HAVE_ALLOCA
#endif
#if defined (_IBMR2)
#pragma alloca
#define HAVE_ALLOCA
#endif
#if defined (__DECC)
#define alloca(x) __ALLOCA(x)
#define HAVE_ALLOCA
#endif
#endif

#if defined (alloca)
#define HAVE_ALLOCA
#endif

#if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
#include "stack-alloc.h"
#else
#define TMP_DECL(m)
#define TMP_ALLOC(x) alloca(x)
#define TMP_MARK(m)
#define TMP_FREE(m)
#endif

/* Allocating various types.  */
#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)

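/* A usage sketch of the TMP_ scheme above (hypothetical function and
   variable names, not part of this header): internal routines bracket
   their temporary allocations with TMP_MARK/TMP_FREE so everything is
   reclaimed on return.  */
#if 0
static void
example_tmp_usage (mp_size_t n)
{
  mp_ptr scratch;
  TMP_DECL (marker);
  TMP_MARK (marker);
  scratch = TMP_ALLOC_LIMBS (n);  /* n limbs of scratch space */
  /* ... use scratch ... */
  TMP_FREE (marker);              /* releases everything since TMP_MARK */
}
#endif
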
#if ! defined (__GNUC__)        /* FIXME: Test for C++ compilers here,
                                   __DECC understands __inline */
#define inline                  /* Empty */
#endif

#define ABS(x) ((x) >= 0 ? (x) : -(x))
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))

/* Field access macros.  */
#define SIZ(x) ((x)->_mp_size)
#define ABSIZ(x) ABS (SIZ (x))
#define PTR(x) ((x)->_mp_d)
#define LIMBS(x) ((x)->_mp_d)
#define EXP(x) ((x)->_mp_exp)
#define PREC(x) ((x)->_mp_prec)
#define ALLOC(x) ((x)->_mp_alloc)

/* Extra casts because shorts are promoted to ints by "~" and "<<".  "-1"
   rather than "1" in SIGNED_TYPE_MIN avoids warnings from some compilers
   about arithmetic overflow.  */
#define UNSIGNED_TYPE_MAX(type)      ((type) ~ (type) 0)
#define UNSIGNED_TYPE_HIGHBIT(type)  ((type) ~ (UNSIGNED_TYPE_MAX(type) >> 1))
#define SIGNED_TYPE_MIN(type)        (((type) -1) << (8*sizeof(type)-1))
#define SIGNED_TYPE_MAX(type)        ((type) ~ SIGNED_TYPE_MIN(type))
#define SIGNED_TYPE_HIGHBIT(type)    SIGNED_TYPE_MIN(type)

#define MP_LIMB_T_MAX      UNSIGNED_TYPE_MAX (mp_limb_t)
#define MP_LIMB_T_HIGHBIT  UNSIGNED_TYPE_HIGHBIT (mp_limb_t)

#define MP_SIZE_T_MAX      SIGNED_TYPE_MAX (mp_size_t)

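/* Illustration, assuming a 32-bit mp_limb_t for concreteness: the macros
   above give MP_LIMB_T_MAX == 0xFFFFFFFF and MP_LIMB_T_HIGHBIT ==
   0x80000000; likewise SIGNED_TYPE_MIN (short) is -32768 for a 16-bit
   short.  */
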
#ifndef ULONG_MAX
#define ULONG_MAX   UNSIGNED_TYPE_MAX (unsigned long)
#endif
#define ULONG_HIGHBIT  UNSIGNED_TYPE_HIGHBIT (unsigned long)
#define LONG_HIGHBIT   SIGNED_TYPE_HIGHBIT (long)
#ifndef LONG_MAX
#define LONG_MAX    SIGNED_TYPE_MAX (long)
#endif

#ifndef USHORT_MAX
#define USHORT_MAX  UNSIGNED_TYPE_MAX (unsigned short)
#endif
#define USHORT_HIGHBIT  UNSIGNED_TYPE_HIGHBIT (unsigned short)
#define SHORT_HIGHBIT   SIGNED_TYPE_HIGHBIT (short)
#ifndef SHORT_MAX
#define SHORT_MAX   SIGNED_TYPE_MAX (short)
#endif


/* Swap macros.  */

#define MP_LIMB_T_SWAP(x, y)                    \
  do {                                          \
    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_limb_t_swap__tmp;                \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                    \
  do {                                          \
    mp_size_t __mp_size_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_size_t_swap__tmp;                \
  } while (0)

#define MP_PTR_SWAP(x, y)               \
  do {                                  \
    mp_ptr __mp_ptr_swap__tmp = (x);    \
    (x) = (y);                          \
    (y) = __mp_ptr_swap__tmp;           \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                    \
  do {                                          \
    mp_srcptr __mp_srcptr_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_srcptr_swap__tmp;                \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys)      \
  do {                                  \
    MP_PTR_SWAP (xp, yp);               \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while(0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
  do {                                  \
    MP_SRCPTR_SWAP (xp, yp);            \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while(0)

#define MPZ_PTR_SWAP(x, y)              \
  do {                                  \
    mpz_ptr __mpz_ptr_swap__tmp = (x);  \
    (x) = (y);                          \
    (y) = __mpz_ptr_swap__tmp;          \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                   \
  do {                                          \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
    (x) = (y);                                  \
    (y) = __mpz_srcptr_swap__tmp;               \
  } while (0)


#if defined (__cplusplus)
extern "C" {
#endif

/* FIXME: These are purely internal, so do a search and replace to change
   them to __gmp forms, rather than using these macros.  */
#define _mp_allocate_func      __gmp_allocate_func
#define _mp_reallocate_func    __gmp_reallocate_func
#define _mp_free_func          __gmp_free_func
#define _mp_default_allocate   __gmp_default_allocate
#define _mp_default_reallocate __gmp_default_reallocate
#define _mp_default_free       __gmp_default_free

extern void *  (*_mp_allocate_func) _PROTO ((size_t));
extern void *  (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t));
extern void    (*_mp_free_func) _PROTO ((void *, size_t));

void *_mp_default_allocate _PROTO ((size_t));
void *_mp_default_reallocate _PROTO ((void *, size_t, size_t));
void _mp_default_free _PROTO ((void *, size_t));

#define _MP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*_mp_allocate_func) ((n) * sizeof (type)))
#define _MP_ALLOCATE_FUNC_LIMBS(n)   _MP_ALLOCATE_FUNC_TYPE(n,mp_limb_t)

#define _MP_FREE_FUNC_TYPE(p,n,type) (*_mp_free_func) (p, (n) * sizeof (type))
#define _MP_FREE_FUNC_LIMBS(p,n)     _MP_FREE_FUNC_TYPE(p,n,mp_limb_t)
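
/* Sketch of the intended pairing (hypothetical names): a block obtained
   through the allocation function pointer is released with the same size
   it was allocated with, e.g.
     mp_ptr p = _MP_ALLOCATE_FUNC_LIMBS (n);
     ...
     _MP_FREE_FUNC_LIMBS (p, n);  */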


#if (__STDC__-0) || defined (__cplusplus)

#else

#define const       /* Empty */
#define signed      /* Empty */

#endif

#if defined (__GNUC__) && defined (__i386__)
#if 0   /* check that these actually improve things */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#define MPN_NORMALIZE_NOT_ZERO(P, N)                                    \
  do {                                                                  \
    __asm__ ("std\n\trepe\n\tscasl" : "=c" (N) :                        \
             "a" (0), "D" ((P) + (N) - 1), "0" (N) :                    \
             "cx", "di");                                               \
    (N)++;                                                              \
  } while (0)
#endif
#endif

#if HAVE_NATIVE_mpn_copyi
#define mpn_copyi __MPN(copyi)
void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif

/* Remap names of internal mpn functions.  */
#define __clz_tab          __MPN(clz_tab)
#define mpn_udiv_w_sdiv    __MPN(udiv_w_sdiv)
#define mpn_reciprocal     __MPN(reciprocal)

#define mpn_sb_divrem_mn   __MPN(sb_divrem_mn)
#define mpn_bz_divrem_n    __MPN(bz_divrem_n)
/* #define mpn_tdiv_q      __MPN(tdiv_q) */

#define mpn_kara_mul_n  __MPN(kara_mul_n)
void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_kara_sqr_n  __MPN(kara_sqr_n)
void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_mul_n  __MPN(toom3_mul_n)
void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));

#define mpn_toom3_sqr_n  __MPN(toom3_sqr_n)
void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_fft_best_k __MPN(fft_best_k)
int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr));

#define mpn_mul_fft  __MPN(mul_fft)
void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
                          mp_srcptr n, mp_size_t nl,
                          mp_srcptr m, mp_size_t ml,
                          int k));

#define mpn_mul_fft_full  __MPN(mul_fft_full)
void mpn_mul_fft_full _PROTO ((mp_ptr op,
                               mp_srcptr n, mp_size_t nl,
                               mp_srcptr m, mp_size_t ml));

#define mpn_fft_next_size __MPN(fft_next_size)
mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k));

mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
mp_limb_t mpn_bz_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */

/* Copy NLIMBS *limbs* from SRC to DST, NLIMBS==0 allowed.  */
#ifndef MPN_COPY_INCR
#if HAVE_NATIVE_mpn_copyi
#define MPN_COPY_INCR(DST, SRC, NLIMBS)   mpn_copyi (DST, SRC, NLIMBS)
#else
#define MPN_COPY_INCR(DST, SRC, NLIMBS)         \
  do {                                          \
    mp_size_t __i;                              \
    for (__i = 0; __i < (NLIMBS); __i++)        \
      (DST)[__i] = (SRC)[__i];                  \
  } while (0)
#endif
#endif

#if HAVE_NATIVE_mpn_copyd
#define mpn_copyd __MPN(copyd)
void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif

/* NLIMBS==0 allowed */
#ifndef MPN_COPY_DECR
#if HAVE_NATIVE_mpn_copyd
#define MPN_COPY_DECR(DST, SRC, NLIMBS)   mpn_copyd (DST, SRC, NLIMBS)
#else
#define MPN_COPY_DECR(DST, SRC, NLIMBS)         \
  do {                                          \
    mp_size_t __i;                              \
    for (__i = (NLIMBS) - 1; __i >= 0; __i--)   \
      (DST)[__i] = (SRC)[__i];                  \
  } while (0)
#endif
#endif

/* Define MPN_COPY for vector computers.  Since #pragma cannot be in a macro,
   rely on function inlining.  */
#if defined (_CRAY) || defined (__uxp__)
static inline void
_MPN_COPY (d, s, n) mp_ptr d; mp_srcptr s; mp_size_t n;
{
  int i;                        /* Faster for Cray with plain int */
#pragma _CRI ivdep              /* Cray PVP systems */
#pragma loop noalias d,s        /* Fujitsu VPP systems */
  for (i = 0; i < n; i++)
    d[i] = s[i];
}
#define MPN_COPY _MPN_COPY
#endif

#ifndef MPN_COPY
#define MPN_COPY MPN_COPY_INCR
#endif

/* Zero NLIMBS *limbs* AT DST.  */
#ifndef MPN_ZERO
#define MPN_ZERO(DST, NLIMBS)                   \
  do {                                          \
    mp_size_t __i;                              \
    for (__i = 0; __i < (NLIMBS); __i++)        \
      (DST)[__i] = 0;                           \
  } while (0)
#endif

#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS)              \
  do {                                          \
    while (NLIMBS > 0)                          \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        NLIMBS--;                               \
      }                                         \
  } while (0)
#endif
#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
  do {                                          \
    while (1)                                   \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        NLIMBS--;                               \
      }                                         \
  } while (0)
#endif
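
/* A hedged usage note: after an operation like mpn_sub_n the most
   significant limbs of a result can become zero, so callers typically do
   MPN_NORMALIZE (rp, rsize) before using rsize as an mpz size field
   (rp and rsize being hypothetical names here).  */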

/* Strip least significant zero limbs from ptr,size by incrementing ptr and
   decrementing size.  The number in ptr,size must be non-zero, ie. size!=0
   and somewhere a non-zero limb.  */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size) \
  do                                            \
    {                                           \
      ASSERT ((size) != 0);                     \
      while ((ptr)[0] == 0)                     \
        {                                       \
          (ptr)++;                              \
          (size)--;                             \
          ASSERT (size >= 0);                   \
        }                                       \
    }                                           \
  while (0)

/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
   temporary variable; it will be automatically cleared out at function
   return.  We use __x here to make it possible to accept both mpz_ptr and
   mpz_t arguments.  */
#define MPZ_TMP_INIT(X, NLIMBS)                                     \
  do {                                                              \
    mpz_ptr __x = (X);                                              \
    __x->_mp_alloc = (NLIMBS);                                      \
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB); \
  } while (0)
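
/* Sketch of the intended pattern (hypothetical names): MPZ_TMP_INIT goes
   inside a TMP_MARK/TMP_FREE pair, and the variable needs no mpz_clear
   since its limbs come from TMP_ALLOC.
     mpz_t t;
     TMP_DECL (marker);
     TMP_MARK (marker);
     MPZ_TMP_INIT (t, 2 * n);
     ...
     TMP_FREE (marker);  */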

/* Realloc for an mpz_t WHAT if it has fewer than NEEDED limbs.  */
#define MPZ_REALLOC(what,needed)        \
  do {                                  \
    if ((needed) > ALLOC (what))        \
      _mpz_realloc (what, needed);      \
  } while (0)

/* If KARATSUBA_MUL_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef KARATSUBA_MUL_THRESHOLD
#define KARATSUBA_MUL_THRESHOLD 32
#endif

/* If TOOM3_MUL_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef TOOM3_MUL_THRESHOLD
#define TOOM3_MUL_THRESHOLD 256
#endif

#ifndef KARATSUBA_SQR_THRESHOLD
#define KARATSUBA_SQR_THRESHOLD (2*KARATSUBA_MUL_THRESHOLD)
#endif

#ifndef TOOM3_SQR_THRESHOLD
#define TOOM3_SQR_THRESHOLD (2*TOOM3_MUL_THRESHOLD)
#endif

/* First k to use for an FFT modF multiply.  A modF FFT is an order
   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
   whereas k=4 is 1.33 which is faster than toom3 at 1.485.  */
#define FFT_FIRST_K  4

/* Threshold at which FFT should be used to do a modF NxN -> N multiply.  */
#ifndef FFT_MODF_MUL_THRESHOLD
#define FFT_MODF_MUL_THRESHOLD   (TOOM3_MUL_THRESHOLD * 3)
#endif
#ifndef FFT_MODF_SQR_THRESHOLD
#define FFT_MODF_SQR_THRESHOLD   (TOOM3_SQR_THRESHOLD * 3)
#endif

/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
   NxN->2N multiply and not recursing into itself is an order
   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
   is the first better than toom3.  */
#ifndef FFT_MUL_THRESHOLD
#define FFT_MUL_THRESHOLD   (FFT_MODF_MUL_THRESHOLD * 10)
#endif
#ifndef FFT_SQR_THRESHOLD
#define FFT_SQR_THRESHOLD   (FFT_MODF_SQR_THRESHOLD * 10)
#endif

/* Table of thresholds for successive modF FFT "k"s.  The first entry is
   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
   etc.  See mpn_fft_best_k().  */
#ifndef FFT_MUL_TABLE
#define FFT_MUL_TABLE                           \
  { TOOM3_MUL_THRESHOLD * 4,   /* k=5 */        \
    TOOM3_MUL_THRESHOLD * 8,   /* k=6 */        \
    TOOM3_MUL_THRESHOLD * 16,  /* k=7 */        \
    TOOM3_MUL_THRESHOLD * 32,  /* k=8 */        \
    TOOM3_MUL_THRESHOLD * 96,  /* k=9 */        \
    TOOM3_MUL_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif
#ifndef FFT_SQR_TABLE
#define FFT_SQR_TABLE                           \
  { TOOM3_SQR_THRESHOLD * 4,   /* k=5 */        \
    TOOM3_SQR_THRESHOLD * 8,   /* k=6 */        \
    TOOM3_SQR_THRESHOLD * 16,  /* k=7 */        \
    TOOM3_SQR_THRESHOLD * 32,  /* k=8 */        \
    TOOM3_SQR_THRESHOLD * 96,  /* k=9 */        \
    TOOM3_SQR_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif

#ifndef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS   static const
#endif

#define MPN_FFT_TABLE_SIZE  16


/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap.  */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
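
/* For instance, xp=100 with xsize=5 covers limbs [100,105): a region
   starting at yp=103 overlaps it, one starting at yp=105 does not (for
   any non-zero ysize), matching the expression above.  */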


/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   ASSERT_NOCARRY() uses ASSERT() to check the expression is zero, but if
   assertion checking is disabled, the expression is still evaluated.  This
   is meant for use with routines like mpn_add_n() where the return value
   represents a carry or whatever that shouldn't occur.  For example,
   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */

#ifdef __LINE__
#define ASSERT_LINE  __LINE__
#else
#define ASSERT_LINE  -1
#endif

#ifdef __FILE__
#define ASSERT_FILE  __FILE__
#else
#define ASSERT_FILE  ""
#endif

int __gmp_assert_fail _PROTO((const char *filename, int linenum,
                              const char *expr));

#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif

#if HAVE_VOID
#define CAST_TO_VOID  (void)
#else
#define CAST_TO_VOID
#endif

#define ASSERT_ALWAYS(expr) ((expr) ? 0 : ASSERT_FAIL (expr))

#if WANT_ASSERT
#define ASSERT(expr)           ASSERT_ALWAYS (expr)
#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)

#else
#define ASSERT(expr)           (CAST_TO_VOID 0)
#define ASSERT_NOCARRY(expr)   (expr)
#endif


#if HAVE_NATIVE_mpn_com_n
#define mpn_com_n __MPN(com_n)
void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#else
#define mpn_com_n(d,s,n)        \
  do                            \
    {                           \
      mp_ptr     __d = (d);     \
      mp_srcptr  __s = (s);     \
      mp_size_t  __n = (n);     \
      do                        \
        *__d++ = ~ *__s++;      \
      while (--__n);            \
    }                           \
  while (0)
#endif

#define MPN_LOGOPS_N_INLINE(d,s1,s2,n,dop,op,s2op)      \
  do                                                    \
    {                                                   \
      mp_ptr     __d  = (d);                            \
      mp_srcptr  __s1 = (s1);                           \
      mp_srcptr  __s2 = (s2);                           \
      mp_size_t  __n  = (n);                            \
      do                                                \
        *__d++ = dop (*__s1++ op s2op *__s2++);         \
      while (--__n);                                    \
    }                                                   \
  while (0)
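
/* The dop/op/s2op parameters select the operation applied limb by limb:
   the mpn_nand_n fallback below passes (~, &, empty), giving
   *__d++ = ~(*__s1++ & *__s2++), while mpn_andn_n passes (empty, &, ~),
   giving *__d++ = (*__s1++ & ~*__s2++).  */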

#if HAVE_NATIVE_mpn_and_n
#define mpn_and_n __MPN(and_n)
void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_and_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&, )
#endif

#if HAVE_NATIVE_mpn_andn_n
#define mpn_andn_n __MPN(andn_n)
void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_andn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&,~)
#endif

#if HAVE_NATIVE_mpn_nand_n
#define mpn_nand_n __MPN(nand_n)
void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nand_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,&, )
#endif

#if HAVE_NATIVE_mpn_ior_n
#define mpn_ior_n __MPN(ior_n)
void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_ior_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|, )
#endif

#if HAVE_NATIVE_mpn_iorn_n
#define mpn_iorn_n __MPN(iorn_n)
void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_iorn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|,~)
#endif

#if HAVE_NATIVE_mpn_nior_n
#define mpn_nior_n __MPN(nior_n)
void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,|, )
#endif

#if HAVE_NATIVE_mpn_xor_n
#define mpn_xor_n __MPN(xor_n)
void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xor_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,^, )
#endif

#if HAVE_NATIVE_mpn_xnor_n
#define mpn_xnor_n __MPN(xnor_n)
void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xnor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,^, )
#endif

/* Structure for conversion between internal binary format and
   strings in base 2..36.  */
struct bases
{
  /* Number of digits in the conversion base that always fits in an mp_limb_t.
     For example, for base 10 on a machine where a mp_limb_t has 32 bits this
     is 9, since 10**9 is the largest number that fits into a mp_limb_t.  */
  int chars_per_limb;

  /* log(2)/log(conversion_base) */
  double chars_per_bit_exactly;

  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
     i.e. the number of bits used to represent each digit in the base.  */
  mp_limb_t big_base;

  /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
     fixed-point number.  Instead of dividing by big_base an application can
     choose to multiply by big_base_inverted.  */
  mp_limb_t big_base_inverted;
};

#define __mp_bases __MPN(mp_bases)
extern const struct bases __mp_bases[];
extern mp_size_t __gmp_default_fp_limb_precision;
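
/* For instance, following the struct comments above, on a 32-bit limb the
   base-10 entry of __mp_bases would hold chars_per_limb 9 and big_base
   10**9 = 1000000000 (an illustration, not a definition made here).  */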

#if defined (__i386__)
#define TARGET_REGISTER_STARVED 1
#else
#define TARGET_REGISTER_STARVED 0
#endif

/* Use a library function for invert_limb, if available.  */
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define mpn_invert_limb  __MPN(invert_limb)
mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t));
#define invert_limb(invxl,xl)  (invxl = __MPN(invert_limb) (xl))
#endif

#ifndef invert_limb
#define invert_limb(invxl,xl)                   \
  do {                                          \
    mp_limb_t dummy;                            \
    if (xl << 1 == 0)                           \
      invxl = ~(mp_limb_t) 0;                   \
    else                                        \
      udiv_qrnnd (invxl, dummy, -xl, 0, xl);    \
  } while (0)
#endif
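
/* Writing B = 2^BITS_PER_MP_LIMB, the udiv_qrnnd above divides the
   two-limb value (B-xl)*B by xl, so invxl = floor(B^2/xl) - B, which is
   exactly the "DI" operand described for udiv_qrnnd_preinv below (xl must
   be normalized, ie. have its most significant bit set).  */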

/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
   If this would yield overflow, DI should be the largest possible number
   (i.e., only ones).  For correct operation, the most significant bit of D
   has to be set.  Put the quotient in Q and the remainder in R.  */
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di)                          \
  do {                                                                  \
    mp_limb_t _q, _ql, _r;                                              \
    mp_limb_t _xh, _xl;                                                 \
    umul_ppmm (_q, _ql, (nh), (di));                                    \
    _q += (nh);                 /* DI is 2**BITS_PER_MP_LIMB too small */\
    umul_ppmm (_xh, _xl, _q, (d));                                      \
    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                         \
    if (_xh != 0)                                                       \
      {                                                                 \
        sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                          \
        _q += 1;                                                        \
        if (_xh != 0)                                                   \
          {                                                             \
            sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                      \
            _q += 1;                                                    \
          }                                                             \
      }                                                                 \
    if (_r >= (d))                                                      \
      {                                                                 \
        _r -= (d);                                                      \
        _q += 1;                                                        \
      }                                                                 \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)
/* Like udiv_qrnnd_preinv, but for any value D.  DNORM is D shifted left
   so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup)         \
  do {                                                                  \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;                               \
    mp_limb_t _xh, _xl;                                                 \
    _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> ((lgup) - 1));\
    _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));                         \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));          \
    _nadj = _n10 + (_n1 & (dnorm));                                     \
    umul_ppmm (_xh, _xl, di, _n2 - _n1);                                \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                          \
    _q1 = ~(_n2 + _xh);                                                 \
    umul_ppmm (_xh, _xl, _q1, d);                                       \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
    _xh -= (d);                                                         \
    (r) = _xl + ((d) & _xh);                                            \
    (q) = _xh - _q1;                                                    \
  } while (0)
/* Exactly like udiv_qrnnd_preinv, but branch-free.  It is not clear which
   version to use.  */
#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di)             \
  do {                                                          \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;                       \
    mp_limb_t _xh, _xl;                                         \
    _n2 = (nh);                                                 \
    _n10 = (nl);                                                \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));  \
    _nadj = _n10 + (_n1 & (d));                                 \
    umul_ppmm (_xh, _xl, di, _n2 - _n1);                        \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                  \
    _q1 = ~(_n2 + _xh);                                         \
    umul_ppmm (_xh, _xl, _q1, d);                               \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                    \
    _xh -= (d);                                                 \
    (r) = _xl + ((d) & _xh);                                    \
    (q) = _xh - _q1;                                            \
  } while (0)


/* modlimb_invert() sets "inv" to the multiplicative inverse of "n" modulo
   2^BITS_PER_MP_LIMB, ie. so that inv*n == 1 mod 2^BITS_PER_MP_LIMB.
   "n" must be odd (otherwise such an inverse doesn't exist).

   This is not to be confused with invert_limb(), which is completely
   different.

   The table lookup gives an inverse with the low 8 bits valid, and each
   multiply step doubles the number of bits.  See Jebelean's exact division
   paper, end of section 4 (reference in gmp.texi).  */

#define modlimb_invert_table  __gmp_modlimb_invert_table
extern const unsigned char  modlimb_invert_table[128];

#if BITS_PER_MP_LIMB <= 32
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n&0xFF)/2]; /*  8 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 16 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 32 */        \
    ASSERT (__inv * __n == 1);                                  \
    (inv) = __inv;                                              \
  } while (0)
#endif

#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n&0xFF)/2]; /*  8 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 16 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 32 */        \
    __inv = 2 * __inv - __inv * __inv * __n;    /* 64 */        \
    ASSERT (__inv * __n == 1);                                  \
    (inv) = __inv;                                              \
  } while (0)
#endif
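
/* Worked illustration of the doubling steps (not part of the header
   proper): for n = 3 the table yields 0xAB, and 3*0xAB = 0x201 == 1 mod
   2^8.  One step gives 2*0xAB - 0xAB*0xAB*3 == 0xAAAB mod 2^16, and
   indeed 3*0xAAAB = 0x20001 == 1 mod 2^16.  */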


/* The `mode' attribute was introduced in GCC 2.2, but we can only distinguish
   between GCC 2 releases from 2.5, since __GNUC_MINOR__ wasn't introduced
   until then.  */
#if (__GNUC__ - 0 > 2 || defined (__GNUC_MINOR__)) && ! defined (__APPLE_CC__)
/* Define stuff for longlong.h.  */
typedef unsigned int UQItype    __attribute__ ((mode (QI)));
typedef          int SItype     __attribute__ ((mode (SI)));
typedef unsigned int USItype    __attribute__ ((mode (SI)));
typedef          int DItype     __attribute__ ((mode (DI)));
typedef unsigned int UDItype    __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
typedef          long SItype;
typedef unsigned long USItype;
#if defined _LONGLONG || defined _LONG_LONG_LIMB
typedef long long int DItype;
typedef unsigned long long int UDItype;
#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
typedef long int DItype;
typedef unsigned long int UDItype;
#endif
#endif

typedef mp_limb_t UWtype;
typedef unsigned int UHWtype;
#define W_TYPE_SIZE BITS_PER_MP_LIMB

/* Define ieee_double_extract and _GMP_IEEE_FLOATS.  */

#if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__)))
/* Special case for little endian ARM since floats remain in big-endian.  */
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
      unsigned int manl:32;
    } s;
  double d;
};
#else
#if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__)     \
 || defined (__alpha)                                           \
 || defined (__clipper__)                                       \
 || defined (__cris)                                            \
 || defined (__i386__)                                          \
 || defined (__i860__)                                          \
 || defined (__i960__)                                          \
 || defined (MIPSEL) || defined (_MIPSEL)                       \
 || defined (__ns32000__)                                       \
 || defined (__WINNT) || defined (_WIN32)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manl:32;
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
    } s;
  double d;
};
#else /* Need this as an #else since the tests aren't made exclusive.  */
#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__)           \
 || defined (__a29k__) || defined (_AM29K)                      \
 || defined (__arm__)                                           \
 || (defined (__convex__) && defined (_IEEE_FLOAT_))            \
 || defined (_CRAYMPP)                                          \
 || defined (__i370__) || defined (__mvs__)                     \
 || defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\
 || defined(mc68020)                                            \
 || defined (__m88000__)                                        \
 || defined (MIPSEB) || defined (_MIPSEB)                       \
 || defined (__hppa) || defined (__hppa__)                      \
 || defined (__pyr__)                                           \
 || defined (__ibm032__)                                        \
 || defined (_IBMR2) || defined (_ARCH_PPC)                     \
 || defined (__sh__)                                            \
 || defined (__sparc) || defined (sparc)                        \
 || defined (__we32k__)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int sig:1;
      unsigned int exp:11;
      unsigned int manh:20;
      unsigned int manl:32;
    } s;
  double d;
};
#endif
#endif
#endif

/* Using "(2.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)))" doesn't work on
   SunOS 4.1.4 native /usr/ucb/cc (K&R), it comes out as -4294967296.0,
   presumably due to treating the mp_limb_t constant as signed rather than
   unsigned.  */
#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 2)))
#if BITS_PER_MP_LIMB == 64
#define LIMBS_PER_DOUBLE 2
#else
#define LIMBS_PER_DOUBLE 3
#endif
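
/* Rationale sketch: a double's 53-bit mantissa can straddle up to three
   32-bit limbs when it isn't limb-aligned, but at most two 64-bit limbs,
   hence the LIMBS_PER_DOUBLE values above.  */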

double __gmp_scale2 _PROTO ((double, int));
int __gmp_extract_double _PROTO ((mp_ptr, double));

extern int __gmp_junk;
extern const int __gmp_0;
#define GMP_ERROR(code)   (gmp_errno |= (code), __gmp_junk = 10/__gmp_0)
#define DIVIDE_BY_ZERO    GMP_ERROR(GMP_ERROR_DIVISION_BY_ZERO)
#define SQRT_OF_NEGATIVE  GMP_ERROR(GMP_ERROR_SQRT_OF_NEGATIVE)

#if defined _LONG_LONG_LIMB
#if defined (__STDC__)
#define CNST_LIMB(C) C##LL
#else
#define CNST_LIMB(C) C/**/LL
#endif
#else /* not _LONG_LONG_LIMB */
#if defined (__STDC__)
#define CNST_LIMB(C) C##L
#else
#define CNST_LIMB(C) C/**/L
#endif
#endif /* _LONG_LONG_LIMB */

/*** Stuff used by mpn/generic/prefsqr.c and mpn/generic/next_prime.c ***/
#if BITS_PER_MP_LIMB == 32
#define PP 0xC0CFD797L          /* 3 x 5 x 7 x 11 x 13 x ... x 29 */
#define PP_INVERTED 0x53E5645CL
#define PP_MAXPRIME 29
#define PP_MASK 0x208A28A8L
#endif

#if BITS_PER_MP_LIMB == 64
#define PP CNST_LIMB(0xE221F97C30E94E1D)        /* 3 x 5 x 7 x 11 x 13 x ... x 53 */
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
#define PP_MAXPRIME 53
#define PP_MASK CNST_LIMB(0x208A20A08A28A8)
#endif
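
/* For reference (easily checked): 3*5*7*11*13*17*19*23*29 = 3234846615 =
   0xC0CFD797, the largest product of consecutive odd primes that fits in
   32 bits; PP_INVERTED is presumably the precomputed reciprocal of PP for
   preinverted division, so one remainder against PP tests a candidate
   against all those primes at once.  */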


/* BIT1 means a result value in bit 1 (second least significant bit), with a
   zero bit representing +1 and a one bit representing -1.  Bits other than
   bit 1 are garbage.

   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
   and their speed is important.  Expressions are used rather than
   conditionals to accumulate sign changes, which effectively means XORs
   instead of conditional JUMPs.  */

/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_S0(a) \
  (((a) == 1) | ((a) == -1))

/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_U0(a) \
  ((a) == 1)

/* (a/0), with a an mpz_t; is 1 if a=+/-1, 0 otherwise
   An mpz_t always has at least one limb of allocated space, so the fetch of
   the low limb is valid.  */
#define JACOBI_Z0(a) \
  (((SIZ(a) == 1) | (SIZ(a) == -1)) & (PTR(a)[0] == 1))

/* Convert a bit1 to +1 or -1.  */
#define JACOBI_BIT1_TO_PN(result_bit1) \
  (1 - ((result_bit1) & 2))

/* (2/b), with b unsigned and odd;
   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
   hence obtained from (b>>1)^b */
#define JACOBI_TWO_U_BIT1(b) \
  (ASSERT (b & 1), (((b) >> 1) ^ (b)))
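
/* Example: b=7 gives (7>>1)^7 = 3^7 = 4 with bit 1 clear, so (2/7) = +1;
   b=3 gives 1^3 = 2 with bit 1 set, so (2/3) = -1, matching
   (-1)^((b^2-1)/8) above.  */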

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U_BIT1(twos, b) \
  (((twos) << 1) & JACOBI_TWO_U_BIT1 (b))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U(twos, b) \
  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))

/* (a/b) effect due to sign of a: signed/unsigned, b odd;
   is (-1)^((b-1)/2) if a<0, or +1 if a>=0 */
#define JACOBI_ASGN_SU_BIT1(a, b) \
  ((((a) < 0) << 1) & (b))

/* (a/b) effect due to sign of b: signed/mpz;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SZ_BIT1(a, b) \
  ((((a) < 0) & (SIZ(b) < 0)) << 1)

/* (a/b) effect due to sign of b: mpz/signed */
#define JACOBI_BSGN_ZS_BIT1(a, b) \
  JACOBI_BSGN_SZ_BIT1(b, a)

/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd.
   Is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4 or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   valid.  */
#define JACOBI_RECIP_UU_BIT1(a, b) \
  ((a) & (b))


/* For testing and debugging.  */
#define MPZ_CHECK_FORMAT(z)                                     \
  (ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0),    \
   ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)))
#define MPZ_PROVOKE_REALLOC(z) \
  do { ALLOC(z) = ABSIZ(z); } while (0)


#if TUNE_PROGRAM_BUILD
/* Some extras wanted when recompiling some .c files for use by the tune
   program.  Not part of a normal build.  */

extern mp_size_t mul_threshold[];
extern mp_size_t fft_modf_mul_threshold;
extern mp_size_t sqr_threshold[];
extern mp_size_t fft_modf_sqr_threshold;
extern mp_size_t bz_threshold[];
extern mp_size_t fib_threshold[];
extern mp_size_t powm_threshold[];
extern mp_size_t gcd_accel_threshold[];
extern mp_size_t gcdext_threshold[];

#undef KARATSUBA_MUL_THRESHOLD
#undef TOOM3_MUL_THRESHOLD
#undef FFT_MUL_TABLE
#undef FFT_MUL_THRESHOLD
#undef FFT_MODF_MUL_THRESHOLD
#undef KARATSUBA_SQR_THRESHOLD
#undef TOOM3_SQR_THRESHOLD
#undef FFT_SQR_TABLE
#undef FFT_SQR_THRESHOLD
#undef FFT_MODF_SQR_THRESHOLD
#undef BZ_THRESHOLD
#undef FIB_THRESHOLD
#undef POWM_THRESHOLD
#undef GCD_ACCEL_THRESHOLD
#undef GCDEXT_THRESHOLD

#define KARATSUBA_MUL_THRESHOLD  mul_threshold[0]
#define TOOM3_MUL_THRESHOLD      mul_threshold[1]
#define FFT_MUL_TABLE            0
#define FFT_MUL_THRESHOLD        mul_threshold[2]
#define FFT_MODF_MUL_THRESHOLD   fft_modf_mul_threshold
#define KARATSUBA_SQR_THRESHOLD  sqr_threshold[0]
#define TOOM3_SQR_THRESHOLD      sqr_threshold[1]
#define FFT_SQR_TABLE            0
#define FFT_SQR_THRESHOLD        sqr_threshold[2]
#define FFT_MODF_SQR_THRESHOLD   fft_modf_sqr_threshold
#define BZ_THRESHOLD             bz_threshold[0]
#define FIB_THRESHOLD            fib_threshold[0]
#define POWM_THRESHOLD           powm_threshold[0]
#define GCD_ACCEL_THRESHOLD      gcd_accel_threshold[0]
#define GCDEXT_THRESHOLD         gcdext_threshold[0]

#define TOOM3_MUL_THRESHOLD_LIMIT  700

#undef  FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];

#endif /* TUNE_PROGRAM_BUILD */

#if defined (__cplusplus)
}
#endif