[BACK]Return to gmp-impl.h CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp

Annotation of OpenXM_contrib/gmp/gmp-impl.h, Revision 1.1.1.3

1.1       maekawa     1: /* Include file for internal GNU MP types and definitions.
                      2:
1.1.1.2   maekawa     3:    THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
                      4:    BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
                      5:
                      6: Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
                      7: Foundation, Inc.
1.1       maekawa     8:
                      9: This file is part of the GNU MP Library.
                     10:
                     11: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2   maekawa    12: it under the terms of the GNU Lesser General Public License as published by
                     13: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1       maekawa    14: option) any later version.
                     15:
                     16: The GNU MP Library is distributed in the hope that it will be useful, but
                     17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2   maekawa    18: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1.1       maekawa    19: License for more details.
                     20:
1.1.1.2   maekawa    21: You should have received a copy of the GNU Lesser General Public License
1.1       maekawa    22: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     24: MA 02111-1307, USA. */
                     25:
1.1.1.2   maekawa    26: #include "config.h"
                     27: #include "gmp-mparam.h"
                     28: /* #include "longlong.h" */
                     29:
/* Make alloca available.  gcc always has a usable builtin; other compilers
   need system-specific arrangements: <alloca.h> on most Unix variants, a
   "#pragma alloca" for IBM's compilers, and __ALLOCA for DEC C.  */
#if ! defined (alloca) && defined (__GNUC__)
#define alloca __builtin_alloca
#define HAVE_ALLOCA
#endif

#if ! defined (alloca)
/* Lots of variants for MIPS, to cover all versions and perversions of OSes
   for MIPS, plus other systems whose <alloca.h> declares alloca.  */
#if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
 || defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
 || defined (__alpha) || defined (__sparc) || defined (sparc) \
 || defined (__ksr__)
#include <alloca.h>
#define HAVE_ALLOCA
#endif
/* IBM's compilers need this #pragma in "each module that needs to use
   alloca".  */
#if defined (_IBMR2)
#pragma alloca
#define HAVE_ALLOCA
#endif
#if defined (__DECC)
#define alloca(x) __ALLOCA(x)
#define HAVE_ALLOCA
#endif
#endif

#if defined (alloca)
#define HAVE_ALLOCA
#endif

/* Temporary workspace scheme: the stack-alloc.h implementation when alloca
   is unavailable (or explicitly requested), otherwise plain alloca with the
   bookkeeping macros compiled away.  */
#if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
#include "stack-alloc.h"
#else
#define TMP_DECL(m)
#define TMP_ALLOC(x) alloca(x)
#define TMP_MARK(m)
#define TMP_FREE(m)
#endif

/* Typed convenience wrappers around TMP_ALLOC.  */
#define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)     TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)   TMP_ALLOC_TYPE(n,mp_ptr)
                     76:
1.1       maekawa    77:
/* Old compilers don't understand "inline"; make it vanish for them.
   FIXME: Test for C++ compilers here, __DECC understands __inline.  */
#if ! defined (__GNUC__)
#define inline                 /* Empty */
#endif

/* Absolute value.  The argument is fully parenthesized so that e.g.
   ABS(a-b) negates the whole difference rather than expanding to the
   broken "-a-b".  Beware: like MIN and MAX below, the argument is
   evaluated twice, so avoid side effects in it.  */
#define ABS(x) ((x) >= 0 ? (x) : -(x))
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#define MAX(h,i) ((h) > (i) ? (h) : (i))

/* Element count of a true array (not valid on a pointer).  */
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
1.1       maekawa    87:
                     88: /* Field access macros.  */
                     89: #define SIZ(x) ((x)->_mp_size)
                     90: #define ABSIZ(x) ABS (SIZ (x))
                     91: #define PTR(x) ((x)->_mp_d)
1.1.1.2   maekawa    92: #define LIMBS(x) ((x)->_mp_d)
1.1       maekawa    93: #define EXP(x) ((x)->_mp_exp)
                     94: #define PREC(x) ((x)->_mp_prec)
                     95: #define ALLOC(x) ((x)->_mp_alloc)
                     96:
/* Extra casts because shorts are promoted to ints by "~" and "<<".  "-1"
   rather than "1" in SIGNED_TYPE_MIN avoids warnings from some compilers
   about arithmetic overflow.
   NOTE(review): left-shifting a negative value in SIGNED_TYPE_MIN is
   formally undefined in ISO C; this relies on the usual two's complement
   behaviour -- confirm acceptable for all target compilers.  */
#define UNSIGNED_TYPE_MAX(type)      ((type) ~ (type) 0)
#define UNSIGNED_TYPE_HIGHBIT(type)  ((type) ~ (UNSIGNED_TYPE_MAX(type) >> 1))
#define SIGNED_TYPE_MIN(type)        (((type) -1) << (8*sizeof(type)-1))
#define SIGNED_TYPE_MAX(type)        ((type) ~ SIGNED_TYPE_MIN(type))
#define SIGNED_TYPE_HIGHBIT(type)    SIGNED_TYPE_MIN(type)

/* Limits for the limb and size types.  */
#define MP_LIMB_T_MAX      UNSIGNED_TYPE_MAX (mp_limb_t)
#define MP_LIMB_T_HIGHBIT  UNSIGNED_TYPE_HIGHBIT (mp_limb_t)

#define MP_SIZE_T_MAX      SIGNED_TYPE_MAX (mp_size_t)

/* Fallback definitions for limits normally supplied by <limits.h>; only
   defined here when the system headers didn't provide them.  */
#ifndef ULONG_MAX
#define ULONG_MAX          UNSIGNED_TYPE_MAX (unsigned long)
#endif
#define ULONG_HIGHBIT      UNSIGNED_TYPE_HIGHBIT (unsigned long)
#define LONG_HIGHBIT       SIGNED_TYPE_HIGHBIT (long)
#ifndef LONG_MAX
#define LONG_MAX           SIGNED_TYPE_MAX (long)
#endif

#ifndef USHORT_MAX
#define USHORT_MAX         UNSIGNED_TYPE_MAX (unsigned short)
#endif
#define USHORT_HIGHBIT     UNSIGNED_TYPE_HIGHBIT (unsigned short)
#define SHORT_HIGHBIT      SIGNED_TYPE_HIGHBIT (short)
#ifndef SHORT_MAX
#define SHORT_MAX          SIGNED_TYPE_MAX (short)
#endif
                    128:
                    129:
                    130: /* Swap macros. */
                    131:
                    132: #define MP_LIMB_T_SWAP(x, y)                    \
                    133:   do {                                          \
                    134:     mp_limb_t __mp_limb_t_swap__tmp = (x);      \
                    135:     (x) = (y);                                  \
                    136:     (y) = __mp_limb_t_swap__tmp;                \
                    137:   } while (0)
                    138: #define MP_SIZE_T_SWAP(x, y)                    \
                    139:   do {                                          \
                    140:     mp_size_t __mp_size_t_swap__tmp = (x);      \
                    141:     (x) = (y);                                  \
                    142:     (y) = __mp_size_t_swap__tmp;                \
                    143:   } while (0)
                    144:
                    145: #define MP_PTR_SWAP(x, y)               \
                    146:   do {                                  \
                    147:     mp_ptr __mp_ptr_swap__tmp = (x);    \
                    148:     (x) = (y);                          \
                    149:     (y) = __mp_ptr_swap__tmp;           \
                    150:   } while (0)
                    151: #define MP_SRCPTR_SWAP(x, y)                    \
                    152:   do {                                          \
                    153:     mp_srcptr __mp_srcptr_swap__tmp = (x);      \
                    154:     (x) = (y);                                  \
                    155:     (y) = __mp_srcptr_swap__tmp;                \
                    156:   } while (0)
                    157:
                    158: #define MPN_PTR_SWAP(xp,xs, yp,ys)      \
                    159:   do {                                  \
                    160:     MP_PTR_SWAP (xp, yp);               \
                    161:     MP_SIZE_T_SWAP (xs, ys);            \
                    162:   } while(0)
                    163: #define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
                    164:   do {                                  \
                    165:     MP_SRCPTR_SWAP (xp, yp);            \
                    166:     MP_SIZE_T_SWAP (xs, ys);            \
                    167:   } while(0)
                    168:
                    169: #define MPZ_PTR_SWAP(x, y)              \
                    170:   do {                                  \
                    171:     mpz_ptr __mpz_ptr_swap__tmp = (x);  \
                    172:     (x) = (y);                          \
                    173:     (y) = __mpz_ptr_swap__tmp;          \
                    174:   } while (0)
                    175: #define MPZ_SRCPTR_SWAP(x, y)                   \
                    176:   do {                                          \
                    177:     mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
                    178:     (x) = (y);                                  \
                    179:     (y) = __mpz_srcptr_swap__tmp;               \
                    180:   } while (0)
                    181:
                    182:
#if defined (__cplusplus)
extern "C" {
#endif

/* FIXME: These are purely internal, so do a search and replace to change
   them to __gmp forms, rather than using these macros. */
#define _mp_allocate_func      __gmp_allocate_func
#define _mp_reallocate_func    __gmp_reallocate_func
#define _mp_free_func          __gmp_free_func
#define _mp_default_allocate   __gmp_default_allocate
#define _mp_default_reallocate __gmp_default_reallocate
#define _mp_default_free       __gmp_default_free

/* User-replaceable memory allocation hooks (presumably installed via
   mp_set_memory_functions -- confirm in memory.c).  The reallocate hook
   takes two size arguments in addition to the pointer, and the free hook
   takes the block size alongside the pointer.  */
extern void *  (*_mp_allocate_func) _PROTO ((size_t));
extern void *  (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t));
extern void    (*_mp_free_func) _PROTO ((void *, size_t));

/* Default implementations behind the hooks above.  */
void *_mp_default_allocate _PROTO ((size_t));
void *_mp_default_reallocate _PROTO ((void *, size_t, size_t));
void _mp_default_free _PROTO ((void *, size_t));

/* Typed convenience wrappers around the allocation hooks.  */
#define _MP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*_mp_allocate_func) ((n) * sizeof (type)))
#define _MP_ALLOCATE_FUNC_LIMBS(n)   _MP_ALLOCATE_FUNC_TYPE(n,mp_limb_t)

#define _MP_FREE_FUNC_TYPE(p,n,type) (*_mp_free_func) (p, (n) * sizeof (type))
#define _MP_FREE_FUNC_LIMBS(p,n)     _MP_FREE_FUNC_TYPE(p,n,mp_limb_t)


/* Pre-ANSI compilers lack "const" and "signed"; make those keywords vanish
   for them.  */
#if (__STDC__-0) || defined (__cplusplus)

#else

#define const                  /* Empty */
#define signed                 /* Empty */

#endif
1.1       maekawa   220:
#if defined (__GNUC__) && defined (__i386__)
#if 0                  /* check that these actually improve things */
/* NOTE(review): this entire section is compiled out by the "#if 0" just
   above -- these x86 string-instruction (rep movsl / repe scasl) versions
   are candidate speedups that were never enabled.  */
#define MPN_COPY_INCR(DST, SRC, N)                                     \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                   \
          "D" (DST), "S" (SRC), "c" (N) :                              \
          "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                     \
  __asm__ ("std\n\trep\n\tmovsl" : :                                   \
          "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
          "cx", "di", "si", "memory")
#define MPN_NORMALIZE_NOT_ZERO(P, N)                                   \
  do {                                                                 \
    __asm__ ("std\n\trepe\n\tscasl" : "=c" (N) :                       \
            "a" (0), "D" ((P) + (N) - 1), "0" (N) :                    \
            "cx", "di");                                               \
    (N)++;                                                             \
  } while (0)
#endif
#endif
1.1       maekawa   240:
#if HAVE_NATIVE_mpn_copyi
#define mpn_copyi __MPN(copyi)
void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif

/* Remap names of internal mpn functions.  */
#define __clz_tab               __MPN(clz_tab)
#define mpn_udiv_w_sdiv                __MPN(udiv_w_sdiv)
#define mpn_reciprocal         __MPN(reciprocal)

#define mpn_sb_divrem_mn       __MPN(sb_divrem_mn)
#define mpn_bz_divrem_n                __MPN(bz_divrem_n)
/* #define mpn_tdiv_q          __MPN(tdiv_q) */

/* Karatsuba multiplication and squaring.  The trailing mp_ptr argument is
   presumably scratch workspace -- confirm against mpn/generic/mul_n.c.  */
#define mpn_kara_mul_n __MPN(kara_mul_n)
void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_kara_sqr_n  __MPN(kara_sqr_n)
void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

/* Toom-3 multiplication and squaring.  */
#define mpn_toom3_mul_n  __MPN(toom3_mul_n)
void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));

#define mpn_toom3_sqr_n  __MPN(toom3_sqr_n)
void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

/* FFT multiplication; mpn_fft_best_k picks the transform parameter k for a
   given size (sqr non-zero for squaring).  */
#define mpn_fft_best_k  __MPN(fft_best_k)
int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr));

#define mpn_mul_fft  __MPN(mul_fft)
void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
                          mp_srcptr n, mp_size_t nl,
                          mp_srcptr m, mp_size_t ml,
                          int k));

#define mpn_mul_fft_full  __MPN(mul_fft_full)
void mpn_mul_fft_full _PROTO ((mp_ptr op,
                               mp_srcptr n, mp_size_t nl,
                               mp_srcptr m, mp_size_t ml));

#define mpn_fft_next_size  __MPN(fft_next_size)
mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k));

/* Division helpers: schoolbook and Burnikel-Ziegler style names -- verify
   exact contracts in their definitions.  */
mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
mp_limb_t mpn_bz_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */
                    287:
                    288: /* Copy NLIMBS *limbs* from SRC to DST, NLIMBS==0 allowed.  */
                    289: #ifndef MPN_COPY_INCR
                    290: #if HAVE_NATIVE_mpn_copyi
                    291: #define MPN_COPY_INCR(DST, SRC, NLIMBS)   mpn_copyi (DST, SRC, NLIMBS)
                    292: #else
1.1       maekawa   293: #define MPN_COPY_INCR(DST, SRC, NLIMBS) \
                    294:   do {                                                                 \
                    295:     mp_size_t __i;                                                     \
                    296:     for (__i = 0; __i < (NLIMBS); __i++)                               \
                    297:       (DST)[__i] = (SRC)[__i];                                         \
                    298:   } while (0)
1.1.1.2   maekawa   299: #endif
                    300: #endif
                    301:
                    302: #if HAVE_NATIVE_mpn_copyd
                    303: #define mpn_copyd __MPN(copyd)
                    304: void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
                    305: #endif
                    306:
                    307: /* NLIMBS==0 allowed */
                    308: #ifndef MPN_COPY_DECR
                    309: #if HAVE_NATIVE_mpn_copyd
                    310: #define MPN_COPY_DECR(DST, SRC, NLIMBS)   mpn_copyd (DST, SRC, NLIMBS)
                    311: #else
1.1       maekawa   312: #define MPN_COPY_DECR(DST, SRC, NLIMBS) \
                    313:   do {                                                                 \
                    314:     mp_size_t __i;                                                     \
                    315:     for (__i = (NLIMBS) - 1; __i >= 0; __i--)                          \
                    316:       (DST)[__i] = (SRC)[__i];                                         \
                    317:   } while (0)
1.1.1.2   maekawa   318: #endif
                    319: #endif
                    320:
/* Define MPN_COPY for vector computers.  Since #pragma cannot be in a macro,
   rely on function inlining.  Only compiled for Cray and Fujitsu VPP; the
   pragmas tell their vectorizers the iterations are independent.  */
#if defined (_CRAY) || defined (__uxp__)
static inline void
_MPN_COPY (d, s, n) mp_ptr d; mp_srcptr s; mp_size_t n;
{
  int i;                               /* Faster for Cray with plain int */
#pragma _CRI ivdep                     /* Cray PVP systems */
#pragma loop noalias d,s               /* Fujitsu VPP systems */
  for (i = 0; i < n; i++)
    d[i] = s[i];
}
#define MPN_COPY _MPN_COPY
#endif

/* Everywhere else, an ascending limb copy is the default MPN_COPY.  */
#ifndef MPN_COPY
#define MPN_COPY MPN_COPY_INCR
#endif
1.1       maekawa   339:
                    340: /* Zero NLIMBS *limbs* AT DST.  */
1.1.1.2   maekawa   341: #ifndef MPN_ZERO
1.1       maekawa   342: #define MPN_ZERO(DST, NLIMBS) \
                    343:   do {                                                                 \
                    344:     mp_size_t __i;                                                     \
                    345:     for (__i = 0; __i < (NLIMBS); __i++)                               \
                    346:       (DST)[__i] = 0;                                                  \
                    347:   } while (0)
1.1.1.2   maekawa   348: #endif
1.1       maekawa   349:
/* Decrease NLIMBS until the most significant limb of DST is non-zero, or
   NLIMBS reaches 0 (the all-zero case).  NLIMBS must be an lvalue.  */
#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS)                                      \
  do {                                                                  \
    while ((NLIMBS) > 0 && (DST)[(NLIMBS) - 1] == 0)                    \
      (NLIMBS)--;                                                       \
  } while (0)
#endif
/* Same, but the caller guarantees at least one non-zero limb exists, so
   the NLIMBS > 0 bound check is skipped.  */
#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)                             \
  do {                                                                  \
    while ((DST)[(NLIMBS) - 1] == 0)                                    \
      (NLIMBS)--;                                                       \
  } while (0)
#endif
1.1       maekawa   372:
/* Strip least significant zero limbs from ptr,size by incrementing ptr and
   decrementing size.  The number in ptr,size must be non-zero, ie. size!=0
   and somewhere a non-zero limb.  Both ptr and size must be lvalues; the
   ASSERTs are active only when WANT_ASSERT is on (see the ASSERT scheme
   later in this file).  */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size) \
  do                                            \
    {                                           \
      ASSERT ((size) != 0);                     \
      while ((ptr)[0] == 0)                     \
        {                                       \
          (ptr)++;                              \
          (size)--;                             \
          ASSERT (size >= 0);                   \
        }                                       \
    }                                           \
  while (0)
                    388:
/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
   temporary variable; it will be automatically cleared out at function
   return.  We use __x here to make it possible to accept both mpz_ptr and
   mpz_t arguments.
   NOTE(review): _mp_size is left uninitialized here -- callers presumably
   set it before use; confirm at call sites.  */
#define MPZ_TMP_INIT(X, NLIMBS) \
  do {                                                                 \
    mpz_ptr __x = (X);                                                 \
    __x->_mp_alloc = (NLIMBS);                                         \
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);    \
  } while (0)

/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */
#define MPZ_REALLOC(what,needed) \
  do {                                                         \
    if ((needed) > ALLOC (what))                               \
      _mpz_realloc (what, needed);                             \
  } while (0)
                    406:
/* If KARATSUBA_MUL_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  (A tuned gmp-mparam.h can
   pre-define any of the thresholds in this section.)  */
#ifndef KARATSUBA_MUL_THRESHOLD
#define KARATSUBA_MUL_THRESHOLD 32
#endif

/* If TOOM3_MUL_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef TOOM3_MUL_THRESHOLD
#define TOOM3_MUL_THRESHOLD 256
#endif

/* Squaring thresholds default to twice the corresponding multiply
   thresholds.  */
#ifndef KARATSUBA_SQR_THRESHOLD
#define KARATSUBA_SQR_THRESHOLD (2*KARATSUBA_MUL_THRESHOLD)
#endif

#ifndef TOOM3_SQR_THRESHOLD
#define TOOM3_SQR_THRESHOLD (2*TOOM3_MUL_THRESHOLD)
#endif

/* First k to use for an FFT modF multiply.  A modF FFT is an order
   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
   whereas k=4 is 1.33 which is faster than toom3 at 1.485.    */
#define FFT_FIRST_K  4

/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
#ifndef FFT_MODF_MUL_THRESHOLD
#define FFT_MODF_MUL_THRESHOLD   (TOOM3_MUL_THRESHOLD * 3)
#endif
#ifndef FFT_MODF_SQR_THRESHOLD
#define FFT_MODF_SQR_THRESHOLD   (TOOM3_SQR_THRESHOLD * 3)
#endif

/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
   NxN->2N multiply and not recursing into itself is an order
   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
   is the first better than toom3.  */
#ifndef FFT_MUL_THRESHOLD
#define FFT_MUL_THRESHOLD   (FFT_MODF_MUL_THRESHOLD * 10)
#endif
#ifndef FFT_SQR_THRESHOLD
#define FFT_SQR_THRESHOLD   (FFT_MODF_SQR_THRESHOLD * 10)
#endif

/* Table of thresholds for successive modF FFT "k"s.  The first entry is
   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
   etc.  See mpn_fft_best_k().  A zero entry terminates the table.  */
#ifndef FFT_MUL_TABLE
#define FFT_MUL_TABLE                           \
  { TOOM3_MUL_THRESHOLD * 4,   /* k=5 */        \
    TOOM3_MUL_THRESHOLD * 8,   /* k=6 */        \
    TOOM3_MUL_THRESHOLD * 16,  /* k=7 */        \
    TOOM3_MUL_THRESHOLD * 32,  /* k=8 */        \
    TOOM3_MUL_THRESHOLD * 96,  /* k=9 */        \
    TOOM3_MUL_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif
#ifndef FFT_SQR_TABLE
#define FFT_SQR_TABLE                           \
  { TOOM3_SQR_THRESHOLD * 4,   /* k=5 */        \
    TOOM3_SQR_THRESHOLD * 8,   /* k=6 */        \
    TOOM3_SQR_THRESHOLD * 16,  /* k=7 */        \
    TOOM3_SQR_THRESHOLD * 32,  /* k=8 */        \
    TOOM3_SQR_THRESHOLD * 96,  /* k=9 */        \
    TOOM3_SQR_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif

/* Storage class for the FFT threshold tables.  */
#ifndef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS   static const
#endif

#define MPN_FFT_TABLE_SIZE  16
                    481:
                    482:
                    483: /* Return non-zero if xp,xsize and yp,ysize overlap.
                    484:    If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
                    485:    overlap.  If both these are false, there's an overlap. */
                    486: #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
                    487:   ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
                    488:
                    489:
/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   ASSERT_NOCARRY() uses ASSERT() to check the expression is zero, but if
   assertion checking is disabled, the expression is still evaluated.  This
   is meant for use with routines like mpn_add_n() where the return value
   represents a carry or whatever that shouldn't occur.  For example,
   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */

/* Dummy line/file values for compilers lacking __LINE__/__FILE__.  */
#ifdef __LINE__
#define ASSERT_LINE  __LINE__
#else
#define ASSERT_LINE  -1
#endif

#ifdef __FILE__
#define ASSERT_FILE  __FILE__
#else
#define ASSERT_FILE  ""
#endif

int __gmp_assert_fail _PROTO((const char *filename, int linenum,
                              const char *expr));

/* Report the failing expression: stringized with "#" when the preprocessor
   supports it, otherwise the literal text "expr".  */
#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif

/* Cast used to discard a value, empty on compilers without "void".  */
#if HAVE_VOID
#define CAST_TO_VOID        (void)
#else
#define CAST_TO_VOID
#endif

#define ASSERT_ALWAYS(expr) ((expr) ? 0 : ASSERT_FAIL (expr))

/* With assertions disabled, ASSERT() expands to nothing but
   ASSERT_NOCARRY() still evaluates its argument for side effects.  */
#if WANT_ASSERT
#define ASSERT(expr)           ASSERT_ALWAYS (expr)
#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)

#else
#define ASSERT(expr)           (CAST_TO_VOID 0)
#define ASSERT_NOCARRY(expr)   (expr)
#endif
                    538:
                    539:
                    540: #if HAVE_NATIVE_mpn_com_n
                    541: #define mpn_com_n __MPN(com_n)
                    542: void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
                    543: #else
                    544: #define mpn_com_n(d,s,n)        \
                    545:   do                            \
                    546:     {                           \
                    547:       mp_ptr     __d = (d);     \
                    548:       mp_srcptr  __s = (s);     \
                    549:       mp_size_t  __n = (n);     \
                    550:       do                        \
1.1.1.3 ! maekawa   551:         *__d++ = ~ *__s++;      \
1.1.1.2   maekawa   552:       while (--__n);            \
                    553:     }                           \
                    554:   while (0)
                    555: #endif
                    556:
                    557: #define MPN_LOGOPS_N_INLINE(d,s1,s2,n,dop,op,s2op)      \
                    558:   do                                                    \
                    559:     {                                                   \
                    560:       mp_ptr     __d = (d);                             \
                    561:       mp_srcptr  __s1 = (s1);                           \
                    562:       mp_srcptr  __s2 = (s2);                           \
                    563:       mp_size_t  __n = (n);                             \
                    564:       do                                                \
                    565:         *__d++ = dop (*__s1++ op s2op *__s2++);         \
                    566:       while (--__n);                                    \
                    567:     }                                                   \
                    568:   while (0)
                    569:
                    570: #if HAVE_NATIVE_mpn_and_n
                    571: #define mpn_and_n __MPN(and_n)
                    572: void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
                    573: #else
                    574: #define mpn_and_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&, )
                    575: #endif
                    576:
                    577: #if HAVE_NATIVE_mpn_andn_n
                    578: #define mpn_andn_n __MPN(andn_n)
                    579: void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
                    580: #else
                    581: #define mpn_andn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&,~)
                    582: #endif
                    583:
                    584: #if HAVE_NATIVE_mpn_nand_n
                    585: #define mpn_nand_n __MPN(nand_n)
                    586: void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
                    587: #else
                    588: #define mpn_nand_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,&, )
                    589: #endif
                    590:
                    591: #if HAVE_NATIVE_mpn_ior_n
                    592: #define mpn_ior_n __MPN(ior_n)
                    593: void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
                    594: #else
                    595: #define mpn_ior_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|, )
                    596: #endif
                    597:
                    598: #if HAVE_NATIVE_mpn_iorn_n
                    599: #define mpn_iorn_n __MPN(iorn_n)
                    600: void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
                    601: #else
                    602: #define mpn_iorn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|,~)
                    603: #endif
                    604:
                    605: #if HAVE_NATIVE_mpn_nior_n
                    606: #define mpn_nior_n __MPN(nior_n)
                    607: void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
                    608: #else
                    609: #define mpn_nior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,|, )
                    610: #endif
                    611:
                    612: #if HAVE_NATIVE_mpn_xor_n
                    613: #define mpn_xor_n __MPN(xor_n)
                    614: void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
                    615: #else
                    616: #define mpn_xor_n(d,s1,s2,n)  MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,^, )
                    617: #endif
                    618:
                    619: #if HAVE_NATIVE_mpn_xnor_n
                    620: #define mpn_xnor_n __MPN(xnor_n)
                    621: void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
                    622: #else
                    623: #define mpn_xnor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,^, )
                    624: #endif
1.1       maekawa   625:
/* Structure for conversion between internal binary format and
   strings in base 2..36.  */
struct bases
{
  /* Number of digits in the conversion base that always fits in an mp_limb_t.
     For example, for base 10 on a machine where a mp_limb_t has 32 bits this
     is 9, since 10**9 is the largest number that fits into a mp_limb_t.  */
  int chars_per_limb;

  /* log(2)/log(conversion_base) */
  double chars_per_bit_exactly;

  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
     i.e. the number of bits used to represent each digit in the base.  */
  mp_limb_t big_base;

  /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
     fixed-point number.  Instead of dividing by big_base an application can
     choose to multiply by big_base_inverted.  */
  mp_limb_t big_base_inverted;
};

/* Table of the above, indexed by base; defined elsewhere in the library.
   The __MPN wrapper keeps the symbol in the library's namespace.  */
#define __mp_bases __MPN(mp_bases)
extern const struct bases __mp_bases[];

/* Presumably the default precision, in limbs, for new float variables --
   confirm against the mpf init code.  */
extern mp_size_t __gmp_default_fp_limb_precision;
                    652:
/* Flag: the target CPU has very few general registers (currently only x86
   is so marked); presumably consulted by generic code to pick register-
   friendly variants -- confirm at the points of use.  */
#if defined (__i386__)
#define TARGET_REGISTER_STARVED 1
#else
#define TARGET_REGISTER_STARVED 0
#endif
                    658:
/* Use a library function for invert_limb, if available. */
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define mpn_invert_limb  __MPN(invert_limb)
mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t));
#define invert_limb(invxl,xl)  (invxl = __MPN(invert_limb) (xl))
#endif

/* Generic invert_limb: invxl = floor(2^(2*BITS_PER_MP_LIMB)/xl)
   - 2^BITS_PER_MP_LIMB, computed as the udiv_qrnnd of -xl*2^b by xl
   (udiv_qrnnd comes from longlong.h).  xl is presumably required to be
   normalized (high bit set) -- then "xl << 1 == 0" singles out
   xl == 2^(b-1), for which the division would overflow and the correct
   inverse is all ones.  TODO(review): confirm the normalization
   requirement against the callers.  */
#ifndef invert_limb
#define invert_limb(invxl,xl) \
  do {                                                                 \
    mp_limb_t dummy;                                                   \
    if (xl << 1 == 0)                                                  \
      invxl = ~(mp_limb_t) 0;                                          \
    else                                                               \
      udiv_qrnnd (invxl, dummy, -xl, 0, xl);                           \
  } while (0)
#endif
                    676:
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
   If this would yield overflow, DI should be the largest possible number
   (i.e., only ones).  For correct operation, the most significant bit of D
   has to be set.  Put the quotient in Q and the remainder in R.

   The initial _q from the umul_ppmm is an under-estimate of the quotient;
   the code below corrects it: two conditional subtractions of D while the
   remainder high limb _xh is non-zero, then a final subtraction if the low
   remainder is still >= D.  (Presumably per Granlund/Montgomery "Division
   by Invariant Integers using Multiplication" -- confirm the correction
   bound there.)  */
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
  do {                                                                 \
    mp_limb_t _q, _ql, _r;                                             \
    mp_limb_t _xh, _xl;                                                \
    umul_ppmm (_q, _ql, (nh), (di));                                   \
    _q += (nh);                        /* DI is 2**BITS_PER_MP_LIMB too small */\
    umul_ppmm (_xh, _xl, _q, (d));                                     \
    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                        \
    if (_xh != 0)                                                      \
      {                                                                \
        sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                         \
        _q += 1;                                                       \
        if (_xh != 0)                                                  \
          {                                                            \
            sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                     \
            _q += 1;                                                   \
          }                                                            \
      }                                                                \
    if (_r >= (d))                                                     \
      {                                                                \
        _r -= (d);                                                     \
        _q += 1;                                                       \
      }                                                                \
    (r) = _r;                                                          \
    (q) = _q;                                                          \
  } while (0)
                    708: /* Like udiv_qrnnd_preinv, but for for any value D.  DNORM is D shifted left
                    709:    so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
                    710: #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
                    711:   do {                                                                 \
1.1.1.2   maekawa   712:     mp_limb_t _n2, _n10, _n1, _nadj, _q1;                              \
1.1       maekawa   713:     mp_limb_t _xh, _xl;                                                        \
1.1.1.2   maekawa   714:     _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> (l - 1));\
                    715:     _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));                                \
                    716:     _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));         \
                    717:     _nadj = _n10 + (_n1 & (dnorm));                                    \
                    718:     umul_ppmm (_xh, _xl, di, _n2 - _n1);                               \
                    719:     add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                         \
                    720:     _q1 = ~(_n2 + _xh);                                                        \
                    721:     umul_ppmm (_xh, _xl, _q1, d);                                      \
1.1       maekawa   722:     add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                           \
                    723:     _xh -= (d);                                                                \
                    724:     (r) = _xl + ((d) & _xh);                                           \
1.1.1.2   maekawa   725:     (q) = _xh - _q1;                                                   \
1.1       maekawa   726:   } while (0)
/* Exactly like udiv_qrnnd_preinv, but branch-free.  It is not clear which
   version to use.  Presumably D must be normalized (most significant bit
   set), cf. the ...2gen variant above for arbitrary D -- confirm.
   _n1 is an all-ones mask when the top bit of NL is set, zero otherwise;
   _nadj compensates for the quotient estimate treating NL as signed.
   The final lines fold the at-most-one-off correction into the result
   without branching: _xh ends up 0 or all ones and is used as a mask.  */
#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
  do {                                                                 \
    mp_limb_t _n2, _n10, _n1, _nadj, _q1;                              \
    mp_limb_t _xh, _xl;                                                \
    _n2 = (nh);                                                        \
    _n10 = (nl);                                                       \
    _n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1));         \
    _nadj = _n10 + (_n1 & (d));                                        \
    umul_ppmm (_xh, _xl, di, _n2 - _n1);                               \
    add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj);                         \
    _q1 = ~(_n2 + _xh);                                                \
    umul_ppmm (_xh, _xl, _q1, d);                                      \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                           \
    _xh -= (d);                                                        \
    (r) = _xl + ((d) & _xh);                                           \
    (q) = _xh - _q1;                                                   \
  } while (0)
                    746:
1.1.1.2   maekawa   747:
                    748: /* modlimb_invert() sets "inv" to the multiplicative inverse of "n" modulo
                    749:    2^BITS_PER_MP_LIMB, ie. so that inv*n == 1 mod 2^BITS_PER_MP_LIMB.
                    750:    "n" must be odd (otherwise such an inverse doesn't exist).
                    751:
                    752:    This is not to be confused with invert_limb(), which is completely
                    753:    different.
                    754:
                    755:    The table lookup gives an inverse with the low 8 bits valid, and each
                    756:    multiply step doubles the number of bits.  See Jebelean's exact division
                    757:    paper, end of section 4 (reference in gmp.texi). */
                    758:
                    759: #define modlimb_invert_table  __gmp_modlimb_invert_table
                    760: extern const unsigned char  modlimb_invert_table[128];
                    761:
                    762: #if BITS_PER_MP_LIMB <= 32
                    763: #define modlimb_invert(inv,n)                                   \
                    764:   do {                                                          \
                    765:     mp_limb_t  __n = (n);                                       \
                    766:     mp_limb_t  __inv;                                           \
                    767:     ASSERT ((__n & 1) == 1);                                    \
                    768:     __inv = modlimb_invert_table[(__n&0xFF)/2]; /*  8 */        \
                    769:     __inv = 2 * __inv - __inv * __inv * __n;    /* 16 */        \
                    770:     __inv = 2 * __inv - __inv * __inv * __n;    /* 32 */        \
                    771:     ASSERT (__inv * __n == 1);                                  \
                    772:     (inv) = __inv;                                              \
                    773:   } while (0)
                    774: #endif
                    775:
                    776: #if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
                    777: #define modlimb_invert(inv,n)                                   \
                    778:   do {                                                          \
                    779:     mp_limb_t  __n = (n);                                       \
                    780:     mp_limb_t  __inv;                                           \
                    781:     ASSERT ((__n & 1) == 1);                                    \
                    782:     __inv = modlimb_invert_table[(__n&0xFF)/2]; /*  8 */        \
                    783:     __inv = 2 * __inv - __inv * __inv * __n;    /* 16 */        \
                    784:     __inv = 2 * __inv - __inv * __inv * __n;    /* 32 */        \
                    785:     __inv = 2 * __inv - __inv * __inv * __n;    /* 64 */        \
                    786:     ASSERT (__inv * __n == 1);                                  \
                    787:     (inv) = __inv;                                              \
                    788:   } while (0)
                    789: #endif
                    790:
                    791:
/* The `mode' attribute was introduced in GCC 2.2, but we can only distinguish
   between GCC 2 releases from 2.5, since __GNUC_MINOR__ wasn't introduced
   until then.  */
#if (__GNUC__ - 0 > 2 || defined (__GNUC_MINOR__)) && ! defined (__APPLE_CC__)
/* Define stuff for longlong.h.  */
typedef unsigned int UQItype   __attribute__ ((mode (QI)));
typedef                 int SItype     __attribute__ ((mode (SI)));
typedef unsigned int USItype   __attribute__ ((mode (SI)));
typedef                 int DItype     __attribute__ ((mode (DI)));
typedef unsigned int UDItype   __attribute__ ((mode (DI)));
#else
/* No `mode' attribute: fall back on plain C types of (hopefully) the
   intended widths.  */
typedef unsigned char UQItype;
typedef                 long SItype;
typedef unsigned long USItype;
#if defined _LONGLONG || defined _LONG_LONG_LIMB
typedef        long long int DItype;
typedef unsigned long long int UDItype;
#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
typedef long int DItype;
typedef unsigned long int UDItype;
#endif
#endif

/* Full-word and half-word types as expected by longlong.h.  */
typedef mp_limb_t UWtype;
typedef unsigned int UHWtype;
#define W_TYPE_SIZE BITS_PER_MP_LIMB
                    818:
/* Define ieee_double_extract and _GMP_IEEE_FLOATS.

   ieee_double_extract overlays a double with bit fields for the sign,
   exponent and mantissa halves; the field order differs with the
   platform's word endianness.  _GMP_IEEE_FLOATS is defined to 1 only on
   systems listed here as having IEEE format.  */

#if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__)))
/* Special case for little endian ARM since floats remain in big-endian.  */
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
      unsigned int manl:32;
    } s;
  double d;
};
#else
/* Little endian word order: low mantissa word first.  */
#if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__)            \
 || defined (__alpha)                                                  \
 || defined (__clipper__)                                              \
 || defined (__cris)                                                   \
 || defined (__i386__)                                                 \
 || defined (__i860__)                                                 \
 || defined (__i960__)                                                 \
 || defined (MIPSEL) || defined (_MIPSEL)                              \
 || defined (__ns32000__)                                              \
 || defined (__WINNT) || defined (_WIN32)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int manl:32;
      unsigned int manh:20;
      unsigned int exp:11;
      unsigned int sig:1;
    } s;
  double d;
};
#else /* Need this as an #else since the tests aren't made exclusive.  */
/* Big endian word order: sign and exponent first.  */
#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__)                  \
 || defined (__a29k__) || defined (_AM29K)                             \
 || defined (__arm__)                                                  \
 || (defined (__convex__) && defined (_IEEE_FLOAT_))                   \
 || defined (_CRAYMPP)                                                 \
 || defined (__i370__) || defined (__mvs__)                            \
 || defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\
    || defined(mc68020)                                                \
 || defined (__m88000__)                                               \
 || defined (MIPSEB) || defined (_MIPSEB)                              \
 || defined (__hppa) || defined (__hppa__)                             \
 || defined (__pyr__)                                                  \
 || defined (__ibm032__)                                               \
 || defined (_IBMR2) || defined (_ARCH_PPC)                            \
 || defined (__sh__)                                                   \
 || defined (__sparc) || defined (sparc)                               \
 || defined (__we32k__)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int sig:1;
      unsigned int exp:11;
      unsigned int manh:20;
      unsigned int manl:32;
    } s;
  double d;
};
#endif
#endif
#endif
1.1       maekawa   891:
/* 2^BITS_PER_MP_LIMB as a double.
   Using "(2.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)))" doesn't work on
   SunOS 4.1.4 native /usr/ucb/cc (K&R), it comes out as -4294967296.0,
   presumably due to treating the mp_limb_t constant as signed rather than
   unsigned. */
#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 2)))

/* Limbs needed to hold a double's mantissa: 2 for 64-bit limbs, otherwise 3
   (presumably sized for 32-bit limbs and the IEEE 53-bit mantissa --
   confirm for other limb sizes).  */
#if BITS_PER_MP_LIMB == 64
#define LIMBS_PER_DOUBLE 2
#else
#define LIMBS_PER_DOUBLE 3
#endif

/* Helpers for double <-> limb conversions; implemented elsewhere in the
   library.  */
double __gmp_scale2 _PROTO ((double, int));
int __gmp_extract_double _PROTO ((mp_ptr, double));
                    905:
/* GMP_ERROR records the error code in gmp_errno and then provokes a runtime
   trap: __gmp_0 is presumably defined as zero elsewhere, so "10/__gmp_0" is
   a division by zero (assigned to __gmp_junk so it isn't optimized away).  */
extern int __gmp_junk;
extern const int __gmp_0;
#define GMP_ERROR(code)   (gmp_errno |= (code), __gmp_junk = 10/__gmp_0)
#define DIVIDE_BY_ZERO    GMP_ERROR(GMP_ERROR_DIVISION_BY_ZERO)
#define SQRT_OF_NEGATIVE  GMP_ERROR(GMP_ERROR_SQRT_OF_NEGATIVE)
                    911:
/* CNST_LIMB(C) attaches the suffix making constant C an mp_limb_t: "LL"
   when limbs are long long, plain "L" otherwise.  The C/**/LL form does
   token pasting for pre-ANSI preprocessors that lack "##".  */
#if defined _LONG_LONG_LIMB
#if defined (__STDC__)
#define CNST_LIMB(C) C##LL
#else
#define CNST_LIMB(C) C/**/LL
#endif
#else /* not _LONG_LONG_LIMB */
#if defined (__STDC__)
#define CNST_LIMB(C) C##L
#else
#define CNST_LIMB(C) C/**/L
#endif
#endif /* _LONG_LONG_LIMB */
                    925:
/*** Stuff used by mpn/generic/prefsqr.c and mpn/generic/next_prime.c ***/
/* PP is the product of the odd primes up to PP_MAXPRIME, chosen to fit one
   limb.  PP_INVERTED is presumably its invert_limb() style inverse, and
   PP_MASK a sieving bit mask -- confirm in prefsqr.c / next_prime.c.  */
#if BITS_PER_MP_LIMB == 32
#define PP 0xC0CFD797L         /* 3 x 5 x 7 x 11 x 13 x ... x 29 */
#define PP_INVERTED 0x53E5645CL
#define PP_MAXPRIME 29
#define PP_MASK 0x208A28A8L
#endif

#if BITS_PER_MP_LIMB == 64
#define PP CNST_LIMB(0xE221F97C30E94E1D)       /* 3 x 5 x 7 x 11 x 13 x ... x 53 */
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
#define PP_MAXPRIME 53
#define PP_MASK CNST_LIMB(0x208A20A08A28A8)
#endif
                    940:
                    941:
/* BIT1 means a result value in bit 1 (second least significant bit), with a
   zero bit representing +1 and a one bit representing -1.  Bits other than
   bit 1 are garbage.

   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
   and their speed is important.  Expressions are used rather than
   conditionals to accumulate sign changes, which effectively means XORs
   instead of conditional JUMPs. */

/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise.  Note "|" not "||",
   keeping the evaluation branch-free.  */
#define JACOBI_S0(a) \
  (((a) == 1) | ((a) == -1))

/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_U0(a) \
  ((a) == 1)

/* (a/0), with a an mpz_t; is 1 if a=+/-1, 0 otherwise
   An mpz_t always has at least one limb of allocated space, so the fetch of
   the low limb is valid. */
#define JACOBI_Z0(a) \
  (((SIZ(a) == 1) | (SIZ(a) == -1)) & (PTR(a)[0] == 1))

/* Convert a bit1 to +1 or -1. */
#define JACOBI_BIT1_TO_PN(result_bit1) \
  (1 - ((result_bit1) & 2))

/* (2/b), with b unsigned and odd;
   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
   hence obtained from (b>>1)^b */
#define JACOBI_TWO_U_BIT1(b) \
  (ASSERT (b & 1), (((b) >> 1) ^ (b)))

/* (2/b)^twos, with b unsigned and odd; only bit 1 of the result matters,
   and an even "twos" shifts a zero into bit 1, giving +1.  */
#define JACOBI_TWOS_U_BIT1(twos, b) \
  (((twos) << 1) & JACOBI_TWO_U_BIT1 (b))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U(twos, b) \
  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))

/* (a/b) effect due to sign of a: signed/unsigned, b odd;
   is (-1)^((b-1)/2) if a<0, or +1 if a>=0 */
#define JACOBI_ASGN_SU_BIT1(a, b) \
  ((((a) < 0) << 1) & (b))

/* (a/b) effect due to sign of b: signed/mpz;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SZ_BIT1(a, b) \
  ((((a) < 0) & (SIZ(b) < 0)) << 1)

/* (a/b) effect due to sign of b: mpz/signed */
#define JACOBI_BSGN_ZS_BIT1(a, b) \
  JACOBI_BSGN_SZ_BIT1(b, a)

/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd.
   Is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4 or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   valid. */
#define JACOBI_RECIP_UU_BIT1(a, b) \
  ((a) & (b))
                   1004:
                   1005:
/* For testing and debugging.  */

/* Validate the representation of mpz z: a nonzero value must have a
   nonzero most significant limb (normalized), and the allocated space
   must cover the limbs in use.  ASSERT_ALWAYS fires unconditionally, so
   this checks even in builds without assertions.  */
#define MPZ_CHECK_FORMAT(z)                                            \
  (ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0),           \
   ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)))
/* Shrink the recorded allocation of z down to exactly the limbs in use,
   so the next operation needing more space is forced to reallocate —
   useful for exercising realloc paths in tests.  */
#define MPZ_PROVOKE_REALLOC(z)                                         \
  do { ALLOC(z) = ABSIZ(z); } while (0)
                   1012:
                   1013:
#if TUNE_PROGRAM_BUILD
/* Some extras wanted when recompiling some .c files for use by the tune
   program.  Not part of a normal build.

   The algorithm-crossover thresholds, which are normally compile-time
   constants from gmp-mparam.h, are remapped here onto global variables
   so the tune program can vary them at run time while measuring.  */

/* Runtime-adjustable threshold storage, defined by the tune program.  */
extern mp_size_t  mul_threshold[];
extern mp_size_t  fft_modf_mul_threshold;
extern mp_size_t  sqr_threshold[];
extern mp_size_t  fft_modf_sqr_threshold;
extern mp_size_t  bz_threshold[];
extern mp_size_t  fib_threshold[];
extern mp_size_t  powm_threshold[];
extern mp_size_t  gcd_accel_threshold[];
extern mp_size_t  gcdext_threshold[];

/* Discard the compile-time constants so they can be redirected below.  */
#undef KARATSUBA_MUL_THRESHOLD
#undef TOOM3_MUL_THRESHOLD
#undef FFT_MUL_TABLE
#undef FFT_MUL_THRESHOLD
#undef FFT_MODF_MUL_THRESHOLD
#undef KARATSUBA_SQR_THRESHOLD
#undef TOOM3_SQR_THRESHOLD
#undef FFT_SQR_TABLE
#undef FFT_SQR_THRESHOLD
#undef FFT_MODF_SQR_THRESHOLD
#undef BZ_THRESHOLD
#undef FIB_THRESHOLD
#undef POWM_THRESHOLD
#undef GCD_ACCEL_THRESHOLD
#undef GCDEXT_THRESHOLD

/* Redirect each threshold name to its runtime variable.  The FFT tables
   are forced to 0 (empty), presumably so the tuned fft_modf_* variables
   govern instead — confirm against the tune program sources.  */
#define KARATSUBA_MUL_THRESHOLD  mul_threshold[0]
#define TOOM3_MUL_THRESHOLD      mul_threshold[1]
#define FFT_MUL_TABLE            0
#define FFT_MUL_THRESHOLD        mul_threshold[2]
#define FFT_MODF_MUL_THRESHOLD   fft_modf_mul_threshold
#define KARATSUBA_SQR_THRESHOLD  sqr_threshold[0]
#define TOOM3_SQR_THRESHOLD      sqr_threshold[1]
#define FFT_SQR_TABLE            0
#define FFT_SQR_THRESHOLD        sqr_threshold[2]
#define FFT_MODF_SQR_THRESHOLD   fft_modf_sqr_threshold
#define BZ_THRESHOLD             bz_threshold[0]
#define FIB_THRESHOLD            fib_threshold[0]
#define POWM_THRESHOLD           powm_threshold[0]
#define GCD_ACCEL_THRESHOLD      gcd_accel_threshold[0]
#define GCDEXT_THRESHOLD         gcdext_threshold[0]

/* NOTE(review): looks like an upper bound for the toom3 threshold
   search — confirm against the tune program.  */
#define TOOM3_MUL_THRESHOLD_LIMIT  700

/* Strip any const/static attributes so the FFT table is a plain
   modifiable global while tuning.  */
#undef  FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];

#endif /* TUNE_PROGRAM_BUILD */
                   1067:
#if defined (__cplusplus)
}  /* presumably closes the extern "C" block opened near the top of this
      header — the opening brace is outside this view, so confirm.  */
#endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>