/* Internal header for the GNU MP Library (revision 1.1.1.4, 2003/08/25).

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
|
|
|
|
|
#ifndef __GMP_IMPL_H__ |
|
#define __GMP_IMPL_H__ |
|
|
|
/* On Cray vector systems "short" and "unsigned short" might not be the same |
|
number of bits, making the SHRT_MAX defaults below fail. (This depends |
|
on compiler options.) Instead use limits.h. */ |
|
#if defined _CRAY |
|
#include <limits.h> |
|
#endif |
|
|
|
#if ! __GMP_WITHIN_CONFIGURE |
#include "config.h" |
#include "config.h" |
#include "gmp-mparam.h" |
#include "gmp-mparam.h" |
/* #include "longlong.h" */ |
#endif |
|
|
#ifdef __cplusplus
#include <string>
#endif


/* Might search and replace _PROTO to __GMP_PROTO internally one day, to
   avoid two names for one thing, but no hurry for that. */
#define _PROTO(x) __GMP_PROTO(x)


/* The following tries to get a good version of alloca.  The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
   be setup appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives.  Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
       from gmp.h.
*/

#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
       char *alloca ();
#     endif
#    endif
#   endif
#  endif
# endif
#endif


/* const and signed must match __gmp_const and __gmp_signed, so follow the
   decision made for those in gmp.h. */
#if ! __GMP_HAVE_CONST
#define const   /* empty */
#define signed  /* empty */
#endif


/* "const" basically means a function does nothing but examine its arguments
   and give a return value, it doesn't read or write any memory (neither
   global nor pointed to by arguments), and has no other side-effects.  This
   is more restrictive than "pure".  See info node "(gcc)Function
   Attributes". */
#if HAVE_ATTRIBUTE_CONST
#define ATTRIBUTE_CONST  __attribute__ ((const))
#else
#define ATTRIBUTE_CONST
#endif

#if HAVE_ATTRIBUTE_NORETURN
#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif

/* "malloc" means a function behaves like malloc in that the pointer it
   returns doesn't alias anything. */
#if HAVE_ATTRIBUTE_MALLOC
#define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
#else
#define ATTRIBUTE_MALLOC
#endif


#ifdef _CRAY
#define CRAY_Pragma(str)  _Pragma (str)
#else
#define CRAY_Pragma(str)
#endif
|
|
|
|
|
#if ! HAVE_STRCHR |
|
#define strchr(s,c) index(s,c) |
|
#endif |
|
|
|
#if ! HAVE_MEMSET |
|
#define memset(p, c, n) \ |
|
do { \ |
|
ASSERT (n >= 0); \ |
|
int __i; \ |
|
for (__i = 0; __i < (n); __i++) \ |
|
(p)[__i] = (c); \ |
|
} while (0) |
|
#endif |
|
|
|
/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89 |
|
mode. Falling back to a memcpy will give maximum portability, since it |
|
works no matter whether va_list is a pointer, struct or array. */ |
|
#if ! defined (va_copy) && defined (__va_copy) |
|
#define va_copy(dst,src) __va_copy(dst,src) |
|
#endif |
|
#if ! defined (va_copy) |
|
#define va_copy(dst,src) \ |
|
do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0) |
|
#endif |
|
|
|
|
|
#if defined (__cplusplus) |
|
extern "C" { |
|
#endif |
|
|
|
|
|
/* Usage: TMP_DECL (marker); |
|
TMP_MARK (marker); |
|
ptr = TMP_ALLOC (bytes); |
|
TMP_FREE (marker); |
|
|
|
TMP_DECL just declares a variable, but might be empty and so must be last |
|
in a list of variables. TMP_MARK must be done before any TMP_ALLOC. |
|
TMP_ALLOC(0) is not allowed. TMP_FREE doesn't need to be done if a |
|
TMP_MARK was made, but then no TMP_ALLOCs. |
|
|
|
The name "marker" isn't used by the malloc-reentrant and debug methods, |
|
instead they hardcode a name which TMP_ALLOC will know. For that reason |
|
two TMP_DECLs are not allowed, unless one is in a nested "{ }" block, and |
|
in that case TMP_MARK, TMP_ALLOC and TMP_FREE will refer to the TMP_DECL |
|
which is in scope, irrespective of the marker name given. */ |
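/* A minimal usage sketch (not part of this header): a routine needing n
   temporary limbs under the scheme above might look roughly like

     void
     foo (mp_ptr rp, mp_srcptr up, mp_size_t n)    (hypothetical routine)
     {
       mp_ptr  tp;
       TMP_DECL (marker);
       TMP_MARK (marker);
       tp = TMP_ALLOC_LIMBS (n);
       ...
       TMP_FREE (marker);
     }

   TMP_ALLOC_LIMBS is a convenience wrapper defined further down.  */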
|
|
|
/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or |
|
__gmp_allocate_func doesn't already determine it. Currently TMP_ALLOC |
|
isn't used for "double"s, so that's not in the union. */ |
|
union tmp_align_t { |
|
mp_limb_t l; |
|
char *p; |
|
}; |
|
#define __TMP_ALIGN sizeof (union tmp_align_t) |
|
|
|
/* Return "a" rounded upwards to a multiple of "m", if it isn't already. |
|
"a" must be an unsigned type. */ |
|
#define ROUND_UP_MULTIPLE(a,m) ((a) + (-(a))%(m)) |
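/* Worked example: for the unsigned value a=10 and m=8, (-(a))%(m) is
   (-10UL)%8 = 6, so ROUND_UP_MULTIPLE(a,m) gives 16; a value that is
   already a multiple of m comes back unchanged.  */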
|
|
|
#if WANT_TMP_ALLOCA |
|
/* Each TMP_ALLOC is simply an alloca(), and nothing else is needed. |
|
This is the preferred method. */ |
#define TMP_DECL(m)
#define TMP_ALLOC(x) alloca(x)
#define TMP_MARK(m)
#define TMP_FREE(m)
#endif
|
|
|
#if WANT_TMP_REENTRANT |
|
/* See tal-reent.c for some comments. */ |
|
struct tmp_reentrant_t { |
|
struct tmp_reentrant_t *next; |
|
size_t size; /* bytes, including header */ |
|
}; |
|
void *__gmp_tmp_reentrant_alloc _PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC; |
|
void __gmp_tmp_reentrant_free _PROTO ((struct tmp_reentrant_t *)); |
|
#define TMP_DECL(marker) struct tmp_reentrant_t *__tmp_marker |
|
/* don't demand NULL, just cast a zero */ |
|
#define TMP_MARK(marker) \ |
|
do { __tmp_marker = (struct tmp_reentrant_t *) 0; } while (0) |
|
#define TMP_ALLOC(size) __gmp_tmp_reentrant_alloc (&__tmp_marker, size) |
|
#define TMP_FREE(marker) __gmp_tmp_reentrant_free (__tmp_marker) |
|
#endif |
|
|
|
#if WANT_TMP_NOTREENTRANT |
|
/* See tal-notreent.c for some comments. */ |
|
struct tmp_marker |
|
{ |
|
struct tmp_stack *which_chunk; |
|
void *alloc_point; |
|
}; |
|
typedef struct tmp_marker tmp_marker; |
|
void *__gmp_tmp_alloc _PROTO ((unsigned long)) ATTRIBUTE_MALLOC; |
|
void __gmp_tmp_mark _PROTO ((tmp_marker *)); |
|
void __gmp_tmp_free _PROTO ((tmp_marker *)); |
|
#define TMP_DECL(marker) tmp_marker marker |
|
/* gcc recognises "(-(8*n))%8" or the like is always zero, which means the |
|
rounding up is a noop for allocs of whole limbs. */ |
|
#define TMP_ALLOC(size) \ |
|
__gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (size), __TMP_ALIGN)) |
|
#define TMP_MARK(marker) __gmp_tmp_mark (&marker) |
|
#define TMP_FREE(marker) __gmp_tmp_free (&marker) |
|
#endif |
|
|
|
#if WANT_TMP_DEBUG |
|
/* See tal-debug.c for some comments. */ |
|
struct tmp_debug_t { |
|
struct tmp_debug_entry_t *list; |
|
const char *file; |
|
int line; |
|
}; |
|
struct tmp_debug_entry_t { |
|
struct tmp_debug_entry_t *next; |
|
char *block; |
|
size_t size; |
|
}; |
|
void __gmp_tmp_debug_mark _PROTO ((const char *, int, struct tmp_debug_t **, |
|
struct tmp_debug_t *, |
|
const char *, const char *)); |
|
void *__gmp_tmp_debug_alloc _PROTO ((const char *, int, int, |
|
struct tmp_debug_t **, const char *, |
|
size_t)) ATTRIBUTE_MALLOC; |
|
void __gmp_tmp_debug_free _PROTO ((const char *, int, int, |
|
struct tmp_debug_t **, |
|
const char *, const char *)); |
|
#if HAVE_STRINGIZE |
|
#define TMP_DECL(marker) TMP_DECL_NAME(marker, #marker) |
|
#define TMP_MARK(marker) TMP_MARK_NAME(marker, #marker) |
|
#define TMP_FREE(marker) TMP_FREE_NAME(marker, #marker) |
|
#else |
|
#define TMP_DECL(marker) TMP_DECL_NAME(marker, "marker") |
|
#define TMP_MARK(marker) TMP_MARK_NAME(marker, "marker") |
|
#define TMP_FREE(marker) TMP_FREE_NAME(marker, "marker") |
|
#endif |
|
/* The marker variable is designed to provoke an uninitialized varialble |
|
warning from the compiler if TMP_FREE is used without a TMP_MARK. |
|
__tmp_marker_inscope does the same for TMP_ALLOC. Runtime tests pick |
|
these things up too. */ |
|
#define TMP_DECL_NAME(marker, marker_name) \ |
|
int marker; \ |
|
int __tmp_marker_inscope; \ |
|
const char *__tmp_marker_name = marker_name; \ |
|
struct tmp_debug_t __tmp_marker_struct; \ |
|
/* don't demand NULL, just cast a zero */ \ |
|
struct tmp_debug_t *__tmp_marker = (struct tmp_debug_t *) 0 |
|
#define TMP_MARK_NAME(marker, marker_name) \ |
|
do { \ |
|
marker = 1; \ |
|
__tmp_marker_inscope = 1; \ |
|
__gmp_tmp_debug_mark (ASSERT_FILE, ASSERT_LINE, \ |
|
&__tmp_marker, &__tmp_marker_struct, \ |
|
__tmp_marker_name, marker_name); \ |
|
} while (0) |
|
#define TMP_ALLOC(size) \ |
|
__gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE, \ |
|
__tmp_marker_inscope, \ |
|
&__tmp_marker, __tmp_marker_name, size) |
|
#define TMP_FREE_NAME(marker, marker_name) \ |
|
do { \ |
|
__gmp_tmp_debug_free (ASSERT_FILE, ASSERT_LINE, \ |
|
marker, &__tmp_marker, \ |
|
__tmp_marker_name, marker_name); \ |
|
} while (0) |
|
#endif |
|
|
|
|
/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
|
|
|
/* It's more efficient to allocate one block than two.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).  In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually.  */
#if WANT_TMP_DEBUG
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)   \
  do {                                          \
    (xp) = TMP_ALLOC_LIMBS (xsize);             \
    (yp) = TMP_ALLOC_LIMBS (ysize);             \
  } while (0)
#else
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)   \
  do {                                          \
    (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize)); \
    (yp) = (xp) + (xsize);                      \
  } while (0)
#endif
|
|
/* From gmp.h, nicer names for internal use. */
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)

#define ABS(x) ((x) >= 0 ? (x) : -(x))
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
#define PREC(x) ((x)->_mp_prec)
#define ALLOC(x) ((x)->_mp_alloc)
|
|
/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that.  */
#define POW2_P(n)  (((n) & ((n) - 1)) == 0)


/* Might be already defined by gmp-mparam.h, otherwise use what's in gmp.h. */
#ifndef BITS_PER_MP_LIMB
#define BITS_PER_MP_LIMB  __GMP_BITS_PER_MP_LIMB
#endif


#ifndef ULONG_MAX
#define ULONG_MAX   __GMP_ULONG_MAX
#endif
#ifndef UINT_MAX
#define UINT_MAX    __GMP_UINT_MAX
#endif
#ifndef USHRT_MAX
#define USHRT_MAX   __GMP_USHRT_MAX
#endif
#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed.  */
#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
#define GMP_LIMB_HIGHBIT   (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))

#ifndef LONG_MIN
#define LONG_MIN           ((long) ULONG_HIGHBIT)
#endif
#ifndef LONG_MAX
#define LONG_MAX           (-(LONG_MIN+1))
#endif

#ifndef INT_MIN
#define INT_MIN            ((int) UINT_HIGHBIT)
#endif
#ifndef INT_MAX
#define INT_MAX            (-(INT_MIN+1))
#endif
|
|
|
#ifndef SHRT_MIN |
|
#define SHRT_MIN ((short) USHRT_HIGHBIT) |
|
#endif |
|
#ifndef SHRT_MAX |
|
#define SHRT_MAX ((short) (-(SHRT_MIN+1))) |
|
#endif |
|
|
|
#if __GMP_MP_SIZE_T_INT |
|
#define MP_SIZE_T_MAX INT_MAX |
|
#define MP_SIZE_T_MIN INT_MIN |
|
#else |
|
#define MP_SIZE_T_MAX LONG_MAX |
|
#define MP_SIZE_T_MIN LONG_MIN |
|
#endif |
|
|
|
#define LONG_HIGHBIT LONG_MIN |
|
#define INT_HIGHBIT INT_MIN |
|
#define SHRT_HIGHBIT SHRT_MIN |
|
|
|
|
|
#define GMP_NUMB_HIGHBIT (CNST_LIMB(1) << (GMP_NUMB_BITS-1)) |
|
|
|
#if GMP_NAIL_BITS == 0 |
|
#define GMP_NAIL_LOWBIT CNST_LIMB(0) |
|
#else |
|
#define GMP_NAIL_LOWBIT (CNST_LIMB(1) << GMP_NUMB_BITS) |
|
#endif |
|
|
|
#if GMP_NAIL_BITS != 0 |
|
/* Set various *_THRESHOLD values to be used for nails. Thus we avoid using |
|
code that has not yet been qualified. */ |
|
|
|
#undef DIV_SB_PREINV_THRESHOLD |
|
#undef DIV_DC_THRESHOLD |
|
#undef POWM_THRESHOLD |
|
#define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX |
|
#define DIV_DC_THRESHOLD 50 |
|
#define POWM_THRESHOLD 0 |
|
|
|
#undef GCD_ACCEL_THRESHOLD |
|
#undef GCDEXT_THRESHOLD |
|
#define GCD_ACCEL_THRESHOLD 3 |
|
#define GCDEXT_THRESHOLD 20 |
|
|
|
#undef DIVREM_1_NORM_THRESHOLD |
|
#undef DIVREM_1_UNNORM_THRESHOLD |
|
#undef MOD_1_NORM_THRESHOLD |
|
#undef MOD_1_UNNORM_THRESHOLD |
|
#undef USE_PREINV_DIVREM_1 |
|
#undef USE_PREINV_MOD_1 |
|
#undef DIVREM_2_THRESHOLD |
|
#undef DIVEXACT_1_THRESHOLD |
|
#undef MODEXACT_1_ODD_THRESHOLD |
|
#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* preinv always */ |
|
#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* always */ |
|
#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* always */ |
|
#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* always */ |
|
#define USE_PREINV_DIVREM_1 0 /* preinv never */ |
|
#define USE_PREINV_MOD_1 0 /* preinv never */ |
|
#define DIVREM_2_THRESHOLD MP_SIZE_T_MAX /* preinv never */ |
|
#define DIVEXACT_1_THRESHOLD MP_SIZE_T_MAX /* always */ |
|
#define MODEXACT_1_ODD_THRESHOLD MP_SIZE_T_MAX /* always */ |
|
|
|
#undef GET_STR_DC_THRESHOLD |
|
#undef GET_STR_PRECOMPUTE_THRESHOLD |
|
#undef SET_STR_THRESHOLD |
|
#define GET_STR_DC_THRESHOLD 22 |
|
#define GET_STR_PRECOMPUTE_THRESHOLD 42 |
|
#define SET_STR_THRESHOLD 3259 |
|
|
|
#undef MUL_FFT_TABLE |
|
#undef MUL_FFT_MODF_THRESHOLD |
|
#undef MUL_FFT_THRESHOLD |
|
#define MUL_FFT_TABLE { 400, 928, 1856, 3840, 7168, 20480, 0 } |
|
#define MUL_FFT_MODF_THRESHOLD 416 |
|
#define MUL_FFT_THRESHOLD MP_SIZE_T_MAX |
|
|
|
#undef SQR_FFT_TABLE |
|
#undef SQR_FFT_MODF_THRESHOLD |
|
#undef SQR_FFT_THRESHOLD |
|
#define SQR_FFT_TABLE { 400, 992, 1984, 3840, 9216, 20480, 0 } |
|
#define SQR_FFT_MODF_THRESHOLD 416 |
|
#define SQR_FFT_THRESHOLD MP_SIZE_T_MAX |
|
|
|
#endif |
|
|
/* Swap macros. */

#define MP_LIMB_T_SWAP(x, y)                    \
  do {                                          \
    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_limb_t_swap__tmp;                \
  } while (0)
|
|
|
|
void *__gmp_default_allocate _PROTO ((size_t));
void *__gmp_default_reallocate _PROTO ((void *, size_t, size_t));
void __gmp_default_free _PROTO ((void *, size_t));

#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type)         \
  ((type *) (*__gmp_reallocate_func)                                    \
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size)              \
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)              \
  do {                                                                  \
    if ((oldsize) != (newsize))                                         \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize);         \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
  do {                                                                  \
    if ((oldsize) != (newsize))                                         \
      (ptr) = (type *) (*__gmp_reallocate_func)                         \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
  } while (0)
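
/* Illustrative sketch (not from this file): growing a block of limbs from
   oldalloc to newalloc limbs through the allocation hooks might be written

     xp = __GMP_REALLOCATE_FUNC_LIMBS (xp, oldalloc, newalloc);

   or, when the old and new sizes may coincide,

     __GMP_REALLOCATE_FUNC_MAYBE_TYPE (xp, oldalloc, newalloc, mp_limb_t);

   where xp, oldalloc and newalloc are the caller's own variables.  */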
|
|
|
|
/* Dummy for non-gcc, code involving it will go dead. */
#ifndef __GNUC__
#define __builtin_constant_p(x)   0
#endif


/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.

   regparm cannot be used with calls going through the PLT, because the
   binding code there may clobber the registers (%eax, %edx, %ecx) used for
   the regparm parameters.  Calls to local (ie. static) functions could
   still use this, if we cared to differentiate locals and globals.  */

#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC)
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif

/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x)    x,a,b
#define REGPARM_3_1(a,b,c,x)  x,a,b,c
#define REGPARM_ATTR(n)  __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x)    a,b,x
#define REGPARM_3_1(a,b,c,x)  a,b,c,x
#define REGPARM_ATTR(n)
#endif
|
|
|
|
|
/* ASM_L gives a local label for a gcc asm block, for use when temporary |
|
local labels like "1:" might not be available, which is the case for |
|
instance on the x86s (the SCO assembler doesn't support them). |
|
|
|
The label generated is made unique by including "%=" which is a unique |
|
number for each insn. This ensures the same name can be used in multiple |
|
asm blocks, perhaps via a macro. Since jumps between asm blocks are not |
|
allowed there's no need for a label to be usable outside a single |
|
block. */ |
|
|
|
#define ASM_L(name) LSYM_PREFIX "asm_%=_" #name |
|
|
|
|
|
#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile. */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#define MPN_NORMALIZE_NOT_ZERO(P, N)                    \
  do {                                                  \
    __asm__ ("std\n\trepe\n\tscasl" : "=c" (N) :        \
             "a" (0), "D" ((P) + (N) - 1), "0" (N) :    \
             "cx", "di");                               \
    (N)++;                                              \
  } while (0)
#endif
#endif
|
|
#if HAVE_NATIVE_mpn_copyi |
|
#define mpn_copyi __MPN(copyi) |
|
void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t)); |
|
#endif |
|
|
|
|
void __gmpz_aorsmul_1 _PROTO ((REGPARM_3_1 (mpz_ptr w, mpz_srcptr u, mp_limb_t v, mp_size_t sub))) REGPARM_ATTR(1); |
|
#define mpz_aorsmul_1(w,u,v,sub) __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub)) |
|
|
|
#define mpz_n_pow_ui __gmpz_n_pow_ui |
|
void mpz_n_pow_ui _PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long)); |
|
|
|
|
|
#define mpn_add_nc __MPN(add_nc) |
|
__GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)); |
|
|
|
#define mpn_addmul_1c __MPN(addmul_1c) |
|
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)); |
|
|
|
#define mpn_addsub_n __MPN(addsub_n) |
|
__GMP_DECLSPEC mp_limb_t mpn_addsub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); |
|
|
|
#define mpn_addsub_nc __MPN(addsub_nc) |
|
__GMP_DECLSPEC mp_limb_t mpn_addsub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)); |
|
|
|
#define mpn_divrem_1c __MPN(divrem_1c) |
|
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)); |
|
|
|
#define mpn_dump __MPN(dump) |
|
__GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t)); |
|
|
|
#define mpn_fib2_ui __MPN(fib2_ui) |
|
mp_size_t mpn_fib2_ui _PROTO ((mp_ptr, mp_ptr, unsigned long)); |
|
|
/* Remap names of internal mpn functions. */
#define __clz_tab               __MPN(clz_tab)
#define mpn_udiv_w_sdiv         __MPN(udiv_w_sdiv)

#define mpn_gcd_finda  __MPN(gcd_finda)
mp_limb_t mpn_gcd_finda _PROTO((const mp_limb_t cp[2])) __GMP_ATTRIBUTE_PURE;
|
|
|
|
#define mpn_jacobi_base __MPN(jacobi_base) |
|
int mpn_jacobi_base _PROTO ((mp_limb_t a, mp_limb_t b, int result_bit1)) ATTRIBUTE_CONST; |
|
|
|
#define mpn_mod_1c __MPN(mod_1c) |
|
__GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE; |
|
|
|
#define mpn_mul_1c __MPN(mul_1c) |
|
__GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)); |
|
|
|
#define mpn_mul_2 __MPN(mul_2) |
|
mp_limb_t mpn_mul_2 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)); |
|
|
|
#define mpn_mul_basecase __MPN(mul_basecase) |
|
__GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); |
|
|
|
#define mpn_sqr_n __MPN(sqr_n) |
|
__GMP_DECLSPEC void mpn_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); |
|
|
|
#define mpn_sqr_basecase __MPN(sqr_basecase) |
|
__GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); |
|
|
|
#define mpn_sub_nc __MPN(sub_nc) |
|
__GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)); |
|
|
|
#define mpn_submul_1c __MPN(submul_1c) |
|
__GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)); |
|
|
|
|
|
typedef __gmp_randstate_struct *gmp_randstate_ptr; |
|
|
|
#define _gmp_rand __gmp_rand |
|
__GMP_DECLSPEC void _gmp_rand _PROTO ((mp_ptr, gmp_randstate_t, unsigned long int)); |
|
|
|
|
|
/* __gmp_rands is the global state for the old-style random functions, and |
|
is also used in the test programs (hence the __GMP_DECLSPEC). |
|
|
|
There's no seeding here, so mpz_random etc will generate the same |
|
sequence every time. This is not unlike the C library random functions |
|
if you don't seed them, so perhaps it's acceptable. Digging up a seed |
|
from /dev/random or the like would work on many systems, but might |
|
encourage a false confidence, since it'd be pretty much impossible to do |
|
something that would work reliably everywhere. In any case the new style |
|
functions are recommended to applications which care about randomness, so |
|
the old functions aren't too important. */ |
|
|
|
__GMP_DECLSPEC extern char __gmp_rands_initialized; |
|
__GMP_DECLSPEC extern gmp_randstate_t __gmp_rands; |
|
|
|
#define RANDS \ |
|
((__gmp_rands_initialized ? 0 \ |
|
: (__gmp_rands_initialized = 1, \ |
|
gmp_randinit_default (__gmp_rands), 0)), \ |
|
__gmp_rands) |
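
/* Sketch of typical use (not from this file): an internal routine wanting
   random bits can fetch the lazily-initialized global state through RANDS,
   for instance

     _gmp_rand (xp, RANDS, nbits);

   where xp and nbits are the caller's destination and bit count.  */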
|
|
|
/* this is used by the test programs, to free memory */ |
|
#define RANDS_CLEAR() \ |
|
do { \ |
|
if (__gmp_rands_initialized) \ |
|
{ \ |
|
__gmp_rands_initialized = 0; \ |
|
gmp_randclear (__gmp_rands); \ |
|
} \ |
|
} while (0) |
|
|
|
|
|
/* kara uses n+1 limbs of temporary space and then recurses with the |
|
balance, so need (n+1) + (ceil(n/2)+1) + (ceil(n/4)+1) + ... */ |
|
#define MPN_KARA_MUL_N_TSIZE(n) (2*((n)+BITS_PER_MP_LIMB)) |
|
#define MPN_KARA_SQR_N_TSIZE(n) (2*((n)+BITS_PER_MP_LIMB)) |
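
/* Sketch (not from this file): a caller would typically pair these sizes
   with the TMP scheme, along the lines of

     tp = TMP_ALLOC_LIMBS (MPN_KARA_MUL_N_TSIZE (n));
     mpn_kara_mul_n (wp, up, vp, n, tp);

   with wp, up, vp and n being the caller's operands.  */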
|
|
|
/* toom3 uses 4*(ceil(n/3)) of temporary space and then recurses with the |
|
balance either into itself or kara. The following might be |
|
overestimates. */ |
|
#define MPN_TOOM3_MUL_N_TSIZE(n) (2*(n) + 3*BITS_PER_MP_LIMB) |
|
#define MPN_TOOM3_SQR_N_TSIZE(n) (2*(n) + 3*BITS_PER_MP_LIMB) |
|
|
|
/* need 2 so that n2>=1 */ |
|
#define MPN_KARA_MUL_N_MINSIZE 2 |
|
#define MPN_KARA_SQR_N_MINSIZE 2 |
|
|
|
/* Need l>=1, ls>=1, and 2*ls > l (the latter for the tD MPN_INCR_U) */ |
|
#define MPN_TOOM3_MUL_N_MINSIZE 11 |
|
#define MPN_TOOM3_SQR_N_MINSIZE 11 |
|
|
|
#define mpn_sqr_diagonal __MPN(sqr_diagonal) |
|
void mpn_sqr_diagonal _PROTO ((mp_ptr, mp_srcptr, mp_size_t)); |
|
|
#define mpn_kara_mul_n __MPN(kara_mul_n)
void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
|
|
#define mpn_toom3_mul_n  __MPN(toom3_mul_n)
void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_sqr_n  __MPN(toom3_sqr_n)
void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
|
|
#define mpn_fft_best_k __MPN(fft_best_k)
int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr)) ATTRIBUTE_CONST;
|
|
#define mpn_mul_fft  __MPN(mul_fft)
void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
                          mp_srcptr n, mp_size_t nl,
                          mp_srcptr m, mp_size_t ml,
                          int k));

#define mpn_mul_fft_full  __MPN(mul_fft_full)
void mpn_mul_fft_full _PROTO ((mp_ptr op,
                               mp_srcptr n, mp_size_t nl,
                               mp_srcptr m, mp_size_t ml));
|
|
#define mpn_fft_next_size __MPN(fft_next_size)
mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k)) ATTRIBUTE_CONST;

#define mpn_sb_divrem_mn  __MPN(sb_divrem_mn)
mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t,
                                    mp_srcptr, mp_size_t));

#define mpn_dc_divrem_n  __MPN(dc_divrem_n)
mp_limb_t mpn_dc_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
|
|
|
/* #define mpn_tdiv_q  __MPN(tdiv_q) */
/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */

#define mpz_divexact_gcd  __gmpz_divexact_gcd
void mpz_divexact_gcd _PROTO ((mpz_ptr q, mpz_srcptr a, mpz_srcptr d));

#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
#ifdef _GMP_H_HAVE_FILE
size_t mpz_inp_str_nowhite _PROTO ((mpz_ptr x, FILE *stream, int base, int c, size_t nread));
#endif

#define mpn_divisible_p __MPN(divisible_p)
int     mpn_divisible_p _PROTO ((mp_srcptr ap, mp_size_t asize,
                                 mp_srcptr dp, mp_size_t dsize)) __GMP_ATTRIBUTE_PURE;

#define mpn_rootrem __gmpn_rootrem
mp_size_t mpn_rootrem _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
|
|
|
|
|
/* from gmp.h */ |
|
#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc |
|
#define MPN_COPY_INCR(dst, src, size) \ |
|
do { \ |
|
ASSERT ((size) >= 0); \ |
|
ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \ |
|
__GMPN_COPY_INCR (dst, src, size); \ |
|
} while (0) |
|
#endif |
|
|
|
#if defined (_CRAY)
#define MPN_COPY_INCR(dst, src, n)                                      \
  do {                                                                  \
    int __i;              /* Faster on some Crays with plain int */     \
    _Pragma ("_CRI ivdep");                                             \
    for (__i = 0; __i < (n); __i++)                                     \
      (dst)[__i] = (src)[__i];                                          \
  } while (0)
#endif
|
|
|
#define mpn_copyi __MPN(copyi) |
|
void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t)); |
|
|
|
#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi |
|
#define MPN_COPY_INCR(dst, src, size) \ |
|
do { \ |
|
ASSERT ((size) >= 0); \ |
|
ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \ |
|
mpn_copyi (dst, src, size); \ |
|
} while (0) |
#endif
|
|
/* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
#if ! defined (MPN_COPY_INCR)
#define MPN_COPY_INCR(dst, src, n)                      \
  do {                                                  \
    ASSERT ((n) >= 0);                                  \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \
    if ((n) != 0)                                       \
      {                                                 \
        mp_size_t __n = (n) - 1;                        \
        mp_ptr __dst = (dst);                           \
        mp_srcptr __src = (src);                        \
        mp_limb_t __x;                                  \
        __x = *__src++;                                 \
        if (__n != 0)                                   \
          {                                             \
            do                                          \
              {                                         \
                *__dst++ = __x;                         \
                __x = *__src++;                         \
              }                                         \
            while (--__n);                              \
          }                                             \
        *__dst++ = __x;                                 \
      }                                                 \
  } while (0)
#endif
|
|
/* As per __GMPN_COPY_INCR in gmp.h. */
#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
#define MPN_COPY_DECR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
    if ((size) != 0)                                    \
      {                                                 \
        mp_ptr     __dst = (dst) + (size);              \
        mp_srcptr  __src = (src) + (size);              \
        mp_size_t  __size = (size);                     \
        do                                              \
          *--__dst = *--__src;                          \
        while (--__size != 0);                          \
      }                                                 \
  } while (0)
#endif

#if defined (_CRAY)
#define MPN_COPY_DECR(dst, src, n)                                      \
  do {                                                                  \
    int __i;              /* Faster on some Crays with plain int */     \
    _Pragma ("_CRI ivdep");                                             \
    for (__i = (n) - 1; __i >= 0; __i--)                                \
      (dst)[__i] = (src)[__i];                                          \
  } while (0)
#endif
|
|
|
#define mpn_copyd __MPN(copyd) |
|
void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t)); |
|
|
|
#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd |
|
#define MPN_COPY_DECR(dst, src, size) \ |
|
do { \ |
|
ASSERT ((size) >= 0); \ |
|
ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \ |
|
mpn_copyd (dst, src, size); \ |
|
} while (0) |
#endif
|
|
/* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */
#if ! defined (MPN_COPY_DECR)
#define MPN_COPY_DECR(dst, src, n)                      \
  do {                                                  \
    ASSERT ((n) >= 0);                                  \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));          \
    if ((n) != 0)                                       \
      {                                                 \
        mp_size_t __n = (n) - 1;                        \
        mp_ptr __dst = (dst) + __n;                     \
        mp_srcptr __src = (src) + __n;                  \
        mp_limb_t __x;                                  \
        __x = *__src--;                                 \
        if (__n != 0)                                   \
          {                                             \
            do                                          \
              {                                         \
                *__dst-- = __x;                         \
                __x = *__src--;                         \
              }                                         \
            while (--__n);                              \
          }                                             \
        *__dst-- = __x;                                 \
      }                                                 \
  } while (0)
#endif
|
|
|
|
#ifndef MPN_COPY
#define MPN_COPY(d,s,n)                         \
  do {                                          \
    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));  \
    MPN_COPY_INCR (d, s, n);                    \
  } while (0)
#endif
|
|
|
|
/* Set {dst,size} to the limbs of {src,size} in reverse order. */ |
|
#define MPN_REVERSE(dst, src, size) \ |
|
do { \ |
|
mp_ptr __dst = (dst); \ |
|
mp_size_t __size = (size); \ |
|
mp_srcptr __src = (src) + __size - 1; \ |
|
mp_size_t __i; \ |
|
ASSERT ((size) >= 0); \ |
|
ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \ |
|
CRAY_Pragma ("_CRI ivdep"); \ |
|
for (__i = 0; __i < __size; __i++) \ |
|
{ \ |
|
*__dst = *__src; \ |
|
__dst++; \ |
|
__src--; \ |
|
} \ |
|
} while (0) |
|
|
|
|
|
/* Zero n limbs at dst. |
|
|
|
For power and powerpc we want an inline stu/bdnz loop for zeroing. On |
|
ppc630 for instance this is optimal since it can sustain only 1 store per |
|
cycle. |
|
|
|
gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the |
|
"for" loop in the generic code below can become stu/bdnz. The do/while |
|
here helps it get to that. The same caveat about plain -mpowerpc64 mode |
|
applies here as to __GMPN_COPY_INCR in gmp.h. |
|
|
|
xlc 3.1 already generates stu/bdnz from the generic C, and does so from |
|
this loop too. |
|
|
|
Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines |
|
at a time. MPN_ZERO isn't all that important in GMP, so it might be more |
|
trouble than it's worth to do the same, though perhaps a call to memset |
|
would be good when on a GNU system. */ |
|
|
|
#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc |
|
#define MPN_ZERO(dst, n) \ |
|
do { \ |
|
ASSERT ((n) >= 0); \ |
|
if ((n) != 0) \ |
|
{ \ |
|
mp_ptr __dst = (dst) - 1; \ |
|
mp_size_t __n = (n); \ |
|
do \ |
|
*++__dst = 0; \ |
|
while (--__n); \ |
|
} \ |
|
} while (0) |
|
#endif |
|
|
#ifndef MPN_ZERO
#define MPN_ZERO(dst, n)                        \
  do {                                          \
    ASSERT ((n) >= 0);                          \
    if ((n) != 0)                               \
      {                                         \
        mp_ptr __dst = (dst);                   \
        mp_size_t __n = (n);                    \
        do                                      \
          *__dst++ = 0;                         \
        while (--__n);                          \
      }                                         \
  } while (0)
#endif
|
|
|
|
|
/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to |
|
start up and would need to strip a lot of zeros before it'd be faster |
|
than a simple cmpl loop. Here are some times in cycles for |
|
std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping |
|
low zeros). |
|
|
|
std cld |
|
P5 18 16 |
|
P6 46 38 |
|
K6 36 13 |
|
K7 21 20 |
|
*/ |
#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS)              \
  do {                                          \
    while (NLIMBS > 0)                          \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        NLIMBS--;                               \
      }                                         \
  } while (0)
#endif
#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
  do {                                          \
    ASSERT ((NLIMBS) >= 1);                     \
    while (1)                                   \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        NLIMBS--;                               \
      }                                         \
  } while (0)
#endif
|
|
/* Strip least significant zero limbs from {ptr,size} by incrementing ptr
   and decrementing size.  low should be ptr[0], and will be the new ptr[0]
   on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
   somewhere a non-zero limb.  */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)    \
  do {                                                  \
    ASSERT ((size) >= 1);                               \
    ASSERT ((low) == (ptr)[0]);                         \
                                                        \
    while ((low) == 0)                                  \
      {                                                 \
        (size)--;                                       \
        ASSERT ((size) >= 1);                           \
        (ptr)++;                                        \
        (low) = *(ptr);                                 \
      }                                                 \
  } while (0)
|
|
/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
   temporary variable; it will be automatically cleared out at function
   return.  We use __x here to make it possible to accept both mpz_ptr and
   mpz_t arguments.  */
#define MPZ_TMP_INIT(X, NLIMBS)                                         \
  do {                                                                  \
    mpz_ptr __x = (X);                                                  \
    ASSERT ((NLIMBS) >= 1);                                             \
    __x->_mp_alloc = (NLIMBS);                                          \
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);     \
  } while (0)
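
/* Sketch (not from this file): typical use together with the TMP scheme,
   for a temporary mpz_t of up to 2*n+1 limbs:

     mpz_t  t;
     TMP_DECL (marker);
     TMP_MARK (marker);
     MPZ_TMP_INIT (t, 2*n + 1);
     ...
     TMP_FREE (marker);
*/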
|
|
/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */
#define MPZ_REALLOC(z,n) ((n) > ALLOC(z) ? _mpz_realloc(z,n) : PTR(z))

#define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)
|
|
|
|
|
/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and |
|
f1p. |
|
|
|
From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the |
|
nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the |
|
number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419. |
|
|
|
The multiplier used is 23/32=0.71875 for efficient calculation on CPUs |
|
without good floating point. There's +2 for rounding up, and a further |
|
+2 since at the last step x limbs are doubled into a 2x+1 limb region |
|
whereas the actual F[2k] value might be only 2x-1 limbs. |
|
|
|
Note that a division is done first, since on a 32-bit system it's at |
|
least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be |
|
about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and |
|
whatever a multiply of two 190Mbyte numbers takes.) |
|
|
|
Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be |
|
worked into the multiplier. */ |
|
|
|
#define MPN_FIB2_SIZE(n) \ |
|
((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4) |
|
|
|
|
|
/* FIB_TABLE(n) returns the Fibonacci number F[n]. Must have n in the range |
|
-1 <= n <= FIB_TABLE_LIMIT. |
|
|
|
FIB_TABLE_LUCNUM_LIMIT is the largest n for which L[n] = F[n] + 2*F[n-1] |
|
fits in a limb. |
|
|
|
This data generated by code at the end of mpn/generic/fib2_ui.c. */ |
|
|
|
extern const mp_limb_t __gmp_fib_table[]; |
|
#define FIB_TABLE(n) (__gmp_fib_table[(n)+1]) |
|
|
|
#if GMP_NUMB_BITS >= 64 |
|
#define FIB_TABLE_LIMIT 93 |
|
#define FIB_TABLE_LUCNUM_LIMIT 92 |
|
#else |
|
#if GMP_NUMB_BITS >= 32 |
|
#define FIB_TABLE_LIMIT 47 |
|
#define FIB_TABLE_LUCNUM_LIMIT 46 |
|
#else |
|
#if GMP_NUMB_BITS >= 16 |
|
#define FIB_TABLE_LIMIT 24 |
|
#define FIB_TABLE_LUCNUM_LIMIT 23 |
|
#else |
|
#if GMP_NUMB_BITS >= 8 |
|
#define FIB_TABLE_LIMIT 13 |
|
#define FIB_TABLE_LUCNUM_LIMIT 11 |
|
#else |
|
#if GMP_NUMB_BITS >= 4 |
|
#define FIB_TABLE_LIMIT 7 |
|
#define FIB_TABLE_LUCNUM_LIMIT 5 |
|
#endif /* 4 */ |
|
#endif /* 8 */ |
|
#endif /* 16 */ |
|
#endif /* 32 */ |
|
#endif /* 64 */ |
|
|
|
|
|
/* For a threshold between algorithms A and B, size>=thresh is where B |
|
should be used. Special value MP_SIZE_T_MAX means only ever use A, or |
|
value 0 means only ever use B. The tests for these special values will |
|
be compile-time constants, so the compiler should be able to eliminate |
|
the code for the unwanted algorithm. */ |
|
|
|
#define ABOVE_THRESHOLD(size,thresh) \ |
|
((thresh) == 0 \ |
|
|| ((thresh) != MP_SIZE_T_MAX \ |
|
&& (size) >= (thresh))) |
|
#define BELOW_THRESHOLD(size,thresh) (! ABOVE_THRESHOLD (size, thresh)) |
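
/* Illustrative sketch (not from this file): a typical dispatch on one of
   the tuned thresholds looks like

     if (BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))
       mpn_mul_basecase (wp, up, n, vp, n);
     else
       mpn_kara_mul_n (wp, up, vp, n, tspace);

   where wp, up, vp, n and tspace are the caller's operands and workspace. */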
|
|
|
|
|
/* If MUL_KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef MUL_KARATSUBA_THRESHOLD
#define MUL_KARATSUBA_THRESHOLD 32
#endif

/* If MUL_TOOM3_THRESHOLD is not already defined, define it to a
   value which is good on most machines.  */
#ifndef MUL_TOOM3_THRESHOLD
#define MUL_TOOM3_THRESHOLD 256
#endif
|
|
/* This is the threshold at which mpn_sqr_basecase should take over from
   mpn_mul_basecase in mpn_sqr_n.  Default is to use mpn_sqr_basecase
   always.

   If it turns out that mpn_kara_sqr_n becomes faster than mpn_mul_basecase
   before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the
   karatsuba threshold and SQR_KARATSUBA_THRESHOLD is 0.  This oddity arises
   more or less because SQR_KARATSUBA_THRESHOLD represents the size up to
   which mpn_sqr_basecase should be used, and that may be never.  */

#ifndef SQR_BASECASE_THRESHOLD
#define SQR_BASECASE_THRESHOLD 0
#endif

#ifndef SQR_KARATSUBA_THRESHOLD
#define SQR_KARATSUBA_THRESHOLD (2*MUL_KARATSUBA_THRESHOLD)
#endif

#ifndef SQR_TOOM3_THRESHOLD
#define SQR_TOOM3_THRESHOLD (2*MUL_TOOM3_THRESHOLD)
#endif
|
|
/* First k to use for an FFT modF multiply.  A modF FFT is an order
   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
   whereas k=4 is 1.33 which is faster than toom3 at 1.485.  */
#define FFT_FIRST_K  4

/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
#ifndef MUL_FFT_MODF_THRESHOLD
#define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM3_THRESHOLD * 3)
#endif
#ifndef SQR_FFT_MODF_THRESHOLD
#define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
#endif
|
|
/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
   NxN->2N multiply and not recursing into itself is an order
   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
   is the first better than toom3.  */
#ifndef MUL_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
#endif
#ifndef SQR_FFT_THRESHOLD
#define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
#endif

/* Table of thresholds for successive modF FFT "k"s.  The first entry is
   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
   etc.  See mpn_fft_best_k(). */
#ifndef MUL_FFT_TABLE
#define MUL_FFT_TABLE                           \
  { MUL_TOOM3_THRESHOLD * 4,   /* k=5 */        \
    MUL_TOOM3_THRESHOLD * 8,   /* k=6 */        \
    MUL_TOOM3_THRESHOLD * 16,  /* k=7 */        \
    MUL_TOOM3_THRESHOLD * 32,  /* k=8 */        \
    MUL_TOOM3_THRESHOLD * 96,  /* k=9 */        \
    MUL_TOOM3_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif
#ifndef SQR_FFT_TABLE
#define SQR_FFT_TABLE                           \
  { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */        \
    SQR_TOOM3_THRESHOLD * 8,   /* k=6 */        \
    SQR_TOOM3_THRESHOLD * 16,  /* k=7 */        \
    SQR_TOOM3_THRESHOLD * 32,  /* k=8 */        \
    SQR_TOOM3_THRESHOLD * 96,  /* k=9 */        \
    SQR_TOOM3_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif
|
|
#define MPN_FFT_TABLE_SIZE  16
|
|
|
|
|
/* mpn_dc_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than |
|
div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way, |
|
i.e. n/2 >= MUL_KARATSUBA_THRESHOLD |
|
|
|
Measured values are between 2 and 4 times MUL_KARATSUBA_THRESHOLD, so go |
|
for 3 as an average. */ |
|
|
|
#ifndef DIV_DC_THRESHOLD |
|
#define DIV_DC_THRESHOLD (3 * MUL_KARATSUBA_THRESHOLD) |
|
#endif |
|
|
|
|
/* Return non-zero if xp,xsize and yp,ysize overlap. |
/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap. */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
#define MEM_OVERLAP_P(xp, xsize, yp, ysize) \ |
|
( (char *) (xp) + (xsize) > (char *) (yp) \ |
|
&& (char *) (yp) + (ysize) > (char *) (xp)) |
|
|
|
/* Return non-zero if xp,xsize and yp,ysize are either identical or not |
|
overlapping. Return zero if they're partially overlapping. */ |
|
#define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \ |
|
MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size) |
|
#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \ |
|
((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize)) |
|
|
|
/* Return non-zero if dst,dsize and src,ssize are either identical or |
|
overlapping in a way suitable for an incrementing/decrementing algorithm. |
|
Return zero if they're partially overlapping in an unsuitable fashion. */ |
|
#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \ |
|
((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize)) |
|
#define MPN_SAME_OR_INCR_P(dst, src, size) \ |
|
MPN_SAME_OR_INCR2_P(dst, size, src, size) |
|
#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \ |
|
((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize)) |
|
#define MPN_SAME_OR_DECR_P(dst, src, size) \ |
|
MPN_SAME_OR_DECR2_P(dst, size, src, size) |
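
/* Sketch (not from this file): these predicates are normally used in entry
   assertions; for instance a routine writing {rp,n} from {up,n} with an
   incrementing loop might check

     ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
*/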
|
|
|
|
/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   Note that strings shouldn't be used within the ASSERT expression,
   eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
   used in the !HAVE_STRINGIZE case (ie. K&R).  */

#ifdef __LINE__
#define ASSERT_LINE  __LINE__
#else
#define ASSERT_LINE  -1
#endif

#ifdef __FILE__
#define ASSERT_FILE  __FILE__
#else
#define ASSERT_FILE  ""
#endif
|
|
void __gmp_assert_header _PROTO ((const char *filename, int linenum));
__GMP_DECLSPEC void __gmp_assert_fail _PROTO ((const char *filename, int linenum, const char *expr)) ATTRIBUTE_NORETURN;
|
|
#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif
|
|
#if HAVE_VOID |
#define ASSERT_ALWAYS(expr) \ |
#define CAST_TO_VOID (void) |
do { \ |
|
if (!(expr)) \ |
|
ASSERT_FAIL (expr); \ |
|
} while (0) |
|
|
|
#if WANT_ASSERT |
|
#define ASSERT(expr) ASSERT_ALWAYS (expr) |
#else |
#else |
#define CAST_TO_VOID |
#define ASSERT(expr) do {} while (0) |
#endif |
#endif |
|
|
#define ASSERT_ALWAYS(expr) ((expr) ? 0 : ASSERT_FAIL (expr)) |
|
|
|
|
/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks |
|
that it's zero. In both cases if assertion checking is disabled the |
|
expression is still evaluated. These macros are meant for use with |
|
routines like mpn_add_n() where the return value represents a carry or |
|
whatever that should or shouldn't occur in some context. For example, |
|
ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */ |
#if WANT_ASSERT
#define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
#else
#define ASSERT_CARRY(expr)     (expr)
#define ASSERT_NOCARRY(expr)   (expr)
#endif
|
|
|
|
|
/* ASSERT_CODE includes code when assertion checking is wanted. This is the |
|
same as writing "#if WANT_ASSERT", but more compact. */ |
|
#if WANT_ASSERT |
|
#define ASSERT_CODE(expr) expr |
|
#else |
|
#define ASSERT_CODE(expr) |
|
#endif |
|
|
|
|
|
/* Test that an mpq_t is in fully canonical form. This can be used as |
|
protection on routines like mpq_equal which give wrong results on |
|
non-canonical inputs. */ |
|
#if WANT_ASSERT |
|
#define ASSERT_MPQ_CANONICAL(q) \ |
|
do { \ |
|
ASSERT (q->_mp_den._mp_size > 0); \ |
|
if (q->_mp_num._mp_size == 0) \ |
|
{ \ |
|
/* zero should be 0/1 */ \ |
|
ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \ |
|
} \ |
|
else \ |
|
{ \ |
|
/* no common factors */ \ |
|
mpz_t __g; \ |
|
mpz_init (__g); \ |
|
mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \ |
|
ASSERT (mpz_cmp_ui (__g, 1) == 0); \ |
|
mpz_clear (__g); \ |
|
} \ |
|
} while (0) |
|
#else |
|
#define ASSERT_MPQ_CANONICAL(q) do {} while (0) |
|
#endif |
|
|
|
/* Check that the nail parts are zero. */ |
|
#define ASSERT_ALWAYS_LIMB(limb) \ |
|
do { \ |
|
mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \ |
|
ASSERT_ALWAYS (__nail == 0); \ |
|
} while (0) |
|
#define ASSERT_ALWAYS_MPN(ptr, size) \ |
|
do { \ |
|
/* let whole loop go dead when no nails */ \ |
|
if (GMP_NAIL_BITS != 0) \ |
|
{ \ |
|
mp_size_t __i; \ |
|
for (__i = 0; __i < (size); __i++) \ |
|
ASSERT_ALWAYS_LIMB ((ptr)[__i]); \ |
|
} \ |
|
} while (0) |
|
#if WANT_ASSERT |
|
#define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb) |
|
#define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size) |
|
#else |
|
#define ASSERT_LIMB(limb) do {} while (0) |
|
#define ASSERT_MPN(ptr, size) do {} while (0) |
|
#endif |
|
|
|
|
|
/* Assert that an mpn region {ptr,size} is zero, or non-zero. |
|
size==0 is allowed, and in that case {ptr,size} considered to be zero. */ |
|
#if WANT_ASSERT |
|
#define ASSERT_MPN_ZERO_P(ptr,size) \ |
|
do { \ |
|
mp_size_t __i; \ |
|
ASSERT ((size) >= 0); \ |
|
for (__i = 0; __i < (size); __i++) \ |
|
ASSERT ((ptr)[__i] == 0); \ |
|
} while (0) |
|
#define ASSERT_MPN_NONZERO_P(ptr,size) \ |
|
do { \ |
|
mp_size_t __i; \ |
|
int __nonzero = 0; \ |
|
ASSERT ((size) >= 0); \ |
|
for (__i = 0; __i < (size); __i++) \ |
|
if ((ptr)[__i] != 0) \ |
|
{ \ |
|
__nonzero = 1; \ |
|
break; \ |
|
} \ |
|
ASSERT (__nonzero); \ |
|
} while (0) |
|
#else |
|
#define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0) |
|
#define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0) |
|
#endif |
|
|
|
|
#if HAVE_NATIVE_mpn_com_n
#define mpn_com_n __MPN(com_n)
void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#else
#define mpn_com_n(d,s,n)                                \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s = (s);                               \
    mp_size_t  __n = (n);                               \
    ASSERT (__n >= 1);                                  \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));    \
    do                                                  \
      *__d++ = (~ *__s++) & GMP_NUMB_MASK;              \
    while (--__n);                                      \
  } while (0)
#endif
|
|
#define MPN_LOGOPS_N_INLINE(d, s1, s2, n, operation)    \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s1 = (s1);                             \
    mp_srcptr  __s2 = (s2);                             \
    mp_size_t  __n = (n);                               \
    ASSERT (__n >= 1);                                  \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s1, __n));   \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s2, __n));   \
    do                                                  \
      operation;                                        \
    while (--__n);                                      \
  } while (0)
|
|
#if HAVE_NATIVE_mpn_and_n
#define mpn_and_n __MPN(and_n)
void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_and_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & *__s2++)
#endif

#if HAVE_NATIVE_mpn_andn_n
#define mpn_andn_n __MPN(andn_n)
void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_andn_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & ~*__s2++)
#endif

#if HAVE_NATIVE_mpn_nand_n
#define mpn_nand_n __MPN(nand_n)
void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nand_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~ (*__s1++ & *__s2++))
#endif

#if HAVE_NATIVE_mpn_ior_n
#define mpn_ior_n __MPN(ior_n)
void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_ior_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | *__s2++)
#endif

#if HAVE_NATIVE_mpn_iorn_n
#define mpn_iorn_n __MPN(iorn_n)
void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_iorn_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | ~*__s2++)
#endif

#if HAVE_NATIVE_mpn_nior_n
#define mpn_nior_n __MPN(nior_n)
void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nior_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~ (*__s1++ | *__s2++))
#endif

#if HAVE_NATIVE_mpn_xor_n
#define mpn_xor_n __MPN(xor_n)
void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xor_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ ^ *__s2++)
#endif

#if HAVE_NATIVE_mpn_xnor_n
#define mpn_xnor_n __MPN(xnor_n)
void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xnor_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~ (*__s1++ ^ *__s2++))
#endif
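/* Illustrative sketch (not part of GMP): what mpn_and_n expands to via
   MPN_LOGOPS_N_INLINE, written as a plain function over 32-bit limbs with
   the asserts and nail handling dropped.  Types simplified to uint32_t. */
#include <stdint.h>
#include <stddef.h>
static void
demo_and_n (uint32_t *d, const uint32_t *s1, const uint32_t *s2, size_t n)
{
  /* n >= 1; d may equal s1/s2 or be fully separate, as the asserts require */
  do
    *d++ = *s1++ & *s2++;
  while (--n);
}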
|
|
|
|
|
/* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that |
|
subtract. */ |
|
#if GMP_NAIL_BITS == 0 |
|
#define SUBC_LIMB(cout, w, x, y) \ |
|
do { \ |
|
mp_limb_t __x = (x); \ |
|
mp_limb_t __y = (y); \ |
|
mp_limb_t __w = __x - __y; \ |
|
(w) = __w; \ |
|
(cout) = __w > __x; \ |
|
} while (0) |
|
#else |
|
#define SUBC_LIMB(cout, w, x, y) \ |
|
do { \ |
|
mp_limb_t __w = (x) - (y); \ |
|
(w) = __w & GMP_NUMB_MASK; \ |
|
(cout) = __w >> (GMP_LIMB_BITS-1); \ |
|
} while (0) |
|
#endif |
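/* Illustrative sketch (not part of GMP): the no-nails SUBC_LIMB rule above,
   on 32-bit values.  The difference wraps modulo 2^32, and a borrow occurred
   exactly when the wrapped result is larger than the minuend. */
#include <assert.h>
#include <stdint.h>
static void
demo_subc_limb (void)
{
  uint32_t x = 5, y = 7;
  uint32_t w = x - y;           /* wraps to 0xFFFFFFFE */
  int cout = w > x;             /* borrow out */
  assert (cout == 1 && w == 0xFFFFFFFEu);
}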
|
|
|
|
|
/* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both |
|
expecting no carry (or borrow) from that. |
|
|
|
The size parameter is only for the benefit of assertion checking. In a |
|
normal build it's unused and the carry/borrow is just propagated as far |
|
as it needs to go. |
|
|
|
On random data, usually only one or two limbs of {ptr,size} get updated, |
|
so there's no need for any sophisticated looping, just something compact |
|
and sensible. |
|
|
|
FIXME: Do the generic MPN_{INCR,DECR}_U with a block of code like |
|
mpn_incr_u but with the assertions built in, rather than the separate |
|
add_1 and sub_1 when assertion checking. |
|
|
|
FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U, |
|
declaring their operand sizes, then remove the former. This is purely |
|
for the benefit of assertion checking. */ |
|
|
|
#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0 \ |
|
&& BITS_PER_MP_LIMB == 32 && ! defined (NO_ASM) && ! WANT_ASSERT |
|
/* Better flags handling than the generic C gives on i386, saving a few |
|
bytes of code and maybe a cycle or two. */ |
|
|
|
#define MPN_IORD_U(ptr, incr, aors) \ |
|
do { \ |
|
mp_ptr __ptr_dummy; \ |
|
if (__builtin_constant_p (incr) && (incr) == 1) \ |
|
{ \ |
|
__asm__ __volatile__ \ |
|
("\n" ASM_L(top) ":\n" \ |
|
"\t" aors " $1, (%0)\n" \ |
|
"\tleal 4(%0),%0\n" \ |
|
"\tjc " ASM_L(top) \ |
|
: "=r" (__ptr_dummy) \ |
|
: "0" (ptr) \ |
|
: "memory"); \ |
|
} \ |
|
else \ |
|
{ \ |
|
__asm__ __volatile__ \ |
|
( aors " %2,(%0)\n" \ |
|
"\tjnc " ASM_L(done) "\n" \ |
|
ASM_L(top) ":\n" \ |
|
"\t" aors " $1,4(%0)\n" \ |
|
"\tleal 4(%0),%0\n" \ |
|
"\tjc " ASM_L(top) "\n" \ |
|
ASM_L(done) ":\n" \ |
|
: "=r" (__ptr_dummy) \ |
|
: "0" (ptr), \ |
|
"ri" (incr) \ |
|
: "memory"); \ |
|
} \ |
|
} while (0) |
|
|
|
#define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl") |
|
#define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl") |
|
#define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr) |
|
#define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr) |
|
#endif |
|
|
|
#if GMP_NAIL_BITS == 0 |
|
#ifndef mpn_incr_u |
|
#define mpn_incr_u(p,incr) \ |
|
do { \ |
|
mp_limb_t __x; \ |
|
mp_ptr __p = (p); \ |
|
if (__builtin_constant_p (incr) && (incr) == 1) \ |
|
{ \ |
|
while (++(*(__p++)) == 0) \ |
|
; \ |
|
} \ |
|
else \ |
|
{ \ |
|
__x = *__p + (incr); \ |
|
*__p = __x; \ |
|
if (__x < (incr)) \ |
|
while (++(*(++__p)) == 0) \ |
|
; \ |
|
} \ |
|
} while (0) |
|
#endif |
|
#ifndef mpn_decr_u |
|
#define mpn_decr_u(p,incr) \ |
|
do { \ |
|
mp_limb_t __x; \ |
|
mp_ptr __p = (p); \ |
|
if (__builtin_constant_p (incr) && (incr) == 1) \ |
|
{ \ |
|
while ((*(__p++))-- == 0) \ |
|
; \ |
|
} \ |
|
else \ |
|
{ \ |
|
__x = *__p; \ |
|
*__p = __x - (incr); \ |
|
if (__x < (incr)) \ |
|
while ((*(++__p))-- == 0) \ |
|
; \ |
|
} \ |
|
} while (0) |
|
#endif |
|
#endif |
|
|
|
#if GMP_NAIL_BITS >= 1 |
|
#ifndef mpn_incr_u |
|
#define mpn_incr_u(p,incr) \ |
|
do { \ |
|
mp_limb_t __x; \ |
|
mp_ptr __p = (p); \ |
|
if (__builtin_constant_p (incr) && (incr) == 1) \ |
|
{ \ |
|
do \ |
|
{ \ |
|
__x = (*__p + 1) & GMP_NUMB_MASK; \ |
|
*__p++ = __x; \ |
|
} \ |
|
while (__x == 0); \ |
|
} \ |
|
else \ |
|
{ \ |
|
__x = (*__p + (incr)); \ |
|
*__p++ = __x & GMP_NUMB_MASK; \ |
|
if (__x >> GMP_NUMB_BITS != 0) \ |
|
{ \ |
|
do \ |
|
{ \ |
|
__x = (*__p + 1) & GMP_NUMB_MASK; \ |
|
*__p++ = __x; \ |
|
} \ |
|
while (__x == 0); \ |
|
} \ |
|
} \ |
|
} while (0) |
|
#endif |
|
#ifndef mpn_decr_u |
|
#define mpn_decr_u(p,incr) \ |
|
do { \ |
|
mp_limb_t __x; \ |
|
mp_ptr __p = (p); \ |
|
if (__builtin_constant_p (incr) && (incr) == 1) \ |
|
{ \ |
|
do \ |
|
{ \ |
|
__x = *__p; \ |
|
*__p++ = (__x - 1) & GMP_NUMB_MASK; \ |
|
} \ |
|
while (__x == 0); \ |
|
} \ |
|
else \ |
|
{ \ |
|
__x = *__p - (incr); \ |
|
*__p++ = __x & GMP_NUMB_MASK; \ |
|
if (__x >> GMP_NUMB_BITS != 0) \ |
|
{ \ |
|
do \ |
|
{ \ |
|
__x = *__p; \ |
|
*__p++ = (__x - 1) & GMP_NUMB_MASK; \ |
|
} \ |
|
while (__x == 0); \ |
|
} \ |
|
} \ |
|
} while (0) |
|
#endif |
|
#endif |
|
|
|
#ifndef MPN_INCR_U |
|
#if WANT_ASSERT |
|
#define MPN_INCR_U(ptr, size, n) \ |
|
do { \ |
|
ASSERT ((size) >= 1); \ |
|
ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \ |
|
} while (0) |
|
#else |
|
#define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n) |
|
#endif |
|
#endif |
|
|
|
#ifndef MPN_DECR_U |
|
#if WANT_ASSERT |
|
#define MPN_DECR_U(ptr, size, n) \ |
|
do { \ |
|
ASSERT ((size) >= 1); \ |
|
ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \ |
|
} while (0) |
|
#else |
|
#define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n) |
|
#endif |
|
#endif |
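/* Illustrative sketch (not part of GMP): the carry propagation pattern used
   by the incr==1 branch of mpn_incr_u above, on 32-bit limbs with no nails.
   The caller must guarantee the carry dies out before the end of the array. */
#include <assert.h>
#include <stdint.h>
static void
demo_incr_u (void)
{
  uint32_t a[3] = { 0xFFFFFFFFu, 0xFFFFFFFFu, 7 };
  uint32_t *p = a;
  while (++(*(p++)) == 0)       /* step on while each limb wraps to zero */
    ;
  assert (a[0] == 0 && a[1] == 0 && a[2] == 8);
}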
|
|
|
|
/* Structure for conversion between internal binary format and
   strings in base 2..36.  */
struct bases
{
  /* Number of digits in the conversion base that always fit in an mp_limb_t. */
  int chars_per_limb;

  /* log(2)/log(conversion_base) */
  double chars_per_bit_exactly;

  /* base**chars_per_limb, i.e. the biggest number that fits a word, built
     by factors of base. */
  mp_limb_t big_base;

  /* A fixed-point approximation to 1/big_base. */
  mp_limb_t big_base_inverted;
};

#define   mp_bases  __MPN(bases)
#define __mp_bases  __MPN(bases)
__GMP_DECLSPEC extern const struct bases  mp_bases[257];
|
|
/* mp_bases[10] values, generated by mpn/mp_bases.c */
|
#if GMP_NUMB_BITS == 4 |
|
#define MP_BASES_CHARS_PER_LIMB_10 1 |
|
#define MP_BASES_BIG_BASE_10 CNST_LIMB(0xa) |
|
#define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x9) |
|
#define MP_BASES_NORMALIZATION_STEPS_10 0 |
|
#endif |
|
#if GMP_NUMB_BITS == 8 |
|
#define MP_BASES_CHARS_PER_LIMB_10 2 |
|
#define MP_BASES_BIG_BASE_10 CNST_LIMB(0x64) |
|
#define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x47) |
|
#define MP_BASES_NORMALIZATION_STEPS_10 1 |
|
#endif |
|
#if GMP_NUMB_BITS == 16 |
|
#define MP_BASES_CHARS_PER_LIMB_10 4 |
|
#define MP_BASES_BIG_BASE_10 CNST_LIMB(0x2710) |
|
#define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0xa36e) |
|
#define MP_BASES_NORMALIZATION_STEPS_10 2 |
|
#endif |
|
#if GMP_NUMB_BITS == 28 |
|
#define MP_BASES_CHARS_PER_LIMB_10 8 |
|
#define MP_BASES_BIG_BASE_10 CNST_LIMB(0x5f5e100) |
|
#define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x5798ee23) |
|
#define MP_BASES_NORMALIZATION_STEPS_10 5 |
|
#endif |
|
#if GMP_NUMB_BITS == 30 |
|
#define MP_BASES_CHARS_PER_LIMB_10 9 |
|
#define MP_BASES_BIG_BASE_10 CNST_LIMB(0x3b9aca00) |
|
#define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x12e0be82) |
|
#define MP_BASES_NORMALIZATION_STEPS_10 2 |
|
#endif |
|
#if GMP_NUMB_BITS == 32 |
|
#define MP_BASES_CHARS_PER_LIMB_10 9 |
|
#define MP_BASES_BIG_BASE_10 CNST_LIMB(0x3b9aca00) |
|
#define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x12e0be82) |
|
#define MP_BASES_NORMALIZATION_STEPS_10 2 |
|
#endif |
|
#if GMP_NUMB_BITS == 60 |
|
#define MP_BASES_CHARS_PER_LIMB_10 18 |
|
#define MP_BASES_BIG_BASE_10 CNST_LIMB(0xde0b6b3a7640000) |
|
#define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x2725dd1d243aba0e) |
|
#define MP_BASES_NORMALIZATION_STEPS_10 4 |
|
#endif |
|
#if GMP_NUMB_BITS == 62 |
|
#define MP_BASES_CHARS_PER_LIMB_10 18 |
|
#define MP_BASES_BIG_BASE_10 CNST_LIMB(0xde0b6b3a7640000) |
|
#define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x2725dd1d243aba0e) |
|
#define MP_BASES_NORMALIZATION_STEPS_10 4 |
|
#endif |
|
#if GMP_NUMB_BITS == 64 |
|
#define MP_BASES_CHARS_PER_LIMB_10 19 |
|
#define MP_BASES_BIG_BASE_10 CNST_LIMB(0x8ac7230489e80000) |
|
#define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0xd83c94fb6d2ac34a) |
|
#define MP_BASES_NORMALIZATION_STEPS_10 0 |
|
#endif |
|
|
|
|
|
/* For power of 2 bases this is exact. For other bases the result is either |
|
exact or one too big. |
|
|
|
To be exact always it'd be necessary to examine all the limbs of the |
|
operand, since numbers like 100..000 and 99...999 generally differ only |
|
in the lowest limb. It'd be possible to examine just a couple of high |
|
limbs to increase the probability of being exact, but that doesn't seem |
|
worth bothering with. */ |
|
|
|
#define MPN_SIZEINBASE(result, ptr, size, base) \ |
|
do { \ |
|
int __lb_base, __cnt; \ |
|
mp_size_t __totbits; \ |
|
\ |
|
ASSERT ((size) >= 0); \ |
|
ASSERT ((base) >= 2); \ |
|
ASSERT ((base) < numberof (mp_bases)); \ |
|
\ |
|
/* Special case for X == 0. */ \ |
|
if ((size) == 0) \ |
|
(result) = 1; \ |
|
else \ |
|
{ \ |
|
/* Calculate the total number of significant bits of X. */ \ |
|
count_leading_zeros (__cnt, (ptr)[(size)-1]); \ |
|
__totbits = (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \ |
|
\ |
|
if (POW2_P (base)) \ |
|
{ \ |
|
__lb_base = mp_bases[base].big_base; \ |
|
(result) = (__totbits + __lb_base - 1) / __lb_base; \ |
|
} \ |
|
else \ |
|
(result) = (size_t) \ |
|
(__totbits * mp_bases[base].chars_per_bit_exactly) + 1; \ |
|
} \ |
|
} while (0) |
|
|
|
/* eliminate mp_bases lookups for base==16 */ |
|
#define MPN_SIZEINBASE_16(result, ptr, size) \ |
|
do { \ |
|
int __cnt; \ |
|
mp_size_t __totbits; \ |
|
\ |
|
ASSERT ((size) >= 0); \ |
|
\ |
|
/* Special case for X == 0. */ \ |
|
if ((size) == 0) \ |
|
(result) = 1; \ |
|
else \ |
|
{ \ |
|
/* Calculate the total number of significant bits of X. */ \ |
|
count_leading_zeros (__cnt, (ptr)[(size)-1]); \ |
|
__totbits = (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \ |
|
(result) = (__totbits + 4 - 1) / 4; \ |
|
} \ |
|
} while (0) |
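/* Illustrative sketch (not part of GMP): the base-16 estimate above, as a
   plain function on 32-bit limbs with no nails.  Assumes size >= 1 and a
   non-zero most significant limb; count_leading_zeros is done by shifting. */
#include <stdint.h>
#include <stddef.h>
static size_t
demo_sizeinbase_16 (const uint32_t *p, size_t size)
{
  uint32_t high = p[size - 1];
  size_t totbits = size * 32;
  while ((high & 0x80000000u) == 0)
    {
      high <<= 1;
      totbits--;
    }
  return (totbits + 4 - 1) / 4;
}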
|
|
|
|
|
#if HAVE_HOST_CPU_FAMILY_x86
#define TARGET_REGISTER_STARVED 1
#else
#define TARGET_REGISTER_STARVED 0
#endif
/* Use a library function for invert_limb, if available. */ |
/* Use a library function for invert_limb, if available. */
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define mpn_invert_limb  __MPN(invert_limb)
mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
#define invert_limb(invxl,xl)  (invxl = mpn_invert_limb (xl))
#endif
|
|
#ifndef invert_limb
#define invert_limb(invxl,xl)                   \
  do {                                          \
    mp_limb_t dummy;                            \
    ASSERT ((xl) != 0);                         \
    if (xl << 1 == 0)                           \
      invxl = ~(mp_limb_t) 0;                   \
    else                                        \
      udiv_qrnnd (invxl, dummy, -xl, 0, xl);    \
  } while (0)
#endif
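/* Illustrative sketch (not part of GMP): the value invert_limb produces for
   a normalized 32-bit divisor d (high bit set) is floor((B*B-1)/d) - B with
   B = 2^32.  Here the udiv_qrnnd step is replaced by plain 64-bit division. */
#include <assert.h>
#include <stdint.h>
static uint32_t
demo_invert_limb_32 (uint32_t d)
{
  assert (d & 0x80000000u);     /* d must be normalized */
  return (uint32_t) (0xFFFFFFFFFFFFFFFFull / d - 0x100000000ull);
}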
|
|
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
   If this would yield overflow, DI should be the largest possible number
   (i.e., only ones).  For correct operation, the most significant bit of D
   has to be set.  Put the quotient in Q and the remainder in R.  */
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di)                    \
  do {                                                            \
    mp_limb_t _q, _ql, _r;                                        \
    mp_limb_t _xh, _xl;                                           \
    ASSERT ((d) != 0);                                            \
    umul_ppmm (_q, _ql, (nh), (di));                              \
    _q += (nh);  /* DI is 2**BITS_PER_MP_LIMB too small */        \
    umul_ppmm (_xh, _xl, _q, (d));                                \
    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                   \
    if (_xh != 0)                                                 \
      {                                                           \
        sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                    \
        _q += 1;                                                  \
        if (_xh != 0)                                             \
          {                                                       \
            sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                \
            _q += 1;                                              \
          }                                                       \
      }                                                           \
    if (_r >= (d))                                                \
      {                                                           \
        _r -= (d);                                                \
        _q += 1;                                                  \
      }                                                           \
    (r) = _r;                                                     \
    (q) = _q;                                                     \
  } while (0)
/* Like udiv_qrnnd_preinv, but for any value D.  DNORM is D shifted left
   so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
  } while (0)
|
|
|
|
/* modlimb_invert() sets "inv" to the multiplicative inverse of "n" modulo |
#define mpn_preinv_divrem_1 __MPN(preinv_divrem_1) |
2^BITS_PER_MP_LIMB, ie. so that inv*n == 1 mod 2^BITS_PER_MP_LIMB. |
mp_limb_t mpn_preinv_divrem_1 _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)); |
"n" must be odd (otherwise such an inverse doesn't exist). |
|
|
|
|
|
|
/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to |
|
the plain mpn_divrem_1. Likewise USE_PREINV_MOD_1 chooses between |
|
mpn_preinv_mod_1 and plain mpn_mod_1. The default for both is yes, since |
|
the few CISC chips where preinv is not good have defines saying so. */ |
|
#ifndef USE_PREINV_DIVREM_1 |
|
#define USE_PREINV_DIVREM_1 1 |
|
#endif |
|
#ifndef USE_PREINV_MOD_1 |
|
#define USE_PREINV_MOD_1 1 |
|
#endif |
|
|
|
#if USE_PREINV_DIVREM_1 |
|
#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \ |
|
mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift) |
|
#else |
|
#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \ |
|
mpn_divrem_1 (qp, xsize, ap, size, d) |
|
#endif |
|
|
|
#if USE_PREINV_MOD_1 |
|
#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \ |
|
mpn_preinv_mod_1 (src, size, divisor, inverse) |
|
#else |
|
#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \ |
|
mpn_mod_1 (src, size, divisor) |
|
#endif |
|
|
|
|
|
#define mpn_mod_34lsub1 __MPN(mod_34lsub1) |
|
mp_limb_t mpn_mod_34lsub1 _PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE; |
|
|
|
|
|
/* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to |
|
plain mpn_divrem_1. Likewise MODEXACT_1_ODD_THRESHOLD for |
|
mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and |
|
modexact are faster at all sizes, so the defaults are 0. Those CPUs |
|
where this is not right have a tuned threshold. */ |
|
#ifndef DIVEXACT_1_THRESHOLD |
|
#define DIVEXACT_1_THRESHOLD 0 |
|
#endif |
|
#ifndef MODEXACT_1_ODD_THRESHOLD |
|
#define MODEXACT_1_ODD_THRESHOLD 0 |
|
#endif |
|
|
|
#define mpn_divexact_1 __MPN(divexact_1) |
|
void mpn_divexact_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); |
|
|
|
#define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor) \ |
|
do { \ |
|
if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD)) \ |
|
ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \ |
|
else \ |
|
{ \ |
|
ASSERT (mpn_mod_1 (src, size, divisor) == 0); \ |
|
mpn_divexact_1 (dst, src, size, divisor); \ |
|
} \ |
|
} while (0) |
|
|
|
#define mpn_modexact_1c_odd __MPN(modexact_1c_odd) |
|
mp_limb_t mpn_modexact_1c_odd _PROTO ((mp_srcptr src, mp_size_t size, |
|
mp_limb_t divisor, mp_limb_t c)) __GMP_ATTRIBUTE_PURE; |
|
|
|
#if HAVE_NATIVE_mpn_modexact_1_odd |
|
#define mpn_modexact_1_odd __MPN(modexact_1_odd) |
|
mp_limb_t mpn_modexact_1_odd _PROTO ((mp_srcptr src, mp_size_t size, |
|
mp_limb_t divisor)) __GMP_ATTRIBUTE_PURE; |
|
#else |
|
#define mpn_modexact_1_odd(src,size,divisor) \ |
|
mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0)) |
|
#endif |
|
|
|
#define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \ |
|
(ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD) \ |
|
? mpn_modexact_1_odd (src, size, divisor) \ |
|
: mpn_mod_1 (src, size, divisor)) |
|
|
|
|
|
/* modlimb_invert() sets inv to the multiplicative inverse of n modulo |
|
2^BITS_PER_MP_LIMB, ie. satisfying inv*n == 1 mod 2^BITS_PER_MP_LIMB. |
|
n must be odd (otherwise such an inverse doesn't exist). |
|
|
   This is not to be confused with invert_limb(), which is completely
   different.

   The table lookup gives an inverse with the low 8 bits valid, and each
   multiply step doubles the number of bits.  See Jebelean "An algorithm for
   exact division" end of section 4 (reference in gmp.texi).
|
|
|
Possible enhancement: Could use UHWtype until the last step, if half-size |
|
multiplies are faster (might help under _LONG_LONG_LIMB). |
|
|
|
Alternative: As noted in Granlund and Montgomery "Division by Invariant |
|
Integers using Multiplication" (reference in gmp.texi), n itself gives a |
|
3-bit inverse immediately, and could be used instead of a table lookup. |
|
A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into |
|
bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */ |
|
|
#define modlimb_invert_table  __gmp_modlimb_invert_table
__GMP_DECLSPEC extern const unsigned char  modlimb_invert_table[128];
|
|
|
#if BITS_PER_MP_LIMB <= 8 |
|
#define modlimb_invert(inv,n) \ |
|
do { \ |
|
mp_limb_t __n = (n); \ |
|
mp_limb_t __inv; \ |
|
ASSERT ((__n & 1) == 1); \ |
|
__inv = modlimb_invert_table[(__n/2) & 0x7F]; /* 8 */ \ |
|
ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \ |
|
(inv) = __inv & GMP_NUMB_MASK; \ |
|
} while (0) |
|
#else |
|
#if BITS_PER_MP_LIMB <= 16 |
|
#define modlimb_invert(inv,n) \ |
|
do { \ |
|
mp_limb_t __n = (n); \ |
|
mp_limb_t __inv; \ |
|
ASSERT ((__n & 1) == 1); \ |
|
__inv = modlimb_invert_table[(__n/2) & 0x7F]; /* 8 */ \ |
|
__inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \ |
|
ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \ |
|
(inv) = __inv & GMP_NUMB_MASK; \ |
|
} while (0) |
|
#else |
#if BITS_PER_MP_LIMB <= 32
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 16 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 32 */      \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                \
    (inv) = __inv & GMP_NUMB_MASK;                              \
  } while (0)
#else
#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
#define modlimb_invert(inv,n)                                   \
  do {                                                          \
    mp_limb_t  __n = (n);                                       \
    mp_limb_t  __inv;                                           \
    ASSERT ((__n & 1) == 1);                                    \
    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 16 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 32 */      \
    __inv = 2 * __inv - __inv * __inv * __n;      /* 64 */      \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                \
    (inv) = __inv & GMP_NUMB_MASK;                              \
  } while (0)
|
#endif /* 64 */ |
|
#endif /* 32 */ |
|
#endif /* 16 */ |
|
#endif /* 8 */ |
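/* Illustrative sketch (not part of GMP): the same Newton doubling as
   modlimb_invert, but seeded from n itself (any odd n is its own inverse
   mod 8, as the comment above notes) instead of the table, for a 32-bit
   limb.  Four steps take 3 correct bits to 48 >= 32. */
#include <assert.h>
#include <stdint.h>
static uint32_t
demo_modlimb_invert_32 (uint32_t n)
{
  uint32_t inv = n;                        /*  3 bits, since n*n == 1 mod 8 */
  inv = 2 * inv - inv * inv * n;           /*  6 bits */
  inv = 2 * inv - inv * inv * n;           /* 12 bits */
  inv = 2 * inv - inv * inv * n;           /* 24 bits */
  inv = 2 * inv - inv * inv * n;           /* 48 bits, enough for 32 */
  assert ((uint32_t) (inv * n) == 1);
  return inv;
}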
|
|
|
|
|
/* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB. |
|
0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */ |
|
#define MODLIMB_INVERSE_3 ((GMP_NUMB_MAX / 3) * 2 + 1) |
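/* Worked check of the formula (assuming a 32-bit limb with no nails):
   GMP_NUMB_MAX/3 is 0x55555555, times 2 plus 1 gives 0xAAAAAAAB, and
   3 * 0xAAAAAAAB is 0x200000001 which is 1 mod 2^32, so it is indeed the
   inverse of 3. */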
|
|
|
|
|
/* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs. |
|
|
|
It's not clear whether this is the best way to do this calculation. |
|
Anything congruent to -a would be fine for the one limb congruence |
|
tests. */ |
|
|
|
#define NEG_MOD(r, a, d) \ |
|
do { \ |
|
ASSERT ((d) != 0); \ |
|
ASSERT_LIMB (a); \ |
|
ASSERT_LIMB (d); \ |
|
\ |
|
if ((a) <= (d)) \ |
|
{ \ |
|
/* small a is reasonably likely */ \ |
|
(r) = (d) - (a); \ |
|
} \ |
|
else \ |
|
{ \ |
|
unsigned __twos; \ |
|
mp_limb_t __dnorm; \ |
|
count_leading_zeros (__twos, d); \ |
|
__twos -= GMP_NAIL_BITS; \ |
|
__dnorm = (d) << __twos; \ |
|
(r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \ |
|
} \ |
|
\ |
|
ASSERT_LIMB (r); \ |
|
} while (0) |
|
|
|
/* A bit mask of all the least significant zero bits of n, or -1 if n==0. */ |
|
#define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1) |
|
|
|
|
|
/* Set "p" to 1 if there's an odd number of 1 bits in "n", or to 0 if |
|
there's an even number. */ |
|
|
|
#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86 |
|
#define ULONG_PARITY(p, n) \ |
|
do { \ |
|
char __p; \ |
|
unsigned long __n = (n); \ |
|
__n ^= (__n >> 16); \ |
|
asm ("xorb %h1, %b1\n" \ |
|
"setpo %0\n" \ |
|
: "=qm" (__p), "=q" (__n) \ |
|
: "1" (__n)); \ |
|
(p) = __p; \ |
|
} while (0) |
|
#else |
|
#define ULONG_PARITY(p, n) \ |
|
do { \ |
|
unsigned long __n = (n); \ |
|
int __p = 0; \ |
|
do \ |
|
{ \ |
|
__p ^= 0x96696996L >> (__n & 0x1F); \ |
|
__n >>= 5; \ |
|
} \ |
|
while (__n != 0); \ |
|
\ |
|
(p) = __p; \ |
|
} while (0) |
#endif
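/* Illustrative sketch (not part of GMP): a shift-and-fold parity function
   equivalent in effect to the generic ULONG_PARITY above, written for a
   32-bit value. */
#include <stdint.h>
static int
demo_parity_32 (uint32_t n)
{
  n ^= n >> 16;
  n ^= n >> 8;
  n ^= n >> 4;
  n ^= n >> 2;
  n ^= n >> 1;
  return (int) (n & 1);
}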
|
|
|
|
/* bswap is available on i486 and up and is fast.  A combination rorw $8 /
   roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
   kernel with xchgb instead of rorw), but this is not done here, because
   i386 means generic x86 and mixing word and dword operations will cause
   partial register stalls on P6 chips.  */
#if defined (__GNUC__) && ! defined (NO_ASM)            \
  && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386   \
  && BITS_PER_MP_LIMB == 32
|
#define BSWAP_LIMB(dst, src) \ |
|
do { \ |
|
asm ("bswap %0" : "=r" (dst) : "0" (src)); \ |
|
} while (0) |
|
#endif /* x86 */ |
|
|
|
#if defined (__GNUC__) && ! defined (NO_ASM) \ |
|
&& defined (__ia64) && BITS_PER_MP_LIMB == 64 |
|
#define BSWAP_LIMB(dst, src) \ |
|
do { \ |
|
asm ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \ |
|
} while (0) |
|
#endif |
|
|
|
#if ! defined (BSWAP_LIMB) |
|
#if BITS_PER_MP_LIMB == 8 |
|
#define BSWAP_LIMB(dst, src) \ |
|
do { (dst) = (src); } while (0) |
|
#endif |
|
#if BITS_PER_MP_LIMB == 16 |
|
#define BSWAP_LIMB(dst, src) \ |
|
do { \ |
|
(dst) = ((src) << 8) + ((src) >> 8); \ |
|
} while (0) |
|
#endif |
|
#if BITS_PER_MP_LIMB == 32 |
|
#define BSWAP_LIMB(dst, src) \ |
|
do { \ |
|
(dst) = \ |
|
((src) << 24) \ |
|
+ (((src) & 0xFF00) << 8) \ |
|
+ (((src) >> 8) & 0xFF00) \ |
|
+ ((src) >> 24); \ |
|
} while (0) |
|
#endif |
|
#if BITS_PER_MP_LIMB == 64 |
|
#define BSWAP_LIMB(dst, src) \ |
|
do { \ |
|
(dst) = \ |
|
((src) << 56) \ |
|
+ (((src) & 0xFF00) << 40) \ |
|
+ (((src) & 0xFF0000) << 24) \ |
|
+ (((src) & 0xFF000000) << 8) \ |
|
+ (((src) >> 8) & 0xFF000000) \ |
|
+ (((src) >> 24) & 0xFF0000) \ |
|
+ (((src) >> 40) & 0xFF00) \ |
|
+ ((src) >> 56); \ |
|
} while (0) |
|
#endif |
|
#endif |
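/* Illustrative sketch (not part of GMP): the generic 32-bit BSWAP_LIMB
   formula above as a function; e.g. 0x12345678 becomes 0x78563412. */
#include <stdint.h>
static uint32_t
demo_bswap_32 (uint32_t x)
{
  return (x << 24) + ((x & 0xFF00) << 8) + ((x >> 8) & 0xFF00) + (x >> 24);
}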
|
|
|
|
|
/* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to |
|
those we know are fast. */ |
|
#if defined (__GNUC__) && ! defined (NO_ASM) \ |
|
&& BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN \ |
|
&& (HAVE_HOST_CPU_powerpc604 \ |
|
|| HAVE_HOST_CPU_powerpc604e \ |
|
|| HAVE_HOST_CPU_powerpc750 \ |
|
|| HAVE_HOST_CPU_powerpc7400) |
|
#define BSWAP_LIMB_FETCH(limb, src) \ |
|
do { \ |
|
mp_srcptr __blf_src = (src); \ |
|
mp_limb_t __limb; \ |
|
__asm__ ("lwbrx %0, 0, %1" \ |
|
: "=r" (__limb) \ |
|
: "r" (__blf_src), \ |
|
"m" (*__blf_src)); \ |
|
(limb) = __limb; \ |
|
} while (0) |
|
#endif |
|
|
|
#if ! defined (BSWAP_LIMB_FETCH) |
|
#define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src)) |
|
#endif |
|
|
|
|
|
/* On the same basis that lwbrx might be slow, restrict stwbrx to those we |
|
know are fast. FIXME: Is this necessary? */ |
|
#if defined (__GNUC__) && ! defined (NO_ASM) \ |
|
&& BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN \ |
|
&& (HAVE_HOST_CPU_powerpc604 \ |
|
|| HAVE_HOST_CPU_powerpc604e \ |
|
|| HAVE_HOST_CPU_powerpc750 \ |
|
|| HAVE_HOST_CPU_powerpc7400) |
|
#define BSWAP_LIMB_STORE(dst, limb) \ |
|
do { \ |
|
mp_ptr __dst = (dst); \ |
|
mp_limb_t __limb = (limb); \ |
|
__asm__ ("stwbrx %1, 0, %2" \ |
|
: "=m" (*__dst) \ |
|
: "r" (__limb), \ |
|
"r" (__dst)); \ |
|
} while (0) |
|
#endif |
|
|
|
#if ! defined (BSWAP_LIMB_STORE) |
|
#define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb) |
|
#endif |
|
|
|
|
|
/* Byte swap limbs from {src,size} and store at {dst,size}. */ |
|
#define MPN_BSWAP(dst, src, size) \ |
|
do { \ |
|
mp_ptr __dst = (dst); \ |
|
mp_srcptr __src = (src); \ |
|
mp_size_t __size = (size); \ |
|
mp_size_t __i; \ |
|
ASSERT ((size) >= 0); \ |
|
ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \ |
|
CRAY_Pragma ("_CRI ivdep"); \ |
|
for (__i = 0; __i < __size; __i++) \ |
|
{ \ |
|
BSWAP_LIMB_FETCH (*__dst, __src); \ |
|
__dst++; \ |
|
__src++; \ |
|
} \ |
|
} while (0) |
|
|
|
/* Byte swap limbs from {dst,size} and store in reverse order at {src,size}. */ |
|
#define MPN_BSWAP_REVERSE(dst, src, size) \ |
|
do { \ |
|
mp_ptr __dst = (dst); \ |
|
mp_size_t __size = (size); \ |
|
mp_srcptr __src = (src) + __size - 1; \ |
|
mp_size_t __i; \ |
|
ASSERT ((size) >= 0); \ |
|
ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \ |
|
CRAY_Pragma ("_CRI ivdep"); \ |
|
for (__i = 0; __i < __size; __i++) \ |
|
{ \ |
|
BSWAP_LIMB_FETCH (*__dst, __src); \ |
|
__dst++; \ |
|
__src--; \ |
|
} \ |
|
} while (0) |
|
|
|
|
|
/* No processor claiming to be SPARC v9 compliant seems to |
|
implement the POPC instruction. Disable pattern for now. */ |
|
#if 0 |
|
#if defined __GNUC__ && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64 |
|
#define popc_limb(result, input) \ |
|
do { \ |
|
DItype __res; \ |
|
asm ("popc %1,%0" : "=r" (result) : "rI" (input)); \ |
|
} while (0) |
|
#endif |
|
#endif |
|
|
|
/* Cool population count of an mp_limb_t. |
|
You have to figure out how this works, We won't tell you! |
|
|
|
The constants could also be expressed as: |
|
0xAA... = [2^(N+1) / 3] = [(2^N-1)/3*2] |
|
0x33... = [2^N / 5] = [(2^N-1)/5] |
|
0x0f... = [2^N / 17] = [(2^N-1)/17] |
|
(N is BITS_PER_MP_LIMB, [] denotes truncation.) */ |
|
|
|
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 64 |
|
#define popc_limb(result, input) \ |
|
do { \ |
|
mp_limb_t __x = (input); \ |
|
__x -= (__x & CNST_LIMB(0xaaaaaaaaaaaaaaaa)) >> 1; \ |
|
__x = ((__x >> 2) & CNST_LIMB(0x3333333333333333)) \ |
|
+ (__x & CNST_LIMB(0x3333333333333333)); \ |
|
__x = ((__x >> 4) + __x) & CNST_LIMB(0x0f0f0f0f0f0f0f0f); \ |
|
__x = ((__x >> 8) + __x); \ |
|
__x = ((__x >> 16) + __x); \ |
|
__x = ((__x >> 32) + __x) & 0xff; \ |
|
(result) = __x; \ |
|
} while (0) |
|
#endif |
|
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 32 |
|
#define popc_limb(result, input) \ |
|
do { \ |
|
mp_limb_t __x = (input); \ |
|
__x -= (__x & 0xaaaaaaaaL) >> 1; \ |
|
__x = ((__x >> 2) & 0x33333333L) + (__x & 0x33333333L); \ |
|
__x = ((__x >> 4) + __x) & 0x0f0f0f0fL; \ |
|
__x = ((__x >> 8) + __x); \ |
|
__x = ((__x >> 16) + __x) & 0xff; \ |
|
(result) = __x; \ |
|
} while (0) |
|
#endif |
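/* Illustrative sketch (not part of GMP): the 32-bit popc_limb steps above as
   a plain function, e.g. demo_popcount_32 (0xFF0F) == 12. */
#include <stdint.h>
static unsigned
demo_popcount_32 (uint32_t x)
{
  x -= (x & 0xaaaaaaaaUL) >> 1;
  x = ((x >> 2) & 0x33333333UL) + (x & 0x33333333UL);
  x = ((x >> 4) + x) & 0x0f0f0f0fUL;
  x = (x >> 8) + x;
  x = ((x >> 16) + x) & 0xff;
  return (unsigned) x;
}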
|
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 16 |
|
#define popc_limb(result, input) \ |
|
do { \ |
|
mp_limb_t __x = (input); \ |
|
__x -= (__x & 0xaaaa) >> 1; \ |
|
__x = ((__x >> 2) & 0x3333) + (__x & 0x3333); \ |
|
__x = ((__x >> 4) + __x) & 0x0f0f; \ |
|
__x = ((__x >> 8) + __x) & 0xff; \ |
|
(result) = __x; \ |
|
} while (0) |
|
#endif |
|
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 8 |
|
#define popc_limb(result, input) \ |
|
do { \ |
|
mp_limb_t __x = (input); \ |
|
__x -= (__x & 0xaa) >> 1; \ |
|
__x = ((__x >> 2) & 0x33) + (__x & 0x33); \ |
|
__x = ((__x >> 4) + __x) & 0xf; \ |
|
(result) = __x; \ |
|
} while (0) |
|
#endif |
|
#if ! defined (popc_limb) && BITS_PER_MP_LIMB == 4 |
|
#define popc_limb(result, input) \ |
|
do { \ |
|
mp_limb_t __x = (input); \ |
|
__x = (__x & 1) + ((__x >> 1) & 1) + ((__x >> 2) & 1) + ((__x >> 3) & 1); \ |
|
(result) = __x; \ |
|
} while (0) |
|
#endif |
|
|
|
|
/* Define stuff for longlong.h.  */
#if HAVE_ATTRIBUTE_MODE
typedef unsigned int UQItype    __attribute__ ((mode (QI)));
typedef          int SItype     __attribute__ ((mode (SI)));
typedef unsigned int USItype    __attribute__ ((mode (SI)));
typedef          int DItype     __attribute__ ((mode (DI)));
typedef unsigned int UDItype    __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
typedef          long SItype;
typedef unsigned long USItype;
#if HAVE_LONG_LONG
typedef long long int DItype;
typedef unsigned long long int UDItype;
#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w. */
    || defined (__clipper__)                            \
    || defined (__cris)                                 \
    || defined (__i386__)                               \
    || defined (__x86_64__)                             \
    || defined (__i860__)                               \
    || defined (__i960__)                               \
    || defined (__ia64) && ! defined (__hpux)           \
    || defined (MIPSEL) || defined (_MIPSEL)            \
    || defined (__ns32000__)                            \
    || defined (__WINNT) || defined (_WIN32)
  double d;
};
#else /* Need this as an #else since the tests aren't made exclusive. */
|
#if defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\ |
|
|| defined(mc68020) |
|
#define _GMP_IEEE_FLOATS 1 |
|
union ieee_double_extract |
|
{ |
|
struct |
|
{ |
|
/* "int" might be only 16 bits, so use "long" */ |
|
unsigned long sig:1; |
|
unsigned long exp:11; |
|
unsigned long manh:20; |
|
unsigned long manl:32; |
|
} s; |
|
double d; |
|
}; |
|
#else |
#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__)                  \
 || defined (__a29k__) || defined (_AM29K)                             \
 || defined (__arm__)                                                  \
 || (defined (__convex__) && defined (_IEEE_FLOAT_))                   \
 || defined (_CRAYMPP) || defined (_CRAYIEEE)                          \
 || defined (__i370__) || defined (__mvs__)                            \
 || defined (__m88000__)                                               \
 || defined (MIPSEB) || defined (_MIPSEB)                              \
 || defined (__hppa) || defined (__hppa__)                             \
 || defined (__ibm032__)                                               \
 || defined (_IBMR2) || defined (_ARCH_PPC)                            \
 || defined (__sh__)                                                   \
 || defined (__sparc) || defined (sparc) || defined (__sparc__)        \
 || defined (__sparc_v9) || defined (__sparc_v9__)                     \
 || defined (__we32k__)
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      unsigned int sig:1;
      unsigned int exp:11;
      unsigned int manh:20;
      unsigned int manl:32;
    } s;
  double d;
};
#endif
#endif
#endif
|
|
/* Using "(2.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)))" doesn't work on |
|
SunOS 4.1.4 native /usr/ucb/cc (K&R), it comes out as -4294967296.0, |
|
presumably due to treating the mp_limb_t constant as signed rather than |
|
unsigned. */ |
|
#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 2))) |
|
#if BITS_PER_MP_LIMB == 64 |
|
#define LIMBS_PER_DOUBLE 2 |
|
#else |
|
#define LIMBS_PER_DOUBLE 3 |
|
#endif |
#endif |
|
|
double __gmp_scale2 _PROTO ((double, int)); |
/* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers |
|
that don't convert ulong->double correctly (eg. SunOS 4 native cc). */ |
|
#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2))) |
|
/* Maximum number of limbs it will take to store any `double'. |
|
   We assume doubles have 53 mantissa bits.  */
|
#define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS + 1) |
|
|
|
double __gmp_scale2 _PROTO ((double, int)) ATTRIBUTE_CONST; |
int __gmp_extract_double _PROTO ((mp_ptr, double));

extern int __gmp_junk;
extern const int __gmp_0;
void __gmp_exception _PROTO ((int)) ATTRIBUTE_NORETURN;
void __gmp_divide_by_zero _PROTO ((void)) ATTRIBUTE_NORETURN;
void __gmp_sqrt_of_negative _PROTO ((void)) ATTRIBUTE_NORETURN;
#define GMP_ERROR(code)    __gmp_exception (code)
#define DIVIDE_BY_ZERO     __gmp_divide_by_zero ()
#define SQRT_OF_NEGATIVE   __gmp_sqrt_of_negative ()
|
|
#if defined _LONG_LONG_LIMB
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##LL)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
#endif
#else /* not _LONG_LONG_LIMB */
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##L)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/L)
#endif
#endif /* _LONG_LONG_LIMB */
|
|
/* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
#if GMP_NUMB_BITS == 2
#define PP 0x3                                  /* 3 */
|
#define PP_FIRST_OMITTED 5 |
|
#endif |
|
#if GMP_NUMB_BITS == 4 |
|
#define PP 0xF /* 3 x 5 */ |
|
#define PP_FIRST_OMITTED 7 |
|
#endif |
|
#if GMP_NUMB_BITS == 8 |
|
#define PP 0x69 /* 3 x 5 x 7 */ |
|
#define PP_FIRST_OMITTED 11 |
|
#endif |
|
#if GMP_NUMB_BITS == 16 |
|
#define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */ |
|
#define PP_FIRST_OMITTED 17 |
|
#endif |
|
#if GMP_NUMB_BITS == 32 |
|
#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... x 29 */ |
#define PP_INVERTED 0x53E5645CL |
#define PP_FIRST_OMITTED 31 |
|
#endif |
|
#if GMP_NUMB_BITS == 64 |
#define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x 13 x ... x 53 */ |
|
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B) |
#define PP_FIRST_OMITTED 59 |
|
#endif |
|
#ifndef PP_FIRST_OMITTED |
|
#define PP_FIRST_OMITTED 3 |
|
#endif |
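/* Illustrative sketch (not part of GMP): the typical use of a product of
   small primes like PP.  One remainder r = n mod PP is computed (in GMP by
   mpn_mod_1 or mpn_preinv_mod_1 over all the limbs), and then n mod 3,
   n mod 5, ... fall out of r cheaply because each of those primes divides
   PP.  Shown here for the 32-bit constant with plain types. */
#include <stdint.h>
#include <stddef.h>
static int
demo_has_small_factor (uint64_t n)
{
  static const uint32_t primes[] = { 3, 5, 7, 11, 13, 17, 19, 23, 29 };
  uint32_t r = (uint32_t) (n % 0xC0CFD797UL);   /* n mod PP */
  size_t i;
  for (i = 0; i < sizeof primes / sizeof primes[0]; i++)
    if (r % primes[i] == 0)
      return 1;
  return 0;
}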
|
|
|
|
|
|
/* BIT1 means a result value in bit 1 (second least significant bit), with a
   zero bit representing +1 and a one bit representing -1.  Bits other than
   bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
   used to ensure the expressions are "int"s even if a and/or b might be
   other types.

   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
   and their speed is important.  Expressions are used rather than
   conditionals, so the compiler can generate branch-free code
   instead of conditional JUMPs.  */
|
|
/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))

/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_U0(a)   ((a) == 1)

/* (a/0), with a given by low and size;
   is 1 if a=+/-1, 0 otherwise */
#define JACOBI_LS0(alow,asize) \
  (((asize) == 1 || (asize) == -1) && (alow) == 1)
|
|
|
|
/* (a/0), with a an mpz_t; |
|
fetch of low limb always valid, even if size is zero */ |
|
#define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a)) |
|
|
|
/* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */ |
|
#define JACOBI_0U(b) ((b) == 1) |
|
|
|
/* (0/b), with b signed; is 1 if b=+/-1, 0 otherwise */
|
#define JACOBI_0S(b) ((b) == 1 || (b) == -1) |
|
|
|
/* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */ |
|
#define JACOBI_0LS(blow,bsize) \ |
|
(((bsize) == 1 || (bsize) == -1) && (blow) == 1) |
|
|
/* Convert a bit1 to +1 or -1. */
#define JACOBI_BIT1_TO_PN(result_bit1) \
  (1 - ((int) (result_bit1) & 2))
|
|
/* (2/b), with b unsigned and odd;
   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
   hence obtained from (b>>1)^b */
#define JACOBI_TWO_U_BIT1(b) \
  ((int) (((b) >> 1) ^ (b)))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U_BIT1(twos, b) \
  ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))

/* (2/b)^twos, with b unsigned and odd */
#define JACOBI_TWOS_U(twos, b) \
  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
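/* Illustrative sketch (not part of GMP): checking the (2/b) rule above.
   For odd b, bit 1 of (b>>1)^b is ((b^2-1)/8) mod 2, so converting with
   JACOBI_BIT1_TO_PN gives +1 for b == 1,7 mod 8 and -1 for b == 3,5 mod 8. */
#include <assert.h>
static int
demo_jacobi_two (unsigned b)            /* b odd */
{
  int bit1 = (int) ((b >> 1) ^ b);      /* JACOBI_TWO_U_BIT1 */
  return 1 - (bit1 & 2);                /* JACOBI_BIT1_TO_PN */
}
/* demo_jacobi_two(7) == 1, demo_jacobi_two(3) == -1, demo_jacobi_two(5) == -1 */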
|
|
|
/* (-1/b), with b odd (signed or unsigned); |
|
is (-1)^((b-1)/2) */ |
|
#define JACOBI_N1B_BIT1(b) \ |
|
((int) (b)) |
|
|
/* (a/b) effect due to sign of a: signed/unsigned, b odd;
   is (-1/b) if a<0, or +1 if a>=0 */
#define JACOBI_ASGN_SU_BIT1(a, b) \
  ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
|
|
|
/* (a/b) effect due to sign of b: signed/signed; |
|
is -1 if a and b both negative, +1 otherwise */ |
|
#define JACOBI_BSGN_SS_BIT1(a, b) \ |
|
((((a)<0) & ((b)<0)) << 1) |
|
|
/* (a/b) effect due to sign of b: signed/mpz;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_SZ_BIT1(a, b) \
  JACOBI_BSGN_SS_BIT1 (a, SIZ(b))

/* (a/b) effect due to sign of b: mpz/signed;
   is -1 if a and b both negative, +1 otherwise */
#define JACOBI_BSGN_ZS_BIT1(a, b) \
  JACOBI_BSGN_SZ_BIT1 (b, a)
|
|
/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
   is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   valid. */
#define JACOBI_RECIP_UU_BIT1(a, b) \
  ((int) ((a) & (b)))
|
|
|
|
|
/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or |
|
modexact_1_odd, but in either case leaving a_rem<b. b must be odd and |
|
unsigned. modexact_1_odd effectively calculates -a mod b, and |
|
result_bit1 is adjusted for the factor of -1. |
|
|
|
The way mpn_modexact_1_odd sometimes bases its remainder on a_size and |
|
sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what |
|
factor to introduce into result_bit1, so for that case use mpn_mod_1 |
|
unconditionally. |
|
|
|
FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used |
|
for odd GMP_NUMB_BITS would be good. Perhaps it could mung its result, |
|
or not skip a divide step, or something. */ |
|
|
|
#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \ |
|
do { \ |
|
mp_srcptr __a_ptr = (a_ptr); \ |
|
mp_size_t __a_size = (a_size); \ |
|
mp_limb_t __b = (b); \ |
|
\ |
|
ASSERT (__a_size >= 1); \ |
|
ASSERT (__b & 1); \ |
|
\ |
|
if ((GMP_NUMB_BITS % 2) != 0 \ |
|
|| BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD)) \ |
|
{ \ |
|
(a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \ |
|
} \ |
|
else \ |
|
{ \ |
|
(result_bit1) ^= JACOBI_N1B_BIT1 (__b); \ |
|
(a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \ |
|
} \ |
|
} while (0) |
|
|
|
|
|
/* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole |
|
limb and adds an extra limb. __GMPF_PREC_TO_BITS drops that extra limb, |
|
hence giving back the user's size in bits rounded up. Notice that |
|
converting prec->bits->prec gives an unchanged value. */ |
|
#define __GMPF_BITS_TO_PREC(n) \ |
|
((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS)) |
|
#define __GMPF_PREC_TO_BITS(n) \ |
|
((unsigned long) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS) |
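/* Worked example (assuming GMP_NUMB_BITS == 32): __GMPF_BITS_TO_PREC(53)
   = (53 + 63) / 32 = 3 limbs, and __GMPF_PREC_TO_BITS(3) = 3*32 - 32 = 64
   bits; feeding 64 back in gives (64 + 63) / 32 = 3 again, so the
   prec -> bits -> prec round trip is indeed unchanged. */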
|
|
|
extern mp_size_t __gmp_default_fp_limb_precision; |
|
|
|
|
|
/* Set n to the number of significant digits an mpf with the given _mp_prec
   field can have in the given base.  This is a rounded-up value, designed to
   ensure there are enough digits to reproduce all the guaranteed part of the
   value.
|
|
|
There are prec many limbs, but the high might be only "1" so forget it |
|
and just count prec-1 limbs into chars. +1 rounds that upwards, and a |
|
further +1 is because the limbs usually won't fall on digit boundaries. |
|
|
|
FIXME: If base is a power of 2 and the bits per digit divides |
|
BITS_PER_MP_LIMB then the +2 is unnecessary. This happens always for |
|
base==2, and in base==16 with the current 32 or 64 bit limb sizes. */ |
|
|
|
#define MPF_SIGNIFICANT_DIGITS(n, base, prec) \ |
|
do { \ |
|
ASSERT (base >= 2 && base < numberof (mp_bases)); \ |
|
(n) = 2 + (size_t) ((((prec) - 1) * BITS_PER_MP_LIMB) \ |
|
* mp_bases[(base)].chars_per_bit_exactly); \ |
|
} while (0) |
|
|
|
|
|
#define DOPRNT_CONV_FIXED 1 |
|
#define DOPRNT_CONV_SCIENTIFIC 2 |
|
#define DOPRNT_CONV_GENERAL 3 |
|
|
|
#define DOPRNT_JUSTIFY_NONE 0 |
|
#define DOPRNT_JUSTIFY_LEFT 1 |
|
#define DOPRNT_JUSTIFY_RIGHT 2 |
|
#define DOPRNT_JUSTIFY_INTERNAL 3 |
|
|
|
#define DOPRNT_SHOWBASE_YES 1 |
|
#define DOPRNT_SHOWBASE_NO 2 |
|
#define DOPRNT_SHOWBASE_NONZERO 3 |
|
|
|
struct doprnt_params_t { |
|
int base; /* negative for upper case */ |
|
int conv; /* choices above */ |
|
const char *expfmt; /* exponent format */ |
|
int exptimes4; /* exponent multiply by 4 */ |
|
char fill; /* character */ |
|
int justify; /* choices above */ |
|
int prec; /* prec field, or -1 for all digits */ |
|
int showbase; /* choices above */ |
|
int showpoint; /* if radix point always shown */ |
|
int showtrailing; /* if trailing zeros wanted */ |
|
char sign; /* '+', ' ', or '\0' */ |
|
int width; /* width field */ |
|
}; |
|
|
|
#if _GMP_H_HAVE_VA_LIST |
|
|
|
typedef int (*doprnt_format_t) _PROTO ((void *data, const char *fmt, va_list ap)); |
|
typedef int (*doprnt_memory_t) _PROTO ((void *data, const char *str, size_t len)); |
|
typedef int (*doprnt_reps_t) _PROTO ((void *data, int c, int reps)); |
|
typedef int (*doprnt_final_t) _PROTO ((void *data)); |
|
|
|
struct doprnt_funs_t { |
|
doprnt_format_t format; |
|
doprnt_memory_t memory; |
|
doprnt_reps_t reps; |
|
doprnt_final_t final; /* NULL if not required */ |
|
}; |
|
|
|
extern const struct doprnt_funs_t __gmp_fprintf_funs; |
|
extern const struct doprnt_funs_t __gmp_sprintf_funs; |
|
extern const struct doprnt_funs_t __gmp_snprintf_funs; |
|
extern const struct doprnt_funs_t __gmp_obstack_printf_funs; |
|
extern const struct doprnt_funs_t __gmp_ostream_funs; |
|
|
|
/* "buf" is a __gmp_allocate_func block of "alloc" many bytes. The first |
|
"size" of these have been written. "alloc > size" is maintained, so |
|
there's room to store a '\0' at the end. "result" is where the |
|
application wants the final block pointer. */ |
|
struct gmp_asprintf_t { |
|
char **result; |
|
char *buf; |
|
size_t size; |
|
size_t alloc; |
|
}; |
|
|
|
#define GMP_ASPRINTF_T_INIT(d, output) \ |
|
do { \ |
|
(d).result = (output); \ |
|
(d).alloc = 256; \ |
|
(d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \ |
|
(d).size = 0; \ |
|
} while (0) |
|
|
|
/* If a realloc is necessary, use twice the size actually required, so as to
   avoid repeated small reallocs.  */
#define GMP_ASPRINTF_T_NEED(d, n)                                       \
  do {                                                                  \
    size_t  alloc, newsize, newalloc;                                   \
    ASSERT ((d)->alloc >= (d)->size + 1);                               \
                                                                        \
    alloc = (d)->alloc;                                                 \
    newsize = (d)->size + (n);                                          \
    if (alloc <= newsize)                                               \
      {                                                                 \
        newalloc = 2*newsize;                                           \
        (d)->alloc = newalloc;                                          \
        (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,                \
                                               alloc, newalloc, char);  \
      }                                                                 \
  } while (0)
|
|
|
__GMP_DECLSPEC int __gmp_asprintf_memory _PROTO ((struct gmp_asprintf_t *d, const char *str, size_t len)); |
|
__GMP_DECLSPEC int __gmp_asprintf_reps _PROTO ((struct gmp_asprintf_t *d, int c, int reps)); |
|
__GMP_DECLSPEC int __gmp_asprintf_final _PROTO ((struct gmp_asprintf_t *d)); |
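
/* Hedged usage sketch of the accumulator above: initialize, append pieces,
   then finalize.  Exactly what __gmp_asprintf_final does with the buffer
   beyond '\0' terminating it and storing it through "result" is an
   assumption here, so treat this as illustration only (disabled with
   #if 0).  */
#if 0
void
example_asprintf_use (char **result)
{
  struct gmp_asprintf_t  d;
  GMP_ASPRINTF_T_INIT (d, result);
  __gmp_asprintf_memory (&d, "hello", 5);   /* append 5 bytes         */
  __gmp_asprintf_reps (&d, '!', 3);         /* append three '!'       */
  __gmp_asprintf_final (&d);                /* terminate, set *result */
}
#endif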
|
|
|
/* buf is where to write the next output, and size is how much space is left
   there.  If the application passed size==0 then that's what we'll have
   here, and nothing at all should be written. */
|
struct gmp_snprintf_t { |
|
char *buf; |
|
size_t size; |
|
}; |
|
|
|
/* Add the bytes printed by the call to the total retval, or bail out on an
   error.  */
#define DOPRNT_ACCUMULATE(call)         \
  do {                                  \
    int __ret;                          \
    __ret = call;                       \
    if (__ret == -1)                    \
      goto error;                       \
    retval += __ret;                    \
  } while (0)
#define DOPRNT_ACCUMULATE_FUN(fun, params)      \
  do {                                          \
    ASSERT ((fun) != NULL);                     \
    DOPRNT_ACCUMULATE ((*(fun)) params);        \
  } while (0)
|
|
|
#define DOPRNT_FORMAT(fmt, ap)                          \
  DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
#define DOPRNT_MEMORY(ptr, len)                         \
  DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
#define DOPRNT_REPS(c, n)                               \
  DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))

#define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))

#define DOPRNT_REPS_MAYBE(c, n)         \
  do {                                  \
    if ((n) != 0)                       \
      DOPRNT_REPS (c, n);               \
  } while (0)
#define DOPRNT_MEMORY_MAYBE(ptr, len)   \
  do {                                  \
    if ((len) != 0)                     \
      DOPRNT_MEMORY (ptr, len);         \
  } while (0)
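
/* The macros above assume a routine with "funs", "data", a running "retval"
   and an "error" label in scope, in the style of __gmp_doprnt.  A hedged
   sketch of that pattern follows; it is not code from the library and is
   disabled with #if 0.  */
#if 0
#include <string.h>
static int
example_output (const struct doprnt_funs_t *funs, void *data)
{
  int retval = 0;
  DOPRNT_STRING ("value=");     /* funs->memory, bails to "error" on -1 */
  DOPRNT_REPS ('0', 4);         /* four padding characters              */
  if (funs->final != NULL && (*funs->final) (data) == -1)
    goto error;
  return retval;
 error:
  return -1;
}
#endif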
|
|
|
__GMP_DECLSPEC int __gmp_doprnt _PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list)); |
|
__GMP_DECLSPEC int __gmp_doprnt_integer _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *)); |
|
__GMP_DECLSPEC int __gmp_doprnt_mpf _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, mpf_srcptr)); |
|
/* FIXME: why is there no __GMP_DECLSPEC here? */
|
int __gmp_replacement_vsnprintf _PROTO ((char *, size_t, const char *, va_list)); |
|
#endif /* _GMP_H_HAVE_VA_LIST */ |
|
|
|
|
|
typedef int (*gmp_doscan_scan_t) _PROTO ((void *, const char *, ...)); |
|
typedef void *(*gmp_doscan_step_t) _PROTO ((void *, int)); |
|
typedef int (*gmp_doscan_get_t) _PROTO ((void *)); |
|
typedef int (*gmp_doscan_unget_t) _PROTO ((int, void *)); |
|
|
|
struct gmp_doscan_funs_t { |
|
gmp_doscan_scan_t scan; |
|
gmp_doscan_step_t step; |
|
gmp_doscan_get_t get; |
|
gmp_doscan_unget_t unget; |
|
}; |
|
extern const struct gmp_doscan_funs_t __gmp_fscanf_funs; |
|
extern const struct gmp_doscan_funs_t __gmp_sscanf_funs; |
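
/* Hedged sketch of stdio-based callbacks matching the typedefs above, with a
   FILE* passed as the opaque pointer.  What "step" is expected to do is an
   assumption here (treated as a no-op for a FILE), this is not the actual
   __gmp_fscanf_funs definition, and vfscanf is C99.  Disabled with #if 0.  */
#if 0
#include <stdarg.h>
#include <stdio.h>

static int
example_scan (void *data, const char *fmt, ...)
{
  va_list  ap;
  int      ret;
  va_start (ap, fmt);
  ret = vfscanf ((FILE *) data, fmt, ap);
  va_end (ap);
  return ret;
}
static void *
example_step (void *data, int n)
{
  (void) n;                     /* a FILE* advances by itself */
  return data;
}
static int
example_get (void *data)
{
  return getc ((FILE *) data);
}
static int
example_unget (int c, void *data)
{
  return ungetc (c, (FILE *) data);
}

static const struct gmp_doscan_funs_t  example_scan_funs =
  { example_scan, example_step, example_get, example_unget };
#endif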
|
|
|
#if _GMP_H_HAVE_VA_LIST |
|
int __gmp_doscan _PROTO ((const struct gmp_doscan_funs_t *, void *,
                          const char *, va_list));
|
#endif |
|
|
|
|
/* For testing and debugging. */
#define MPZ_CHECK_FORMAT(z)                                     \
  do {                                                          \
    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);   \
    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));                       \
    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));                       \
  } while (0)
|
|
|
#define MPQ_CHECK_FORMAT(q)                             \
  do {                                                  \
    MPZ_CHECK_FORMAT (mpq_numref (q));                  \
    MPZ_CHECK_FORMAT (mpq_denref (q));                  \
    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);            \
                                                        \
    if (SIZ(mpq_numref(q)) == 0)                        \
      {                                                 \
        /* should have zero as 0/1 */                   \
        ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1          \
                       && PTR(mpq_denref(q))[0] == 1);  \
      }                                                 \
    else                                                \
      {                                                 \
        /* should have no common factors */             \
        mpz_t  g;                                       \
        mpz_init (g);                                   \
        mpz_gcd (g, mpq_numref(q), mpq_denref(q));      \
        ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);         \
        mpz_clear (g);                                  \
      }                                                 \
  } while (0)
|
|
|
#define MPF_CHECK_FORMAT(f)                             \
  do {                                                  \
    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);              \
    if (SIZ(f) == 0)                                    \
      ASSERT_ALWAYS (EXP(f) == 0);                      \
    if (SIZ(f) != 0)                                    \
      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);        \
  } while (0)
|
|
|
|
|
#define MPZ_PROVOKE_REALLOC(z)  \
  do { ALLOC(z) = ABSIZ(z); } while (0)
|
|
|
|
extern mp_size_t mul_threshold[];
|
extern mp_size_t fft_modf_mul_threshold;
extern mp_size_t sqr_threshold[];
extern mp_size_t fft_modf_sqr_threshold;
extern mp_size_t sb_preinv_threshold[];
extern mp_size_t dc_threshold[];
extern mp_size_t powm_threshold[];
extern mp_size_t gcd_accel_threshold[];
extern mp_size_t gcdext_threshold[];
extern mp_size_t divrem_1_norm_threshold[];
extern mp_size_t divrem_1_unnorm_threshold[];
extern mp_size_t divrem_2_threshold[];
extern mp_size_t mod_1_norm_threshold[];
extern mp_size_t mod_1_unnorm_threshold[];
extern mp_size_t get_str_basecase_threshold[];
extern mp_size_t get_str_precompute_threshold[];
|
|
#undef MUL_KARATSUBA_THRESHOLD
#undef MUL_TOOM3_THRESHOLD
#undef MUL_FFT_TABLE
#undef MUL_FFT_THRESHOLD
#undef MUL_FFT_MODF_THRESHOLD
#undef SQR_BASECASE_THRESHOLD
#undef SQR_KARATSUBA_THRESHOLD
#undef SQR_TOOM3_THRESHOLD
#undef SQR_FFT_TABLE
#undef SQR_FFT_THRESHOLD
#undef SQR_FFT_MODF_THRESHOLD
#undef DIV_DC_THRESHOLD
#undef POWM_THRESHOLD
#undef GCD_ACCEL_THRESHOLD
#undef GCDEXT_THRESHOLD
#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef GET_STR_DC_THRESHOLD
#undef GET_STR_PRECOMPUTE_THRESHOLD
|
|
#define MUL_KARATSUBA_THRESHOLD        mul_threshold[0]
#define MUL_TOOM3_THRESHOLD            mul_threshold[1]
#define MUL_FFT_TABLE                  { 0 }
#define MUL_FFT_THRESHOLD              mul_threshold[2]
#define MUL_FFT_MODF_THRESHOLD         fft_modf_mul_threshold
#define SQR_BASECASE_THRESHOLD         sqr_threshold[0]
#define SQR_KARATSUBA_THRESHOLD        sqr_threshold[1]
#define SQR_TOOM3_THRESHOLD            sqr_threshold[2]
#define SQR_FFT_TABLE                  { 0 }
#define SQR_FFT_THRESHOLD              sqr_threshold[3]
#define SQR_FFT_MODF_THRESHOLD         fft_modf_sqr_threshold
#define DIV_DC_THRESHOLD               dc_threshold[0]
#define POWM_THRESHOLD                 powm_threshold[0]
#define GCD_ACCEL_THRESHOLD            gcd_accel_threshold[0]
#define GCDEXT_THRESHOLD               gcdext_threshold[0]
#define DIVREM_1_NORM_THRESHOLD        divrem_1_norm_threshold[0]
#define DIVREM_1_UNNORM_THRESHOLD      divrem_1_unnorm_threshold[0]
#define MOD_1_NORM_THRESHOLD           mod_1_norm_threshold[0]
#define MOD_1_UNNORM_THRESHOLD         mod_1_unnorm_threshold[0]
#define GET_STR_DC_THRESHOLD           get_str_basecase_threshold[0]
#define GET_STR_PRECOMPUTE_THRESHOLD   get_str_precompute_threshold[0]
|
|
#if ! UDIV_PREINV_ALWAYS
#undef DIV_SB_PREINV_THRESHOLD
#undef DIVREM_2_THRESHOLD
#define DIV_SB_PREINV_THRESHOLD   sb_preinv_threshold[0]
#define DIVREM_2_THRESHOLD        divrem_2_threshold[0]
#endif

/* Sizes the tune program tests up to, used in a couple of recompilations. */
#define SQR_KARATSUBA_MAX_GENERIC  200
#define MUL_TOOM3_THRESHOLD_LIMIT  700
#define GET_STR_THRESHOLD_LIMIT    500
|
|
#undef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
|
|
|
#if TUNE_PROGRAM_BUILD_SQR |
|
#undef SQR_KARATSUBA_THRESHOLD |
|
#define SQR_KARATSUBA_THRESHOLD SQR_KARATSUBA_MAX_GENERIC |
|
#endif |
|
|
#endif /* TUNE_PROGRAM_BUILD */
|
|
#if defined (__cplusplus)
}
#endif
|
|
|
|
|
#ifdef __cplusplus |
|
|
|
/* A little helper for a null-terminated __gmp_allocate_func string. |
|
The destructor ensures it's freed even if an exception is thrown. */ |
|
class gmp_allocated_string { |
|
public: |
|
char *str; |
|
gmp_allocated_string(char *arg) { str = arg; } |
|
~gmp_allocated_string() { (*__gmp_free_func) (str, strlen(str)+1); } |
|
}; |
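
/* Hedged usage sketch: mpz_get_str with a NULL buffer returns a string
   allocated through the GMP allocation functions, strlen+1 bytes long, which
   is exactly what the destructor above frees, so wrapping it keeps the block
   from leaking if the stream insertion throws.  Illustration only, disabled
   with #if 0.  */
#if 0
void
example_print (std::ostream &o, mpz_srcptr z)
{
  gmp_allocated_string  s (mpz_get_str (NULL, 10, z));
  o << s.str;
}
#endif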
|
|
|
int __gmp_istream_set_base (std::istream &, char &, bool &, bool &); |
|
void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int); |
|
void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o); |
|
std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s); |
|
extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat; |
|
|
|
#endif /* __cplusplus */ |
|
|
|
#endif /* __GMP_IMPL_H__ */ |