[BACK]Return to common.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / tune

Diff for /OpenXM_contrib/gmp/tune/Attic/common.c between version 1.1 and 1.1.1.2

version 1.1, 2000/09/09 14:13:19 version 1.1.1.2, 2003/08/25 16:06:37
Line 1 
Line 1 
 /* Shared speed subroutines.  */  /* Shared speed subroutines.
   
 /*  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 Copyright (C) 1999, 2000 Free Software Foundation, Inc.  
   
 This file is part of the GNU MP Library.  This file is part of the GNU MP Library.
   
Line 18  License for more details.
Line 17  License for more details.
 You should have received a copy of the GNU Lesser General Public License  You should have received a copy of the GNU Lesser General Public License
 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 MA 02111-1307, USA.  MA 02111-1307, USA. */
 */  
   
 #include <errno.h>  #include <errno.h>
 #include <fcntl.h>  #include <fcntl.h>
Line 36  MA 02111-1307, USA.
Line 34  MA 02111-1307, USA.
 #include "gmp-impl.h"  #include "gmp-impl.h"
 #include "longlong.h"  #include "longlong.h"
   
   #include "tests.h"
 #include "speed.h"  #include "speed.h"
   
 /* Change this to "#define TRACE(x) x" to get traces. */  
 #define TRACE(x)  
   
   int   speed_option_addrs = 0;
   int   speed_option_verbose = 0;
   
 typedef int (*qsort_function_t) _PROTO ((const void *, const void *));  
   
   /* Provide __clz_tab even if it's not required, for the benefit of new code
      being tested with many.pl. */
   #ifndef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
   #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
   #include "mp_clz_tab.c"
   #undef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
   #endif
   
 int   speed_option_addrs = 0;  
   
   
 void  void
 pentium_wbinvd(void)  pentium_wbinvd(void)
 {  {
Line 90  pentium_wbinvd(void)
Line 93  pentium_wbinvd(void)
 #endif  #endif
 }  }
   
 static int  
   int
 double_cmp_ptr (const double *p, const double *q)  double_cmp_ptr (const double *p, const double *q)
 {  {
   if (*p > *q)  return 1;    if (*p > *q)  return 1;
Line 125  speed_measure (double (*fun) _PROTO ((struct speed_par
Line 129  speed_measure (double (*fun) _PROTO ((struct speed_par
   int     i, j, e;    int     i, j, e;
   double  t[30];    double  t[30];
   double  t_unsorted[30];    double  t_unsorted[30];
     double  reps_d;
   
   /* Use dummy parameters if caller doesn't provide any.  Only a few special    /* Use dummy parameters if caller doesn't provide any.  Only a few special
      "fun"s will cope with this, speed_noop() is one.  */       "fun"s will cope with this, speed_noop() is one.  */
Line 144  speed_measure (double (*fun) _PROTO ((struct speed_par
Line 149  speed_measure (double (*fun) _PROTO ((struct speed_par
           s->dst_num = 0;            s->dst_num = 0;
   
           t[i] = (*fun) (s);            t[i] = (*fun) (s);
           t_unsorted[i] = t[i];  
   
           TRACE (printf("size=%ld reps=%u r=%d attempt=%d  %.9f\n",            if (speed_option_verbose >= 3)
                         s->size, s->reps, s->r, i, t[i]));              printf("size=%ld reps=%u r=%ld attempt=%d  %.9f\n",
                      s->size, s->reps, s->r, i, t[i]);
   
           if (t[i] == -1.0)            if (t[i] == -1.0)
             return -1.0;              return -1.0;
Line 156  speed_measure (double (*fun) _PROTO ((struct speed_par
Line 161  speed_measure (double (*fun) _PROTO ((struct speed_par
             break;              break;
   
           /* go to a value of reps to make t[i] >= precision */            /* go to a value of reps to make t[i] >= precision */
           s->reps = (unsigned) ceil (1.1 * s->reps            reps_d = ceil (1.1 * s->reps
                                      * speed_unittime * speed_precision                           * speed_unittime * speed_precision
                                      / MAX (t[i], speed_unittime));                           / MAX (t[i], speed_unittime));
             if (reps_d > 2e9 || reps_d < 1.0)
               {
                 fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d);
                 fprintf (stderr, "  (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n",
                          s->reps, speed_unittime, speed_precision, t[i]);
                 abort ();
               }
             s->reps = (unsigned) reps_d;
         }          }
       t[i] /= s->reps;        t[i] /= s->reps;
         t_unsorted[i] = t[i];
   
       if (speed_precision == 0)        if (speed_precision == 0)
         return t[i];          return t[i];
Line 184  speed_measure (double (*fun) _PROTO ((struct speed_par
Line 198  speed_measure (double (*fun) _PROTO ((struct speed_par
   
   fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n",    fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n",
            e, (TOLERANCE-1.0)*100.0);             e, (TOLERANCE-1.0)*100.0);
   fprintf (stderr, "  %.12f is about 0.5%%\n", t[0]*(TOLERANCE-1.0));    fprintf (stderr, "    unsorted         sorted\n");
     fprintf (stderr, "  %.12f    %.12f    is about 0.5%%\n",
              t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0));
   for (i = 0; i < numberof (t); i++)    for (i = 0; i < numberof (t); i++)
     fprintf (stderr, "  %.09f\n", t_unsorted[i]);      fprintf (stderr, "  %.09f       %.09f\n", t_unsorted[i], t[i]);
   
   return -1.0;    return -1.0;
 }  }
Line 317  speed_cache_fill (struct speed_params *s)
Line 333  speed_cache_fill (struct speed_params *s)
 }  }
   
   
 /* Return p advanced to the next multiple of "align" bytes.  "align" must be  
    a power of 2.  Care is taken not to assume sizeof(int)==sizeof(pointer).  
    Using "unsigned long" avoids a warning on hpux.  */  
 void *  
 align_pointer (void *p, size_t align)  
 {  
   unsigned long  d;  
   d = ((unsigned long) p) & (align-1);  
   d = (d != 0 ? align-d : 0);  
   return (void *) (((char *) p) + d);  
 }  
   
 /* Note that memory allocated with this function can never be freed, because  
    the start address of the block allocated is discarded. */  
 void *  
 _mp_allocate_func_aligned (size_t bytes, size_t align)  
 {  
   return align_pointer ((*_mp_allocate_func) (bytes + align-1), align);  
 }  
   
   
 void *  
 _mp_allocate_or_reallocate (void *ptr, size_t oldsize, size_t newsize)  
 {  
   if (ptr == NULL)  
     return (*_mp_allocate_func) (newsize);  
   else  
     return (*_mp_reallocate_func) (ptr, oldsize, newsize);  
 }  
   
   
 /* Adjust ptr to align to CACHE_LINE_SIZE bytes plus "align" limbs.  ptr  /* Adjust ptr to align to CACHE_LINE_SIZE bytes plus "align" limbs.  ptr
    needs to have room for up to CACHE_LINE_SIZE-4 extra bytes.  */     needs to have room for up to CACHE_LINE_SIZE-4 extra bytes.  */
   
Line 368  speed_tmp_alloc_adjust (void *ptr, mp_size_t align)
Line 353  speed_tmp_alloc_adjust (void *ptr, mp_size_t align)
 }  }
   
   
 void  
 mpz_set_n (mpz_ptr z, mp_srcptr p, mp_size_t size)  
 {  
   ASSERT (size >= 0);  
   MPN_NORMALIZE (p, size);  
   MPZ_REALLOC (z, size);  
   MPN_COPY (PTR(z), p, size);  
   SIZ(z) = size;  
 }  
   
   
 /* Miscellaneous options accepted by tune and speed programs under -o. */  /* Miscellaneous options accepted by tune and speed programs under -o. */
   
 void  void
 speed_option_set (const char *s)  speed_option_set (const char *s)
 {  {
   if (strcmp (s, "addrs") == 0)  speed_option_addrs = 1;    int  n;
   
     if (strcmp (s, "addrs") == 0)
       {
         speed_option_addrs = 1;
       }
     else if (strcmp (s, "verbose") == 0)
       {
         speed_option_verbose++;
       }
     else if (sscanf (s, "verbose=%d", &n) == 1)
       {
         speed_option_verbose = n;
       }
   else    else
     {      {
       printf ("Unrecognised -o option: %s\n", s);        printf ("Unrecognised -o option: %s\n", s);
Line 439  speed_option_set (const char *s)
Line 426  speed_option_set (const char *s)
 double  double
 speed_MPN_COPY (struct speed_params *s)  speed_MPN_COPY (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_COPY_CALL (MPN_COPY (wp, s->xp, s->size));    SPEED_ROUTINE_MPN_COPY (MPN_COPY);
 }  }
 double  double
 speed_MPN_COPY_INCR (struct speed_params *s)  speed_MPN_COPY_INCR (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_COPY_CALL (MPN_COPY_INCR (wp, s->xp, s->size));    SPEED_ROUTINE_MPN_COPY (MPN_COPY_INCR);
 }  }
 double  double
 speed_MPN_COPY_DECR (struct speed_params *s)  speed_MPN_COPY_DECR (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_COPY_CALL (MPN_COPY_DECR (wp, s->xp, s->size));    SPEED_ROUTINE_MPN_COPY (MPN_COPY_DECR);
 }  }
   #if HAVE_NATIVE_mpn_copyi
 double  double
   speed_mpn_copyi (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_COPY (mpn_copyi);
   }
   #endif
   #if HAVE_NATIVE_mpn_copyd
   double
   speed_mpn_copyd (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_COPY (mpn_copyd);
   }
   #endif
   double
 speed_memcpy (struct speed_params *s)  speed_memcpy (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_COPY_CALL    SPEED_ROUTINE_MPN_COPY_BYTES (memcpy);
     (memcpy (wp, s->xp, s->size * BYTES_PER_MP_LIMB));  
 }  }
   double
   speed_mpn_com_n (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_COPY (mpn_com_n);
   }
   
   
 double  double
Line 476  speed_mpn_mul_1 (struct speed_params *s)
Line 481  speed_mpn_mul_1 (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_UNARY_1 (mpn_mul_1);    SPEED_ROUTINE_MPN_UNARY_1 (mpn_mul_1);
 }  }
   double
   speed_mpn_mul_1_inplace (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_mul_1);
   }
   
   #if HAVE_NATIVE_mpn_mul_2
   double
   speed_mpn_mul_2 (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_MUL_2 (mpn_mul_2);
   }
   #endif
   
   
 double  double
 speed_mpn_lshift (struct speed_params *s)  speed_mpn_lshift (struct speed_params *s)
 {  {
Line 517  speed_mpn_divrem_1cf (struct speed_params *s)
Line 535  speed_mpn_divrem_1cf (struct speed_params *s)
 #endif  #endif
   
 double  double
   speed_mpn_divrem_1_div (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_div);
   }
   double
   speed_mpn_divrem_1f_div (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_div);
   }
   double
   speed_mpn_divrem_1_inv (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_inv);
   }
   double
   speed_mpn_divrem_1f_inv (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_inv);
   }
   double
   speed_mpn_mod_1_div (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_div);
   }
   double
   speed_mpn_mod_1_inv (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_inv);
   }
   
   double
   speed_mpn_preinv_divrem_1 (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_PREINV_DIVREM_1 (mpn_preinv_divrem_1);
   }
   double
   speed_mpn_preinv_divrem_1f (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_PREINV_DIVREM_1F (mpn_preinv_divrem_1);
   }
   
   double
   speed_mpn_mod_34lsub1 (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_MOD_34LSUB1 (mpn_mod_34lsub1);
   }
   
   double
 speed_mpn_divrem_2 (struct speed_params *s)  speed_mpn_divrem_2 (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2);    SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2);
 }  }
   double
   speed_mpn_divrem_2_div (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_div);
   }
   double
   speed_mpn_divrem_2_inv (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_inv);
   }
   
 double  double
 speed_mpn_mod_1 (struct speed_params *s)  speed_mpn_mod_1 (struct speed_params *s)
Line 534  speed_mpn_mod_1c (struct speed_params *s)
Line 610  speed_mpn_mod_1c (struct speed_params *s)
   SPEED_ROUTINE_MPN_MOD_1C (mpn_mod_1c);    SPEED_ROUTINE_MPN_MOD_1C (mpn_mod_1c);
 }  }
 #endif  #endif
   double
   speed_mpn_preinv_mod_1 (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_PREINV_MOD_1 (mpn_preinv_mod_1);
   }
   
 double  double
   speed_mpn_divexact_1 (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_DIVEXACT_1 (mpn_divexact_1);
   }
   
   double
 speed_mpn_divexact_by3 (struct speed_params *s)  speed_mpn_divexact_by3 (struct speed_params *s)
 {  {
   /* mpn_divexact_by3 is a macro, so the _CALL form is necessary */    SPEED_ROUTINE_MPN_COPY (mpn_divexact_by3);
   SPEED_ROUTINE_MPN_COPY_CALL(mpn_divexact_by3 (wp, s->xp, s->size));  
 }  }
   
   #if HAVE_NATIVE_mpn_modexact_1_odd
   double
   speed_mpn_modexact_1_odd (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_MODEXACT_1_ODD (mpn_modexact_1_odd);
   }
   #endif
   
 double  double
 speed_mpn_bz_divrem_n (struct speed_params *s)  speed_mpn_modexact_1c_odd (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_BZ_DIVREM_N (mpn_bz_divrem_n);    SPEED_ROUTINE_MPN_MODEXACT_1C_ODD (mpn_modexact_1c_odd);
 }  }
   
   
 double  double
 speed_mpn_bz_divrem_sb (struct speed_params *s)  speed_mpn_dc_tdiv_qr (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_BZ_DIVREM_SB (mpn_sb_divrem_mn);    SPEED_ROUTINE_MPN_DC_TDIV_QR (mpn_tdiv_qr);
 }  }
 double  double
 speed_mpn_bz_tdiv_qr (struct speed_params *s)  speed_mpn_dc_divrem_n (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_BZ_TDIV_QR (mpn_tdiv_qr);    SPEED_ROUTINE_MPN_DC_DIVREM_N (mpn_dc_divrem_n);
 }  }
   double
   speed_mpn_dc_divrem_sb (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn);
   }
   double
   speed_mpn_dc_divrem_sb_div (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn_div);
   }
   double
   speed_mpn_dc_divrem_sb_inv (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn_inv);
   }
   
   double
   speed_mpn_sb_divrem_m3 (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn);
   }
   double
   speed_mpn_sb_divrem_m3_div (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn_div);
   }
   double
   speed_mpn_sb_divrem_m3_inv (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn_inv);
   }
   
 double  double
   speed_mpz_mod (struct speed_params *s)
   {
     SPEED_ROUTINE_MPZ_MOD (mpz_mod);
   }
   double
   speed_redc (struct speed_params *s)
   {
     SPEED_ROUTINE_REDC (redc);
   }
   
   
   double
 speed_mpn_popcount (struct speed_params *s)  speed_mpn_popcount (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_POPCOUNT (mpn_popcount);    SPEED_ROUTINE_MPN_POPCOUNT (mpn_popcount);
Line 582  speed_mpn_sub_n (struct speed_params *s)
Line 719  speed_mpn_sub_n (struct speed_params *s)
 {  {
 SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);  SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);
 }  }
 double  
 speed_mpn_add_n_self (struct speed_params *s)  
 {  
   SPEED_ROUTINE_MPN_BINARY_N_SELF (mpn_add_n);  
 }  
 double  
 speed_mpn_add_n_inplace (struct speed_params *s)  
 {  
   SPEED_ROUTINE_MPN_BINARY_N_INPLACE (mpn_add_n);  
 }  
   
   
 /* mpn_and_n etc can be macros and so have to be handled with  /* mpn_and_n etc can be macros and so have to be handled with
Line 666  speed_mpn_sqr_basecase (struct speed_params *s)
Line 793  speed_mpn_sqr_basecase (struct speed_params *s)
   SPEED_ROUTINE_MPN_SQR (mpn_sqr_basecase);    SPEED_ROUTINE_MPN_SQR (mpn_sqr_basecase);
 }  }
   
   #if HAVE_NATIVE_mpn_sqr_diagonal
 double  double
   speed_mpn_sqr_diagonal (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_SQR (mpn_sqr_diagonal);
   }
   #endif
   
   double
 speed_mpn_kara_mul_n (struct speed_params *s)  speed_mpn_kara_mul_n (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_KARA_MUL_N (mpn_kara_mul_n);    SPEED_ROUTINE_MPN_KARA_MUL_N (mpn_kara_mul_n);
Line 689  speed_mpn_toom3_sqr_n (struct speed_params *s)
Line 824  speed_mpn_toom3_sqr_n (struct speed_params *s)
 }  }
   
 double  double
   speed_mpn_toom3_mul_n_mpn (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_TOOM3_MUL_N (mpn_toom3_mul_n_mpn);
   }
   double
   speed_mpn_toom3_mul_n_open (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_TOOM3_MUL_N (mpn_toom3_mul_n_open);
   }
   double
   speed_mpn_toom3_sqr_n_mpn (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_TOOM3_SQR_N (mpn_toom3_sqr_n_mpn);
   }
   double
   speed_mpn_toom3_sqr_n_open (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_TOOM3_SQR_N (mpn_toom3_sqr_n_open);
   }
   
   double
 speed_mpn_mul_fft_full (struct speed_params *s)  speed_mpn_mul_fft_full (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_MUL_N_CALL    SPEED_ROUTINE_MPN_MUL_N_CALL
Line 764  speed_mpn_gcd (struct speed_params *s)
Line 920  speed_mpn_gcd (struct speed_params *s)
   SPEED_ROUTINE_MPN_GCD (mpn_gcd);    SPEED_ROUTINE_MPN_GCD (mpn_gcd);
 }  }
 double  double
   speed_mpn_gcd_binary (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_GCD (mpn_gcd_binary);
   }
   
   #if HAVE_NATIVE_mpn_gcd_finda
   double
   speed_mpn_gcd_finda (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_GCD_FINDA (mpn_gcd_finda);
   }
   #endif
   
   
   double
 speed_mpn_gcdext (struct speed_params *s)  speed_mpn_gcdext (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext);    SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext);
 }  }
 double  double
   speed_mpn_gcdext_single (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_single);
   }
   double
   speed_mpn_gcdext_double (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_double);
   }
   double
   speed_mpn_gcdext_one_single (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_single);
   }
   double
   speed_mpn_gcdext_one_double (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_double);
   }
   double
 speed_mpn_gcd_1 (struct speed_params *s)  speed_mpn_gcd_1 (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_GCD_1 (mpn_gcd_1);    SPEED_ROUTINE_MPN_GCD_1 (mpn_gcd_1);
 }  }
   double
   speed_mpn_gcd_1N (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_GCD_1N (mpn_gcd_1);
   }
   
   
 double  double
   speed_mpz_jacobi (struct speed_params *s)
   {
     SPEED_ROUTINE_MPZ_JACOBI (mpz_jacobi);
   }
   double
 speed_mpn_jacobi_base (struct speed_params *s)  speed_mpn_jacobi_base (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base);    SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base);
 }  }
   double
   speed_mpn_jacobi_base_1 (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_1);
   }
   double
   speed_mpn_jacobi_base_2 (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_2);
   }
   double
   speed_mpn_jacobi_base_3 (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3);
   }
   
   
 double  double
   speed_mpn_sqrtrem (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_SQRTREM (mpn_sqrtrem);
   }
   
   
   double
 speed_mpz_fac_ui (struct speed_params *s)  speed_mpz_fac_ui (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPZ_UI (mpz_fac_ui);    SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui);
 }  }
   
   
 double  double
   speed_mpn_fib2_ui (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_FIB2_UI (mpn_fib2_ui);
   }
   double
 speed_mpz_fib_ui (struct speed_params *s)  speed_mpz_fib_ui (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPZ_UI (mpz_fib_ui);    SPEED_ROUTINE_MPZ_FIB_UI (mpz_fib_ui);
 }  }
   double
   speed_mpz_fib2_ui (struct speed_params *s)
   {
     SPEED_ROUTINE_MPZ_FIB2_UI (mpz_fib2_ui);
   }
   double
   speed_mpz_lucnum_ui (struct speed_params *s)
   {
     SPEED_ROUTINE_MPZ_LUCNUM_UI (mpz_lucnum_ui);
   }
   double
   speed_mpz_lucnum2_ui (struct speed_params *s)
   {
     SPEED_ROUTINE_MPZ_LUCNUM2_UI (mpz_lucnum2_ui);
   }
   
   
 double  double
Line 799  speed_mpz_powm (struct speed_params *s)
Line 1044  speed_mpz_powm (struct speed_params *s)
 {  {
   SPEED_ROUTINE_MPZ_POWM (mpz_powm);    SPEED_ROUTINE_MPZ_POWM (mpz_powm);
 }  }
   double
   speed_mpz_powm_mod (struct speed_params *s)
   {
     SPEED_ROUTINE_MPZ_POWM (mpz_powm_mod);
   }
   double
   speed_mpz_powm_redc (struct speed_params *s)
   {
     SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc);
   }
   double
   speed_mpz_powm_ui (struct speed_params *s)
   {
     SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui);
   }
   
   
 double  double
Line 883  speed_noop_wxys (struct speed_params *s)
Line 1143  speed_noop_wxys (struct speed_params *s)
   
   
 /* Compare these to see how much malloc/free costs and then how much  /* Compare these to see how much malloc/free costs and then how much
    _mp_default_allocate/free and mpz_init/clear add.  mpz_init/clear or     __gmp_default_allocate/free and mpz_init/clear add.  mpz_init/clear or
    mpq_init/clear will be doing a 1 limb allocate, so use that as the size     mpq_init/clear will be doing a 1 limb allocate, so use that as the size
    when including them in comparisons.  */     when including them in comparisons.  */
   
Line 907  speed_malloc_realloc_free (struct speed_params *s)
Line 1167  speed_malloc_realloc_free (struct speed_params *s)
 }  }
   
 double  double
 speed_mp_allocate_free (struct speed_params *s)  speed_gmp_allocate_free (struct speed_params *s)
 {  {
   size_t  bytes = s->size * BYTES_PER_MP_LIMB;    size_t  bytes = s->size * BYTES_PER_MP_LIMB;
   SPEED_ROUTINE_ALLOC_FREE (void *p,    SPEED_ROUTINE_ALLOC_FREE (void *p,
                             p = (*_mp_allocate_func) (bytes);                              p = (*__gmp_allocate_func) (bytes);
                             (*_mp_free_func) (p, bytes));                              (*__gmp_free_func) (p, bytes));
 }  }
   
 double  double
 speed_mp_allocate_reallocate_free (struct speed_params *s)  speed_gmp_allocate_reallocate_free (struct speed_params *s)
 {  {
   size_t  bytes = s->size * BYTES_PER_MP_LIMB;    size_t  bytes = s->size * BYTES_PER_MP_LIMB;
   SPEED_ROUTINE_ALLOC_FREE    SPEED_ROUTINE_ALLOC_FREE
     (void *p,      (void *p,
      p = (*_mp_allocate_func) (BYTES_PER_MP_LIMB);       p = (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
      p = (*_mp_reallocate_func) (p, bytes, BYTES_PER_MP_LIMB);       p = (*__gmp_reallocate_func) (p, bytes, BYTES_PER_MP_LIMB);
      (*_mp_free_func) (p, bytes));       (*__gmp_free_func) (p, bytes));
 }  }
   
 double  double
Line 1058  speed_mpz_bin_uiui (struct speed_params *s)
Line 1318  speed_mpz_bin_uiui (struct speed_params *s)
     h = s->xp[0];               \      h = s->xp[0];               \
     l = s->yp[0];               \      l = s->yp[0];               \
                                 \                                  \
     switch (s->r) {             \      if (s->r == 1)              \
     case 1:                     \        {                         \
       speed_starttime ();       \          speed_starttime ();     \
       i = s->reps;              \          i = s->reps;            \
       do                        \          do                      \
         {            {
   
 #define SPEED_MACRO_UMUL_PPMM_B \  #define SPEED_MACRO_UMUL_PPMM_B \
         }                       \            }                     \
       while (--i != 0);         \          while (--i != 0);       \
       t = speed_endtime ();     \          t = speed_endtime ();   \
       break;                    \        }                         \
                                 \      else                        \
     default:                    \        {                         \
       speed_starttime ();       \          speed_starttime ();     \
       i = s->reps;              \          i = s->reps;            \
       do                        \          do                      \
         {            {
   
 #define SPEED_MACRO_UMUL_PPMM_C                                         \  #define SPEED_MACRO_UMUL_PPMM_C                                         \
         }                                                               \            }                                                             \
       while (--i != 0);                                                 \          while (--i != 0);                                               \
       t = speed_endtime ();                                             \          t = speed_endtime ();                                           \
       break;                                                            \        }                                                                 \
     }                                                                   \  
                                                                         \                                                                          \
     /* stop the compiler optimizing away the whole calculation! */      \      /* stop the compiler optimizing away the whole calculation! */      \
     noop_1 (h);                                                         \      noop_1 (h);                                                         \
Line 1202  speed_mpn_umul_ppmm (struct speed_params *s)
Line 1461  speed_mpn_umul_ppmm (struct speed_params *s)
     /* divisor from "r" parameter, or a default */      \      /* divisor from "r" parameter, or a default */      \
     d = s->r;                                           \      d = s->r;                                           \
     if (d == 0)                                         \      if (d == 0)                                         \
       d = 0x12345678;                                   \        d = __mp_bases[10].big_base;                      \
                                                         \                                                          \
     if (normalize)                                      \      if (normalize)                                      \
       {                                                 \        {                                                 \
Line 1289  speed_udiv_qrnnd_preinv2norm (struct speed_params *s)
Line 1548  speed_udiv_qrnnd_preinv2norm (struct speed_params *s)
   SPEED_ROUTINE_UDIV_QRNND_B;    SPEED_ROUTINE_UDIV_QRNND_B;
 }  }
   
   /* Measure __udiv_qrnnd_c.  The timing loop, the declarations of q, r and
      d, and the return of the result are not written here; they have to come
      from the SPEED_ROUTINE_UDIV_QRNND_A / _B macro pair that brackets the
      braced block.  NOTE(review): the argument 1 is presumably the macro's
      "normalize" flag -- confirm against the macro definition.

      The braced block is ten back-to-back invocations.  Each one is handed
      the q and r left by the previous one (in swapped positions), so the ten
      calls form a dependent chain rather than ten independent operations.  */
   double
   speed_udiv_qrnnd_c (struct speed_params *s)
   {
     SPEED_ROUTINE_UDIV_QRNND_A (1);
     {
       __udiv_qrnnd_c (q, r, r, q, d);
        __udiv_qrnnd_c (q, r, r, q, d);
        __udiv_qrnnd_c (q, r, r, q, d);
       __udiv_qrnnd_c (q, r, r, q, d);
        __udiv_qrnnd_c (q, r, r, q, d);
        __udiv_qrnnd_c (q, r, r, q, d);
       __udiv_qrnnd_c (q, r, r, q, d);
        __udiv_qrnnd_c (q, r, r, q, d);
        __udiv_qrnnd_c (q, r, r, q, d);
       __udiv_qrnnd_c (q, r, r, q, d);
     }
     SPEED_ROUTINE_UDIV_QRNND_B;
   }
   
 #if HAVE_NATIVE_mpn_udiv_qrnnd  #if HAVE_NATIVE_mpn_udiv_qrnnd
   
 #if defined (__hppa) && W_TYPE_SIZE == 64  #if defined (__hppa) && W_TYPE_SIZE == 64
Line 1300  speed_udiv_qrnnd_preinv2norm (struct speed_params *s)
Line 1578  speed_udiv_qrnnd_preinv2norm (struct speed_params *s)
 double  double
 speed_mpn_udiv_qrnnd (struct speed_params *s)  speed_mpn_udiv_qrnnd (struct speed_params *s)
 {  {
   
   SPEED_ROUTINE_UDIV_QRNND_A (1);    SPEED_ROUTINE_UDIV_QRNND_A (1);
   {    {
     CALL_MPN_UDIV_QRNND;      CALL_MPN_UDIV_QRNND;
Line 1317  speed_mpn_udiv_qrnnd (struct speed_params *s)
Line 1594  speed_mpn_udiv_qrnnd (struct speed_params *s)
   SPEED_ROUTINE_UDIV_QRNND_B;    SPEED_ROUTINE_UDIV_QRNND_B;
 }  }
 #endif  #endif
   
   
   /* Measure invert_limb.  The macro is given the complete call expression
      invert_limb (dinv, d) rather than a function name.  dinv and d are not
      declared in this function, so presumably SPEED_ROUTINE_INVERT_LIMB_CALL
      declares them and also supplies the timing loop and the return value --
      confirm against its definition, which is outside this view.  */
   double
   speed_invert_limb (struct speed_params *s)
   {
     SPEED_ROUTINE_INVERT_LIMB_CALL (invert_limb (dinv, d));
   }
   
   
   /* Measure the C "/" operator on mp_limb_t operands.

      s->r is the divisor, or if it's zero then __mp_bases[10].big_base is
      used instead.  s->xp[0] is the value mixed into the dividend each time.
      xp[0] might not be particularly random, but should give an indication how
      "/" runs.  Same for speed_operator_mod below.

      s->reps is the number of passes over the unrolled loop body.  It must be
      at least 1: the loop is a do/while with a pre-decrement test, so a count
      of 0 would wrap the unsigned counter around instead of skipping the loop.

      The return value is whatever speed_endtime() gives for the whole loop.
      s->time_divisor is set to 10, matching the ten divisions per pass;
      presumably the caller uses it to scale the result to a single
      operation -- confirm against the users of time_divisor.  */
   double
   speed_operator_div (struct speed_params *s)
   {
     double     t;
     unsigned   i;
     mp_limb_t  x, q, d;
   
     s->time_divisor = 10;
   
     /* divisor from "r" parameter, or a default */
     d = s->r;
     if (d == 0)
       d = __mp_bases[10].big_base;
   
     x = s->xp[0];
     q = 0;
   
     speed_starttime ();
     i = s->reps;
     do
       {
         /* Ten steps, each depending on the q from the step before.  The xor
            with x puts fresh bits into q ahead of each division.  */
         q ^= x; q /= d;
          q ^= x; q /= d;
          q ^= x; q /= d;
         q ^= x; q /= d;
          q ^= x; q /= d;
          q ^= x; q /= d;
         q ^= x; q /= d;
          q ^= x; q /= d;
          q ^= x; q /= d;
         q ^= x; q /= d;
       }
     while (--i != 0);
     t = speed_endtime ();
   
     /* stop the compiler optimizing away the whole calculation! */
     noop_1 (q);
   
     return t;
   }
   
   /* Measure the C "%" operator on mp_limb_t operands.  This has the same
      shape as the "/" measurement: s->r is the divisor, or if it's zero then
      __mp_bases[10].big_base is used; s->xp[0] is the value mixed into the
      dividend each time.

      s->reps is the number of passes over the unrolled loop body and must be
      at least 1, since the do/while pre-decrements an unsigned counter and a
      count of 0 would wrap around.

      The return value is whatever speed_endtime() gives for the whole loop.
      s->time_divisor is set to 10, matching the ten remainders per pass;
      presumably the caller uses it to scale the result to a single
      operation -- confirm against the users of time_divisor.  */
   double
   speed_operator_mod (struct speed_params *s)
   {
     double     t;
     unsigned   i;
     mp_limb_t  x, r, d;
   
     s->time_divisor = 10;
   
     /* divisor from "r" parameter, or a default */
     d = s->r;
     if (d == 0)
       d = __mp_bases[10].big_base;
   
     x = s->xp[0];
     r = 0;
   
     speed_starttime ();
     i = s->reps;
     do
       {
         /* Ten steps, each depending on the r from the step before.  The xor
            with x puts fresh bits into r ahead of each remainder.  */
         r ^= x; r %= d;
          r ^= x; r %= d;
          r ^= x; r %= d;
         r ^= x; r %= d;
          r ^= x; r %= d;
          r ^= x; r %= d;
         r ^= x; r %= d;
          r ^= x; r %= d;
          r ^= x; r %= d;
         r ^= x; r %= d;
       }
     while (--i != 0);
     t = speed_endtime ();
   
     /* stop the compiler optimizing away the whole calculation! */
     noop_1 (r);
   
     return t;
   }
   
   
   /* Prepare SPEED_BLOCK_SIZE limbs of test data at xp for the
      count_leading_zeros / count_trailing_zeros measurements.

      r==0 measures on data with the values uniformly distributed.  This will
      be typical for count_trailing_zeros in a GCD etc.
   
      r==1 measures on data with the resultant count uniformly distributed
      between 0 and BITS_PER_MP_LIMB-1.  This is probably sensible for
      count_leading_zeros on the high limbs of divisors.

      s        supplies r (the data style), and xp_block / yp_block as the
               source data; yp_block is only read when r==1.
      xp       destination, written for SPEED_BLOCK_SIZE limbs.
      leading  non-zero to prepare for count_leading_zeros, zero for
               count_trailing_zeros.
      zero     non-zero if a zero limb is acceptable as an operand; only
               consulted when r==0 (the r==1 data always has a bit set).

      Returns 1 if xp has been set up, or 0 if s->r is neither 0 nor 1.  In
      the latter case xp is not touched.  */
   
   int
   speed_routine_count_zeros_setup (struct speed_params *s,
                                    mp_ptr xp, int leading, int zero)
   {
     int        i, c;
     mp_limb_t  n;
   
     if (s->r == 0)
       {
         /* Make uniformly distributed data.  If zero isn't allowed then change
            it to 1 for leading, or 0x800..00 for trailing.  */
         MPN_COPY (xp, s->xp_block, SPEED_BLOCK_SIZE);
         if (! zero)
           for (i = 0; i < SPEED_BLOCK_SIZE; i++)
             if (xp[i] == 0)
               xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT;
       }
     else if (s->r == 1)
       {
         /* Make counts uniformly distributed.  A randomly chosen bit is set, and
            for leading the rest above it are cleared, or for trailing then the
            rest below.  The bit position comes from yp_block[i] reduced modulo
            BITS_PER_MP_LIMB; the bits that are kept come from xp_block[i].  */
         for (i = 0; i < SPEED_BLOCK_SIZE; i++)
           {
             mp_limb_t  set = CNST_LIMB(1) << (s->yp_block[i] % BITS_PER_MP_LIMB);
             mp_limb_t  keep_below = set-1;
             mp_limb_t  keep_above = MP_LIMB_T_MAX ^ keep_below;
             mp_limb_t  keep = (leading ? keep_below : keep_above);
             xp[i] = (s->xp_block[i] & keep) | set;
           }
       }
     else
       {
         return 0;
       }
   
     /* Account for the effect of n^=c.  Each limb is stored xor'ed with the
        count obtained from the limb before it (0 for the first limb), that
        count being taken from the value as prepared above, before the xor.
        Presumably the measuring loop xors its previous result into each limb
        it fetches, which then gives back the prepared value -- confirm
        against SPEED_ROUTINE_COUNT_ZEROS_A, which is outside this view.  */
     c = 0;
     for (i = 0; i < SPEED_BLOCK_SIZE; i++)
       {
         n = xp[i];
         xp[i] ^= c;
   
         if (leading)
           count_leading_zeros (c, n);
         else
           count_trailing_zeros (c, n);
       }
   
     return 1;
   }
   
   /* Measure count_leading_zeros.  Only the operation under test is written
      here; c and n are not declared in this function, so the declarations,
      the timing loop and the return value presumably all come from the
      SPEED_ROUTINE_COUNT_ZEROS_A / _B macro pair -- confirm against their
      definitions, which are outside this view.  */
   double
   speed_count_leading_zeros (struct speed_params *s)
   {
     /* Second macro argument: 1 if COUNT_LEADING_ZEROS_0 is defined, else 0.
        NOTE(review): the two arguments look like the "leading" and "zero"
        parameters of speed_routine_count_zeros_setup -- confirm.  */
   #ifdef COUNT_LEADING_ZEROS_0
   #define COUNT_LEADING_ZEROS_0_ALLOWED   1
   #else
   #define COUNT_LEADING_ZEROS_0_ALLOWED   0
   #endif
   
     SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED);
     count_leading_zeros (c, n);
     SPEED_ROUTINE_COUNT_ZEROS_B ();
   }
   /* Measure count_trailing_zeros.  Only the operation under test is written
      here; c and n are not declared in this function, so the declarations,
      the timing loop and the return value presumably all come from the
      SPEED_ROUTINE_COUNT_ZEROS_A / _B macro pair -- confirm against their
      definitions, which are outside this view.  Both macro arguments are 0
      here.  NOTE(review): they look like the "leading" and "zero" parameters
      of speed_routine_count_zeros_setup, i.e. trailing data with no zero
      limbs -- confirm.  */
   double
   speed_count_trailing_zeros (struct speed_params *s)
   {
     SPEED_ROUTINE_COUNT_ZEROS_A (0, 0);
     count_trailing_zeros (c, n);
     SPEED_ROUTINE_COUNT_ZEROS_B ();
   }
   
   
   /* Measure mpn_get_str.  The whole body, including the return of the double
      result, has to come from SPEED_ROUTINE_MPN_GET_STR, which is defined
      outside this view (presumably in speed.h -- confirm).  */
   double
   speed_mpn_get_str (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_GET_STR (mpn_get_str);
   }
   
   /* Measure mpn_set_str.  The whole body, including the return of the double
      result, has to come from SPEED_ROUTINE_MPN_SET_STR, which is defined
      outside this view (presumably in speed.h -- confirm).  */
   double
   speed_mpn_set_str (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_SET_STR (mpn_set_str);
   }
   /* Measure mpn_set_str_basecase, using the same SPEED_ROUTINE_MPN_SET_STR
      macro as the mpn_set_str measurement so the two can be compared.  The
      macro is defined outside this view.  */
   double
   speed_mpn_set_str_basecase (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_SET_STR (mpn_set_str_basecase);
   }
   /* Measure mpn_set_str_subquad, using the same SPEED_ROUTINE_MPN_SET_STR
      macro as the mpn_set_str measurement so the two can be compared.  The
      macro is defined outside this view.  */
   double
   speed_mpn_set_str_subquad (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_SET_STR (mpn_set_str_subquad);
   }
   
   
   /* Measure MPN_ZERO on s->size limbs.  The macro is given the complete call
      expression rather than a name.  wp is not declared in this function, so
      presumably SPEED_ROUTINE_MPN_ZERO_CALL declares it and also supplies the
      timing loop and the return value -- confirm against its definition,
      which is outside this view.  */
   double
   speed_MPN_ZERO (struct speed_params *s)
   {
     SPEED_ROUTINE_MPN_ZERO_CALL (MPN_ZERO (wp, s->size));
   }

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.1.1.2

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>