/* mpn_divrem_1 -- mpn by limb division.

   Divide {UP,UN} by D.  Write UN+QXN quotient limbs at QP, the low QXN of
   them being "fraction" limbs developed below the radix point.  Return the
   single-limb remainder.  There are no constraints on the value of the
   divisor.  QP+QXN and UP might point to the same limbs.

Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
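
/* Example use (an illustrative sketch, not from the library sources):
   dividing the two-limb number u[1]*B + u[0], B = 2^GMP_NUMB_BITS, by 7,
   with no fraction limbs requested (qxn = 0):

       mp_limb_t u[2] = { 123, 456 };
       mp_limb_t q[2];
       mp_limb_t r;

       r = mpn_divrem_1 (q, 0, u, 2, 7);

   Afterwards {q,2} is the quotient and r the remainder, satisfying
   456*B + 123 == (q[1]*B + q[0]) * 7 + r, with 0 <= r < 7.  */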


/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always, CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif
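
/* For reference, the mul-by-inverse idea (a sketch under the usual GMP
   definitions, not a substitute for the gmp-impl.h macros): for a
   normalized divisor d (high bit set), invert_limb computes the limb
   inverse

       dinv = floor ((B*B - 1) / d) - B,    B = 2^GMP_LIMB_BITS

   and udiv_qrnnd_preinv obtains each quotient limb from a umul_ppmm by
   dinv plus a few additions and conditional corrections, keeping the slow
   hardware divide instruction off the dependent chain.  */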


/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
   and UNNORM thresholds are 0 and only the inversion code is included.

   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
   will be MP_SIZE_T_MAX and only the plain division code is included.

   Otherwise mul-by-inverse is better than plain division above some
   threshold, and best results are obtained by having code for both present.

   The main reason for separating the norm and unnorm cases is that not all
   CPUs give zero for "n0 >> BITS_PER_MP_LIMB" which would arise in the
   unnorm code used on an already normalized divisor.

   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
   non-shifting code for both the norm and unnorm cases, though with
   different criteria for skipping a division, and with different thresholds
   of course.  And in fact if inversion is never viable, then that simple
   non-shifting division would be all that's left.

   The NORM and UNNORM thresholds might not differ much, but if there's
   going to be separate code for norm and unnorm then it makes sense to have
   separate thresholds.  One thing that's possible is that the
   mul-by-inverse might be better only for normalized divisors, due to that
   case not needing variable bit shifts.

   Notice that the thresholds are tested after the decision to possibly skip
   one divide step, so they're based on the actual number of divisions done.

   For the unnorm case, it would be possible to call mpn_lshift to adjust
   the dividend all in one go (into the quotient space say), rather than
   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
   than what the compiler can generate for EXTRACT.  But this is left to CPU
   specific implementations to consider, especially since EXTRACT isn't on
   the dependent chain.  */
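
/* The qxn argument asks for that many extra "fraction" quotient limbs below
   the radix point: after the integer limbs, the remainder is repeatedly
   extended with zero low limbs, so {qp,un+qxn} ends up holding
   floor (U * B^qxn / d), B = 2^GMP_NUMB_BITS.  For instance (an
   illustrative sketch, not from the sources), with {up,1} = {1}, d = 3 and
   qxn = 1, the integer limb q[1] is 0 and the fraction limb q[0] is
   floor (B/3), the first limb of the binary expansion of 1/3.  */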

mp_limb_t
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
              mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t n;
  mp_size_t i;
  mp_limb_t n1, n0;
  mp_limb_t r = 0;

  ASSERT (qxn >= 0);
  ASSERT (un >= 0);
  ASSERT (d != 0);
  /* FIXME: What's the correct overlap rule when qxn!=0? */
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));

  n = un + qxn;
  if (n == 0)
    return 0;

  d <<= GMP_NAIL_BITS;

  qp += (n - 1);   /* Make qp point at most significant quotient limb */
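
  /* Nails note: each limb holds GMP_NUMB_BITS significant bits with the
     high GMP_NAIL_BITS bits zero.  Shifting d and each dividend limb up by
     GMP_NAIL_BITS lets udiv_qrnnd and udiv_qrnnd_preinv work on full
     GMP_LIMB_BITS values; each partial remainder is shifted back down
     afterwards.  In the default non-nails build GMP_NAIL_BITS is 0 and all
     these shifts are no-ops.  */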

  if ((d & GMP_LIMB_HIGHBIT) != 0)
    {
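      /* Here d is normalized (high bit set), so up[un-1] < B <= 2*d and
         the high quotient limb can only be 0 or 1, letting a compare
         replace a full divide step.  In "r -= (d & -q)" below, -q is an
         all-ones mask when q is 1 and zero when q is 0, so d is subtracted
         exactly when the quotient bit is set, branch-free.  */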
      if (un != 0)
        {
          /* High quotient limb is 0 or 1, skip a divide step. */
          mp_limb_t q;
          r = up[un - 1] << GMP_NAIL_BITS;
          q = (r >= d);
          *qp-- = q;
          r -= (d & -q);
          r >>= GMP_NAIL_BITS;
          n--;
          un--;
        }

      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
        {
        plain:
          for (i = un - 1; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd (*qp, r, r, n0, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd (*qp, r, r, 0, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r;
        }
      else
        {
          /* Multiply-by-inverse, divisor already normalized. */
          mp_limb_t dinv;
          invert_limb (dinv, d);

          for (i = un - 1; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd_preinv (*qp, r, r, 0, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r;
        }
    }
  else
    {
      /* Most significant bit of divisor == 0.  */
      int norm;
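
      /* udiv_qrnnd and udiv_qrnnd_preinv want a normalized divisor, so d
         is shifted up by "norm" bits and the dividend limbs are shifted to
         match, each division taking its numerator low limb from two
         adjacent source limbs.  The final remainder is scaled back with
         "r >> norm".  */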

      /* Skip a division if high < divisor (high quotient 0).  Testing here
         before normalizing will still skip as often as possible.  */
      if (un != 0)
        {
          n1 = up[un - 1] << GMP_NAIL_BITS;
          if (n1 < d)
            {
              r = n1 >> GMP_NAIL_BITS;
              *qp-- = 0;
              n--;
              if (n == 0)
                return r;
              un--;
            }
        }

      if (! UDIV_NEEDS_NORMALIZATION
          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
        goto plain;

      count_leading_zeros (norm, d);
      d <<= norm;
      r <<= norm;

      if (UDIV_NEEDS_NORMALIZATION
          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
        {
          if (un != 0)
            {
              n1 = up[un - 1] << GMP_NAIL_BITS;
              r |= (n1 >> (GMP_LIMB_BITS - norm));
              for (i = un - 2; i >= 0; i--)
                {
                  n0 = up[i] << GMP_NAIL_BITS;
                  udiv_qrnnd (*qp, r, r,
                              (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
                              d);
                  r >>= GMP_NAIL_BITS;
                  qp--;
                  n1 = n0;
                }
              udiv_qrnnd (*qp, r, r, n1 << norm, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd (*qp, r, r, 0, d);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r >> norm;
        }
      else
        {
          mp_limb_t dinv;
          invert_limb (dinv, d);
          if (un != 0)
            {
              n1 = up[un - 1] << GMP_NAIL_BITS;
              r |= (n1 >> (GMP_LIMB_BITS - norm));
              for (i = un - 2; i >= 0; i--)
                {
                  n0 = up[i] << GMP_NAIL_BITS;
                  udiv_qrnnd_preinv (*qp, r, r,
                                     ((n1 << norm)
                                      | (n0 >> (GMP_NUMB_BITS - norm))),
                                     d, dinv);
                  r >>= GMP_NAIL_BITS;
                  qp--;
                  n1 = n0;
                }
              udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          for (i = qxn - 1; i >= 0; i--)
            {
              udiv_qrnnd_preinv (*qp, r, r, 0, d, dinv);
              r >>= GMP_NAIL_BITS;
              qp--;
            }
          return r >> norm;
        }
    }
}
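
/* Note: the older mpn_divmod_1 interface corresponds to qxn == 0 here; a
   sketch of the relationship (the exact definition lives in the GMP
   headers):

       #define mpn_divmod_1(qp,np,nn,d)  mpn_divrem_1 (qp, 0, np, nn, d)
*/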