[BACK]Return to sb_divrem_mn.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / generic

Diff for /OpenXM_contrib/gmp/mpn/generic/Attic/sb_divrem_mn.c between version 1.1 and 1.1.1.2

version 1.1, 2000/09/09 14:12:27 version 1.1.1.2, 2003/08/25 16:06:20
Line 7 
Line 7 
    FUTURE GNU MP RELEASE.     FUTURE GNU MP RELEASE.
   
   
 Copyright (C) 1993, 1994, 1995, 1996, 2000 Free Software Foundation, Inc.  Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002 Free Software Foundation,
   Inc.
   
 This file is part of the GNU MP Library.  This file is part of the GNU MP Library.
   
Line 30  MA 02111-1307, USA. */
Line 31  MA 02111-1307, USA. */
 #include "gmp-impl.h"  #include "gmp-impl.h"
 #include "longlong.h"  #include "longlong.h"
   
   
   /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
      meaning the quotient size where that should happen, the quotient size
      being how many udiv divisions will be done.
   
      The default is to use preinv always, CPUs where this doesn't suit have
      tuned thresholds.  Note in particular that preinv should certainly be
      used if that's the only division available (USE_PREINV_ALWAYS).  */
   
   #ifndef DIV_SB_PREINV_THRESHOLD
   #define DIV_SB_PREINV_THRESHOLD  0
   #endif
   
   
 /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write  /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
    the NSIZE-DSIZE least significant quotient limbs at QP     the NSIZE-DSIZE least significant quotient limbs at QP
    and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is     and the DSIZE long remainder at NP.
    non-zero, generate that many fraction bits and append them after the  
    other quotient limbs.  
    Return the most significant limb of the quotient, this is always 0 or 1.     Return the most significant limb of the quotient, this is always 0 or 1.
   
    Preconditions:     Preconditions:
Line 44  MA 02111-1307, USA. */
Line 57  MA 02111-1307, USA. */
       QP + DSIZE >= NP must hold true.  (This means that it's        QP + DSIZE >= NP must hold true.  (This means that it's
       possible to put the quotient in the high part of NUM, right after the        possible to put the quotient in the high part of NUM, right after the
       remainder in NUM.        remainder in NUM.
    3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.     3. NSIZE >= DSIZE.
    4. DSIZE >= 2.  */     4. DSIZE >= 2.  */
   
   
 #define PREINVERT_VIABLE \  
   (UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! TARGET_REGISTER_STARVED */)  
   
 mp_limb_t  mp_limb_t
 #if __STDC__  
 mpn_sb_divrem_mn (mp_ptr qp,  mpn_sb_divrem_mn (mp_ptr qp,
                mp_ptr np, mp_size_t nsize,                    mp_ptr np, mp_size_t nn,
                mp_srcptr dp, mp_size_t dsize)                    mp_srcptr dp, mp_size_t dn)
 #else  
 mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)  
      mp_ptr qp;  
      mp_ptr np;  
      mp_size_t nsize;  
      mp_srcptr dp;  
      mp_size_t dsize;  
 #endif  
 {  {
   mp_limb_t most_significant_q_limb = 0;    mp_limb_t most_significant_q_limb = 0;
     mp_size_t qn = nn - dn;
   mp_size_t i;    mp_size_t i;
   mp_limb_t dx, d1, n0;    mp_limb_t dx, d1, n0;
   mp_limb_t dxinv;    mp_limb_t dxinv;
   int have_preinv;    int use_preinv;
   
   ASSERT_ALWAYS (dsize > 2);    ASSERT (dn > 2);
     ASSERT (nn >= dn);
     ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
     ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
     ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
     ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
     ASSERT_MPN (np, nn);
     ASSERT_MPN (dp, dn);
   
   np += nsize - dsize;    np += qn;
   dx = dp[dsize - 1];    dx = dp[dn - 1];
   d1 = dp[dsize - 2];    d1 = dp[dn - 2];
   n0 = np[dsize - 1];    n0 = np[dn - 1];
   
   if (n0 >= dx)    if (n0 >= dx)
     {      {
       if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0)        if (n0 > dx || mpn_cmp (np, dp, dn - 1) >= 0)
         {          {
           mpn_sub_n (np, np, dp, dsize);            mpn_sub_n (np, np, dp, dn);
           most_significant_q_limb = 1;            most_significant_q_limb = 1;
         }          }
     }      }
   
   /* If multiplication is much faster than division, preinvert the    /* use_preinv is possibly a constant, but it's left to the compiler to
      most significant divisor limb before entering the loop.  */       optimize away the unused code in that case.  */
   if (PREINVERT_VIABLE)    use_preinv = ABOVE_THRESHOLD (qn, DIV_SB_PREINV_THRESHOLD);
     {    if (use_preinv)
       have_preinv = 0;      invert_limb (dxinv, dx);
       if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME)  
         {  
           invert_limb (dxinv, dx);  
           have_preinv = 1;  
         }  
     }  
   
   for (i = nsize - dsize - 1; i >= 0; i--)    for (i = qn - 1; i >= 0; i--)
     {      {
       mp_limb_t q;        mp_limb_t q;
       mp_limb_t nx;        mp_limb_t nx;
       mp_limb_t cy_limb;        mp_limb_t cy_limb;
   
       nx = np[dsize - 1];        nx = np[dn - 1];          /* FIXME: could get value from r1 */
       np--;        np--;
   
       if (nx == dx)        if (nx == dx)
         {          {
           /* This might over-estimate q, but it's probably not worth            /* This might over-estimate q, but it's probably not worth
              the extra code here to find out.  */               the extra code here to find out.  */
           q = ~(mp_limb_t) 0;            q = GMP_NUMB_MASK;
   
 #if 1  #if 1
           cy_limb = mpn_submul_1 (np, dp, dsize, q);            cy_limb = mpn_submul_1 (np, dp, dn, q);
 #else  #else
           /* This should be faster on many machines */            /* This should be faster on many machines */
           cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize);            cy_limb = mpn_sub_n (np + 1, np + 1, dp, dn);
           cy = mpn_add_n (np, np, dp, dsize);            cy = mpn_add_n (np, np, dp, dn);
           np[dsize] += cy;            np[dn] += cy;
 #endif  #endif
   
           if (nx != cy_limb)            if (nx != cy_limb)
             {              {
               mpn_add_n (np, np, dp, dsize);                mpn_add_n (np, np, dp, dn);
               q--;                q--;
             }              }
   
Line 135  mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
Line 138  mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
         {          {
           mp_limb_t rx, r1, r0, p1, p0;            mp_limb_t rx, r1, r0, p1, p0;
   
           /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register            /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register usage
              usage when np[dsize-1] is used in an asm statement like               when np[dn-1] is used in an asm statement like umul_ppmm in
              umul_ppmm in udiv_qrnnd_preinv.  The symptom is seg faults due               udiv_qrnnd_preinv.  The symptom is seg faults due to registers
              to registers being clobbered.  gcc 2.95 i386 doesn't have the               being clobbered.  gcc 2.95 i386 doesn't have the problem. */
              problem. */            {
           {              mp_limb_t  workaround = np[dn - 1];
             mp_limb_t  workaround = np[dsize - 1];              if (use_preinv)
             if (PREINVERT_VIABLE && have_preinv)                udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
               udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);              else
             else                {
               udiv_qrnnd (q, r1, nx, workaround, dx);                  udiv_qrnnd (q, r1, nx, workaround << GMP_NAIL_BITS,
           }                              dx << GMP_NAIL_BITS);
           umul_ppmm (p1, p0, d1, q);                  r1 >>= GMP_NAIL_BITS;
                 }
             }
             umul_ppmm (p1, p0, d1, q << GMP_NAIL_BITS);
             p0 >>= GMP_NAIL_BITS;
   
           r0 = np[dsize - 2];            r0 = np[dn - 2];
           rx = 0;            rx = 0;
           if (r1 < p1 || (r1 == p1 && r0 < p0))            if (r1 < p1 || (r1 == p1 && r0 < p0))
             {              {
               p1 -= p0 < d1;                p1 -= p0 < d1;
               p0 -= d1;                p0 = (p0 - d1) & GMP_NUMB_MASK;
               q--;                q--;
               r1 += dx;                r1 = (r1 + dx) & GMP_NUMB_MASK;
               rx = r1 < dx;                rx = r1 < dx;
             }              }
   
           p1 += r0 < p0;        /* cannot carry! */            p1 += r0 < p0;        /* cannot carry! */
           rx -= r1 < p1;        /* may become 11..1 if q is still too large */            rx -= r1 < p1;        /* may become 11..1 if q is still too large */
           r1 -= p1;            r1 = (r1 - p1) & GMP_NUMB_MASK;
           r0 -= p0;            r0 = (r0 - p0) & GMP_NUMB_MASK;
   
           cy_limb = mpn_submul_1 (np, dp, dsize - 2, q);            cy_limb = mpn_submul_1 (np, dp, dn - 2, q);
   
             /* Check if we've over-estimated q, and adjust as needed.  */
           {            {
             mp_limb_t cy1, cy2;              mp_limb_t cy1, cy2;
             cy1 = r0 < cy_limb;              cy1 = r0 < cy_limb;
             r0 -= cy_limb;              r0 = (r0 - cy_limb) & GMP_NUMB_MASK;
             cy2 = r1 < cy1;              cy2 = r1 < cy1;
             r1 -= cy1;              r1 -= cy1;
             np[dsize - 1] = r1;              np[dn - 1] = r1;
             np[dsize - 2] = r0;              np[dn - 2] = r0;
             if (cy2 != rx)              if (cy2 != rx)
               {                {
                 mpn_add_n (np, np, dp, dsize);                  mpn_add_n (np, np, dp, dn);
                 q--;                  q--;
               }                }
           }            }
Line 190  mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
Line 198  mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
             ______ ______              ______ ______
          - |__p1__|__p0__|              partial product to subtract           - |__p1__|__p0__|              partial product to subtract
             ______ ______              ______ ______
          - |______|cylimb|           - |______|cylimb|
   
      rx is -1, 0 or 1.  If rx=1, then q is correct (it should match       rx is -1, 0 or 1.  If rx=1, then q is correct (it should match
      carry out).  If rx=-1 then q is too large.  If rx=0, then q might       carry out).  If rx=-1 then q is too large.  If rx=0, then q might

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.1.1.2

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>