[BACK]Return to sb_divrem_mn.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / generic

Annotation of OpenXM_contrib/gmp/mpn/generic/sb_divrem_mn.c, Revision 1.1.1.2

1.1       maekawa     1: /* mpn_sb_divrem_mn -- Divide natural numbers, producing both remainder and
                      2:    quotient.
                      3:
                      4:    THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
                      5:    INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
                      6:    IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
                      7:    FUTURE GNU MP RELEASE.
                      8:
                      9:
1.1.1.2 ! ohara      10: Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002 Free Software Foundation,
        !            11: Inc.
1.1       maekawa    12:
                     13: This file is part of the GNU MP Library.
                     14:
                     15: The GNU MP Library is free software; you can redistribute it and/or modify
                     16: it under the terms of the GNU Lesser General Public License as published by
                     17: the Free Software Foundation; either version 2.1 of the License, or (at your
                     18: option) any later version.
                     19:
                     20: The GNU MP Library is distributed in the hope that it will be useful, but
                     21: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     22: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     23: License for more details.
                     24:
                     25: You should have received a copy of the GNU Lesser General Public License
                     26: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     27: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     28: MA 02111-1307, USA. */
                     29:
                     30: #include "gmp.h"
                     31: #include "gmp-impl.h"
                     32: #include "longlong.h"
                     33:
1.1.1.2 ! ohara      34:
        !            35: /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
        !            36:    meaning the quotient size where that should happen, the quotient size
        !            37:    being how many udiv divisions will be done.
        !            38:
        !            39:    The default is to use preinv always, CPUs where this doesn't suit have
        !            40:    tuned thresholds.  Note in particular that preinv should certainly be
        !            41:    used if that's the only division available (USE_PREINV_ALWAYS).  */
        !            42:
        !            43: #ifndef DIV_SB_PREINV_THRESHOLD
        !            44: #define DIV_SB_PREINV_THRESHOLD  0
        !            45: #endif
        !            46:
        !            47:
1.1       maekawa    48: /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
                     49:    the NSIZE-DSIZE least significant quotient limbs at QP
1.1.1.2 ! ohara      50:    and the DSIZE long remainder at NP.
1.1       maekawa    51:    Return the most significant limb of the quotient, this is always 0 or 1.
                     52:
                     53:    Preconditions:
                     54:    0. NSIZE >= DSIZE.
                     55:    1. The most significant bit of the divisor must be set.
                     56:    2. QP must either not overlap with the input operands at all, or
                     57:       QP + DSIZE >= NP must hold true.  (This means that it's
                     58:       possible to put the quotient in the high part of NUM, right after the
                     59:       remainder in NUM.
1.1.1.2 ! ohara      60:    3. NSIZE >= DSIZE.
1.1       maekawa    61:    4. DSIZE >= 2.  */
                     62:
                     63:
                     64: mp_limb_t
                     65: mpn_sb_divrem_mn (mp_ptr qp,
1.1.1.2 ! ohara      66:                  mp_ptr np, mp_size_t nn,
        !            67:                  mp_srcptr dp, mp_size_t dn)
1.1       maekawa    68: {
                     69:   mp_limb_t most_significant_q_limb = 0;
1.1.1.2 ! ohara      70:   mp_size_t qn = nn - dn;
1.1       maekawa    71:   mp_size_t i;
                     72:   mp_limb_t dx, d1, n0;
                     73:   mp_limb_t dxinv;
1.1.1.2 ! ohara      74:   int use_preinv;
1.1       maekawa    75:
1.1.1.2 ! ohara      76:   ASSERT (dn > 2);
        !            77:   ASSERT (nn >= dn);
        !            78:   ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
        !            79:   ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
        !            80:   ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
        !            81:   ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
        !            82:   ASSERT_MPN (np, nn);
        !            83:   ASSERT_MPN (dp, dn);
        !            84:
        !            85:   np += qn;
        !            86:   dx = dp[dn - 1];
        !            87:   d1 = dp[dn - 2];
        !            88:   n0 = np[dn - 1];
1.1       maekawa    89:
                     90:   if (n0 >= dx)
                     91:     {
1.1.1.2 ! ohara      92:       if (n0 > dx || mpn_cmp (np, dp, dn - 1) >= 0)
1.1       maekawa    93:        {
1.1.1.2 ! ohara      94:          mpn_sub_n (np, np, dp, dn);
1.1       maekawa    95:          most_significant_q_limb = 1;
                     96:        }
                     97:     }
                     98:
1.1.1.2 ! ohara      99:   /* use_preinv is possibly a constant, but it's left to the compiler to
        !           100:      optimize away the unused code in that case.  */
        !           101:   use_preinv = ABOVE_THRESHOLD (qn, DIV_SB_PREINV_THRESHOLD);
        !           102:   if (use_preinv)
        !           103:     invert_limb (dxinv, dx);
1.1       maekawa   104:
1.1.1.2 ! ohara     105:   for (i = qn - 1; i >= 0; i--)
1.1       maekawa   106:     {
                    107:       mp_limb_t q;
                    108:       mp_limb_t nx;
                    109:       mp_limb_t cy_limb;
                    110:
1.1.1.2 ! ohara     111:       nx = np[dn - 1];         /* FIXME: could get value from r1 */
1.1       maekawa   112:       np--;
                    113:
                    114:       if (nx == dx)
                    115:        {
                    116:          /* This might over-estimate q, but it's probably not worth
                    117:             the extra code here to find out.  */
1.1.1.2 ! ohara     118:          q = GMP_NUMB_MASK;
1.1       maekawa   119:
                    120: #if 1
1.1.1.2 ! ohara     121:          cy_limb = mpn_submul_1 (np, dp, dn, q);
1.1       maekawa   122: #else
                    123:          /* This should be faster on many machines */
1.1.1.2 ! ohara     124:          cy_limb = mpn_sub_n (np + 1, np + 1, dp, dn);
        !           125:          cy = mpn_add_n (np, np, dp, dn);
        !           126:          np[dn] += cy;
1.1       maekawa   127: #endif
                    128:
                    129:          if (nx != cy_limb)
                    130:            {
1.1.1.2 ! ohara     131:              mpn_add_n (np, np, dp, dn);
1.1       maekawa   132:              q--;
                    133:            }
                    134:
                    135:          qp[i] = q;
                    136:        }
                    137:       else
                    138:        {
                    139:          mp_limb_t rx, r1, r0, p1, p0;
                    140:
1.1.1.2 ! ohara     141:          /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register usage
        !           142:             when np[dn-1] is used in an asm statement like umul_ppmm in
        !           143:             udiv_qrnnd_preinv.  The symptom is seg faults due to registers
        !           144:             being clobbered.  gcc 2.95 i386 doesn't have the problem. */
        !           145:          {
        !           146:            mp_limb_t  workaround = np[dn - 1];
        !           147:            if (use_preinv)
        !           148:              udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
        !           149:            else
        !           150:              {
        !           151:                udiv_qrnnd (q, r1, nx, workaround << GMP_NAIL_BITS,
        !           152:                            dx << GMP_NAIL_BITS);
        !           153:                r1 >>= GMP_NAIL_BITS;
        !           154:              }
        !           155:          }
        !           156:          umul_ppmm (p1, p0, d1, q << GMP_NAIL_BITS);
        !           157:          p0 >>= GMP_NAIL_BITS;
1.1       maekawa   158:
1.1.1.2 ! ohara     159:          r0 = np[dn - 2];
1.1       maekawa   160:          rx = 0;
                    161:          if (r1 < p1 || (r1 == p1 && r0 < p0))
                    162:            {
                    163:              p1 -= p0 < d1;
1.1.1.2 ! ohara     164:              p0 = (p0 - d1) & GMP_NUMB_MASK;
1.1       maekawa   165:              q--;
1.1.1.2 ! ohara     166:              r1 = (r1 + dx) & GMP_NUMB_MASK;
1.1       maekawa   167:              rx = r1 < dx;
                    168:            }
                    169:
                    170:          p1 += r0 < p0;        /* cannot carry! */
                    171:          rx -= r1 < p1;        /* may become 11..1 if q is still too large */
1.1.1.2 ! ohara     172:          r1 = (r1 - p1) & GMP_NUMB_MASK;
        !           173:          r0 = (r0 - p0) & GMP_NUMB_MASK;
1.1       maekawa   174:
1.1.1.2 ! ohara     175:          cy_limb = mpn_submul_1 (np, dp, dn - 2, q);
1.1       maekawa   176:
1.1.1.2 ! ohara     177:          /* Check if we've over-estimated q, and adjust as needed.  */
1.1       maekawa   178:          {
                    179:            mp_limb_t cy1, cy2;
                    180:            cy1 = r0 < cy_limb;
1.1.1.2 ! ohara     181:            r0 = (r0 - cy_limb) & GMP_NUMB_MASK;
1.1       maekawa   182:            cy2 = r1 < cy1;
                    183:            r1 -= cy1;
1.1.1.2 ! ohara     184:            np[dn - 1] = r1;
        !           185:            np[dn - 2] = r0;
1.1       maekawa   186:            if (cy2 != rx)
                    187:              {
1.1.1.2 ! ohara     188:                mpn_add_n (np, np, dp, dn);
1.1       maekawa   189:                q--;
                    190:              }
                    191:          }
                    192:          qp[i] = q;
                    193:        }
                    194:     }
                    195:
                    196:   /* ______ ______ ______
                    197:     |__rx__|__r1__|__r0__|             partial remainder
                    198:            ______ ______
                    199:         - |__p1__|__p0__|              partial product to subtract
                    200:            ______ ______
1.1.1.2 ! ohara     201:         - |______|cylimb|
1.1       maekawa   202:
                    203:      rx is -1, 0 or 1.  If rx=1, then q is correct (it should match
                    204:      carry out).  If rx=-1 then q is too large.  If rx=0, then q might
                    205:      be too large, but it is most likely correct.
                    206:   */
                    207:
                    208:   return most_significant_q_limb;
                    209: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>