[BACK]Return to sb_divrem_mn.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / generic

Annotation of OpenXM_contrib/gmp/mpn/generic/sb_divrem_mn.c, Revision 1.1.1.1

1.1       maekawa     1: /* mpn_sb_divrem_mn -- Divide natural numbers, producing both remainder and
                      2:    quotient.
                      3:
                      4:    THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
                      5:    INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
                      6:    IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
                      7:    FUTURE GNU MP RELEASE.
                      8:
                      9:
                     10: Copyright (C) 1993, 1994, 1995, 1996, 2000 Free Software Foundation, Inc.
                     11:
                     12: This file is part of the GNU MP Library.
                     13:
                     14: The GNU MP Library is free software; you can redistribute it and/or modify
                     15: it under the terms of the GNU Lesser General Public License as published by
                     16: the Free Software Foundation; either version 2.1 of the License, or (at your
                     17: option) any later version.
                     18:
                     19: The GNU MP Library is distributed in the hope that it will be useful, but
                     20: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     21: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     22: License for more details.
                     23:
                     24: You should have received a copy of the GNU Lesser General Public License
                     25: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     26: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     27: MA 02111-1307, USA. */
                     28:
                     29: #include "gmp.h"
                     30: #include "gmp-impl.h"
                     31: #include "longlong.h"
                     32:
                     33: /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
                     34:    the NSIZE-DSIZE least significant quotient limbs at QP
                     35:    and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is
                     36:    non-zero, generate that many fraction bits and append them after the
                     37:    other quotient limbs.
                     38:    Return the most significant limb of the quotient, this is always 0 or 1.
                     39:
                     40:    Preconditions:
                     41:    0. NSIZE >= DSIZE.
                     42:    1. The most significant bit of the divisor must be set.
                     43:    2. QP must either not overlap with the input operands at all, or
                     44:       QP + DSIZE >= NP must hold true.  (This means that it's
                     45:       possible to put the quotient in the high part of NUM, right after the
                     46:       remainder in NUM.
                     47:    3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
                     48:    4. DSIZE >= 2.  */
                     49:
                     50:
                     51: #define PREINVERT_VIABLE \
                     52:   (UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! TARGET_REGISTER_STARVED */)
                     53:
                     54: mp_limb_t
                     55: #if __STDC__
                     56: mpn_sb_divrem_mn (mp_ptr qp,
                     57:               mp_ptr np, mp_size_t nsize,
                     58:               mp_srcptr dp, mp_size_t dsize)
                     59: #else
                     60: mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
                     61:      mp_ptr qp;
                     62:      mp_ptr np;
                     63:      mp_size_t nsize;
                     64:      mp_srcptr dp;
                     65:      mp_size_t dsize;
                     66: #endif
                     67: {
                     68:   mp_limb_t most_significant_q_limb = 0;
                     69:   mp_size_t i;
                     70:   mp_limb_t dx, d1, n0;
                     71:   mp_limb_t dxinv;
                     72:   int have_preinv;
                     73:
                     74:   ASSERT_ALWAYS (dsize > 2);
                     75:
                     76:   np += nsize - dsize;
                     77:   dx = dp[dsize - 1];
                     78:   d1 = dp[dsize - 2];
                     79:   n0 = np[dsize - 1];
                     80:
                     81:   if (n0 >= dx)
                     82:     {
                     83:       if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0)
                     84:        {
                     85:          mpn_sub_n (np, np, dp, dsize);
                     86:          most_significant_q_limb = 1;
                     87:        }
                     88:     }
                     89:
                     90:   /* If multiplication is much faster than division, preinvert the
                     91:      most significant divisor limb before entering the loop.  */
                     92:   if (PREINVERT_VIABLE)
                     93:     {
                     94:       have_preinv = 0;
                     95:       if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME)
                     96:        {
                     97:          invert_limb (dxinv, dx);
                     98:          have_preinv = 1;
                     99:        }
                    100:     }
                    101:
                    102:   for (i = nsize - dsize - 1; i >= 0; i--)
                    103:     {
                    104:       mp_limb_t q;
                    105:       mp_limb_t nx;
                    106:       mp_limb_t cy_limb;
                    107:
                    108:       nx = np[dsize - 1];
                    109:       np--;
                    110:
                    111:       if (nx == dx)
                    112:        {
                    113:          /* This might over-estimate q, but it's probably not worth
                    114:             the extra code here to find out.  */
                    115:          q = ~(mp_limb_t) 0;
                    116:
                    117: #if 1
                    118:          cy_limb = mpn_submul_1 (np, dp, dsize, q);
                    119: #else
                    120:          /* This should be faster on many machines */
                    121:          cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize);
                    122:          cy = mpn_add_n (np, np, dp, dsize);
                    123:          np[dsize] += cy;
                    124: #endif
                    125:
                    126:          if (nx != cy_limb)
                    127:            {
                    128:              mpn_add_n (np, np, dp, dsize);
                    129:              q--;
                    130:            }
                    131:
                    132:          qp[i] = q;
                    133:        }
                    134:       else
                    135:        {
                    136:          mp_limb_t rx, r1, r0, p1, p0;
                    137:
                    138:           /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register
                    139:              usage when np[dsize-1] is used in an asm statement like
                    140:              umul_ppmm in udiv_qrnnd_preinv.  The symptom is seg faults due
                    141:              to registers being clobbered.  gcc 2.95 i386 doesn't have the
                    142:              problem. */
                    143:           {
                    144:             mp_limb_t  workaround = np[dsize - 1];
                    145:             if (PREINVERT_VIABLE && have_preinv)
                    146:               udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
                    147:             else
                    148:               udiv_qrnnd (q, r1, nx, workaround, dx);
                    149:           }
                    150:          umul_ppmm (p1, p0, d1, q);
                    151:
                    152:          r0 = np[dsize - 2];
                    153:          rx = 0;
                    154:          if (r1 < p1 || (r1 == p1 && r0 < p0))
                    155:            {
                    156:              p1 -= p0 < d1;
                    157:              p0 -= d1;
                    158:              q--;
                    159:              r1 += dx;
                    160:              rx = r1 < dx;
                    161:            }
                    162:
                    163:          p1 += r0 < p0;        /* cannot carry! */
                    164:          rx -= r1 < p1;        /* may become 11..1 if q is still too large */
                    165:          r1 -= p1;
                    166:          r0 -= p0;
                    167:
                    168:          cy_limb = mpn_submul_1 (np, dp, dsize - 2, q);
                    169:
                    170:          {
                    171:            mp_limb_t cy1, cy2;
                    172:            cy1 = r0 < cy_limb;
                    173:            r0 -= cy_limb;
                    174:            cy2 = r1 < cy1;
                    175:            r1 -= cy1;
                    176:            np[dsize - 1] = r1;
                    177:            np[dsize - 2] = r0;
                    178:            if (cy2 != rx)
                    179:              {
                    180:                mpn_add_n (np, np, dp, dsize);
                    181:                q--;
                    182:              }
                    183:          }
                    184:          qp[i] = q;
                    185:        }
                    186:     }
                    187:
                    188:   /* ______ ______ ______
                    189:     |__rx__|__r1__|__r0__|             partial remainder
                    190:            ______ ______
                    191:         - |__p1__|__p0__|              partial product to subtract
                    192:            ______ ______
                    193:         - |______|cylimb|
                    194:
                    195:      rx is -1, 0 or 1.  If rx=1, then q is correct (it should match
                    196:      carry out).  If rx=-1 then q is too large.  If rx=0, then q might
                    197:      be too large, but it is most likely correct.
                    198:   */
                    199:
                    200:   return most_significant_q_limb;
                    201: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>