OpenXM_contrib/gmp/mpn/generic/tdiv_qr.c - annotate

Return to tdiv_qr.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / generic
Annotation of OpenXM_contrib/gmp/mpn/generic/tdiv_qr.c, Revision 1.1.1.1

1.1       maekawa     1: /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
                      2:    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
                      3:    qxn is non-zero, generate that many fraction limbs and append them after the
                      4:    other quotient limbs, and update the remainder accordingly.  The input
                      5:    operands are unaffected.
                      6:
                      7:    Preconditions:
                      8:    1. The most significant limb of the divisor must be non-zero.
                      9:    2. No argument overlap is permitted.  (??? relax this ???)
                     10:    3. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
                     11:
                     12:    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
                     13:    complexity of multiplication.
                     14:
                     15: Copyright (C) 1997, 2000 Free Software Foundation, Inc.
                     16:
                     17: This file is part of the GNU MP Library.
                     18:
                     19: The GNU MP Library is free software; you can redistribute it and/or modify
                     20: it under the terms of the GNU Lesser General Public License as published by
                     21: the Free Software Foundation; either version 2.1 of the License, or (at your
                     22: option) any later version.
                     23:
                     24: The GNU MP Library is distributed in the hope that it will be useful, but
                     25: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     26: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     27: License for more details.
                     28:
                     29: You should have received a copy of the GNU Lesser General Public License
                     30: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     31: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     32: MA 02111-1307, USA. */
                     33:
                     34: #include "gmp.h"
                     35: #include "gmp-impl.h"
                     36: #include "longlong.h"
                     37:
                     38: #ifndef BZ_THRESHOLD
                         /* Default operand size (in limbs) used by mpn_tdiv_qr below to choose
                            between mpn_sb_divrem_mn (sizes below the threshold) and
                            mpn_bz_divrem_n (sizes at or above it).  Only defined here when the
                            build has not already supplied a value.  */
                     39: #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
                     40: #endif
                     41:
                     42: /* Extract the middle limb from ((h,,l) << cnt), i.e. (h << cnt) combined
                            with the bits of l that are shifted up into that limb.
                            The low part is written as (l >> 1) >> (~cnt & (BITS_PER_MP_LIMB - 1))
                            instead of l >> (BITS_PER_MP_LIMB - cnt) so that cnt == 0 never shifts
                            by the full limb width (assuming BITS_PER_MP_LIMB is a power of two
                            and 0 <= cnt < BITS_PER_MP_LIMB -- TODO confirm).
                            NOTE(review): the arguments are not parenthesized in the expansion and
                            cnt is expanded twice, so pass only simple, side-effect-free
                            expressions.  */
                     43: #define SHL(h,l,cnt) \
                     44:   ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1))))
                     45:
                     46: void
                     47: #if __STDC__
                     48: mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                     49:             mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
                     50: #else
                     51: mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
                     52:      mp_ptr qp;
                     53:      mp_ptr rp;
                     54:      mp_size_t qxn;
                     55:      mp_srcptr np;
                     56:      mp_size_t nn;
                     57:      mp_srcptr dp;
                     58:      mp_size_t dn;
                     59: #endif
                     60: {
                           /* Review overview: the function dispatches on the divisor size dn.
                              dn == 1 and dn == 2 call dedicated routines.  For larger dn the
                              code either divides the whole (normalized) numerator when
                              nn + adjust >= 2 * dn, or, for short quotients, divides only the
                              high limbs and then corrects the estimate against the divisor
                              limbs that were ignored.  Every branch that uses TMP_ALLOC calls
                              TMP_FREE before returning.  */
                     61:   /* FIXME:
                     62:      1. qxn
                     63:      2. pass allocated storage in additional parameter?
                     64:   */
                           /* Fraction limbs are not implemented: any non-zero qxn aborts.  */
                     65:   if (qxn != 0)
                     66:     abort ();
                     67:
                     68:   switch (dn)
                     69:     {
                     70:     case 0:
                     71:       DIVIDE_BY_ZERO;
                               /* NOTE(review): there is no break or return here; presumably
                                  DIVIDE_BY_ZERO never returns, otherwise control falls into
                                  case 1 and reads dp[0] -- confirm.  */
                     72:
                     73:     case 1:
                     74:       {
                                /* Single-limb divisor: the value returned by mpn_divmod_1 is
                                   stored as the one-limb remainder.  */
                     75:        rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
                     76:        return;
                     77:       }
                     78:
                     79:     case 2:
                     80:       {
                     81:        int cnt;
                     82:        mp_ptr n2p, d2p;
                     83:        mp_limb_t qhl, cy;
                     84:        TMP_DECL (marker);
                     85:        TMP_MARK (marker);
                     86:        count_leading_zeros (cnt, dp[dn - 1]);
                     87:        if (cnt != 0)
                     88:          {
                                    /* Divisor is not normalized: work on copies of both
                                       operands shifted left by cnt bits.  The limb shifted out
                                       of the numerator (cy) is kept in n2p[nn] and is included
                                       in the division only when it is non-zero.  */
                     89:            d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
                     90:            mpn_lshift (d2p, dp, dn, cnt);
                     91:            n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
                     92:            cy = mpn_lshift (n2p, np, nn, cnt);
                     93:            n2p[nn] = cy;
                     94:            qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
                     95:            if (cy == 0)
                     96:              qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
                     97:          }
                     98:        else
                     99:          {
                                    /* Divisor already normalized: use dp directly and divide a
                                       scratch copy of the numerator, since np must stay
                                       unmodified.  */
                    100:            d2p = (mp_ptr) dp;
                    101:            n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
                    102:            MPN_COPY (n2p, np, nn);
                    103:            qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
                    104:            qp[nn - 2] = qhl;   /* always store nn-dn+1 quotient limbs */
                    105:          }
                    106:
                                /* The remainder is taken from the low dn limbs of n2p
                                   (presumably left there by mpn_divrem_2 -- confirm) and
                                   shifted back down by cnt to undo the normalization.  */
                    107:        if (cnt != 0)
                    108:          mpn_rshift (rp, n2p, dn, cnt);
                    109:        else
                    110:          MPN_COPY (rp, n2p, dn);
                    111:        TMP_FREE (marker);
                    112:        return;
                    113:       }
                    114:
                    115:     default:
                    116:       {
                    117:        int adjust;
                    118:        TMP_DECL (marker);
                    119:        TMP_MARK (marker);
                                /* adjust is 1 when the top numerator limb is >= the top divisor
                                   limb, 0 otherwise.  */
                    120:        adjust = np[nn - 1] >= dp[dn - 1];      /* conservative tests for quotient size */
                    121:        if (nn + adjust >= 2 * dn)
                    122:          {
                    123:            mp_ptr n2p, d2p;
                    124:            mp_limb_t cy;
                    125:            int cnt;
                    126:            count_leading_zeros (cnt, dp[dn - 1]);
                    127:
                    128:            qp[nn - dn] = 0;                    /* zero high quotient limb */
                    129:            if (cnt != 0)                       /* normalize divisor if needed */
                    130:              {
                    131:                d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
                    132:                mpn_lshift (d2p, dp, dn, cnt);
                    133:                n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
                    134:                cy = mpn_lshift (n2p, np, nn, cnt);
                    135:                n2p[nn] = cy;
                                        /* The extra top limb n2p[nn] takes part in the division
                                           only when adjust is set.  */
                    136:                nn += adjust;
                    137:              }
                    138:            else
                    139:              {
                    140:                d2p = (mp_ptr) dp;
                    141:                n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
                    142:                MPN_COPY (n2p, np, nn);
                    143:                n2p[nn] = 0;
                    144:                nn += adjust;
                    145:              }
                    146:
                                    /* NOTE(review): this is the default arm of switch (dn) and dn
                                       is never modified, so the dn == 2 tests in this branch
                                       cannot be true here.  */
                    147:            if (dn == 2)
                    148:              mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
                    149:            else if (dn < BZ_THRESHOLD)
                    150:              mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
                    151:            else
                    152:              {
                    153:                /* Perform 2*dn / dn limb divisions as long as the limbs
                    154:                   in np last.  */
                    155:                mp_ptr q2p = qp + nn - 2 * dn;
                    156:                n2p += nn - 2 * dn;
                    157:                mpn_bz_divrem_n (q2p, n2p, d2p, dn);
                    158:                nn -= dn;
                    159:                while (nn >= 2 * dn)
                    160:                  {
                    161:                    mp_limb_t c;
                    162:                    q2p -= dn;  n2p -= dn;
                    163:                    c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
                    164:                    ASSERT_ALWAYS (c == 0);
                    165:                    nn -= dn;
                    166:                  }
                    167:
                                        /* Fewer than dn numerator limbs remain below the current
                                           window; step n2p back over them and finish with one
                                           more division.  */
                    168:                if (nn != dn)
                    169:                  {
                    170:                    n2p -= nn - dn;
                    171:                    /* In theory, we could fall out to the cute code below
                    172:                       since we now have exactly the situation that code
                    173:                       is designed to handle.  We botch this badly and call
                    174:                       the basic mpn_sb_divrem_mn!  */
                    175:                    if (dn == 2)
                    176:                      mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
                    177:                    else
                    178:                      mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
                    179:                  }
                    180:              }
                    181:
                    182:
                                    /* Copy the low dn limbs of n2p to rp as the remainder,
                                       undoing the normalization shift if one was applied.  */
                    183:            if (cnt != 0)
                    184:              mpn_rshift (rp, n2p, dn, cnt);
                    185:            else
                    186:              MPN_COPY (rp, n2p, dn);
                    187:            TMP_FREE (marker);
                    188:            return;
                    189:          }
                    190:
                    191:        /* When we come here, the numerator/partial remainder is less
                    192:           than twice the size of the denominator.  */
                    193:
                    194:          {
                    195:            /* Problem:
                    196:
                    197:               Divide a numerator N with nn limbs by a denominator D with dn
                    198:               limbs forming a quotient of nn-dn+1 limbs.  When qn is small
                    199:               compared to dn, conventional division algorithms perform poorly.
                    200:               We want an algorithm that has an expected running time that is
                    201:               dependent only on qn.  It is assumed that the most significant
                    202:               limb of the numerator is smaller than the most significant limb
                    203:               of the denominator.
                    204:
                    205:               Algorithm (very informally stated):
                    206:
                    207:               1) Divide the 2 x qn most significant limbs from the numerator
                    208:                  by the qn most significant limbs from the denominator.  Call
                    209:                  the result qest.  This is either the correct quotient, or it
                    210:                  is 1 or 2 too large.  Compute the remainder from the
                    211:                  division.  (This step is implemented by a mpn_divrem call.)
                    212:
                    213:               2) Is the most significant limb from the remainder < p, where p
                    214:                  is the product of the most significant limb from the quotient
                    215:                  and the next(d).  (Next(d) denotes the next ignored limb from
                    216:                  the denominator.)  If it is, decrement qest, and adjust the
                    217:                  remainder accordingly.
                    218:
                    219:               3) Is the remainder >= qest?  If it is, qest is the desired
                    220:                  quotient.  The algorithm terminates.
                    221:
                    222:               4) Subtract qest x next(d) from the remainder.  If there is
                    223:                  borrow out, decrement qest, and adjust the remainder
                    224:                  accordingly.
                    225:
                    226:               5) Skip one word from the denominator (i.e., let next(d) denote
                    227:                  the next less significant limb).  */
                    228:
                    229:            mp_size_t qn;
                    230:            mp_ptr n2p, d2p;
                    231:            mp_ptr tp;
                    232:            mp_limb_t cy;
                    233:            mp_size_t in, rn;
                    234:            mp_limb_t quotient_too_large;
                    235:            int cnt;
                    236:
                    237:            qn = nn - dn;
                    238:            qp[qn] = 0;                         /* zero high quotient limb */
                    239:            qn += adjust;                       /* qn cannot become bigger */
                    240:
                                    /* qn == 0 means nn == dn with adjust == 0 (given the
                                       nn >= dn precondition): qp[0] was cleared just above and
                                       the remainder is simply a copy of the numerator.  */
                    241:            if (qn == 0)
                    242:              {
                    243:                MPN_COPY (rp, np, dn);
                    244:                TMP_FREE (marker);
                    245:                return;
                    246:              }
                    247:
                    248:            in = dn - qn;               /* (at least partially) ignored # of limbs in ops */
                    249:            /* Normalize denominator by shifting it to the left such that its
                    250:               most significant bit is set.  Then shift the numerator the same
                    251:               amount, to mathematically preserve quotient.  */
                    252:            count_leading_zeros (cnt, dp[dn - 1]);
                    253:            if (cnt != 0)
                    254:              {
                    255:                d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
                    256:
                                        /* Shift the top qn divisor limbs and bring in the high
                                           bits of the first ignored limb dp[in - 1].  */
                    257:                mpn_lshift (d2p, dp + in, qn, cnt);
                    258:                d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt);
                    259:
                    260:                n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
                    261:                cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
                    262:                if (adjust)
                    263:                  {
                                            /* Keep the shifted-out limb as an extra top limb
                                               and drop the lowest limb of the window.  */
                    264:                    n2p[2 * qn] = cy;
                    265:                    n2p++;
                    266:                  }
                    267:                else
                    268:                  {
                    269:                    n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt);
                    270:                  }
                    271:              }
                    272:            else
                    273:              {
                    274:                d2p = (mp_ptr) dp + in;
                    275:
                    276:                n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
                    277:                MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
                    278:                if (adjust)
                    279:                  {
                    280:                    n2p[2 * qn] = 0;
                    281:                    n2p++;
                    282:                  }
                    283:              }
                    284:
                    285:            /* Get an approximate quotient using the extracted operands.  */
                    286:            if (qn == 1)
                    287:              {
                    288:                mp_limb_t q0, r0;
                    289:                mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
                    290:                /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
                    291:                   temps here.  This doesn't hurt code quality on any machines
                    292:                   so we do it unconditionally.  */
                    293:                gcc272bug_n1 = n2p[1];
                    294:                gcc272bug_n0 = n2p[0];
                    295:                gcc272bug_d0 = d2p[0];
                    296:                udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0);
                    297:                n2p[0] = r0;
                    298:                qp[0] = q0;
                    299:              }
                    300:            else if (qn == 2)
                    301:              mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
                    302:            else if (qn < BZ_THRESHOLD)
                    303:              mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
                    304:            else
                    305:              mpn_bz_divrem_n (qp, n2p, d2p, qn);
                    306:
                                    /* rn tracks the current length, in limbs, of the partial
                                       remainder held at n2p.  */
                    307:            rn = qn;
                    308:            /* Multiply the first ignored divisor limb by the most significant
                    309:               quotient limb.  If that product is > the partial remainder's
                    310:               most significant limb, we know the quotient is too large.  This
                    311:               test quickly catches most cases where the quotient is too large;
                    312:               it catches all cases where the quotient is 2 too large.  */
                    313:            {
                    314:              mp_limb_t dl, x;
                    315:              mp_limb_t h, l;
                    316:
                                      /* dl is the limb below the first ignored divisor limb, or
                                         0 when no such limb exists.  */
                    317:              if (in - 2 < 0)
                    318:                dl = 0;
                    319:              else
                    320:                dl = dp[in - 2];
                    321:
                    322:              x = SHL (dp[in - 1], dl, cnt);
                    323:              umul_ppmm (h, l, x, qp[qn - 1]);
                    324:
                    325:              if (n2p[qn - 1] < h)
                    326:                {
                    327:                  mp_limb_t cy;
                    328:
                                          /* Estimate too large: decrement the quotient and add
                                             the (shifted) divisor back to the remainder.  */
                    329:                  mpn_decr_u (qp, (mp_limb_t) 1);
                    330:                  cy = mpn_add_n (n2p, n2p, d2p, qn);
                    331:                  if (cy)
                    332:                    {
                    333:                      /* The partial remainder is safely large.  */
                    334:                      n2p[qn] = cy;
                    335:                      ++rn;
                    336:                    }
                    337:                }
                    338:            }
                    339:
                    340:            quotient_too_large = 0;
                    341:            if (cnt != 0)
                    342:              {
                    343:                mp_limb_t cy1, cy2;
                    344:
                    345:                /* Append partially used numerator limb to partial remainder.  */
                    346:                cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
                    347:                n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);
                    348:
                    349:                /* Update partial remainder with partially used divisor limb.  */
                    350:                cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
                    351:                if (qn != rn)
                    352:                  {
                                            /* Internal consistency check: the extra remainder
                                               limb must be able to absorb the borrow.  */
                    353:                    if (n2p[qn] < cy2)
                    354:                      abort ();
                    355:                    n2p[qn] -= cy2;
                    356:                  }
                    357:                else
                    358:                  {
                    359:                    n2p[qn] = cy1 - cy2;
                    360:
                    361:                    quotient_too_large = (cy1 < cy2);
                    362:                    ++rn;
                    363:                  }
                    364:                --in;
                    365:              }
                    366:            /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
                    367:
                    368:            tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
                    369:
                                    /* Form tp = quotient * (the remaining `in' low divisor
                                       limbs).  The two mpn_mul calls differ only in operand
                                       order: the longer operand is passed first (presumably an
                                       mpn_mul requirement -- confirm).  */
                    370:            if (in < qn)
                    371:              {
                    372:                if (in == 0)
                    373:                  {
                                            /* No ignored divisor limbs remain, so the partial
                                               remainder is the final one; rn != dn here is
                                               treated as an internal error.  */
                    374:                    MPN_COPY (rp, n2p, rn);
                    375:                    if (rn != dn)
                    376:                      abort ();
                    377:                    goto foo;
                    378:                  }
                    379:                mpn_mul (tp, qp, qn, dp, in);
                    380:              }
                    381:            else
                    382:              mpn_mul (tp, dp, in, qp, qn);
                    383:
                                    /* Subtract the product from the full remainder: the high
                                       part of tp from the partial remainder, the low `in' limbs
                                       of tp from the low numerator limbs.  Any borrow out marks
                                       the quotient as one too large.  */
                    384:            cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
                    385:            MPN_COPY (rp + in, n2p, dn - in);
                    386:            quotient_too_large |= cy;
                    387:            cy = mpn_sub_n (rp, np, tp, in);
                    388:            cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
                    389:            quotient_too_large |= cy;
                    390:          foo:
                                    /* Final correction: decrement the quotient and add the
                                       divisor back into the remainder.  */
                    391:            if (quotient_too_large)
                    392:              {
                    393:                mpn_decr_u (qp, (mp_limb_t) 1);
                    394:                mpn_add_n (rp, rp, dp, dn);
                    395:              }
                    396:          }
                    397:        TMP_FREE (marker);
                    398:        return;
                    399:       }
                    400:     }
                    401: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>