OpenXM_contrib/gmp/mpn/generic/tdiv_qr.c - annotate

Return to tdiv_qr.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / generic
Annotation of OpenXM_contrib/gmp/mpn/generic/tdiv_qr.c, Revision 1.1

1.1     ! maekawa     1: /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
        !             2:    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
        !             3:    qxn is non-zero, generate that many fraction limbs and append them after the
        !             4:    other quotient limbs, and update the remainder accordingly.  The input
        !             5:    operands are unaffected.
        !             6:
        !             7:    Preconditions:
        !             8:    1. The most significant limb of the divisor must be non-zero.
        !             9:    2. No argument overlap is permitted.  (??? relax this ???)
        !            10:    3. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
        !            11:
        !            12:    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
        !            13:    complexity of multiplication.
        !            14:
        !            15: Copyright (C) 1997, 2000 Free Software Foundation, Inc.
        !            16:
        !            17: This file is part of the GNU MP Library.
        !            18:
        !            19: The GNU MP Library is free software; you can redistribute it and/or modify
        !            20: it under the terms of the GNU Lesser General Public License as published by
        !            21: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            22: option) any later version.
        !            23:
        !            24: The GNU MP Library is distributed in the hope that it will be useful, but
        !            25: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            26: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            27: License for more details.
        !            28:
        !            29: You should have received a copy of the GNU Lesser General Public License
        !            30: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            31: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            32: MA 02111-1307, USA. */
        !            33:
        !            34: #include "gmp.h"
        !            35: #include "gmp-impl.h"
        !            36: #include "longlong.h"
        !            37:
        !            38: #ifndef BZ_THRESHOLD /* a build may predefine this; otherwise the default below applies */
        !            39: #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD) /* limb count at which mpn_tdiv_qr calls mpn_bz_divrem_n instead of mpn_sb_divrem_mn */
        !            40: #endif /* BZ_THRESHOLD */
        !            41:
        !            42: /* Extract the middle limb from ((h,,l) << cnt).  The right shift of l is split as (l >> 1) >> (~cnt & (BITS_PER_MP_LIMB - 1)) so that cnt == 0 never needs a single full-width shift (assumes BITS_PER_MP_LIMB is a power of two).  Arguments are parenthesized; cnt is expanded twice, so pass side-effect-free operands.  */
        !            43: #define SHL(h,l,cnt) \
        !            44:   (((h) << (cnt)) | (((l) >> 1) >> ((~(cnt)) & (BITS_PER_MP_LIMB - 1))))
        !            45:
        !            46: void
        !            47: #if __STDC__
        !            48: mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
        !            49:             mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
        !            50: #else
        !            51: mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
        !            52:      mp_ptr qp;
        !            53:      mp_ptr rp;
        !            54:      mp_size_t qxn;
        !            55:      mp_srcptr np;
        !            56:      mp_size_t nn;
        !            57:      mp_srcptr dp;
        !            58:      mp_size_t dn;
        !            59: #endif
        !            60: {
        !            61:   /* Divide {np,nn} by {dp,dn}: nn-dn+1 quotient limbs go to qp, dn remainder limbs to rp.
        !            62:      FIXME: 1. qxn (fraction limbs) is not implemented; a non-zero qxn aborts just below.
        !            63:      2. pass allocated storage in additional parameter?
        !            64:   */
        !            65:   if (qxn != 0)
        !            66:     abort ();
        !            67:
        !            68:   switch (dn)
        !            69:     {
        !            70:     case 0:
        !            71:       DIVIDE_BY_ZERO; /* NOTE(review): no return/break follows; if this macro can return, control falls into case 1 -- confirm it traps */
        !            72:
        !            73:     case 1:
        !            74:       {
        !            75:        rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]); /* single-limb divisor: the call's result is stored as the one-limb remainder */
        !            76:        return;
        !            77:       }
        !            78:
        !            79:     case 2:
        !            80:       {
        !            81:        int cnt;
        !            82:        mp_ptr n2p, d2p;
        !            83:        mp_limb_t qhl, cy;
        !            84:        TMP_DECL (marker);
        !            85:        TMP_MARK (marker);
        !            86:        count_leading_zeros (cnt, dp[dn - 1]); /* cnt is used below as the left-shift applied to both operands */
        !            87:        if (cnt != 0)
        !            88:          {
        !            89:            d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
        !            90:            mpn_lshift (d2p, dp, dn, cnt);
        !            91:            n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
        !            92:            cy = mpn_lshift (n2p, np, nn, cnt);
        !            93:            n2p[nn] = cy; /* keep the bits shifted out of the top as an extra high limb */
        !            94:            qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p); /* divide nn+1 limbs only when the shift produced a non-zero carry limb */
        !            95:            if (cy == 0)
        !            96:              qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
        !            97:          }
        !            98:        else
        !            99:          {
        !           100:            d2p = (mp_ptr) dp;
        !           101:            n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB); /* work on a copy of the numerator; np itself is not written */
        !           102:            MPN_COPY (n2p, np, nn);
        !           103:            qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
        !           104:            qp[nn - 2] = qhl;   /* always store nn-dn+1 quotient limbs */
        !           105:          }
        !           106:
        !           107:        if (cnt != 0)
        !           108:          mpn_rshift (rp, n2p, dn, cnt); /* shift the low dn limbs back down by cnt to form the remainder */
        !           109:        else
        !           110:          MPN_COPY (rp, n2p, dn);
        !           111:        TMP_FREE (marker);
        !           112:        return;
        !           113:       }
        !           114:
        !           115:     default:
        !           116:       {
        !           117:        int adjust;
        !           118:        TMP_DECL (marker);
        !           119:        TMP_MARK (marker);
        !           120:        adjust = np[nn - 1] >= dp[dn - 1];      /* conservative tests for quotient size */
        !           121:        if (nn + adjust >= 2 * dn) /* otherwise fall through to the short-quotient code further down */
        !           122:          {
        !           123:            mp_ptr n2p, d2p;
        !           124:            mp_limb_t cy;
        !           125:            int cnt;
        !           126:            count_leading_zeros (cnt, dp[dn - 1]);
        !           127:
        !           128:            qp[nn - dn] = 0;                    /* zero high quotient limb */
        !           129:            if (cnt != 0)                       /* normalize divisor if needed */
        !           130:              {
        !           131:                d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
        !           132:                mpn_lshift (d2p, dp, dn, cnt);
        !           133:                n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
        !           134:                cy = mpn_lshift (n2p, np, nn, cnt);
        !           135:                n2p[nn] = cy;
        !           136:                nn += adjust; /* include the extra high limb in the division only when adjust is set */
        !           137:              }
        !           138:            else
        !           139:              {
        !           140:                d2p = (mp_ptr) dp;
        !           141:                n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
        !           142:                MPN_COPY (n2p, np, nn);
        !           143:                n2p[nn] = 0;
        !           144:                nn += adjust;
        !           145:              }
        !           146:
        !           147:            if (dn == 2) /* NOTE(review): dn == 2 returns from case 2 above, so this test looks unreachable under default: */
        !           148:              mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
        !           149:            else if (dn < BZ_THRESHOLD)
        !           150:              mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
        !           151:            else
        !           152:              {
        !           153:                /* Perform 2*dn / dn limb divisions as long as the limbs
        !           154:                   in np last.  */
        !           155:                mp_ptr q2p = qp + nn - 2 * dn;
        !           156:                n2p += nn - 2 * dn; /* point at the top 2*dn numerator limbs */
        !           157:                mpn_bz_divrem_n (q2p, n2p, d2p, dn); /* the return value of this first chunk is not examined */
        !           158:                nn -= dn;
        !           159:                while (nn >= 2 * dn)
        !           160:                  {
        !           161:                    mp_limb_t c;
        !           162:                    q2p -= dn;  n2p -= dn; /* step both windows down by dn limbs */
        !           163:                    c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
        !           164:                    ASSERT_ALWAYS (c == 0);
        !           165:                    nn -= dn;
        !           166:                  }
        !           167:
        !           168:                if (nn != dn)
        !           169:                  {
        !           170:                    n2p -= nn - dn; /* fewer than dn new limbs remain; move the window down over them */
        !           171:                    /* In theory, we could fall out to the cute code below
        !           172:                       since we now have exactly the situation that code
        !           173:                       is designed to handle.  We botch this badly and call
        !           174:                       the basic mpn_sb_divrem_mn!  */
        !           175:                    if (dn == 2) /* NOTE(review): looks unreachable here as well, since dn == 2 is handled by case 2 */
        !           176:                      mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
        !           177:                    else
        !           178:                      mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
        !           179:                  }
        !           180:              }
        !           181:
        !           182:
        !           183:            if (cnt != 0)
        !           184:              mpn_rshift (rp, n2p, dn, cnt); /* shift the low dn limbs back down by cnt to form the remainder */
        !           185:            else
        !           186:              MPN_COPY (rp, n2p, dn);
        !           187:            TMP_FREE (marker);
        !           188:            return;
        !           189:          }
        !           190:
        !           191:        /* When we come here, the numerator/partial remainder is less
        !           192:           than twice the size of the denominator.  */
        !           193:
        !           194:          {
        !           195:            /* Problem:
        !           196:
        !           197:               Divide a numerator N with nn limbs by a denominator D with dn
        !           198:               limbs forming a quotient of nn-dn+1 limbs.  When qn is small
        !           199:               compared to dn, conventional division algorithms perform poorly.
        !           200:               We want an algorithm that has an expected running time that is
        !           201:               dependent only on qn.  It is assumed that the most significant
        !           202:               limb of the numerator is smaller than the most significant limb
        !           203:               of the denominator.
        !           204:
        !           205:               Algorithm (very informally stated):
        !           206:
        !           207:               1) Divide the 2 x qn most significant limbs from the numerator
        !           208:                  by the qn most significant limbs from the denominator.  Call
        !           209:                  the result qest.  This is either the correct quotient or is
        !           210:                  1 or 2 too large.  Compute the remainder from the
        !           211:                  division.  (This step is implemented by a mpn_divrem call.)
        !           212:
        !           213:               2) Is the most significant limb from the remainder < p, where p
        !           214:                  is the product of the most significant limb from the quotient
        !           215:                  and the next(d).  (Next(d) denotes the next ignored limb from
        !           216:                  the denominator.)  If it is, decrement qest, and adjust the
        !           217:                  remainder accordingly.
        !           218:
        !           219:               3) Is the remainder >= qest?  If it is, qest is the desired
        !           220:                  quotient.  The algorithm terminates.
        !           221:
        !           222:               4) Subtract qest x next(d) from the remainder.  If there is
        !           223:                  borrow out, decrement qest, and adjust the remainder
        !           224:                  accordingly.
        !           225:
        !           226:               5) Skip one word from the denominator (i.e., let next(d) denote
        !           227:                  the next less significant limb).  */
        !           228:
        !           229:            mp_size_t qn;
        !           230:            mp_ptr n2p, d2p;
        !           231:            mp_ptr tp;
        !           232:            mp_limb_t cy;
        !           233:            mp_size_t in, rn;
        !           234:            mp_limb_t quotient_too_large;
        !           235:            int cnt;
        !           236:
        !           237:            qn = nn - dn;
        !           238:            qp[qn] = 0;                         /* zero high quotient limb */
        !           239:            qn += adjust;                       /* qn cannot become bigger */
        !           240:
        !           241:            if (qn == 0) /* nn == dn and adjust == 0: quotient limb stays 0, remainder is the numerator */
        !           242:              {
        !           243:                MPN_COPY (rp, np, dn);
        !           244:                TMP_FREE (marker);
        !           245:                return;
        !           246:              }
        !           247:
        !           248:            in = dn - qn;               /* (at least partially) ignored # of limbs in ops */
        !           249:            /* Normalize denominator by shifting it to the left such that its
        !           250:               most significant bit is set.  Then shift the numerator the same
        !           251:               amount, to mathematically preserve quotient.  */
        !           252:            count_leading_zeros (cnt, dp[dn - 1]);
        !           253:            if (cnt != 0)
        !           254:              {
        !           255:                d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
        !           256:
        !           257:                mpn_lshift (d2p, dp + in, qn, cnt);
        !           258:                d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt); /* bring in the top cnt bits of the next lower divisor limb */
        !           259:
        !           260:                n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
        !           261:                cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
        !           262:                if (adjust)
        !           263:                  {
        !           264:                    n2p[2 * qn] = cy;
        !           265:                    n2p++; /* drop the lowest limb so the carry limb becomes the top of a 2*qn-limb window */
        !           266:                  }
        !           267:                else
        !           268:                  {
        !           269:                    n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt); /* bring in the top cnt bits of the next lower numerator limb */
        !           270:                  }
        !           271:              }
        !           272:            else
        !           273:              {
        !           274:                d2p = (mp_ptr) dp + in; /* already normalized: use the top qn divisor limbs in place */
        !           275:
        !           276:                n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
        !           277:                MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
        !           278:                if (adjust)
        !           279:                  {
        !           280:                    n2p[2 * qn] = 0;
        !           281:                    n2p++;
        !           282:                  }
        !           283:              }
        !           284:
        !           285:            /* Get an approximate quotient using the extracted operands.  */
        !           286:            if (qn == 1)
        !           287:              {
        !           288:                mp_limb_t q0, r0;
        !           289:                mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
        !           290:                /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
        !           291:                   temps here.  This doesn't hurt code quality on any machines
        !           292:                   so we do it unconditionally.  */
        !           293:                gcc272bug_n1 = n2p[1];
        !           294:                gcc272bug_n0 = n2p[0];
        !           295:                gcc272bug_d0 = d2p[0];
        !           296:                udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0);
        !           297:                n2p[0] = r0;
        !           298:                qp[0] = q0;
        !           299:              }
        !           300:            else if (qn == 2)
        !           301:              mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
        !           302:            else if (qn < BZ_THRESHOLD)
        !           303:              mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
        !           304:            else
        !           305:              mpn_bz_divrem_n (qp, n2p, d2p, qn);
        !           306:
        !           307:            rn = qn; /* rn tracks how many limbs of n2p currently hold the partial remainder */
        !           308:            /* Multiply the first ignored divisor limb by the most significant
        !           309:               quotient limb.  If that product is > the partial remainder's
        !           310:               most significant limb, we know the quotient is too large.  This
        !           311:               test quickly catches most cases where the quotient is too large;
        !           312:               it catches all cases where the quotient is 2 too large.  */
        !           313:            {
        !           314:              mp_limb_t dl, x;
        !           315:              mp_limb_t h, l;
        !           316:
        !           317:              if (in - 2 < 0) /* no limb below dp[in - 1]: shift in zeros */
        !           318:                dl = 0;
        !           319:              else
        !           320:                dl = dp[in - 2];
        !           321:
        !           322:              x = SHL (dp[in - 1], dl, cnt); /* first ignored divisor limb, shifted by the same cnt as the operands */
        !           323:              umul_ppmm (h, l, x, qp[qn - 1]);
        !           324:
        !           325:              if (n2p[qn - 1] < h)
        !           326:                {
        !           327:                  mp_limb_t cy;
        !           328:
        !           329:                  mpn_decr_u (qp, (mp_limb_t) 1);
        !           330:                  cy = mpn_add_n (n2p, n2p, d2p, qn); /* compensate the decrement by adding the (shifted) divisor limbs back */
        !           331:                  if (cy)
        !           332:                    {
        !           333:                      /* The partial remainder is safely large.  */
        !           334:                      n2p[qn] = cy;
        !           335:                      ++rn;
        !           336:                    }
        !           337:                }
        !           338:            }
        !           339:
        !           340:            quotient_too_large = 0; /* becomes non-zero when a borrow below shows qp is still too big */
        !           341:            if (cnt != 0)
        !           342:              {
        !           343:                mp_limb_t cy1, cy2;
        !           344:
        !           345:                /* Append partially used numerator limb to partial remainder.  */
        !           346:                cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
        !           347:                n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);
        !           348:
        !           349:                /* Update partial remainder with partially used divisor limb.  */
        !           350:                cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
        !           351:                if (qn != rn)
        !           352:                  {
        !           353:                    if (n2p[qn] < cy2) /* the extra high limb is expected to absorb the borrow; abort otherwise */
        !           354:                      abort ();
        !           355:                    n2p[qn] -= cy2;
        !           356:                  }
        !           357:                else
        !           358:                  {
        !           359:                    n2p[qn] = cy1 - cy2;
        !           360:
        !           361:                    quotient_too_large = (cy1 < cy2);
        !           362:                    ++rn;
        !           363:                  }
        !           364:                --in; /* limb in - 1 of both operands has now been fully accounted for */
        !           365:              }
        !           366:            /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
        !           367:
        !           368:            tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); /* scratch for the product of qp and the low in divisor limbs */
        !           369:
        !           370:            if (in < qn) /* both branches pass the longer operand to mpn_mul first */
        !           371:              {
        !           372:                if (in == 0)
        !           373:                  {
        !           374:                    MPN_COPY (rp, n2p, rn); /* no ignored limbs remain, so n2p is copied out as the remainder */
        !           375:                    if (rn != dn)
        !           376:                      abort ();
        !           377:                    goto foo;
        !           378:                  }
        !           379:                mpn_mul (tp, qp, qn, dp, in);
        !           380:              }
        !           381:            else
        !           382:              mpn_mul (tp, dp, in, qp, qn);
        !           383:
        !           384:            cy = mpn_sub (n2p, n2p, rn, tp + in, qn); /* subtract the product's high qn limbs from the partial remainder */
        !           385:            MPN_COPY (rp + in, n2p, dn - in);
        !           386:            quotient_too_large |= cy;
        !           387:            cy = mpn_sub_n (rp, np, tp, in); /* low in remainder limbs: numerator minus the product's low limbs */
        !           388:            cy = mpn_sub_1 (rp + in, rp + in, rn, cy); /* propagate that borrow into the upper remainder limbs */
        !           389:            quotient_too_large |= cy;
        !           390:          foo:
        !           391:            if (quotient_too_large) /* final correction: decrement the quotient and add the divisor back */
        !           392:              {
        !           393:                mpn_decr_u (qp, (mp_limb_t) 1);
        !           394:                mpn_add_n (rp, rp, dp, dn);
        !           395:              }
        !           396:          }
        !           397:        TMP_FREE (marker);
        !           398:        return;
        !           399:       }
        !           400:     }
        !           401: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>