OpenXM_contrib/gmp/mpn/generic/tdiv_qr.c - annotate

Return to tdiv_qr.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / generic
Annotation of OpenXM_contrib/gmp/mpn/generic/tdiv_qr.c, Revision 1.1.1.2

1.1       maekawa     1: /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
                      2:    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
                      3:    qxn is non-zero, generate that many fraction limbs and append them after the
                      4:    other quotient limbs, and update the remainder accordningly.  The input
                      5:    operands are unaffected.
                      6:
                      7:    Preconditions:
                      8:    1. The most significant limb of of the divisor must be non-zero.
                      9:    2. No argument overlap is permitted.  (??? relax this ???)
                     10:    3. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
                     11:
                     12:    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
                     13:    complexity of multiplication.
                     14:
1.1.1.2 ! ohara      15: Copyright 1997, 2000, 2001, 2002 Free Software Foundation, Inc.
1.1       maekawa    16:
                     17: This file is part of the GNU MP Library.
                     18:
                     19: The GNU MP Library is free software; you can redistribute it and/or modify
                     20: it under the terms of the GNU Lesser General Public License as published by
                     21: the Free Software Foundation; either version 2.1 of the License, or (at your
                     22: option) any later version.
                     23:
                     24: The GNU MP Library is distributed in the hope that it will be useful, but
                     25: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     26: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     27: License for more details.
                     28:
                     29: You should have received a copy of the GNU Lesser General Public License
                     30: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     31: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     32: MA 02111-1307, USA. */
                     33:
1.1.1.2 ! ohara      34: #include <stdlib.h>
1.1       maekawa    35: #include "gmp.h"
                     36: #include "gmp-impl.h"
                     37: #include "longlong.h"
                     38:
                     39:
                     40: void
                     41: mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                     42:             mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
                     43: {
                     44:   /* FIXME:
                     45:      1. qxn
                     46:      2. pass allocated storage in additional parameter?
                     47:   */
                     48:   if (qxn != 0)
                     49:     abort ();
                     50:
1.1.1.2 ! ohara      51:   ASSERT (qxn >= 0);
        !            52:   ASSERT (nn >= 0);
        !            53:   ASSERT (dn >= 0);
        !            54:   ASSERT (dn == 0 || dp[dn - 1] != 0);
        !            55:   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
        !            56:   ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
        !            57:
1.1       maekawa    58:   switch (dn)
                     59:     {
                     60:     case 0:
                     61:       DIVIDE_BY_ZERO;
                     62:
                     63:     case 1:
                     64:       {
                     65:        rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
                     66:        return;
                     67:       }
                     68:
                     69:     case 2:
                     70:       {
                     71:        mp_ptr n2p, d2p;
                     72:        mp_limb_t qhl, cy;
                     73:        TMP_DECL (marker);
                     74:        TMP_MARK (marker);
1.1.1.2 ! ohara      75:        if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
1.1       maekawa    76:          {
1.1.1.2 ! ohara      77:            int cnt;
        !            78:            mp_limb_t dtmp[2];
        !            79:            count_leading_zeros (cnt, dp[1]);
        !            80:            cnt -= GMP_NAIL_BITS;
        !            81:            d2p = dtmp;
        !            82:            d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
        !            83:            d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
1.1       maekawa    84:            n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
                     85:            cy = mpn_lshift (n2p, np, nn, cnt);
                     86:            n2p[nn] = cy;
                     87:            qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
                     88:            if (cy == 0)
1.1.1.2 ! ohara      89:              qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
        !            90:            mpn_rshift (rp, n2p, (mp_size_t) 2, cnt);
1.1       maekawa    91:          }
                     92:        else
                     93:          {
                     94:            d2p = (mp_ptr) dp;
                     95:            n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
                     96:            MPN_COPY (n2p, np, nn);
                     97:            qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
1.1.1.2 ! ohara      98:            qp[nn - 2] = qhl;   /* always store nn-2+1 quotient limbs */
        !            99:            MPN_COPY (rp, n2p, 2);
1.1       maekawa   100:          }
                    101:        TMP_FREE (marker);
                    102:        return;
                    103:       }
                    104:
                    105:     default:
                    106:       {
                    107:        int adjust;
                    108:        TMP_DECL (marker);
                    109:        TMP_MARK (marker);
                    110:        adjust = np[nn - 1] >= dp[dn - 1];      /* conservative tests for quotient size */
                    111:        if (nn + adjust >= 2 * dn)
                    112:          {
                    113:            mp_ptr n2p, d2p;
                    114:            mp_limb_t cy;
                    115:            int cnt;
                    116:
1.1.1.2 ! ohara     117:            qp[nn - dn] = 0;                      /* zero high quotient limb */
        !           118:            if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
1.1       maekawa   119:              {
1.1.1.2 ! ohara     120:                count_leading_zeros (cnt, dp[dn - 1]);
        !           121:                cnt -= GMP_NAIL_BITS;
1.1       maekawa   122:                d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
                    123:                mpn_lshift (d2p, dp, dn, cnt);
                    124:                n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
                    125:                cy = mpn_lshift (n2p, np, nn, cnt);
                    126:                n2p[nn] = cy;
                    127:                nn += adjust;
                    128:              }
                    129:            else
                    130:              {
1.1.1.2 ! ohara     131:                cnt = 0;
1.1       maekawa   132:                d2p = (mp_ptr) dp;
                    133:                n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
                    134:                MPN_COPY (n2p, np, nn);
                    135:                n2p[nn] = 0;
                    136:                nn += adjust;
                    137:              }
                    138:
                    139:            if (dn == 2)
                    140:              mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
1.1.1.2 ! ohara     141:            else if (dn < DIV_DC_THRESHOLD)
1.1       maekawa   142:              mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
                    143:            else
                    144:              {
                    145:                /* Perform 2*dn / dn limb divisions as long as the limbs
                    146:                   in np last.  */
                    147:                mp_ptr q2p = qp + nn - 2 * dn;
                    148:                n2p += nn - 2 * dn;
1.1.1.2 ! ohara     149:                mpn_dc_divrem_n (q2p, n2p, d2p, dn);
1.1       maekawa   150:                nn -= dn;
                    151:                while (nn >= 2 * dn)
                    152:                  {
                    153:                    mp_limb_t c;
                    154:                    q2p -= dn;  n2p -= dn;
1.1.1.2 ! ohara     155:                    c = mpn_dc_divrem_n (q2p, n2p, d2p, dn);
1.1       maekawa   156:                    ASSERT_ALWAYS (c == 0);
                    157:                    nn -= dn;
                    158:                  }
                    159:
                    160:                if (nn != dn)
                    161:                  {
                    162:                    n2p -= nn - dn;
                    163:                    /* In theory, we could fall out to the cute code below
                    164:                       since we now have exactly the situation that code
                    165:                       is designed to handle.  We botch this badly and call
                    166:                       the basic mpn_sb_divrem_mn!  */
                    167:                    if (dn == 2)
                    168:                      mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
                    169:                    else
                    170:                      mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
                    171:                  }
                    172:              }
                    173:
                    174:
                    175:            if (cnt != 0)
                    176:              mpn_rshift (rp, n2p, dn, cnt);
                    177:            else
                    178:              MPN_COPY (rp, n2p, dn);
                    179:            TMP_FREE (marker);
                    180:            return;
                    181:          }
                    182:
                    183:        /* When we come here, the numerator/partial remainder is less
                    184:           than twice the size of the denominator.  */
                    185:
                    186:          {
                    187:            /* Problem:
                    188:
                    189:               Divide a numerator N with nn limbs by a denominator D with dn
                    190:               limbs forming a quotient of nn-dn+1 limbs.  When qn is small
                    191:               compared to dn, conventional division algorithms perform poorly.
                    192:               We want an algorithm that has an expected running time that is
                    193:               dependent only on qn.  It is assumed that the most significant
                    194:               limb of the numerator is smaller than the most significant limb
                    195:               of the denominator.
                    196:
                    197:               Algorithm (very informally stated):
                    198:
                    199:               1) Divide the 2 x qn most significant limbs from the numerator
                    200:                  by the qn most significant limbs from the denominator.  Call
                    201:                  the result qest.  This is either the correct quotient, but
                    202:                  might be 1 or 2 too large.  Compute the remainder from the
                    203:                  division.  (This step is implemented by a mpn_divrem call.)
                    204:
                    205:               2) Is the most significant limb from the remainder < p, where p
                    206:                  is the product of the most significant limb from the quotient
                    207:                  and the next(d).  (Next(d) denotes the next ignored limb from
                    208:                  the denominator.)  If it is, decrement qest, and adjust the
                    209:                  remainder accordingly.
                    210:
                    211:               3) Is the remainder >= qest?  If it is, qest is the desired
                    212:                  quotient.  The algorithm terminates.
                    213:
                    214:               4) Subtract qest x next(d) from the remainder.  If there is
                    215:                  borrow out, decrement qest, and adjust the remainder
                    216:                  accordingly.
                    217:
                    218:               5) Skip one word from the denominator (i.e., let next(d) denote
                    219:                  the next less significant limb.  */
                    220:
                    221:            mp_size_t qn;
                    222:            mp_ptr n2p, d2p;
                    223:            mp_ptr tp;
                    224:            mp_limb_t cy;
                    225:            mp_size_t in, rn;
                    226:            mp_limb_t quotient_too_large;
1.1.1.2 ! ohara     227:            unsigned int cnt;
1.1       maekawa   228:
                    229:            qn = nn - dn;
                    230:            qp[qn] = 0;                         /* zero high quotient limb */
                    231:            qn += adjust;                       /* qn cannot become bigger */
                    232:
                    233:            if (qn == 0)
                    234:              {
                    235:                MPN_COPY (rp, np, dn);
                    236:                TMP_FREE (marker);
                    237:                return;
                    238:              }
                    239:
                    240:            in = dn - qn;               /* (at least partially) ignored # of limbs in ops */
                    241:            /* Normalize denominator by shifting it to the left such that its
                    242:               most significant bit is set.  Then shift the numerator the same
                    243:               amount, to mathematically preserve quotient.  */
1.1.1.2 ! ohara     244:            if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
1.1       maekawa   245:              {
1.1.1.2 ! ohara     246:                count_leading_zeros (cnt, dp[dn - 1]);
        !           247:                cnt -= GMP_NAIL_BITS;
1.1       maekawa   248:
1.1.1.2 ! ohara     249:                d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
1.1       maekawa   250:                mpn_lshift (d2p, dp + in, qn, cnt);
1.1.1.2 ! ohara     251:                d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
1.1       maekawa   252:
                    253:                n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
                    254:                cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
                    255:                if (adjust)
                    256:                  {
                    257:                    n2p[2 * qn] = cy;
                    258:                    n2p++;
                    259:                  }
                    260:                else
                    261:                  {
1.1.1.2 ! ohara     262:                    n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
1.1       maekawa   263:                  }
                    264:              }
                    265:            else
                    266:              {
1.1.1.2 ! ohara     267:                cnt = 0;
1.1       maekawa   268:                d2p = (mp_ptr) dp + in;
                    269:
                    270:                n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
                    271:                MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
                    272:                if (adjust)
                    273:                  {
                    274:                    n2p[2 * qn] = 0;
                    275:                    n2p++;
                    276:                  }
                    277:              }
                    278:
                    279:            /* Get an approximate quotient using the extracted operands.  */
                    280:            if (qn == 1)
                    281:              {
                    282:                mp_limb_t q0, r0;
                    283:                mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
                    284:                /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
                    285:                   temps here.  This doesn't hurt code quality on any machines
                    286:                   so we do it unconditionally.  */
                    287:                gcc272bug_n1 = n2p[1];
                    288:                gcc272bug_n0 = n2p[0];
                    289:                gcc272bug_d0 = d2p[0];
1.1.1.2 ! ohara     290:                udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0 << GMP_NAIL_BITS,
        !           291:                            gcc272bug_d0 << GMP_NAIL_BITS);
        !           292:                r0 >>= GMP_NAIL_BITS;
1.1       maekawa   293:                n2p[0] = r0;
                    294:                qp[0] = q0;
                    295:              }
                    296:            else if (qn == 2)
                    297:              mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
1.1.1.2 ! ohara     298:            else if (qn < DIV_DC_THRESHOLD)
        !           299:              mpn_sb_divrem_mn (qp, n2p, 2 * qn, d2p, qn);
1.1       maekawa   300:            else
1.1.1.2 ! ohara     301:              mpn_dc_divrem_n (qp, n2p, d2p, qn);
1.1       maekawa   302:
                    303:            rn = qn;
                    304:            /* Multiply the first ignored divisor limb by the most significant
                    305:               quotient limb.  If that product is > the partial remainder's
                    306:               most significant limb, we know the quotient is too large.  This
                    307:               test quickly catches most cases where the quotient is too large;
                    308:               it catches all cases where the quotient is 2 too large.  */
                    309:            {
                    310:              mp_limb_t dl, x;
                    311:              mp_limb_t h, l;
                    312:
                    313:              if (in - 2 < 0)
                    314:                dl = 0;
                    315:              else
                    316:                dl = dp[in - 2];
                    317:
1.1.1.2 ! ohara     318: #if GMP_NAIL_BITS == 0
        !           319:              x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % BITS_PER_MP_LIMB));
        !           320: #else
        !           321:              x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
        !           322:              if (cnt != 0)
        !           323:                x |= dl >> (GMP_NUMB_BITS - cnt);
        !           324: #endif
        !           325:              umul_ppmm (h, l, x, qp[qn - 1] << GMP_NAIL_BITS);
        !           326:              l >>= GMP_NAIL_BITS;
1.1       maekawa   327:
                    328:              if (n2p[qn - 1] < h)
                    329:                {
                    330:                  mp_limb_t cy;
                    331:
                    332:                  mpn_decr_u (qp, (mp_limb_t) 1);
                    333:                  cy = mpn_add_n (n2p, n2p, d2p, qn);
                    334:                  if (cy)
                    335:                    {
                    336:                      /* The partial remainder is safely large.  */
                    337:                      n2p[qn] = cy;
                    338:                      ++rn;
                    339:                    }
                    340:                }
                    341:            }
                    342:
                    343:            quotient_too_large = 0;
                    344:            if (cnt != 0)
                    345:              {
                    346:                mp_limb_t cy1, cy2;
                    347:
                    348:                /* Append partially used numerator limb to partial remainder.  */
1.1.1.2 ! ohara     349:                cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
        !           350:                n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
1.1       maekawa   351:
                    352:                /* Update partial remainder with partially used divisor limb.  */
1.1.1.2 ! ohara     353:                cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
1.1       maekawa   354:                if (qn != rn)
                    355:                  {
                    356:                    if (n2p[qn] < cy2)
                    357:                      abort ();
                    358:                    n2p[qn] -= cy2;
                    359:                  }
                    360:                else
                    361:                  {
1.1.1.2 ! ohara     362:                    n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
1.1       maekawa   363:
                    364:                    quotient_too_large = (cy1 < cy2);
                    365:                    ++rn;
                    366:                  }
                    367:                --in;
                    368:              }
                    369:            /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
                    370:
                    371:            tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
                    372:
                    373:            if (in < qn)
                    374:              {
                    375:                if (in == 0)
                    376:                  {
                    377:                    MPN_COPY (rp, n2p, rn);
                    378:                    if (rn != dn)
                    379:                      abort ();
                    380:                    goto foo;
                    381:                  }
                    382:                mpn_mul (tp, qp, qn, dp, in);
                    383:              }
                    384:            else
                    385:              mpn_mul (tp, dp, in, qp, qn);
                    386:
                    387:            cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
                    388:            MPN_COPY (rp + in, n2p, dn - in);
                    389:            quotient_too_large |= cy;
                    390:            cy = mpn_sub_n (rp, np, tp, in);
                    391:            cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
                    392:            quotient_too_large |= cy;
                    393:          foo:
                    394:            if (quotient_too_large)
                    395:              {
                    396:                mpn_decr_u (qp, (mp_limb_t) 1);
                    397:                mpn_add_n (rp, rp, dp, dn);
                    398:              }
                    399:          }
                    400:        TMP_FREE (marker);
                    401:        return;
                    402:       }
                    403:     }
                    404: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>