OpenXM_contrib/gmp/mpn/generic/divrem_1.c - annotate

Return to divrem_1.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / generic
Annotation of OpenXM_contrib/gmp/mpn/generic/divrem_1.c, Revision 1.1.1.3

1.1.1.3 ! ohara       1: /* mpn_divrem_1 -- mpn by limb division.
1.1       maekawa     2:
1.1.1.3 ! ohara       3: Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002 Free Software
1.1.1.2   maekawa     4: Foundation, Inc.
1.1       maekawa     5:
                      6: This file is part of the GNU MP Library.
                      7:
                      8: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2   maekawa     9: it under the terms of the GNU Lesser General Public License as published by
                     10: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1       maekawa    11: option) any later version.
                     12:
                     13: The GNU MP Library is distributed in the hope that it will be useful, but
                     14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2   maekawa    15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1.1       maekawa    16: License for more details.
                     17:
1.1.1.2   maekawa    18: You should have received a copy of the GNU Lesser General Public License
1.1       maekawa    19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: MA 02111-1307, USA. */
                     22:
                     23: #include "gmp.h"
                     24: #include "gmp-impl.h"
                     25: #include "longlong.h"
                     26:
1.1.1.2   maekawa    27:
1.1.1.3 ! ohara      28: /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
        !            29:    meaning the quotient size where that should happen, the quotient size
        !            30:    being how many udiv divisions will be done.
        !            31:
        !            32:    The default is to use preinv always, CPUs where this doesn't suit have
        !            33:    tuned thresholds.  Note in particular that preinv should certainly be
        !            34:    used if that's the only division available (USE_PREINV_ALWAYS).  */
1.1.1.2   maekawa    35:
1.1.1.3 ! ohara      36: #ifndef DIVREM_1_NORM_THRESHOLD
        !            37: #define DIVREM_1_NORM_THRESHOLD  0
        !            38: #endif
        !            39: #ifndef DIVREM_1_UNNORM_THRESHOLD
        !            40: #define DIVREM_1_UNNORM_THRESHOLD  0
        !            41: #endif
1.1.1.2   maekawa    42:
                     43:
                     44:
1.1.1.3 ! ohara      45: /* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
        !            46:    and UNNORM thresholds are 0 and only the inversion code is included.
1.1.1.2   maekawa    47:
1.1.1.3 ! ohara      48:    If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
        !            49:    will be MP_SIZE_T_MAX and only the plain division code is included.
        !            50:
        !            51:    Otherwise mul-by-inverse is better than plain division above some
        !            52:    threshold, and best results are obtained by having code for both present.
        !            53:
        !            54:    The main reason for separating the norm and unnorm cases is that not all
        !            55:    CPUs give zero for "n0 >> BITS_PER_MP_LIMB" which would arise in the
        !            56:    unnorm code used on an already normalized divisor.
        !            57:
        !            58:    If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
        !            59:    non-shifting code for both the norm and unnorm cases, though with
        !            60:    different criteria for skipping a division, and with different thresholds
        !            61:    of course.  And in fact if inversion is never viable, then that simple
        !            62:    non-shifting division would be all that's left.
        !            63:
        !            64:    The NORM and UNNORM thresholds might not differ much, but if there's
        !            65:    going to be separate code for norm and unnorm then it makes sense to have
        !            66:    separate thresholds.  One thing that's possible is that the
        !            67:    mul-by-inverse might be better only for normalized divisors, due to that
        !            68:    case not needing variable bit shifts.
        !            69:
        !            70:    Notice that the thresholds are tested after the decision to possibly skip
        !            71:    one divide step, so they're based on the actual number of divisions done.
        !            72:
        !            73:    For the unnorm case, it would be possible to call mpn_lshift to adjust
        !            74:    the dividend all in one go (into the quotient space say), rather than
        !            75:    limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
        !            76:    than what the compiler can generate for EXTRACT.  But this is left to CPU
        !            77:    specific implementations to consider, especially since EXTRACT isn't on
        !            78:    the dependent chain.  */
        !            79:
        !            80: mp_limb_t
        !            81: mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
        !            82:              mp_srcptr up, mp_size_t un, mp_limb_t d)
1.1       maekawa    83: {
1.1.1.3 ! ohara      84:   mp_size_t  n;
        !            85:   mp_size_t  i;
        !            86:   mp_limb_t  n1, n0;
        !            87:   mp_limb_t  r = 0;
        !            88:
        !            89:   ASSERT (qxn >= 0);
        !            90:   ASSERT (un >= 0);
        !            91:   ASSERT (d != 0);
        !            92:   /* FIXME: What's the correct overlap rule when qxn!=0? */
        !            93:   ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
1.1.1.2   maekawa    94:
1.1.1.3 ! ohara      95:   n = un + qxn;
        !            96:   if (n == 0)
1.1.1.2   maekawa    97:     return 0;
                     98:
1.1.1.3 ! ohara      99:   d <<= GMP_NAIL_BITS;
        !           100:
        !           101:   qp += (n - 1);   /* Make qp point at most significant quotient limb */
1.1.1.2   maekawa   102:
1.1.1.3 ! ohara     103:   if ((d & GMP_LIMB_HIGHBIT) != 0)
        !           104:     {
        !           105:       if (un != 0)
1.1.1.2   maekawa   106:        {
1.1.1.3 ! ohara     107:          /* High quotient limb is 0 or 1, skip a divide step. */
        !           108:          mp_limb_t q;
        !           109:          r = up[un - 1] << GMP_NAIL_BITS;
        !           110:          q = (r >= d);
        !           111:          *qp-- = q;
        !           112:          r -= (d & -q);
        !           113:          r >>= GMP_NAIL_BITS;
        !           114:          n--;
        !           115:          un--;
        !           116:        }
1.1.1.2   maekawa   117:
1.1.1.3 ! ohara     118:       if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
        !           119:        {
        !           120:        plain:
        !           121:          for (i = un - 1; i >= 0; i--)
        !           122:            {
        !           123:              n0 = up[i] << GMP_NAIL_BITS;
        !           124:              udiv_qrnnd (*qp, r, r, n0, d);
        !           125:              r >>= GMP_NAIL_BITS;
        !           126:              qp--;
        !           127:            }
        !           128:          for (i = qxn - 1; i >= 0; i--)
        !           129:            {
        !           130:              udiv_qrnnd (*qp, r, r, 0, d);
        !           131:              r >>= GMP_NAIL_BITS;
        !           132:              qp--;
        !           133:            }
        !           134:          return r;
1.1.1.2   maekawa   135:        }
                    136:       else
                    137:        {
1.1.1.3 ! ohara     138:          /* Multiply-by-inverse, divisor already normalized. */
        !           139:          mp_limb_t dinv;
        !           140:          invert_limb (dinv, d);
1.1.1.2   maekawa   141:
1.1.1.3 ! ohara     142:          for (i = un - 1; i >= 0; i--)
1.1.1.2   maekawa   143:            {
1.1.1.3 ! ohara     144:              n0 = up[i] << GMP_NAIL_BITS;
        !           145:              udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
        !           146:              r >>= GMP_NAIL_BITS;
        !           147:              qp--;
1.1.1.2   maekawa   148:            }
1.1.1.3 ! ohara     149:          for (i = qxn - 1; i >= 0; i--)
1.1.1.2   maekawa   150:            {
1.1.1.3 ! ohara     151:              udiv_qrnnd_preinv (*qp, r, r, 0, d, dinv);
        !           152:              r >>= GMP_NAIL_BITS;
        !           153:              qp--;
1.1.1.2   maekawa   154:            }
                    155:          return r;
                    156:        }
                    157:     }
                    158:   else
                    159:     {
1.1.1.3 ! ohara     160:       /* Most significant bit of divisor == 0.  */
        !           161:       int norm;
1.1.1.2   maekawa   162:
1.1.1.3 ! ohara     163:       /* Skip a division if high < divisor (high quotient 0).  Testing here
        !           164:         before before normalizing will still skip as often as possible.  */
        !           165:       if (un != 0)
        !           166:        {
        !           167:          n1 = up[un - 1] << GMP_NAIL_BITS;
        !           168:          if (n1 < d)
1.1.1.2   maekawa   169:            {
1.1.1.3 ! ohara     170:              r = n1 >> GMP_NAIL_BITS;
        !           171:              *qp-- = 0;
        !           172:              n--;
        !           173:              if (n == 0)
        !           174:                return r;
        !           175:              un--;
        !           176:            }
        !           177:        }
1.1.1.2   maekawa   178:
1.1.1.3 ! ohara     179:       if (! UDIV_NEEDS_NORMALIZATION
        !           180:          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
        !           181:        goto plain;
        !           182:
        !           183:       count_leading_zeros (norm, d);
        !           184:       d <<= norm;
        !           185:       r <<= norm;
1.1.1.2   maekawa   186:
1.1.1.3 ! ohara     187:       if (UDIV_NEEDS_NORMALIZATION
        !           188:          && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
        !           189:        {
        !           190:          if (un != 0)
        !           191:            {
        !           192:              n1 = up[un - 1] << GMP_NAIL_BITS;
        !           193:              r |= (n1 >> (GMP_LIMB_BITS - norm));
        !           194:              for (i = un - 2; i >= 0; i--)
1.1.1.2   maekawa   195:                {
1.1.1.3 ! ohara     196:                  n0 = up[i] << GMP_NAIL_BITS;
        !           197:                  udiv_qrnnd (*qp, r, r,
        !           198:                              (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
        !           199:                              d);
        !           200:                  r >>= GMP_NAIL_BITS;
        !           201:                  qp--;
1.1.1.2   maekawa   202:                  n1 = n0;
                    203:                }
1.1.1.3 ! ohara     204:              udiv_qrnnd (*qp, r, r, n1 << norm, d);
        !           205:              r >>= GMP_NAIL_BITS;
        !           206:              qp--;
1.1.1.2   maekawa   207:            }
1.1.1.3 ! ohara     208:          for (i = qxn - 1; i >= 0; i--)
        !           209:            {
        !           210:              udiv_qrnnd (*qp, r, r, 0, d);
        !           211:              r >>= GMP_NAIL_BITS;
        !           212:              qp--;
        !           213:            }
        !           214:          return r >> norm;
1.1.1.2   maekawa   215:        }
                    216:       else
                    217:        {
1.1.1.3 ! ohara     218:          mp_limb_t  dinv;
        !           219:          invert_limb (dinv, d);
        !           220:          if (un != 0)
        !           221:            {
        !           222:              n1 = up[un - 1] << GMP_NAIL_BITS;
        !           223:              r |= (n1 >> (GMP_LIMB_BITS - norm));
        !           224:              for (i = un - 2; i >= 0; i--)
        !           225:                {
        !           226:                  n0 = up[i] << GMP_NAIL_BITS;
        !           227:                  udiv_qrnnd_preinv (*qp, r, r,
        !           228:                                     ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
        !           229:                                     d, dinv);
        !           230:                  r >>= GMP_NAIL_BITS;
        !           231:                  qp--;
        !           232:                  n1 = n0;
        !           233:                }
        !           234:              udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
        !           235:              r >>= GMP_NAIL_BITS;
        !           236:              qp--;
        !           237:            }
1.1.1.2   maekawa   238:          for (i = qxn - 1; i >= 0; i--)
1.1.1.3 ! ohara     239:            {
        !           240:              udiv_qrnnd_preinv (*qp, r, r, 0, d, dinv);
        !           241:              r >>= GMP_NAIL_BITS;
        !           242:              qp--;
        !           243:            }
        !           244:          return r >> norm;
1.1.1.2   maekawa   245:        }
1.1       maekawa   246:     }
                    247: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>