Annotation of OpenXM_contrib/gmp/mpn/generic/divrem_1.c, Revision 1.1.1.3
1.1.1.3 ! ohara 1: /* mpn_divrem_1 -- mpn by limb division.
1.1 maekawa 2:
1.1.1.3 ! ohara 3: Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002 Free Software
1.1.1.2 maekawa 4: Foundation, Inc.
1.1 maekawa 5:
6: This file is part of the GNU MP Library.
7:
8: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 maekawa 9: it under the terms of the GNU Lesser General Public License as published by
10: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 11: option) any later version.
12:
13: The GNU MP Library is distributed in the hope that it will be useful, but
14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 maekawa 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 16: License for more details.
17:
1.1.1.2 maekawa 18: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: MA 02111-1307, USA. */
22:
23: #include "gmp.h"
24: #include "gmp-impl.h"
25: #include "longlong.h"
26:
1.1.1.2 maekawa 27:
1.1.1.3 ! ohara 28: /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
! 29: meaning the quotient size where that should happen, the quotient size
! 30: being how many udiv divisions will be done.
! 31:
! 32: The default is to use preinv always, CPUs where this doesn't suit have
! 33: tuned thresholds. Note in particular that preinv should certainly be
! 34: used if that's the only division available (USE_PREINV_ALWAYS). */
1.1.1.2 maekawa 35:
1.1.1.3 ! ohara 36: #ifndef DIVREM_1_NORM_THRESHOLD
! 37: #define DIVREM_1_NORM_THRESHOLD 0
! 38: #endif
! 39: #ifndef DIVREM_1_UNNORM_THRESHOLD
! 40: #define DIVREM_1_UNNORM_THRESHOLD 0
! 41: #endif
1.1.1.2 maekawa 42:
43:
44:
1.1.1.3 ! ohara 45: /* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
! 46: and UNNORM thresholds are 0 and only the inversion code is included.
1.1.1.2 maekawa 47:
1.1.1.3 ! ohara 48: If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
! 49: will be MP_SIZE_T_MAX and only the plain division code is included.
! 50:
! 51: Otherwise mul-by-inverse is better than plain division above some
! 52: threshold, and best results are obtained by having code for both present.
! 53:
! 54: The main reason for separating the norm and unnorm cases is that not all
! 55: CPUs give zero for "n0 >> BITS_PER_MP_LIMB" which would arise in the
! 56: unnorm code used on an already normalized divisor.
! 57:
! 58: If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
! 59: non-shifting code for both the norm and unnorm cases, though with
! 60: different criteria for skipping a division, and with different thresholds
! 61: of course. And in fact if inversion is never viable, then that simple
! 62: non-shifting division would be all that's left.
! 63:
! 64: The NORM and UNNORM thresholds might not differ much, but if there's
! 65: going to be separate code for norm and unnorm then it makes sense to have
! 66: separate thresholds. One thing that's possible is that the
! 67: mul-by-inverse might be better only for normalized divisors, due to that
! 68: case not needing variable bit shifts.
! 69:
! 70: Notice that the thresholds are tested after the decision to possibly skip
! 71: one divide step, so they're based on the actual number of divisions done.
! 72:
! 73: For the unnorm case, it would be possible to call mpn_lshift to adjust
! 74: the dividend all in one go (into the quotient space say), rather than
! 75: limb-by-limb in the loop. This might help if mpn_lshift is a lot faster
! 76: than what the compiler can generate for EXTRACT. But this is left to CPU
! 77: specific implementations to consider, especially since EXTRACT isn't on
! 78: the dependent chain. */
! 79:
! 80: mp_limb_t
! 81: mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
! 82: mp_srcptr up, mp_size_t un, mp_limb_t d)
1.1 maekawa 83: {
1.1.1.3 ! ohara 84: mp_size_t n;
! 85: mp_size_t i;
! 86: mp_limb_t n1, n0;
! 87: mp_limb_t r = 0;
! 88:
! 89: ASSERT (qxn >= 0);
! 90: ASSERT (un >= 0);
! 91: ASSERT (d != 0);
! 92: /* FIXME: What's the correct overlap rule when qxn!=0? */
! 93: ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
1.1.1.2 maekawa 94:
1.1.1.3 ! ohara 95: n = un + qxn;
! 96: if (n == 0)
1.1.1.2 maekawa 97: return 0;
98:
1.1.1.3 ! ohara 99: d <<= GMP_NAIL_BITS;
! 100:
! 101: qp += (n - 1); /* Make qp point at most significant quotient limb */
1.1.1.2 maekawa 102:
1.1.1.3 ! ohara 103: if ((d & GMP_LIMB_HIGHBIT) != 0)
! 104: {
! 105: if (un != 0)
1.1.1.2 maekawa 106: {
1.1.1.3 ! ohara 107: /* High quotient limb is 0 or 1, skip a divide step. */
! 108: mp_limb_t q;
! 109: r = up[un - 1] << GMP_NAIL_BITS;
! 110: q = (r >= d);
! 111: *qp-- = q;
! 112: r -= (d & -q);
! 113: r >>= GMP_NAIL_BITS;
! 114: n--;
! 115: un--;
! 116: }
1.1.1.2 maekawa 117:
1.1.1.3 ! ohara 118: if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
! 119: {
! 120: plain:
! 121: for (i = un - 1; i >= 0; i--)
! 122: {
! 123: n0 = up[i] << GMP_NAIL_BITS;
! 124: udiv_qrnnd (*qp, r, r, n0, d);
! 125: r >>= GMP_NAIL_BITS;
! 126: qp--;
! 127: }
! 128: for (i = qxn - 1; i >= 0; i--)
! 129: {
! 130: udiv_qrnnd (*qp, r, r, 0, d);
! 131: r >>= GMP_NAIL_BITS;
! 132: qp--;
! 133: }
! 134: return r;
1.1.1.2 maekawa 135: }
136: else
137: {
1.1.1.3 ! ohara 138: /* Multiply-by-inverse, divisor already normalized. */
! 139: mp_limb_t dinv;
! 140: invert_limb (dinv, d);
1.1.1.2 maekawa 141:
1.1.1.3 ! ohara 142: for (i = un - 1; i >= 0; i--)
1.1.1.2 maekawa 143: {
1.1.1.3 ! ohara 144: n0 = up[i] << GMP_NAIL_BITS;
! 145: udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
! 146: r >>= GMP_NAIL_BITS;
! 147: qp--;
1.1.1.2 maekawa 148: }
1.1.1.3 ! ohara 149: for (i = qxn - 1; i >= 0; i--)
1.1.1.2 maekawa 150: {
1.1.1.3 ! ohara 151: udiv_qrnnd_preinv (*qp, r, r, 0, d, dinv);
! 152: r >>= GMP_NAIL_BITS;
! 153: qp--;
1.1.1.2 maekawa 154: }
155: return r;
156: }
157: }
158: else
159: {
1.1.1.3 ! ohara 160: /* Most significant bit of divisor == 0. */
! 161: int norm;
1.1.1.2 maekawa 162:
1.1.1.3 ! ohara 163: /* Skip a division if high < divisor (high quotient 0). Testing here
! 164: before before normalizing will still skip as often as possible. */
! 165: if (un != 0)
! 166: {
! 167: n1 = up[un - 1] << GMP_NAIL_BITS;
! 168: if (n1 < d)
1.1.1.2 maekawa 169: {
1.1.1.3 ! ohara 170: r = n1 >> GMP_NAIL_BITS;
! 171: *qp-- = 0;
! 172: n--;
! 173: if (n == 0)
! 174: return r;
! 175: un--;
! 176: }
! 177: }
1.1.1.2 maekawa 178:
1.1.1.3 ! ohara 179: if (! UDIV_NEEDS_NORMALIZATION
! 180: && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
! 181: goto plain;
! 182:
! 183: count_leading_zeros (norm, d);
! 184: d <<= norm;
! 185: r <<= norm;
1.1.1.2 maekawa 186:
1.1.1.3 ! ohara 187: if (UDIV_NEEDS_NORMALIZATION
! 188: && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
! 189: {
! 190: if (un != 0)
! 191: {
! 192: n1 = up[un - 1] << GMP_NAIL_BITS;
! 193: r |= (n1 >> (GMP_LIMB_BITS - norm));
! 194: for (i = un - 2; i >= 0; i--)
1.1.1.2 maekawa 195: {
1.1.1.3 ! ohara 196: n0 = up[i] << GMP_NAIL_BITS;
! 197: udiv_qrnnd (*qp, r, r,
! 198: (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
! 199: d);
! 200: r >>= GMP_NAIL_BITS;
! 201: qp--;
1.1.1.2 maekawa 202: n1 = n0;
203: }
1.1.1.3 ! ohara 204: udiv_qrnnd (*qp, r, r, n1 << norm, d);
! 205: r >>= GMP_NAIL_BITS;
! 206: qp--;
1.1.1.2 maekawa 207: }
1.1.1.3 ! ohara 208: for (i = qxn - 1; i >= 0; i--)
! 209: {
! 210: udiv_qrnnd (*qp, r, r, 0, d);
! 211: r >>= GMP_NAIL_BITS;
! 212: qp--;
! 213: }
! 214: return r >> norm;
1.1.1.2 maekawa 215: }
216: else
217: {
1.1.1.3 ! ohara 218: mp_limb_t dinv;
! 219: invert_limb (dinv, d);
! 220: if (un != 0)
! 221: {
! 222: n1 = up[un - 1] << GMP_NAIL_BITS;
! 223: r |= (n1 >> (GMP_LIMB_BITS - norm));
! 224: for (i = un - 2; i >= 0; i--)
! 225: {
! 226: n0 = up[i] << GMP_NAIL_BITS;
! 227: udiv_qrnnd_preinv (*qp, r, r,
! 228: ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
! 229: d, dinv);
! 230: r >>= GMP_NAIL_BITS;
! 231: qp--;
! 232: n1 = n0;
! 233: }
! 234: udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
! 235: r >>= GMP_NAIL_BITS;
! 236: qp--;
! 237: }
1.1.1.2 maekawa 238: for (i = qxn - 1; i >= 0; i--)
1.1.1.3 ! ohara 239: {
! 240: udiv_qrnnd_preinv (*qp, r, r, 0, d, dinv);
! 241: r >>= GMP_NAIL_BITS;
! 242: qp--;
! 243: }
! 244: return r >> norm;
1.1.1.2 maekawa 245: }
1.1 maekawa 246: }
247: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>