Annotation of OpenXM_contrib/gmp/mpn/generic/sb_divrem_mn.c, Revision 1.1.1.2
1.1 maekawa 1: /* mpn_sb_divrem_mn -- Divide natural numbers, producing both remainder and
2: quotient.
3:
4: THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
5: INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
6: IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
7: FUTURE GNU MP RELEASE.
8:
9:
1.1.1.2 ! ohara 10: Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002 Free Software Foundation,
! 11: Inc.
1.1 maekawa 12:
13: This file is part of the GNU MP Library.
14:
15: The GNU MP Library is free software; you can redistribute it and/or modify
16: it under the terms of the GNU Lesser General Public License as published by
17: the Free Software Foundation; either version 2.1 of the License, or (at your
18: option) any later version.
19:
20: The GNU MP Library is distributed in the hope that it will be useful, but
21: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
23: License for more details.
24:
25: You should have received a copy of the GNU Lesser General Public License
26: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
27: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
28: MA 02111-1307, USA. */
29:
30: #include "gmp.h"
31: #include "gmp-impl.h"
32: #include "longlong.h"
33:
1.1.1.2 ! ohara 34:
! 35: /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
! 36: meaning the quotient size where that should happen, the quotient size
! 37: being how many udiv divisions will be done.
! 38:
! 39: The default is to use preinv always, CPUs where this doesn't suit have
! 40: tuned thresholds. Note in particular that preinv should certainly be
! 41: used if that's the only division available (USE_PREINV_ALWAYS). */
! 42:
! 43: #ifndef DIV_SB_PREINV_THRESHOLD
! 44: #define DIV_SB_PREINV_THRESHOLD 0
! 45: #endif
! 46:
! 47:
1.1 maekawa 48: /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
49: the NSIZE-DSIZE least significant quotient limbs at QP
1.1.1.2 ! ohara 50: and the DSIZE long remainder at NP.
1.1 maekawa 51: Return the most significant limb of the quotient, this is always 0 or 1.
52:
53: Preconditions:
54: 0. NSIZE >= DSIZE.
55: 1. The most significant bit of the divisor must be set.
56: 2. QP must either not overlap with the input operands at all, or
57: QP + DSIZE >= NP must hold true. (This means that it's
58: possible to put the quotient in the high part of NUM, right after the
59: remainder in NUM.
1.1.1.2 ! ohara 60: 3. NSIZE >= DSIZE.
1.1 maekawa 61: 4. DSIZE >= 2. */
62:
63:
64: mp_limb_t
65: mpn_sb_divrem_mn (mp_ptr qp,
1.1.1.2 ! ohara 66: mp_ptr np, mp_size_t nn,
! 67: mp_srcptr dp, mp_size_t dn)
1.1 maekawa 68: {
69: mp_limb_t most_significant_q_limb = 0;
1.1.1.2 ! ohara 70: mp_size_t qn = nn - dn;
1.1 maekawa 71: mp_size_t i;
72: mp_limb_t dx, d1, n0;
73: mp_limb_t dxinv;
1.1.1.2 ! ohara 74: int use_preinv;
1.1 maekawa 75:
1.1.1.2 ! ohara 76: ASSERT (dn > 2);
! 77: ASSERT (nn >= dn);
! 78: ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
! 79: ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
! 80: ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
! 81: ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
! 82: ASSERT_MPN (np, nn);
! 83: ASSERT_MPN (dp, dn);
! 84:
! 85: np += qn;
! 86: dx = dp[dn - 1];
! 87: d1 = dp[dn - 2];
! 88: n0 = np[dn - 1];
1.1 maekawa 89:
90: if (n0 >= dx)
91: {
1.1.1.2 ! ohara 92: if (n0 > dx || mpn_cmp (np, dp, dn - 1) >= 0)
1.1 maekawa 93: {
1.1.1.2 ! ohara 94: mpn_sub_n (np, np, dp, dn);
1.1 maekawa 95: most_significant_q_limb = 1;
96: }
97: }
98:
1.1.1.2 ! ohara 99: /* use_preinv is possibly a constant, but it's left to the compiler to
! 100: optimize away the unused code in that case. */
! 101: use_preinv = ABOVE_THRESHOLD (qn, DIV_SB_PREINV_THRESHOLD);
! 102: if (use_preinv)
! 103: invert_limb (dxinv, dx);
1.1 maekawa 104:
1.1.1.2 ! ohara 105: for (i = qn - 1; i >= 0; i--)
1.1 maekawa 106: {
107: mp_limb_t q;
108: mp_limb_t nx;
109: mp_limb_t cy_limb;
110:
1.1.1.2 ! ohara 111: nx = np[dn - 1]; /* FIXME: could get value from r1 */
1.1 maekawa 112: np--;
113:
114: if (nx == dx)
115: {
116: /* This might over-estimate q, but it's probably not worth
117: the extra code here to find out. */
1.1.1.2 ! ohara 118: q = GMP_NUMB_MASK;
1.1 maekawa 119:
120: #if 1
1.1.1.2 ! ohara 121: cy_limb = mpn_submul_1 (np, dp, dn, q);
1.1 maekawa 122: #else
123: /* This should be faster on many machines */
1.1.1.2 ! ohara 124: cy_limb = mpn_sub_n (np + 1, np + 1, dp, dn);
! 125: cy = mpn_add_n (np, np, dp, dn);
! 126: np[dn] += cy;
1.1 maekawa 127: #endif
128:
129: if (nx != cy_limb)
130: {
1.1.1.2 ! ohara 131: mpn_add_n (np, np, dp, dn);
1.1 maekawa 132: q--;
133: }
134:
135: qp[i] = q;
136: }
137: else
138: {
139: mp_limb_t rx, r1, r0, p1, p0;
140:
1.1.1.2 ! ohara 141: /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register usage
! 142: when np[dn-1] is used in an asm statement like umul_ppmm in
! 143: udiv_qrnnd_preinv. The symptom is seg faults due to registers
! 144: being clobbered. gcc 2.95 i386 doesn't have the problem. */
! 145: {
! 146: mp_limb_t workaround = np[dn - 1];
! 147: if (use_preinv)
! 148: udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
! 149: else
! 150: {
! 151: udiv_qrnnd (q, r1, nx, workaround << GMP_NAIL_BITS,
! 152: dx << GMP_NAIL_BITS);
! 153: r1 >>= GMP_NAIL_BITS;
! 154: }
! 155: }
! 156: umul_ppmm (p1, p0, d1, q << GMP_NAIL_BITS);
! 157: p0 >>= GMP_NAIL_BITS;
1.1 maekawa 158:
1.1.1.2 ! ohara 159: r0 = np[dn - 2];
1.1 maekawa 160: rx = 0;
161: if (r1 < p1 || (r1 == p1 && r0 < p0))
162: {
163: p1 -= p0 < d1;
1.1.1.2 ! ohara 164: p0 = (p0 - d1) & GMP_NUMB_MASK;
1.1 maekawa 165: q--;
1.1.1.2 ! ohara 166: r1 = (r1 + dx) & GMP_NUMB_MASK;
1.1 maekawa 167: rx = r1 < dx;
168: }
169:
170: p1 += r0 < p0; /* cannot carry! */
171: rx -= r1 < p1; /* may become 11..1 if q is still too large */
1.1.1.2 ! ohara 172: r1 = (r1 - p1) & GMP_NUMB_MASK;
! 173: r0 = (r0 - p0) & GMP_NUMB_MASK;
1.1 maekawa 174:
1.1.1.2 ! ohara 175: cy_limb = mpn_submul_1 (np, dp, dn - 2, q);
1.1 maekawa 176:
1.1.1.2 ! ohara 177: /* Check if we've over-estimated q, and adjust as needed. */
1.1 maekawa 178: {
179: mp_limb_t cy1, cy2;
180: cy1 = r0 < cy_limb;
1.1.1.2 ! ohara 181: r0 = (r0 - cy_limb) & GMP_NUMB_MASK;
1.1 maekawa 182: cy2 = r1 < cy1;
183: r1 -= cy1;
1.1.1.2 ! ohara 184: np[dn - 1] = r1;
! 185: np[dn - 2] = r0;
1.1 maekawa 186: if (cy2 != rx)
187: {
1.1.1.2 ! ohara 188: mpn_add_n (np, np, dp, dn);
1.1 maekawa 189: q--;
190: }
191: }
192: qp[i] = q;
193: }
194: }
195:
196: /* ______ ______ ______
197: |__rx__|__r1__|__r0__| partial remainder
198: ______ ______
199: - |__p1__|__p0__| partial product to subtract
200: ______ ______
1.1.1.2 ! ohara 201: - |______|cylimb|
1.1 maekawa 202:
203: rx is -1, 0 or 1. If rx=1, then q is correct (it should match
204: carry out). If rx=-1 then q is too large. If rx=0, then q might
205: be too large, but it is most likely correct.
206: */
207:
208: return most_significant_q_limb;
209: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>