Annotation of OpenXM_contrib/gmp/mpn/generic/bz_divrem_n.c, Revision 1.1.1.2
1.1 maekawa 1: /* mpn_bz_divrem_n and auxilliary routines.
2:
3: THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
4: INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
5: IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
6: FUTURE GNU MP RELEASE.
7:
8:
9: Copyright (C) 2000 Free Software Foundation, Inc.
10: Contributed by Paul Zimmermann.
11:
12: This file is part of the GNU MP Library.
13:
14: The GNU MP Library is free software; you can redistribute it and/or modify
15: it under the terms of the GNU Lesser General Public License as published by
16: the Free Software Foundation; either version 2.1 of the License, or (at your
17: option) any later version.
18:
19: The GNU MP Library is distributed in the hope that it will be useful, but
20: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22: License for more details.
23:
24: You should have received a copy of the GNU Lesser General Public License
25: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
26: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27: MA 02111-1307, USA. */
28:
29: #include "gmp.h"
30: #include "gmp-impl.h"
31:
32: /*
33: [1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
34: Technical report MPI-I-98-1-022, october 1998.
35: http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
36: */
37:
1.1.1.2 ! maekawa 38: static mp_limb_t mpn_bz_div_3_halves_by_2
! 39: _PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n));
1.1 maekawa 40:
41:
42: /* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
43: div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
44: i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */
45: #ifndef BZ_THRESHOLD
46: #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
47: #endif
48:
49: #if 0
50: static
51: unused_mpn_divrem (qp, qxn, np, nn, dp, dn)
52: mp_ptr qp;
53: mp_size_t qxn;
54: mp_ptr np;
55: mp_size_t nn;
56: mp_srcptr dp;
57: mp_size_t dn;
58: {
59: /* This might be useful: */
60: if (qxn != 0)
61: {
62: mp_limb_t c;
63: mp_ptr tp = alloca ((nn + qxn) * BYTES_PER_MP_LIMB);
64: MPN_COPY (tp + qxn - nn, np, nn);
65: MPN_ZERO (tp, qxn);
66: c = mpn_divrem (qp, 0L, tp, nn + qxn, dp, dn);
67: /* Maybe copy proper part of tp to np? Documentation is unclear about
68: the returned np value when qxn != 0 */
69: return c;
70: }
71: }
72: #endif
73:
1.1.1.2 ! maekawa 74:
1.1 maekawa 75: /* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
76: by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
77: Returns most significant limb of the quotient, which is 0 or 1.
78: Requires that the most significant bit of the divisor is set. */
79:
80: mp_limb_t
81: #if __STDC__
82: mpn_bz_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
83: #else
84: mpn_bz_divrem_n (qp, np, dp, n)
85: mp_ptr qp;
86: mp_ptr np;
87: mp_srcptr dp;
88: mp_size_t n;
89: #endif
90: {
1.1.1.2 ! maekawa 91: mp_limb_t qhl, cc;
1.1 maekawa 92:
93: if (n % 2 != 0)
94: {
1.1.1.2 ! maekawa 95: qhl = mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1);
! 96: cc = mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]);
! 97: cc = mpn_sub_1 (np + n, np + n, 1, cc);
! 98: if (qhl) cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);
! 99: while (cc)
! 100: {
! 101: qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, (mp_limb_t) 1);
! 102: cc -= mpn_add_n (np + 1, np + 1, dp, n);
! 103: }
1.1 maekawa 104: qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
1.1.1.2 ! maekawa 105: mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
1.1 maekawa 106: }
107: else
108: {
109: mp_size_t n2 = n/2;
1.1.1.2 ! maekawa 110: qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2);
1.1 maekawa 111: qhl += mpn_add_1 (qp + n2, qp + n2, n2,
1.1.1.2 ! maekawa 112: mpn_bz_div_3_halves_by_2 (qp, np, dp, n2));
1.1 maekawa 113: }
114: return qhl;
115: }
116:
1.1.1.2 ! maekawa 117:
1.1 maekawa 118: /* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
119: the remainder in (np, 2n) */
120:
121: static mp_limb_t
122: #if __STDC__
1.1.1.2 ! maekawa 123: mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
1.1 maekawa 124: #else
1.1.1.2 ! maekawa 125: mpn_bz_div_3_halves_by_2 (qp, np, dp, n)
1.1 maekawa 126: mp_ptr qp;
127: mp_ptr np;
128: mp_srcptr dp;
129: mp_size_t n;
130: #endif
131: {
1.1.1.2 ! maekawa 132: mp_size_t twon = n + n;
! 133: mp_limb_t qhl, cc;
! 134: mp_ptr tmp;
! 135: TMP_DECL (marker);
1.1 maekawa 136:
1.1.1.2 ! maekawa 137: TMP_MARK (marker);
1.1 maekawa 138: if (n < BZ_THRESHOLD)
139: qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
140: else
1.1.1.2 ! maekawa 141: qhl = mpn_bz_divrem_n (qp, np + n, dp + n, n);
! 142: tmp = (mp_ptr) TMP_ALLOC (twon * BYTES_PER_MP_LIMB);
1.1 maekawa 143: mpn_mul_n (tmp, qp, dp, n);
1.1.1.2 ! maekawa 144: cc = mpn_sub_n (np, np, tmp, twon);
! 145: TMP_FREE (marker);
! 146: if (qhl) cc += mpn_sub_n (np + n, np + n, dp, n);
! 147: while (cc)
1.1 maekawa 148: {
1.1.1.2 ! maekawa 149: qhl -= mpn_sub_1 (qp, qp, n, (mp_limb_t) 1);
! 150: cc -= mpn_add_n (np, np, dp, twon);
1.1 maekawa 151: }
152: return qhl;
153: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>