Annotation of OpenXM_contrib/gmp/mpn/generic/bz_divrem_n.c, Revision 1.1.1.1
1.1 maekawa 1: /* mpn_bz_divrem_n and auxilliary routines.
2:
3: THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
4: INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
5: IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
6: FUTURE GNU MP RELEASE.
7:
8:
9: Copyright (C) 2000 Free Software Foundation, Inc.
10: Contributed by Paul Zimmermann.
11:
12: This file is part of the GNU MP Library.
13:
14: The GNU MP Library is free software; you can redistribute it and/or modify
15: it under the terms of the GNU Lesser General Public License as published by
16: the Free Software Foundation; either version 2.1 of the License, or (at your
17: option) any later version.
18:
19: The GNU MP Library is distributed in the hope that it will be useful, but
20: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22: License for more details.
23:
24: You should have received a copy of the GNU Lesser General Public License
25: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
26: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27: MA 02111-1307, USA. */
28:
29: #include "gmp.h"
30: #include "gmp-impl.h"
31:
32: /*
33: [1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
34: Technical report MPI-I-98-1-022, october 1998.
35: http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
36: */
37:
38: static mp_limb_t mpn_bz_div_3_halves_by_2 _PROTO ((mp_ptr, mp_ptr, mp_srcptr,
39: mp_size_t, mp_ptr));
40:
41: static mp_limb_t mpn_bz_divrem_aux _PROTO ((mp_ptr, mp_ptr, mp_srcptr,
42: mp_size_t, mp_ptr));
43:
44: /* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
45: div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
46: i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */
47: #ifndef BZ_THRESHOLD
48: #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
49: #endif
50:
51: #if 0
52: static
53: unused_mpn_divrem (qp, qxn, np, nn, dp, dn)
54: mp_ptr qp;
55: mp_size_t qxn;
56: mp_ptr np;
57: mp_size_t nn;
58: mp_srcptr dp;
59: mp_size_t dn;
60: {
61: /* This might be useful: */
62: if (qxn != 0)
63: {
64: mp_limb_t c;
65: mp_ptr tp = alloca ((nn + qxn) * BYTES_PER_MP_LIMB);
66: MPN_COPY (tp + qxn - nn, np, nn);
67: MPN_ZERO (tp, qxn);
68: c = mpn_divrem (qp, 0L, tp, nn + qxn, dp, dn);
69: /* Maybe copy proper part of tp to np? Documentation is unclear about
70: the returned np value when qxn != 0 */
71: return c;
72: }
73: }
74: #endif
75:
76: /* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
77: by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
78: Returns most significant limb of the quotient, which is 0 or 1.
79: Requires that the most significant bit of the divisor is set. */
80:
81: mp_limb_t
82: #if __STDC__
83: mpn_bz_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
84: #else
85: mpn_bz_divrem_n (qp, np, dp, n)
86: mp_ptr qp;
87: mp_ptr np;
88: mp_srcptr dp;
89: mp_size_t n;
90: #endif
91: {
92: mp_limb_t qhl = 0;
93: if (mpn_cmp (np + n, dp, n) >= 0)
94: {
95: qhl = 1;
96: mpn_sub_n (np + n, np + n, dp, n);
97: abort ();
98: }
99: if (n % 2 != 0)
100: {
101: /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */
102: if (n < BZ_THRESHOLD)
103: qhl += mpn_sb_divrem_mn (qp + 1, np + 2, 2 * (n - 1), dp + 1, n - 1);
104: else
105: qhl += mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1);
106: /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by
107: (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */
108: if (mpn_sub_1 (np + n, np + n, 1,
109: mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0])))
110: {
111: /* quotient too large */
112: qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
113: if (mpn_add_n (np + 1, np + 1, dp, n) == 0)
114: { /* still too large */
115: qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
116: mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */
117: }
118: }
119: /* now divide (np, n + 1) by (dp, n) */
120: qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
121: mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
122: }
123: else
124: {
125: mp_ptr tmp;
126: mp_size_t n2 = n/2;
127: TMP_DECL (marker);
128: TMP_MARK (marker);
129: tmp = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
130: qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp);
131: qhl += mpn_add_1 (qp + n2, qp + n2, n2,
132: mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp));
133: TMP_FREE (marker);
134: }
135: return qhl;
136: }
137:
138: /* Like mpn_bz_divrem_n, but without memory allocation. Also
139: assumes mpn_cmp (np + n, dp, n) < 0 */
140:
141: static mp_limb_t
142: #if __STDC__
143: mpn_bz_divrem_aux (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr tmp)
144: #else
145: mpn_bz_divrem_aux (qp, np, dp, n, tmp)
146: mp_ptr qp;
147: mp_ptr np;
148: mp_srcptr dp;
149: mp_size_t n;
150: mp_ptr tmp;
151: #endif
152: {
153: mp_limb_t qhl;
154:
155: if (n % 2 != 0)
156: {
157: /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */
158: qhl = mpn_bz_divrem_aux (qp + 1, np + 2, dp + 1, n - 1, tmp);
159: /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by
160: (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */
161: if (mpn_sub_1 (np + n, np + n, 1,
162: mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0])))
163: {
164: /* quotient too large */
165: qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
166: if (mpn_add_n (np + 1, np + 1, dp, n) == 0)
167: { /* still too large */
168: qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
169: mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */
170: }
171: }
172: /* now divide (np, n + 1) by (dp, n) */
173: qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
174: mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
175: }
176: else
177: {
178: mp_size_t n2 = n/2;
179: qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp);
180: qhl += mpn_add_1 (qp + n2, qp + n2, n2,
181: mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp));
182: }
183: return qhl;
184: }
185:
186: /* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
187: the remainder in (np, 2n) */
188:
189: static mp_limb_t
190: #if __STDC__
191: mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
192: mp_ptr tmp)
193: #else
194: mpn_bz_div_3_halves_by_2 (qp, np, dp, n, tmp)
195: mp_ptr qp;
196: mp_ptr np;
197: mp_srcptr dp;
198: mp_size_t n;
199: mp_ptr tmp;
200: #endif
201: {
202: mp_size_t twon = n + n;
203: mp_limb_t qhl;
204:
205: if (n < BZ_THRESHOLD)
206: qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
207: else
208: qhl = mpn_bz_divrem_aux (qp, np + n, dp + n, n, tmp);
209: /* q = (qp, n), c = (np + n, n) with the notations of [1] */
210: mpn_mul_n (tmp, qp, dp, n);
211: if (qhl != 0)
212: mpn_add_n (tmp + n, tmp + n, dp, n);
213: if (mpn_sub_n (np, np, tmp, twon)) /* R = (np, 2n) */
214: {
215: qhl -= mpn_sub_1 (qp, qp, n, 1L);
216: if (mpn_add_n (np, np, dp, twon) == 0)
217: { /* qp still too large */
218: qhl -= mpn_sub_1 (qp, qp, n, 1L);
219: mpn_add_n (np, np, dp, twon); /* always carry here */
220: }
221: }
222: return qhl;
223: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>