Annotation of OpenXM_contrib/gmp/mpn/generic/rootrem.c, Revision 1.1.1.1
1.1 ohara 1: /* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
2: store the truncated integer part at rootp and the remainder at remp.
3:
4: THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
5: INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
6: IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
7: FUTURE GNU MP RELEASE.
8:
9:
10: Copyright 2002 Free Software Foundation, Inc.
11:
12: This file is part of the GNU MP Library.
13:
14: The GNU MP Library is free software; you can redistribute it and/or modify
15: it under the terms of the GNU Lesser General Public License as published by
16: the Free Software Foundation; either version 2.1 of the License, or (at your
17: option) any later version.
18:
19: The GNU MP Library is distributed in the hope that it will be useful, but
20: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22: License for more details.
23:
24: You should have received a copy of the GNU Lesser General Public License
25: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
26: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27: MA 02111-1307, USA. */
28:
29: /*
30: We use Newton's method to compute the root of a:
31:
32: n
33: f(x) := x - a
34:
35:
36: n - 1
37: f'(x) := x n
38:
39:
40: n-1 n-1 n-1
41: x - a/x a/x - x a/x + (n-1)x
42: new x = x - f(x)/f'(x) = x - ---------- = x + --------- = --------------
43: n n n
44:
45: */
46:
47:
48: #include <stdio.h>
49: #include <stdlib.h>
50:
51: #include "gmp.h"
52: #include "gmp-impl.h"
53: #include "longlong.h"
54:
55: mp_size_t
56: mpn_rootrem (mp_ptr rootp, mp_ptr remp,
57: mp_srcptr up, mp_size_t un, mp_limb_t nth)
58: {
59: mp_ptr pp, qp, xp;
60: mp_size_t pn, xn, qn;
61: unsigned long int unb, xnb, bit;
62: unsigned int cnt;
63: mp_size_t i;
64: unsigned long int n_valid_bits, adj;
65: TMP_DECL (marker);
66:
67: TMP_MARK (marker);
68:
69: /* The extra factor 1.585 = log(3)/log(2) here is for the worst case
70: overestimate of the root, i.e., when the code rounds a root that is
71: 2+epsilon to 3, and the powers this to a potentially huge power. We
72: could generalize the code for detecting root=1 a few lines below to deal
73: with xnb <= k, for some small k. For example, when xnb <= 2, meaning
74: the root should be 1, 2, or 3, we could replace this factor by the much
75: smaller log(5)/log(4). */
76:
77: #define PP_ALLOC (2 + (mp_size_t) (un*1.585))
78: pp = TMP_ALLOC_LIMBS (PP_ALLOC);
79:
80: count_leading_zeros (cnt, up[un - 1]);
81: unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;
82:
83: xnb = (unb - 1) / nth + 1;
84: if (xnb == 1)
85: {
86: if (remp == NULL)
87: remp = pp;
88: mpn_sub_1 (remp, up, un, (mp_limb_t) 1);
89: MPN_NORMALIZE (remp, un);
90: rootp[0] = 1;
91: TMP_FREE (marker);
92: return un;
93: }
94:
95: xn = (xnb + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
96:
97: qp = TMP_ALLOC_LIMBS (un + 1);
98: xp = TMP_ALLOC_LIMBS (xn + 1);
99:
100: /* Set initial root to only ones. This is an overestimate of the actual root
101: by less than a factor of 2. */
102: for (i = 0; i < xn; i++)
103: xp[i] = GMP_NUMB_MAX;
104: xp[xnb / GMP_NUMB_BITS] = ((mp_limb_t) 1 << (xnb % GMP_NUMB_BITS)) - 1;
105:
106: /* Improve the initial approximation, one bit at a time. Keep the
107: approximations >= root(U,nth). */
108: bit = xnb - 2;
109: n_valid_bits = 0;
110: for (i = 0; (nth >> i) != 0; i++)
111: {
112: mp_limb_t xl = xp[bit / GMP_NUMB_BITS];
113: xp[bit / GMP_NUMB_BITS] = xl ^ (mp_limb_t) 1 << bit % GMP_NUMB_BITS;
114: pn = mpn_pow_1 (pp, xp, xn, nth, qp);
115: ASSERT_ALWAYS (pn < PP_ALLOC);
116: /* If the new root approximation is too small, restore old value. */
117: if (! (un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0)))
118: xp[bit / GMP_NUMB_BITS] = xl; /* restore old value */
119: n_valid_bits += 1;
120: if (bit == 0)
121: goto done;
122: bit--;
123: }
124:
125: adj = n_valid_bits - 1;
126:
127: /* Newton loop. Converges downwards towards root(U,nth). Currently we use
128: full precision from iteration 1. Clearly, we should use just n_valid_bits
129: of precision in each step, and thus save most of the computations. */
130: while (n_valid_bits <= xnb)
131: {
132: mp_limb_t cy;
133:
134: pn = mpn_pow_1 (pp, xp, xn, nth - 1, qp);
135: ASSERT_ALWAYS (pn < PP_ALLOC);
136: qp[xn - 1] = 0; /* pad quotient to make it always xn limbs */
137: mpn_tdiv_qr (qp, pp, (mp_size_t) 0, up, un, pp, pn); /* junk remainder */
138: cy = mpn_addmul_1 (qp, xp, xn, nth - 1);
139: if (un - pn == xn)
140: {
141: cy += qp[xn];
142: if (cy == nth)
143: {
144: for (i = xn - 1; i >= 0; i--)
145: qp[i] = GMP_NUMB_MAX;
146: cy = nth - 1;
147: }
148: }
149:
150: qp[xn] = cy;
151: qn = xn + (cy != 0);
152:
153: mpn_divrem_1 (xp, (mp_size_t) 0, qp, qn, nth);
154: n_valid_bits = n_valid_bits * 2 - adj;
155: }
156:
157: /* The computed result might be one unit too large. Adjust as necessary. */
158: done:
159: pn = mpn_pow_1 (pp, xp, xn, nth, qp);
160: ASSERT_ALWAYS (pn < PP_ALLOC);
161: if (un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0))
162: {
163: mpn_decr_u (xp, 1);
164: pn = mpn_pow_1 (pp, xp, xn, nth, qp);
165: ASSERT_ALWAYS (pn < PP_ALLOC);
166:
167: ASSERT_ALWAYS (! (un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0)));
168: }
169:
170: if (remp == NULL)
171: remp = pp;
172: mpn_sub (remp, up, un, pp, pn);
173: MPN_NORMALIZE (remp, un);
174: MPN_COPY (rootp, xp, xn);
175: TMP_FREE (marker);
176: return un;
177: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>