Annotation of OpenXM_contrib/gmp/mpfr/exp_2.c, Revision 1.1.1.1
1.1 ohara 1: /* mpfr_exp_2 -- exponential of a floating-point number
2: using Brent's algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
3:
4: Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
5:
6: This file is part of the MPFR Library.
7:
8: The MPFR Library is free software; you can redistribute it and/or modify
9: it under the terms of the GNU Lesser General Public License as published by
10: the Free Software Foundation; either version 2.1 of the License, or (at your
11: option) any later version.
12:
13: The MPFR Library is distributed in the hope that it will be useful, but
14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: License for more details.
17:
18: You should have received a copy of the GNU Lesser General Public License
19: along with the MPFR Library; see the file COPYING.LIB. If not, write to
20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: MA 02111-1307, USA. */
22:
23: #include <stdio.h>
24: #include "gmp.h"
25: #include "gmp-impl.h"
26: #include "mpfr.h"
27: #include "mpfr-impl.h"
28:
29: static int mpfr_exp2_aux _PROTO ((mpz_t, mpfr_srcptr, int, int*));
30: static int mpfr_exp2_aux2 _PROTO ((mpz_t, mpfr_srcptr, int, int*));
31: static mp_exp_t mpz_normalize _PROTO ((mpz_t, mpz_t, int));
32: static int mpz_normalize2 _PROTO ((mpz_t, mpz_t, int, int));
33:
34: /* returns floor(sqrt(n)) */
35: unsigned long
36: _mpfr_isqrt (unsigned long n)
37: {
38: unsigned long s;
39:
40: s = 1;
41: do {
42: s = (s + n / s) / 2;
43: } while (!(s*s <= n && n <= s*(s+2)));
44: return s;
45: }
46:
47: /* returns floor(n^(1/3)) */
48: unsigned long
49: _mpfr_cuberoot (unsigned long n)
50: {
51: double s, is;
52:
53: s = 1.0;
54: do {
55: s = (2*s*s*s + (double) n) / (3*s*s);
56: is = (double) ((int) s);
57: } while (!(is*is*is <= (double) n && (double) n < (is+1)*(is+1)*(is+1)));
58: return (unsigned long) is;
59: }
60:
61: #define SWITCH 100 /* number of bits to switch from O(n^(1/2)*M(n)) method
62: to O(n^(1/3)*M(n)) method */
63:
64: #define MY_INIT_MPZ(x, s) { \
65: (x)->_mp_alloc = (s); \
66: PTR(x) = (mp_ptr) TMP_ALLOC((s)*BYTES_PER_MP_LIMB); \
67: (x)->_mp_size = 0; }
68:
69: /* #define DEBUG */
70:
71: /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
72: Otherwise do nothing and return 0.
73: */
74: static mp_exp_t
75: mpz_normalize (mpz_t rop, mpz_t z, int q)
76: {
77: int k;
78:
79: k = mpz_sizeinbase(z, 2);
80: if (k > q) {
81: mpz_div_2exp(rop, z, k-q);
82: return k-q;
83: }
84: else {
85: if (rop != z) mpz_set(rop, z);
86: return 0;
87: }
88: }
89:
90: /* if expz > target, shift z by (expz-target) bits to the left.
91: if expz < target, shift z by (target-expz) bits to the right.
92: Returns target.
93: */
94: static int
95: mpz_normalize2 (mpz_t rop, mpz_t z, int expz, int target)
96: {
97: if (target > expz) mpz_div_2exp(rop, z, target-expz);
98: else mpz_mul_2exp(rop, z, expz-target);
99: return target;
100: }
101:
102: /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
103: where x = n*log(2)+(2^K)*r
104: together with Brent-Kung O(t^(1/2)) algorithm for the evaluation of
105: power series. The resulting complexity is O(n^(1/3)*M(n)).
106: */
107: int
108: mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
109: {
110: int n, K, precy, q, k, l, err, exps, inexact;
111: mpfr_t r, s, t; mpz_t ss;
112: TMP_DECL(marker);
113:
114: precy = MPFR_PREC(y);
115:
116: n = (int) (mpfr_get_d1 (x) / LOG2);
117:
118: /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
119: n/K terms costs about n/(2K) multiplications when computed in fixed
120: point */
121: K = (precy<SWITCH) ? _mpfr_isqrt((precy + 1) / 2) : _mpfr_cuberoot (4*precy);
122: l = (precy-1)/K + 1;
123: err = K + (int) _mpfr_ceil_log2 (2.0 * (double) l + 18.0);
124: /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
125: q = precy + err + K + 3;
126: mpfr_init2 (r, q);
127: mpfr_init2 (s, q);
128: mpfr_init2 (t, q);
129: /* the algorithm consists in computing an upper bound of exp(x) using
130: a precision of q bits, and see if we can round to MPFR_PREC(y) taking
131: into account the maximal error. Otherwise we increase q. */
132: do {
133: #ifdef DEBUG
134: printf("n=%d K=%d l=%d q=%d\n",n,K,l,q);
135: #endif
136:
137: /* if n<0, we have to get an upper bound of log(2)
138: in order to get an upper bound of r = x-n*log(2) */
139: mpfr_const_log2 (s, (n>=0) ? GMP_RNDZ : GMP_RNDU);
140: #ifdef DEBUG
141: printf("n=%d log(2)=",n); mpfr_print_binary(s); putchar('\n');
142: #endif
143: mpfr_mul_ui (r, s, (n<0) ? -n : n, (n>=0) ? GMP_RNDZ : GMP_RNDU);
144: if (n<0) mpfr_neg(r, r, GMP_RNDD);
145: /* r = floor(n*log(2)) */
146:
147: #ifdef DEBUG
148: printf("x=%1.20e\n", mpfr_get_d1 (x));
149: printf(" ="); mpfr_print_binary(x); putchar('\n');
150: printf("r=%1.20e\n", mpfr_get_d1 (r));
151: printf(" ="); mpfr_print_binary(r); putchar('\n');
152: #endif
153: mpfr_sub(r, x, r, GMP_RNDU);
154: if (MPFR_SIGN(r)<0) { /* initial approximation n was too large */
155: n--;
156: mpfr_mul_ui(r, s, (n<0) ? -n : n, GMP_RNDZ);
157: if (n<0) mpfr_neg(r, r, GMP_RNDD);
158: mpfr_sub(r, x, r, GMP_RNDU);
159: }
160: #ifdef DEBUG
161: printf("x-r=%1.20e\n", mpfr_get_d1 (r));
162: printf(" ="); mpfr_print_binary(r); putchar('\n');
163: if (MPFR_SIGN(r)<0) { fprintf(stderr,"Error in mpfr_exp: r<0\n"); exit(1); }
164: #endif
165: mpfr_div_2ui(r, r, K, GMP_RNDU); /* r = (x-n*log(2))/2^K */
166:
167: TMP_MARK(marker);
168: MY_INIT_MPZ(ss, 3 + 2*((q-1)/BITS_PER_MP_LIMB));
169: exps = mpfr_get_z_exp(ss, s);
170: /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
171: l = (precy<SWITCH) ? mpfr_exp2_aux(ss, r, q, &exps) /* naive method */
172: : mpfr_exp2_aux2(ss, r, q, &exps); /* Brent/Kung method */
173:
174: #ifdef DEBUG
175: printf("l=%d q=%d (K+l)*q^2=%1.3e\n", l, q, (K+l)*(double)q*q);
176: #endif
177:
178: for (k=0;k<K;k++) {
179: mpz_mul(ss, ss, ss); exps <<= 1;
180: exps += mpz_normalize(ss, ss, q);
181: }
182: mpfr_set_z(s, ss, GMP_RNDN); MPFR_EXP(s) += exps;
183: TMP_FREE(marker); /* don't need ss anymore */
184:
185: if (n>0) mpfr_mul_2ui(s, s, n, GMP_RNDU);
186: else mpfr_div_2ui(s, s, -n, GMP_RNDU);
187:
188: /* error is at most 2^K*(3l*(l+1)) ulp for mpfr_exp2_aux */
189: if (precy<SWITCH) l = 3*l*(l+1);
190: else l = l*(l+4);
191: k=0; while (l) { k++; l >>= 1; }
192: /* now k = ceil(log(error in ulps)/log(2)) */
193: K += k;
194: #ifdef DEBUG
195: printf("after mult. by 2^n:\n");
196: if (MPFR_EXP(s) > -1024)
197: printf("s=%1.20e\n", mpfr_get_d1 (s));
198: printf(" ="); mpfr_print_binary(s); putchar('\n');
199: printf("err=%d bits\n", K);
200: #endif
201:
202: l = mpfr_can_round(s, q-K, GMP_RNDN, rnd_mode, precy);
203: if (l==0) {
204: #ifdef DEBUG
205: printf("not enough precision, use %d\n", q+BITS_PER_MP_LIMB);
206: printf("q=%d q-K=%d precy=%d\n",q,q-K,precy);
207: #endif
208: q += BITS_PER_MP_LIMB;
209: mpfr_set_prec(r, q); mpfr_set_prec(s, q); mpfr_set_prec(t, q);
210: }
211: } while (l==0);
212:
213: inexact = mpfr_set (y, s, rnd_mode);
214:
215: mpfr_clear(r); mpfr_clear(s); mpfr_clear(t);
216: return inexact;
217: }
218:
219: /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
220: using naive method with O(l) multiplications.
221: Return the number of iterations l.
222: The absolute error on s is less than 3*l*(l+1)*2^(-q).
223: Version using fixed-point arithmetic with mpz instead
224: of mpfr for internal computations.
225: s must have at least qn+1 limbs (qn should be enough, but currently fails
226: since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs)
227: */
228: static int
229: mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, int q, int *exps)
230: {
231: int l, dif, qn;
232: mpz_t t, rr; mp_exp_t expt, expr;
233: TMP_DECL(marker);
234:
235: TMP_MARK(marker);
236: qn = 1 + (q-1)/BITS_PER_MP_LIMB;
237: MY_INIT_MPZ(t, 2*qn+1); /* 2*qn+1 is neeeded since mpz_div_2exp may
238: need an extra limb */
239: MY_INIT_MPZ(rr, qn+1);
240: mpz_set_ui(t, 1); expt=0;
241: mpz_set_ui(s, 1); mpz_mul_2exp(s, s, q-1); *exps = 1-q; /* s = 2^(q-1) */
242: expr = mpfr_get_z_exp(rr, r); /* no error here */
243:
244: l = 0;
245: do {
246: l++;
247: mpz_mul(t, t, rr); expt=expt+expr;
248: dif = *exps + mpz_sizeinbase(s, 2) - expt - mpz_sizeinbase(t, 2);
249: /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
250: expt += mpz_normalize(t, t, q-dif); /* error at most 2^(1-q) */
251: mpz_div_ui(t, t, l); /* error at most 2^(1-q) */
252: /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
253: #ifdef DEBUG
254: if (expt != *exps) {
255: fprintf(stderr, "Error: expt != exps %d %d\n", expt, *exps); exit(1);
256: }
257: #endif
258: mpz_add(s, s, t); /* no error here: exact */
259: /* ensures rr has the same size as t: after several shifts, the error
260: on rr is still at most ulp(t)=ulp(s) */
261: expr += mpz_normalize(rr, rr, mpz_sizeinbase(t, 2));
262: } while (mpz_cmp_ui(t, 0));
263:
264: TMP_FREE(marker);
265: return l;
266: }
267:
268: /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
269: using Brent/Kung method with O(sqrt(l)) multiplications.
270: Return l.
271: Uses m multiplications of full size and 2l/m of decreasing size,
272: i.e. a total equivalent to about m+l/m full multiplications,
273: i.e. 2*sqrt(l) for m=sqrt(l).
274: Version using mpz. ss must have at least (sizer+1) limbs.
275: The error is bounded by (l^2+4*l) ulps where l is the return value.
276: */
277: static int
278: mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, int q, int *exps)
279: {
280: int expr, l, m, i, sizer, *expR, expt, ql;
281: unsigned long int c;
282: mpz_t t, *R, rr, tmp;
283: TMP_DECL(marker);
284:
285: /* estimate value of l */
286: l = q / (-MPFR_EXP(r));
287: m = (int) _mpfr_isqrt (l);
288: /* we access R[2], thus we need m >= 2 */
289: if (m < 2) m = 2;
290: TMP_MARK(marker);
291: R = (mpz_t*) TMP_ALLOC((m+1)*sizeof(mpz_t)); /* R[i] stands for r^i */
292: expR = (int*) TMP_ALLOC((m+1)*sizeof(int)); /* exponent for R[i] */
293: sizer = 1 + (MPFR_PREC(r)-1)/BITS_PER_MP_LIMB;
294: mpz_init(tmp);
295: MY_INIT_MPZ(rr, sizer+2);
296: MY_INIT_MPZ(t, 2*sizer); /* double size for products */
297: mpz_set_ui(s, 0); *exps = 1-q; /* initialize s to zero, 1 ulp = 2^(1-q) */
298: for (i=0;i<=m;i++) MY_INIT_MPZ(R[i], sizer+2);
299: expR[1] = mpfr_get_z_exp(R[1], r); /* exact operation: no error */
300: expR[1] = mpz_normalize2(R[1], R[1], expR[1], 1-q); /* error <= 1 ulp */
301: mpz_mul(t, R[1], R[1]); /* err(t) <= 2 ulps */
302: mpz_div_2exp(R[2], t, q-1); /* err(R[2]) <= 3 ulps */
303: expR[2] = 1-q;
304: for (i=3;i<=m;i++) {
305: mpz_mul(t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
306: mpz_div_2exp(R[i], t, q-1); /* err(R[i]) <= 2*i-1 ulps */
307: expR[i] = 1-q;
308: }
309: mpz_set_ui(R[0], 1); mpz_mul_2exp(R[0], R[0], q-1); expR[0]=1-q; /* R[0]=1 */
310: mpz_set_ui(rr, 1); expr=0; /* rr contains r^l/l! */
311: /* by induction: err(rr) <= 2*l ulps */
312:
313: l = 0;
314: ql = q; /* precision used for current giant step */
315: do {
316: /* all R[i] must have exponent 1-ql */
317: if (l) for (i=0;i<m;i++) {
318: expR[i] = mpz_normalize2(R[i], R[i], expR[i], 1-ql);
319: }
320: /* the absolute error on R[i]*rr is still 2*i-1 ulps */
321: expt = mpz_normalize2(t, R[m-1], expR[m-1], 1-ql);
322: /* err(t) <= 2*m-1 ulps */
323: /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
324: using Horner's scheme */
325: for (i=m-2;i>=0;i--) {
326: mpz_div_ui(t, t, l+i+1); /* err(t) += 1 ulp */
327: mpz_add(t, t, R[i]);
328: }
329: /* now err(t) <= (3m-2) ulps */
330:
331: /* now multiplies t by r^l/l! and adds to s */
332: mpz_mul(t, t, rr); expt += expr;
333: expt = mpz_normalize2(t, t, expt, *exps);
334: /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
335: #ifdef DEBUG
336: if (expt != *exps) {
337: fprintf(stderr, "Error: expt != exps %d %d\n", expt, *exps); exit(1);
338: }
339: #endif
340: mpz_add(s, s, t); /* no error here */
341:
342: /* updates rr, the multiplication of the factors l+i could be done
343: using binary splitting too, but it is not sure it would save much */
344: mpz_mul(t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
345: expr += expR[m];
346: mpz_set_ui (tmp, 1);
347: for (i=1, c=1; i<=m; i++) {
348: if (l+i > ~((unsigned long int) 0)/c) {
349: mpz_mul_ui(tmp, tmp, c);
350: c = l+i;
351: }
352: else c *= (unsigned long int) l+i;
353: }
354: if (c != 1) mpz_mul_ui (tmp, tmp, c); /* tmp is exact */
355: mpz_fdiv_q(t, t, tmp); /* err(t) <= err(rr) + 2m */
356: expr += mpz_normalize(rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
357: ql = q - *exps - mpz_sizeinbase(s, 2) + expr + mpz_sizeinbase(rr, 2);
358: l+=m;
359: } while (expr+mpz_sizeinbase(rr, 2) > -q);
360:
361: TMP_FREE(marker);
362: mpz_clear(tmp);
363: return l;
364: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>