Annotation of OpenXM_contrib/gmp/mpfr/exp_2.c, Revision 1.1
1.1 ! ohara 1: /* mpfr_exp_2 -- exponential of a floating-point number
! 2: using Brent's algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
! 3:
! 4: Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
! 5:
! 6: This file is part of the MPFR Library.
! 7:
! 8: The MPFR Library is free software; you can redistribute it and/or modify
! 9: it under the terms of the GNU Lesser General Public License as published by
! 10: the Free Software Foundation; either version 2.1 of the License, or (at your
! 11: option) any later version.
! 12:
! 13: The MPFR Library is distributed in the hope that it will be useful, but
! 14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: License for more details.
! 17:
! 18: You should have received a copy of the GNU Lesser General Public License
! 19: along with the MPFR Library; see the file COPYING.LIB. If not, write to
! 20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: MA 02111-1307, USA. */
! 22:
! 23: #include <stdio.h>
! 24: #include "gmp.h"
! 25: #include "gmp-impl.h"
! 26: #include "mpfr.h"
! 27: #include "mpfr-impl.h"
! 28:
! 29: static int mpfr_exp2_aux _PROTO ((mpz_t, mpfr_srcptr, int, int*));
! 30: static int mpfr_exp2_aux2 _PROTO ((mpz_t, mpfr_srcptr, int, int*));
! 31: static mp_exp_t mpz_normalize _PROTO ((mpz_t, mpz_t, int));
! 32: static int mpz_normalize2 _PROTO ((mpz_t, mpz_t, int, int));
! 33:
! 34: /* returns floor(sqrt(n)) */
! 35: unsigned long
! 36: _mpfr_isqrt (unsigned long n)
! 37: {
! 38: unsigned long s;
! 39:
! 40: s = 1;
! 41: do {
! 42: s = (s + n / s) / 2;
! 43: } while (!(s*s <= n && n <= s*(s+2)));
! 44: return s;
! 45: }
! 46:
! 47: /* returns floor(n^(1/3)) */
! 48: unsigned long
! 49: _mpfr_cuberoot (unsigned long n)
! 50: {
! 51: double s, is;
! 52:
! 53: s = 1.0;
! 54: do {
! 55: s = (2*s*s*s + (double) n) / (3*s*s);
! 56: is = (double) ((int) s);
! 57: } while (!(is*is*is <= (double) n && (double) n < (is+1)*(is+1)*(is+1)));
! 58: return (unsigned long) is;
! 59: }
! 60:
! 61: #define SWITCH 100 /* number of bits to switch from O(n^(1/2)*M(n)) method
! 62: to O(n^(1/3)*M(n)) method */
! 63:
! 64: #define MY_INIT_MPZ(x, s) { \
! 65: (x)->_mp_alloc = (s); \
! 66: PTR(x) = (mp_ptr) TMP_ALLOC((s)*BYTES_PER_MP_LIMB); \
! 67: (x)->_mp_size = 0; }
! 68:
! 69: /* #define DEBUG */
! 70:
! 71: /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
! 72: Otherwise do nothing and return 0.
! 73: */
! 74: static mp_exp_t
! 75: mpz_normalize (mpz_t rop, mpz_t z, int q)
! 76: {
! 77: int k;
! 78:
! 79: k = mpz_sizeinbase(z, 2);
! 80: if (k > q) {
! 81: mpz_div_2exp(rop, z, k-q);
! 82: return k-q;
! 83: }
! 84: else {
! 85: if (rop != z) mpz_set(rop, z);
! 86: return 0;
! 87: }
! 88: }
! 89:
! 90: /* if expz > target, shift z by (expz-target) bits to the left.
! 91: if expz < target, shift z by (target-expz) bits to the right.
! 92: Returns target.
! 93: */
! 94: static int
! 95: mpz_normalize2 (mpz_t rop, mpz_t z, int expz, int target)
! 96: {
! 97: if (target > expz) mpz_div_2exp(rop, z, target-expz);
! 98: else mpz_mul_2exp(rop, z, expz-target);
! 99: return target;
! 100: }
! 101:
! 102: /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
! 103: where x = n*log(2)+(2^K)*r
! 104: together with Brent-Kung O(t^(1/2)) algorithm for the evaluation of
! 105: power series. The resulting complexity is O(n^(1/3)*M(n)).
! 106: */
! 107: int
! 108: mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
! 109: {
! 110: int n, K, precy, q, k, l, err, exps, inexact;
! 111: mpfr_t r, s, t; mpz_t ss;
! 112: TMP_DECL(marker);
! 113:
! 114: precy = MPFR_PREC(y);
! 115:
! 116: n = (int) (mpfr_get_d1 (x) / LOG2);
! 117:
! 118: /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
! 119: n/K terms costs about n/(2K) multiplications when computed in fixed
! 120: point */
! 121: K = (precy<SWITCH) ? _mpfr_isqrt((precy + 1) / 2) : _mpfr_cuberoot (4*precy);
! 122: l = (precy-1)/K + 1;
! 123: err = K + (int) _mpfr_ceil_log2 (2.0 * (double) l + 18.0);
! 124: /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
! 125: q = precy + err + K + 3;
! 126: mpfr_init2 (r, q);
! 127: mpfr_init2 (s, q);
! 128: mpfr_init2 (t, q);
! 129: /* the algorithm consists in computing an upper bound of exp(x) using
! 130: a precision of q bits, and see if we can round to MPFR_PREC(y) taking
! 131: into account the maximal error. Otherwise we increase q. */
! 132: do {
! 133: #ifdef DEBUG
! 134: printf("n=%d K=%d l=%d q=%d\n",n,K,l,q);
! 135: #endif
! 136:
! 137: /* if n<0, we have to get an upper bound of log(2)
! 138: in order to get an upper bound of r = x-n*log(2) */
! 139: mpfr_const_log2 (s, (n>=0) ? GMP_RNDZ : GMP_RNDU);
! 140: #ifdef DEBUG
! 141: printf("n=%d log(2)=",n); mpfr_print_binary(s); putchar('\n');
! 142: #endif
! 143: mpfr_mul_ui (r, s, (n<0) ? -n : n, (n>=0) ? GMP_RNDZ : GMP_RNDU);
! 144: if (n<0) mpfr_neg(r, r, GMP_RNDD);
! 145: /* r = floor(n*log(2)) */
! 146:
! 147: #ifdef DEBUG
! 148: printf("x=%1.20e\n", mpfr_get_d1 (x));
! 149: printf(" ="); mpfr_print_binary(x); putchar('\n');
! 150: printf("r=%1.20e\n", mpfr_get_d1 (r));
! 151: printf(" ="); mpfr_print_binary(r); putchar('\n');
! 152: #endif
! 153: mpfr_sub(r, x, r, GMP_RNDU);
! 154: if (MPFR_SIGN(r)<0) { /* initial approximation n was too large */
! 155: n--;
! 156: mpfr_mul_ui(r, s, (n<0) ? -n : n, GMP_RNDZ);
! 157: if (n<0) mpfr_neg(r, r, GMP_RNDD);
! 158: mpfr_sub(r, x, r, GMP_RNDU);
! 159: }
! 160: #ifdef DEBUG
! 161: printf("x-r=%1.20e\n", mpfr_get_d1 (r));
! 162: printf(" ="); mpfr_print_binary(r); putchar('\n');
! 163: if (MPFR_SIGN(r)<0) { fprintf(stderr,"Error in mpfr_exp: r<0\n"); exit(1); }
! 164: #endif
! 165: mpfr_div_2ui(r, r, K, GMP_RNDU); /* r = (x-n*log(2))/2^K */
! 166:
! 167: TMP_MARK(marker);
! 168: MY_INIT_MPZ(ss, 3 + 2*((q-1)/BITS_PER_MP_LIMB));
! 169: exps = mpfr_get_z_exp(ss, s);
! 170: /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
! 171: l = (precy<SWITCH) ? mpfr_exp2_aux(ss, r, q, &exps) /* naive method */
! 172: : mpfr_exp2_aux2(ss, r, q, &exps); /* Brent/Kung method */
! 173:
! 174: #ifdef DEBUG
! 175: printf("l=%d q=%d (K+l)*q^2=%1.3e\n", l, q, (K+l)*(double)q*q);
! 176: #endif
! 177:
! 178: for (k=0;k<K;k++) {
! 179: mpz_mul(ss, ss, ss); exps <<= 1;
! 180: exps += mpz_normalize(ss, ss, q);
! 181: }
! 182: mpfr_set_z(s, ss, GMP_RNDN); MPFR_EXP(s) += exps;
! 183: TMP_FREE(marker); /* don't need ss anymore */
! 184:
! 185: if (n>0) mpfr_mul_2ui(s, s, n, GMP_RNDU);
! 186: else mpfr_div_2ui(s, s, -n, GMP_RNDU);
! 187:
! 188: /* error is at most 2^K*(3l*(l+1)) ulp for mpfr_exp2_aux */
! 189: if (precy<SWITCH) l = 3*l*(l+1);
! 190: else l = l*(l+4);
! 191: k=0; while (l) { k++; l >>= 1; }
! 192: /* now k = ceil(log(error in ulps)/log(2)) */
! 193: K += k;
! 194: #ifdef DEBUG
! 195: printf("after mult. by 2^n:\n");
! 196: if (MPFR_EXP(s) > -1024)
! 197: printf("s=%1.20e\n", mpfr_get_d1 (s));
! 198: printf(" ="); mpfr_print_binary(s); putchar('\n');
! 199: printf("err=%d bits\n", K);
! 200: #endif
! 201:
! 202: l = mpfr_can_round(s, q-K, GMP_RNDN, rnd_mode, precy);
! 203: if (l==0) {
! 204: #ifdef DEBUG
! 205: printf("not enough precision, use %d\n", q+BITS_PER_MP_LIMB);
! 206: printf("q=%d q-K=%d precy=%d\n",q,q-K,precy);
! 207: #endif
! 208: q += BITS_PER_MP_LIMB;
! 209: mpfr_set_prec(r, q); mpfr_set_prec(s, q); mpfr_set_prec(t, q);
! 210: }
! 211: } while (l==0);
! 212:
! 213: inexact = mpfr_set (y, s, rnd_mode);
! 214:
! 215: mpfr_clear(r); mpfr_clear(s); mpfr_clear(t);
! 216: return inexact;
! 217: }
! 218:
! 219: /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
! 220: using naive method with O(l) multiplications.
! 221: Return the number of iterations l.
! 222: The absolute error on s is less than 3*l*(l+1)*2^(-q).
! 223: Version using fixed-point arithmetic with mpz instead
! 224: of mpfr for internal computations.
! 225: s must have at least qn+1 limbs (qn should be enough, but currently fails
! 226: since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs)
! 227: */
! 228: static int
! 229: mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, int q, int *exps)
! 230: {
! 231: int l, dif, qn;
! 232: mpz_t t, rr; mp_exp_t expt, expr;
! 233: TMP_DECL(marker);
! 234:
! 235: TMP_MARK(marker);
! 236: qn = 1 + (q-1)/BITS_PER_MP_LIMB;
! 237: MY_INIT_MPZ(t, 2*qn+1); /* 2*qn+1 is neeeded since mpz_div_2exp may
! 238: need an extra limb */
! 239: MY_INIT_MPZ(rr, qn+1);
! 240: mpz_set_ui(t, 1); expt=0;
! 241: mpz_set_ui(s, 1); mpz_mul_2exp(s, s, q-1); *exps = 1-q; /* s = 2^(q-1) */
! 242: expr = mpfr_get_z_exp(rr, r); /* no error here */
! 243:
! 244: l = 0;
! 245: do {
! 246: l++;
! 247: mpz_mul(t, t, rr); expt=expt+expr;
! 248: dif = *exps + mpz_sizeinbase(s, 2) - expt - mpz_sizeinbase(t, 2);
! 249: /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
! 250: expt += mpz_normalize(t, t, q-dif); /* error at most 2^(1-q) */
! 251: mpz_div_ui(t, t, l); /* error at most 2^(1-q) */
! 252: /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
! 253: #ifdef DEBUG
! 254: if (expt != *exps) {
! 255: fprintf(stderr, "Error: expt != exps %d %d\n", expt, *exps); exit(1);
! 256: }
! 257: #endif
! 258: mpz_add(s, s, t); /* no error here: exact */
! 259: /* ensures rr has the same size as t: after several shifts, the error
! 260: on rr is still at most ulp(t)=ulp(s) */
! 261: expr += mpz_normalize(rr, rr, mpz_sizeinbase(t, 2));
! 262: } while (mpz_cmp_ui(t, 0));
! 263:
! 264: TMP_FREE(marker);
! 265: return l;
! 266: }
! 267:
! 268: /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
! 269: using Brent/Kung method with O(sqrt(l)) multiplications.
! 270: Return l.
! 271: Uses m multiplications of full size and 2l/m of decreasing size,
! 272: i.e. a total equivalent to about m+l/m full multiplications,
! 273: i.e. 2*sqrt(l) for m=sqrt(l).
! 274: Version using mpz. ss must have at least (sizer+1) limbs.
! 275: The error is bounded by (l^2+4*l) ulps where l is the return value.
! 276: */
! 277: static int
! 278: mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, int q, int *exps)
! 279: {
! 280: int expr, l, m, i, sizer, *expR, expt, ql;
! 281: unsigned long int c;
! 282: mpz_t t, *R, rr, tmp;
! 283: TMP_DECL(marker);
! 284:
! 285: /* estimate value of l */
! 286: l = q / (-MPFR_EXP(r));
! 287: m = (int) _mpfr_isqrt (l);
! 288: /* we access R[2], thus we need m >= 2 */
! 289: if (m < 2) m = 2;
! 290: TMP_MARK(marker);
! 291: R = (mpz_t*) TMP_ALLOC((m+1)*sizeof(mpz_t)); /* R[i] stands for r^i */
! 292: expR = (int*) TMP_ALLOC((m+1)*sizeof(int)); /* exponent for R[i] */
! 293: sizer = 1 + (MPFR_PREC(r)-1)/BITS_PER_MP_LIMB;
! 294: mpz_init(tmp);
! 295: MY_INIT_MPZ(rr, sizer+2);
! 296: MY_INIT_MPZ(t, 2*sizer); /* double size for products */
! 297: mpz_set_ui(s, 0); *exps = 1-q; /* initialize s to zero, 1 ulp = 2^(1-q) */
! 298: for (i=0;i<=m;i++) MY_INIT_MPZ(R[i], sizer+2);
! 299: expR[1] = mpfr_get_z_exp(R[1], r); /* exact operation: no error */
! 300: expR[1] = mpz_normalize2(R[1], R[1], expR[1], 1-q); /* error <= 1 ulp */
! 301: mpz_mul(t, R[1], R[1]); /* err(t) <= 2 ulps */
! 302: mpz_div_2exp(R[2], t, q-1); /* err(R[2]) <= 3 ulps */
! 303: expR[2] = 1-q;
! 304: for (i=3;i<=m;i++) {
! 305: mpz_mul(t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
! 306: mpz_div_2exp(R[i], t, q-1); /* err(R[i]) <= 2*i-1 ulps */
! 307: expR[i] = 1-q;
! 308: }
! 309: mpz_set_ui(R[0], 1); mpz_mul_2exp(R[0], R[0], q-1); expR[0]=1-q; /* R[0]=1 */
! 310: mpz_set_ui(rr, 1); expr=0; /* rr contains r^l/l! */
! 311: /* by induction: err(rr) <= 2*l ulps */
! 312:
! 313: l = 0;
! 314: ql = q; /* precision used for current giant step */
! 315: do {
! 316: /* all R[i] must have exponent 1-ql */
! 317: if (l) for (i=0;i<m;i++) {
! 318: expR[i] = mpz_normalize2(R[i], R[i], expR[i], 1-ql);
! 319: }
! 320: /* the absolute error on R[i]*rr is still 2*i-1 ulps */
! 321: expt = mpz_normalize2(t, R[m-1], expR[m-1], 1-ql);
! 322: /* err(t) <= 2*m-1 ulps */
! 323: /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
! 324: using Horner's scheme */
! 325: for (i=m-2;i>=0;i--) {
! 326: mpz_div_ui(t, t, l+i+1); /* err(t) += 1 ulp */
! 327: mpz_add(t, t, R[i]);
! 328: }
! 329: /* now err(t) <= (3m-2) ulps */
! 330:
! 331: /* now multiplies t by r^l/l! and adds to s */
! 332: mpz_mul(t, t, rr); expt += expr;
! 333: expt = mpz_normalize2(t, t, expt, *exps);
! 334: /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
! 335: #ifdef DEBUG
! 336: if (expt != *exps) {
! 337: fprintf(stderr, "Error: expt != exps %d %d\n", expt, *exps); exit(1);
! 338: }
! 339: #endif
! 340: mpz_add(s, s, t); /* no error here */
! 341:
! 342: /* updates rr, the multiplication of the factors l+i could be done
! 343: using binary splitting too, but it is not sure it would save much */
! 344: mpz_mul(t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
! 345: expr += expR[m];
! 346: mpz_set_ui (tmp, 1);
! 347: for (i=1, c=1; i<=m; i++) {
! 348: if (l+i > ~((unsigned long int) 0)/c) {
! 349: mpz_mul_ui(tmp, tmp, c);
! 350: c = l+i;
! 351: }
! 352: else c *= (unsigned long int) l+i;
! 353: }
! 354: if (c != 1) mpz_mul_ui (tmp, tmp, c); /* tmp is exact */
! 355: mpz_fdiv_q(t, t, tmp); /* err(t) <= err(rr) + 2m */
! 356: expr += mpz_normalize(rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
! 357: ql = q - *exps - mpz_sizeinbase(s, 2) + expr + mpz_sizeinbase(rr, 2);
! 358: l+=m;
! 359: } while (expr+mpz_sizeinbase(rr, 2) > -q);
! 360:
! 361: TMP_FREE(marker);
! 362: mpz_clear(tmp);
! 363: return l;
! 364: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>