OpenXM_contrib/gmp/mpfr/exp_2.c - annotate

Return to exp_2.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpfr
Annotation of OpenXM_contrib/gmp/mpfr/exp_2.c, Revision 1.1

1.1     ! ohara       1: /* mpfr_exp_2 -- exponential of a floating-point number
        !             2:                 using Brent's algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
        !             3:
        !             4: Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
        !             5:
        !             6: This file is part of the MPFR Library.
        !             7:
        !             8: The MPFR Library is free software; you can redistribute it and/or modify
        !             9: it under the terms of the GNU Lesser General Public License as published by
        !            10: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            11: option) any later version.
        !            12:
        !            13: The MPFR Library is distributed in the hope that it will be useful, but
        !            14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            16: License for more details.
        !            17:
        !            18: You should have received a copy of the GNU Lesser General Public License
        !            19: along with the MPFR Library; see the file COPYING.LIB.  If not, write to
        !            20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: MA 02111-1307, USA. */
        !            22:
        !            23: #include <stdio.h>
        !            24: #include "gmp.h"
        !            25: #include "gmp-impl.h"
        !            26: #include "mpfr.h"
        !            27: #include "mpfr-impl.h"
        !            28:
        !            29: static int mpfr_exp2_aux      _PROTO ((mpz_t, mpfr_srcptr, int, int*));
        !            30: static int mpfr_exp2_aux2     _PROTO ((mpz_t, mpfr_srcptr, int, int*));
        !            31: static mp_exp_t mpz_normalize _PROTO ((mpz_t, mpz_t, int));
        !            32: static int mpz_normalize2     _PROTO ((mpz_t, mpz_t, int, int));
        !            33:
        !            34: /* returns floor(sqrt(n)) */
        !            35: unsigned long
        !            36: _mpfr_isqrt (unsigned long n)
        !            37: {
        !            38:   unsigned long s;
        !            39:
        !            40:   s = 1;
        !            41:   do {
        !            42:     s = (s + n / s) / 2;
        !            43:   } while (!(s*s <= n && n <= s*(s+2)));
        !            44:   return s;
        !            45: }
        !            46:
        !            47: /* returns floor(n^(1/3)) */
        !            48: unsigned long
        !            49: _mpfr_cuberoot (unsigned long n)
        !            50: {
        !            51:   double s, is;
        !            52:
        !            53:   s = 1.0;
        !            54:   do {
        !            55:     s = (2*s*s*s + (double) n) / (3*s*s);
        !            56:     is = (double) ((int) s);
        !            57:   } while (!(is*is*is <= (double) n && (double) n < (is+1)*(is+1)*(is+1)));
        !            58:   return (unsigned long) is;
        !            59: }
        !            60:
        !            61: #define SWITCH 100 /* number of bits to switch from O(n^(1/2)*M(n)) method
        !            62:                      to O(n^(1/3)*M(n)) method */
        !            63:
        !            64: #define MY_INIT_MPZ(x, s) { \
        !            65:    (x)->_mp_alloc = (s); \
        !            66:    PTR(x) = (mp_ptr) TMP_ALLOC((s)*BYTES_PER_MP_LIMB); \
        !            67:    (x)->_mp_size = 0; }
        !            68:
        !            69: /* #define DEBUG */
        !            70:
        !            71: /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
        !            72:    Otherwise do nothing and return 0.
        !            73:  */
        !            74: static mp_exp_t
        !            75: mpz_normalize (mpz_t rop, mpz_t z, int q)
        !            76: {
        !            77:   int k;
        !            78:
        !            79:   k = mpz_sizeinbase(z, 2);
        !            80:   if (k > q) {
        !            81:     mpz_div_2exp(rop, z, k-q);
        !            82:     return k-q;
        !            83:   }
        !            84:   else {
        !            85:     if (rop != z) mpz_set(rop, z);
        !            86:     return 0;
        !            87:   }
        !            88: }
        !            89:
        !            90: /* if expz > target, shift z by (expz-target) bits to the left.
        !            91:    if expz < target, shift z by (target-expz) bits to the right.
        !            92:    Returns target.
        !            93: */
        !            94: static int
        !            95: mpz_normalize2 (mpz_t rop, mpz_t z, int expz, int target)
        !            96: {
        !            97:   if (target > expz) mpz_div_2exp(rop, z, target-expz);
        !            98:   else mpz_mul_2exp(rop, z, expz-target);
        !            99:   return target;
        !           100: }
        !           101:
        !           102: /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
        !           103:    where x = n*log(2)+(2^K)*r
        !           104:    together with Brent-Kung O(t^(1/2)) algorithm for the evaluation of
        !           105:    power series. The resulting complexity is O(n^(1/3)*M(n)).
        !           106: */
        !           107: int
        !           108: mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
        !           109: {
        !           110:   int n, K, precy, q, k, l, err, exps, inexact;
        !           111:   mpfr_t r, s, t; mpz_t ss;
        !           112:   TMP_DECL(marker);
        !           113:
        !           114:   precy = MPFR_PREC(y);
        !           115:
        !           116:   n = (int) (mpfr_get_d1 (x) / LOG2);
        !           117:
        !           118:   /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
        !           119:      n/K terms costs about n/(2K) multiplications when computed in fixed
        !           120:      point */
        !           121:   K = (precy<SWITCH) ? _mpfr_isqrt((precy + 1) / 2) : _mpfr_cuberoot (4*precy);
        !           122:   l = (precy-1)/K + 1;
        !           123:   err = K + (int) _mpfr_ceil_log2 (2.0 * (double) l + 18.0);
        !           124:   /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
        !           125:   q = precy + err + K + 3;
        !           126:   mpfr_init2 (r, q);
        !           127:   mpfr_init2 (s, q);
        !           128:   mpfr_init2 (t, q);
        !           129:   /* the algorithm consists in computing an upper bound of exp(x) using
        !           130:      a precision of q bits, and see if we can round to MPFR_PREC(y) taking
        !           131:      into account the maximal error. Otherwise we increase q. */
        !           132:   do {
        !           133: #ifdef DEBUG
        !           134:   printf("n=%d K=%d l=%d q=%d\n",n,K,l,q);
        !           135: #endif
        !           136:
        !           137:   /* if n<0, we have to get an upper bound of log(2)
        !           138:      in order to get an upper bound of r = x-n*log(2) */
        !           139:   mpfr_const_log2 (s, (n>=0) ? GMP_RNDZ : GMP_RNDU);
        !           140: #ifdef DEBUG
        !           141:   printf("n=%d log(2)=",n); mpfr_print_binary(s); putchar('\n');
        !           142: #endif
        !           143:   mpfr_mul_ui (r, s, (n<0) ? -n : n, (n>=0) ? GMP_RNDZ : GMP_RNDU);
        !           144:   if (n<0) mpfr_neg(r, r, GMP_RNDD);
        !           145:   /* r = floor(n*log(2)) */
        !           146:
        !           147: #ifdef DEBUG
        !           148:   printf("x=%1.20e\n", mpfr_get_d1 (x));
        !           149:   printf(" ="); mpfr_print_binary(x); putchar('\n');
        !           150:   printf("r=%1.20e\n", mpfr_get_d1 (r));
        !           151:   printf(" ="); mpfr_print_binary(r); putchar('\n');
        !           152: #endif
        !           153:   mpfr_sub(r, x, r, GMP_RNDU);
        !           154:   if (MPFR_SIGN(r)<0) { /* initial approximation n was too large */
        !           155:     n--;
        !           156:     mpfr_mul_ui(r, s, (n<0) ? -n : n, GMP_RNDZ);
        !           157:     if (n<0) mpfr_neg(r, r, GMP_RNDD);
        !           158:     mpfr_sub(r, x, r, GMP_RNDU);
        !           159:   }
        !           160: #ifdef DEBUG
        !           161:   printf("x-r=%1.20e\n", mpfr_get_d1 (r));
        !           162:   printf(" ="); mpfr_print_binary(r); putchar('\n');
        !           163:   if (MPFR_SIGN(r)<0) { fprintf(stderr,"Error in mpfr_exp: r<0\n"); exit(1); }
        !           164: #endif
        !           165:   mpfr_div_2ui(r, r, K, GMP_RNDU); /* r = (x-n*log(2))/2^K */
        !           166:
        !           167:   TMP_MARK(marker);
        !           168:   MY_INIT_MPZ(ss, 3 + 2*((q-1)/BITS_PER_MP_LIMB));
        !           169:   exps = mpfr_get_z_exp(ss, s);
        !           170:   /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
        !           171:   l = (precy<SWITCH) ? mpfr_exp2_aux(ss, r, q, &exps) /* naive method */
        !           172:     : mpfr_exp2_aux2(ss, r, q, &exps); /* Brent/Kung method */
        !           173:
        !           174: #ifdef DEBUG
        !           175:   printf("l=%d q=%d (K+l)*q^2=%1.3e\n", l, q, (K+l)*(double)q*q);
        !           176: #endif
        !           177:
        !           178:   for (k=0;k<K;k++) {
        !           179:     mpz_mul(ss, ss, ss); exps <<= 1;
        !           180:     exps += mpz_normalize(ss, ss, q);
        !           181:   }
        !           182:   mpfr_set_z(s, ss, GMP_RNDN); MPFR_EXP(s) += exps;
        !           183:   TMP_FREE(marker); /* don't need ss anymore */
        !           184:
        !           185:   if (n>0) mpfr_mul_2ui(s, s, n, GMP_RNDU);
        !           186:   else mpfr_div_2ui(s, s, -n, GMP_RNDU);
        !           187:
        !           188:   /* error is at most 2^K*(3l*(l+1)) ulp for mpfr_exp2_aux */
        !           189:   if (precy<SWITCH) l = 3*l*(l+1);
        !           190:   else l = l*(l+4);
        !           191:   k=0; while (l) { k++; l >>= 1; }
        !           192:   /* now k = ceil(log(error in ulps)/log(2)) */
        !           193:   K += k;
        !           194: #ifdef DEBUG
        !           195:     printf("after mult. by 2^n:\n");
        !           196:     if (MPFR_EXP(s) > -1024)
        !           197:       printf("s=%1.20e\n", mpfr_get_d1 (s));
        !           198:     printf(" ="); mpfr_print_binary(s); putchar('\n');
        !           199:     printf("err=%d bits\n", K);
        !           200: #endif
        !           201:
        !           202:   l = mpfr_can_round(s, q-K, GMP_RNDN, rnd_mode, precy);
        !           203:   if (l==0) {
        !           204: #ifdef DEBUG
        !           205:      printf("not enough precision, use %d\n", q+BITS_PER_MP_LIMB);
        !           206:      printf("q=%d q-K=%d precy=%d\n",q,q-K,precy);
        !           207: #endif
        !           208:      q += BITS_PER_MP_LIMB;
        !           209:      mpfr_set_prec(r, q); mpfr_set_prec(s, q); mpfr_set_prec(t, q);
        !           210:   }
        !           211:   } while (l==0);
        !           212:
        !           213:   inexact = mpfr_set (y, s, rnd_mode);
        !           214:
        !           215:   mpfr_clear(r); mpfr_clear(s); mpfr_clear(t);
        !           216:   return inexact;
        !           217: }
        !           218:
        !           219: /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
        !           220:    using naive method with O(l) multiplications.
        !           221:    Return the number of iterations l.
        !           222:    The absolute error on s is less than 3*l*(l+1)*2^(-q).
        !           223:    Version using fixed-point arithmetic with mpz instead
        !           224:    of mpfr for internal computations.
        !           225:    s must have at least qn+1 limbs (qn should be enough, but currently fails
        !           226:    since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs)
        !           227: */
        !           228: static int
        !           229: mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, int q, int *exps)
        !           230: {
        !           231:   int l, dif, qn;
        !           232:   mpz_t t, rr; mp_exp_t expt, expr;
        !           233:   TMP_DECL(marker);
        !           234:
        !           235:   TMP_MARK(marker);
        !           236:   qn = 1 + (q-1)/BITS_PER_MP_LIMB;
        !           237:   MY_INIT_MPZ(t, 2*qn+1); /* 2*qn+1 is neeeded since mpz_div_2exp may
        !           238:                              need an extra limb */
        !           239:   MY_INIT_MPZ(rr, qn+1);
        !           240:   mpz_set_ui(t, 1); expt=0;
        !           241:   mpz_set_ui(s, 1); mpz_mul_2exp(s, s, q-1); *exps = 1-q; /* s = 2^(q-1) */
        !           242:   expr = mpfr_get_z_exp(rr, r); /* no error here */
        !           243:
        !           244:   l = 0;
        !           245:   do {
        !           246:     l++;
        !           247:     mpz_mul(t, t, rr); expt=expt+expr;
        !           248:     dif = *exps + mpz_sizeinbase(s, 2) - expt - mpz_sizeinbase(t, 2);
        !           249:     /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
        !           250:     expt += mpz_normalize(t, t, q-dif); /* error at most 2^(1-q) */
        !           251:     mpz_div_ui(t, t, l); /* error at most 2^(1-q) */
        !           252:     /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
        !           253: #ifdef DEBUG
        !           254:     if (expt != *exps) {
        !           255:       fprintf(stderr, "Error: expt != exps %d %d\n", expt, *exps); exit(1);
        !           256:     }
        !           257: #endif
        !           258:     mpz_add(s, s, t); /* no error here: exact */
        !           259:     /* ensures rr has the same size as t: after several shifts, the error
        !           260:        on rr is still at most ulp(t)=ulp(s) */
        !           261:     expr += mpz_normalize(rr, rr, mpz_sizeinbase(t, 2));
        !           262:   } while (mpz_cmp_ui(t, 0));
        !           263:
        !           264:   TMP_FREE(marker);
        !           265:   return l;
        !           266: }
        !           267:
        !           268: /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
        !           269:    using Brent/Kung method with O(sqrt(l)) multiplications.
        !           270:    Return l.
        !           271:    Uses m multiplications of full size and 2l/m of decreasing size,
        !           272:    i.e. a total equivalent to about m+l/m full multiplications,
        !           273:    i.e. 2*sqrt(l) for m=sqrt(l).
        !           274:    Version using mpz. ss must have at least (sizer+1) limbs.
        !           275:    The error is bounded by (l^2+4*l) ulps where l is the return value.
        !           276: */
        !           277: static int
        !           278: mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, int q, int *exps)
        !           279: {
        !           280:   int expr, l, m, i, sizer, *expR, expt, ql;
        !           281:   unsigned long int c;
        !           282:   mpz_t t, *R, rr, tmp;
        !           283:   TMP_DECL(marker);
        !           284:
        !           285:   /* estimate value of l */
        !           286:   l = q / (-MPFR_EXP(r));
        !           287:   m = (int) _mpfr_isqrt (l);
        !           288:   /* we access R[2], thus we need m >= 2 */
        !           289:   if (m < 2) m = 2;
        !           290:   TMP_MARK(marker);
        !           291:   R = (mpz_t*) TMP_ALLOC((m+1)*sizeof(mpz_t)); /* R[i] stands for r^i */
        !           292:   expR = (int*) TMP_ALLOC((m+1)*sizeof(int)); /* exponent for R[i] */
        !           293:   sizer = 1 + (MPFR_PREC(r)-1)/BITS_PER_MP_LIMB;
        !           294:   mpz_init(tmp);
        !           295:   MY_INIT_MPZ(rr, sizer+2);
        !           296:   MY_INIT_MPZ(t, 2*sizer); /* double size for products */
        !           297:   mpz_set_ui(s, 0); *exps = 1-q; /* initialize s to zero, 1 ulp = 2^(1-q) */
        !           298:   for (i=0;i<=m;i++) MY_INIT_MPZ(R[i], sizer+2);
        !           299:   expR[1] = mpfr_get_z_exp(R[1], r); /* exact operation: no error */
        !           300:   expR[1] = mpz_normalize2(R[1], R[1], expR[1], 1-q); /* error <= 1 ulp */
        !           301:   mpz_mul(t, R[1], R[1]); /* err(t) <= 2 ulps */
        !           302:   mpz_div_2exp(R[2], t, q-1); /* err(R[2]) <= 3 ulps */
        !           303:   expR[2] = 1-q;
        !           304:   for (i=3;i<=m;i++) {
        !           305:     mpz_mul(t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
        !           306:     mpz_div_2exp(R[i], t, q-1); /* err(R[i]) <= 2*i-1 ulps */
        !           307:     expR[i] = 1-q;
        !           308:   }
        !           309:   mpz_set_ui(R[0], 1); mpz_mul_2exp(R[0], R[0], q-1); expR[0]=1-q; /* R[0]=1 */
        !           310:   mpz_set_ui(rr, 1); expr=0; /* rr contains r^l/l! */
        !           311:   /* by induction: err(rr) <= 2*l ulps */
        !           312:
        !           313:   l = 0;
        !           314:   ql = q; /* precision used for current giant step */
        !           315:   do {
        !           316:     /* all R[i] must have exponent 1-ql */
        !           317:     if (l) for (i=0;i<m;i++) {
        !           318:       expR[i] = mpz_normalize2(R[i], R[i], expR[i], 1-ql);
        !           319:     }
        !           320:     /* the absolute error on R[i]*rr is still 2*i-1 ulps */
        !           321:     expt = mpz_normalize2(t, R[m-1], expR[m-1], 1-ql);
        !           322:     /* err(t) <= 2*m-1 ulps */
        !           323:     /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
        !           324:        using Horner's scheme */
        !           325:     for (i=m-2;i>=0;i--) {
        !           326:       mpz_div_ui(t, t, l+i+1); /* err(t) += 1 ulp */
        !           327:       mpz_add(t, t, R[i]);
        !           328:     }
        !           329:     /* now err(t) <= (3m-2) ulps */
        !           330:
        !           331:     /* now multiplies t by r^l/l! and adds to s */
        !           332:     mpz_mul(t, t, rr); expt += expr;
        !           333:     expt = mpz_normalize2(t, t, expt, *exps);
        !           334:     /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
        !           335: #ifdef DEBUG
        !           336:     if (expt != *exps) {
        !           337:       fprintf(stderr, "Error: expt != exps %d %d\n", expt, *exps); exit(1);
        !           338:     }
        !           339: #endif
        !           340:     mpz_add(s, s, t); /* no error here */
        !           341:
        !           342:     /* updates rr, the multiplication of the factors l+i could be done
        !           343:        using binary splitting too, but it is not sure it would save much */
        !           344:     mpz_mul(t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
        !           345:     expr += expR[m];
        !           346:     mpz_set_ui (tmp, 1);
        !           347:     for (i=1, c=1; i<=m; i++) {
        !           348:       if (l+i > ~((unsigned long int) 0)/c) {
        !           349:        mpz_mul_ui(tmp, tmp, c);
        !           350:        c = l+i;
        !           351:       }
        !           352:       else c *= (unsigned long int) l+i;
        !           353:     }
        !           354:     if (c != 1) mpz_mul_ui (tmp, tmp, c); /* tmp is exact */
        !           355:     mpz_fdiv_q(t, t, tmp); /* err(t) <= err(rr) + 2m */
        !           356:     expr += mpz_normalize(rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
        !           357:     ql = q - *exps - mpz_sizeinbase(s, 2) + expr + mpz_sizeinbase(rr, 2);
        !           358:     l+=m;
        !           359:   } while (expr+mpz_sizeinbase(rr, 2) > -q);
        !           360:
        !           361:   TMP_FREE(marker);
        !           362:   mpz_clear(tmp);
        !           363:   return l;
        !           364: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>