version 1.1, 2000/09/09 14:12:19 |
version 1.1.1.2, 2003/08/25 16:06:07 |
|
|
/* mpfr_sqrt -- square root of a floating-point number |
/* mpfr_sqrt -- square root of a floating-point number |
|
|
Copyright (C) 1999 PolKA project, Inria Lorraine and Loria |
Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. |
|
|
This file is part of the MPFR Library. |
This file is part of the MPFR Library. |
|
|
The MPFR Library is free software; you can redistribute it and/or modify |
The MPFR Library is free software; you can redistribute it and/or modify |
it under the terms of the GNU Library General Public License as published by |
it under the terms of the GNU Lesser General Public License as published by |
the Free Software Foundation; either version 2 of the License, or (at your |
the Free Software Foundation; either version 2.1 of the License, or (at your |
option) any later version. |
option) any later version. |
|
|
The MPFR Library is distributed in the hope that it will be useful, but |
The MPFR Library is distributed in the hope that it will be useful, but |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
License for more details. |
License for more details. |
|
|
You should have received a copy of the GNU Library General Public License |
You should have received a copy of the GNU Lesser General Public License |
along with the MPFR Library; see the file COPYING.LIB. If not, write to |
along with the MPFR Library; see the file COPYING.LIB. If not, write to |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
MA 02111-1307, USA. */ |
MA 02111-1307, USA. */ |
|
|
#include <math.h> |
|
#include <stdio.h> |
|
#include <stdlib.h> |
|
#include "gmp.h" |
#include "gmp.h" |
#include "gmp-impl.h" |
#include "gmp-impl.h" |
#include "mpfr.h" |
#include "mpfr.h" |
#include "longlong.h" |
#include "mpfr-impl.h" |
|
|
/* #define DEBUG */ |
/* #define DEBUG */ |
|
|
int |
int |
mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, unsigned char rnd_mode) |
mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mp_rnd_t rnd_mode) |
{ |
{ |
mp_ptr up, rp, tmp, remp; |
mp_ptr up, rp, tmp, remp; |
mp_size_t usize, rrsize; |
mp_size_t usize, rrsize; |
mp_size_t rsize; |
mp_size_t rsize; |
mp_size_t prec, err; |
mp_size_t err; |
mp_limb_t q_limb; |
mp_limb_t q_limb; |
|
int odd_exp_u; |
long rw, nw, k; |
long rw, nw, k; |
int exact = 0; |
int inexact = 0, t; |
unsigned long cc = 0; |
unsigned long cc = 0; |
char can_round = 0; |
int can_round = 0; |
TMP_DECL (marker); TMP_DECL(marker0); |
|
|
|
if (FLAG_NAN(u) || SIGN(u) == -1) { SET_NAN(r); return 0; } |
TMP_DECL(marker); |
|
|
prec = PREC(r); |
|
|
|
if (!NOTZERO(u)) |
if (MPFR_IS_NAN(u)) |
{ |
{ |
EXP(r) = 0; |
MPFR_SET_NAN(r); |
MPN_ZERO(MANT(r), ABSSIZE(r)); |
MPFR_RET_NAN; |
return 1; |
} |
} |
|
|
|
up = MANT(u); |
if (MPFR_SIGN(u) < 0) |
|
{ |
|
if (MPFR_IS_INF(u) || MPFR_NOTZERO(u)) |
|
{ |
|
MPFR_SET_NAN(r); |
|
MPFR_RET_NAN; |
|
} |
|
else |
|
{ /* sqrt(-0) = -0 */ |
|
MPFR_CLEAR_FLAGS(r); |
|
MPFR_SET_ZERO(r); |
|
MPFR_SET_NEG(r); |
|
MPFR_RET(0); |
|
} |
|
} |
|
|
|
MPFR_CLEAR_NAN(r); |
|
MPFR_SET_POS(r); |
|
|
|
if (MPFR_IS_INF(u)) |
|
{ |
|
MPFR_SET_INF(r); |
|
MPFR_RET(0); |
|
} |
|
|
|
MPFR_CLEAR_INF(r); |
|
|
|
if (MPFR_IS_ZERO(u)) |
|
{ |
|
MPFR_SET_ZERO(r); |
|
MPFR_RET(0); /* zero is exact */ |
|
} |
|
|
|
up = MPFR_MANT(u); |
|
usize = (MPFR_PREC(u) - 1)/BITS_PER_MP_LIMB + 1; |
|
|
#ifdef DEBUG |
#ifdef DEBUG |
printf("Entering square root : "); |
printf("Entering square root : "); |
for(k = usize - 1; k >= 0; k--) { printf("%lu ", up[k]); } |
for(k = usize - 1; k >= 0; k--) { printf("%lu ", up[k]); } |
printf(".\n"); |
printf(".\n"); |
#endif |
#endif |
|
|
/* Compare the mantissas */ |
/* Compare the mantissas */ |
|
|
usize = (PREC(u) - 1)/BITS_PER_MP_LIMB + 1; |
|
rsize = ((PREC(r) + 2 + (EXP(u) & 1))/BITS_PER_MP_LIMB + 1) << 1; |
|
rrsize = (PREC(r) + 2 + (EXP(u) & 1))/BITS_PER_MP_LIMB + 1; |
|
/* One extra bit is needed in order to get the square root with enough |
|
precision ; take one extra bit for rrsize in order to solve more |
|
easily the problem of rounding to nearest. |
|
Need to have 2*rrsize = rsize... |
|
Take one extra bit if the exponent of u is odd since we shall have |
|
to shift then. |
|
*/ |
|
|
|
TMP_MARK(marker0); |
odd_exp_u = (unsigned int) MPFR_EXP(u) & 1; |
if (EXP(u) & 1) /* Shift u one bit to the right */ |
MPFR_ASSERTN(MPFR_PREC(r) <= MPFR_INTPREC_MAX - 3); |
{ |
rrsize = (MPFR_PREC(r) + 2 + odd_exp_u) / BITS_PER_MP_LIMB + 1; |
if (PREC(u) & (BITS_PER_MP_LIMB - 1)) |
MPFR_ASSERTN(rrsize <= MP_SIZE_T_MAX/2); |
{ |
rsize = rrsize << 1; |
up = TMP_ALLOC(usize*BYTES_PER_MP_LIMB); |
/* One extra bit is needed in order to get the square root with enough |
mpn_rshift(up, u->_mp_d, usize, 1); |
precision ; take one extra bit for rrsize in order to solve more |
} |
easily the problem of rounding to nearest. |
else |
Need to have 2*rrsize = rsize... |
{ |
Take one extra bit if the exponent of u is odd since we shall have |
up = TMP_ALLOC((usize + 1)*BYTES_PER_MP_LIMB); |
to shift then. |
if (mpn_rshift(up + 1, u->_mp_d, ABSSIZE(u), 1)) |
*/ |
up [0] = ((mp_limb_t) 1) << (BITS_PER_MP_LIMB - 1); |
|
else up[0] = 0; |
|
usize++; |
|
} |
|
} |
|
|
|
EXP(r) = ((EXP(u) + (EXP(u) & 1)) / 2) ; |
TMP_MARK(marker); |
|
if (odd_exp_u) /* Shift u one bit to the right */ |
do |
{ |
{ |
if (MPFR_PREC(u) & (BITS_PER_MP_LIMB - 1)) |
TMP_MARK (marker); |
{ |
|
up = TMP_ALLOC(usize * BYTES_PER_MP_LIMB); |
|
mpn_rshift(up, MPFR_MANT(u), usize, 1); |
|
} |
|
else |
|
{ |
|
up = TMP_ALLOC((usize + 1) * BYTES_PER_MP_LIMB); |
|
if (mpn_rshift(up + 1, MPFR_MANT(u), usize, 1)) |
|
up[0] = GMP_LIMB_HIGHBIT; |
|
else |
|
up[0] = 0; |
|
usize++; |
|
} |
|
} |
|
|
err = rsize*BITS_PER_MP_LIMB; |
MPFR_EXP(r) = MPFR_EXP(u) != MPFR_EMAX_MAX |
if (rsize < usize) { err--; } |
? (MPFR_EXP(u) + odd_exp_u) / 2 |
if (err > rrsize * BITS_PER_MP_LIMB) |
: (MPFR_EMAX_MAX - 1) / 2 + 1; |
{ err = rrsize * BITS_PER_MP_LIMB; } |
|
|
do |
|
{ |
|
|
|
err = rsize * BITS_PER_MP_LIMB; |
|
if (rsize < usize) |
|
err--; |
|
if (err > rrsize * BITS_PER_MP_LIMB) |
|
err = rrsize * BITS_PER_MP_LIMB; |
|
|
tmp = (mp_ptr) TMP_ALLOC (rsize * BYTES_PER_MP_LIMB); |
tmp = (mp_ptr) TMP_ALLOC (rsize * BYTES_PER_MP_LIMB); |
rp = (mp_ptr) TMP_ALLOC (rrsize * BYTES_PER_MP_LIMB); |
rp = (mp_ptr) TMP_ALLOC (rrsize * BYTES_PER_MP_LIMB); |
remp = (mp_ptr) TMP_ALLOC (rsize * BYTES_PER_MP_LIMB); |
remp = (mp_ptr) TMP_ALLOC (rsize * BYTES_PER_MP_LIMB); |
|
|
if (usize >= rsize) { |
if (usize >= rsize) |
MPN_COPY (tmp, up + usize - rsize, rsize); |
{ |
} |
MPN_COPY (tmp, up + usize - rsize, rsize); |
else { |
} |
MPN_COPY (tmp + rsize - usize, up, usize); |
else |
MPN_ZERO (tmp, rsize - usize); |
{ |
} |
MPN_COPY (tmp + rsize - usize, up, usize); |
|
MPN_ZERO (tmp, rsize - usize); |
|
} |
|
|
/* Do the real job */ |
/* Do the real job */ |
|
|
#ifdef DEBUG |
#ifdef DEBUG |
printf("Taking the sqrt of : "); |
printf("Taking the sqrt of : "); |
for(k = rsize - 1; k >= 0; k--) { |
for(k = rsize - 1; k >= 0; k--) |
printf("+%lu*2^%lu",tmp[k],k*mp_bits_per_limb); } |
printf("+%lu*2^%lu",tmp[k],k*BITS_PER_MP_LIMB); |
printf(".\n"); |
printf(".\n"); |
#endif |
#endif |
|
|
q_limb = kara_sqrtrem (rp, remp, tmp, rsize); |
q_limb = mpn_sqrtrem (rp, remp, tmp, rsize); |
|
|
#ifdef DEBUG |
#ifdef DEBUG |
printf("The result is : \n"); |
printf ("The result is : \n"); |
printf("sqrt : "); |
printf ("sqrt : "); |
for(k = rrsize - 1; k >= 0; k--) { printf("%lu ", rp[k]); } |
for (k = rrsize - 1; k >= 0; k--) |
printf("(q_limb = %lu)\n", q_limb); |
printf ("%lu ", rp[k]); |
|
printf ("(inexact = %lu)\n", q_limb); |
#endif |
#endif |
|
|
can_round = (mpfr_can_round_raw(rp, rrsize, 1, err, |
|
GMP_RNDZ, rnd_mode, PREC(r))); |
|
|
|
/* If we used all the limbs of both the dividend and the divisor, |
can_round = mpfr_can_round_raw(rp, rrsize, 1, err, |
then we have the correct RNDZ rounding */ |
GMP_RNDZ, rnd_mode, MPFR_PREC(r)); |
|
|
if (!can_round && (rsize < 2*usize)) |
/* If we used all the limbs of both the dividend and the divisor, |
{ |
then we have the correct RNDZ rounding */ |
|
|
|
if (!can_round && (rsize < 2*usize)) |
|
{ |
#ifdef DEBUG |
#ifdef DEBUG |
printf("Increasing the precision.\n"); |
printf("Increasing the precision.\n"); |
#endif |
#endif |
TMP_FREE(marker); |
} |
} |
} |
} |
while (!can_round && (rsize < 2*usize) && (rsize += 2) && (rrsize++)); |
while (!can_round && (rsize < 2*usize) |
#ifdef DEBUG |
&& (rsize += 2) && (rrsize ++)); |
printf ("can_round = %d\n", can_round); |
|
#endif |
|
|
|
/* This part may be deplaced upper to avoid a few mpfr_can_round_raw */ |
|
/* when the square root is exact. It is however very unprobable that */ |
|
/* it would improve the behaviour of the present code on average. */ |
|
|
/* This part may be deplaced upper to avoid a few mpfr_can_round_raw */ |
if (!q_limb) /* the sqrtrem call was exact, possible exact square root */ |
/* when the square root is exact. It is however very unprobable that */ |
{ |
/* it would improve the behaviour of the present code on average. */ |
/* if we have taken into account the whole of up */ |
|
for (k = usize - rsize - 1; k >= 0; k++) |
|
if (up[k] != 0) |
|
break; |
|
|
if (!q_limb) /* possibly exact */ |
if (k < 0) |
{ |
goto fin; /* exact square root ==> inexact = 0 */ |
/* if we have taken into account the whole of up */ |
} |
for (k = usize - rsize - 1; k >= 0; k ++) |
|
if (up[k]) break; |
|
|
|
if (k < 0) { exact = 1; goto fin; } |
|
} |
|
|
|
if (can_round) |
if (can_round) |
{ |
|
cc = mpfr_round_raw(rp, rp, err, 0, PREC(r), rnd_mode); |
|
rrsize = (PREC(r) - 1)/BITS_PER_MP_LIMB + 1; |
|
} |
|
else |
|
/* Use the return value of sqrtrem to decide of the rounding */ |
|
/* Note that at this point the sqrt has been computed */ |
|
/* EXACTLY. If rounding = GMP_RNDZ, do nothing [comes from */ |
|
/* the fact that the exact square root can end with a bunch of ones, */ |
|
/* and in that case we indeed cannot round if we do not know that */ |
|
/* the computation was exact. */ |
|
switch (rnd_mode) |
|
{ |
{ |
case GMP_RNDZ : |
cc = mpfr_round_raw (rp, rp, err, 0, MPFR_PREC(r), rnd_mode, &inexact); |
case GMP_RNDD : break; |
if (inexact == 0) /* exact high part: inex flag depends on remainder */ |
|
inexact = -q_limb; |
|
rrsize = (MPFR_PREC(r) - 1)/BITS_PER_MP_LIMB + 1; |
|
} |
|
else |
|
{ |
|
/* Use the return value of sqrtrem to decide of the rounding */ |
|
/* Note that at this point the sqrt has been computed */ |
|
/* EXACTLY. */ |
|
switch (rnd_mode) |
|
{ |
|
case GMP_RNDZ : |
|
case GMP_RNDD : |
|
inexact = -1; /* result is truncated */ |
|
break; |
|
|
case GMP_RNDN : |
case GMP_RNDN : |
/* Not in the situation ...0 111111 */ |
/* Not in the situation ...0 111111 */ |
rw = (PREC(r) + 1) & (BITS_PER_MP_LIMB - 1); |
rw = (MPFR_PREC(r) + 1) & (BITS_PER_MP_LIMB - 1); |
if (rw) { rw = BITS_PER_MP_LIMB - rw; nw = 0; } else nw = 1; |
if (rw != 0) |
if ((rp[nw] >> rw) & 1 && /* Not 0111111111 */ |
{ |
(q_limb || /* Nonzero remainder */ |
rw = BITS_PER_MP_LIMB - rw; |
(rw ? (rp[nw] >> (rw + 1)) & 1 : |
nw = 0; |
(rp[nw] >> (BITS_PER_MP_LIMB - 1)) & 1))) /* or even rounding */ |
} |
cc = mpn_add_1(rp + nw, rp + nw, rrsize, ((mp_limb_t)1) << rw); |
else |
break; |
nw = 1; |
|
if ((rp[nw] >> rw) & 1 && /* Not 0111111111 */ |
|
(q_limb || /* Nonzero remainder */ |
|
(rw ? (rp[nw] >> (rw + 1)) & 1 : |
|
(rp[nw] >> (BITS_PER_MP_LIMB - 1)) & 1))) /* or even r. */ |
|
{ |
|
cc = mpn_add_1 (rp + nw, rp + nw, rrsize, MP_LIMB_T_ONE << rw); |
|
inexact = 1; |
|
} |
|
else |
|
inexact = -1; |
|
break; |
|
|
case GMP_RNDU : |
case GMP_RNDU: |
if (q_limb) |
/* we should arrive here only when the result is inexact, i.e. |
cc = mpn_add_1(rp, rp, rrsize, 1 << (BITS_PER_MP_LIMB - |
either q_limb > 0 (the remainder from mpn_sqrtrem is non-zero) |
(PREC(r) & |
or up[0..usize-rsize-1] is non zero, thus we have to add one |
(BITS_PER_MP_LIMB - 1)))); |
ulp, and inexact = 1 */ |
|
inexact = 1; |
|
t = MPFR_PREC(r) & (BITS_PER_MP_LIMB - 1); |
|
rsize = (MPFR_PREC(r) - 1)/BITS_PER_MP_LIMB + 1; |
|
cc = mpn_add_1 (rp + rrsize - rsize, rp + rrsize - rsize, rsize, |
|
t != 0 ? |
|
MP_LIMB_T_ONE << (BITS_PER_MP_LIMB - t) : |
|
MP_LIMB_T_ONE); |
|
} |
} |
} |
|
|
if (cc) { |
if (cc) |
mpn_rshift(rp, rp, rrsize, 1); |
{ |
rp[rrsize-1] |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB-1); |
/* Is a shift necessary here? Isn't the result 1.0000...? */ |
r->_mp_exp++; |
mpn_rshift (rp, rp, rrsize, 1); |
} |
rp[rrsize-1] |= GMP_LIMB_HIGHBIT; |
|
MPFR_EXP(r)++; |
|
} |
|
|
fin: |
fin: |
rsize = rrsize; |
rsize = rrsize; |
rrsize = (PREC(r) - 1)/BITS_PER_MP_LIMB + 1; |
rrsize = (MPFR_PREC(r) - 1)/BITS_PER_MP_LIMB + 1; |
MPN_COPY(r->_mp_d, rp + rsize - rrsize, rrsize); |
MPN_COPY(MPFR_MANT(r), rp + rsize - rrsize, rrsize); |
|
|
if (PREC(r) & (BITS_PER_MP_LIMB - 1)) |
if (MPFR_PREC(r) & (BITS_PER_MP_LIMB - 1)) |
MANT(r) [0] &= ~(((mp_limb_t)1 << (BITS_PER_MP_LIMB - |
MPFR_MANT(r)[0] &= ~((MP_LIMB_T_ONE << |
(PREC(r) & (BITS_PER_MP_LIMB - 1)))) - 1) ; |
(BITS_PER_MP_LIMB - |
|
(MPFR_PREC(r) & (BITS_PER_MP_LIMB - 1)))) - 1); |
TMP_FREE(marker0); TMP_FREE (marker); |
|
return exact; |
TMP_FREE(marker); |
|
return inexact; |
} |
} |