version 1.1.1.1, 2000/01/10 15:35:24 |
version 1.1.1.2, 2000/09/09 14:12:27 |
|
|
the function is 0 if OP is a perfect square, and *any* non-zero number |
the function is 0 if OP is a perfect square, and *any* non-zero number |
otherwise. |
otherwise. |
|
|
Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc. |
Copyright (C) 1993, 1994, 1996, 1997, 1998, 1999, 2000 Free Software |
|
Foundation, Inc. |
|
|
This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
|
|
The GNU MP Library is free software; you can redistribute it and/or modify |
The GNU MP Library is free software; you can redistribute it and/or modify |
it under the terms of the GNU Library General Public License as published by |
it under the terms of the GNU Lesser General Public License as published by |
the Free Software Foundation; either version 2 of the License, or (at your |
the Free Software Foundation; either version 2.1 of the License, or (at your |
option) any later version. |
option) any later version. |
|
|
The GNU MP Library is distributed in the hope that it will be useful, but |
The GNU MP Library is distributed in the hope that it will be useful, but |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
License for more details. |
License for more details. |
|
|
You should have received a copy of the GNU Library General Public License |
You should have received a copy of the GNU Lesser General Public License |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
MA 02111-1307, USA. */ |
MA 02111-1307, USA. */ |
Line 35 MA 02111-1307, USA. */ |
|
Line 36 MA 02111-1307, USA. */ |
|
doesn't help to use CHAR_BIT from limits.h, as the real problem is |
doesn't help to use CHAR_BIT from limits.h, as the real problem is |
the static arrays. */ |
the static arrays. */ |
|
|
|
#include <stdio.h> /* for NULL */ |
#include "gmp.h" |
#include "gmp.h" |
#include "gmp-impl.h" |
#include "gmp-impl.h" |
#include "longlong.h" |
#include "longlong.h" |
Line 59 MA 02111-1307, USA. */ |
|
Line 61 MA 02111-1307, USA. */ |
|
/* Define this macro for IEEE P854 machines with a fast sqrt instruction. */ |
/* Define this macro for IEEE P854 machines with a fast sqrt instruction. */ |
#if defined __GNUC__ && ! defined __SOFT_FLOAT |
#if defined __GNUC__ && ! defined __SOFT_FLOAT |
|
|
#if defined __sparc__ |
#if defined (__sparc__) && BITS_PER_MP_LIMB == 32 |
#define SQRT(a) \ |
#define SQRT(a) \ |
({ \ |
({ \ |
double __sqrt_res; \ |
double __sqrt_res; \ |
Line 68 MA 02111-1307, USA. */ |
|
Line 70 MA 02111-1307, USA. */ |
|
}) |
}) |
#endif |
#endif |
|
|
#if defined __HAVE_68881__ |
#if defined (__HAVE_68881__) |
#define SQRT(a) \ |
#define SQRT(a) \ |
({ \ |
({ \ |
double __sqrt_res; \ |
double __sqrt_res; \ |
Line 77 MA 02111-1307, USA. */ |
|
Line 79 MA 02111-1307, USA. */ |
|
}) |
}) |
#endif |
#endif |
|
|
#if defined __hppa |
#if defined (__hppa) && BITS_PER_MP_LIMB == 32 |
#define SQRT(a) \ |
#define SQRT(a) \ |
({ \ |
({ \ |
double __sqrt_res; \ |
double __sqrt_res; \ |
Line 86 MA 02111-1307, USA. */ |
|
Line 88 MA 02111-1307, USA. */ |
|
}) |
}) |
#endif |
#endif |
|
|
#if defined _ARCH_PWR2 |
#if defined (_ARCH_PWR2) && BITS_PER_MP_LIMB == 32 |
#define SQRT(a) \ |
#define SQRT(a) \ |
({ \ |
({ \ |
double __sqrt_res; \ |
double __sqrt_res; \ |
Line 95 MA 02111-1307, USA. */ |
|
Line 97 MA 02111-1307, USA. */ |
|
}) |
}) |
#endif |
#endif |
|
|
|
#if 0 |
|
#if defined (__i386__) || defined (__i486__) |
|
#define SQRT(a) \ |
|
({ \ |
|
double __sqrt_res; \ |
|
asm ("fsqrt" : "=t" (__sqrt_res) : "0" (a)); \ |
|
__sqrt_res; \ |
|
}) |
#endif |
#endif |
|
#endif |
|
|
|
#endif |
|
|
#ifndef SQRT |
#ifndef SQRT |
|
|
/* Tables for initial approximation of the square root. These are |
/* Tables for initial approximation of the square root. These are |
Line 112 MA 02111-1307, USA. */ |
|
Line 125 MA 02111-1307, USA. */ |
|
square root of numbers with the same initial digits and an even |
square root of numbers with the same initial digits and an even |
difference in the total number of digits. Consider the square root |
difference in the total number of digits. Consider the square root |
of 1, 10, 100, 1000, ...) */ |
of 1, 10, 100, 1000, ...) */ |
static unsigned char even_approx_tab[256] = |
static const unsigned char even_approx_tab[256] = |
{ |
{ |
0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e, |
0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e, |
0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74, |
0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74, |
Line 150 static unsigned char even_approx_tab[256] = |
|
Line 163 static unsigned char even_approx_tab[256] = |
|
|
|
/* Table to be used for operands with an odd total number of bits. |
/* Table to be used for operands with an odd total number of bits. |
(Further comments before previous table.) */ |
(Further comments before previous table.) */ |
static unsigned char odd_approx_tab[256] = |
static const unsigned char odd_approx_tab[256] = |
{ |
{ |
0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, |
0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, |
0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, |
0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, |
Line 272 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
Line 285 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
/* Is there a fast sqrt instruction defined for this machine? */ |
/* Is there a fast sqrt instruction defined for this machine? */ |
#ifdef SQRT |
#ifdef SQRT |
{ |
{ |
initial_approx = SQRT (t_high0 * 2.0 |
initial_approx = SQRT (t_high0 * MP_BASE_AS_DOUBLE + t_high1); |
* ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)) |
|
+ t_high1); |
|
/* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have |
/* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have |
become incorrect due to overflow in the conversion from double to |
become incorrect due to overflow in the conversion from double to |
mp_limb_t above. It will typically be zero in that case, but might be |
mp_limb_t above. It will typically be zero in that case, but might be |
Line 293 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
Line 304 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
|
|
if ((cnt & 1) == 0) |
if ((cnt & 1) == 0) |
{ |
{ |
/* The most sign bit of t_high0 is set. */ |
/* The most significant bit of t_high0 is set. */ |
initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1); |
initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1); |
initial_approx &= 0xff; |
initial_approx &= 0xff; |
initial_approx = even_approx_tab[initial_approx]; |
initial_approx = even_approx_tab[initial_approx]; |
} |
} |
else |
else |
{ |
{ |
/* The most significant bit of T_HIGH0 is unset, |
/* The most significant bit of t_high0 is unset, |
the second most significant is set. */ |
the second most significant is set. */ |
initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2); |
initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2); |
initial_approx &= 0xff; |
initial_approx &= 0xff; |
Line 310 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
Line 321 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
initial_approx <<= BITS_PER_MP_LIMB - 8 - 1; |
initial_approx <<= BITS_PER_MP_LIMB - 8 - 1; |
|
|
/* Perform small precision Newtonian iterations to get a full word |
/* Perform small precision Newtonian iterations to get a full word |
approximation. For small operands, these iteration will make the |
approximation. For small operands, these iterations will do the |
entire job. */ |
entire job. */ |
if (t_high0 == ~(mp_limb_t)0) |
if (t_high0 == ~(mp_limb_t)0) |
initial_approx = t_high0; |
initial_approx = t_high0; |
Line 328 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
Line 339 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
|
|
/* Now get a full word by one (or for > 36 bit machines) several |
/* Now get a full word by one (or for > 36 bit machines) several |
iterations. */ |
iterations. */ |
for (i = 16; i < BITS_PER_MP_LIMB; i <<= 1) |
for (i = 18; i < BITS_PER_MP_LIMB; i <<= 1) |
{ |
{ |
mp_limb_t ignored_remainder; |
mp_limb_t ignored_remainder; |
|
|
Line 343 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
Line 354 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
rp[0] = initial_approx; |
rp[0] = initial_approx; |
rsize = 1; |
rsize = 1; |
|
|
#ifdef DEBUG |
#ifdef SQRT_DEBUG |
printf ("\n\nT = "); |
printf ("\n\nT = "); |
mpn_dump (tp, tsize); |
mpn_dump (tp, tsize); |
#endif |
#endif |
Line 373 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
Line 384 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
time is spent here. */ |
time is spent here. */ |
|
|
/* It is possible to do a great optimization here. The successive |
/* It is possible to do a great optimization here. The successive |
divisors in the mpn_divmod call below has more and more leading |
divisors in the mpn_divmod call below have more and more leading |
words equal to its predecessor. Therefore the beginning of |
words equal to its predecessor. Therefore the beginning of |
each division will repeat the same work as did the last |
each division will repeat the same work as did the last |
division. If we could guarantee that the leading words of two |
division. If we could guarantee that the leading words of two |
Line 392 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
Line 403 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
while (--i >= 0) |
while (--i >= 0) |
{ |
{ |
mp_limb_t cy; |
mp_limb_t cy; |
#ifdef DEBUG |
#ifdef SQRT_DEBUG |
mp_limb_t old_least_sign_r = rp[0]; |
mp_limb_t old_least_sign_r = rp[0]; |
mp_size_t old_rsize = rsize; |
mp_size_t old_rsize = rsize; |
|
|
Line 408 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
Line 419 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
cy = mpn_divmod (xp, ttp, tsize, rp, rsize); |
cy = mpn_divmod (xp, ttp, tsize, rp, rsize); |
xsize = tsize - rsize; |
xsize = tsize - rsize; |
|
|
#ifdef DEBUG |
#ifdef SQRT_DEBUG |
printf ("X =%d ", cy); |
printf ("X =%d ", cy); |
mpn_dump (xp, xsize); |
mpn_dump (xp, xsize); |
#endif |
#endif |
Line 435 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
Line 446 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
mpn_rshift (rp, xp, xsize, 1); |
mpn_rshift (rp, xp, xsize, 1); |
rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)); |
rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)); |
rsize = xsize; |
rsize = xsize; |
#ifdef DEBUG |
#ifdef SQRT_DEBUG |
if (old_least_sign_r != rp[rsize - old_rsize]) |
if (old_least_sign_r != rp[rsize - old_rsize]) |
printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n", |
printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n", |
i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r, |
i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r, |
Line 444 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
Line 455 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
} |
} |
} |
} |
|
|
#ifdef DEBUG |
#ifdef SQRT_DEBUG |
printf ("(final) R = "); |
printf ("(final) R = "); |
mpn_dump (rp, rsize); |
mpn_dump (rp, rsize); |
#endif |
#endif |
Line 470 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
Line 481 mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size) |
|
/* These operations can't overflow. */ |
/* These operations can't overflow. */ |
cy_limb = mpn_sub_n (tp, tp, rp, rsize); |
cy_limb = mpn_sub_n (tp, tp, rp, rsize); |
cy_limb += mpn_sub_n (tp, tp, rp, rsize); |
cy_limb += mpn_sub_n (tp, tp, rp, rsize); |
mpn_sub_1 (tp + rsize, tp + rsize, tsize - rsize, cy_limb); |
mpn_decr_u (tp + rsize, cy_limb); |
mpn_add_1 (tp, tp, tsize, (mp_limb_t) 1); |
mpn_incr_u (tp, (mp_limb_t) 1); |
|
|
mpn_sub_1 (rp, rp, rsize, (mp_limb_t) 1); |
mpn_decr_u (rp, (mp_limb_t) 1); |
|
|
#ifdef DEBUG |
#ifdef SQRT_DEBUG |
printf ("(adjusted) R = "); |
printf ("(adjusted) R = "); |
mpn_dump (rp, rsize); |
mpn_dump (rp, rsize); |
#endif |
#endif |