=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/generic/Attic/perfsqr.c,v retrieving revision 1.1.1.1 retrieving revision 1.1.1.3 diff -u -p -r1.1.1.1 -r1.1.1.3 --- OpenXM_contrib/gmp/mpn/generic/Attic/perfsqr.c 2000/01/10 15:35:23 1.1.1.1 +++ OpenXM_contrib/gmp/mpn/generic/Attic/perfsqr.c 2003/08/25 16:06:20 1.1.1.3 @@ -1,47 +1,32 @@ /* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square, zero otherwise. -Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc. +Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001 Free Software Foundation, +Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Library General Public License as published by -the Free Software Foundation; either version 2 of the License, or (at your +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -You should have received a copy of the GNU Library General Public License +You should have received a copy of the GNU Lesser General Public License along with the GNU MP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +#include /* for NULL */ #include "gmp.h" #include "gmp-impl.h" #include "longlong.h" -#ifndef UMUL_TIME -#define UMUL_TIME 1 -#endif -#ifndef UDIV_TIME -#define UDIV_TIME UMUL_TIME -#endif - -#if BITS_PER_MP_LIMB == 32 -#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x 13 x ... x 29 */ -#define PP_INVERTED 0x53E5645CL -#endif - -#if BITS_PER_MP_LIMB == 64 -#define PP 0xE221F97C30E94E1DL /* 3 x 5 x 7 x 11 x 13 x ... x 53 */ -#define PP_INVERTED 0x21CFE6CFC938B36BL -#endif - /* sq_res_0x100[x mod 0x100] == 1 iff x mod 0x100 is a quadratic residue modulo 0x100. */ static unsigned char const sq_res_0x100[0x100] = @@ -53,77 +38,132 @@ static unsigned char const sq_res_0x100[0x100] = 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, - 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0 }; int -#if __STDC__ mpn_perfect_square_p (mp_srcptr up, mp_size_t usize) -#else -mpn_perfect_square_p (up, usize) - mp_srcptr up; - mp_size_t usize; -#endif { mp_limb_t rem; mp_ptr root_ptr; int res; TMP_DECL (marker); - /* The first test excludes 55/64 (85.9%) of the perfect square candidates + ASSERT (usize >= 1); + + /* The first test excludes 212/256 (82.8%) of the perfect square candidates in O(1) time. */ - if ((sq_res_0x100[(unsigned int) up[0] % 0x100] & 1) == 0) + if ((sq_res_0x100[(unsigned int) up[0] % 0x100]) == 0) return 0; +#if 0 + /* Check that we have even multiplicity of 2, and then check that the rest is + a possible perfect square. Leave disabled until we can determine this + really is an improvement. It it is, it could completely replace the + simple probe above, since this should through out more non-squares, but at + the expense of somewhat more cycles. */ + { + mp_limb_t lo; + int cnt; + lo = up[0]; + while (lo == 0) + up++, lo = up[0], usize--; + count_trailing_zeros (cnt, lo); + if ((cnt & 1) != 0) + return 0; /* return of not even multiplicity of 2 */ + lo >>= cnt; /* shift down to align lowest non-zero bit */ + lo >>= 1; /* shift away lowest non-zero bit */ + if ((lo & 3) != 0) + return 0; + } +#endif + #if defined (PP) - /* The second test excludes 30652543/30808063 (99.5%) of the remaining - perfect square candidates in O(n) time. */ + /* The second test excludes most of the remaining perfect square candidates + in O(n) time. Each residue tested keeps (p+1)/2 out of p, and with no + common factors the tests are independent so the total passed is - /* Firstly, compute REM = A mod PP. */ - if (UDIV_TIME > (2 * UMUL_TIME + 6)) - rem = mpn_preinv_mod_1 (up, usize, (mp_limb_t) PP, (mp_limb_t) PP_INVERTED); - else - rem = mpn_mod_1 (up, usize, (mp_limb_t) PP); + p=M (p+1)/2 + product ------- + p=3 p - /* Now decide if REM is a quadratic residue modulo the factors in PP. */ + which, for M=29 (for 32-bit hosts) is 155520/30808063, or 99.5% excluded, + and for M=53 (for 64-bit hosts) is 67722117120/742518990138757, or 99.99% + excluded. - /* If A is just a few limbs, computing the square root does not take long - time, so things might run faster if we limit this loop according to the - size of A. */ + If U is just a few limbs, computing the square root does not take long + time, so things might run faster if we suppress this for A less than some + appropriate threshold. */ + + /* Firstly, compute REM = U mod PP. */ +#if defined (PP_INVERTED) + rem = MPN_MOD_OR_PREINV_MOD_1 (up, usize, (mp_limb_t) PP, + (mp_limb_t) PP_INVERTED); +#else + rem = mpn_mod_1 (up, usize, (mp_limb_t) PP); +#endif + + /* Now decide if REM is a quadratic residue modulo the factors in PP. We + test prime pairs multiplied together, as long as their products don't + overflow a limb. The tests that exclude most false squares are put + first. */ + + if (0 #if BITS_PER_MP_LIMB == 64 - if (((0x12DD703303AED3L >> rem % 53) & 1) == 0) - return 0; - if (((0x4351B2753DFL >> rem % 47) & 1) == 0) - return 0; - if (((0x35883A3EE53L >> rem % 43) & 1) == 0) - return 0; - if (((0x1B382B50737L >> rem % 41) & 1) == 0) - return 0; - if (((0x165E211E9BL >> rem % 37) & 1) == 0) - return 0; - if (((0x121D47B7L >> rem % 31) & 1) == 0) - return 0; + || ((CNST_LIMB(0x0C22C90530902D3) >> rem % (3 * 19)) & 1) == 0 + || ((CNST_LIMB(0x0230148611CA33) >> rem % (5 * 11)) & 1) == 0 + || ((CNST_LIMB(0x12DD703303AED3) >> rem % 53) & 1) == 0 + || ((CNST_LIMB(0x04351B2753DF) >> rem % 47) & 1) == 0 + || ((CNST_LIMB(0x35883A3EE53) >> rem % 43) & 1) == 0 + || ((CNST_LIMB(0x1B382B50737) >> rem % 41) & 1) == 0 + || ((CNST_LIMB(0x165E211E9B) >> rem % 37) & 1) == 0 + || ((0x121D47B7L >> rem % 31) & 1) == 0 + || ((0x13D122F3L >> rem % 29) & 1) == 0 + || ((0x05335FL >> rem % 23) & 1) == 0 + /* 19 handled above */ + || ((0x1A317L >> rem % 17) & 1) == 0 + || ((0x161BL >> rem % 13) & 1) == 0 + /* 11 handled above */ + || ((0x17L >> rem % 7) & 1) == 0 + /* 5 handled above */ + /* 3 handled above */ #endif - if (((0x13D122F3L >> rem % 29) & 1) == 0) - return 0; - if (((0x5335FL >> rem % 23) & 1) == 0) - return 0; - if (((0x30AF3L >> rem % 19) & 1) == 0) - return 0; - if (((0x1A317L >> rem % 17) & 1) == 0) - return 0; - if (((0x161BL >> rem % 13) & 1) == 0) - return 0; - if (((0x23BL >> rem % 11) & 1) == 0) - return 0; - if (((0x017L >> rem % 7) & 1) == 0) - return 0; - if (((0x13L >> rem % 5) & 1) == 0) - return 0; - if (((0x3L >> rem % 3) & 1) == 0) - return 0; +#if BITS_PER_MP_LIMB == 32 + || ((0x058293L >> rem % (3 * 7)) & 1) == 0 + || ((0x13D122F3L >> rem % 29) & 1) == 0 + || ((0x05335FL >> rem % 23) & 1) == 0 + || ((0x30AF3L >> rem % 19) & 1) == 0 + || ((0x1A317L >> rem % 17) & 1) == 0 + || ((0x161BL >> rem % 13) & 1) == 0 + || ((0x23BL >> rem % 11) & 1) == 0 + /* 7 handled above */ + || ((0x13L >> rem % 5) & 1) == 0 + /* 3 handled above */ #endif +#if BITS_PER_MP_LIMB == 16 + || ((0x0653L >> rem % (3 * 5)) & 1) == 0 + || ((0x161BL >> rem % 13) & 1) == 0 + || ((0x23BL >> rem % 11) & 1) == 0 + || ((0x17L >> rem % 7) & 1) == 0 + /* 5 handled above */ + /* 3 handled above */ +#endif +#if BITS_PER_MP_LIMB == 8 + || ((0x17L >> rem % 7) & 1) == 0 + || ((0x13L >> rem % 5) & 1) == 0 + || ((0x3L >> rem % 3) & 1) == 0 +#endif +#if BITS_PER_MP_LIMB == 4 + || ((0x13L >> rem % 5) & 1) == 0 + || ((0x3L >> rem % 3) & 1) == 0 +#endif +#if BITS_PER_MP_LIMB == 2 + || ((0x3L >> rem % 3) & 1) == 0 +#endif + ) + return 0; +#endif /* PP */ TMP_MARK (marker);