=================================================================== RCS file: /home/cvs/OpenXM_contrib/gmp/mpn/generic/Attic/perfsqr.c,v retrieving revision 1.1.1.2 retrieving revision 1.1.1.3 diff -u -p -r1.1.1.2 -r1.1.1.3 --- OpenXM_contrib/gmp/mpn/generic/Attic/perfsqr.c 2000/09/09 14:12:26 1.1.1.2 +++ OpenXM_contrib/gmp/mpn/generic/Attic/perfsqr.c 2003/08/25 16:06:20 1.1.1.3 @@ -1,7 +1,7 @@ /* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square, zero otherwise. -Copyright (C) 1991, 1993, 1994, 1996, 1997, 2000 Free Software Foundation, +Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001 Free Software Foundation, Inc. This file is part of the GNU MP Library. @@ -38,77 +38,132 @@ static unsigned char const sq_res_0x100[0x100] = 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, - 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0 }; int -#if __STDC__ mpn_perfect_square_p (mp_srcptr up, mp_size_t usize) -#else -mpn_perfect_square_p (up, usize) - mp_srcptr up; - mp_size_t usize; -#endif { mp_limb_t rem; mp_ptr root_ptr; int res; TMP_DECL (marker); - /* The first test excludes 55/64 (85.9%) of the perfect square candidates + ASSERT (usize >= 1); + + /* The first test excludes 212/256 (82.8%) of the perfect square candidates in O(1) time. */ - if ((sq_res_0x100[(unsigned int) up[0] % 0x100] & 1) == 0) + if ((sq_res_0x100[(unsigned int) up[0] % 0x100]) == 0) return 0; +#if 0 + /* Check that we have even multiplicity of 2, and then check that the rest is + a possible perfect square. Leave disabled until we can determine this + really is an improvement. It it is, it could completely replace the + simple probe above, since this should through out more non-squares, but at + the expense of somewhat more cycles. */ + { + mp_limb_t lo; + int cnt; + lo = up[0]; + while (lo == 0) + up++, lo = up[0], usize--; + count_trailing_zeros (cnt, lo); + if ((cnt & 1) != 0) + return 0; /* return of not even multiplicity of 2 */ + lo >>= cnt; /* shift down to align lowest non-zero bit */ + lo >>= 1; /* shift away lowest non-zero bit */ + if ((lo & 3) != 0) + return 0; + } +#endif + #if defined (PP) - /* The second test excludes 30652543/30808063 (99.5%) of the remaining - perfect square candidates in O(n) time. */ + /* The second test excludes most of the remaining perfect square candidates + in O(n) time. Each residue tested keeps (p+1)/2 out of p, and with no + common factors the tests are independent so the total passed is - /* Firstly, compute REM = A mod PP. */ - if (UDIV_TIME > (2 * UMUL_TIME + 6)) - rem = mpn_preinv_mod_1 (up, usize, (mp_limb_t) PP, (mp_limb_t) PP_INVERTED); - else - rem = mpn_mod_1 (up, usize, (mp_limb_t) PP); + p=M (p+1)/2 + product ------- + p=3 p - /* Now decide if REM is a quadratic residue modulo the factors in PP. */ + which, for M=29 (for 32-bit hosts) is 155520/30808063, or 99.5% excluded, + and for M=53 (for 64-bit hosts) is 67722117120/742518990138757, or 99.99% + excluded. - /* If A is just a few limbs, computing the square root does not take long - time, so things might run faster if we limit this loop according to the - size of A. */ + If U is just a few limbs, computing the square root does not take long + time, so things might run faster if we suppress this for A less than some + appropriate threshold. */ + + /* Firstly, compute REM = U mod PP. */ +#if defined (PP_INVERTED) + rem = MPN_MOD_OR_PREINV_MOD_1 (up, usize, (mp_limb_t) PP, + (mp_limb_t) PP_INVERTED); +#else + rem = mpn_mod_1 (up, usize, (mp_limb_t) PP); +#endif + + /* Now decide if REM is a quadratic residue modulo the factors in PP. We + test prime pairs multiplied together, as long as their products don't + overflow a limb. The tests that exclude most false squares are put + first. */ + + if (0 #if BITS_PER_MP_LIMB == 64 - if (((CNST_LIMB(0x12DD703303AED3) >> rem % 53) & 1) == 0) - return 0; - if (((CNST_LIMB(0x4351B2753DF) >> rem % 47) & 1) == 0) - return 0; - if (((CNST_LIMB(0x35883A3EE53) >> rem % 43) & 1) == 0) - return 0; - if (((CNST_LIMB(0x1B382B50737) >> rem % 41) & 1) == 0) - return 0; - if (((CNST_LIMB(0x165E211E9B) >> rem % 37) & 1) == 0) - return 0; - if (((CNST_LIMB(0x121D47B7) >> rem % 31) & 1) == 0) - return 0; + || ((CNST_LIMB(0x0C22C90530902D3) >> rem % (3 * 19)) & 1) == 0 + || ((CNST_LIMB(0x0230148611CA33) >> rem % (5 * 11)) & 1) == 0 + || ((CNST_LIMB(0x12DD703303AED3) >> rem % 53) & 1) == 0 + || ((CNST_LIMB(0x04351B2753DF) >> rem % 47) & 1) == 0 + || ((CNST_LIMB(0x35883A3EE53) >> rem % 43) & 1) == 0 + || ((CNST_LIMB(0x1B382B50737) >> rem % 41) & 1) == 0 + || ((CNST_LIMB(0x165E211E9B) >> rem % 37) & 1) == 0 + || ((0x121D47B7L >> rem % 31) & 1) == 0 + || ((0x13D122F3L >> rem % 29) & 1) == 0 + || ((0x05335FL >> rem % 23) & 1) == 0 + /* 19 handled above */ + || ((0x1A317L >> rem % 17) & 1) == 0 + || ((0x161BL >> rem % 13) & 1) == 0 + /* 11 handled above */ + || ((0x17L >> rem % 7) & 1) == 0 + /* 5 handled above */ + /* 3 handled above */ #endif - if (((0x13D122F3L >> rem % 29) & 1) == 0) - return 0; - if (((0x5335FL >> rem % 23) & 1) == 0) - return 0; - if (((0x30AF3L >> rem % 19) & 1) == 0) - return 0; - if (((0x1A317L >> rem % 17) & 1) == 0) - return 0; - if (((0x161BL >> rem % 13) & 1) == 0) - return 0; - if (((0x23BL >> rem % 11) & 1) == 0) - return 0; - if (((0x017L >> rem % 7) & 1) == 0) - return 0; - if (((0x13L >> rem % 5) & 1) == 0) - return 0; - if (((0x3L >> rem % 3) & 1) == 0) - return 0; +#if BITS_PER_MP_LIMB == 32 + || ((0x058293L >> rem % (3 * 7)) & 1) == 0 + || ((0x13D122F3L >> rem % 29) & 1) == 0 + || ((0x05335FL >> rem % 23) & 1) == 0 + || ((0x30AF3L >> rem % 19) & 1) == 0 + || ((0x1A317L >> rem % 17) & 1) == 0 + || ((0x161BL >> rem % 13) & 1) == 0 + || ((0x23BL >> rem % 11) & 1) == 0 + /* 7 handled above */ + || ((0x13L >> rem % 5) & 1) == 0 + /* 3 handled above */ #endif +#if BITS_PER_MP_LIMB == 16 + || ((0x0653L >> rem % (3 * 5)) & 1) == 0 + || ((0x161BL >> rem % 13) & 1) == 0 + || ((0x23BL >> rem % 11) & 1) == 0 + || ((0x17L >> rem % 7) & 1) == 0 + /* 5 handled above */ + /* 3 handled above */ +#endif +#if BITS_PER_MP_LIMB == 8 + || ((0x17L >> rem % 7) & 1) == 0 + || ((0x13L >> rem % 5) & 1) == 0 + || ((0x3L >> rem % 3) & 1) == 0 +#endif +#if BITS_PER_MP_LIMB == 4 + || ((0x13L >> rem % 5) & 1) == 0 + || ((0x3L >> rem % 3) & 1) == 0 +#endif +#if BITS_PER_MP_LIMB == 2 + || ((0x3L >> rem % 3) & 1) == 0 +#endif + ) + return 0; +#endif /* PP */ TMP_MARK (marker);