Annotation of OpenXM_contrib/gmp/mpz/jacobi.c, Revision 1.1.1.3
1.1.1.3 ! ohara 1: /* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols.
1.1 maekawa 2:
1.1.1.3 ! ohara 3: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
1.1 maekawa 4:
5: This file is part of the GNU MP Library.
6:
7: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.3 ! ohara 8: it under the terms of the GNU Library General Public License as published by
! 9: the Free Software Foundation; either version 2 of the License, or (at your
1.1 maekawa 10: option) any later version.
11:
12: The GNU MP Library is distributed in the hope that it will be useful, but
13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.3 ! ohara 14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
1.1 maekawa 15: License for more details.
16:
1.1.1.3 ! ohara 17: You should have received a copy of the GNU Library General Public License
1.1 maekawa 18: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20: MA 02111-1307, USA. */
21:
1.1.1.3 ! ohara 22: #include <stdio.h>
1.1 maekawa 23: #include "gmp.h"
1.1.1.3 ! ohara 24: #include "gmp-impl.h"
! 25: #include "longlong.h"
1.1 maekawa 26:
27:
1.1.1.3 ! ohara 28: /* Change this to "#define TRACE(x) x" for some traces. */
! 29: #define TRACE(x)
! 30:
! 31:
! 32: #define MPN_RSHIFT_OR_COPY(dst,src,size,shift) \
! 33: do { \
! 34: if ((shift) != 0) \
! 35: { \
! 36: ASSERT_NOCARRY (mpn_rshift (dst, src, size, shift)); \
! 37: (size) -= ((dst)[(size)-1] == 0); \
! 38: } \
! 39: else \
! 40: MPN_COPY (dst, src, size); \
! 41: } while (0)
! 42:
! 43:
! 44: /* This code does triple duty as mpz_jacobi, mpz_legendre and mpz_kronecker.
! 45:
! 46: mpz_jacobi could assume b is odd, but the improvements from that seem
! 47: small compared to other operations, and anything significant should be
! 48: checked at run-time since we'd like odd b to go fast in mpz_kronecker
! 49: too.
! 50:
! 51: mpz_legendre could assume b is an odd prime, but knowing this doesn't
! 52: present any obvious benefits. Result 0 wouldn't arise (unless "a" is a
! 53: multiple of b), but the checking for that takes little time compared to
! 54: other operations.
! 55:
! 56: The main loop is just a simple binary GCD with the jacobi symbol result
! 57: tracked during the reduction.
! 58:
! 59: The special cases for a or b fitting in one limb let mod_1 or modexact_1
! 60: get used, without any copying, and end up just as efficient as the mixed
! 61: precision mpz_kronecker_ui etc.
! 62:
! 63: When tdiv_qr is called it's not necessary to make "a" odd or make a
! 64: working copy of it, but tdiv_qr is going to be pretty slow so it's not
! 65: worth bothering trying to save anything for that case.
! 66:
! 67: Enhancements:
! 68:
! 69: mpn_bdivmod could be used instead of mpn_tdiv_qr, like in mpn_gcd.
! 70: Currently tdiv_qr is preferred since it's sub-quadratic on big sizes,
! 71: although bdivmod might be a touch quicker on small sizes. This can be
! 72: revised when bdivmod becomes sub-quadratic too.
! 73:
! 74: Some sort of multi-step algorithm should be used. The current subtract
! 75: and shift for every bit is very inefficient. Lehmer (per current gcdext)
! 76: would need some low bits included in its calculation to apply the sign
! 77: change for reciprocity. Binary Lehmer keeps low bits to strip twos
! 78: anyway, so might be better suited. Maybe the accelerated GCD style k-ary
! 79: reduction would work, if sign changes due to the extra factors it
! 80: introduces can be accounted for (or maybe they can be ignored). */
! 81:
! 82:
! 83: /* This implementation depends on BITS_PER_MP_LIMB being even, so that
! 84: (a/2)^BITS_PER_MP_LIMB = 1 and so there's no need to pay attention to how
! 85: many low zero limbs are stripped. */
! 86: #if BITS_PER_MP_LIMB % 2 != 0
! 87: Error, error, need BITS_PER_MP_LIMB even
1.1 maekawa 88: #endif
1.1.1.3 ! ohara 89:
! 90:
! 91: int
! 92: mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
1.1 maekawa 93: {
1.1.1.3 ! ohara 94: mp_srcptr asrcp, bsrcp;
! 95: mp_size_t asize, bsize;
! 96: mp_ptr ap, bp;
! 97: mp_limb_t alow, blow, ahigh, bhigh, asecond, bsecond;
! 98: unsigned atwos, btwos;
! 99: int result_bit1;
! 100: TMP_DECL (marker);
! 101:
! 102: TRACE (printf ("start asize=%d bsize=%d\n", SIZ(a), SIZ(b));
! 103: mpz_trace (" a", a);
! 104: mpz_trace (" b", b));
! 105:
! 106: asize = SIZ(a);
! 107: asrcp = PTR(a);
! 108: alow = asrcp[0];
! 109:
! 110: bsize = SIZ(b);
! 111: if (bsize == 0)
! 112: return JACOBI_LS0 (alow, asize); /* (a/0) */
! 113:
! 114: bsrcp = PTR(b);
! 115: blow = bsrcp[0];
! 116:
! 117: if (asize == 0)
! 118: return JACOBI_0LS (blow, bsize); /* (0/b) */
! 119:
! 120: /* (even/even)=0 */
! 121: if (((alow | blow) & 1) == 0)
! 122: return 0;
! 123:
! 124: /* account for effect of sign of b, then ignore it */
! 125: result_bit1 = JACOBI_BSGN_SS_BIT1 (asize, bsize);
! 126: bsize = ABS (bsize);
! 127:
! 128: /* low zero limbs on b can be discarded */
! 129: MPN_STRIP_LOW_ZEROS_NOT_ZERO (bsrcp, bsize, blow);
! 130:
! 131: count_trailing_zeros (btwos, blow);
! 132: TRACE (printf ("b twos %u\n", btwos));
! 133:
! 134: /* establish shifted blow */
! 135: blow >>= btwos;
! 136: if (bsize > 1)
! 137: {
! 138: bsecond = bsrcp[1];
! 139: if (btwos != 0)
! 140: blow |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
! 141: }
! 142:
! 143: /* account for effect of sign of a, then ignore it */
! 144: result_bit1 ^= JACOBI_ASGN_SU_BIT1 (asize, blow);
! 145: asize = ABS (asize);
! 146:
! 147: if (bsize == 1 || (bsize == 2 && (bsecond >> btwos) == 0))
! 148: {
! 149: /* special case one limb b, use modexact and no copying */
! 150:
! 151: /* (a/2)=(2/a) with a odd, and if b is even then a is odd here */
! 152: result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
! 153:
! 154: if (blow == 1) /* (a/1)=1 always */
! 155: return JACOBI_BIT1_TO_PN (result_bit1);
! 156:
! 157: JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
! 158: TRACE (printf ("base (%lu/%lu) with %d\n",
! 159: alow, blow, JACOBI_BIT1_TO_PN (result_bit1)));
! 160: return mpn_jacobi_base (alow, blow, result_bit1);
! 161: }
! 162:
! 163: /* Discard low zero limbs of a. Usually there won't be anything to
! 164: strip, hence not bothering with it for the bsize==1 case. */
! 165: MPN_STRIP_LOW_ZEROS_NOT_ZERO (asrcp, asize, alow);
! 166:
! 167: count_trailing_zeros (atwos, alow);
! 168: TRACE (printf ("a twos %u\n", atwos));
! 169: result_bit1 ^= JACOBI_TWOS_U_BIT1 (atwos, blow);
! 170:
! 171: /* establish shifted alow */
! 172: alow >>= atwos;
! 173: if (asize > 1)
! 174: {
! 175: asecond = asrcp[1];
! 176: if (atwos != 0)
! 177: alow |= (asecond << (GMP_NUMB_BITS - atwos)) & GMP_NUMB_MASK;
! 178: }
! 179:
! 180: /* (a/2)=(2/a) with a odd */
! 181: result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
! 182:
! 183: if (asize == 1 || (asize == 2 && (asecond >> atwos) == 0))
! 184: {
! 185: /* another special case with modexact and no copying */
! 186:
! 187: if (alow == 1) /* (1/b)=1 always */
! 188: return JACOBI_BIT1_TO_PN (result_bit1);
! 189:
! 190: /* b still has its twos, so cancel out their effect */
! 191: result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
! 192:
! 193: result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow); /* now (b/a) */
! 194: JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow);
! 195: TRACE (printf ("base (%lu/%lu) with %d\n",
! 196: blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
! 197: return mpn_jacobi_base (blow, alow, result_bit1);
! 198: }
! 199:
! 200:
! 201: TMP_MARK (marker);
! 202: TMP_ALLOC_LIMBS_2 (ap, asize, bp, bsize);
! 203:
! 204: MPN_RSHIFT_OR_COPY (ap, asrcp, asize, atwos);
! 205: ASSERT (alow == ap[0]);
! 206: TRACE (mpn_trace ("stripped a", ap, asize));
! 207:
! 208: MPN_RSHIFT_OR_COPY (bp, bsrcp, bsize, btwos);
! 209: ASSERT (blow == bp[0]);
! 210: TRACE (mpn_trace ("stripped b", bp, bsize));
! 211:
! 212: /* swap if necessary to make a longer than b */
! 213: if (asize < bsize)
! 214: {
! 215: TRACE (printf ("swap\n"));
! 216: MPN_PTR_SWAP (ap,asize, bp,bsize);
! 217: MP_LIMB_T_SWAP (alow, blow);
! 218: result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
! 219: }
! 220:
! 221: /* If a is bigger than b then reduce to a mod b.
! 222: Division is much faster than chipping away at "a" bit-by-bit. */
! 223: if (asize > bsize)
! 224: {
! 225: mp_ptr rp, qp;
! 226:
! 227: TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize));
! 228:
! 229: TMP_ALLOC_LIMBS_2 (rp, bsize, qp, asize-bsize+1);
! 230: mpn_tdiv_qr (qp, rp, 0, ap, asize, bp, bsize);
! 231: ap = rp;
! 232: asize = bsize;
! 233: MPN_NORMALIZE (ap, asize);
! 234:
! 235: TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize);
! 236: mpn_trace (" a", ap, asize);
! 237: mpn_trace (" b", bp, bsize));
! 238:
! 239: if (asize == 0) /* (0/b)=0 for b!=1 */
! 240: goto zero;
! 241:
! 242: alow = ap[0];
! 243: goto strip_a;
! 244: }
! 245:
! 246: for (;;)
! 247: {
! 248: ASSERT (asize >= 1); /* a,b non-empty */
! 249: ASSERT (bsize >= 1);
! 250: ASSERT (ap[asize-1] != 0); /* a,b normalized (and hence non-zero) */
! 251: ASSERT (bp[bsize-1] != 0);
! 252: ASSERT (alow == ap[0]); /* low limb copies should be correct */
! 253: ASSERT (blow == bp[0]);
! 254: ASSERT (alow & 1); /* a,b odd */
! 255: ASSERT (blow & 1);
! 256:
! 257: TRACE (printf ("top asize=%ld bsize=%ld\n", asize, bsize);
! 258: mpn_trace (" a", ap, asize);
! 259: mpn_trace (" b", bp, bsize));
! 260:
! 261: /* swap if necessary to make a>=b, applying reciprocity
! 262: high limbs are almost always enough to tell which is bigger */
! 263: if (asize < bsize
! 264: || (asize == bsize
! 265: && ((ahigh=ap[asize-1]) < (bhigh=bp[asize-1])
! 266: || (ahigh == bhigh
! 267: && mpn_cmp (ap, bp, asize-1) < 0))))
! 268: {
! 269: TRACE (printf ("swap\n"));
! 270: MPN_PTR_SWAP (ap,asize, bp,bsize);
! 271: MP_LIMB_T_SWAP (alow, blow);
! 272: result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
! 273: }
! 274:
! 275: if (asize == 1)
! 276: break;
! 277:
! 278: /* a = a-b */
! 279: ASSERT (asize >= bsize);
! 280: ASSERT_NOCARRY (mpn_sub (ap, ap, asize, bp, bsize));
! 281: MPN_NORMALIZE (ap, asize);
! 282: alow = ap[0];
! 283:
! 284: /* (0/b)=0 for b!=1. b!=1 when a==0 because otherwise would have had
! 285: a==1 which is asize==1 and would have exited above. */
! 286: if (asize == 0)
! 287: goto zero;
! 288:
! 289: strip_a:
! 290: /* low zero limbs on a can be discarded */
! 291: MPN_STRIP_LOW_ZEROS_NOT_ZERO (ap, asize, alow);
! 292:
! 293: if ((alow & 1) == 0)
! 294: {
! 295: /* factors of 2 from a */
! 296: unsigned twos;
! 297: count_trailing_zeros (twos, alow);
! 298: TRACE (printf ("twos %u\n", twos));
! 299: result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, blow);
! 300: ASSERT_NOCARRY (mpn_rshift (ap, ap, asize, twos));
! 301: asize -= (ap[asize-1] == 0);
! 302: alow = ap[0];
! 303: }
! 304: }
! 305:
! 306: ASSERT (asize == 1 && bsize == 1); /* just alow and blow left */
! 307: TMP_FREE (marker);
! 308:
! 309: /* (1/b)=1 always (in this case have b==1 because a>=b) */
! 310: if (alow == 1)
! 311: return JACOBI_BIT1_TO_PN (result_bit1);
! 312:
! 313: /* swap with reciprocity and do (b/a) */
! 314: result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
! 315: TRACE (printf ("base (%lu/%lu) with %d\n",
! 316: blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
! 317: return mpn_jacobi_base (blow, alow, result_bit1);
! 318:
! 319: zero:
! 320: TMP_FREE (marker);
! 321: return 0;
1.1 maekawa 322: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>