Annotation of OpenXM_contrib/gmp/tests/refmpn.c, Revision 1.1
1.1 ! ohara 1: /* Reference mpn functions, designed to be simple, portable and independent
! 2: of the normal gmp code. Speed isn't a consideration.
! 3:
! 4: Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
! 5: Inc.
! 6:
! 7: This file is part of the GNU MP Library.
! 8:
! 9: The GNU MP Library is free software; you can redistribute it and/or modify
! 10: it under the terms of the GNU Lesser General Public License as published by
! 11: the Free Software Foundation; either version 2.1 of the License, or (at your
! 12: option) any later version.
! 13:
! 14: The GNU MP Library is distributed in the hope that it will be useful, but
! 15: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 16: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 17: License for more details.
! 18:
! 19: You should have received a copy of the GNU Lesser General Public License
! 20: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 21: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 22: MA 02111-1307, USA. */
! 23:
! 24:
! 25: /* Most routines have assertions representing what the mpn routines are
! 26: supposed to accept. Many of these reference routines do sensible things
! 27: outside these ranges (eg. for size==0), but the assertions are present to
! 28: pick up bad parameters passed here that are about to be passed the same
! 29: to a real mpn routine being compared. */
! 30:
! 31: /* always do assertion checking */
! 32: #define WANT_ASSERT 1
! 33:
! 34: #include <stdio.h> /* for NULL */
! 35: #include <stdlib.h> /* for malloc */
! 36:
! 37: #include "gmp.h"
! 38: #include "gmp-impl.h"
! 39: #include "longlong.h"
! 40:
! 41: #include "tests.h"
! 42:
! 43:
! 44:
! 45: /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
! 46: in bytes. */
! 47: int
! 48: byte_overlap_p (const void *v_xp, mp_size_t xsize,
! 49: const void *v_yp, mp_size_t ysize)
! 50: {
! 51: const char *xp = v_xp;
! 52: const char *yp = v_yp;
! 53:
! 54: ASSERT (xsize >= 0);
! 55: ASSERT (ysize >= 0);
! 56:
! 57: /* no wraparounds */
! 58: ASSERT (xp+xsize >= xp);
! 59: ASSERT (yp+ysize >= yp);
! 60:
! 61: if (xp + xsize <= yp)
! 62: return 0;
! 63:
! 64: if (yp + ysize <= xp)
! 65: return 0;
! 66:
! 67: return 1;
! 68: }
! 69:
! 70: /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
! 71: int
! 72: refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
! 73: {
! 74: return byte_overlap_p (xp, xsize * BYTES_PER_MP_LIMB,
! 75: yp, ysize * BYTES_PER_MP_LIMB);
! 76: }
! 77:
! 78: /* Check overlap for a routine defined to work low to high. */
! 79: int
! 80: refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
! 81: {
! 82: return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
! 83: }
! 84:
! 85: /* Check overlap for a routine defined to work high to low. */
! 86: int
! 87: refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
! 88: {
! 89: return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
! 90: }
! 91:
! 92: /* Check overlap for a standard routine requiring equal or separate. */
! 93: int
! 94: refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
! 95: {
! 96: return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
! 97: }
! 98: int
! 99: refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
! 100: mp_size_t size)
! 101: {
! 102: return (refmpn_overlap_fullonly_p (dst, src1, size)
! 103: && refmpn_overlap_fullonly_p (dst, src2, size));
! 104: }
! 105:
! 106:
! 107: mp_ptr
! 108: refmpn_malloc_limbs (mp_size_t size)
! 109: {
! 110: mp_ptr p;
! 111: ASSERT (size >= 0);
! 112: if (size == 0)
! 113: size = 1;
! 114: p = (mp_ptr) malloc (size * BYTES_PER_MP_LIMB);
! 115: ASSERT (p != NULL);
! 116: return p;
! 117: }
! 118:
! 119: mp_ptr
! 120: refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
! 121: {
! 122: mp_ptr p;
! 123: p = refmpn_malloc_limbs (size);
! 124: refmpn_copyi (p, ptr, size);
! 125: return p;
! 126: }
! 127:
! 128: /* malloc n limbs on a multiple of m bytes boundary */
! 129: mp_ptr
! 130: refmpn_malloc_limbs_aligned (size_t n, size_t m)
! 131: {
! 132: return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
! 133: }
! 134:
! 135:
! 136: void
! 137: refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
! 138: {
! 139: mp_size_t i;
! 140: ASSERT (size >= 0);
! 141: for (i = 0; i < size; i++)
! 142: ptr[i] = value;
! 143: }
! 144:
! 145: void
! 146: refmpn_zero (mp_ptr ptr, mp_size_t size)
! 147: {
! 148: refmpn_fill (ptr, size, CNST_LIMB(0));
! 149: }
! 150:
! 151: int
! 152: refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
! 153: {
! 154: mp_size_t i;
! 155: for (i = 0; i < size; i++)
! 156: if (ptr[i] != 0)
! 157: return 0;
! 158: return 1;
! 159: }
! 160:
! 161: /* the highest one bit in x */
! 162: mp_limb_t
! 163: refmpn_msbone (mp_limb_t x)
! 164: {
! 165: mp_limb_t n = (mp_limb_t) 1 << (BITS_PER_MP_LIMB-1);
! 166:
! 167: while (n != 0)
! 168: {
! 169: if (x & n)
! 170: break;
! 171: n >>= 1;
! 172: }
! 173: return n;
! 174: }
! 175:
! 176: /* a mask of the highest one bit plus and all bits below */
! 177: mp_limb_t
! 178: refmpn_msbone_mask (mp_limb_t x)
! 179: {
! 180: if (x == 0)
! 181: return 0;
! 182:
! 183: return (refmpn_msbone (x) << 1) - 1;
! 184: }
! 185:
! 186:
! 187: void
! 188: refmpn_setbit (mp_ptr ptr, unsigned long bit)
! 189: {
! 190: ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
! 191: }
! 192:
! 193: void
! 194: refmpn_clrbit (mp_ptr ptr, unsigned long bit)
! 195: {
! 196: ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
! 197: }
! 198:
! 199: #define REFMPN_TSTBIT(ptr,bit) \
! 200: (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)
! 201:
! 202: int
! 203: refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
! 204: {
! 205: return REFMPN_TSTBIT (ptr, bit);
! 206: }
! 207:
! 208: unsigned long
! 209: refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
! 210: {
! 211: while (REFMPN_TSTBIT (ptr, bit) != 0)
! 212: bit++;
! 213: return bit;
! 214: }
! 215:
! 216: unsigned long
! 217: refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
! 218: {
! 219: while (REFMPN_TSTBIT (ptr, bit) == 0)
! 220: bit++;
! 221: return bit;
! 222: }
! 223:
! 224: void
! 225: refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 226: {
! 227: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
! 228: refmpn_copyi (rp, sp, size);
! 229: }
! 230:
! 231: void
! 232: refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 233: {
! 234: mp_size_t i;
! 235:
! 236: ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
! 237: ASSERT (size >= 0);
! 238:
! 239: for (i = 0; i < size; i++)
! 240: rp[i] = sp[i];
! 241: }
! 242:
! 243: void
! 244: refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 245: {
! 246: mp_size_t i;
! 247:
! 248: ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
! 249: ASSERT (size >= 0);
! 250:
! 251: for (i = size-1; i >= 0; i--)
! 252: rp[i] = sp[i];
! 253: }
! 254:
! 255: void
! 256: refmpn_com_n (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 257: {
! 258: mp_size_t i;
! 259:
! 260: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
! 261: ASSERT (size >= 1);
! 262: ASSERT_MPN (sp, size);
! 263:
! 264: for (i = 0; i < size; i++)
! 265: rp[i] = sp[i] ^ GMP_NUMB_MASK;
! 266: }
! 267:
! 268:
! 269: int
! 270: refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
! 271: {
! 272: mp_size_t i;
! 273:
! 274: ASSERT (size >= 1);
! 275: ASSERT_MPN (xp, size);
! 276: ASSERT_MPN (yp, size);
! 277:
! 278: for (i = size-1; i >= 0; i--)
! 279: {
! 280: if (xp[i] > yp[i]) return 1;
! 281: if (xp[i] < yp[i]) return -1;
! 282: }
! 283: return 0;
! 284: }
! 285:
! 286: int
! 287: refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
! 288: {
! 289: if (size == 0)
! 290: return 0;
! 291: else
! 292: return refmpn_cmp (xp, yp, size);
! 293: }
! 294:
! 295: int
! 296: refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
! 297: mp_srcptr yp, mp_size_t ysize)
! 298: {
! 299: int opp, cmp;
! 300:
! 301: ASSERT_MPN (xp, xsize);
! 302: ASSERT_MPN (yp, ysize);
! 303:
! 304: opp = (xsize < ysize);
! 305: if (opp)
! 306: MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
! 307:
! 308: if (! refmpn_zero_p (xp+ysize, xsize-ysize))
! 309: cmp = 1;
! 310: else
! 311: cmp = refmpn_cmp (xp, yp, ysize);
! 312:
! 313: return (opp ? -cmp : cmp);
! 314: }
! 315:
! 316: int
! 317: refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
! 318: {
! 319: mp_size_t i;
! 320: ASSERT (size >= 0);
! 321:
! 322: for (i = 0; i < size; i++)
! 323: if (xp[i] != yp[i])
! 324: return 0;
! 325: return 1;
! 326: }
! 327:
! 328:
! 329: #define LOGOPS(operation) \
! 330: { \
! 331: mp_size_t i; \
! 332: \
! 333: ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
! 334: ASSERT (size >= 1); \
! 335: ASSERT_MPN (s1p, size); \
! 336: ASSERT_MPN (s2p, size); \
! 337: \
! 338: for (i = 0; i < size; i++) \
! 339: rp[i] = operation; \
! 340: }
! 341:
! 342: void
! 343: refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 344: {
! 345: LOGOPS (s1p[i] & s2p[i]);
! 346: }
! 347: void
! 348: refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 349: {
! 350: LOGOPS (s1p[i] & ~s2p[i]);
! 351: }
! 352: void
! 353: refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 354: {
! 355: LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
! 356: }
! 357: void
! 358: refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 359: {
! 360: LOGOPS (s1p[i] | s2p[i]);
! 361: }
! 362: void
! 363: refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 364: {
! 365: LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
! 366: }
! 367: void
! 368: refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 369: {
! 370: LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
! 371: }
! 372: void
! 373: refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 374: {
! 375: LOGOPS (s1p[i] ^ s2p[i]);
! 376: }
! 377: void
! 378: refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 379: {
! 380: LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
! 381: }
! 382:
! 383: /* set *w to x+y, return 0 or 1 carry */
! 384: mp_limb_t
! 385: add (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
! 386: {
! 387: mp_limb_t sum, cy;
! 388:
! 389: ASSERT_LIMB (x);
! 390: ASSERT_LIMB (y);
! 391:
! 392: sum = x + y;
! 393: #if GMP_NAIL_BITS == 0
! 394: *w = sum;
! 395: cy = (sum < x);
! 396: #else
! 397: *w = sum & GMP_NUMB_MASK;
! 398: cy = (sum >> GMP_NUMB_BITS);
! 399: #endif
! 400: return cy;
! 401: }
! 402:
! 403: /* set *w to x-y, return 0 or 1 borrow */
! 404: mp_limb_t
! 405: sub (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
! 406: {
! 407: mp_limb_t diff, cy;
! 408:
! 409: ASSERT_LIMB (x);
! 410: ASSERT_LIMB (y);
! 411:
! 412: diff = x - y;
! 413: #if GMP_NAIL_BITS == 0
! 414: *w = diff;
! 415: cy = (diff > x);
! 416: #else
! 417: *w = diff & GMP_NUMB_MASK;
! 418: cy = (diff >> GMP_NUMB_BITS) & 1;
! 419: #endif
! 420: return cy;
! 421: }
! 422:
! 423: /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
! 424: mp_limb_t
! 425: adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
! 426: {
! 427: mp_limb_t r;
! 428:
! 429: ASSERT_LIMB (x);
! 430: ASSERT_LIMB (y);
! 431: ASSERT (c == 0 || c == 1);
! 432:
! 433: r = add (w, x, y);
! 434: return r + add (w, *w, c);
! 435: }
! 436:
! 437: /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
! 438: mp_limb_t
! 439: sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
! 440: {
! 441: mp_limb_t r;
! 442:
! 443: ASSERT_LIMB (x);
! 444: ASSERT_LIMB (y);
! 445: ASSERT (c == 0 || c == 1);
! 446:
! 447: r = sub (w, x, y);
! 448: return r + sub (w, *w, c);
! 449: }
! 450:
! 451:
! 452: #define AORS_1(operation) \
! 453: { \
! 454: mp_limb_t i; \
! 455: \
! 456: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \
! 457: ASSERT (size >= 1); \
! 458: ASSERT_MPN (sp, size); \
! 459: ASSERT_LIMB (n); \
! 460: \
! 461: for (i = 0; i < size; i++) \
! 462: n = operation (&rp[i], sp[i], n); \
! 463: return n; \
! 464: }
! 465:
! 466: mp_limb_t
! 467: refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
! 468: {
! 469: AORS_1 (add);
! 470: }
! 471: mp_limb_t
! 472: refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
! 473: {
! 474: AORS_1 (sub);
! 475: }
! 476:
! 477: #define AORS_NC(operation) \
! 478: { \
! 479: mp_size_t i; \
! 480: \
! 481: ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
! 482: ASSERT (carry == 0 || carry == 1); \
! 483: ASSERT (size >= 1); \
! 484: ASSERT_MPN (s1p, size); \
! 485: ASSERT_MPN (s2p, size); \
! 486: \
! 487: for (i = 0; i < size; i++) \
! 488: carry = operation (&rp[i], s1p[i], s2p[i], carry); \
! 489: return carry; \
! 490: }
! 491:
! 492: mp_limb_t
! 493: refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
! 494: mp_limb_t carry)
! 495: {
! 496: AORS_NC (adc);
! 497: }
! 498: mp_limb_t
! 499: refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
! 500: mp_limb_t carry)
! 501: {
! 502: AORS_NC (sbb);
! 503: }
! 504:
! 505:
! 506: mp_limb_t
! 507: refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 508: {
! 509: return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
! 510: }
! 511: mp_limb_t
! 512: refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 513: {
! 514: return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
! 515: }
! 516:
! 517:
! 518: /* Twos complement, return borrow. */
! 519: mp_limb_t
! 520: refmpn_neg_n (mp_ptr dst, mp_srcptr src, mp_size_t size)
! 521: {
! 522: mp_ptr zeros;
! 523: mp_limb_t ret;
! 524:
! 525: ASSERT (size >= 1);
! 526:
! 527: zeros = refmpn_malloc_limbs (size);
! 528: refmpn_fill (zeros, size, CNST_LIMB(0));
! 529: ret = refmpn_sub_n (dst, zeros, src, size);
! 530: free (zeros);
! 531: return ret;
! 532: }
! 533:
! 534:
! 535: #define AORS(aors_n, aors_1) \
! 536: { \
! 537: mp_limb_t c; \
! 538: ASSERT (s1size >= s2size); \
! 539: ASSERT (s2size >= 1); \
! 540: c = aors_n (rp, s1p, s2p, s2size); \
! 541: if (s1size-s2size != 0) \
! 542: c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \
! 543: return c; \
! 544: }
! 545: mp_limb_t
! 546: refmpn_add (mp_ptr rp,
! 547: mp_srcptr s1p, mp_size_t s1size,
! 548: mp_srcptr s2p, mp_size_t s2size)
! 549: {
! 550: AORS (refmpn_add_n, refmpn_add_1);
! 551: }
! 552: mp_limb_t
! 553: refmpn_sub (mp_ptr rp,
! 554: mp_srcptr s1p, mp_size_t s1size,
! 555: mp_srcptr s2p, mp_size_t s2size)
! 556: {
! 557: AORS (refmpn_sub_n, refmpn_sub_1);
! 558: }
! 559:
! 560:
! 561: #define SHIFTHIGH(x) ((x) << BITS_PER_MP_LIMB/2)
! 562: #define SHIFTLOW(x) ((x) >> BITS_PER_MP_LIMB/2)
! 563:
! 564: #define LOWMASK (((mp_limb_t) 1 << BITS_PER_MP_LIMB/2)-1)
! 565: #define HIGHMASK SHIFTHIGH(LOWMASK)
! 566:
! 567: #define LOWPART(x) ((x) & LOWMASK)
! 568: #define HIGHPART(x) SHIFTLOW((x) & HIGHMASK)
! 569:
! 570: /* Set *hi,*lo to x*y, using full limbs not nails. */
! 571: mp_limb_t
! 572: refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
! 573: {
! 574: mp_limb_t hi, s;
! 575:
! 576: *lo = LOWPART(x) * LOWPART(y);
! 577: hi = HIGHPART(x) * HIGHPART(y);
! 578:
! 579: s = LOWPART(x) * HIGHPART(y);
! 580: hi += HIGHPART(s);
! 581: s = SHIFTHIGH(LOWPART(s));
! 582: *lo += s;
! 583: hi += (*lo < s);
! 584:
! 585: s = HIGHPART(x) * LOWPART(y);
! 586: hi += HIGHPART(s);
! 587: s = SHIFTHIGH(LOWPART(s));
! 588: *lo += s;
! 589: hi += (*lo < s);
! 590:
! 591: return hi;
! 592: }
! 593:
! 594: mp_limb_t
! 595: refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
! 596: {
! 597: return refmpn_umul_ppmm (lo, x, y);
! 598: }
! 599:
! 600: mp_limb_t
! 601: refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
! 602: mp_limb_t carry)
! 603: {
! 604: mp_size_t i;
! 605: mp_limb_t hi, lo;
! 606:
! 607: ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
! 608: ASSERT (size >= 1);
! 609: ASSERT_MPN (sp, size);
! 610: ASSERT_LIMB (multiplier);
! 611: ASSERT_LIMB (carry);
! 612:
! 613: multiplier <<= GMP_NAIL_BITS;
! 614: for (i = 0; i < size; i++)
! 615: {
! 616: hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
! 617: lo >>= GMP_NAIL_BITS;
! 618: ASSERT_NOCARRY (add (&hi, hi, add (&lo, lo, carry)));
! 619: rp[i] = lo;
! 620: carry = hi;
! 621: }
! 622: return carry;
! 623: }
! 624:
! 625: mp_limb_t
! 626: refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
! 627: {
! 628: return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
! 629: }
! 630:
! 631:
! 632: mp_limb_t
! 633: refmpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult)
! 634: {
! 635: mp_ptr src_copy;
! 636: mp_limb_t c;
! 637:
! 638: ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
! 639: ASSERT (! refmpn_overlap_p (dst, size+1, mult, 2));
! 640: ASSERT (size >= 1);
! 641: ASSERT_MPN (mult, 2);
! 642:
! 643: /* in case dst==src */
! 644: src_copy = refmpn_malloc_limbs (size);
! 645: refmpn_copyi (src_copy, src, size);
! 646: src = src_copy;
! 647:
! 648: dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
! 649: c = refmpn_addmul_1 (dst+1, src, size, mult[1]);
! 650: free (src_copy);
! 651: return c;
! 652: }
! 653:
! 654:
! 655: #define AORSMUL_1C(operation_n) \
! 656: { \
! 657: mp_ptr p; \
! 658: mp_limb_t ret; \
! 659: \
! 660: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \
! 661: \
! 662: p = refmpn_malloc_limbs (size); \
! 663: ret = refmpn_mul_1c (p, sp, size, multiplier, carry); \
! 664: ret += operation_n (rp, rp, p, size); \
! 665: \
! 666: free (p); \
! 667: return ret; \
! 668: }
! 669:
! 670: mp_limb_t
! 671: refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
! 672: mp_limb_t multiplier, mp_limb_t carry)
! 673: {
! 674: AORSMUL_1C (refmpn_add_n);
! 675: }
! 676: mp_limb_t
! 677: refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
! 678: mp_limb_t multiplier, mp_limb_t carry)
! 679: {
! 680: AORSMUL_1C (refmpn_sub_n);
! 681: }
! 682:
! 683:
! 684: mp_limb_t
! 685: refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
! 686: {
! 687: return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
! 688: }
! 689: mp_limb_t
! 690: refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
! 691: {
! 692: return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
! 693: }
! 694:
! 695:
! 696: mp_limb_t
! 697: refmpn_addsub_nc (mp_ptr r1p, mp_ptr r2p,
! 698: mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
! 699: mp_limb_t carry)
! 700: {
! 701: mp_ptr p;
! 702: mp_limb_t acy, scy;
! 703:
! 704: /* Destinations can't overlap. */
! 705: ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
! 706: ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
! 707: ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
! 708: ASSERT (size >= 1);
! 709:
! 710: /* in case r1p==s1p or r1p==s2p */
! 711: p = refmpn_malloc_limbs (size);
! 712:
! 713: acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
! 714: scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
! 715: refmpn_copyi (r1p, p, size);
! 716:
! 717: free (p);
! 718: return 2 * acy + scy;
! 719: }
! 720:
! 721: mp_limb_t
! 722: refmpn_addsub_n (mp_ptr r1p, mp_ptr r2p,
! 723: mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 724: {
! 725: return refmpn_addsub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
! 726: }
! 727:
! 728:
! 729: /* Right shift hi,lo and return the low limb of the result.
! 730: Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
! 731: mp_limb_t
! 732: rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
! 733: {
! 734: ASSERT (shift < GMP_NUMB_BITS);
! 735: if (shift == 0)
! 736: return lo;
! 737: else
! 738: return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
! 739: }
! 740:
! 741: /* Left shift hi,lo and return the high limb of the result.
! 742: Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
! 743: mp_limb_t
! 744: lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
! 745: {
! 746: ASSERT (shift < GMP_NUMB_BITS);
! 747: if (shift == 0)
! 748: return hi;
! 749: else
! 750: return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
! 751: }
! 752:
! 753:
! 754: mp_limb_t
! 755: refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
! 756: {
! 757: mp_limb_t ret;
! 758: mp_size_t i;
! 759:
! 760: ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
! 761: ASSERT (size >= 1);
! 762: ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
! 763: ASSERT_MPN (sp, size);
! 764:
! 765: ret = rshift_make (sp[0], CNST_LIMB(0), shift);
! 766:
! 767: for (i = 0; i < size-1; i++)
! 768: rp[i] = rshift_make (sp[i+1], sp[i], shift);
! 769:
! 770: rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
! 771: return ret;
! 772: }
! 773:
! 774: mp_limb_t
! 775: refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
! 776: {
! 777: mp_limb_t ret;
! 778: mp_size_t i;
! 779:
! 780: ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
! 781: ASSERT (size >= 1);
! 782: ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
! 783: ASSERT_MPN (sp, size);
! 784:
! 785: ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);
! 786:
! 787: for (i = size-2; i >= 0; i--)
! 788: rp[i+1] = lshift_make (sp[i+1], sp[i], shift);
! 789:
! 790: rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
! 791: return ret;
! 792: }
! 793:
! 794:
! 795: mp_limb_t
! 796: refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
! 797: {
! 798: if (shift == 0)
! 799: {
! 800: refmpn_copyi (rp, sp, size);
! 801: return 0;
! 802: }
! 803: else
! 804: {
! 805: return refmpn_rshift (rp, sp, size, shift);
! 806: }
! 807: }
! 808:
! 809: mp_limb_t
! 810: refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
! 811: {
! 812: if (shift == 0)
! 813: {
! 814: refmpn_copyd (rp, sp, size);
! 815: return 0;
! 816: }
! 817: else
! 818: {
! 819: return refmpn_lshift (rp, sp, size, shift);
! 820: }
! 821: }
! 822:
! 823:
! 824: /* Divide h,l by d, return quotient, store remainder to *rp.
! 825: Operates on full limbs, not nails.
! 826: Must have h < d.
! 827: __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
! 828: mp_limb_t
! 829: refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
! 830: {
! 831: mp_limb_t q, r;
! 832: int n;
! 833:
! 834: ASSERT (d != 0);
! 835: ASSERT (h < d);
! 836:
! 837: #if 0
! 838: udiv_qrnnd (q, r, h, l, d);
! 839: *rp = r;
! 840: return q;
! 841: #endif
! 842:
! 843: n = refmpn_count_leading_zeros (d);
! 844: d <<= n;
! 845:
! 846: if (n != 0)
! 847: {
! 848: h = (h << n) | (l >> (GMP_LIMB_BITS - n));
! 849: l <<= n;
! 850: }
! 851:
! 852: __udiv_qrnnd_c (q, r, h, l, d);
! 853: r >>= n;
! 854: *rp = r;
! 855: return q;
! 856: }
! 857:
! 858: mp_limb_t
! 859: refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
! 860: {
! 861: return refmpn_udiv_qrnnd (rp, h, l, d);
! 862: }
! 863:
! 864: /* This little subroutine avoids some bad code generation from i386 gcc 3.0
! 865: -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized). */
! 866: static mp_limb_t
! 867: refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
! 868: mp_limb_t divisor, mp_limb_t carry)
! 869: {
! 870: mp_size_t i;
! 871: for (i = size-1; i >= 0; i--)
! 872: {
! 873: rp[i] = refmpn_udiv_qrnnd (&carry, carry,
! 874: sp[i] << GMP_NAIL_BITS,
! 875: divisor << GMP_NAIL_BITS);
! 876: carry >>= GMP_NAIL_BITS;
! 877: }
! 878: return carry;
! 879: }
! 880:
! 881: mp_limb_t
! 882: refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
! 883: mp_limb_t divisor, mp_limb_t carry)
! 884: {
! 885: mp_ptr sp_orig;
! 886: mp_ptr prod;
! 887: mp_limb_t carry_orig;
! 888:
! 889: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
! 890: ASSERT (size >= 0);
! 891: ASSERT (carry < divisor);
! 892: ASSERT_MPN (sp, size);
! 893: ASSERT_LIMB (divisor);
! 894: ASSERT_LIMB (carry);
! 895:
! 896: if (size == 0)
! 897: return carry;
! 898:
! 899: sp_orig = refmpn_memdup_limbs (sp, size);
! 900: prod = refmpn_malloc_limbs (size);
! 901: carry_orig = carry;
! 902:
! 903: carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
! 904:
! 905: /* check by multiplying back */
! 906: #if 0
! 907: printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
! 908: size, divisor, carry_orig, carry);
! 909: mpn_trace("s",sp_copy,size);
! 910: mpn_trace("r",rp,size);
! 911: printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
! 912: mpn_trace("p",prod,size);
! 913: #endif
! 914: ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
! 915: ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
! 916: free (sp_orig);
! 917: free (prod);
! 918:
! 919: return carry;
! 920: }
! 921:
! 922: mp_limb_t
! 923: refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
! 924: {
! 925: return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
! 926: }
! 927:
! 928:
! 929: mp_limb_t
! 930: refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
! 931: mp_limb_t carry)
! 932: {
! 933: mp_ptr p = refmpn_malloc_limbs (size);
! 934: carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
! 935: free (p);
! 936: return carry;
! 937: }
! 938:
! 939: mp_limb_t
! 940: refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
! 941: {
! 942: return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
! 943: }
! 944:
! 945: mp_limb_t
! 946: refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
! 947: mp_limb_t inverse)
! 948: {
! 949: ASSERT (divisor & GMP_NUMB_HIGHBIT);
! 950: ASSERT (inverse == refmpn_invert_limb (divisor));
! 951: return refmpn_mod_1 (sp, size, divisor);
! 952: }
! 953:
! 954: /* This implementation will be rather slow, but has the advantage of being
! 955: in a different style than the libgmp versions. */
! 956: mp_limb_t
! 957: refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
! 958: {
! 959: ASSERT ((GMP_NUMB_BITS % 4) == 0);
! 960: return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
! 961: }
! 962:
! 963:
! 964: mp_limb_t
! 965: refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
! 966: mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
! 967: mp_limb_t carry)
! 968: {
! 969: mp_ptr z;
! 970:
! 971: z = refmpn_malloc_limbs (xsize);
! 972: refmpn_fill (z, xsize, CNST_LIMB(0));
! 973:
! 974: carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
! 975: carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);
! 976:
! 977: free (z);
! 978: return carry;
! 979: }
! 980:
! 981: mp_limb_t
! 982: refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
! 983: mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
! 984: {
! 985: return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
! 986: }
! 987:
! 988: mp_limb_t
! 989: refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
! 990: mp_srcptr sp, mp_size_t size,
! 991: mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
! 992: {
! 993: ASSERT (size >= 0);
! 994: ASSERT (shift == refmpn_count_leading_zeros (divisor));
! 995: ASSERT (inverse == refmpn_invert_limb (divisor << shift));
! 996:
! 997: return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
! 998: }
! 999:
! 1000:
! 1001: /* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers
! 1002: paper, figure 8.1 m', where b=2^BITS_PER_MP_LIMB. Note that -d-1 < d
! 1003: since d has the high bit set. */
! 1004:
! 1005: mp_limb_t
! 1006: refmpn_invert_limb (mp_limb_t d)
! 1007: {
! 1008: mp_limb_t r;
! 1009: ASSERT (d & GMP_LIMB_HIGHBIT);
! 1010: return refmpn_udiv_qrnnd (&r, -d-1, -1, d);
! 1011: }
! 1012:
! 1013:
! 1014: /* The aim is to produce a dst quotient and return a remainder c, satisfying
! 1015: c*b^n + src-i == 3*dst, where i is the incoming carry.
! 1016:
! 1017: Some value c==0, c==1 or c==2 will satisfy, so just try each.
! 1018:
! 1019: If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
! 1020: remainder from the first division attempt determines the correct
! 1021: remainder (3-c), but don't bother with that, since we can't guarantee
! 1022: anything about GMP_NUMB_BITS when using nails.
! 1023:
! 1024: If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
! 1025: complement negative, ie. b^n+a-i, and the calculation produces c1
! 1026: satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1. This
! 1027: means it's enough to just add any borrow back at the end.
! 1028:
! 1029: A borrow only occurs when a==0 or a==1, and, by the same reasoning as in
! 1030: mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
! 1031: or 1 respectively, so with 1 added the final return value is still in the
! 1032: prescribed range 0 to 2. */
! 1033:
! 1034: mp_limb_t
! 1035: refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
! 1036: {
! 1037: mp_ptr spcopy;
! 1038: mp_limb_t c, cs;
! 1039:
! 1040: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
! 1041: ASSERT (size >= 1);
! 1042: ASSERT (carry <= 2);
! 1043: ASSERT_MPN (sp, size);
! 1044:
! 1045: spcopy = refmpn_malloc_limbs (size);
! 1046: cs = refmpn_sub_1 (spcopy, sp, size, carry);
! 1047:
! 1048: for (c = 0; c <= 2; c++)
! 1049: if (refmpn_divmod_1c (rp, spcopy, size, 3, c) == 0)
! 1050: goto done;
! 1051: ASSERT_FAIL (no value of c satisfies);
! 1052:
! 1053: done:
! 1054: c += cs;
! 1055: ASSERT (c <= 2);
! 1056:
! 1057: free (spcopy);
! 1058: return c;
! 1059: }
! 1060:
! 1061: mp_limb_t
! 1062: refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 1063: {
! 1064: return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
! 1065: }
! 1066:
! 1067:
! 1068: /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
! 1069: void
! 1070: refmpn_mul_basecase (mp_ptr prodp,
! 1071: mp_srcptr up, mp_size_t usize,
! 1072: mp_srcptr vp, mp_size_t vsize)
! 1073: {
! 1074: mp_size_t i;
! 1075:
! 1076: ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
! 1077: ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
! 1078: ASSERT (usize >= vsize);
! 1079: ASSERT (vsize >= 1);
! 1080: ASSERT_MPN (up, usize);
! 1081: ASSERT_MPN (vp, vsize);
! 1082:
! 1083: prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
! 1084: for (i = 1; i < vsize; i++)
! 1085: prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
! 1086: }
! 1087:
! 1088: void
! 1089: refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
! 1090: {
! 1091: refmpn_mul_basecase (prodp, up, size, vp, size);
! 1092: }
! 1093:
! 1094: void
! 1095: refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
! 1096: {
! 1097: refmpn_mul_basecase (dst, src, size, src, size);
! 1098: }
! 1099:
! 1100: /* Allowing usize<vsize, usize==0 or vsize==0. */
! 1101: void
! 1102: refmpn_mul_any (mp_ptr prodp,
! 1103: mp_srcptr up, mp_size_t usize,
! 1104: mp_srcptr vp, mp_size_t vsize)
! 1105: {
! 1106: ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
! 1107: ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
! 1108: ASSERT (usize >= 0);
! 1109: ASSERT (vsize >= 0);
! 1110: ASSERT_MPN (up, usize);
! 1111: ASSERT_MPN (vp, vsize);
! 1112:
! 1113: if (usize == 0)
! 1114: {
! 1115: refmpn_fill (prodp, vsize, CNST_LIMB(0));
! 1116: return;
! 1117: }
! 1118:
! 1119: if (vsize == 0)
! 1120: {
! 1121: refmpn_fill (prodp, usize, CNST_LIMB(0));
! 1122: return;
! 1123: }
! 1124:
! 1125: if (usize >= vsize)
! 1126: refmpn_mul_basecase (prodp, up, usize, vp, vsize);
! 1127: else
! 1128: refmpn_mul_basecase (prodp, vp, vsize, up, usize);
! 1129: }
! 1130:
! 1131:
/* Return gcd of {xp,xsize} and the single limb y (y odd or even, but
   nonzero).  The multi-limb operand is first reduced mod y, then the
   binary subtract-and-shift gcd is run on the two remaining limbs.  */
mp_limb_t
refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
{
  mp_limb_t x;
  int twos;

  ASSERT (y != 0);
  ASSERT (! refmpn_zero_p (xp, xsize));
  ASSERT_MPN (xp, xsize);
  ASSERT_LIMB (y);

  /* Reduce to a single limb; if y divides x the gcd is just y.  */
  x = refmpn_mod_1 (xp, xsize, y);
  if (x == 0)
    return y;

  /* Strip factors of 2 common to both; restored on the result at the
     end.  */
  twos = 0;
  while ((x & 1) == 0 && (y & 1) == 0)
    {
      x >>= 1;
      y >>= 1;
      twos++;
    }

  /* Binary gcd: make both odd, subtract smaller from larger (difference
     is even, its twos get stripped next iteration), stop when they
     become equal (difference 0); y then holds the odd part of the gcd.  */
  for (;;)
    {
      while ((x & 1) == 0) x >>= 1;
      while ((y & 1) == 0) y >>= 1;

      if (x < y)
        MP_LIMB_T_SWAP (x, y);

      x -= y;
      if (x == 0)
        break;
    }

  return y << twos;
}
! 1170:
! 1171:
! 1172: /* Based on the full limb x, not nails. */
! 1173: unsigned
! 1174: refmpn_count_leading_zeros (mp_limb_t x)
! 1175: {
! 1176: unsigned n = 0;
! 1177:
! 1178: ASSERT (x != 0);
! 1179:
! 1180: while ((x & GMP_LIMB_HIGHBIT) == 0)
! 1181: {
! 1182: x <<= 1;
! 1183: n++;
! 1184: }
! 1185: return n;
! 1186: }
! 1187:
! 1188: /* Full limbs allowed, not limited to nails. */
! 1189: unsigned
! 1190: refmpn_count_trailing_zeros (mp_limb_t x)
! 1191: {
! 1192: unsigned n = 0;
! 1193:
! 1194: ASSERT (x != 0);
! 1195: ASSERT_LIMB (x);
! 1196:
! 1197: while ((x & 1) == 0)
! 1198: {
! 1199: x >>= 1;
! 1200: n++;
! 1201: }
! 1202: return n;
! 1203: }
! 1204:
/* Strip factors of two (low zero bits) from {p,size} by right shifting.
   The return value is the number of twos stripped.  {p,size} must be
   non-zero, which also guarantees both loops below terminate.  */
mp_size_t
refmpn_strip_twos (mp_ptr p, mp_size_t size)
{
  mp_size_t limbs;
  unsigned shift;

  ASSERT (size >= 1);
  ASSERT (! refmpn_zero_p (p, size));
  ASSERT_MPN (p, size);

  /* Drop whole zero limbs from the bottom by shifting the rest down one
     limb at a time and zero-filling at the top (slow but simple).  */
  for (limbs = 0; p[0] == 0; limbs++)
    {
      refmpn_copyi (p, p+1, size-1);
      p[size-1] = 0;
    }

  /* Then a sub-limb shift for the remaining low zero bits, if any.  */
  shift = refmpn_count_trailing_zeros (p[0]);
  if (shift)
    refmpn_rshift (p, p, size, shift);

  return limbs*GMP_NUMB_BITS + shift;
}
! 1229:
/* gcd of {xp,xsize} and {yp,ysize}, result stored at {gp, retval} with
   the returned value being its size in limbs.  Both source operands are
   clobbered (reduced in place).  Inputs must both be odd in their low
   limb, per the asserts below; factors of two arising during the
   reduction are discarded by refmpn_strip_twos, which is consistent with
   an odd gcd.  */
mp_limb_t
refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
{
  int cmp;

  ASSERT (ysize >= 1);
  ASSERT (xsize >= ysize);
  ASSERT ((xp[0] & 1) != 0);
  ASSERT ((yp[0] & 1) != 0);
  /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
  ASSERT (yp[ysize-1] != 0);
  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
  ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
  if (xsize == ysize)
    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  refmpn_strip_twos (xp, xsize);
  MPN_NORMALIZE (xp, xsize);
  MPN_NORMALIZE (yp, ysize);

  /* Subtraction gcd: subtract the smaller odd operand from the larger,
     strip twos from the difference, repeat until the operands are
     equal.  Pointers/sizes are swapped so x is always the larger.  */
  for (;;)
    {
      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
      if (cmp == 0)
        break;
      if (cmp < 0)
        MPN_PTR_SWAP (xp,xsize, yp,ysize);

      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));

      refmpn_strip_twos (xp, xsize);
      MPN_NORMALIZE (xp, xsize);
    }

  /* x == y here; that common value is the gcd.  */
  refmpn_copyi (gp, xp, xsize);
  return xsize;
}
! 1270:
! 1271:
! 1272: unsigned long
! 1273: refmpn_popcount (mp_srcptr sp, mp_size_t size)
! 1274: {
! 1275: unsigned long count = 0;
! 1276: mp_size_t i;
! 1277: int j;
! 1278: mp_limb_t l;
! 1279:
! 1280: ASSERT (size >= 0);
! 1281: ASSERT_MPN (sp, size);
! 1282:
! 1283: for (i = 0; i < size; i++)
! 1284: {
! 1285: l = sp[i];
! 1286: for (j = 0; j < GMP_NUMB_BITS; j++)
! 1287: {
! 1288: count += (l & 1);
! 1289: l >>= 1;
! 1290: }
! 1291: }
! 1292: return count;
! 1293: }
! 1294:
! 1295: unsigned long
! 1296: refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 1297: {
! 1298: mp_ptr d;
! 1299: unsigned long count;
! 1300:
! 1301: ASSERT (size >= 0);
! 1302: ASSERT_MPN (s1p, size);
! 1303: ASSERT_MPN (s2p, size);
! 1304:
! 1305: if (size == 0)
! 1306: return 0;
! 1307:
! 1308: d = refmpn_malloc_limbs (size);
! 1309: refmpn_xor_n (d, s1p, s2p, size);
! 1310: count = refmpn_popcount (d, size);
! 1311: free (d);
! 1312: return count;
! 1313: }
! 1314:
! 1315:
/* set r to a%d, where a, d and r are all two-limb operands, by plain
   shift-and-subtract division.  */
void
refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
{
  mp_limb_t  D[2];
  int        n;

  ASSERT (! refmpn_overlap_p (r, 2, a, 2));
  ASSERT (! refmpn_overlap_p (r, 2, d, 2));
  ASSERT_MPN (a, 2);
  ASSERT_MPN (d, 2);

  /* Work on copies: D is the shifted divisor, r starts as the dividend
     and is reduced in place.  */
  D[1] = d[1], D[0] = d[0];
  r[1] = a[1], r[0] = a[0];
  n = 0;

  /* Shift D left until either its top bit is set (can't shift further
     without overflow) or D has reached/passed r; n counts the shifts.  */
  for (;;)
    {
      if (D[1] & GMP_NUMB_HIGHBIT)
        break;
      if (refmpn_cmp (r, D, 2) <= 0)
        break;
      refmpn_lshift (D, D, 2, 1);
      n++;
      ASSERT (n <= GMP_NUMB_BITS);
    }

  /* Walk D back down, subtracting it from r whenever it fits; after the
     n==0 step r is reduced below the original d.  */
  while (n >= 0)
    {
      if (refmpn_cmp (r, D, 2) >= 0)
        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, 2));
      refmpn_rshift (D, D, 2, 1);
      n--;
    }

  ASSERT (refmpn_cmp (r, d, 2) < 0);
}
! 1353:
! 1354:
/* Find n where 0<n<2^GMP_NUMB_BITS and n==c mod m */
mp_limb_t
refmpn_gcd_finda (const mp_limb_t c[2])
{
  mp_limb_t  n1[2], n2[2];

  ASSERT (c[0] != 0);
  ASSERT (c[1] != 0);
  ASSERT_MPN (c, 2);

  n1[0] = c[0];
  n1[1] = c[1];

  /* n2 = -c as a two-limb two's complement value (c[0] != 0 is asserted
     above, so negating the low limb and complementing the high limb is
     the full negation).  */
  n2[0] = -n1[0];
  n2[1] = ~n1[1];

  /* Euclidean-style reduction: replace the pair (n1, n2) by
     (n2, n1 mod n2) until n2 fits in a single limb, which is then the
     result.  */
  while (n2[1] != 0)
    {
      refmpn_mod2 (n1, n1, n2);

      MP_LIMB_T_SWAP (n1[0], n2[0]);
      MP_LIMB_T_SWAP (n1[1], n2[1]);
    }

  return n2[0];
}
! 1381:
! 1382:
/* Similar to mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
   particular the trial quotient is allowed to be 2 too big.

   Divide {np,nsize} by the normalized divisor {dp,dsize} (high bit of
   dp[dsize-1] set).  The quotient (nsize-dsize limbs) goes to qp, the
   return value is the additional high quotient limb (0 or 1), and the
   remainder is left in the low dsize limbs of np.  The whole result is
   checked at the end by multiplying back.  */
mp_limb_t
refmpn_sb_divrem_mn (mp_ptr qp,
                     mp_ptr np, mp_size_t nsize,
                     mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t  retval = 0;
  mp_size_t  i;
  mp_limb_t  d1 = dp[dsize-1];
  /* copy of the original dividend, kept for the multiply-back check */
  mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);

  ASSERT (nsize >= dsize);
  /* ASSERT (dsize > 2); */
  ASSERT (dsize >= 2);
  ASSERT (dp[dsize-1] & GMP_LIMB_HIGHBIT);
  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  /* High quotient limb: 1 if the top dsize limbs of n already hold a
     value >= d, in which case subtract d once.  */
  i = nsize-dsize;
  if (refmpn_cmp (np+i, dp, dsize) >= 0)
    {
      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
      retval = 1;
    }

  /* One quotient limb per iteration, high to low.  */
  for (i--; i >= 0; i--)
    {
      mp_limb_t  n0 = np[i+dsize];
      mp_limb_t  n1 = np[i+dsize-1];
      mp_limb_t  q, dummy_r;

      /* Trial quotient from the top two dividend limbs and the top
         divisor limb; may be up to 2 too big.  */
      ASSERT (n0 <= d1);
      if (n0 == d1)
        q = MP_LIMB_T_MAX;
      else
        q = refmpn_udiv_qrnnd (&dummy_r, n0, n1, d1);

      /* Subtract q*d; if q was too big this underflows, leaving n0
         (the borrow-adjusted top limb) equal to MP_LIMB_T_MAX.  */
      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
      if (n0)
        {
          /* q was 1 or 2 too big: add d back once, and once more if the
             first add didn't carry (meaning q was 2 too big).  */
          q--;
          if (! refmpn_add_n (np+i, np+i, dp, dsize))
            {
              q--;
              ASSERT (refmpn_add_n (np+i, np+i, dp, dsize) != 0);
            }
        }
      np[i+dsize] = 0;

      qp[i] = q;
    }

  /* remainder < divisor */
  ASSERT (refmpn_cmp (np, dp, dsize) < 0);

  /* multiply back to original */
  {
    mp_ptr  mp = refmpn_malloc_limbs (nsize);

    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
    if (retval)
      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);

    free (mp);
  }

  free (np_orig);
  return retval;
}
! 1457:
/* Divide {np,nsize} by {dp,dsize}: quotient to {qp, nsize-dsize+1},
   remainder to {rp, dsize}.  qxn must be 0.  Single-limb divisors go
   through refmpn_divmod_1; otherwise the divisor is normalized by a left
   shift and refmpn_sb_divrem_mn does the work on shifted copies.
   (NOTE(review): the previous header comment here was a copy-paste of
   refmpn_sb_divrem_mn's and described the wrong function.)  */
void
refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                mp_ptr np, mp_size_t nsize,
                mp_srcptr dp, mp_size_t dsize)
{
  ASSERT (qxn == 0);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  if (dsize == 1)
    {
      rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
      return;
    }
  else
    {
      /* Shifted working copies; n gets one extra limb for its shift-out
         bits / the high quotient limb position.  */
      mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
      mp_ptr  d2p = refmpn_malloc_limbs (dsize);
      /* shift making the divisor's top bit set, as sb_divrem_mn needs */
      int     norm = refmpn_count_leading_zeros (dp[dsize-1]);

      n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
      ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));

      refmpn_sb_divrem_mn (qp, n2p, nsize+1, d2p, dsize);
      /* un-normalize the remainder back into rp */
      refmpn_rshift_or_copy (rp, n2p, dsize, norm);

      /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
      free (n2p);
      free (d2p);
    }
}
! 1491:
/* Convert {src,size} to digits of the given base at dst, most significant
   digit first, by repeated division by the base.  The digit values are
   raw 0..base-1, not ASCII.  Returns the digit count, which is the
   MPN_SIZEINBASE estimate (so there may be one leading zero digit).
   src is clobbered, except for power-of-2 bases where a copy is taken.  */
size_t
refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
{
  unsigned char  *d;
  size_t  dsize;

  ASSERT (size >= 0);
  ASSERT (base >= 2);
  ASSERT (base < numberof (__mp_bases));
  ASSERT (size == 0 || src[size-1] != 0);
  ASSERT_MPN (src, size);

  MPN_SIZEINBASE (dsize, src, size, base);
  ASSERT (dsize >= 1);
  ASSERT (! byte_overlap_p (dst, dsize, src, size * BYTES_PER_MP_LIMB));

  if (size == 0)
    {
      dst[0] = 0;
      return 1;
    }

  /* don't clobber input for power of 2 bases */
  if (POW2_P (base))
    src = refmpn_memdup_limbs (src, size);

  /* Divide by base repeatedly; each remainder is the next digit, filled
     in from the least significant end of dst.  Shrink size as the top
     limb of src becomes zero.  */
  d = dst + dsize;
  do
    {
      d--;
      ASSERT (d >= dst);
      *d = refmpn_divrem_1 (src, 0, src, size, (mp_limb_t) base);
      size -= (src[size-1] == 0);
    }
  while (size != 0);

  /* at most one leading zero */
  ASSERT (d == dst || d == dst+1);
  if (d != dst)
    *dst = 0;

  if (POW2_P (base))
    free (src);

  return dsize;
}
! 1538:
! 1539:
! 1540: mp_limb_t
! 1541: refmpn_bswap_limb (mp_limb_t src)
! 1542: {
! 1543: mp_limb_t dst;
! 1544: int i;
! 1545:
! 1546: dst = 0;
! 1547: for (i = 0; i < BYTES_PER_MP_LIMB; i++)
! 1548: {
! 1549: dst = (dst << 8) + (src & 0xFF);
! 1550: src >>= 8;
! 1551: }
! 1552: return dst;
! 1553: }
! 1554:
! 1555:
/* These random functions are mostly for transitional purposes while adding
   nail support, since they're independent of the normal mpn routines.  They
   can probably be removed when those normal routines are reliable, though
   perhaps something independent would still be useful at times. */

/* RAND_A is the multiplier used by refmpn_random_half below, one value
   per supported limb size.  */
#if BITS_PER_MP_LIMB == 32
#define RAND_A  CNST_LIMB(0x29CF535)
#endif
#if BITS_PER_MP_LIMB == 64
#define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
#endif

/* Generator state; deliberately global so tests can seed it.  */
mp_limb_t  refmpn_random_seed;
! 1569:
/* Step the linear congruential generator (seed = seed*RAND_A + 1) and
   return the top BITS_PER_MP_LIMB/2 bits of the new seed.  */
mp_limb_t
refmpn_random_half (void)
{
  refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
  return (refmpn_random_seed >> BITS_PER_MP_LIMB/2);
}
! 1576:
! 1577: mp_limb_t
! 1578: refmpn_random_limb (void)
! 1579: {
! 1580: return ((refmpn_random_half () << (BITS_PER_MP_LIMB/2))
! 1581: | refmpn_random_half ()) & GMP_NUMB_MASK;
! 1582: }
! 1583:
! 1584: void
! 1585: refmpn_random (mp_ptr ptr, mp_size_t size)
! 1586: {
! 1587: mp_size_t i;
! 1588: if (GMP_NAIL_BITS == 0)
! 1589: {
! 1590: mpn_random (ptr, size);
! 1591: return;
! 1592: }
! 1593:
! 1594: for (i = 0; i < size; i++)
! 1595: ptr[i] = refmpn_random_limb ();
! 1596: }
! 1597:
/* Fill {ptr,size} with long random runs of consecutive one bits and zero
   bits (the kind of operands likely to expose carry-propagation bugs).
   Without nails this defers to mpn_random2; with nails the runs are
   generated here.  */
void
refmpn_random2 (mp_ptr ptr, mp_size_t size)
{
  mp_size_t  i;
  mp_limb_t  bit, mask, limb;
  int        run;

  if (GMP_NAIL_BITS == 0)
    {
      mpn_random2 (ptr, size);
      return;
    }

/* maximum run length, in bits */
#define RUN_MODULUS  32

  /* start with ones at a random pos in the high limb */
  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
  mask = 0;
  run = 0;

  /* Walk bit positions from the high limb downward.  `mask` is all-ones
     while emitting a run of ones and all-zeros during a run of zeros;
     `run` counts bits left in the current run and a new random run
     length (1..RUN_MODULUS) with flipped mask starts when it hits 0.
     Runs carry across limb boundaries.  */
  for (i = size-1; i >= 0; i--)
    {
      limb = 0;
      do
        {
          if (run == 0)
            {
              run = (refmpn_random_half () % RUN_MODULUS) + 1;
              mask = ~mask;
            }

          limb |= (bit & mask);
          bit >>= 1;
          run--;
        }
      while (bit != 0);

      ptr[i] = limb;
      bit = GMP_NUMB_HIGHBIT;
    }
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>