Annotation of OpenXM_contrib/gmp/mpn/tests/ref.c, Revision 1.1
1.1 ! maekawa 1: /* Reference mpn functions, designed to be simple, portable and independent
! 2: of the normal gmp code. Speed isn't a consideration. */
! 3:
! 4: /*
! 5: Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
! 6:
! 7: This file is part of the GNU MP Library.
! 8:
! 9: The GNU MP Library is free software; you can redistribute it and/or modify
! 10: it under the terms of the GNU Lesser General Public License as published by
! 11: the Free Software Foundation; either version 2.1 of the License, or (at your
! 12: option) any later version.
! 13:
! 14: The GNU MP Library is distributed in the hope that it will be useful, but
! 15: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 16: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 17: License for more details.
! 18:
! 19: You should have received a copy of the GNU Lesser General Public License
! 20: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 21: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 22: MA 02111-1307, USA.
! 23: */
! 24:
! 25:
! 26: /* Most routines have assertions representing what the mpn routines are
! 27: supposed to accept. Many of these reference routines do sensible things
! 28: outside these ranges (eg. for size==0), but the assertions are present to
! 29: pick up bad parameters passed here that are about to be passed the same
! 30: to a real mpn routine being compared. */
! 31:
! 32: /* always do assertion checking */
! 33: #define WANT_ASSERT 1
! 34:
! 35: #include <stdio.h> /* for NULL */
! 36: #include <stdlib.h> /* for malloc */
! 37: #include "gmp.h"
! 38: #include "gmp-impl.h"
! 39: #include "longlong.h"
! 40:
! 41: #include "ref.h"
! 42:
! 43:
! 44: /* Check overlap for a routine defined to work low to high. */
! 45: int
! 46: refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
! 47: {
! 48: return (!MPN_OVERLAP_P (dst, size, src, size) || dst <= src);
! 49: }
! 50:
! 51: /* Check overlap for a routine defined to work high to low. */
! 52: int
! 53: refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
! 54: {
! 55: return (!MPN_OVERLAP_P (dst, size, src, size) || dst >= src);
! 56: }
! 57:
! 58: /* Check overlap for a standard routine requiring equal or separate. */
! 59: int
! 60: refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
! 61: {
! 62: return (dst == src || !MPN_OVERLAP_P (dst, size, src, size));
! 63: }
! 64: int
! 65: refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
! 66: mp_size_t size)
! 67: {
! 68: return (refmpn_overlap_fullonly_p (dst, src1, size)
! 69: && refmpn_overlap_fullonly_p (dst, src2, size));
! 70: }
! 71:
! 72:
! 73: mp_ptr
! 74: refmpn_malloc_limbs (mp_size_t size)
! 75: {
! 76: mp_ptr p;
! 77: ASSERT (size >= 0);
! 78: if (size == 0)
! 79: size = 1;
! 80: p = (mp_ptr) malloc (size * BYTES_PER_MP_LIMB);
! 81: ASSERT (p != NULL);
! 82: return p;
! 83: }
! 84:
! 85: mp_ptr
! 86: refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
! 87: {
! 88: mp_ptr p;
! 89: p = refmpn_malloc_limbs (size);
! 90: refmpn_copyi (p, ptr, size);
! 91: return p;
! 92: }
! 93:
! 94: void
! 95: refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
! 96: {
! 97: mp_size_t i;
! 98: ASSERT (size >= 0);
! 99: for (i = 0; i < size; i++)
! 100: ptr[i] = value;
! 101: }
! 102:
! 103: int
! 104: refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
! 105: {
! 106: mp_size_t i;
! 107: for (i = 0; i < size; i++)
! 108: if (ptr[i] != 0)
! 109: return 0;
! 110: return 1;
! 111: }
! 112:
! 113: mp_limb_t
! 114: refmpn_msbone (mp_limb_t x)
! 115: {
! 116: mp_limb_t n = (mp_limb_t) 1 << (BITS_PER_MP_LIMB-1);
! 117:
! 118: while (n != 0)
! 119: {
! 120: if (x & n)
! 121: break;
! 122: n >>= 1;
! 123: }
! 124: return n;
! 125: }
! 126:
! 127: /* a mask of the MSB one bit and all bits below */
! 128: mp_limb_t
! 129: refmpn_msbone_mask (mp_limb_t x)
! 130: {
! 131: if (x == 0)
! 132: return 0;
! 133:
! 134: return (refmpn_msbone (x) << 1) - 1;
! 135: }
! 136:
! 137: void
! 138: refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 139: {
! 140: mp_size_t i;
! 141:
! 142: ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
! 143: ASSERT (size >= 0);
! 144:
! 145: for (i = 0; i < size; i++)
! 146: rp[i] = sp[i];
! 147: }
! 148:
! 149: void
! 150: refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 151: {
! 152: mp_size_t i;
! 153:
! 154: ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
! 155: ASSERT (size >= 0);
! 156:
! 157: for (i = size-1; i >= 0; i--)
! 158: rp[i] = sp[i];
! 159: }
! 160:
! 161: void
! 162: refmpn_com_n (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 163: {
! 164: mp_size_t i;
! 165:
! 166: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
! 167: ASSERT (size >= 1);
! 168:
! 169: for (i = 0; i < size; i++)
! 170: rp[i] = ~sp[i];
! 171: }
! 172:
! 173:
! 174: int
! 175: refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
! 176: {
! 177: mp_size_t i;
! 178:
! 179: ASSERT (size >= 1);
! 180:
! 181: for (i = size-1; i >= 0; i--)
! 182: {
! 183: if (xp[i] > yp[i]) return 1;
! 184: if (xp[i] < yp[i]) return -1;
! 185: }
! 186: return 0;
! 187: }
! 188:
! 189: int
! 190: refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
! 191: mp_srcptr yp, mp_size_t ysize)
! 192: {
! 193: int opp, cmp;
! 194:
! 195: opp = (xsize < ysize);
! 196: if (opp)
! 197: MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
! 198:
! 199: if (! refmpn_zero_p (xp+ysize, xsize-ysize))
! 200: cmp = 1;
! 201: else
! 202: cmp = refmpn_cmp (xp, yp, ysize);
! 203:
! 204: return (opp ? -cmp : cmp);
! 205: }
! 206:
! 207:
! 208: #define LOGOPS(operation) \
! 209: { \
! 210: mp_size_t i; \
! 211: \
! 212: ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
! 213: ASSERT (size >= 1); \
! 214: \
! 215: for (i = 0; i < size; i++) \
! 216: rp[i] = operation; \
! 217: }
! 218:
! 219: void
! 220: refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 221: {
! 222: LOGOPS (s1p[i] & s2p[i]);
! 223: }
! 224: void
! 225: refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 226: {
! 227: LOGOPS (s1p[i] & ~s2p[i]);
! 228: }
! 229: void
! 230: refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 231: {
! 232: LOGOPS (~(s1p[i] & s2p[i]));
! 233: }
! 234: void
! 235: refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 236: {
! 237: LOGOPS (s1p[i] | s2p[i]);
! 238: }
! 239: void
! 240: refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 241: {
! 242: LOGOPS (s1p[i] | ~s2p[i]);
! 243: }
! 244: void
! 245: refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 246: {
! 247: LOGOPS (~(s1p[i] | s2p[i]));
! 248: }
! 249: void
! 250: refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 251: {
! 252: LOGOPS (s1p[i] ^ s2p[i]);
! 253: }
! 254: void
! 255: refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 256: {
! 257: LOGOPS (~(s1p[i] ^ s2p[i]));
! 258: }
! 259:
! 260: /* set *w to x+y, return 0 or 1 carry */
! 261: mp_limb_t
! 262: add (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
! 263: {
! 264: *w = x + y;
! 265: return *w < x;
! 266: }
! 267:
! 268: /* set *w to x-y, return 0 or 1 borrow */
! 269: mp_limb_t
! 270: sub (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
! 271: {
! 272: *w = x - y;
! 273: return *w > x;
! 274: }
! 275:
! 276: /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
! 277: mp_limb_t
! 278: adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
! 279: {
! 280: mp_limb_t r;
! 281: ASSERT (c == 0 || c == 1);
! 282: r = add (w, x, y);
! 283: return r + add (w, *w, c);
! 284: }
! 285:
! 286: /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
! 287: mp_limb_t
! 288: sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
! 289: {
! 290: mp_limb_t r;
! 291: ASSERT (c == 0 || c == 1);
! 292: r = sub (w, x, y);
! 293: return r + sub (w, *w, c);
! 294: }
! 295:
! 296:
! 297: #define AORS_1(operation) \
! 298: { \
! 299: mp_limb_t i; \
! 300: \
! 301: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \
! 302: ASSERT (size >= 1); \
! 303: \
! 304: for (i = 0; i < size; i++) \
! 305: n = operation (&rp[i], sp[i], n); \
! 306: return n; \
! 307: }
! 308:
! 309: mp_limb_t
! 310: refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
! 311: {
! 312: AORS_1 (add);
! 313: }
! 314: mp_limb_t
! 315: refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
! 316: {
! 317: AORS_1 (sub);
! 318: }
! 319:
! 320: #define AORS_NC(operation) \
! 321: { \
! 322: mp_size_t i; \
! 323: \
! 324: ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
! 325: ASSERT (carry == 0 || carry == 1); \
! 326: ASSERT (size >= 1); \
! 327: \
! 328: for (i = 0; i < size; i++) \
! 329: carry = operation (&rp[i], s1p[i], s2p[i], carry); \
! 330: return carry; \
! 331: }
! 332:
! 333: mp_limb_t
! 334: refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
! 335: mp_limb_t carry)
! 336: {
! 337: AORS_NC (adc);
! 338: }
! 339: mp_limb_t
! 340: refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
! 341: mp_limb_t carry)
! 342: {
! 343: AORS_NC (sbb);
! 344: }
! 345:
! 346:
! 347: mp_limb_t
! 348: refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 349: {
! 350: return refmpn_add_nc (rp, s1p, s2p, size, 0);
! 351: }
! 352: mp_limb_t
! 353: refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 354: {
! 355: return refmpn_sub_nc (rp, s1p, s2p, size, 0);
! 356: }
! 357:
! 358:
! 359: #define AORS(aors_n, aors_1) \
! 360: { \
! 361: mp_limb_t c; \
! 362: ASSERT (s1size >= s2size); \
! 363: ASSERT (s2size >= 1); \
! 364: c = aors_n (rp, s1p, s2p, s2size); \
! 365: if (s1size-s2size != 0) \
! 366: c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \
! 367: return c; \
! 368: }
! 369: mp_limb_t
! 370: refmpn_add (mp_ptr rp,
! 371: mp_srcptr s1p, mp_size_t s1size,
! 372: mp_srcptr s2p, mp_size_t s2size)
! 373: {
! 374: AORS (refmpn_add_n, refmpn_add_1);
! 375: }
! 376: mp_limb_t
! 377: refmpn_sub (mp_ptr rp,
! 378: mp_srcptr s1p, mp_size_t s1size,
! 379: mp_srcptr s2p, mp_size_t s2size)
! 380: {
! 381: AORS (refmpn_sub_n, refmpn_sub_1);
! 382: }
! 383:
! 384:
! 385: #define SHIFTHIGH(x) ((x) << BITS_PER_MP_LIMB/2)
! 386: #define SHIFTLOW(x) ((x) >> BITS_PER_MP_LIMB/2)
! 387:
! 388: #define LOWMASK (((mp_limb_t) 1 << BITS_PER_MP_LIMB/2)-1)
! 389: #define HIGHMASK SHIFTHIGH(LOWMASK)
! 390:
! 391: #define LOWPART(x) ((x) & LOWMASK)
! 392: #define HIGHPART(x) SHIFTLOW((x) & HIGHMASK)
! 393:
! 394: /* set *hi,*lo to x*y */
! 395: void
! 396: mul (mp_limb_t *hi, mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
! 397: {
! 398: mp_limb_t s;
! 399:
! 400: *lo = LOWPART(x) * LOWPART(y);
! 401: *hi = HIGHPART(x) * HIGHPART(y);
! 402:
! 403: s = LOWPART(x) * HIGHPART(y);
! 404: ASSERT_NOCARRY (add (hi, *hi, add (lo, *lo, SHIFTHIGH(LOWPART(s)))));
! 405: ASSERT_NOCARRY (add (hi, *hi, HIGHPART(s)));
! 406:
! 407: s = HIGHPART(x) * LOWPART(y);
! 408: ASSERT_NOCARRY (add (hi, *hi, add (lo, *lo, SHIFTHIGH(LOWPART(s)))));
! 409: ASSERT_NOCARRY (add (hi, *hi, HIGHPART(s)));
! 410: }
! 411:
! 412: mp_limb_t
! 413: refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
! 414: mp_limb_t carry)
! 415: {
! 416: mp_size_t i;
! 417: mp_limb_t hi, lo;
! 418:
! 419: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
! 420: ASSERT (size >= 1);
! 421:
! 422: for (i = 0; i < size; i++)
! 423: {
! 424: mul (&hi, &lo, sp[i], multiplier);
! 425: ASSERT_NOCARRY (add (&hi, hi, add (&lo, lo, carry)));
! 426: rp[i] = lo;
! 427: carry = hi;
! 428: }
! 429:
! 430: return carry;
! 431: }
! 432:
! 433: mp_limb_t
! 434: refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
! 435: {
! 436: return refmpn_mul_1c (rp, sp, size, multiplier, 0);
! 437: }
! 438:
! 439:
! 440: #define AORSMUL_1C(operation_n) \
! 441: { \
! 442: mp_ptr p = refmpn_malloc_limbs (size); \
! 443: mp_limb_t ret; \
! 444: \
! 445: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \
! 446: ASSERT (size >= 1); \
! 447: \
! 448: ret = refmpn_mul_1c (p, sp, size, multiplier, carry); \
! 449: ret += operation_n (rp, rp, p, size); \
! 450: \
! 451: free (p); \
! 452: return ret; \
! 453: }
! 454:
! 455: mp_limb_t
! 456: refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
! 457: mp_limb_t multiplier, mp_limb_t carry)
! 458: {
! 459: AORSMUL_1C (refmpn_add_n);
! 460: }
! 461: mp_limb_t
! 462: refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
! 463: mp_limb_t multiplier, mp_limb_t carry)
! 464: {
! 465: AORSMUL_1C (refmpn_sub_n);
! 466: }
! 467:
! 468:
! 469: mp_limb_t
! 470: refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
! 471: {
! 472: return refmpn_addmul_1c (rp, sp, size, multiplier, 0);
! 473: }
! 474: mp_limb_t
! 475: refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
! 476: {
! 477: return refmpn_submul_1c (rp, sp, size, multiplier, 0);
! 478: }
! 479:
! 480:
! 481: mp_limb_t
! 482: refmpn_addsub_nc (mp_ptr r1p, mp_ptr r2p,
! 483: mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
! 484: mp_limb_t carry)
! 485: {
! 486: mp_ptr p;
! 487: mp_limb_t acy, scy;
! 488:
! 489: /* Destinations can't overlap. */
! 490: ASSERT (! MPN_OVERLAP_P (r1p, size, r2p, size));
! 491: ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
! 492: ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
! 493: ASSERT (size >= 1);
! 494:
! 495: p = refmpn_malloc_limbs (size);
! 496: acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
! 497: scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
! 498: refmpn_copyi (r1p, p, size);
! 499: free (p);
! 500: return 2 * acy + scy;
! 501: }
! 502:
! 503: mp_limb_t
! 504: refmpn_addsub_n (mp_ptr r1p, mp_ptr r2p,
! 505: mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 506: {
! 507: return refmpn_addsub_nc (r1p, r2p, s1p, s2p, size, 0);
! 508: }
! 509:
! 510:
! 511: /* Right shift hi,lo and return the low limb of the result.
! 512: Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
! 513: mp_limb_t
! 514: rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
! 515: {
! 516: ASSERT (shift >= 0 && shift < BITS_PER_MP_LIMB);
! 517: if (shift == 0)
! 518: return lo;
! 519: else
! 520: return (hi << (BITS_PER_MP_LIMB-shift)) | (lo >> shift);
! 521: }
! 522:
! 523: /* Left shift hi,lo and return the high limb of the result.
! 524: Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
! 525: mp_limb_t
! 526: lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
! 527: {
! 528: ASSERT (shift >= 0 && shift < BITS_PER_MP_LIMB);
! 529: if (shift == 0)
! 530: return hi;
! 531: else
! 532: return (hi << shift) | (lo >> (BITS_PER_MP_LIMB-shift));
! 533: }
! 534:
! 535:
! 536: mp_limb_t
! 537: refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
! 538: {
! 539: mp_limb_t ret;
! 540: mp_size_t i;
! 541:
! 542: ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
! 543: ASSERT (size >= 1);
! 544: ASSERT (shift >= 1 && shift < BITS_PER_MP_LIMB);
! 545:
! 546: ret = rshift_make (sp[0], 0, shift);
! 547:
! 548: for (i = 0; i < size-1; i++)
! 549: rp[i] = rshift_make (sp[i+1], sp[i], shift);
! 550:
! 551: rp[i] = rshift_make (0, sp[i], shift);
! 552: return ret;
! 553: }
! 554:
! 555: mp_limb_t
! 556: refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
! 557: {
! 558: mp_limb_t ret;
! 559: mp_size_t i;
! 560:
! 561: ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
! 562: ASSERT (size >= 1);
! 563: ASSERT (shift >= 1 && shift < BITS_PER_MP_LIMB);
! 564:
! 565: ret = lshift_make (0, sp[size-1], shift);
! 566:
! 567: for (i = size-2; i >= 0; i--)
! 568: rp[i+1] = lshift_make (sp[i+1], sp[i], shift);
! 569:
! 570: rp[i+1] = lshift_make (sp[i+1], 0, shift);
! 571: return ret;
! 572: }
! 573:
! 574:
! 575: /* Divide h,l by d, producing a quotient *q and remainder *r.
! 576: Must have h < d.
! 577: __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
! 578: void
! 579: div1 (mp_limb_t *q, mp_limb_t *r, mp_limb_t h, mp_limb_t l, mp_limb_t d)
! 580: {
! 581: int n;
! 582:
! 583: ASSERT (d != 0);
! 584: ASSERT (h < d);
! 585:
! 586: #if 0
! 587: udiv_qrnnd (*q, *r, h, l, d);
! 588: return;
! 589: #endif
! 590:
! 591: for (n = 0; !(d & MP_LIMB_T_HIGHBIT); n++)
! 592: d <<= 1;
! 593:
! 594: h = lshift_make (h, l, n);
! 595: l <<= n;
! 596:
! 597: __udiv_qrnnd_c (*q, *r, h, l, d);
! 598: *r >>= n;
! 599: }
! 600:
! 601: mp_limb_t
! 602: refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
! 603: mp_limb_t carry)
! 604: {
! 605: mp_ptr sp_orig;
! 606: mp_ptr prod;
! 607: mp_limb_t carry_orig;
! 608: mp_size_t i;
! 609:
! 610: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
! 611: ASSERT (size >= 0);
! 612: ASSERT (carry < divisor);
! 613:
! 614: if (size == 0)
! 615: return carry;
! 616:
! 617: sp_orig = refmpn_memdup_limbs (sp, size);
! 618: prod = refmpn_malloc_limbs (size);
! 619: carry_orig = carry;
! 620:
! 621: for (i = size-1; i >= 0; i--)
! 622: div1 (&rp[i], &carry, carry, sp[i], divisor);
! 623:
! 624: /* check by multiplying back */
! 625: #if 0
! 626: printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
! 627: size, divisor, carry_orig, carry);
! 628: mpn_trace("s",sp_copy,size);
! 629: mpn_trace("r",rp,size);
! 630: printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
! 631: mpn_trace("p",prod,size);
! 632: #endif
! 633: ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
! 634: ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
! 635: free (sp_orig);
! 636: free (prod);
! 637:
! 638: return carry;
! 639: }
! 640:
! 641: mp_limb_t
! 642: refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
! 643: {
! 644: return refmpn_divmod_1c (rp, sp, size, divisor, 0);
! 645: }
! 646:
! 647:
! 648: mp_limb_t
! 649: refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
! 650: mp_limb_t carry)
! 651: {
! 652: mp_ptr p = refmpn_malloc_limbs (size);
! 653: carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
! 654: free (p);
! 655: return carry;
! 656: }
! 657:
! 658: mp_limb_t
! 659: refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
! 660: {
! 661: return refmpn_mod_1c (sp, size, divisor, 0);
! 662: }
! 663:
! 664: mp_limb_t
! 665: refmpn_mod_1_rshift (mp_srcptr sp, mp_size_t size,
! 666: unsigned shift, mp_limb_t divisor)
! 667: {
! 668: mp_limb_t r;
! 669: mp_ptr p = refmpn_malloc_limbs (size);
! 670: refmpn_rshift (p, sp, size, shift);
! 671: r = refmpn_mod_1 (p, size, divisor);
! 672: free (p);
! 673: return r;
! 674: }
! 675:
! 676:
! 677: mp_limb_t
! 678: refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
! 679: mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
! 680: mp_limb_t carry)
! 681: {
! 682: mp_ptr z;
! 683:
! 684: z = refmpn_malloc_limbs (xsize);
! 685: refmpn_fill (z, xsize, 0);
! 686:
! 687: carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
! 688: carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);
! 689:
! 690: free (z);
! 691: return carry;
! 692: }
! 693:
! 694: mp_limb_t
! 695: refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
! 696: mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
! 697: {
! 698: return refmpn_divrem_1c (rp, xsize, sp, size, divisor, 0);
! 699: }
! 700:
! 701:
! 702: /* As given in the manual, the divexact method gives quotient q and return
! 703: value c satisfying
! 704:
! 705: c*b^n + a-i == 3*q
! 706:
! 707: where a=dividend, i=initial carry, b=2^BITS_PER_MP_LIMB, and n=size.
! 708:
! 709: If a-i is divisible by 3 then c==0 and a plain divmod gives the quotient.
! 710: If (a-i)%3==r then c is a high limb tacked on that will turn r into 0.
! 711: Because 2^BITS_PER_MP_LIMB==1mod3 (so long as BITS_PER_MP_LIMB is even)
! 712: it's enough to set c=3-r, ie. if r=1 then c=2, or if r=2 then c=1.
! 713:
! 714: If a-i produces a borrow then refmpn_sub_1 leaves a twos complement
! 715: negative, ie. b^n+a-i, and the calculation produces c1 satisfying
! 716:
! 717: c1*b^n + b^n+a-i == 3*q
! 718:
! 719: From which clearly c=c1+1, so it's enough to just add any borrow to the
! 720: return value otherwise calculated.
! 721:
! 722: A borrow only occurs when a==0 or a==1, and, by the same reasoning as in
! 723: mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
! 724: or 1 respectively, so with 1 added the final return value is still in the
! 725: prescribed range 0 to 2. */
! 726:
! 727: mp_limb_t
! 728: refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
! 729: {
! 730: mp_ptr spcopy;
! 731: mp_limb_t c, cs;
! 732:
! 733: ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
! 734: ASSERT (size >= 1);
! 735: ASSERT (carry <= 2);
! 736:
! 737: spcopy = refmpn_memdup_limbs (sp, size);
! 738: cs = refmpn_sub_1 (spcopy, spcopy, size, carry);
! 739:
! 740: c = refmpn_divmod_1 (rp, spcopy, size, 3);
! 741: if (c != 0)
! 742: {
! 743: ASSERT ((BITS_PER_MP_LIMB % 2) == 0);
! 744: c = 3-c;
! 745: ASSERT_NOCARRY (refmpn_divmod_1c (rp, spcopy, size, 3, c));
! 746: }
! 747:
! 748: c += cs;
! 749: ASSERT (c <= 2);
! 750:
! 751: free (spcopy);
! 752: return c;
! 753: }
! 754:
! 755: mp_limb_t
! 756: refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
! 757: {
! 758: return refmpn_divexact_by3c (rp, sp, size, 0);
! 759: }
! 760:
! 761:
! 762: /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
! 763: void
! 764: refmpn_mul_basecase (mp_ptr prodp,
! 765: mp_srcptr up, mp_size_t usize,
! 766: mp_srcptr vp, mp_size_t vsize)
! 767: {
! 768: mp_size_t i;
! 769:
! 770: ASSERT (! MPN_OVERLAP_P (prodp, usize+vsize, up, usize));
! 771: ASSERT (! MPN_OVERLAP_P (prodp, usize+vsize, vp, vsize));
! 772: ASSERT (usize >= vsize);
! 773: ASSERT (vsize >= 1);
! 774:
! 775: prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
! 776: for (i = 1; i < vsize; i++)
! 777: prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
! 778: }
! 779:
! 780: void
! 781: refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
! 782: {
! 783: refmpn_mul_basecase (prodp, up, size, vp, size);
! 784: }
! 785:
! 786: void
! 787: refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
! 788: {
! 789: refmpn_mul_basecase (dst, src, size, src, size);
! 790: }
! 791:
! 792:
! 793: mp_limb_t
! 794: refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
! 795: {
! 796: mp_limb_t x;
! 797: int twos;
! 798:
! 799: ASSERT (y != 0);
! 800: ASSERT (! refmpn_zero_p (xp, xsize));
! 801:
! 802: x = mpn_mod_1 (xp, xsize, y);
! 803: if (x == 0)
! 804: return y;
! 805:
! 806: twos = 0;
! 807: while ((x & 1) == 0 && (y & 1) == 0)
! 808: {
! 809: x >>= 1;
! 810: y >>= 1;
! 811: twos++;
! 812: }
! 813:
! 814: for (;;)
! 815: {
! 816: while ((x & 1) == 0) x >>= 1;
! 817: while ((y & 1) == 0) y >>= 1;
! 818:
! 819: if (x < y)
! 820: MP_LIMB_T_SWAP (x, y);
! 821:
! 822: x -= y;
! 823: if (x == 0)
! 824: break;
! 825: }
! 826:
! 827: return y << twos;
! 828: }
! 829:
! 830:
! 831: unsigned
! 832: refmpn_count_trailing_zeros (mp_limb_t x)
! 833: {
! 834: unsigned n = 0;
! 835:
! 836: ASSERT (x != 0);
! 837: while ((x & 1) == 0)
! 838: {
! 839: x >>= 1;
! 840: n++;
! 841: }
! 842: return n;
! 843: }
! 844:
! 845: mp_size_t
! 846: refmpn_strip_twos (mp_ptr p, mp_size_t size)
! 847: {
! 848: mp_size_t limbs;
! 849: unsigned shift;
! 850:
! 851: ASSERT (size >= 1);
! 852: ASSERT (! refmpn_zero_p (p, size));
! 853:
! 854: for (limbs = 0; p[0] == 0; limbs++)
! 855: {
! 856: MPN_COPY_INCR (p, p+1, size-1);
! 857: p[size-1] = 0;
! 858: }
! 859:
! 860: shift = refmpn_count_trailing_zeros (p[0]);
! 861: if (shift)
! 862: refmpn_rshift (p, p, size, shift);
! 863:
! 864: return limbs*BITS_PER_MP_LIMB + shift;
! 865: }
! 866:
! 867: mp_limb_t
! 868: refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
! 869: {
! 870: int cmp;
! 871:
! 872: ASSERT (ysize >= 1);
! 873: ASSERT (xsize >= ysize);
! 874: ASSERT ((xp[0] & 1) != 0);
! 875: ASSERT ((yp[0] & 1) != 0);
! 876: ASSERT (xp[xsize-1] != 0);
! 877: ASSERT (yp[ysize-1] != 0);
! 878: ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
! 879: ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
! 880: ASSERT (! MPN_OVERLAP_P (xp, xsize, yp, ysize));
! 881: if (xsize == ysize)
! 882: ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
! 883:
! 884: refmpn_strip_twos (xp, xsize);
! 885: MPN_NORMALIZE (xp, xsize);
! 886: MPN_NORMALIZE (yp, ysize);
! 887:
! 888: for (;;)
! 889: {
! 890: cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
! 891: if (cmp == 0)
! 892: break;
! 893: if (cmp < 0)
! 894: MPN_PTR_SWAP (xp,xsize, yp,ysize);
! 895:
! 896: ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));
! 897:
! 898: refmpn_strip_twos (xp, xsize);
! 899: MPN_NORMALIZE (xp, xsize);
! 900: }
! 901:
! 902: refmpn_copyi (gp, xp, xsize);
! 903: return xsize;
! 904: }
! 905:
! 906:
! 907: unsigned long
! 908: refmpn_popcount (mp_srcptr sp, mp_size_t size)
! 909: {
! 910: unsigned long count = 0;
! 911: mp_size_t i;
! 912: int j;
! 913: mp_limb_t l;
! 914:
! 915: ASSERT (size >= 0);
! 916: for (i = 0; i < size; i++)
! 917: {
! 918: l = sp[i];
! 919: for (j = 0; j < BITS_PER_MP_LIMB; j++)
! 920: {
! 921: count += (l & 1);
! 922: l >>= 1;
! 923: }
! 924: }
! 925: return count;
! 926: }
! 927:
! 928: unsigned long
! 929: refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
! 930: {
! 931: mp_ptr d;
! 932: unsigned long count;
! 933:
! 934: if (size == 0)
! 935: return 0;
! 936:
! 937: d = refmpn_malloc_limbs (size);
! 938: refmpn_xor_n (d, s1p, s2p, size);
! 939: count = refmpn_popcount (d, size);
! 940: free (d);
! 941: return count;
! 942: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>