[BACK]Return to ref.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / tests

Annotation of OpenXM_contrib/gmp/mpn/tests/ref.c, Revision 1.1.1.1

1.1       maekawa     1: /* Reference mpn functions, designed to be simple, portable and independent
                      2:    of the normal gmp code.  Speed isn't a consideration.  */
                      3:
                      4: /*
                      5: Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
                      6:
                      7: This file is part of the GNU MP Library.
                      8:
                      9: The GNU MP Library is free software; you can redistribute it and/or modify
                     10: it under the terms of the GNU Lesser General Public License as published by
                     11: the Free Software Foundation; either version 2.1 of the License, or (at your
                     12: option) any later version.
                     13:
                     14: The GNU MP Library is distributed in the hope that it will be useful, but
                     15: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     16: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     17: License for more details.
                     18:
                     19: You should have received a copy of the GNU Lesser General Public License
                     20: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     21: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     22: MA 02111-1307, USA.
                     23: */
                     24:
                     25:
                     26: /* Most routines have assertions representing what the mpn routines are
                     27:    supposed to accept.  Many of these reference routines do sensible things
                     28:    outside these ranges (eg. for size==0), but the assertions are present to
                     29:    pick up bad parameters passed here that are about to be passed the same
                     30:    to a real mpn routine being compared.  */
                     31:
                     32: /* always do assertion checking */
                     33: #define WANT_ASSERT  1
                     34:
                     35: #include <stdio.h>  /* for NULL */
                     36: #include <stdlib.h> /* for malloc */
                     37: #include "gmp.h"
                     38: #include "gmp-impl.h"
                     39: #include "longlong.h"
                     40:
                     41: #include "ref.h"
                     42:
                     43:
                     44: /* Check overlap for a routine defined to work low to high. */
                     45: int
                     46: refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
                     47: {
                     48:   return (!MPN_OVERLAP_P (dst, size, src, size) || dst <= src);
                     49: }
                     50:
                     51: /* Check overlap for a routine defined to work high to low. */
                     52: int
                     53: refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
                     54: {
                     55:   return (!MPN_OVERLAP_P (dst, size, src, size) || dst >= src);
                     56: }
                     57:
                     58: /* Check overlap for a standard routine requiring equal or separate. */
                     59: int
                     60: refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
                     61: {
                     62:   return (dst == src || !MPN_OVERLAP_P (dst, size, src, size));
                     63: }
                     64: int
                     65: refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
                     66:                                mp_size_t size)
                     67: {
                     68:   return (refmpn_overlap_fullonly_p (dst, src1, size)
                     69:           && refmpn_overlap_fullonly_p (dst, src2, size));
                     70: }
                     71:
                     72:
                     73: mp_ptr
                     74: refmpn_malloc_limbs (mp_size_t size)
                     75: {
                     76:   mp_ptr  p;
                     77:   ASSERT (size >= 0);
                     78:   if (size == 0)
                     79:     size = 1;
                     80:   p = (mp_ptr) malloc (size * BYTES_PER_MP_LIMB);
                     81:   ASSERT (p != NULL);
                     82:   return p;
                     83: }
                     84:
                     85: mp_ptr
                     86: refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
                     87: {
                     88:   mp_ptr  p;
                     89:   p = refmpn_malloc_limbs (size);
                     90:   refmpn_copyi (p, ptr, size);
                     91:   return p;
                     92: }
                     93:
                     94: void
                     95: refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
                     96: {
                     97:   mp_size_t  i;
                     98:   ASSERT (size >= 0);
                     99:   for (i = 0; i < size; i++)
                    100:     ptr[i] = value;
                    101: }
                    102:
                    103: int
                    104: refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
                    105: {
                    106:   mp_size_t  i;
                    107:   for (i = 0; i < size; i++)
                    108:     if (ptr[i] != 0)
                    109:       return 0;
                    110:   return 1;
                    111: }
                    112:
                    113: mp_limb_t
                    114: refmpn_msbone (mp_limb_t x)
                    115: {
                    116:   mp_limb_t  n = (mp_limb_t) 1 << (BITS_PER_MP_LIMB-1);
                    117:
                    118:   while (n != 0)
                    119:     {
                    120:       if (x & n)
                    121:         break;
                    122:       n >>= 1;
                    123:     }
                    124:   return n;
                    125: }
                    126:
                    127: /* a mask of the MSB one bit and all bits below */
                    128: mp_limb_t
                    129: refmpn_msbone_mask (mp_limb_t x)
                    130: {
                    131:   if (x == 0)
                    132:     return 0;
                    133:
                    134:   return (refmpn_msbone (x) << 1) - 1;
                    135: }
                    136:
                    137: void
                    138: refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    139: {
                    140:   mp_size_t i;
                    141:
                    142:   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
                    143:   ASSERT (size >= 0);
                    144:
                    145:   for (i = 0; i < size; i++)
                    146:     rp[i] = sp[i];
                    147: }
                    148:
                    149: void
                    150: refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    151: {
                    152:   mp_size_t i;
                    153:
                    154:   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
                    155:   ASSERT (size >= 0);
                    156:
                    157:   for (i = size-1; i >= 0; i--)
                    158:     rp[i] = sp[i];
                    159: }
                    160:
                    161: void
                    162: refmpn_com_n (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    163: {
                    164:   mp_size_t i;
                    165:
                    166:   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
                    167:   ASSERT (size >= 1);
                    168:
                    169:   for (i = 0; i < size; i++)
                    170:     rp[i] = ~sp[i];
                    171: }
                    172:
                    173:
                    174: int
                    175: refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
                    176: {
                    177:   mp_size_t  i;
                    178:
                    179:   ASSERT (size >= 1);
                    180:
                    181:   for (i = size-1; i >= 0; i--)
                    182:     {
                    183:       if (xp[i] > yp[i])  return 1;
                    184:       if (xp[i] < yp[i])  return -1;
                    185:     }
                    186:   return 0;
                    187: }
                    188:
                    189: int
                    190: refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
                    191:                      mp_srcptr yp, mp_size_t ysize)
                    192: {
                    193:   int  opp, cmp;
                    194:
                    195:   opp = (xsize < ysize);
                    196:   if (opp)
                    197:     MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
                    198:
                    199:   if (! refmpn_zero_p (xp+ysize, xsize-ysize))
                    200:     cmp = 1;
                    201:   else
                    202:     cmp = refmpn_cmp (xp, yp, ysize);
                    203:
                    204:   return (opp ? -cmp : cmp);
                    205: }
                    206:
                    207:
                    208: #define LOGOPS(operation)                                               \
                    209:   {                                                                     \
                    210:     mp_size_t  i;                                                       \
                    211:                                                                         \
                    212:     ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
                    213:     ASSERT (size >= 1);                                                 \
                    214:                                                                         \
                    215:     for (i = 0; i < size; i++)                                          \
                    216:       rp[i] = operation;                                                \
                    217:   }
                    218:
                    219: void
                    220: refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    221: {
                    222:   LOGOPS (s1p[i] &  s2p[i]);
                    223: }
                    224: void
                    225: refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    226: {
                    227:   LOGOPS (s1p[i] & ~s2p[i]);
                    228: }
                    229: void
                    230: refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    231: {
                    232:   LOGOPS (~(s1p[i] &  s2p[i]));
                    233: }
                    234: void
                    235: refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    236: {
                    237:   LOGOPS (s1p[i] |  s2p[i]);
                    238: }
                    239: void
                    240: refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    241: {
                    242:   LOGOPS (s1p[i] | ~s2p[i]);
                    243: }
                    244: void
                    245: refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    246: {
                    247:   LOGOPS (~(s1p[i] |  s2p[i]));
                    248: }
                    249: void
                    250: refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    251: {
                    252:   LOGOPS (s1p[i] ^  s2p[i]);
                    253: }
                    254: void
                    255: refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    256: {
                    257:   LOGOPS (~(s1p[i] ^  s2p[i]));
                    258: }
                    259:
                    260: /* set *w to x+y, return 0 or 1 carry */
                    261: mp_limb_t
                    262: add (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
                    263: {
                    264:   *w = x + y;
                    265:   return *w < x;
                    266: }
                    267:
                    268: /* set *w to x-y, return 0 or 1 borrow */
                    269: mp_limb_t
                    270: sub (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
                    271: {
                    272:   *w = x - y;
                    273:   return *w > x;
                    274: }
                    275:
                    276: /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
                    277: mp_limb_t
                    278: adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
                    279: {
                    280:   mp_limb_t  r;
                    281:   ASSERT (c == 0 || c == 1);
                    282:   r = add (w, x, y);
                    283:   return r + add (w, *w, c);
                    284: }
                    285:
                    286: /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
                    287: mp_limb_t
                    288: sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
                    289: {
                    290:   mp_limb_t  r;
                    291:   ASSERT (c == 0 || c == 1);
                    292:   r = sub (w, x, y);
                    293:   return r + sub (w, *w, c);
                    294: }
                    295:
                    296:
                    297: #define AORS_1(operation)                               \
                    298:   {                                                     \
                    299:     mp_limb_t  i;                                       \
                    300:                                                         \
                    301:     ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
                    302:     ASSERT (size >= 1);                                 \
                    303:                                                         \
                    304:     for (i = 0; i < size; i++)                          \
                    305:       n = operation (&rp[i], sp[i], n);                 \
                    306:     return n;                                           \
                    307:   }
                    308:
                    309: mp_limb_t
                    310: refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
                    311: {
                    312:   AORS_1 (add);
                    313: }
                    314: mp_limb_t
                    315: refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
                    316: {
                    317:   AORS_1 (sub);
                    318: }
                    319:
                    320: #define AORS_NC(operation)                                              \
                    321:   {                                                                     \
                    322:     mp_size_t  i;                                                       \
                    323:                                                                         \
                    324:     ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
                    325:     ASSERT (carry == 0 || carry == 1);                                  \
                    326:     ASSERT (size >= 1);                                                 \
                    327:                                                                         \
                    328:     for (i = 0; i < size; i++)                                          \
                    329:       carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
                    330:     return carry;                                                       \
                    331:   }
                    332:
                    333: mp_limb_t
                    334: refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
                    335:                mp_limb_t carry)
                    336: {
                    337:   AORS_NC (adc);
                    338: }
                    339: mp_limb_t
                    340: refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
                    341:                mp_limb_t carry)
                    342: {
                    343:   AORS_NC (sbb);
                    344: }
                    345:
                    346:
                    347: mp_limb_t
                    348: refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    349: {
                    350:   return refmpn_add_nc (rp, s1p, s2p, size, 0);
                    351: }
                    352: mp_limb_t
                    353: refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    354: {
                    355:   return refmpn_sub_nc (rp, s1p, s2p, size, 0);
                    356: }
                    357:
                    358:
                    359: #define AORS(aors_n, aors_1)                                    \
                    360:   {                                                             \
                    361:     mp_limb_t  c;                                               \
                    362:     ASSERT (s1size >= s2size);                                  \
                    363:     ASSERT (s2size >= 1);                                       \
                    364:     c = aors_n (rp, s1p, s2p, s2size);                          \
                    365:     if (s1size-s2size != 0)                                     \
                    366:       c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \
                    367:     return c;                                                   \
                    368:   }
                    369: mp_limb_t
                    370: refmpn_add (mp_ptr rp,
                    371:             mp_srcptr s1p, mp_size_t s1size,
                    372:             mp_srcptr s2p, mp_size_t s2size)
                    373: {
                    374:   AORS (refmpn_add_n, refmpn_add_1);
                    375: }
                    376: mp_limb_t
                    377: refmpn_sub (mp_ptr rp,
                    378:             mp_srcptr s1p, mp_size_t s1size,
                    379:             mp_srcptr s2p, mp_size_t s2size)
                    380: {
                    381:   AORS (refmpn_sub_n, refmpn_sub_1);
                    382: }
                    383:
                    384:
                    385: #define SHIFTHIGH(x) ((x) << BITS_PER_MP_LIMB/2)
                    386: #define SHIFTLOW(x)  ((x) >> BITS_PER_MP_LIMB/2)
                    387:
                    388: #define LOWMASK   (((mp_limb_t) 1 << BITS_PER_MP_LIMB/2)-1)
                    389: #define HIGHMASK  SHIFTHIGH(LOWMASK)
                    390:
                    391: #define LOWPART(x)   ((x) & LOWMASK)
                    392: #define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)
                    393:
                    394: /* set *hi,*lo to x*y */
                    395: void
                    396: mul (mp_limb_t *hi, mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
                    397: {
                    398:   mp_limb_t s;
                    399:
                    400:   *lo = LOWPART(x) * LOWPART(y);
                    401:   *hi = HIGHPART(x) * HIGHPART(y);
                    402:
                    403:   s = LOWPART(x) * HIGHPART(y);
                    404:   ASSERT_NOCARRY (add (hi, *hi, add (lo, *lo, SHIFTHIGH(LOWPART(s)))));
                    405:   ASSERT_NOCARRY (add (hi, *hi, HIGHPART(s)));
                    406:
                    407:   s = HIGHPART(x) * LOWPART(y);
                    408:   ASSERT_NOCARRY (add (hi, *hi, add (lo, *lo, SHIFTHIGH(LOWPART(s)))));
                    409:   ASSERT_NOCARRY (add (hi, *hi, HIGHPART(s)));
                    410: }
                    411:
                    412: mp_limb_t
                    413: refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
                    414:                mp_limb_t carry)
                    415: {
                    416:   mp_size_t  i;
                    417:   mp_limb_t  hi, lo;
                    418:
                    419:   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
                    420:   ASSERT (size >= 1);
                    421:
                    422:   for (i = 0; i < size; i++)
                    423:     {
                    424:       mul (&hi, &lo, sp[i], multiplier);
                    425:       ASSERT_NOCARRY (add (&hi, hi, add (&lo, lo, carry)));
                    426:       rp[i] = lo;
                    427:       carry = hi;
                    428:     }
                    429:
                    430:   return carry;
                    431: }
                    432:
                    433: mp_limb_t
                    434: refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
                    435: {
                    436:   return refmpn_mul_1c (rp, sp, size, multiplier, 0);
                    437: }
                    438:
                    439:
                    440: #define AORSMUL_1C(operation_n)                                 \
                    441:   {                                                             \
                    442:     mp_ptr     p = refmpn_malloc_limbs (size);                  \
                    443:     mp_limb_t  ret;                                             \
                    444:                                                                 \
                    445:     ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
                    446:     ASSERT (size >= 1);                                         \
                    447:                                                                 \
                    448:     ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
                    449:     ret += operation_n (rp, rp, p, size);                       \
                    450:                                                                 \
                    451:     free (p);                                                   \
                    452:     return ret;                                                 \
                    453:   }
                    454:
                    455: mp_limb_t
                    456: refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                    457:                   mp_limb_t multiplier, mp_limb_t carry)
                    458: {
                    459:   AORSMUL_1C (refmpn_add_n);
                    460: }
                    461: mp_limb_t
                    462: refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                    463:                   mp_limb_t multiplier, mp_limb_t carry)
                    464: {
                    465:   AORSMUL_1C (refmpn_sub_n);
                    466: }
                    467:
                    468:
                    469: mp_limb_t
                    470: refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
                    471: {
                    472:   return refmpn_addmul_1c (rp, sp, size, multiplier, 0);
                    473: }
                    474: mp_limb_t
                    475: refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
                    476: {
                    477:   return refmpn_submul_1c (rp, sp, size, multiplier, 0);
                    478: }
                    479:
                    480:
                    481: mp_limb_t
                    482: refmpn_addsub_nc (mp_ptr r1p, mp_ptr r2p,
                    483:                   mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
                    484:                   mp_limb_t carry)
                    485: {
                    486:   mp_ptr p;
                    487:   mp_limb_t acy, scy;
                    488:
                    489:   /* Destinations can't overlap. */
                    490:   ASSERT (! MPN_OVERLAP_P (r1p, size, r2p, size));
                    491:   ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
                    492:   ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
                    493:   ASSERT (size >= 1);
                    494:
                    495:   p = refmpn_malloc_limbs (size);
                    496:   acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
                    497:   scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
                    498:   refmpn_copyi (r1p, p, size);
                    499:   free (p);
                    500:   return 2 * acy + scy;
                    501: }
                    502:
                    503: mp_limb_t
                    504: refmpn_addsub_n (mp_ptr r1p, mp_ptr r2p,
                    505:                 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    506: {
                    507:   return refmpn_addsub_nc (r1p, r2p, s1p, s2p, size, 0);
                    508: }
                    509:
                    510:
                    511: /* Right shift hi,lo and return the low limb of the result.
                    512:    Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
                    513: mp_limb_t
                    514: rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
                    515: {
                    516:   ASSERT (shift >= 0 && shift < BITS_PER_MP_LIMB);
                    517:   if (shift == 0)
                    518:     return lo;
                    519:   else
                    520:     return (hi << (BITS_PER_MP_LIMB-shift)) | (lo >> shift);
                    521: }
                    522:
                    523: /* Left shift hi,lo and return the high limb of the result.
                    524:    Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
                    525: mp_limb_t
                    526: lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
                    527: {
                    528:   ASSERT (shift >= 0 && shift < BITS_PER_MP_LIMB);
                    529:   if (shift == 0)
                    530:     return hi;
                    531:   else
                    532:     return (hi << shift) | (lo >> (BITS_PER_MP_LIMB-shift));
                    533: }
                    534:
                    535:
                    536: mp_limb_t
                    537: refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
                    538: {
                    539:   mp_limb_t  ret;
                    540:   mp_size_t  i;
                    541:
                    542:   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
                    543:   ASSERT (size >= 1);
                    544:   ASSERT (shift >= 1 && shift < BITS_PER_MP_LIMB);
                    545:
                    546:   ret = rshift_make (sp[0], 0, shift);
                    547:
                    548:   for (i = 0; i < size-1; i++)
                    549:     rp[i] = rshift_make (sp[i+1], sp[i], shift);
                    550:
                    551:   rp[i] = rshift_make (0, sp[i], shift);
                    552:   return ret;
                    553: }
                    554:
                    555: mp_limb_t
                    556: refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
                    557: {
                    558:   mp_limb_t  ret;
                    559:   mp_size_t  i;
                    560:
                    561:   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
                    562:   ASSERT (size >= 1);
                    563:   ASSERT (shift >= 1 && shift < BITS_PER_MP_LIMB);
                    564:
                    565:   ret = lshift_make (0, sp[size-1], shift);
                    566:
                    567:   for (i = size-2; i >= 0; i--)
                    568:     rp[i+1] = lshift_make (sp[i+1], sp[i], shift);
                    569:
                    570:   rp[i+1] = lshift_make (sp[i+1], 0, shift);
                    571:   return ret;
                    572: }
                    573:
                    574:
                    575: /* Divide h,l by d, producing a quotient *q and remainder *r.
                    576:    Must have h < d.
                    577:    __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
                    578: void
                    579: div1 (mp_limb_t *q, mp_limb_t *r, mp_limb_t h, mp_limb_t l, mp_limb_t d)
                    580: {
                    581:   int  n;
                    582:
                    583:   ASSERT (d != 0);
                    584:   ASSERT (h < d);
                    585:
                    586: #if 0
                    587:   udiv_qrnnd (*q, *r, h, l, d);
                    588:   return;
                    589: #endif
                    590:
                    591:   for (n = 0; !(d & MP_LIMB_T_HIGHBIT); n++)
                    592:     d <<= 1;
                    593:
                    594:   h = lshift_make (h, l, n);
                    595:   l <<= n;
                    596:
                    597:   __udiv_qrnnd_c (*q, *r, h, l, d);
                    598:   *r >>= n;
                    599: }
                    600:
                    601: mp_limb_t
                    602: refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                    603:                  mp_limb_t carry)
                    604: {
                    605:   mp_ptr     sp_orig;
                    606:   mp_ptr     prod;
                    607:   mp_limb_t  carry_orig;
                    608:   mp_size_t  i;
                    609:
                    610:   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
                    611:   ASSERT (size >= 0);
                    612:   ASSERT (carry < divisor);
                    613:
                    614:   if (size == 0)
                    615:     return carry;
                    616:
                    617:   sp_orig = refmpn_memdup_limbs (sp, size);
                    618:   prod = refmpn_malloc_limbs (size);
                    619:   carry_orig = carry;
                    620:
                    621:   for (i = size-1; i >= 0; i--)
                    622:     div1 (&rp[i], &carry, carry, sp[i], divisor);
                    623:
                    624:   /* check by multiplying back */
                    625: #if 0
                    626:   printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
                    627:           size, divisor, carry_orig, carry);
                    628:   mpn_trace("s",sp_copy,size);
                    629:   mpn_trace("r",rp,size);
                    630:   printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
                    631:   mpn_trace("p",prod,size);
                    632: #endif
                    633:   ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
                    634:   ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
                    635:   free (sp_orig);
                    636:   free (prod);
                    637:
                    638:   return carry;
                    639: }
                    640:
                    641: mp_limb_t
                    642: refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
                    643: {
                    644:   return refmpn_divmod_1c (rp, sp, size, divisor, 0);
                    645: }
                    646:
                    647:
                    648: mp_limb_t
                    649: refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                    650:                mp_limb_t carry)
                    651: {
                    652:   mp_ptr  p = refmpn_malloc_limbs (size);
                    653:   carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
                    654:   free (p);
                    655:   return carry;
                    656: }
                    657:
                    658: mp_limb_t
                    659: refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
                    660: {
                    661:   return refmpn_mod_1c (sp, size, divisor, 0);
                    662: }
                    663:
                    664: mp_limb_t
                    665: refmpn_mod_1_rshift (mp_srcptr sp, mp_size_t size,
                    666:                      unsigned shift, mp_limb_t divisor)
                    667: {
                    668:   mp_limb_t  r;
                    669:   mp_ptr     p = refmpn_malloc_limbs (size);
                    670:   refmpn_rshift (p, sp, size, shift);
                    671:   r = refmpn_mod_1 (p, size, divisor);
                    672:   free (p);
                    673:   return r;
                    674: }
                    675:
                    676:
                    677: mp_limb_t
                    678: refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
                    679:                   mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                    680:                   mp_limb_t carry)
                    681: {
                    682:   mp_ptr  z;
                    683:
                    684:   z = refmpn_malloc_limbs (xsize);
                    685:   refmpn_fill (z, xsize, 0);
                    686:
                    687:   carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
                    688:   carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);
                    689:
                    690:   free (z);
                    691:   return carry;
                    692: }
                    693:
                    694: mp_limb_t
                    695: refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
                    696:                  mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
                    697: {
                    698:   return refmpn_divrem_1c (rp, xsize, sp, size, divisor, 0);
                    699: }
                    700:
                    701:
                    702: /* As given in the manual, the divexact method gives quotient q and return
                    703:    value c satisfying
                    704:
                    705:            c*b^n + a-i == 3*q
                    706:
                    707:    where a=dividend, i=initial carry, b=2^BITS_PER_MP_LIMB, and n=size.
                    708:
                    709:    If a-i is divisible by 3 then c==0 and a plain divmod gives the quotient.
                    710:    If (a-i)%3==r then c is a high limb tacked on that will turn r into 0.
                    711:    Because 2^BITS_PER_MP_LIMB==1mod3 (so long as BITS_PER_MP_LIMB is even)
                    712:    it's enough to set c=3-r, ie. if r=1 then c=2, or if r=2 then c=1.
                    713:
                    714:    If a-i produces a borrow then refmpn_sub_1 leaves a twos complement
                    715:    negative, ie. b^n+a-i, and the calculation produces c1 satisfying
                    716:
                    717:            c1*b^n + b^n+a-i == 3*q
                    718:
                    719:    From which clearly c=c1+1, so it's enough to just add any borrow to the
                    720:    return value otherwise calculated.
                    721:
                    722:    A borrow only occurs when a==0 or a==1, and, by the same reasoning as in
                    723:    mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
                    724:    or 1 respectively, so with 1 added the final return value is still in the
                    725:    prescribed range 0 to 2. */
                    726:
                    727: mp_limb_t
                    728: refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
                    729: {
                    730:   mp_ptr     spcopy;
                    731:   mp_limb_t  c, cs;
                    732:
                    733:   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
                    734:   ASSERT (size >= 1);
                    735:   ASSERT (carry <= 2);
                    736:
                    737:   spcopy = refmpn_memdup_limbs (sp, size);
                    738:   cs = refmpn_sub_1 (spcopy, spcopy, size, carry);
                    739:
                    740:   c = refmpn_divmod_1 (rp, spcopy, size, 3);
                    741:   if (c != 0)
                    742:     {
                    743:       ASSERT ((BITS_PER_MP_LIMB % 2) == 0);
                    744:       c = 3-c;
                    745:       ASSERT_NOCARRY (refmpn_divmod_1c (rp, spcopy, size, 3, c));
                    746:     }
                    747:
                    748:   c += cs;
                    749:   ASSERT (c <= 2);
                    750:
                    751:   free (spcopy);
                    752:   return c;
                    753: }
                    754:
                    755: mp_limb_t
                    756: refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    757: {
                    758:   return refmpn_divexact_by3c (rp, sp, size, 0);
                    759: }
                    760:
                    761:
                    762: /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
                    763: void
                    764: refmpn_mul_basecase (mp_ptr prodp,
                    765:                      mp_srcptr up, mp_size_t usize,
                    766:                      mp_srcptr vp, mp_size_t vsize)
                    767: {
                    768:   mp_size_t i;
                    769:
                    770:   ASSERT (! MPN_OVERLAP_P (prodp, usize+vsize, up, usize));
                    771:   ASSERT (! MPN_OVERLAP_P (prodp, usize+vsize, vp, vsize));
                    772:   ASSERT (usize >= vsize);
                    773:   ASSERT (vsize >= 1);
                    774:
                    775:   prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
                    776:   for (i = 1; i < vsize; i++)
                    777:     prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
                    778: }
                    779:
                    780: void
                    781: refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
                    782: {
                    783:   refmpn_mul_basecase (prodp, up, size, vp, size);
                    784: }
                    785:
                    786: void
                    787: refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
                    788: {
                    789:   refmpn_mul_basecase (dst, src, size, src, size);
                    790: }
                    791:
                    792:
                    793: mp_limb_t
                    794: refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
                    795: {
                    796:   mp_limb_t  x;
                    797:   int  twos;
                    798:
                    799:   ASSERT (y != 0);
                    800:   ASSERT (! refmpn_zero_p (xp, xsize));
                    801:
                    802:   x = mpn_mod_1 (xp, xsize, y);
                    803:   if (x == 0)
                    804:     return y;
                    805:
                    806:   twos = 0;
                    807:   while ((x & 1) == 0 && (y & 1) == 0)
                    808:     {
                    809:       x >>= 1;
                    810:       y >>= 1;
                    811:       twos++;
                    812:     }
                    813:
                    814:   for (;;)
                    815:     {
                    816:       while ((x & 1) == 0)  x >>= 1;
                    817:       while ((y & 1) == 0)  y >>= 1;
                    818:
                    819:       if (x < y)
                    820:         MP_LIMB_T_SWAP (x, y);
                    821:
                    822:       x -= y;
                    823:       if (x == 0)
                    824:         break;
                    825:     }
                    826:
                    827:   return y << twos;
                    828: }
                    829:
                    830:
                    831: unsigned
                    832: refmpn_count_trailing_zeros (mp_limb_t x)
                    833: {
                    834:   unsigned  n = 0;
                    835:
                    836:   ASSERT (x != 0);
                    837:   while ((x & 1) == 0)
                    838:     {
                    839:       x >>= 1;
                    840:       n++;
                    841:     }
                    842:   return n;
                    843: }
                    844:
                    845: mp_size_t
                    846: refmpn_strip_twos (mp_ptr p, mp_size_t size)
                    847: {
                    848:   mp_size_t  limbs;
                    849:   unsigned   shift;
                    850:
                    851:   ASSERT (size >= 1);
                    852:   ASSERT (! refmpn_zero_p (p, size));
                    853:
                    854:   for (limbs = 0; p[0] == 0; limbs++)
                    855:     {
                    856:       MPN_COPY_INCR (p, p+1, size-1);
                    857:       p[size-1] = 0;
                    858:     }
                    859:
                    860:   shift = refmpn_count_trailing_zeros (p[0]);
                    861:   if (shift)
                    862:     refmpn_rshift (p, p, size, shift);
                    863:
                    864:   return limbs*BITS_PER_MP_LIMB + shift;
                    865: }
                    866:
                    867: mp_limb_t
                    868: refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
                    869: {
                    870:   int       cmp;
                    871:
                    872:   ASSERT (ysize >= 1);
                    873:   ASSERT (xsize >= ysize);
                    874:   ASSERT ((xp[0] & 1) != 0);
                    875:   ASSERT ((yp[0] & 1) != 0);
                    876:   ASSERT (xp[xsize-1] != 0);
                    877:   ASSERT (yp[ysize-1] != 0);
                    878:   ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
                    879:   ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
                    880:   ASSERT (! MPN_OVERLAP_P (xp, xsize, yp, ysize));
                    881:   if (xsize == ysize)
                    882:     ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
                    883:
                    884:   refmpn_strip_twos (xp, xsize);
                    885:   MPN_NORMALIZE (xp, xsize);
                    886:   MPN_NORMALIZE (yp, ysize);
                    887:
                    888:   for (;;)
                    889:     {
                    890:       cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
                    891:       if (cmp == 0)
                    892:         break;
                    893:       if (cmp < 0)
                    894:         MPN_PTR_SWAP (xp,xsize, yp,ysize);
                    895:
                    896:       ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));
                    897:
                    898:       refmpn_strip_twos (xp, xsize);
                    899:       MPN_NORMALIZE (xp, xsize);
                    900:     }
                    901:
                    902:   refmpn_copyi (gp, xp, xsize);
                    903:   return xsize;
                    904: }
                    905:
                    906:
                    907: unsigned long
                    908: refmpn_popcount (mp_srcptr sp, mp_size_t size)
                    909: {
                    910:   unsigned long  count = 0;
                    911:   mp_size_t  i;
                    912:   int        j;
                    913:   mp_limb_t  l;
                    914:
                    915:   ASSERT (size >= 0);
                    916:   for (i = 0; i < size; i++)
                    917:     {
                    918:       l = sp[i];
                    919:       for (j = 0; j < BITS_PER_MP_LIMB; j++)
                    920:         {
                    921:           count += (l & 1);
                    922:           l >>= 1;
                    923:         }
                    924:     }
                    925:   return count;
                    926: }
                    927:
                    928: unsigned long
                    929: refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    930: {
                    931:   mp_ptr  d;
                    932:   unsigned long  count;
                    933:
                    934:   if (size == 0)
                    935:     return 0;
                    936:
                    937:   d = refmpn_malloc_limbs (size);
                    938:   refmpn_xor_n (d, s1p, s2p, size);
                    939:   count = refmpn_popcount (d, size);
                    940:   free (d);
                    941:   return count;
                    942: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>