[BACK]Return to refmpn.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / tests

Annotation of OpenXM_contrib/gmp/tests/refmpn.c, Revision 1.1.1.1

1.1       ohara       1: /* Reference mpn functions, designed to be simple, portable and independent
                      2:    of the normal gmp code.  Speed isn't a consideration.
                      3:
                      4: Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
                      5: Inc.
                      6:
                      7: This file is part of the GNU MP Library.
                      8:
                      9: The GNU MP Library is free software; you can redistribute it and/or modify
                     10: it under the terms of the GNU Lesser General Public License as published by
                     11: the Free Software Foundation; either version 2.1 of the License, or (at your
                     12: option) any later version.
                     13:
                     14: The GNU MP Library is distributed in the hope that it will be useful, but
                     15: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     16: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     17: License for more details.
                     18:
                     19: You should have received a copy of the GNU Lesser General Public License
                     20: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     21: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     22: MA 02111-1307, USA. */
                     23:
                     24:
                     25: /* Most routines have assertions representing what the mpn routines are
                     26:    supposed to accept.  Many of these reference routines do sensible things
                     27:    outside these ranges (eg. for size==0), but the assertions are present to
                     28:    pick up bad parameters passed here that are about to be passed the same
                     29:    to a real mpn routine being compared.  */
                     30:
                     31: /* always do assertion checking */
                     32: #define WANT_ASSERT  1
                     33:
                     34: #include <stdio.h>  /* for NULL */
                     35: #include <stdlib.h> /* for malloc */
                     36:
                     37: #include "gmp.h"
                     38: #include "gmp-impl.h"
                     39: #include "longlong.h"
                     40:
                     41: #include "tests.h"
                     42:
                     43:
                     44:
                     45: /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
                     46:    in bytes. */
                     47: int
                     48: byte_overlap_p (const void *v_xp, mp_size_t xsize,
                     49:                 const void *v_yp, mp_size_t ysize)
                     50: {
                     51:   const char *xp = v_xp;
                     52:   const char *yp = v_yp;
                     53:
                     54:   ASSERT (xsize >= 0);
                     55:   ASSERT (ysize >= 0);
                     56:
                     57:   /* no wraparounds */
                     58:   ASSERT (xp+xsize >= xp);
                     59:   ASSERT (yp+ysize >= yp);
                     60:
                     61:   if (xp + xsize <= yp)
                     62:     return 0;
                     63:
                     64:   if (yp + ysize <= xp)
                     65:     return 0;
                     66:
                     67:   return 1;
                     68: }
                     69:
                     70: /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
                     71: int
                     72: refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
                     73: {
                     74:   return byte_overlap_p (xp, xsize * BYTES_PER_MP_LIMB,
                     75:                          yp, ysize * BYTES_PER_MP_LIMB);
                     76: }
                     77:
                     78: /* Check overlap for a routine defined to work low to high. */
                     79: int
                     80: refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
                     81: {
                     82:   return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
                     83: }
                     84:
                     85: /* Check overlap for a routine defined to work high to low. */
                     86: int
                     87: refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
                     88: {
                     89:   return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
                     90: }
                     91:
                     92: /* Check overlap for a standard routine requiring equal or separate. */
                     93: int
                     94: refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
                     95: {
                     96:   return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
                     97: }
                     98: int
                     99: refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
                    100:                                mp_size_t size)
                    101: {
                    102:   return (refmpn_overlap_fullonly_p (dst, src1, size)
                    103:           && refmpn_overlap_fullonly_p (dst, src2, size));
                    104: }
                    105:
                    106:
/* Allocate space for "size" limbs.  A failed malloc is treated as a test
   failure (ASSERT) rather than reported to the caller.  size==0 is bumped
   to 1 so malloc never gets a zero-byte request. */
mp_ptr
refmpn_malloc_limbs (mp_size_t size)
{
  mp_ptr  p;
  ASSERT (size >= 0);
  if (size == 0)
    size = 1;
  p = (mp_ptr) malloc (size * BYTES_PER_MP_LIMB);
  ASSERT (p != NULL);
  return p;
}

/* Return a freshly malloc'd copy of {ptr,size}; caller frees. */
mp_ptr
refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
{
  mp_ptr  p;
  p = refmpn_malloc_limbs (size);
  refmpn_copyi (p, ptr, size);
  return p;
}

/* malloc n limbs on a multiple of m bytes boundary */
/* NOTE(review): the aligned pointer returned may differ from the pointer
   malloc gave out, so this allocation apparently can never be free'd —
   presumably acceptable for test code; confirm callers never free it. */
mp_ptr
refmpn_malloc_limbs_aligned (size_t n, size_t m)
{
  return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
}
                    134:
                    135:
                    136: void
                    137: refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
                    138: {
                    139:   mp_size_t  i;
                    140:   ASSERT (size >= 0);
                    141:   for (i = 0; i < size; i++)
                    142:     ptr[i] = value;
                    143: }
                    144:
                    145: void
                    146: refmpn_zero (mp_ptr ptr, mp_size_t size)
                    147: {
                    148:   refmpn_fill (ptr, size, CNST_LIMB(0));
                    149: }
                    150:
                    151: int
                    152: refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
                    153: {
                    154:   mp_size_t  i;
                    155:   for (i = 0; i < size; i++)
                    156:     if (ptr[i] != 0)
                    157:       return 0;
                    158:   return 1;
                    159: }
                    160:
                    161: /* the highest one bit in x */
                    162: mp_limb_t
                    163: refmpn_msbone (mp_limb_t x)
                    164: {
                    165:   mp_limb_t  n = (mp_limb_t) 1 << (BITS_PER_MP_LIMB-1);
                    166:
                    167:   while (n != 0)
                    168:     {
                    169:       if (x & n)
                    170:         break;
                    171:       n >>= 1;
                    172:     }
                    173:   return n;
                    174: }
                    175:
                    176: /* a mask of the highest one bit plus and all bits below */
                    177: mp_limb_t
                    178: refmpn_msbone_mask (mp_limb_t x)
                    179: {
                    180:   if (x == 0)
                    181:     return 0;
                    182:
                    183:   return (refmpn_msbone (x) << 1) - 1;
                    184: }
                    185:
                    186:
                    187: void
                    188: refmpn_setbit (mp_ptr ptr, unsigned long bit)
                    189: {
                    190:   ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
                    191: }
                    192:
                    193: void
                    194: refmpn_clrbit (mp_ptr ptr, unsigned long bit)
                    195: {
                    196:   ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
                    197: }
                    198:
                    199: #define REFMPN_TSTBIT(ptr,bit) \
                    200:   (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)
                    201:
                    202: int
                    203: refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
                    204: {
                    205:   return REFMPN_TSTBIT (ptr, bit);
                    206: }
                    207:
                    208: unsigned long
                    209: refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
                    210: {
                    211:   while (REFMPN_TSTBIT (ptr, bit) != 0)
                    212:     bit++;
                    213:   return bit;
                    214: }
                    215:
                    216: unsigned long
                    217: refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
                    218: {
                    219:   while (REFMPN_TSTBIT (ptr, bit) == 0)
                    220:     bit++;
                    221:   return bit;
                    222: }
                    223:
                    224: void
                    225: refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    226: {
                    227:   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
                    228:   refmpn_copyi (rp, sp, size);
                    229: }
                    230:
                    231: void
                    232: refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    233: {
                    234:   mp_size_t i;
                    235:
                    236:   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
                    237:   ASSERT (size >= 0);
                    238:
                    239:   for (i = 0; i < size; i++)
                    240:     rp[i] = sp[i];
                    241: }
                    242:
                    243: void
                    244: refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    245: {
                    246:   mp_size_t i;
                    247:
                    248:   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
                    249:   ASSERT (size >= 0);
                    250:
                    251:   for (i = size-1; i >= 0; i--)
                    252:     rp[i] = sp[i];
                    253: }
                    254:
                    255: void
                    256: refmpn_com_n (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                    257: {
                    258:   mp_size_t i;
                    259:
                    260:   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
                    261:   ASSERT (size >= 1);
                    262:   ASSERT_MPN (sp, size);
                    263:
                    264:   for (i = 0; i < size; i++)
                    265:     rp[i] = sp[i] ^ GMP_NUMB_MASK;
                    266: }
                    267:
                    268:
                    269: int
                    270: refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
                    271: {
                    272:   mp_size_t  i;
                    273:
                    274:   ASSERT (size >= 1);
                    275:   ASSERT_MPN (xp, size);
                    276:   ASSERT_MPN (yp, size);
                    277:
                    278:   for (i = size-1; i >= 0; i--)
                    279:     {
                    280:       if (xp[i] > yp[i])  return 1;
                    281:       if (xp[i] < yp[i])  return -1;
                    282:     }
                    283:   return 0;
                    284: }
                    285:
                    286: int
                    287: refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
                    288: {
                    289:   if (size == 0)
                    290:     return 0;
                    291:   else
                    292:     return refmpn_cmp (xp, yp, size);
                    293: }
                    294:
/* Compare {xp,xsize} and {yp,ysize}, returning 1, 0 or -1 as x is greater,
   equal or less than y.  Sizes may differ; excess high limbs are compared
   against zero. */
int
refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
                     mp_srcptr yp, mp_size_t ysize)
{
  int  opp, cmp;

  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  /* arrange x to be the longer operand; remember whether we swapped so the
     final result can be negated back */
  opp = (xsize < ysize);
  if (opp)
    MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);

  /* any non-zero limb of x above ysize makes x strictly bigger */
  if (! refmpn_zero_p (xp+ysize, xsize-ysize))
    cmp = 1;
  else
    cmp = refmpn_cmp (xp, yp, ysize);

  return (opp ? -cmp : cmp);
}
                    315:
                    316: int
                    317: refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
                    318: {
                    319:   mp_size_t  i;
                    320:   ASSERT (size >= 0);
                    321:
                    322:   for (i = 0; i < size; i++)
                    323:       if (xp[i] != yp[i])
                    324:         return 0;
                    325:   return 1;
                    326: }
                    327:
                    328:
/* Common body for the limb-wise logical operations below: apply
   "operation" (an expression in s1p[i] and s2p[i]) to each limb of the
   two size-limb sources, storing at rp.  Results are kept within the
   numb part via GMP_NUMB_MASK where the operation could set nail bits. */
#define LOGOPS(operation)                                               \
  {                                                                     \
    mp_size_t  i;                                                       \
                                                                        \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
                                                                        \
    for (i = 0; i < size; i++)                                          \
      rp[i] = operation;                                                \
  }

/* rp = s1 AND s2 */
void
refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & s2p[i]);
}
/* rp = s1 AND NOT s2 */
void
refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & ~s2p[i]);
}
/* rp = NOT (s1 AND s2), complement within the numb mask */
void
refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
}
/* rp = s1 OR s2 */
void
refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | s2p[i]);
}
/* rp = s1 OR NOT s2 */
void
refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
}
/* rp = NOT (s1 OR s2) */
void
refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
}
/* rp = s1 XOR s2 */
void
refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] ^ s2p[i]);
}
/* rp = NOT (s1 XOR s2) */
void
refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
}
                    382:
/* set *w to x+y, return 0 or 1 carry */
mp_limb_t
add (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  sum, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  sum = x + y;
#if GMP_NAIL_BITS == 0
  *w = sum;
  /* unsigned wraparound: the sum is smaller than an addend iff a carry
     occurred */
  cy = (sum < x);
#else
  /* with nails, the carry sits in the bits above the numb part */
  *w = sum & GMP_NUMB_MASK;
  cy = (sum >> GMP_NUMB_BITS);
#endif
  return cy;
}
                    402:
/* set *w to x-y, return 0 or 1 borrow */
mp_limb_t
sub (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  diff, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  diff = x - y;
#if GMP_NAIL_BITS == 0
  *w = diff;
  /* unsigned wraparound: the difference exceeds x iff a borrow occurred */
  cy = (diff > x);
#else
  /* with nails, an underflow shows up one bit above the numb part */
  *w = diff & GMP_NUMB_MASK;
  cy = (diff >> GMP_NUMB_BITS) & 1;
#endif
  return cy;
}
                    422:
                    423: /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
                    424: mp_limb_t
                    425: adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
                    426: {
                    427:   mp_limb_t  r;
                    428:
                    429:   ASSERT_LIMB (x);
                    430:   ASSERT_LIMB (y);
                    431:   ASSERT (c == 0 || c == 1);
                    432:
                    433:   r = add (w, x, y);
                    434:   return r + add (w, *w, c);
                    435: }
                    436:
                    437: /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
                    438: mp_limb_t
                    439: sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
                    440: {
                    441:   mp_limb_t  r;
                    442:
                    443:   ASSERT_LIMB (x);
                    444:   ASSERT_LIMB (y);
                    445:   ASSERT (c == 0 || c == 1);
                    446:
                    447:   r = sub (w, x, y);
                    448:   return r + sub (w, *w, c);
                    449: }
                    450:
                    451:
                    452: #define AORS_1(operation)                               \
                    453:   {                                                     \
                    454:     mp_limb_t  i;                                       \
                    455:                                                         \
                    456:     ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
                    457:     ASSERT (size >= 1);                                 \
                    458:     ASSERT_MPN (sp, size);                              \
                    459:     ASSERT_LIMB (n);                                    \
                    460:                                                         \
                    461:     for (i = 0; i < size; i++)                          \
                    462:       n = operation (&rp[i], sp[i], n);                 \
                    463:     return n;                                           \
                    464:   }
                    465:
                    466: mp_limb_t
                    467: refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
                    468: {
                    469:   AORS_1 (add);
                    470: }
                    471: mp_limb_t
                    472: refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
                    473: {
                    474:   AORS_1 (sub);
                    475: }
                    476:
/* Common body for refmpn_add_nc and refmpn_sub_nc: apply "operation" (adc
   or sbb) limb by limb across the two size-limb sources, threading the
   carry/borrow through, result to {rp,size}.  Returns the final
   carry/borrow. */
#define AORS_NC(operation)                                              \
  {                                                                     \
    mp_size_t  i;                                                       \
                                                                        \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
    ASSERT (carry == 0 || carry == 1);                                  \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
                                                                        \
    for (i = 0; i < size; i++)                                          \
      carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
    return carry;                                                       \
  }

/* rp = s1 + s2 + carry, returning the carry out of the top limb */
mp_limb_t
refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (adc);
}
/* rp = s1 - s2 - carry, returning the borrow out of the top limb */
mp_limb_t
refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (sbb);
}
                    504:
                    505:
                    506: mp_limb_t
                    507: refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    508: {
                    509:   return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
                    510: }
                    511: mp_limb_t
                    512: refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    513: {
                    514:   return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
                    515: }
                    516:
                    517:
                    518: /* Twos complement, return borrow. */
                    519: mp_limb_t
                    520: refmpn_neg_n (mp_ptr dst, mp_srcptr src, mp_size_t size)
                    521: {
                    522:   mp_ptr     zeros;
                    523:   mp_limb_t  ret;
                    524:
                    525:   ASSERT (size >= 1);
                    526:
                    527:   zeros = refmpn_malloc_limbs (size);
                    528:   refmpn_fill (zeros, size, CNST_LIMB(0));
                    529:   ret = refmpn_sub_n (dst, zeros, src, size);
                    530:   free (zeros);
                    531:   return ret;
                    532: }
                    533:
                    534:
/* Common body for refmpn_add and refmpn_sub: operate on the low s2size
   limbs with aors_n, then propagate the resulting carry/borrow through
   the remaining high limbs of s1 with aors_1.  s1 must be at least as
   long as s2. */
#define AORS(aors_n, aors_1)                                    \
  {                                                             \
    mp_limb_t  c;                                               \
    ASSERT (s1size >= s2size);                                  \
    ASSERT (s2size >= 1);                                       \
    c = aors_n (rp, s1p, s2p, s2size);                          \
    if (s1size-s2size != 0)                                     \
      c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \
    return c;                                                   \
  }
/* rp = s1 + s2 where s1size >= s2size; return the carry out. */
mp_limb_t
refmpn_add (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_add_n, refmpn_add_1);
}
/* rp = s1 - s2 where s1size >= s2size; return the borrow out. */
mp_limb_t
refmpn_sub (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_sub_n, refmpn_sub_1);
}
                    559:
                    560:
/* Helpers for refmpn_umul_ppmm: split a limb into high and low halves of
   BITS_PER_MP_LIMB/2 bits each. */
#define SHIFTHIGH(x) ((x) << BITS_PER_MP_LIMB/2)
#define SHIFTLOW(x)  ((x) >> BITS_PER_MP_LIMB/2)

#define LOWMASK   (((mp_limb_t) 1 << BITS_PER_MP_LIMB/2)-1)
#define HIGHMASK  SHIFTHIGH(LOWMASK)

#define LOWPART(x)   ((x) & LOWMASK)
/* high half, moved down into the low half's position */
#define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)
                    569:
/* Set *lo and the return value to the double-limb product x*y, using full
   limbs not nails.  Schoolbook method on half-limbs: four partial
   products, with the two cross products split between hi and lo. */
mp_limb_t
refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  hi, s;

  /* low*low contributes only to lo, high*high only to hi */
  *lo = LOWPART(x) * LOWPART(y);
  hi = HIGHPART(x) * HIGHPART(y);

  /* first cross product: top half into hi, bottom half added to lo,
     carrying into hi on wraparound */
  s = LOWPART(x) * HIGHPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  /* second cross product, same treatment */
  s = HIGHPART(x) * LOWPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  return hi;
}
                    593:
                    594: mp_limb_t
                    595: refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
                    596: {
                    597:   return refmpn_umul_ppmm (lo, x, y);
                    598: }
                    599:
/* rp = sp * multiplier + carry, returning the high limb out.  multiplier
   and carry are numb-sized values. */
mp_limb_t
refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
               mp_limb_t carry)
{
  mp_size_t  i;
  mp_limb_t  hi, lo;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT_MPN (sp, size);
  ASSERT_LIMB (multiplier);
  ASSERT_LIMB (carry);

  /* shift the multiplier up by the nail bits so that after the full-limb
     umul_ppmm, hi holds the high numb part directly and lo only needs its
     low nail bits shifted off */
  multiplier <<= GMP_NAIL_BITS;
  for (i = 0; i < size; i++)
    {
      hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
      lo >>= GMP_NAIL_BITS;
      /* adding the incoming carry cannot overflow the hi:lo pair */
      ASSERT_NOCARRY (add (&hi, hi, add (&lo, lo, carry)));
      rp[i] = lo;
      carry = hi;
    }
  return carry;
}

/* rp = sp * multiplier with no carry in; return the high limb out. */
mp_limb_t
refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}
                    630:
                    631:
                    632: mp_limb_t
                    633: refmpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult)
                    634: {
                    635:   mp_ptr     src_copy;
                    636:   mp_limb_t  c;
                    637:
                    638:   ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
                    639:   ASSERT (! refmpn_overlap_p (dst, size+1, mult, 2));
                    640:   ASSERT (size >= 1);
                    641:   ASSERT_MPN (mult, 2);
                    642:
                    643:   /* in case dst==src */
                    644:   src_copy = refmpn_malloc_limbs (size);
                    645:   refmpn_copyi (src_copy, src, size);
                    646:   src = src_copy;
                    647:
                    648:   dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
                    649:   c = refmpn_addmul_1 (dst+1, src, size, mult[1]);
                    650:   free (src_copy);
                    651:   return c;
                    652: }
                    653:
                    654:
/* Shared body for refmpn_addmul_1c and refmpn_submul_1c.  Forms
   {sp,size}*multiplier+carry in a temporary and then adds it to or
   subtracts it from {rp,size} using operation_n (refmpn_add_n or
   refmpn_sub_n).  Expects rp, sp, size, multiplier and carry to be in
   scope at the expansion site.  The temporary allows rp==sp overlap.  */
#define AORSMUL_1C(operation_n)                                 \
  {                                                             \
    mp_ptr     p;                                               \
    mp_limb_t  ret;                                             \
                                                                \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
                                                                \
    /* product to a temporary, then add/sub into rp */          \
    p = refmpn_malloc_limbs (size);                             \
    ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
    ret += operation_n (rp, rp, p, size);                       \
                                                                \
    free (p);                                                   \
    return ret;                                                 \
  }
                    669:
                    670: mp_limb_t
                    671: refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                    672:                   mp_limb_t multiplier, mp_limb_t carry)
                    673: {
                    674:   AORSMUL_1C (refmpn_add_n);
                    675: }
                    676: mp_limb_t
                    677: refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                    678:                   mp_limb_t multiplier, mp_limb_t carry)
                    679: {
                    680:   AORSMUL_1C (refmpn_sub_n);
                    681: }
                    682:
                    683:
                    684: mp_limb_t
                    685: refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
                    686: {
                    687:   return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
                    688: }
                    689: mp_limb_t
                    690: refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
                    691: {
                    692:   return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
                    693: }
                    694:
                    695:
                    696: mp_limb_t
                    697: refmpn_addsub_nc (mp_ptr r1p, mp_ptr r2p,
                    698:                   mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
                    699:                   mp_limb_t carry)
                    700: {
                    701:   mp_ptr p;
                    702:   mp_limb_t acy, scy;
                    703:
                    704:   /* Destinations can't overlap. */
                    705:   ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
                    706:   ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
                    707:   ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
                    708:   ASSERT (size >= 1);
                    709:
                    710:   /* in case r1p==s1p or r1p==s2p */
                    711:   p = refmpn_malloc_limbs (size);
                    712:
                    713:   acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
                    714:   scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
                    715:   refmpn_copyi (r1p, p, size);
                    716:
                    717:   free (p);
                    718:   return 2 * acy + scy;
                    719: }
                    720:
                    721: mp_limb_t
                    722: refmpn_addsub_n (mp_ptr r1p, mp_ptr r2p,
                    723:                 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                    724: {
                    725:   return refmpn_addsub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
                    726: }
                    727:
                    728:
                    729: /* Right shift hi,lo and return the low limb of the result.
                    730:    Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
                    731: mp_limb_t
                    732: rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
                    733: {
                    734:   ASSERT (shift < GMP_NUMB_BITS);
                    735:   if (shift == 0)
                    736:     return lo;
                    737:   else
                    738:     return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
                    739: }
                    740:
                    741: /* Left shift hi,lo and return the high limb of the result.
                    742:    Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
                    743: mp_limb_t
                    744: lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
                    745: {
                    746:   ASSERT (shift < GMP_NUMB_BITS);
                    747:   if (shift == 0)
                    748:     return hi;
                    749:   else
                    750:     return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
                    751: }
                    752:
                    753:
                    754: mp_limb_t
                    755: refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
                    756: {
                    757:   mp_limb_t  ret;
                    758:   mp_size_t  i;
                    759:
                    760:   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
                    761:   ASSERT (size >= 1);
                    762:   ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
                    763:   ASSERT_MPN (sp, size);
                    764:
                    765:   ret = rshift_make (sp[0], CNST_LIMB(0), shift);
                    766:
                    767:   for (i = 0; i < size-1; i++)
                    768:     rp[i] = rshift_make (sp[i+1], sp[i], shift);
                    769:
                    770:   rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
                    771:   return ret;
                    772: }
                    773:
                    774: mp_limb_t
                    775: refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
                    776: {
                    777:   mp_limb_t  ret;
                    778:   mp_size_t  i;
                    779:
                    780:   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
                    781:   ASSERT (size >= 1);
                    782:   ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
                    783:   ASSERT_MPN (sp, size);
                    784:
                    785:   ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);
                    786:
                    787:   for (i = size-2; i >= 0; i--)
                    788:     rp[i+1] = lshift_make (sp[i+1], sp[i], shift);
                    789:
                    790:   rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
                    791:   return ret;
                    792: }
                    793:
                    794:
                    795: mp_limb_t
                    796: refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
                    797: {
                    798:   if (shift == 0)
                    799:     {
                    800:       refmpn_copyi (rp, sp, size);
                    801:       return 0;
                    802:     }
                    803:   else
                    804:     {
                    805:       return refmpn_rshift (rp, sp, size, shift);
                    806:     }
                    807: }
                    808:
                    809: mp_limb_t
                    810: refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
                    811: {
                    812:   if (shift == 0)
                    813:     {
                    814:       refmpn_copyd (rp, sp, size);
                    815:       return 0;
                    816:     }
                    817:   else
                    818:     {
                    819:       return refmpn_lshift (rp, sp, size, shift);
                    820:     }
                    821: }
                    822:
                    823:
/* Divide h,l by d, return quotient, store remainder to *rp.
   Operates on full limbs, not nails.
   Must have h < d.
   __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
mp_limb_t
refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
{
  mp_limb_t  q, r;
  int  n;

  ASSERT (d != 0);
  ASSERT (h < d);

#if 0
  /* disabled alternative: the library's own udiv_qrnnd macro, rather than
     the portable __udiv_qrnnd_c path below */
  udiv_qrnnd (q, r, h, l, d);
  *rp = r;
  return q;
#endif

  /* normalize the divisor so its high bit is set, as __udiv_qrnnd_c
     requires, shifting the dividend h,l up by the same amount */
  n = refmpn_count_leading_zeros (d);
  d <<= n;

  /* guarded: a shift by GMP_LIMB_BITS when n==0 would be undefined */
  if (n != 0)
    {
      h = (h << n) | (l >> (GMP_LIMB_BITS - n));
      l <<= n;
    }

  __udiv_qrnnd_c (q, r, h, l, d);
  /* denormalize the remainder */
  r >>= n;
  *rp = r;
  return q;
}
                    857:
                    858: mp_limb_t
                    859: refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
                    860: {
                    861:   return refmpn_udiv_qrnnd (rp, h, l, d);
                    862: }
                    863:
                    864: /* This little subroutine avoids some bad code generation from i386 gcc 3.0
                    865:    -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).  */
                    866: static mp_limb_t
                    867: refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                    868:                              mp_limb_t divisor, mp_limb_t carry)
                    869: {
                    870:   mp_size_t  i;
                    871:   for (i = size-1; i >= 0; i--)
                    872:     {
                    873:       rp[i] = refmpn_udiv_qrnnd (&carry, carry,
                    874:                                  sp[i] << GMP_NAIL_BITS,
                    875:                                  divisor << GMP_NAIL_BITS);
                    876:       carry >>= GMP_NAIL_BITS;
                    877:     }
                    878:   return carry;
                    879: }
                    880:
                    881: mp_limb_t
                    882: refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                    883:                   mp_limb_t divisor, mp_limb_t carry)
                    884: {
                    885:   mp_ptr     sp_orig;
                    886:   mp_ptr     prod;
                    887:   mp_limb_t  carry_orig;
                    888:
                    889:   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
                    890:   ASSERT (size >= 0);
                    891:   ASSERT (carry < divisor);
                    892:   ASSERT_MPN (sp, size);
                    893:   ASSERT_LIMB (divisor);
                    894:   ASSERT_LIMB (carry);
                    895:
                    896:   if (size == 0)
                    897:     return carry;
                    898:
                    899:   sp_orig = refmpn_memdup_limbs (sp, size);
                    900:   prod = refmpn_malloc_limbs (size);
                    901:   carry_orig = carry;
                    902:
                    903:   carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
                    904:
                    905:   /* check by multiplying back */
                    906: #if 0
                    907:   printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
                    908:           size, divisor, carry_orig, carry);
                    909:   mpn_trace("s",sp_copy,size);
                    910:   mpn_trace("r",rp,size);
                    911:   printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
                    912:   mpn_trace("p",prod,size);
                    913: #endif
                    914:   ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
                    915:   ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
                    916:   free (sp_orig);
                    917:   free (prod);
                    918:
                    919:   return carry;
                    920: }
                    921:
                    922: mp_limb_t
                    923: refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
                    924: {
                    925:   return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
                    926: }
                    927:
                    928:
                    929: mp_limb_t
                    930: refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                    931:                mp_limb_t carry)
                    932: {
                    933:   mp_ptr  p = refmpn_malloc_limbs (size);
                    934:   carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
                    935:   free (p);
                    936:   return carry;
                    937: }
                    938:
                    939: mp_limb_t
                    940: refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
                    941: {
                    942:   return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
                    943: }
                    944:
                    945: mp_limb_t
                    946: refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                    947:                      mp_limb_t inverse)
                    948: {
                    949:   ASSERT (divisor & GMP_NUMB_HIGHBIT);
                    950:   ASSERT (inverse == refmpn_invert_limb (divisor));
                    951:   return refmpn_mod_1 (sp, size, divisor);
                    952: }
                    953:
                    954: /* This implementation will be rather slow, but has the advantage of being
                    955:    in a different style than the libgmp versions.  */
                    956: mp_limb_t
                    957: refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
                    958: {
                    959:   ASSERT ((GMP_NUMB_BITS % 4) == 0);
                    960:   return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
                    961: }
                    962:
                    963:
                    964: mp_limb_t
                    965: refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
                    966:                   mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                    967:                   mp_limb_t carry)
                    968: {
                    969:   mp_ptr  z;
                    970:
                    971:   z = refmpn_malloc_limbs (xsize);
                    972:   refmpn_fill (z, xsize, CNST_LIMB(0));
                    973:
                    974:   carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
                    975:   carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);
                    976:
                    977:   free (z);
                    978:   return carry;
                    979: }
                    980:
                    981: mp_limb_t
                    982: refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
                    983:                  mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
                    984: {
                    985:   return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
                    986: }
                    987:
                    988: mp_limb_t
                    989: refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
                    990:                         mp_srcptr sp, mp_size_t size,
                    991:                         mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
                    992: {
                    993:   ASSERT (size >= 0);
                    994:   ASSERT (shift == refmpn_count_leading_zeros (divisor));
                    995:   ASSERT (inverse == refmpn_invert_limb (divisor << shift));
                    996:
                    997:   return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
                    998: }
                    999:
                   1000:
                   1001: /* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers
                   1002:    paper, figure 8.1 m', where b=2^BITS_PER_MP_LIMB.  Note that -d-1 < d
                   1003:    since d has the high bit set. */
                   1004:
                   1005: mp_limb_t
                   1006: refmpn_invert_limb (mp_limb_t d)
                   1007: {
                   1008:   mp_limb_t r;
                   1009:   ASSERT (d & GMP_LIMB_HIGHBIT);
                   1010:   return refmpn_udiv_qrnnd (&r, -d-1, -1, d);
                   1011: }
                   1012:
                   1013:
                   1014: /* The aim is to produce a dst quotient and return a remainder c, satisfying
                   1015:    c*b^n + src-i == 3*dst, where i is the incoming carry.
                   1016:
                   1017:    Some value c==0, c==1 or c==2 will satisfy, so just try each.
                   1018:
                   1019:    If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
                   1020:    remainder from the first division attempt determines the correct
                   1021:    remainder (3-c), but don't bother with that, since we can't guarantee
                   1022:    anything about GMP_NUMB_BITS when using nails.
                   1023:
                   1024:    If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
                   1025:    complement negative, ie. b^n+a-i, and the calculation produces c1
                   1026:    satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
                   1027:    means it's enough to just add any borrow back at the end.
                   1028:
                   1029:    A borrow only occurs when a==0 or a==1, and, by the same reasoning as in
                   1030:    mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
                   1031:    or 1 respectively, so with 1 added the final return value is still in the
                   1032:    prescribed range 0 to 2. */
                   1033:
                   1034: mp_limb_t
                   1035: refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
                   1036: {
                   1037:   mp_ptr     spcopy;
                   1038:   mp_limb_t  c, cs;
                   1039:
                   1040:   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
                   1041:   ASSERT (size >= 1);
                   1042:   ASSERT (carry <= 2);
                   1043:   ASSERT_MPN (sp, size);
                   1044:
                   1045:   spcopy = refmpn_malloc_limbs (size);
                   1046:   cs = refmpn_sub_1 (spcopy, sp, size, carry);
                   1047:
                   1048:   for (c = 0; c <= 2; c++)
                   1049:     if (refmpn_divmod_1c (rp, spcopy, size, 3, c) == 0)
                   1050:       goto done;
                   1051:   ASSERT_FAIL (no value of c satisfies);
                   1052:
                   1053:  done:
                   1054:   c += cs;
                   1055:   ASSERT (c <= 2);
                   1056:
                   1057:   free (spcopy);
                   1058:   return c;
                   1059: }
                   1060:
                   1061: mp_limb_t
                   1062: refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
                   1063: {
                   1064:   return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
                   1065: }
                   1066:
                   1067:
                   1068: /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
                   1069: void
                   1070: refmpn_mul_basecase (mp_ptr prodp,
                   1071:                      mp_srcptr up, mp_size_t usize,
                   1072:                      mp_srcptr vp, mp_size_t vsize)
                   1073: {
                   1074:   mp_size_t i;
                   1075:
                   1076:   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
                   1077:   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
                   1078:   ASSERT (usize >= vsize);
                   1079:   ASSERT (vsize >= 1);
                   1080:   ASSERT_MPN (up, usize);
                   1081:   ASSERT_MPN (vp, vsize);
                   1082:
                   1083:   prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
                   1084:   for (i = 1; i < vsize; i++)
                   1085:     prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
                   1086: }
                   1087:
                   1088: void
                   1089: refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
                   1090: {
                   1091:   refmpn_mul_basecase (prodp, up, size, vp, size);
                   1092: }
                   1093:
                   1094: void
                   1095: refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
                   1096: {
                   1097:   refmpn_mul_basecase (dst, src, size, src, size);
                   1098: }
                   1099:
                   1100: /* Allowing usize<vsize, usize==0 or vsize==0. */
                   1101: void
                   1102: refmpn_mul_any (mp_ptr prodp,
                   1103:                      mp_srcptr up, mp_size_t usize,
                   1104:                      mp_srcptr vp, mp_size_t vsize)
                   1105: {
                   1106:   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
                   1107:   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
                   1108:   ASSERT (usize >= 0);
                   1109:   ASSERT (vsize >= 0);
                   1110:   ASSERT_MPN (up, usize);
                   1111:   ASSERT_MPN (vp, vsize);
                   1112:
                   1113:   if (usize == 0)
                   1114:     {
                   1115:       refmpn_fill (prodp, vsize, CNST_LIMB(0));
                   1116:       return;
                   1117:     }
                   1118:
                   1119:   if (vsize == 0)
                   1120:     {
                   1121:       refmpn_fill (prodp, usize, CNST_LIMB(0));
                   1122:       return;
                   1123:     }
                   1124:
                   1125:   if (usize >= vsize)
                   1126:     refmpn_mul_basecase (prodp, up, usize, vp, vsize);
                   1127:   else
                   1128:     refmpn_mul_basecase (prodp, vp, vsize, up, usize);
                   1129: }
                   1130:
                   1131:
                   1132: mp_limb_t
                   1133: refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
                   1134: {
                   1135:   mp_limb_t  x;
                   1136:   int  twos;
                   1137:
                   1138:   ASSERT (y != 0);
                   1139:   ASSERT (! refmpn_zero_p (xp, xsize));
                   1140:   ASSERT_MPN (xp, xsize);
                   1141:   ASSERT_LIMB (y);
                   1142:
                   1143:   x = refmpn_mod_1 (xp, xsize, y);
                   1144:   if (x == 0)
                   1145:     return y;
                   1146:
                   1147:   twos = 0;
                   1148:   while ((x & 1) == 0 && (y & 1) == 0)
                   1149:     {
                   1150:       x >>= 1;
                   1151:       y >>= 1;
                   1152:       twos++;
                   1153:     }
                   1154:
                   1155:   for (;;)
                   1156:     {
                   1157:       while ((x & 1) == 0)  x >>= 1;
                   1158:       while ((y & 1) == 0)  y >>= 1;
                   1159:
                   1160:       if (x < y)
                   1161:         MP_LIMB_T_SWAP (x, y);
                   1162:
                   1163:       x -= y;
                   1164:       if (x == 0)
                   1165:         break;
                   1166:     }
                   1167:
                   1168:   return y << twos;
                   1169: }
                   1170:
                   1171:
                   1172: /* Based on the full limb x, not nails. */
                   1173: unsigned
                   1174: refmpn_count_leading_zeros (mp_limb_t x)
                   1175: {
                   1176:   unsigned  n = 0;
                   1177:
                   1178:   ASSERT (x != 0);
                   1179:
                   1180:   while ((x & GMP_LIMB_HIGHBIT) == 0)
                   1181:     {
                   1182:       x <<= 1;
                   1183:       n++;
                   1184:     }
                   1185:   return n;
                   1186: }
                   1187:
                   1188: /* Full limbs allowed, not limited to nails. */
                   1189: unsigned
                   1190: refmpn_count_trailing_zeros (mp_limb_t x)
                   1191: {
                   1192:   unsigned  n = 0;
                   1193:
                   1194:   ASSERT (x != 0);
                   1195:   ASSERT_LIMB (x);
                   1196:
                   1197:   while ((x & 1) == 0)
                   1198:     {
                   1199:       x >>= 1;
                   1200:       n++;
                   1201:     }
                   1202:   return n;
                   1203: }
                   1204:
                   1205: /* Strip factors of two (low zero bits) from {p,size} by right shifting.
                   1206:    The return value is the number of twos stripped.  */
                   1207: mp_size_t
                   1208: refmpn_strip_twos (mp_ptr p, mp_size_t size)
                   1209: {
                   1210:   mp_size_t  limbs;
                   1211:   unsigned   shift;
                   1212:
                   1213:   ASSERT (size >= 1);
                   1214:   ASSERT (! refmpn_zero_p (p, size));
                   1215:   ASSERT_MPN (p, size);
                   1216:
                   1217:   for (limbs = 0; p[0] == 0; limbs++)
                   1218:     {
                   1219:       refmpn_copyi (p, p+1, size-1);
                   1220:       p[size-1] = 0;
                   1221:     }
                   1222:
                   1223:   shift = refmpn_count_trailing_zeros (p[0]);
                   1224:   if (shift)
                   1225:     refmpn_rshift (p, p, size, shift);
                   1226:
                   1227:   return limbs*GMP_NUMB_BITS + shift;
                   1228: }
                   1229:
/* Greatest common divisor of {xp,xsize} and {yp,ysize} by repeated
   subtraction (binary-style: twos are stripped after each step).  Both
   operand buffers are clobbered; the result goes to gp and its limb count
   is returned.  Inputs are asserted odd at the low limb.  */
mp_limb_t
refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
{
  int       cmp;

  ASSERT (ysize >= 1);
  ASSERT (xsize >= ysize);
  ASSERT ((xp[0] & 1) != 0);
  ASSERT ((yp[0] & 1) != 0);
  /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
  ASSERT (yp[ysize-1] != 0);
  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
  ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
  if (xsize == ysize)
    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  /* strip twos from x and drop any high zero limbs from both operands */
  refmpn_strip_twos (xp, xsize);
  MPN_NORMALIZE (xp, xsize);
  MPN_NORMALIZE (yp, ysize);

  /* subtract the smaller from the larger (swapping so x is the larger)
     and re-strip twos, until the two operands become equal */
  for (;;)
    {
      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
      if (cmp == 0)
        break;
      if (cmp < 0)
        MPN_PTR_SWAP (xp,xsize, yp,ysize);

      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));

      refmpn_strip_twos (xp, xsize);
      MPN_NORMALIZE (xp, xsize);
    }

  /* x == y == gcd at this point */
  refmpn_copyi (gp, xp, xsize);
  return xsize;
}
                   1270:
                   1271:
                   1272: unsigned long
                   1273: refmpn_popcount (mp_srcptr sp, mp_size_t size)
                   1274: {
                   1275:   unsigned long  count = 0;
                   1276:   mp_size_t  i;
                   1277:   int        j;
                   1278:   mp_limb_t  l;
                   1279:
                   1280:   ASSERT (size >= 0);
                   1281:   ASSERT_MPN (sp, size);
                   1282:
                   1283:   for (i = 0; i < size; i++)
                   1284:     {
                   1285:       l = sp[i];
                   1286:       for (j = 0; j < GMP_NUMB_BITS; j++)
                   1287:         {
                   1288:           count += (l & 1);
                   1289:           l >>= 1;
                   1290:         }
                   1291:     }
                   1292:   return count;
                   1293: }
                   1294:
                   1295: unsigned long
                   1296: refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
                   1297: {
                   1298:   mp_ptr  d;
                   1299:   unsigned long  count;
                   1300:
                   1301:   ASSERT (size >= 0);
                   1302:   ASSERT_MPN (s1p, size);
                   1303:   ASSERT_MPN (s2p, size);
                   1304:
                   1305:   if (size == 0)
                   1306:     return 0;
                   1307:
                   1308:   d = refmpn_malloc_limbs (size);
                   1309:   refmpn_xor_n (d, s1p, s2p, size);
                   1310:   count = refmpn_popcount (d, size);
                   1311:   free (d);
                   1312:   return count;
                   1313: }
                   1314:
                   1315:
                   1316: /* set r to a%d */
                   1317: void
                   1318: refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
                   1319: {
                   1320:   mp_limb_t  D[2];
                   1321:   int        n;
                   1322:
                   1323:   ASSERT (! refmpn_overlap_p (r, 2, a, 2));
                   1324:   ASSERT (! refmpn_overlap_p (r, 2, d, 2));
                   1325:   ASSERT_MPN (a, 2);
                   1326:   ASSERT_MPN (d, 2);
                   1327:
                   1328:   D[1] = d[1], D[0] = d[0];
                   1329:   r[1] = a[1], r[0] = a[0];
                   1330:   n = 0;
                   1331:
                   1332:   for (;;)
                   1333:     {
                   1334:       if (D[1] & GMP_NUMB_HIGHBIT)
                   1335:         break;
                   1336:       if (refmpn_cmp (r, D, 2) <= 0)
                   1337:         break;
                   1338:       refmpn_lshift (D, D, 2, 1);
                   1339:       n++;
                   1340:       ASSERT (n <= GMP_NUMB_BITS);
                   1341:     }
                   1342:
                   1343:   while (n >= 0)
                   1344:     {
                   1345:       if (refmpn_cmp (r, D, 2) >= 0)
                   1346:         ASSERT_NOCARRY (refmpn_sub_n (r, r, D, 2));
                   1347:       refmpn_rshift (D, D, 2, 1);
                   1348:       n--;
                   1349:     }
                   1350:
                   1351:   ASSERT (refmpn_cmp (r, d, 2) < 0);
                   1352: }
                   1353:
                   1354:
                   1355: /* Find n where 0<n<2^GMP_NUMB_BITS and n==c mod m */
                   1356: mp_limb_t
                   1357: refmpn_gcd_finda (const mp_limb_t c[2])
                   1358: {
                   1359:   mp_limb_t  n1[2], n2[2];
                   1360:
                   1361:   ASSERT (c[0] != 0);
                   1362:   ASSERT (c[1] != 0);
                   1363:   ASSERT_MPN (c, 2);
                   1364:
                   1365:   n1[0] = c[0];
                   1366:   n1[1] = c[1];
                   1367:
                   1368:   n2[0] = -n1[0];
                   1369:   n2[1] = ~n1[1];
                   1370:
                   1371:   while (n2[1] != 0)
                   1372:     {
                   1373:       refmpn_mod2 (n1, n1, n2);
                   1374:
                   1375:       MP_LIMB_T_SWAP (n1[0], n2[0]);
                   1376:       MP_LIMB_T_SWAP (n1[1], n2[1]);
                   1377:     }
                   1378:
                   1379:   return n2[0];
                   1380: }
                   1381:
                   1382:
/* Similar to mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
   particular the trial quotient is allowed to be 2 too big.

   Schoolbook division of {np,nsize} by {dp,dsize}.  The quotient limbs go
   to {qp,nsize-dsize}, the return value is the quotient's extra high bit,
   and the remainder is left in {np,dsize}.  The divisor must be normalized
   (high bit of dp[dsize-1] set).  The result is verified by multiplying
   back before returning.  */
mp_limb_t
refmpn_sb_divrem_mn (mp_ptr qp,
                     mp_ptr np, mp_size_t nsize,
                     mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t  retval = 0;
  mp_size_t  i;
  mp_limb_t  d1 = dp[dsize-1];
  mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);  /* kept for the multiply-back check */

  ASSERT (nsize >= dsize);
  /* ASSERT (dsize > 2); */
  ASSERT (dsize >= 2);
  ASSERT (dp[dsize-1] & GMP_LIMB_HIGHBIT);
  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  /* high quotient bit: subtract the divisor once from the top of np if it
     fits there */
  i = nsize-dsize;
  if (refmpn_cmp (np+i, dp, dsize) >= 0)
    {
      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
      retval = 1;
    }

  /* one quotient limb per iteration, working down from the top */
  for (i--; i >= 0; i--)
    {
      mp_limb_t  n0 = np[i+dsize];
      mp_limb_t  n1 = np[i+dsize-1];
      mp_limb_t  q, dummy_r;

      /* trial quotient from the top two limbs of the partial remainder
         divided by the top divisor limb; may be up to 2 too big */
      ASSERT (n0 <= d1);
      if (n0 == d1)
        q = MP_LIMB_T_MAX;
      else
        q = refmpn_udiv_qrnnd (&dummy_r, n0, n1, d1);

      /* subtract q*divisor; if the trial q was too big this underflows,
         leaving n0 == MP_LIMB_T_MAX (a borrow out of the top limb) */
      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
      if (n0)
        {
          /* add the divisor back once, or twice when q was 2 too big
             (the first add_n then produces no carry) */
          q--;
          if (! refmpn_add_n (np+i, np+i, dp, dsize))
            {
              q--;
              ASSERT (refmpn_add_n (np+i, np+i, dp, dsize) != 0);
            }
        }
      np[i+dsize] = 0;

      qp[i] = q;
    }

  /* remainder < divisor */
  ASSERT (refmpn_cmp (np, dp, dsize) < 0);

  /* multiply back to original */
  {
    mp_ptr  mp = refmpn_malloc_limbs (nsize);

    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
    if (retval)
      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);

    free (mp);
  }

  free (np_orig);
  return retval;
}
                   1457:
                   1458: /* Similar to mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
                   1459:    particular the trial quotient is allowed to be 2 too big. */
                   1460: void
                   1461: refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                   1462:                 mp_ptr np, mp_size_t nsize,
                   1463:                 mp_srcptr dp, mp_size_t dsize)
                   1464: {
                   1465:   ASSERT (qxn == 0);
                   1466:   ASSERT_MPN (np, nsize);
                   1467:   ASSERT_MPN (dp, dsize);
                   1468:
                   1469:   if (dsize == 1)
                   1470:     {
                   1471:       rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
                   1472:       return;
                   1473:     }
                   1474:   else
                   1475:     {
                   1476:       mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
                   1477:       mp_ptr  d2p = refmpn_malloc_limbs (dsize);
                   1478:       int     norm = refmpn_count_leading_zeros (dp[dsize-1]);
                   1479:
                   1480:       n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
                   1481:       ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));
                   1482:
                   1483:       refmpn_sb_divrem_mn (qp, n2p, nsize+1, d2p, dsize);
                   1484:       refmpn_rshift_or_copy (rp, n2p, dsize, norm);
                   1485:
                   1486:       /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
                   1487:       free (n2p);
                   1488:       free (d2p);
                   1489:     }
                   1490: }
                   1491:
/* Convert {src,size} to digits of the given base, most significant first,
   stored as raw digit values (not ASCII) at dst.  Returns the number of
   digits produced.  {src,size} is clobbered, except for power-of-2 bases
   where a temporary copy is worked on instead.  */
size_t
refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
{
  unsigned char  *d;
  size_t  dsize;

  ASSERT (size >= 0);
  ASSERT (base >= 2);
  ASSERT (base < numberof (__mp_bases));
  ASSERT (size == 0 || src[size-1] != 0);
  ASSERT_MPN (src, size);

  /* upper bound on the digit count, possibly 1 too big */
  MPN_SIZEINBASE (dsize, src, size, base);
  ASSERT (dsize >= 1);
  ASSERT (! byte_overlap_p (dst, dsize, src, size * BYTES_PER_MP_LIMB));

  if (size == 0)
    {
      /* zero is represented as the single digit 0 */
      dst[0] = 0;
      return 1;
    }

  /* don't clobber input for power of 2 bases */
  if (POW2_P (base))
    src = refmpn_memdup_limbs (src, size);

  /* peel digits off the low end, filling dst from the far end backwards */
  d = dst + dsize;
  do
    {
      d--;
      ASSERT (d >= dst);
      *d = refmpn_divrem_1 (src, 0, src, size, (mp_limb_t) base);
      /* shrink when the quotient's high limb becomes zero */
      size -= (src[size-1] == 0);
    }
  while (size != 0);

  /* at most one leading zero */
  ASSERT (d == dst || d == dst+1);
  if (d != dst)
    *dst = 0;

  /* free the temporary copy made above */
  if (POW2_P (base))
    free (src);

  return dsize;
}
                   1538:
                   1539:
                   1540: mp_limb_t
                   1541: refmpn_bswap_limb (mp_limb_t src)
                   1542: {
                   1543:   mp_limb_t  dst;
                   1544:   int        i;
                   1545:
                   1546:   dst = 0;
                   1547:   for (i = 0; i < BYTES_PER_MP_LIMB; i++)
                   1548:     {
                   1549:       dst = (dst << 8) + (src & 0xFF);
                   1550:       src >>= 8;
                   1551:     }
                   1552:   return dst;
                   1553: }
                   1554:
                   1555:
                   1556: /* These random functions are mostly for transitional purposes while adding
                   1557:    nail support, since they're independent of the normal mpn routines.  They
                   1558:    can probably be removed when those normal routines are reliable, though
                   1559:    perhaps something independent would still be useful at times.  */
                   1560:
                   1561: #if BITS_PER_MP_LIMB == 32
                   1562: #define RAND_A  CNST_LIMB(0x29CF535)
                   1563: #endif
                   1564: #if BITS_PER_MP_LIMB == 64
                   1565: #define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
                   1566: #endif
                   1567:
                   1568: mp_limb_t  refmpn_random_seed;
                   1569:
                   1570: mp_limb_t
                   1571: refmpn_random_half (void)
                   1572: {
                   1573:   refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
                   1574:   return (refmpn_random_seed >> BITS_PER_MP_LIMB/2);
                   1575: }
                   1576:
                   1577: mp_limb_t
                   1578: refmpn_random_limb (void)
                   1579: {
                   1580:   return ((refmpn_random_half () << (BITS_PER_MP_LIMB/2))
                   1581:            | refmpn_random_half ()) & GMP_NUMB_MASK;
                   1582: }
                   1583:
                   1584: void
                   1585: refmpn_random (mp_ptr ptr, mp_size_t size)
                   1586: {
                   1587:   mp_size_t  i;
                   1588:   if (GMP_NAIL_BITS == 0)
                   1589:     {
                   1590:       mpn_random (ptr, size);
                   1591:       return;
                   1592:     }
                   1593:
                   1594:   for (i = 0; i < size; i++)
                   1595:     ptr[i] = refmpn_random_limb ();
                   1596: }
                   1597:
/* Fill {ptr,size} with random data consisting of long runs of consecutive
   0s and 1s (mpn_random2 style), for exercising carry propagation.
   Without nails this defers to mpn_random2.  */
void
refmpn_random2 (mp_ptr ptr, mp_size_t size)
{
  mp_size_t  i;
  mp_limb_t  bit, mask, limb;
  int        run;

  if (GMP_NAIL_BITS == 0)
    {
      mpn_random2 (ptr, size);
      return;
    }

/* maximum length of a run of 0s or 1s */
#define RUN_MODULUS  32

  /* start with ones at a random pos in the high limb */
  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
  mask = 0;
  run = 0;

  /* walk a single bit position down through each limb; "mask" selects
     whether the current run emits 1s (all-ones mask) or 0s (zero mask),
     flipping each time a run of random length is exhausted */
  for (i = size-1; i >= 0; i--)
    {
      limb = 0;
      do
        {
          if (run == 0)
            {
              run = (refmpn_random_half () % RUN_MODULUS) + 1;
              mask = ~mask;
            }

          limb |= (bit & mask);
          bit >>= 1;
          run--;
        }
      while (bit != 0);

      ptr[i] = limb;
      /* continue from the top bit of the next lower limb */
      bit = GMP_NUMB_HIGHBIT;
    }
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>