/* OpenXM_contrib/gmp/tests/refmpn.c */
/* Reference mpn functions, designed to be simple, portable and independent
   of the normal gmp code.  Speed isn't a consideration.

Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */


/* Most routines have assertions representing what the mpn routines are
   supposed to accept.  Many of these reference routines do sensible things
   outside these ranges (eg. for size==0), but the assertions are present to
   pick up bad parameters passed here that are about to be passed the same
   to a real mpn routine being compared. */

/* always do assertion checking */
#define WANT_ASSERT 1

#include <stdio.h>  /* for NULL */
#include <stdlib.h> /* for malloc */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "tests.h"



/* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
   in bytes. */
int
byte_overlap_p (const void *v_xp, mp_size_t xsize,
                const void *v_yp, mp_size_t ysize)
{
  const char *xp = v_xp;
  const char *yp = v_yp;

  ASSERT (xsize >= 0);
  ASSERT (ysize >= 0);

  /* no wraparounds */
  ASSERT (xp+xsize >= xp);
  ASSERT (yp+ysize >= yp);

  if (xp + xsize <= yp)
    return 0;

  if (yp + ysize <= xp)
    return 0;

  return 1;
}

/* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
int
refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
{
  return byte_overlap_p (xp, xsize * BYTES_PER_MP_LIMB,
                         yp, ysize * BYTES_PER_MP_LIMB);
}

/* Check overlap for a routine defined to work low to high. */
int
refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
}

/* Check overlap for a routine defined to work high to low. */
int
refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
}

/* Check overlap for a standard routine requiring equal or separate. */
int
refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
}
int
refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
                               mp_size_t size)
{
  return (refmpn_overlap_fullonly_p (dst, src1, size)
          && refmpn_overlap_fullonly_p (dst, src2, size));
}


mp_ptr
refmpn_malloc_limbs (mp_size_t size)
{
  mp_ptr p;
  ASSERT (size >= 0);
  if (size == 0)
    size = 1;
  p = (mp_ptr) malloc (size * BYTES_PER_MP_LIMB);
  ASSERT (p != NULL);
  return p;
}

mp_ptr
refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
{
  mp_ptr p;
  p = refmpn_malloc_limbs (size);
  refmpn_copyi (p, ptr, size);
  return p;
}

/* malloc n limbs on a multiple of m bytes boundary */
mp_ptr
refmpn_malloc_limbs_aligned (size_t n, size_t m)
{
  return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
}


void
refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
{
  mp_size_t i;
  ASSERT (size >= 0);
  for (i = 0; i < size; i++)
    ptr[i] = value;
}

void
refmpn_zero (mp_ptr ptr, mp_size_t size)
{
  refmpn_fill (ptr, size, CNST_LIMB(0));
}

int
refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
{
  mp_size_t i;
  for (i = 0; i < size; i++)
    if (ptr[i] != 0)
      return 0;
  return 1;
}

/* the highest one bit in x */
mp_limb_t
refmpn_msbone (mp_limb_t x)
{
  mp_limb_t n = (mp_limb_t) 1 << (BITS_PER_MP_LIMB-1);

  while (n != 0)
    {
      if (x & n)
        break;
      n >>= 1;
    }
  return n;
}

/* a mask of the highest one bit plus all bits below */
mp_limb_t
refmpn_msbone_mask (mp_limb_t x)
{
  if (x == 0)
    return 0;

  return (refmpn_msbone (x) << 1) - 1;
}
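
/* Editorial worked example, not in the original source: for x = 0x14
   (binary 10100), refmpn_msbone above returns 0x10, the highest set bit,
   and refmpn_msbone_mask returns 0x1F, that bit together with all the bits
   below it.  */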


void
refmpn_setbit (mp_ptr ptr, unsigned long bit)
{
  ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
}

void
refmpn_clrbit (mp_ptr ptr, unsigned long bit)
{
  ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
}

#define REFMPN_TSTBIT(ptr,bit) \
  (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)

int
refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
{
  return REFMPN_TSTBIT (ptr, bit);
}

unsigned long
refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
{
  while (REFMPN_TSTBIT (ptr, bit) != 0)
    bit++;
  return bit;
}

unsigned long
refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
{
  while (REFMPN_TSTBIT (ptr, bit) == 0)
    bit++;
  return bit;
}

void
refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  refmpn_copyi (rp, sp, size);
}

void
refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 0);

  for (i = 0; i < size; i++)
    rp[i] = sp[i];
}

void
refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 0);

  for (i = size-1; i >= 0; i--)
    rp[i] = sp[i];
}

void
refmpn_com_n (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t i;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT_MPN (sp, size);

  for (i = 0; i < size; i++)
    rp[i] = sp[i] ^ GMP_NUMB_MASK;
}


int
refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  mp_size_t i;

  ASSERT (size >= 1);
  ASSERT_MPN (xp, size);
  ASSERT_MPN (yp, size);

  for (i = size-1; i >= 0; i--)
    {
      if (xp[i] > yp[i]) return 1;
      if (xp[i] < yp[i]) return -1;
    }
  return 0;
}

int
refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  if (size == 0)
    return 0;
  else
    return refmpn_cmp (xp, yp, size);
}

int
refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
                     mp_srcptr yp, mp_size_t ysize)
{
  int opp, cmp;

  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  opp = (xsize < ysize);
  if (opp)
    MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);

  if (! refmpn_zero_p (xp+ysize, xsize-ysize))
    cmp = 1;
  else
    cmp = refmpn_cmp (xp, yp, ysize);

  return (opp ? -cmp : cmp);
}

int
refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  mp_size_t i;
  ASSERT (size >= 0);

  for (i = 0; i < size; i++)
    if (xp[i] != yp[i])
      return 0;
  return 1;
}


#define LOGOPS(operation) \
  { \
    mp_size_t i; \
\
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
    ASSERT (size >= 1); \
    ASSERT_MPN (s1p, size); \
    ASSERT_MPN (s2p, size); \
\
    for (i = 0; i < size; i++) \
      rp[i] = operation; \
  }

void
refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & s2p[i]);
}
void
refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & ~s2p[i]);
}
void
refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
}
void
refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | s2p[i]);
}
void
refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
}
void
refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
}
void
refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] ^ s2p[i]);
}
void
refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
}

/* set *w to x+y, return 0 or 1 carry */
mp_limb_t
add (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t sum, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  sum = x + y;
#if GMP_NAIL_BITS == 0
  *w = sum;
  cy = (sum < x);
#else
  *w = sum & GMP_NUMB_MASK;
  cy = (sum >> GMP_NUMB_BITS);
#endif
  return cy;
}

/* set *w to x-y, return 0 or 1 borrow */
mp_limb_t
sub (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t diff, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  diff = x - y;
#if GMP_NAIL_BITS == 0
  *w = diff;
  cy = (diff > x);
#else
  *w = diff & GMP_NUMB_MASK;
  cy = (diff >> GMP_NUMB_BITS) & 1;
#endif
  return cy;
}

/* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
mp_limb_t
adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
{
  mp_limb_t r;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);
  ASSERT (c == 0 || c == 1);

  r = add (w, x, y);
  return r + add (w, *w, c);
}

/* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
mp_limb_t
sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
{
  mp_limb_t r;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);
  ASSERT (c == 0 || c == 1);

  r = sub (w, x, y);
  return r + sub (w, *w, c);
}


#define AORS_1(operation) \
  { \
    mp_limb_t i; \
\
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \
    ASSERT (size >= 1); \
    ASSERT_MPN (sp, size); \
    ASSERT_LIMB (n); \
\
    for (i = 0; i < size; i++) \
      n = operation (&rp[i], sp[i], n); \
    return n; \
  }

mp_limb_t
refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (add);
}
mp_limb_t
refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (sub);
}

#define AORS_NC(operation) \
  { \
    mp_size_t i; \
\
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
    ASSERT (carry == 0 || carry == 1); \
    ASSERT (size >= 1); \
    ASSERT_MPN (s1p, size); \
    ASSERT_MPN (s2p, size); \
\
    for (i = 0; i < size; i++) \
      carry = operation (&rp[i], s1p[i], s2p[i], carry); \
    return carry; \
  }

mp_limb_t
refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (adc);
}
mp_limb_t
refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (sbb);
}


mp_limb_t
refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}
mp_limb_t
refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}


/* Twos complement, return borrow. */
mp_limb_t
refmpn_neg_n (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  mp_ptr zeros;
  mp_limb_t ret;

  ASSERT (size >= 1);

  zeros = refmpn_malloc_limbs (size);
  refmpn_fill (zeros, size, CNST_LIMB(0));
  ret = refmpn_sub_n (dst, zeros, src, size);
  free (zeros);
  return ret;
}


#define AORS(aors_n, aors_1) \
  { \
    mp_limb_t c; \
    ASSERT (s1size >= s2size); \
    ASSERT (s2size >= 1); \
    c = aors_n (rp, s1p, s2p, s2size); \
    if (s1size-s2size != 0) \
      c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \
    return c; \
  }
mp_limb_t
refmpn_add (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_add_n, refmpn_add_1);
}
mp_limb_t
refmpn_sub (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_sub_n, refmpn_sub_1);
}


#define SHIFTHIGH(x) ((x) << BITS_PER_MP_LIMB/2)
#define SHIFTLOW(x)  ((x) >> BITS_PER_MP_LIMB/2)

#define LOWMASK   (((mp_limb_t) 1 << BITS_PER_MP_LIMB/2)-1)
#define HIGHMASK  SHIFTHIGH(LOWMASK)

#define LOWPART(x)   ((x) & LOWMASK)
#define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)

/* Set *hi,*lo to x*y, using full limbs not nails. */
mp_limb_t
refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t hi, s;

  *lo = LOWPART(x) * LOWPART(y);
  hi = HIGHPART(x) * HIGHPART(y);

  s = LOWPART(x) * HIGHPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  s = HIGHPART(x) * LOWPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  return hi;
}
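
/* Editorial worked example, not in the original source: with 8-bit limbs
   assumed purely for illustration, x = 0xAB and y = 0xCD split into halves
   0xA,0xB and 0xC,0xD.  The code above forms lo = 0xB*0xD = 0x8F and
   hi = 0xA*0xC = 0x78, then folds in the cross products 0xB*0xC = 0x84 and
   0xA*0xD = 0x82 (high half into hi, low half shifted into lo), giving
   hi = 0x88, lo = 0xEF, i.e. 0x88EF = 171*205.  */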

mp_limb_t
refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
{
  return refmpn_umul_ppmm (lo, x, y);
}

mp_limb_t
refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
               mp_limb_t carry)
{
  mp_size_t i;
  mp_limb_t hi, lo;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT_MPN (sp, size);
  ASSERT_LIMB (multiplier);
  ASSERT_LIMB (carry);

  multiplier <<= GMP_NAIL_BITS;
  for (i = 0; i < size; i++)
    {
      hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
      lo >>= GMP_NAIL_BITS;
      ASSERT_NOCARRY (add (&hi, hi, add (&lo, lo, carry)));
      rp[i] = lo;
      carry = hi;
    }
  return carry;
}

mp_limb_t
refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}


mp_limb_t
refmpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_srcptr mult)
{
  mp_ptr src_copy;
  mp_limb_t c;

  ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
  ASSERT (! refmpn_overlap_p (dst, size+1, mult, 2));
  ASSERT (size >= 1);
  ASSERT_MPN (mult, 2);

  /* in case dst==src */
  src_copy = refmpn_malloc_limbs (size);
  refmpn_copyi (src_copy, src, size);
  src = src_copy;

  dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
  c = refmpn_addmul_1 (dst+1, src, size, mult[1]);
  free (src_copy);
  return c;
}


#define AORSMUL_1C(operation_n) \
  { \
    mp_ptr p; \
    mp_limb_t ret; \
\
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \
\
    p = refmpn_malloc_limbs (size); \
    ret = refmpn_mul_1c (p, sp, size, multiplier, carry); \
    ret += operation_n (rp, rp, p, size); \
\
    free (p); \
    return ret; \
  }

mp_limb_t
refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_add_n);
}
mp_limb_t
refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_sub_n);
}


mp_limb_t
refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}
mp_limb_t
refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}


mp_limb_t
refmpn_addsub_nc (mp_ptr r1p, mp_ptr r2p,
                  mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
                  mp_limb_t carry)
{
  mp_ptr p;
  mp_limb_t acy, scy;

  /* Destinations can't overlap. */
  ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
  ASSERT (size >= 1);

  /* in case r1p==s1p or r1p==s2p */
  p = refmpn_malloc_limbs (size);

  acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
  scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
  refmpn_copyi (r1p, p, size);

  free (p);
  return 2 * acy + scy;
}

mp_limb_t
refmpn_addsub_n (mp_ptr r1p, mp_ptr r2p,
                 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_addsub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
}


/* Right shift hi,lo and return the low limb of the result.
   Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
mp_limb_t
rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
{
  ASSERT (shift < GMP_NUMB_BITS);
  if (shift == 0)
    return lo;
  else
    return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
}

/* Left shift hi,lo and return the high limb of the result.
   Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
mp_limb_t
lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
{
  ASSERT (shift < GMP_NUMB_BITS);
  if (shift == 0)
    return hi;
  else
    return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
}
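
/* Editorial worked example, not in the original source: with 8-bit limbs
   assumed for illustration and hi = 0x12, lo = 0x34, shift = 3, the double
   limb value is 0x1234.  rshift_make returns the low limb of
   0x1234 >> 3 = 0x0246, namely 0x46, and lshift_make returns the high limb
   of 0x1234 << 3 = 0x91A0, namely 0x91.  */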


mp_limb_t
refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t ret;
  mp_size_t i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  ret = rshift_make (sp[0], CNST_LIMB(0), shift);

  for (i = 0; i < size-1; i++)
    rp[i] = rshift_make (sp[i+1], sp[i], shift);

  rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
  return ret;
}

mp_limb_t
refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t ret;
  mp_size_t i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);

  for (i = size-2; i >= 0; i--)
    rp[i+1] = lshift_make (sp[i+1], sp[i], shift);

  rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
  return ret;
}


mp_limb_t
refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  if (shift == 0)
    {
      refmpn_copyi (rp, sp, size);
      return 0;
    }
  else
    {
      return refmpn_rshift (rp, sp, size, shift);
    }
}

mp_limb_t
refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  if (shift == 0)
    {
      refmpn_copyd (rp, sp, size);
      return 0;
    }
  else
    {
      return refmpn_lshift (rp, sp, size, shift);
    }
}


/* Divide h,l by d, return quotient, store remainder to *rp.
   Operates on full limbs, not nails.
   Must have h < d.
   __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
mp_limb_t
refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
{
  mp_limb_t q, r;
  int n;

  ASSERT (d != 0);
  ASSERT (h < d);

#if 0
  udiv_qrnnd (q, r, h, l, d);
  *rp = r;
  return q;
#endif

  n = refmpn_count_leading_zeros (d);
  d <<= n;

  if (n != 0)
    {
      h = (h << n) | (l >> (GMP_LIMB_BITS - n));
      l <<= n;
    }

  __udiv_qrnnd_c (q, r, h, l, d);
  r >>= n;
  *rp = r;
  return q;
}

mp_limb_t
refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
{
  return refmpn_udiv_qrnnd (rp, h, l, d);
}

/* This little subroutine avoids some bad code generation from i386 gcc 3.0
   -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized). */
static mp_limb_t
refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                             mp_limb_t divisor, mp_limb_t carry)
{
  mp_size_t i;
  for (i = size-1; i >= 0; i--)
    {
      rp[i] = refmpn_udiv_qrnnd (&carry, carry,
                                 sp[i] << GMP_NAIL_BITS,
                                 divisor << GMP_NAIL_BITS);
      carry >>= GMP_NAIL_BITS;
    }
  return carry;
}

mp_limb_t
refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t divisor, mp_limb_t carry)
{
  mp_ptr sp_orig;
  mp_ptr prod;
  mp_limb_t carry_orig;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 0);
  ASSERT (carry < divisor);
  ASSERT_MPN (sp, size);
  ASSERT_LIMB (divisor);
  ASSERT_LIMB (carry);

  if (size == 0)
    return carry;

  sp_orig = refmpn_memdup_limbs (sp, size);
  prod = refmpn_malloc_limbs (size);
  carry_orig = carry;

  carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);

  /* check by multiplying back */
#if 0
  printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
          size, divisor, carry_orig, carry);
  mpn_trace("s",sp_orig,size);
  mpn_trace("r",rp,size);
  printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
  mpn_trace("p",prod,size);
#endif
  ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
  ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
  free (sp_orig);
  free (prod);

  return carry;
}

mp_limb_t
refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
}


mp_limb_t
refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
               mp_limb_t carry)
{
  mp_ptr p = refmpn_malloc_limbs (size);
  carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
  free (p);
  return carry;
}

mp_limb_t
refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
}

mp_limb_t
refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                     mp_limb_t inverse)
{
  ASSERT (divisor & GMP_NUMB_HIGHBIT);
  ASSERT (inverse == refmpn_invert_limb (divisor));
  return refmpn_mod_1 (sp, size, divisor);
}

/* This implementation will be rather slow, but has the advantage of being
   in a different style than the libgmp versions. */
mp_limb_t
refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
{
  ASSERT ((GMP_NUMB_BITS % 4) == 0);
  return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
}
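
/* Editorial note, not in the original source: the divisor used above is
   2^(3*GMP_NUMB_BITS/4)-1, e.g. 2^24-1 = 0xFFFFFF for 32-bit limbs with no
   nails.  */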


mp_limb_t
refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
                  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                  mp_limb_t carry)
{
  mp_ptr z;

  z = refmpn_malloc_limbs (xsize);
  refmpn_fill (z, xsize, CNST_LIMB(0));

  carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
  carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);

  free (z);
  return carry;
}

mp_limb_t
refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
                 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
}

mp_limb_t
refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
                        mp_srcptr sp, mp_size_t size,
                        mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
{
  ASSERT (size >= 0);
  ASSERT (shift == refmpn_count_leading_zeros (divisor));
  ASSERT (inverse == refmpn_invert_limb (divisor << shift));

  return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
}


/* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers
   paper, figure 8.1 m', where b=2^BITS_PER_MP_LIMB.  Note that -d-1 < d
   since d has the high bit set. */

mp_limb_t
refmpn_invert_limb (mp_limb_t d)
{
  mp_limb_t r;
  ASSERT (d & GMP_LIMB_HIGHBIT);
  return refmpn_udiv_qrnnd (&r, -d-1, -1, d);
}
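
/* Editorial worked example, not in the original source: with 4-bit limbs
   assumed for illustration, b = 16 and d = 13 (high bit set) give
   floor((16*(16-13)-1)/13) = floor(47/13) = 3, which the call above reaches
   by dividing the two-limb value (-d-1,-1) = (2,15) = 47 by 13.  */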


/* The aim is to produce a dst quotient and return a remainder c, satisfying
   c*b^n + src-i == 3*dst, where i is the incoming carry.

   Some value c==0, c==1 or c==2 will satisfy, so just try each.

   If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
   remainder from the first division attempt determines the correct
   remainder (3-c), but don't bother with that, since we can't guarantee
   anything about GMP_NUMB_BITS when using nails.

   If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
   complement negative, ie. b^n+a-i, and the calculation produces c1
   satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
   means it's enough to just add any borrow back at the end.

   A borrow only occurs when a==0 or a==1, and, by the same reasoning as in
   mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
   or 1 respectively, so with 1 added the final return value is still in the
   prescribed range 0 to 2. */
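
/* Editorial worked example, not in the original source: assuming 32-bit
   limbs with no nails, a single source limb 7 with carry 0 is handled by
   trying c = 0, 1, 2; only c = 2 gives an exact division, since
   2*2^32 + 7 = 8589934599 = 3 * 2863311533, so rp[0] = 2863311533 and the
   function returns 2.  */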

mp_limb_t
refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
{
  mp_ptr spcopy;
  mp_limb_t c, cs;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (carry <= 2);
  ASSERT_MPN (sp, size);

  spcopy = refmpn_malloc_limbs (size);
  cs = refmpn_sub_1 (spcopy, sp, size, carry);

  for (c = 0; c <= 2; c++)
    if (refmpn_divmod_1c (rp, spcopy, size, 3, c) == 0)
      goto done;
  ASSERT_FAIL (no value of c satisfies);

 done:
  c += cs;
  ASSERT (c <= 2);

  free (spcopy);
  return c;
}

mp_limb_t
refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
}


/* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
void
refmpn_mul_basecase (mp_ptr prodp,
                     mp_srcptr up, mp_size_t usize,
                     mp_srcptr vp, mp_size_t vsize)
{
  mp_size_t i;

  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  ASSERT (usize >= vsize);
  ASSERT (vsize >= 1);
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
  for (i = 1; i < vsize; i++)
    prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
}

void
refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  refmpn_mul_basecase (prodp, up, size, vp, size);
}

void
refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  refmpn_mul_basecase (dst, src, size, src, size);
}

/* Allowing usize<vsize, usize==0 or vsize==0. */
void
refmpn_mul_any (mp_ptr prodp,
                mp_srcptr up, mp_size_t usize,
                mp_srcptr vp, mp_size_t vsize)
{
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  ASSERT (usize >= 0);
  ASSERT (vsize >= 0);
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  if (usize == 0)
    {
      refmpn_fill (prodp, vsize, CNST_LIMB(0));
      return;
    }

  if (vsize == 0)
    {
      refmpn_fill (prodp, usize, CNST_LIMB(0));
      return;
    }

  if (usize >= vsize)
    refmpn_mul_basecase (prodp, up, usize, vp, vsize);
  else
    refmpn_mul_basecase (prodp, vp, vsize, up, usize);
}


mp_limb_t
refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
{
  mp_limb_t x;
  int twos;

  ASSERT (y != 0);
  ASSERT (! refmpn_zero_p (xp, xsize));
  ASSERT_MPN (xp, xsize);
  ASSERT_LIMB (y);

  x = refmpn_mod_1 (xp, xsize, y);
  if (x == 0)
    return y;

  twos = 0;
  while ((x & 1) == 0 && (y & 1) == 0)
    {
      x >>= 1;
      y >>= 1;
      twos++;
    }

  for (;;)
    {
      while ((x & 1) == 0) x >>= 1;
      while ((y & 1) == 0) y >>= 1;

      if (x < y)
        MP_LIMB_T_SWAP (x, y);

      x -= y;
      if (x == 0)
        break;
    }

  return y << twos;
}


/* Based on the full limb x, not nails. */
unsigned
refmpn_count_leading_zeros (mp_limb_t x)
{
  unsigned n = 0;

  ASSERT (x != 0);

  while ((x & GMP_LIMB_HIGHBIT) == 0)
    {
      x <<= 1;
      n++;
    }
  return n;
}

/* Full limbs allowed, not limited to nails. */
unsigned
refmpn_count_trailing_zeros (mp_limb_t x)
{
  unsigned n = 0;

  ASSERT (x != 0);
  ASSERT_LIMB (x);

  while ((x & 1) == 0)
    {
      x >>= 1;
      n++;
    }
  return n;
}

/* Strip factors of two (low zero bits) from {p,size} by right shifting.
   The return value is the number of twos stripped. */
mp_size_t
refmpn_strip_twos (mp_ptr p, mp_size_t size)
{
  mp_size_t limbs;
  unsigned shift;

  ASSERT (size >= 1);
  ASSERT (! refmpn_zero_p (p, size));
  ASSERT_MPN (p, size);

  for (limbs = 0; p[0] == 0; limbs++)
    {
      refmpn_copyi (p, p+1, size-1);
      p[size-1] = 0;
    }

  shift = refmpn_count_trailing_zeros (p[0]);
  if (shift)
    refmpn_rshift (p, p, size, shift);

  return limbs*GMP_NUMB_BITS + shift;
}

mp_limb_t
refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
{
  int cmp;

  ASSERT (ysize >= 1);
  ASSERT (xsize >= ysize);
  ASSERT ((xp[0] & 1) != 0);
  ASSERT ((yp[0] & 1) != 0);
  /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
  ASSERT (yp[ysize-1] != 0);
  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
  ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
  if (xsize == ysize)
    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  refmpn_strip_twos (xp, xsize);
  MPN_NORMALIZE (xp, xsize);
  MPN_NORMALIZE (yp, ysize);

  for (;;)
    {
      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
      if (cmp == 0)
        break;
      if (cmp < 0)
        MPN_PTR_SWAP (xp,xsize, yp,ysize);

      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));

      refmpn_strip_twos (xp, xsize);
      MPN_NORMALIZE (xp, xsize);
    }

  refmpn_copyi (gp, xp, xsize);
  return xsize;
}


unsigned long
refmpn_popcount (mp_srcptr sp, mp_size_t size)
{
  unsigned long count = 0;
  mp_size_t i;
  int j;
  mp_limb_t l;

  ASSERT (size >= 0);
  ASSERT_MPN (sp, size);

  for (i = 0; i < size; i++)
    {
      l = sp[i];
      for (j = 0; j < GMP_NUMB_BITS; j++)
        {
          count += (l & 1);
          l >>= 1;
        }
    }
  return count;
}

unsigned long
refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  mp_ptr d;
  unsigned long count;

  ASSERT (size >= 0);
  ASSERT_MPN (s1p, size);
  ASSERT_MPN (s2p, size);

  if (size == 0)
    return 0;

  d = refmpn_malloc_limbs (size);
  refmpn_xor_n (d, s1p, s2p, size);
  count = refmpn_popcount (d, size);
  free (d);
  return count;
}


/* set r to a%d */
void
refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
{
  mp_limb_t D[2];
  int n;

  ASSERT (! refmpn_overlap_p (r, 2, a, 2));
  ASSERT (! refmpn_overlap_p (r, 2, d, 2));
  ASSERT_MPN (a, 2);
  ASSERT_MPN (d, 2);

  D[1] = d[1], D[0] = d[0];
  r[1] = a[1], r[0] = a[0];
  n = 0;

  for (;;)
    {
      if (D[1] & GMP_NUMB_HIGHBIT)
        break;
      if (refmpn_cmp (r, D, 2) <= 0)
        break;
      refmpn_lshift (D, D, 2, 1);
      n++;
      ASSERT (n <= GMP_NUMB_BITS);
    }

  while (n >= 0)
    {
      if (refmpn_cmp (r, D, 2) >= 0)
        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, 2));
      refmpn_rshift (D, D, 2, 1);
      n--;
    }

  ASSERT (refmpn_cmp (r, d, 2) < 0);
}


/* Find n where 0<n<2^GMP_NUMB_BITS and n==c mod m */
mp_limb_t
refmpn_gcd_finda (const mp_limb_t c[2])
{
  mp_limb_t n1[2], n2[2];

  ASSERT (c[0] != 0);
  ASSERT (c[1] != 0);
  ASSERT_MPN (c, 2);

  n1[0] = c[0];
  n1[1] = c[1];

  n2[0] = -n1[0];
  n2[1] = ~n1[1];

  while (n2[1] != 0)
    {
      refmpn_mod2 (n1, n1, n2);

      MP_LIMB_T_SWAP (n1[0], n2[0]);
      MP_LIMB_T_SWAP (n1[1], n2[1]);
    }

  return n2[0];
}


/* Similar to mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
   particular the trial quotient is allowed to be 2 too big. */
mp_limb_t
refmpn_sb_divrem_mn (mp_ptr qp,
                     mp_ptr np, mp_size_t nsize,
                     mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t retval = 0;
  mp_size_t i;
  mp_limb_t d1 = dp[dsize-1];
  mp_ptr np_orig = refmpn_memdup_limbs (np, nsize);

  ASSERT (nsize >= dsize);
  /* ASSERT (dsize > 2); */
  ASSERT (dsize >= 2);
  ASSERT (dp[dsize-1] & GMP_LIMB_HIGHBIT);
  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  i = nsize-dsize;
  if (refmpn_cmp (np+i, dp, dsize) >= 0)
    {
      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
      retval = 1;
    }

  for (i--; i >= 0; i--)
    {
      mp_limb_t n0 = np[i+dsize];
      mp_limb_t n1 = np[i+dsize-1];
      mp_limb_t q, dummy_r;

      ASSERT (n0 <= d1);
      if (n0 == d1)
        q = MP_LIMB_T_MAX;
      else
        q = refmpn_udiv_qrnnd (&dummy_r, n0, n1, d1);

      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
      if (n0)
        {
          q--;
          if (! refmpn_add_n (np+i, np+i, dp, dsize))
            {
              q--;
              ASSERT (refmpn_add_n (np+i, np+i, dp, dsize) != 0);
            }
        }
      np[i+dsize] = 0;

      qp[i] = q;
    }

  /* remainder < divisor */
  ASSERT (refmpn_cmp (np, dp, dsize) < 0);

  /* multiply back to original */
  {
    mp_ptr mp = refmpn_malloc_limbs (nsize);

    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
    if (retval)
      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);

    free (mp);
  }

  free (np_orig);
  return retval;
}

/* Similar to mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
   particular the trial quotient is allowed to be 2 too big. */
void
refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                mp_ptr np, mp_size_t nsize,
                mp_srcptr dp, mp_size_t dsize)
{
  ASSERT (qxn == 0);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  if (dsize == 1)
    {
      rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
      return;
    }
  else
    {
      mp_ptr n2p = refmpn_malloc_limbs (nsize+1);
      mp_ptr d2p = refmpn_malloc_limbs (dsize);
      int norm = refmpn_count_leading_zeros (dp[dsize-1]);

      n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
      ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));

      refmpn_sb_divrem_mn (qp, n2p, nsize+1, d2p, dsize);
      refmpn_rshift_or_copy (rp, n2p, dsize, norm);

      /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
      free (n2p);
      free (d2p);
    }
}

size_t
refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
{
  unsigned char *d;
  size_t dsize;

  ASSERT (size >= 0);
  ASSERT (base >= 2);
  ASSERT (base < numberof (__mp_bases));
  ASSERT (size == 0 || src[size-1] != 0);
  ASSERT_MPN (src, size);

  MPN_SIZEINBASE (dsize, src, size, base);
  ASSERT (dsize >= 1);
  ASSERT (! byte_overlap_p (dst, dsize, src, size * BYTES_PER_MP_LIMB));

  if (size == 0)
    {
      dst[0] = 0;
      return 1;
    }

  /* don't clobber input for power of 2 bases */
  if (POW2_P (base))
    src = refmpn_memdup_limbs (src, size);

  d = dst + dsize;
  do
    {
      d--;
      ASSERT (d >= dst);
      *d = refmpn_divrem_1 (src, 0, src, size, (mp_limb_t) base);
      size -= (src[size-1] == 0);
    }
  while (size != 0);

  /* at most one leading zero */
  ASSERT (d == dst || d == dst+1);
  if (d != dst)
    *dst = 0;

  if (POW2_P (base))
    free (src);

  return dsize;
}


mp_limb_t
refmpn_bswap_limb (mp_limb_t src)
{
  mp_limb_t dst;
  int i;

  dst = 0;
  for (i = 0; i < BYTES_PER_MP_LIMB; i++)
    {
      dst = (dst << 8) + (src & 0xFF);
      src >>= 8;
    }
  return dst;
}


/* These random functions are mostly for transitional purposes while adding
   nail support, since they're independent of the normal mpn routines.  They
   can probably be removed when those normal routines are reliable, though
   perhaps something independent would still be useful at times. */

#if BITS_PER_MP_LIMB == 32
#define RAND_A  CNST_LIMB(0x29CF535)
#endif
#if BITS_PER_MP_LIMB == 64
#define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
#endif

mp_limb_t refmpn_random_seed;

mp_limb_t
refmpn_random_half (void)
{
  refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
  return (refmpn_random_seed >> BITS_PER_MP_LIMB/2);
}

mp_limb_t
refmpn_random_limb (void)
{
  return ((refmpn_random_half () << (BITS_PER_MP_LIMB/2))
          | refmpn_random_half ()) & GMP_NUMB_MASK;
}

void
refmpn_random (mp_ptr ptr, mp_size_t size)
{
  mp_size_t i;
  if (GMP_NAIL_BITS == 0)
    {
      mpn_random (ptr, size);
      return;
    }

  for (i = 0; i < size; i++)
    ptr[i] = refmpn_random_limb ();
}

void
refmpn_random2 (mp_ptr ptr, mp_size_t size)
{
  mp_size_t i;
  mp_limb_t bit, mask, limb;
  int run;

  if (GMP_NAIL_BITS == 0)
    {
      mpn_random2 (ptr, size);
      return;
    }

#define RUN_MODULUS 32

  /* start with ones at a random pos in the high limb */
  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
  mask = 0;
  run = 0;

  for (i = size-1; i >= 0; i--)
    {
      limb = 0;
      do
        {
          if (run == 0)
            {
              run = (refmpn_random_half () % RUN_MODULUS) + 1;
              mask = ~mask;
            }

          limb |= (bit & mask);
          bit >>= 1;
          run--;
        }
      while (bit != 0);

      ptr[i] = limb;
      bit = GMP_NUMB_HIGHBIT;
    }
}