/* Reference mpn functions, designed to be simple, portable and independent
   of the normal gmp code.  Speed isn't a consideration.  */

/*
Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.
*/


/* Most routines have assertions representing what the mpn routines are
   supposed to accept.  Many of these reference routines do sensible things
   outside those ranges (eg. for size==0), but the assertions are present to
   catch bad parameters here before they're passed, unchanged, to a real mpn
   routine being compared against.  */

/* always do assertion checking */
#define WANT_ASSERT  1

#include <stdio.h>  /* for NULL */
#include <stdlib.h> /* for malloc */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "ref.h"


/* Check overlap for a routine defined to work low to high. */
int
refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (!MPN_OVERLAP_P (dst, size, src, size) || dst <= src);
}

/* Check overlap for a routine defined to work high to low. */
int
refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (!MPN_OVERLAP_P (dst, size, src, size) || dst >= src);
}

/* Check overlap for a standard routine requiring equal or separate. */
int
refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst == src || !MPN_OVERLAP_P (dst, size, src, size));
}
int
refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
                               mp_size_t size)
{
  return (refmpn_overlap_fullonly_p (dst, src1, size)
          && refmpn_overlap_fullonly_p (dst, src2, size));
}


mp_ptr
refmpn_malloc_limbs (mp_size_t size)
{
  mp_ptr  p;
  ASSERT (size >= 0);
  if (size == 0)
    size = 1;
  p = (mp_ptr) malloc (size * BYTES_PER_MP_LIMB);
  ASSERT (p != NULL);
  return p;
}

mp_ptr
refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
{
  mp_ptr  p;
  p = refmpn_malloc_limbs (size);
  refmpn_copyi (p, ptr, size);
  return p;
}

void
refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
{
  mp_size_t  i;
  ASSERT (size >= 0);
  for (i = 0; i < size; i++)
    ptr[i] = value;
}

int
refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
{
  mp_size_t  i;
  for (i = 0; i < size; i++)
    if (ptr[i] != 0)
      return 0;
  return 1;
}

mp_limb_t
refmpn_msbone (mp_limb_t x)
{
  mp_limb_t  n = (mp_limb_t) 1 << (BITS_PER_MP_LIMB-1);

  while (n != 0)
    {
      if (x & n)
        break;
      n >>= 1;
    }
  return n;
}

/* a mask of the MSB one bit and all bits below */
mp_limb_t
refmpn_msbone_mask (mp_limb_t x)
{
  if (x == 0)
    return 0;

  return (refmpn_msbone (x) << 1) - 1;
}

void
refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t  i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 0);

  for (i = 0; i < size; i++)
    rp[i] = sp[i];
}

void
refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t  i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 0);

  for (i = size-1; i >= 0; i--)
    rp[i] = sp[i];
}

void
refmpn_com_n (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t  i;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);

  for (i = 0; i < size; i++)
    rp[i] = ~sp[i];
}


int
refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  mp_size_t  i;

  ASSERT (size >= 1);

  for (i = size-1; i >= 0; i--)
    {
      if (xp[i] > yp[i])  return 1;
      if (xp[i] < yp[i])  return -1;
    }
  return 0;
}

int
refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
                     mp_srcptr yp, mp_size_t ysize)
{
  int  opp, cmp;

  opp = (xsize < ysize);
  if (opp)
    MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);

  if (! refmpn_zero_p (xp+ysize, xsize-ysize))
    cmp = 1;
  else
    cmp = refmpn_cmp (xp, yp, ysize);

  return (opp ? -cmp : cmp);
}


#define LOGOPS(operation)                                               \
  {                                                                     \
    mp_size_t  i;                                                       \
                                                                        \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
    ASSERT (size >= 1);                                                 \
                                                                        \
    for (i = 0; i < size; i++)                                          \
      rp[i] = operation;                                                \
  }

void
refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & s2p[i]);
}
void
refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & ~s2p[i]);
}
void
refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (~(s1p[i] & s2p[i]));
}
void
refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | s2p[i]);
}
void
refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | ~s2p[i]);
}
void
refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (~(s1p[i] | s2p[i]));
}
void
refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] ^ s2p[i]);
}
void
refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (~(s1p[i] ^ s2p[i]));
}

/* set *w to x+y, return 0 or 1 carry */
mp_limb_t
add (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  *w = x + y;
  return *w < x;
}

/* set *w to x-y, return 0 or 1 borrow */
mp_limb_t
sub (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  *w = x - y;
  return *w > x;
}

/* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
mp_limb_t
adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
{
  mp_limb_t  r;
  ASSERT (c == 0 || c == 1);
  r = add (w, x, y);
  return r + add (w, *w, c);
}

/* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
mp_limb_t
sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
{
  mp_limb_t  r;
  ASSERT (c == 0 || c == 1);
  r = sub (w, x, y);
  return r + sub (w, *w, c);
}


#define AORS_1(operation)                               \
  {                                                     \
    mp_size_t  i;                                       \
                                                        \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
    ASSERT (size >= 1);                                 \
                                                        \
    for (i = 0; i < size; i++)                          \
      n = operation (&rp[i], sp[i], n);                 \
    return n;                                           \
  }

mp_limb_t
refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (add);
}
mp_limb_t
refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (sub);
}

#define AORS_NC(operation)                                              \
  {                                                                     \
    mp_size_t  i;                                                       \
                                                                        \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
    ASSERT (carry == 0 || carry == 1);                                  \
    ASSERT (size >= 1);                                                 \
                                                                        \
    for (i = 0; i < size; i++)                                          \
      carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
    return carry;                                                       \
  }

mp_limb_t
refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (adc);
}
mp_limb_t
refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (sbb);
}


mp_limb_t
refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_add_nc (rp, s1p, s2p, size, 0);
}
mp_limb_t
refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_sub_nc (rp, s1p, s2p, size, 0);
}


#define AORS(aors_n, aors_1)                                    \
  {                                                             \
    mp_limb_t  c;                                               \
    ASSERT (s1size >= s2size);                                  \
    ASSERT (s2size >= 1);                                       \
    c = aors_n (rp, s1p, s2p, s2size);                          \
    if (s1size-s2size != 0)                                     \
      c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \
    return c;                                                   \
  }
mp_limb_t
refmpn_add (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_add_n, refmpn_add_1);
}
mp_limb_t
refmpn_sub (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_sub_n, refmpn_sub_1);
}


#define SHIFTHIGH(x) ((x) << BITS_PER_MP_LIMB/2)
#define SHIFTLOW(x)  ((x) >> BITS_PER_MP_LIMB/2)

#define LOWMASK   (((mp_limb_t) 1 << BITS_PER_MP_LIMB/2)-1)
#define HIGHMASK  SHIFTHIGH(LOWMASK)

#define LOWPART(x)   ((x) & LOWMASK)
#define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)

/* set *hi,*lo to x*y */
void
mul (mp_limb_t *hi, mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  s;

  *lo = LOWPART(x) * LOWPART(y);
  *hi = HIGHPART(x) * HIGHPART(y);

  s = LOWPART(x) * HIGHPART(y);
  ASSERT_NOCARRY (add (hi, *hi, add (lo, *lo, SHIFTHIGH(LOWPART(s)))));
  ASSERT_NOCARRY (add (hi, *hi, HIGHPART(s)));

  s = HIGHPART(x) * LOWPART(y);
  ASSERT_NOCARRY (add (hi, *hi, add (lo, *lo, SHIFTHIGH(LOWPART(s)))));
  ASSERT_NOCARRY (add (hi, *hi, HIGHPART(s)));
}
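
/* A minimal consistency sketch for the half-limb multiply above: for a
   single limb x, the two-limb product from mul() must match what
   refmpn_mul_1 computes for the one-limb number {x} times y.  This assumes
   ref.h declares refmpn_mul_1 (defined further down); the helper name
   check_mul_one_limb is ours, not a GMP routine. */
static void
check_mul_one_limb (mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  hi, lo, rp[1];

  mul (&hi, &lo, x, y);

  /* refmpn_mul_1 returns the high limb and stores the low limb */
  ASSERT (refmpn_mul_1 (rp, &x, (mp_size_t) 1, y) == hi);
  ASSERT (rp[0] == lo);
}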

mp_limb_t
refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
               mp_limb_t carry)
{
  mp_size_t  i;
  mp_limb_t  hi, lo;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);

  for (i = 0; i < size; i++)
    {
      mul (&hi, &lo, sp[i], multiplier);
      ASSERT_NOCARRY (add (&hi, hi, add (&lo, lo, carry)));
      rp[i] = lo;
      carry = hi;
    }

  return carry;
}

mp_limb_t
refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_mul_1c (rp, sp, size, multiplier, 0);
}


#define AORSMUL_1C(operation_n)                                 \
  {                                                             \
    mp_ptr     p = refmpn_malloc_limbs (size);                  \
    mp_limb_t  ret;                                             \
                                                                \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
    ASSERT (size >= 1);                                         \
                                                                \
    ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
    ret += operation_n (rp, rp, p, size);                       \
                                                                \
    free (p);                                                   \
    return ret;                                                 \
  }

mp_limb_t
refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_add_n);
}
mp_limb_t
refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_sub_n);
}


mp_limb_t
refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_addmul_1c (rp, sp, size, multiplier, 0);
}
mp_limb_t
refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_submul_1c (rp, sp, size, multiplier, 0);
}


mp_limb_t
refmpn_addsub_nc (mp_ptr r1p, mp_ptr r2p,
                  mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
                  mp_limb_t carry)
{
  mp_ptr     p;
  mp_limb_t  acy, scy;

  /* Destinations can't overlap. */
  ASSERT (! MPN_OVERLAP_P (r1p, size, r2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
  ASSERT (size >= 1);

  /* The carry parameter and the return value pack two bits: bit 1 is the
     add carry, bit 0 is the subtract borrow. */
  p = refmpn_malloc_limbs (size);
  acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
  scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
  refmpn_copyi (r1p, p, size);
  free (p);
  return 2 * acy + scy;
}

mp_limb_t
refmpn_addsub_n (mp_ptr r1p, mp_ptr r2p,
                 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_addsub_nc (r1p, r2p, s1p, s2p, size, 0);
}
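
/* A hypothetical cross-check sketch: refmpn_addsub_n should agree with
   separate refmpn_add_n and refmpn_sub_n calls, with the two carries
   packed as 2*add_carry + sub_borrow as described above.  The helper name
   check_addsub_n is ours, not a GMP routine. */
static void
check_addsub_n (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  mp_ptr     sum   = refmpn_malloc_limbs (size);
  mp_ptr     diff  = refmpn_malloc_limbs (size);
  mp_ptr     sum2  = refmpn_malloc_limbs (size);
  mp_ptr     diff2 = refmpn_malloc_limbs (size);
  mp_limb_t  packed, acy, scy;

  packed = refmpn_addsub_n (sum, diff, s1p, s2p, size);
  acy = refmpn_add_n (sum2, s1p, s2p, size);
  scy = refmpn_sub_n (diff2, s1p, s2p, size);

  ASSERT (packed == 2 * acy + scy);
  ASSERT (refmpn_cmp (sum, sum2, size) == 0);
  ASSERT (refmpn_cmp (diff, diff2, size) == 0);

  free (sum);
  free (diff);
  free (sum2);
  free (diff2);
}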


/* Right shift hi,lo and return the low limb of the result.
   Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
mp_limb_t
rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
{
  ASSERT (shift >= 0 && shift < BITS_PER_MP_LIMB);
  if (shift == 0)
    return lo;
  else
    return (hi << (BITS_PER_MP_LIMB-shift)) | (lo >> shift);
}

/* Left shift hi,lo and return the high limb of the result.
   Note a shift by BITS_PER_MP_LIMB isn't assumed to work (doesn't on x86). */
mp_limb_t
lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
{
  ASSERT (shift >= 0 && shift < BITS_PER_MP_LIMB);
  if (shift == 0)
    return hi;
  else
    return (hi << shift) | (lo >> (BITS_PER_MP_LIMB-shift));
}


mp_limb_t
refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < BITS_PER_MP_LIMB);

  ret = rshift_make (sp[0], 0, shift);

  for (i = 0; i < size-1; i++)
    rp[i] = rshift_make (sp[i+1], sp[i], shift);

  rp[i] = rshift_make (0, sp[i], shift);
  return ret;
}

mp_limb_t
refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < BITS_PER_MP_LIMB);

  ret = lshift_make (0, sp[size-1], shift);

  for (i = size-2; i >= 0; i--)
    rp[i+1] = lshift_make (sp[i+1], sp[i], shift);

  rp[i+1] = lshift_make (sp[i+1], 0, shift);
  return ret;
}


/* Divide h,l by d, producing a quotient *q and remainder *r.
   Must have h < d.
   __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
void
div1 (mp_limb_t *q, mp_limb_t *r, mp_limb_t h, mp_limb_t l, mp_limb_t d)
{
  int  n;

  ASSERT (d != 0);
  ASSERT (h < d);

#if 0
  udiv_qrnnd (*q, *r, h, l, d);
  return;
#endif

  /* normalize d so its high bit is set, shifting h,l to match */
  for (n = 0; !(d & MP_LIMB_T_HIGHBIT); n++)
    d <<= 1;

  h = lshift_make (h, l, n);
  l <<= n;

  __udiv_qrnnd_c (*q, *r, h, l, d);
  *r >>= n;
}
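
/* A hypothetical sketch of the invariant div1 establishes: quotient and
   remainder recompose to the dividend, q*d + r == h*b + l with r < d,
   checked here with the mul and add helpers above.  check_div1 is our own
   name, not a GMP routine. */
static void
check_div1 (mp_limb_t h, mp_limb_t l, mp_limb_t d)
{
  mp_limb_t  q, r, thi, tlo;

  div1 (&q, &r, h, l, d);
  ASSERT (r < d);

  mul (&thi, &tlo, q, d);                                /* q*d */
  ASSERT_NOCARRY (add (&thi, thi, add (&tlo, tlo, r)));  /* + r */
  ASSERT (thi == h);
  ASSERT (tlo == l);
}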

mp_limb_t
refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                  mp_limb_t carry)
{
  mp_ptr     sp_orig;
  mp_ptr     prod;
  mp_limb_t  carry_orig;
  mp_size_t  i;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 0);
  ASSERT (carry < divisor);

  if (size == 0)
    return carry;

  sp_orig = refmpn_memdup_limbs (sp, size);
  prod = refmpn_malloc_limbs (size);
  carry_orig = carry;

  for (i = size-1; i >= 0; i--)
    div1 (&rp[i], &carry, carry, sp[i], divisor);

  /* check by multiplying back */
#if 0
  printf ("size=%ld  divisor=0x%lX  carry=0x%lX  remainder=0x%lX\n",
          size, divisor, carry_orig, carry);
  mpn_trace ("s", sp_orig, size);
  mpn_trace ("r", rp, size);
  printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
  mpn_trace ("p", prod, size);
#endif
  ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
  ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
  free (sp_orig);
  free (prod);

  return carry;
}

mp_limb_t
refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divmod_1c (rp, sp, size, divisor, 0);
}


mp_limb_t
refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
               mp_limb_t carry)
{
  mp_ptr  p = refmpn_malloc_limbs (size);
  carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
  free (p);
  return carry;
}

mp_limb_t
refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_mod_1c (sp, size, divisor, 0);
}

mp_limb_t
refmpn_mod_1_rshift (mp_srcptr sp, mp_size_t size,
                     unsigned shift, mp_limb_t divisor)
{
  mp_limb_t  r;
  mp_ptr     p = refmpn_malloc_limbs (size);
  refmpn_rshift (p, sp, size, shift);
  r = refmpn_mod_1 (p, size, divisor);
  free (p);
  return r;
}


/* Quotient of sp,size by divisor, plus xsize extra "fraction" quotient
   limbs developed from zero limbs appended below sp.  The integer part
   goes to rp+xsize, the fraction part to rp. */
mp_limb_t
refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
                  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                  mp_limb_t carry)
{
  mp_ptr  z;

  z = refmpn_malloc_limbs (xsize);
  refmpn_fill (z, xsize, 0);

  carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
  carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);

  free (z);
  return carry;
}

mp_limb_t
refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
                 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divrem_1c (rp, xsize, sp, size, divisor, 0);
}


/* As given in the manual, the divexact method gives quotient q and return
   value c satisfying

       c*b^n + a-i == 3*q

   where a=dividend, i=initial carry, b=2^BITS_PER_MP_LIMB, and n=size.

   If a-i is divisible by 3 then c==0 and a plain divmod gives the quotient.
   If (a-i)%3==r then c is a high limb tacked on that will turn r into 0.
   Because 2^BITS_PER_MP_LIMB == 1 mod 3 (so long as BITS_PER_MP_LIMB is
   even) it's enough to set c=3-r, ie. if r=1 then c=2, or if r=2 then c=1.

   If a-i produces a borrow then refmpn_sub_1 leaves a twos complement
   negative, ie. b^n+a-i, and the calculation produces c1 satisfying

       c1*b^n + b^n+a-i == 3*q

   From which clearly c=c1+1, so it's enough to just add any borrow to the
   return value otherwise calculated.

   A borrow only occurs when a==0 or a==1, and, by the same reasoning as in
   mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
   or 1 respectively, so with 1 added the final return value is still in the
   prescribed range 0 to 2. */

mp_limb_t
refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
{
  mp_ptr     spcopy;
  mp_limb_t  c, cs;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (carry <= 2);

  spcopy = refmpn_memdup_limbs (sp, size);
  cs = refmpn_sub_1 (spcopy, spcopy, size, carry);

  c = refmpn_divmod_1 (rp, spcopy, size, 3);
  if (c != 0)
    {
      ASSERT ((BITS_PER_MP_LIMB % 2) == 0);
      c = 3-c;
      ASSERT_NOCARRY (refmpn_divmod_1c (rp, spcopy, size, 3, c));
    }

  c += cs;
  ASSERT (c <= 2);

  free (spcopy);
  return c;
}

mp_limb_t
refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return refmpn_divexact_by3c (rp, sp, size, 0);
}
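
/* A hypothetical sketch checking the identity above: with quotient q and
   return value c, 3*q == c*b^n + a - i, so multiplying q back by 3 must
   reproduce a-i in the low limbs, with c (less any borrow from a-i)
   appearing as the high limb.  check_divexact_by3c is our own name, not a
   GMP routine. */
static void
check_divexact_by3c (mp_srcptr ap, mp_size_t size, mp_limb_t i)
{
  mp_ptr     q = refmpn_malloc_limbs (size);
  mp_ptr     t = refmpn_malloc_limbs (size);
  mp_limb_t  c, borrow, high;

  ASSERT (i <= 2);
  c = refmpn_divexact_by3c (q, ap, size, i);

  borrow = refmpn_sub_1 (t, ap, size, i);  /* t = a-i mod b^n */
  high = refmpn_mul_1 (q, q, size, 3);     /* q = 3*q mod b^n, high limb out */

  ASSERT (refmpn_cmp (q, t, size) == 0);
  ASSERT (high + borrow == c);

  free (q);
  free (t);
}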


/* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
void
refmpn_mul_basecase (mp_ptr prodp,
                     mp_srcptr up, mp_size_t usize,
                     mp_srcptr vp, mp_size_t vsize)
{
  mp_size_t  i;

  ASSERT (! MPN_OVERLAP_P (prodp, usize+vsize, up, usize));
  ASSERT (! MPN_OVERLAP_P (prodp, usize+vsize, vp, vsize));
  ASSERT (usize >= vsize);
  ASSERT (vsize >= 1);

  prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
  for (i = 1; i < vsize; i++)
    prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
}

void
refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  refmpn_mul_basecase (prodp, up, size, vp, size);
}

void
refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  refmpn_mul_basecase (dst, src, size, src, size);
}
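
/* A small hypothetical property test for the schoolbook loop above:
   multiplication is commutative, so for equal sizes the product must come
   out the same with the operands swapped.  check_mul_n_commutes is our own
   name, not a GMP routine. */
static void
check_mul_n_commutes (mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  mp_ptr  p1 = refmpn_malloc_limbs (2*size);
  mp_ptr  p2 = refmpn_malloc_limbs (2*size);

  refmpn_mul_n (p1, up, vp, size);
  refmpn_mul_n (p2, vp, up, size);
  ASSERT (refmpn_cmp (p1, p2, 2*size) == 0);

  free (p1);
  free (p2);
}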


mp_limb_t
refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
{
  mp_limb_t  x;
  int  twos;

  ASSERT (y != 0);
  ASSERT (! refmpn_zero_p (xp, xsize));

  x = mpn_mod_1 (xp, xsize, y);
  if (x == 0)
    return y;

  twos = 0;
  while ((x & 1) == 0 && (y & 1) == 0)
    {
      x >>= 1;
      y >>= 1;
      twos++;
    }

  for (;;)
    {
      while ((x & 1) == 0)  x >>= 1;
      while ((y & 1) == 0)  y >>= 1;

      if (x < y)
        MP_LIMB_T_SWAP (x, y);

      x -= y;
      if (x == 0)
        break;
    }

  return y << twos;
}
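
/* A hypothetical smoke test for the binary GCD above, checking just the
   necessary condition that the result divides both operands (it doesn't
   verify greatestness).  check_gcd_1_divides is our own name, not a GMP
   routine. */
static void
check_gcd_1_divides (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
{
  mp_limb_t  g = refmpn_gcd_1 (xp, xsize, y);

  ASSERT (refmpn_mod_1 (xp, xsize, g) == 0);  /* g divides x */
  ASSERT (y % g == 0);                        /* g divides y */
}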


unsigned
refmpn_count_trailing_zeros (mp_limb_t x)
{
  unsigned  n = 0;

  ASSERT (x != 0);
  while ((x & 1) == 0)
    {
      x >>= 1;
      n++;
    }
  return n;
}

mp_size_t
refmpn_strip_twos (mp_ptr p, mp_size_t size)
{
  mp_size_t  limbs;
  unsigned   shift;

  ASSERT (size >= 1);
  ASSERT (! refmpn_zero_p (p, size));

  for (limbs = 0; p[0] == 0; limbs++)
    {
      MPN_COPY_INCR (p, p+1, size-1);
      p[size-1] = 0;
    }

  shift = refmpn_count_trailing_zeros (p[0]);
  if (shift)
    refmpn_rshift (p, p, size, shift);

  return limbs*BITS_PER_MP_LIMB + shift;
}

mp_limb_t
refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
{
  int  cmp;

  ASSERT (ysize >= 1);
  ASSERT (xsize >= ysize);
  ASSERT ((xp[0] & 1) != 0);
  ASSERT ((yp[0] & 1) != 0);
  ASSERT (xp[xsize-1] != 0);
  ASSERT (yp[ysize-1] != 0);
  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
  ASSERT (! MPN_OVERLAP_P (xp, xsize, yp, ysize));
  if (xsize == ysize)
    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));

  refmpn_strip_twos (xp, xsize);
  MPN_NORMALIZE (xp, xsize);
  MPN_NORMALIZE (yp, ysize);

  for (;;)
    {
      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
      if (cmp == 0)
        break;
      if (cmp < 0)
        MPN_PTR_SWAP (xp,xsize, yp,ysize);

      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));

      refmpn_strip_twos (xp, xsize);
      MPN_NORMALIZE (xp, xsize);
    }

  refmpn_copyi (gp, xp, xsize);
  return xsize;
}


unsigned long
refmpn_popcount (mp_srcptr sp, mp_size_t size)
{
  unsigned long  count = 0;
  mp_size_t  i;
  int  j;
  mp_limb_t  l;

  ASSERT (size >= 0);
  for (i = 0; i < size; i++)
    {
      l = sp[i];
      for (j = 0; j < BITS_PER_MP_LIMB; j++)
        {
          count += (l & 1);
          l >>= 1;
        }
    }
  return count;
}

unsigned long
refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  mp_ptr  d;
  unsigned long  count;

  if (size == 0)
    return 0;

  d = refmpn_malloc_limbs (size);
  refmpn_xor_n (d, s1p, s2p, size);
  count = refmpn_popcount (d, size);
  free (d);
  return count;
}