Annotation of OpenXM_contrib/gmp/mpn/generic/mode1o.c, Revision 1.1
1.1 ! ohara 1: /* mpn_modexact_1c_odd -- mpn by limb exact division style remainder.
! 2:
! 3: THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
! 4: CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
! 5: FUTURE GNU MP RELEASES.
! 6:
! 7: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
! 8:
! 9: This file is part of the GNU MP Library.
! 10:
! 11: The GNU MP Library is free software; you can redistribute it and/or modify
! 12: it under the terms of the GNU Lesser General Public License as published by
! 13: the Free Software Foundation; either version 2.1 of the License, or (at your
! 14: option) any later version.
! 15:
! 16: The GNU MP Library is distributed in the hope that it will be useful, but
! 17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 18: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 19: License for more details.
! 20:
! 21: You should have received a copy of the GNU Lesser General Public License
! 22: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 24: MA 02111-1307, USA.
! 25: */
! 26:
! 27: #include "gmp.h"
! 28: #include "gmp-impl.h"
! 29: #include "longlong.h"
! 30:
! 31:
! 32: /* Calculate an r satisfying
! 33:
! 34: r*b^k + a - c == q*d
! 35:
! 36: where b=2^BITS_PER_MP_LIMB, a is {src,size}, k is either size or size-1
! 37: (the caller won't know which), and q is the quotient (discarded). d must
! 38: be odd, c can be any limb value.
! 39:
! 40: If c<d then r will be in the range 0<=r<d, or if c>=d then 0<=r<=d.
! 41:
! 42: This slightly strange function suits the initial Nx1 reduction for GCDs
! 43: or Jacobi symbols since the factors of 2 in b^k can be ignored, leaving
! 44: -r == a mod d (by passing c=0). For a GCD the factor of -1 on r can be
! 45: ignored, or for the Jacobi symbol it can be accounted for. The function
! 46: also suits divisibility and congruence testing since if r=0 (or r=d) is
! 47: obtained then a==c mod d.
! 48:
! 49:
! 50: r is a bit like the remainder returned by mpn_divexact_by3c, and is the
! 51: sort of remainder a hypothetical mpn_divexact_1 might return. Like
! 52: mpn_divexact_by3c, r represents a borrow, since effectively quotient
! 53: limbs are chosen so that subtracting that multiple of d from src at each
! 54: step will produce a zero limb.
! 55:
! 56: A long calculation can be done piece by piece from low to high by passing
! 57: the return value from one part as the carry parameter to the next part.
! 58: The effective final k becomes anything between size and size-n, if n
! 59: pieces are used.
! 60:
! 61:
! 62: A similar sort of routine could be constructed based on adding multiples
! 63: of d at each limb, much like redc in mpz_powm does. Subtracting however
! 64: has a small advantage that when subtracting to cancel out l there's never
! 65: a borrow into h, whereas using an addition would put a carry into h
! 66: depending whether l==0 or l!=0.
! 67:
! 68:
! 69: In terms of efficiency, this function is similar to a mul-by-inverse
! 70: mpn_mod_1. Both are essentially two multiplies and are best suited to
! 71: CPUs with low latency multipliers (in comparison to a divide instruction
! 72: at least.) But modexact has a few less supplementary operations, only
! 73: needs low part and high part multiplies, and has fewer working quantities
! 74: (helping CPUs with few registers).
! 75:
! 76:
! 77: In the main loop it will be noted that the new carry (call it r) is the
! 78: sum of the high product h and any borrow from l=s-c. If c<d then we will
! 79: have r<d too, for the following reasons. Let q=l*inverse be the quotient
! 80: limb, so that q*d = b*h + l. Now if h=d-1 then
! 81:
! 82: l = q*d - b*(d-1) <= (b-1)*d - b*(d-1) = b-d
! 83:
! 84: But if l=s-c produces a borrow when c<d, then l>=b-d+1 and hence will
! 85: never have h=d-1 and so r=h+borrow <= d-1.
! 86:
! 87: When c>=d, on the other hand, h=d-1 can certainly occur together with a
! 88: borrow, thereby giving only r<=d, as per the function definition above.
! 89:
! 90: As a design decision it's left to the caller to check for r=d if it might
! 91: be passing c>=d. Several applications have c<d initially so the extra
! 92: test is often unnecessary, for example the GCDs or a plain divisibility
! 93: d|a test will pass c=0.
! 94:
! 95:
! 96: The special case for size==1 is so that it can be assumed c<=d in the
! 97: high<=divisor test at the end. c<=d is only guaranteed after at least
! 98: one iteration of the main loop. There's also a decent chance one % is
! 99: faster than a modlimb_invert, though that will depend on the processor.
! 100:
! 101: A CPU specific implementation might want to omit the size==1 code or the
! 102: high<divisor test. mpn/x86/k6/mode1o.asm for instance finds neither
! 103: useful. */
! 104:
! 105:
! 106: mp_limb_t
! 107: mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t c)
! 108: {
! 109: mp_limb_t s, h, l, inverse, dummy, dmul;
! 110: mp_size_t i;
! 111:
! 112: ASSERT (size >= 1);
! 113: ASSERT (d & 1);
! 114: ASSERT_MPN (src, size);
! 115: ASSERT_LIMB (d);
! 116: ASSERT_LIMB (c);
! 117:
! 118: if (size == 1)
! 119: {
! 120: s = src[0];
! 121: if (s > c)
! 122: {
! 123: l = s-c;
! 124: h = l % d;
! 125: if (h != 0)
! 126: h = d - h;
! 127: }
! 128: else
! 129: {
! 130: l = c-s;
! 131: h = l % d;
! 132: }
! 133: return h;
! 134: }
! 135:
! 136:
! 137: modlimb_invert (inverse, d);
! 138: dmul = d << GMP_NAIL_BITS;
! 139:
! 140: i = 0;
! 141: do
! 142: {
! 143: s = src[i];
! 144: SUBC_LIMB (c, l, s, c);
! 145: l = (l * inverse) & GMP_NUMB_MASK;
! 146: umul_ppmm (h, dummy, l, dmul);
! 147: c += h;
! 148: }
! 149: while (++i < size-1);
! 150:
! 151:
! 152: s = src[i];
! 153: if (s <= d)
! 154: {
! 155: /* With high<=d the final step can be a subtract and addback. If c==0
! 156: then the addback will restore to l>=0. If c==d then will get l==d
! 157: if s==0, but that's ok per the function definition. */
! 158:
! 159: l = c - s;
! 160: if (l > c)
! 161: l += d;
! 162:
! 163: ASSERT (l <= d);
! 164: return l;
! 165: }
! 166: else
! 167: {
! 168: /* Can't skip a divide, just do the loop code once more. */
! 169:
! 170: SUBC_LIMB (c, l, s, c);
! 171: l = (l * inverse) & GMP_NUMB_MASK;
! 172: umul_ppmm (h, dummy, l, dmul);
! 173: c += h;
! 174:
! 175: ASSERT (c <= d);
! 176: return c;
! 177: }
! 178: }
! 179:
! 180:
! 181:
! 182: #if 0
! 183:
! 184: /* The following is an alternate form that might shave one cycle on a
! 185: superscalar processor since it takes c+=h off the dependent chain,
! 186: leaving just a low product, high product, and a subtract.
! 187:
! 188: This is for CPU specific implementations to consider. A special case for
! 189: high<divisor and/or size==1 can be added if desired.
! 190:
! 191: Notice that c is only ever 0 or 1, since if s-c produces a borrow then
! 192: x=0xFF..FF and x-h cannot produce a borrow. The c=(x>s) could become
! 193: c=(x==0xFF..FF) too, if that helped. */
! 194:
! 195: mp_limb_t
! 196: mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
! 197: {
! 198: mp_limb_t s, x, y, inverse, dummy, dmul, c1, c2;
! 199: mp_limb_t c = 0;
! 200: mp_size_t i;
! 201:
! 202: ASSERT (size >= 1);
! 203: ASSERT (d & 1);
! 204:
! 205: modlimb_invert (inverse, d);
! 206: dmul = d << GMP_NAIL_BITS;
! 207:
! 208: for (i = 0; i < size; i++)
! 209: {
! 210: ASSERT (c==0 || c==1);
! 211:
! 212: s = src[i];
! 213: SUBC_LIMB (c1, x, s, c);
! 214:
! 215: SUBC_LIMB (c2, y, x, h);
! 216: c = c1 + c2;
! 217:
! 218: y = (y * inverse) & GMP_NUMB_MASK;
! 219: umul_ppmm (h, dummy, y, dmul);
! 220: }
! 221:
! 222: h += c;
! 223: return h;
! 224: }
! 225:
! 226: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>