[BACK]Return to mode1o.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / generic

Annotation of OpenXM_contrib/gmp/mpn/generic/mode1o.c, Revision 1.1

1.1     ! ohara       1: /* mpn_modexact_1c_odd -- mpn by limb exact division style remainder.
        !             2:
        !             3:    THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
        !             4:    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
        !             5:    FUTURE GNU MP RELEASES.
        !             6:
        !             7: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
        !             8:
        !             9: This file is part of the GNU MP Library.
        !            10:
        !            11: The GNU MP Library is free software; you can redistribute it and/or modify
        !            12: it under the terms of the GNU Lesser General Public License as published by
        !            13: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            14: option) any later version.
        !            15:
        !            16: The GNU MP Library is distributed in the hope that it will be useful, but
        !            17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            18: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            19: License for more details.
        !            20:
        !            21: You should have received a copy of the GNU Lesser General Public License
        !            22: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            24: MA 02111-1307, USA.
        !            25: */
        !            26:
        !            27: #include "gmp.h"
        !            28: #include "gmp-impl.h"
        !            29: #include "longlong.h"
        !            30:
        !            31:
        !            32: /* Calculate an r satisfying
        !            33:
        !            34:            r*b^k + a - c == q*d
        !            35:
        !            36:    where b=2^BITS_PER_MP_LIMB, a is {src,size}, k is either size or size-1
        !            37:    (the caller won't know which), and q is the quotient (discarded).  d must
        !            38:    be odd, c can be any limb value.
        !            39:
        !            40:    If c<d then r will be in the range 0<=r<d, or if c>=d then 0<=r<=d.
        !            41:
        !            42:    This slightly strange function suits the initial Nx1 reduction for GCDs
        !            43:    or Jacobi symbols since the factors of 2 in b^k can be ignored, leaving
        !            44:    -r == a mod d (by passing c=0).  For a GCD the factor of -1 on r can be
        !            45:    ignored, or for the Jacobi symbol it can be accounted for.  The function
        !            46:    also suits divisibility and congruence testing since if r=0 (or r=d) is
        !            47:    obtained then a==c mod d.
        !            48:
        !            49:
        !            50:    r is a bit like the remainder returned by mpn_divexact_by3c, and is the
        !            51:    sort of remainder a hypothetical mpn_divexact_1 might return.  Like
        !            52:    mpn_divexact_by3c, r represents a borrow, since effectively quotient
        !            53:    limbs are chosen so that subtracting that multiple of d from src at each
        !            54:    step will produce a zero limb.
        !            55:
        !            56:    A long calculation can be done piece by piece from low to high by passing
        !            57:    the return value from one part as the carry parameter to the next part.
        !            58:    The effective final k becomes anything between size and size-n, if n
        !            59:    pieces are used.
        !            60:
        !            61:
        !            62:    A similar sort of routine could be constructed based on adding multiples
        !            63:    of d at each limb, much like redc in mpz_powm does.  Subtracting however
        !            64:    has a small advantage that when subtracting to cancel out l there's never
        !            65:    a borrow into h, whereas using an addition would put a carry into h
        !            66:    depending whether l==0 or l!=0.
        !            67:
        !            68:
        !            69:    In terms of efficiency, this function is similar to a mul-by-inverse
        !            70:    mpn_mod_1.  Both are essentially two multiplies and are best suited to
        !            71:    CPUs with low latency multipliers (in comparison to a divide instruction
        !            72:    at least.)  But modexact has a few less supplementary operations, only
        !            73:    needs low part and high part multiplies, and has fewer working quantities
        !            74:    (helping CPUs with few registers).
        !            75:
        !            76:
        !            77:    In the main loop it will be noted that the new carry (call it r) is the
        !            78:    sum of the high product h and any borrow from l=s-c.  If c<d then we will
        !            79:    have r<d too, for the following reasons.  Let q=l*inverse be the quotient
        !            80:    limb, so that q*d = b*h + l.  Now if h=d-1 then
        !            81:
        !            82:        l = q*d - b*(d-1) <= (b-1)*d - b*(d-1) = b-d
        !            83:
        !            84:    But if l=s-c produces a borrow when c<d, then l>=b-d+1 and hence will
        !            85:    never have h=d-1 and so r=h+borrow <= d-1.
        !            86:
        !            87:    When c>=d, on the other hand, h=d-1 can certainly occur together with a
        !            88:    borrow, thereby giving only r<=d, as per the function definition above.
        !            89:
        !            90:    As a design decision it's left to the caller to check for r=d if it might
        !            91:    be passing c>=d.  Several applications have c<d initially so the extra
        !            92:    test is often unnecessary, for example the GCDs or a plain divisibility
        !            93:    d|a test will pass c=0.
        !            94:
        !            95:
        !            96:    The special case for size==1 is so that it can be assumed c<=d in the
        !            97:    high<=divisor test at the end.  c<=d is only guaranteed after at least
        !            98:    one iteration of the main loop.  There's also a decent chance one % is
        !            99:    faster than a modlimb_invert, though that will depend on the processor.
        !           100:
        !           101:    A CPU specific implementation might want to omit the size==1 code or the
        !           102:    high<divisor test.  mpn/x86/k6/mode1o.asm for instance finds neither
        !           103:    useful.  */
        !           104:
        !           105:
        !           106: mp_limb_t
        !           107: mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t c)
        !           108: {
        !           109:   mp_limb_t  s, h, l, inverse, dummy, dmul;
        !           110:   mp_size_t  i;
        !           111:
        !           112:   ASSERT (size >= 1);
        !           113:   ASSERT (d & 1);
        !           114:   ASSERT_MPN (src, size);
        !           115:   ASSERT_LIMB (d);
        !           116:   ASSERT_LIMB (c);
        !           117:
        !           118:   if (size == 1)
        !           119:     {
        !           120:       s = src[0];
        !           121:       if (s > c)
        !           122:        {
        !           123:          l = s-c;
        !           124:          h = l % d;
        !           125:          if (h != 0)
        !           126:            h = d - h;
        !           127:        }
        !           128:       else
        !           129:        {
        !           130:          l = c-s;
        !           131:          h = l % d;
        !           132:        }
        !           133:       return h;
        !           134:     }
        !           135:
        !           136:
        !           137:   modlimb_invert (inverse, d);
        !           138:   dmul = d << GMP_NAIL_BITS;
        !           139:
        !           140:   i = 0;
        !           141:   do
        !           142:     {
        !           143:       s = src[i];
        !           144:       SUBC_LIMB (c, l, s, c);
        !           145:       l = (l * inverse) & GMP_NUMB_MASK;
        !           146:       umul_ppmm (h, dummy, l, dmul);
        !           147:       c += h;
        !           148:     }
        !           149:   while (++i < size-1);
        !           150:
        !           151:
        !           152:   s = src[i];
        !           153:   if (s <= d)
        !           154:     {
        !           155:       /* With high<=d the final step can be a subtract and addback.  If c==0
        !           156:         then the addback will restore to l>=0.  If c==d then will get l==d
        !           157:         if s==0, but that's ok per the function definition.  */
        !           158:
        !           159:       l = c - s;
        !           160:       if (l > c)
        !           161:        l += d;
        !           162:
        !           163:       ASSERT (l <= d);
        !           164:       return l;
        !           165:     }
        !           166:   else
        !           167:     {
        !           168:       /* Can't skip a divide, just do the loop code once more. */
        !           169:
        !           170:       SUBC_LIMB (c, l, s, c);
        !           171:       l = (l * inverse) & GMP_NUMB_MASK;
        !           172:       umul_ppmm (h, dummy, l, dmul);
        !           173:       c += h;
        !           174:
        !           175:       ASSERT (c <= d);
        !           176:       return c;
        !           177:     }
        !           178: }
        !           179:
        !           180:
        !           181:
        !           182: #if 0
        !           183:
        !           184: /* The following is an alternate form that might shave one cycle on a
        !           185:    superscalar processor since it takes c+=h off the dependent chain,
        !           186:    leaving just a low product, high product, and a subtract.
        !           187:
        !           188:    This is for CPU specific implementations to consider.  A special case for
        !           189:    high<divisor and/or size==1 can be added if desired.
        !           190:
        !           191:    Notice that c is only ever 0 or 1, since if s-c produces a borrow then
        !           192:    x=0xFF..FF and x-h cannot produce a borrow.  The c=(x>s) could become
        !           193:    c=(x==0xFF..FF) too, if that helped.  */
        !           194:
        !           195: mp_limb_t
        !           196: mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
        !           197: {
        !           198:   mp_limb_t  s, x, y, inverse, dummy, dmul, c1, c2;
        !           199:   mp_limb_t  c = 0;
        !           200:   mp_size_t  i;
        !           201:
        !           202:   ASSERT (size >= 1);
        !           203:   ASSERT (d & 1);
        !           204:
        !           205:   modlimb_invert (inverse, d);
        !           206:   dmul = d << GMP_NAIL_BITS;
        !           207:
        !           208:   for (i = 0; i < size; i++)
        !           209:     {
        !           210:       ASSERT (c==0 || c==1);
        !           211:
        !           212:       s = src[i];
        !           213:       SUBC_LIMB (c1, x, s, c);
        !           214:
        !           215:       SUBC_LIMB (c2, y, x, h);
        !           216:       c = c1 + c2;
        !           217:
        !           218:       y = (y * inverse) & GMP_NUMB_MASK;
        !           219:       umul_ppmm (h, dummy, y, dmul);
        !           220:     }
        !           221:
        !           222:   h += c;
        !           223:   return h;
        !           224: }
        !           225:
        !           226: #endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>