OpenXM_contrib/gmp/mpn/generic/mode1o.c - annotate

Return to mode1o.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / generic
Annotation of OpenXM_contrib/gmp/mpn/generic/mode1o.c, Revision 1.1.1.1

1.1       ohara       1: /* mpn_modexact_1c_odd -- mpn by limb exact division style remainder.
                      2:
                      3:    THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
                      4:    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
                      5:    FUTURE GNU MP RELEASES.
                      6:
                      7: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
                      8:
                      9: This file is part of the GNU MP Library.
                     10:
                     11: The GNU MP Library is free software; you can redistribute it and/or modify
                     12: it under the terms of the GNU Lesser General Public License as published by
                     13: the Free Software Foundation; either version 2.1 of the License, or (at your
                     14: option) any later version.
                     15:
                     16: The GNU MP Library is distributed in the hope that it will be useful, but
                     17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     18: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     19: License for more details.
                     20:
                     21: You should have received a copy of the GNU Lesser General Public License
                     22: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     24: MA 02111-1307, USA.
                     25: */
                     26:
                     27: #include "gmp.h"
                     28: #include "gmp-impl.h"
                     29: #include "longlong.h"
                     30:
                     31:
                     32: /* Calculate an r satisfying
                     33:
                     34:            r*b^k + a - c == q*d
                     35:
                     36:    where b=2^BITS_PER_MP_LIMB, a is {src,size}, k is either size or size-1
                     37:    (the caller won't know which), and q is the quotient (discarded).  d must
                     38:    be odd, c can be any limb value.
                     39:
                     40:    If c<d then r will be in the range 0<=r<d, or if c>=d then 0<=r<=d.
                     41:
                     42:    This slightly strange function suits the initial Nx1 reduction for GCDs
                     43:    or Jacobi symbols since the factors of 2 in b^k can be ignored, leaving
                     44:    -r == a mod d (by passing c=0).  For a GCD the factor of -1 on r can be
                     45:    ignored, or for the Jacobi symbol it can be accounted for.  The function
                     46:    also suits divisibility and congruence testing since if r=0 (or r=d) is
                     47:    obtained then a==c mod d.
                     48:
                     49:
                     50:    r is a bit like the remainder returned by mpn_divexact_by3c, and is the
                     51:    sort of remainder a hypothetical mpn_divexact_1 might return.  Like
                     52:    mpn_divexact_by3c, r represents a borrow, since effectively quotient
                     53:    limbs are chosen so that subtracting that multiple of d from src at each
                     54:    step will produce a zero limb.
                     55:
                     56:    A long calculation can be done piece by piece from low to high by passing
                     57:    the return value from one part as the carry parameter to the next part.
                     58:    The effective final k becomes anything between size and size-n, if n
                     59:    pieces are used.
                     60:
                     61:
                     62:    A similar sort of routine could be constructed based on adding multiples
                     63:    of d at each limb, much like redc in mpz_powm does.  Subtracting however
                     64:    has a small advantage that when subtracting to cancel out l there's never
                     65:    a borrow into h, whereas using an addition would put a carry into h
                     66:    depending whether l==0 or l!=0.
                     67:
                     68:
                     69:    In terms of efficiency, this function is similar to a mul-by-inverse
                     70:    mpn_mod_1.  Both are essentially two multiplies and are best suited to
                     71:    CPUs with low latency multipliers (in comparison to a divide instruction
                     72:    at least.)  But modexact has a few less supplementary operations, only
                     73:    needs low part and high part multiplies, and has fewer working quantities
                     74:    (helping CPUs with few registers).
                     75:
                     76:
                     77:    In the main loop it will be noted that the new carry (call it r) is the
                     78:    sum of the high product h and any borrow from l=s-c.  If c<d then we will
                     79:    have r<d too, for the following reasons.  Let q=l*inverse be the quotient
                     80:    limb, so that q*d = b*h + l.  Now if h=d-1 then
                     81:
                     82:        l = q*d - b*(d-1) <= (b-1)*d - b*(d-1) = b-d
                     83:
                     84:    But if l=s-c produces a borrow when c<d, then l>=b-d+1 and hence will
                     85:    never have h=d-1 and so r=h+borrow <= d-1.
                     86:
                     87:    When c>=d, on the other hand, h=d-1 can certainly occur together with a
                     88:    borrow, thereby giving only r<=d, as per the function definition above.
                     89:
                     90:    As a design decision it's left to the caller to check for r=d if it might
                     91:    be passing c>=d.  Several applications have c<d initially so the extra
                     92:    test is often unnecessary, for example the GCDs or a plain divisibility
                     93:    d|a test will pass c=0.
                     94:
                     95:
                     96:    The special case for size==1 is so that it can be assumed c<=d in the
                     97:    high<=divisor test at the end.  c<=d is only guaranteed after at least
                     98:    one iteration of the main loop.  There's also a decent chance one % is
                     99:    faster than a modlimb_invert, though that will depend on the processor.
                    100:
                    101:    A CPU specific implementation might want to omit the size==1 code or the
                    102:    high<divisor test.  mpn/x86/k6/mode1o.asm for instance finds neither
                    103:    useful.  */
                    104:
                    105:
                    106: mp_limb_t
                    107: mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t c)
                    108: {
                    109:   mp_limb_t  s, h, l, inverse, dummy, dmul;
                    110:   mp_size_t  i;
                    111:
                    112:   ASSERT (size >= 1);
                    113:   ASSERT (d & 1);
                    114:   ASSERT_MPN (src, size);
                    115:   ASSERT_LIMB (d);
                    116:   ASSERT_LIMB (c);
                    117:
                    118:   if (size == 1)
                    119:     {
                    120:       s = src[0];
                    121:       if (s > c)
                    122:        {
                    123:          l = s-c;
                    124:          h = l % d;
                    125:          if (h != 0)
                    126:            h = d - h;
                    127:        }
                    128:       else
                    129:        {
                    130:          l = c-s;
                    131:          h = l % d;
                    132:        }
                    133:       return h;
                    134:     }
                    135:
                    136:
                    137:   modlimb_invert (inverse, d);
                    138:   dmul = d << GMP_NAIL_BITS;
                    139:
                    140:   i = 0;
                    141:   do
                    142:     {
                    143:       s = src[i];
                    144:       SUBC_LIMB (c, l, s, c);
                    145:       l = (l * inverse) & GMP_NUMB_MASK;
                    146:       umul_ppmm (h, dummy, l, dmul);
                    147:       c += h;
                    148:     }
                    149:   while (++i < size-1);
                    150:
                    151:
                    152:   s = src[i];
                    153:   if (s <= d)
                    154:     {
                    155:       /* With high<=d the final step can be a subtract and addback.  If c==0
                    156:         then the addback will restore to l>=0.  If c==d then will get l==d
                    157:         if s==0, but that's ok per the function definition.  */
                    158:
                    159:       l = c - s;
                    160:       if (l > c)
                    161:        l += d;
                    162:
                    163:       ASSERT (l <= d);
                    164:       return l;
                    165:     }
                    166:   else
                    167:     {
                    168:       /* Can't skip a divide, just do the loop code once more. */
                    169:
                    170:       SUBC_LIMB (c, l, s, c);
                    171:       l = (l * inverse) & GMP_NUMB_MASK;
                    172:       umul_ppmm (h, dummy, l, dmul);
                    173:       c += h;
                    174:
                    175:       ASSERT (c <= d);
                    176:       return c;
                    177:     }
                    178: }
                    179:
                    180:
                    181:
                    182: #if 0
                    183:
                    184: /* The following is an alternate form that might shave one cycle on a
                    185:    superscalar processor since it takes c+=h off the dependent chain,
                    186:    leaving just a low product, high product, and a subtract.
                    187:
                    188:    This is for CPU specific implementations to consider.  A special case for
                    189:    high<divisor and/or size==1 can be added if desired.
                    190:
                    191:    Notice that c is only ever 0 or 1, since if s-c produces a borrow then
                    192:    x=0xFF..FF and x-h cannot produce a borrow.  The c=(x>s) could become
                    193:    c=(x==0xFF..FF) too, if that helped.  */
                    194:
                    195: mp_limb_t
                    196: mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
                    197: {
                    198:   mp_limb_t  s, x, y, inverse, dummy, dmul, c1, c2;
                    199:   mp_limb_t  c = 0;
                    200:   mp_size_t  i;
                    201:
                    202:   ASSERT (size >= 1);
                    203:   ASSERT (d & 1);
                    204:
                    205:   modlimb_invert (inverse, d);
                    206:   dmul = d << GMP_NAIL_BITS;
                    207:
                    208:   for (i = 0; i < size; i++)
                    209:     {
                    210:       ASSERT (c==0 || c==1);
                    211:
                    212:       s = src[i];
                    213:       SUBC_LIMB (c1, x, s, c);
                    214:
                    215:       SUBC_LIMB (c2, y, x, h);
                    216:       c = c1 + c2;
                    217:
                    218:       y = (y * inverse) & GMP_NUMB_MASK;
                    219:       umul_ppmm (h, dummy, y, dmul);
                    220:     }
                    221:
                    222:   h += c;
                    223:   return h;
                    224: }
                    225:
                    226: #endif
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>