OpenXM_contrib/gmp/mpn/alpha/ev5/mode1o.c - annotate

Return to mode1o.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / alpha / ev5
Annotation of OpenXM_contrib/gmp/mpn/alpha/ev5/mode1o.c, Revision 1.1.1.1

1.1       ohara       1: /* Alpha EV5 mpn_modexact_1c_odd -- mpn by limb exact style remainder.
                      2:
                      3:         cycles/limb
                      4:    EV5:    30.0
                      5:    EV6:    15.0
                      6: */
                      7:
                      8: /*
                      9: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
                     10:
                     11: This file is part of the GNU MP Library.
                     12:
                     13: The GNU MP Library is free software; you can redistribute it and/or modify
                     14: it under the terms of the GNU Lesser General Public License as published by
                     15: the Free Software Foundation; either version 2.1 of the License, or (at your
                     16: option) any later version.
                     17:
                     18: The GNU MP Library is distributed in the hope that it will be useful, but
                     19: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     20: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     21: License for more details.
                     22:
                     23: You should have received a copy of the GNU Lesser General Public License
                     24: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     25: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     26: MA 02111-1307, USA.
                     27: */
                     28:
                     29: #include "gmp.h"
                     30: #include "gmp-impl.h"
                     31: #include "longlong.h"
                     32:
                     33:
                     34: /* modlimb_invert is already faster than invert_limb or a "%", so the
                     35:    modexact style can be used even at size==1.
                     36:
                     37:    The dependent chain is a subtract (1), mul1 (13) and umulh (15), which
                     38:    would suggest 29 is a lower bound, or maybe the measured 30 is already as
                     39:    good as possible, not sure.
                     40:
                     41:    For reference, ev6 runs this code at 15 cycles, which is 1 faster than
                     42:    the generic loop at 16.  But maybe something better is possible.  */
                     43: /* Exact-style remainder of the size limbs at src by the odd limb d, with h as the initial carry-in; the result is asserted to be <= d. */
                     44: mp_limb_t
                     45: mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
                     46: {
                     47:   mp_limb_t  s, x, y, inverse, dummy;
                     48:   mp_limb_t  c = 0;  /* borrow carried from one limb step into the next */
                     49:   mp_size_t  i;
                     50: /* Preconditions: at least one limb, and d must be odd. */
                     51:   ASSERT (size >= 1);
                     52:   ASSERT (d & 1);
                     53: /* NOTE(review): modlimb_invert is defined outside this file; presumably it sets inverse so that inverse*d == 1 modulo the limb base -- confirm. */
                     54:   modlimb_invert (inverse, d);
                     55:
                     56:   i = 0;
                     57:   if (size == 1)
                     58:     {
                     59:       s = src[0];
                     60:       goto last_step;  /* single limb: bypass the loop and the s<=d shortcut, do one full step */
                     61:     }
                     62: /* Main loop handles limbs 0 .. size-2; the final limb is treated separately after the loop. */
                     63:   do
                     64:     {
                     65:       s = src[i];
                     66:       x = s - c;  /* subtract the borrow left by the previous step */
                     67:       c = (x > s);  /* 1 if that subtraction wrapped (assumes mp_limb_t is unsigned) */
                     68:
                     69:       y = x - h;  /* subtract h: the carry-in on the first pass, the previous high limb afterwards */
                     70:       c += (y > x);  /* add the borrow from this second subtraction */
                     71:       y *= inverse;
                     72:       umul_ppmm (h, dummy, y, d);  /* NOTE(review): presumably h:dummy = y*d (high:low); dummy is never read */
                     73:     }
                     74:   while (++i < size-1);
                     75:
                     76: /* The loop leaves i == size-1, so this fetches the last limb. */
                     77:   s = src[i];
                     78:   if (s <= d)
                     79:     {
                     80:       /* With high<=d the final step can be a subtract and addback.  If
                     81:         c+h==0 then the addback will restore to x>=0.  If c+h==d then will
                     82:         get x==d if s==0, but that's ok per the function definition.  */
                     83:
                     84:       c += h;  /* fold the pending borrow and the high limb into one value */
                     85:
                     86:       x = c - s;
                     87:       if (x > c)  /* the subtraction wrapped, i.e. c < s */
                     88:        x += d;
                     89:
                     90:       ASSERT (x <= d);
                     91:       return x;
                     92:     }
                     93:   else
                     94:     {
                     95:       /* Can't skip a divide, just do the loop code once more. */
                     96:     last_step:  /* also entered directly from the size==1 case above, with c == 0 */
                     97:       x = s - c;
                     98:       c = (x > s);
                     99:
                    100:       y = x - h;
                    101:       c += (y > x);
                    102:
                    103:       y *= inverse;
                    104:       umul_ppmm (h, dummy, y, d);
                    105:
                    106:       c += h;  /* result is the accumulated borrow plus the final high limb */
                    107:
                    108:       ASSERT (c <= d);
                    109:       return c;
                    110:     }
                    111: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>