OpenXM_contrib/gmp/mpn/alpha/ev5/mode1o.c - annotate

Return to mode1o.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / alpha / ev5
Annotation of OpenXM_contrib/gmp/mpn/alpha/ev5/mode1o.c, Revision 1.1

1.1     ! ohara       1: /* Alpha EV5 mpn_modexact_1c_odd -- mpn by limb exact style remainder.
        !             2:
        !             3:         cycles/limb
        !             4:    EV5:    30.0
        !             5:    EV6:    15.0
        !             6: */
        !             7:
        !             8: /*
        !             9: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
        !            10:
        !            11: This file is part of the GNU MP Library.
        !            12:
        !            13: The GNU MP Library is free software; you can redistribute it and/or modify
        !            14: it under the terms of the GNU Lesser General Public License as published by
        !            15: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            16: option) any later version.
        !            17:
        !            18: The GNU MP Library is distributed in the hope that it will be useful, but
        !            19: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            20: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            21: License for more details.
        !            22:
        !            23: You should have received a copy of the GNU Lesser General Public License
        !            24: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            25: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            26: MA 02111-1307, USA.
        !            27: */
        !            28:
        !            29: #include "gmp.h"
        !            30: #include "gmp-impl.h"
        !            31: #include "longlong.h"
        !            32:
        !            33:
        !            34: /* modlimb_invert is already faster than invert_limb or a "%", so the
        !            35:    modexact style can be used even at size==1.
        !            36:
        !            37:    The dependent chain is a subtract (1), mul1 (13) and umulh (15), which
        !            38:    would suggest 29 is a lower bound, or maybe the measured 30 is already as
        !            39:    good as possible, not sure.
        !            40:
        !            41:    For reference, ev6 runs this code at 15 cycles, which is 1 faster than
        !            42:    the generic loop at 16.  But maybe something better is possible.  */
        !            43:
        !            44: mp_limb_t
        !            45: mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
        !            46: {
        !            47:   mp_limb_t  s, x, y, inverse, dummy;
        !            48:   mp_limb_t  c = 0;
        !            49:   mp_size_t  i;
        !            50:
        !            51:   ASSERT (size >= 1);
        !            52:   ASSERT (d & 1);
        !            53:
        !            54:   modlimb_invert (inverse, d);
        !            55:
        !            56:   i = 0;
        !            57:   if (size == 1)
        !            58:     {
        !            59:       s = src[0];
        !            60:       goto last_step;
        !            61:     }
        !            62:
        !            63:   do
        !            64:     {
        !            65:       s = src[i];
        !            66:       x = s - c;
        !            67:       c = (x > s);
        !            68:
        !            69:       y = x - h;
        !            70:       c += (y > x);
        !            71:       y *= inverse;
        !            72:       umul_ppmm (h, dummy, y, d);
        !            73:     }
        !            74:   while (++i < size-1);
        !            75:
        !            76:
        !            77:   s = src[i];
        !            78:   if (s <= d)
        !            79:     {
        !            80:       /* With high<=d the final step can be a subtract and addback.  If
        !            81:         c+h==0 then the addback will restore to l>=0.  If c+h==d then will
        !            82:         get x==d if s==0, but that's ok per the function definition.  */
        !            83:
        !            84:       c += h;
        !            85:
        !            86:       x = c - s;
        !            87:       if (x > c)
        !            88:        x += d;
        !            89:
        !            90:       ASSERT (x <= d);
        !            91:       return x;
        !            92:     }
        !            93:   else
        !            94:     {
        !            95:       /* Can't skip a divide, just do the loop code once more. */
        !            96:     last_step:
        !            97:       x = s - c;
        !            98:       c = (x > s);
        !            99:
        !           100:       y = x - h;
        !           101:       c += (y > x);
        !           102:
        !           103:       y *= inverse;
        !           104:       umul_ppmm (h, dummy, y, d);
        !           105:
        !           106:       c += h;
        !           107:
        !           108:       ASSERT (c <= d);
        !           109:       return c;
        !           110:     }
        !           111: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>