Annotation of OpenXM_contrib/gmp/mpn/alpha/ev5/mode1o.c, Revision 1.1
1.1 ! ohara 1: /* Alpha EV5 mpn_modexact_1c_odd -- mpn by limb exact style remainder.
! 2:
! 3: cycles/limb
! 4: EV5: 30.0
! 5: EV6: 15.0
! 6: */
! 7:
! 8: /*
! 9: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
! 10:
! 11: This file is part of the GNU MP Library.
! 12:
! 13: The GNU MP Library is free software; you can redistribute it and/or modify
! 14: it under the terms of the GNU Lesser General Public License as published by
! 15: the Free Software Foundation; either version 2.1 of the License, or (at your
! 16: option) any later version.
! 17:
! 18: The GNU MP Library is distributed in the hope that it will be useful, but
! 19: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 20: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 21: License for more details.
! 22:
! 23: You should have received a copy of the GNU Lesser General Public License
! 24: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 25: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 26: MA 02111-1307, USA.
! 27: */
! 28:
! 29: #include "gmp.h"
! 30: #include "gmp-impl.h"
! 31: #include "longlong.h"
! 32:
! 33:
! 34: /* modlimb_invert is already faster than invert_limb or a "%", so the
! 35: modexact style can be used even at size==1.
! 36:
! 37: The dependent chain is a subtract (1), mul1 (13) and umulh (15), which
! 38: would suggest 29 is a lower bound, or maybe the measured 30 is already as
! 39: good as possible, not sure.
! 40:
! 41: For reference, ev6 runs this code at 15 cycles, which is 1 faster than
! 42: the generic loop at 16. But maybe something better is possible. */
! 43:
! 44: mp_limb_t
! 45: mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
! 46: {
! 47: mp_limb_t s, x, y, inverse, dummy;
! 48: mp_limb_t c = 0;
! 49: mp_size_t i;
! 50:
! 51: ASSERT (size >= 1);
! 52: ASSERT (d & 1);
! 53:
! 54: modlimb_invert (inverse, d);
! 55:
! 56: i = 0;
! 57: if (size == 1)
! 58: {
! 59: s = src[0];
! 60: goto last_step;
! 61: }
! 62:
! 63: do
! 64: {
! 65: s = src[i];
! 66: x = s - c;
! 67: c = (x > s);
! 68:
! 69: y = x - h;
! 70: c += (y > x);
! 71: y *= inverse;
! 72: umul_ppmm (h, dummy, y, d);
! 73: }
! 74: while (++i < size-1);
! 75:
! 76:
! 77: s = src[i];
! 78: if (s <= d)
! 79: {
! 80: /* With high<=d the final step can be a subtract and addback. If
! 81: c+h==0 then the addback will restore to l>=0. If c+h==d then will
! 82: get x==d if s==0, but that's ok per the function definition. */
! 83:
! 84: c += h;
! 85:
! 86: x = c - s;
! 87: if (x > c)
! 88: x += d;
! 89:
! 90: ASSERT (x <= d);
! 91: return x;
! 92: }
! 93: else
! 94: {
! 95: /* Can't skip a divide, just do the loop code once more. */
! 96: last_step:
! 97: x = s - c;
! 98: c = (x > s);
! 99:
! 100: y = x - h;
! 101: c += (y > x);
! 102:
! 103: y *= inverse;
! 104: umul_ppmm (h, dummy, y, d);
! 105:
! 106: c += h;
! 107:
! 108: ASSERT (c <= d);
! 109: return c;
! 110: }
! 111: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>