Annotation of OpenXM_contrib/gmp/mpn/alpha/ev5/mode1o.c, Revision 1.1.1.1
1.1 ohara 1: /* Alpha EV5 mpn_modexact_1c_odd -- mpn by limb exact style remainder.
2:
3: cycles/limb
4: EV5: 30.0
5: EV6: 15.0
6: */
7:
8: /*
9: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
10:
11: This file is part of the GNU MP Library.
12:
13: The GNU MP Library is free software; you can redistribute it and/or modify
14: it under the terms of the GNU Lesser General Public License as published by
15: the Free Software Foundation; either version 2.1 of the License, or (at your
16: option) any later version.
17:
18: The GNU MP Library is distributed in the hope that it will be useful, but
19: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
21: License for more details.
22:
23: You should have received a copy of the GNU Lesser General Public License
24: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
25: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
26: MA 02111-1307, USA.
27: */
28:
29: #include "gmp.h"
30: #include "gmp-impl.h"
31: #include "longlong.h"
32:
33:
34: /* modlimb_invert is already faster than invert_limb or a "%", so the
35: modexact style can be used even at size==1.
36:
37: The dependent chain is a subtract (1), mul1 (13) and umulh (15), which
38: would suggest 29 is a lower bound, or maybe the measured 30 is already as
39: good as possible, not sure.
40:
41: For reference, ev6 runs this code at 15 cycles, which is 1 faster than
42: the generic loop at 16. But maybe something better is possible. */
43:
44: mp_limb_t
45: mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
46: {
47: mp_limb_t s, x, y, inverse, dummy;
48: mp_limb_t c = 0;
49: mp_size_t i;
50:
51: ASSERT (size >= 1);
52: ASSERT (d & 1);
53:
54: modlimb_invert (inverse, d);
55:
56: i = 0;
57: if (size == 1)
58: {
59: s = src[0];
60: goto last_step;
61: }
62:
63: do
64: {
65: s = src[i];
66: x = s - c;
67: c = (x > s);
68:
69: y = x - h;
70: c += (y > x);
71: y *= inverse;
72: umul_ppmm (h, dummy, y, d);
73: }
74: while (++i < size-1);
75:
76:
77: s = src[i];
78: if (s <= d)
79: {
80: /* With high<=d the final step can be a subtract and addback. If
81: c+h==0 then the addback will restore to l>=0. If c+h==d then will
82: get x==d if s==0, but that's ok per the function definition. */
83:
84: c += h;
85:
86: x = c - s;
87: if (x > c)
88: x += d;
89:
90: ASSERT (x <= d);
91: return x;
92: }
93: else
94: {
95: /* Can't skip a divide, just do the loop code once more. */
96: last_step:
97: x = s - c;
98: c = (x > s);
99:
100: y = x - h;
101: c += (y > x);
102:
103: y *= inverse;
104: umul_ppmm (h, dummy, y, d);
105:
106: c += h;
107:
108: ASSERT (c <= d);
109: return c;
110: }
111: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>