Annotation of OpenXM_contrib/gmp/mpn/cray/ieee/mul_1.c, Revision 1.1.1.1
1.1 ohara 1: /* Cray PVP/IEEE mpn_mul_1 -- multiply a limb vector with a limb and store the
2: result in a second limb vector.
3:
4: Copyright 2000, 2001 Free Software Foundation, Inc.
5:
6: This file is part of the GNU MP Library.
7:
8: The GNU MP Library is free software; you can redistribute it and/or modify
9: it under the terms of the GNU Lesser General Public License as published by
10: the Free Software Foundation; either version 2.1 of the License, or (at your
11: option) any later version.
12:
13: The GNU MP Library is distributed in the hope that it will be useful, but
14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16: License for more details.
17:
18: You should have received a copy of the GNU Lesser General Public License
19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21: MA 02111-1307, USA. */
22:
23: /* This code runs at 5 cycles/limb on a T90. That would probably
24: be hard to improve upon, even with assembly code. */
25:
26: #include <intrinsics.h>
27: #include "gmp.h"
28: #include "gmp-impl.h"
29:
30: mp_limb_t
31: mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
32: {
33: mp_limb_t cy[n];
34: mp_limb_t a, b, r, s0, s1, c0, c1;
35: mp_size_t i;
36: int more_carries;
37:
38: if (up == rp)
39: {
40: /* The algorithm used below cannot handle overlap. Handle it here by
41: making a temporary copy of the source vector, then call ourselves. */
42: mp_limb_t xp[n];
43: MPN_COPY (xp, up, n);
44: return mpn_mul_1 (rp, xp, n, vl);
45: }
46:
47: a = up[0] * vl;
48: rp[0] = a;
49: cy[0] = 0;
50:
51: /* Main multiply loop. Generate a raw accumulated output product in rp[]
52: and a carry vector in cy[]. */
53: #pragma _CRI ivdep
54: for (i = 1; i < n; i++)
55: {
56: a = up[i] * vl;
57: b = _int_mult_upper (up[i - 1], vl);
58: s0 = a + b;
59: c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
60: rp[i] = s0;
61: cy[i] = c0;
62: }
63: /* Carry add loop. Add the carry vector cy[] to the raw sum rp[] and
64: store the new sum back to rp[0]. */
65: more_carries = 0;
66: #pragma _CRI ivdep
67: for (i = 2; i < n; i++)
68: {
69: r = rp[i];
70: c0 = cy[i - 1];
71: s0 = r + c0;
72: rp[i] = s0;
73: c0 = (r & ~s0) >> 63;
74: more_carries += c0;
75: }
76: /* If that second loop generated carry, handle that in scalar loop. */
77: if (more_carries)
78: {
79: mp_limb_t cyrec = 0;
80: /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
81: These are where we got a recurrency carry. */
82: for (i = 2; i < n; i++)
83: {
84: r = rp[i];
85: c0 = (r == 0 && cy[i - 1] != 0);
86: s0 = r + cyrec;
87: rp[i] = s0;
88: c1 = (r & ~s0) >> 63;
89: cyrec = c0 | c1;
90: }
91: return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
92: }
93:
94: return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
95: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>