Annotation of OpenXM_contrib/gmp/mpn/cray/ieee/mul_1.c, Revision 1.1
1.1 ! ohara 1: /* Cray PVP/IEEE mpn_mul_1 -- multiply a limb vector with a limb and store the
! 2: result in a second limb vector.
! 3:
! 4: Copyright 2000, 2001 Free Software Foundation, Inc.
! 5:
! 6: This file is part of the GNU MP Library.
! 7:
! 8: The GNU MP Library is free software; you can redistribute it and/or modify
! 9: it under the terms of the GNU Lesser General Public License as published by
! 10: the Free Software Foundation; either version 2.1 of the License, or (at your
! 11: option) any later version.
! 12:
! 13: The GNU MP Library is distributed in the hope that it will be useful, but
! 14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: License for more details.
! 17:
! 18: You should have received a copy of the GNU Lesser General Public License
! 19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: MA 02111-1307, USA. */
! 22:
! 23: /* This code runs at 5 cycles/limb on a T90. That would probably
! 24: be hard to improve upon, even with assembly code. */
! 25:
! 26: #include <intrinsics.h>
! 27: #include "gmp.h"
! 28: #include "gmp-impl.h"
! 29:
! 30: mp_limb_t
! 31: mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
! 32: {
! 33: mp_limb_t cy[n];
! 34: mp_limb_t a, b, r, s0, s1, c0, c1;
! 35: mp_size_t i;
! 36: int more_carries;
! 37:
! 38: if (up == rp)
! 39: {
! 40: /* The algorithm used below cannot handle overlap. Handle it here by
! 41: making a temporary copy of the source vector, then call ourselves. */
! 42: mp_limb_t xp[n];
! 43: MPN_COPY (xp, up, n);
! 44: return mpn_mul_1 (rp, xp, n, vl);
! 45: }
! 46:
! 47: a = up[0] * vl;
! 48: rp[0] = a;
! 49: cy[0] = 0;
! 50:
! 51: /* Main multiply loop. Generate a raw accumulated output product in rp[]
! 52: and a carry vector in cy[]. */
! 53: #pragma _CRI ivdep
! 54: for (i = 1; i < n; i++)
! 55: {
! 56: a = up[i] * vl;
! 57: b = _int_mult_upper (up[i - 1], vl);
! 58: s0 = a + b;
! 59: c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
! 60: rp[i] = s0;
! 61: cy[i] = c0;
! 62: }
! 63: /* Carry add loop. Add the carry vector cy[] to the raw sum rp[] and
! 64: store the new sum back to rp[0]. */
! 65: more_carries = 0;
! 66: #pragma _CRI ivdep
! 67: for (i = 2; i < n; i++)
! 68: {
! 69: r = rp[i];
! 70: c0 = cy[i - 1];
! 71: s0 = r + c0;
! 72: rp[i] = s0;
! 73: c0 = (r & ~s0) >> 63;
! 74: more_carries += c0;
! 75: }
! 76: /* If that second loop generated carry, handle that in scalar loop. */
! 77: if (more_carries)
! 78: {
! 79: mp_limb_t cyrec = 0;
! 80: /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
! 81: These are where we got a recurrency carry. */
! 82: for (i = 2; i < n; i++)
! 83: {
! 84: r = rp[i];
! 85: c0 = (r == 0 && cy[i - 1] != 0);
! 86: s0 = r + cyrec;
! 87: rp[i] = s0;
! 88: c1 = (r & ~s0) >> 63;
! 89: cyrec = c0 | c1;
! 90: }
! 91: return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
! 92: }
! 93:
! 94: return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
! 95: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>