Annotation of OpenXM_contrib/gmp/mpn/cray/ieee/submul_1.c, Revision 1.1
1.1 ! ohara 1: /* Cray PVP/IEEE mpn_submul_1 -- multiply a limb vector with a limb and
! 2: subtract the result from a second limb vector.
! 3:
! 4: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
! 5:
! 6: This file is part of the GNU MP Library.
! 7:
! 8: The GNU MP Library is free software; you can redistribute it and/or modify
! 9: it under the terms of the GNU Lesser General Public License as published by
! 10: the Free Software Foundation; either version 2.1 of the License, or (at your
! 11: option) any later version.
! 12:
! 13: The GNU MP Library is distributed in the hope that it will be useful, but
! 14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 16: License for more details.
! 17:
! 18: You should have received a copy of the GNU Lesser General Public License
! 19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 21: MA 02111-1307, USA. */
! 22:
! 23: /* This code runs at just under 9 cycles/limb on a T90. That is not perfect,
! 24: mainly due to vector register shortage in the main loop. Assembly code
! 25: should bring it down to perhaps 7 cycles/limb. */
! 26:
! 27: #include <intrinsics.h>
! 28: #include "gmp.h"
! 29: #include "gmp-impl.h"
! 30:
! 31: mp_limb_t
! 32: mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
! 33: {
! 34: mp_limb_t cy[n];
! 35: mp_limb_t a, b, r, s0, s1, c0, c1;
! 36: mp_size_t i;
! 37: int more_carries;
! 38:
! 39: if (up == rp)
! 40: {
! 41: /* The algorithm used below cannot handle overlap. Handle it here by
! 42: making a temporary copy of the source vector, then call ourselves. */
! 43: mp_limb_t xp[n];
! 44: MPN_COPY (xp, up, n);
! 45: return mpn_submul_1 (rp, xp, n, vl);
! 46: }
! 47:
! 48: a = up[0] * vl;
! 49: r = rp[0];
! 50: s0 = r - a;
! 51: rp[0] = s0;
! 52: c1 = ((s0 & a) | ((s0 | a) & ~r)) >> 63;
! 53: cy[0] = c1;
! 54:
! 55: /* Main multiply loop. Generate a raw accumulated output product in rp[]
! 56: and a carry vector in cy[]. */
! 57: #pragma _CRI ivdep
! 58: for (i = 1; i < n; i++)
! 59: {
! 60: a = up[i] * vl;
! 61: b = _int_mult_upper (up[i - 1], vl);
! 62: s0 = a + b;
! 63: c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
! 64: r = rp[i];
! 65: s1 = r - s0;
! 66: rp[i] = s1;
! 67: c1 = ((s1 & s0) | ((s1 | s0) & ~r)) >> 63;
! 68: cy[i] = c0 + c1;
! 69: }
! 70: /* Carry subtract loop. Subtract the carry vector cy[] from the raw result
! 71: rp[] and store the new result back to rp[]. */
! 72: more_carries = 0;
! 73: #pragma _CRI ivdep
! 74: for (i = 1; i < n; i++)
! 75: {
! 76: r = rp[i];
! 77: c0 = cy[i - 1];
! 78: s0 = r - c0;
! 79: rp[i] = s0;
! 80: c0 = (s0 & ~r) >> 63;
! 81: more_carries += c0;
! 82: }
! 83: /* If that second loop generated carry, handle that in scalar loop. */
! 84: if (more_carries)
! 85: {
! 86: mp_limb_t cyrec = 0;
! 87: /* Look for places where rp[k] == ~0 and cy[k-1] == 1 or
! 88: rp[k] == ~1 and cy[k-1] == 2.
! 89: These are where we got a recurrency carry. */
! 90: for (i = 1; i < n; i++)
! 91: {
! 92: r = rp[i];
! 93: c0 = ~r < cy[i - 1];
! 94: s0 = r - cyrec;
! 95: rp[i] = s0;
! 96: c1 = (s0 & ~r) >> 63;
! 97: cyrec = c0 | c1;
! 98: }
! 99: return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
! 100: }
! 101:
! 102: return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
! 103: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>