OpenXM_contrib/gmp/mpn/cray/ieee/mul_1.c - annotate

Return to mul_1.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / cray / ieee
Annotation of OpenXM_contrib/gmp/mpn/cray/ieee/mul_1.c, Revision 1.1

1.1     ! ohara       1: /* Cray PVP/IEEE mpn_mul_1 -- multiply a limb vector with a limb and store the
        !             2:    result in a second limb vector.
        !             3:
        !             4: Copyright 2000, 2001 Free Software Foundation, Inc.
        !             5:
        !             6: This file is part of the GNU MP Library.
        !             7:
        !             8: The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: it under the terms of the GNU Lesser General Public License as published by
        !            10: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            11: option) any later version.
        !            12:
        !            13: The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            16: License for more details.
        !            17:
        !            18: You should have received a copy of the GNU Lesser General Public License
        !            19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: MA 02111-1307, USA.  */
        !            22:
        !            23: /* This code runs at 5 cycles/limb on a T90.  That would probably
        !            24:    be hard to improve upon, even with assembly code.  */
        !            25:
        !            26: #include <intrinsics.h>
        !            27: #include "gmp.h"
        !            28: #include "gmp-impl.h"
        !            29:
        !            30: mp_limb_t  /* returns the limb carried out above rp[n - 1] */
        !            31: mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)  /* rp[0..n-1] receive the low n limbs of {up,n} * vl */
        !            32: {
        !            33:   mp_limb_t cy[n];  /* cy[i]: carry out of the raw sum stored in rp[i] */
        !            34:   mp_limb_t a, b, r, s0, s1, c0, c1;  /* NOTE(review): s1 is never used in this function */
        !            35:   mp_size_t i;
        !            36:   int more_carries;  /* running total of carries produced by the carry add loop */
        !            37:
        !            38:   if (up == rp)  /* only exact in-place use is detected; partial overlap is not checked */
        !            39:     {
        !            40:       /* The algorithm used below cannot handle overlap.  Handle it here by
        !            41:         making a temporary copy of the source vector, then call ourselves.  */
        !            42:       mp_limb_t xp[n];  /* stack copy of the n source limbs */
        !            43:       MPN_COPY (xp, up, n);
        !            44:       return mpn_mul_1 (rp, xp, n, vl);  /* xp is a distinct array, so the nested call skips this branch */
        !            45:     }
        !            46:
        !            47:   a = up[0] * vl;  /* low half of the first product; up[0] is read unconditionally, so callers presumably pass n >= 1 */
        !            48:   rp[0] = a;
        !            49:   cy[0] = 0;  /* nothing is added into rp[0], so it cannot carry */
        !            50:
        !            51:   /* Main multiply loop.  Generate a raw accumulated output product in rp[]
        !            52:      and a carry vector in cy[]; cy[i] is the carry out of the sum kept in rp[i].  */
        !            53: #pragma _CRI ivdep
        !            54:   for (i = 1; i < n; i++)
        !            55:     {
        !            56:       a = up[i] * vl;  /* low half of this limb's product */
        !            57:       b = _int_mult_upper (up[i - 1], vl);  /* presumably the high half of the previous limb's product (Cray intrinsic) -- confirm */
        !            58:       s0 = a + b;
        !            59:       c0 = ((a & b) | ((a | b) & ~s0)) >> 63;  /* carry out of a + b, from bit 63 of a, b and s0; assumes 64-bit limbs */
        !            60:       rp[i] = s0;
        !            61:       cy[i] = c0;
        !            62:     }
        !            63:   /* Carry add loop.  Add the carry vector cy[], moved up one limb, to the
        !            64:      raw sum rp[] and store the new sum back to rp[].  */
        !            65:   more_carries = 0;
        !            66: #pragma _CRI ivdep
        !            67:   for (i = 2; i < n; i++)  /* starts at 2: cy[0] is 0, so rp[1] needs no adjustment */
        !            68:     {
        !            69:       r = rp[i];
        !            70:       c0 = cy[i - 1];
        !            71:       s0 = r + c0;
        !            72:       rp[i] = s0;
        !            73:       c0 = (r & ~s0) >> 63;  /* 1 when bit 63 went from set to clear, i.e. adding the carry wrapped r */
        !            74:       more_carries += c0;
        !            75:     }
        !            76:   /* If that second loop generated carry, handle that in scalar loop.  */
        !            77:   if (more_carries)
        !            78:     {
        !            79:       mp_limb_t cyrec = 0;  /* carry propagated into the limb currently being fixed up */
        !            80:       /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
        !            81:         These are where the carry add loop wrapped and produced a further carry.  */
        !            82:       for (i = 2; i < n; i++)
        !            83:        {
        !            84:          r = rp[i];
        !            85:          c0 = (r == 0 && cy[i - 1] != 0);  /* limb i wrapped to zero in the carry add loop */
        !            86:          s0 = r + cyrec;
        !            87:          rp[i] = s0;
        !            88:          c1 = (r & ~s0) >> 63;  /* adding cyrec wrapped limb i */
        !            89:          cyrec = c0 | c1;  /* carry to apply to limb i + 1 */
        !            90:        }
        !            91:       return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];  /* top limb plus both carries out of rp[n - 1] */
        !            92:     }
        !            93:
        !            94:   return _int_mult_upper (up[n - 1], vl) + cy[n - 1];  /* no propagated carries: only cy[n - 1] is left to add */
        !            95: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>