OpenXM_contrib/gmp/mpn/cray/ieee/addmul_1.c - annotate

Return to addmul_1.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / cray / ieee
Annotation of OpenXM_contrib/gmp/mpn/cray/ieee/addmul_1.c, Revision 1.1

1.1     ! ohara       1: /* Cray PVP/IEEE mpn_addmul_1 -- multiply a limb vector with a limb and add the
        !             2:    result to a second limb vector.
        !             3:
        !             4: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
        !             5:
        !             6: This file is part of the GNU MP Library.
        !             7:
        !             8: The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: it under the terms of the GNU Lesser General Public License as published by
        !            10: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            11: option) any later version.
        !            12:
        !            13: The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            16: License for more details.
        !            17:
        !            18: You should have received a copy of the GNU Lesser General Public License
        !            19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: MA 02111-1307, USA.  */
        !            22:
        !            23: /* This code runs at just under 9 cycles/limb on a T90.  That is not perfect,
        !            24:    mainly due to vector register shortage in the main loop.  Assembly code
        !            25:    should bring it down to perhaps 7 cycles/limb.  */
        !            26:
        !            27: #include <intrinsics.h>
        !            28: #include "gmp.h"
        !            29: #include "gmp-impl.h"
        !            30:
        !            31: mp_limb_t
        !            32: mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
        !            33: {
        !            34:   mp_limb_t cy[n];
        !            35:   mp_limb_t a, b, r, s0, s1, c0, c1;
        !            36:   mp_size_t i;
        !            37:   int more_carries;
        !            38:
        !            39:   if (up == rp)
        !            40:     {
        !            41:       /* The algorithm used below cannot handle overlap.  Handle it here by
        !            42:         making a temporary copy of the source vector, then call ourselves.  */
        !            43:       mp_limb_t xp[n];
        !            44:       MPN_COPY (xp, up, n);
        !            45:       return mpn_addmul_1 (rp, xp, n, vl);
        !            46:     }
        !            47:
        !            48:   a = up[0] * vl;
        !            49:   r = rp[0];
        !            50:   s0 = a + r;
        !            51:   rp[0] = s0;
        !            52:   c0 = ((a & r) | ((a | r) & ~s0)) >> 63;
        !            53:   cy[0] = c0;
        !            54:
        !            55:   /* Main multiply loop.  Generate a raw accumulated output product in rp[]
        !            56:      and a carry vector in cy[].  */
        !            57: #pragma _CRI ivdep
        !            58:   for (i = 1; i < n; i++)
        !            59:     {
        !            60:       a = up[i] * vl;
        !            61:       b = _int_mult_upper (up[i - 1], vl);
        !            62:       s0 = a + b;
        !            63:       c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
        !            64:       r = rp[i];
        !            65:       s1 = s0 + r;
        !            66:       rp[i] = s1;
        !            67:       c1 = ((s0 & r) | ((s0 | r) & ~s1)) >> 63;
        !            68:       cy[i] = c0 + c1;
        !            69:     }
        !            70:   /* Carry add loop.  Add the carry vector cy[] to the raw result rp[] and
        !            71:      store the new result back to rp[].  */
        !            72:   more_carries = 0;
        !            73: #pragma _CRI ivdep
        !            74:   for (i = 1; i < n; i++)
        !            75:     {
        !            76:       r = rp[i];
        !            77:       c0 = cy[i - 1];
        !            78:       s0 = r + c0;
        !            79:       rp[i] = s0;
        !            80:       c0 = (r & ~s0) >> 63;
        !            81:       more_carries += c0;
        !            82:     }
        !            83:   /* If that second loop generated carry, handle that in scalar loop.  */
        !            84:   if (more_carries)
        !            85:     {
        !            86:       mp_limb_t cyrec = 0;
        !            87:       /* Look for places where rp[k] == 0 and cy[k-1] == 1 or
        !            88:         rp[k] == 1 and cy[k-1] == 2.
        !            89:         These are where we got a recurrency carry.  */
        !            90:       for (i = 1; i < n; i++)
        !            91:        {
        !            92:          r = rp[i];
        !            93:          c0 = r < cy[i - 1];
        !            94:          s0 = r + cyrec;
        !            95:          rp[i] = s0;
        !            96:          c1 = (r & ~s0) >> 63;
        !            97:          cyrec = c0 | c1;
        !            98:        }
        !            99:       return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
        !           100:     }
        !           101:
        !           102:   return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
        !           103: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>