OpenXM_contrib/gmp/mpn/cray/ieee/mul_1.c - annotate

Return to mul_1.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / cray / ieee
Annotation of OpenXM_contrib/gmp/mpn/cray/ieee/mul_1.c, Revision 1.1.1.1

1.1       ohara       1: /* Cray PVP/IEEE mpn_mul_1 -- multiply a limb vector with a limb and store the
                      2:    result in a second limb vector.
                      3:
                      4: Copyright 2000, 2001 Free Software Foundation, Inc.
                      5:
                      6: This file is part of the GNU MP Library.
                      7:
                      8: The GNU MP Library is free software; you can redistribute it and/or modify
                      9: it under the terms of the GNU Lesser General Public License as published by
                     10: the Free Software Foundation; either version 2.1 of the License, or (at your
                     11: option) any later version.
                     12:
                     13: The GNU MP Library is distributed in the hope that it will be useful, but
                     14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     16: License for more details.
                     17:
                     18: You should have received a copy of the GNU Lesser General Public License
                     19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: MA 02111-1307, USA.  */
                     22:
                     23: /* This code runs at 5 cycles/limb on a T90.  That would probably
                     24:    be hard to improve upon, even with assembly code.  */
                     25:
                     26: #include <intrinsics.h>
                     27: #include "gmp.h"
                     28: #include "gmp-impl.h"
                     29:
                     /* mpn_mul_1 -- multiply the n-limb vector at up by the single limb vl,
                        store the n low limbs of the product at rp, and return the remaining
                        most significant limb.

                        up[0], rp[0] and cy[0] are accessed unconditionally, so n is assumed
                        to be at least 1.  The carry extraction below shifts by 63, which
                        presumes 64-bit limbs.

                        The work is split into passes over the vector:
                          1. a multiply loop that forms each raw output limb and records the
                             carry out of that addition in cy[];
                          2. a carry add loop that adds cy[i-1] into rp[i] and counts, but
                             does not record, any carry this produces;
                          3. only if that count is non-zero, a scalar loop that locates
                             those carries and propagates them upwards.

                        _int_mult_upper comes from <intrinsics.h>; it is presumably the high
                        limb of the double-limb product -- TODO confirm against the Cray
                        documentation.  */
                     30: mp_limb_t
                     31: mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
                     32: {
                           /* Carry vector, a variable-length array of n limbs: cy[i] receives
                              the carry out of the addition that forms rp[i] in the multiply
                              loop.  */
                     33:   mp_limb_t cy[n];
                           /* NOTE(review): s1 is declared but never used in this function.  */
                     34:   mp_limb_t a, b, r, s0, s1, c0, c1;
                     35:   mp_size_t i;
                     36:   int more_carries;
                     37:
                     38:   if (up == rp)
                     39:     {
                     40:       /* The algorithm used below cannot handle overlap.  Handle it here by
                     41:         making a temporary copy of the source vector, then call ourselves.  */
                               /* With up == rp the multiply loop would read up[i - 1] after
                                  rp[i - 1] had already been overwritten, and the return
                                  statements would read up[n - 1] after rp[n - 1] was written.
                                  Only exact aliasing is detected here; partially overlapping
                                  operands are not checked for.  The copy xp is another
                                  n-limb variable-length array.  */
                     42:       mp_limb_t xp[n];
                     43:       MPN_COPY (xp, up, n);
                     44:       return mpn_mul_1 (rp, xp, n, vl);
                     45:     }
                     46:
                           /* Limb 0 is the (truncated) product up[0] * vl alone; no high part
                              of a lower product is added to it, so its carry entry is 0.  */
                     47:   a = up[0] * vl;
                     48:   rp[0] = a;
                     49:   cy[0] = 0;
                     50:
                     51:   /* Main multiply loop.  Generate a raw accumulated output product in rp[]
                     52:      and a carry vector in cy[].  */
                           /* NOTE(review): "ivdep" is presumably the Cray directive telling the
                              compiler to ignore assumed vector dependencies so that the loop
                              can be vectorised -- confirm against the compiler manual.  */
                     53: #pragma _CRI ivdep
                     54:   for (i = 1; i < n; i++)
                     55:     {
                               /* a: truncated product of this limb; b: upper part of the
                                  previous limb's product.  */
                     56:       a = up[i] * vl;
                     57:       b = _int_mult_upper (up[i - 1], vl);
                     58:       s0 = a + b;
                               /* Carry out of a + b, taken from the top bits: set when both
                                  operands have the top bit set, or when at least one has it
                                  set and the sum's top bit is clear.  */
                     59:       c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
                     60:       rp[i] = s0;
                     61:       cy[i] = c0;
                     62:     }
                     63:   /* Carry add loop.  Add the carry vector cy[] to the raw sum rp[] and
                     64:      store the new sum back to rp[].  */
                           /* The loop starts at 2 because cy[0] is 0, so rp[1] needs no
                              adjustment.  more_carries only counts the carries generated here;
                              their positions are recovered later by the scalar loop.  */
                     65:   more_carries = 0;
                     66: #pragma _CRI ivdep
                     67:   for (i = 2; i < n; i++)
                     68:     {
                     69:       r = rp[i];
                     70:       c0 = cy[i - 1];
                     71:       s0 = r + c0;
                     72:       rp[i] = s0;
                               /* The addend is 0 or 1 (it came from a shift by 63), so a
                                  carry out occurred exactly when the top bit went from set
                                  in r to clear in s0.  */
                     73:       c0 = (r & ~s0) >> 63;
                     74:       more_carries += c0;
                     75:     }
                     76:   /* If that second loop generated carry, handle that in scalar loop.  */
                     77:   if (more_carries)
                     78:     {
                               /* cyrec is the carry travelling from limb i - 1 into limb i.  */
                     79:       mp_limb_t cyrec = 0;
                     80:       /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
                     81:         These are where we got a recurrency carry.  */
                     82:       for (i = 2; i < n; i++)
                     83:        {
                     84:          r = rp[i];
                                  /* c0: the carry add loop wrapped this limb around to zero,
                                     so a carry is owed to the next limb.  */
                     85:          c0 = (r == 0 && cy[i - 1] != 0);
                     86:          s0 = r + cyrec;
                     87:          rp[i] = s0;
                                  /* c1: adding the incoming carry cyrec overflowed this limb.  */
                     88:          c1 = (r & ~s0) >> 63;
                                  /* Either source produces the carry into limb i + 1.  */
                     89:          cyrec = c0 | c1;
                     90:        }
                               /* Returned limb: upper part of the last product, plus the carry
                                  still travelling out of limb n - 1, plus the carry out of the
                                  raw sum that formed rp[n - 1].  */
                     91:       return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
                     92:     }
                     93:
                           /* No carries from the carry add loop: the returned limb is the upper
                              part of the last product plus the carry out of the raw sum that
                              formed rp[n - 1].  */
                     94:   return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
                     95: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>