OpenXM_contrib/gmp/mpn/cray/ieee/submul_1.c - annotate

Return to submul_1.c CVS log
Up to [local] / OpenXM_contrib / gmp / mpn / cray / ieee
Annotation of OpenXM_contrib/gmp/mpn/cray/ieee/submul_1.c, Revision 1.1.1.1

1.1       ohara       1: /* Cray PVP/IEEE mpn_submul_1 -- multiply a limb vector with a limb and
                      2:    subtract the result from a second limb vector.
                      3:
                      4: Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
                      5:
                      6: This file is part of the GNU MP Library.
                      7:
                      8: The GNU MP Library is free software; you can redistribute it and/or modify
                      9: it under the terms of the GNU Lesser General Public License as published by
                     10: the Free Software Foundation; either version 2.1 of the License, or (at your
                     11: option) any later version.
                     12:
                     13: The GNU MP Library is distributed in the hope that it will be useful, but
                     14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
                     16: License for more details.
                     17:
                     18: You should have received a copy of the GNU Lesser General Public License
                     19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     21: MA 02111-1307, USA.  */
                     22:
                     23: /* This code runs at just under 9 cycles/limb on a T90.  That is not perfect,
                     24:    mainly due to vector register shortage in the main loop.  Assembly code
                     25:    should bring it down to perhaps 7 cycles/limb.  */
                     26:
                     27: #include <intrinsics.h>
                     28: #include "gmp.h"
                     29: #include "gmp-impl.h"
                     30:
                     31: mp_limb_t
                     32: mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
                     33: {
                           /* Multiply the n-limb vector up[] by the single limb vl and subtract
                              the product from the n-limb vector rp[], storing the difference back
                              into rp[].  The value returned is _int_mult_upper (up[n - 1], vl)
                              plus the borrows recorded for the top limb.

                              The work is split into three passes:
                                1. a main loop that forms each raw difference limb independently
                                   and records its carry/borrow count (0, 1 or 2) in cy[];
                                2. a loop that subtracts cy[i - 1] from rp[i];
                                3. a scalar fix-up loop, run only if pass 2 itself borrowed.

                              NOTE(review): up[0] and rp[0] are read unconditionally, so n is
                              assumed to be at least 1 -- confirm against the callers.
                              NOTE(review): the ">> 63" extractions below assume 64-bit limbs.
                              NOTE(review): _int_mult_upper is not defined in this file; it is
                              presumably the Cray intrinsic giving the high limb of the
                              double-limb product -- confirm against the Cray documentation.  */
                     34:   mp_limb_t cy[n];
                     35:   mp_limb_t a, b, r, s0, s1, c0, c1;
                     36:   mp_size_t i;
                     37:   int more_carries;
                     38:
                           /* Only exact aliasing (up == rp) is detected here.
                              NOTE(review): partially overlapping operands are not checked.  */
                     39:   if (up == rp)
                     40:     {
                     41:       /* The algorithm used below cannot handle overlap.  Handle it here by
                     42:         making a temporary copy of the source vector, then call ourselves.  */
                     43:       mp_limb_t xp[n];
                     44:       MPN_COPY (xp, up, n);
                     45:       return mpn_submul_1 (rp, xp, n, vl);
                     46:     }
                     47:
                           /* Limb 0 is handled outside the loop because it has no upper product
                              half coming in from a previous limb.  */
                     48:   a = up[0] * vl;
                     49:   r = rp[0];
                     50:   s0 = r - a;
                     51:   rp[0] = s0;
                           /* Top bit of this expression is the borrow out of s0 = r - a.  */
                     52:   c1 = ((s0 & a) | ((s0 | a) & ~r)) >> 63;
                     53:   cy[0] = c1;
                     54:
                     55:   /* Main multiply loop.  Generate a raw accumulated output product in rp[]
                     56:      and a carry vector in cy[].  */
                           /* NOTE(review): the ivdep pragma presumably tells the Cray compiler to
                              ignore assumed loop-carried dependencies so the loop vectorizes;
                              each iteration writes only rp[i] and cy[i].  */
                     57: #pragma _CRI ivdep
                     58:   for (i = 1; i < n; i++)
                     59:     {
                               /* a: product of the current limb (as truncated to one limb);
                                  b: upper product half carried in from the previous limb.  */
                     60:       a = up[i] * vl;
                     61:       b = _int_mult_upper (up[i - 1], vl);
                     62:       s0 = a + b;
                               /* c0: carry out of the addition a + b.  */
                     63:       c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
                     64:       r = rp[i];
                     65:       s1 = r - s0;
                     66:       rp[i] = s1;
                               /* c1: borrow out of the subtraction r - s0.  */
                     67:       c1 = ((s1 & s0) | ((s1 | s0) & ~r)) >> 63;
                               /* Both are owed by the next limb, so cy[i] is 0, 1 or 2.  */
                     68:       cy[i] = c0 + c1;
                     69:     }
                     70:   /* Carry subtract loop.  Subtract the carry vector cy[] from the raw result
                     71:      rp[] and store the new result back to rp[].  */
                     72:   more_carries = 0;
                     73: #pragma _CRI ivdep
                     74:   for (i = 1; i < n; i++)
                     75:     {
                     76:       r = rp[i];
                     77:       c0 = cy[i - 1];
                     78:       s0 = r - c0;
                     79:       rp[i] = s0;
                               /* Since c0 is at most 2, the subtraction wrapped exactly when the
                                  top bit went from clear (r) to set (s0).  Count those events;
                                  they are not propagated in this loop.  */
                     80:       c0 = (s0 & ~r) >> 63;
                     81:       more_carries += c0;
                     82:     }
                     83:   /* If that second loop generated carry, handle that in scalar loop.  */
                     84:   if (more_carries)
                     85:     {
                               /* cyrec: borrow still to be taken from the limb being visited.  */
                     86:       mp_limb_t cyrec = 0;
                     87:       /* Look for places where rp[k] == ~0 and cy[k-1] == 1 or
                     88:         rp[k] == ~1 and cy[k-1] == 2.
                     89:         These are where we got a recurrency carry.  */
                     90:       for (i = 1; i < n; i++)
                     91:        {
                     92:          r = rp[i];
                                   /* c0: the carry subtract loop wrapped at this limb.  */
                     93:          c0 = ~r < cy[i - 1];
                     94:          s0 = r - cyrec;
                     95:          rp[i] = s0;
                                   /* c1: subtracting the incoming cyrec wrapped at this limb.  */
                     96:          c1 = (s0 & ~r) >> 63;
                                   /* Either event makes the next limb owe one more.  */
                     97:          cyrec = c0 | c1;
                     98:        }
                               /* The borrow left over from the top limb is added to the result.  */
                     99:       return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
                    100:     }
                    101:
                    102:   return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
                    103: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>