[BACK]Return to add_n.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / cray

Annotation of OpenXM_contrib/gmp/mpn/cray/add_n.c, Revision 1.1.1.2

1.1.1.2 ! ohara       1: /* Cray PVP mpn_add_n -- add two limb vectors and store their sum in a third
        !             2:    limb vector.
1.1       maekawa     3:
1.1.1.2 ! ohara       4: Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
1.1       maekawa     5:
1.1.1.2 ! ohara       6: This file is part of the GNU MP Library.
1.1       maekawa     7:
1.1.1.2 ! ohara       8: The GNU MP Library is free software; you can redistribute it and/or modify
        !             9: it under the terms of the GNU Lesser General Public License as published by
        !            10: the Free Software Foundation; either version 2.1 of the License, or (at your
        !            11: option) any later version.
        !            12:
        !            13: The GNU MP Library is distributed in the hope that it will be useful, but
        !            14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
        !            16: License for more details.
        !            17:
        !            18: You should have received a copy of the GNU Lesser General Public License
        !            19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
        !            20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            21: MA 02111-1307, USA.  */
        !            22:
        !            23: /* This code runs at 4 cycles/limb.  It may be possible to bring it down
        !            24:    to 3 cycles/limb.  */
1.1       maekawa    25:
                     26: #include "gmp.h"
                     27: #include "gmp-impl.h"
                     28:
                     /* mpn_add_n -- add the n limbs at up[] to the n limbs at vp[], store the
                        n-limb sum at rp[], and return the carry out of the top limb.

                        The work is done in up to three passes over the limbs:
                          1. a raw limb-wise add that records each limb's carry-out in cy[];
                          2. an add of cy[i - 1] into rp[i] that only counts, and does not
                             propagate, any carry this second add produces;
                          3. a scalar pass, run only if pass 2 produced a carry, that
                             propagates those remaining carries limb by limb.
                        Passes 1 and 2 are marked with "#pragma _CRI ivdep" (presumably the
                        Cray "ignore vector dependencies" directive -- confirm against the
                        compiler manual).

                        n must be at least 1: cy[n - 1] is read on every return path.  cy[]
                        is an automatic variable-length array of n limbs.  The ">> 63" carry
                        extraction assumes mp_limb_t is a 64-bit unsigned type -- TODO confirm
                        against gmp.h for this target.  */
                     29: mp_limb_t
1.1.1.2 ! ohara      30: mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1.1       maekawa    31: {
1.1.1.2 ! ohara      32:   mp_limb_t cy[n];
        !            33:   mp_limb_t a, b, r, s0, c0, c1;
1.1       maekawa    34:   mp_size_t i;
1.1.1.2 ! ohara      35:   int more_carries;
1.1       maekawa    36:
1.1.1.2 ! ohara      37:   /* Main add loop.  Generate a raw output sum in rp[] and a carry vector
        !            38:      in cy[].  */
        !            39: #pragma _CRI ivdep
        !            40:   for (i = 0; i < n; i++)
1.1       maekawa    41:     {
1.1.1.2 ! ohara      42:       a = up[i];
        !            43:       b = vp[i];
        !            44:       s0 = a + b;
        !            45:       rp[i] = s0;
                               /* Carry out of a + b: bit 63 is set in both addends, or it is
                                  set in at least one addend yet clear in the sum.  */
        !            46:       c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
        !            47:       cy[i] = c0;
1.1       maekawa    48:     }
1.1.1.2 ! ohara      49:   /* Carry add loop.  Add the carry vector cy[] to the raw sum rp[] and
        !            50:      store the new sum back to rp[].  If this generates further carry, set
        !            51:      more_carries.  */
        !            52:   more_carries = 0;
                           /* Starts at limb 1: limb i receives the carry recorded for limb i - 1,
                              so rp[0] is left as the raw sum.  */
        !            53: #pragma _CRI ivdep
        !            54:   for (i = 1; i < n; i++)
1.1       maekawa    55:     {
1.1.1.2 ! ohara      56:       r = rp[i];
        !            57:       c0 = cy[i - 1];
        !            58:       s0 = r + c0;
        !            59:       rp[i] = s0;
                               /* Set when bit 63 was set in r and is clear in s0, i.e. adding
                                  the incoming carry wrapped this limb.  */
        !            60:       c0 = (r & ~s0) >> 63;
                               /* Accumulated as a count, but only tested for non-zero below.  */
        !            61:       more_carries += c0;
1.1       maekawa    62:     }
1.1.1.2 ! ohara      63:   /* If that second loop generated carry, handle that in scalar loop.  */
1.1       maekawa    64:   if (more_carries)
                     65:     {
                               /* cyrec is the carry still owed to the limb being visited.  */
1.1.1.2 ! ohara      66:       mp_limb_t cyrec = 0;
        !            67:       /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
        !            68:         These are where we got a recurrency carry.  */
        !            69:       for (i = 1; i < n; i++)
1.1       maekawa    70:        {
1.1.1.2 ! ohara      71:          r = rp[i];
                                  /* c0: the carry add loop wrapped this limb, so a carry is
                                     owed to limb i + 1.  r must be read before rp[i] is
                                     rewritten below.  */
        !            72:          c0 = (r == 0 && cy[i - 1] != 0);
        !            73:          s0 = r + cyrec;
        !            74:          rp[i] = s0;
                                  /* c1: adding the owed carry cyrec wrapped this limb.  */
        !            75:          c1 = (r & ~s0) >> 63;
        !            76:          cyrec = c0 | c1;
1.1       maekawa    77:        }
                               /* Carry out of the top limb: either from its raw add (cy[n - 1])
                                  or from the carry propagation above (cyrec).  */
1.1.1.2 ! ohara      78:       return cyrec | cy[n - 1];
1.1       maekawa    79:     }
                     80:
                           /* No limb wrapped in the carry add loop, so the top limb's raw carry
                              is the carry out of the whole addition.  */
1.1.1.2 ! ohara      81:   return cy[n - 1];
1.1       maekawa    82: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>