[BACK]Return to add_n.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / cray

Diff for /OpenXM_contrib/gmp/mpn/cray/Attic/add_n.c between version 1.1 and 1.1.1.2

version 1.1, 2000/09/09 14:12:22 version 1.1.1.2, 2003/08/25 16:06:18
Line 1 
Line 1 
 /* mpn_add_n -- Add two limb vectors of equal, non-zero length.  /* Cray PVP mpn_add_n -- add two limb vectors and store their sum in a third
    For Cray vector processors.     limb vector.
   
    Copyright (C) 1996, 2000 Free Software Foundation, Inc.  Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
   
    This file is part of the GNU MP Library.  This file is part of the GNU MP Library.
   
    The GNU MP Library is free software; you can redistribute it and/or modify  The GNU MP Library is free software; you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by  it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation; either version 2.1 of the License, or (at your  the Free Software Foundation; either version 2.1 of the License, or (at your
    option) any later version.  option) any later version.
   
    The GNU MP Library is distributed in the hope that it will be useful, but  The GNU MP Library is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
    License for more details.  License for more details.
   
    You should have received a copy of the GNU Lesser General Public License  You should have received a copy of the GNU Lesser General Public License
    along with the GNU MP Library; see the file COPYING.LIB.  If not, write to  along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,  the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
    MA 02111-1307, USA.  */  MA 02111-1307, USA.  */
   
   /* This code runs at 4 cycles/limb.  It may be possible to bring it down
      to 3 cycles/limb.  */
   
 #include "gmp.h"  #include "gmp.h"
 #include "gmp-impl.h"  #include "gmp-impl.h"
   
 mp_limb_t  mp_limb_t
 mpn_add_n (c, a, b, n)  mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
      mp_ptr c;  
      mp_srcptr a, b;  
      mp_size_t n;  
 {  {
     mp_limb_t cy[n];
     mp_limb_t a, b, r, s0, c0, c1;
   mp_size_t i;    mp_size_t i;
   mp_size_t nm1 = n - 1;    int more_carries;
   int more_carries = 0;  
   int carry_out;  
   
   /* For small operands the non-vector code is faster.  */    /* Main add loop.  Generate a raw output sum in rp[] and a carry vector
   if (n < 16)       in cy[].  */
     goto sequential;  #pragma _CRI ivdep
     for (i = 0; i < n; i++)
   if (a == c || b == c)  
     {      {
       TMP_DECL (marker);        a = up[i];
       TMP_MARK (marker);        b = vp[i];
       if (c == a)        s0 = a + b;
         {        rp[i] = s0;
           /* allocate temp space for a */        c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
           mp_ptr ax = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);        cy[i] = c0;
           MPN_COPY (ax, a, n);  
           a = (mp_srcptr) ax;  
         }  
       if (c == b)  
         {  
           /* allocate temp space for b */  
           mp_ptr bx = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);  
           MPN_COPY (bx, b, n);  
           b = (mp_srcptr) bx;  
         }  
       carry_out = mpn_add_n (c, a, b, n);  
       TMP_FREE (marker);  
       return carry_out;  
     }      }
     /* Carry add loop.  Add the carry vector cy[] to the raw sum rp[] and
   carry_out = a[nm1] + b[nm1] < a[nm1];       store the new sum back to rp[].  If this generates further carry, set
        more_carries.  */
 #pragma _CRI ivdep                      /* Cray PVP systems */    more_carries = 0;
   for (i = nm1; i > 0; i--)  #pragma _CRI ivdep
     for (i = 1; i < n; i++)
     {      {
       int cy_in;        r = rp[i];
       cy_in = a[i - 1] + b[i - 1] < a[i - 1];        c0 = cy[i - 1];
       c[i] = a[i] + b[i] + cy_in;        s0 = r + c0;
       more_carries += c[i] < cy_in;        rp[i] = s0;
         c0 = (r & ~s0) >> 63;
         more_carries += c0;
     }      }
   c[0] = a[0] + b[0];    /* If that second loop generated carry, handle that in scalar loop.  */
   
   if (more_carries)    if (more_carries)
     {      {
       /* This won't vectorize, but we should come here rarely.  */        mp_limb_t cyrec = 0;
       int cy;        /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
     sequential:           These are where we got a recurrence carry.  */
       cy = 0;        for (i = 1; i < n; i++)
       for (i = 0; i < n; i++)  
         {          {
           mp_limb_t ai, ci, t;            r = rp[i];
           ai = a[i];            c0 = (r == 0 && cy[i - 1] != 0);
           t = b[i] + cy;            s0 = r + cyrec;
           cy = t < cy;            rp[i] = s0;
           ci = ai + t;            c1 = (r & ~s0) >> 63;
           cy += ci < ai;            cyrec = c0 | c1;
           c[i] = ci;  
         }          }
       carry_out = cy;        return cyrec | cy[n - 1];
     }      }
   
   return carry_out;    return cy[n - 1];
 }  }

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.1.1.2

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>