version 1.1.1.1, 2000/09/09 14:12:22 |
version 1.1.1.2, 2003/08/25 16:06:18 |
|
|
/* mpn_add_n -- Add two limb vectors of equal, non-zero length. |
/* Cray PVP mpn_add_n -- add two limb vectors and store their sum in a third |
For Cray vector processors. |
limb vector. |
|
|
Copyright (C) 1996, 2000 Free Software Foundation, Inc. |
Copyright 1996, 2000, 2001 Free Software Foundation, Inc. |
|
|
This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
|
|
The GNU MP Library is free software; you can redistribute it and/or modify |
The GNU MP Library is free software; you can redistribute it and/or modify |
it under the terms of the GNU Lesser General Public License as published by |
it under the terms of the GNU Lesser General Public License as published by |
the Free Software Foundation; either version 2.1 of the License, or (at your |
the Free Software Foundation; either version 2.1 of the License, or (at your |
option) any later version. |
option) any later version. |
|
|
The GNU MP Library is distributed in the hope that it will be useful, but |
The GNU MP Library is distributed in the hope that it will be useful, but |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
License for more details. |
License for more details. |
|
|
You should have received a copy of the GNU Lesser General Public License |
You should have received a copy of the GNU Lesser General Public License |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
MA 02111-1307, USA. */ |
MA 02111-1307, USA. */ |
|
|
|
/* This code runs at 4 cycles/limb. It may be possible to bring it down |
|
to 3 cycles/limb. */ |
|
|
#include "gmp.h" |
#include "gmp.h" |
#include "gmp-impl.h" |
#include "gmp-impl.h" |
|
|
mp_limb_t |
mp_limb_t |
mpn_add_n (c, a, b, n) |
mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) |
mp_ptr c; |
|
mp_srcptr a, b; |
|
mp_size_t n; |
|
{ |
{ |
|
mp_limb_t cy[n]; |
|
mp_limb_t a, b, r, s0, c0, c1; |
mp_size_t i; |
mp_size_t i; |
mp_size_t nm1 = n - 1; |
int more_carries; |
int more_carries = 0; |
|
int carry_out; |
|
|
|
/* For small operands the non-vector code is faster. */ |
/* Main add loop. Generate a raw output sum in rp[] and a carry vector |
if (n < 16) |
in cy[]. */ |
goto sequential; |
#pragma _CRI ivdep |
|
for (i = 0; i < n; i++) |
if (a == c || b == c) |
|
{ |
{ |
TMP_DECL (marker); |
a = up[i]; |
TMP_MARK (marker); |
b = vp[i]; |
if (c == a) |
s0 = a + b; |
{ |
rp[i] = s0; |
/* allocate temp space for a */ |
c0 = ((a & b) | ((a | b) & ~s0)) >> 63; |
mp_ptr ax = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB); |
cy[i] = c0; |
MPN_COPY (ax, a, n); |
|
a = (mp_srcptr) ax; |
|
} |
|
if (c == b) |
|
{ |
|
/* allocate temp space for b */ |
|
mp_ptr bx = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB); |
|
MPN_COPY (bx, b, n); |
|
b = (mp_srcptr) bx; |
|
} |
|
carry_out = mpn_add_n (c, a, b, n); |
|
TMP_FREE (marker); |
|
return carry_out; |
|
} |
} |
|
/* Carry add loop. Add the carry vector cy[] to the raw sum rp[] and |
carry_out = a[nm1] + b[nm1] < a[nm1]; |
store the new sum back to rp[0]. If this generates further carry, set |
|
more_carries. */ |
#pragma _CRI ivdep /* Cray PVP systems */ |
more_carries = 0; |
for (i = nm1; i > 0; i--) |
#pragma _CRI ivdep |
|
for (i = 1; i < n; i++) |
{ |
{ |
int cy_in; |
r = rp[i]; |
cy_in = a[i - 1] + b[i - 1] < a[i - 1]; |
c0 = cy[i - 1]; |
c[i] = a[i] + b[i] + cy_in; |
s0 = r + c0; |
more_carries += c[i] < cy_in; |
rp[i] = s0; |
|
c0 = (r & ~s0) >> 63; |
|
more_carries += c0; |
} |
} |
c[0] = a[0] + b[0]; |
/* If that second loop generated carry, handle that in scalar loop. */ |
|
|
if (more_carries) |
if (more_carries) |
{ |
{ |
/* This won't vectorize, but we should come here rarely. */ |
mp_limb_t cyrec = 0; |
int cy; |
/* Look for places where rp[k] is zero and cy[k-1] is non-zero. |
sequential: |
These are where we got a recurrency carry. */ |
cy = 0; |
for (i = 1; i < n; i++) |
for (i = 0; i < n; i++) |
|
{ |
{ |
mp_limb_t ai, ci, t; |
r = rp[i]; |
ai = a[i]; |
c0 = (r == 0 && cy[i - 1] != 0); |
t = b[i] + cy; |
s0 = r + cyrec; |
cy = t < cy; |
rp[i] = s0; |
ci = ai + t; |
c1 = (r & ~s0) >> 63; |
cy += ci < ai; |
cyrec = c0 | c1; |
c[i] = ci; |
|
} |
} |
carry_out = cy; |
return cyrec | cy[n - 1]; |
} |
} |
|
|
return carry_out; |
return cy[n - 1]; |
} |
} |