version 1.1.1.1, 2000/09/09 14:12:24 |
version 1.1.1.2, 2003/08/25 16:06:20 |
|
|
/* mpn_divexact_by3 -- mpn division by 3, expecting no remainder. */ |
/* mpn_divexact_by3 -- mpn division by 3, expecting no remainder. */ |
|
|
/* |
/* |
Copyright (C) 2000 Free Software Foundation, Inc. |
Copyright 2000, 2001, 2002 Free Software Foundation, Inc. |
|
|
This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
|
|
Line 26 MA 02111-1307, USA. |
|
Line 26 MA 02111-1307, USA. |
|
#include "gmp-impl.h" |
#include "gmp-impl.h" |
|
|
|
|
/* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB. |
|
0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */ |
|
#define INVERSE_3 ((MP_LIMB_T_MAX / 3) * 2 + 1) |
|
|
|
|
|
/* The "c += ..."s are adding the high limb of 3*l to c. That high limb |
/* The "c += ..."s are adding the high limb of 3*l to c. That high limb |
will be 0, 1 or 2. Doing two separate "+="s seems to turn out better |
will be 0, 1 or 2. Doing two separate "+="s seems to turn out better |
code on gcc (as of 2.95.2 at least). |
code on gcc (as of 2.95.2 at least). |
|
|
When a subtraction of a 0,1,2 carry value causes a borrow, that leaves a |
When a subtraction of a 0,1,2 carry value causes a borrow, that leaves a |
limb value of either 0xFF...FF or 0xFF...FE and the multiply by INVERSE_3 |
limb value of either 0xFF...FF or 0xFF...FE and the multiply by |
gives 0x55...55 or 0xAA...AA respectively, producing a further borrow of |
MODLIMB_INVERSE_3 gives 0x55...55 or 0xAA...AA respectively, producing a |
only 0 or 1 respectively. Hence the carry out of each stage and for the |
further borrow of only 0 or 1 respectively. Hence the carry out of each |
return value is always only 0, 1 or 2. */ |
stage and for the return value is always only 0, 1 or 2. |
|
|
|
This implementation has each multiply successively dependent due to the |
|
"l=s-c", but the multiply src[i]*MODLIMB_INVERSE_3 could be scheduled |
|
back as far as desired, and the effect of the "-c" applied by subtracting |
|
0, 1 or 2 copies or MODLIMB_INVERSE_3 from the product. With some good |
|
scheduling in assembler it might come down to a dependent chain of maybe |
|
5 simple operations per limb. */ |
|
|
mp_limb_t |
mp_limb_t |
#if __STDC__ |
|
mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t c) |
mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t c) |
#else |
|
mpn_divexact_by3c (dst, src, size, c) |
|
mp_ptr dst; |
|
mp_srcptr src; |
|
mp_size_t size; |
|
mp_limb_t c; |
|
#endif |
|
{ |
{ |
mp_size_t i; |
mp_size_t i; |
|
|
ASSERT (size >= 1); |
ASSERT (size >= 1); |
|
ASSERT (c == 0 || c == 1 || c == 2); |
|
ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); |
|
|
i = 0; |
i = 0; |
do |
do |
Line 65 mpn_divexact_by3c (dst, src, size, c) |
|
Line 61 mpn_divexact_by3c (dst, src, size, c) |
|
l = s - c; |
l = s - c; |
c = (l > s); |
c = (l > s); |
|
|
l *= INVERSE_3; |
l = (l * MODLIMB_INVERSE_3) & GMP_NUMB_MASK; |
dst[i] = l; |
dst[i] = l; |
|
|
c += (l > MP_LIMB_T_MAX/3); |
c += (l > GMP_NUMB_MAX/3); |
c += (l > (MP_LIMB_T_MAX/3)*2); |
c += (l > (GMP_NUMB_MAX/3)*2); |
} |
} |
while (++i < size); |
while (++i < size); |
|
|