version 1.1.1.1, 2000/01/10 15:35:23 |
version 1.1.1.2, 2000/09/09 14:12:24 |
|
|
/* mpn/gcd.c: mpn_gcd for gcd of two odd integers. |
/* mpn/gcd.c: mpn_gcd for gcd of two odd integers. |
|
|
Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc. |
Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000 Free Software |
|
Foundation, Inc. |
|
|
This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
|
|
The GNU MP Library is free software; you can redistribute it and/or modify |
The GNU MP Library is free software; you can redistribute it and/or modify |
it under the terms of the GNU Library General Public License as published by |
it under the terms of the GNU Lesser General Public License as published by |
the Free Software Foundation; either version 2 of the License, or (at your |
the Free Software Foundation; either version 2.1 of the License, or (at your |
option) any later version. |
option) any later version. |
|
|
The GNU MP Library is distributed in the hope that it will be useful, but |
The GNU MP Library is distributed in the hope that it will be useful, but |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
License for more details. |
License for more details. |
|
|
You should have received a copy of the GNU Library General Public License |
You should have received a copy of the GNU Lesser General Public License |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
MA 02111-1307, USA. */ |
MA 02111-1307, USA. */ |
Line 22 MA 02111-1307, USA. */ |
|
Line 23 MA 02111-1307, USA. */ |
|
/* Integer greatest common divisor of two unsigned integers, using |
/* Integer greatest common divisor of two unsigned integers, using |
the accelerated algorithm (see reference below). |
the accelerated algorithm (see reference below). |
|
|
mp_size_t mpn_gcd (vp, vsize, up, usize). |
mp_size_t mpn_gcd (up, usize, vp, vsize). |
|
|
Preconditions [U = (up, usize) and V = (vp, vsize)]: |
Preconditions [U = (up, usize) and V = (vp, vsize)]: |
|
|
Line 47 MA 02111-1307, USA. */ |
|
Line 48 MA 02111-1307, USA. */ |
|
#include "gmp-impl.h" |
#include "gmp-impl.h" |
#include "longlong.h" |
#include "longlong.h" |
|
|
/* If MIN (usize, vsize) > ACCEL_THRESHOLD, then the accelerated algorithm is |
/* If MIN (usize, vsize) >= GCD_ACCEL_THRESHOLD, then the accelerated |
used, otherwise the binary algorithm is used. This may be adjusted for |
algorithm is used, otherwise the binary algorithm is used. This may be |
different architectures. */ |
adjusted for different architectures. */ |
#ifndef ACCEL_THRESHOLD |
#ifndef GCD_ACCEL_THRESHOLD |
#define ACCEL_THRESHOLD 4 |
#define GCD_ACCEL_THRESHOLD 5 |
#endif |
#endif |
|
|
/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated |
/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated |
|
|
BMOD_THRESHOLD = BITS_PER_MP_LIMB/2 |
BMOD_THRESHOLD = BITS_PER_MP_LIMB/2 |
}; |
}; |
|
|
#define SIGN_BIT (~(~(mp_limb_t)0 >> 1)) |
|
|
|
|
|
#define SWAP_LIMB(UL, VL) do{mp_limb_t __l=(UL);(UL)=(VL);(VL)=__l;}while(0) |
|
#define SWAP_PTR(UP, VP) do{mp_ptr __p=(UP);(UP)=(VP);(VP)=__p;}while(0) |
|
#define SWAP_SZ(US, VS) do{mp_size_t __s=(US);(US)=(VS);(VS)=__s;}while(0) |
|
#define SWAP_MPN(UP, US, VP, VS) do{SWAP_PTR(UP,VP);SWAP_SZ(US,VS);}while(0) |
|
|
|
/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2. |
/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2. |
Both U and V must be odd. */ |
Both U and V must be odd. */ |
static __gmp_inline mp_size_t |
static __gmp_inline mp_size_t |
|
|
precision. If N2 > N1 initially, the first iteration of the while loop |
precision. If N2 > N1 initially, the first iteration of the while loop |
will swap them. In all other situations, N1 >= N2 is maintained. */ |
will swap them. In all other situations, N1 >= N2 is maintained. */ |
|
|
static __gmp_inline mp_limb_t |
static |
|
#if ! defined (__i386__) |
|
__gmp_inline /* don't inline this for the x86 */ |
|
#endif |
|
mp_limb_t |
#if __STDC__ |
#if __STDC__ |
find_a (mp_srcptr cp) |
find_a (mp_srcptr cp) |
#else |
#else |
|
|
while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */ |
while (n2_h) /* While N2 >= 2^BITS_PER_MP_LIMB. */ |
{ |
{ |
/* N1 <-- N1 % N2. */ |
/* N1 <-- N1 % N2. */ |
if ((SIGN_BIT >> leading_zero_bits & n2_h) == 0) |
if ((MP_LIMB_T_HIGHBIT >> leading_zero_bits & n2_h) == 0) |
{ |
{ |
unsigned long int i; |
unsigned long int i; |
count_leading_zeros (i, n2_h); |
count_leading_zeros (i, n2_h); |
|
|
if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) |
if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l)) |
n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; |
n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l; |
|
|
SWAP_LIMB (n1_h, n2_h); |
MP_LIMB_T_SWAP (n1_h, n2_h); |
SWAP_LIMB (n1_l, n2_l); |
MP_LIMB_T_SWAP (n1_l, n2_l); |
} |
} |
|
|
return n2_l; |
return n2_l; |
|
|
|
|
mp_size_t |
mp_size_t |
#if __STDC__ |
#if __STDC__ |
mpn_gcd (mp_ptr gp, mp_ptr vp, mp_size_t vsize, mp_ptr up, mp_size_t usize) |
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize) |
#else |
#else |
mpn_gcd (gp, vp, vsize, up, usize) |
mpn_gcd (gp, up, usize, vp, vsize) |
mp_ptr gp; |
mp_ptr gp; |
mp_ptr vp; |
|
mp_size_t vsize; |
|
mp_ptr up; |
mp_ptr up; |
mp_size_t usize; |
mp_size_t usize; |
|
mp_ptr vp; |
|
mp_size_t vsize; |
#endif |
#endif |
{ |
{ |
mp_ptr orig_vp = vp; |
mp_ptr orig_vp = vp; |
Line 193 mpn_gcd (gp, vp, vsize, up, usize) |
|
Line 191 mpn_gcd (gp, vp, vsize, up, usize) |
|
|
|
TMP_MARK (marker); |
TMP_MARK (marker); |
|
|
/* Use accelerated algorithm if vsize is over ACCEL_THRESHOLD. |
/* Use accelerated algorithm if vsize is over GCD_ACCEL_THRESHOLD. |
Two EXTRA limbs for U and V are required for kary reduction. */ |
Two EXTRA limbs for U and V are required for kary reduction. */ |
if (vsize > ACCEL_THRESHOLD) |
if (vsize >= GCD_ACCEL_THRESHOLD) |
{ |
{ |
unsigned long int vbitsize, d; |
unsigned long int vbitsize, d; |
mp_ptr orig_up = up; |
mp_ptr orig_up = up; |
Line 228 mpn_gcd (gp, vp, vsize, up, usize) |
|
Line 226 mpn_gcd (gp, vp, vsize, up, usize) |
|
|
|
do /* Main loop. */ |
do /* Main loop. */ |
{ |
{ |
if (up[usize-1] & SIGN_BIT) /* U < 0; take twos' compl. */ |
/* mpn_com_n can't be used here because anchor_up and up may |
|
partially overlap */ |
|
if (up[usize-1] & MP_LIMB_T_HIGHBIT) /* U < 0; take twos' compl. */ |
{ |
{ |
mp_size_t i; |
mp_size_t i; |
anchor_up[0] = -up[0]; |
anchor_up[0] = -up[0]; |
Line 241 mpn_gcd (gp, vp, vsize, up, usize) |
|
Line 241 mpn_gcd (gp, vp, vsize, up, usize) |
|
|
|
if ((up[0] & 1) == 0) /* Result even; remove twos. */ |
if ((up[0] & 1) == 0) /* Result even; remove twos. */ |
{ |
{ |
unsigned long int r; |
unsigned int r; |
count_trailing_zeros (r, up[0]); |
count_trailing_zeros (r, up[0]); |
mpn_rshift (anchor_up, up, usize, r); |
mpn_rshift (anchor_up, up, usize, r); |
usize -= (anchor_up[usize-1] == 0); |
usize -= (anchor_up[usize-1] == 0); |
} |
} |
else if (anchor_up != up) |
else if (anchor_up != up) |
MPN_COPY (anchor_up, up, usize); |
MPN_COPY_INCR (anchor_up, up, usize); |
|
|
SWAP_MPN (anchor_up, usize, vp, vsize); |
MPN_PTR_SWAP (anchor_up,usize, vp,vsize); |
up = anchor_up; |
up = anchor_up; |
|
|
if (vsize <= 2) /* Kary can't handle < 2 limbs and */ |
if (vsize <= 2) /* Kary can't handle < 2 limbs and */ |
Line 271 mpn_gcd (gp, vp, vsize, up, usize) |
|
Line 271 mpn_gcd (gp, vp, vsize, up, usize) |
|
mp_limb_t bp[2], cp[2]; |
mp_limb_t bp[2], cp[2]; |
|
|
/* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB). */ |
/* C <-- V/U mod 2^(2*BITS_PER_MP_LIMB). */ |
cp[0] = vp[0], cp[1] = vp[1]; |
{ |
mpn_bdivmod (cp, cp, 2, up, 2, 2*BITS_PER_MP_LIMB); |
mp_limb_t u_inv, hi, lo; |
|
modlimb_invert (u_inv, up[0]); |
|
cp[0] = vp[0] * u_inv; |
|
umul_ppmm (hi, lo, cp[0], up[0]); |
|
cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv; |
|
} |
|
|
/* U <-- find_a (C) * U. */ |
/* U <-- find_a (C) * U. */ |
up[usize] = mpn_mul_1 (up, up, usize, find_a (cp)); |
up[usize] = mpn_mul_1 (up, up, usize, find_a (cp)); |
Line 280 mpn_gcd (gp, vp, vsize, up, usize) |
|
Line 285 mpn_gcd (gp, vp, vsize, up, usize) |
|
|
|
/* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1). |
/* B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1). |
bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and |
bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and |
bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2 */ |
bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2 |
bp[0] = up[0], bp[1] = up[1]; |
|
mpn_bdivmod (bp, bp, 2, vp, 2, BITS_PER_MP_LIMB); |
|
bp[1] &= 1; /* Since V is odd, division is unnecessary. */ |
|
|
|
|
Like V/U above, but simplified because only the low bit of |
|
bp[1] is wanted. */ |
|
{ |
|
mp_limb_t v_inv, hi, lo; |
|
modlimb_invert (v_inv, vp[0]); |
|
bp[0] = up[0] * v_inv; |
|
umul_ppmm (hi, lo, bp[0], vp[0]); |
|
bp[1] = (up[1] + hi + (bp[0]&vp[1])) & 1; |
|
} |
|
|
up[usize++] = 0; |
up[usize++] = 0; |
if (bp[1]) /* B < 0: U <-- U + (-B) * V. */ |
if (bp[1]) /* B < 0: U <-- U + (-B) * V. */ |
{ |
{ |
Line 342 mpn_gcd (gp, vp, vsize, up, usize) |
|
Line 354 mpn_gcd (gp, vp, vsize, up, usize) |
|
up += 1, usize -= 1; |
up += 1, usize -= 1; |
if ((up[0] & 1) == 0) |
if ((up[0] & 1) == 0) |
{ |
{ |
unsigned long int r; |
unsigned int r; |
count_trailing_zeros (r, up[0]); |
count_trailing_zeros (r, up[0]); |
mpn_rshift (up, up, usize, r); |
mpn_rshift (up, up, usize, r); |
usize -= (up[usize-1] == 0); |
usize -= (up[usize-1] == 0); |
Line 350 mpn_gcd (gp, vp, vsize, up, usize) |
|
Line 362 mpn_gcd (gp, vp, vsize, up, usize) |
|
|
|
/* Keep usize >= vsize. */ |
/* Keep usize >= vsize. */ |
if (usize < vsize) |
if (usize < vsize) |
SWAP_MPN (up, usize, vp, vsize); |
MPN_PTR_SWAP (up, usize, vp, vsize); |
|
|
if (usize <= 2) /* Double precision. */ |
if (usize <= 2) /* Double precision. */ |
{ |
{ |
Line 375 mpn_gcd (gp, vp, vsize, up, usize) |
|
Line 387 mpn_gcd (gp, vp, vsize, up, usize) |
|
size--; |
size--; |
while (up[size] == vp[size]); |
while (up[size] == vp[size]); |
if (up[size] < vp[size]) /* usize == vsize. */ |
if (up[size] < vp[size]) /* usize == vsize. */ |
SWAP_PTR (up, vp); |
MP_PTR_SWAP (up, vp); |
up += zeros, usize = size + 1 - zeros; |
up += zeros, usize = size + 1 - zeros; |
mpn_sub_n (up, up, vp + zeros, usize); |
mpn_sub_n (up, up, vp + zeros, usize); |
} |
} |