/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.

Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA.  */

/* Integer greatest common divisor of two unsigned integers, using
   the accelerated algorithm (see reference below).

   mp_size_t mpn_gcd (up, usize, vp, vsize).

   Preconditions [U = (up, usize) and V = (vp, vsize)]:

   1.  V is odd.
   2.  numbits(U) >= numbits(V).

   Both source operands are clobbered by the operation; the result is
   stored at gp and its size is returned.

   The accelerated algorithm is due to Kenneth Weber, "The accelerated
   integer GCD algorithm", ACM Transactions on Mathematical Software,
   vol. 21, no. 1, March 1995, pp. 111-122.  */
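
/* Example of a call satisfying the preconditions above -- an illustrative
   sketch only, not code from this file.  U = 21 and V = 15 are both odd,
   and numbits(21) = 5 >= numbits(15) = 4:

       mp_limb_t ul[1] = { 21 };
       mp_limb_t vl[1] = { 15 };
       mp_limb_t gl[1];
       mp_size_t gn = mpn_gcd (gl, ul, (mp_size_t) 1, vl, (mp_size_t) 1);

   afterwards gn == 1 and gl[0] == 3, and both source operands must be
   considered destroyed.  */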
#include "gmp-impl.h" |
#include "gmp-impl.h" |
#include "longlong.h" |
#include "longlong.h" |
|
|

/* If MIN (usize, vsize) >= GCD_ACCEL_THRESHOLD, then the accelerated
   algorithm is used, otherwise the binary algorithm is used.  This may be
   adjusted for different architectures.  */
#ifndef GCD_ACCEL_THRESHOLD
#define GCD_ACCEL_THRESHOLD 5
#endif

/* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
   algorithm reduces using the bmod operation.  Otherwise, the k-ary
   reduction is used.  0 <= BMOD_THRESHOLD < GMP_NUMB_BITS.  */
enum
  {
    BMOD_THRESHOLD = GMP_NUMB_BITS/2
  };
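
/* Worked example of the reduction choice (illustrative, not from the
   original source): with GMP_NUMB_BITS == 32, BMOD_THRESHOLD == 16.  If U
   has 300 bits and V has 200 bits, then d == 300 - 200 + 1 == 101 > 16 and
   a bmod reduction is performed; if U instead has 210 bits, d == 11 <= 16
   and the k-ary reduction is chosen.  */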

/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
   Both U and V must be odd.  */
static inline mp_size_t
gcd_2 (mp_ptr vp, mp_srcptr up)
{
  mp_limb_t u0, u1, v0, v1;
  mp_size_t vsize;

  u0 = up[0];
  u1 = up[1];
  v0 = vp[0];
  v1 = vp[1];

  while (u1 != v1 && u0 != v0)
    {
      unsigned long int r;
      if (u1 > v1)
        {
          u1 -= v1 + (u0 < v0);
          u0 = (u0 - v0) & GMP_NUMB_MASK;
          count_trailing_zeros (r, u0);
          u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
          u1 >>= r;
        }
      else  /* u1 < v1.  */
        {
          v1 -= u1 + (v0 < u0);
          v0 = (v0 - u0) & GMP_NUMB_MASK;
          count_trailing_zeros (r, v0);
          v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
          v1 >>= r;
        }
    }

  /* Now u1 == v1 or u0 == v0.  Store V and, unless U == V, finish with
     GCD (V, |U - V|), whose nonzero part fits in a single limb.  */
  vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);
  if (u1 == v1 && u0 == v0)
    return vsize;

  v0 = (u0 == v0) ? ((u1 > v1) ? u1 - v1 : v1 - u1)
                  : ((u0 > v0) ? u0 - v0 : v0 - u0);
  vp[0] = mpn_gcd_1 (vp, vsize, v0);

  return 1;
}
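
/* The loop above is the classic binary GCD specialized to two odd 2-limb
   operands: subtract the smaller from the larger (the difference is even
   and nonzero) and shift out the trailing zeros, since the gcd of two odd
   numbers is odd.  A single-limb analogue, as an illustrative sketch only
   (both arguments assumed odd and nonzero):

       unsigned long
       gcd_odd (unsigned long u, unsigned long v)
       {
         while (u != v)
           {
             if (u < v)
               {
                 unsigned long t = u;  u = v;  v = t;
               }
             u -= v;
             while ((u & 1) == 0)
               u >>= 1;
           }
         return u;
       }
*/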

/* The function find_a finds 0 < N < 2^GMP_NUMB_BITS such that there exists
   0 < |D| < 2^GMP_NUMB_BITS, and N == D * C mod 2^(2*GMP_NUMB_BITS).
   In the reference article, D was computed along with N, but it is better
   to compute D separately as D <-- N / C mod 2^(GMP_NUMB_BITS + 1),
   treating the result as a twos' complement signed integer.

   Initialize N1 to C mod 2^(2*GMP_NUMB_BITS).  According to the reference
   article, N2 should be initialized to 2^(2*GMP_NUMB_BITS), but we use
   2^(2*GMP_NUMB_BITS) - N1 to start the calculations within double
   precision.  If N2 > N1 initially, the first iteration of the while loop
   will swap them.  In all other situations, N1 >= N2 is maintained.  */
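
/* Informally (a gloss on the loop below, not text from the reference):
   each pass replaces (N1, N2) by (N2, N1 mod N2), i.e. it runs Euclid's
   algorithm on the double-limb pair, with each remainder computed by
   shift-and-subtract.  It stops as soon as N2 fits in a single limb, and
   that limb is the N returned.  */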

#if HAVE_NATIVE_mpn_gcd_finda
#define find_a(cp)  mpn_gcd_finda (cp)

#else
static
#if ! defined (__i386__)
inline                          /* don't inline this for the x86 */
#endif
mp_limb_t
find_a (mp_srcptr cp)
{
  unsigned long int leading_zero_bits = 0;

  mp_limb_t n1_l = cp[0];       /* N1 == n1_h * 2^GMP_NUMB_BITS + n1_l.  */
  mp_limb_t n1_h = cp[1];

  mp_limb_t n2_l = (-n1_l & GMP_NUMB_MASK);  /* N2 == n2_h * 2^GMP_NUMB_BITS + n2_l.  */
  mp_limb_t n2_h = (~n1_h & GMP_NUMB_MASK);

  /* Main loop.  */
  while (n2_h != 0)             /* While N2 >= 2^GMP_NUMB_BITS.  */
    {
      /* N1 <-- N1 % N2.  */
      if (((GMP_NUMB_HIGHBIT >> leading_zero_bits) & n2_h) == 0)
        {
          unsigned long int i;
          count_leading_zeros (i, n2_h);
          i -= GMP_NAIL_BITS;
          i -= leading_zero_bits;
          leading_zero_bits += i;
          n2_h = ((n2_h << i) & GMP_NUMB_MASK) | (n2_l >> (GMP_NUMB_BITS - i));
          n2_l = (n2_l << i) & GMP_NUMB_MASK;

          do
            {
              if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
                {
                  n1_h -= n2_h + (n1_l < n2_l);
                  n1_l = (n1_l - n2_l) & GMP_NUMB_MASK;
                }
              n2_l = (n2_l >> 1) | ((n2_h << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK);
              n2_h >>= 1;
              i -= 1;
            }
          while (i != 0);
        }
      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
        {
          n1_h -= n2_h + (n1_l < n2_l);
          n1_l = (n1_l - n2_l) & GMP_NUMB_MASK;
        }

      MP_LIMB_T_SWAP (n1_h, n2_h);
      MP_LIMB_T_SWAP (n1_l, n2_l);
    }

  return n2_l;
}
#endif
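
/* Toy illustration (not from the source), pretending GMP_NUMB_BITS == 4 so
   that 2^(2*GMP_NUMB_BITS) == 256: for C == 43 the successive values of N2
   are 213, 43, 41, 2, so find_a would return N == 2.  And indeed
   2 == 6 * 43 mod 256 with D == 6, satisfying 0 < |D| < 2^4 as promised.  */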

mp_size_t
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize)
{
  mp_ptr orig_vp = vp;
  mp_size_t orig_vsize = vsize;
  int binary_gcd_ctr;           /* Number of times binary gcd will execute.  */
  TMP_DECL (marker);

  ASSERT (usize >= 1);
  ASSERT (vsize >= 1);
  ASSERT (usize >= vsize);
  ASSERT (vp[0] & 1);
  ASSERT (up[usize - 1] != 0);
  ASSERT (vp[vsize - 1] != 0);
#if WANT_ASSERT
  if (usize == vsize)
    {
      int uzeros, vzeros;
      count_leading_zeros (uzeros, up[usize - 1]);
      count_leading_zeros (vzeros, vp[vsize - 1]);
      ASSERT (uzeros <= vzeros);
    }
#endif
  ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
  ASSERT (MPN_SAME_OR_SEPARATE2_P (gp, vsize, up, usize));
  ASSERT (MPN_SAME_OR_SEPARATE2_P (gp, vsize, vp, vsize));

  TMP_MARK (marker);

  /* Use accelerated algorithm if vsize is over GCD_ACCEL_THRESHOLD.
     Two EXTRA limbs for U and V are required for kary reduction.  */
  if (vsize >= GCD_ACCEL_THRESHOLD)
    {
      unsigned long int vbitsize, d;
      mp_ptr orig_up = up;
      mp_size_t orig_usize = usize;
      mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);

      MPN_COPY (anchor_up, orig_up, usize);
      up = anchor_up;

      count_leading_zeros (d, up[usize - 1]);
      d -= GMP_NAIL_BITS;
      d = usize * GMP_NUMB_BITS - d;
      count_leading_zeros (vbitsize, vp[vsize - 1]);
      vbitsize -= GMP_NAIL_BITS;
      vbitsize = vsize * GMP_NUMB_BITS - vbitsize;

      ASSERT (d >= vbitsize);
      d = d - vbitsize + 1;

      /* Use bmod reduction to quickly discover whether V divides U.  */
      up[usize++] = 0;
      mpn_bdivmod (up, up, usize, vp, vsize, d);

      /* Now skip U/V mod 2^d and any low zero limbs.  */
      d /= GMP_NUMB_BITS, up += d, usize -= d;
      while (usize != 0 && up[0] == 0)
        up++, usize--;

      if (usize == 0)           /* GCD == ORIG_V.  */
        goto done;

      vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
      MPN_COPY (vp, orig_vp, vsize);
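
      /* What a bmod reduction does, on tiny numbers (illustrative only):
         it replaces U by (U - q * V) / 2^d where q == U * V^(-1) mod 2^d,
         which preserves the GCD because V is odd.  E.g. U == 21, V == 5,
         d == 3: the inverse of 5 mod 8 is 5, q == 21 * 5 mod 8 == 1, and
         (21 - 1 * 5) / 8 == 2, with gcd (2, 5) == gcd (21, 5) == 1.  */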

      do                        /* Main loop.  */
        {
          /* mpn_com_n can't be used here because anchor_up and up may
             partially overlap.  */
          if ((up[usize - 1] & GMP_NUMB_HIGHBIT) != 0)  /* U < 0; take twos' compl.  */
            {
              mp_size_t i;
              anchor_up[0] = -up[0] & GMP_NUMB_MASK;
              for (i = 1; i < usize; i++)
                anchor_up[i] = (~up[i] & GMP_NUMB_MASK);
              up = anchor_up;
            }

          MPN_NORMALIZE_NOT_ZERO (up, usize);

          if ((up[0] & 1) == 0) /* Result even; remove twos.  */
            {
              unsigned int r;
              count_trailing_zeros (r, up[0]);
              mpn_rshift (anchor_up, up, usize, r);
              usize -= (anchor_up[usize - 1] == 0);
            }
          else if (anchor_up != up)
            MPN_COPY_INCR (anchor_up, up, usize);

          MPN_PTR_SWAP (anchor_up, usize, vp, vsize);
          up = anchor_up;

          if (vsize <= 2)       /* Kary can't handle < 2 limbs and  */
            break;              /* isn't efficient for == 2 limbs.  */

          d = vbitsize;
          count_leading_zeros (vbitsize, vp[vsize - 1]);
          vbitsize -= GMP_NAIL_BITS;
          vbitsize = vsize * GMP_NUMB_BITS - vbitsize;
          d = d - vbitsize + 1;

          if (d > BMOD_THRESHOLD)       /* Bmod reduction.  */
            {
              up[usize++] = 0;
              mpn_bdivmod (up, up, usize, vp, vsize, d);
              d /= GMP_NUMB_BITS, up += d, usize -= d;
            }
          else                          /* Kary reduction.  */
            {
              mp_limb_t bp[2], cp[2];

              /* C <-- V/U mod 2^(2*GMP_NUMB_BITS).  */
              {
                mp_limb_t u_inv, hi, lo;
                modlimb_invert (u_inv, up[0]);
                cp[0] = (vp[0] * u_inv) & GMP_NUMB_MASK;
                umul_ppmm (hi, lo, cp[0], up[0] << GMP_NAIL_BITS);
                lo >>= GMP_NAIL_BITS;
                cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv & GMP_NUMB_MASK;
              }
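
              /* The 2-adic inverse used above (and again below for v_inv)
                 can be obtained by Newton/Hensel lifting.  A minimal
                 sketch of the idea for an odd limb n -- GMP's actual
                 modlimb_invert starts from a table lookup instead:

                     mp_limb_t inv = n;    [n * n == 1 mod 8: 3 bits good]
                     inv *= 2 - n * inv;   [6 bits]
                     inv *= 2 - n * inv;   [12 bits, then 24, 48, ...,
                                            until the whole limb is correct]

                 after which n * inv == 1 mod 2^GMP_NUMB_BITS.  */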

              /* U <-- find_a (C)  *  U.  */
              up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
              usize++;

              /* B <-- A/C == U/V mod 2^(GMP_NUMB_BITS + 1).
                 bp[0] <-- U/V mod 2^GMP_NUMB_BITS and
                 bp[1] <-- ( (U - bp[0] * V)/2^GMP_NUMB_BITS ) / V mod 2.

                 Like V/U above, but simplified because only the low bit of
                 bp[1] is wanted.  */
              {
                mp_limb_t v_inv, hi, lo;
                modlimb_invert (v_inv, vp[0]);
                bp[0] = (up[0] * v_inv) & GMP_NUMB_MASK;
                umul_ppmm (hi, lo, bp[0], vp[0] << GMP_NAIL_BITS);
                lo >>= GMP_NAIL_BITS;
                bp[1] = (up[1] + hi + (bp[0] & vp[1])) & 1;
              }

              up[usize++] = 0;
              if (bp[1] != 0)   /* B < 0: U <-- U + (-B) * V.  */
                {
                  mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0] & GMP_NUMB_MASK);
                  mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
                }
              else              /* B >= 0: U <-- U - B * V.  */
                {
                  mp_limb_t c = mpn_submul_1 (up, vp, vsize, bp[0]);
                  mpn_sub_1 (up + vsize, up + vsize, usize - vsize, c);
                }
            }

          while (usize != 0 && up[0] == 0)
            up++, usize--;
        }
      while (usize != 0);
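
      /* Net effect of one k-ary step, informally (following the reference
         article's argument, not literal source text): C == V/U mod 2^(2*b)
         with b == GMP_NUMB_BITS, and find_a returns N == D * C for some
         0 < N, |D| < 2^b, so after U <-- N*U and the correction by B (which
         is D in twos' complement), the new U == N*U - D*V == 0 mod 2^(2*b):
         two low zero limbs appear and are stripped, while U grows by barely
         one limb.  The multiplier N can smuggle spurious factors into the
         result, which is why it is afterwards combined with ORIG_U and
         ORIG_V by the binary passes below.  */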

      /* Compute GCD (ORIG_V, GCD (ORIG_U, V)).  Binary will execute twice.  */
      up = orig_up, usize = orig_usize;
      binary_gcd_ctr = 2;
    }
  else
    binary_gcd_ctr = 1;

  /* Finish up with the binary algorithm.  Executes once, or twice when
     the accelerated algorithm ran above.  */
  for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
    {
      if (usize > 2)            /* First make U close to V in size.  */
        {
          unsigned long int vbitsize, d;
          count_leading_zeros (d, up[usize - 1]);
          d -= GMP_NAIL_BITS;
          d = usize * GMP_NUMB_BITS - d;
          count_leading_zeros (vbitsize, vp[vsize - 1]);
          vbitsize -= GMP_NAIL_BITS;
          vbitsize = vsize * GMP_NUMB_BITS - vbitsize;
          d = d - vbitsize - 1;
          if (d != -(unsigned long int)1 && d > 2)
            {
              mpn_bdivmod (up, up, usize, vp, vsize, d);  /* Result > 0.  */
              d /= (unsigned long int)GMP_NUMB_BITS, up += d, usize -= d;
            }
        }

      /* Start binary GCD.  */
      do
        {
          mp_size_t zeros, size;

          /* Make U odd by stripping low zero limbs, then low zero bits.  */
          while (up[0] == 0)
            up += 1, usize -= 1;
          if ((up[0] & 1) == 0)
            {
              unsigned int r;
              count_trailing_zeros (r, up[0]);
              mpn_rshift (up, up, usize, r);
              usize -= (up[usize - 1] == 0);
            }

          /* Keep usize >= vsize.  */
          if (usize < vsize)
            MPN_PTR_SWAP (up, usize, vp, vsize);

          if (usize <= 2)       /* Double precision.  */
            {
              if (vsize == 1)
                vp[0] = mpn_gcd_1 (up, usize, vp[0]);
              else
                vsize = gcd_2 (vp, up);
              break;            /* Binary GCD done.  */
            }

          /* Count number of low limbs where U and V agree, i.e. the low
             zero limbs of U - V.  */
          for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != usize; )
            continue;

          /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
          if (zeros == usize)
            break;

          size = usize;
          do
            size--;
          while (up[size] == vp[size]);
          if (up[size] < vp[size])      /* usize == vsize.  */
            MP_PTR_SWAP (up, vp);
          up += zeros, usize = size + 1 - zeros;
          mpn_sub_n (up, up, vp + zeros, usize);
        }
      while (usize != 0);
    }

 done:
  if (vp != gp)
    MPN_COPY_INCR (gp, vp, vsize);
  TMP_FREE (marker);
  return vsize;
}