version 1.1.1.1, 2000/01/10 15:35:23 |
version 1.1.1.2, 2000/09/09 14:12:25 |
|
|
/* mpn_gcdext -- Extended Greatest Common Divisor. |
/* mpn_gcdext -- Extended Greatest Common Divisor. |
|
|
Copyright (C) 1996 Free Software Foundation, Inc. |
Copyright (C) 1996, 1998, 2000 Free Software Foundation, Inc. |
|
|
This file is part of the GNU MP Library. |
This file is part of the GNU MP Library. |
|
|
The GNU MP Library is free software; you can redistribute it and/or modify |
The GNU MP Library is free software; you can redistribute it and/or modify |
it under the terms of the GNU Library General Public License as published by |
it under the terms of the GNU Lesser General Public License as published by |
the Free Software Foundation; either version 2 of the License, or (at your |
the Free Software Foundation; either version 2.1 of the License, or (at your |
option) any later version. |
option) any later version. |
|
|
The GNU MP Library is distributed in the hope that it will be useful, but |
The GNU MP Library is distributed in the hope that it will be useful, but |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
License for more details. |
License for more details. |
|
|
You should have received a copy of the GNU Library General Public License |
You should have received a copy of the GNU Lesser General Public License |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
along with the GNU MP Library; see the file COPYING.LIB. If not, write to |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
MA 02111-1307, USA. */ |
MA 02111-1307, USA. */ |
Line 23 MA 02111-1307, USA. */ |
|
Line 23 MA 02111-1307, USA. */ |
|
#include "gmp-impl.h" |
#include "gmp-impl.h" |
#include "longlong.h" |
#include "longlong.h" |
|
|
|
#ifndef GCDEXT_THRESHOLD |
|
#define GCDEXT_THRESHOLD 17 |
|
#endif |
|
|
#ifndef EXTEND |
#ifndef EXTEND |
#define EXTEND 1 |
#define EXTEND 1 |
#endif |
#endif |
Line 31 MA 02111-1307, USA. */ |
|
Line 35 MA 02111-1307, USA. */ |
|
int arr[BITS_PER_MP_LIMB]; |
int arr[BITS_PER_MP_LIMB]; |
#endif |
#endif |
|
|
#define SGN(A) (((A) < 0) ? -1 : ((A) > 0)) |
|
|
|
/* Idea 1: After we have performed a full division, don't shift operands back, |
/* mpn_gcdext (GP, SP, SSIZE, UP, USIZE, VP, VSIZE) |
|
|
|
Compute the extended GCD of {UP,USIZE} and {VP,VSIZE} and store the |
|
greatest common divisor at GP (unless it is 0), and the first cofactor at |
|
SP. Write the size of the cofactor through the pointer SSIZE. Return the |
|
size of the value at GP. Note that SP might be a negative number; this is |
|
denoted by storing the negative of the size through SSIZE. |
|
|
|
{UP,USIZE} and {VP,VSIZE} are both clobbered. |
|
|
|
The space allocation for all four areas needs to be USIZE+1. |
|
|
|
Preconditions: 1) U >= V. |
|
2) V > 0. */ |
|
|
|
/* We use Lehmer's algorithm. The idea is to extract the most significant |
|
bits of the operands, and compute the continued fraction for them. We then |
|
apply the gathered cofactors to the full operands. |
|
|
|
Idea 1: After we have performed a full division, don't shift operands back, |
but instead account for the extra factors-of-2 thus introduced. |
but instead account for the extra factors-of-2 thus introduced. |
Idea 2: Simple generalization to use divide-and-conquer would give us an |
Idea 2: Simple generalization to use divide-and-conquer would give us an |
algorithm that runs faster than O(n^2). |
algorithm that runs faster than O(n^2). |
Idea 3: The input numbers need less space as the computation progresses, |
Idea 3: The input numbers need less space as the computation progresses, |
while the s0 and s1 variables need more space. To save space, we |
while the s0 and s1 variables need more space. To save memory, we |
could make them share space, and have the latter variables grow |
could make them share space, and have the latter variables grow |
into the former. */ |
into the former. |
|
Idea 4: We should not do double-limb arithmetic from the start. Instead, |
|
do things in single-limb arithmetic until the quotients differ, |
|
and then switch to double-limb arithmetic. */ |
|
|
/* Precondition: U >= V. */ |
|
|
|
|
/* Division optimized for small quotients. If the quotient is more than one limb, |
|
store 1 in *qh and return 0. */ |
|
static mp_limb_t |
|
#if __STDC__ |
|
div2 (mp_limb_t *qh, mp_limb_t n1, mp_limb_t n0, mp_limb_t d1, mp_limb_t d0) |
|
#else |
|
div2 (qh, n1, n0, d1, d0) |
|
mp_limb_t *qh; |
|
mp_limb_t n1; |
|
mp_limb_t n0; |
|
mp_limb_t d1; |
|
mp_limb_t d0; |
|
#endif |
|
{ |
|
if (d1 == 0) |
|
{ |
|
*qh = 1; |
|
return 0; |
|
} |
|
|
|
if ((mp_limb_signed_t) n1 < 0) |
|
{ |
|
mp_limb_t q; |
|
int cnt; |
|
for (cnt = 1; (mp_limb_signed_t) d1 >= 0; cnt++) |
|
{ |
|
d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1)); |
|
d0 = d0 << 1; |
|
} |
|
|
|
q = 0; |
|
while (cnt) |
|
{ |
|
q <<= 1; |
|
if (n1 > d1 || (n1 == d1 && n0 >= d0)) |
|
{ |
|
sub_ddmmss (n1, n0, n1, n0, d1, d0); |
|
q |= 1; |
|
} |
|
d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1); |
|
d1 = d1 >> 1; |
|
cnt--; |
|
} |
|
|
|
*qh = 0; |
|
return q; |
|
} |
|
else |
|
{ |
|
mp_limb_t q; |
|
int cnt; |
|
for (cnt = 0; n1 > d1 || (n1 == d1 && n0 >= d0); cnt++) |
|
{ |
|
d1 = (d1 << 1) | (d0 >> (BITS_PER_MP_LIMB - 1)); |
|
d0 = d0 << 1; |
|
} |
|
|
|
q = 0; |
|
while (cnt) |
|
{ |
|
d0 = (d1 << (BITS_PER_MP_LIMB - 1)) | (d0 >> 1); |
|
d1 = d1 >> 1; |
|
q <<= 1; |
|
if (n1 > d1 || (n1 == d1 && n0 >= d0)) |
|
{ |
|
sub_ddmmss (n1, n0, n1, n0, d1, d0); |
|
q |= 1; |
|
} |
|
cnt--; |
|
} |
|
|
|
*qh = 0; |
|
return q; |
|
} |
|
} |
|
|
mp_size_t |
mp_size_t |
#if EXTEND |
#if EXTEND |
#if __STDC__ |
#if __STDC__ |
mpn_gcdext (mp_ptr gp, mp_ptr s0p, |
mpn_gcdext (mp_ptr gp, mp_ptr s0p, mp_size_t *s0size, |
mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize) |
mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize) |
#else |
#else |
mpn_gcdext (gp, s0p, up, size, vp, vsize) |
mpn_gcdext (gp, s0p, s0size, up, size, vp, vsize) |
mp_ptr gp; |
mp_ptr gp; |
mp_ptr s0p; |
mp_ptr s0p; |
|
mp_size_t *s0size; |
mp_ptr up; |
mp_ptr up; |
mp_size_t size; |
mp_size_t size; |
mp_ptr vp; |
mp_ptr vp; |
Line 72 mpn_gcd (gp, up, size, vp, vsize) |
|
Line 173 mpn_gcd (gp, up, size, vp, vsize) |
|
#endif |
#endif |
#endif |
#endif |
{ |
{ |
mp_limb_t uh, vh; |
mp_limb_t A, B, C, D; |
mp_limb_signed_t A, B, C, D; |
|
int cnt; |
int cnt; |
mp_ptr tp, wp; |
mp_ptr tp, wp; |
#if RECORD |
#if RECORD |
mp_limb_signed_t min = 0, max = 0; |
mp_limb_t max = 0; |
#endif |
#endif |
#if EXTEND |
#if EXTEND |
mp_ptr s1p; |
mp_ptr s1p; |
mp_ptr orig_s0p = s0p; |
mp_ptr orig_s0p = s0p; |
mp_size_t ssize, orig_size = size; |
mp_size_t ssize; |
|
int sign = 1; |
|
#endif |
|
int use_double_flag; |
TMP_DECL (mark); |
TMP_DECL (mark); |
|
|
TMP_MARK (mark); |
TMP_MARK (mark); |
|
|
|
use_double_flag = (size >= GCDEXT_THRESHOLD); |
|
|
tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); |
tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); |
wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); |
wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); |
s1p = (mp_ptr) TMP_ALLOC (size * BYTES_PER_MP_LIMB); |
#if EXTEND |
|
s1p = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB); |
|
|
MPN_ZERO (s0p, size); |
MPN_ZERO (s0p, size); |
MPN_ZERO (s1p, size); |
MPN_ZERO (s1p, size); |
Line 117 mpn_gcd (gp, up, size, vp, vsize) |
|
Line 223 mpn_gcd (gp, up, size, vp, vsize) |
|
/* This is really what it boils down to in this case... */ |
/* This is really what it boils down to in this case... */ |
s0p[0] = 0; |
s0p[0] = 0; |
s1p[0] = 1; |
s1p[0] = 1; |
|
sign = -sign; |
#endif |
#endif |
size = vsize; |
size = vsize; |
if (cnt != 0) |
if (cnt != 0) |
Line 124 mpn_gcd (gp, up, size, vp, vsize) |
|
Line 231 mpn_gcd (gp, up, size, vp, vsize) |
|
mpn_rshift (up, up, size, cnt); |
mpn_rshift (up, up, size, cnt); |
mpn_rshift (vp, vp, size, cnt); |
mpn_rshift (vp, vp, size, cnt); |
} |
} |
{ |
MP_PTR_SWAP (up, vp); |
mp_ptr xp; |
|
xp = up; up = vp; vp = xp; |
|
} |
|
} |
} |
|
|
for (;;) |
for (;;) |
{ |
{ |
|
mp_limb_t asign; |
/* Figure out exact size of V. */ |
/* Figure out exact size of V. */ |
vsize = size; |
vsize = size; |
MPN_NORMALIZE (vp, vsize); |
MPN_NORMALIZE (vp, vsize); |
if (vsize <= 1) |
if (vsize <= 1) |
break; |
break; |
|
|
/* Make UH be the most significant limb of U, and make VH be |
if (use_double_flag) |
corresponding bits from V. */ |
|
uh = up[size - 1]; |
|
vh = vp[size - 1]; |
|
count_leading_zeros (cnt, uh); |
|
if (cnt != 0) |
|
{ |
{ |
uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt)); |
mp_limb_t uh, vh, ul, vl; |
vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt)); |
/* Let UH,UL be the most significant limbs of U, and let VH,VL be |
} |
the corresponding bits from V. */ |
|
uh = up[size - 1]; |
|
vh = vp[size - 1]; |
|
ul = up[size - 2]; |
|
vl = vp[size - 2]; |
|
count_leading_zeros (cnt, uh); |
|
if (cnt != 0) |
|
{ |
|
uh = (uh << cnt) | (ul >> (BITS_PER_MP_LIMB - cnt)); |
|
vh = (vh << cnt) | (vl >> (BITS_PER_MP_LIMB - cnt)); |
|
vl <<= cnt; |
|
ul <<= cnt; |
|
if (size >= 3) |
|
{ |
|
ul |= (up[size - 3] >> (BITS_PER_MP_LIMB - cnt)); |
|
vl |= (vp[size - 3] >> (BITS_PER_MP_LIMB - cnt)); |
|
} |
|
} |
|
|
#if 0 |
A = 1; |
/* For now, only handle BITS_PER_MP_LIMB-1 bits. This makes |
B = 0; |
room for sign bit. */ |
C = 0; |
uh >>= 1; |
D = 1; |
vh >>= 1; |
|
#endif |
|
A = 1; |
|
B = 0; |
|
C = 0; |
|
D = 1; |
|
|
|
for (;;) |
asign = 0; |
|
for (;;) |
|
{ |
|
mp_limb_t T; |
|
mp_limb_t qh, q1, q2; |
|
mp_limb_t nh, nl, dh, dl; |
|
mp_limb_t t1, t0; |
|
mp_limb_t Th, Tl; |
|
|
|
sub_ddmmss (dh, dl, vh, vl, 0, C); |
|
if ((dl | dh) == 0) |
|
break; |
|
add_ssaaaa (nh, nl, uh, ul, 0, A); |
|
q1 = div2 (&qh, nh, nl, dh, dl); |
|
if (qh != 0) |
|
break; /* could handle this */ |
|
|
|
add_ssaaaa (dh, dl, vh, vl, 0, D); |
|
if ((dl | dh) == 0) |
|
break; |
|
sub_ddmmss (nh, nl, uh, ul, 0, B); |
|
q2 = div2 (&qh, nh, nl, dh, dl); |
|
if (qh != 0) |
|
break; /* could handle this */ |
|
|
|
if (q1 != q2) |
|
break; |
|
|
|
asign = ~asign; |
|
|
|
T = A + q1 * C; |
|
A = C; |
|
C = T; |
|
T = B + q1 * D; |
|
B = D; |
|
D = T; |
|
umul_ppmm (t1, t0, q1, vl); |
|
t1 += q1 * vh; |
|
sub_ddmmss (Th, Tl, uh, ul, t1, t0); |
|
uh = vh, ul = vl; |
|
vh = Th, vl = Tl; |
|
|
|
add_ssaaaa (dh, dl, vh, vl, 0, C); |
|
sub_ddmmss (nh, nl, uh, ul, 0, A); |
|
q1 = div2 (&qh, nh, nl, dh, dl); |
|
if (qh != 0) |
|
break; /* could handle this */ |
|
|
|
sub_ddmmss (dh, dl, vh, vl, 0, D); |
|
if ((dl | dh) == 0) |
|
break; |
|
add_ssaaaa (nh, nl, uh, ul, 0, B); |
|
q2 = div2 (&qh, nh, nl, dh, dl); |
|
if (qh != 0) |
|
break; /* could handle this */ |
|
|
|
if (q1 != q2) |
|
break; |
|
|
|
asign = ~asign; |
|
|
|
T = A + q1 * C; |
|
A = C; |
|
C = T; |
|
T = B + q1 * D; |
|
B = D; |
|
D = T; |
|
umul_ppmm (t1, t0, q1, vl); |
|
t1 += q1 * vh; |
|
sub_ddmmss (Th, Tl, uh, ul, t1, t0); |
|
uh = vh, ul = vl; |
|
vh = Th, vl = Tl; |
|
} |
|
#if EXTEND |
|
if (asign) |
|
sign = -sign; |
|
#endif |
|
} |
|
else /* Same, but using single-limb calculations. */ |
{ |
{ |
mp_limb_signed_t q, T; |
mp_limb_t uh, vh; |
if (vh + C == 0 || vh + D == 0) |
/* Make UH be the most significant limb of U, and make VH be |
break; |
corresponding bits from V. */ |
|
uh = up[size - 1]; |
|
vh = vp[size - 1]; |
|
count_leading_zeros (cnt, uh); |
|
if (cnt != 0) |
|
{ |
|
uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt)); |
|
vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt)); |
|
} |
|
|
q = (uh + A) / (vh + C); |
A = 1; |
if (q != (uh + B) / (vh + D)) |
B = 0; |
break; |
C = 0; |
|
D = 1; |
|
|
T = A - q * C; |
asign = 0; |
A = C; |
for (;;) |
C = T; |
{ |
T = B - q * D; |
mp_limb_t q, T; |
B = D; |
if (vh - C == 0 || vh + D == 0) |
D = T; |
break; |
T = uh - q * vh; |
|
uh = vh; |
q = (uh + A) / (vh - C); |
vh = T; |
if (q != (uh - B) / (vh + D)) |
|
break; |
|
|
|
asign = ~asign; |
|
|
|
T = A + q * C; |
|
A = C; |
|
C = T; |
|
T = B + q * D; |
|
B = D; |
|
D = T; |
|
T = uh - q * vh; |
|
uh = vh; |
|
vh = T; |
|
|
|
if (vh - D == 0) |
|
break; |
|
|
|
q = (uh - A) / (vh + C); |
|
if (q != (uh + B) / (vh - D)) |
|
break; |
|
|
|
asign = ~asign; |
|
|
|
T = A + q * C; |
|
A = C; |
|
C = T; |
|
T = B + q * D; |
|
B = D; |
|
D = T; |
|
T = uh - q * vh; |
|
uh = vh; |
|
vh = T; |
|
} |
|
#if EXTEND |
|
if (asign) |
|
sign = -sign; |
|
#endif |
} |
} |
|
|
#if RECORD |
#if RECORD |
min = MIN (A, min); min = MIN (B, min); |
|
min = MIN (C, min); min = MIN (D, min); |
|
max = MAX (A, max); max = MAX (B, max); |
max = MAX (A, max); max = MAX (B, max); |
max = MAX (C, max); max = MAX (D, max); |
max = MAX (C, max); max = MAX (D, max); |
#endif |
#endif |
Line 192 mpn_gcd (gp, up, size, vp, vsize) |
|
Line 425 mpn_gcd (gp, up, size, vp, vsize) |
|
{ |
{ |
mp_limb_t qh; |
mp_limb_t qh; |
mp_size_t i; |
mp_size_t i; |
|
|
/* This is quite rare. I.e., optimize something else! */ |
/* This is quite rare. I.e., optimize something else! */ |
|
|
/* Normalize V (and shift up U the same amount). */ |
/* Normalize V (and shift up U the same amount). */ |
Line 209 mpn_gcd (gp, up, size, vp, vsize) |
|
Line 441 mpn_gcd (gp, up, size, vp, vsize) |
|
qh = mpn_divmod (up + vsize, up, size, vp, vsize); |
qh = mpn_divmod (up + vsize, up, size, vp, vsize); |
#if EXTEND |
#if EXTEND |
MPN_COPY (tp, s0p, ssize); |
MPN_COPY (tp, s0p, ssize); |
for (i = 0; i < size - vsize; i++) |
|
{ |
|
mp_limb_t cy; |
|
cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]); |
|
if (cy != 0) |
|
tp[ssize++] = cy; |
|
} |
|
if (qh != 0) |
|
{ |
|
mp_limb_t cy; |
|
abort (); |
|
/* XXX since qh == 1, mpn_addmul_1 is overkill */ |
|
cy = mpn_addmul_1 (tp + size - vsize, s1p, ssize, qh); |
|
if (cy != 0) |
|
tp[ssize++] = cy; |
|
} |
|
#if 0 |
|
MPN_COPY (s0p, s1p, ssize); /* should be old ssize, kind of */ |
|
MPN_COPY (s1p, tp, ssize); |
|
#else |
|
{ |
{ |
mp_ptr xp; |
mp_size_t qsize; |
xp = s0p; s0p = s1p; s1p = xp; |
|
xp = s1p; s1p = tp; tp = xp; |
qsize = size - vsize; /* size of stored quotient from division */ |
|
if (ssize < qsize) |
|
{ |
|
MPN_ZERO (tp + ssize, qsize - ssize); |
|
MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */ |
|
for (i = 0; i < ssize; i++) |
|
{ |
|
mp_limb_t cy; |
|
cy = mpn_addmul_1 (tp + i, up + vsize, qsize, s1p[i]); |
|
tp[qsize + i] = cy; |
|
} |
|
if (qh != 0) |
|
{ |
|
mp_limb_t cy; |
|
cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize); |
|
if (cy != 0) |
|
abort (); |
|
} |
|
} |
|
else |
|
{ |
|
MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */ |
|
for (i = 0; i < qsize; i++) |
|
{ |
|
mp_limb_t cy; |
|
cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]); |
|
tp[ssize + i] = cy; |
|
} |
|
if (qh != 0) |
|
{ |
|
mp_limb_t cy; |
|
cy = mpn_add_n (tp + qsize, tp + qsize, s1p, ssize); |
|
if (cy != 0) |
|
{ |
|
tp[qsize + ssize] = cy; |
|
s1p[qsize + ssize] = 0; |
|
ssize++; |
|
} |
|
} |
|
} |
|
ssize += qsize; |
|
ssize -= tp[ssize - 1] == 0; |
} |
} |
|
|
|
sign = -sign; |
|
MP_PTR_SWAP (s0p, s1p); |
|
MP_PTR_SWAP (s1p, tp); |
#endif |
#endif |
#endif |
|
size = vsize; |
size = vsize; |
if (cnt != 0) |
if (cnt != 0) |
{ |
{ |
mpn_rshift (up, up, size, cnt); |
mpn_rshift (up, up, size, cnt); |
mpn_rshift (vp, vp, size, cnt); |
mpn_rshift (vp, vp, size, cnt); |
} |
} |
|
MP_PTR_SWAP (up, vp); |
{ |
|
mp_ptr xp; |
|
xp = up; up = vp; vp = xp; |
|
} |
|
MPN_NORMALIZE (up, size); |
|
} |
} |
else |
else |
{ |
{ |
|
#if EXTEND |
|
mp_size_t tsize, wsize; |
|
#endif |
/* T = U*A + V*B |
/* T = U*A + V*B |
W = U*C + V*D |
W = U*C + V*D |
U = T |
U = T |
V = W */ |
V = W */ |
|
|
if (SGN(A) == SGN(B)) /* should be different sign */ |
|
abort (); |
|
if (SGN(C) == SGN(D)) /* should be different sign */ |
|
abort (); |
|
#if STAT |
#if STAT |
{ mp_limb_t x; |
{ mp_limb_t x; x = A | B | C | D; count_leading_zeros (cnt, x); |
x = ABS (A) | ABS (B) | ABS (C) | ABS (D); |
arr[BITS_PER_MP_LIMB - cnt]++; } |
count_leading_zeros (cnt, x); |
|
arr[BITS_PER_MP_LIMB - cnt]++; } |
|
#endif |
#endif |
if (A == 0) |
if (A == 0) |
{ |
{ |
if (B != 1) abort (); |
/* B == 1 and C == 1 (D is arbitrary) */ |
|
mp_limb_t cy; |
MPN_COPY (tp, vp, size); |
MPN_COPY (tp, vp, size); |
} |
MPN_COPY (wp, up, size); |
else |
mpn_submul_1 (wp, vp, size, D); |
{ |
MP_PTR_SWAP (tp, up); |
if (A < 0) |
MP_PTR_SWAP (wp, vp); |
{ |
|
mpn_mul_1 (tp, vp, size, B); |
|
mpn_submul_1 (tp, up, size, -A); |
|
} |
|
else |
|
{ |
|
mpn_mul_1 (tp, up, size, A); |
|
mpn_submul_1 (tp, vp, size, -B); |
|
} |
|
} |
|
if (C < 0) |
|
{ |
|
mpn_mul_1 (wp, vp, size, D); |
|
mpn_submul_1 (wp, up, size, -C); |
|
} |
|
else |
|
{ |
|
mpn_mul_1 (wp, up, size, C); |
|
mpn_submul_1 (wp, vp, size, -D); |
|
} |
|
|
|
{ |
|
mp_ptr xp; |
|
xp = tp; tp = up; up = xp; |
|
xp = wp; wp = vp; vp = xp; |
|
} |
|
|
|
#if EXTEND |
#if EXTEND |
{ mp_limb_t cy; |
|
MPN_ZERO (tp, orig_size); |
|
if (A == 0) |
|
{ |
|
if (B != 1) abort (); |
|
MPN_COPY (tp, s1p, ssize); |
MPN_COPY (tp, s1p, ssize); |
|
tsize = ssize; |
|
tp[ssize] = 0; /* must zero since wp might spill below */ |
|
MPN_COPY (wp, s0p, ssize); |
|
cy = mpn_addmul_1 (wp, s1p, ssize, D); |
|
wp[ssize] = cy; |
|
wsize = ssize + (cy != 0); |
|
MP_PTR_SWAP (tp, s0p); |
|
MP_PTR_SWAP (wp, s1p); |
|
ssize = MAX (wsize, tsize); |
|
#endif |
} |
} |
else |
else |
{ |
{ |
if (A < 0) |
if (asign) |
{ |
{ |
|
mp_limb_t cy; |
|
mpn_mul_1 (tp, vp, size, B); |
|
mpn_submul_1 (tp, up, size, A); |
|
mpn_mul_1 (wp, up, size, C); |
|
mpn_submul_1 (wp, vp, size, D); |
|
MP_PTR_SWAP (tp, up); |
|
MP_PTR_SWAP (wp, vp); |
|
#if EXTEND |
cy = mpn_mul_1 (tp, s1p, ssize, B); |
cy = mpn_mul_1 (tp, s1p, ssize, B); |
cy += mpn_addmul_1 (tp, s0p, ssize, -A); |
cy += mpn_addmul_1 (tp, s0p, ssize, A); |
|
tp[ssize] = cy; |
|
tsize = ssize + (cy != 0); |
|
cy = mpn_mul_1 (wp, s0p, ssize, C); |
|
cy += mpn_addmul_1 (wp, s1p, ssize, D); |
|
wp[ssize] = cy; |
|
wsize = ssize + (cy != 0); |
|
MP_PTR_SWAP (tp, s0p); |
|
MP_PTR_SWAP (wp, s1p); |
|
ssize = MAX (wsize, tsize); |
|
#endif |
} |
} |
else |
else |
{ |
{ |
|
mp_limb_t cy; |
|
mpn_mul_1 (tp, up, size, A); |
|
mpn_submul_1 (tp, vp, size, B); |
|
mpn_mul_1 (wp, vp, size, D); |
|
mpn_submul_1 (wp, up, size, C); |
|
MP_PTR_SWAP (tp, up); |
|
MP_PTR_SWAP (wp, vp); |
|
#if EXTEND |
cy = mpn_mul_1 (tp, s0p, ssize, A); |
cy = mpn_mul_1 (tp, s0p, ssize, A); |
cy += mpn_addmul_1 (tp, s1p, ssize, -B); |
cy += mpn_addmul_1 (tp, s1p, ssize, B); |
|
tp[ssize] = cy; |
|
tsize = ssize + (cy != 0); |
|
cy = mpn_mul_1 (wp, s1p, ssize, D); |
|
cy += mpn_addmul_1 (wp, s0p, ssize, C); |
|
wp[ssize] = cy; |
|
wsize = ssize + (cy != 0); |
|
MP_PTR_SWAP (tp, s0p); |
|
MP_PTR_SWAP (wp, s1p); |
|
ssize = MAX (wsize, tsize); |
|
#endif |
} |
} |
if (cy != 0) |
|
tp[ssize++] = cy; |
|
} |
} |
MPN_ZERO (wp, orig_size); |
|
if (C < 0) |
size -= up[size - 1] == 0; |
{ |
|
cy = mpn_mul_1 (wp, s1p, ssize, D); |
|
cy += mpn_addmul_1 (wp, s0p, ssize, -C); |
|
} |
|
else |
|
{ |
|
cy = mpn_mul_1 (wp, s0p, ssize, C); |
|
cy += mpn_addmul_1 (wp, s1p, ssize, -D); |
|
} |
|
if (cy != 0) |
|
wp[ssize++] = cy; |
|
} |
|
{ |
|
mp_ptr xp; |
|
xp = tp; tp = s0p; s0p = xp; |
|
xp = wp; wp = s1p; s1p = xp; |
|
} |
|
#endif |
|
#if 0 /* Is it a win to remove multiple zeros here? */ |
|
MPN_NORMALIZE (up, size); |
|
#else |
|
if (up[size - 1] == 0) |
|
size--; |
|
#endif |
|
} |
} |
} |
} |
|
|
#if RECORD |
#if RECORD |
printf ("min: %ld\n", min); |
printf ("max: %lx\n", max); |
printf ("max: %ld\n", max); |
|
#endif |
#endif |
|
|
|
#if STAT |
|
{int i; for (i = 0; i < BITS_PER_MP_LIMB; i++) printf ("%d:%d\n", i, arr[i]);} |
|
#endif |
|
|
if (vsize == 0) |
if (vsize == 0) |
{ |
{ |
if (gp != up) |
if (gp != up && gp != 0) |
MPN_COPY (gp, up, size); |
MPN_COPY (gp, up, size); |
#if EXTEND |
#if EXTEND |
|
MPN_NORMALIZE (s0p, ssize); |
if (orig_s0p != s0p) |
if (orig_s0p != s0p) |
MPN_COPY (orig_s0p, s0p, ssize); |
MPN_COPY (orig_s0p, s0p, ssize); |
|
*s0size = sign >= 0 ? ssize : -ssize; |
#endif |
#endif |
TMP_FREE (mark); |
TMP_FREE (mark); |
return size; |
return size; |
Line 373 mpn_gcd (gp, up, size, vp, vsize) |
|
Line 615 mpn_gcd (gp, up, size, vp, vsize) |
|
{ |
{ |
mp_limb_t vl, ul, t; |
mp_limb_t vl, ul, t; |
#if EXTEND |
#if EXTEND |
mp_limb_t cy; |
mp_size_t qsize, i; |
mp_size_t i; |
|
#endif |
#endif |
vl = vp[0]; |
vl = vp[0]; |
#if EXTEND |
#if EXTEND |
t = mpn_divmod_1 (wp, up, size, vl); |
t = mpn_divmod_1 (wp, up, size, vl); |
|
|
MPN_COPY (tp, s0p, ssize); |
MPN_COPY (tp, s0p, ssize); |
for (i = 0; i < size; i++) |
|
|
qsize = size - (wp[size - 1] == 0); /* size of quotient from division */ |
|
if (ssize < qsize) |
{ |
{ |
cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]); |
MPN_ZERO (tp + ssize, qsize - ssize); |
if (cy != 0) |
MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */ |
tp[ssize++] = cy; |
for (i = 0; i < ssize; i++) |
|
{ |
|
mp_limb_t cy; |
|
cy = mpn_addmul_1 (tp + i, wp, qsize, s1p[i]); |
|
tp[qsize + i] = cy; |
|
} |
} |
} |
#if 0 |
else |
MPN_COPY (s0p, s1p, ssize); |
{ |
MPN_COPY (s1p, tp, ssize); |
MPN_ZERO (s1p + ssize, qsize); /* zero s1 too */ |
|
for (i = 0; i < qsize; i++) |
|
{ |
|
mp_limb_t cy; |
|
cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]); |
|
tp[ssize + i] = cy; |
|
} |
|
} |
|
ssize += qsize; |
|
ssize -= tp[ssize - 1] == 0; |
|
|
|
sign = -sign; |
|
MP_PTR_SWAP (s0p, s1p); |
|
MP_PTR_SWAP (s1p, tp); |
#else |
#else |
{ |
|
mp_ptr xp; |
|
xp = s0p; s0p = s1p; s1p = xp; |
|
xp = s1p; s1p = tp; tp = xp; |
|
} |
|
#endif |
|
#else |
|
t = mpn_mod_1 (up, size, vl); |
t = mpn_mod_1 (up, size, vl); |
#endif |
#endif |
ul = vl; |
ul = vl; |
Line 405 mpn_gcd (gp, up, size, vp, vsize) |
|
Line 660 mpn_gcd (gp, up, size, vp, vsize) |
|
{ |
{ |
mp_limb_t t; |
mp_limb_t t; |
#if EXTEND |
#if EXTEND |
mp_limb_t q, cy; |
mp_limb_t q; |
q = ul / vl; |
q = ul / vl; |
t = ul - q*vl; |
t = ul - q * vl; |
|
|
MPN_COPY (tp, s0p, ssize); |
MPN_COPY (tp, s0p, ssize); |
cy = mpn_addmul_1 (tp, s1p, ssize, q); |
|
if (cy != 0) |
MPN_ZERO (s1p + ssize, 1); /* zero s1 too */ |
tp[ssize++] = cy; |
|
#if 0 |
|
MPN_COPY (s0p, s1p, ssize); |
|
MPN_COPY (s1p, tp, ssize); |
|
#else |
|
{ |
{ |
mp_ptr xp; |
mp_limb_t cy; |
xp = s0p; s0p = s1p; s1p = xp; |
cy = mpn_addmul_1 (tp, s1p, ssize, q); |
xp = s1p; s1p = tp; tp = xp; |
tp[ssize] = cy; |
} |
} |
#endif |
|
|
|
|
ssize += 1; |
|
ssize -= tp[ssize - 1] == 0; |
|
|
|
sign = -sign; |
|
MP_PTR_SWAP (s0p, s1p); |
|
MP_PTR_SWAP (s1p, tp); |
#else |
#else |
t = ul % vl; |
t = ul % vl; |
#endif |
#endif |
ul = vl; |
ul = vl; |
vl = t; |
vl = t; |
} |
} |
gp[0] = ul; |
if (gp != 0) |
|
gp[0] = ul; |
#if EXTEND |
#if EXTEND |
|
MPN_NORMALIZE (s0p, ssize); |
if (orig_s0p != s0p) |
if (orig_s0p != s0p) |
MPN_COPY (orig_s0p, s0p, ssize); |
MPN_COPY (orig_s0p, s0p, ssize); |
|
*s0size = sign >= 0 ? ssize : -ssize; |
#endif |
#endif |
TMP_FREE (mark); |
TMP_FREE (mark); |
return 1; |
return 1; |