version 1.1.1.1, 2000/09/09 14:12:24 |
version 1.1.1.2, 2000/12/01 05:44:51 |
Line 35 MA 02111-1307, USA. */ |
|
Line 35 MA 02111-1307, USA. */ |
|
http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz |
http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz |
*/ |
*/ |
|
|
static mp_limb_t mpn_bz_div_3_halves_by_2 _PROTO ((mp_ptr, mp_ptr, mp_srcptr, |
static mp_limb_t mpn_bz_div_3_halves_by_2 |
mp_size_t, mp_ptr)); |
_PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)); |
|
|
static mp_limb_t mpn_bz_divrem_aux _PROTO ((mp_ptr, mp_ptr, mp_srcptr, |
|
mp_size_t, mp_ptr)); |
|
|
|
/* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than |
/* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than |
div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way, |
div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way, |
Line 73 unused_mpn_divrem (qp, qxn, np, nn, dp, dn) |
|
Line 71 unused_mpn_divrem (qp, qxn, np, nn, dp, dn) |
|
} |
} |
#endif |
#endif |
|
|
|
|
/* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n) |
/* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n) |
by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n). |
by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n). |
Returns most significant limb of the quotient, which is 0 or 1. |
Returns most significant limb of the quotient, which is 0 or 1. |
Line 89 mpn_bz_divrem_n (qp, np, dp, n) |
|
Line 88 mpn_bz_divrem_n (qp, np, dp, n) |
|
mp_size_t n; |
mp_size_t n; |
#endif |
#endif |
{ |
{ |
mp_limb_t qhl = 0; |
mp_limb_t qhl, cc; |
if (mpn_cmp (np + n, dp, n) >= 0) |
|
{ |
|
qhl = 1; |
|
mpn_sub_n (np + n, np + n, dp, n); |
|
abort (); |
|
} |
|
if (n % 2 != 0) |
|
{ |
|
/* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */ |
|
if (n < BZ_THRESHOLD) |
|
qhl += mpn_sb_divrem_mn (qp + 1, np + 2, 2 * (n - 1), dp + 1, n - 1); |
|
else |
|
qhl += mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1); |
|
/* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by |
|
(dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */ |
|
if (mpn_sub_1 (np + n, np + n, 1, |
|
mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]))) |
|
{ |
|
/* quotient too large */ |
|
qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L); |
|
if (mpn_add_n (np + 1, np + 1, dp, n) == 0) |
|
{ /* still too large */ |
|
qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L); |
|
mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */ |
|
} |
|
} |
|
/* now divide (np, n + 1) by (dp, n) */ |
|
qhl += mpn_add_1 (qp + 1, qp + 1, n - 1, |
|
mpn_sb_divrem_mn (qp, np, n + 1, dp, n)); |
|
} |
|
else |
|
{ |
|
mp_ptr tmp; |
|
mp_size_t n2 = n/2; |
|
TMP_DECL (marker); |
|
TMP_MARK (marker); |
|
tmp = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB); |
|
qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp); |
|
qhl += mpn_add_1 (qp + n2, qp + n2, n2, |
|
mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp)); |
|
TMP_FREE (marker); |
|
} |
|
return qhl; |
|
} |
|
|
|
/* Like mpn_bz_divrem_n, but without memory allocation. Also |
|
assumes mpn_cmp (np + n, dp, n) < 0 */ |
|
|
|
static mp_limb_t |
|
#if __STDC__ |
|
mpn_bz_divrem_aux (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr tmp) |
|
#else |
|
mpn_bz_divrem_aux (qp, np, dp, n, tmp) |
|
mp_ptr qp; |
|
mp_ptr np; |
|
mp_srcptr dp; |
|
mp_size_t n; |
|
mp_ptr tmp; |
|
#endif |
|
{ |
|
mp_limb_t qhl; |
|
|
|
if (n % 2 != 0) |
if (n % 2 != 0) |
{ |
{ |
/* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */ |
qhl = mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1); |
qhl = mpn_bz_divrem_aux (qp + 1, np + 2, dp + 1, n - 1, tmp); |
cc = mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]); |
/* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by |
cc = mpn_sub_1 (np + n, np + n, 1, cc); |
(dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */ |
if (qhl) cc += mpn_sub_1 (np + n, np + n, 1, dp[0]); |
if (mpn_sub_1 (np + n, np + n, 1, |
while (cc) |
mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]))) |
{ |
{ |
qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, (mp_limb_t) 1); |
/* quotient too large */ |
cc -= mpn_add_n (np + 1, np + 1, dp, n); |
qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L); |
} |
if (mpn_add_n (np + 1, np + 1, dp, n) == 0) |
|
{ /* still too large */ |
|
qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L); |
|
mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */ |
|
} |
|
} |
|
/* now divide (np, n + 1) by (dp, n) */ |
|
qhl += mpn_add_1 (qp + 1, qp + 1, n - 1, |
qhl += mpn_add_1 (qp + 1, qp + 1, n - 1, |
mpn_sb_divrem_mn (qp, np, n + 1, dp, n)); |
mpn_sb_divrem_mn (qp, np, n + 1, dp, n)); |
} |
} |
else |
else |
{ |
{ |
mp_size_t n2 = n/2; |
mp_size_t n2 = n/2; |
qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp); |
qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2); |
qhl += mpn_add_1 (qp + n2, qp + n2, n2, |
qhl += mpn_add_1 (qp + n2, qp + n2, n2, |
mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp)); |
mpn_bz_div_3_halves_by_2 (qp, np, dp, n2)); |
} |
} |
return qhl; |
return qhl; |
} |
} |
|
|
|
|
/* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n), |
/* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n), |
the remainder in (np, 2n) */ |
the remainder in (np, 2n) */ |
|
|
static mp_limb_t |
static mp_limb_t |
#if __STDC__ |
#if __STDC__ |
mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, |
mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n) |
mp_ptr tmp) |
|
#else |
#else |
mpn_bz_div_3_halves_by_2 (qp, np, dp, n, tmp) |
mpn_bz_div_3_halves_by_2 (qp, np, dp, n) |
mp_ptr qp; |
mp_ptr qp; |
mp_ptr np; |
mp_ptr np; |
mp_srcptr dp; |
mp_srcptr dp; |
mp_size_t n; |
mp_size_t n; |
mp_ptr tmp; |
|
#endif |
#endif |
{ |
{ |
mp_size_t twon = n + n; |
mp_size_t twon = n + n; |
mp_limb_t qhl; |
mp_limb_t qhl, cc; |
|
mp_ptr tmp; |
|
TMP_DECL (marker); |
|
|
|
TMP_MARK (marker); |
if (n < BZ_THRESHOLD) |
if (n < BZ_THRESHOLD) |
qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n); |
qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n); |
else |
else |
qhl = mpn_bz_divrem_aux (qp, np + n, dp + n, n, tmp); |
qhl = mpn_bz_divrem_n (qp, np + n, dp + n, n); |
/* q = (qp, n), c = (np + n, n) with the notations of [1] */ |
tmp = (mp_ptr) TMP_ALLOC (twon * BYTES_PER_MP_LIMB); |
mpn_mul_n (tmp, qp, dp, n); |
mpn_mul_n (tmp, qp, dp, n); |
if (qhl != 0) |
cc = mpn_sub_n (np, np, tmp, twon); |
mpn_add_n (tmp + n, tmp + n, dp, n); |
TMP_FREE (marker); |
if (mpn_sub_n (np, np, tmp, twon)) /* R = (np, 2n) */ |
if (qhl) cc += mpn_sub_n (np + n, np + n, dp, n); |
|
while (cc) |
{ |
{ |
qhl -= mpn_sub_1 (qp, qp, n, 1L); |
qhl -= mpn_sub_1 (qp, qp, n, (mp_limb_t) 1); |
if (mpn_add_n (np, np, dp, twon) == 0) |
cc -= mpn_add_n (np, np, dp, twon); |
{ /* qp still too large */ |
|
qhl -= mpn_sub_1 (qp, qp, n, 1L); |
|
mpn_add_n (np, np, dp, twon); /* always carry here */ |
|
} |
|
} |
} |
return qhl; |
return qhl; |
} |
} |