Annotation of OpenXM_contrib/gmp/mpn/generic/bz_divrem_n.c, Revision 1.1
1.1 ! maekawa 1: /* mpn_bz_divrem_n and auxilliary routines.
! 2:
! 3: THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
! 4: INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
! 5: IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
! 6: FUTURE GNU MP RELEASE.
! 7:
! 8:
! 9: Copyright (C) 2000 Free Software Foundation, Inc.
! 10: Contributed by Paul Zimmermann.
! 11:
! 12: This file is part of the GNU MP Library.
! 13:
! 14: The GNU MP Library is free software; you can redistribute it and/or modify
! 15: it under the terms of the GNU Lesser General Public License as published by
! 16: the Free Software Foundation; either version 2.1 of the License, or (at your
! 17: option) any later version.
! 18:
! 19: The GNU MP Library is distributed in the hope that it will be useful, but
! 20: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 21: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 22: License for more details.
! 23:
! 24: You should have received a copy of the GNU Lesser General Public License
! 25: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 26: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 27: MA 02111-1307, USA. */
! 28:
! 29: #include "gmp.h"
! 30: #include "gmp-impl.h"
! 31:
! 32: /*
! 33: [1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
! 34: Technical report MPI-I-98-1-022, october 1998.
! 35: http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
! 36: */
! 37:
! 38: static mp_limb_t mpn_bz_div_3_halves_by_2 _PROTO ((mp_ptr, mp_ptr, mp_srcptr,
! 39: mp_size_t, mp_ptr));
! 40:
! 41: static mp_limb_t mpn_bz_divrem_aux _PROTO ((mp_ptr, mp_ptr, mp_srcptr,
! 42: mp_size_t, mp_ptr));
! 43:
! 44: /* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
! 45: div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
! 46: i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */
! 47: #ifndef BZ_THRESHOLD
! 48: #define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
! 49: #endif
! 50:
! 51: #if 0
! 52: static
! 53: unused_mpn_divrem (qp, qxn, np, nn, dp, dn)
! 54: mp_ptr qp;
! 55: mp_size_t qxn;
! 56: mp_ptr np;
! 57: mp_size_t nn;
! 58: mp_srcptr dp;
! 59: mp_size_t dn;
! 60: {
! 61: /* This might be useful: */
! 62: if (qxn != 0)
! 63: {
! 64: mp_limb_t c;
! 65: mp_ptr tp = alloca ((nn + qxn) * BYTES_PER_MP_LIMB);
! 66: MPN_COPY (tp + qxn - nn, np, nn);
! 67: MPN_ZERO (tp, qxn);
! 68: c = mpn_divrem (qp, 0L, tp, nn + qxn, dp, dn);
! 69: /* Maybe copy proper part of tp to np? Documentation is unclear about
! 70: the returned np value when qxn != 0 */
! 71: return c;
! 72: }
! 73: }
! 74: #endif
! 75:
! 76: /* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
! 77: by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
! 78: Returns most significant limb of the quotient, which is 0 or 1.
! 79: Requires that the most significant bit of the divisor is set. */
! 80:
! 81: mp_limb_t
! 82: #if __STDC__
! 83: mpn_bz_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
! 84: #else
! 85: mpn_bz_divrem_n (qp, np, dp, n)
! 86: mp_ptr qp;
! 87: mp_ptr np;
! 88: mp_srcptr dp;
! 89: mp_size_t n;
! 90: #endif
! 91: {
! 92: mp_limb_t qhl = 0;
! 93: if (mpn_cmp (np + n, dp, n) >= 0)
! 94: {
! 95: qhl = 1;
! 96: mpn_sub_n (np + n, np + n, dp, n);
! 97: abort ();
! 98: }
! 99: if (n % 2 != 0)
! 100: {
! 101: /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */
! 102: if (n < BZ_THRESHOLD)
! 103: qhl += mpn_sb_divrem_mn (qp + 1, np + 2, 2 * (n - 1), dp + 1, n - 1);
! 104: else
! 105: qhl += mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1);
! 106: /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by
! 107: (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */
! 108: if (mpn_sub_1 (np + n, np + n, 1,
! 109: mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0])))
! 110: {
! 111: /* quotient too large */
! 112: qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
! 113: if (mpn_add_n (np + 1, np + 1, dp, n) == 0)
! 114: { /* still too large */
! 115: qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
! 116: mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */
! 117: }
! 118: }
! 119: /* now divide (np, n + 1) by (dp, n) */
! 120: qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
! 121: mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
! 122: }
! 123: else
! 124: {
! 125: mp_ptr tmp;
! 126: mp_size_t n2 = n/2;
! 127: TMP_DECL (marker);
! 128: TMP_MARK (marker);
! 129: tmp = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
! 130: qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp);
! 131: qhl += mpn_add_1 (qp + n2, qp + n2, n2,
! 132: mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp));
! 133: TMP_FREE (marker);
! 134: }
! 135: return qhl;
! 136: }
! 137:
! 138: /* Like mpn_bz_divrem_n, but without memory allocation. Also
! 139: assumes mpn_cmp (np + n, dp, n) < 0 */
! 140:
! 141: static mp_limb_t
! 142: #if __STDC__
! 143: mpn_bz_divrem_aux (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr tmp)
! 144: #else
! 145: mpn_bz_divrem_aux (qp, np, dp, n, tmp)
! 146: mp_ptr qp;
! 147: mp_ptr np;
! 148: mp_srcptr dp;
! 149: mp_size_t n;
! 150: mp_ptr tmp;
! 151: #endif
! 152: {
! 153: mp_limb_t qhl;
! 154:
! 155: if (n % 2 != 0)
! 156: {
! 157: /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */
! 158: qhl = mpn_bz_divrem_aux (qp + 1, np + 2, dp + 1, n - 1, tmp);
! 159: /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by
! 160: (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */
! 161: if (mpn_sub_1 (np + n, np + n, 1,
! 162: mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0])))
! 163: {
! 164: /* quotient too large */
! 165: qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
! 166: if (mpn_add_n (np + 1, np + 1, dp, n) == 0)
! 167: { /* still too large */
! 168: qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
! 169: mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */
! 170: }
! 171: }
! 172: /* now divide (np, n + 1) by (dp, n) */
! 173: qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
! 174: mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
! 175: }
! 176: else
! 177: {
! 178: mp_size_t n2 = n/2;
! 179: qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp);
! 180: qhl += mpn_add_1 (qp + n2, qp + n2, n2,
! 181: mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp));
! 182: }
! 183: return qhl;
! 184: }
! 185:
! 186: /* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
! 187: the remainder in (np, 2n) */
! 188:
! 189: static mp_limb_t
! 190: #if __STDC__
! 191: mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
! 192: mp_ptr tmp)
! 193: #else
! 194: mpn_bz_div_3_halves_by_2 (qp, np, dp, n, tmp)
! 195: mp_ptr qp;
! 196: mp_ptr np;
! 197: mp_srcptr dp;
! 198: mp_size_t n;
! 199: mp_ptr tmp;
! 200: #endif
! 201: {
! 202: mp_size_t twon = n + n;
! 203: mp_limb_t qhl;
! 204:
! 205: if (n < BZ_THRESHOLD)
! 206: qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
! 207: else
! 208: qhl = mpn_bz_divrem_aux (qp, np + n, dp + n, n, tmp);
! 209: /* q = (qp, n), c = (np + n, n) with the notations of [1] */
! 210: mpn_mul_n (tmp, qp, dp, n);
! 211: if (qhl != 0)
! 212: mpn_add_n (tmp + n, tmp + n, dp, n);
! 213: if (mpn_sub_n (np, np, tmp, twon)) /* R = (np, 2n) */
! 214: {
! 215: qhl -= mpn_sub_1 (qp, qp, n, 1L);
! 216: if (mpn_add_n (np, np, dp, twon) == 0)
! 217: { /* qp still too large */
! 218: qhl -= mpn_sub_1 (qp, qp, n, 1L);
! 219: mpn_add_n (np, np, dp, twon); /* always carry here */
! 220: }
! 221: }
! 222: return qhl;
! 223: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>