Annotation of OpenXM_contrib/gmp/mpfr/karasqrt.c, Revision 1.1
1.1 ! maekawa 1: /* kara_sqrtrem -- Karatsuba square root
! 2:
! 3: Copyright (C) 1999-2000 PolKA project, Inria Lorraine and Loria
! 4:
! 5: This file is part of the MPFR Library.
! 6:
! 7: The MPFR Library is free software; you can redistribute it and/or modify
! 8: it under the terms of the GNU Library General Public License as published by
! 9: the Free Software Foundation; either version 2 of the License, or (at your
! 10: option) any later version.
! 11:
! 12: The MPFR Library is distributed in the hope that it will be useful, but
! 13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 14: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! 15: License for more details.
! 16:
! 17: You should have received a copy of the GNU Library General Public License
! 18: along with the MPFR Library; see the file COPYING.LIB. If not, write to
! 19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 20: MA 02111-1307, USA. */
! 21:
! 22: /* Reference: Karatsuba Square Root, Paul Zimmermann, Research Report 3805,
! 23: INRIA, November 1999. */
! 24:
! 25: #include "gmp.h"
! 26: #include "gmp-impl.h"
! 27: #include "mpfr.h"
! 28:
! 29: #define SQRT_LIMIT KARATSUBA_MUL_THRESHOLD /* operand size (in limbs) below which kara_sqrtrem calls mpn_sqrtrem directly; must be at least 3, should be
! 30: close to optimal */
! 31:
! 32: /* kara_sqrtrem: Karatsuba square root with remainder of the n-limb
         operand at op.  For n < SQRT_LIMIT the call is forwarded unchanged to
         mpn_sqrtrem.  Otherwise the routine recurses on the high n-2*(n/4)
         limbs of op, then extends that partial root with one division and one
         squaring, correcting the root by one if the remainder went negative.
         Root limbs are written at s and remainder limbs at r; the return value
         is the limb count of the remainder after MPN_NORMALIZE.
         r also serves as scratch, and the limb r[sn] just above the nominal
         remainder may be written.
         NOTE(review): the required sizes of s and r are not stated in this
         file -- confirm against the callers.
         n must be even */
! 33: mp_size_t kara_sqrtrem(mp_limb_t *s, mp_limb_t *r, mp_limb_t *op, mp_size_t n)
! 34: {
! 35: if (n<SQRT_LIMIT) return mpn_sqrtrem(s, r, op, n); /* small operand: no splitting */
! 36: else {
! 37: mp_size_t nn, rn, rrn, sn, qn; mp_limb_t *q, tmp;
! 38: TMP_DECL (marker);
! 39:
! 40: TMP_MARK (marker);
! 41: nn = n/4; /* block size 'b' corresponds to nn limbs */
! 42: rn = kara_sqrtrem(s+nn, r+nn, op+2*nn, n-2*nn); /* partial root s' of the high n-2*nn limbs goes to s+nn, its remainder (rn limbs) to r+nn */
! 43: /* rn <= ceil((n-2*nn)/2) + 1 <= ceil((2*nn+3)/2) + 1 <= nn+3 */
! 44: /* to divide by 2*s', first divide by 2, to ensure the dividend is
! 45: less than b^2 */
! 46: sn=(n-2*nn+1)/2; /* sn >= nn; used as the limb length of the divisor at s+nn */
! 47: MPN_COPY(r, op+nn, nn); /* copy a_1 (limbs nn..2*nn-1 of op) below the recursive remainder */
! 48: tmp = mpn_rshift(r, r, nn+rn, 1); /* halve the dividend; the value stored in tmp is never read, the low bit of a_1 is re-read from op[nn] below */
! 49: if (r[nn+rn-1]==0) rn--; /* the shift may have cleared the top limb */
! 50: q = (mp_limb_t*) TMP_ALLOC(2*(sn+1)*sizeof(mp_limb_t)); /* scratch: holds the quotient, later its square */
! 51: if (nn+rn < 2*sn) MPN_ZERO(r+nn+rn, 2*sn-nn-rn); /* zero-extend the dividend to 2*sn limbs */
! 52: qn = sn; if (mpn_cmp(r+sn, s+nn, sn)>=0) { /* high half of the dividend >= divisor */
! 53: q[qn++]=1; mpn_sub_n(r+sn, r+sn, s+nn, sn); /* record an extra top quotient limb of 1 and subtract the divisor from the high half */
! 54: }
! 55: #if 0
! 56: mpn_divrem(q, 0, r, 2*sn, s+nn, sn);
! 57: #else
! 58: mpn_divrem_n(q, r, s+nn, sn); /* quotient to q; the code below treats the low sn limbs of r as the remainder */
! 59: #endif
! 60: while (qn>nn && q[qn-1]==0) qn--; /* strip high zero limbs of q, but keep at least nn */
! 61: MPN_COPY(s, q, nn); /* low nn limbs of the root are the low limbs of q */
! 62: if (nn+rn > 2*sn) {
! 63: tmp=mpn_add_n(s+sn, s+sn, q+sn, nn+rn-2*sn); /* add nn+rn-2*sn limbs starting at q+sn into s+sn */
! 64: if (tmp) mpn_add_1(s+nn+rn-sn, s+nn+rn-sn, (n+1)/2-nn-rn+sn, tmp); /* propagate the carry upward */
! 65: }
! 66: /* multiply remainder by two and add low bit of a_1 */
! 67: rrn = nn+sn; /* size of output remainder */
! 68: rrn += mpn_lshift(r+nn, r, sn, 1); /* doubled remainder lands at r+nn; the bit shifted out is only counted in rrn, not stored */
! 69: r[nn] |= (op[nn] & 1);
! 70: sn += nn; /* from here on sn is nn plus the former sn: the working length of s and r */
! 71: if (qn>nn) {
! 72: MPN_COPY(r, s+nn, qn-nn); /* save the qn-nn limbs from s */
! 73: MPN_COPY(s+nn, q+nn, qn-nn); /* replace by those of q */
! 74: }
! 75: mpn_mul_n(q, s, s, qn); /* q = square of the low qn limbs of s, which now hold the quotient */
! 76: if (qn>nn) { /* restore the limbs from s, adding them to those of q */
! 77: mp_limb_t cy;
! 78:
! 79: cy = mpn_add_n(s+nn, s+nn, r, qn-nn);
! 80: if (qn<sn) cy = mpn_add_1(s+qn, s+qn, sn-qn, cy); /* carry into the remaining high limbs of the root */
! 81: if (cy) s[sn++]=1; /* carry out of the top: the root grows by one limb */
! 82: }
! 83: MPN_COPY(r, op, nn); /* copy a_0 */
! 84: qn = 2*qn; /* limb length of the square held in q */
! 85: if (qn<sn) MPN_ZERO(q+qn, sn-qn); /* pad the square to sn limbs */
! 86: if (rrn<sn) MPN_ZERO(r+rrn, sn-rrn); /* pad the remainder to sn limbs */
! 87: if (mpn_sub_n(r, r, q, sn) || (qn>sn)) { /* borrow, or the square is longer than sn limbs */
! 88: if (rrn>sn) rrn=sn; /* the carry limb counted in rrn but never stored appears to absorb the borrow */
! 89: else {
! 90: /* one shift and one add is faster than two add's.
         The three calls are separate statements on purpose: the add reads the
         q written by the shift and the decrement must follow the add, while C
         leaves the evaluation order of the operands of + and - unspecified.
         Net effect: r += 2*s - 1, with the borrow of the subtraction above
         folded into the trailing -1. */
! 91: tmp = mpn_lshift(q, s, sn, 1); tmp += mpn_add_n(r, r, q, sn);
! 92: tmp -= mpn_sub_1(r, r, sn, 1); r[sn] = tmp - 1;
! 93: rrn = sn + r[sn]; /* the top limb (0 or 1) decides whether the remainder has sn or sn+1 limbs */
! 94: mpn_sub_1(s, s, sn, 1); /* the root was one too large */
! 95: }
! 96: }
! 97: else if (rrn>sn) r[sn]=1; /* no borrow: store the carry limb that rrn already counts */
! 98: TMP_FREE (marker);
! 99: MPN_NORMALIZE(r, rrn); /* rrn is the remainder size returned to the caller */
! 100: return rrn;
! 101: }
! 102: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>