[BACK]Return to karasqrt.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpfr

Annotation of OpenXM_contrib/gmp/mpfr/karasqrt.c, Revision 1.1

1.1     ! maekawa     1: /*  kara_sqrtrem -- Karatsuba square root
        !             2:
        !             3: Copyright (C) 1999-2000 PolKA project, Inria Lorraine and Loria
        !             4:
        !             5: This file is part of the MPFR Library.
        !             6:
        !             7: The MPFR Library is free software; you can redistribute it and/or modify
        !             8: it under the terms of the GNU Library General Public License as published by
        !             9: the Free Software Foundation; either version 2 of the License, or (at your
        !            10: option) any later version.
        !            11:
        !            12: The MPFR Library is distributed in the hope that it will be useful, but
        !            13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
        !            14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
        !            15: License for more details.
        !            16:
        !            17: You should have received a copy of the GNU Library General Public License
        !            18: along with the MPFR Library; see the file COPYING.LIB.  If not, write to
        !            19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
        !            20: MA 02111-1307, USA. */
        !            21:
        !            22: /* Reference: Karatsuba Square Root, Paul Zimmermann, Research Report 3805,
        !            23:    INRIA, November 1999. */
        !            24:
        !            25: #include "gmp.h"
        !            26: #include "gmp-impl.h"
        !            27: #include "mpfr.h"
        !            28:
        !            29: #define SQRT_LIMIT KARATSUBA_MUL_THRESHOLD /* must be at least 3, should be
        !            30:                                              close to optimal */
        !            31:
        !            32: /* Karatsuba square root with remainder of the n-limb number at op; n must be even */
        !            33: mp_size_t kara_sqrtrem(mp_limb_t *s, mp_limb_t *r, mp_limb_t *op, mp_size_t n) /* root is written at s, remainder at r; returns the remainder size in limbs */
        !            34: {
        !            35:   if (n<SQRT_LIMIT) return mpn_sqrtrem(s, r, op, n); /* small operand: defer to mpn_sqrtrem */
        !            36:   else {
        !            37:     mp_size_t nn, rn, rrn, sn, qn; mp_limb_t *q, tmp; /* q: scratch for the quotient, then for its square; tmp: carry accumulator */
        !            38:     TMP_DECL (marker);
        !            39:
        !            40:     TMP_MARK (marker);
        !            41:     nn = n/4; /* block size 'b' corresponds to nn limbs */
        !            42:     rn = kara_sqrtrem(s+nn, r+nn, op+2*nn, n-2*nn); /* recurse on the upper n-2*nn limbs of op */
        !            43:     /* rn <= ceil(n-2*nn, 2) + 1 <= ceil(2*nn+3, 2) + 1 <= nn+3 */
        !            44:     /* to divide by 2*s', first divide by 2, to ensure the dividend is
        !            45:        less than b^2 */
        !            46:     sn=(n-2*nn+1)/2; /* sn >= nn */
        !            47:     MPN_COPY(r, op+nn, nn); /* copy a_1 */
        !            48:     tmp = mpn_rshift(r, r, nn+rn, 1); /* halve the nn+rn limb dividend in place; the bit shifted out is restored from op[nn] below */
        !            49:     if (r[nn+rn-1]==0) rn--; /* top limb became zero after the shift */
        !            50:     q = (mp_limb_t*) TMP_ALLOC(2*(sn+1)*sizeof(mp_limb_t)); /* 2*(sn+1) limbs of scratch */
        !            51:     if (nn+rn < 2*sn) MPN_ZERO(r+nn+rn, 2*sn-nn-rn); /* zero-pad the dividend to 2*sn limbs */
        !            52:     qn = sn; if (mpn_cmp(r+sn, s+nn, sn)>=0) { /* high half >= divisor: quotient gets an extra top limb */
        !            53:       q[qn++]=1; mpn_sub_n(r+sn, r+sn, s+nn, sn);
        !            54:     }
        !            55: #if 0
        !            56:     mpn_divrem(q, 0, r, 2*sn, s+nn, sn);
        !            57: #else
        !            58:     mpn_divrem_n(q, r, s+nn, sn); /* quotient into q; the code below treats r[0..sn) as the remainder -- NOTE(review): confirm against mpn_divrem_n */
        !            59: #endif
        !            60:     while (qn>nn && q[qn-1]==0) qn--; /* strip high zero limbs of q, keeping at least nn */
        !            61:     MPN_COPY(s, q, nn); /* low nn limbs of the quotient become the low limbs of the root */
        !            62:     if (nn+rn > 2*sn) {
        !            63:       tmp=mpn_add_n(s+sn, s+sn, q+sn, nn+rn-2*sn); /* add q's limbs from index sn upward into s */
        !            64:       if (tmp) mpn_add_1(s+nn+rn-sn, s+nn+rn-sn, (n+1)/2-nn-rn+sn, tmp); /* propagate the carry */
        !            65:     }
        !            66:     /* multiply remainder by two and add low bit of a_1 */
        !            67:     rrn = nn+sn; /* size of output remainder */
        !            68:     rrn += mpn_lshift(r+nn, r, sn, 1); /* doubled remainder is stored nn limbs up; a shifted-out bit extends rrn */
        !            69:     r[nn] |= (op[nn] & 1);
        !            70:     sn += nn; /* from here on sn = nn + previous sn */
        !            71:     if (qn>nn) {
        !            72:       MPN_COPY(r, s+nn, qn-nn); /* save the qn-nn limbs from s */
        !            73:       MPN_COPY(s+nn, q+nn, qn-nn); /* replace by those of q */
        !            74:     }
        !            75:     mpn_mul_n(q, s, s, qn); /* q = square of the low qn limbs of s */
        !            76:     if (qn>nn) { /* restore the limbs from s, adding them to those of q */
        !            77:       mp_limb_t cy;
        !            78:
        !            79:       cy = mpn_add_n(s+nn, s+nn, r, qn-nn);
        !            80:       if (qn<sn) cy = mpn_add_1(s+qn, s+qn, sn-qn, cy);
        !            81:       if (cy) s[sn++]=1; /* carry out: the root grows by one limb */
        !            82:     }
        !            83:     MPN_COPY(r, op, nn); /* copy a_0 */
        !            84:     qn = 2*qn; /* size of the square stored in q */
        !            85:     if (qn<sn) MPN_ZERO(q+qn, sn-qn); /* zero-pad q to sn limbs */
        !            86:     if (rrn<sn) MPN_ZERO(r+rrn, sn-rrn); /* zero-pad r to sn limbs */
        !            87:     if (mpn_sub_n(r, r, q, sn) || (qn>sn)) { /* r -= q; a borrow (or q longer than sn) selects the correction path */
        !            88:       if (rrn>sn) rrn=sn; /* NOTE(review): presumably the borrow is absorbed by the extra high limb of r -- confirm */
        !            89:       else {
        !            90:        /* one shift and one add is faster than two add's */
        !            91:        tmp = mpn_lshift(q, s, sn, 1); tmp += mpn_add_n(r, r, q, sn); /* sequenced statements: q = 2*s must be written before mpn_add_n reads it */
        !            92:        tmp -= mpn_sub_1(r, r, sn, 1); r[sn] = tmp - 1; /* net effect: r += 2*s - 1, top limb = carries - borrow - 1 */
        !            93:        rrn = sn + r[sn];
        !            94:        mpn_sub_1(s, s, sn, 1); /* decrement the root */
        !            95:       }
        !            96:     }
        !            97:     else if (rrn>sn) r[sn]=1; /* set the extra high limb of the remainder */
        !            98:     TMP_FREE (marker);
        !            99:     MPN_NORMALIZE(r, rrn); /* drop high zero limbs from the count */
        !           100:     return rrn;
        !           101:   }
        !           102: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>