[BACK]Return to karasqrt.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpfr

Annotation of OpenXM_contrib/gmp/mpfr/karasqrt.c, Revision 1.1.1.1

1.1       maekawa     1: /*  kara_sqrtrem -- Karatsuba square root
                      2:
                      3: Copyright (C) 1999-2000 PolKA project, Inria Lorraine and Loria
                      4:
                      5: This file is part of the MPFR Library.
                      6:
                      7: The MPFR Library is free software; you can redistribute it and/or modify
                      8: it under the terms of the GNU Library General Public License as published by
                      9: the Free Software Foundation; either version 2 of the License, or (at your
                     10: option) any later version.
                     11:
                     12: The MPFR Library is distributed in the hope that it will be useful, but
                     13: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
                     14: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
                     15: License for more details.
                     16:
                     17: You should have received a copy of the GNU Library General Public License
                     18: along with the MPFR Library; see the file COPYING.LIB.  If not, write to
                     19: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
                     20: MA 02111-1307, USA. */
                     21:
                     22: /* Reference: Karatsuba Square Root, Paul Zimmermann, Research Report 3805,
                     23:    INRIA, November 1999. */
                     24:
                     25: #include "gmp.h"
                     26: #include "gmp-impl.h"
                     27: #include "mpfr.h"
                     28:
                     29: #define SQRT_LIMIT KARATSUBA_MUL_THRESHOLD /* size below which kara_sqrtrem calls
                     30:                                              mpn_sqrtrem directly; must be at least 3,
                                                                      should be close to optimal */
                     31:
                     32: /* kara_sqrtrem -- square root with remainder of the n-limb operand op,
                            computed recursively (see the reference cited at the top of the file).

                            s  : receives the square root.
                            r  : receives the remainder; it is also used as working space for
                                 the dividend and for saved limbs of s while the function runs.
                            op : input operand; the code visible here only reads from it.
                            n  : size of op; n must be even.

                            Returns the size of the remainder left in r after MPN_NORMALIZE
                            (for n < SQRT_LIMIT, whatever mpn_sqrtrem returns).

                            NOTE(review): s and r must be large enough for the intermediate
                            writes below (s[sn] and r[sn] can be written); the exact space
                            requirement is not stated in this file -- confirm against callers. */
                     33: mp_size_t kara_sqrtrem(mp_limb_t *s, mp_limb_t *r, mp_limb_t *op, mp_size_t n)
                     34: {
                     35:   if (n<SQRT_LIMIT) return mpn_sqrtrem(s, r, op, n); /* small operand: no recursion */
                     36:   else {
                     37:     mp_size_t nn, rn, rrn, sn, qn; mp_limb_t *q, tmp;
                     38:     TMP_DECL (marker);
                     39:
                     40:     TMP_MARK (marker);
                     41:     nn = n/4; /* block size 'b' corresponds to nn limbs */
                             /* recurse on the upper n-2*nn limbs of op: the partial root s' is
                                written at s+nn, its remainder at r+nn, and rn is the value
                                returned by the recursive call (size of that remainder) */
                     42:     rn = kara_sqrtrem(s+nn, r+nn, op+2*nn, n-2*nn);
                     43:     /* rn <= ceil(n-2*nn, 2) + 1 <= ceil(2*nn+3, 2) + 1 <= nn+3 */
                     44:     /* to divide by 2*s', first divide by 2, to ensure the dividend is
                     45:        less than b^2 */
                     46:     sn=(n-2*nn+1)/2; /* sn >= nn; sn limbs of s' are used as divisor below */
                     47:     MPN_COPY(r, op+nn, nn); /* copy a_1 just below the recursive remainder */
                             /* halve the nn+rn limb dividend; the returned limb kept in tmp is
                                not used: the low bit of a_1 is re-read from op[nn] further down */
                     48:     tmp = mpn_rshift(r, r, nn+rn, 1);
                     49:     if (r[nn+rn-1]==0) rn--; /* top limb became zero after the shift */
                             /* scratch of 2*(sn+1) limbs: first the quotient, later the square */
                     50:     q = (mp_limb_t*) TMP_ALLOC(2*(sn+1)*sizeof(mp_limb_t));
                     51:     if (nn+rn < 2*sn) MPN_ZERO(r+nn+rn, 2*sn-nn-rn); /* zero-pad dividend to 2*sn limbs */
                             /* if the high sn limbs of the dividend are >= s', subtract s' once
                                and record an extra quotient limb q[sn]=1 (presumably so that the
                                high part is smaller than the divisor, as the division routine
                                expects -- confirm against the mpn_divrem_n contract) */
                     52:     qn = sn; if (mpn_cmp(r+sn, s+nn, sn)>=0) {
                     53:       q[qn++]=1; mpn_sub_n(r+sn, r+sn, s+nn, sn);
                     54:     }
                             /* quotient goes to q; NOTE(review): the remainder is presumably left
                                in the low sn limbs of r, since r is read as the remainder below */
                     55: #if 0
                     56:     mpn_divrem(q, 0, r, 2*sn, s+nn, sn);
                     57: #else
                     58:     mpn_divrem_n(q, r, s+nn, sn);
                     59: #endif
                     60:     while (qn>nn && q[qn-1]==0) qn--; /* drop high zero limbs, keep at least nn */
                     61:     MPN_COPY(s, q, nn); /* low nn quotient limbs become the low part of s */
                             /* dividend was longer than 2*sn limbs: add the nn+rn-2*sn limbs at
                                q+sn into s+sn and propagate the carry into the limbs above */
                     62:     if (nn+rn > 2*sn) {
                     63:       tmp=mpn_add_n(s+sn, s+sn, q+sn, nn+rn-2*sn);
                     64:       if (tmp) mpn_add_1(s+nn+rn-sn, s+nn+rn-sn, (n+1)/2-nn-rn+sn, tmp);
                     65:     }
                     66:     /* multiply remainder by two and add low bit of a_1 */
                     67:     rrn = nn+sn; /* size of output remainder */
                     68:     rrn += mpn_lshift(r+nn, r, sn, 1); /* one more limb if a bit is shifted out */
                     69:     r[nn] |= (op[nn] & 1);
                     70:     sn += nn; /* from here on sn is the size of the whole root held in s */
                     71:     if (qn>nn) {
                     72:       MPN_COPY(r, s+nn, qn-nn); /* save the qn-nn limbs from s */
                     73:       MPN_COPY(s+nn, q+nn, qn-nn); /* replace by those of q */
                     74:     }
                     75:     mpn_mul_n(q, s, s, qn); /* q <- square of the low qn limbs of s, i.e. of the quotient */
                     76:     if (qn>nn) { /* restore the limbs from s, adding them to those of q */
                     77:       mp_limb_t cy;
                     78:
                     79:       cy = mpn_add_n(s+nn, s+nn, r, qn-nn);
                     80:       if (qn<sn) cy = mpn_add_1(s+qn, s+qn, sn-qn, cy); /* carry through the rest of s */
                     81:       if (cy) s[sn++]=1; /* carry out of the top: the root gains one limb */
                     82:     }
                     83:     MPN_COPY(r, op, nn); /* copy a_0 */
                     84:     qn = 2*qn; /* size of the square stored in q */
                     85:     if (qn<sn) MPN_ZERO(q+qn, sn-qn); /* pad the square to sn limbs */
                     86:     if (rrn<sn) MPN_ZERO(r+rrn, sn-rrn); /* pad the remainder to sn limbs */
                             /* subtract the square from the remainder over sn limbs; a borrow, or
                                a square longer than sn limbs, needs one of the fix-ups below */
                     87:     if (mpn_sub_n(r, r, q, sn) || (qn>sn)) {
                               /* rrn>sn: the remainder had an extra top limb (not yet stored);
                                  drop it instead of storing it -- presumably it cancels the borrow */
                     88:       if (rrn>sn) rrn=sn;
                     89:       else {
                                 /* otherwise add 2*s - 1 to r and decrement s by one; r[sn]
                                    collects the carries and borrows of that update, with a
                                    further -1 (presumably for the borrow detected above) */
                     90:        /* one shift and one add is faster than two add's */
                     91:        r[sn] = mpn_lshift(q, s, sn, 1) + mpn_add_n(r, r, q, sn)
                     92:          - mpn_sub_1(r, r, sn, 1) - 1;
                     93:        rrn = sn + r[sn];
                     94:        mpn_sub_1(s, s, sn, 1);
                     95:       }
                     96:     }
                     97:     else if (rrn>sn) r[sn]=1; /* no fix-up: store the extra top limb counted in rrn */
                     98:     TMP_FREE (marker);
                     99:     MPN_NORMALIZE(r, rrn); /* strip high zero limbs from the remainder size */
                    100:     return rrn;
                    101:   }
                    102: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>