Annotation of OpenXM_contrib/gmp/mpn/generic/sqrtrem.c, Revision 1.1.1.3
1.1.1.3 ! ohara 1: /* mpn_sqrtrem -- square root and remainder */
1.1 maekawa 2:
1.1.1.3 ! ohara 3: /*
! 4: Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
1.1 maekawa 5:
6: This file is part of the GNU MP Library.
7:
8: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 maekawa 9: it under the terms of the GNU Lesser General Public License as published by
10: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 11: option) any later version.
12:
13: The GNU MP Library is distributed in the hope that it will be useful, but
14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 maekawa 15: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 16: License for more details.
17:
1.1.1.2 maekawa 18: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 19: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
1.1.1.3 ! ohara 21: MA 02111-1307, USA.
! 22: */
! 23:
! 24:
! 25: /* Contributed by Paul Zimmermann.
! 26: See "Karatsuba Square Root", reference in gmp.texi. */
! 27:
1.1 maekawa 28:
1.1.1.3 ! ohara 29: #include <stdio.h>
! 30: #include <stdlib.h>
1.1 maekawa 31:
32: #include "gmp.h"
33: #include "gmp-impl.h"
34: #include "longlong.h"
35:
36:
37:
1.1.1.3 ! ohara 38: /* Square roots table. Generated by the following program:
! 39: #include "gmp.h"
! 40: main(){mpz_t x;int i;mpz_init(x);for(i=64;i<256;i++){mpz_set_ui(x,256*i);
! 41: mpz_sqrt(x,x);mpz_out_str(0,10,x);printf(",");if(i%16==15)printf("\n");}}
! 42: */
! 43: static const unsigned char approx_tab[192] =
! 44: {
! 45: 128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,
! 46: 143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,
! 47: 156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,
! 48: 169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,
! 49: 181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,
! 50: 192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,
! 51: 202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,
! 52: 212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,
! 53: 221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,
! 54: 230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,
! 55: 239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,
! 56: 247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255
! 57: };
! 58:
! 59: #define HALF_NAIL (GMP_NAIL_BITS / 2)
! 60:
! 61: /* same as mpn_sqrtrem, but for size=1 and {np, 1} normalized */
! 62: static mp_size_t
! 63: mpn_sqrtrem1 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
1.1 maekawa 64: {
1.1.1.3 ! ohara 65: mp_limb_t np0, s, r, q, u;
! 66: int prec;
1.1 maekawa 67:
1.1.1.3 ! ohara 68: ASSERT (np[0] >= GMP_NUMB_HIGHBIT / 2);
! 69: ASSERT (GMP_LIMB_BITS >= 16);
1.1 maekawa 70:
1.1.1.3 ! ohara 71: /* first compute a 8-bit approximation from the high 8-bits of np[0] */
! 72: np0 = np[0] << GMP_NAIL_BITS;
! 73: q = np0 >> (GMP_LIMB_BITS - 8);
! 74: /* 2^6 = 64 <= q < 256 = 2^8 */
! 75: s = approx_tab[q - 64]; /* 128 <= s < 255 */
! 76: r = (np0 >> (GMP_LIMB_BITS - 16)) - s * s; /* r <= 2*s */
! 77: if (r > 2 * s)
1.1 maekawa 78: {
1.1.1.3 ! ohara 79: r -= 2 * s + 1;
! 80: s++;
1.1 maekawa 81: }
82:
1.1.1.3 ! ohara 83: prec = 8;
! 84: np0 <<= 2 * prec;
! 85: while (2 * prec < GMP_LIMB_BITS)
! 86: {
! 87: /* invariant: s has prec bits, and r <= 2*s */
! 88: r = (r << prec) + (np0 >> (GMP_LIMB_BITS - prec));
! 89: np0 <<= prec;
! 90: u = 2 * s;
! 91: q = r / u;
! 92: u = r - q * u;
! 93: s = (s << prec) + q;
! 94: u = (u << prec) + (np0 >> (GMP_LIMB_BITS - prec));
! 95: q = q * q;
! 96: r = u - q;
! 97: if (u < q)
! 98: {
! 99: r += 2 * s - 1;
! 100: s --;
! 101: }
! 102: np0 <<= prec;
! 103: prec = 2 * prec;
! 104: }
1.1 maekawa 105:
1.1.1.3 ! ohara 106: ASSERT (2 * prec == GMP_LIMB_BITS); /* GMP_LIMB_BITS must be a power of 2 */
1.1 maekawa 107:
1.1.1.3 ! ohara 108: /* normalize back, assuming GMP_NAIL_BITS is even */
! 109: ASSERT (GMP_NAIL_BITS % 2 == 0);
! 110: sp[0] = s >> HALF_NAIL;
! 111: u = s - (sp[0] << HALF_NAIL); /* s mod 2^HALF_NAIL */
! 112: r += u * ((sp[0] << (HALF_NAIL + 1)) + u);
! 113: r = r >> GMP_NAIL_BITS;
! 114:
! 115: if (rp != NULL)
! 116: rp[0] = r;
! 117: return r != 0 ? 1 : 0;
! 118: }
! 119:
! 120:
! 121: #define Prec (GMP_NUMB_BITS >> 1)
! 122:
! 123: /* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized
! 124: return cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0] */
! 125: static mp_limb_t
! 126: mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
! 127: {
! 128: mp_limb_t qhl, q, u, np0;
! 129: int cc;
1.1 maekawa 130:
1.1.1.3 ! ohara 131: ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);
1.1 maekawa 132:
1.1.1.3 ! ohara 133: np0 = np[0];
! 134: mpn_sqrtrem1 (sp, rp, np + 1);
! 135: qhl = 0;
! 136: while (rp[0] >= sp[0])
1.1 maekawa 137: {
1.1.1.3 ! ohara 138: qhl++;
! 139: rp[0] -= sp[0];
1.1 maekawa 140: }
1.1.1.3 ! ohara 141: /* now rp[0] < sp[0] < 2^Prec */
! 142: rp[0] = (rp[0] << Prec) + (np0 >> Prec);
! 143: u = 2 * sp[0];
! 144: q = rp[0] / u;
! 145: u = rp[0] - q * u;
! 146: q += (qhl & 1) << (Prec - 1);
! 147: qhl >>= 1; /* if qhl=1, necessary q=0 as qhl*2^Prec + q <= 2^Prec */
! 148: /* now we have (initial rp[0])<<Prec + np0>>Prec = (qhl<<Prec + q) * (2sp[0]) + u */
! 149: sp[0] = ((sp[0] + qhl) << Prec) + q;
! 150: cc = u >> Prec;
! 151: rp[0] = ((u << Prec) & GMP_NUMB_MASK) + (np0 & (((mp_limb_t) 1 << Prec) - 1));
! 152: /* subtract q * q or qhl*2^(2*Prec) from rp */
! 153: cc -= mpn_sub_1 (rp, rp, 1, q * q) + qhl;
! 154: /* now subtract 2*q*2^Prec + 2^(2*Prec) if qhl is set */
! 155: if (cc < 0)
1.1 maekawa 156: {
1.1.1.3 ! ohara 157: cc += sp[0] != 0 ? mpn_add_1 (rp, rp, 1, sp[0]) : 1;
! 158: cc += mpn_add_1 (rp, rp, 1, --sp[0]);
1.1 maekawa 159: }
160:
1.1.1.3 ! ohara 161: return cc;
! 162: }
1.1 maekawa 163:
1.1.1.3 ! ohara 164: /* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
! 165: and in {np, n} the low n limbs of the remainder, returns the high
! 166: limb of the remainder (which is 0 or 1).
! 167: Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
! 168: where B=2^GMP_NUMB_BITS. */
! 169: static mp_limb_t
! 170: mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
! 171: {
! 172: mp_limb_t q; /* carry out of {sp, n} */
! 173: int c, b; /* carry out of remainder */
! 174: mp_size_t l, h;
1.1 maekawa 175:
1.1.1.3 ! ohara 176: ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);
1.1 maekawa 177:
1.1.1.3 ! ohara 178: if (n == 1)
! 179: c = mpn_sqrtrem2 (sp, np, np);
! 180: else
! 181: {
! 182: l = n / 2;
! 183: h = n - l;
! 184: q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h);
! 185: if (q != 0)
! 186: mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h);
! 187: q += mpn_divrem (sp, 0, np + l, n, sp + l, h);
! 188: c = sp[0] & 1;
! 189: mpn_rshift (sp, sp, l, 1);
! 190: sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
! 191: q >>= 1;
! 192: if (c != 0)
! 193: c = mpn_add_n (np + l, np + l, sp + l, h);
! 194: mpn_sqr_n (np + n, sp, l);
! 195: b = q + mpn_sub_n (np, np, np + n, 2 * l);
! 196: c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, b);
! 197: q = mpn_add_1 (sp + l, sp + l, h, q);
! 198:
! 199: if (c < 0)
! 200: {
! 201: c += mpn_addmul_1 (np, sp, n, 2) + 2 * q;
! 202: c -= mpn_sub_1 (np, np, n, 1);
! 203: q -= mpn_sub_1 (sp, sp, n, 1);
! 204: }
1.1 maekawa 205: }
206:
1.1.1.3 ! ohara 207: return c;
! 208: }
1.1 maekawa 209:
210:
1.1.1.3 ! ohara 211: mp_size_t
! 212: mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
! 213: {
! 214: mp_limb_t *tp, s0[1], cc, high, rl;
! 215: int c;
! 216: mp_size_t rn, tn;
! 217: TMP_DECL (marker);
1.1 maekawa 218:
1.1.1.3 ! ohara 219: ASSERT (nn >= 0);
1.1 maekawa 220:
1.1.1.3 ! ohara 221: /* If OP is zero, both results are zero. */
! 222: if (nn == 0)
! 223: return 0;
1.1 maekawa 224:
1.1.1.3 ! ohara 225: ASSERT (np[nn - 1] != 0);
! 226: ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));
! 227: ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
! 228: ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));
! 229:
! 230: high = np[nn - 1];
! 231: if (nn == 1 && (high & GMP_NUMB_HIGHBIT))
! 232: return mpn_sqrtrem1 (sp, rp, np);
! 233: count_leading_zeros (c, high);
! 234: c -= GMP_NAIL_BITS;
1.1 maekawa 235:
1.1.1.3 ! ohara 236: c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
! 237: tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */
1.1 maekawa 238:
1.1.1.3 ! ohara 239: TMP_MARK (marker);
! 240: if (nn % 2 != 0 || c > 0)
! 241: {
! 242: tp = TMP_ALLOC_LIMBS (2 * tn);
! 243: tp[0] = 0; /* needed only when 2*tn > nn, but saves a test */
! 244: if (c != 0)
! 245: mpn_lshift (tp + 2 * tn - nn, np, nn, 2 * c);
! 246: else
! 247: MPN_COPY (tp + 2 * tn - nn, np, nn);
! 248: rl = mpn_dc_sqrtrem (sp, tp, tn);
! 249: /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
! 250: thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
! 251: c += (nn % 2) * GMP_NUMB_BITS / 2; /* c now represents k */
! 252: s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1); /* S mod 2^k */
! 253: rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]); /* R = R + 2*s0*S */
! 254: cc = mpn_submul_1 (tp, s0, 1, s0[0]);
! 255: rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
! 256: mpn_rshift (sp, sp, tn, c);
! 257: tp[tn] = rl;
! 258: if (rp == NULL)
! 259: rp = tp;
! 260: c = c << 1;
! 261: if (c < GMP_NUMB_BITS)
! 262: tn++;
! 263: else
1.1 maekawa 264: {
1.1.1.3 ! ohara 265: tp++;
! 266: c -= GMP_NUMB_BITS;
1.1 maekawa 267: }
1.1.1.3 ! ohara 268: if (c != 0)
! 269: mpn_rshift (rp, tp, tn, c);
! 270: else
! 271: MPN_COPY_INCR (rp, tp, tn);
! 272: rn = tn;
1.1 maekawa 273: }
1.1.1.3 ! ohara 274: else
1.1 maekawa 275: {
1.1.1.3 ! ohara 276: if (rp == NULL)
! 277: rp = TMP_ALLOC_LIMBS (nn);
! 278: if (rp != np)
! 279: MPN_COPY (rp, np, nn);
! 280: rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn));
1.1 maekawa 281: }
282:
1.1.1.3 ! ohara 283: MPN_NORMALIZE (rp, rn);
1.1 maekawa 284:
1.1.1.3 ! ohara 285: TMP_FREE (marker);
! 286: return rn;
1.1 maekawa 287: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>