[BACK]Return to sqrtrem.c CVS log [TXT][DIR] Up to [local] / OpenXM_contrib / gmp / mpn / generic

Annotation of OpenXM_contrib/gmp/mpn/generic/sqrtrem.c, Revision 1.1.1.3

1.1.1.3 ! ohara       1: /* mpn_sqrtrem -- square root and remainder */
1.1       maekawa     2:
1.1.1.3 ! ohara       3: /*
        !             4: Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
1.1       maekawa     5:
                      6: This file is part of the GNU MP Library.
                      7:
                      8: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2   maekawa     9: it under the terms of the GNU Lesser General Public License as published by
                     10: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1       maekawa    11: option) any later version.
                     12:
                     13: The GNU MP Library is distributed in the hope that it will be useful, but
                     14: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2   maekawa    15: or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1.1       maekawa    16: License for more details.
                     17:
1.1.1.2   maekawa    18: You should have received a copy of the GNU Lesser General Public License
1.1       maekawa    19: along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
                     20: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
1.1.1.3 ! ohara      21: MA 02111-1307, USA.
        !            22: */
        !            23:
        !            24:
        !            25: /* Contributed by Paul Zimmermann.
        !            26:    See "Karatsuba Square Root", reference in gmp.texi.  */
        !            27:
1.1       maekawa    28:
1.1.1.3 ! ohara      29: #include <stdio.h>
        !            30: #include <stdlib.h>
1.1       maekawa    31:
                     32: #include "gmp.h"
                     33: #include "gmp-impl.h"
                     34: #include "longlong.h"
                     35:
                     36:
                     37:
1.1.1.3 ! ohara      38: /* Square roots table.  Generated by the following program:
        !            39: #include "gmp.h"
        !            40: main(){mpz_t x;int i;mpz_init(x);for(i=64;i<256;i++){mpz_set_ui(x,256*i);
        !            41: mpz_sqrt(x,x);mpz_out_str(0,10,x);printf(",");if(i%16==15)printf("\n");}}
        !            42: */
        !            43: static const unsigned char approx_tab[192] =
        !            44:   {
        !            45:     128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,
        !            46:     143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,
        !            47:     156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,
        !            48:     169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,
        !            49:     181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,
        !            50:     192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,
        !            51:     202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,
        !            52:     212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,
        !            53:     221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,
        !            54:     230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,
        !            55:     239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,
        !            56:     247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255
        !            57:   };
        !            58:
        !            59: #define HALF_NAIL (GMP_NAIL_BITS / 2)
        !            60:
        !            61: /* same as mpn_sqrtrem, but for size=1 and {np, 1} normalized */
        !            62: static mp_size_t
        !            63: mpn_sqrtrem1 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
1.1       maekawa    64: {
1.1.1.3 ! ohara      65:   mp_limb_t np0, s, r, q, u;
        !            66:   int prec;
1.1       maekawa    67:
1.1.1.3 ! ohara      68:   ASSERT (np[0] >= GMP_NUMB_HIGHBIT / 2);
        !            69:   ASSERT (GMP_LIMB_BITS >= 16);
1.1       maekawa    70:
1.1.1.3 ! ohara      71:   /* first compute a 8-bit approximation from the high 8-bits of np[0] */
        !            72:   np0 = np[0] << GMP_NAIL_BITS;
        !            73:   q = np0 >> (GMP_LIMB_BITS - 8);
        !            74:   /* 2^6 = 64 <= q < 256 = 2^8 */
        !            75:   s = approx_tab[q - 64];                              /* 128 <= s < 255 */
        !            76:   r = (np0 >> (GMP_LIMB_BITS - 16)) - s * s;           /* r <= 2*s */
        !            77:   if (r > 2 * s)
1.1       maekawa    78:     {
1.1.1.3 ! ohara      79:       r -= 2 * s + 1;
        !            80:       s++;
1.1       maekawa    81:     }
                     82:
1.1.1.3 ! ohara      83:   prec = 8;
        !            84:   np0 <<= 2 * prec;
        !            85:   while (2 * prec < GMP_LIMB_BITS)
        !            86:     {
        !            87:       /* invariant: s has prec bits, and r <= 2*s */
        !            88:       r = (r << prec) + (np0 >> (GMP_LIMB_BITS - prec));
        !            89:       np0 <<= prec;
        !            90:       u = 2 * s;
        !            91:       q = r / u;
        !            92:       u = r - q * u;
        !            93:       s = (s << prec) + q;
        !            94:       u = (u << prec) + (np0 >> (GMP_LIMB_BITS - prec));
        !            95:       q = q * q;
        !            96:       r = u - q;
        !            97:       if (u < q)
        !            98:        {
        !            99:          r += 2 * s - 1;
        !           100:          s --;
        !           101:        }
        !           102:       np0 <<= prec;
        !           103:       prec = 2 * prec;
        !           104:     }
1.1       maekawa   105:
1.1.1.3 ! ohara     106:   ASSERT (2 * prec == GMP_LIMB_BITS); /* GMP_LIMB_BITS must be a power of 2 */
1.1       maekawa   107:
1.1.1.3 ! ohara     108:   /* normalize back, assuming GMP_NAIL_BITS is even */
        !           109:   ASSERT (GMP_NAIL_BITS % 2 == 0);
        !           110:   sp[0] = s >> HALF_NAIL;
        !           111:   u = s - (sp[0] << HALF_NAIL); /* s mod 2^HALF_NAIL */
        !           112:   r += u * ((sp[0] << (HALF_NAIL + 1)) + u);
        !           113:   r = r >> GMP_NAIL_BITS;
        !           114:
        !           115:   if (rp != NULL)
        !           116:     rp[0] = r;
        !           117:   return r != 0 ? 1 : 0;
        !           118: }
        !           119:
        !           120:
        !           121: #define Prec (GMP_NUMB_BITS >> 1)
        !           122:
        !           123: /* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized
        !           124:    return cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0] */
        !           125: static mp_limb_t
        !           126: mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
        !           127: {
        !           128:   mp_limb_t qhl, q, u, np0;
        !           129:   int cc;
1.1       maekawa   130:
1.1.1.3 ! ohara     131:   ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);
1.1       maekawa   132:
1.1.1.3 ! ohara     133:   np0 = np[0];
        !           134:   mpn_sqrtrem1 (sp, rp, np + 1);
        !           135:   qhl = 0;
        !           136:   while (rp[0] >= sp[0])
1.1       maekawa   137:     {
1.1.1.3 ! ohara     138:       qhl++;
        !           139:       rp[0] -= sp[0];
1.1       maekawa   140:     }
1.1.1.3 ! ohara     141:   /* now rp[0] < sp[0] < 2^Prec */
        !           142:   rp[0] = (rp[0] << Prec) + (np0 >> Prec);
        !           143:   u = 2 * sp[0];
        !           144:   q = rp[0] / u;
        !           145:   u = rp[0] - q * u;
        !           146:   q += (qhl & 1) << (Prec - 1);
        !           147:   qhl >>= 1; /* if qhl=1, necessary q=0 as qhl*2^Prec + q <= 2^Prec */
        !           148:   /* now we have (initial rp[0])<<Prec + np0>>Prec = (qhl<<Prec + q) * (2sp[0]) + u */
        !           149:   sp[0] = ((sp[0] + qhl) << Prec) + q;
        !           150:   cc = u >> Prec;
        !           151:   rp[0] = ((u << Prec) & GMP_NUMB_MASK) + (np0 & (((mp_limb_t) 1 << Prec) - 1));
        !           152:   /* subtract q * q or qhl*2^(2*Prec) from rp */
        !           153:   cc -= mpn_sub_1 (rp, rp, 1, q * q) + qhl;
        !           154:   /* now subtract 2*q*2^Prec + 2^(2*Prec) if qhl is set */
        !           155:   if (cc < 0)
1.1       maekawa   156:     {
1.1.1.3 ! ohara     157:       cc += sp[0] != 0 ? mpn_add_1 (rp, rp, 1, sp[0]) : 1;
        !           158:       cc += mpn_add_1 (rp, rp, 1, --sp[0]);
1.1       maekawa   159:     }
                    160:
1.1.1.3 ! ohara     161:   return cc;
        !           162: }
1.1       maekawa   163:
1.1.1.3 ! ohara     164: /* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
        !           165:    and in {np, n} the low n limbs of the remainder, returns the high
        !           166:    limb of the remainder (which is 0 or 1).
        !           167:    Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
        !           168:    where B=2^GMP_NUMB_BITS.  */
        !           169: static mp_limb_t
        !           170: mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
        !           171: {
        !           172:   mp_limb_t q;                 /* carry out of {sp, n} */
        !           173:   int c, b;                    /* carry out of remainder */
        !           174:   mp_size_t l, h;
1.1       maekawa   175:
1.1.1.3 ! ohara     176:   ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);
1.1       maekawa   177:
1.1.1.3 ! ohara     178:   if (n == 1)
        !           179:     c = mpn_sqrtrem2 (sp, np, np);
        !           180:   else
        !           181:     {
        !           182:       l = n / 2;
        !           183:       h = n - l;
        !           184:       q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h);
        !           185:       if (q != 0)
        !           186:         mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h);
        !           187:       q += mpn_divrem (sp, 0, np + l, n, sp + l, h);
        !           188:       c = sp[0] & 1;
        !           189:       mpn_rshift (sp, sp, l, 1);
        !           190:       sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
        !           191:       q >>= 1;
        !           192:       if (c != 0)
        !           193:         c = mpn_add_n (np + l, np + l, sp + l, h);
        !           194:       mpn_sqr_n (np + n, sp, l);
        !           195:       b = q + mpn_sub_n (np, np, np + n, 2 * l);
        !           196:       c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, b);
        !           197:       q = mpn_add_1 (sp + l, sp + l, h, q);
        !           198:
        !           199:       if (c < 0)
        !           200:         {
        !           201:           c += mpn_addmul_1 (np, sp, n, 2) + 2 * q;
        !           202:           c -= mpn_sub_1 (np, np, n, 1);
        !           203:           q -= mpn_sub_1 (sp, sp, n, 1);
        !           204:         }
1.1       maekawa   205:     }
                    206:
1.1.1.3 ! ohara     207:   return c;
        !           208: }
1.1       maekawa   209:
                    210:
1.1.1.3 ! ohara     211: mp_size_t
        !           212: mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
        !           213: {
        !           214:   mp_limb_t *tp, s0[1], cc, high, rl;
        !           215:   int c;
        !           216:   mp_size_t rn, tn;
        !           217:   TMP_DECL (marker);
1.1       maekawa   218:
1.1.1.3 ! ohara     219:   ASSERT (nn >= 0);
1.1       maekawa   220:
1.1.1.3 ! ohara     221:   /* If OP is zero, both results are zero.  */
        !           222:   if (nn == 0)
        !           223:     return 0;
1.1       maekawa   224:
1.1.1.3 ! ohara     225:   ASSERT (np[nn - 1] != 0);
        !           226:   ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));
        !           227:   ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
        !           228:   ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));
        !           229:
        !           230:   high = np[nn - 1];
        !           231:   if (nn == 1 && (high & GMP_NUMB_HIGHBIT))
        !           232:     return mpn_sqrtrem1 (sp, rp, np);
        !           233:   count_leading_zeros (c, high);
        !           234:   c -= GMP_NAIL_BITS;
1.1       maekawa   235:
1.1.1.3 ! ohara     236:   c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
        !           237:   tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */
1.1       maekawa   238:
1.1.1.3 ! ohara     239:   TMP_MARK (marker);
        !           240:   if (nn % 2 != 0 || c > 0)
        !           241:     {
        !           242:       tp = TMP_ALLOC_LIMBS (2 * tn);
        !           243:       tp[0] = 0;            /* needed only when 2*tn > nn, but saves a test */
        !           244:       if (c != 0)
        !           245:        mpn_lshift (tp + 2 * tn - nn, np, nn, 2 * c);
        !           246:       else
        !           247:        MPN_COPY (tp + 2 * tn - nn, np, nn);
        !           248:       rl = mpn_dc_sqrtrem (sp, tp, tn);
        !           249:       /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
        !           250:         thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
        !           251:       c += (nn % 2) * GMP_NUMB_BITS / 2;               /* c now represents k */
        !           252:       s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1);      /* S mod 2^k */
        !           253:       rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]);      /* R = R + 2*s0*S */
        !           254:       cc = mpn_submul_1 (tp, s0, 1, s0[0]);
        !           255:       rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
        !           256:       mpn_rshift (sp, sp, tn, c);
        !           257:       tp[tn] = rl;
        !           258:       if (rp == NULL)
        !           259:        rp = tp;
        !           260:       c = c << 1;
        !           261:       if (c < GMP_NUMB_BITS)
        !           262:        tn++;
        !           263:       else
1.1       maekawa   264:        {
1.1.1.3 ! ohara     265:          tp++;
        !           266:          c -= GMP_NUMB_BITS;
1.1       maekawa   267:        }
1.1.1.3 ! ohara     268:       if (c != 0)
        !           269:        mpn_rshift (rp, tp, tn, c);
        !           270:       else
        !           271:        MPN_COPY_INCR (rp, tp, tn);
        !           272:       rn = tn;
1.1       maekawa   273:     }
1.1.1.3 ! ohara     274:   else
1.1       maekawa   275:     {
1.1.1.3 ! ohara     276:       if (rp == NULL)
        !           277:        rp = TMP_ALLOC_LIMBS (nn);
        !           278:       if (rp != np)
        !           279:        MPN_COPY (rp, np, nn);
        !           280:       rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn));
1.1       maekawa   281:     }
                    282:
1.1.1.3 ! ohara     283:   MPN_NORMALIZE (rp, rn);
1.1       maekawa   284:
1.1.1.3 ! ohara     285:   TMP_FREE (marker);
        !           286:   return rn;
1.1       maekawa   287: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>