Annotation of OpenXM_contrib/gmp/mpn/generic/sqrtrem.c, Revision 1.1
1.1 ! maekawa 1: /* mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
! 2:
! 3: Write the square root of {OP_PTR, OP_SIZE} at ROOT_PTR.
! 4: Write the remainder at REM_PTR, if REM_PTR != NULL.
! 5: Return the size of the remainder.
! 6: (The size of the root is always half of the size of the operand.)
! 7:
! 8: OP_PTR and ROOT_PTR may not point to the same object.
! 9: OP_PTR and REM_PTR may point to the same object.
! 10:
! 11: If REM_PTR is NULL, only the root is computed and the return value of
! 12: the function is 0 if OP is a perfect square, and *any* non-zero number
! 13: otherwise.
! 14:
! 15: Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
! 16:
! 17: This file is part of the GNU MP Library.
! 18:
! 19: The GNU MP Library is free software; you can redistribute it and/or modify
! 20: it under the terms of the GNU Library General Public License as published by
! 21: the Free Software Foundation; either version 2 of the License, or (at your
! 22: option) any later version.
! 23:
! 24: The GNU MP Library is distributed in the hope that it will be useful, but
! 25: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 26: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! 27: License for more details.
! 28:
! 29: You should have received a copy of the GNU Library General Public License
! 30: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 31: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 32: MA 02111-1307, USA. */
! 33:
! 34: /* This code is just correct if "unsigned char" has at least 8 bits. It
! 35: doesn't help to use CHAR_BIT from limits.h, as the real problem is
! 36: the static arrays. */
! 37:
! 38: #include "gmp.h"
! 39: #include "gmp-impl.h"
! 40: #include "longlong.h"
! 41:
! 42: /* Square root algorithm:
! 43:
! 44: 1. Shift OP (the input) to the left an even number of bits s.t. there
! 45: are an even number of words and either (or both) of the most
! 46: significant bits are set. This way, sqrt(OP) has exactly half as
! 47: many words as OP, and has its most significant bit set.
! 48:
! 49: 2. Get a 9-bit approximation to sqrt(OP) using the pre-computed tables.
! 50: This approximation is used for the first single-precision
! 51: iterations of Newton's method, yielding a full-word approximation
! 52: to sqrt(OP).
! 53:
! 54: 3. Perform multiple-precision Newton iteration until we have the
! 55: exact result. Only about half of the input operand is used in
! 56: this calculation, as the square root is perfectly determinable
! 57: from just the higher half of a number. */
! 58:
! 59: /* Define this macro for IEEE P854 machines with a fast sqrt instruction. */
! 60: #if defined __GNUC__ && ! defined __SOFT_FLOAT
! 61:
! 62: #if defined __sparc__
! 63: #define SQRT(a) \
! 64: ({ \
! 65: double __sqrt_res; \
! 66: asm ("fsqrtd %1,%0" : "=f" (__sqrt_res) : "f" (a)); \
! 67: __sqrt_res; \
! 68: })
! 69: #endif
! 70:
! 71: #if defined __HAVE_68881__
! 72: #define SQRT(a) \
! 73: ({ \
! 74: double __sqrt_res; \
! 75: asm ("fsqrtx %1,%0" : "=f" (__sqrt_res) : "f" (a)); \
! 76: __sqrt_res; \
! 77: })
! 78: #endif
! 79:
! 80: #if defined __hppa
! 81: #define SQRT(a) \
! 82: ({ \
! 83: double __sqrt_res; \
! 84: asm ("fsqrt,dbl %1,%0" : "=fx" (__sqrt_res) : "fx" (a)); \
! 85: __sqrt_res; \
! 86: })
! 87: #endif
! 88:
! 89: #if defined _ARCH_PWR2
! 90: #define SQRT(a) \
! 91: ({ \
! 92: double __sqrt_res; \
! 93: asm ("fsqrt %0,%1" : "=f" (__sqrt_res) : "f" (a)); \
! 94: __sqrt_res; \
! 95: })
! 96: #endif
! 97:
! 98: #endif
! 99:
! 100: #ifndef SQRT
! 101:
! 102: /* Tables for initial approximation of the square root. These are
! 103: indexed with bits 1-8 of the operand for which the square root is
! 104: calculated, where bit 0 is the most significant non-zero bit. I.e.
! 105: the most significant one-bit is not used, since that per definition
! 106: is one. Likewise, the tables don't return the highest bit of the
! 107: result. That bit must be inserted by or:ing the returned value with
! 108: 0x100. This way, we get a 9-bit approximation from 8-bit tables! */
! 109:
! 110: /* Table to be used for operands with an even total number of bits.
! 111: (Exactly as in the decimal system there are similarities between the
! 112: square root of numbers with the same initial digits and an even
! 113: difference in the total number of digits. Consider the square root
! 114: of 1, 10, 100, 1000, ...) */
! 115: static unsigned char even_approx_tab[256] =
! 116: {
! 117: 0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e,
! 118: 0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74,
! 119: 0x75, 0x75, 0x76, 0x77, 0x77, 0x78, 0x79, 0x79,
! 120: 0x7a, 0x7b, 0x7b, 0x7c, 0x7d, 0x7d, 0x7e, 0x7f,
! 121: 0x80, 0x80, 0x81, 0x81, 0x82, 0x83, 0x83, 0x84,
! 122: 0x85, 0x85, 0x86, 0x87, 0x87, 0x88, 0x89, 0x89,
! 123: 0x8a, 0x8b, 0x8b, 0x8c, 0x8d, 0x8d, 0x8e, 0x8f,
! 124: 0x8f, 0x90, 0x90, 0x91, 0x92, 0x92, 0x93, 0x94,
! 125: 0x94, 0x95, 0x96, 0x96, 0x97, 0x97, 0x98, 0x99,
! 126: 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9c, 0x9d, 0x9e,
! 127: 0x9e, 0x9f, 0xa0, 0xa0, 0xa1, 0xa1, 0xa2, 0xa3,
! 128: 0xa3, 0xa4, 0xa4, 0xa5, 0xa6, 0xa6, 0xa7, 0xa7,
! 129: 0xa8, 0xa9, 0xa9, 0xaa, 0xaa, 0xab, 0xac, 0xac,
! 130: 0xad, 0xad, 0xae, 0xaf, 0xaf, 0xb0, 0xb0, 0xb1,
! 131: 0xb2, 0xb2, 0xb3, 0xb3, 0xb4, 0xb5, 0xb5, 0xb6,
! 132: 0xb6, 0xb7, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xba,
! 133: 0xbb, 0xbb, 0xbc, 0xbd, 0xbd, 0xbe, 0xbe, 0xbf,
! 134: 0xc0, 0xc0, 0xc1, 0xc1, 0xc2, 0xc2, 0xc3, 0xc3,
! 135: 0xc4, 0xc5, 0xc5, 0xc6, 0xc6, 0xc7, 0xc7, 0xc8,
! 136: 0xc9, 0xc9, 0xca, 0xca, 0xcb, 0xcb, 0xcc, 0xcc,
! 137: 0xcd, 0xce, 0xce, 0xcf, 0xcf, 0xd0, 0xd0, 0xd1,
! 138: 0xd1, 0xd2, 0xd3, 0xd3, 0xd4, 0xd4, 0xd5, 0xd5,
! 139: 0xd6, 0xd6, 0xd7, 0xd7, 0xd8, 0xd9, 0xd9, 0xda,
! 140: 0xda, 0xdb, 0xdb, 0xdc, 0xdc, 0xdd, 0xdd, 0xde,
! 141: 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe1, 0xe2, 0xe2,
! 142: 0xe3, 0xe3, 0xe4, 0xe4, 0xe5, 0xe5, 0xe6, 0xe6,
! 143: 0xe7, 0xe7, 0xe8, 0xe8, 0xe9, 0xea, 0xea, 0xeb,
! 144: 0xeb, 0xec, 0xec, 0xed, 0xed, 0xee, 0xee, 0xef,
! 145: 0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, 0xf3,
! 146: 0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7,
! 147: 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb,
! 148: 0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff,
! 149: };
! 150:
! 151: /* Table to be used for operands with an odd total number of bits.
! 152: (Further comments before previous table.) */
! 153: static unsigned char odd_approx_tab[256] =
! 154: {
! 155: 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
! 156: 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07,
! 157: 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b,
! 158: 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f,
! 159: 0x0f, 0x10, 0x10, 0x10, 0x11, 0x11, 0x12, 0x12,
! 160: 0x13, 0x13, 0x14, 0x14, 0x15, 0x15, 0x16, 0x16,
! 161: 0x16, 0x17, 0x17, 0x18, 0x18, 0x19, 0x19, 0x1a,
! 162: 0x1a, 0x1b, 0x1b, 0x1b, 0x1c, 0x1c, 0x1d, 0x1d,
! 163: 0x1e, 0x1e, 0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21,
! 164: 0x21, 0x22, 0x22, 0x23, 0x23, 0x23, 0x24, 0x24,
! 165: 0x25, 0x25, 0x26, 0x26, 0x27, 0x27, 0x27, 0x28,
! 166: 0x28, 0x29, 0x29, 0x2a, 0x2a, 0x2a, 0x2b, 0x2b,
! 167: 0x2c, 0x2c, 0x2d, 0x2d, 0x2d, 0x2e, 0x2e, 0x2f,
! 168: 0x2f, 0x30, 0x30, 0x30, 0x31, 0x31, 0x32, 0x32,
! 169: 0x32, 0x33, 0x33, 0x34, 0x34, 0x35, 0x35, 0x35,
! 170: 0x36, 0x36, 0x37, 0x37, 0x37, 0x38, 0x38, 0x39,
! 171: 0x39, 0x39, 0x3a, 0x3a, 0x3b, 0x3b, 0x3b, 0x3c,
! 172: 0x3c, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3f, 0x3f,
! 173: 0x40, 0x40, 0x40, 0x41, 0x41, 0x41, 0x42, 0x42,
! 174: 0x43, 0x43, 0x43, 0x44, 0x44, 0x45, 0x45, 0x45,
! 175: 0x46, 0x46, 0x47, 0x47, 0x47, 0x48, 0x48, 0x49,
! 176: 0x49, 0x49, 0x4a, 0x4a, 0x4b, 0x4b, 0x4b, 0x4c,
! 177: 0x4c, 0x4c, 0x4d, 0x4d, 0x4e, 0x4e, 0x4e, 0x4f,
! 178: 0x4f, 0x50, 0x50, 0x50, 0x51, 0x51, 0x51, 0x52,
! 179: 0x52, 0x53, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55,
! 180: 0x55, 0x56, 0x56, 0x56, 0x57, 0x57, 0x57, 0x58,
! 181: 0x58, 0x59, 0x59, 0x59, 0x5a, 0x5a, 0x5a, 0x5b,
! 182: 0x5b, 0x5b, 0x5c, 0x5c, 0x5d, 0x5d, 0x5d, 0x5e,
! 183: 0x5e, 0x5e, 0x5f, 0x5f, 0x60, 0x60, 0x60, 0x61,
! 184: 0x61, 0x61, 0x62, 0x62, 0x62, 0x63, 0x63, 0x63,
! 185: 0x64, 0x64, 0x65, 0x65, 0x65, 0x66, 0x66, 0x66,
! 186: 0x67, 0x67, 0x67, 0x68, 0x68, 0x68, 0x69, 0x69,
! 187: };
! 188: #endif
! 189:
! 190:
! 191: mp_size_t
! 192: #if __STDC__
! 193: mpn_sqrtrem (mp_ptr root_ptr, mp_ptr rem_ptr, mp_srcptr op_ptr, mp_size_t op_size)
! 194: #else
! 195: mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
! 196: mp_ptr root_ptr;
! 197: mp_ptr rem_ptr;
! 198: mp_srcptr op_ptr;
! 199: mp_size_t op_size;
! 200: #endif
! 201: {
! 202: /* R (root result) */
! 203: mp_ptr rp; /* Pointer to least significant word */
! 204: mp_size_t rsize; /* The size in words */
! 205:
! 206: /* T (OP shifted to the left a.k.a. normalized) */
! 207: mp_ptr tp; /* Pointer to least significant word */
! 208: mp_size_t tsize; /* The size in words */
! 209: mp_ptr t_end_ptr; /* Pointer right beyond most sign. word */
! 210: mp_limb_t t_high0, t_high1; /* The two most significant words */
! 211:
! 212: /* TT (temporary for numerator/remainder) */
! 213: mp_ptr ttp; /* Pointer to least significant word */
! 214:
! 215: /* X (temporary for quotient in main loop) */
! 216: mp_ptr xp; /* Pointer to least significant word */
! 217: mp_size_t xsize; /* The size in words */
! 218:
! 219: unsigned cnt;
! 220: mp_limb_t initial_approx; /* Initially made approximation */
! 221: mp_size_t tsizes[BITS_PER_MP_LIMB]; /* Successive calculation precisions */
! 222: mp_size_t tmp;
! 223: mp_size_t i;
! 224:
! 225: mp_limb_t cy_limb;
! 226: TMP_DECL (marker);
! 227:
! 228: /* If OP is zero, both results are zero. */
! 229: if (op_size == 0)
! 230: return 0;
! 231:
! 232: count_leading_zeros (cnt, op_ptr[op_size - 1]);
! 233: tsize = op_size;
! 234: if ((tsize & 1) != 0)
! 235: {
! 236: cnt += BITS_PER_MP_LIMB;
! 237: tsize++;
! 238: }
! 239:
! 240: rsize = tsize / 2;
! 241: rp = root_ptr;
! 242:
! 243: TMP_MARK (marker);
! 244:
! 245: /* Shift OP an even number of bits into T, such that either the most or
! 246: the second most significant bit is set, and such that the number of
! 247: words in T becomes even. This way, the number of words in R=sqrt(OP)
! 248: is exactly half as many as in OP, and the most significant bit of R
! 249: is set.
! 250:
! 251: Also, the initial approximation is simplified by this up-shifted OP.
! 252:
! 253: Finally, the Newtonian iteration which is the main part of this
! 254: program performs division by R. The fast division routine expects
! 255: the divisor to be "normalized" in exactly the sense of having the
! 256: most significant bit set. */
! 257:
! 258: tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
! 259:
! 260: if ((cnt & ~1) % BITS_PER_MP_LIMB != 0)
! 261: t_high0 = mpn_lshift (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size,
! 262: (cnt & ~1) % BITS_PER_MP_LIMB);
! 263: else
! 264: MPN_COPY (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size);
! 265:
! 266: if (cnt >= BITS_PER_MP_LIMB)
! 267: tp[0] = 0;
! 268:
! 269: t_high0 = tp[tsize - 1];
! 270: t_high1 = tp[tsize - 2]; /* Never stray. TSIZE is >= 2. */
! 271:
! 272: /* Is there a fast sqrt instruction defined for this machine? */
! 273: #ifdef SQRT
! 274: {
! 275: initial_approx = SQRT (t_high0 * 2.0
! 276: * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1))
! 277: + t_high1);
! 278: /* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have
! 279: become incorrect due to overflow in the conversion from double to
! 280: mp_limb_t above. It will typically be zero in that case, but might be
! 281: a small number on some machines. The most significant bit of
! 282: INITIAL_APPROX should be set, so that bit is a good overflow
! 283: indication. */
! 284: if ((mp_limb_signed_t) initial_approx >= 0)
! 285: initial_approx = ~(mp_limb_t)0;
! 286: }
! 287: #else
! 288: /* Get a 9 bit approximation from the tables. The tables expect to
! 289: be indexed with the 8 high bits right below the highest bit.
! 290: Also, the highest result bit is not returned by the tables, and
! 291: must be or:ed into the result. The scheme gives 9 bits of start
! 292: approximation with just 256-entry 8 bit tables. */
! 293:
! 294: if ((cnt & 1) == 0)
! 295: {
! 296: /* The most sign bit of t_high0 is set. */
! 297: initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1);
! 298: initial_approx &= 0xff;
! 299: initial_approx = even_approx_tab[initial_approx];
! 300: }
! 301: else
! 302: {
! 303: /* The most significant bit of T_HIGH0 is unset,
! 304: the second most significant is set. */
! 305: initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2);
! 306: initial_approx &= 0xff;
! 307: initial_approx = odd_approx_tab[initial_approx];
! 308: }
! 309: initial_approx |= 0x100;
! 310: initial_approx <<= BITS_PER_MP_LIMB - 8 - 1;
! 311:
! 312: /* Perform small precision Newtonian iterations to get a full word
! 313: approximation. For small operands, these iteration will make the
! 314: entire job. */
! 315: if (t_high0 == ~(mp_limb_t)0)
! 316: initial_approx = t_high0;
! 317: else
! 318: {
! 319: mp_limb_t quot;
! 320:
! 321: if (t_high0 >= initial_approx)
! 322: initial_approx = t_high0 + 1;
! 323:
! 324: /* First get about 18 bits with pure C arithmetics. */
! 325: quot = t_high0 / (initial_approx >> BITS_PER_MP_LIMB/2) << BITS_PER_MP_LIMB/2;
! 326: initial_approx = (initial_approx + quot) / 2;
! 327: initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
! 328:
! 329: /* Now get a full word by one (or for > 36 bit machines) several
! 330: iterations. */
! 331: for (i = 16; i < BITS_PER_MP_LIMB; i <<= 1)
! 332: {
! 333: mp_limb_t ignored_remainder;
! 334:
! 335: udiv_qrnnd (quot, ignored_remainder,
! 336: t_high0, t_high1, initial_approx);
! 337: initial_approx = (initial_approx + quot) / 2;
! 338: initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
! 339: }
! 340: }
! 341: #endif
! 342:
! 343: rp[0] = initial_approx;
! 344: rsize = 1;
! 345:
! 346: #ifdef DEBUG
! 347: printf ("\n\nT = ");
! 348: mpn_dump (tp, tsize);
! 349: #endif
! 350:
! 351: if (tsize > 2)
! 352: {
! 353: /* Determine the successive precisions to use in the iteration. We
! 354: minimize the precisions, beginning with the highest (i.e. last
! 355: iteration) to the lowest (i.e. first iteration). */
! 356:
! 357: xp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
! 358: ttp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
! 359:
! 360: t_end_ptr = tp + tsize;
! 361:
! 362: tmp = tsize / 2;
! 363: for (i = 0;; i++)
! 364: {
! 365: tsize = (tmp + 1) / 2;
! 366: if (tmp == tsize)
! 367: break;
! 368: tsizes[i] = tsize + tmp;
! 369: tmp = tsize;
! 370: }
! 371:
! 372: /* Main Newton iteration loop. For big arguments, most of the
! 373: time is spent here. */
! 374:
! 375: /* It is possible to do a great optimization here. The successive
! 376: divisors in the mpn_divmod call below has more and more leading
! 377: words equal to its predecessor. Therefore the beginning of
! 378: each division will repeat the same work as did the last
! 379: division. If we could guarantee that the leading words of two
! 380: consecutive divisors are the same (i.e. in this case, a later
! 381: divisor has just more digits at the end) it would be a simple
! 382: matter of just using the old remainder of the last division in
! 383: a subsequent division, to take care of this optimization. This
! 384: idea would surely make a difference even for small arguments. */
! 385:
! 386: /* Loop invariants:
! 387:
! 388: R <= shiftdown_to_same_size(floor(sqrt(OP))) < R + 1.
! 389: X - 1 < shiftdown_to_same_size(floor(sqrt(OP))) <= X.
! 390: R <= shiftdown_to_same_size(X). */
! 391:
! 392: while (--i >= 0)
! 393: {
! 394: mp_limb_t cy;
! 395: #ifdef DEBUG
! 396: mp_limb_t old_least_sign_r = rp[0];
! 397: mp_size_t old_rsize = rsize;
! 398:
! 399: printf ("R = ");
! 400: mpn_dump (rp, rsize);
! 401: #endif
! 402: tsize = tsizes[i];
! 403:
! 404: /* Need to copy the numerator into temporary space, as
! 405: mpn_divmod overwrites its numerator argument with the
! 406: remainder (which we currently ignore). */
! 407: MPN_COPY (ttp, t_end_ptr - tsize, tsize);
! 408: cy = mpn_divmod (xp, ttp, tsize, rp, rsize);
! 409: xsize = tsize - rsize;
! 410:
! 411: #ifdef DEBUG
! 412: printf ("X =%d ", cy);
! 413: mpn_dump (xp, xsize);
! 414: #endif
! 415:
! 416: /* Add X and R with the most significant limbs aligned,
! 417: temporarily ignoring at least one limb at the low end of X. */
! 418: tmp = xsize - rsize;
! 419: cy += mpn_add_n (xp + tmp, rp, xp + tmp, rsize);
! 420:
! 421: /* If T begins with more than 2 x BITS_PER_MP_LIMB of ones, we get
! 422: intermediate roots that'd need an extra bit. We don't want to
! 423: handle that since it would make the subsequent divisor
! 424: non-normalized, so round such roots down to be only ones in the
! 425: current precision. */
! 426: if (cy == 2)
! 427: {
! 428: mp_size_t j;
! 429: for (j = xsize; j >= 0; j--)
! 430: xp[j] = ~(mp_limb_t)0;
! 431: }
! 432:
! 433: /* Divide X by 2 and put the result in R. This is the new
! 434: approximation. Shift in the carry from the addition. */
! 435: mpn_rshift (rp, xp, xsize, 1);
! 436: rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1));
! 437: rsize = xsize;
! 438: #ifdef DEBUG
! 439: if (old_least_sign_r != rp[rsize - old_rsize])
! 440: printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n",
! 441: i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r,
! 442: 2 * BYTES_PER_MP_LIMB, rp[rsize - old_rsize]);
! 443: #endif
! 444: }
! 445: }
! 446:
! 447: #ifdef DEBUG
! 448: printf ("(final) R = ");
! 449: mpn_dump (rp, rsize);
! 450: #endif
! 451:
! 452: /* We computed the square root of OP * 2**(2*floor(cnt/2)).
! 453: This has resulted in R being 2**floor(cnt/2) to large.
! 454: Shift it down here to fix that. */
! 455: if (cnt / 2 != 0)
! 456: {
! 457: mpn_rshift (rp, rp, rsize, cnt/2);
! 458: rsize -= rp[rsize - 1] == 0;
! 459: }
! 460:
! 461: /* Calculate the remainder. */
! 462: mpn_mul_n (tp, rp, rp, rsize);
! 463: tsize = rsize + rsize;
! 464: tsize -= tp[tsize - 1] == 0;
! 465: if (op_size < tsize
! 466: || (op_size == tsize && mpn_cmp (op_ptr, tp, op_size) < 0))
! 467: {
! 468: /* R is too large. Decrement it. */
! 469:
! 470: /* These operations can't overflow. */
! 471: cy_limb = mpn_sub_n (tp, tp, rp, rsize);
! 472: cy_limb += mpn_sub_n (tp, tp, rp, rsize);
! 473: mpn_sub_1 (tp + rsize, tp + rsize, tsize - rsize, cy_limb);
! 474: mpn_add_1 (tp, tp, tsize, (mp_limb_t) 1);
! 475:
! 476: mpn_sub_1 (rp, rp, rsize, (mp_limb_t) 1);
! 477:
! 478: #ifdef DEBUG
! 479: printf ("(adjusted) R = ");
! 480: mpn_dump (rp, rsize);
! 481: #endif
! 482: }
! 483:
! 484: if (rem_ptr != NULL)
! 485: {
! 486: cy_limb = mpn_sub (rem_ptr, op_ptr, op_size, tp, tsize);
! 487: MPN_NORMALIZE (rem_ptr, op_size);
! 488: TMP_FREE (marker);
! 489: return op_size;
! 490: }
! 491: else
! 492: {
! 493: int res;
! 494: res = op_size != tsize || mpn_cmp (op_ptr, tp, op_size);
! 495: TMP_FREE (marker);
! 496: return res;
! 497: }
! 498: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>