Annotation of OpenXM_contrib/gmp/mpn/generic/get_str.c, Revision 1.1.1.3
1.1 maekawa 1: /* mpn_get_str -- Convert a MSIZE long limb vector pointed to by MPTR
2: to a printable string in STR in base BASE.
3:
1.1.1.3 ! ohara 4: Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002 Free Software
! 5: Foundation, Inc.
1.1 maekawa 6:
7: This file is part of the GNU MP Library.
8:
9: The GNU MP Library is free software; you can redistribute it and/or modify
1.1.1.2 maekawa 10: it under the terms of the GNU Lesser General Public License as published by
11: the Free Software Foundation; either version 2.1 of the License, or (at your
1.1 maekawa 12: option) any later version.
13:
14: The GNU MP Library is distributed in the hope that it will be useful, but
15: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1.1.1.2 maekawa 16: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1.1 maekawa 17: License for more details.
18:
1.1.1.2 maekawa 19: You should have received a copy of the GNU Lesser General Public License
1.1 maekawa 20: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
21: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
22: MA 02111-1307, USA. */
23:
24: #include "gmp.h"
25: #include "gmp-impl.h"
26: #include "longlong.h"
27:
1.1.1.3 ! ohara 28: /* Conversion of U {up,un} to a string in base b. Internally, we convert to
! 29: base B = b^m, the largest power of b that fits a limb. Basic algorithms:
1.1 maekawa 30:
1.1.1.3 ! ohara 31: A) Divide U repeatedly by B, generating a quotient and remainder, until the
! 32: quotient becomes zero. The remainders hold the converted digits. Digits
! 33: come out from right to left. (Used in mpn_sb_get_str.)
! 34:
! 35: B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
! 36: Then develop digits by multiplying the fraction repeatedly by b. Digits
! 37: come out from left to right. (Currently not used herein, except for in
! 38: code for converting single limbs to individual digits.)
! 39:
! 40: C) Compute B^1, B^2, B^4, ..., B^(2^s), for s such that B^(2^s) > sqrt(U).
! 41: Then divide U by B^(2^k), generating an integer quotient and remainder.
! 42: Recursively convert the quotient, then the remainder, using the
! 43: precomputed powers. Digits come out from left to right. (Used in
! 44: mpn_dc_get_str.)
! 45:
! 46: When using algorithm C, algorithm B might be suitable for basecase code,
! 47: since the required b^g power will be readily accessible.
! 48:
! 49: Optimization ideas:
! 50: 1. The recursive function of (C) could avoid TMP allocation:
! 51: a) Overwrite dividend with quotient and remainder, just as permitted by
! 52: mpn_sb_divrem_mn.
! 53: b) If TMP memory is anyway needed, pass it as a parameter, similarly to
! 54: how we do it in Karatsuba multiplication.
! 55: 2. Store the powers of (C) in normalized form, with the normalization count.
! 56: Quotients will usually need to be left-shifted before each divide, and
! 57: remainders will either need to be left-shifted of right-shifted.
! 58: 3. When b is even, the powers will end up with lots of low zero limbs. Could
! 59: save significant time in the mpn_tdiv_qr call by stripping these zeros.
! 60: 4. In the code for developing digits from a single limb, we could avoid using
! 61: a full umul_ppmm except for the first (or first few) digits, provided base
! 62: is even. Subsequent digits can be developed using plain multiplication.
! 63: (This saves on register-starved machines (read x86) and on all machines
! 64: that generate the upper product half using a separate instruction (alpha,
! 65: powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).
! 66: 5. Separate mpn_dc_get_str basecase code from code for small conversions. The
! 67: former code will have the exact right power readily available in the
! 68: powtab parameter for dividing the current number into a fraction. Convert
! 69: that using algorithm B.
! 70: 6. Completely avoid division. Compute the inverses of the powers now in
! 71: powtab instead of the actual powers.
! 72:
! 73: Basic structure of (C):
! 74: mpn_get_str:
! 75: if POW2_P (n)
! 76: ...
! 77: else
! 78: if (un < GET_STR_PRECOMPUTE_THRESHOLD)
! 79: mpn_sb_get_str (str, base, up, un);
! 80: else
! 81: precompute_power_tables
! 82: mpn_dc_get_str
! 83:
! 84: mpn_dc_get_str:
! 85: mpn_tdiv_qr
! 86: if (qn < GET_STR_DC_THRESHOLD)
! 87: mpn_sb_get_str
! 88: else
! 89: mpn_dc_get_str
! 90: if (rn < GET_STR_DC_THRESHOLD)
! 91: mpn_sb_get_str
! 92: else
! 93: mpn_dc_get_str
! 94:
! 95:
! 96: The reason for the two threshold values is the cost of
! 97: precompute_power_tables. GET_STR_PRECOMPUTE_THRESHOLD will be considerably
! 98: larger than GET_STR_PRECOMPUTE_THRESHOLD. Do you think I should change
! 99: mpn_dc_get_str to instead look like the following? */
! 100:
! 101:
! 102: /* The x86s and m68020 have a quotient and remainder "div" instruction and
! 103: gcc recognises an adjacent "/" and "%" can be combined using that.
! 104: Elsewhere "/" and "%" are either separate instructions, or separate
! 105: libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
! 106: A multiply and subtract should be faster than a "%" in those cases. */
! 107: #if HAVE_HOST_CPU_FAMILY_x86 \
! 108: || HAVE_HOST_CPU_m68020 \
! 109: || HAVE_HOST_CPU_m68030 \
! 110: || HAVE_HOST_CPU_m68040 \
! 111: || HAVE_HOST_CPU_m68060 \
! 112: || HAVE_HOST_CPU_m68360 /* CPU32 */
! 113: #define udiv_qrnd_unnorm(q,r,n,d) \
! 114: do { \
! 115: mp_limb_t __q = (n) / (d); \
! 116: mp_limb_t __r = (n) % (d); \
! 117: (q) = __q; \
! 118: (r) = __r; \
! 119: } while (0)
! 120: #else
! 121: #define udiv_qrnd_unnorm(q,r,n,d) \
! 122: do { \
! 123: mp_limb_t __q = (n) / (d); \
! 124: mp_limb_t __r = (n) - __q*(d); \
! 125: (q) = __q; \
! 126: (r) = __r; \
! 127: } while (0)
! 128: #endif
1.1 maekawa 129:
1.1.1.3 ! ohara 130: /* When to stop divide-and-conquer and call the basecase mpn_get_str. */
! 131: #ifndef GET_STR_DC_THRESHOLD
! 132: #define GET_STR_DC_THRESHOLD 15
! 133: #endif
! 134: /* Whether to bother at all with precomputing powers of the base, or go
! 135: to the basecase mpn_get_str directly. */
! 136: #ifndef GET_STR_PRECOMPUTE_THRESHOLD
! 137: #define GET_STR_PRECOMPUTE_THRESHOLD 30
! 138: #endif
1.1 maekawa 139:
1.1.1.3 ! ohara 140: struct powers
! 141: {
! 142: size_t digits_in_base;
! 143: mp_ptr p;
! 144: mp_size_t n; /* mpz_struct uses int for sizes, but not mpn! */
! 145: int base;
! 146: };
! 147: typedef struct powers powers_t;
! 148:
! 149:
! 150: /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
! 151: the string in STR. Generate LEN characters, possibly padding with zeros to
! 152: the left. If LEN is zero, generate as many characters as required.
! 153: Return a pointer immediately after the last digit of the result string.
! 154: Complexity is O(UN^2); intended for small conversions. */
! 155: static unsigned char *
! 156: mpn_sb_get_str (unsigned char *str, size_t len,
! 157: mp_ptr up, mp_size_t un,
! 158: const powers_t *powtab)
! 159: {
! 160: mp_limb_t rl, ul;
! 161: unsigned char *s;
! 162: int base;
! 163: size_t l;
! 164: /* Allocate memory for largest possible string, given that we only get here
! 165: for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
! 166: base is 3. 7/11 is an approximation to 1/log2(3). */
! 167: #if TUNE_PROGRAM_BUILD
! 168: #define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * BITS_PER_MP_LIMB * 7 / 11)
1.1.1.2 maekawa 169: #else
1.1.1.3 ! ohara 170: #define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * BITS_PER_MP_LIMB * 7 / 11)
1.1.1.2 maekawa 171: #endif
1.1.1.3 ! ohara 172: unsigned char buf[BUF_ALLOC];
! 173: #if TUNE_PROGRAM_BUILD
! 174: mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
! 175: #else
! 176: mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
1.1 maekawa 177: #endif
1.1.1.3 ! ohara 178:
! 179: base = powtab->base;
! 180: if (base == 10)
! 181: {
! 182: /* Special case code for base==10 so that the compiler has a chance to
! 183: optimize things. */
! 184:
! 185: MPN_COPY (rp + 1, up, un);
! 186:
! 187: s = buf + BUF_ALLOC;
! 188: while (un > 1)
! 189: {
! 190: int i;
! 191: mp_limb_t frac, digit;
! 192: MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
! 193: MP_BASES_BIG_BASE_10,
! 194: MP_BASES_BIG_BASE_INVERTED_10,
! 195: MP_BASES_NORMALIZATION_STEPS_10);
! 196: un -= rp[un] == 0;
! 197: frac = (rp[0] + 1) << GMP_NAIL_BITS;
! 198: s -= MP_BASES_CHARS_PER_LIMB_10;
! 199: #if HAVE_HOST_CPU_FAMILY_x86
! 200: /* The code below turns out to be a bit slower for x86 using gcc.
! 201: Use plain code. */
! 202: i = MP_BASES_CHARS_PER_LIMB_10;
! 203: do
! 204: {
! 205: umul_ppmm (digit, frac, frac, 10);
! 206: *s++ = digit;
! 207: }
! 208: while (--i);
! 209: #else
! 210: /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
! 211: After a few umul_ppmm, we will have accumulated enough low zeros
! 212: to use a plain multiply. */
! 213: if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
! 214: {
! 215: umul_ppmm (digit, frac, frac, 10);
! 216: *s++ = digit;
! 217: }
! 218: if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
! 219: {
! 220: umul_ppmm (digit, frac, frac, 10);
! 221: *s++ = digit;
! 222: }
! 223: if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
! 224: {
! 225: umul_ppmm (digit, frac, frac, 10);
! 226: *s++ = digit;
! 227: }
! 228: if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
! 229: {
! 230: umul_ppmm (digit, frac, frac, 10);
! 231: *s++ = digit;
! 232: }
! 233: i = MP_BASES_CHARS_PER_LIMB_10 - (4-MP_BASES_NORMALIZATION_STEPS_10);
! 234: frac = (frac + 0xf) >> 4;
! 235: do
! 236: {
! 237: frac *= 10;
! 238: digit = frac >> (BITS_PER_MP_LIMB - 4);
! 239: *s++ = digit;
! 240: frac &= (~(mp_limb_t) 0) >> 4;
! 241: }
! 242: while (--i);
1.1 maekawa 243: #endif
1.1.1.3 ! ohara 244: s -= MP_BASES_CHARS_PER_LIMB_10;
! 245: }
1.1 maekawa 246:
1.1.1.3 ! ohara 247: ul = rp[1];
! 248: while (ul != 0)
! 249: {
! 250: udiv_qrnd_unnorm (ul, rl, ul, 10);
! 251: *--s = rl;
! 252: }
! 253: }
! 254: else /* not base 10 */
! 255: {
! 256: unsigned chars_per_limb;
! 257: mp_limb_t big_base, big_base_inverted;
! 258: unsigned normalization_steps;
! 259:
! 260: chars_per_limb = __mp_bases[base].chars_per_limb;
! 261: big_base = __mp_bases[base].big_base;
! 262: big_base_inverted = __mp_bases[base].big_base_inverted;
! 263: count_leading_zeros (normalization_steps, big_base);
! 264:
! 265: MPN_COPY (rp + 1, up, un);
1.1 maekawa 266:
1.1.1.3 ! ohara 267: s = buf + BUF_ALLOC;
! 268: while (un > 1)
! 269: {
! 270: int i;
! 271: mp_limb_t frac;
! 272: MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
! 273: big_base, big_base_inverted,
! 274: normalization_steps);
! 275: un -= rp[un] == 0;
! 276: frac = (rp[0] + 1) << GMP_NAIL_BITS;
! 277: s -= chars_per_limb;
! 278: i = chars_per_limb;
! 279: do
! 280: {
! 281: mp_limb_t digit;
! 282: umul_ppmm (digit, frac, frac, base);
! 283: *s++ = digit;
! 284: }
! 285: while (--i);
! 286: s -= chars_per_limb;
! 287: }
! 288:
! 289: ul = rp[1];
! 290: while (ul != 0)
! 291: {
! 292: udiv_qrnd_unnorm (ul, rl, ul, base);
! 293: *--s = rl;
! 294: }
! 295: }
! 296:
! 297: l = buf + BUF_ALLOC - s;
! 298: while (l < len)
! 299: {
! 300: *str++ = 0;
! 301: len--;
! 302: }
! 303: while (l != 0)
! 304: {
! 305: *str++ = *s++;
! 306: l--;
! 307: }
! 308: return str;
! 309: }
! 310:
! 311:
! 312: /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
! 313: the string in STR. Generate LEN characters, possibly padding with zeros to
! 314: the left. If LEN is zero, generate as many characters as required.
! 315: Return a pointer immediately after the last digit of the result string.
! 316: This uses divide-and-conquer and is intended for large conversions. */
! 317: static unsigned char *
! 318: mpn_dc_get_str (unsigned char *str, size_t len,
! 319: mp_ptr up, mp_size_t un,
! 320: const powers_t *powtab)
! 321: {
! 322: if (un < GET_STR_DC_THRESHOLD)
! 323: {
! 324: if (un != 0)
! 325: str = mpn_sb_get_str (str, len, up, un, powtab);
! 326: else
! 327: {
! 328: while (len != 0)
! 329: {
! 330: *str++ = 0;
! 331: len--;
! 332: }
! 333: }
! 334: }
! 335: else
! 336: {
! 337: mp_ptr pwp, qp, rp;
! 338: mp_size_t pwn, qn;
! 339:
! 340: pwp = powtab->p;
! 341: pwn = powtab->n;
! 342:
! 343: if (un < pwn || (un == pwn && mpn_cmp (up, pwp, un) < 0))
! 344: {
! 345: str = mpn_dc_get_str (str, len, up, un, powtab - 1);
! 346: }
! 347: else
! 348: {
! 349: TMP_DECL (marker);
! 350: TMP_MARK (marker);
! 351: qp = TMP_ALLOC_LIMBS (un - pwn + 1);
! 352: rp = TMP_ALLOC_LIMBS (pwn);
! 353:
! 354: mpn_tdiv_qr (qp, rp, 0L, up, un, pwp, pwn);
! 355: qn = un - pwn; qn += qp[qn] != 0; /* quotient size */
! 356: if (len != 0)
! 357: len = len - powtab->digits_in_base;
! 358: str = mpn_dc_get_str (str, len, qp, qn, powtab - 1);
! 359: str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn, powtab - 1);
! 360: TMP_FREE (marker);
! 361: }
! 362: }
! 363: return str;
! 364: }
! 365:
! 366:
! 367: /* There are no leading zeros on the digits generated at str, but that's not
! 368: currently a documented feature. */
! 369:
! 370: size_t
! 371: mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
! 372: {
! 373: mp_ptr powtab_mem, powtab_mem_ptr;
! 374: mp_limb_t big_base;
! 375: size_t digits_in_base;
! 376: powers_t powtab[30];
! 377: int pi;
! 378: mp_size_t n;
! 379: mp_ptr p, t;
! 380: size_t out_len;
1.1 maekawa 381:
382: /* Special case zero, as the code below doesn't handle it. */
1.1.1.3 ! ohara 383: if (un == 0)
1.1 maekawa 384: {
1.1.1.3 ! ohara 385: str[0] = 0;
1.1 maekawa 386: return 1;
387: }
388:
1.1.1.3 ! ohara 389: if (POW2_P (base))
1.1 maekawa 390: {
1.1.1.3 ! ohara 391: /* The base is a power of 2. Convert from most significant end. */
1.1 maekawa 392: mp_limb_t n1, n0;
1.1.1.3 ! ohara 393: int bits_per_digit = __mp_bases[base].big_base;
! 394: int cnt;
! 395: int bit_pos;
! 396: mp_size_t i;
! 397: unsigned char *s = str;
! 398:
! 399: n1 = up[un - 1];
! 400: count_leading_zeros (cnt, n1);
! 401:
! 402: /* BIT_POS should be R when input ends in least significant nibble,
! 403: R + bits_per_digit * n when input ends in nth least significant
! 404: nibble. */
1.1 maekawa 405:
406: {
1.1.1.3 ! ohara 407: unsigned long bits;
1.1 maekawa 408:
1.1.1.3 ! ohara 409: bits = GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
! 410: cnt = bits % bits_per_digit;
! 411: if (cnt != 0)
! 412: bits += bits_per_digit - cnt;
! 413: bit_pos = bits - (un - 1) * GMP_NUMB_BITS;
1.1 maekawa 414: }
415:
416: /* Fast loop for bit output. */
1.1.1.3 ! ohara 417: i = un - 1;
1.1 maekawa 418: for (;;)
419: {
420: bit_pos -= bits_per_digit;
421: while (bit_pos >= 0)
422: {
423: *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
424: bit_pos -= bits_per_digit;
425: }
426: i--;
427: if (i < 0)
428: break;
429: n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
1.1.1.3 ! ohara 430: n1 = up[i];
! 431: bit_pos += GMP_NUMB_BITS;
1.1 maekawa 432: *s++ = n0 | (n1 >> bit_pos);
433: }
434:
435: *s = 0;
436:
437: return s - str;
438: }
439:
1.1.1.3 ! ohara 440: /* General case. The base is not a power of 2. */
1.1 maekawa 441:
1.1.1.3 ! ohara 442: if (un < GET_STR_PRECOMPUTE_THRESHOLD)
! 443: {
! 444: struct powers ptab[1];
! 445: ptab[0].base = base;
! 446: return mpn_sb_get_str (str, (size_t) 0, up, un, ptab) - str;
! 447: }
1.1 maekawa 448:
1.1.1.3 ! ohara 449: /* Allocate one large block for the powers of big_base. With the current
! 450: scheme, we need to allocate twice as much as would be possible if a
! 451: minimal set of powers were generated. */
! 452: #define ALLOC_SIZE (2 * un + 30)
! 453: powtab_mem = __GMP_ALLOCATE_FUNC_LIMBS (ALLOC_SIZE);
! 454: powtab_mem_ptr = powtab_mem;
! 455:
! 456: /* Compute a table of powers: big_base^1, big_base^2, big_base^4, ...,
! 457: big_base^(2^k), for k such that the biggest power is between U and
! 458: sqrt(U). */
1.1 maekawa 459:
1.1.1.3 ! ohara 460: big_base = __mp_bases[base].big_base;
! 461: digits_in_base = __mp_bases[base].chars_per_limb;
1.1 maekawa 462:
1.1.1.3 ! ohara 463: powtab[0].base = base; /* FIXME: hack for getting base to mpn_sb_get_str */
! 464: powtab[1].p = &big_base;
! 465: powtab[1].n = 1;
! 466: powtab[1].digits_in_base = digits_in_base;
! 467: powtab[1].base = base;
! 468: powtab[2].p = &big_base;
! 469: powtab[2].n = 1;
! 470: powtab[2].digits_in_base = digits_in_base;
! 471: powtab[2].base = base;
! 472: n = 1;
! 473: pi = 2;
! 474: p = &big_base;
! 475: for (;;)
! 476: {
! 477: ++pi;
! 478: t = powtab_mem_ptr;
! 479: powtab_mem_ptr += 2 * n;
! 480: mpn_sqr_n (t, p, n);
! 481: n *= 2; n -= t[n - 1] == 0;
! 482: digits_in_base *= 2;
! 483: p = t;
! 484: powtab[pi].p = p;
! 485: powtab[pi].n = n;
! 486: powtab[pi].digits_in_base = digits_in_base;
! 487: powtab[pi].base = base;
1.1 maekawa 488:
1.1.1.3 ! ohara 489: if (2 * n > un)
! 490: break;
! 491: }
! 492: ASSERT_ALWAYS (ALLOC_SIZE > powtab_mem_ptr - powtab_mem);
1.1 maekawa 493:
1.1.1.3 ! ohara 494: /* Using our precomputed powers, now in powtab[], convert our number. */
! 495: out_len = mpn_dc_get_str (str, 0, up, un, powtab + pi) - str;
1.1 maekawa 496:
1.1.1.3 ! ohara 497: __GMP_FREE_FUNC_LIMBS (powtab_mem, ALLOC_SIZE);
1.1 maekawa 498:
1.1.1.3 ! ohara 499: return out_len;
1.1 maekawa 500: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>