Annotation of OpenXM_contrib/gmp/scanf/doscan.c, Revision 1.1
1.1 ! ohara 1: /* __gmp_doscan -- formatted input internals.
! 2:
! 3: THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
! 4: CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
! 5: FUTURE GNU MP RELEASES.
! 6:
! 7: Copyright 2001, 2002 Free Software Foundation, Inc.
! 8:
! 9: This file is part of the GNU MP Library.
! 10:
! 11: The GNU MP Library is free software; you can redistribute it and/or modify
! 12: it under the terms of the GNU Lesser General Public License as published by
! 13: the Free Software Foundation; either version 2.1 of the License, or (at your
! 14: option) any later version.
! 15:
! 16: The GNU MP Library is distributed in the hope that it will be useful, but
! 17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! 18: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! 19: License for more details.
! 20:
! 21: You should have received a copy of the GNU Lesser General Public License
! 22: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! 23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! 24: MA 02111-1307, USA. */
! 25:
! 26: #include "config.h"
! 27:
! 28: #if HAVE_STDARG
! 29: #include <stdarg.h>
! 30: #else
! 31: #include <varargs.h>
! 32: #endif
! 33:
! 34: #include <ctype.h>
! 35: #include <stddef.h> /* for ptrdiff_t */
! 36: #include <stdio.h>
! 37: #include <string.h>
! 38:
! 39: #if HAVE_LOCALE_H
! 40: #include <locale.h> /* for localeconv */
! 41: #endif
! 42:
! 43: #if HAVE_INTTYPES_H
! 44: # include <inttypes.h> /* for intmax_t */
! 45: #else
! 46: # if HAVE_STDINT_H
! 47: # include <stdint.h>
! 48: # endif
! 49: #endif
! 50:
! 51: #if HAVE_SYS_TYPES_H
! 52: #include <sys/types.h> /* for quad_t */
! 53: #endif
! 54:
! 55: #include "gmp.h"
! 56: #include "gmp-impl.h"
! 57:
! 58:
! 59: /* Debug tracing hook.  As defined here TRACE(x) expands to nothing, so the
   statements wrapped in TRACE() throughout this file are compiled out.
   Change this to "#define TRACE(x) x" for some traces. */
! 60: #define TRACE(x)
! 61:
! 62:
! 63: /* It's necessary to parse up the string to recognise the GMP extra types F,
! 64: Q and Z. Other types and conversions are passed across to the standard
! 65: sscanf or fscanf via funs->scan, for ease of implementation. This is
! 66: essential in the case of something like glibc %p where the pointer format
! 67: isn't actually documented.
! 68:
! 69: Because funs->scan doesn't get the whole input it can't put the right
! 70: values in for %n, so that's handled in __gmp_doscan. Neither sscanf nor
! 71: fscanf directly indicate how many characters were read, so an extra %n is
! 72: appended to each run for that. For fscanf this merely supports our %n
! 73: output, but for sscanf it lets funs->step move us along the input string.
! 74:
! 75: Whitespace and literal matches in the format string, including %%, are
! 76: handled directly within __gmp_doscan. This is reasonably efficient, and
! 77: avoids some suspicious behaviour observed in various system libc's.
! 78: GLIBC 2.2.4 for instance returns 0 on sscanf(" "," x") or on sscanf(" ",
! 79: " x%d",&n), whereas we think they should return EOF, since end-of-string
! 80: is reached when a match of "x" is required.
! 81:
! 82: For standard % conversions, funs->scan is called once for each
! 83: conversion. If we had vfscanf and vsscanf and could rely on their fixed
! 84: text matching behaviour then we could call them with multiple consecutive
! 85: standard conversions. But plain fscanf and sscanf work fine, and parsing
! 86: one field at a time shouldn't be too much of a slowdown.
! 87:
! 88: gmpscan reads a gmp type. It's only used from one place, but is a
! 89: separate subroutine to avoid a big chunk of complicated code in the
! 90: middle of __gmp_doscan. Within gmpscan a couple of loopbacks make it
! 91: possible to share code for parsing integers, rationals and floats.
! 92:
! 93: In gmpscan normally one char of lookahead is maintained, but when width
! 94: is reached that stops, on the principle that an fgetc/ungetc of a char
! 95: past where we're told to stop would be undesirable. "chars" is how many
! 96: characters have been read so far, including the current c. When
! 97: chars==width and another character is desired then a jump is done to the
! 98: "convert" stage. c is invalid and mustn't be unget'ed in this case;
! 99: chars is set to width+1 to indicate that.
! 100:
! 101: gmpscan normally returns the number of characters read. -1 means an
! 102: invalid field, like a "-" or "+" alone. -2 means EOF reached before any
! 103: matching characters were read.
! 104:
! 105: Consideration was given to using a separate code for gmp_fscanf and
! 106: gmp_sscanf. The sscanf case could zip across a string making literal
! 107: matches or recognising digits in gmpscan, rather than making a function
! 108: call fun->get per character. The fscanf could use getc rather than fgetc
! 109: too, which might help those systems where getc is a macro or otherwise
! 110: inlined. But none of this scanning and converting will be particularly
! 111: fast, so the two are done together to keep it a bit simpler for now.
! 112:
! 113: Enhancements:
! 114:
! 115: A way to read the GLIBC printf %a format that we support in gmp_printf
! 116: would be good. That would probably be good for plain GLIBC scanf too, so
! 117: perhaps we can simply follow its lead if it gets such a feature in the
! 118: future. */
! 119:
! 120:
/* Settings collected by __gmp_doscan while parsing a single "%" conversion,
   passed on to gmpscan when the conversion is for a GMP type.  */
struct gmp_doscan_params_t {
  int   base;    /* digit radix; 0 lets gmpscan pick it from a "0"/"0x" prefix */
  int   ignore;  /* non-zero for "*": parse the field but don't store it */
  char  type;    /* type modifier letter ('F', 'Q', 'Z', 'l', ...), '\0' if none */
  int   width;   /* maximum field width; 0 means no limit was given */
};
! 127:
! 128:
/* Fetch the next input character into "c" for gmpscan.  Relies on the
   locals "chars", "width", "funs" and "data" and on a "convert" label in
   the expanding function.  "chars" is bumped first; if that takes it past
   "width" no character is read and control jumps to "convert", leaving
   chars == width+1 as the marker that "c" is not a valid lookahead.  */
#define GET(c)                          \
  do {                                  \
    ASSERT (chars <= width);            \
    if (++chars > width)                \
      goto convert;                     \
    (c) = (*funs->get) (data);          \
  } while (0)
! 137:
/* Number of bytes the gmpscan text buffer starts with and grows by.  */
#define S_ALLOC_STEP  512

/* Append character "c" to the buffer "s" in gmpscan.  "s_upto" is the
   number of bytes used and "s_alloc" the number allocated; when the buffer
   is full it's enlarged by S_ALLOC_STEP before the store.  */
#define STORE(c)                                                        \
  do {                                                                  \
    ASSERT (s_upto <= s_alloc);                                         \
    if (s_upto >= s_alloc)                                              \
      {                                                                 \
        s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc,                     \
                                        s_alloc + S_ALLOC_STEP, char);  \
        s_alloc += S_ALLOC_STEP;                                        \
      }                                                                 \
    s[s_upto] = (c);                                                    \
    s_upto++;                                                           \
  } while (0)
! 152:
! 153: static int
! 154: gmpscan (const struct gmp_doscan_funs_t *funs, void *data,
! 155: const struct gmp_doscan_params_t *p, void *dst)
! 156: {
! 157: int chars, c, base, first, width, seen_point, seen_digit;
! 158: size_t s_upto, s_alloc;
! 159: char *s;
! 160: int invalid = 0;
! 161:
! 162: TRACE (printf ("gmpscan\n"));
! 163:
! 164: ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');
! 165:
! 166: c = (*funs->get) (data);
! 167: if (c == EOF)
! 168: return -2;
! 169:
! 170: chars = 1;
! 171: first = 1;
! 172: seen_point = 0;
! 173: seen_digit = 0;
! 174: width = (p->width == 0 ? INT_MAX-1 : p->width);
! 175: base = p->base;
! 176: s_alloc = S_ALLOC_STEP;
! 177: s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char);
! 178: s_upto = 0;
! 179:
! 180: another:
! 181: if (c == '-')
! 182: {
! 183: STORE (c);
! 184: goto get_for_sign;
! 185: }
! 186: else if (c == '+')
! 187: {
! 188: /* don't store '+', it's not accepted by mpz_set_str etc */
! 189: get_for_sign:
! 190: GET (c);
! 191: }
! 192:
! 193: if (base == 0)
! 194: {
! 195: base = 10;
! 196: if (c == '0')
! 197: {
! 198: seen_digit = 1;
! 199: base = 8;
! 200: STORE (c);
! 201: GET (c);
! 202: if (c == 'x' || c == 'X')
! 203: {
! 204: base = 16;
! 205: store_get_digits:
! 206: STORE (c);
! 207: GET (c);
! 208: }
! 209: }
! 210: }
! 211:
! 212: digits:
! 213: for (;;)
! 214: {
! 215: if (base == 16)
! 216: {
! 217: if (! (isascii (c) && isxdigit (c)))
! 218: break;
! 219: }
! 220: else
! 221: {
! 222: if (! (isascii (c) && isdigit (c)))
! 223: break;
! 224: if (base == 8 && (c == '8' || c == '9'))
! 225: break;
! 226: }
! 227:
! 228: seen_digit = 1;
! 229: STORE (c);
! 230: GET (c);
! 231: }
! 232:
! 233: if (first)
! 234: {
! 235: /* decimal point */
! 236: if (p->type == 'F' && ! seen_point)
! 237: {
! 238: #if HAVE_LOCALECONV
! 239: /* For a multi-character decimal point, if the first character is
! 240: present then all of it must be, otherwise the input is
! 241: considered invalid. */
! 242: const char *point;
! 243: int pc;
! 244: point = localeconv()->decimal_point;
! 245: pc = *point++;
! 246: if (c == pc)
! 247: {
! 248: for (;;)
! 249: {
! 250: STORE (c);
! 251: GET (c);
! 252: pc = *point++;
! 253: if (pc == '\0')
! 254: break;
! 255: if (c != pc)
! 256: goto invalid;
! 257: }
! 258: seen_point = 1;
! 259: goto digits;
! 260: }
! 261: #else
! 262: if (c == '.')
! 263: {
! 264: seen_point = 1;
! 265: goto store_get_digits;
! 266: }
! 267: #endif
! 268: }
! 269:
! 270: /* exponent */
! 271: if (p->type == 'F' && (c == 'e' || c == 'E'))
! 272: {
! 273: /* must have at least one digit in the mantissa, just an exponent
! 274: is not good enough */
! 275: if (! seen_digit)
! 276: goto invalid;
! 277:
! 278: exponent:
! 279: first = 0;
! 280: STORE (c);
! 281: GET (c);
! 282: goto another;
! 283: }
! 284:
! 285: /* denominator */
! 286: if (p->type == 'Q' && c == '/')
! 287: {
! 288: /* must have at least one digit in the numerator */
! 289: if (! seen_digit)
! 290: goto invalid;
! 291:
! 292: /* now look for at least one digit in the denominator */
! 293: seen_digit = 0;
! 294:
! 295: /* allow the base to be redetermined for "%i" */
! 296: base = p->base;
! 297: goto exponent;
! 298: }
! 299: }
! 300:
! 301: convert:
! 302: if (! seen_digit)
! 303: {
! 304: invalid:
! 305: invalid = 1;
! 306: goto done;
! 307: }
! 308:
! 309: if (! p->ignore)
! 310: {
! 311: STORE ('\0');
! 312: TRACE (printf (" convert \"%s\"\n", s));
! 313:
! 314: /* We ought to have parsed out a valid string above, so just test
! 315: mpz_set_str etc with an ASSERT. */
! 316: switch (p->type) {
! 317: case 'F':
! 318: ASSERT (p->base == 10);
! 319: ASSERT_NOCARRY (mpf_set_str ((mpf_ptr) dst, s, 10));
! 320: break;
! 321: case 'Q':
! 322: ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base));
! 323: break;
! 324: case 'Z':
! 325: ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base));
! 326: break;
! 327: default:
! 328: ASSERT (0);
! 329: /*FALLTHRU*/
! 330: break;
! 331: }
! 332: }
! 333:
! 334: done:
! 335: ASSERT (chars <= width+1);
! 336: if (chars != width+1)
! 337: {
! 338: (*funs->unget) (c, data);
! 339: TRACE (printf (" ungetc %d, to give %d chars\n", c, chars-1));
! 340: }
! 341: chars--;
! 342:
! 343: (*__gmp_free_func) (s, s_alloc);
! 344:
! 345: if (invalid)
! 346: {
! 347: TRACE (printf (" invalid\n"));
! 348: return -1;
! 349: }
! 350:
! 351: TRACE (printf (" return %d chars (cf width %d)\n", chars, width));
! 352: return chars;
! 353: }
! 354:
! 355:
! 356: /* Read and discard whitespace, if any. Return number of chars skipped.
! 357: Whitespace skipping never provokes the EOF return from __gmp_doscan, so
! 358: it's not necessary to watch for EOF from funs->get, */
! 359: static int
! 360: skip_white (const struct gmp_doscan_funs_t *funs, void *data)
! 361: {
! 362: int c;
! 363: int ret = 0;
! 364:
! 365: do
! 366: {
! 367: c = (funs->get) (data);
! 368: ret++;
! 369: }
! 370: while (isascii (c) && isspace (c));
! 371:
! 372: (funs->unget) (c, data);
! 373: ret--;
! 374:
! 375: TRACE (printf (" skip white %d\n", ret));
! 376: return ret;
! 377: }
! 378:
! 379:
! 380: int
! 381: __gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,
! 382: const char *orig_fmt, va_list orig_ap)
! 383: {
! 384: struct gmp_doscan_params_t param;
! 385: va_list ap;
! 386: char *alloc_fmt;
! 387: const char *fmt, *this_fmt, *end_fmt;
! 388: size_t orig_fmt_len, alloc_fmt_size, len;
! 389: int new_fields, new_chars;
! 390: char fchar;
! 391: int fields = 0;
! 392: int chars = 0;
! 393:
! 394: TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt);
! 395: if (funs->scan == (gmp_doscan_scan_t) sscanf)
! 396: printf (" s=\"%s\"\n", (const char *) data));
! 397:
! 398: /* Don't modify orig_ap, if va_list is actually an array and hence call by
! 399: reference. It could be argued that it'd be more efficient to leave
! 400: callers to make a copy if they care, but doing so here is going to be a
! 401: very small part of the total work, and we may as well keep applications
! 402: out of trouble. */
! 403: va_copy (ap, orig_ap);
! 404:
! 405: /* Parts of the format string are going to be copied so that a " %n" can
! 406: be appended. alloc_fmt is some space for that. orig_fmt_len+4 will be
! 407: needed if fmt consists of a single "%" specifier, but otherwise is an
! 408: overestimate. We're not going to be very fast here, so use
! 409: __gmp_allocate_func rather than TMP_ALLOC. */
! 410: orig_fmt_len = strlen (orig_fmt);
! 411: alloc_fmt_size = orig_fmt_len + 4;
! 412: alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
! 413:
! 414: fmt = orig_fmt;
! 415: end_fmt = orig_fmt + orig_fmt_len;
! 416:
! 417: for (;;)
! 418: {
! 419: next:
! 420: fchar = *fmt++;
! 421:
! 422: if (fchar == '\0')
! 423: break;
! 424:
! 425: if (isascii (fchar) && isspace (fchar))
! 426: {
! 427: chars += skip_white (funs, data);
! 428: continue;
! 429: }
! 430:
! 431: if (fchar != '%')
! 432: {
! 433: int c;
! 434: literal:
! 435: c = (funs->get) (data);
! 436: if (c != fchar)
! 437: {
! 438: (funs->unget) (c, data);
! 439: if (c == EOF)
! 440: {
! 441: eof_no_match:
! 442: if (fields == 0)
! 443: fields = EOF;
! 444: }
! 445: goto done;
! 446: }
! 447: chars++;
! 448: continue;
! 449: }
! 450:
! 451: param.type = '\0';
! 452: param.base = 10;
! 453: param.ignore = 0;
! 454: param.width = 0;
! 455:
! 456: this_fmt = fmt-1;
! 457: TRACE (printf (" this_fmt \"%s\"\n", this_fmt));
! 458:
! 459: for (;;)
! 460: {
! 461: ASSERT (fmt <= end_fmt);
! 462:
! 463: fchar = *fmt++;
! 464: switch (fchar) {
! 465:
! 466: case '\0': /* unterminated % sequence */
! 467: ASSERT (0);
! 468: goto done;
! 469:
! 470: case '%': /* literal % */
! 471: goto literal;
! 472:
! 473: case '[': /* character range */
! 474: fchar = *fmt++;
! 475: if (fchar == '^')
! 476: fchar = *fmt++;
! 477: /* ']' allowed as the first char (possibly after '^') */
! 478: if (fchar == ']')
! 479: fchar = *fmt++;
! 480: for (;;)
! 481: {
! 482: ASSERT (fmt <= end_fmt);
! 483: if (fchar == '\0')
! 484: {
! 485: /* unterminated % sequence */
! 486: ASSERT (0);
! 487: goto done;
! 488: }
! 489: if (fchar == ']')
! 490: break;
! 491: fchar = *fmt++;
! 492: }
! 493: /*FALLTHRU*/
! 494: case 'c': /* characters */
! 495: case 's': /* string of non-whitespace */
! 496: case 'p': /* pointer */
! 497: libc_type:
! 498: len = fmt - this_fmt;
! 499: memcpy (alloc_fmt, this_fmt, len);
! 500: alloc_fmt[len++] = '%';
! 501: alloc_fmt[len++] = 'n';
! 502: alloc_fmt[len] = '\0';
! 503:
! 504: TRACE (printf (" scan \"%s\"\n", alloc_fmt);
! 505: if (funs->scan == (gmp_doscan_scan_t) sscanf)
! 506: printf (" s=\"%s\"\n", (const char *) data));
! 507:
! 508: new_chars = -1;
! 509: if (param.ignore)
! 510: {
! 511: new_fields = (*funs->scan) (data, alloc_fmt, &new_chars);
! 512: ASSERT (new_fields == 0 || new_fields == EOF);
! 513: }
! 514: else
! 515: {
! 516: new_fields = (*funs->scan) (data, alloc_fmt,
! 517: va_arg (ap, void *), &new_chars);
! 518: ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);
! 519:
! 520: if (new_fields == 0)
! 521: goto done; /* invalid input */
! 522:
! 523: if (new_fields == 1)
! 524: ASSERT (new_chars != -1);
! 525: }
! 526: TRACE (printf (" new_fields %d new_chars %d\n",
! 527: new_fields, new_chars));
! 528:
! 529: if (new_fields == -1)
! 530: goto eof_no_match; /* EOF before anything matched */
! 531:
! 532: /* Wnder param.ignore, when new_fields==0 we don't know if
! 533: it's a successful match or an invalid field. new_chars
! 534: won't have been assigned if it was an invalid field. */
! 535: if (new_chars == -1)
! 536: goto done; /* invalid input */
! 537:
! 538: chars += new_chars;
! 539: (*funs->step) (data, new_chars);
! 540:
! 541: increment_fields:
! 542: if (! param.ignore)
! 543: fields++;
! 544: goto next;
! 545:
! 546: case 'd': /* decimal */
! 547: case 'e': /* float */
! 548: case 'E': /* float */
! 549: case 'f': /* float */
! 550: case 'g': /* float */
! 551: case 'G': /* float */
! 552: case 'u': /* decimal */
! 553: numeric:
! 554: if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')
! 555: goto libc_type;
! 556:
! 557: chars += skip_white (funs, data);
! 558:
! 559: new_chars = gmpscan (funs, data, ¶m,
! 560: param.ignore ? NULL : va_arg (ap, void*));
! 561: if (new_chars == -2)
! 562: goto eof_no_match;
! 563: if (new_chars == -1)
! 564: goto done;
! 565:
! 566: ASSERT (new_chars >= 0);
! 567: chars += new_chars;
! 568: goto increment_fields;
! 569:
! 570: case 'a': /* glibc allocate string */
! 571: case '\'': /* glibc digit groupings */
! 572: break;
! 573:
! 574: case 'F': /* mpf_t */
! 575: case 'j': /* intmax_t */
! 576: case 'L': /* long long */
! 577: case 'q': /* quad_t */
! 578: case 'Q': /* mpq_t */
! 579: case 't': /* ptrdiff_t */
! 580: case 'z': /* size_t */
! 581: case 'Z': /* mpz_t */
! 582: set_type:
! 583: param.type = fchar;
! 584: break;
! 585:
! 586: case 'h': /* short or char */
! 587: if (param.type != 'h')
! 588: goto set_type;
! 589: param.type = 'H'; /* internal code for "hh" */
! 590: break;
! 591:
! 592: case 'i':
! 593: param.base = 0;
! 594: goto numeric;
! 595:
! 596: case 'l': /* long, long long, double or long double */
! 597: if (param.type != 'l')
! 598: goto set_type;
! 599: param.type = 'L'; /* "ll" means "L" */
! 600: break;
! 601:
! 602: case 'n':
! 603: if (! param.ignore)
! 604: {
! 605: void *p;
! 606: p = va_arg (ap, void *);
! 607: TRACE (printf (" store %%n to %p\n", p));
! 608: switch (param.type) {
! 609: case '\0': * (int *) p = chars; break;
! 610: case 'F': mpf_set_si ((mpf_ptr) p, (long) chars); break;
! 611: case 'H': * (char *) p = chars; break;
! 612: case 'h': * (short *) p = chars; break;
! 613: #if HAVE_INTMAX_T
! 614: case 'j': * (intmax_t *) p = chars; break;
! 615: #else
! 616: case 'j': ASSERT_FAIL (intmax_t not available); break;
! 617: #endif
! 618: case 'l': * (long *) p = chars; break;
! 619: #if HAVE_QUAD_T && HAVE_LONG_LONG
! 620: case 'q':
! 621: ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
! 622: /*FALLTHRU*/
! 623: #else
! 624: case 'q': ASSERT_FAIL (quad_t not available); break;
! 625: #endif
! 626: #if HAVE_LONG_LONG
! 627: case 'L': * (long long *) p = chars; break;
! 628: #else
! 629: case 'L': ASSERT_FAIL (long long not available); break;
! 630: #endif
! 631: case 'Q': mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;
! 632: #if HAVE_PTRDIFF_T
! 633: case 't': * (ptrdiff_t *) p = chars; break;
! 634: #else
! 635: case 't': ASSERT_FAIL (ptrdiff_t not available); break;
! 636: #endif
! 637: case 'z': * (size_t *) p = chars; break;
! 638: case 'Z': mpz_set_si ((mpz_ptr) p, (long) chars); break;
! 639: default: ASSERT (0); break;
! 640: }
! 641: }
! 642: goto next;
! 643:
! 644: case 'o':
! 645: param.base = 8;
! 646: goto numeric;
! 647:
! 648: case 'x':
! 649: case 'X':
! 650: param.base = 16;
! 651: goto numeric;
! 652:
! 653: case '0': case '1': case '2': case '3': case '4':
! 654: case '5': case '6': case '7': case '8': case '9':
! 655: param.width = 0;
! 656: do {
! 657: param.width = param.width * 10 + (fchar-'0');
! 658: fchar = *fmt++;
! 659: } while (isascii (fchar) && isdigit (fchar));
! 660: fmt--; /* unget the non-digit */
! 661: break;
! 662:
! 663: case '*':
! 664: param.ignore = 1;
! 665: break;
! 666:
! 667: default:
! 668: /* something invalid in a % sequence */
! 669: ASSERT (0);
! 670: goto next;
! 671: }
! 672: }
! 673: }
! 674:
! 675: done:
! 676: (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);
! 677: return fields;
! 678: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>