Annotation of OpenXM_contrib/gmp/scanf/doscan.c, Revision 1.1.1.1
1.1 ohara 1: /* __gmp_doscan -- formatted input internals.
2:
3: THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
4: CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
5: FUTURE GNU MP RELEASES.
6:
7: Copyright 2001, 2002 Free Software Foundation, Inc.
8:
9: This file is part of the GNU MP Library.
10:
11: The GNU MP Library is free software; you can redistribute it and/or modify
12: it under the terms of the GNU Lesser General Public License as published by
13: the Free Software Foundation; either version 2.1 of the License, or (at your
14: option) any later version.
15:
16: The GNU MP Library is distributed in the hope that it will be useful, but
17: WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18: or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19: License for more details.
20:
21: You should have received a copy of the GNU Lesser General Public License
22: along with the GNU MP Library; see the file COPYING.LIB. If not, write to
23: the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
24: MA 02111-1307, USA. */
25:
26: #include "config.h"
27:
28: #if HAVE_STDARG
29: #include <stdarg.h>
30: #else
31: #include <varargs.h>
32: #endif
33:
34: #include <ctype.h>
35: #include <stddef.h> /* for ptrdiff_t */
36: #include <stdio.h>
37: #include <string.h>
38:
39: #if HAVE_LOCALE_H
40: #include <locale.h> /* for localeconv */
41: #endif
42:
43: #if HAVE_INTTYPES_H
44: # include <inttypes.h> /* for intmax_t */
45: #else
46: # if HAVE_STDINT_H
47: # include <stdint.h>
48: # endif
49: #endif
50:
51: #if HAVE_SYS_TYPES_H
52: #include <sys/types.h> /* for quad_t */
53: #endif
54:
55: #include "gmp.h"
56: #include "gmp-impl.h"
57:
58:
/* Debug tracing hook.  As defined here the argument is discarded, so trace
   statements cost nothing.  Redefine as "#define TRACE(x) x" to have them
   compiled in.  */
#define TRACE(x)
61:
62:
63: /* It's necessary to parse up the string to recognise the GMP extra types F,
64: Q and Z. Other types and conversions are passed across to the standard
65: sscanf or fscanf via funs->scan, for ease of implementation. This is
66: essential in the case of something like glibc %p where the pointer format
67: isn't actually documented.
68:
69: Because funs->scan doesn't get the whole input it can't put the right
70: values in for %n, so that's handled in __gmp_doscan. Neither sscanf nor
71: fscanf directly indicate how many characters were read, so an extra %n is
72: appended to each run for that. For fscanf this merely supports our %n
73: output, but for sscanf it lets funs->step move us along the input string.
74:
75: Whitespace and literal matches in the format string, including %%, are
76: handled directly within __gmp_doscan. This is reasonably efficient, and
77: avoids some suspicious behaviour observed in various system libc's.
78: GLIBC 2.2.4 for instance returns 0 on sscanf(" "," x") or on sscanf(" ",
79: " x%d",&n), whereas we think they should return EOF, since end-of-string
80: is reached when a match of "x" is required.
81:
82: For standard % conversions, funs->scan is called once for each
83: conversion. If we had vfscanf and vsscanf and could rely on their fixed
84: text matching behaviour then we could call them with multiple consecutive
85: standard conversions. But plain fscanf and sscanf work fine, and parsing
86: one field at a time shouldn't be too much of a slowdown.
87:
88: gmpscan reads a gmp type. It's only used from one place, but is a
89: separate subroutine to avoid a big chunk of complicated code in the
90: middle of __gmp_doscan. Within gmpscan a couple of loopbacks make it
91: possible to share code for parsing integers, rationals and floats.
92:
93: In gmpscan normally one char of lookahead is maintained, but when width
94: is reached that stops, on the principle that an fgetc/ungetc of a char
95: past where we're told to stop would be undesirable. "chars" is how many
96: characters have been read so far, including the current c. When
97: chars==width and another character is desired then a jump is done to the
98: "convert" stage. c is invalid and mustn't be unget'ed in this case;
99: chars is set to width+1 to indicate that.
100:
101: gmpscan normally returns the number of characters read. -1 means an
102: invalid field, like a "-" or "+" alone. -2 means EOF reached before any
103: matching characters were read.
104:
105: Consideration was given to using a separate code for gmp_fscanf and
106: gmp_sscanf. The sscanf case could zip across a string making literal
107: matches or recognising digits in gmpscan, rather than making a function
108: call funs->get per character. The fscanf could use getc rather than fgetc
109: too, which might help those systems where getc is a macro or otherwise
110: inlined. But none of this scanning and converting will be particularly
111: fast, so the two are done together to keep it a bit simpler for now.
112:
113: Enhancements:
114:
115: A way to read the GLIBC printf %a format that we support in gmp_printf
116: would be good. That would probably be good for plain GLIBC scanf too, so
117: perhaps we can simply follow its lead if it gets such a feature in the
118: future. */
119:
120:
/* Parsed form of a single "%" conversion, filled in by __gmp_doscan and
   read by gmpscan.  */
struct gmp_doscan_params_t {
  int   base;    /* 8, 10 or 16; 0 (from "%i") means detect from a leading
                    "0" or "0x" in the input */
  int   ignore;  /* non-zero when "*" was given: parse but don't store */
  char  type;    /* type modifier seen: 'F', 'Q', 'Z' for GMP types, a libc
                    modifier letter, 'H' for "hh", or '\0' for none */
  int   width;   /* maximum field width, 0 meaning no limit was given */
};
127:
128:
/* Fetch the next input character into c, counting it in "chars".  If that
   would exceed the field "width" then nothing is read; control jumps to
   the "convert" label with chars == width+1, which marks c as stale.
   Relies on the caller's locals chars, width, funs and data, and on a
   "convert" label being in scope.  */
#define GET(c)                          \
  do {                                  \
    ASSERT (chars <= width);            \
    if (++chars > width)                \
      goto convert;                     \
    (c) = (*funs->get) (data);          \
  } while (0)
137:
/* Growth increment, in chars, for the text buffer "s" used by gmpscan.  */
#define S_ALLOC_STEP  512

/* Append character c to the buffer "s", extending it by S_ALLOC_STEP chars
   when it is full.  Relies on the caller's locals s, s_upto (chars used)
   and s_alloc (chars allocated).  The argument is parenthesized so that any
   expression can be passed safely.  */
#define STORE(c)                                                        \
  do {                                                                  \
    ASSERT (s_upto <= s_alloc);                                         \
    if (s_upto >= s_alloc)                                              \
      {                                                                 \
        size_t  s_alloc_new = s_alloc + S_ALLOC_STEP;                   \
        s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char); \
        s_alloc = s_alloc_new;                                          \
      }                                                                 \
    s[s_upto++] = (c);                                                  \
  } while (0)
152:
153: static int
154: gmpscan (const struct gmp_doscan_funs_t *funs, void *data,
155: const struct gmp_doscan_params_t *p, void *dst)
156: {
157: int chars, c, base, first, width, seen_point, seen_digit;
158: size_t s_upto, s_alloc;
159: char *s;
160: int invalid = 0;
161:
162: TRACE (printf ("gmpscan\n"));
163:
164: ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z');
165:
166: c = (*funs->get) (data);
167: if (c == EOF)
168: return -2;
169:
170: chars = 1;
171: first = 1;
172: seen_point = 0;
173: seen_digit = 0;
174: width = (p->width == 0 ? INT_MAX-1 : p->width);
175: base = p->base;
176: s_alloc = S_ALLOC_STEP;
177: s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char);
178: s_upto = 0;
179:
180: another:
181: if (c == '-')
182: {
183: STORE (c);
184: goto get_for_sign;
185: }
186: else if (c == '+')
187: {
188: /* don't store '+', it's not accepted by mpz_set_str etc */
189: get_for_sign:
190: GET (c);
191: }
192:
193: if (base == 0)
194: {
195: base = 10;
196: if (c == '0')
197: {
198: seen_digit = 1;
199: base = 8;
200: STORE (c);
201: GET (c);
202: if (c == 'x' || c == 'X')
203: {
204: base = 16;
205: store_get_digits:
206: STORE (c);
207: GET (c);
208: }
209: }
210: }
211:
212: digits:
213: for (;;)
214: {
215: if (base == 16)
216: {
217: if (! (isascii (c) && isxdigit (c)))
218: break;
219: }
220: else
221: {
222: if (! (isascii (c) && isdigit (c)))
223: break;
224: if (base == 8 && (c == '8' || c == '9'))
225: break;
226: }
227:
228: seen_digit = 1;
229: STORE (c);
230: GET (c);
231: }
232:
233: if (first)
234: {
235: /* decimal point */
236: if (p->type == 'F' && ! seen_point)
237: {
238: #if HAVE_LOCALECONV
239: /* For a multi-character decimal point, if the first character is
240: present then all of it must be, otherwise the input is
241: considered invalid. */
242: const char *point;
243: int pc;
244: point = localeconv()->decimal_point;
245: pc = *point++;
246: if (c == pc)
247: {
248: for (;;)
249: {
250: STORE (c);
251: GET (c);
252: pc = *point++;
253: if (pc == '\0')
254: break;
255: if (c != pc)
256: goto invalid;
257: }
258: seen_point = 1;
259: goto digits;
260: }
261: #else
262: if (c == '.')
263: {
264: seen_point = 1;
265: goto store_get_digits;
266: }
267: #endif
268: }
269:
270: /* exponent */
271: if (p->type == 'F' && (c == 'e' || c == 'E'))
272: {
273: /* must have at least one digit in the mantissa, just an exponent
274: is not good enough */
275: if (! seen_digit)
276: goto invalid;
277:
278: exponent:
279: first = 0;
280: STORE (c);
281: GET (c);
282: goto another;
283: }
284:
285: /* denominator */
286: if (p->type == 'Q' && c == '/')
287: {
288: /* must have at least one digit in the numerator */
289: if (! seen_digit)
290: goto invalid;
291:
292: /* now look for at least one digit in the denominator */
293: seen_digit = 0;
294:
295: /* allow the base to be redetermined for "%i" */
296: base = p->base;
297: goto exponent;
298: }
299: }
300:
301: convert:
302: if (! seen_digit)
303: {
304: invalid:
305: invalid = 1;
306: goto done;
307: }
308:
309: if (! p->ignore)
310: {
311: STORE ('\0');
312: TRACE (printf (" convert \"%s\"\n", s));
313:
314: /* We ought to have parsed out a valid string above, so just test
315: mpz_set_str etc with an ASSERT. */
316: switch (p->type) {
317: case 'F':
318: ASSERT (p->base == 10);
319: ASSERT_NOCARRY (mpf_set_str ((mpf_ptr) dst, s, 10));
320: break;
321: case 'Q':
322: ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base));
323: break;
324: case 'Z':
325: ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base));
326: break;
327: default:
328: ASSERT (0);
329: /*FALLTHRU*/
330: break;
331: }
332: }
333:
334: done:
335: ASSERT (chars <= width+1);
336: if (chars != width+1)
337: {
338: (*funs->unget) (c, data);
339: TRACE (printf (" ungetc %d, to give %d chars\n", c, chars-1));
340: }
341: chars--;
342:
343: (*__gmp_free_func) (s, s_alloc);
344:
345: if (invalid)
346: {
347: TRACE (printf (" invalid\n"));
348: return -1;
349: }
350:
351: TRACE (printf (" return %d chars (cf width %d)\n", chars, width));
352: return chars;
353: }
354:
355:
356: /* Read and discard whitespace, if any. Return number of chars skipped.
357: Whitespace skipping never provokes the EOF return from __gmp_doscan, so
358: it's not necessary to watch for EOF from funs->get, */
359: static int
360: skip_white (const struct gmp_doscan_funs_t *funs, void *data)
361: {
362: int c;
363: int ret = 0;
364:
365: do
366: {
367: c = (funs->get) (data);
368: ret++;
369: }
370: while (isascii (c) && isspace (c));
371:
372: (funs->unget) (c, data);
373: ret--;
374:
375: TRACE (printf (" skip white %d\n", ret));
376: return ret;
377: }
378:
379:
380: int
381: __gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data,
382: const char *orig_fmt, va_list orig_ap)
383: {
384: struct gmp_doscan_params_t param;
385: va_list ap;
386: char *alloc_fmt;
387: const char *fmt, *this_fmt, *end_fmt;
388: size_t orig_fmt_len, alloc_fmt_size, len;
389: int new_fields, new_chars;
390: char fchar;
391: int fields = 0;
392: int chars = 0;
393:
394: TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt);
395: if (funs->scan == (gmp_doscan_scan_t) sscanf)
396: printf (" s=\"%s\"\n", (const char *) data));
397:
398: /* Don't modify orig_ap, if va_list is actually an array and hence call by
399: reference. It could be argued that it'd be more efficient to leave
400: callers to make a copy if they care, but doing so here is going to be a
401: very small part of the total work, and we may as well keep applications
402: out of trouble. */
403: va_copy (ap, orig_ap);
404:
405: /* Parts of the format string are going to be copied so that a " %n" can
406: be appended. alloc_fmt is some space for that. orig_fmt_len+4 will be
407: needed if fmt consists of a single "%" specifier, but otherwise is an
408: overestimate. We're not going to be very fast here, so use
409: __gmp_allocate_func rather than TMP_ALLOC. */
410: orig_fmt_len = strlen (orig_fmt);
411: alloc_fmt_size = orig_fmt_len + 4;
412: alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char);
413:
414: fmt = orig_fmt;
415: end_fmt = orig_fmt + orig_fmt_len;
416:
417: for (;;)
418: {
419: next:
420: fchar = *fmt++;
421:
422: if (fchar == '\0')
423: break;
424:
425: if (isascii (fchar) && isspace (fchar))
426: {
427: chars += skip_white (funs, data);
428: continue;
429: }
430:
431: if (fchar != '%')
432: {
433: int c;
434: literal:
435: c = (funs->get) (data);
436: if (c != fchar)
437: {
438: (funs->unget) (c, data);
439: if (c == EOF)
440: {
441: eof_no_match:
442: if (fields == 0)
443: fields = EOF;
444: }
445: goto done;
446: }
447: chars++;
448: continue;
449: }
450:
451: param.type = '\0';
452: param.base = 10;
453: param.ignore = 0;
454: param.width = 0;
455:
456: this_fmt = fmt-1;
457: TRACE (printf (" this_fmt \"%s\"\n", this_fmt));
458:
459: for (;;)
460: {
461: ASSERT (fmt <= end_fmt);
462:
463: fchar = *fmt++;
464: switch (fchar) {
465:
466: case '\0': /* unterminated % sequence */
467: ASSERT (0);
468: goto done;
469:
470: case '%': /* literal % */
471: goto literal;
472:
473: case '[': /* character range */
474: fchar = *fmt++;
475: if (fchar == '^')
476: fchar = *fmt++;
477: /* ']' allowed as the first char (possibly after '^') */
478: if (fchar == ']')
479: fchar = *fmt++;
480: for (;;)
481: {
482: ASSERT (fmt <= end_fmt);
483: if (fchar == '\0')
484: {
485: /* unterminated % sequence */
486: ASSERT (0);
487: goto done;
488: }
489: if (fchar == ']')
490: break;
491: fchar = *fmt++;
492: }
493: /*FALLTHRU*/
494: case 'c': /* characters */
495: case 's': /* string of non-whitespace */
496: case 'p': /* pointer */
497: libc_type:
498: len = fmt - this_fmt;
499: memcpy (alloc_fmt, this_fmt, len);
500: alloc_fmt[len++] = '%';
501: alloc_fmt[len++] = 'n';
502: alloc_fmt[len] = '\0';
503:
504: TRACE (printf (" scan \"%s\"\n", alloc_fmt);
505: if (funs->scan == (gmp_doscan_scan_t) sscanf)
506: printf (" s=\"%s\"\n", (const char *) data));
507:
508: new_chars = -1;
509: if (param.ignore)
510: {
511: new_fields = (*funs->scan) (data, alloc_fmt, &new_chars);
512: ASSERT (new_fields == 0 || new_fields == EOF);
513: }
514: else
515: {
516: new_fields = (*funs->scan) (data, alloc_fmt,
517: va_arg (ap, void *), &new_chars);
518: ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF);
519:
520: if (new_fields == 0)
521: goto done; /* invalid input */
522:
523: if (new_fields == 1)
524: ASSERT (new_chars != -1);
525: }
526: TRACE (printf (" new_fields %d new_chars %d\n",
527: new_fields, new_chars));
528:
529: if (new_fields == -1)
530: goto eof_no_match; /* EOF before anything matched */
531:
532: /* Wnder param.ignore, when new_fields==0 we don't know if
533: it's a successful match or an invalid field. new_chars
534: won't have been assigned if it was an invalid field. */
535: if (new_chars == -1)
536: goto done; /* invalid input */
537:
538: chars += new_chars;
539: (*funs->step) (data, new_chars);
540:
541: increment_fields:
542: if (! param.ignore)
543: fields++;
544: goto next;
545:
546: case 'd': /* decimal */
547: case 'e': /* float */
548: case 'E': /* float */
549: case 'f': /* float */
550: case 'g': /* float */
551: case 'G': /* float */
552: case 'u': /* decimal */
553: numeric:
554: if (param.type != 'F' && param.type != 'Q' && param.type != 'Z')
555: goto libc_type;
556:
557: chars += skip_white (funs, data);
558:
559: new_chars = gmpscan (funs, data, ¶m,
560: param.ignore ? NULL : va_arg (ap, void*));
561: if (new_chars == -2)
562: goto eof_no_match;
563: if (new_chars == -1)
564: goto done;
565:
566: ASSERT (new_chars >= 0);
567: chars += new_chars;
568: goto increment_fields;
569:
570: case 'a': /* glibc allocate string */
571: case '\'': /* glibc digit groupings */
572: break;
573:
574: case 'F': /* mpf_t */
575: case 'j': /* intmax_t */
576: case 'L': /* long long */
577: case 'q': /* quad_t */
578: case 'Q': /* mpq_t */
579: case 't': /* ptrdiff_t */
580: case 'z': /* size_t */
581: case 'Z': /* mpz_t */
582: set_type:
583: param.type = fchar;
584: break;
585:
586: case 'h': /* short or char */
587: if (param.type != 'h')
588: goto set_type;
589: param.type = 'H'; /* internal code for "hh" */
590: break;
591:
592: case 'i':
593: param.base = 0;
594: goto numeric;
595:
596: case 'l': /* long, long long, double or long double */
597: if (param.type != 'l')
598: goto set_type;
599: param.type = 'L'; /* "ll" means "L" */
600: break;
601:
602: case 'n':
603: if (! param.ignore)
604: {
605: void *p;
606: p = va_arg (ap, void *);
607: TRACE (printf (" store %%n to %p\n", p));
608: switch (param.type) {
609: case '\0': * (int *) p = chars; break;
610: case 'F': mpf_set_si ((mpf_ptr) p, (long) chars); break;
611: case 'H': * (char *) p = chars; break;
612: case 'h': * (short *) p = chars; break;
613: #if HAVE_INTMAX_T
614: case 'j': * (intmax_t *) p = chars; break;
615: #else
616: case 'j': ASSERT_FAIL (intmax_t not available); break;
617: #endif
618: case 'l': * (long *) p = chars; break;
619: #if HAVE_QUAD_T && HAVE_LONG_LONG
620: case 'q':
621: ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long));
622: /*FALLTHRU*/
623: #else
624: case 'q': ASSERT_FAIL (quad_t not available); break;
625: #endif
626: #if HAVE_LONG_LONG
627: case 'L': * (long long *) p = chars; break;
628: #else
629: case 'L': ASSERT_FAIL (long long not available); break;
630: #endif
631: case 'Q': mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break;
632: #if HAVE_PTRDIFF_T
633: case 't': * (ptrdiff_t *) p = chars; break;
634: #else
635: case 't': ASSERT_FAIL (ptrdiff_t not available); break;
636: #endif
637: case 'z': * (size_t *) p = chars; break;
638: case 'Z': mpz_set_si ((mpz_ptr) p, (long) chars); break;
639: default: ASSERT (0); break;
640: }
641: }
642: goto next;
643:
644: case 'o':
645: param.base = 8;
646: goto numeric;
647:
648: case 'x':
649: case 'X':
650: param.base = 16;
651: goto numeric;
652:
653: case '0': case '1': case '2': case '3': case '4':
654: case '5': case '6': case '7': case '8': case '9':
655: param.width = 0;
656: do {
657: param.width = param.width * 10 + (fchar-'0');
658: fchar = *fmt++;
659: } while (isascii (fchar) && isdigit (fchar));
660: fmt--; /* unget the non-digit */
661: break;
662:
663: case '*':
664: param.ignore = 1;
665: break;
666:
667: default:
668: /* something invalid in a % sequence */
669: ASSERT (0);
670: goto next;
671: }
672: }
673: }
674:
675: done:
676: (*__gmp_free_func) (alloc_fmt, alloc_fmt_size);
677: return fields;
678: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>