Annotation of OpenXM/src/ox_toolkit/parse.c, Revision 1.3
1.1 ohara 1: /* -*- mode: C; coding: euc-japan -*- */
1.3 ! ohara 2: /* $OpenXM: OpenXM/src/ox_toolkit/parse.c,v 1.2 1999/12/22 11:26:37 ohara Exp $ */
1.1 ohara 3:
1.3 ! ohara 4: /*
! 5: This module is a parser for OX/CMO expressions.
! 6: Some comments are written in Japanese using the EUC-JP coded
! 7: character set.
! 8: */
1.1 ohara 9:
#include <ctype.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include "oxtag.h"
#include "ox.h"
#include "parse.h"
18:
1.3 ! ohara 19: /* --- Parser --- */
! 20: /* Remarks for semantics.
1.1 ohara 21: CMO_LIST, CMO_STRING は、あらかじめ与えられた要素の個数を無視する.
22: CMO_MONOMIAL32 は無視しない. (つまりおかしいときは構文エラーになる)
23: */
24:
1.3 ! ohara 25: /*
! 26: parse.c では, Lisp 表現された CMO 文字列を読み込み,
1.1 ohara 27: バイト列を出力する. 中間表現として、cmo 構造体を利用する.
28: parse() はトークンの列から cmo 構造体を生成し、そのポインタを返す.
1.3 ! ohara 29: 重要なことはパーサ(の各サブルーチン)は
1.1 ohara 30: 常にトークンをひとつ先読みしていると言うことである.
31: */
32:
/* The token the parser is currently looking at (one token of lookahead). */
static int token = 0;

/* Attribute value attached to the current token.  The lexer fills it in:
   `d' receives the tag of a recognised symbol, `sym' the text of a digit
   string or quoted string. */
static union {
    int d;
    char *sym;
} yylval;
41:
/*
  When `pflag_cmo_addrev' is non-zero the parser also accepts extended
  (abbreviated) CMO expressions.  For example, (CMO_STRING, "hello") is
  not a real CMO expression, but it is accepted in this mode.
*/
static int pflag_cmo_addrev = 1;
1.1 ohara 48:
1.3 ! ohara 49: /* definitions of local functions */
1.1 ohara 50: static int parse_error(char *s);
51: static int parse_lf();
52: static int parse_right_parenthesis();
53: static int parse_left_parenthesis();
54: static int parse_comma();
1.3 ! ohara 55: static mpz_ptr parse_integer();
1.1 ohara 56: static char *parse_string();
57: static cmo *parse_cmo_null();
58: static cmo *parse_cmo_int32();
59: static cmo *parse_cmo_string();
60: static cmo *parse_cmo_mathcap();
61: static cmo *parse_cmo_list();
62: static cmo *parse_cmo_monomial32();
63: static cmo *parse_cmo_zz();
64: static cmo *parse_cmo_zero();
65: static cmo *parse_cmo_dms_generic();
66: static cmo *parse_cmo_ring_by_name();
67: static cmo *parse_cmo_distributed_polynomial();
68: static cmo *parse_cmo_indeterminate();
69: static cmo *parse_cmo_error2();
70: static cmo *parse_cmo();
71: static int parse_sm();
72: static ox *parse_ox();
73: static ox *parse_ox_command();
74: static ox *parse_ox_data();
75:
76: static int is_token_cmo(int token)
77: {
78: return (token >= MIN_T_CMO && token < MAX_T_CMO) || token == TOKEN(CMO_ERROR2);
79: }
80:
81: static int is_token_sm(int token)
82: {
83: return token == TOKEN(SM);
84: }
85:
86: static int is_token_ox(int token)
87: {
88: return token >= MIN_T_OX && token < MAX_T_OX;
89: }
90:
/* Jump target armed by parse(); parse_error() unwinds back to it. */
static jmp_buf env_parse;

/* Reports a parsing fault on stderr and abandons the current parse by
   jumping back to the setjmp() in parse().  The function never returns to
   its caller; the int return type only satisfies the existing prototype. */
static int parse_error(char *s)
{
    fprintf(stderr, "syntax error: %s\n", s);
    longjmp(env_parse, 1);
}
99:
1.2 ohara 100: int setflag_parse(int flag)
101: {
102: pflag_cmo_addrev = flag;
103: }
104:
105: int init_parser(char *s)
106: {
107: setflag_parse(PFLAG_ADDREV);
1.3 ! ohara 108: init_lex(s);
1.2 ohara 109: }
110:
1.1 ohara 111: cmo *parse()
112: {
113: cmo *m;
114:
115: if (setjmp(env_parse) != 0) {
1.3 ! ohara 116: return NULL;
! 117: /* This is an error. */
1.1 ohara 118: }
119:
1.3 ! ohara 120: token = lex();
1.1 ohara 121: if (token == '(') {
122: token = lex();
123: if (is_token_cmo(token)) {
124: m = parse_cmo();
125: }else if(is_token_ox(token)) {
126: m = parse_ox();
127: }else {
128: parse_error("invalid symbol.");
129: }
130: return m;
131: }
132: return NULL;
133: }
134:
135: static ox *parse_ox()
136: {
137: ox *m = NULL;
138:
139: switch(token) {
140: case TOKEN(OX_COMMAND):
141: token = lex();
142: m = parse_ox_command();
143: break;
144: case TOKEN(OX_DATA):
145: token = lex();
146: m = parse_ox_data();
147: break;
148: default:
149: parse_error("invalid ox.");
150: }
151: return m;
152: }
153:
154: static ox *parse_ox_data()
155: {
156: ox *m;
157:
158: parse_comma();
159: parse_left_parenthesis();
160: m = (ox *)new_ox_data(parse_cmo());
161: parse_right_parenthesis();
162: return m;
163: }
164:
165: static ox *parse_ox_command()
166: {
167: ox *m;
168:
169: parse_comma();
170: parse_left_parenthesis();
171: m = (ox *)new_ox_command(parse_sm());
172: parse_right_parenthesis();
173: return m;
174: }
175:
176: static int parse_sm()
177: {
178: int sm_code;
179: if (token != TOKEN(SM)) {
180: parse_error("no opecode.");
181: }
182: sm_code = yylval.d;
183: token = lex();
184: parse_right_parenthesis();
185: return sm_code;
186: }
187:
188: static cmo *parse_cmo()
189: {
190: cmo *m = NULL;
191:
192: switch(token) {
193: case TOKEN(CMO_NULL):
194: token = lex();
195: m = parse_cmo_null();
196: break;
197: case TOKEN(CMO_INT32):
198: token = lex();
199: m = parse_cmo_int32();
200: break;
201: case TOKEN(CMO_STRING):
202: token = lex();
203: m = parse_cmo_string();
204: break;
205: case TOKEN(CMO_MATHCAP):
206: token = lex();
207: m = parse_cmo_mathcap();
208: break;
209: case TOKEN(CMO_LIST):
210: token = lex();
211: m = parse_cmo_list();
212: break;
213: case TOKEN(CMO_MONOMIAL32):
214: token = lex();
215: m = parse_cmo_monomial32();
216: break;
217: case TOKEN(CMO_ZZ):
218: token = lex();
219: m = parse_cmo_zz();
220: break;
221: case TOKEN(CMO_ZERO):
222: token = lex();
223: m = parse_cmo_zero();
224: break;
225: case TOKEN(CMO_DMS_GENERIC):
226: token = lex();
227: m = parse_cmo_dms_generic();
228: break;
229: case TOKEN(CMO_RING_BY_NAME):
230: token = lex();
231: m = parse_cmo_ring_by_name();
232: break;
233: case TOKEN(CMO_DISTRIBUTED_POLYNOMIAL):
234: token = lex();
235: m = parse_cmo_distributed_polynomial();
236: break;
237: case TOKEN(CMO_INDETERMINATE):
238: token = lex();
239: m = parse_cmo_indeterminate();
240: break;
241: case TOKEN(CMO_ERROR2):
242: token = lex();
243: m = parse_cmo_error2();
244: break;
245: default:
246: parse_error("invalid cmo.");
247: }
248: return m;
249: }
250:
251: static int parse_left_parenthesis()
252: {
253: if (token != '(') {
254: parse_error("no left parenthesis.");
255: }
256: token = lex();
257: }
258:
259: static int parse_right_parenthesis()
260: {
261: if (token != ')') {
262: parse_error("no right parenthesis.");
263: }
264: token = lex();
265: }
266:
267: static int parse_comma()
268: {
269: if (token != ',') {
270: parse_error("no comma.");
271: }
272: token = lex();
273: }
274:
1.3 ! ohara 275: static mpz_ptr new_mpz_set_str(char *s)
! 276: {
! 277: mpz_ptr z = malloc(sizeof(mpz_t));
! 278: mpz_init_set_str(z, s, 10);
! 279: return z;
! 280: }
! 281:
! 282: static mpz_ptr my_mpz_neg(mpz_ptr src)
! 283: {
! 284: mpz_ptr z = malloc(sizeof(mpz_t));
! 285: mpz_init(z);
! 286: mpz_neg(z, src);
! 287: #ifndef DEBUG
! 288: free(src);
! 289: #endif
! 290: return z;
! 291: }
! 292:
! 293: static mpz_ptr parse_integer()
1.1 ohara 294: {
1.3 ! ohara 295: int sign = 1;
! 296: mpz_ptr val;
! 297:
! 298: if (token == '+') {
! 299: token = lex();
! 300: }else if (token == '-') {
! 301: sign = -1;
! 302: token = lex();
! 303: }
! 304:
! 305: if (token != T_DIGIT) {
1.1 ohara 306: parse_error("no integer.");
307: }
1.3 ! ohara 308: val = new_mpz_set_str(yylval.sym);
! 309: if (sign == -1) {
! 310: val = my_mpz_neg(val);
! 311: }
! 312: #ifndef DEBUG
! 313: free(yylval.sym);
! 314: #endif
1.1 ohara 315: token = lex();
316: return val;
317: }
318:
319: static char *parse_string()
320: {
321: char *s;
322: if (token != T_STRING) {
323: parse_error("no string.");
324: }
325: s = yylval.sym;
326: token = lex();
327: return s;
328: }
329:
330: static cmo *parse_cmo_null()
331: {
332: parse_right_parenthesis();
333: return (cmo *)new_cmo_null();
334: }
335:
336: static cmo *parse_cmo_int32()
337: {
1.3 ! ohara 338: mpz_ptr z;
1.1 ohara 339:
340: parse_comma();
1.3 ! ohara 341: z = parse_integer();
1.1 ohara 342: parse_right_parenthesis();
1.3 ! ohara 343: return (cmo *)new_cmo_int32(mpz_get_si(z));
1.1 ohara 344: }
345:
346: static cmo *parse_cmo_string()
347: {
348: cmo_string *m;
349: char *s;
350:
351: parse_comma();
1.3 ! ohara 352: if (token == T_DIGIT) {
1.1 ohara 353: parse_integer();
354: parse_comma();
355: }else if (!pflag_cmo_addrev) {
356: parse_error("invalid cmo string.");
357: }
358: s = parse_string();
359: m = new_cmo_string(s);
360: parse_right_parenthesis();
361: return (cmo *)m;
362: }
363:
364: static cmo *parse_cmo_mathcap()
365: {
366: cmo *ob;
367:
368: parse_comma();
369: parse_left_parenthesis();
370: ob = parse_cmo();
371: parse_right_parenthesis();
372: return (cmo *)new_cmo_mathcap(ob);
373: }
374:
375: static cmo *parse_cmo_list()
376: {
377: int length=0;
378: int i=0;
379: cmo_list *m = new_cmo_list();
380: cmo *newcmo;
381:
382: if (token == ',') {
383: parse_comma();
384:
1.3 ! ohara 385: if (token == T_DIGIT) {
1.1 ohara 386: parse_integer();
387: parse_comma();
388: }else if (!pflag_cmo_addrev) {
389: parse_error("invalid cmo_list.");
390: }
391:
392: while(token == '(') {
393: parse_left_parenthesis();
394: newcmo = parse_cmo();
395: append_cmo_list(m, newcmo);
396: if (token != ',') {
397: break;
398: }
399: parse_comma();
400: }
401: }else if (!pflag_cmo_addrev) {
402: parse_error("invalid cmo_list.");
403: }
404: parse_right_parenthesis();
405: return (cmo *)m;
406: }
407:
408: static cmo *parse_cmo_monomial32()
409: {
410: int size;
411: int *exps;
412: int i;
413: cmo_monomial32 *m;
414: int tag;
415:
416: parse_comma();
1.3 ! ohara 417: size = mpz_get_si(parse_integer());
1.1 ohara 418: if (size < 0) {
419: parse_error("invalid value.");
420: }
421: m = new_cmo_monomial32_size(size);
422:
423: for(i=0; i<size; i++) {
424: parse_comma();
1.3 ! ohara 425: m->exps[i] = mpz_get_si(parse_integer());
1.1 ohara 426: }
427: parse_comma();
428: parse_left_parenthesis();
429: m->coef = parse_cmo();
430: tag = m->coef->tag;
431:
1.3 ! ohara 432: /* semantics:
! 433: The tag of m->coef must be CMO_ZZ or CMO_INT32. */
1.1 ohara 434: if (tag != CMO_ZZ && tag != CMO_INT32) {
435: parse_error("invalid cmo.");
436: }
437: parse_right_parenthesis();
438: return (cmo *)m;
439: }
440:
1.3 ! ohara 441: /* the following function rewrite internal data of mpz/cmo_zz. */
1.1 ohara 442: static cmo *parse_cmo_zz()
443: {
444: int length;
445: int i=0;
446: cmo_zz *m= NULL;
1.3 ! ohara 447: mpz_ptr z;
1.1 ohara 448:
449: parse_comma();
1.3 ! ohara 450: z = parse_integer();
1.1 ohara 451: if (token == ',') {
1.3 ! ohara 452: length = mpz_get_si(z);
1.1 ohara 453: m = new_cmo_zz_size(length);
454:
455: length = abs(length);
456: for(i=0; i<length; i++) {
457: parse_comma();
1.3 ! ohara 458: m->mpz->_mp_d[i] = mpz_get_si(parse_integer());
1.1 ohara 459: }
460: }else if (pflag_cmo_addrev) {
1.3 ! ohara 461: m = new_cmo_zz_set_mpz(z);
1.1 ohara 462: }else {
463: parse_error("no comma.");
464: }
465:
466: parse_right_parenthesis();
467: return (cmo *)m;
468: }
469:
470: static cmo *parse_cmo_zero()
471: {
472: parse_right_parenthesis();
473: return (cmo *)new_cmo_zero();
474: }
475:
476: static cmo *parse_cmo_dms_generic()
477: {
478: parse_right_parenthesis();
479: return (cmo *)new_cmo_dms_generic();
480: }
481:
482: static cmo *parse_cmo_ring_by_name()
483: {
484: cmo *ob;
485:
486: parse_comma();
487: parse_left_parenthesis();
488: ob = parse_cmo();
489:
1.3 ! ohara 490: /* The ob has a type of CMO_STRING. */
1.1 ohara 491: if (ob->tag != CMO_STRING) {
492: parse_error("invalid cmo.");
493: }
494: parse_right_parenthesis();
495: return (cmo *)new_cmo_ring_by_name(ob);
496: }
497:
498: static cmo *parse_cmo_distributed_polynomial()
499: {
500: int length=0;
501: int i=0;
502: cmo_distributed_polynomial *m = new_cmo_distributed_polynomial();
503: cmo *ob;
504: int tag;
505:
506: if (token == ',') {
507: parse_comma();
508:
1.3 ! ohara 509: if (token == T_DIGIT) {
1.1 ohara 510: parse_integer();
511: parse_comma();
512: }else if (!pflag_cmo_addrev) {
513: parse_error("invalid d-polynomial.");
514: }
515:
516: parse_left_parenthesis();
517: m->ringdef = parse_cmo();
518: tag = m->ringdef->tag;
1.3 ! ohara 519: /* m->ringdef needs to be a DringDefinition. */
1.1 ohara 520: if (tag != CMO_RING_BY_NAME && tag != CMO_DMS_GENERIC
521: && tag != CMO_DMS_OF_N_VARIABLES) {
522: parse_error("invalid cmo.");
523: }
524:
525: parse_comma();
526:
527: while(token == '(') {
528: parse_left_parenthesis();
529: ob = parse_cmo();
530: if (ob->tag != CMO_MONOMIAL32 && ob->tag != CMO_ZERO) {
531: parse_error("invalid cmo.");
532: }
533: append_cmo_list((cmo_list *)m, ob);
534: if (token != ',') {
535: break;
536: }
537: parse_comma();
538: }
539: }else if (!pflag_cmo_addrev) {
540: parse_error("invalid d-polynomial.");
541: }
542: parse_right_parenthesis();
543: return (cmo *)m;
544: }
545:
546: static cmo *parse_cmo_indeterminate()
547: {
548: cmo *ob;
549:
550: parse_comma();
551: parse_left_parenthesis();
552: ob = parse_cmo();
553: parse_right_parenthesis();
554: return (cmo *)new_cmo_indeterminate(ob);
555: }
556:
557: static cmo *parse_cmo_error2()
558: {
559: cmo *ob;
560:
561: parse_comma();
562: parse_left_parenthesis();
563: ob = parse_cmo();
564: parse_right_parenthesis();
565: return (cmo *)new_cmo_error2(ob);
566: }
567:
1.3 ! ohara 568: /* --- lexical analyzer --- */
1.1 ohara 569:
/* One-character lookahead of the lexer.  It starts out as a blank because
   lex() skips white space before it reads a token. */
static int c = ' ';

/* Read position inside the string handed to init_lex(). */
static char *mygetc_ptr;

/* Returns the next input character as an unsigned char value, or EOF once
   the terminating NUL has been reached (or when no input is installed).
   The read position never moves past the terminator, so calling this
   again at the end of the input is safe.  Previously the NUL was returned
   as an ordinary character and later calls read beyond the string, and
   bytes above 0x7f could come back negative. */
static int mygetc()
{
    if (mygetc_ptr == NULL || *mygetc_ptr == '\0') {
        return EOF;
    }
    return (unsigned char)*mygetc_ptr++;
}

/* Points the lexer at the NUL-terminated string `s' and resets the
   lookahead character, so that nothing left over from an earlier input is
   mistaken for the start of this one.  Always returns 0. */
int init_lex(char *s)
{
    mygetc_ptr = s;
    c = ' ';
    return 0;
}
584:
/* Size in bytes of the lexer's scratch buffers. */
#define SIZE_BUFFER 8192

/* Scratch area in which quoted strings and symbols are assembled. */
static char buffer[SIZE_BUFFER];
587:
/* Returns a freshly malloc()ed copy of the NUL-terminated string `src'.
   The caller owns the copy and releases it with free().
   An allocation failure ends the program with a message, in the same way
   as the lexer's "buffer overflow!" exit; previously the unchecked NULL
   was handed straight to strcpy(). */
static char *mkstr(char *src)
{
    size_t size = strlen(src) + 1;   /* text plus terminator */
    char *copy = malloc(size);

    if (copy == NULL) {
        fprintf(stderr, "out of memory!\n");
        exit(1);
    }
    memcpy(copy, src, size);
    return copy;
}
! 597:
! 598: /* no measure for buffer overflow */
! 599: static char *lex_digit()
! 600: {
! 601: static char buff[SIZE_BUFFER];
! 602: int i;
! 603: char *s;
! 604: int len;
! 605:
! 606: for(i=0; i<SIZE_BUFFER-1; i++) {
! 607: if(isdigit(c)) {
! 608: buff[i] = c;
! 609: }else {
! 610: buff[i] = '\0';
! 611: return mkstr(buff);
! 612: }
! 613: c = mygetc();
! 614: }
! 615: buff[SIZE_BUFFER-1] = '\0';
! 616: return mkstr(buff);
1.1 ohara 617: }
618:
619: #define MK_KEY_CMO(x) { #x , x , TOKEN(x) , IS_CMO }
620: #define MK_KEY_SM(x) { #x , x , TOKEN(SM) , IS_SM }
621: #define MK_KEY_OX(x) { #x , x , TOKEN(x) , IS_OX }
622:
623: static symbol symbol_list[] = {
624: MK_KEY_CMO(CMO_NULL),
625: MK_KEY_CMO(CMO_INT32),
626: MK_KEY_CMO(CMO_DATUM),
627: MK_KEY_CMO(CMO_STRING),
628: MK_KEY_CMO(CMO_MATHCAP),
629: MK_KEY_CMO(CMO_LIST),
630: MK_KEY_CMO(CMO_MONOMIAL32),
631: MK_KEY_CMO(CMO_ZZ),
632: MK_KEY_CMO(CMO_ZERO),
633: MK_KEY_CMO(CMO_DMS_GENERIC),
634: MK_KEY_CMO(CMO_RING_BY_NAME),
635: MK_KEY_CMO(CMO_INDETERMINATE),
636: MK_KEY_CMO(CMO_DISTRIBUTED_POLYNOMIAL),
637: MK_KEY_CMO(CMO_ERROR2),
638: MK_KEY_SM(SM_popCMO),
639: MK_KEY_SM(SM_popString),
640: MK_KEY_SM(SM_mathcap),
641: MK_KEY_SM(SM_pops),
642: MK_KEY_SM(SM_executeStringByLocalParser),
643: MK_KEY_SM(SM_executeFunction),
644: MK_KEY_SM(SM_setMathCap),
645: MK_KEY_SM(SM_shutdown),
646: MK_KEY_SM(SM_control_kill),
647: MK_KEY_SM(SM_control_reset_connection),
648: MK_KEY_OX(OX_COMMAND),
649: MK_KEY_OX(OX_DATA),
650: {NULL, 0, 0, 0} /* a gate keeper */
651: };
652:
653: symbol* lookup_by_symbol(char *key)
654: {
655: symbol *symp;
656: for(symp = symbol_list; symp->key != NULL; symp++) {
657: if (strcmp(key, symp->key)==0) {
658: return symp;
659: }
660: }
661: return NULL;
662: }
663:
664: symbol* lookup_by_token(int tok)
665: {
666: symbol *symp;
667: for(symp = symbol_list; symp->key != NULL; symp++) {
668: if (tok == symp->token) {
669: return symp;
670: }
671: }
672: return NULL;
673: }
674:
675: symbol* lookup_by_tag(int tag)
676: {
677: symbol *symp;
678: for(symp = symbol_list; symp->key != NULL; symp++) {
679: if (tag == symp->tag) {
680: return symp;
681: }
682: }
683: return NULL;
684: }
685:
686: symbol* lookup(int i)
687: {
688: return &symbol_list[i];
689: }
690:
1.3 ! ohara 691: /* no measure for buffer overflow */
1.1 ohara 692: static char *lex_quoted_string()
693: {
694: int i;
695: char c0 = ' ';
1.3 ! ohara 696:
1.1 ohara 697: for (i=0; i<SIZE_BUFFER; i++) {
1.3 ! ohara 698: c = mygetc();
1.1 ohara 699: if(c == '"') {
1.3 ! ohara 700: c = mygetc();
1.1 ohara 701: buffer[i]='\0';
1.3 ! ohara 702: return mkstr(buffer);
1.1 ohara 703: }else if (c == '\\') {
704: c0 = c;
1.3 ! ohara 705: c = mygetc();
1.1 ohara 706: if (c != '"') {
707: buffer[i++] = c0;
708: }
709: }
710: buffer[i]=c;
711: }
712: fprintf(stderr, "buffer overflow!\n");
713: exit(1);
714: /* return NULL; */
715: }
716:
717: static int token_of_symbol(char *key)
718: {
719: symbol *symp = lookup_by_symbol(key);
720: if (symp != NULL) {
721: yylval.d = symp->tag;
722: return symp->token;
723: }
724: #if DEBUG
725: fprintf(stderr, "lex error:: \"%s\" is unknown symbol.\n", key);
726: #endif
727: return 0;
728: }
729:
730: static int lex_symbol()
731: {
732: int i;
733: for (i=0; i<SIZE_BUFFER; i++) {
734: if (!isalnum(c) && c != '_') {
735: buffer[i]='\0';
736: return token_of_symbol(buffer);
737: }
738: buffer[i]=c;
1.3 ! ohara 739: c = mygetc();
1.1 ohara 740: }
741: fprintf(stderr, "buffer overflow!\n");
742: return 0;
743: }
744:
745: /* return する前に一文字先読みしておく. */
746: int lex()
747: {
748: int c_dash = 0;
749:
1.3 ! ohara 750: /* white spaces are ignored. */
! 751: while (isspace(c)) {
! 752: c = mygetc();
1.1 ohara 753: }
754:
755: switch(c) {
756: case '(':
757: case ')':
758: case ',':
1.3 ! ohara 759: case '+':
! 760: case '-':
1.1 ohara 761: c_dash = c;
762: c = ' ';
763: return c_dash;
764: case EOF:
1.3 ! ohara 765: c = mygetc();
1.1 ohara 766: return c_dash;
767: case '"': /* a quoted string! */
768: yylval.sym = lex_quoted_string();
769: return T_STRING;
770: default:
771: }
772:
1.3 ! ohara 773: if (isalpha(c)) {
! 774: /* symbols */
1.1 ohara 775: return lex_symbol();
776: }
777:
1.3 ! ohara 778: /* digit */
1.1 ohara 779: if (isdigit(c)){
1.3 ! ohara 780: yylval.sym = lex_digit();
! 781: return T_DIGIT;
1.1 ohara 782: }
1.3 ! ohara 783: c = mygetc();
1.1 ohara 784: return 0;
785: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>