Annotation of OpenXM/src/ox_toolkit/parse.c, Revision 1.4
1.1 ohara 1: /* -*- mode: C; coding: euc-japan -*- */
1.4 ! ohara 2: /* $OpenXM: OpenXM/src/ox_toolkit/parse.c,v 1.3 2000/01/17 19:55:56 ohara Exp $ */
1.1 ohara 3:
/*
   This module is a parser for OX/CMO expressions.
   Some comments are written in Japanese using the EUC-JP coded
   character set.
*/
1.1 ohara 9:
10: #include <stdio.h>
11: #include <stdlib.h>
12: #include <string.h>
13: #include <sys/param.h>
14: #include <setjmp.h>
1.4 ! ohara 15:
! 16: #include "ox_toolkit.h"
1.1 ohara 17: #include "parse.h"
18:
1.3 ohara 19: /* --- Parser --- */
/* Remarks on semantics.
   For CMO_LIST and CMO_STRING, the element count given in advance is ignored.
   For CMO_MONOMIAL32 it is not ignored (that is, a wrong count is a syntax error).
*/
24:
/*
   parse.c reads a CMO string written as a Lisp-like expression and
   outputs a byte sequence.  The cmo structure is used as the intermediate
   representation.
   parse() builds a cmo structure from the sequence of tokens and returns
   a pointer to it.
   The important point is that the parser (each of its subroutines)
   always reads one token ahead.
*/
32:
/* The token currently being read (the parser's one-token lookahead). */
static int token = 0;

/* Attribute value of the current token.  yylval is set by lex(). */
static union{
    int d;      /* tag value, stored by token_of_symbol() for known symbols */
    char *sym;  /* heap copy of the text of a T_STRING or T_DIGIT token */
} yylval;

/*
   If `pflag_cmo_addrev' is set, extended CMO expressions are accepted.
   For example, (CMO_STRING, "hello") is not a real CMO expression,
   but it is accepted.
*/
static int pflag_cmo_addrev = 1;
1.1 ohara 48:
/* Forward declarations of the file-local parser routines.
   Each parse_* routine expects `token' to hold the next unread token on
   entry and leaves the following unread token in `token' on return. */
static int parse_error(char *s);
static int parse_lf();
static int parse_right_parenthesis();
static int parse_left_parenthesis();
static int parse_comma();
static mpz_ptr parse_integer();
static char *parse_string();
static cmo *parse_cmo_null();
static cmo *parse_cmo_int32();
static cmo *parse_cmo_string();
static cmo *parse_cmo_mathcap();
static cmo *parse_cmo_list();
static cmo *parse_cmo_monomial32();
static cmo *parse_cmo_zz();
static cmo *parse_cmo_zero();
static cmo *parse_cmo_dms_generic();
static cmo *parse_cmo_ring_by_name();
static cmo *parse_cmo_distributed_polynomial();
static cmo *parse_cmo_indeterminate();
static cmo *parse_cmo_error2();
static cmo *parse_cmo();
static int parse_sm();
static ox *parse_ox();
static ox *parse_ox_command();
static ox *parse_ox_data();
75:
76: static int is_token_cmo(int token)
77: {
78: return (token >= MIN_T_CMO && token < MAX_T_CMO) || token == TOKEN(CMO_ERROR2);
79: }
80:
81: static int is_token_sm(int token)
82: {
83: return token == TOKEN(SM);
84: }
85:
86: static int is_token_ox(int token)
87: {
88: return token >= MIN_T_OX && token < MAX_T_OX;
89: }
90:
/* Jump target armed by parse(); parse_error() unwinds to it. */
static jmp_buf env_parse;

/* Reports a parsing fault on stderr and abandons the current parse by
   jumping back to the setjmp() in parse().  This function does not
   return to its caller. */
static int parse_error(char *s)
{
    fprintf(stderr, "syntax error: %s\n", s);
    longjmp(env_parse, 1);
}
99:
1.2 ohara 100: int setflag_parse(int flag)
101: {
102: pflag_cmo_addrev = flag;
103: }
104:
105: int init_parser(char *s)
106: {
107: setflag_parse(PFLAG_ADDREV);
1.3 ohara 108: init_lex(s);
1.2 ohara 109: }
110:
1.1 ohara 111: cmo *parse()
112: {
113: cmo *m;
114:
115: if (setjmp(env_parse) != 0) {
1.3 ohara 116: return NULL;
117: /* This is an error. */
1.1 ohara 118: }
119:
1.3 ohara 120: token = lex();
1.1 ohara 121: if (token == '(') {
122: token = lex();
123: if (is_token_cmo(token)) {
124: m = parse_cmo();
125: }else if(is_token_ox(token)) {
126: m = parse_ox();
127: }else {
128: parse_error("invalid symbol.");
129: }
130: return m;
131: }
132: return NULL;
133: }
134:
135: static ox *parse_ox()
136: {
137: ox *m = NULL;
138:
139: switch(token) {
140: case TOKEN(OX_COMMAND):
141: token = lex();
142: m = parse_ox_command();
143: break;
144: case TOKEN(OX_DATA):
145: token = lex();
146: m = parse_ox_data();
147: break;
148: default:
149: parse_error("invalid ox.");
150: }
151: return m;
152: }
153:
154: static ox *parse_ox_data()
155: {
156: ox *m;
157:
158: parse_comma();
159: parse_left_parenthesis();
160: m = (ox *)new_ox_data(parse_cmo());
161: parse_right_parenthesis();
162: return m;
163: }
164:
165: static ox *parse_ox_command()
166: {
167: ox *m;
168:
169: parse_comma();
170: parse_left_parenthesis();
171: m = (ox *)new_ox_command(parse_sm());
172: parse_right_parenthesis();
173: return m;
174: }
175:
176: static int parse_sm()
177: {
178: int sm_code;
179: if (token != TOKEN(SM)) {
180: parse_error("no opecode.");
181: }
182: sm_code = yylval.d;
183: token = lex();
184: parse_right_parenthesis();
185: return sm_code;
186: }
187:
188: static cmo *parse_cmo()
189: {
190: cmo *m = NULL;
191:
192: switch(token) {
193: case TOKEN(CMO_NULL):
194: token = lex();
195: m = parse_cmo_null();
196: break;
197: case TOKEN(CMO_INT32):
198: token = lex();
199: m = parse_cmo_int32();
200: break;
201: case TOKEN(CMO_STRING):
202: token = lex();
203: m = parse_cmo_string();
204: break;
205: case TOKEN(CMO_MATHCAP):
206: token = lex();
207: m = parse_cmo_mathcap();
208: break;
209: case TOKEN(CMO_LIST):
210: token = lex();
211: m = parse_cmo_list();
212: break;
213: case TOKEN(CMO_MONOMIAL32):
214: token = lex();
215: m = parse_cmo_monomial32();
216: break;
217: case TOKEN(CMO_ZZ):
218: token = lex();
219: m = parse_cmo_zz();
220: break;
221: case TOKEN(CMO_ZERO):
222: token = lex();
223: m = parse_cmo_zero();
224: break;
225: case TOKEN(CMO_DMS_GENERIC):
226: token = lex();
227: m = parse_cmo_dms_generic();
228: break;
229: case TOKEN(CMO_RING_BY_NAME):
230: token = lex();
231: m = parse_cmo_ring_by_name();
232: break;
233: case TOKEN(CMO_DISTRIBUTED_POLYNOMIAL):
234: token = lex();
235: m = parse_cmo_distributed_polynomial();
236: break;
237: case TOKEN(CMO_INDETERMINATE):
238: token = lex();
239: m = parse_cmo_indeterminate();
240: break;
241: case TOKEN(CMO_ERROR2):
242: token = lex();
243: m = parse_cmo_error2();
244: break;
245: default:
246: parse_error("invalid cmo.");
247: }
248: return m;
249: }
250:
251: static int parse_left_parenthesis()
252: {
253: if (token != '(') {
254: parse_error("no left parenthesis.");
255: }
256: token = lex();
257: }
258:
259: static int parse_right_parenthesis()
260: {
261: if (token != ')') {
262: parse_error("no right parenthesis.");
263: }
264: token = lex();
265: }
266:
267: static int parse_comma()
268: {
269: if (token != ',') {
270: parse_error("no comma.");
271: }
272: token = lex();
273: }
274:
1.3 ohara 275: static mpz_ptr new_mpz_set_str(char *s)
276: {
277: mpz_ptr z = malloc(sizeof(mpz_t));
278: mpz_init_set_str(z, s, 10);
279: return z;
280: }
281:
282: static mpz_ptr my_mpz_neg(mpz_ptr src)
283: {
284: mpz_ptr z = malloc(sizeof(mpz_t));
285: mpz_init(z);
286: mpz_neg(z, src);
287: #ifndef DEBUG
288: free(src);
289: #endif
290: return z;
291: }
292:
293: static mpz_ptr parse_integer()
1.1 ohara 294: {
1.3 ohara 295: int sign = 1;
296: mpz_ptr val;
297:
298: if (token == '+') {
299: token = lex();
300: }else if (token == '-') {
301: sign = -1;
302: token = lex();
303: }
304:
305: if (token != T_DIGIT) {
1.1 ohara 306: parse_error("no integer.");
307: }
1.3 ohara 308: val = new_mpz_set_str(yylval.sym);
309: if (sign == -1) {
310: val = my_mpz_neg(val);
311: }
312: #ifndef DEBUG
313: free(yylval.sym);
314: #endif
1.1 ohara 315: token = lex();
316: return val;
317: }
318:
319: static char *parse_string()
320: {
321: char *s;
322: if (token != T_STRING) {
323: parse_error("no string.");
324: }
325: s = yylval.sym;
326: token = lex();
327: return s;
328: }
329:
330: static cmo *parse_cmo_null()
331: {
332: parse_right_parenthesis();
333: return (cmo *)new_cmo_null();
334: }
335:
336: static cmo *parse_cmo_int32()
337: {
1.3 ohara 338: mpz_ptr z;
1.1 ohara 339:
340: parse_comma();
1.3 ohara 341: z = parse_integer();
1.1 ohara 342: parse_right_parenthesis();
1.3 ohara 343: return (cmo *)new_cmo_int32(mpz_get_si(z));
1.1 ohara 344: }
345:
346: static cmo *parse_cmo_string()
347: {
348: cmo_string *m;
349: char *s;
350:
351: parse_comma();
1.3 ohara 352: if (token == T_DIGIT) {
1.1 ohara 353: parse_integer();
354: parse_comma();
355: }else if (!pflag_cmo_addrev) {
356: parse_error("invalid cmo string.");
357: }
358: s = parse_string();
359: m = new_cmo_string(s);
360: parse_right_parenthesis();
361: return (cmo *)m;
362: }
363:
364: static cmo *parse_cmo_mathcap()
365: {
366: cmo *ob;
367:
368: parse_comma();
369: parse_left_parenthesis();
370: ob = parse_cmo();
371: parse_right_parenthesis();
372: return (cmo *)new_cmo_mathcap(ob);
373: }
374:
375: static cmo *parse_cmo_list()
376: {
377: int length=0;
378: int i=0;
379: cmo_list *m = new_cmo_list();
380: cmo *newcmo;
381:
382: if (token == ',') {
383: parse_comma();
384:
1.3 ohara 385: if (token == T_DIGIT) {
1.1 ohara 386: parse_integer();
387: parse_comma();
388: }else if (!pflag_cmo_addrev) {
389: parse_error("invalid cmo_list.");
390: }
391:
392: while(token == '(') {
393: parse_left_parenthesis();
394: newcmo = parse_cmo();
395: append_cmo_list(m, newcmo);
396: if (token != ',') {
397: break;
398: }
399: parse_comma();
400: }
401: }else if (!pflag_cmo_addrev) {
402: parse_error("invalid cmo_list.");
403: }
404: parse_right_parenthesis();
405: return (cmo *)m;
406: }
407:
408: static cmo *parse_cmo_monomial32()
409: {
410: int size;
411: int *exps;
412: int i;
413: cmo_monomial32 *m;
414: int tag;
415:
416: parse_comma();
1.3 ohara 417: size = mpz_get_si(parse_integer());
1.1 ohara 418: if (size < 0) {
419: parse_error("invalid value.");
420: }
421: m = new_cmo_monomial32_size(size);
422:
423: for(i=0; i<size; i++) {
424: parse_comma();
1.3 ohara 425: m->exps[i] = mpz_get_si(parse_integer());
1.1 ohara 426: }
427: parse_comma();
428: parse_left_parenthesis();
429: m->coef = parse_cmo();
430: tag = m->coef->tag;
431:
1.3 ohara 432: /* semantics:
433: The tag of m->coef must be CMO_ZZ or CMO_INT32. */
1.1 ohara 434: if (tag != CMO_ZZ && tag != CMO_INT32) {
435: parse_error("invalid cmo.");
436: }
437: parse_right_parenthesis();
438: return (cmo *)m;
439: }
440:
1.3 ohara 441: /* the following function rewrite internal data of mpz/cmo_zz. */
1.1 ohara 442: static cmo *parse_cmo_zz()
443: {
444: int length;
445: int i=0;
446: cmo_zz *m= NULL;
1.3 ohara 447: mpz_ptr z;
1.1 ohara 448:
449: parse_comma();
1.3 ohara 450: z = parse_integer();
1.1 ohara 451: if (token == ',') {
1.3 ohara 452: length = mpz_get_si(z);
1.1 ohara 453: m = new_cmo_zz_size(length);
454:
455: length = abs(length);
456: for(i=0; i<length; i++) {
457: parse_comma();
1.3 ohara 458: m->mpz->_mp_d[i] = mpz_get_si(parse_integer());
1.1 ohara 459: }
460: }else if (pflag_cmo_addrev) {
1.3 ohara 461: m = new_cmo_zz_set_mpz(z);
1.1 ohara 462: }else {
463: parse_error("no comma.");
464: }
465:
466: parse_right_parenthesis();
467: return (cmo *)m;
468: }
469:
470: static cmo *parse_cmo_zero()
471: {
472: parse_right_parenthesis();
473: return (cmo *)new_cmo_zero();
474: }
475:
476: static cmo *parse_cmo_dms_generic()
477: {
478: parse_right_parenthesis();
479: return (cmo *)new_cmo_dms_generic();
480: }
481:
482: static cmo *parse_cmo_ring_by_name()
483: {
484: cmo *ob;
485:
486: parse_comma();
487: parse_left_parenthesis();
488: ob = parse_cmo();
489:
1.3 ohara 490: /* The ob has a type of CMO_STRING. */
1.1 ohara 491: if (ob->tag != CMO_STRING) {
492: parse_error("invalid cmo.");
493: }
494: parse_right_parenthesis();
495: return (cmo *)new_cmo_ring_by_name(ob);
496: }
497:
498: static cmo *parse_cmo_distributed_polynomial()
499: {
500: int length=0;
501: int i=0;
502: cmo_distributed_polynomial *m = new_cmo_distributed_polynomial();
503: cmo *ob;
504: int tag;
505:
506: if (token == ',') {
507: parse_comma();
508:
1.3 ohara 509: if (token == T_DIGIT) {
1.1 ohara 510: parse_integer();
511: parse_comma();
512: }else if (!pflag_cmo_addrev) {
513: parse_error("invalid d-polynomial.");
514: }
515:
516: parse_left_parenthesis();
517: m->ringdef = parse_cmo();
518: tag = m->ringdef->tag;
1.3 ohara 519: /* m->ringdef needs to be a DringDefinition. */
1.1 ohara 520: if (tag != CMO_RING_BY_NAME && tag != CMO_DMS_GENERIC
521: && tag != CMO_DMS_OF_N_VARIABLES) {
522: parse_error("invalid cmo.");
523: }
524:
525: parse_comma();
526:
527: while(token == '(') {
528: parse_left_parenthesis();
529: ob = parse_cmo();
530: if (ob->tag != CMO_MONOMIAL32 && ob->tag != CMO_ZERO) {
531: parse_error("invalid cmo.");
532: }
533: append_cmo_list((cmo_list *)m, ob);
534: if (token != ',') {
535: break;
536: }
537: parse_comma();
538: }
539: }else if (!pflag_cmo_addrev) {
540: parse_error("invalid d-polynomial.");
541: }
542: parse_right_parenthesis();
543: return (cmo *)m;
544: }
545:
546: static cmo *parse_cmo_indeterminate()
547: {
548: cmo *ob;
549:
550: parse_comma();
551: parse_left_parenthesis();
552: ob = parse_cmo();
553: parse_right_parenthesis();
554: return (cmo *)new_cmo_indeterminate(ob);
555: }
556:
557: static cmo *parse_cmo_error2()
558: {
559: cmo *ob;
560:
561: parse_comma();
562: parse_left_parenthesis();
563: ob = parse_cmo();
564: parse_right_parenthesis();
565: return (cmo *)new_cmo_error2(ob);
566: }
567:
1.3 ohara 568: /* --- lexical analyzer --- */
1.1 ohara 569:
/* The lexer's one-character lookahead.  White space is skipped by the
   lexical analyzer, so a blank means "nothing pending". */
static int c = ' ';

/* Read position inside the string given to init_lex(). */
static char *mygetc_ptr;

/* Returns the next character of the input string as a non-negative
   value, or EOF once the terminating NUL has been reached.
   The original returned the raw (possibly negative) char and kept
   advancing past the terminator, so the lexer could read beyond the end
   of the input. */
static int mygetc(void)
{
    if (mygetc_ptr == NULL || *mygetc_ptr == '\0') {
        return EOF;
    }
    return (unsigned char)*mygetc_ptr++;
}

/* Points the lexer at the NUL-terminated string `s' and discards any
   lookahead character left over from a previous input.
   Returns 0. */
int init_lex(char *s)
{
    mygetc_ptr = s;
    c = ' ';
    return 0;
}
584:
/* Capacity of the lexer's scratch buffers, terminator included. */
#define SIZE_BUFFER 8192
static char buffer[SIZE_BUFFER];

/* Returns a newly allocated copy of the NUL-terminated string `src'.
   The caller owns the copy.  The original passed an unchecked malloc()
   result to strcpy(); running out of memory is now treated as fatal,
   like the buffer-overflow case in lex_quoted_string(). */
static char *mkstr(char *src)
{
    size_t size = strlen(src) + 1;
    char *s = malloc(size);

    if (s == NULL) {
        fprintf(stderr, "out of memory!\n");
        exit(1);
    }
    memcpy(s, src, size);
    return s;
}
597:
598: /* no measure for buffer overflow */
599: static char *lex_digit()
600: {
601: static char buff[SIZE_BUFFER];
602: int i;
603: char *s;
604: int len;
605:
606: for(i=0; i<SIZE_BUFFER-1; i++) {
607: if(isdigit(c)) {
608: buff[i] = c;
609: }else {
610: buff[i] = '\0';
611: return mkstr(buff);
612: }
613: c = mygetc();
614: }
615: buff[SIZE_BUFFER-1] = '\0';
616: return mkstr(buff);
1.1 ohara 617: }
618:
/* Builders for symbol_list entries.  Each expands to an initializer
   { spelling of x, value of x, token, kind }.  CMO and OX keywords get
   their own token TOKEN(x); every SM command shares the single token
   TOKEN(SM). */
#define MK_KEY_CMO(x) { #x , x , TOKEN(x) , IS_CMO }
#define MK_KEY_SM(x) { #x , x , TOKEN(SM) , IS_SM }
#define MK_KEY_OX(x) { #x , x , TOKEN(x) , IS_OX }

/* Table of the keywords known to the lexer, terminated by an entry with
   a NULL key.  NOTE(review): struct symbol is declared elsewhere; the
   initializer order presumably matches its key, tag, token fields --
   confirm against the header. */
static struct symbol symbol_list[] = {
    MK_KEY_CMO(CMO_NULL),
    MK_KEY_CMO(CMO_INT32),
    MK_KEY_CMO(CMO_DATUM),
    MK_KEY_CMO(CMO_STRING),
    MK_KEY_CMO(CMO_MATHCAP),
    MK_KEY_CMO(CMO_LIST),
    MK_KEY_CMO(CMO_MONOMIAL32),
    MK_KEY_CMO(CMO_ZZ),
    MK_KEY_CMO(CMO_ZERO),
    MK_KEY_CMO(CMO_DMS_GENERIC),
    MK_KEY_CMO(CMO_RING_BY_NAME),
    MK_KEY_CMO(CMO_INDETERMINATE),
    MK_KEY_CMO(CMO_DISTRIBUTED_POLYNOMIAL),
    MK_KEY_CMO(CMO_ERROR2),
    MK_KEY_SM(SM_popCMO),
    MK_KEY_SM(SM_popString),
    MK_KEY_SM(SM_mathcap),
    MK_KEY_SM(SM_pops),
    MK_KEY_SM(SM_executeStringByLocalParser),
    MK_KEY_SM(SM_executeFunction),
    MK_KEY_SM(SM_setMathCap),
    MK_KEY_SM(SM_shutdown),
    MK_KEY_SM(SM_control_kill),
    MK_KEY_SM(SM_control_reset_connection),
    MK_KEY_OX(OX_COMMAND),
    MK_KEY_OX(OX_DATA),
    {NULL, 0, 0, 0} /* a gate keeper */
};
652:
1.4 ! ohara 653: symbol_t lookup_by_symbol(char *key)
1.1 ohara 654: {
1.4 ! ohara 655: symbol_t symp;
1.1 ohara 656: for(symp = symbol_list; symp->key != NULL; symp++) {
657: if (strcmp(key, symp->key)==0) {
658: return symp;
659: }
660: }
661: return NULL;
662: }
663:
1.4 ! ohara 664: symbol_t lookup_by_token(int tok)
1.1 ohara 665: {
1.4 ! ohara 666: symbol_t symp;
1.1 ohara 667: for(symp = symbol_list; symp->key != NULL; symp++) {
668: if (tok == symp->token) {
669: return symp;
670: }
671: }
672: return NULL;
673: }
674:
1.4 ! ohara 675: symbol_t lookup_by_tag(int tag)
1.1 ohara 676: {
1.4 ! ohara 677: symbol_t symp;
1.1 ohara 678: for(symp = symbol_list; symp->key != NULL; symp++) {
679: if (tag == symp->tag) {
680: return symp;
681: }
682: }
683: return NULL;
684: }
685:
1.4 ! ohara 686: symbol_t lookup(int i)
1.1 ohara 687: {
688: return &symbol_list[i];
689: }
690:
1.4 ! ohara 691: char *symbol_get_key(symbol_t sp)
! 692: {
! 693: return sp->key;
! 694: }
! 695:
1.3 ohara 696: /* no measure for buffer overflow */
1.1 ohara 697: static char *lex_quoted_string()
698: {
699: int i;
700: char c0 = ' ';
1.3 ohara 701:
1.1 ohara 702: for (i=0; i<SIZE_BUFFER; i++) {
1.3 ohara 703: c = mygetc();
1.1 ohara 704: if(c == '"') {
1.3 ohara 705: c = mygetc();
1.1 ohara 706: buffer[i]='\0';
1.3 ohara 707: return mkstr(buffer);
1.1 ohara 708: }else if (c == '\\') {
709: c0 = c;
1.3 ohara 710: c = mygetc();
1.1 ohara 711: if (c != '"') {
712: buffer[i++] = c0;
713: }
714: }
715: buffer[i]=c;
716: }
717: fprintf(stderr, "buffer overflow!\n");
718: exit(1);
719: /* return NULL; */
720: }
721:
722: static int token_of_symbol(char *key)
723: {
1.4 ! ohara 724: symbol_t symp = lookup_by_symbol(key);
1.1 ohara 725: if (symp != NULL) {
726: yylval.d = symp->tag;
727: return symp->token;
728: }
729: #if DEBUG
730: fprintf(stderr, "lex error:: \"%s\" is unknown symbol.\n", key);
731: #endif
732: return 0;
733: }
734:
735: static int lex_symbol()
736: {
737: int i;
738: for (i=0; i<SIZE_BUFFER; i++) {
739: if (!isalnum(c) && c != '_') {
740: buffer[i]='\0';
741: return token_of_symbol(buffer);
742: }
743: buffer[i]=c;
1.3 ohara 744: c = mygetc();
1.1 ohara 745: }
746: fprintf(stderr, "buffer overflow!\n");
747: return 0;
748: }
749:
750: /* return する前に一文字先読みしておく. */
751: int lex()
752: {
753: int c_dash = 0;
754:
1.3 ohara 755: /* white spaces are ignored. */
756: while (isspace(c)) {
757: c = mygetc();
1.1 ohara 758: }
759:
760: switch(c) {
761: case '(':
762: case ')':
763: case ',':
1.3 ohara 764: case '+':
765: case '-':
1.1 ohara 766: c_dash = c;
767: c = ' ';
768: return c_dash;
769: case EOF:
1.3 ohara 770: c = mygetc();
1.1 ohara 771: return c_dash;
772: case '"': /* a quoted string! */
773: yylval.sym = lex_quoted_string();
774: return T_STRING;
775: default:
776: }
777:
1.3 ohara 778: if (isalpha(c)) {
779: /* symbols */
1.1 ohara 780: return lex_symbol();
781: }
782:
1.3 ohara 783: /* digit */
1.1 ohara 784: if (isdigit(c)){
1.3 ohara 785: yylval.sym = lex_digit();
786: return T_DIGIT;
1.1 ohara 787: }
1.3 ohara 788: c = mygetc();
1.1 ohara 789: return 0;
790: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>