Annotation of OpenXM/src/ox_math/parse.c, Revision 1.1
1.1 ! ohara 1: /* -*- mode: C; coding: euc-japan -*- */
! 2: /* $OpenXM$ */
! 3: /* $Id: parse.c,v 1.4 1999/10/14 10:19:04 ohara Exp ohara $ */
/* Parser for OX expressions and CMO expressions. */
! 5:
/* When cmo_addrev is set, input that is not strictly a CMO expression,
   for example (CMO_STRING, "abcdef"), is also converted to a CMO. */
! 9:
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include "oxtag.h"
#include "ox.h"
#include "parse.h"
! 17:
/* --- Syntax analysis --- */
/* parse.c and lex.c read a CMO string written in a Lisp-like notation
   and output a byte sequence.  A cmo * is used as the intermediate
   representation: parse() builds (the structure pointed to by) a cmo *
   from the sequence of tokens.
*/
! 23:
/* The important point is that the parser (each of its subroutines)
   always reads one token ahead.
*/
! 27:
! 28: /* 現在読み込み中のトークンを表す. */
! 29: static int token = 0;
! 30:
! 31: /* yylval は lex() によってセットされる. */
! 32: extern lex_value_t yylval;
! 33:
! 34: int cmo_addrev = 1; /* CMO の省略記法を許すか否かのフラグ */
! 35:
! 36: /* 関数の宣言 */
! 37: static int parse_error(char *s);
! 38: static int parse_lf();
! 39: static int parse_right_parenthesis();
! 40: static int parse_left_parenthesis();
! 41: static int parse_comma();
! 42: static int parse_integer();
! 43: static cmo *parse_cmo_null();
! 44: static cmo *parse_cmo_zz();
! 45: static cmo *parse_cmo_list();
! 46: static cmo *parse_cmo_int32();
! 47: static cmo *parse_cmo_string();
! 48: static cmo *parse_cmo();
! 49: static char *parse_string();
! 50: static int parse_sm();
! 51: static ox* parse_ox();
! 52: static ox* parse_ox_command();
! 53: static ox* parse_ox_data();
! 54:
/* Prints a diagnostic on stderr and terminates the process with status 1.
   s: message to print followed by a newline; when NULL, the generic
   text "syntax error." is printed instead.
   Never returns to the caller. */
static int parse_error(char *s)
{
    if (s == NULL) {
        fprintf(stderr, "syntax error.\n");
    } else {
        fprintf(stderr, "%s\n", s);
    }
    /* Error handling.  This should really be a longjmp. */
    exit(1);
}
! 64:
! 65: #define MIN_T_CMO (T_MAGIC + 0)
! 66: #define MAX_T_CMO (T_MAGIC + 256)
! 67:
! 68: static int is_t_cmo(int token)
! 69: {
! 70: return (token >= MIN_T_CMO && token < MAX_T_CMO) || token == T_CMO_ERROR2;
! 71: }
! 72:
! 73: #define MIN_T_SM (T_MAGIC + 256)
! 74: #define MAX_T_SM (T_MAGIC + 1100)
! 75:
! 76: static int is_t_sm(int token)
! 77: {
! 78: return token >= MIN_T_SM && token < MAX_T_SM;
! 79: }
! 80:
! 81: #define MIN_T_OX (T_MAGIC + 512)
! 82: #define MAX_T_OX (T_MAGIC + 600)
! 83:
! 84: static int is_t_ox(int token)
! 85: {
! 86: return token >= MIN_T_OX && token < MAX_T_OX;
! 87: }
! 88:
! 89: /* この部分は書き換え予定. */
! 90: cmo *parse()
! 91: {
! 92: cmo *m;
! 93:
! 94: do{
! 95: token = lex();
! 96: }while (token == '\n');
! 97:
! 98: if (token == '(') {
! 99: token = lex();
! 100: if (is_t_cmo(token)) {
! 101: m = parse_cmo();
! 102: }else if(is_t_ox(token)) {
! 103: m = parse_ox();
! 104: }else {
! 105: parse_error("syntax error: unknown keyword.");
! 106: return NULL;
! 107: }
! 108: parse_lf();
! 109: return m;
! 110: }
! 111: return NULL;
! 112: }
! 113:
! 114: /* トークンを先読みしない(重要). */
! 115: static int parse_lf()
! 116: {
! 117: if (token != '\n') {
! 118: parse_error("syntax error: not new line.");
! 119: }
! 120: return 0;
! 121: }
! 122:
! 123:
! 124: static ox* parse_ox()
! 125: {
! 126: ox *m = NULL;
! 127:
! 128: switch(token) {
! 129: case T_OX_COMMAND:
! 130: token = lex();
! 131: m = parse_ox_command();
! 132: break;
! 133: case T_OX_DATA:
! 134: token = lex();
! 135: m = parse_ox_data();
! 136: break;
! 137: default:
! 138: parse_error("syntax error: invalid ox_tag.");
! 139: }
! 140: return m;
! 141: }
! 142:
! 143: static ox* parse_ox_data()
! 144: {
! 145: ox* m;
! 146:
! 147: parse_comma();
! 148: parse_left_parenthesis();
! 149: m = (ox *)new_ox_data(parse_cmo());
! 150: parse_right_parenthesis();
! 151: return m;
! 152: }
! 153:
! 154: static int parse_sm()
! 155: {
! 156: int sm_code;
! 157: if (!is_t_sm(token)) {
! 158: parse_error("syntax error: invalid sm code.");
! 159: }
! 160: sm_code = token - T_MAGIC;
! 161: token = lex();
! 162: return sm_code;
! 163: }
! 164:
! 165:
! 166: static ox* parse_ox_command()
! 167: {
! 168: ox* m;
! 169:
! 170: parse_comma();
! 171: m = (ox *)new_ox_command(parse_sm());
! 172: parse_right_parenthesis();
! 173: return m;
! 174: }
! 175:
! 176: /* 正しい入力ならば, parse_cmo を呼ぶ時点で, token には
! 177: T_CMO_xxx, T_OX_xxx のいずれかがセットされている. */
! 178: static cmo *parse_cmo()
! 179: {
! 180: cmo *m = NULL;
! 181:
! 182: switch(token) {
! 183: case T_CMO_NULL:
! 184: token = lex();
! 185: m = parse_cmo_null();
! 186: break;
! 187: case T_CMO_INT32:
! 188: token = lex();
! 189: m = parse_cmo_int32();
! 190: break;
! 191: case T_CMO_STRING:
! 192: token = lex();
! 193: m = parse_cmo_string();
! 194: break;
! 195: case T_CMO_ZZ:
! 196: token = lex();
! 197: m = parse_cmo_zz();
! 198: break;
! 199: case T_CMO_LIST:
! 200: token = lex();
! 201: m = parse_cmo_list();
! 202: break;
! 203: default:
! 204: parse_error("syntax error: invalid cmo_tag.");
! 205: }
! 206: return m;
! 207: }
! 208:
! 209: static int parse_right_parenthesis()
! 210: {
! 211: if (token != ')') {
! 212: parse_error("syntax error: no right parenthesis exists.");
! 213: return 0;
! 214: }
! 215: token = lex();
! 216: }
! 217:
! 218: static int parse_left_parenthesis()
! 219: {
! 220: if (token != '(') {
! 221: parse_error("syntax error: no left parenthesis exists.");
! 222: return 0;
! 223: }
! 224: token = lex();
! 225: }
! 226:
! 227: static int parse_comma()
! 228: {
! 229: if (token != ',') {
! 230: parse_error("syntax error: no comma exists.");
! 231: return 0;
! 232: }
! 233: token = lex();
! 234: return 1;
! 235: }
! 236:
! 237: /* cmo_zz の内部を直接いじる. */
! 238: static cmo *parse_cmo_zz()
! 239: {
! 240: int length;
! 241: int i=0;
! 242: cmo_zz *m= NULL;
! 243:
! 244: parse_comma();
! 245: length = parse_integer();
! 246: if (token == ',') {
! 247: m = new_cmo_zz_size(length);
! 248:
! 249: length = abs(length);
! 250: for(i=0; i<length; i++) {
! 251: parse_comma();
! 252: m->mpz->_mp_d[i] = parse_integer();
! 253: }
! 254: }else if (cmo_addrev) {
! 255: m = new_cmo_zz_set_si(length);
! 256: }else {
! 257: parse_error("syntax error: invalid keyword.");
! 258: }
! 259:
! 260: parse_right_parenthesis();
! 261: return (cmo *)m;
! 262: }
! 263:
! 264: static cmo *parse_cmo_list()
! 265: {
! 266: int length=0;
! 267: int i=0;
! 268: cmo_list *m;
! 269: cmo *newcmo;
! 270:
! 271: parse_comma();
! 272:
! 273: length = parse_integer();
! 274: m = new_cmo_list();
! 275: if (length<0) {
! 276: parse_error("semantics error: a list has negative length.");
! 277: }
! 278:
! 279: for(i=0; i<length; i++) {
! 280: parse_comma();
! 281: parse_left_parenthesis();
! 282: newcmo = parse_cmo();
! 283: append_cmo_list(m, newcmo);
! 284: }
! 285: parse_right_parenthesis();
! 286: return (cmo *)m;
! 287: }
! 288:
! 289: static int parse_integer()
! 290: {
! 291: int val;
! 292: if (token != T_INTEGER) {
! 293: parse_error("syntax error: in parse_integer().");
! 294: }
! 295: val = yylval.d;
! 296: token = lex();
! 297: return val;
! 298: }
! 299:
! 300: static char *parse_string()
! 301: {
! 302: char *s;
! 303: if (token != T_STRING) {
! 304: parse_error("syntax error: in parse_string().");
! 305: }
! 306: s = yylval.sym;
! 307: token = lex();
! 308: return s;
! 309: }
! 310:
! 311: static cmo *parse_cmo_null()
! 312: {
! 313: cmo_null *m = new_cmo_null();
! 314: parse_right_parenthesis();
! 315: return (cmo *)m;
! 316: }
! 317:
! 318: static cmo *parse_cmo_int32()
! 319: {
! 320: cmo_int32 *m;
! 321: int i;
! 322:
! 323: parse_comma();
! 324: i = parse_integer();
! 325: m = new_cmo_int32(i);
! 326: parse_right_parenthesis();
! 327: return (cmo *)m;
! 328: }
! 329:
! 330: static cmo *parse_cmo_string()
! 331: {
! 332: cmo_string *m;
! 333: int length;
! 334: char *s;
! 335:
! 336: parse_comma();
! 337: if (token == T_INTEGER) {
! 338: length = parse_integer();
! 339: parse_comma();
! 340: s = parse_string();
! 341: if (length != strlen(s)) {
! 342: fprintf(stderr, "warning: strlen unmatched.\n");
! 343: }
! 344: }else if (cmo_addrev) {
! 345: s = parse_string();
! 346: }else {
! 347: parse_error("syntax error: invalid keyword.");
! 348: }
! 349: m = new_cmo_string(s);
! 350: parse_right_parenthesis();
! 351: return (cmo *)m;
! 352: }
! 353:
! 354: /* --- 字句解析部 --- */
! 355:
! 356: lex_value_t yylval;
! 357:
! 358: /* lexical analyzer で読み飛ばされる文字なら何を初期値にしてもよい */
! 359: static int c = ' ';
! 360:
! 361: /* 一文字読み込む関数 */
! 362: static int (*GETC)() = getchar;
! 363:
! 364: int setgetc(int (*foo)())
! 365: {
! 366: GETC = foo;
! 367: }
! 368:
! 369: int resetgetc()
! 370: {
! 371: GETC = getchar;
! 372: }
! 373:
/* Capacity of the lexer's scratch buffer, including the terminating NUL. */
#define SIZE_BUFFER 1024

/* Scratch buffer in which keywords and quoted strings are collected. */
static char buffer[SIZE_BUFFER];

/* The single-character tokens. */
static char *PARS = "(),\n";
! 377:
! 378: /* 桁溢れの場合の対策はない */
! 379: static int lex_digit()
! 380: {
! 381: int d = 0;
! 382: do {
! 383: d = 10*d + (c - '0');
! 384: c = GETC();
! 385: } while(isdigit(c));
! 386: return d;
! 387: }
! 388:
! 389: /* バッファあふれした場合の対策をちゃんと考えるべき */
! 390: static char *lex_quoted_string()
! 391: {
! 392: int i;
! 393: char c0 = ' ';
! 394: char *s = NULL;
! 395: for (i=0; i<SIZE_BUFFER; i++) {
! 396: c = GETC();
! 397: if(c == '"') {
! 398: s = malloc(i+1);
! 399: buffer[i]='\0';
! 400: strcpy(s, buffer);
! 401:
! 402: c = GETC();
! 403: return s;
! 404: }else if (c == '\\') {
! 405: c0 = c;
! 406: c = GETC();
! 407: if (c != '"') {
! 408: buffer[i++] = c0;
! 409: }
! 410: }
! 411: buffer[i]=c;
! 412: }
! 413: fprintf(stderr, "buffer overflow!\n");
! 414: exit(1);
! 415: /* return NULL; */
! 416: }
! 417:
! 418: /* キーワードを増やしたあと修正するのを忘れてはいけない */
! 419: #undef NUM_OF_KEYWORDS 18
! 420:
! 421: static char *keywords[] = {
! 422: "CMO_INT32", "CMO_STRING", "CMO_LIST", "CMO_ZZ", "CMO_NULL",
! 423: "CMO_ZERO", "CMO_DATUM",
! 424: "SM_popCMO", "SM_popString", "SM_mathcap", "SM_pops",
! 425: "SM_executeStringByLocalParser", "SM_executeFunction",
! 426: "SM_setMathcap",
! 427: "SM_control_kill", "SM_control_reset_connection",
! 428: "OX_COMMAND", "OX_DATA",
! 429: NULL /* a gate keeper */
! 430: };
! 431:
! 432: static int token_of_keyword[] = {
! 433: T_CMO_INT32, T_CMO_STRING, T_CMO_LIST, T_CMO_ZZ, T_CMO_NULL,
! 434: T_CMO_ZERO, T_CMO_DATUM,
! 435: T_SM_popCMO, T_SM_popString, T_SM_mathcap, T_SM_pops,
! 436: T_SM_executeStringByLocalParser, T_SM_executeFunction,
! 437: T_SM_setMathcap,
! 438: T_SM_control_kill, T_SM_control_reset_connection,
! 439: T_OX_COMMAND, T_OX_DATA,
! 440: 0 /* dummy */
! 441: };
! 442:
! 443: static int token_of_matched_keyword(char *key)
! 444: {
! 445: int i;
! 446:
! 447: for(i=0; keywords[i] != NULL; i++) {
! 448: if(strcmp(key, keywords[i])==0) {
! 449: return token_of_keyword[i];
! 450: }
! 451: }
! 452: fprintf(stderr, "lex error\n");
! 453: return 0;
! 454: }
! 455:
! 456: static int lex_keyword()
! 457: {
! 458: int i;
! 459: for (i=0; i<SIZE_BUFFER; i++) {
! 460: if (!isalnum(c) && c != '_') {
! 461: buffer[i]='\0';
! 462: return token_of_matched_keyword(buffer);
! 463: }
! 464: buffer[i]=c;
! 465: c = GETC();
! 466: }
! 467: fprintf(stderr, "buffer overflow!\n");
! 468: exit(1);
! 469: }
! 470:
! 471: /* return する前に一文字先読みしておく。 */
! 472: int lex()
! 473: {
! 474: int c_dash = 0;
! 475:
! 476: /* 空白をスキップする. */
! 477: while (isspace(c) && c != '\n') {
! 478: c = GETC();
! 479: }
! 480:
! 481: switch(c) {
! 482: case '(':
! 483: case ')':
! 484: case ',':
! 485: case '\n':
! 486: c_dash = c;
! 487: c = ' ';
! 488: return c_dash;
! 489: case EOF:
! 490: c = GETC();
! 491: return c_dash;
! 492: case '"': /* a quoted string! */
! 493: yylval.sym = lex_quoted_string();
! 494: return T_STRING;
! 495: default:
! 496: }
! 497:
! 498: if (isalpha(c)) { /* 識別子 */
! 499: return lex_keyword();
! 500: }
! 501:
! 502: /* 32bit 整数値 */
! 503: if (isdigit(c)){
! 504: yylval.d = lex_digit();
! 505: return T_INTEGER;
! 506: }
! 507: if (c == '-') {
! 508: c = GETC();
! 509: while (isspace(c) && c != '\n') {
! 510: c = GETC();
! 511: }
! 512: if (isdigit(c)){
! 513: yylval.d = - lex_digit();
! 514: return T_INTEGER;
! 515: }
! 516: return 0;
! 517: }
! 518:
! 519: c = GETC();
! 520: return 0;
! 521: }
! 522:
/* State of the in-memory character source: the text being read, the
   index of the next character, and the number of characters available. */
static char *mygetc_line;
static int mygetc_counter;
static int mygetc_counter_max;

/* Returns the next character of the text installed by setmode_mygetc(),
   as an unsigned char value so that bytes with the high bit set cannot
   turn negative.  Returns 0 once all len characters have been delivered.
   (The bound used to be "<=", which read one element past len.) */
int mygetc()
{
    if (mygetc_counter < mygetc_counter_max) {
        return (unsigned char)mygetc_line[mygetc_counter++];
    }
    return 0;
}

/* Makes mygetc() deliver the first len characters of s, starting over
   from the beginning.  s is not copied, so it must stay valid while
   mygetc() is in use.  Always returns 0 (the function used to end
   without returning a value). */
int setmode_mygetc(char *s, int len)
{
    mygetc_counter = 0;
    mygetc_counter_max = len;
    mygetc_line = s;
    return 0;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>