OpenXM/src/ox_math/parse.c - annotate

Return to parse.c CVS log
Up to [local] / OpenXM / src / ox_math
Annotation of OpenXM/src/ox_math/parse.c, Revision 1.1

1.1     ! ohara       1: /* -*- mode: C; coding: euc-japan -*- */
        !             2: /* $OpenXM$ */
        !             3: /* $Id: parse.c,v 1.4 1999/10/14 10:19:04 ohara Exp ohara $ */
        !             4: /* OX expression, CMO expression パーサ */
        !             5:
        !             6: /* cmo_addrev がセットされていれば、
        !             7:    厳密には CMO expression でないもの, 例えば
        !             8:    (CMO_STRING, "abcdef") も CMO に変換される. */
        !             9:
        #include <ctype.h>
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>
        #include <sys/param.h>
        #include "oxtag.h"
        #include "ox.h"
        #include "parse.h"
        !            17:
        !            18: /* --- 構文解析部 --- */
        !            19: /* parse.c, lex.c では, Lisp 表現された CMO 文字列を読み込み,
        !            20:    バイト列を出力する.  中間表現として、cmo *を利用する.
        !            21:    parse() はトークンの列から cmo *(の指す構造体)を生成する.
        !            22: */
        !            23:
        !            24: /* 重要なことはパーサ(の各サブルーチン)は
        !            25:    常にトークンをひとつ先読みしていると言うことである.
        !            26: */
        !            27:
        !            28: /* The token currently being read. */
        !            29: static int token = 0;
        !            30:
        !            31: /* yylval is set by lex(). */
        !            32: extern lex_value_t yylval;
        !            33:
        !            34: int cmo_addrev = 1;  /* flag: whether the abbreviated CMO notation is allowed */
        !            35:
        !            36: /* 関数の宣言 */
        !            37: static int parse_error(char *s);
        !            38: static int parse_lf();
        !            39: static int parse_right_parenthesis();
        !            40: static int parse_left_parenthesis();
        !            41: static int parse_comma();
        !            42: static int parse_integer();
        !            43: static cmo *parse_cmo_null();
        !            44: static cmo *parse_cmo_zz();
        !            45: static cmo *parse_cmo_list();
        !            46: static cmo *parse_cmo_int32();
        !            47: static cmo *parse_cmo_string();
        !            48: static cmo *parse_cmo();
        !            49: static char *parse_string();
        !            50: static int parse_sm();
        !            51: static ox* parse_ox();
        !            52: static ox* parse_ox_command();
        !            53: static ox* parse_ox_data();
        !            54:
        /*
         * Print a parse failure on stderr and terminate the process.
         *
         * s: message to print; when NULL the generic text "syntax error." is
         *    printed instead.  A newline is appended in both cases.
         *
         * Does not return: the process exits with status 1.  (The original
         * author noted that this is the exception path and that a longjmp
         * would really be more appropriate.)
         */
        static int parse_error(char *s)
        {
            const char *message = (s == NULL) ? "syntax error." : s;

            fprintf(stderr, "%s\n", message);
            exit(1);
        }
        !            64:
        !            65: #define MIN_T_CMO      (T_MAGIC + 0)
        !            66: #define MAX_T_CMO      (T_MAGIC + 256)
        !            67:
        !            68: static int is_t_cmo(int token)
        !            69: {
        !            70:     return (token >= MIN_T_CMO && token < MAX_T_CMO) || token == T_CMO_ERROR2;
        !            71: }
        !            72:
        !            73: #define MIN_T_SM      (T_MAGIC + 256)
        !            74: #define MAX_T_SM      (T_MAGIC + 1100)
        !            75:
        !            76: static int is_t_sm(int token)
        !            77: {
        !            78:     return token >= MIN_T_SM && token < MAX_T_SM;
        !            79: }
        !            80:
        !            81: #define MIN_T_OX      (T_MAGIC + 512)
        !            82: #define MAX_T_OX      (T_MAGIC + 600)
        !            83:
        !            84: static int is_t_ox(int token)
        !            85: {
        !            86:     return token >= MIN_T_OX && token < MAX_T_OX;
        !            87: }
        !            88:
        !            89: /* この部分は書き換え予定. */
        !            90: cmo *parse()
        !            91: {
        !            92:     cmo *m;
        !            93:
        !            94:     do{
        !            95:         token = lex();
        !            96:     }while (token == '\n');
        !            97:
        !            98:     if (token == '(') {
        !            99:         token = lex();
        !           100:         if (is_t_cmo(token)) {
        !           101:             m = parse_cmo();
        !           102:         }else if(is_t_ox(token)) {
        !           103:             m = parse_ox();
        !           104:         }else {
        !           105:             parse_error("syntax error: unknown keyword.");
        !           106:             return NULL;
        !           107:         }
        !           108:         parse_lf();
        !           109:         return m;
        !           110:     }
        !           111:     return NULL;
        !           112: }
        !           113:
        !           114: /* トークンを先読みしない(重要). */
        !           115: static int parse_lf()
        !           116: {
        !           117:     if (token != '\n') {
        !           118:         parse_error("syntax error: not new line.");
        !           119:     }
        !           120:     return 0;
        !           121: }
        !           122:
        !           123:
        !           124: static ox* parse_ox()
        !           125: {
        !           126:     ox *m = NULL;
        !           127:
        !           128:     switch(token) {
        !           129:     case T_OX_COMMAND:
        !           130:         token = lex();
        !           131:         m = parse_ox_command();
        !           132:         break;
        !           133:     case T_OX_DATA:
        !           134:         token = lex();
        !           135:         m = parse_ox_data();
        !           136:         break;
        !           137:     default:
        !           138:         parse_error("syntax error: invalid ox_tag.");
        !           139:     }
        !           140:     return m;
        !           141: }
        !           142:
        !           143: static ox* parse_ox_data()
        !           144: {
        !           145:     ox* m;
        !           146:
        !           147:     parse_comma();
        !           148:     parse_left_parenthesis();
        !           149:     m = (ox *)new_ox_data(parse_cmo());
        !           150:     parse_right_parenthesis();
        !           151:     return m;
        !           152: }
        !           153:
        !           154: static int parse_sm()
        !           155: {
        !           156:     int sm_code;
        !           157:     if (!is_t_sm(token)) {
        !           158:         parse_error("syntax error: invalid sm code.");
        !           159:     }
        !           160:     sm_code = token - T_MAGIC;
        !           161:     token = lex();
        !           162:     return sm_code;
        !           163: }
        !           164:
        !           165:
        !           166: static ox* parse_ox_command()
        !           167: {
        !           168:     ox* m;
        !           169:
        !           170:     parse_comma();
        !           171:     m = (ox *)new_ox_command(parse_sm());
        !           172:     parse_right_parenthesis();
        !           173:     return m;
        !           174: }
        !           175:
        !           176: /* 正しい入力ならば, parse_cmo を呼ぶ時点で, token には
        !           177:    T_CMO_xxx, T_OX_xxx のいずれかがセットされている. */
        !           178: static cmo *parse_cmo()
        !           179: {
        !           180:     cmo *m = NULL;
        !           181:
        !           182:     switch(token) {
        !           183:     case T_CMO_NULL:
        !           184:         token = lex();
        !           185:         m = parse_cmo_null();
        !           186:         break;
        !           187:     case T_CMO_INT32:
        !           188:         token = lex();
        !           189:         m = parse_cmo_int32();
        !           190:         break;
        !           191:     case T_CMO_STRING:
        !           192:         token = lex();
        !           193:         m = parse_cmo_string();
        !           194:         break;
        !           195:     case T_CMO_ZZ:
        !           196:         token = lex();
        !           197:         m = parse_cmo_zz();
        !           198:         break;
        !           199:     case T_CMO_LIST:
        !           200:         token = lex();
        !           201:         m = parse_cmo_list();
        !           202:         break;
        !           203:     default:
        !           204:         parse_error("syntax error: invalid cmo_tag.");
        !           205:     }
        !           206:     return m;
        !           207: }
        !           208:
        !           209: static int parse_right_parenthesis()
        !           210: {
        !           211:     if (token != ')') {
        !           212:         parse_error("syntax error: no right parenthesis exists.");
        !           213:         return 0;
        !           214:     }
        !           215:     token = lex();
        !           216: }
        !           217:
        !           218: static int parse_left_parenthesis()
        !           219: {
        !           220:     if (token != '(') {
        !           221:         parse_error("syntax error: no left parenthesis exists.");
        !           222:         return 0;
        !           223:     }
        !           224:     token = lex();
        !           225: }
        !           226:
        !           227: static int parse_comma()
        !           228: {
        !           229:     if (token != ',') {
        !           230:         parse_error("syntax error: no comma exists.");
        !           231:         return 0;
        !           232:     }
        !           233:     token = lex();
        !           234:     return 1;
        !           235: }
        !           236:
        !           237: /* cmo_zz の内部を直接いじる. */
        !           238: static cmo *parse_cmo_zz()
        !           239: {
        !           240:     int length;
        !           241:     int i=0;
        !           242:     cmo_zz *m= NULL;
        !           243:
        !           244:     parse_comma();
        !           245:     length = parse_integer();
        !           246:     if (token == ',') {
        !           247:         m = new_cmo_zz_size(length);
        !           248:
        !           249:         length = abs(length);
        !           250:         for(i=0; i<length; i++) {
        !           251:             parse_comma();
        !           252:             m->mpz->_mp_d[i] = parse_integer();
        !           253:         }
        !           254:     }else if (cmo_addrev) {
        !           255:         m = new_cmo_zz_set_si(length);
        !           256:     }else {
        !           257:         parse_error("syntax error: invalid keyword.");
        !           258:     }
        !           259:
        !           260:     parse_right_parenthesis();
        !           261:     return (cmo *)m;
        !           262: }
        !           263:
        !           264: static cmo *parse_cmo_list()
        !           265: {
        !           266:     int length=0;
        !           267:     int i=0;
        !           268:     cmo_list *m;
        !           269:     cmo *newcmo;
        !           270:
        !           271:     parse_comma();
        !           272:
        !           273:     length = parse_integer();
        !           274:     m = new_cmo_list();
        !           275:     if (length<0) {
        !           276:         parse_error("semantics error: a list has negative length.");
        !           277:     }
        !           278:
        !           279:     for(i=0; i<length; i++) {
        !           280:         parse_comma();
        !           281:         parse_left_parenthesis();
        !           282:         newcmo = parse_cmo();
        !           283:         append_cmo_list(m, newcmo);
        !           284:     }
        !           285:     parse_right_parenthesis();
        !           286:     return (cmo *)m;
        !           287: }
        !           288:
        !           289: static int parse_integer()
        !           290: {
        !           291:     int val;
        !           292:     if (token != T_INTEGER) {
        !           293:         parse_error("syntax error: in parse_integer().");
        !           294:     }
        !           295:     val = yylval.d;
        !           296:     token = lex();
        !           297:     return val;
        !           298: }
        !           299:
        !           300: static char *parse_string()
        !           301: {
        !           302:     char *s;
        !           303:     if (token != T_STRING) {
        !           304:         parse_error("syntax error: in parse_string().");
        !           305:     }
        !           306:     s = yylval.sym;
        !           307:     token = lex();
        !           308:     return s;
        !           309: }
        !           310:
        !           311: static cmo *parse_cmo_null()
        !           312: {
        !           313:     cmo_null *m = new_cmo_null();
        !           314:     parse_right_parenthesis();
        !           315:     return (cmo *)m;
        !           316: }
        !           317:
        !           318: static cmo *parse_cmo_int32()
        !           319: {
        !           320:     cmo_int32 *m;
        !           321:     int i;
        !           322:
        !           323:     parse_comma();
        !           324:     i = parse_integer();
        !           325:     m = new_cmo_int32(i);
        !           326:     parse_right_parenthesis();
        !           327:     return (cmo *)m;
        !           328: }
        !           329:
        !           330: static cmo *parse_cmo_string()
        !           331: {
        !           332:     cmo_string *m;
        !           333:     int length;
        !           334:     char *s;
        !           335:
        !           336:     parse_comma();
        !           337:     if (token == T_INTEGER) {
        !           338:         length = parse_integer();
        !           339:         parse_comma();
        !           340:         s = parse_string();
        !           341:         if (length != strlen(s)) {
        !           342:           fprintf(stderr, "warning: strlen unmatched.\n");
        !           343:         }
        !           344:     }else if (cmo_addrev) {
        !           345:         s = parse_string();
        !           346:     }else {
        !           347:         parse_error("syntax error: invalid keyword.");
        !           348:     }
        !           349:     m = new_cmo_string(s);
        !           350:     parse_right_parenthesis();
        !           351:     return (cmo *)m;
        !           352: }
        !           353:
        !           354: /* --- lexical analyzer --- */
        !           355:
        !           356: lex_value_t yylval;
        !           357:
        /* One-character look-ahead of the lexer.  Any character that the
           lexical analyzer skips may be used as the initial value. */
        static int c = ' ';

        /* The function used to read one character. */
        static int (*GETC)() = getchar;

        /*
         * Install foo as the character source used by the lexer.
         * Returns 0.  (The function is declared int but used to return no
         * value at all.)
         */
        int setgetc(int (*foo)())
        {
            GETC = foo;
            return 0;
        }

        /*
         * Restore getchar as the character source used by the lexer.
         * Returns 0.  (The function is declared int but used to return no
         * value at all.)
         */
        int resetgetc()
        {
            GETC = getchar;
            return 0;
        }
        !           373:
        !           374: #define SIZE_BUFFER  1024
        !           375: static char buffer[SIZE_BUFFER];
        !           376: static char* PARS = "(),\n";
        !           377:
        !           378: /* 桁溢れの場合の対策はない */
        !           379: static int lex_digit()
        !           380: {
        !           381:     int d = 0;
        !           382:     do {
        !           383:         d = 10*d + (c - '0');
        !           384:         c = GETC();
        !           385:     } while(isdigit(c));
        !           386:     return d;
        !           387: }
        !           388:
        !           389: /* バッファあふれした場合の対策をちゃんと考えるべき */
        !           390: static char *lex_quoted_string()
        !           391: {
        !           392:     int i;
        !           393:     char c0 = ' ';
        !           394:     char *s = NULL;
        !           395:     for (i=0; i<SIZE_BUFFER; i++) {
        !           396:         c = GETC();
        !           397:         if(c == '"') {
        !           398:             s = malloc(i+1);
        !           399:             buffer[i]='\0';
        !           400:             strcpy(s, buffer);
        !           401:
        !           402:             c = GETC();
        !           403:             return s;
        !           404:         }else if (c == '\\') {
        !           405:             c0 = c;
        !           406:             c = GETC();
        !           407:             if (c != '"') {
        !           408:                 buffer[i++] = c0;
        !           409:             }
        !           410:         }
        !           411:         buffer[i]=c;
        !           412:     }
        !           413:     fprintf(stderr, "buffer overflow!\n");
        !           414:     exit(1);
        !           415:     /* return NULL; */
        !           416: }
        !           417:
        !           418: /* キーワードを増やしたあと修正するのを忘れてはいけない */
        !           419: #undef NUM_OF_KEYWORDS  18
        !           420:
        !           421: static char *keywords[] = {
        !           422:     "CMO_INT32", "CMO_STRING", "CMO_LIST", "CMO_ZZ", "CMO_NULL",
        !           423:     "CMO_ZERO", "CMO_DATUM",
        !           424:     "SM_popCMO", "SM_popString", "SM_mathcap", "SM_pops",
        !           425:     "SM_executeStringByLocalParser", "SM_executeFunction",
        !           426:     "SM_setMathcap",
        !           427:     "SM_control_kill", "SM_control_reset_connection",
        !           428:     "OX_COMMAND", "OX_DATA",
        !           429:     NULL  /* a gate keeper */
        !           430: };
        !           431:
        !           432: static int token_of_keyword[] = {
        !           433:     T_CMO_INT32, T_CMO_STRING, T_CMO_LIST, T_CMO_ZZ, T_CMO_NULL,
        !           434:     T_CMO_ZERO, T_CMO_DATUM,
        !           435:     T_SM_popCMO, T_SM_popString, T_SM_mathcap, T_SM_pops,
        !           436:     T_SM_executeStringByLocalParser, T_SM_executeFunction,
        !           437:     T_SM_setMathcap,
        !           438:     T_SM_control_kill, T_SM_control_reset_connection,
        !           439:     T_OX_COMMAND, T_OX_DATA,
        !           440:     0     /* dummy */
        !           441: };
        !           442:
        !           443: static int token_of_matched_keyword(char *key)
        !           444: {
        !           445:     int i;
        !           446:
        !           447:     for(i=0; keywords[i] != NULL; i++) {
        !           448:         if(strcmp(key, keywords[i])==0) {
        !           449:             return token_of_keyword[i];
        !           450:         }
        !           451:     }
        !           452:     fprintf(stderr, "lex error\n");
        !           453:     return 0;
        !           454: }
        !           455:
        !           456: static int lex_keyword()
        !           457: {
        !           458:     int i;
        !           459:     for (i=0; i<SIZE_BUFFER; i++) {
        !           460:         if (!isalnum(c) && c != '_') {
        !           461:             buffer[i]='\0';
        !           462:             return token_of_matched_keyword(buffer);
        !           463:         }
        !           464:         buffer[i]=c;
        !           465:         c = GETC();
        !           466:     }
        !           467:     fprintf(stderr, "buffer overflow!\n");
        !           468:     exit(1);
        !           469: }
        !           470:
        !           471: /* return する前に一文字先読みしておく。 */
        !           472: int lex()
        !           473: {
        !           474:     int c_dash = 0;
        !           475:
        !           476:     /* 空白をスキップする. */
        !           477:     while (isspace(c) && c != '\n') {
        !           478:         c = GETC();
        !           479:     }
        !           480:
        !           481:     switch(c) {
        !           482:     case '(':
        !           483:     case ')':
        !           484:     case ',':
        !           485:     case '\n':
        !           486:         c_dash = c;
        !           487:         c = ' ';
        !           488:         return c_dash;
        !           489:     case EOF:
        !           490:         c = GETC();
        !           491:         return c_dash;
        !           492:     case '"':      /* a quoted string! */
        !           493:         yylval.sym = lex_quoted_string();
        !           494:         return T_STRING;
        !           495:     default:
        !           496:     }
        !           497:
        !           498:     if (isalpha(c)) {    /* 識別子 */
        !           499:         return lex_keyword();
        !           500:     }
        !           501:
        !           502:     /* 32bit 整数値 */
        !           503:     if (isdigit(c)){
        !           504:         yylval.d = lex_digit();
        !           505:         return T_INTEGER;
        !           506:     }
        !           507:     if (c == '-') {
        !           508:         c = GETC();
        !           509:         while (isspace(c) && c != '\n') {
        !           510:             c = GETC();
        !           511:         }
        !           512:         if (isdigit(c)){
        !           513:             yylval.d = - lex_digit();
        !           514:             return T_INTEGER;
        !           515:         }
        !           516:         return 0;
        !           517:     }
        !           518:
        !           519:     c = GETC();
        !           520:     return 0;
        !           521: }
        !           522:
        /* State of the in-memory character source: the text, the index of the
           next character to hand out, and the last index that may be read. */
        static char *mygetc_line;
        static int  mygetc_counter;
        static int  mygetc_counter_max;

        /*
         * Return the next character of the text installed by setmode_mygetc(),
         * or 0 once the index has passed mygetc_counter_max.
         *
         * The character is converted through unsigned char, so bytes >= 0x80
         * are returned as positive values; a plain char could come back
         * negative and 0xFF would be indistinguishable from EOF.
         *
         * NOTE(review): the bound is inclusive, so index `len` itself is also
         * read -- presumably the terminating NUL when len is the string
         * length; confirm against the callers.
         */
        int mygetc()
        {
            if (mygetc_counter <= mygetc_counter_max) {
                return (unsigned char)mygetc_line[mygetc_counter++];
            }
            return 0;
        }

        /*
         * Make mygetc() read from s, restarting at index 0 with len as the
         * last readable index.  The pointer is stored, not copied.
         * Returns 0.  (The function is declared int but used to return no
         * value at all.)
         */
        int setmode_mygetc(char *s, int len)
        {
            mygetc_counter = 0;
            mygetc_counter_max = len;
            mygetc_line = s;
            return 0;
        }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>