OpenXM/src/ox_math/parse.c - annotate

Return to parse.c CVS log
Up to [local] / OpenXM / src / ox_math
Annotation of OpenXM/src/ox_math/parse.c, Revision 1.1.1.1

1.1       ohara       1: /* -*- mode: C; coding: euc-japan -*- */
                      2: /* $OpenXM$ */
                      3: /* $Id: parse.c,v 1.4 1999/10/14 10:19:04 ohara Exp ohara $ */
                      4: /* OX expression, CMO expression パーサ */
                      5:
                      6: /* cmo_addrev がセットされていれば、
                      7:    厳密には CMO expression でないもの, 例えば
                      8:    (CMO_STRING, "abcdef") も CMO に変換される. */
                      9:
                     #include <ctype.h>
                     #include <limits.h>
                     #include <stdio.h>
                     #include <stdlib.h>
                     #include <string.h>
                     #include <sys/param.h>
                     #include "oxtag.h"
                     #include "ox.h"
                     #include "parse.h"
                     17:
                     18: /* --- 構文解析部 --- */
                     19: /* parse.c, lex.c では, Lisp 表現された CMO 文字列を読み込み,
                     20:    バイト列を出力する.  中間表現として、cmo *を利用する.
                     21:    parse() はトークンの列から cmo *(の指す構造体)を生成する.
                     22: */
                     23:
                     24: /* 重要なことはパーサ(の各サブルーチン)は
                     25:    常にトークンをひとつ先読みしていると言うことである.
                     26: */
                     27:
                     28: /* 現在読み込み中のトークンを表す. */
                     29: static int token = 0;
                     30:
                     31: /* yylval は lex() によってセットされる. */
                     32: extern lex_value_t yylval;
                     33:
                     34: int cmo_addrev = 1;  /* CMO の省略記法を許すか否かのフラグ */
                     35:
                     36: /* 関数の宣言 */
                     37: static int parse_error(char *s);
                     38: static int parse_lf();
                     39: static int parse_right_parenthesis();
                     40: static int parse_left_parenthesis();
                     41: static int parse_comma();
                     42: static int parse_integer();
                     43: static cmo *parse_cmo_null();
                     44: static cmo *parse_cmo_zz();
                     45: static cmo *parse_cmo_list();
                     46: static cmo *parse_cmo_int32();
                     47: static cmo *parse_cmo_string();
                     48: static cmo *parse_cmo();
                     49: static char *parse_string();
                     50: static int parse_sm();
                     51: static ox* parse_ox();
                     52: static ox* parse_ox_command();
                     53: static ox* parse_ox_data();
                     54:
                     /*
                      * Report a parse failure on stderr and terminate the process.
                      *
                      * s: message to print (a newline is appended); NULL selects the
                      *    generic "syntax error." text.
                      *
                      * Never returns: the process exits with status 1.  The original
                      * author notes that this is the "exception" path and that longjmp
                      * would really be the proper mechanism.
                      */
                     static int parse_error(char *s)
                     {
                         if (s == NULL) {
                             fprintf(stderr, "syntax error.\n");
                             exit(1);
                         }
                         fprintf(stderr, "%s\n", s);
                         exit(1);
                     }
                     64:
                     65: #define MIN_T_CMO      (T_MAGIC + 0)
                     66: #define MAX_T_CMO      (T_MAGIC + 256)
                     67:
                     68: static int is_t_cmo(int token)
                     69: {
                     70:     return (token >= MIN_T_CMO && token < MAX_T_CMO) || token == T_CMO_ERROR2;
                     71: }
                     72:
                     73: #define MIN_T_SM      (T_MAGIC + 256)
                     74: #define MAX_T_SM      (T_MAGIC + 1100)
                     75:
                     76: static int is_t_sm(int token)
                     77: {
                     78:     return token >= MIN_T_SM && token < MAX_T_SM;
                     79: }
                     80:
                     81: #define MIN_T_OX      (T_MAGIC + 512)
                     82: #define MAX_T_OX      (T_MAGIC + 600)
                     83:
                     84: static int is_t_ox(int token)
                     85: {
                     86:     return token >= MIN_T_OX && token < MAX_T_OX;
                     87: }
                     88:
                     89: /* この部分は書き換え予定. */
                     90: cmo *parse()
                     91: {
                     92:     cmo *m;
                     93:
                     94:     do{
                     95:         token = lex();
                     96:     }while (token == '\n');
                     97:
                     98:     if (token == '(') {
                     99:         token = lex();
                    100:         if (is_t_cmo(token)) {
                    101:             m = parse_cmo();
                    102:         }else if(is_t_ox(token)) {
                    103:             m = parse_ox();
                    104:         }else {
                    105:             parse_error("syntax error: unknown keyword.");
                    106:             return NULL;
                    107:         }
                    108:         parse_lf();
                    109:         return m;
                    110:     }
                    111:     return NULL;
                    112: }
                    113:
                    114: /* トークンを先読みしない(重要). */
                    115: static int parse_lf()
                    116: {
                    117:     if (token != '\n') {
                    118:         parse_error("syntax error: not new line.");
                    119:     }
                    120:     return 0;
                    121: }
                    122:
                    123:
                    124: static ox* parse_ox()
                    125: {
                    126:     ox *m = NULL;
                    127:
                    128:     switch(token) {
                    129:     case T_OX_COMMAND:
                    130:         token = lex();
                    131:         m = parse_ox_command();
                    132:         break;
                    133:     case T_OX_DATA:
                    134:         token = lex();
                    135:         m = parse_ox_data();
                    136:         break;
                    137:     default:
                    138:         parse_error("syntax error: invalid ox_tag.");
                    139:     }
                    140:     return m;
                    141: }
                    142:
                    143: static ox* parse_ox_data()
                    144: {
                    145:     ox* m;
                    146:
                    147:     parse_comma();
                    148:     parse_left_parenthesis();
                    149:     m = (ox *)new_ox_data(parse_cmo());
                    150:     parse_right_parenthesis();
                    151:     return m;
                    152: }
                    153:
                    154: static int parse_sm()
                    155: {
                    156:     int sm_code;
                    157:     if (!is_t_sm(token)) {
                    158:         parse_error("syntax error: invalid sm code.");
                    159:     }
                    160:     sm_code = token - T_MAGIC;
                    161:     token = lex();
                    162:     return sm_code;
                    163: }
                    164:
                    165:
                    166: static ox* parse_ox_command()
                    167: {
                    168:     ox* m;
                    169:
                    170:     parse_comma();
                    171:     m = (ox *)new_ox_command(parse_sm());
                    172:     parse_right_parenthesis();
                    173:     return m;
                    174: }
                    175:
                    176: /* 正しい入力ならば, parse_cmo を呼ぶ時点で, token には
                    177:    T_CMO_xxx, T_OX_xxx のいずれかがセットされている. */
                    178: static cmo *parse_cmo()
                    179: {
                    180:     cmo *m = NULL;
                    181:
                    182:     switch(token) {
                    183:     case T_CMO_NULL:
                    184:         token = lex();
                    185:         m = parse_cmo_null();
                    186:         break;
                    187:     case T_CMO_INT32:
                    188:         token = lex();
                    189:         m = parse_cmo_int32();
                    190:         break;
                    191:     case T_CMO_STRING:
                    192:         token = lex();
                    193:         m = parse_cmo_string();
                    194:         break;
                    195:     case T_CMO_ZZ:
                    196:         token = lex();
                    197:         m = parse_cmo_zz();
                    198:         break;
                    199:     case T_CMO_LIST:
                    200:         token = lex();
                    201:         m = parse_cmo_list();
                    202:         break;
                    203:     default:
                    204:         parse_error("syntax error: invalid cmo_tag.");
                    205:     }
                    206:     return m;
                    207: }
                    208:
                    209: static int parse_right_parenthesis()
                    210: {
                    211:     if (token != ')') {
                    212:         parse_error("syntax error: no right parenthesis exists.");
                    213:         return 0;
                    214:     }
                    215:     token = lex();
                    216: }
                    217:
                    218: static int parse_left_parenthesis()
                    219: {
                    220:     if (token != '(') {
                    221:         parse_error("syntax error: no left parenthesis exists.");
                    222:         return 0;
                    223:     }
                    224:     token = lex();
                    225: }
                    226:
                    227: static int parse_comma()
                    228: {
                    229:     if (token != ',') {
                    230:         parse_error("syntax error: no comma exists.");
                    231:         return 0;
                    232:     }
                    233:     token = lex();
                    234:     return 1;
                    235: }
                    236:
                    237: /* cmo_zz の内部を直接いじる. */
                    238: static cmo *parse_cmo_zz()
                    239: {
                    240:     int length;
                    241:     int i=0;
                    242:     cmo_zz *m= NULL;
                    243:
                    244:     parse_comma();
                    245:     length = parse_integer();
                    246:     if (token == ',') {
                    247:         m = new_cmo_zz_size(length);
                    248:
                    249:         length = abs(length);
                    250:         for(i=0; i<length; i++) {
                    251:             parse_comma();
                    252:             m->mpz->_mp_d[i] = parse_integer();
                    253:         }
                    254:     }else if (cmo_addrev) {
                    255:         m = new_cmo_zz_set_si(length);
                    256:     }else {
                    257:         parse_error("syntax error: invalid keyword.");
                    258:     }
                    259:
                    260:     parse_right_parenthesis();
                    261:     return (cmo *)m;
                    262: }
                    263:
                    264: static cmo *parse_cmo_list()
                    265: {
                    266:     int length=0;
                    267:     int i=0;
                    268:     cmo_list *m;
                    269:     cmo *newcmo;
                    270:
                    271:     parse_comma();
                    272:
                    273:     length = parse_integer();
                    274:     m = new_cmo_list();
                    275:     if (length<0) {
                    276:         parse_error("semantics error: a list has negative length.");
                    277:     }
                    278:
                    279:     for(i=0; i<length; i++) {
                    280:         parse_comma();
                    281:         parse_left_parenthesis();
                    282:         newcmo = parse_cmo();
                    283:         append_cmo_list(m, newcmo);
                    284:     }
                    285:     parse_right_parenthesis();
                    286:     return (cmo *)m;
                    287: }
                    288:
                    289: static int parse_integer()
                    290: {
                    291:     int val;
                    292:     if (token != T_INTEGER) {
                    293:         parse_error("syntax error: in parse_integer().");
                    294:     }
                    295:     val = yylval.d;
                    296:     token = lex();
                    297:     return val;
                    298: }
                    299:
                    300: static char *parse_string()
                    301: {
                    302:     char *s;
                    303:     if (token != T_STRING) {
                    304:         parse_error("syntax error: in parse_string().");
                    305:     }
                    306:     s = yylval.sym;
                    307:     token = lex();
                    308:     return s;
                    309: }
                    310:
                    311: static cmo *parse_cmo_null()
                    312: {
                    313:     cmo_null *m = new_cmo_null();
                    314:     parse_right_parenthesis();
                    315:     return (cmo *)m;
                    316: }
                    317:
                    318: static cmo *parse_cmo_int32()
                    319: {
                    320:     cmo_int32 *m;
                    321:     int i;
                    322:
                    323:     parse_comma();
                    324:     i = parse_integer();
                    325:     m = new_cmo_int32(i);
                    326:     parse_right_parenthesis();
                    327:     return (cmo *)m;
                    328: }
                    329:
                    330: static cmo *parse_cmo_string()
                    331: {
                    332:     cmo_string *m;
                    333:     int length;
                    334:     char *s;
                    335:
                    336:     parse_comma();
                    337:     if (token == T_INTEGER) {
                    338:         length = parse_integer();
                    339:         parse_comma();
                    340:         s = parse_string();
                    341:         if (length != strlen(s)) {
                    342:           fprintf(stderr, "warning: strlen unmatched.\n");
                    343:         }
                    344:     }else if (cmo_addrev) {
                    345:         s = parse_string();
                    346:     }else {
                    347:         parse_error("syntax error: invalid keyword.");
                    348:     }
                    349:     m = new_cmo_string(s);
                    350:     parse_right_parenthesis();
                    351:     return (cmo *)m;
                    352: }
                    353:
                    354: /* --- 字句解析部 --- */
                    355:
                    356: lex_value_t yylval;
                    357:
                    /* One character of lexer lookahead.  Any character that the
                       lexical analyzer skips is acceptable as the initial value. */
                    static int c = ' ';

                    /* Function used to read one input character; getchar by default. */
                    static int (*GETC)() = getchar;

                    /*
                     * Install foo as the lexer's character source.
                     * Returns 0 (the original declared int but returned no value).
                     */
                    int setgetc(int (*foo)())
                    {
                        GETC = foo;
                        return 0;
                    }

                    /*
                     * Restore getchar as the lexer's character source.
                     * Returns 0 (the original declared int but returned no value).
                     */
                    int resetgetc()
                    {
                        GETC = getchar;
                        return 0;
                    }

                    /* Scratch buffer shared by the string and keyword scanners. */
                    #define SIZE_BUFFER  1024
                    static char buffer[SIZE_BUFFER];
                    static char* PARS = "(),\n";
                    377:
                    378: /* 桁溢れの場合の対策はない */
                    379: static int lex_digit()
                    380: {
                    381:     int d = 0;
                    382:     do {
                    383:         d = 10*d + (c - '0');
                    384:         c = GETC();
                    385:     } while(isdigit(c));
                    386:     return d;
                    387: }
                    388:
                    389: /* バッファあふれした場合の対策をちゃんと考えるべき */
                    390: static char *lex_quoted_string()
                    391: {
                    392:     int i;
                    393:     char c0 = ' ';
                    394:     char *s = NULL;
                    395:     for (i=0; i<SIZE_BUFFER; i++) {
                    396:         c = GETC();
                    397:         if(c == '"') {
                    398:             s = malloc(i+1);
                    399:             buffer[i]='\0';
                    400:             strcpy(s, buffer);
                    401:
                    402:             c = GETC();
                    403:             return s;
                    404:         }else if (c == '\\') {
                    405:             c0 = c;
                    406:             c = GETC();
                    407:             if (c != '"') {
                    408:                 buffer[i++] = c0;
                    409:             }
                    410:         }
                    411:         buffer[i]=c;
                    412:     }
                    413:     fprintf(stderr, "buffer overflow!\n");
                    414:     exit(1);
                    415:     /* return NULL; */
                    416: }
                    417:
                    418: /* キーワードを増やしたあと修正するのを忘れてはいけない */
                    419: #undef NUM_OF_KEYWORDS  18
                    420:
                    421: static char *keywords[] = {
                    422:     "CMO_INT32", "CMO_STRING", "CMO_LIST", "CMO_ZZ", "CMO_NULL",
                    423:     "CMO_ZERO", "CMO_DATUM",
                    424:     "SM_popCMO", "SM_popString", "SM_mathcap", "SM_pops",
                    425:     "SM_executeStringByLocalParser", "SM_executeFunction",
                    426:     "SM_setMathcap",
                    427:     "SM_control_kill", "SM_control_reset_connection",
                    428:     "OX_COMMAND", "OX_DATA",
                    429:     NULL  /* a gate keeper */
                    430: };
                    431:
                    432: static int token_of_keyword[] = {
                    433:     T_CMO_INT32, T_CMO_STRING, T_CMO_LIST, T_CMO_ZZ, T_CMO_NULL,
                    434:     T_CMO_ZERO, T_CMO_DATUM,
                    435:     T_SM_popCMO, T_SM_popString, T_SM_mathcap, T_SM_pops,
                    436:     T_SM_executeStringByLocalParser, T_SM_executeFunction,
                    437:     T_SM_setMathcap,
                    438:     T_SM_control_kill, T_SM_control_reset_connection,
                    439:     T_OX_COMMAND, T_OX_DATA,
                    440:     0     /* dummy */
                    441: };
                    442:
                    443: static int token_of_matched_keyword(char *key)
                    444: {
                    445:     int i;
                    446:
                    447:     for(i=0; keywords[i] != NULL; i++) {
                    448:         if(strcmp(key, keywords[i])==0) {
                    449:             return token_of_keyword[i];
                    450:         }
                    451:     }
                    452:     fprintf(stderr, "lex error\n");
                    453:     return 0;
                    454: }
                    455:
                    456: static int lex_keyword()
                    457: {
                    458:     int i;
                    459:     for (i=0; i<SIZE_BUFFER; i++) {
                    460:         if (!isalnum(c) && c != '_') {
                    461:             buffer[i]='\0';
                    462:             return token_of_matched_keyword(buffer);
                    463:         }
                    464:         buffer[i]=c;
                    465:         c = GETC();
                    466:     }
                    467:     fprintf(stderr, "buffer overflow!\n");
                    468:     exit(1);
                    469: }
                    470:
                    471: /* return する前に一文字先読みしておく。 */
                    472: int lex()
                    473: {
                    474:     int c_dash = 0;
                    475:
                    476:     /* 空白をスキップする. */
                    477:     while (isspace(c) && c != '\n') {
                    478:         c = GETC();
                    479:     }
                    480:
                    481:     switch(c) {
                    482:     case '(':
                    483:     case ')':
                    484:     case ',':
                    485:     case '\n':
                    486:         c_dash = c;
                    487:         c = ' ';
                    488:         return c_dash;
                    489:     case EOF:
                    490:         c = GETC();
                    491:         return c_dash;
                    492:     case '"':      /* a quoted string! */
                    493:         yylval.sym = lex_quoted_string();
                    494:         return T_STRING;
                    495:     default:
                    496:     }
                    497:
                    498:     if (isalpha(c)) {    /* 識別子 */
                    499:         return lex_keyword();
                    500:     }
                    501:
                    502:     /* 32bit 整数値 */
                    503:     if (isdigit(c)){
                    504:         yylval.d = lex_digit();
                    505:         return T_INTEGER;
                    506:     }
                    507:     if (c == '-') {
                    508:         c = GETC();
                    509:         while (isspace(c) && c != '\n') {
                    510:             c = GETC();
                    511:         }
                    512:         if (isdigit(c)){
                    513:             yylval.d = - lex_digit();
                    514:             return T_INTEGER;
                    515:         }
                    516:         return 0;
                    517:     }
                    518:
                    519:     c = GETC();
                    520:     return 0;
                    521: }
                    522:
                    /* State of the in-memory character source used by mygetc(). */
                    static char *mygetc_line;          /* buffer being read            */
                    static int  mygetc_counter;        /* index of the next character  */
                    static int  mygetc_counter_max;    /* number of readable characters */

                    /*
                     * Character source that reads from the buffer installed by
                     * setmode_mygetc().  Returns the next character as a non-negative
                     * value (converted through unsigned char, like getchar), or 0 once
                     * len characters have been delivered or when no buffer is set.
                     *
                     * Fixes over the original: the bound test was "<=", which read the
                     * element at index len, one past the announced length; and a plain
                     * char was returned, so bytes >= 0x80 could come back negative.
                     */
                    int mygetc()
                    {
                        if (mygetc_line != NULL && mygetc_counter < mygetc_counter_max) {
                            return (unsigned char)mygetc_line[mygetc_counter++];
                        }
                        return 0;
                    }

                    /*
                     * Install s as the buffer read by mygetc() and rewind to its start.
                     * len is the number of characters of s that may be read.
                     * Returns 0 (the original declared int but returned no value).
                     */
                    int setmode_mygetc(char *s, int len)
                    {
                        mygetc_counter = 0;
                        mygetc_counter_max = len;
                        mygetc_line = s;
                        return 0;
                    }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>