OpenXM/src/ox_math/parse.c - annotate

Return to parse.c CVS log
Up to [local] / OpenXM / src / ox_math
Annotation of OpenXM/src/ox_math/parse.c, Revision 1.2

1.1       ohara       1: /* -*- mode: C; coding: euc-japan -*- */
                      2: /* $OpenXM$ */
1.2     ! ohara       3: /* $Id: parse.c,v 1.6 1999/11/01 17:58:38 ohara Exp $ */
1.1       ohara       4: /* OX expression, CMO expression パーサ */
                      5:
#include <ctype.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include "oxtag.h"
#include "ox.h"
#include "parse.h"
                     14:
                     15: /* --- 構文解析部 --- */
1.2     ! ohara      16: /* (重要)セマンティックスについての注意.
        !            17:    CMO_LIST, CMO_STRING は、あらかじめ与えられた要素の個数を無視する.
        !            18:    CMO_MONOMIAL32 は無視しない. (つまりおかしいときは構文エラーになる)
        !            19: */
        !            20:
        !            21:
1.1       ohara      22: /* parse.c, lex.c では, Lisp 表現された CMO 文字列を読み込み,
                     23:    バイト列を出力する.  中間表現として、cmo *を利用する.
                     24:    parse() はトークンの列から cmo *(の指す構造体)を生成する.
                     25: */
                     26:
                     27: /* 重要なことはパーサ(の各サブルーチン)は
                     28:    常にトークンをひとつ先読みしていると言うことである.
                     29: */
                     30:
                     31: /* 現在読み込み中のトークンを表す. */
                     32: static int token = 0;
                     33:
1.2     ! ohara      34: /* トークンの属性値. yylval は lex() によってセットされる. */
        !            35: static union{
        !            36:     int   d;
        !            37:     char* sym;
        !            38: } yylval;
        !            39:
        !            40: /* pflag_cmo_addrev がセットされていれば、厳密には CMO expression では
        !            41:    ないもの, 例えば (CMO_STRING, "hello") も CMO に変換される. */
1.1       ohara      42:
1.2     ! ohara      43: static int pflag_cmo_addrev = 1;  /* CMO の省略記法を許すか否かのフラグ */
1.1       ohara      44:
                     45: /* 関数の宣言 */
                     46: static int parse_error(char *s);
                     47: static int parse_lf();
                     48: static int parse_right_parenthesis();
                     49: static int parse_left_parenthesis();
                     50: static int parse_comma();
                     51: static int parse_integer();
                     52: static cmo *parse_cmo_null();
1.2     ! ohara      53: static cmo *parse_cmo_int32();
        !            54: static cmo *parse_cmo_string();
        !            55: static cmo *parse_cmo_mathcap();
        !            56: static cmo *parse_cmo_dms_generic();
        !            57: static cmo *parse_cmo_ring_by_name();
        !            58: static cmo *parse_cmo_error2();
        !            59: static cmo *parse_cmo_zero();
1.1       ohara      60: static cmo *parse_cmo_zz();
                     61: static cmo *parse_cmo_list();
                     62: static cmo *parse_cmo();
                     63: static char *parse_string();
                     64: static int parse_sm();
                     65: static ox* parse_ox();
                     66: static ox* parse_ox_command();
                     67: static ox* parse_ox_data();
                     68:
                     69: static int is_t_cmo(int token)
                     70: {
1.2     ! ohara      71:     return (token >= MIN_T_CMO && token < MAX_T_CMO) || token == TOKEN(CMO_ERROR2);
1.1       ohara      72: }
                     73:
                     74: static int is_t_sm(int token)
                     75: {
                     76:     return token >= MIN_T_SM && token < MAX_T_SM;
                     77: }
                     78:
                     79: static int is_t_ox(int token)
                     80: {
                     81:     return token >= MIN_T_OX && token < MAX_T_OX;
                     82: }
                     83:
/* Jump target used to abandon a parse; armed by setjmp() in parse(). */
static jmp_buf env_parse;

/*
 * Reports a syntax error and abandons the current parse.
 * Writes `s` followed by a newline to stderr, then longjmp()s to
 * env_parse with value 1.  Control never returns to the caller.
 */
static int parse_error(char *s)
{
    fputs(s, stderr);
    fputc('\n', stderr);
    longjmp(env_parse, 1);
}
        !            92:
1.1       ohara      93: /* この部分は書き換え予定. */
                     94: cmo *parse()
                     95: {
                     96:     cmo *m;
                     97:
1.2     ! ohara      98:        if (setjmp(env_parse) != 0) {
        !            99:                return NULL; /* 構文解析に失敗したら NULL を返す. */
        !           100:        }
        !           101:
1.1       ohara     102:     do{
                    103:         token = lex();
                    104:     }while (token == '\n');
                    105:
                    106:     if (token == '(') {
                    107:         token = lex();
                    108:         if (is_t_cmo(token)) {
                    109:             m = parse_cmo();
                    110:         }else if(is_t_ox(token)) {
                    111:             m = parse_ox();
                    112:         }else {
1.2     ! ohara     113:             parse_error("syntax error: unknown symbol.");
1.1       ohara     114:         }
                    115:         parse_lf();
                    116:         return m;
                    117:     }
                    118:     return NULL;
                    119: }
                    120:
                    121: /* トークンを先読みしない(重要). */
                    122: static int parse_lf()
                    123: {
                    124:     if (token != '\n') {
                    125:         parse_error("syntax error: not new line.");
                    126:     }
                    127:     return 0;
                    128: }
                    129:
                    130:
                    131: static ox* parse_ox()
                    132: {
                    133:     ox *m = NULL;
                    134:
                    135:     switch(token) {
1.2     ! ohara     136:     case TOKEN(OX_COMMAND):
1.1       ohara     137:         token = lex();
                    138:         m = parse_ox_command();
                    139:         break;
1.2     ! ohara     140:     case TOKEN(OX_DATA):
1.1       ohara     141:         token = lex();
                    142:         m = parse_ox_data();
                    143:         break;
                    144:     default:
                    145:         parse_error("syntax error: invalid ox_tag.");
                    146:     }
                    147:     return m;
                    148: }
                    149:
                    150: static ox* parse_ox_data()
                    151: {
                    152:     ox* m;
                    153:
                    154:     parse_comma();
                    155:     parse_left_parenthesis();
                    156:     m = (ox *)new_ox_data(parse_cmo());
                    157:     parse_right_parenthesis();
                    158:     return m;
                    159: }
                    160:
                    161: static int parse_sm()
                    162: {
                    163:     int sm_code;
                    164:     if (!is_t_sm(token)) {
                    165:         parse_error("syntax error: invalid sm code.");
                    166:     }
                    167:     sm_code = token - T_MAGIC;
                    168:     token = lex();
                    169:     return sm_code;
                    170: }
                    171:
                    172:
                    173: static ox* parse_ox_command()
                    174: {
                    175:     ox* m;
                    176:
                    177:     parse_comma();
                    178:     m = (ox *)new_ox_command(parse_sm());
                    179:     parse_right_parenthesis();
                    180:     return m;
                    181: }
                    182:
                    183: /* 正しい入力ならば, parse_cmo を呼ぶ時点で, token には
1.2     ! ohara     184:    TOKEN(CMO_xxx), TOKEN(OX_xxx) のいずれかがセットされている. */
1.1       ohara     185: static cmo *parse_cmo()
                    186: {
                    187:     cmo *m = NULL;
                    188:
                    189:     switch(token) {
1.2     ! ohara     190:     case TOKEN(CMO_NULL):
1.1       ohara     191:         token = lex();
                    192:         m = parse_cmo_null();
                    193:         break;
1.2     ! ohara     194:     case TOKEN(CMO_INT32):
1.1       ohara     195:         token = lex();
                    196:         m = parse_cmo_int32();
                    197:         break;
1.2     ! ohara     198:     case TOKEN(CMO_STRING):
1.1       ohara     199:         token = lex();
                    200:         m = parse_cmo_string();
                    201:         break;
1.2     ! ohara     202:     case TOKEN(CMO_MATHCAP):
        !           203:         token = lex();
        !           204:         m = parse_cmo_mathcap();
        !           205:         break;
        !           206:     case TOKEN(CMO_LIST):
        !           207:         token = lex();
        !           208:         m = parse_cmo_list();
        !           209:         break;
        !           210:     case TOKEN(CMO_MONOMIAL32):
        !           211:         token = lex();
        !           212:         m = parse_cmo_monomial32();
        !           213:         break;
        !           214:     case TOKEN(CMO_ZZ):
1.1       ohara     215:         token = lex();
                    216:         m = parse_cmo_zz();
                    217:         break;
1.2     ! ohara     218:     case TOKEN(CMO_ZERO):
        !           219:         token = lex();
        !           220:         m = parse_cmo_zero();
        !           221:         break;
        !           222:     case TOKEN(CMO_DMS_GENERIC):
1.1       ohara     223:         token = lex();
1.2     ! ohara     224:         m = parse_cmo_dms_generic();
        !           225:         break;
        !           226:     case TOKEN(CMO_RING_BY_NAME):
        !           227:         token = lex();
        !           228:         m = parse_cmo_ring_by_name();
        !           229:         break;
        !           230:     case TOKEN(CMO_ERROR2):
        !           231:         token = lex();
        !           232:         m = parse_cmo_error2();
1.1       ohara     233:         break;
                    234:     default:
                    235:         parse_error("syntax error: invalid cmo_tag.");
                    236:     }
                    237:     return m;
                    238: }
                    239:
1.2     ! ohara     240: static int parse_left_parenthesis()
1.1       ohara     241: {
1.2     ! ohara     242:     if (token != '(') {
        !           243:         parse_error("syntax error: no left parenthesis.");
1.1       ohara     244:     }
                    245:     token = lex();
                    246: }
                    247:
1.2     ! ohara     248: static int parse_right_parenthesis()
1.1       ohara     249: {
1.2     ! ohara     250:     if (token != ')') {
        !           251:         parse_error("syntax error: no right parenthesis.");
1.1       ohara     252:     }
                    253:     token = lex();
                    254: }
                    255:
                    256: static int parse_comma()
                    257: {
                    258:     if (token != ',') {
1.2     ! ohara     259:         parse_error("syntax error: no comma.");
1.1       ohara     260:     }
                    261:     token = lex();
                    262: }
                    263:
                    264: static int parse_integer()
                    265: {
                    266:     int val;
                    267:     if (token != T_INTEGER) {
1.2     ! ohara     268:         parse_error("syntax error: no integer.");
1.1       ohara     269:     }
                    270:     val = yylval.d;
                    271:     token = lex();
                    272:     return val;
                    273: }
                    274:
                    275: static char *parse_string()
                    276: {
                    277:     char *s;
                    278:     if (token != T_STRING) {
1.2     ! ohara     279:         parse_error("syntax error: no string.");
1.1       ohara     280:     }
                    281:     s = yylval.sym;
                    282:     token = lex();
                    283:     return s;
                    284: }
                    285:
                    286: static cmo *parse_cmo_null()
                    287: {
                    288:     parse_right_parenthesis();
1.2     ! ohara     289:     return (cmo *)new_cmo_null();
1.1       ohara     290: }
                    291:
                    292: static cmo *parse_cmo_int32()
                    293: {
                    294:     int i;
                    295:
                    296:     parse_comma();
                    297:     i = parse_integer();
                    298:     parse_right_parenthesis();
1.2     ! ohara     299:     return (cmo *)new_cmo_int32(i);
1.1       ohara     300: }
                    301:
                    302: static cmo *parse_cmo_string()
                    303: {
                    304:     cmo_string *m;
                    305:     char *s;
                    306:
                    307:     parse_comma();
                    308:     if (token == T_INTEGER) {
1.2     ! ohara     309:         parse_integer();
1.1       ohara     310:         parse_comma();
1.2     ! ohara     311:     }else if (!pflag_cmo_addrev) {
        !           312:         parse_error("syntax error: not a cmo string.");
        !           313:     }
        !           314:        s = parse_string();
        !           315:     m = new_cmo_string(s);
        !           316:     parse_right_parenthesis();
        !           317:     return (cmo *)m;
        !           318: }
        !           319:
        !           320: static cmo *parse_cmo_mathcap()
        !           321: {
        !           322:        cmo *ob;
        !           323:
        !           324:     parse_comma();
        !           325:        parse_left_parenthesis();
        !           326:        ob = parse_cmo();
        !           327:     parse_right_parenthesis();
        !           328:     return (cmo *)new_cmo_mathcap(ob);
        !           329: }
        !           330:
        !           331: static cmo *parse_cmo_list()
        !           332: {
        !           333:     int length=0;
        !           334:     int i=0;
        !           335:     cmo_list *m = new_cmo_list();
        !           336:     cmo *newcmo;
        !           337:
        !           338:        if (token == ',') {
        !           339:                parse_comma();
        !           340:
        !           341:                if (token == T_INTEGER) {
        !           342:                        parse_integer();
        !           343:                        parse_comma();
        !           344:                }else if (!pflag_cmo_addrev) {
        !           345:                        parse_error("syntax error: not a list.");
        !           346:                }
        !           347:
        !           348:                while(token == '(') {
        !           349:                        parse_left_parenthesis();
        !           350:                        newcmo = parse_cmo();
        !           351:                        append_cmo_list(m, newcmo);
        !           352:                        if (token != ',') {
        !           353:                                break;
        !           354:                        }
        !           355:                        parse_comma();
        !           356:                }
        !           357:        }else if (!pflag_cmo_addrev) {
        !           358:                parse_error("syntax error: not a list.");
        !           359:        }
        !           360:     parse_right_parenthesis();
        !           361:     return (cmo *)m;
        !           362: }
        !           363:
        !           364: static cmo *parse_cmo_monomial32()
        !           365: {
        !           366:        int size;
        !           367:        int *exps;
        !           368:        int i;
        !           369:        cmo_monomial32 *m;
        !           370:
        !           371:        parse_comma();
        !           372:        size = parse_integer();
        !           373:        if (size <= 0) {
        !           374:                parse_error("syntax error: invalid value.");
        !           375:        }
        !           376:        m = new_cmo_monomial32_size(size);
        !           377:
        !           378:        for(i=0; i<size; i++) {
        !           379:                parse_comma();
        !           380:                m->exps[i] = parse_integer();
        !           381:        }
        !           382:        parse_comma();
        !           383:        parse_left_parenthesis();
        !           384:        m->coef = parse_cmo();
        !           385:     /* 意味的チェックの必要あり */
        !           386:     parse_right_parenthesis();
        !           387:     return (cmo *)m;
        !           388: }
        !           389:
        !           390: /* cmo_zz の内部を直接いじる. */
        !           391: static cmo *parse_cmo_zz()
        !           392: {
        !           393:     int length;
        !           394:     int i=0;
        !           395:     cmo_zz *m= NULL;
        !           396:
        !           397:     parse_comma();
        !           398:     length = parse_integer();
        !           399:     if (token == ',') {
        !           400:         m = new_cmo_zz_size(length);
        !           401:
        !           402:         length = abs(length);
        !           403:         for(i=0; i<length; i++) {
        !           404:             parse_comma();
        !           405:             m->mpz->_mp_d[i] = parse_integer();
1.1       ohara     406:         }
1.2     ! ohara     407:     }else if (pflag_cmo_addrev) {
        !           408:         m = new_cmo_zz_set_si(length);
1.1       ohara     409:     }else {
1.2     ! ohara     410:         parse_error("syntax error: invalid symbol.");
1.1       ohara     411:     }
1.2     ! ohara     412:
1.1       ohara     413:     parse_right_parenthesis();
                    414:     return (cmo *)m;
                    415: }
                    416:
1.2     ! ohara     417: static cmo *parse_cmo_zero()
        !           418: {
        !           419:     parse_right_parenthesis();
        !           420:     return (cmo *)new_cmo_zero();
        !           421: }
        !           422:
        !           423: static cmo *parse_cmo_dms_generic()
        !           424: {
        !           425:     parse_right_parenthesis();
        !           426:     return (cmo *)new_cmo_dms_generic();
        !           427: }
        !           428:
        !           429: static cmo *parse_cmo_ring_by_name()
        !           430: {
        !           431:        cmo *ob;
        !           432:
        !           433:     parse_comma();
        !           434:        parse_left_parenthesis();
        !           435:        ob = parse_cmo();
        !           436:     /* 意味的チェックが必要(ob->tag == CMO_STRINGでなければいけない) */
        !           437:     parse_right_parenthesis();
        !           438:     return (cmo *)new_cmo_ring_by_name(ob);
        !           439: }
        !           440:
        !           441: static cmo *parse_cmo_error2()
        !           442: {
        !           443:        cmo *ob;
        !           444:
        !           445:     parse_comma();
        !           446:        parse_left_parenthesis();
        !           447:        ob = parse_cmo();
        !           448:     parse_right_parenthesis();
        !           449:     return (cmo *)new_cmo_error2(ob);
        !           450: }
        !           451:
1.1       ohara     452: /* --- 字句解析部 --- */
                    453:
                    454: /* lexical analyzer で読み飛ばされる文字なら何を初期値にしてもよい */
                    455: static int c = ' ';
                    456:
                    457: /* 一文字読み込む関数 */
                    458: static int (*GETC)() = getchar;
                    459:
                    460: int setgetc(int (*foo)())
                    461: {
                    462:     GETC = foo;
                    463: }
                    464:
                    465: int resetgetc()
                    466: {
                    467:     GETC = getchar;
                    468: }
                    469:
/* Capacity, in bytes, of the lexer's scratch buffer. */
#define SIZE_BUFFER  8192

/* Scratch buffer in which the lexer assembles symbols and quoted strings. */
static char buffer[SIZE_BUFFER];
                    472:
                    473: /* 桁溢れの場合の対策はない */
                    474: static int lex_digit()
                    475: {
                    476:     int d = 0;
                    477:     do {
                    478:         d = 10*d + (c - '0');
                    479:         c = GETC();
                    480:     } while(isdigit(c));
                    481:     return d;
                    482: }
                    483:
                    484: /* バッファあふれした場合の対策をちゃんと考えるべき */
                    485: static char *lex_quoted_string()
                    486: {
                    487:     int i;
                    488:     char c0 = ' ';
                    489:     char *s = NULL;
                    490:     for (i=0; i<SIZE_BUFFER; i++) {
                    491:         c = GETC();
                    492:         if(c == '"') {
                    493:             s = malloc(i+1);
                    494:             buffer[i]='\0';
                    495:             strcpy(s, buffer);
                    496:
                    497:             c = GETC();
                    498:             return s;
                    499:         }else if (c == '\\') {
                    500:             c0 = c;
                    501:             c = GETC();
                    502:             if (c != '"') {
                    503:                 buffer[i++] = c0;
                    504:             }
                    505:         }
                    506:         buffer[i]=c;
                    507:     }
                    508:     fprintf(stderr, "buffer overflow!\n");
                    509:     exit(1);
                    510:     /* return NULL; */
                    511: }
                    512:
1.2     ! ohara     513: typedef struct {
        !           514:        char *key;
        !           515:        int  token;
        !           516: } symbol;
        !           517:
        !           518: #define MK_KEY(x)  { #x  , TOKEN(x) }
        !           519:
        !           520: static symbol symbol_list[] = {
        !           521:        MK_KEY(CMO_NULL),
        !           522:     MK_KEY(CMO_INT32),
        !           523:        MK_KEY(CMO_DATUM),
        !           524:        MK_KEY(CMO_STRING),
        !           525:        MK_KEY(CMO_MATHCAP),
        !           526:        MK_KEY(CMO_LIST),
        !           527:        MK_KEY(CMO_MONOMIAL32),
        !           528:        MK_KEY(CMO_ZZ),
        !           529:        MK_KEY(CMO_ZERO),
        !           530:        MK_KEY(CMO_DMS_GENERIC),
        !           531:        MK_KEY(CMO_RING_BY_NAME),
        !           532:        MK_KEY(CMO_INDETERMINATE),
        !           533:        MK_KEY(CMO_ERROR2),
        !           534:     MK_KEY(SM_popCMO),
        !           535:        MK_KEY(SM_popString),
        !           536:        MK_KEY(SM_mathcap),
        !           537:        MK_KEY(SM_pops),
        !           538:        MK_KEY(SM_executeStringByLocalParser),
        !           539:        MK_KEY(SM_executeFunction),
        !           540:        MK_KEY(SM_setMathCap),
        !           541:     MK_KEY(SM_control_kill),
        !           542:        MK_KEY(SM_control_reset_connection),
        !           543:     MK_KEY(OX_COMMAND),  MK_KEY(OX_DATA),
        !           544:        {NULL, 0}        /* a gate keeper */
        !           545: };
        !           546:
        !           547: static int token_of_symbol(char *key)
        !           548: {
        !           549:        symbol *kp;
        !           550:        for(kp = symbol_list; kp->key != NULL; kp++) {
        !           551:                if (strcmp(key, kp->key)==0) {
        !           552:                        return kp->token;
        !           553:                }
        !           554:        }
        !           555: #if DEBUG
1.1       ohara     556:     fprintf(stderr, "lex error\n");
1.2     ! ohara     557: #endif
1.1       ohara     558:     return 0;
                    559: }
                    560:
1.2     ! ohara     561: static int lex_symbol()
1.1       ohara     562: {
                    563:     int i;
                    564:     for (i=0; i<SIZE_BUFFER; i++) {
                    565:         if (!isalnum(c) && c != '_') {
                    566:             buffer[i]='\0';
1.2     ! ohara     567:             return token_of_symbol(buffer);
1.1       ohara     568:         }
                    569:         buffer[i]=c;
                    570:         c = GETC();
                    571:     }
                    572:     fprintf(stderr, "buffer overflow!\n");
1.2     ! ohara     573:        return 0;
1.1       ohara     574: }
                    575:
                    576: /* return する前に一文字先読みしておく。 */
                    577: int lex()
                    578: {
                    579:     int c_dash = 0;
                    580:
                    581:     /* 空白をスキップする. */
                    582:     while (isspace(c) && c != '\n') {
                    583:         c = GETC();
                    584:     }
                    585:
                    586:     switch(c) {
                    587:     case '(':
                    588:     case ')':
                    589:     case ',':
                    590:     case '\n':
                    591:         c_dash = c;
                    592:         c = ' ';
                    593:         return c_dash;
                    594:     case EOF:
                    595:         c = GETC();
                    596:         return c_dash;
                    597:     case '"':      /* a quoted string! */
                    598:         yylval.sym = lex_quoted_string();
                    599:         return T_STRING;
                    600:     default:
                    601:     }
                    602:
                    603:     if (isalpha(c)) {    /* 識別子 */
1.2     ! ohara     604:         return lex_symbol();
1.1       ohara     605:     }
                    606:
                    607:     /* 32bit 整数値 */
                    608:     if (isdigit(c)){
                    609:         yylval.d = lex_digit();
                    610:         return T_INTEGER;
                    611:     }
                    612:     if (c == '-') {
                    613:         c = GETC();
                    614:         while (isspace(c) && c != '\n') {
                    615:             c = GETC();
                    616:         }
                    617:         if (isdigit(c)){
                    618:             yylval.d = - lex_digit();
                    619:             return T_INTEGER;
                    620:         }
                    621:         return 0;
                    622:     }
                    623:
                    624:     c = GETC();
                    625:     return 0;
                    626: }
                    627:
                    628: static char *mygetc_line;
                    629: static int  mygetc_counter;
                    630: static int  mygetc_counter_max;
                    631:
                    632: int mygetc()
                    633: {
                    634:     if (mygetc_counter <= mygetc_counter_max) {
                    635:         return mygetc_line[mygetc_counter++];
                    636:     }
                    637:     return 0;
                    638: }
                    639:
                    640: int setmode_mygetc(char *s, int len)
                    641: {
                    642:     mygetc_counter=0;
                    643:     mygetc_counter_max=len;
                    644:     mygetc_line=s;
1.2     ! ohara     645: }
        !           646:
        !           647: int setflag_parse(int flag)
        !           648: {
        !           649:        pflag_cmo_addrev = flag;
1.1       ohara     650: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>