/* -*- mode: C; coding: euc-japan -*- */ /* $OpenXM: OpenXM/src/ox_toolkit/parse.c,v 1.14 2003/06/02 10:25:57 ohara Exp $ */ /* This module is a parser for OX/CMO expressions. Some commnets are written in Japanese by using the EUC-JP coded character set. */ #include #include #include #include #include #include #include "ox_toolkit.h" #include "parse.h" /* --- Parser --- */ /* Remarks for semantics. CMO_LIST, CMO_STRING は、あらかじめ与えられた要素の個数を無視する. CMO_MONOMIAL32 は無視しない. (つまりおかしいときは構文エラーになる) */ /* parse.c では, Lisp 表現された CMO 文字列を読み込み, バイト列を出力する. 中間表現として、cmo 構造体を利用する. parse() はトークンの列から cmo 構造体を生成し、そのポインタを返す. 重要なことはパーサ(の各サブルーチン)は 常にトークンをひとつ先読みしていると言うことである. */ /* 現在読み込み中のトークンを表す. */ static int token = 0; /* トークンの属性値. yylval は lex() によってセットされる. */ static union{ int d; char *sym; } yylval; /* If `pflag_cmo_addrev' sets, then we admit extended CMO expressions. For example, (CMO_STRING, "hello") is not a real CMO expression but it is admitted. */ static int pflag_cmo_addrev = 1; /* definitions of local functions */ static void parse_error(char *s); static void parse_right_parenthesis(); static void parse_left_parenthesis(); static void parse_comma(); static mpz_ptr parse_mpz_integer(); static int parse_integer(); static char *parse_string(); static cmo *parse_cmo_null(); static cmo *parse_cmo_int32(); static cmo *parse_cmo_string(); static cmo *parse_cmo_mathcap(); static cmo *parse_cmo_list(); static cmo *parse_cmo_monomial32(); static cmo *parse_cmo_zz(); static cmo *parse_cmo_zero(); static cmo *parse_cmo_dms_generic(); static cmo *parse_cmo_ring_by_name(); static cmo *parse_cmo_distributed_polynomial(); static cmo *parse_cmo_indeterminate(); static cmo *parse_cmo_error2(); static cmo *parse_cmo(); static int parse_sm(); static ox *parse_ox(); static ox *parse_ox_command(); static ox *parse_ox_data(); static void init_lex(char *s); static int lex(); static int is_token_cmo(int token) { return (token >= MIN_T_CMO && token < MAX_T_CMO) || token == TOKEN(CMO_ERROR2); } static int is_token_sm(int token) { return token == TOKEN(SM); } static int is_token_ox(int token) { return token >= MIN_T_OX && token < MAX_T_OX; } static jmp_buf env_parse; /* This is a parsing fault. */ static void parse_error(char *s) { ox_printf("syntax error: %s\n", s); longjmp(env_parse, 1); } void setflag_parse(int flag) { pflag_cmo_addrev = flag; } void init_parser(char *s) { setflag_parse(PFLAG_ADDREV); init_lex(s); } cmo *parse() { cmo *m; if (setjmp(env_parse) != 0) { return NULL; /* This is an error. */ } token = lex(); if (token == '(') { token = lex(); if (is_token_cmo(token)) { m = parse_cmo(); }else if(is_token_ox(token)) { m = parse_ox(); }else { parse_error("invalid symbol."); } return m; } return NULL; } static ox *parse_ox() { ox *m = NULL; switch(token) { case TOKEN(OX_COMMAND): token = lex(); m = parse_ox_command(); break; case TOKEN(OX_DATA): token = lex(); m = parse_ox_data(); break; default: parse_error("invalid ox."); } return m; } static ox *parse_ox_data() { ox *m; parse_comma(); parse_left_parenthesis(); m = (ox *)new_ox_data(parse_cmo()); parse_right_parenthesis(); return m; } static ox *parse_ox_command() { ox *m; parse_comma(); parse_left_parenthesis(); m = (ox *)new_ox_command(parse_sm()); parse_right_parenthesis(); return m; } static int parse_sm() { int sm_code; if (token != TOKEN(SM)) { parse_error("no opecode."); } sm_code = yylval.d; token = lex(); parse_right_parenthesis(); return sm_code; } static cmo *parse_cmo() { cmo *m = NULL; switch(token) { case TOKEN(CMO_NULL): token = lex(); m = parse_cmo_null(); break; case TOKEN(CMO_INT32): token = lex(); m = parse_cmo_int32(); break; case TOKEN(CMO_STRING): token = lex(); m = parse_cmo_string(); break; case TOKEN(CMO_MATHCAP): token = lex(); m = parse_cmo_mathcap(); break; case TOKEN(CMO_LIST): token = lex(); m = parse_cmo_list(); break; case TOKEN(CMO_MONOMIAL32): token = lex(); m = parse_cmo_monomial32(); break; case TOKEN(CMO_ZZ): token = lex(); m = parse_cmo_zz(); break; case TOKEN(CMO_ZERO): token = lex(); m = parse_cmo_zero(); break; case TOKEN(CMO_DMS_GENERIC): token = lex(); m = parse_cmo_dms_generic(); break; case TOKEN(CMO_RING_BY_NAME): token = lex(); m = parse_cmo_ring_by_name(); break; case TOKEN(CMO_DISTRIBUTED_POLYNOMIAL): token = lex(); m = parse_cmo_distributed_polynomial(); break; case TOKEN(CMO_INDETERMINATE): token = lex(); m = parse_cmo_indeterminate(); break; case TOKEN(CMO_ERROR2): token = lex(); m = parse_cmo_error2(); break; default: parse_error("invalid cmo."); } return m; } static void parse_left_parenthesis() { if (token != '(') { parse_error("no left parenthesis."); } token = lex(); } static void parse_right_parenthesis() { if (token != ')') { parse_error("no right parenthesis."); } token = lex(); } static void parse_comma() { if (token != ',') { parse_error("no comma."); } token = lex(); } static mpz_ptr new_mpz_set_str(char *s) { mpz_ptr z = MALLOC(sizeof(mpz_t)); mpz_init_set_str(z, s, 10); return z; } static mpz_ptr my_mpz_neg(mpz_ptr src) { mpz_ptr z = MALLOC(sizeof(mpz_t)); mpz_init(z); mpz_neg(z, src); return z; } static mpz_ptr parse_mpz_integer() { int sign = 1; mpz_ptr val; if (token == '+') { token = lex(); }else if (token == '-') { sign = -1; token = lex(); } if (token != T_DIGIT) { parse_error("no integer."); } val = new_mpz_set_str(yylval.sym); if (sign == -1) { val = my_mpz_neg(val); } token = lex(); return val; } static int parse_integer() { #if 0 return mpz_get_si(parse_mpz_integer()); #else int sign = 1; int val; if (token == '+') { token = lex(); }else if (token == '-') { sign = -1; token = lex(); } if (token != T_DIGIT) { parse_error("no integer."); } val = sign*atoi(yylval.sym); token = lex(); return val; #endif } static char *parse_string() { char *s; if (token != T_STRING) { parse_error("no string."); } s = yylval.sym; token = lex(); return s; } static cmo *parse_cmo_null() { parse_right_parenthesis(); return (cmo *)new_cmo_null(); } static cmo *parse_cmo_int32() { int z; parse_comma(); z = parse_integer(); parse_right_parenthesis(); return (cmo *)new_cmo_int32(z); } static cmo *parse_cmo_string() { cmo_string *m; char *s; parse_comma(); if (token == T_DIGIT) { parse_integer(); parse_comma(); }else if (!pflag_cmo_addrev) { parse_error("invalid cmo string."); } s = parse_string(); m = new_cmo_string(s); parse_right_parenthesis(); return (cmo *)m; } static cmo *parse_cmo_mathcap() { cmo *ob; parse_comma(); parse_left_parenthesis(); ob = parse_cmo(); parse_right_parenthesis(); return (cmo *)new_cmo_mathcap(ob); } static cmo *parse_cmo_list() { cmo_list *m = new_cmo_list(); cmo *newcmo; if (token == ',') { parse_comma(); if (token == T_DIGIT) { parse_integer(); parse_comma(); }else if (!pflag_cmo_addrev) { parse_error("invalid cmo_list."); } while(token == '(') { parse_left_parenthesis(); newcmo = parse_cmo(); list_append(m, newcmo); if (token != ',') { break; } parse_comma(); } }else if (!pflag_cmo_addrev) { parse_error("invalid cmo_list."); } parse_right_parenthesis(); return (cmo *)m; } static cmo *parse_cmo_monomial32() { int size; int i; cmo_monomial32 *m; int tag; parse_comma(); size = parse_integer(); if (size < 0) { parse_error("invalid value."); } m = new_cmo_monomial32_size(size); for(i=0; iexps[i] = parse_integer(); } parse_comma(); parse_left_parenthesis(); m->coef = parse_cmo(); tag = m->coef->tag; /* semantics: The tag of m->coef must be CMO_ZZ or CMO_INT32. */ if (tag != CMO_ZZ && tag != CMO_INT32) { parse_error("invalid cmo."); } parse_right_parenthesis(); return (cmo *)m; } /* the following function rewrite internal data of mpz/cmo_zz. */ static cmo *parse_cmo_zz() { int length; int i=0; cmo_zz *m= NULL; mpz_ptr z; parse_comma(); z = parse_mpz_integer(); if (token == ',') { length = mpz_get_si(z); m = new_cmo_zz_size(length); length = abs(length); for(i=0; impz->_mp_d[i] = parse_integer(); } }else if (pflag_cmo_addrev) { m = new_cmo_zz_set_mpz(z); }else { parse_error("no comma."); } parse_right_parenthesis(); return (cmo *)m; } static cmo *parse_cmo_zero() { parse_right_parenthesis(); return (cmo *)new_cmo_zero(); } static cmo *parse_cmo_dms_generic() { parse_right_parenthesis(); return (cmo *)new_cmo_dms_generic(); } static cmo *parse_cmo_ring_by_name() { cmo *ob; parse_comma(); parse_left_parenthesis(); ob = parse_cmo(); /* The ob has a type of CMO_STRING. */ if (ob->tag != CMO_STRING) { parse_error("invalid cmo."); } parse_right_parenthesis(); return (cmo *)new_cmo_ring_by_name(ob); } static cmo *parse_cmo_distributed_polynomial() { cmo_distributed_polynomial *m = new_cmo_distributed_polynomial(); cmo *ob; int tag; if (token == ',') { parse_comma(); if (token == T_DIGIT) { parse_integer(); parse_comma(); }else if (!pflag_cmo_addrev) { parse_error("invalid d-polynomial."); } parse_left_parenthesis(); m->ringdef = parse_cmo(); tag = m->ringdef->tag; /* m->ringdef needs to be a DringDefinition. */ if (tag != CMO_RING_BY_NAME && tag != CMO_DMS_GENERIC && tag != CMO_DMS_OF_N_VARIABLES) { parse_error("invalid cmo."); } parse_comma(); while(token == '(') { parse_left_parenthesis(); ob = parse_cmo(); if (ob->tag != CMO_MONOMIAL32 && ob->tag != CMO_ZERO) { parse_error("invalid cmo."); } list_append((cmo_list *)m, ob); if (token != ',') { break; } parse_comma(); } }else if (!pflag_cmo_addrev) { parse_error("invalid d-polynomial."); } parse_right_parenthesis(); return (cmo *)m; } static cmo *parse_cmo_indeterminate() { cmo *ob; parse_comma(); parse_left_parenthesis(); ob = parse_cmo(); parse_right_parenthesis(); return (cmo *)new_cmo_indeterminate(ob); } static cmo *parse_cmo_error2() { cmo *ob; parse_comma(); parse_left_parenthesis(); ob = parse_cmo(); parse_right_parenthesis(); return (cmo *)new_cmo_error2(ob); } /* --- lexical analyzer --- */ /* A white space is ignored by lexical analyzer. */ static int c = ' '; /* getting a character from string. */ static char *mygetc_ptr; static int mygetc() { return *mygetc_ptr++; } static void init_lex(char *s) { c=' '; mygetc_ptr=s; } #define SIZE_BUFFER 8192 static char buffer[SIZE_BUFFER]; static char *new_string(char *s) { char *t = MALLOC(strlen(s)+1); strcpy(t, s); return t; } /* no measure for buffer overflow */ static char *lex_digit() { static char buff[SIZE_BUFFER]; int i; for(i=0; ikey != NULL; symp++) { if (strcmp(key, symp->key)==0) { return symp; } } return NULL; } symbol_t lookup_by_token(int tok) { symbol_t symp; for(symp = symbol_list; symp->key != NULL; symp++) { if (tok == symp->token) { return symp; } } return NULL; } symbol_t lookup_by_tag(int tag) { symbol_t symp; for(symp = symbol_list; symp->key != NULL; symp++) { if (tag == symp->tag) { return symp; } } return NULL; } symbol_t lookup(int i) { return &symbol_list[i]; } char *get_symbol_by_tag(int tag) { symbol_t symp = lookup_by_tag(tag); return (symp != NULL)? symp->key: NULL; } /* no measure for buffer overflow */ static char *lex_quoted_string() { int i; char c0 = ' '; for (i=0; itag; return symp->token; } ox_printf("lex error:: \"%s\" is unknown symbol.\n", key); return 0; } static int lex_symbol() { int i; for (i=0; i