===================================================================
RCS file: /home/cvs/OpenXM/src/ox_toolkit/parse.c,v
retrieving revision 1.1
retrieving revision 1.9
diff -u -p -r1.1 -r1.9
--- OpenXM/src/ox_toolkit/parse.c	1999/12/09 22:44:56	1.1
+++ OpenXM/src/ox_toolkit/parse.c	2003/01/13 12:03:12	1.9
@@ -1,29 +1,33 @@
 /* -*- mode: C; coding: euc-japan -*- */
-/* $OpenXM$ */
+/* $OpenXM: OpenXM/src/ox_toolkit/parse.c,v 1.8 2003/01/11 11:42:31 ohara Exp $ */
 
-/* OX expression, CMO expression パーサ */
+/* 
+   This module is a parser for OX/CMO expressions.
+   Some comments are written in Japanese using the EUC-JP coded
+   character set.
+*/
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/param.h>
 #include <setjmp.h>
-#include "oxtag.h"
-#include "ox.h"
+#include <ctype.h>
+
+#include "ox_toolkit.h"
 #include "parse.h"
 
-/* --- 構文解析部 --- */
-/* (重要)セマンティックスについての注意.
+/* --- Parser --- */
+/* Remarks for semantics.
    CMO_LIST, CMO_STRING は、あらかじめ与えられた要素の個数を無視する.
    CMO_MONOMIAL32 は無視しない. (つまりおかしいときは構文エラーになる)
 */
 
-/* parse.c, lex.c では, Lisp 表現された CMO 文字列を読み込み,
+/* 
+   parse.c では, Lisp 表現された CMO 文字列を読み込み,
    バイト列を出力する.  中間表現として、cmo 構造体を利用する.
    parse() はトークンの列から cmo 構造体を生成し、そのポインタを返す.  
-*/
-
-/* 重要なことはパーサ(の各サブルーチン)は
+   重要なことはパーサ(の各サブルーチン)は
    常にトークンをひとつ先読みしていると言うことである.
 */
 
@@ -36,18 +40,19 @@ static union{
     char *sym;
 } yylval;
 
-/* pflag_cmo_addrev がセットされていれば、厳密には CMO expression では
-   ないもの, 例えば (CMO_STRING, "hello") も CMO に変換される. */
+/* 
+   If `pflag_cmo_addrev' is set, extended CMO expressions are accepted.
+   For example, (CMO_STRING, "hello") is not a strict CMO expression,
+   but it is accepted.
+*/
+static int pflag_cmo_addrev = 1;
 
-static int pflag_cmo_addrev = 1;  /* CMO の省略記法を許すか否かのフラグ */
-
-/* 関数の宣言 */
-static int  parse_error(char *s);
-static int  parse_lf();
-static int  parse_right_parenthesis();
-static int  parse_left_parenthesis();
-static int  parse_comma();
-static int  parse_integer();
+/* declarations of local functions */
+static void parse_error(char *s);
+static void parse_right_parenthesis();
+static void parse_left_parenthesis();
+static void parse_comma();
+static mpz_ptr parse_integer();
 static char *parse_string();
 static cmo  *parse_cmo_null();
 static cmo  *parse_cmo_int32();
@@ -67,7 +72,10 @@ static int  parse_sm();
 static ox   *parse_ox();
 static ox   *parse_ox_command();
 static ox   *parse_ox_data();
+static void init_lex(char *s);
+static int  lex();
 
+
 static int is_token_cmo(int token)
 {
     return (token >= MIN_T_CMO && token < MAX_T_CMO) || token == TOKEN(CMO_ERROR2);
@@ -85,26 +93,34 @@ static int is_token_ox(int token)
 
 static jmp_buf env_parse;
 
-/* 構文解析に失敗したことを意味する. */
-static int parse_error(char *s)
+/* Report a syntax error and abort parsing by jumping back to parse(). */
+static void parse_error(char *s)
 {
-    fprintf(stderr, "syntax error: %s\n", s);
+    ox_printf("syntax error: %s\n", s);
     longjmp(env_parse, 1);
 }
 
-/* この部分は書き換え予定. */
+void setflag_parse(int flag)
+{
+    pflag_cmo_addrev = flag;
+}
+
+void init_parser(char *s)
+{
+    setflag_parse(PFLAG_ADDREV);
+    init_lex(s);
+}
+
 cmo *parse()
 {
     cmo *m;
 
     if (setjmp(env_parse) != 0) {
-        return NULL; /* 構文解析に失敗したら NULL を返す. */
+        return NULL;
+        /* Reached via longjmp from parse_error(): parsing failed. */
     }
 
-    do{
-        token = lex();
-    }while (token == '\n');
-
+    token = lex();
     if (token == '(') {
         token = lex();
         if (is_token_cmo(token)) {
@@ -114,22 +130,11 @@ cmo *parse()
         }else {
             parse_error("invalid symbol.");
         }
-        parse_lf();
         return m;
     }
     return NULL;
 }
 
-/* トークンを先読みしない(重要). */
-static int parse_lf()
-{
-    if (token != '\n') {
-        parse_error("no new line.");
-    }
-    return 0;
-}
-
-
 static ox *parse_ox()
 {
     ox *m = NULL;
@@ -183,9 +188,6 @@ static int parse_sm()
     return sm_code;
 }
 
-
-/* 正しい入力ならば, parse_cmo を呼ぶ時点で, token には 
-   TOKEN(CMO_xxx), TOKEN(OX_xxx) のいずれかがセットされている. */
 static cmo *parse_cmo()
 {
     cmo *m = NULL;
@@ -249,7 +251,7 @@ static cmo *parse_cmo()
     return m;
 }
 
-static int parse_left_parenthesis()
+static void parse_left_parenthesis()
 {
     if (token != '(') {
         parse_error("no left parenthesis.");
@@ -257,7 +259,7 @@ static int parse_left_parenthesis()
     token = lex();
 }
 
-static int parse_right_parenthesis()
+static void parse_right_parenthesis()
 {
     if (token != ')') {
         parse_error("no right parenthesis.");
@@ -265,7 +267,7 @@ static int parse_right_parenthesis()
     token = lex();
 }
 
-static int parse_comma()
+static void parse_comma()
 {
     if (token != ',') {
         parse_error("no comma.");
@@ -273,13 +275,46 @@ static int parse_comma()
     token = lex();
 }
 
-static int parse_integer()
+static mpz_ptr new_mpz_set_str(char *s)
 {
-    int val;
-    if (token != T_INTEGER) {
+    mpz_ptr z = malloc(sizeof(mpz_t));
+    mpz_init_set_str(z, s, 10);
+    return z;
+}
+
+static mpz_ptr my_mpz_neg(mpz_ptr src)
+{
+    mpz_ptr z = malloc(sizeof(mpz_t));
+    mpz_init(z);
+    mpz_neg(z, src);
+#ifndef DEBUG
+    free(src);
+#endif
+    return z;
+}
+
+static mpz_ptr parse_integer()
+{
+    int sign = 1;
+    mpz_ptr val;
+
+    if (token == '+') {
+        token = lex();
+    }else if (token == '-') {
+        sign = -1;
+        token = lex();
+    }
+
+    if (token != T_DIGIT) {
         parse_error("no integer.");
     }
-    val = yylval.d;
+    val = new_mpz_set_str(yylval.sym);
+    if (sign == -1) {
+        val = my_mpz_neg(val);
+    }
+#ifndef DEBUG
+    free(yylval.sym);
+#endif
     token = lex();
     return val;
 }
@@ -303,12 +338,12 @@ static cmo *parse_cmo_null()
 
 static cmo *parse_cmo_int32()
 {
-    int i;
+    mpz_ptr z;
 
     parse_comma();
-    i = parse_integer();
+    z = parse_integer();
     parse_right_parenthesis();
-    return (cmo *)new_cmo_int32(i);
+    return (cmo *)new_cmo_int32(mpz_get_si(z));
 }
 
 static cmo *parse_cmo_string()
@@ -317,7 +352,7 @@ static cmo *parse_cmo_string()
     char *s;
 
     parse_comma();
-    if (token == T_INTEGER) {
+    if (token == T_DIGIT) {
         parse_integer();
         parse_comma();
     }else if (!pflag_cmo_addrev) {
@@ -342,15 +377,13 @@ static cmo *parse_cmo_mathcap()
 
 static cmo *parse_cmo_list()
 {
-    int length=0;
-    int i=0;
     cmo_list *m = new_cmo_list();
     cmo *newcmo;
 
     if (token == ',') {
         parse_comma();
 
-        if (token == T_INTEGER) {
+        if (token == T_DIGIT) {
             parse_integer();
             parse_comma();
         }else if (!pflag_cmo_addrev) {
@@ -360,7 +393,7 @@ static cmo *parse_cmo_list()
         while(token == '(') {
             parse_left_parenthesis();
             newcmo = parse_cmo();
-            append_cmo_list(m, newcmo);
+            list_append(m, newcmo);
             if (token != ',') {
                 break;
             }
@@ -376,13 +409,12 @@ static cmo *parse_cmo_list()
 static cmo *parse_cmo_monomial32()
 {
     int size;
-    int *exps;
     int i;
     cmo_monomial32 *m;
     int tag;
 
     parse_comma();
-    size = parse_integer();
+    size = mpz_get_si(parse_integer());
     if (size < 0) {
         parse_error("invalid value.");
     }
@@ -390,14 +422,15 @@ static cmo *parse_cmo_monomial32()
 
     for(i=0; i<size; i++) {
         parse_comma();
-        m->exps[i] = parse_integer();
+        m->exps[i] = mpz_get_si(parse_integer());
     }
     parse_comma();
     parse_left_parenthesis();
     m->coef = parse_cmo(); 
     tag = m->coef->tag;
 
-    /* m->coef は CMO_ZZ 型か CMO_INT32 型でなければならない */
+    /* semantics: 
+       The tag of m->coef must be CMO_ZZ or CMO_INT32. */
     if (tag != CMO_ZZ && tag != CMO_INT32) {
         parse_error("invalid cmo.");
     }
@@ -405,25 +438,27 @@ static cmo *parse_cmo_monomial32()
     return (cmo *)m;
 }
 
-/* cmo_zz の内部を直接いじる. */
+/* The following function writes directly into the internal data of mpz/cmo_zz. */
 static cmo *parse_cmo_zz()
 {
     int length;
     int i=0;
     cmo_zz *m= NULL;
+    mpz_ptr z;
 
     parse_comma();
-    length = parse_integer();
+    z = parse_integer();
     if (token == ',') {
+        length = mpz_get_si(z);
         m = new_cmo_zz_size(length);
         
         length = abs(length);
         for(i=0; i<length; i++) {
             parse_comma();
-            m->mpz->_mp_d[i] = parse_integer();
+            m->mpz->_mp_d[i] = mpz_get_si(parse_integer());
         }
     }else if (pflag_cmo_addrev) {
-        m = new_cmo_zz_set_si(length);
+        m = new_cmo_zz_set_mpz(z);
     }else {
         parse_error("no comma.");
     }
@@ -452,7 +487,7 @@ static cmo *parse_cmo_ring_by_name()
     parse_left_parenthesis();
     ob = parse_cmo();   
 
-    /* ob は CMO_STRING 型でなければならない */
+    /* ob must have the type CMO_STRING. */
     if (ob->tag != CMO_STRING) {
         parse_error("invalid cmo.");
     }
@@ -462,8 +497,6 @@ static cmo *parse_cmo_ring_by_name()
 
 static cmo *parse_cmo_distributed_polynomial()
 {
-    int length=0;
-    int i=0;
     cmo_distributed_polynomial *m = new_cmo_distributed_polynomial();
     cmo *ob;
     int tag;
@@ -471,7 +504,7 @@ static cmo *parse_cmo_distributed_polynomial()
     if (token == ',') {
         parse_comma();
 
-        if (token == T_INTEGER) {
+        if (token == T_DIGIT) {
             parse_integer();
             parse_comma();
         }else if (!pflag_cmo_addrev) {
@@ -481,7 +514,7 @@ static cmo *parse_cmo_distributed_polynomial()
         parse_left_parenthesis();
         m->ringdef = parse_cmo();
         tag = m->ringdef->tag;
-        /* m->ringdef は DringDefinition でなければならない */
+        /* m->ringdef needs to be a DringDefinition. */
         if (tag != CMO_RING_BY_NAME && tag != CMO_DMS_GENERIC 
             && tag != CMO_DMS_OF_N_VARIABLES) {
             parse_error("invalid cmo.");
@@ -495,7 +528,7 @@ static cmo *parse_cmo_distributed_polynomial()
             if (ob->tag != CMO_MONOMIAL32 && ob->tag != CMO_ZERO) {
                 parse_error("invalid cmo.");
             }
-            append_cmo_list((cmo_list *)m, ob);
+            list_append((cmo_list *)m, ob);
             if (token != ',') {
                 break;
             }
@@ -530,43 +563,61 @@ static cmo *parse_cmo_error2()
     return (cmo *)new_cmo_error2(ob);
 }
 
-/* --- 字句解析部 --- */
+/* --- lexical analyzer --- */
 
-/* lexical analyzer で読み飛ばされる文字なら何を初期値にしてもよい */
+/* Lookahead character; starts as a space, which the lexical analyzer skips. */
 static int c = ' ';    
 
-/* 一文字読み込む関数 */
-static int (*GETC)() = getchar;
-
-int setgetc(int (*foo)())
+/* Get the next character from the input string. */
+static char *mygetc_ptr;
+static int mygetc()
 {
-    GETC = foo;
+    return *mygetc_ptr++;
 }
 
-int resetgetc()
+static void init_lex(char *s)
 {
-    GETC = getchar;
+    c=' ';
+    mygetc_ptr=s;
 }
 
 #define SIZE_BUFFER  8192
 static char buffer[SIZE_BUFFER];
 
-/* 桁溢れの場合の対策はない */
-static int lex_digit()
+static char *mkstr(char *src)
 {
-    int d = 0;
-    do {
-        d = 10*d + (c - '0');
-        c = GETC();
-    } while(isdigit(c));
-    return d;
+    int len;
+    char *s;
+    len = strlen(src);
+    s = malloc(len+1);
+    strcpy(s, src);
+    return s;
 }
 
+/* A run of digits longer than SIZE_BUFFER-1 is cut off at that length. */
+static char *lex_digit()
+{
+    static char buff[SIZE_BUFFER];
+    int i;
+
+    for(i=0; i<SIZE_BUFFER-1; i++) {
+        if(isdigit(c)) {
+            buff[i] = c;
+        }else {
+            buff[i] = '\0';
+            return mkstr(buff);
+        }
+        c = mygetc();
+    }
+    buff[SIZE_BUFFER-1] = '\0';
+    return mkstr(buff);
+}
+
 #define MK_KEY_CMO(x)  { #x , x  , TOKEN(x)  , IS_CMO }
 #define MK_KEY_SM(x)   { #x , x  , TOKEN(SM) , IS_SM  }
 #define MK_KEY_OX(x)   { #x , x  , TOKEN(x)  , IS_OX  }
 
-static symbol symbol_list[] = {
+static struct symbol symbol_list[] = {
     MK_KEY_CMO(CMO_NULL),
     MK_KEY_CMO(CMO_INT32), 
     MK_KEY_CMO(CMO_DATUM),
@@ -596,9 +647,9 @@ static symbol symbol_list[] = {
     {NULL, 0, 0, 0}        /* a gate keeper */
 }; 
 
-symbol* lookup_by_symbol(char *key)
+symbol_t lookup_by_symbol(char *key)
 {
-    symbol *symp;
+    symbol_t symp;
     for(symp = symbol_list; symp->key != NULL; symp++) {
         if (strcmp(key, symp->key)==0) {
             return symp;
@@ -607,9 +658,9 @@ symbol* lookup_by_symbol(char *key)
     return NULL;
 }
 
-symbol* lookup_by_token(int tok)
+symbol_t lookup_by_token(int tok)
 {
-    symbol *symp;
+    symbol_t symp;
     for(symp = symbol_list; symp->key != NULL; symp++) {
         if (tok == symp->token) {
             return symp;
@@ -618,9 +669,9 @@ symbol* lookup_by_token(int tok)
     return NULL;
 }
 
-symbol* lookup_by_tag(int tag)
+symbol_t lookup_by_tag(int tag)
 {
-    symbol *symp;
+    symbol_t symp;
     for(symp = symbol_list; symp->key != NULL; symp++) {
         if (tag == symp->tag) {
             return symp;
@@ -629,49 +680,51 @@ symbol* lookup_by_tag(int tag)
     return NULL;
 }
 
-symbol* lookup(int i)
+symbol_t lookup(int i)
 {
     return &symbol_list[i];
 }
 
-/* バッファあふれした場合の対策をちゃんと考えるべき */
+char *symbol_get_key(symbol_t sp)
+{
+    return sp->key;
+}
+
+/* Exits on overflow. NOTE(review): the backslash branch may store at buffer[SIZE_BUFFER]. */
 static char *lex_quoted_string()
 {
     int i;
     char c0 = ' ';
-    char *s = NULL;
+
     for (i=0; i<SIZE_BUFFER; i++) {
-        c = GETC();
+        c = mygetc();
         if(c == '"') {
-            s = malloc(i+1);
+            c = mygetc();
             buffer[i]='\0';
-            strcpy(s, buffer);
-      
-            c = GETC();
-            return s;
+            return mkstr(buffer);
         }else if (c == '\\') {
             c0 = c;
-            c = GETC();
+            c = mygetc();
             if (c != '"') {
                 buffer[i++] = c0;
             }
         }
         buffer[i]=c;
     }
-    fprintf(stderr, "buffer overflow!\n");
+    ox_printf("buffer overflow!\n");
     exit(1);
     /* return NULL; */
 }
 
 static int token_of_symbol(char *key)
 {
-    symbol *symp = lookup_by_symbol(key);
+    symbol_t symp = lookup_by_symbol(key);
     if (symp != NULL) {
         yylval.d = symp->tag;
         return symp->token;
     }
 #if DEBUG
-    fprintf(stderr, "lex error:: \"%s\" is unknown symbol.\n", key);
+    ox_printf("lex error:: \"%s\" is unknown symbol.\n", key);
 #endif
     return 0;
 }
@@ -685,32 +738,33 @@ static int lex_symbol()
             return token_of_symbol(buffer);
         }
         buffer[i]=c;
-        c = GETC();
+        c = mygetc();
     }
-    fprintf(stderr, "buffer overflow!\n");
+    ox_printf("buffer overflow!\n");
     return 0;
 }
 
-/* return する前に一文字先読みしておく. */
-int lex()
+/* Remark: prefetching a character before return. */
+static int lex()
 {
     int c_dash = 0;
   
-    /* 空白をスキップする. */
-    while (isspace(c) && c != '\n') {
-        c = GETC();
+    /* Skip white space, including newlines. */
+    while (isspace(c)) {
+        c = mygetc();
     }
 
     switch(c) {
     case '(':
     case ')':
     case ',':
-    case '\n':
+    case '+':
+    case '-':
         c_dash = c;
         c = ' ';
         return c_dash;
     case EOF:
-        c = GETC();
+        c = mygetc();
         return c_dash;
     case '"':      /* a quoted string! */
         yylval.sym = lex_quoted_string();
@@ -718,60 +772,16 @@ int lex()
     default:
     }
 
-    if (isalpha(c)) {    /* 識別子 */
+    if (isalpha(c)) {
+        /* symbols */
         return lex_symbol();
     }
 
-    /* 32bit 整数値 */
+    /* digit */
     if (isdigit(c)){
-        yylval.d = lex_digit();
-        return T_INTEGER;
+        yylval.sym = lex_digit();
+        return T_DIGIT;
     }
-    if (c == '-') {
-        c = GETC();
-        while (isspace(c) && c != '\n') {
-            c = GETC();
-        }
-        if (isdigit(c)){
-            yylval.d = - lex_digit();
-            return T_INTEGER;
-        }
-        return 0;
-    }
-
-    c = GETC();
+    c = mygetc();
     return 0;
-}
-
-/* 一文字読み込む関数 */
-static char *mygetc_line;
-static int  mygetc_counter;
-static int  mygetc_counter_max;
-static int  mygetc_nonlf_flag;
-
-int mygetc()
-{
-    int c = '\0';
-
-    if (mygetc_nonlf_flag && mygetc_counter <= mygetc_counter_max) {
-        c = mygetc_line[mygetc_counter++];
-        if (c == '\0') {
-            c = '\n';
-            mygetc_nonlf_flag = 0;
-        }
-    }
-    return c;
-}
-
-int setmode_mygetc(char *s, int len)
-{
-    mygetc_nonlf_flag=1;
-    mygetc_counter=0;
-    mygetc_counter_max=len;
-    mygetc_line=s;
-}
-
-int setflag_parse(int flag)
-{
-    pflag_cmo_addrev = flag;
 }