Annotation of OpenXM/src/kan96xx/Kan/scanner2.c, Revision 1.7
1.7 ! takayama 1: /* $OpenXM: OpenXM/src/kan96xx/Kan/scanner2.c,v 1.6 2004/09/12 00:26:21 takayama Exp $ */
1.1 maekawa 2: /* scanner2.c (SM StackMachine) */
3: /* export: struct tokens *decomposeToTokens(char *str,int *sizep);
4: scanner2.c is for getting tokens from a string.
5: */
6: #include <stdio.h>
7: #include "datatype.h"
8: #include "stackm.h"
/* Helpers called by flushSM() below. They are only declared here; this file
   does not define them. */
9: struct tokens lookupTokens(struct tokens t);
10: int isLiteral(char *s);
11: struct object lookupLiteralString(char *s);
12: /**************** defined in stackm.h ****************************
1.3 takayama 13: typedef enum {INIT,GET,PUT,OPEN} actionType;
1.1 maekawa 14:
1.3 takayama 15: struct tokens{
16: char *token;
17: int kind; (the listing is abbreviated: flushSM() below also sets the members tflag and object)
18: };
1.1 maekawa 19:
20:
1.3 takayama 21: #define ID 2
22: #define DOLLAR 3 strings enclosed by dollar sign
23: #define EXECUTABLE_STRING 4 strings enclosed by {}
24: #define EXECUTABLE_ARRAY 8 Don't set it in this file.
1.1 maekawa 25: ******************************************************************/
26:
27:
28: /******* declaration part of the lexical analyzer ********************/
/* mygetchar() is the scanner's only character source; it expands to getSM(). */
29: #define mygetchar() getSM()
30: /* To read input through getSM() (characters are taken from StringSM):
31: 1. store the input string in StringSM;
32: 2. call getokenSM2(INIT), which rewinds the scanner and reads the first character.
33: */
34:
35:
36:
37:
/* Initial capacity, in characters, of the token buffer. */
38: #define BUF0LIMIT 40000
/* The string being scanned; decomposeToTokens() fills it with a copy of its input. */
39: static char *StringSM;
/* Index into StringSM of the next character getSM() will return. */
40: static int StrpSM = 0;
/* Fixed storage that BufSM points at until putSM() replaces it with a larger buffer. */
41: static char BufSMorg[BUF0LIMIT];
/* Buffer in which the characters of the token under construction are collected. */
42: static char *BufSM = BufSMorg;
/* Current capacity of BufSM; doubled by putSM() when the buffer fills up. */
43: static int Buf0limit = BUF0LIMIT ;
/* Number of characters currently stored in BufSM (0 means no pending token). */
44: static int ExistSM = 0;
/* Kind that flushSM() gives to the token it builds (ID, DOLLAR or EXECUTABLE_STRING). */
45: static int TypeSM = ID;
46:
47: /**************** end of declaration part of lexical analyzer ******/
48:
/*
 * Forward declarations of the file-local scanner routines.
 * The return types are spelled out: implicit int was removed in C99 and is
 * rejected by current compilers.  The prototypes are compatible with both
 * old-style and prototype-style definitions taking a single int.
 */
static int getSM(void);
static int putSM(int c);
static struct tokens flushSM(void);
static int isSpaceSM(int c);
static int isDollarSM(int c);
static int isBraceSM(int c);
static int isKakkoSM(int c);
static int isSymbolSM(int c);
/* Deliberately left without a prototype: the definition names two parameters
   (kind, str) while the callers in this file pass only the action. */
static struct tokens getokenSM2();
58:
59: /**************** code part of lexical analyzer ********************/
60:
61: struct tokens *decomposeToTokens(str,sizep)
1.3 takayama 62: char *str;
63: int *sizep;
1.1 maekawa 64: {
65: struct tokens *tArray;
66: struct tokens token;
67: int k;
68: int size;
69:
70: StringSM = (char *) sGC_malloc((strlen(str)+3)*sizeof(char));
71: if (StringSM == (char *)NULL) {
72: errorScanner2("I have no memormy.");
73: }
74: strcpy(StringSM,str);
75: getokenSM2(INIT);
76: size = 0;
77: token = getokenSM2(GET);
78: while (token.kind != -1) {
79: size++;
80: token = getokenSM2(GET);
81: }
82:
83: tArray = (struct tokens *)sGC_malloc((size+1)*sizeof(struct tokens));
84: strcpy(StringSM,str);
85: getokenSM2(INIT);
86: for (k=0; k<size; k++) {
87: tArray[k] = getokenSM2(GET);
88: }
89:
90: *sizep = size;
91: return(tArray);
92: }
93:
94:
95:
96: static int getSM()
1.3 takayama 97: /* get a letter from StringSM */
1.1 maekawa 98: {
99: int c;
1.5 takayama 100:
101: if ((StrpSM > 0) && (StringSM[StrpSM] == ',') && (StringSM[StrpSM-1] == ',')) { int i;
102: fprintf(stderr,"Warning: ,, is found: ");
103: for (i=(StrpSM-30>0?StrpSM-30:0); i<=StrpSM; i++) {
104: fprintf(stderr,"%c",StringSM[i]);
105: }
106: fprintf(stderr,"\n");
107: }
108:
1.7 ! takayama 109: c = (unsigned char) StringSM[StrpSM++];
1.1 maekawa 110: if (c == '\0') {
111: StrpSM--;return(EOF);
112: } else return(c);
113: }
114:
115: static putSM(c)
1.3 takayama 116: int c;
117: /* put a letter on BufSM */
1.1 maekawa 118: {
119: char *new; int i;
120: BufSM[ExistSM++] = ((c=='\n')? ' ' : c);
121: if (ExistSM >= Buf0limit-1) {
122: new = (char *) sGC_malloc(sizeof(char *)*Buf0limit*2) ;
123: if (new == (char *)NULL) {
124: fprintf(stderr,"No more memory in parserpass0.c\n");
125: exit(18);
126: }
127: fprintf(stderr,"\nSystem Message: Increasing BufSM to %d in scanner2.c\n",Buf0limit*2);
128: for (i=0; i<Buf0limit; i++) {
129: new[i] = BufSM[i];
130: }
131: BufSM = new; Buf0limit *= 2;
132: }
133: }
134:
135: static struct tokens flushSM()
136: {
137: char *token;
138: struct tokens r;
139: if (ExistSM<=0) {
140: fprintf(stderr,"\n flushSM() is called without data. Don't use the empty string $$. \n");
141: r.token = (char *)NULL; r.kind = -10; /* -1 ==> -10 ***/
142: return(r);
143: }
144: BufSM[ExistSM] = '\0';
145: ExistSM = 0;
146: token = (char *)sGC_malloc((strlen(BufSM)+1)*sizeof(char));
147: strcpy(token,BufSM);
148: r.token = token;
149: r.kind = TypeSM;
1.6 takayama 150: r.tflag = 0;
1.1 maekawa 151: if (r.kind == ID) {
152: if (isLiteral(r.token)) {
1.3 takayama 153: r.object = lookupLiteralString(r.token);
1.1 maekawa 154: }else{
155: r = lookupTokens(r); /* Compute hashing values */
156: }
157: }
158: return(r);
159: }
160:
/*
 * isSpaceSM: return 1 when c separates tokens, 0 otherwise.
 * A separator is a comma or any character value not greater than ' '
 * (blank and the control characters).  EOF is never a separator.
 */
static int isSpaceSM(int c)
{
  if (c == EOF) {
    return 0;
  }
  return (c <= ' ' || c == ',');
}
167:
/* isDollarSM: return 1 when c is the dollar sign that delimits a string, 0 otherwise. */
static int isDollarSM(int c)
{
  return c == '$';
}
174:
/* isBraceSM: return 1 when c is an opening brace '{', 0 otherwise. */
static int isBraceSM(int c)
{
  return c == '{';
}
181:
/* isKakkoSM: return 1 when c is an opening parenthesis '(', 0 otherwise. */
static int isKakkoSM(int c)
{
  return c == '(';
}
188:
/*
 * isSymbolSM: return 1 when c is one of the bracket characters
 * { } [ ] ( ), which end an identifier, and 0 otherwise.
 */
static int isSymbolSM(int c)
{
  switch (c) {
  case '{':
  case '}':
  case '[':
  case ']':
  case '(':
  case ')':
    return 1;
  default:
    return 0;
  }
}
201:
/*
 * getokenSM2: the scanner proper.
 *
 *   kind == INIT   : rewind to the start of StringSM, empty the token buffer,
 *                    read the first character into the look-ahead c and
 *                    return the null token (token == NULL, kind == -1).
 *   any other kind : return the next token of StringSM, or the null token
 *                    once the end of the string has been reached.
 *   str            : never referenced in the body.
 *
 * The look-ahead character c and the null token rnull are static, so they
 * survive between calls; rnull.token and rnull.kind are assigned only by an
 * INIT call.
 *
 * Kinds produced: ID for identifiers and single bracket symbols, DOLLAR for
 * $...$ and (...) strings, EXECUTABLE_STRING for {...}.  When the delimiters
 * enclose nothing, flushSM() is called on an empty buffer and returns its
 * kind -10 token instead.
 */
202: static struct tokens getokenSM2(kind,str)
1.3 takayama 203: actionType kind;
204: char *str;
1.1 maekawa 205: {
206: static int c;
207: static struct tokens rnull;
208: int level;
209:
210: if (kind == INIT) {
211: StrpSM = 0;
212: ExistSM = 0;
213:
/* Prime the look-ahead with the first character of StringSM. */
214: c = mygetchar();
215: rnull.token = (char *)NULL; rnull.kind = -1;
216: return(rnull);
217: }
218:
219:
220:
/* Each pass classifies the look-ahead c. A pass either returns a token or,
   after skipping separators or a comment, goes round again. */
221: for (;;) {
/* ID is the default kind; the string branches below override it. */
222: TypeSM = ID;
223: if (c == EOF) {
224: if (ExistSM) return(flushSM());
225: else return(rnull);
226: } else if (isSpaceSM(c)) {
227: if (ExistSM) {
1.3 takayama 228: c = mygetchar(); return(flushSM());
1.1 maekawa 229: }else {
/* Skip the whole run of separators; c ends up on the first non-separator. */
1.3 takayama 230: while (isSpaceSM(c=mygetchar())) ;
1.1 maekawa 231: }
232: } else if (isDollarSM(c)) { /* $...$ : the text between the dollar signs becomes a DOLLAR token. */
233: if (ExistSM) return(flushSM());
234: else {
1.3 takayama 235: c = mygetchar();
/* Collect up to the closing '$'; a missing one ends the string at EOF. */
236: while ((c != EOF) && (c != '$')) {
237: putSM(c);
238: c = mygetchar();
239: }
/* Step over the closing '$'. */
240: if (c=='$') c=mygetchar();
241: TypeSM = DOLLAR;
242: return(flushSM());
1.1 maekawa 243: }
244: } else if (isBraceSM(c)) { /* {...} : the text between the braces becomes an EXECUTABLE_STRING token. */
245: /* Braces may nest, e.g. { { } }; level counts the inner '{' still open. */
246: level = 0;
247: if (ExistSM) return(flushSM());
248: else {
1.3 takayama 249: c = mygetchar();
250: while (1) {
251: if (c == '%') { /* skip the comment in the brace. */
/* Afterwards c is the newline (stored below; putSM() turns it into a blank) or EOF. */
252: while (((c=mygetchar()) != '\n') && (c != EOF)) ;
253: }
254: if (c == EOF) break;
/* A '}' with no inner brace open closes the string and is not stored. */
255: if ((c == '}') && (level <= 0)) break;
256: if ( c == '{') ++level;
257: if ( c == '}') --level;
258: putSM(c);
259: c = mygetchar();
260: }
261: if (c=='}') c=mygetchar();
262: TypeSM = EXECUTABLE_STRING;
263: return(flushSM());
1.1 maekawa 264: }
265: } else if (isKakkoSM(c)) { /* (...) : the text between the parentheses becomes a DOLLAR token. */
/* Parentheses may nest; level counts the inner '(' still open. */
266: level = 0;
267: if (ExistSM) return(flushSM());
268: else {
1.3 takayama 269: c = mygetchar();
270: while (1) {
271: if (c == EOF) break;
272: if (c == '\\') { /* e.g. \( */
/* Store the backslash, then store the following character below without
   letting it affect the nesting level. */
273: putSM(c);
274: c = mygetchar();
275: if (c == EOF) break;
276: }else{
277: if ((c == ')') && (level <= 0)) break;
278: if ( c == '(') ++level;
279: if ( c == ')') --level;
280: }
281: putSM(c);
282: c = mygetchar();
283: }
284: if (c==')') c=mygetchar();
285: TypeSM = DOLLAR;
286: return(flushSM());
1.1 maekawa 287: }
288: } else if (c=='%') { /* comment: skip up to the newline or EOF */
289: while (((c=mygetchar()) != '\n') && (c != EOF)) ;
290: if(ExistSM) return(flushSM());
291: } else if (isSymbolSM(c)) { /* a bracket symbol is a one-character token; '{' and '(' are taken by the branches above */
292: if(ExistSM) return(flushSM());
293: else {
1.3 takayama 294: putSM(c);
295: c = mygetchar();
296: return(flushSM());
1.1 maekawa 297: }
298: } else { /* identifier */
299: putSM(c);
300: c =mygetchar();
/* Extend the identifier up to the next '$', separator, bracket symbol or EOF.
   A '%' does not stop it. */
301: while ((!isDollarSM(c)) &&
1.3 takayama 302: (!isSpaceSM(c)) &&
303: (!isSymbolSM(c)) &&
304: (c != EOF)) {
305: putSM(c);
306: c = mygetchar();
1.1 maekawa 307: }
308: return(flushSM());
309: }
310: }
311: }
312:
313: /*********** end of code part of lexical analyzer ********************/
314:
315:
/*
 * errorScanner2: report a fatal scanner error.
 * Writes "Error (scanner2.c): <str>" to stderr and exits with status 10;
 * it does not return to the caller.
 * The return type stays int, which is what the old implicit-int definition
 * and the undeclared calls earlier in this file assume.
 */
int errorScanner2(char *str)
{
  fprintf(stderr,"Error (scanner2.c): %s\n",str);
  exit(10);
  return 0; /* not reached */
}
322:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>