Annotation of OpenXM/src/kan96xx/Kan/scanner2.c, Revision 1.8
1.8 ! ohara 1: /* $OpenXM: OpenXM/src/kan96xx/Kan/scanner2.c,v 1.7 2005/01/23 02:41:17 takayama Exp $ */
1.1 maekawa 2: /* scanner2.c (SM StackMachine) */
/* export: struct tokens *decomposeToTokens(char *str,int *sizep);
   scanner2.c is for getting tokens from a string.
*/
6: #include <stdio.h>
1.8 ! ohara 7: #include <stdlib.h>
! 8: #include <string.h>
1.1 maekawa 9: #include "datatype.h"
10: #include "stackm.h"
/* Helpers that are not defined in this file; flushSM() calls them to
   finish tokens of kind ID. */
extern struct tokens lookupTokens(struct tokens t);
extern int isLiteral(char *s);
extern struct object lookupLiteralString(char *s);
14: /**************** defined in stackm.h ****************************
1.3 takayama 15: typedef enum {INIT,GET,PUT,OPEN} actionType;
1.1 maekawa 16:
1.3 takayama 17: struct tokens{
18: char *token;
19: int kind;
20: };
1.1 maekawa 21:
22:
1.3 takayama 23: #define ID 2
24: #define DOLLAR 3 strings enclosed by dollar sign
25: #define EXECUTABLE_STRING 4 strings enclosed by {}
26: #define EXECUTABLE_ARRAY 8 Don't set it in this file.
1.1 maekawa 27: ******************************************************************/
28:
29:
/******* declaration part of lexical analyzer ********************/
/* The scanner reads its input through mygetchar(), which expands to
   getSM(), i.e. the characters are taken from the string StringSM.
   To scan a string:
     set up StringSM;
     getokenSM2(INIT);
*/
#define mygetchar() (getSM())
36:
37:
38:
39:
40: #define BUF0LIMIT 40000
41: static char *StringSM;
42: static int StrpSM = 0;
43: static char BufSMorg[BUF0LIMIT];
44: static char *BufSM = BufSMorg;
45: static int Buf0limit = BUF0LIMIT ;
46: static int ExistSM = 0;
47: static int TypeSM = ID;
48:
/**************** end of declaration part of lexical analyzer ******/
50:
/* Forward declarations of the file-local scanner routines.
   The return types are spelled out (implicit int is not valid since C99)
   and the parameter lists are given as prototypes. */
static int getSM(void);
static int putSM(int c);
static struct tokens flushSM(void);
static int isSpaceSM(int c);
static int isDollarSM(int c);
static int isBraceSM(int c);
static int isKakkoSM(int c);
static int isSymbolSM(int c);
/* Deliberately left without a parameter list: the definition takes
   (kind,str) while every call in this file passes only the action, so a
   full prototype here would reject those calls. */
static struct tokens getokenSM2();
60:
/**************** code part of lexical analyzer ********************/
62:
63: struct tokens *decomposeToTokens(str,sizep)
1.3 takayama 64: char *str;
65: int *sizep;
1.1 maekawa 66: {
67: struct tokens *tArray;
68: struct tokens token;
69: int k;
70: int size;
71:
72: StringSM = (char *) sGC_malloc((strlen(str)+3)*sizeof(char));
73: if (StringSM == (char *)NULL) {
74: errorScanner2("I have no memormy.");
75: }
76: strcpy(StringSM,str);
77: getokenSM2(INIT);
78: size = 0;
79: token = getokenSM2(GET);
80: while (token.kind != -1) {
81: size++;
82: token = getokenSM2(GET);
83: }
84:
85: tArray = (struct tokens *)sGC_malloc((size+1)*sizeof(struct tokens));
86: strcpy(StringSM,str);
87: getokenSM2(INIT);
88: for (k=0; k<size; k++) {
89: tArray[k] = getokenSM2(GET);
90: }
91:
92: *sizep = size;
93: return(tArray);
94: }
95:
96:
97:
98: static int getSM()
1.3 takayama 99: /* get a letter from StringSM */
1.1 maekawa 100: {
101: int c;
1.5 takayama 102:
103: if ((StrpSM > 0) && (StringSM[StrpSM] == ',') && (StringSM[StrpSM-1] == ',')) { int i;
104: fprintf(stderr,"Warning: ,, is found: ");
105: for (i=(StrpSM-30>0?StrpSM-30:0); i<=StrpSM; i++) {
106: fprintf(stderr,"%c",StringSM[i]);
107: }
108: fprintf(stderr,"\n");
109: }
110:
1.7 takayama 111: c = (unsigned char) StringSM[StrpSM++];
1.1 maekawa 112: if (c == '\0') {
113: StrpSM--;return(EOF);
114: } else return(c);
115: }
116:
117: static putSM(c)
1.3 takayama 118: int c;
119: /* put a letter on BufSM */
1.1 maekawa 120: {
121: char *new; int i;
122: BufSM[ExistSM++] = ((c=='\n')? ' ' : c);
123: if (ExistSM >= Buf0limit-1) {
124: new = (char *) sGC_malloc(sizeof(char *)*Buf0limit*2) ;
125: if (new == (char *)NULL) {
126: fprintf(stderr,"No more memory in parserpass0.c\n");
127: exit(18);
128: }
129: fprintf(stderr,"\nSystem Message: Increasing BufSM to %d in scanner2.c\n",Buf0limit*2);
130: for (i=0; i<Buf0limit; i++) {
131: new[i] = BufSM[i];
132: }
133: BufSM = new; Buf0limit *= 2;
134: }
135: }
136:
137: static struct tokens flushSM()
138: {
139: char *token;
140: struct tokens r;
141: if (ExistSM<=0) {
142: fprintf(stderr,"\n flushSM() is called without data. Don't use the empty string $$. \n");
143: r.token = (char *)NULL; r.kind = -10; /* -1 ==> -10 ***/
144: return(r);
145: }
146: BufSM[ExistSM] = '\0';
147: ExistSM = 0;
148: token = (char *)sGC_malloc((strlen(BufSM)+1)*sizeof(char));
149: strcpy(token,BufSM);
150: r.token = token;
151: r.kind = TypeSM;
1.6 takayama 152: r.tflag = 0;
1.1 maekawa 153: if (r.kind == ID) {
154: if (isLiteral(r.token)) {
1.3 takayama 155: r.object = lookupLiteralString(r.token);
1.1 maekawa 156: }else{
157: r = lookupTokens(r); /* Compute hashing values */
158: }
159: }
160: return(r);
161: }
162:
/*
 * Return 1 if c separates tokens, 0 otherwise.
 * Every character code up to and including ' ' is a separator, and so
 * is ','.  EOF is tested explicitly because it is negative and would
 * otherwise satisfy c <= ' '.
 */
static int isSpaceSM(int c)
{
  if (c == EOF) return(0);
  return((c <= ' ' || c == ',') ? 1 : 0);
}
169:
/* Return 1 if c is the dollar sign '$', 0 otherwise. */
static int isDollarSM(int c)
{
  return((c == '$') ? 1 : 0);
}
176:
/* Return 1 if c is the opening brace '{', 0 otherwise. */
static int isBraceSM(int c)
{
  return((c == '{') ? 1 : 0);
}
183:
/* Return 1 if c is the opening parenthesis '(', 0 otherwise. */
static int isKakkoSM(int c)
{
  return((c == '(') ? 1 : 0);
}
190:
/*
 * Return 1 if c is one of the bracket characters { } [ ] ( ),
 * 0 otherwise.
 */
static int isSymbolSM(int c)
{
  switch (c) {
  case '{':
  case '}':
  case '[':
  case ']':
  case '(':
  case ')':
    return(1);
  default:
    return(0);
  }
}
203:
204: static struct tokens getokenSM2(kind,str)
1.3 takayama 205: actionType kind;
206: char *str;
1.1 maekawa 207: {
208: static int c;
209: static struct tokens rnull;
210: int level;
211:
212: if (kind == INIT) {
213: StrpSM = 0;
214: ExistSM = 0;
215:
216: c = mygetchar();
217: rnull.token = (char *)NULL; rnull.kind = -1;
218: return(rnull);
219: }
220:
221:
222:
223: for (;;) {
224: TypeSM = ID;
225: if (c == EOF) {
226: if (ExistSM) return(flushSM());
227: else return(rnull);
228: } else if (isSpaceSM(c)) {
229: if (ExistSM) {
1.3 takayama 230: c = mygetchar(); return(flushSM());
1.1 maekawa 231: }else {
1.3 takayama 232: while (isSpaceSM(c=mygetchar())) ;
1.1 maekawa 233: }
234: } else if (isDollarSM(c)) { /* output contents in dollar signs. */
235: if (ExistSM) return(flushSM());
236: else {
1.3 takayama 237: c = mygetchar();
238: while ((c != EOF) && (c != '$')) {
239: putSM(c);
240: c = mygetchar();
241: }
242: if (c=='$') c=mygetchar();
243: TypeSM = DOLLAR;
244: return(flushSM());
1.1 maekawa 245: }
246: } else if (isBraceSM(c)) { /* output contents in { } */
247: /* { { } } */
248: level = 0;
249: if (ExistSM) return(flushSM());
250: else {
1.3 takayama 251: c = mygetchar();
252: while (1) {
253: if (c == '%') { /* skip the comment in the brace. */
254: while (((c=mygetchar()) != '\n') && (c != EOF)) ;
255: }
256: if (c == EOF) break;
257: if ((c == '}') && (level <= 0)) break;
258: if ( c == '{') ++level;
259: if ( c == '}') --level;
260: putSM(c);
261: c = mygetchar();
262: }
263: if (c=='}') c=mygetchar();
264: TypeSM = EXECUTABLE_STRING;
265: return(flushSM());
1.1 maekawa 266: }
267: } else if (isKakkoSM(c)) { /* output contents in ( ) */
268: level = 0;
269: if (ExistSM) return(flushSM());
270: else {
1.3 takayama 271: c = mygetchar();
272: while (1) {
273: if (c == EOF) break;
274: if (c == '\\') { /* e.g. \( */
275: putSM(c);
276: c = mygetchar();
277: if (c == EOF) break;
278: }else{
279: if ((c == ')') && (level <= 0)) break;
280: if ( c == '(') ++level;
281: if ( c == ')') --level;
282: }
283: putSM(c);
284: c = mygetchar();
285: }
286: if (c==')') c=mygetchar();
287: TypeSM = DOLLAR;
288: return(flushSM());
1.1 maekawa 289: }
290: } else if (c=='%') { /* comment */
291: while (((c=mygetchar()) != '\n') && (c != EOF)) ;
292: if(ExistSM) return(flushSM());
293: } else if (isSymbolSM(c)) { /* symbols. {,} etc */
294: if(ExistSM) return(flushSM());
295: else {
1.3 takayama 296: putSM(c);
297: c = mygetchar();
298: return(flushSM());
1.1 maekawa 299: }
300: } else { /* identifier */
301: putSM(c);
302: c =mygetchar();
303: while ((!isDollarSM(c)) &&
1.3 takayama 304: (!isSpaceSM(c)) &&
305: (!isSymbolSM(c)) &&
306: (c != EOF)) {
307: putSM(c);
308: c = mygetchar();
1.1 maekawa 309: }
310: return(flushSM());
311: }
312: }
313: }
314:
/*********** end of code part of lexical analyzer ********************/
316:
317:
/*
 * Report a fatal scanner error: write str to stderr, prefixed with
 * "Error (scanner2.c): ", and terminate the process with exit status 10.
 * The function does not return; the return type is int because that is
 * what the former implicit-int definition declared.
 */
int errorScanner2(char *str)
{
  fprintf(stderr,"Error (scanner2.c): %s\n",str);
  exit(10);
  return(0); /* not reached */
}
324:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>