Annotation of OpenXM/src/kan96xx/Kan/scanner2.c, Revision 1.9
1.9 ! takayama 1: /* $OpenXM: OpenXM/src/kan96xx/Kan/scanner2.c,v 1.8 2005/07/03 11:08:54 ohara Exp $ */
1.1 maekawa 2: /* scanner2.c (SM StackMachine) */
3: /* export: struct tokens *decomposeToTokens(char *str,int *sizep);
4: scanner2.c is for getting tokens from a string.
5: */
6: #include <stdio.h>
1.8 ohara 7: #include <stdlib.h>
8: #include <string.h>
1.1 maekawa 9: #include "datatype.h"
10: #include "stackm.h"
11: struct tokens lookupTokens(struct tokens t);
12: int isLiteral(char *s);
13: struct object lookupLiteralString(char *s);
14: /**************** defined in stackm.h ****************************
1.3 takayama 15: typedef enum {INIT,GET,PUT,OPEN} actionType;
1.1 maekawa 16:
1.3 takayama 17: struct tokens{
18: char *token;
19: int kind;
20: };
1.1 maekawa 21:
22:
1.3 takayama 23: #define ID 2
24: #define DOLLAR 3 strings enclosed by dollar sign
25: #define EXECUTABLE_STRING 4 strings enclosed by {}
26: #define EXECUTABLE_ARRAY 8 Don't set it in this file.
1.1 maekawa 27: ******************************************************************/
28:
29:
30: /******* declaration-part of lexical analyzer ********************/
31: #define mygetchar() getSM()
32: /* to use getSM() ( input from StringSM ),
33: setup StringSM;
34: getokenSM2(INIT);
35: */
36:
37:
38:
39:
40: #define BUF0LIMIT 40000
41: static char *StringSM;
42: static int StrpSM = 0;
43: static char BufSMorg[BUF0LIMIT];
44: static char *BufSM = BufSMorg;
45: static int Buf0limit = BUF0LIMIT ;
46: static int ExistSM = 0;
47: static int TypeSM = ID;
48:
49: /**************** end of declaration part of lexical analyzer ******/
50:
/* Forward declarations of the file-local scanner helpers.
   Return types are spelled out (implicit int is not valid since C99) and
   parameter lists are given so that calls are type-checked. */
static int getSM(void);
static int putSM(int c);
static struct tokens flushSM(void);
static int isSpaceSM(int c);
static int isDollarSM(int c);
static int isBraceSM(int c);
static int isKakkoSM(int c);
static int isSymbolSM(int c);
/* Deliberately left without a parameter list: the definition takes
   (kind, str) but the callers in this file pass only the action, which a
   full prototype would reject. */
static struct tokens getokenSM2();
60:
1.9 ! takayama 61: extern int ScannerWhich;
! 62: extern unsigned char ScannerBuf[];
! 63: extern int ScannerPt;
! 64:
1.1 maekawa 65: /**************** code part of lexical analyzer ********************/
66:
67: struct tokens *decomposeToTokens(str,sizep)
1.3 takayama 68: char *str;
69: int *sizep;
1.1 maekawa 70: {
71: struct tokens *tArray;
72: struct tokens token;
73: int k;
74: int size;
75:
76: StringSM = (char *) sGC_malloc((strlen(str)+3)*sizeof(char));
77: if (StringSM == (char *)NULL) {
78: errorScanner2("I have no memormy.");
79: }
80: strcpy(StringSM,str);
81: getokenSM2(INIT);
82: size = 0;
83: token = getokenSM2(GET);
84: while (token.kind != -1) {
85: size++;
86: token = getokenSM2(GET);
87: }
88:
89: tArray = (struct tokens *)sGC_malloc((size+1)*sizeof(struct tokens));
90: strcpy(StringSM,str);
91: getokenSM2(INIT);
92: for (k=0; k<size; k++) {
93: tArray[k] = getokenSM2(GET);
94: }
95:
96: *sizep = size;
97: return(tArray);
98: }
99:
100:
101:
102: static int getSM()
1.3 takayama 103: /* get a letter from StringSM */
1.1 maekawa 104: {
105: int c;
1.5 takayama 106:
107: if ((StrpSM > 0) && (StringSM[StrpSM] == ',') && (StringSM[StrpSM-1] == ',')) { int i;
108: fprintf(stderr,"Warning: ,, is found: ");
109: for (i=(StrpSM-30>0?StrpSM-30:0); i<=StrpSM; i++) {
110: fprintf(stderr,"%c",StringSM[i]);
111: }
112: fprintf(stderr,"\n");
113: }
114:
1.7 takayama 115: c = (unsigned char) StringSM[StrpSM++];
1.9 ! takayama 116: if (c != 0) {
! 117: ScannerPt++; if (ScannerPt >= SCANNERBUF_SIZE) ScannerPt = 0;
! 118: ScannerBuf[ScannerPt] = c;
! 119: }
1.1 maekawa 120: if (c == '\0') {
121: StrpSM--;return(EOF);
122: } else return(c);
123: }
124:
125: static putSM(c)
1.3 takayama 126: int c;
127: /* put a letter on BufSM */
1.1 maekawa 128: {
129: char *new; int i;
130: BufSM[ExistSM++] = ((c=='\n')? ' ' : c);
131: if (ExistSM >= Buf0limit-1) {
132: new = (char *) sGC_malloc(sizeof(char *)*Buf0limit*2) ;
133: if (new == (char *)NULL) {
134: fprintf(stderr,"No more memory in parserpass0.c\n");
135: exit(18);
136: }
137: fprintf(stderr,"\nSystem Message: Increasing BufSM to %d in scanner2.c\n",Buf0limit*2);
138: for (i=0; i<Buf0limit; i++) {
139: new[i] = BufSM[i];
140: }
141: BufSM = new; Buf0limit *= 2;
142: }
143: }
144:
145: static struct tokens flushSM()
146: {
147: char *token;
148: struct tokens r;
149: if (ExistSM<=0) {
150: fprintf(stderr,"\n flushSM() is called without data. Don't use the empty string $$. \n");
151: r.token = (char *)NULL; r.kind = -10; /* -1 ==> -10 ***/
152: return(r);
153: }
154: BufSM[ExistSM] = '\0';
155: ExistSM = 0;
156: token = (char *)sGC_malloc((strlen(BufSM)+1)*sizeof(char));
157: strcpy(token,BufSM);
158: r.token = token;
159: r.kind = TypeSM;
1.6 takayama 160: r.tflag = 0;
1.1 maekawa 161: if (r.kind == ID) {
162: if (isLiteral(r.token)) {
1.3 takayama 163: r.object = lookupLiteralString(r.token);
1.1 maekawa 164: }else{
165: r = lookupTokens(r); /* Compute hashing values */
166: }
167: }
168: return(r);
169: }
170:
/*
 * Separator test: returns 1 when c is a comma or any character code not
 * above the blank (control characters, newline, tab, ...), 0 otherwise.
 * EOF is explicitly not a separator even though it compares below ' '.
 */
static int isSpaceSM(int c)
{
  if (c == EOF) return(0);
  return((c <= ' ' || c == ',') ? 1 : 0);
}
177:
/* Returns 1 when c is the dollar sign that delimits a string, 0 otherwise. */
static int isDollarSM(int c)
{
  return(c == '$' ? 1 : 0);
}
184:
/* Returns 1 when c is an opening brace '{', 0 otherwise. */
static int isBraceSM(int c)
{
  return(c == '{' ? 1 : 0);
}
191:
/* Returns 1 when c is an opening parenthesis '(', 0 otherwise. */
static int isKakkoSM(int c)
{
  return(c == '(' ? 1 : 0);
}
198:
/*
 * Returns 1 when c is one of the bracket characters { } [ ] ( ),
 * 0 otherwise.
 */
static int isSymbolSM(int c)
{
  switch (c) {
  case '{': case '}':
  case '[': case ']':
  case '(': case ')':
    return(1);
  default:
    return(0);
  }
}
211:
212: static struct tokens getokenSM2(kind,str)
1.3 takayama 213: actionType kind;
214: char *str;
1.1 maekawa 215: {
216: static int c;
217: static struct tokens rnull;
218: int level;
219:
220: if (kind == INIT) {
1.9 ! takayama 221: ScannerWhich = 2;
! 222: ScannerPt = 0;
! 223: ScannerBuf[0] = 0;
! 224:
1.1 maekawa 225: StrpSM = 0;
226: ExistSM = 0;
227:
228: c = mygetchar();
229: rnull.token = (char *)NULL; rnull.kind = -1;
230: return(rnull);
231: }
232:
233:
234:
235: for (;;) {
236: TypeSM = ID;
237: if (c == EOF) {
238: if (ExistSM) return(flushSM());
239: else return(rnull);
240: } else if (isSpaceSM(c)) {
241: if (ExistSM) {
1.3 takayama 242: c = mygetchar(); return(flushSM());
1.1 maekawa 243: }else {
1.3 takayama 244: while (isSpaceSM(c=mygetchar())) ;
1.1 maekawa 245: }
246: } else if (isDollarSM(c)) { /* output contents in dollar signs. */
247: if (ExistSM) return(flushSM());
248: else {
1.3 takayama 249: c = mygetchar();
250: while ((c != EOF) && (c != '$')) {
251: putSM(c);
252: c = mygetchar();
253: }
254: if (c=='$') c=mygetchar();
255: TypeSM = DOLLAR;
256: return(flushSM());
1.1 maekawa 257: }
258: } else if (isBraceSM(c)) { /* output contents in { } */
259: /* { { } } */
260: level = 0;
261: if (ExistSM) return(flushSM());
262: else {
1.3 takayama 263: c = mygetchar();
264: while (1) {
265: if (c == '%') { /* skip the comment in the brace. */
266: while (((c=mygetchar()) != '\n') && (c != EOF)) ;
267: }
268: if (c == EOF) break;
269: if ((c == '}') && (level <= 0)) break;
270: if ( c == '{') ++level;
271: if ( c == '}') --level;
272: putSM(c);
273: c = mygetchar();
274: }
275: if (c=='}') c=mygetchar();
276: TypeSM = EXECUTABLE_STRING;
277: return(flushSM());
1.1 maekawa 278: }
279: } else if (isKakkoSM(c)) { /* output contents in ( ) */
280: level = 0;
281: if (ExistSM) return(flushSM());
282: else {
1.3 takayama 283: c = mygetchar();
284: while (1) {
285: if (c == EOF) break;
286: if (c == '\\') { /* e.g. \( */
287: putSM(c);
288: c = mygetchar();
289: if (c == EOF) break;
290: }else{
291: if ((c == ')') && (level <= 0)) break;
292: if ( c == '(') ++level;
293: if ( c == ')') --level;
294: }
295: putSM(c);
296: c = mygetchar();
297: }
298: if (c==')') c=mygetchar();
299: TypeSM = DOLLAR;
300: return(flushSM());
1.1 maekawa 301: }
302: } else if (c=='%') { /* comment */
303: while (((c=mygetchar()) != '\n') && (c != EOF)) ;
304: if(ExistSM) return(flushSM());
305: } else if (isSymbolSM(c)) { /* symbols. {,} etc */
306: if(ExistSM) return(flushSM());
307: else {
1.3 takayama 308: putSM(c);
309: c = mygetchar();
310: return(flushSM());
1.1 maekawa 311: }
312: } else { /* identifier */
313: putSM(c);
314: c =mygetchar();
315: while ((!isDollarSM(c)) &&
1.3 takayama 316: (!isSpaceSM(c)) &&
317: (!isSymbolSM(c)) &&
318: (c != EOF)) {
319: putSM(c);
320: c = mygetchar();
1.1 maekawa 321: }
322: return(flushSM());
323: }
324: }
325: }
326:
327: /*********** end of code part of lexical analyzer ********************/
328:
329:
/*
 * Report a fatal scanner error and terminate.
 *
 *   str  message text, printed on stderr after the "Error (scanner2.c): "
 *        prefix.
 *
 * Does not return: the process exits with status 10.  The int return type
 * only spells out what the former implicit-int definition meant.
 */
int errorScanner2(char *str)
{
  fprintf(stderr,"Error (scanner2.c): %s\n",str);
  exit(10);
  /* not reached */
}
336:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>