Annotation of OpenXM/src/kan96xx/Kan/scanner2.c, Revision 1.10
1.10 ! takayama 1: /* $OpenXM: OpenXM/src/kan96xx/Kan/scanner2.c,v 1.9 2005/07/18 10:55:16 takayama Exp $ */
1.1 maekawa 2: /* scanner2.c (SM StackMachine) */
3: /* export: struct tokens *decomposeToTokens(char *str,int *sizep);
4: scanner2.c splits a string into an array of tokens.
5: */
6: #include <stdio.h>
1.8 ohara 7: #include <stdlib.h>
8: #include <string.h>
1.1 maekawa 9: #include "datatype.h"
10: #include "stackm.h"
/* Functions that are not defined in this file; flushSM() calls them to
   finish tokens of kind ID. */
/* Called for ID tokens that are not literals; returns the token to use. */
11: struct tokens lookupTokens(struct tokens t);
/* Non-zero when s is to be treated as a literal. */
12: int isLiteral(char *s);
/* Supplies the object stored in the token when s is a literal. */
13: struct object lookupLiteralString(char *s);
14: /**************** defined in stackm.h ****************************
1.3 takayama 15: typedef enum {INIT,GET,PUT,OPEN} actionType;
1.1 maekawa 16:
1.3 takayama 17: struct tokens{
18: char *token;
19: int kind;
20: };
1.1 maekawa 21:
22:
1.3 takayama 23: #define ID 2
24: #define DOLLAR 3 strings enclosed by dollar sign
25: #define EXECUTABLE_STRING 4 strings enclosed by {}
26: #define EXECUTABLE_ARRAY 8 Don't set it in this file.
1.1 maekawa 27: ******************************************************************/
28:
29:
30: /******* declaration-part of lexical analyzer ********************/
31: #define mygetchar() getSM()
32: /* mygetchar() reads its input from StringSM through getSM().
33: To use it: store the input text in StringSM, then call
34: getokenSM2(INIT) before asking for tokens with getokenSM2(GET).
35: */
36:
37:
38:
39:
/* State shared by the scanner routines in this file. */
/* Initial capacity, in characters, of the token buffer. */
40: #define BUF0LIMIT 40000
/* Copy of the text being scanned; allocated by decomposeToTokens(). */
41: static char *StringSM;
/* Index in StringSM of the next character getSM() will read. */
42: static int StrpSM = 0;
/* Storage used for the token buffer until putSM() has to enlarge it. */
43: static char BufSMorg[BUF0LIMIT];
/* Token buffer: putSM() appends to it, flushSM() turns it into a token. */
44: static char *BufSM = BufSMorg;
/* Current capacity of BufSM; doubled by putSM() when the buffer fills. */
45: static int Buf0limit = BUF0LIMIT ;
/* Number of characters currently held in BufSM. */
46: static int ExistSM = 0;
/* Kind that flushSM() gives to the token it builds. */
47: static int TypeSM = ID;
48:
49: /**************** end of declaration part of lexical analyzer ******/
50:
/* Helpers private to this file.  They are declared with full prototypes so
   that the compiler checks the arguments of every call. */
static int getSM(void);          /* next character of StringSM, or EOF */
static void putSM(int c);        /* append c to the token buffer BufSM */
static struct tokens flushSM(void); /* turn the token buffer into a token */
static int isSpaceSM(int c);     /* separator: c <= ' ' or ',' (never EOF) */
static int isDollarSM(int c);    /* c == '$' */
static int isBraceSM(int c);     /* c == '{' */
static int isKakkoSM(int c);     /* c == '(' */
static int isSymbolSM(int c);    /* one of { } [ ] ( ) */
/* Tokenizer core.  Declared without a parameter list; callers pass only the
   action (INIT or GET). */
static struct tokens getokenSM2();

/* Prints an error message to stderr and terminates the program. */
void errorScanner2(char *str);

/* Scanner bookkeeping defined outside this file.  getokenSM2(INIT) sets
   ScannerWhich to 2 and resets ScannerPt; getSM() stores every character it
   consumes in ScannerBuf at index ScannerPt. */
extern int ScannerWhich;
extern unsigned char ScannerBuf[];
extern int ScannerPt;
66:
1.1 maekawa 67: /**************** code part of lexical analyzer ********************/
68:
69: struct tokens *decomposeToTokens(str,sizep)
1.3 takayama 70: char *str;
71: int *sizep;
1.1 maekawa 72: {
73: struct tokens *tArray;
74: struct tokens token;
75: int k;
76: int size;
77:
78: StringSM = (char *) sGC_malloc((strlen(str)+3)*sizeof(char));
79: if (StringSM == (char *)NULL) {
80: errorScanner2("I have no memormy.");
81: }
82: strcpy(StringSM,str);
83: getokenSM2(INIT);
84: size = 0;
85: token = getokenSM2(GET);
86: while (token.kind != -1) {
87: size++;
88: token = getokenSM2(GET);
89: }
90:
91: tArray = (struct tokens *)sGC_malloc((size+1)*sizeof(struct tokens));
92: strcpy(StringSM,str);
93: getokenSM2(INIT);
94: for (k=0; k<size; k++) {
95: tArray[k] = getokenSM2(GET);
96: }
97:
98: *sizep = size;
99: return(tArray);
100: }
101:
102:
103:
104: static int getSM()
1.3 takayama 105: /* get a letter from StringSM */
1.1 maekawa 106: {
107: int c;
1.5 takayama 108:
109: if ((StrpSM > 0) && (StringSM[StrpSM] == ',') && (StringSM[StrpSM-1] == ',')) { int i;
110: fprintf(stderr,"Warning: ,, is found: ");
111: for (i=(StrpSM-30>0?StrpSM-30:0); i<=StrpSM; i++) {
112: fprintf(stderr,"%c",StringSM[i]);
113: }
114: fprintf(stderr,"\n");
115: }
116:
1.7 takayama 117: c = (unsigned char) StringSM[StrpSM++];
1.9 takayama 118: if (c != 0) {
119: ScannerPt++; if (ScannerPt >= SCANNERBUF_SIZE) ScannerPt = 0;
120: ScannerBuf[ScannerPt] = c;
121: }
1.1 maekawa 122: if (c == '\0') {
123: StrpSM--;return(EOF);
124: } else return(c);
125: }
126:
1.10 ! takayama 127: static void putSM(c)
1.3 takayama 128: int c;
129: /* put a letter on BufSM */
1.1 maekawa 130: {
131: char *new; int i;
132: BufSM[ExistSM++] = ((c=='\n')? ' ' : c);
133: if (ExistSM >= Buf0limit-1) {
134: new = (char *) sGC_malloc(sizeof(char *)*Buf0limit*2) ;
135: if (new == (char *)NULL) {
136: fprintf(stderr,"No more memory in parserpass0.c\n");
137: exit(18);
138: }
139: fprintf(stderr,"\nSystem Message: Increasing BufSM to %d in scanner2.c\n",Buf0limit*2);
140: for (i=0; i<Buf0limit; i++) {
141: new[i] = BufSM[i];
142: }
143: BufSM = new; Buf0limit *= 2;
144: }
145: }
146:
147: static struct tokens flushSM()
148: {
149: char *token;
150: struct tokens r;
151: if (ExistSM<=0) {
152: fprintf(stderr,"\n flushSM() is called without data. Don't use the empty string $$. \n");
153: r.token = (char *)NULL; r.kind = -10; /* -1 ==> -10 ***/
154: return(r);
155: }
156: BufSM[ExistSM] = '\0';
157: ExistSM = 0;
158: token = (char *)sGC_malloc((strlen(BufSM)+1)*sizeof(char));
159: strcpy(token,BufSM);
160: r.token = token;
161: r.kind = TypeSM;
1.6 takayama 162: r.tflag = 0;
1.1 maekawa 163: if (r.kind == ID) {
164: if (isLiteral(r.token)) {
1.3 takayama 165: r.object = lookupLiteralString(r.token);
1.1 maekawa 166: }else{
167: r = lookupTokens(r); /* Compute hashing values */
168: }
169: }
170: return(r);
171: }
172:
/*
 * isSpaceSM -- return 1 when c separates tokens, 0 otherwise.
 * Separators are every value not greater than ' ' and the comma.
 * EOF is never a separator, although it compares less than ' '.
 */
static int isSpaceSM(int c)
{
  if (c == EOF) return(0);
  return((c <= ' ') || (c == ','));
}
179:
/* isDollarSM -- return 1 when c is the dollar sign, 0 otherwise. */
static int isDollarSM(int c)
{
  return(c == '$');
}
186:
/* isBraceSM -- return 1 when c is an opening brace '{', 0 otherwise. */
static int isBraceSM(int c)
{
  return(c == '{');
}
193:
/* isKakkoSM -- return 1 when c is an opening parenthesis '(', 0 otherwise. */
static int isKakkoSM(int c)
{
  return(c == '(');
}
200:
/*
 * isSymbolSM -- return 1 when c is one of the bracket characters
 * { } [ ] ( ), 0 otherwise.
 */
static int isSymbolSM(int c)
{
  switch (c) {
  case '{': case '}':
  case '[': case ']':
  case '(': case ')':
    return(1);
  default:
    return(0);
  }
}
213:
214: static struct tokens getokenSM2(kind,str)
1.3 takayama 215: actionType kind;
216: char *str;
1.1 maekawa 217: {
218: static int c;
219: static struct tokens rnull;
220: int level;
221:
222: if (kind == INIT) {
1.9 takayama 223: ScannerWhich = 2;
224: ScannerPt = 0;
225: ScannerBuf[0] = 0;
226:
1.1 maekawa 227: StrpSM = 0;
228: ExistSM = 0;
229:
230: c = mygetchar();
231: rnull.token = (char *)NULL; rnull.kind = -1;
232: return(rnull);
233: }
234:
235:
236:
237: for (;;) {
238: TypeSM = ID;
239: if (c == EOF) {
240: if (ExistSM) return(flushSM());
241: else return(rnull);
242: } else if (isSpaceSM(c)) {
243: if (ExistSM) {
1.3 takayama 244: c = mygetchar(); return(flushSM());
1.1 maekawa 245: }else {
1.3 takayama 246: while (isSpaceSM(c=mygetchar())) ;
1.1 maekawa 247: }
248: } else if (isDollarSM(c)) { /* output contents in dollar signs. */
249: if (ExistSM) return(flushSM());
250: else {
1.3 takayama 251: c = mygetchar();
252: while ((c != EOF) && (c != '$')) {
253: putSM(c);
254: c = mygetchar();
255: }
256: if (c=='$') c=mygetchar();
257: TypeSM = DOLLAR;
258: return(flushSM());
1.1 maekawa 259: }
260: } else if (isBraceSM(c)) { /* output contents in { } */
261: /* { { } } */
262: level = 0;
263: if (ExistSM) return(flushSM());
264: else {
1.3 takayama 265: c = mygetchar();
266: while (1) {
267: if (c == '%') { /* skip the comment in the brace. */
268: while (((c=mygetchar()) != '\n') && (c != EOF)) ;
269: }
270: if (c == EOF) break;
271: if ((c == '}') && (level <= 0)) break;
272: if ( c == '{') ++level;
273: if ( c == '}') --level;
274: putSM(c);
275: c = mygetchar();
276: }
277: if (c=='}') c=mygetchar();
278: TypeSM = EXECUTABLE_STRING;
279: return(flushSM());
1.1 maekawa 280: }
281: } else if (isKakkoSM(c)) { /* output contents in ( ) */
282: level = 0;
283: if (ExistSM) return(flushSM());
284: else {
1.3 takayama 285: c = mygetchar();
286: while (1) {
287: if (c == EOF) break;
288: if (c == '\\') { /* e.g. \( */
289: putSM(c);
290: c = mygetchar();
291: if (c == EOF) break;
292: }else{
293: if ((c == ')') && (level <= 0)) break;
294: if ( c == '(') ++level;
295: if ( c == ')') --level;
296: }
297: putSM(c);
298: c = mygetchar();
299: }
300: if (c==')') c=mygetchar();
301: TypeSM = DOLLAR;
302: return(flushSM());
1.1 maekawa 303: }
304: } else if (c=='%') { /* comment */
305: while (((c=mygetchar()) != '\n') && (c != EOF)) ;
306: if(ExistSM) return(flushSM());
307: } else if (isSymbolSM(c)) { /* symbols. {,} etc */
308: if(ExistSM) return(flushSM());
309: else {
1.3 takayama 310: putSM(c);
311: c = mygetchar();
312: return(flushSM());
1.1 maekawa 313: }
314: } else { /* identifier */
315: putSM(c);
316: c =mygetchar();
317: while ((!isDollarSM(c)) &&
1.3 takayama 318: (!isSpaceSM(c)) &&
319: (!isSymbolSM(c)) &&
320: (c != EOF)) {
321: putSM(c);
322: c = mygetchar();
1.1 maekawa 323: }
324: return(flushSM());
325: }
326: }
327: }
328:
329: /*********** end of code part of lexical analyzer ********************/
330:
331:
/*
 * errorScanner2 -- report a fatal scanner error.
 * Writes "Error (scanner2.c): " followed by str to stderr, then terminates
 * the program with exit status 10.  It does not return.
 */
void errorScanner2(char *str)
{
  fprintf(stderr,"Error (scanner2.c): %s\n",str);
  exit(10);
}
338:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>