Annotation of OpenXM_contrib/gc/cord/cordxtra.c, Revision 1.1.1.1
1.1 maekawa 1: /*
2: * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved.
3: *
4: * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
5: * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
6: *
7: * Permission is hereby granted to use or copy this program
8: * for any purpose, provided the above notices are retained on all copies.
9: * Permission to modify the code and to distribute modified code is granted,
10: * provided the above notices are retained, and a notice that the code was
11: * modified is included with the above copyright notice.
12: *
13: * Author: Hans-J. Boehm (boehm@parc.xerox.com)
14: */
15: /*
16: * These are functions on cords that do not need to understand their
17: * implementation. They serve also serve as example client code for
18: * cord_basics.
19: */
20: /* Boehm, December 8, 1995 1:53 pm PST */
21: # include <stdio.h>
22: # include <string.h>
23: # include <stdlib.h>
24: # include <stdarg.h>
25: # include "cord.h"
26: # include "ec.h"
27: # define I_HIDE_POINTERS /* So we get access to allocation lock. */
28: /* We use this for lazy file reading, */
29: /* so that we remain independent */
30: /* of the threads primitives. */
31: # include "gc.h"
32:
33: /* For now we assume that pointer reads and writes are atomic, */
34: /* i.e. another thread always sees the state before or after */
35: /* a write. This might be false on a Motorola M68K with */
36: /* pointers that are not 32-bit aligned. But there probably */
37: /* aren't too many threads packages running on those. */
38: # define ATOMIC_WRITE(x,y) (x) = (y)
39: # define ATOMIC_READ(x) (*(x))
40:
41: /* The standard says these are in stdio.h, but they aren't always: */
42: # ifndef SEEK_SET
43: # define SEEK_SET 0
44: # endif
45: # ifndef SEEK_END
46: # define SEEK_END 2
47: # endif
48:
49: # define BUFSZ 2048 /* Size of stack allocated buffers when */
50: /* we want large buffers. */
51:
52: typedef void (* oom_fn)(void);
53:
54: # define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
55: ABORT("Out of memory\n"); }
56: # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
57:
58: CORD CORD_cat_char(CORD x, char c)
59: {
60: register char * string;
61:
62: if (c == '\0') return(CORD_cat(x, CORD_nul(1)));
63: string = GC_MALLOC_ATOMIC(2);
64: if (string == 0) OUT_OF_MEMORY;
65: string[0] = c;
66: string[1] = '\0';
67: return(CORD_cat_char_star(x, string, 1));
68: }
69:
70: CORD CORD_catn(int nargs, ...)
71: {
72: register CORD result = CORD_EMPTY;
73: va_list args;
74: register int i;
75:
76: va_start(args, nargs);
77: for (i = 0; i < nargs; i++) {
78: register CORD next = va_arg(args, CORD);
79: result = CORD_cat(result, next);
80: }
81: va_end(args);
82: return(result);
83: }
84:
85: typedef struct {
86: size_t len;
87: size_t count;
88: char * buf;
89: } CORD_fill_data;
90:
91: int CORD_fill_proc(char c, void * client_data)
92: {
93: register CORD_fill_data * d = (CORD_fill_data *)client_data;
94: register size_t count = d -> count;
95:
96: (d -> buf)[count] = c;
97: d -> count = ++count;
98: if (count >= d -> len) {
99: return(1);
100: } else {
101: return(0);
102: }
103: }
104:
105: int CORD_batched_fill_proc(const char * s, void * client_data)
106: {
107: register CORD_fill_data * d = (CORD_fill_data *)client_data;
108: register size_t count = d -> count;
109: register size_t max = d -> len;
110: register char * buf = d -> buf;
111: register const char * t = s;
112:
113: while((buf[count] = *t++) != '\0') {
114: count++;
115: if (count >= max) {
116: d -> count = count;
117: return(1);
118: }
119: }
120: d -> count = count;
121: return(0);
122: }
123:
124: /* Fill buf with len characters starting at i. */
125: /* Assumes len characters are available. */
126: void CORD_fill_buf(CORD x, size_t i, size_t len, char * buf)
127: {
128: CORD_fill_data fd;
129:
130: fd.len = len;
131: fd.buf = buf;
132: fd.count = 0;
133: (void)CORD_iter5(x, i, CORD_fill_proc, CORD_batched_fill_proc, &fd);
134: }
135:
136: int CORD_cmp(CORD x, CORD y)
137: {
138: CORD_pos xpos;
139: CORD_pos ypos;
140: register size_t avail, yavail;
141:
142: if (y == CORD_EMPTY) return(x != CORD_EMPTY);
143: if (x == CORD_EMPTY) return(-1);
144: if (CORD_IS_STRING(y) && CORD_IS_STRING(x)) return(strcmp(x,y));
145: CORD_set_pos(xpos, x, 0);
146: CORD_set_pos(ypos, y, 0);
147: for(;;) {
148: if (!CORD_pos_valid(xpos)) {
149: if (CORD_pos_valid(ypos)) {
150: return(-1);
151: } else {
152: return(0);
153: }
154: }
155: if (!CORD_pos_valid(ypos)) {
156: return(1);
157: }
158: if ((avail = CORD_pos_chars_left(xpos)) <= 0
159: || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
160: register char xcurrent = CORD_pos_fetch(xpos);
161: register char ycurrent = CORD_pos_fetch(ypos);
162: if (xcurrent != ycurrent) return(xcurrent - ycurrent);
163: CORD_next(xpos);
164: CORD_next(ypos);
165: } else {
166: /* process as many characters as we can */
167: register int result;
168:
169: if (avail > yavail) avail = yavail;
170: result = strncmp(CORD_pos_cur_char_addr(xpos),
171: CORD_pos_cur_char_addr(ypos), avail);
172: if (result != 0) return(result);
173: CORD_pos_advance(xpos, avail);
174: CORD_pos_advance(ypos, avail);
175: }
176: }
177: }
178:
179: int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len)
180: {
181: CORD_pos xpos;
182: CORD_pos ypos;
183: register size_t count;
184: register long avail, yavail;
185:
186: CORD_set_pos(xpos, x, x_start);
187: CORD_set_pos(ypos, y, y_start);
188: for(count = 0; count < len;) {
189: if (!CORD_pos_valid(xpos)) {
190: if (CORD_pos_valid(ypos)) {
191: return(-1);
192: } else {
193: return(0);
194: }
195: }
196: if (!CORD_pos_valid(ypos)) {
197: return(1);
198: }
199: if ((avail = CORD_pos_chars_left(xpos)) <= 0
200: || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
201: register char xcurrent = CORD_pos_fetch(xpos);
202: register char ycurrent = CORD_pos_fetch(ypos);
203: if (xcurrent != ycurrent) return(xcurrent - ycurrent);
204: CORD_next(xpos);
205: CORD_next(ypos);
206: count++;
207: } else {
208: /* process as many characters as we can */
209: register int result;
210:
211: if (avail > yavail) avail = yavail;
212: count += avail;
213: if (count > len) avail -= (count - len);
214: result = strncmp(CORD_pos_cur_char_addr(xpos),
215: CORD_pos_cur_char_addr(ypos), (size_t)avail);
216: if (result != 0) return(result);
217: CORD_pos_advance(xpos, (size_t)avail);
218: CORD_pos_advance(ypos, (size_t)avail);
219: }
220: }
221: return(0);
222: }
223:
224: char * CORD_to_char_star(CORD x)
225: {
226: register size_t len = CORD_len(x);
227: char * result = GC_MALLOC_ATOMIC(len + 1);
228:
229: if (result == 0) OUT_OF_MEMORY;
230: CORD_fill_buf(x, 0, len, result);
231: result[len] = '\0';
232: return(result);
233: }
234:
235: CORD CORD_from_char_star(const char *s)
236: {
237: char * result;
238: size_t len = strlen(s);
239:
240: if (0 == len) return(CORD_EMPTY);
241: result = GC_MALLOC_ATOMIC(len + 1);
242: if (result == 0) OUT_OF_MEMORY;
243: memcpy(result, s, len+1);
244: return(result);
245: }
246:
247: const char * CORD_to_const_char_star(CORD x)
248: {
249: if (x == 0) return("");
250: if (CORD_IS_STRING(x)) return((const char *)x);
251: return(CORD_to_char_star(x));
252: }
253:
254: char CORD_fetch(CORD x, size_t i)
255: {
256: CORD_pos xpos;
257:
258: CORD_set_pos(xpos, x, i);
259: if (!CORD_pos_valid(xpos)) ABORT("bad index?");
260: return(CORD_pos_fetch(xpos));
261: }
262:
263:
264: int CORD_put_proc(char c, void * client_data)
265: {
266: register FILE * f = (FILE *)client_data;
267:
268: return(putc(c, f) == EOF);
269: }
270:
271: int CORD_batched_put_proc(const char * s, void * client_data)
272: {
273: register FILE * f = (FILE *)client_data;
274:
275: return(fputs(s, f) == EOF);
276: }
277:
278:
279: int CORD_put(CORD x, FILE * f)
280: {
281: if (CORD_iter5(x, 0, CORD_put_proc, CORD_batched_put_proc, f)) {
282: return(EOF);
283: } else {
284: return(1);
285: }
286: }
287:
288: typedef struct {
289: size_t pos; /* Current position in the cord */
290: char target; /* Character we're looking for */
291: } chr_data;
292:
293: int CORD_chr_proc(char c, void * client_data)
294: {
295: register chr_data * d = (chr_data *)client_data;
296:
297: if (c == d -> target) return(1);
298: (d -> pos) ++;
299: return(0);
300: }
301:
302: int CORD_rchr_proc(char c, void * client_data)
303: {
304: register chr_data * d = (chr_data *)client_data;
305:
306: if (c == d -> target) return(1);
307: (d -> pos) --;
308: return(0);
309: }
310:
311: int CORD_batched_chr_proc(const char *s, void * client_data)
312: {
313: register chr_data * d = (chr_data *)client_data;
314: register char * occ = strchr(s, d -> target);
315:
316: if (occ == 0) {
317: d -> pos += strlen(s);
318: return(0);
319: } else {
320: d -> pos += occ - s;
321: return(1);
322: }
323: }
324:
325: size_t CORD_chr(CORD x, size_t i, int c)
326: {
327: chr_data d;
328:
329: d.pos = i;
330: d.target = c;
331: if (CORD_iter5(x, i, CORD_chr_proc, CORD_batched_chr_proc, &d)) {
332: return(d.pos);
333: } else {
334: return(CORD_NOT_FOUND);
335: }
336: }
337:
338: size_t CORD_rchr(CORD x, size_t i, int c)
339: {
340: chr_data d;
341:
342: d.pos = i;
343: d.target = c;
344: if (CORD_riter4(x, i, CORD_rchr_proc, &d)) {
345: return(d.pos);
346: } else {
347: return(CORD_NOT_FOUND);
348: }
349: }
350:
351: /* Find the first occurrence of s in x at position start or later. */
352: /* This uses an asymptotically poor algorithm, which should typically */
353: /* perform acceptably. We compare the first few characters directly, */
354: /* and call CORD_ncmp whenever there is a partial match. */
355: /* This has the advantage that we allocate very little, or not at all. */
356: /* It's very fast if there are few close misses. */
357: size_t CORD_str(CORD x, size_t start, CORD s)
358: {
359: CORD_pos xpos;
360: size_t xlen = CORD_len(x);
361: size_t slen;
362: register size_t start_len;
363: const char * s_start;
364: unsigned long s_buf = 0; /* The first few characters of s */
365: unsigned long x_buf = 0; /* Start of candidate substring. */
366: /* Initialized only to make compilers */
367: /* happy. */
368: unsigned long mask = 0;
369: register size_t i;
370: register size_t match_pos;
371:
372: if (s == CORD_EMPTY) return(start);
373: if (CORD_IS_STRING(s)) {
374: s_start = s;
375: slen = strlen(s);
376: } else {
377: s_start = CORD_to_char_star(CORD_substr(s, 0, sizeof(unsigned long)));
378: slen = CORD_len(s);
379: }
380: if (xlen < start || xlen - start < slen) return(CORD_NOT_FOUND);
381: start_len = slen;
382: if (start_len > sizeof(unsigned long)) start_len = sizeof(unsigned long);
383: CORD_set_pos(xpos, x, start);
384: for (i = 0; i < start_len; i++) {
385: mask <<= 8;
386: mask |= 0xff;
387: s_buf <<= 8;
388: s_buf |= s_start[i];
389: x_buf <<= 8;
390: x_buf |= CORD_pos_fetch(xpos);
391: CORD_next(xpos);
392: }
393: for (match_pos = start; ; match_pos++) {
394: if ((x_buf & mask) == s_buf) {
395: if (slen == start_len ||
396: CORD_ncmp(x, match_pos + start_len,
397: s, start_len, slen - start_len) == 0) {
398: return(match_pos);
399: }
400: }
401: if ( match_pos == xlen - slen ) {
402: return(CORD_NOT_FOUND);
403: }
404: x_buf <<= 8;
405: x_buf |= CORD_pos_fetch(xpos);
406: CORD_next(xpos);
407: }
408: }
409:
410: void CORD_ec_flush_buf(CORD_ec x)
411: {
412: register size_t len = x[0].ec_bufptr - x[0].ec_buf;
413: char * s;
414:
415: if (len == 0) return;
416: s = GC_MALLOC_ATOMIC(len+1);
417: memcpy(s, x[0].ec_buf, len);
418: s[len] = '\0';
419: x[0].ec_cord = CORD_cat_char_star(x[0].ec_cord, s, len);
420: x[0].ec_bufptr = x[0].ec_buf;
421: }
422:
423: void CORD_ec_append_cord(CORD_ec x, CORD s)
424: {
425: CORD_ec_flush_buf(x);
426: x[0].ec_cord = CORD_cat(x[0].ec_cord, s);
427: }
428:
429: /*ARGSUSED*/
430: char CORD_nul_func(size_t i, void * client_data)
431: {
432: return((char)(unsigned long)client_data);
433: }
434:
435:
436: CORD CORD_chars(char c, size_t i)
437: {
438: return(CORD_from_fn(CORD_nul_func, (void *)(unsigned long)c, i));
439: }
440:
441: CORD CORD_from_file_eager(FILE * f)
442: {
443: register int c;
444: CORD_ec ecord;
445:
446: CORD_ec_init(ecord);
447: for(;;) {
448: c = getc(f);
449: if (c == 0) {
450: /* Append the right number of NULs */
451: /* Note that any string of NULs is rpresented in 4 words, */
452: /* independent of its length. */
453: register size_t count = 1;
454:
455: CORD_ec_flush_buf(ecord);
456: while ((c = getc(f)) == 0) count++;
457: ecord[0].ec_cord = CORD_cat(ecord[0].ec_cord, CORD_nul(count));
458: }
459: if (c == EOF) break;
460: CORD_ec_append(ecord, c);
461: }
462: (void) fclose(f);
463: return(CORD_balance(CORD_ec_to_cord(ecord)));
464: }
465:
466: /* The state maintained for a lazily read file consists primarily */
467: /* of a large direct-mapped cache of previously read values. */
468: /* We could rely more on stdio buffering. That would have 2 */
469: /* disadvantages: */
470: /* 1) Empirically, not all fseek implementations preserve the */
471: /* buffer whenever they could. */
472: /* 2) It would fail if 2 different sections of a long cord */
473: /* were being read alternately. */
474: /* We do use the stdio buffer for read ahead. */
475: /* To guarantee thread safety in the presence of atomic pointer */
476: /* writes, cache lines are always replaced, and never modified in */
477: /* place. */
478:
479: # define LOG_CACHE_SZ 14
480: # define CACHE_SZ (1 << LOG_CACHE_SZ)
481: # define LOG_LINE_SZ 9
482: # define LINE_SZ (1 << LOG_LINE_SZ)
483:
484: typedef struct {
485: size_t tag;
486: char data[LINE_SZ];
487: /* data[i%LINE_SZ] = ith char in file if tag = i/LINE_SZ */
488: } cache_line;
489:
490: typedef struct {
491: FILE * lf_file;
492: size_t lf_current; /* Current file pointer value */
493: cache_line * volatile lf_cache[CACHE_SZ/LINE_SZ];
494: } lf_state;
495:
496: # define MOD_CACHE_SZ(n) ((n) & (CACHE_SZ - 1))
497: # define DIV_CACHE_SZ(n) ((n) >> LOG_CACHE_SZ)
498: # define MOD_LINE_SZ(n) ((n) & (LINE_SZ - 1))
499: # define DIV_LINE_SZ(n) ((n) >> LOG_LINE_SZ)
500: # define LINE_START(n) ((n) & ~(LINE_SZ - 1))
501:
502: typedef struct {
503: lf_state * state;
504: size_t file_pos; /* Position of needed character. */
505: cache_line * new_cache;
506: } refill_data;
507:
508: /* Executed with allocation lock. */
509: static char refill_cache(client_data)
510: refill_data * client_data;
511: {
512: register lf_state * state = client_data -> state;
513: register size_t file_pos = client_data -> file_pos;
514: FILE *f = state -> lf_file;
515: size_t line_start = LINE_START(file_pos);
516: size_t line_no = DIV_LINE_SZ(MOD_CACHE_SZ(file_pos));
517: cache_line * new_cache = client_data -> new_cache;
518:
519: if (line_start != state -> lf_current
520: && fseek(f, line_start, SEEK_SET) != 0) {
521: ABORT("fseek failed");
522: }
523: if (fread(new_cache -> data, sizeof(char), LINE_SZ, f)
524: <= file_pos - line_start) {
525: ABORT("fread failed");
526: }
527: new_cache -> tag = DIV_LINE_SZ(file_pos);
528: /* Store barrier goes here. */
529: ATOMIC_WRITE(state -> lf_cache[line_no], new_cache);
530: state -> lf_current = line_start + LINE_SZ;
531: return(new_cache->data[MOD_LINE_SZ(file_pos)]);
532: }
533:
534: char CORD_lf_func(size_t i, void * client_data)
535: {
536: register lf_state * state = (lf_state *)client_data;
537: register cache_line * volatile * cl_addr =
538: &(state -> lf_cache[DIV_LINE_SZ(MOD_CACHE_SZ(i))]);
539: register cache_line * cl = (cache_line *)ATOMIC_READ(cl_addr);
540:
541: if (cl == 0 || cl -> tag != DIV_LINE_SZ(i)) {
542: /* Cache miss */
543: refill_data rd;
544:
545: rd.state = state;
546: rd.file_pos = i;
547: rd.new_cache = GC_NEW_ATOMIC(cache_line);
548: if (rd.new_cache == 0) OUT_OF_MEMORY;
549: return((char)(GC_word)
550: GC_call_with_alloc_lock((GC_fn_type) refill_cache, &rd));
551: }
552: return(cl -> data[MOD_LINE_SZ(i)]);
553: }
554:
555: /*ARGSUSED*/
556: void CORD_lf_close_proc(void * obj, void * client_data)
557: {
558: if (fclose(((lf_state *)obj) -> lf_file) != 0) {
559: ABORT("CORD_lf_close_proc: fclose failed");
560: }
561: }
562:
563: CORD CORD_from_file_lazy_inner(FILE * f, size_t len)
564: {
565: register lf_state * state = GC_NEW(lf_state);
566: register int i;
567:
568: if (state == 0) OUT_OF_MEMORY;
569: if (len != 0) {
570: /* Dummy read to force buffer allocation. */
571: /* This greatly increases the probability */
572: /* of avoiding deadlock if buffer allocation */
573: /* is redirected to GC_malloc and the */
574: /* world is multithreaded. */
575: char buf[1];
576:
577: (void) fread(buf, 1, 1, f);
578: rewind(f);
579: }
580: state -> lf_file = f;
581: for (i = 0; i < CACHE_SZ/LINE_SZ; i++) {
582: state -> lf_cache[i] = 0;
583: }
584: state -> lf_current = 0;
585: GC_register_finalizer(state, CORD_lf_close_proc, 0, 0, 0);
586: return(CORD_from_fn(CORD_lf_func, state, len));
587: }
588:
589: CORD CORD_from_file_lazy(FILE * f)
590: {
591: register long len;
592:
593: if (fseek(f, 0l, SEEK_END) != 0) {
594: ABORT("Bad fd argument - fseek failed");
595: }
596: if ((len = ftell(f)) < 0) {
597: ABORT("Bad fd argument - ftell failed");
598: }
599: rewind(f);
600: return(CORD_from_file_lazy_inner(f, (size_t)len));
601: }
602:
603: # define LAZY_THRESHOLD (128*1024 + 1)
604:
605: CORD CORD_from_file(FILE * f)
606: {
607: register long len;
608:
609: if (fseek(f, 0l, SEEK_END) != 0) {
610: ABORT("Bad fd argument - fseek failed");
611: }
612: if ((len = ftell(f)) < 0) {
613: ABORT("Bad fd argument - ftell failed");
614: }
615: rewind(f);
616: if (len < LAZY_THRESHOLD) {
617: return(CORD_from_file_eager(f));
618: } else {
619: return(CORD_from_file_lazy_inner(f, (size_t)len));
620: }
621: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>