Annotation of OpenXM_contrib2/asir2000/gc/cord/cordxtra.c, Revision 1.1
1.1 ! noro 1: /*
! 2: * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved.
! 3: *
! 4: * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
! 5: * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
! 6: *
! 7: * Permission is hereby granted to use or copy this program
! 8: * for any purpose, provided the above notices are retained on all copies.
! 9: * Permission to modify the code and to distribute modified code is granted,
! 10: * provided the above notices are retained, and a notice that the code was
! 11: * modified is included with the above copyright notice.
! 12: *
! 13: * Author: Hans-J. Boehm (boehm@parc.xerox.com)
! 14: */
! 15: /*
! 16: * These are functions on cords that do not need to understand their
! 17: * implementation. They serve also serve as example client code for
! 18: * cord_basics.
! 19: */
! 20: /* Boehm, December 8, 1995 1:53 pm PST */
! 21: # include <stdio.h>
! 22: # include <string.h>
! 23: # include <stdlib.h>
! 24: # include <stdarg.h>
! 25: # include "cord.h"
! 26: # include "ec.h"
! 27: # define I_HIDE_POINTERS /* So we get access to allocation lock. */
! 28: /* We use this for lazy file reading, */
! 29: /* so that we remain independent */
! 30: /* of the threads primitives. */
! 31: # include "gc.h"
! 32:
! 33: /* For now we assume that pointer reads and writes are atomic, */
! 34: /* i.e. another thread always sees the state before or after */
! 35: /* a write. This might be false on a Motorola M68K with */
! 36: /* pointers that are not 32-bit aligned. But there probably */
! 37: /* aren't too many threads packages running on those. */
! 38: # define ATOMIC_WRITE(x,y) (x) = (y)
! 39: # define ATOMIC_READ(x) (*(x))
! 40:
! 41: /* The standard says these are in stdio.h, but they aren't always: */
! 42: # ifndef SEEK_SET
! 43: # define SEEK_SET 0
! 44: # endif
! 45: # ifndef SEEK_END
! 46: # define SEEK_END 2
! 47: # endif
! 48:
! 49: # define BUFSZ 2048 /* Size of stack allocated buffers when */
! 50: /* we want large buffers. */
! 51:
! 52: typedef void (* oom_fn)(void);
! 53:
! 54: # define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \
! 55: ABORT("Out of memory\n"); }
! 56: # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); }
! 57:
! 58: CORD CORD_cat_char(CORD x, char c)
! 59: {
! 60: register char * string;
! 61:
! 62: if (c == '\0') return(CORD_cat(x, CORD_nul(1)));
! 63: string = GC_MALLOC_ATOMIC(2);
! 64: if (string == 0) OUT_OF_MEMORY;
! 65: string[0] = c;
! 66: string[1] = '\0';
! 67: return(CORD_cat_char_star(x, string, 1));
! 68: }
! 69:
! 70: CORD CORD_catn(int nargs, ...)
! 71: {
! 72: register CORD result = CORD_EMPTY;
! 73: va_list args;
! 74: register int i;
! 75:
! 76: va_start(args, nargs);
! 77: for (i = 0; i < nargs; i++) {
! 78: register CORD next = va_arg(args, CORD);
! 79: result = CORD_cat(result, next);
! 80: }
! 81: va_end(args);
! 82: return(result);
! 83: }
! 84:
! 85: typedef struct {
! 86: size_t len;
! 87: size_t count;
! 88: char * buf;
! 89: } CORD_fill_data;
! 90:
! 91: int CORD_fill_proc(char c, void * client_data)
! 92: {
! 93: register CORD_fill_data * d = (CORD_fill_data *)client_data;
! 94: register size_t count = d -> count;
! 95:
! 96: (d -> buf)[count] = c;
! 97: d -> count = ++count;
! 98: if (count >= d -> len) {
! 99: return(1);
! 100: } else {
! 101: return(0);
! 102: }
! 103: }
! 104:
! 105: int CORD_batched_fill_proc(const char * s, void * client_data)
! 106: {
! 107: register CORD_fill_data * d = (CORD_fill_data *)client_data;
! 108: register size_t count = d -> count;
! 109: register size_t max = d -> len;
! 110: register char * buf = d -> buf;
! 111: register const char * t = s;
! 112:
! 113: while((buf[count] = *t++) != '\0') {
! 114: count++;
! 115: if (count >= max) {
! 116: d -> count = count;
! 117: return(1);
! 118: }
! 119: }
! 120: d -> count = count;
! 121: return(0);
! 122: }
! 123:
! 124: /* Fill buf with len characters starting at i. */
! 125: /* Assumes len characters are available. */
! 126: void CORD_fill_buf(CORD x, size_t i, size_t len, char * buf)
! 127: {
! 128: CORD_fill_data fd;
! 129:
! 130: fd.len = len;
! 131: fd.buf = buf;
! 132: fd.count = 0;
! 133: (void)CORD_iter5(x, i, CORD_fill_proc, CORD_batched_fill_proc, &fd);
! 134: }
! 135:
! 136: int CORD_cmp(CORD x, CORD y)
! 137: {
! 138: CORD_pos xpos;
! 139: CORD_pos ypos;
! 140: register size_t avail, yavail;
! 141:
! 142: if (y == CORD_EMPTY) return(x != CORD_EMPTY);
! 143: if (x == CORD_EMPTY) return(-1);
! 144: if (CORD_IS_STRING(y) && CORD_IS_STRING(x)) return(strcmp(x,y));
! 145: CORD_set_pos(xpos, x, 0);
! 146: CORD_set_pos(ypos, y, 0);
! 147: for(;;) {
! 148: if (!CORD_pos_valid(xpos)) {
! 149: if (CORD_pos_valid(ypos)) {
! 150: return(-1);
! 151: } else {
! 152: return(0);
! 153: }
! 154: }
! 155: if (!CORD_pos_valid(ypos)) {
! 156: return(1);
! 157: }
! 158: if ((avail = CORD_pos_chars_left(xpos)) <= 0
! 159: || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
! 160: register char xcurrent = CORD_pos_fetch(xpos);
! 161: register char ycurrent = CORD_pos_fetch(ypos);
! 162: if (xcurrent != ycurrent) return(xcurrent - ycurrent);
! 163: CORD_next(xpos);
! 164: CORD_next(ypos);
! 165: } else {
! 166: /* process as many characters as we can */
! 167: register int result;
! 168:
! 169: if (avail > yavail) avail = yavail;
! 170: result = strncmp(CORD_pos_cur_char_addr(xpos),
! 171: CORD_pos_cur_char_addr(ypos), avail);
! 172: if (result != 0) return(result);
! 173: CORD_pos_advance(xpos, avail);
! 174: CORD_pos_advance(ypos, avail);
! 175: }
! 176: }
! 177: }
! 178:
! 179: int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len)
! 180: {
! 181: CORD_pos xpos;
! 182: CORD_pos ypos;
! 183: register size_t count;
! 184: register long avail, yavail;
! 185:
! 186: CORD_set_pos(xpos, x, x_start);
! 187: CORD_set_pos(ypos, y, y_start);
! 188: for(count = 0; count < len;) {
! 189: if (!CORD_pos_valid(xpos)) {
! 190: if (CORD_pos_valid(ypos)) {
! 191: return(-1);
! 192: } else {
! 193: return(0);
! 194: }
! 195: }
! 196: if (!CORD_pos_valid(ypos)) {
! 197: return(1);
! 198: }
! 199: if ((avail = CORD_pos_chars_left(xpos)) <= 0
! 200: || (yavail = CORD_pos_chars_left(ypos)) <= 0) {
! 201: register char xcurrent = CORD_pos_fetch(xpos);
! 202: register char ycurrent = CORD_pos_fetch(ypos);
! 203: if (xcurrent != ycurrent) return(xcurrent - ycurrent);
! 204: CORD_next(xpos);
! 205: CORD_next(ypos);
! 206: count++;
! 207: } else {
! 208: /* process as many characters as we can */
! 209: register int result;
! 210:
! 211: if (avail > yavail) avail = yavail;
! 212: count += avail;
! 213: if (count > len) avail -= (count - len);
! 214: result = strncmp(CORD_pos_cur_char_addr(xpos),
! 215: CORD_pos_cur_char_addr(ypos), (size_t)avail);
! 216: if (result != 0) return(result);
! 217: CORD_pos_advance(xpos, (size_t)avail);
! 218: CORD_pos_advance(ypos, (size_t)avail);
! 219: }
! 220: }
! 221: return(0);
! 222: }
! 223:
! 224: char * CORD_to_char_star(CORD x)
! 225: {
! 226: register size_t len = CORD_len(x);
! 227: char * result = GC_MALLOC_ATOMIC(len + 1);
! 228:
! 229: if (result == 0) OUT_OF_MEMORY;
! 230: CORD_fill_buf(x, 0, len, result);
! 231: result[len] = '\0';
! 232: return(result);
! 233: }
! 234:
! 235: CORD CORD_from_char_star(const char *s)
! 236: {
! 237: char * result;
! 238: size_t len = strlen(s);
! 239:
! 240: if (0 == len) return(CORD_EMPTY);
! 241: result = GC_MALLOC_ATOMIC(len + 1);
! 242: if (result == 0) OUT_OF_MEMORY;
! 243: memcpy(result, s, len+1);
! 244: return(result);
! 245: }
! 246:
! 247: const char * CORD_to_const_char_star(CORD x)
! 248: {
! 249: if (x == 0) return("");
! 250: if (CORD_IS_STRING(x)) return((const char *)x);
! 251: return(CORD_to_char_star(x));
! 252: }
! 253:
! 254: char CORD_fetch(CORD x, size_t i)
! 255: {
! 256: CORD_pos xpos;
! 257:
! 258: CORD_set_pos(xpos, x, i);
! 259: if (!CORD_pos_valid(xpos)) ABORT("bad index?");
! 260: return(CORD_pos_fetch(xpos));
! 261: }
! 262:
! 263:
! 264: int CORD_put_proc(char c, void * client_data)
! 265: {
! 266: register FILE * f = (FILE *)client_data;
! 267:
! 268: return(putc(c, f) == EOF);
! 269: }
! 270:
! 271: int CORD_batched_put_proc(const char * s, void * client_data)
! 272: {
! 273: register FILE * f = (FILE *)client_data;
! 274:
! 275: return(fputs(s, f) == EOF);
! 276: }
! 277:
! 278:
! 279: int CORD_put(CORD x, FILE * f)
! 280: {
! 281: if (CORD_iter5(x, 0, CORD_put_proc, CORD_batched_put_proc, f)) {
! 282: return(EOF);
! 283: } else {
! 284: return(1);
! 285: }
! 286: }
! 287:
! 288: typedef struct {
! 289: size_t pos; /* Current position in the cord */
! 290: char target; /* Character we're looking for */
! 291: } chr_data;
! 292:
! 293: int CORD_chr_proc(char c, void * client_data)
! 294: {
! 295: register chr_data * d = (chr_data *)client_data;
! 296:
! 297: if (c == d -> target) return(1);
! 298: (d -> pos) ++;
! 299: return(0);
! 300: }
! 301:
! 302: int CORD_rchr_proc(char c, void * client_data)
! 303: {
! 304: register chr_data * d = (chr_data *)client_data;
! 305:
! 306: if (c == d -> target) return(1);
! 307: (d -> pos) --;
! 308: return(0);
! 309: }
! 310:
! 311: int CORD_batched_chr_proc(const char *s, void * client_data)
! 312: {
! 313: register chr_data * d = (chr_data *)client_data;
! 314: register char * occ = strchr(s, d -> target);
! 315:
! 316: if (occ == 0) {
! 317: d -> pos += strlen(s);
! 318: return(0);
! 319: } else {
! 320: d -> pos += occ - s;
! 321: return(1);
! 322: }
! 323: }
! 324:
! 325: size_t CORD_chr(CORD x, size_t i, int c)
! 326: {
! 327: chr_data d;
! 328:
! 329: d.pos = i;
! 330: d.target = c;
! 331: if (CORD_iter5(x, i, CORD_chr_proc, CORD_batched_chr_proc, &d)) {
! 332: return(d.pos);
! 333: } else {
! 334: return(CORD_NOT_FOUND);
! 335: }
! 336: }
! 337:
! 338: size_t CORD_rchr(CORD x, size_t i, int c)
! 339: {
! 340: chr_data d;
! 341:
! 342: d.pos = i;
! 343: d.target = c;
! 344: if (CORD_riter4(x, i, CORD_rchr_proc, &d)) {
! 345: return(d.pos);
! 346: } else {
! 347: return(CORD_NOT_FOUND);
! 348: }
! 349: }
! 350:
! 351: /* Find the first occurrence of s in x at position start or later. */
! 352: /* This uses an asymptotically poor algorithm, which should typically */
! 353: /* perform acceptably. We compare the first few characters directly, */
! 354: /* and call CORD_ncmp whenever there is a partial match. */
! 355: /* This has the advantage that we allocate very little, or not at all. */
! 356: /* It's very fast if there are few close misses. */
! 357: size_t CORD_str(CORD x, size_t start, CORD s)
! 358: {
! 359: CORD_pos xpos;
! 360: size_t xlen = CORD_len(x);
! 361: size_t slen;
! 362: register size_t start_len;
! 363: const char * s_start;
! 364: unsigned long s_buf = 0; /* The first few characters of s */
! 365: unsigned long x_buf = 0; /* Start of candidate substring. */
! 366: /* Initialized only to make compilers */
! 367: /* happy. */
! 368: unsigned long mask = 0;
! 369: register size_t i;
! 370: register size_t match_pos;
! 371:
! 372: if (s == CORD_EMPTY) return(start);
! 373: if (CORD_IS_STRING(s)) {
! 374: s_start = s;
! 375: slen = strlen(s);
! 376: } else {
! 377: s_start = CORD_to_char_star(CORD_substr(s, 0, sizeof(unsigned long)));
! 378: slen = CORD_len(s);
! 379: }
! 380: if (xlen < start || xlen - start < slen) return(CORD_NOT_FOUND);
! 381: start_len = slen;
! 382: if (start_len > sizeof(unsigned long)) start_len = sizeof(unsigned long);
! 383: CORD_set_pos(xpos, x, start);
! 384: for (i = 0; i < start_len; i++) {
! 385: mask <<= 8;
! 386: mask |= 0xff;
! 387: s_buf <<= 8;
! 388: s_buf |= s_start[i];
! 389: x_buf <<= 8;
! 390: x_buf |= CORD_pos_fetch(xpos);
! 391: CORD_next(xpos);
! 392: }
! 393: for (match_pos = start; ; match_pos++) {
! 394: if ((x_buf & mask) == s_buf) {
! 395: if (slen == start_len ||
! 396: CORD_ncmp(x, match_pos + start_len,
! 397: s, start_len, slen - start_len) == 0) {
! 398: return(match_pos);
! 399: }
! 400: }
! 401: if ( match_pos == xlen - slen ) {
! 402: return(CORD_NOT_FOUND);
! 403: }
! 404: x_buf <<= 8;
! 405: x_buf |= CORD_pos_fetch(xpos);
! 406: CORD_next(xpos);
! 407: }
! 408: }
! 409:
! 410: void CORD_ec_flush_buf(CORD_ec x)
! 411: {
! 412: register size_t len = x[0].ec_bufptr - x[0].ec_buf;
! 413: char * s;
! 414:
! 415: if (len == 0) return;
! 416: s = GC_MALLOC_ATOMIC(len+1);
! 417: memcpy(s, x[0].ec_buf, len);
! 418: s[len] = '\0';
! 419: x[0].ec_cord = CORD_cat_char_star(x[0].ec_cord, s, len);
! 420: x[0].ec_bufptr = x[0].ec_buf;
! 421: }
! 422:
! 423: void CORD_ec_append_cord(CORD_ec x, CORD s)
! 424: {
! 425: CORD_ec_flush_buf(x);
! 426: x[0].ec_cord = CORD_cat(x[0].ec_cord, s);
! 427: }
! 428:
! 429: /*ARGSUSED*/
! 430: char CORD_nul_func(size_t i, void * client_data)
! 431: {
! 432: return((char)(unsigned long)client_data);
! 433: }
! 434:
! 435:
! 436: CORD CORD_chars(char c, size_t i)
! 437: {
! 438: return(CORD_from_fn(CORD_nul_func, (void *)(unsigned long)c, i));
! 439: }
! 440:
! 441: CORD CORD_from_file_eager(FILE * f)
! 442: {
! 443: register int c;
! 444: CORD_ec ecord;
! 445:
! 446: CORD_ec_init(ecord);
! 447: for(;;) {
! 448: c = getc(f);
! 449: if (c == 0) {
! 450: /* Append the right number of NULs */
! 451: /* Note that any string of NULs is rpresented in 4 words, */
! 452: /* independent of its length. */
! 453: register size_t count = 1;
! 454:
! 455: CORD_ec_flush_buf(ecord);
! 456: while ((c = getc(f)) == 0) count++;
! 457: ecord[0].ec_cord = CORD_cat(ecord[0].ec_cord, CORD_nul(count));
! 458: }
! 459: if (c == EOF) break;
! 460: CORD_ec_append(ecord, c);
! 461: }
! 462: (void) fclose(f);
! 463: return(CORD_balance(CORD_ec_to_cord(ecord)));
! 464: }
! 465:
! 466: /* The state maintained for a lazily read file consists primarily */
! 467: /* of a large direct-mapped cache of previously read values. */
! 468: /* We could rely more on stdio buffering. That would have 2 */
! 469: /* disadvantages: */
! 470: /* 1) Empirically, not all fseek implementations preserve the */
! 471: /* buffer whenever they could. */
! 472: /* 2) It would fail if 2 different sections of a long cord */
! 473: /* were being read alternately. */
! 474: /* We do use the stdio buffer for read ahead. */
! 475: /* To guarantee thread safety in the presence of atomic pointer */
! 476: /* writes, cache lines are always replaced, and never modified in */
! 477: /* place. */
! 478:
! 479: # define LOG_CACHE_SZ 14
! 480: # define CACHE_SZ (1 << LOG_CACHE_SZ)
! 481: # define LOG_LINE_SZ 9
! 482: # define LINE_SZ (1 << LOG_LINE_SZ)
! 483:
! 484: typedef struct {
! 485: size_t tag;
! 486: char data[LINE_SZ];
! 487: /* data[i%LINE_SZ] = ith char in file if tag = i/LINE_SZ */
! 488: } cache_line;
! 489:
! 490: typedef struct {
! 491: FILE * lf_file;
! 492: size_t lf_current; /* Current file pointer value */
! 493: cache_line * volatile lf_cache[CACHE_SZ/LINE_SZ];
! 494: } lf_state;
! 495:
! 496: # define MOD_CACHE_SZ(n) ((n) & (CACHE_SZ - 1))
! 497: # define DIV_CACHE_SZ(n) ((n) >> LOG_CACHE_SZ)
! 498: # define MOD_LINE_SZ(n) ((n) & (LINE_SZ - 1))
! 499: # define DIV_LINE_SZ(n) ((n) >> LOG_LINE_SZ)
! 500: # define LINE_START(n) ((n) & ~(LINE_SZ - 1))
! 501:
! 502: typedef struct {
! 503: lf_state * state;
! 504: size_t file_pos; /* Position of needed character. */
! 505: cache_line * new_cache;
! 506: } refill_data;
! 507:
! 508: /* Executed with allocation lock. */
! 509: static char refill_cache(client_data)
! 510: refill_data * client_data;
! 511: {
! 512: register lf_state * state = client_data -> state;
! 513: register size_t file_pos = client_data -> file_pos;
! 514: FILE *f = state -> lf_file;
! 515: size_t line_start = LINE_START(file_pos);
! 516: size_t line_no = DIV_LINE_SZ(MOD_CACHE_SZ(file_pos));
! 517: cache_line * new_cache = client_data -> new_cache;
! 518:
! 519: if (line_start != state -> lf_current
! 520: && fseek(f, line_start, SEEK_SET) != 0) {
! 521: ABORT("fseek failed");
! 522: }
! 523: if (fread(new_cache -> data, sizeof(char), LINE_SZ, f)
! 524: <= file_pos - line_start) {
! 525: ABORT("fread failed");
! 526: }
! 527: new_cache -> tag = DIV_LINE_SZ(file_pos);
! 528: /* Store barrier goes here. */
! 529: ATOMIC_WRITE(state -> lf_cache[line_no], new_cache);
! 530: state -> lf_current = line_start + LINE_SZ;
! 531: return(new_cache->data[MOD_LINE_SZ(file_pos)]);
! 532: }
! 533:
! 534: char CORD_lf_func(size_t i, void * client_data)
! 535: {
! 536: register lf_state * state = (lf_state *)client_data;
! 537: register cache_line * volatile * cl_addr =
! 538: &(state -> lf_cache[DIV_LINE_SZ(MOD_CACHE_SZ(i))]);
! 539: register cache_line * cl = (cache_line *)ATOMIC_READ(cl_addr);
! 540:
! 541: if (cl == 0 || cl -> tag != DIV_LINE_SZ(i)) {
! 542: /* Cache miss */
! 543: refill_data rd;
! 544:
! 545: rd.state = state;
! 546: rd.file_pos = i;
! 547: rd.new_cache = GC_NEW_ATOMIC(cache_line);
! 548: if (rd.new_cache == 0) OUT_OF_MEMORY;
! 549: return((char)(GC_word)
! 550: GC_call_with_alloc_lock((GC_fn_type) refill_cache, &rd));
! 551: }
! 552: return(cl -> data[MOD_LINE_SZ(i)]);
! 553: }
! 554:
! 555: /*ARGSUSED*/
! 556: void CORD_lf_close_proc(void * obj, void * client_data)
! 557: {
! 558: if (fclose(((lf_state *)obj) -> lf_file) != 0) {
! 559: ABORT("CORD_lf_close_proc: fclose failed");
! 560: }
! 561: }
! 562:
! 563: CORD CORD_from_file_lazy_inner(FILE * f, size_t len)
! 564: {
! 565: register lf_state * state = GC_NEW(lf_state);
! 566: register int i;
! 567:
! 568: if (state == 0) OUT_OF_MEMORY;
! 569: if (len != 0) {
! 570: /* Dummy read to force buffer allocation. */
! 571: /* This greatly increases the probability */
! 572: /* of avoiding deadlock if buffer allocation */
! 573: /* is redirected to GC_malloc and the */
! 574: /* world is multithreaded. */
! 575: char buf[1];
! 576:
! 577: (void) fread(buf, 1, 1, f);
! 578: rewind(f);
! 579: }
! 580: state -> lf_file = f;
! 581: for (i = 0; i < CACHE_SZ/LINE_SZ; i++) {
! 582: state -> lf_cache[i] = 0;
! 583: }
! 584: state -> lf_current = 0;
! 585: GC_REGISTER_FINALIZER(state, CORD_lf_close_proc, 0, 0, 0);
! 586: return(CORD_from_fn(CORD_lf_func, state, len));
! 587: }
! 588:
! 589: CORD CORD_from_file_lazy(FILE * f)
! 590: {
! 591: register long len;
! 592:
! 593: if (fseek(f, 0l, SEEK_END) != 0) {
! 594: ABORT("Bad fd argument - fseek failed");
! 595: }
! 596: if ((len = ftell(f)) < 0) {
! 597: ABORT("Bad fd argument - ftell failed");
! 598: }
! 599: rewind(f);
! 600: return(CORD_from_file_lazy_inner(f, (size_t)len));
! 601: }
! 602:
! 603: # define LAZY_THRESHOLD (128*1024 + 1)
! 604:
! 605: CORD CORD_from_file(FILE * f)
! 606: {
! 607: register long len;
! 608:
! 609: if (fseek(f, 0l, SEEK_END) != 0) {
! 610: ABORT("Bad fd argument - fseek failed");
! 611: }
! 612: if ((len = ftell(f)) < 0) {
! 613: ABORT("Bad fd argument - ftell failed");
! 614: }
! 615: rewind(f);
! 616: if (len < LAZY_THRESHOLD) {
! 617: return(CORD_from_file_eager(f));
! 618: } else {
! 619: return(CORD_from_file_lazy_inner(f, (size_t)len));
! 620: }
! 621: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>