Annotation of OpenXM_contrib2/asir2000/gc/include/cord.h, Revision 1.1
1.1 ! noro 1: /*
! 2: * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved.
! 3: *
! 4: * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
! 5: * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
! 6: *
! 7: * Permission is hereby granted to use or copy this program
! 8: * for any purpose, provided the above notices are retained on all copies.
! 9: * Permission to modify the code and to distribute modified code is granted,
! 10: * provided the above notices are retained, and a notice that the code was
! 11: * modified is included with the above copyright notice.
! 12: *
! 13: * Author: Hans-J. Boehm (boehm@parc.xerox.com)
! 14: */
! 15: /* Boehm, October 5, 1995 4:20 pm PDT */
! 16:
! 17: /*
! 18: * Cords are immutable character strings. A number of operations
! 19: * on long cords are much more efficient than their strings.h counterpart.
! 20: * In particular, concatenation takes constant time independent of the length
! 21: * of the arguments. (Cords are represented as trees, with internal
! 22: * nodes representing concatenation and leaves consisting of either C
! 23: * strings or a functional description of the string.)
! 24: *
! 25: * The following are reasonable applications of cords. They would perform
! 26: * unacceptably if C strings were used:
! 27: * - A compiler that produces assembly language output by repeatedly
! 28: * concatenating instructions onto a cord representing the output file.
! 29: * - A text editor that converts the input file to a cord, and then
! 30: * performs editing operations by producing a new cord representing
! 31: * the file after each character change (and keeping the old ones in an
! 32: * edit history)
! 33: *
! 34: * For optimal performance, cords should be built by
! 35: * concatenating short sections.
! 36: * This interface is designed for maximum compatibility with C strings.
! 37: * ASCII NUL characters may be embedded in cords using CORD_from_fn.
! 38: * This is handled correctly, but CORD_to_char_star will produce a string
! 39: * with embedded NULs when given such a cord.
! 40: *
! 41: * This interface is fairly big, largely for performance reasons.
! 42: * The most basic constants and functions:
! 43: *
! 44: * CORD - the type of a cord;
! 45: * CORD_EMPTY - empty cord;
! 46: * CORD_len(cord) - length of a cord;
! 47: * CORD_cat(cord1,cord2) - concatenation of two cords;
! 48: * CORD_substr(cord, start, len) - substring (or subcord);
! 49: * CORD_pos i; CORD_FOR(i, cord) { ... CORD_pos_fetch(i) ... } -
! 50: * examine each character in a cord. CORD_pos_fetch(i) is the char.
! 51: * CORD_fetch(cord, i) - Retrieve i'th character (slowly).
! 52: * CORD_cmp(cord1, cord2) - compare two cords.
! 53: * CORD_from_file(FILE * f) - turn a read-only file into a cord.
! 54: * CORD_to_char_star(cord) - convert to C string.
! 55: * (Non-NULL C constant strings are cords.)
! 56: * CORD_printf (etc.) - cord version of printf. Use %r for cords.
! 57: */
! 58: # ifndef CORD_H
! 59:
! 60: # define CORD_H
! 61: # include <stddef.h>
! 62: # include <stdio.h>
! 63: /* Cords have type const char *. This is cheating quite a bit, and not */
! 64: /* 100% portable. But it means that nonempty character string */
! 65: /* constants may be used as cords directly, provided the string is */
! 66: /* never modified in place. The empty cord is represented by, and */
! 67: /* can be written as, 0. */
! 68:
! 69: typedef const char * CORD;
! 70:
! 71: /* An empty cord is always represented as nil */
! 72: # define CORD_EMPTY 0
! 73:
! 74: /* Is a nonempty cord represented as a C string? */
! 75: #define CORD_IS_STRING(s) (*(s) != '\0')
! 76:
! 77: /* Concatenate two cords. If the arguments are C strings, they may */
! 78: /* not be subsequently altered. */
! 79: CORD CORD_cat(CORD x, CORD y);
! 80:
! 81: /* Concatenate a cord and a C string with known length. Except for the */
! 82: /* empty string case, this is a special case of CORD_cat. Since the */
! 83: /* length is known, it can be faster. */
! 84: /* The string y is shared with the resulting CORD. Hence it should */
! 85: /* not be altered by the caller. */
! 86: CORD CORD_cat_char_star(CORD x, const char * y, size_t leny);
! 87:
! 88: /* Compute the length of a cord */
! 89: size_t CORD_len(CORD x);
! 90:
! 91: /* Cords may be represented by functions defining the ith character */
! 92: typedef char (* CORD_fn)(size_t i, void * client_data);
! 93:
! 94: /* Turn a functional description into a cord. */
! 95: CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len);
! 96:
! 97: /* Return the substring (subcord really) of x with length at most n, */
! 98: /* starting at position i. (The initial character has position 0.) */
! 99: CORD CORD_substr(CORD x, size_t i, size_t n);
! 100:
! 101: /* Return the argument, but rebalanced to allow more efficient */
! 102: /* character retrieval, substring operations, and comparisons. */
! 103: /* This is useful only for cords that were built using repeated */
! 104: /* concatenation. Guarantees log time access to the result, unless */
! 105: /* x was obtained through a large number of repeated substring ops */
! 106: /* or the embedded functional descriptions take longer to evaluate. */
! 107: /* May reallocate significant parts of the cord. The argument is not */
! 108: /* modified; only the result is balanced. */
! 109: CORD CORD_balance(CORD x);
! 110:
! 111: /* The following traverse a cord by applying a function to each */
! 112: /* character. This is occasionally appropriate, especially where */
! 113: /* speed is crucial. But, since C doesn't have nested functions, */
! 114: /* clients of this sort of traversal are clumsy to write. Consider */
! 115: /* the functions that operate on cord positions instead. */
! 116:
! 117: /* Function to iteratively apply to individual characters in cord. */
! 118: typedef int (* CORD_iter_fn)(char c, void * client_data);
! 119:
! 120: /* Function to apply to substrings of a cord. Each substring is a */
! 121: /* C character string, not a general cord. */
! 122: typedef int (* CORD_batched_iter_fn)(const char * s, void * client_data);
! 123: # define CORD_NO_FN ((CORD_batched_iter_fn)0)
! 124:
! 125: /* Apply f1 to each character in the cord, in ascending order, */
! 126: /* starting at position i. If */
! 127: /* f2 is not CORD_NO_FN, then multiple calls to f1 may be replaced by */
! 128: /* a single call to f2. The parameter f2 is provided only to allow */
! 129: /* some optimization by the client. This terminates when the right */
! 130: /* end of this string is reached, or when f1 or f2 return != 0. In the */
! 131: /* latter case CORD_iter5 returns != 0. Otherwise it returns 0. */
! 132: /* The specified value of i must be < CORD_len(x). */
! 133: int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
! 134: CORD_batched_iter_fn f2, void * client_data);
! 135:
! 136: /* A simpler version that starts at 0, and without f2: */
! 137: int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data);
! 138: # define CORD_iter(x, f1, cd) CORD_iter5(x, 0, f1, CORD_NO_FN, cd)
! 139:
! 140: /* Similar to CORD_iter5, but end-to-beginning. No provisions for */
! 141: /* CORD_batched_iter_fn. */
! 142: int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data);
! 143:
! 144: /* A simpler version that starts at the end: */
! 145: int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data);
! 146:
! 147: /* Functions that operate on cord positions. The easy way to traverse */
! 148: /* cords. A cord position is logically a pair consisting of a cord */
! 149: /* and an index into that cord. But it is much faster to retrieve a */
! 150: /* character based on a position than on an index. Unfortunately, */
! 151: /* positions are big (order of a few 100 bytes), so allocate them with */
! 152: /* caution. */
! 153: /* Things in cord_pos.h should be treated as opaque, except as */
! 154: /* described below. Also note that */
! 155: /* CORD_pos_fetch, CORD_next and CORD_prev have both macro and function */
! 156: /* definitions. The former may evaluate their argument more than once. */
! 157: # include "private/cord_pos.h"
! 158:
! 159: /*
! 160: Visible definitions from above:
! 161:
! 162: typedef <OPAQUE but fairly big> CORD_pos[1];
! 163:
! 164: * Extract the cord from a position:
! 165: CORD CORD_pos_to_cord(CORD_pos p);
! 166:
! 167: * Extract the current index from a position:
! 168: size_t CORD_pos_to_index(CORD_pos p);
! 169:
! 170: * Fetch the character located at the given position:
! 171: char CORD_pos_fetch(CORD_pos p);
! 172:
! 173: * Initialize the position to refer to the given cord and index.
! 174: * Note that this is the most expensive function on positions:
! 175: void CORD_set_pos(CORD_pos p, CORD x, size_t i);
! 176:
! 177: * Advance the position to the next character.
! 178: * P must be initialized and valid.
! 179: * Invalidates p if past end:
! 180: void CORD_next(CORD_pos p);
! 181:
! 182: * Move the position to the preceding character.
! 183: * P must be initialized and valid.
! 184: * Invalidates p if past beginning:
! 185: void CORD_prev(CORD_pos p);
! 186:
! 187: * Is the position valid, i.e. inside the cord?
! 188: int CORD_pos_valid(CORD_pos p);
! 189: */
! 190: # define CORD_FOR(pos, cord) \
! 191: for (CORD_set_pos(pos, cord, 0); CORD_pos_valid(pos); CORD_next(pos))
! 192:
! 193:
! 194: /* An out of memory handler to call. May be supplied by client. */
! 195: /* Must not return. */
! 196: extern void (* CORD_oom_fn)(void);
! 197:
! 198: /* Dump the representation of x to stdout in an implementation defined */
! 199: /* manner. Intended for debugging only. */
! 200: void CORD_dump(CORD x);
! 201:
! 202: /* The following could easily be implemented by the client. They are */
! 203: /* provided in cordxtra.c for convenience. */
! 204:
! 205: /* Concatenate a character to the end of a cord. */
! 206: CORD CORD_cat_char(CORD x, char c);
! 207:
! 208: /* Concatenate n cords. */
! 209: CORD CORD_catn(int n, /* CORD */ ...);
! 210:
! 211: /* Return the character in CORD_substr(x, i, 1) */
! 212: char CORD_fetch(CORD x, size_t i);
! 213:
! 214: /* Return < 0, 0, or > 0, depending on whether x < y, x = y, x > y */
! 215: int CORD_cmp(CORD x, CORD y);
! 216:
! 217: /* A generalization that takes both starting positions for the */
! 218: /* comparison, and a limit on the number of characters to be compared. */
! 219: int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len);
! 220:
! 221: /* Find the first occurrence of s in x at position start or later. */
! 222: /* Return the position of the first character of s in x, or */
! 223: /* CORD_NOT_FOUND if there is none. */
! 224: size_t CORD_str(CORD x, size_t start, CORD s);
! 225:
! 226: /* Return a cord consisting of i copies of (possibly NUL) c. Dangerous */
! 227: /* in conjunction with CORD_to_char_star. */
! 228: /* The resulting representation takes constant space, independent of i. */
! 229: CORD CORD_chars(char c, size_t i);
! 230: # define CORD_nul(i) CORD_chars('\0', (i))
! 231:
! 232: /* Turn a file into cord. The file must be seekable. Its contents */
! 233: /* must remain constant. The file may be accessed as an immediate */
! 234: /* result of this call and/or as a result of subsequent accesses to */
! 235: /* the cord. Short files are likely to be immediately read, but */
! 236: /* long files are likely to be read on demand, possibly relying on */
! 237: /* stdio for buffering. */
! 238: /* We must have exclusive access to the descriptor f, i.e. we may */
! 239: /* read it at any time, and expect the file pointer to be */
! 240: /* where we left it. Normally this should be invoked as */
! 241: /* CORD_from_file(fopen(...)) */
! 242: /* CORD_from_file arranges to close the file descriptor when it is no */
! 243: /* longer needed (e.g. when the result becomes inaccessible). */
! 244: /* The file f must be such that ftell reflects the actual character */
! 245: /* position in the file, i.e. the number of characters that can be */
! 246: /* or were read with fread. On UNIX systems this is always true. On */
! 247: /* MS Windows systems, f must be opened in binary mode. */
! 248: CORD CORD_from_file(FILE * f);
! 249:
! 250: /* Equivalent to the above, except that the entire file will be read */
! 251: /* and the file pointer will be closed immediately. */
! 252: /* The binary mode restriction from above does not apply. */
! 253: CORD CORD_from_file_eager(FILE * f);
! 254:
! 255: /* Equivalent to the above, except that the file will be read on demand.*/
! 256: /* The binary mode restriction applies. */
! 257: CORD CORD_from_file_lazy(FILE * f);
! 258:
! 259: /* Turn a cord into a C string. The result shares no structure with */
! 260: /* x, and is thus modifiable. */
! 261: char * CORD_to_char_star(CORD x);
! 262:
! 263: /* Turn a C string into a CORD. The C string is copied, and so may */
! 264: /* subsequently be modified. */
! 265: CORD CORD_from_char_star(const char *s);
! 266:
! 267: /* Identical to CORD_to_char_star, but the result may share structure */
! 268: /* with the argument and is thus not modifiable. */
! 269: const char * CORD_to_const_char_star(CORD x);
! 270:
! 271: /* Write a cord to a file, starting at the current position. No */
! 272: /* trailing NULs or newlines are added. */
! 273: /* Returns EOF if a write error occurs, 1 otherwise. */
! 274: int CORD_put(CORD x, FILE * f);
! 275:
! 276: /* "Not found" result for the following two functions. */
! 277: # define CORD_NOT_FOUND ((size_t)(-1))
! 278:
! 279: /* A vague analog of strchr. Returns the position (an integer, not */
! 280: /* a pointer) of the first occurrence of (char) c inside x at position */
! 281: /* i or later. The value i must be < CORD_len(x). */
! 282: size_t CORD_chr(CORD x, size_t i, int c);
! 283:
! 284: /* A vague analog of strrchr. Returns index of the last occurrence */
! 285: /* of (char) c inside x at position i or earlier. The value i */
! 286: /* must be < CORD_len(x). */
! 287: size_t CORD_rchr(CORD x, size_t i, int c);
! 288:
! 289:
! 290: /* The following are also not primitive, but are implemented in */
! 291: /* cordprnt.c. They provide functionality similar to the ANSI C */
! 292: /* functions with corresponding names, but with the following */
! 293: /* additions and changes: */
! 294: /* 1. A %r conversion specification specifies a CORD argument. Field */
! 295: /* width, precision, etc. have the same semantics as for %s. */
! 296: /* (Note that %c,%C, and %S were already taken.) */
! 297: /* 2. The format string is represented as a CORD. */
! 298: /* 3. CORD_sprintf and CORD_vsprintf assign the result through the 1st argument. Unlike their ANSI C versions, there is no need to guess */
! 299: /* the correct buffer size. */
! 300: /* 4. Most of the conversions are implemented through the native */
! 301: /* vsprintf. Hence they are usually no faster, and */
! 302: /* idiosyncrasies of the native printf are preserved. However, */
! 303: /* CORD arguments to CORD_sprintf and CORD_vsprintf are NOT copied; */
! 304: /* the result shares the original structure. This may make them */
! 305: /* very efficient in some unusual applications. */
! 306: /* The format string is copied. */
! 307: /* All functions return the number of characters generated or -1 on */
! 308: /* error. This complies with the ANSI standard, but is inconsistent */
! 309: /* with some older implementations of sprintf. */
! 310:
! 311: /* The implementation of these is probably less portable than the rest */
! 312: /* of this package. */
! 313:
! 314: #ifndef CORD_NO_IO
! 315:
! 316: #include <stdarg.h>
! 317:
! 318: int CORD_sprintf(CORD * out, CORD format, ...);
! 319: int CORD_vsprintf(CORD * out, CORD format, va_list args);
! 320: int CORD_fprintf(FILE * f, CORD format, ...);
! 321: int CORD_vfprintf(FILE * f, CORD format, va_list args);
! 322: int CORD_printf(CORD format, ...);
! 323: int CORD_vprintf(CORD format, va_list args);
! 324:
! 325: #endif /* CORD_NO_IO */
! 326:
! 327: # endif /* CORD_H */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>