=================================================================== RCS file: /home/cvs/OpenXM_contrib/gc/Attic/gc_hdrs.h,v retrieving revision 1.1.1.2 retrieving revision 1.1.1.3 diff -u -p -r1.1.1.2 -r1.1.1.3 --- OpenXM_contrib/gc/Attic/gc_hdrs.h 2000/04/14 11:07:59 1.1.1.2 +++ OpenXM_contrib/gc/Attic/gc_hdrs.h 2000/12/01 14:48:26 1.1.1.3 @@ -24,6 +24,17 @@ typedef struct hblkhdr hdr; * The 2 level tree data structure that is used to find block headers. * If there are more than 32 bits in a pointer, the top level is a hash * table. + * + * This defines HDR, GET_HDR, and SET_HDR, the main macros used to + * retrieve and set object headers. We also define some variants to + * retrieve 2 unrelated headers in interleaved fashion. This + * slightly improves scheduling. + * + * Since 5.0 alpha 5, we can also take advantage of a header lookup + * cache. This is a locally declared direct mapped cache, used inside + * the marker. The HC_GET_HDR and HC_GET_HDR2 macros use and maintain this + * cache. Assuming we get reasonable hit rates, this shaves a few + * memory references from each pointer validation. */ # if CPP_WORDSZ > 32 @@ -45,6 +56,127 @@ typedef struct hblkhdr hdr; # define TOP_SZ (1 << LOG_TOP_SZ) # define BOTTOM_SZ (1 << LOG_BOTTOM_SZ) +#ifndef SMALL_CONFIG +# define USE_HDR_CACHE +#endif + +/* #define COUNT_HDR_CACHE_HITS */ + +extern hdr * GC_invalid_header; /* header for an imaginary block */ + /* containing no objects. */ + + +/* Check whether p and corresponding hhdr point to long or invalid */ +/* object. If so, advance them to */ +/* beginning of block, or set hhdr to GC_invalid_header. */ +#define ADVANCE(p, hhdr, source) \ + if (IS_FORWARDING_ADDR_OR_NIL(hhdr)) { \ + p = GC_FIND_START(p, hhdr, (word)source); \ + if (p == 0) { \ + hhdr = GC_invalid_header; \ + } else { \ + hhdr = GC_find_header(p); \ + } \ + } + +#ifdef USE_HDR_CACHE + +# ifdef COUNT_HDR_CACHE_HITS + extern word GC_hdr_cache_hits; + extern word GC_hdr_cache_misses; +# define HC_HIT() ++GC_hdr_cache_hits +# define HC_MISS() ++GC_hdr_cache_misses +# else +# define HC_HIT() +# define HC_MISS() +# endif + + typedef struct hce { + word block_addr; /* right shifted by LOG_HBLKSIZE */ + hdr * hce_hdr; + } hdr_cache_entry; + +# define HDR_CACHE_SIZE 8 /* power of 2 */ + +# define DECLARE_HDR_CACHE \ + hdr_cache_entry hdr_cache[HDR_CACHE_SIZE] + +# define INIT_HDR_CACHE BZERO(hdr_cache, sizeof(hdr_cache)); + +# define HCE(h) hdr_cache + (((word)(h) >> LOG_HBLKSIZE) & (HDR_CACHE_SIZE-1)) + +# define HCE_VALID_FOR(hce,h) ((hce) -> block_addr == \ + ((word)(h) >> LOG_HBLKSIZE)) + +# define HCE_HDR(h) ((hce) -> hce_hdr) + + +/* Analogous to GET_HDR, except that in the case of large objects, it */ +/* Returns the header for the object beginning, and updates p. */ +/* Returns &GC_bad_header instead of 0. All of this saves a branch */ +/* in the fast path. */ +# define HC_GET_HDR(p, hhdr, source) \ + { \ + hdr_cache_entry * hce = HCE(p); \ + if (HCE_VALID_FOR(hce, p)) { \ + HC_HIT(); \ + hhdr = hce -> hce_hdr; \ + } else { \ + HC_MISS(); \ + GET_HDR(p, hhdr); \ + ADVANCE(p, hhdr, source); \ + hce -> block_addr = (word)(p) >> LOG_HBLKSIZE; \ + hce -> hce_hdr = hhdr; \ + } \ + } + +# define HC_GET_HDR2(p1, hhdr1, source1, p2, hhdr2, source2) \ + { \ + hdr_cache_entry * hce1 = HCE(p1); \ + hdr_cache_entry * hce2 = HCE(p2); \ + if (HCE_VALID_FOR(hce1, p1)) { \ + HC_HIT(); \ + hhdr1 = hce1 -> hce_hdr; \ + } else { \ + HC_MISS(); \ + GET_HDR(p1, hhdr1); \ + ADVANCE(p1, hhdr1, source1); \ + hce1 -> block_addr = (word)(p1) >> LOG_HBLKSIZE; \ + hce1 -> hce_hdr = hhdr1; \ + } \ + if (HCE_VALID_FOR(hce2, p2)) { \ + HC_HIT(); \ + hhdr2 = hce2 -> hce_hdr; \ + } else { \ + HC_MISS(); \ + GET_HDR(p2, hhdr2); \ + ADVANCE(p2, hhdr2, source2); \ + hce2 -> block_addr = (word)(p2) >> LOG_HBLKSIZE; \ + hce2 -> hce_hdr = hhdr2; \ + } \ + } + +#else /* !USE_HDR_CACHE */ + +# define DECLARE_HDR_CACHE + +# define INIT_HDR_CACHE + +# define HC_GET_HDR(p, hhdr, source) \ + { \ + GET_HDR(p, hhdr); \ + ADVANCE(p, hhdr, source); \ + } + +# define HC_GET_HDR2(p1, hhdr1, source1, p2, hhdr2, source2) \ + { \ + GET_HDR2(p1, hhdr1, p2, hhdr2); \ + ADVANCE(p1, hhdr1, source1); \ + ADVANCE(p2, hhdr2, source2); \ + } + +#endif + typedef struct bi { hdr * index[BOTTOM_SZ]; /* @@ -97,6 +229,8 @@ typedef struct bi { # define GET_HDR(p, hhdr) (hhdr) = HDR(p) # define SET_HDR(p, hhdr) HDR_INNER(p) = (hhdr) # define GET_HDR_ADDR(p, ha) (ha) = &(HDR_INNER(p)) +# define GET_HDR2(p1, hhdr1, p2, hhdr2) \ + { GET_HDR(p1, hhdr1); GET_HDR(p2, hhdr2); } # else /* hash */ /* Hash function for tree top level */ # define TL_HASH(hi) ((hi) & (TOP_SZ - 1)) @@ -123,6 +257,40 @@ typedef struct bi { # define SET_HDR(p, hhdr) { register hdr ** _ha; GET_HDR_ADDR(p, _ha); \ *_ha = (hhdr); } # define HDR(p) GC_find_header((ptr_t)(p)) + /* And some interleaved versions for two pointers at once. */ + /* This hopefully helps scheduling on processors like IA64. */ +# define GET_BI2(p1, bottom_indx1, p2, bottom_indx2) \ + { \ + register word hi1 = \ + (word)(p1) >> (LOG_BOTTOM_SZ + LOG_HBLKSIZE); \ + register word hi2 = \ + (word)(p2) >> (LOG_BOTTOM_SZ + LOG_HBLKSIZE); \ + register bottom_index * _bi1 = GC_top_index[TL_HASH(hi1)]; \ + register bottom_index * _bi2 = GC_top_index[TL_HASH(hi2)]; \ + \ + while (_bi1 -> key != hi1 && _bi1 != GC_all_nils) \ + _bi1 = _bi1 -> hash_link; \ + while (_bi2 -> key != hi2 && _bi2 != GC_all_nils) \ + _bi2 = _bi2 -> hash_link; \ + (bottom_indx1) = _bi1; \ + (bottom_indx2) = _bi2; \ + } +# define GET_HDR_ADDR2(p1, ha1, p2, ha2) \ + { \ + register bottom_index * bi1; \ + register bottom_index * bi2; \ + \ + GET_BI2(p1, bi1, p2, bi2); \ + (ha1) = &(HDR_FROM_BI(bi1, p1)); \ + (ha2) = &(HDR_FROM_BI(bi2, p2)); \ + } +# define GET_HDR2(p1, hhdr1, p2, hhdr2) \ + { register hdr ** _ha1; \ + register hdr ** _ha2; \ + GET_HDR_ADDR2(p1, _ha1, p2, _ha2); \ + (hhdr1) = *_ha1; \ + (hhdr2) = *_ha2; \ + } # endif /* Is the result a forwarding address to someplace closer to the */