===================================================================
RCS file: /home/cvs/OpenXM_contrib2/asir2000/gc/mark.c,v
retrieving revision 1.4
retrieving revision 1.7
diff -u -p -r1.4 -r1.7
--- OpenXM_contrib2/asir2000/gc/mark.c	2001/04/20 07:39:19	1.4
+++ OpenXM_contrib2/asir2000/gc/mark.c	2003/06/24 05:11:33	1.7
@@ -19,6 +19,10 @@
 # include <stdio.h>
 # include "private/gc_pmark.h"
 
+#if defined(MSWIN32) && defined(__GNUC__)
+# include <excpt.h>
+#endif
+
 /* We put this here to minimize the risk of inlining.		*/
 /*VARARGS*/
 #ifdef __WATCOMC__
@@ -261,20 +265,20 @@ static void alloc_mark_stack();
 /* remains valid until all marking is complete.		*/
 /* A zero value indicates that it's OK to miss some	*/
 /* register values.					*/
-GC_bool GC_mark_some(cold_gc_frame)
-ptr_t cold_gc_frame;
-{
+/* We hold the allocation lock.  In the case of 	*/
+/* incremental collection, the world may not be stopped.*/
 #ifdef MSWIN32
-  /* Windows 98 appears to asynchronously create and remove writable	*/
-  /* memory mappings, for reasons we haven't yet understood.  Since	*/
-  /* we look for writable regions to determine the root set, we may	*/
-  /* try to mark from an address range that disappeared since we 	*/
-  /* started the collection.  Thus we have to recover from faults here. */
-  /* This code does not appear to be necessary for Windows 95/NT/2000. */
-  /* Note that this code should never generate an incremental GC write	*/
-  /* fault.								*/
-  __try {
+  /* For win32, this is called after we establish a structured	*/
+  /* exception handler, in case Windows unmaps one of our root	*/
+  /* segments.  See below.  In either case, we acquire the	*/
+  /* allocator lock long before we get here.			*/
+  GC_bool GC_mark_some_inner(cold_gc_frame)
+  ptr_t cold_gc_frame;
+#else
+  GC_bool GC_mark_some(cold_gc_frame)
+  ptr_t cold_gc_frame;
 #endif
+{
     switch(GC_mark_state) {
     	case MS_NONE:
     	    return(FALSE);
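The comment in the new GC_mark_some_inner refers to a structured exception handler established by its caller. For orientation, here is a minimal sketch of the MSVC-style wrapper that the next hunk actually builds (the GCC variant there is handcrafted); mark_step_sketch and guarded_mark_step_sketch are hypothetical stand-ins, not collector APIs:

    #include <windows.h>

    extern int mark_step_sketch(void *cold_gc_frame);  /* hypothetical inner step */

    int guarded_mark_step_sketch(void *cold_gc_frame)
    {
        __try {
            return mark_step_sketch(cold_gc_frame);
        } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
                    EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
            /* A root segment was unmapped mid-scan: report failure so */
            /* the caller can discard the mark stack and redetermine   */
            /* roots, as GC_mark_some does in the hunk below.          */
            return 0;
        }
    }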
@@ -395,23 +399,130 @@ ptr_t cold_gc_frame;
     	    ABORT("GC_mark_some: bad state");
     	    return(FALSE);
     }
+}
+
+
 #ifdef MSWIN32
-  } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
-	    EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
-#   ifdef CONDPRINT
-      if (GC_print_stats) {
-	GC_printf0("Caught ACCESS_VIOLATION in marker. "
-		   "Memory mapping disappeared.\n");
+
+# ifdef __GNUC__
+
+    typedef struct {
+      EXCEPTION_REGISTRATION ex_reg;
+      void *alt_path;
+    } ext_ex_regn;
+
+
+    static EXCEPTION_DISPOSITION mark_ex_handler(
+        struct _EXCEPTION_RECORD *ex_rec, 
+        void *est_frame,
+        struct _CONTEXT *context,
+        void *disp_ctxt)
+    {
+        if (ex_rec->ExceptionCode == STATUS_ACCESS_VIOLATION) {
+          ext_ex_regn *xer = (ext_ex_regn *)est_frame;
+
+          /* Unwind from the inner function assuming the standard */
+          /* function prologue.                                   */
+          /* Assumes code has not been compiled with              */
+          /* -fomit-frame-pointer.                                */
+          context->Esp = context->Ebp;
+          context->Ebp = *((DWORD *)context->Esp);
+          context->Esp = context->Esp - 8;
+
+          /* Resume execution at the "real" handler within the    */
+          /* wrapper function.                                    */
+          context->Eip = (DWORD )(xer->alt_path);
+
+          return ExceptionContinueExecution;
+
+        } else {
+            return ExceptionContinueSearch;
+        }
+    }
+# endif /* __GNUC__ */
+
+
+  GC_bool GC_mark_some(cold_gc_frame)
+  ptr_t cold_gc_frame;
+  {
+      GC_bool ret_val;
+
+#   ifndef __GNUC__
+      /* Windows 98 appears to asynchronously create and remove  */
+      /* writable memory mappings, for reasons we haven't yet    */
+      /* understood.  Since we look for writable regions to      */
+      /* determine the root set, we may try to mark from an      */
+      /* address range that disappeared since we started the     */
+      /* collection.  Thus we have to recover from faults here.  */
+      /* This code does not appear to be necessary for Windows   */
+      /* 95/NT/2000.  Note that this code should never generate  */
+      /* an incremental GC write fault.                          */
+
+      __try {
+
+#   else /* __GNUC__ */
+
+      /* Manually install an exception handler since GCC does    */
+      /* not yet support Structured Exception Handling (SEH) on  */
+      /* Win32.                                                  */
+
+      ext_ex_regn er;
+
+      er.alt_path = &&handle_ex;
+      er.ex_reg.handler = mark_ex_handler;
+      asm volatile ("movl %%fs:0, %0" : "=r" (er.ex_reg.prev));
+      asm volatile ("movl %0, %%fs:0" : : "r" (&er));
+
+#   endif /* __GNUC__ */
+
+      ret_val = GC_mark_some_inner(cold_gc_frame);
+
+#   ifndef __GNUC__
+
+      } __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ?
+                EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) {
+
+#   else /* __GNUC__ */
+
+      /* Prevent GCC from considering the following code unreachable */
+      /* and thus eliminating it.                                    */
+      if (er.alt_path != 0)
+          goto rm_handler;
+
+handle_ex:
+      /* Execution resumes from here on an access violation.         */
+
+#   endif /* __GNUC__ */
+
+#     ifdef CONDPRINT
+        if (GC_print_stats) {
+	  GC_printf0("Caught ACCESS_VIOLATION in marker. "
+		     "Memory mapping disappeared.\n");
+        }
+#     endif /* CONDPRINT */
+
+      /* We have bad roots on the stack.  Discard mark stack.	*/
+      /* Rescan from marked objects.  Redetermine roots.	*/
+      GC_invalidate_mark_state();
+      scan_ptr = 0;
+
+      ret_val = FALSE;
+
+#   ifndef __GNUC__
+
       }
-#   endif /* CONDPRINT */
-      /* We have bad roots on the stack.  Discard mark stack.  */
-      /* Rescan from marked objects.  Redetermine roots.	*/
-      GC_invalidate_mark_state();
-      scan_ptr = 0;
-      return FALSE;
+
+#   else /* __GNUC__ */
+
+rm_handler:
+      /* Uninstall the exception handler */
+      asm volatile ("mov %0, %%fs:0" : : "r" (er.ex_reg.prev));
+
+#   endif /* __GNUC__ */
+
+      return ret_val;
   }
 #endif /* MSWIN32 */
-}
 
 
 GC_bool GC_mark_stack_empty()
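GCC on Win32 at the time had no __try/__except, so the hunk above wires a handler into the thread's SEH chain (the list rooted at %fs:0) by hand and resumes at a label through a stored address. The following self-contained toy demonstrates the same trick under the same assumptions (32-bit x86, GCC/MinGW, frame pointer kept, i.e. no -fomit-frame-pointer); every toy_* name is illustrative and not part of the collector:

    #include <windows.h>
    #include <excpt.h>
    #include <stdio.h>

    typedef struct {
        EXCEPTION_REGISTRATION ex_reg;   /* links into the %fs:0 chain */
        void *alt_path;                  /* where to resume after a fault */
    } toy_regn;

    static EXCEPTION_DISPOSITION toy_handler(
        struct _EXCEPTION_RECORD *ex_rec, void *est_frame,
        struct _CONTEXT *context, void *disp_ctxt)
    {
        toy_regn *r = (toy_regn *)est_frame;

        (void)disp_ctxt;
        if (ex_rec->ExceptionCode != STATUS_ACCESS_VIOLATION)
            return ExceptionContinueSearch;
        /* Pop the faulting callee's frame (standard prologue assumed), */
        /* then resume at the registered alternate path in the caller.  */
        context->Esp = context->Ebp;
        context->Ebp = *((DWORD *)context->Esp);
        context->Esp = context->Esp - 8;
        context->Eip = (DWORD)(r->alt_path);
        return ExceptionContinueExecution;
    }

    __attribute__((noinline)) static void toy_fault(void)
    {
        *(volatile int *)0 = 42;         /* deliberate access violation */
    }

    int main(void)
    {
        toy_regn r;

        r.alt_path = &&recovered;
        r.ex_reg.handler = toy_handler;
        asm volatile ("movl %%fs:0, %0" : "=r" (r.ex_reg.prev));
        asm volatile ("movl %0, %%fs:0" : : "r" (&r));
        toy_fault();
    recovered:
        asm volatile ("movl %0, %%fs:0" : : "r" (r.ex_reg.prev));
        puts("recovered from access violation");
        return 0;
    }

Note that the handler pops exactly one frame, so the deliberate fault must occur in a callee of the function that registered the record, just as GC_mark_some_inner faults on behalf of GC_mark_some.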
@@ -427,21 +538,16 @@ GC_bool GC_mark_stack_empty()
 #endif
 
 /* Given a pointer to someplace other than a small object page or the */
-/* first page of a large object, return a pointer either to the	*/
-/* start of the large object or NIL.					*/
-/* In the latter case black list the address current.			*/
-/* Returns NIL without black listing if current points to a block	*/
-/* with IGNORE_OFF_PAGE set.						*/
+/* first page of a large object, either:				*/
+/*	- return a pointer to somewhere in the first page of the large */
+/*	  object, if current points to a large object.			*/
+/*	  In this case *hhdr is replaced with a pointer to the header	*/
+/*	  for the large object.						*/
+/*	- just return current if it does not point to a large object.	*/
 /*ARGSUSED*/
-# ifdef PRINT_BLACK_LIST
-  ptr_t GC_find_start(current, hhdr, source)
-  word source;
-# else
-  ptr_t GC_find_start(current, hhdr)
-# define source 0
-# endif
+ptr_t GC_find_start(current, hhdr, new_hdr_p)
 register ptr_t current;
-register hdr * hhdr;
+register hdr *hhdr, **new_hdr_p;
 {
     if (GC_all_interior_pointers) {
 	if (hhdr != 0) {
@@ -457,19 +563,16 @@ register hdr * hhdr;
 	    if ((word *)orig - (word *)current
 	         >= (ptrdiff_t)(hhdr->hb_sz)) {
 		/* Pointer past the end of the block */
-		GC_ADD_TO_BLACK_LIST_NORMAL((word)orig, source);
-		return(0);
+		return(orig);
 	    }
+	    *new_hdr_p = hhdr;
 	    return(current);
 	} else {
-	    GC_ADD_TO_BLACK_LIST_NORMAL((word)current, source);
-	    return(0);
+	    return(current);
 	}
     } else {
-	GC_ADD_TO_BLACK_LIST_NORMAL((word)current, source);
-	return(0);
+	return(current);
     }
-#   undef source
 }
 
 void GC_invalidate_mark_state()
@@ -547,13 +650,13 @@ mse * mark_stack_limit;
           /* Large length.					       */
           /* Process part of the range to avoid pushing too much on the */
           /* stack.						       */
+	  GC_ASSERT(descr < (word)GC_greatest_plausible_heap_addr
+			    - (word)GC_least_plausible_heap_addr);
 #	  ifdef PARALLEL_MARK
 #	    define SHARE_BYTES 2048
 	    if (descr > SHARE_BYTES && GC_parallel
 		&& mark_stack_top < mark_stack_limit - 1) {
 	      int new_size = (descr/2) & ~(sizeof(word)-1);
-	      GC_ASSERT(descr < GC_greatest_plausible_heap_addr
-				- GC_least_plausible_heap_addr);
 	      mark_stack_top -> mse_start = current_p;
 	      mark_stack_top -> mse_descr = new_size + sizeof(word);
 					/* makes sure we handle 	*/
@@ -579,6 +682,7 @@ mse * mark_stack_limit;
         while (descr != 0) {
           if ((signed_word)descr < 0) {
             current = *current_p;
+	    FIXUP_POINTER(current);
 	    if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) {
 	      PREFETCH(current);
               HC_PUSH_CONTENTS((ptr_t)current, mark_stack_top,
@@ -653,6 +757,7 @@ mse * mark_stack_limit;
       PREFETCH((ptr_t)limit - PREF_DIST*CACHE_LINE_SIZE);
       GC_ASSERT(limit >= current_p);
       deferred = *limit;
+      FIXUP_POINTER(deferred);
       limit = (word *)((char *)limit - ALIGNMENT);
       if ((ptr_t)deferred >= least_ha && (ptr_t)deferred < greatest_ha) {
 	PREFETCH(deferred);
@@ -662,6 +767,7 @@ mse * mark_stack_limit;
 	/* Unroll once, so we don't do too many of the prefetches	*/
 	/* based on limit.						*/
 	deferred = *limit;
+	FIXUP_POINTER(deferred);
 	limit = (word *)((char *)limit - ALIGNMENT);
 	if ((ptr_t)deferred >= least_ha && (ptr_t)deferred < greatest_ha) {
 	  PREFETCH(deferred);
@@ -676,6 +782,7 @@ mse * mark_stack_limit;
 	  /* Since HC_PUSH_CONTENTS expands to a lot of code,	*/
 	  /* we don't.						*/
 	  current = *current_p;
+	  FIXUP_POINTER(current);
 	  PREFETCH((ptr_t)current_p + PREF_DIST*CACHE_LINE_SIZE);
 	  if ((ptr_t)current >= least_ha && (ptr_t)current < greatest_ha) {
 	    /* Prefetch the contents of the object we just pushed.  It's	*/
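The four FIXUP_POINTER insertions above all follow the same pattern: fetch a candidate pointer, normalize it, then apply the least_ha/greatest_ha plausibility test. The macro itself lives in the GC's private headers, not in this file; the following is a compilable sketch of its usual shape, with POINTER_MASK as an illustrative build-time parameter rather than the collector's real definition:

    typedef unsigned long word;             /* GC's pointer-sized integer type */

    #define POINTER_MASK ((word)~0x3UL)     /* illustrative: strip two tag bits */
    #define NEED_FIXUP_POINTER 1
    #define FIXUP_POINTER(p) ((p) = (p) & POINTER_MASK)

    /* Mirrors the pattern at the four insertion points above. */
    static int plausible_after_fixup(word *current_p,
                                     word least_ha, word greatest_ha)
    {
        word current = *current_p;
        FIXUP_POINTER(current);             /* normalize before the range test */
        return current >= least_ha && current < greatest_ha;
    }

On most platforms the hook compiles to nothing and NEED_FIXUP_POINTER is 0, which is what the GC_push_all_stack changes later in this diff test.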
@@ -727,22 +834,33 @@ mse * GC_steal_mark_stack(mse * low, mse * high, mse *
     mse *top = local - 1;
     unsigned i = 0;
 
+    /* Make sure that prior writes to the mark stack are visible.	*/
+    /* On some architectures, the fact that the reads are		*/
+    /* volatile should suffice.					*/
+#   if !defined(IA64) && !defined(HP_PA) && !defined(I386)
+      GC_memory_barrier();
+#   endif
     GC_ASSERT(high >= low-1 && high - low + 1 <= GC_mark_stack_size);
     for (p = low; p <= high && i <= max; ++p) {
         word descr = *(volatile word *) &(p -> mse_descr);
+	/* In the IA64 memory model, the following volatile store is	*/
+	/* ordered after this read of descr.  Thus a thread must read	*/
+	/* the original nonzero value.  HP_PA appears to be similar,	*/
+	/* and if I'm reading the P4 spec correctly, X86 is probably	*/
+	/* also OK.  In some other cases we need a barrier.		*/
+#       if !defined(IA64) && !defined(HP_PA) && !defined(I386)
+          GC_memory_barrier();
+#       endif
 	if (descr != 0) {
 	    *(volatile word *) &(p -> mse_descr) = 0;
+	    /* More than one thread may get this entry, but that's only */
+	    /* a minor performance problem.				 */
 	    ++top;
 	    top -> mse_descr = descr;
 	    top -> mse_start = p -> mse_start;
 	    GC_ASSERT(  top -> mse_descr & GC_DS_TAGS != GC_DS_LENGTH || 
 			top -> mse_descr < GC_greatest_plausible_heap_addr
 					   - GC_least_plausible_heap_addr);
-	    /* There is no synchronization here.  We assume that at	*/
-	    /* least one thread will see the original descriptor.	*/
-	    /* Otherwise we need a barrier.				*/
-	    /* More than one thread may get this entry, but that's only */
-	    /* a minor performance problem.				 */
 	    /* If this is a big object, count it as			*/
 	    /* size/256 + 1 objects.					*/
 	    ++i;
@@ -779,7 +897,7 @@ void GC_return_mark_stack(mse * low, mse * high)
       BCOPY(low, my_start, stack_size * sizeof(mse));
       GC_ASSERT(GC_mark_stack_top = my_top);
 #     if !defined(IA64) && !defined(HP_PA)
-        GC_memory_write_barrier();
+        GC_memory_barrier();
 #     endif
 	/* On IA64, the volatile write acts as a release barrier. */
       GC_mark_stack_top = my_top + stack_size;
@@ -839,7 +957,7 @@ long GC_markers = 2;		/* Normally changed by thread-li
 						/* -specific code.	*/
 
 /* Mark using the local mark stack until the global mark stack is empty */
-/* and ther are no active workers. Update GC_first_nonempty to reflect */
+/* and there are no active workers. Update GC_first_nonempty to reflect */
 /* progress.								*/
 /* Caller does not hold mark lock.					*/
 /* Caller has already incremented GC_helper_count.  We decrement it,	*/
@@ -919,7 +1037,7 @@ void GC_mark_local(mse *local_mark_stack, int id)
 		    return;
 		}
 		/* else there's something on the stack again, or	*/
-		/* another help may push something.			*/
+		/* another helper may push something.			*/
 		GC_active_count++;
 	        GC_ASSERT(GC_active_count > 0);
 		GC_release_mark_lock();
@@ -951,8 +1069,10 @@ void GC_do_parallel_mark()
 
     GC_acquire_mark_lock();
     GC_ASSERT(I_HOLD_LOCK());
-    GC_ASSERT(!GC_help_wanted);
-    GC_ASSERT(GC_active_count == 0);
+    /* This could be a GC_ASSERT, but it seems safer to keep it on	*/
+    /* all the time, especially since it's cheap.			*/
+    if (GC_help_wanted || GC_active_count != 0 || GC_helper_count != 0)
+	ABORT("Tried to start parallel mark in bad state");
 #   ifdef PRINTSTATS
 	GC_printf1("Starting marking for mark phase number %lu\n",
 		   (unsigned long)GC_mark_no);
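The GC_steal_mark_stack changes move the two explanatory comments next to the code they describe and add conservative GC_memory_barrier() calls for weakly ordered machines. The claim protocol itself is compact enough to sketch in isolation; try_steal_sketch is hypothetical, and the struct merely mirrors the GC's mse entries:

    typedef unsigned long word;            /* as in the GC's own typedefs */
    typedef struct { char *mse_start; word mse_descr; } mse;

    /* Attempt to claim one global-stack entry for a helper thread.     */
    /* Racy by design: two threads may both copy an entry, which only   */
    /* costs duplicated work, never lost work.                          */
    static int try_steal_sketch(mse *p, mse *dst)
    {
        word descr = *(volatile word *)&p->mse_descr;

        /* On weakly ordered hardware a barrier belongs here, so the    */
        /* claiming store below cannot be ordered before this read.     */
        if (descr == 0) return 0;          /* empty or already claimed */
        *(volatile word *)&p->mse_descr = 0;   /* claim the entry */
        dst->mse_start = p->mse_start;
        dst->mse_descr = descr;
        return 1;
    }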
@@ -1341,8 +1461,8 @@ ptr_t top;
 #   define GC_least_plausible_heap_addr least_ha
 
     if (top == 0) return;
-    /* check all pointers in range and put in push if they appear */
-    /* to be valid.						   */
+    /* check all pointers in range and push if they appear	*/
+    /* to be valid.						*/
     lim = t - 1 /* longword */;
     for (p = b; p <= lim; p = (word *)(((char *)p) + ALIGNMENT)) {
 	q = *p;
@@ -1365,7 +1485,7 @@ ptr_t bottom;
 ptr_t top;
 ptr_t cold_gc_frame;
 {
-  if (GC_all_interior_pointers) {
+  if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
 #   define EAGER_BYTES 1024
     /* Push the hot end of the stack eagerly, so that register values   */
     /* saved inside GC frames are marked before they disappear.	*/
@@ -1374,12 +1494,13 @@ ptr_t cold_gc_frame;
 	GC_push_all_stack(bottom, top);
 	return;
     }
+    GC_ASSERT(bottom <= cold_gc_frame && cold_gc_frame <= top);
 #   ifdef STACK_GROWS_DOWN
-	GC_push_all_eager(bottom, cold_gc_frame);
 	GC_push_all(cold_gc_frame - sizeof(ptr_t), top);
+	GC_push_all_eager(bottom, cold_gc_frame);
 #   else /* STACK_GROWS_UP */
-	GC_push_all_eager(cold_gc_frame, top);
 	GC_push_all(bottom, cold_gc_frame + sizeof(ptr_t));
+	GC_push_all_eager(cold_gc_frame, top);
 #   endif /* STACK_GROWS_UP */
   } else {
     GC_push_all_eager(bottom, top);
@@ -1394,7 +1515,7 @@ void GC_push_all_stack(bottom, top)
 ptr_t bottom;
 ptr_t top;
 {
-  if (GC_all_interior_pointers) {
+  if (!NEED_FIXUP_POINTER && GC_all_interior_pointers) {
     GC_push_all(bottom, top);
   } else {
     GC_push_all_eager(bottom, top);
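The last three hunks guard the all-interior-pointers shortcut with !NEED_FIXUP_POINTER, forcing the word-by-word eager path whenever pointers may need fixing up, and they reorder the two pushes so the deferred push of the cold region now precedes the eager push of the hot region. A sketch of the resulting hot/cold split for the STACK_GROWS_DOWN case, with push_deferred and push_eager as hypothetical stand-ins for GC_push_all and GC_push_all_eager:

    #include <stddef.h>

    typedef char *ptr_t;

    /* Stand-ins for GC_push_all / GC_push_all_eager (hypothetical). */
    static void push_deferred(ptr_t bottom, ptr_t top) { (void)bottom; (void)top; }
    static void push_eager(ptr_t bottom, ptr_t top)    { (void)bottom; (void)top; }

    /* bottom = hot end (current sp), top = cold end (stack base). */
    static void push_stack_partially_eager_sketch(ptr_t bottom, ptr_t top,
                                                  ptr_t cold_gc_frame)
    {
        /* Cold region: changes rarely, so its push may be deferred    */
        /* and redone if the mark phase is restarted.                  */
        push_deferred(cold_gc_frame - sizeof(ptr_t), top);
        /* Hot region: register values saved in GC frames live here    */
        /* and can disappear before any rescan, so push them at once.  */
        push_eager(bottom, cold_gc_frame);
    }

The new GC_ASSERT in the diff checks exactly the precondition this sketch relies on: cold_gc_frame must lie within [bottom, top].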