Skip to content

Commit 7eadf5b

Browse files
Haoyu Likstefanj
authored andcommitted
8220465: Use shadow regions for faster ParallelGC full GCs
Reviewed-by: sjohanss, tschatzl
1 parent 026eac2 commit 7eadf5b

4 files changed

Lines changed: 326 additions & 40 deletions

File tree

src/hotspot/share/gc/parallel/psCompactionManager.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ ParCompactionManager::ObjArrayTaskQueueSet*
4949
ObjectStartArray* ParCompactionManager::_start_array = NULL;
5050
ParMarkBitMap* ParCompactionManager::_mark_bitmap = NULL;
5151
RegionTaskQueueSet* ParCompactionManager::_region_array = NULL;
52+
GrowableArray<size_t >* ParCompactionManager::_shadow_region_array = NULL;
53+
Monitor* ParCompactionManager::_shadow_region_monitor = NULL;
5254

5355
ParCompactionManager::ParCompactionManager() :
5456
_action(CopyAndUpdate) {
@@ -99,6 +101,11 @@ void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
99101
"Could not create ParCompactionManager");
100102
assert(ParallelScavengeHeap::heap()->workers().total_workers() != 0,
101103
"Not initialized?");
104+
105+
_shadow_region_array = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<size_t >(10, true);
106+
107+
_shadow_region_monitor = new Monitor(Mutex::barrier, "CompactionManager monitor",
108+
Mutex::_allow_vm_block_flag, Monitor::_safepoint_check_never);
102109
}
103110

104111
void ParCompactionManager::reset_all_bitmap_query_caches() {
@@ -163,3 +170,33 @@ void ParCompactionManager::drain_region_stacks() {
163170
}
164171
} while (!region_stack()->is_empty());
165172
}
173+
174+
size_t ParCompactionManager::pop_shadow_region_mt_safe(PSParallelCompact::RegionData* region_ptr) {
175+
MonitorLocker ml(_shadow_region_monitor, Mutex::_no_safepoint_check_flag);
176+
while (true) {
177+
if (!_shadow_region_array->is_empty()) {
178+
return _shadow_region_array->pop();
179+
}
180+
// Check if the corresponding heap region is available now.
181+
// If so, we don't need to get a shadow region anymore, and
182+
// we return InvalidShadow to indicate such a case.
183+
if (region_ptr->claimed()) {
184+
return InvalidShadow;
185+
}
186+
ml.wait(1);
187+
}
188+
}
189+
190+
void ParCompactionManager::push_shadow_region_mt_safe(size_t shadow_region) {
191+
MonitorLocker ml(_shadow_region_monitor, Mutex::_no_safepoint_check_flag);
192+
_shadow_region_array->push(shadow_region);
193+
ml.notify();
194+
}
195+
196+
void ParCompactionManager::push_shadow_region(size_t shadow_region) {
197+
_shadow_region_array->push(shadow_region);
198+
}
199+
200+
void ParCompactionManager::remove_all_shadow_regions() {
201+
_shadow_region_array->clear();
202+
}

src/hotspot/share/gc/parallel/psCompactionManager.hpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#ifndef SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_HPP
2626
#define SHARE_GC_PARALLEL_PSCOMPACTIONMANAGER_HPP
2727

28+
#include "gc/parallel/psParallelCompact.hpp"
2829
#include "gc/shared/taskqueue.hpp"
2930
#include "memory/allocation.hpp"
3031
#include "utilities/stack.hpp"
@@ -77,6 +78,7 @@ class ParCompactionManager : public CHeapObj<mtGC> {
7778
private:
7879
OverflowTaskQueue<oop, mtGC> _marking_stack;
7980
ObjArrayTaskQueue _objarray_stack;
81+
size_t _next_shadow_region;
8082

8183
// Is there a way to reuse the _marking_stack for the
8284
// saving empty regions? For now just create a different
@@ -85,6 +87,14 @@ class ParCompactionManager : public CHeapObj<mtGC> {
8587

8688
static ParMarkBitMap* _mark_bitmap;
8789

90+
// Contains currently free shadow regions. We use it in
91+
// a LIFO fashion for better data locality and utilization.
92+
static GrowableArray<size_t>* _shadow_region_array;
93+
94+
// Provides mutual exclusive access of _shadow_region_array.
95+
// See pop/push_shadow_region_mt_safe() below
96+
static Monitor* _shadow_region_monitor;
97+
8898
Action _action;
8999

90100
HeapWord* _last_query_beg;
@@ -109,6 +119,19 @@ class ParCompactionManager : public CHeapObj<mtGC> {
109119
// marking stack and overflow stack directly.
110120

111121
public:
122+
static const size_t InvalidShadow = ~0;
123+
static size_t pop_shadow_region_mt_safe(PSParallelCompact::RegionData* region_ptr);
124+
static void push_shadow_region_mt_safe(size_t shadow_region);
125+
static void push_shadow_region(size_t shadow_region);
126+
static void remove_all_shadow_regions();
127+
128+
inline size_t next_shadow_region() { return _next_shadow_region; }
129+
inline void set_next_shadow_region(size_t record) { _next_shadow_region = record; }
130+
inline size_t move_next_shadow_region_by(size_t workers) {
131+
_next_shadow_region += workers;
132+
return next_shadow_region();
133+
}
134+
112135
void reset_bitmap_query_cache() {
113136
_last_query_beg = NULL;
114137
_last_query_obj = NULL;

src/hotspot/share/gc/parallel/psParallelCompact.cpp

Lines changed: 147 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,7 @@ void PSParallelCompact::pre_compact()
10231023
void PSParallelCompact::post_compact()
10241024
{
10251025
GCTraceTime(Info, gc, phases) tm("Post Compact", &_gc_timer);
1026+
ParCompactionManager::remove_all_shadow_regions();
10261027

10271028
for (unsigned int id = old_space_id; id < last_space_id; ++id) {
10281029
// Clear the marking bitmap, summary data and split info.
@@ -2417,6 +2418,8 @@ void PSParallelCompact::prepare_region_draining_tasks(uint parallel_gc_threads)
24172418
for (size_t cur = end_region - 1; cur + 1 > beg_region; --cur) {
24182419
if (sd.region(cur)->claim_unsafe()) {
24192420
ParCompactionManager* cm = ParCompactionManager::manager_array(worker_id);
2421+
bool result = sd.region(cur)->mark_normal();
2422+
assert(result, "Must succeed at this point.");
24202423
cm->region_stack()->push(cur);
24212424
region_logger.handle(cur);
24222425
// Assign regions to tasks in round-robin fashion.
@@ -2602,6 +2605,10 @@ static void compaction_with_stealing_work(ParallelTaskTerminator* terminator, ui
26022605
if (ParCompactionManager::steal(worker_id, region_index)) {
26032606
PSParallelCompact::fill_and_update_region(cm, region_index);
26042607
cm->drain_region_stacks();
2608+
} else if (PSParallelCompact::steal_unavailable_region(cm, region_index)) {
2609+
// Fill and update an unavailable region with the help of a shadow region
2610+
PSParallelCompact::fill_and_update_shadow_region(cm, region_index);
2611+
cm->drain_region_stacks();
26052612
} else {
26062613
if (terminator->offer_termination()) {
26072614
break;
@@ -2656,6 +2663,7 @@ void PSParallelCompact::compact() {
26562663
//
26572664
// max push count is thus: last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1)
26582665
TaskQueue task_queue(last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1));
2666+
initialize_shadow_regions(active_gc_threads);
26592667
prepare_region_draining_tasks(active_gc_threads);
26602668
enqueue_dense_prefix_tasks(task_queue, active_gc_threads);
26612669

@@ -2962,7 +2970,17 @@ void PSParallelCompact::decrement_destination_counts(ParCompactionManager* cm,
29622970
assert(cur->data_size() > 0, "region must have live data");
29632971
cur->decrement_destination_count();
29642972
if (cur < enqueue_end && cur->available() && cur->claim()) {
2965-
cm->push_region(sd.region(cur));
2973+
if (cur->mark_normal()) {
2974+
cm->push_region(sd.region(cur));
2975+
} else if (cur->mark_copied()) {
2976+
// Try to copy the content of the shadow region back to its corresponding
2977+
// heap region if the shadow region is filled. Otherwise, the GC thread
2978+
// fills the shadow region will copy the data back (see
2979+
// MoveAndUpdateShadowClosure::complete_region).
2980+
copy_back(sd.region_to_addr(cur->shadow_region()), sd.region_to_addr(cur));
2981+
ParCompactionManager::push_shadow_region_mt_safe(cur->shadow_region());
2982+
cur->set_completed();
2983+
}
29662984
}
29672985
}
29682986
}
@@ -3040,28 +3058,19 @@ size_t PSParallelCompact::next_src_region(MoveAndUpdateClosure& closure,
30403058
return 0;
30413059
}
30423060

3043-
void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
3061+
void PSParallelCompact::fill_region(ParCompactionManager* cm, MoveAndUpdateClosure& closure, size_t region_idx)
30443062
{
30453063
typedef ParMarkBitMap::IterationStatus IterationStatus;
3046-
const size_t RegionSize = ParallelCompactData::RegionSize;
30473064
ParMarkBitMap* const bitmap = mark_bitmap();
30483065
ParallelCompactData& sd = summary_data();
30493066
RegionData* const region_ptr = sd.region(region_idx);
30503067

3051-
// Get the items needed to construct the closure.
3052-
HeapWord* dest_addr = sd.region_to_addr(region_idx);
3053-
SpaceId dest_space_id = space_id(dest_addr);
3054-
ObjectStartArray* start_array = _space_info[dest_space_id].start_array();
3055-
HeapWord* new_top = _space_info[dest_space_id].new_top();
3056-
assert(dest_addr < new_top, "sanity");
3057-
const size_t words = MIN2(pointer_delta(new_top, dest_addr), RegionSize);
3058-
30593068
// Get the source region and related info.
30603069
size_t src_region_idx = region_ptr->source_region();
30613070
SpaceId src_space_id = space_id(sd.region_to_addr(src_region_idx));
30623071
HeapWord* src_space_top = _space_info[src_space_id].space()->top();
3072+
HeapWord* dest_addr = sd.region_to_addr(region_idx);
30633073

3064-
MoveAndUpdateClosure closure(bitmap, cm, start_array, dest_addr, words);
30653074
closure.set_source(first_src_addr(dest_addr, src_space_id, src_region_idx));
30663075

30673076
// Adjust src_region_idx to prepare for decrementing destination counts (the
@@ -3080,7 +3089,7 @@ void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
30803089
decrement_destination_counts(cm, src_space_id, src_region_idx,
30813090
closure.source());
30823091
region_ptr->set_deferred_obj_addr(NULL);
3083-
region_ptr->set_completed();
3092+
closure.complete_region(cm, dest_addr, region_ptr);
30843093
return;
30853094
}
30863095

@@ -3129,15 +3138,15 @@ void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
31293138

31303139
decrement_destination_counts(cm, src_space_id, src_region_idx,
31313140
closure.source());
3132-
region_ptr->set_completed();
3141+
closure.complete_region(cm, dest_addr, region_ptr);
31333142
return;
31343143
}
31353144

31363145
if (status == ParMarkBitMap::full) {
31373146
decrement_destination_counts(cm, src_space_id, src_region_idx,
31383147
closure.source());
31393148
region_ptr->set_deferred_obj_addr(NULL);
3140-
region_ptr->set_completed();
3149+
closure.complete_region(cm, dest_addr, region_ptr);
31413150
return;
31423151
}
31433152

@@ -3150,6 +3159,96 @@ void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
31503159
} while (true);
31513160
}
31523161

3162+
void PSParallelCompact::fill_and_update_region(ParCompactionManager* cm, size_t region_idx)
3163+
{
3164+
MoveAndUpdateClosure cl(mark_bitmap(), cm, region_idx);
3165+
fill_region(cm, cl, region_idx);
3166+
}
3167+
3168+
void PSParallelCompact::fill_and_update_shadow_region(ParCompactionManager* cm, size_t region_idx)
3169+
{
3170+
// Get a shadow region first
3171+
ParallelCompactData& sd = summary_data();
3172+
RegionData* const region_ptr = sd.region(region_idx);
3173+
size_t shadow_region = ParCompactionManager::pop_shadow_region_mt_safe(region_ptr);
3174+
// The InvalidShadow return value indicates the corresponding heap region is available,
3175+
// so use MoveAndUpdateClosure to fill the normal region. Otherwise, use
3176+
// MoveAndUpdateShadowClosure to fill the acquired shadow region.
3177+
if (shadow_region == ParCompactionManager::InvalidShadow) {
3178+
MoveAndUpdateClosure cl(mark_bitmap(), cm, region_idx);
3179+
region_ptr->shadow_to_normal();
3180+
return fill_region(cm, cl, region_idx);
3181+
} else {
3182+
MoveAndUpdateShadowClosure cl(mark_bitmap(), cm, region_idx, shadow_region);
3183+
return fill_region(cm, cl, region_idx);
3184+
}
3185+
}
3186+
3187+
void PSParallelCompact::copy_back(HeapWord *shadow_addr, HeapWord *region_addr)
3188+
{
3189+
Copy::aligned_conjoint_words(shadow_addr, region_addr, _summary_data.RegionSize);
3190+
}
3191+
3192+
bool PSParallelCompact::steal_unavailable_region(ParCompactionManager* cm, size_t &region_idx)
3193+
{
3194+
size_t next = cm->next_shadow_region();
3195+
ParallelCompactData& sd = summary_data();
3196+
size_t old_new_top = sd.addr_to_region_idx(_space_info[old_space_id].new_top());
3197+
uint active_gc_threads = ParallelScavengeHeap::heap()->workers().active_workers();
3198+
3199+
while (next < old_new_top) {
3200+
if (sd.region(next)->mark_shadow()) {
3201+
region_idx = next;
3202+
return true;
3203+
}
3204+
next = cm->move_next_shadow_region_by(active_gc_threads);
3205+
}
3206+
3207+
return false;
3208+
}
3209+
3210+
// The shadow region is an optimization to address region dependencies in full GC. The basic
3211+
// idea is making more regions available by temporally storing their live objects in empty
3212+
// shadow regions to resolve dependencies between them and the destination regions. Therefore,
3213+
// GC threads need not wait destination regions to be available before processing sources.
3214+
//
3215+
// A typical workflow would be:
3216+
// After draining its own stack and failing to steal from others, a GC worker would pick an
3217+
// unavailable region (destination count > 0) and get a shadow region. Then the worker fills
3218+
// the shadow region by copying live objects from source regions of the unavailable one. Once
3219+
// the unavailable region becomes available, the data in the shadow region will be copied back.
3220+
// Shadow regions are empty regions in the to-space and regions between top and end of other spaces.
3221+
//
3222+
// For more details, please refer to §4.2 of the VEE'19 paper:
3223+
// Haoyu Li, Mingyu Wu, Binyu Zang, and Haibo Chen. 2019. ScissorGC: scalable and efficient
3224+
// compaction for Java full garbage collection. In Proceedings of the 15th ACM SIGPLAN/SIGOPS
3225+
// International Conference on Virtual Execution Environments (VEE 2019). ACM, New York, NY, USA,
3226+
// 108-121. DOI: https://doi.org/10.1145/3313808.3313820
3227+
void PSParallelCompact::initialize_shadow_regions(uint parallel_gc_threads)
3228+
{
3229+
const ParallelCompactData& sd = PSParallelCompact::summary_data();
3230+
3231+
for (unsigned int id = old_space_id; id < last_space_id; ++id) {
3232+
SpaceInfo* const space_info = _space_info + id;
3233+
MutableSpace* const space = space_info->space();
3234+
3235+
const size_t beg_region =
3236+
sd.addr_to_region_idx(sd.region_align_up(MAX2(space_info->new_top(), space->top())));
3237+
const size_t end_region =
3238+
sd.addr_to_region_idx(sd.region_align_down(space->end()));
3239+
3240+
for (size_t cur = beg_region; cur < end_region; ++cur) {
3241+
ParCompactionManager::push_shadow_region(cur);
3242+
}
3243+
}
3244+
3245+
size_t beg_region = sd.addr_to_region_idx(_space_info[old_space_id].dense_prefix());
3246+
for (uint i = 0; i < parallel_gc_threads; i++) {
3247+
ParCompactionManager *cm = ParCompactionManager::manager_array(i);
3248+
cm->set_next_shadow_region(beg_region + i);
3249+
}
3250+
}
3251+
31533252
void PSParallelCompact::fill_blocks(size_t region_idx)
31543253
{
31553254
// Fill in the block table elements for the specified region. Each block
@@ -3222,9 +3321,9 @@ void PSParallelCompact::reset_millis_since_last_gc() {
32223321

32233322
ParMarkBitMap::IterationStatus MoveAndUpdateClosure::copy_until_full()
32243323
{
3225-
if (source() != destination()) {
3324+
if (source() != copy_destination()) {
32263325
DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
3227-
Copy::aligned_conjoint_words(source(), destination(), words_remaining());
3326+
Copy::aligned_conjoint_words(source(), copy_destination(), words_remaining());
32283327
}
32293328
update_state(words_remaining());
32303329
assert(is_full(), "sanity");
@@ -3243,13 +3342,19 @@ void MoveAndUpdateClosure::copy_partial_obj()
32433342

32443343
// This test is necessary; if omitted, the pointer updates to a partial object
32453344
// that crosses the dense prefix boundary could be overwritten.
3246-
if (source() != destination()) {
3345+
if (source() != copy_destination()) {
32473346
DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
3248-
Copy::aligned_conjoint_words(source(), destination(), words);
3347+
Copy::aligned_conjoint_words(source(), copy_destination(), words);
32493348
}
32503349
update_state(words);
32513350
}
32523351

3352+
void MoveAndUpdateClosure::complete_region(ParCompactionManager *cm, HeapWord *dest_addr,
3353+
PSParallelCompact::RegionData *region_ptr) {
3354+
assert(region_ptr->shadow_state() == ParallelCompactData::RegionData::NormalRegion, "Region should be finished");
3355+
region_ptr->set_completed();
3356+
}
3357+
32533358
ParMarkBitMapClosure::IterationStatus
32543359
MoveAndUpdateClosure::do_addr(HeapWord* addr, size_t words) {
32553360
assert(destination() != NULL, "sanity");
@@ -3268,20 +3373,39 @@ MoveAndUpdateClosure::do_addr(HeapWord* addr, size_t words) {
32683373
_start_array->allocate_block(destination());
32693374
}
32703375

3271-
if (destination() != source()) {
3376+
if (copy_destination() != source()) {
32723377
DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
3273-
Copy::aligned_conjoint_words(source(), destination(), words);
3378+
Copy::aligned_conjoint_words(source(), copy_destination(), words);
32743379
}
32753380

3276-
oop moved_oop = (oop) destination();
3381+
oop moved_oop = (oop) copy_destination();
32773382
compaction_manager()->update_contents(moved_oop);
32783383
assert(oopDesc::is_oop_or_null(moved_oop), "Expected an oop or NULL at " PTR_FORMAT, p2i(moved_oop));
32793384

32803385
update_state(words);
3281-
assert(destination() == (HeapWord*)moved_oop + moved_oop->size(), "sanity");
3386+
assert(copy_destination() == (HeapWord*)moved_oop + moved_oop->size(), "sanity");
32823387
return is_full() ? ParMarkBitMap::full : ParMarkBitMap::incomplete;
32833388
}
32843389

3390+
void MoveAndUpdateShadowClosure::complete_region(ParCompactionManager *cm, HeapWord *dest_addr,
3391+
PSParallelCompact::RegionData *region_ptr) {
3392+
assert(region_ptr->shadow_state() == ParallelCompactData::RegionData::ShadowRegion, "Region should be shadow");
3393+
// Record the shadow region index
3394+
region_ptr->set_shadow_region(_shadow);
3395+
// Mark the shadow region as filled to indicate the data is ready to be
3396+
// copied back
3397+
region_ptr->mark_filled();
3398+
// Try to copy the content of the shadow region back to its corresponding
3399+
// heap region if available; the GC thread that decreases the destination
3400+
// count to zero will do the copying otherwise (see
3401+
// PSParallelCompact::decrement_destination_counts).
3402+
if (((region_ptr->available() && region_ptr->claim()) || region_ptr->claimed()) && region_ptr->mark_copied()) {
3403+
region_ptr->set_completed();
3404+
PSParallelCompact::copy_back(PSParallelCompact::summary_data().region_to_addr(_shadow), dest_addr);
3405+
ParCompactionManager::push_shadow_region_mt_safe(_shadow);
3406+
}
3407+
}
3408+
32853409
UpdateOnlyClosure::UpdateOnlyClosure(ParMarkBitMap* mbm,
32863410
ParCompactionManager* cm,
32873411
PSParallelCompact::SpaceId space_id) :

0 commit comments

Comments
 (0)