From 8221febda3990640b2d0493eb05c7b2ca91c19d7 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Tue, 27 Sep 2022 14:47:01 -0700 Subject: [PATCH] Use b-trees based on bumpalo arenas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alternative to the other PR. Numbers don't look great: most comparisons show no difference in performance, and the rest differ by only a few percent in either direction. ``` $ cargo run --release -- benchmark -e ~/scratch/bumpalo-arena.so -e ~/scratch/main.so -m perf-counters --stop-after compilation --processes 10 --iterations-per-process 20 --engine-flags="--disable-parallel-compilation --disable-cache" -- benchmarks/spidermonkey/benchmark.wasm benchmarks/bz2/benchmark.wasm benchmarks/pulldown-cmark/benchmark.wasm compilation :: cache-misses :: benchmarks/bz2/benchmark.wasm Δ = 2154.84 ± 788.55 (confidence = 99%) main.so is 1.02x to 1.05x faster than bumpalo-arena.so! [62141 66843.37 75776] bumpalo-arena.so [59523 64688.53 81781] main.so compilation :: cache-accesses :: benchmarks/pulldown-cmark/benchmark.wasm Δ = 8595.08 ± 5795.64 (confidence = 99%) bumpalo-arena.so is 1.01x to 1.03x faster than main.so! [401717 437779.00 545867] bumpalo-arena.so [410908 446374.08 536809] main.so compilation :: cache-accesses :: benchmarks/bz2/benchmark.wasm Δ = 2342.34 ± 2055.41 (confidence = 99%) bumpalo-arena.so is 1.00x to 1.03x faster than main.so! [133753 143249.76 180756] bumpalo-arena.so [135428 145592.10 203005] main.so compilation :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm Δ = 35306.54 ± 32172.66 (confidence = 99%) bumpalo-arena.so is 1.00x to 1.01x faster than main.so! [8801940 8900728.23 9461825] bumpalo-arena.so [8831762 8936034.78 9485351] main.so compilation :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm Δ = 382102.42 ± 81779.69 (confidence = 99%) bumpalo-arena.so is 1.00x to 1.00x faster than main.so!
[208106667 209068806.32 210101434] bumpalo-arena.so [208492210 209450908.75 210448369] main.so compilation :: cpu-cycles :: benchmarks/bz2/benchmark.wasm No difference in performance. [2441143 2659275.73 4199423] bumpalo-arena.so [2392678 2608641.54 5347877] main.so compilation :: cpu-cycles :: benchmarks/pulldown-cmark/benchmark.wasm No difference in performance. [7014995 8113396.99 13667438] bumpalo-arena.so [7371875 7996085.34 13293046] main.so compilation :: cache-misses :: benchmarks/pulldown-cmark/benchmark.wasm No difference in performance. [131096 167836.79 217530] bumpalo-arena.so [128340 169539.35 282605] main.so compilation :: cache-accesses :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [8780974 9800932.96 10522183] bumpalo-arena.so [8758505 9771531.34 10590536] main.so compilation :: cpu-cycles :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [185218977 194521807.51 238636300] bumpalo-arena.so [181686455 194998321.78 258060749] main.so compilation :: cache-misses :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [4330008 4990485.00 5498381] bumpalo-arena.so [4192710 4978995.85 5666416] main.so compilation :: instructions-retired :: benchmarks/bz2/benchmark.wasm No difference in performance. 
[2361496 2431407.85 3123724] bumpalo-arena.so [2362101 2429567.66 3057998] main.so ``` --- Cargo.toml | 1 + src/ion/data_structures.rs | 34 +++++++++++++++++++--------------- src/ion/dump.rs | 2 +- src/ion/liveranges.rs | 4 ++-- src/ion/merge.rs | 2 +- src/ion/mod.rs | 9 +++++++-- src/ion/moves.rs | 6 +++--- src/ion/process.rs | 2 +- src/ion/requirement.rs | 2 +- src/ion/spill.rs | 4 ++-- src/ion/stackmap.rs | 2 +- 11 files changed, 39 insertions(+), 29 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2b588555..de02c61e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ description = "Backtracking register allocator inspired from IonMonkey" repository = "https://github.com/bytecodealliance/regalloc2" [dependencies] +arena-btree = { git = "https://github.com/bytecodealliance/arena-btree.git", branch = "bumpalo-based-arenas" } log = { version = "0.4.8", default-features = false } smallvec = "1.6.1" fxhash = "0.2.1" diff --git a/src/ion/data_structures.rs b/src/ion/data_structures.rs index de12a93a..d3391f15 100644 --- a/src/ion/data_structures.rs +++ b/src/ion/data_structures.rs @@ -21,11 +21,14 @@ use crate::{ RegClass, VReg, }; use fxhash::FxHashSet; +use arena_btree::BTreeMap; use smallvec::SmallVec; use std::cmp::Ordering; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{HashMap, HashSet}; use std::fmt::Debug; +pub use arena_btree::Arena; + /// A range from `from` (inclusive) to `to` (exclusive). 
#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct CodeRange { @@ -288,8 +291,8 @@ pub struct VRegData { } #[derive(Clone, Debug)] -pub struct PRegData { - pub allocations: LiveRangeSet, +pub struct PRegData<'arena> { + pub allocations: LiveRangeSet<'arena>, pub is_stack: bool, } @@ -362,8 +365,9 @@ impl BlockparamIn { } } -#[derive(Clone, Debug)] -pub struct Env<'a, F: Function> { +pub struct Env<'a, 'arena, F: Function> { + pub arena: &'arena Arena, + pub func: &'a F, pub env: &'a MachineEnv, pub cfginfo: CFGInfo, @@ -376,13 +380,13 @@ pub struct Env<'a, F: Function> { pub bundles: Vec, pub spillsets: Vec, pub vregs: Vec, - pub pregs: Vec, + pub pregs: Vec>, pub allocation_queue: PrioQueue, pub safepoints: Vec, // Sorted list of safepoint insts. pub safepoints_per_vreg: HashMap>, pub spilled_bundles: Vec, - pub spillslots: Vec, + pub spillslots: Vec>, pub slots_by_size: Vec, pub extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 2], @@ -437,7 +441,7 @@ pub struct Env<'a, F: Function> { pub conflict_set: FxHashSet, } -impl<'a, F: Function> Env<'a, F> { +impl<'a, 'arena, F: Function> Env<'a, 'arena, F> { /// Get the VReg (with bundled RegClass) from a vreg index. 
#[inline] pub fn vreg(&self, index: VRegIndex) -> VReg { @@ -463,8 +467,8 @@ impl<'a, F: Function> Env<'a, F> { } #[derive(Clone, Debug)] -pub struct SpillSlotData { - pub ranges: LiveRangeSet, +pub struct SpillSlotData<'arena> { + pub ranges: LiveRangeSet<'arena>, pub slots: u32, pub alloc: Allocation, } @@ -501,8 +505,8 @@ pub struct PrioQueueEntry { } #[derive(Clone, Debug)] -pub struct LiveRangeSet { - pub btree: BTreeMap, +pub struct LiveRangeSet<'arena> { + pub btree: BTreeMap<'arena, LiveRangeKey, LiveRangeIndex>, } #[derive(Clone, Copy, Debug)] @@ -592,10 +596,10 @@ impl PrioQueue { } } -impl LiveRangeSet { - pub(crate) fn new() -> Self { +impl<'arena> LiveRangeSet<'arena> { + pub(crate) fn new(arena: &'arena Arena) -> Self { Self { - btree: BTreeMap::new(), + btree: BTreeMap::new(arena), } } } diff --git a/src/ion/dump.rs b/src/ion/dump.rs index ba4f74f5..d2efc330 100644 --- a/src/ion/dump.rs +++ b/src/ion/dump.rs @@ -3,7 +3,7 @@ use super::Env; use crate::{Block, Function, ProgPoint}; -impl<'a, F: Function> Env<'a, F> { +impl<'a, 'arena, F: Function> Env<'a, 'arena, F> { pub fn dump_state(&self) { trace!("Bundles:"); for (i, b) in self.bundles.iter().enumerate() { diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index bdee6f1a..60b1c42a 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -98,13 +98,13 @@ impl std::ops::Add for SpillWeight { } } -impl<'a, F: Function> Env<'a, F> { +impl<'a, 'arena, F: Function> Env<'a, 'arena, F> { pub fn create_pregs_and_vregs(&mut self) { // Create PRegs from the env. 
self.pregs.resize( PReg::NUM_INDEX, PRegData { - allocations: LiveRangeSet::new(), + allocations: LiveRangeSet::new(self.arena), is_stack: false, }, ); diff --git a/src/ion/merge.rs b/src/ion/merge.rs index ec685d32..df7e356c 100644 --- a/src/ion/merge.rs +++ b/src/ion/merge.rs @@ -21,7 +21,7 @@ use crate::{ }; use smallvec::smallvec; -impl<'a, F: Function> Env<'a, F> { +impl<'a, 'arena, F: Function> Env<'a, 'arena, F> { pub fn merge_bundles(&mut self, from: LiveBundleIndex, to: LiveBundleIndex) -> bool { if from == to { // Merge bundle into self -- trivial merge. diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 020c418a..7c839819 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -15,6 +15,7 @@ use crate::cfg::CFGInfo; use crate::{Function, MachineEnv, Output, PReg, ProgPoint, RegAllocError, RegClass}; +use arena_btree::Arena; use std::collections::HashMap; pub(crate) mod data_structures; @@ -37,15 +38,18 @@ pub(crate) mod moves; pub(crate) mod spill; pub(crate) mod stackmap; -impl<'a, F: Function> Env<'a, F> { +impl<'a, 'arena, F: Function> Env<'a, 'arena, F> { pub(crate) fn new( func: &'a F, env: &'a MachineEnv, + arena: &'arena Arena, cfginfo: CFGInfo, annotations_enabled: bool, ) -> Self { let n = func.num_insts(); Self { + arena, + func, env, cfginfo, @@ -123,7 +127,8 @@ pub fn run( ) -> Result { let cfginfo = CFGInfo::new(func)?; - let mut env = Env::new(func, mach_env, cfginfo, enable_annotations); + let arena = Arena::new(); + let mut env = Env::new(func, mach_env, &arena, cfginfo, enable_annotations); env.init()?; env.run()?; diff --git a/src/ion/moves.rs b/src/ion/moves.rs index 9f6e3da8..6c7b32d3 100644 --- a/src/ion/moves.rs +++ b/src/ion/moves.rs @@ -29,7 +29,7 @@ use fxhash::FxHashMap; use smallvec::{smallvec, SmallVec}; use std::fmt::Debug; -impl<'a, F: Function> Env<'a, F> { +impl<'a, 'arena, F: Function> Env<'a, 'arena, F> { pub fn is_start_of_block(&self, pos: ProgPoint) -> bool { let block = self.cfginfo.insn_block[pos.inst().index()]; pos == 
self.cfginfo.block_entry[block.index()] @@ -885,8 +885,8 @@ impl<'a, F: Function> Env<'a, F> { // Redundant-move elimination state tracker. let mut redundant_moves = RedundantMoveEliminator::default(); - fn redundant_move_process_side_effects<'a, F: Function>( - this: &Env<'a, F>, + fn redundant_move_process_side_effects<'a, 'arena, F: Function>( + this: &Env<'a, 'arena, F>, redundant_moves: &mut RedundantMoveEliminator, from: ProgPoint, to: ProgPoint, diff --git a/src/ion/process.rs b/src/ion/process.rs index 4a99567f..e8bf2737 100644 --- a/src/ion/process.rs +++ b/src/ion/process.rs @@ -37,7 +37,7 @@ pub enum AllocRegResult { ConflictHighCost, } -impl<'a, F: Function> Env<'a, F> { +impl<'a, 'arena, F: Function> Env<'a, 'arena, F> { pub fn process_bundles(&mut self) -> Result<(), RegAllocError> { while let Some((bundle, reg_hint)) = self.allocation_queue.pop() { self.stats.process_bundle_count += 1; diff --git a/src/ion/requirement.rs b/src/ion/requirement.rs index 4fa72600..40d42889 100644 --- a/src/ion/requirement.rs +++ b/src/ion/requirement.rs @@ -104,7 +104,7 @@ impl Requirement { } } -impl<'a, F: Function> Env<'a, F> { +impl<'a, 'arena, F: Function> Env<'a, 'arena, F> { #[inline(always)] pub fn requirement_from_operand(&self, op: Operand) -> Requirement { match op.constraint() { diff --git a/src/ion/spill.rs b/src/ion/spill.rs index 5bc6e9e5..7925046a 100644 --- a/src/ion/spill.rs +++ b/src/ion/spill.rs @@ -19,7 +19,7 @@ use super::{ use crate::{Allocation, Function, SpillSlot}; use smallvec::smallvec; -impl<'a, F: Function> Env<'a, F> { +impl<'a, 'arena, F: Function> Env<'a, 'arena, F> { pub fn try_allocating_regs_for_spilled_bundles(&mut self) { trace!("allocating regs for spilled bundles"); for i in 0..self.spilled_bundles.len() { @@ -163,7 +163,7 @@ impl<'a, F: Function> Env<'a, F> { // Allocate a new spillslot. 
let spillslot = SpillSlotIndex::new(self.spillslots.len()); self.spillslots.push(SpillSlotData { - ranges: LiveRangeSet::new(), + ranges: LiveRangeSet::new(self.arena), alloc: Allocation::none(), slots: size as u32, }); diff --git a/src/ion/stackmap.rs b/src/ion/stackmap.rs index 108835ae..18240083 100644 --- a/src/ion/stackmap.rs +++ b/src/ion/stackmap.rs @@ -15,7 +15,7 @@ use super::{Env, ProgPoint, VRegIndex}; use crate::{ion::data_structures::u64_key, Function}; -impl<'a, F: Function> Env<'a, F> { +impl<'a, 'arena, F: Function> Env<'a, 'arena, F> { pub fn compute_stackmaps(&mut self) { // For each ref-typed vreg, iterate through ranges and find // safepoints in-range. Add the SpillSlot to the stackmap.