Skip to content

Commit 5ce08ed

Browse files
array slots (#6870)
This PR moves towards having a fixed position for each child to reduce the complexity of finding the position of children. This is motivated by the usage of iterative execution using child index to execute that child. --------- Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent fb4ac44 commit 5ce08ed

151 files changed

Lines changed: 3141 additions & 2580 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

benchmarks/compress-bench/src/lib.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,10 @@ pub mod vortex;
1515
pub fn chunked_to_vec_record_batch(
1616
chunked: ChunkedArray,
1717
) -> anyhow::Result<(Vec<RecordBatch>, Arc<Schema>)> {
18-
let chunks_vec = chunked.chunks();
19-
assert!(!chunks_vec.is_empty(), "empty chunks");
18+
assert!(chunked.nchunks() > 0, "empty chunks");
2019

21-
let batches = chunks_vec
22-
.iter()
20+
let batches = chunked
21+
.iter_chunks()
2322
.map(|array| {
2423
// TODO(connor)[ListView]: The rust Parquet implementation does not support writing
2524
// `ListView` to Parquet files yet.

encodings/alp/public-api.lock

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,6 @@ pub fn vortex_alp::ALP::buffer_name(_array: &vortex_alp::ALPArray, _idx: usize)
6868

6969
pub fn vortex_alp::ALP::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_alp::ALPArray>
7070

71-
pub fn vortex_alp::ALP::child(array: &vortex_alp::ALPArray, idx: usize) -> vortex_array::array::ArrayRef
72-
73-
pub fn vortex_alp::ALP::child_name(array: &vortex_alp::ALPArray, idx: usize) -> alloc::string::String
74-
7571
pub fn vortex_alp::ALP::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Metadata>
7672

7773
pub fn vortex_alp::ALP::dtype(array: &vortex_alp::ALPArray) -> &vortex_array::dtype::DType
@@ -88,17 +84,19 @@ pub fn vortex_alp::ALP::metadata(array: &vortex_alp::ALPArray) -> vortex_error::
8884

8985
pub fn vortex_alp::ALP::nbuffers(_array: &vortex_alp::ALPArray) -> usize
9086

91-
pub fn vortex_alp::ALP::nchildren(array: &vortex_alp::ALPArray) -> usize
92-
9387
pub fn vortex_alp::ALP::reduce_parent(array: &vortex_array::vtable::typed::Array<Self>, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::ArrayRef>>
9488

9589
pub fn vortex_alp::ALP::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
9690

91+
pub fn vortex_alp::ALP::slot_name(_array: &vortex_alp::ALPArray, idx: usize) -> alloc::string::String
92+
93+
pub fn vortex_alp::ALP::slots(array: &vortex_alp::ALPArray) -> &[core::option::Option<vortex_array::array::ArrayRef>]
94+
9795
pub fn vortex_alp::ALP::stats(array: &vortex_alp::ALPArray) -> vortex_array::stats::array::StatsSetRef<'_>
9896

9997
pub fn vortex_alp::ALP::vtable(_array: &Self::Array) -> &Self
10098

101-
pub fn vortex_alp::ALP::with_children(array: &mut Self::Array, children: alloc::vec::Vec<vortex_array::array::ArrayRef>) -> vortex_error::VortexResult<()>
99+
pub fn vortex_alp::ALP::with_slots(array: &mut vortex_alp::ALPArray, slots: alloc::vec::Vec<core::option::Option<vortex_array::array::ArrayRef>>) -> vortex_error::VortexResult<()>
102100

103101
impl vortex_array::vtable::operations::OperationsVTable<vortex_alp::ALP> for vortex_alp::ALP
104102

@@ -230,10 +228,6 @@ pub fn vortex_alp::ALPRD::buffer_name(_array: &vortex_alp::ALPRDArray, _idx: usi
230228

231229
pub fn vortex_alp::ALPRD::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_alp::ALPRDArray>
232230

233-
pub fn vortex_alp::ALPRD::child(array: &vortex_alp::ALPRDArray, idx: usize) -> vortex_array::array::ArrayRef
234-
235-
pub fn vortex_alp::ALPRD::child_name(array: &vortex_alp::ALPRDArray, idx: usize) -> alloc::string::String
236-
237231
pub fn vortex_alp::ALPRD::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Metadata>
238232

239233
pub fn vortex_alp::ALPRD::dtype(array: &vortex_alp::ALPRDArray) -> &vortex_array::dtype::DType
@@ -250,17 +244,19 @@ pub fn vortex_alp::ALPRD::metadata(array: &vortex_alp::ALPRDArray) -> vortex_err
250244

251245
pub fn vortex_alp::ALPRD::nbuffers(_array: &vortex_alp::ALPRDArray) -> usize
252246

253-
pub fn vortex_alp::ALPRD::nchildren(array: &vortex_alp::ALPRDArray) -> usize
254-
255247
pub fn vortex_alp::ALPRD::reduce_parent(array: &vortex_array::vtable::typed::Array<Self>, parent: &vortex_array::array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::ArrayRef>>
256248

257249
pub fn vortex_alp::ALPRD::serialize(metadata: Self::Metadata) -> vortex_error::VortexResult<core::option::Option<alloc::vec::Vec<u8>>>
258250

251+
pub fn vortex_alp::ALPRD::slot_name(_array: &vortex_alp::ALPRDArray, idx: usize) -> alloc::string::String
252+
253+
pub fn vortex_alp::ALPRD::slots(array: &vortex_alp::ALPRDArray) -> &[core::option::Option<vortex_array::array::ArrayRef>]
254+
259255
pub fn vortex_alp::ALPRD::stats(array: &vortex_alp::ALPRDArray) -> vortex_array::stats::array::StatsSetRef<'_>
260256

261257
pub fn vortex_alp::ALPRD::vtable(_array: &Self::Array) -> &Self
262258

263-
pub fn vortex_alp::ALPRD::with_children(array: &mut Self::Array, children: alloc::vec::Vec<vortex_array::array::ArrayRef>) -> vortex_error::VortexResult<()>
259+
pub fn vortex_alp::ALPRD::with_slots(array: &mut vortex_alp::ALPRDArray, slots: alloc::vec::Vec<core::option::Option<vortex_array::array::ArrayRef>>) -> vortex_error::VortexResult<()>
264260

265261
impl vortex_array::vtable::operations::OperationsVTable<vortex_alp::ALPRD> for vortex_alp::ALPRD
266262

encodings/alp/src/alp/array.rs

Lines changed: 77 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,10 @@ use vortex_array::vtable::ArrayId;
3030
use vortex_array::vtable::VTable;
3131
use vortex_array::vtable::ValidityChild;
3232
use vortex_array::vtable::ValidityVTableFromChild;
33-
use vortex_array::vtable::patches_child;
34-
use vortex_array::vtable::patches_child_name;
35-
use vortex_array::vtable::patches_nchildren;
3633
use vortex_error::VortexExpect;
3734
use vortex_error::VortexResult;
3835
use vortex_error::vortex_bail;
3936
use vortex_error::vortex_ensure;
40-
use vortex_error::vortex_err;
4137
use vortex_error::vortex_panic;
4238
use vortex_session::VortexSession;
4339

@@ -65,7 +61,7 @@ impl VTable for ALP {
6561
}
6662

6763
fn len(array: &ALPArray) -> usize {
68-
array.encoded.len()
64+
array.encoded().len()
6965
}
7066

7167
fn dtype(array: &ALPArray) -> &DType {
@@ -78,14 +74,14 @@ impl VTable for ALP {
7874

7975
fn array_hash<H: std::hash::Hasher>(array: &ALPArray, state: &mut H, precision: Precision) {
8076
array.dtype.hash(state);
81-
array.encoded.array_hash(state, precision);
77+
array.encoded().array_hash(state, precision);
8278
array.exponents.hash(state);
8379
array.patches.array_hash(state, precision);
8480
}
8581

8682
fn array_eq(array: &ALPArray, other: &ALPArray, precision: Precision) -> bool {
8783
array.dtype == other.dtype
88-
&& array.encoded.array_eq(&other.encoded, precision)
84+
&& array.encoded().array_eq(other.encoded(), precision)
8985
&& array.exponents == other.exponents
9086
&& array.patches.array_eq(&other.patches, precision)
9187
}
@@ -102,32 +98,41 @@ impl VTable for ALP {
10298
None
10399
}
104100

105-
fn nchildren(array: &ALPArray) -> usize {
106-
1 + array.patches().map_or(0, patches_nchildren)
101+
fn slots(array: &ALPArray) -> &[Option<ArrayRef>] {
102+
&array.slots
107103
}
108104

109-
fn child(array: &ALPArray, idx: usize) -> ArrayRef {
110-
match idx {
111-
0 => array.encoded().clone(),
112-
_ => {
113-
let patches = array
114-
.patches()
115-
.unwrap_or_else(|| vortex_panic!("ALPArray child index {idx} out of bounds"));
116-
patches_child(patches, idx - 1)
117-
}
118-
}
105+
fn slot_name(_array: &ALPArray, idx: usize) -> String {
106+
SLOT_NAMES[idx].to_string()
119107
}
120108

121-
fn child_name(array: &ALPArray, idx: usize) -> String {
122-
match idx {
123-
0 => "encoded".to_string(),
124-
_ => {
125-
if array.patches().is_none() {
126-
vortex_panic!("ALPArray child_name index {idx} out of bounds");
127-
}
128-
patches_child_name(idx - 1).to_string()
109+
fn with_slots(array: &mut ALPArray, slots: Vec<Option<ArrayRef>>) -> VortexResult<()> {
110+
vortex_ensure!(
111+
slots.len() == NUM_SLOTS,
112+
"ALPArray expects {} slots, got {}",
113+
NUM_SLOTS,
114+
slots.len()
115+
);
116+
117+
// Reconstruct patches from slots + existing metadata
118+
array.patches = match (&slots[PATCH_INDICES_SLOT], &slots[PATCH_VALUES_SLOT]) {
119+
(Some(indices), Some(values)) => {
120+
let old = array
121+
.patches
122+
.as_ref()
123+
.vortex_expect("ALPArray had patch slots but no patches metadata");
124+
Some(Patches::new(
125+
old.array_len(),
126+
old.offset(),
127+
indices.clone(),
128+
values.clone(),
129+
slots[PATCH_CHUNK_OFFSETS_SLOT].clone(),
130+
)?)
129131
}
130-
}
132+
_ => None,
133+
};
134+
array.slots = slots;
135+
Ok(())
131136
}
132137

133138
fn metadata(array: &ALPArray) -> VortexResult<Self::Metadata> {
@@ -196,51 +201,6 @@ impl VTable for ALP {
196201
)
197202
}
198203

199-
fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
200-
// Children: encoded, patches (if present): indices, values, chunk_offsets (optional)
201-
let patches_info = array
202-
.patches
203-
.as_ref()
204-
.map(|p| (p.array_len(), p.offset(), p.chunk_offsets().is_some()));
205-
206-
let expected_children = match &patches_info {
207-
Some((_, _, has_chunk_offsets)) => 1 + 2 + if *has_chunk_offsets { 1 } else { 0 },
208-
None => 1,
209-
};
210-
211-
vortex_ensure!(
212-
children.len() == expected_children,
213-
"ALPArray expects {} children, got {}",
214-
expected_children,
215-
children.len()
216-
);
217-
218-
let mut children_iter = children.into_iter();
219-
array.encoded = children_iter
220-
.next()
221-
.ok_or_else(|| vortex_err!("Expected encoded child"))?;
222-
223-
if let Some((array_len, offset, _has_chunk_offsets)) = patches_info {
224-
let indices = children_iter
225-
.next()
226-
.ok_or_else(|| vortex_err!("Expected patch indices child"))?;
227-
let values = children_iter
228-
.next()
229-
.ok_or_else(|| vortex_err!("Expected patch values child"))?;
230-
let chunk_offsets = children_iter.next();
231-
232-
array.patches = Some(Patches::new(
233-
array_len,
234-
offset,
235-
indices,
236-
values,
237-
chunk_offsets,
238-
)?);
239-
}
240-
241-
Ok(())
242-
}
243-
244204
fn execute(array: Arc<Array<Self>>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
245205
Ok(ExecutionResult::done(
246206
execute_decompress(Arc::unwrap_or_clone(array).into_inner(), ctx)?.into_array(),
@@ -265,9 +225,21 @@ impl VTable for ALP {
265225
}
266226
}
267227

228+
pub(super) const ENCODED_SLOT: usize = 0;
229+
pub(super) const PATCH_INDICES_SLOT: usize = 1;
230+
pub(super) const PATCH_VALUES_SLOT: usize = 2;
231+
pub(super) const PATCH_CHUNK_OFFSETS_SLOT: usize = 3;
232+
pub(super) const NUM_SLOTS: usize = 4;
233+
pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = [
234+
"encoded",
235+
"patch_indices",
236+
"patch_values",
237+
"patch_chunk_offsets",
238+
];
239+
268240
#[derive(Clone, Debug)]
269241
pub struct ALPArray {
270-
encoded: ArrayRef,
242+
slots: Vec<Option<ArrayRef>>,
271243
patches: Option<Patches>,
272244
dtype: DType,
273245
exponents: Exponents,
@@ -436,9 +408,11 @@ impl ALPArray {
436408
_ => unreachable!(),
437409
};
438410

411+
let slots = Self::make_slots(&encoded, &patches);
412+
439413
Ok(Self {
440414
dtype,
441-
encoded,
415+
slots,
442416
exponents,
443417
patches,
444418
stats_set: Default::default(),
@@ -455,21 +429,42 @@ impl ALPArray {
455429
patches: Option<Patches>,
456430
dtype: DType,
457431
) -> Self {
432+
let slots = Self::make_slots(&encoded, &patches);
433+
458434
Self {
459435
dtype,
460-
encoded,
436+
slots,
461437
exponents,
462438
patches,
463439
stats_set: Default::default(),
464440
}
465441
}
466442

443+
fn make_slots(encoded: &ArrayRef, patches: &Option<Patches>) -> Vec<Option<ArrayRef>> {
444+
let (patch_indices, patch_values, patch_chunk_offsets) = match patches {
445+
Some(p) => (
446+
Some(p.indices().clone()),
447+
Some(p.values().clone()),
448+
p.chunk_offsets().clone(),
449+
),
450+
None => (None, None, None),
451+
};
452+
vec![
453+
Some(encoded.clone()),
454+
patch_indices,
455+
patch_values,
456+
patch_chunk_offsets,
457+
]
458+
}
459+
467460
pub fn ptype(&self) -> PType {
468461
self.dtype.as_ptype()
469462
}
470463

471464
pub fn encoded(&self) -> &ArrayRef {
472-
&self.encoded
465+
self.slots[ENCODED_SLOT]
466+
.as_ref()
467+
.vortex_expect("ALPArray encoded slot")
473468
}
474469

475470
#[inline]
@@ -483,8 +478,11 @@ impl ALPArray {
483478

484479
/// Consumes the array and returns its parts.
485480
#[inline]
486-
pub fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>, DType) {
487-
(self.encoded, self.exponents, self.patches, self.dtype)
481+
pub fn into_parts(mut self) -> (ArrayRef, Exponents, Option<Patches>, DType) {
482+
let encoded = self.slots[ENCODED_SLOT]
483+
.take()
484+
.vortex_expect("ALPArray encoded slot");
485+
(encoded, self.exponents, self.patches, self.dtype)
488486
}
489487
}
490488

0 commit comments

Comments
 (0)