Skip to content

Commit 6a2c565

Browse files
authored
Make sequence metadata in-memory + clean up (#6759)
## Summary Similar in nature to #6751. We want to be able to work with in-memory metadata more easily, and we also want to make sure all scalar deserialization happens is `deserialize` instead of `build`. ## API Changes Renames the constructors to be in line with the rest of the encodings. ## Testing Existing tests should be sufficient. --------- Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent 909deb5 commit 6a2c565

15 files changed

Lines changed: 144 additions & 118 deletions

File tree

encodings/sequence/public-api.lock

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ pub fn vortex_sequence::SequenceArray::last(&self) -> vortex_array::scalar::type
1212

1313
pub fn vortex_sequence::SequenceArray::multiplier(&self) -> vortex_array::scalar::typed_view::primitive::pvalue::PValue
1414

15-
pub fn vortex_sequence::SequenceArray::new(base: vortex_array::scalar::typed_view::primitive::pvalue::PValue, multiplier: vortex_array::scalar::typed_view::primitive::pvalue::PValue, ptype: vortex_array::dtype::ptype::PType, nullability: vortex_array::dtype::nullability::Nullability, length: usize) -> vortex_error::VortexResult<Self>
16-
1715
pub fn vortex_sequence::SequenceArray::ptype(&self) -> vortex_array::dtype::ptype::PType
1816

19-
pub fn vortex_sequence::SequenceArray::typed_new<T: vortex_array::dtype::ptype::NativePType + core::convert::Into<vortex_array::scalar::typed_view::primitive::pvalue::PValue>>(base: T, multiplier: T, nullability: vortex_array::dtype::nullability::Nullability, length: usize) -> vortex_error::VortexResult<Self>
17+
pub fn vortex_sequence::SequenceArray::try_new(base: vortex_array::scalar::typed_view::primitive::pvalue::PValue, multiplier: vortex_array::scalar::typed_view::primitive::pvalue::PValue, ptype: vortex_array::dtype::ptype::PType, nullability: vortex_array::dtype::nullability::Nullability, length: usize) -> vortex_error::VortexResult<Self>
18+
19+
pub fn vortex_sequence::SequenceArray::try_new_typed<T: vortex_array::dtype::ptype::NativePType + core::convert::Into<vortex_array::scalar::typed_view::primitive::pvalue::PValue>>(base: T, multiplier: T, nullability: vortex_array::dtype::nullability::Nullability, length: usize) -> vortex_error::VortexResult<Self>
2020

2121
impl core::clone::Clone for vortex_sequence::SequenceArray
2222

@@ -104,7 +104,7 @@ impl vortex_array::vtable::VTable for vortex_sequence::SequenceVTable
104104

105105
pub type vortex_sequence::SequenceVTable::Array = vortex_sequence::SequenceArray
106106

107-
pub type vortex_sequence::SequenceVTable::Metadata = vortex_array::metadata::ProstMetadata<vortex_sequence::array::SequenceMetadata>
107+
pub type vortex_sequence::SequenceVTable::Metadata = vortex_sequence::array::SequenceMetadata
108108

109109
pub type vortex_sequence::SequenceVTable::OperationsVTable = vortex_sequence::SequenceVTable
110110

@@ -124,7 +124,7 @@ pub fn vortex_sequence::SequenceVTable::child(_array: &vortex_sequence::Sequence
124124

125125
pub fn vortex_sequence::SequenceVTable::child_name(_array: &vortex_sequence::SequenceArray, idx: usize) -> alloc::string::String
126126

127-
pub fn vortex_sequence::SequenceVTable::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Metadata>
127+
pub fn vortex_sequence::SequenceVTable::deserialize(bytes: &[u8], dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Metadata>
128128

129129
pub fn vortex_sequence::SequenceVTable::dtype(array: &vortex_sequence::SequenceArray) -> &vortex_array::dtype::DType
130130

encodings/sequence/src/array.rs

Lines changed: 73 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,14 @@ use crate::rules::RULES;
5050

5151
vtable!(Sequence);
5252

53-
#[derive(Clone, prost::Message)]
53+
#[derive(Debug, Clone, Copy)]
5454
pub struct SequenceMetadata {
55+
base: PValue,
56+
multiplier: PValue,
57+
}
58+
59+
#[derive(Clone, prost::Message)]
60+
pub struct ProstSequenceMetadata {
5561
#[prost(message, tag = "1")]
5662
base: Option<vortex_proto::scalar::ScalarValue>,
5763
#[prost(message, tag = "2")]
@@ -78,13 +84,13 @@ pub struct SequenceArray {
7884
}
7985

8086
impl SequenceArray {
81-
pub fn typed_new<T: NativePType + Into<PValue>>(
87+
pub fn try_new_typed<T: NativePType + Into<PValue>>(
8288
base: T,
8389
multiplier: T,
8490
nullability: Nullability,
8591
length: usize,
8692
) -> VortexResult<Self> {
87-
Self::new(
93+
Self::try_new(
8894
base.into(),
8995
multiplier.into(),
9096
T::PTYPE,
@@ -94,7 +100,7 @@ impl SequenceArray {
94100
}
95101

96102
/// Constructs a sequence array using two integer values (with the same ptype).
97-
pub fn new(
103+
pub fn try_new(
98104
base: PValue,
99105
multiplier: PValue,
100106
ptype: PType,
@@ -111,16 +117,23 @@ impl SequenceArray {
111117
))
112118
})?;
113119

114-
Ok(Self::unchecked_new(
115-
base,
116-
multiplier,
117-
ptype,
118-
nullability,
119-
length,
120-
))
121-
}
122-
123-
pub(crate) fn unchecked_new(
120+
// SAFETY: we just validated that `ptype` is an integer and that the final
121+
// element is representable via `try_last`.
122+
Ok(unsafe { Self::new_unchecked(base, multiplier, ptype, nullability, length) })
123+
}
124+
125+
/// Constructs a [`SequenceArray`] without validating that the `ptype` is an integer
126+
/// type or that the final element is representable.
127+
///
128+
/// # Safety
129+
///
130+
/// The caller must ensure that:
131+
/// - `ptype` is an integer type (i.e., `ptype.is_int()` returns `true`).
132+
/// - `base + (length - 1) * multiplier` does not overflow the range of `ptype`.
133+
///
134+
/// Violating the first invariant will cause a panic. Violating the second will
135+
/// cause silent wraparound when materializing elements, producing incorrect values.
136+
pub(crate) unsafe fn new_unchecked(
124137
base: PValue,
125138
multiplier: PValue,
126139
ptype: PType,
@@ -226,7 +239,7 @@ impl SequenceArray {
226239
impl VTable for SequenceVTable {
227240
type Array = SequenceArray;
228241

229-
type Metadata = ProstMetadata<SequenceMetadata>;
242+
type Metadata = SequenceMetadata;
230243
type OperationsVTable = Self;
231244
type ValidityVTable = Self;
232245

@@ -289,40 +302,37 @@ impl VTable for SequenceVTable {
289302
}
290303

291304
fn metadata(array: &SequenceArray) -> VortexResult<Self::Metadata> {
292-
Ok(ProstMetadata(SequenceMetadata {
293-
base: Some((&array.base()).into()),
294-
multiplier: Some((&array.multiplier()).into()),
295-
}))
305+
Ok(SequenceMetadata {
306+
base: array.base(),
307+
multiplier: array.multiplier(),
308+
})
296309
}
297310

298311
fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
299-
Ok(Some(metadata.serialize()))
312+
let prost = ProstMetadata(ProstSequenceMetadata {
313+
base: Some((&metadata.base).into()),
314+
multiplier: Some((&metadata.multiplier).into()),
315+
});
316+
317+
Ok(Some(prost.serialize()))
300318
}
301319

302320
fn deserialize(
303321
bytes: &[u8],
304-
_dtype: &DType,
322+
dtype: &DType,
305323
_len: usize,
306324
_buffers: &[BufferHandle],
307325
_session: &VortexSession,
308326
) -> VortexResult<Self::Metadata> {
309-
Ok(ProstMetadata(
310-
<ProstMetadata<SequenceMetadata> as DeserializeMetadata>::deserialize(bytes)?,
311-
))
312-
}
327+
let prost = ProstMetadata(
328+
<ProstMetadata<ProstSequenceMetadata> as DeserializeMetadata>::deserialize(bytes)?,
329+
);
313330

314-
fn build(
315-
dtype: &DType,
316-
len: usize,
317-
metadata: &Self::Metadata,
318-
_buffers: &[BufferHandle],
319-
_children: &dyn ArrayChildren,
320-
) -> VortexResult<SequenceArray> {
321331
let ptype = dtype.as_ptype();
322332

323-
// We go via scalar to cast the scalar values into the correct PType
333+
// We go via Scalar to validate that the value is valid for the ptype.
324334
let base = Scalar::from_proto_value(
325-
metadata
335+
prost
326336
.0
327337
.base
328338
.as_ref()
@@ -331,10 +341,10 @@ impl VTable for SequenceVTable {
331341
)?
332342
.as_primitive()
333343
.pvalue()
334-
.vortex_expect("non-nullable primitive");
344+
.vortex_expect("sequence array base should be a non-nullable primitive");
335345

336346
let multiplier = Scalar::from_proto_value(
337-
metadata
347+
prost
338348
.0
339349
.multiplier
340350
.as_ref()
@@ -343,15 +353,25 @@ impl VTable for SequenceVTable {
343353
)?
344354
.as_primitive()
345355
.pvalue()
346-
.vortex_expect("non-nullable primitive");
356+
.vortex_expect("sequence array multiplier should be a non-nullable primitive");
347357

348-
Ok(SequenceArray::unchecked_new(
349-
base,
350-
multiplier,
351-
ptype,
358+
Ok(SequenceMetadata { base, multiplier })
359+
}
360+
361+
fn build(
362+
dtype: &DType,
363+
len: usize,
364+
metadata: &Self::Metadata,
365+
_buffers: &[BufferHandle],
366+
_children: &dyn ArrayChildren,
367+
) -> VortexResult<SequenceArray> {
368+
SequenceArray::try_new(
369+
metadata.base,
370+
metadata.multiplier,
371+
dtype.as_ptype(),
352372
dtype.nullability(),
353373
len,
354-
))
374+
)
355375
}
356376

357377
fn with_children(_array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
@@ -433,7 +453,7 @@ mod tests {
433453

434454
#[test]
435455
fn test_sequence_canonical() {
436-
let arr = SequenceArray::typed_new(2i64, 3, Nullability::NonNullable, 4).unwrap();
456+
let arr = SequenceArray::try_new_typed(2i64, 3, Nullability::NonNullable, 4).unwrap();
437457

438458
let canon = PrimitiveArray::from_iter((0..4).map(|i| 2i64 + i * 3));
439459

@@ -442,7 +462,7 @@ mod tests {
442462

443463
#[test]
444464
fn test_sequence_slice_canonical() {
445-
let arr = SequenceArray::typed_new(2i64, 3, Nullability::NonNullable, 4)
465+
let arr = SequenceArray::try_new_typed(2i64, 3, Nullability::NonNullable, 4)
446466
.unwrap()
447467
.slice(2..3)
448468
.unwrap();
@@ -454,7 +474,7 @@ mod tests {
454474

455475
#[test]
456476
fn test_sequence_scalar_at() {
457-
let scalar = SequenceArray::typed_new(2i64, 3, Nullability::NonNullable, 4)
477+
let scalar = SequenceArray::try_new_typed(2i64, 3, Nullability::NonNullable, 4)
458478
.unwrap()
459479
.scalar_at(2)
460480
.unwrap();
@@ -467,19 +487,19 @@ mod tests {
467487

468488
#[test]
469489
fn test_sequence_min_max() {
470-
assert!(SequenceArray::typed_new(-127i8, -1i8, Nullability::NonNullable, 2).is_ok());
471-
assert!(SequenceArray::typed_new(126i8, -1i8, Nullability::NonNullable, 2).is_ok());
490+
assert!(SequenceArray::try_new_typed(-127i8, -1i8, Nullability::NonNullable, 2).is_ok());
491+
assert!(SequenceArray::try_new_typed(126i8, -1i8, Nullability::NonNullable, 2).is_ok());
472492
}
473493

474494
#[test]
475495
fn test_sequence_too_big() {
476-
assert!(SequenceArray::typed_new(127i8, 1i8, Nullability::NonNullable, 2).is_err());
477-
assert!(SequenceArray::typed_new(-128i8, -1i8, Nullability::NonNullable, 2).is_err());
496+
assert!(SequenceArray::try_new_typed(127i8, 1i8, Nullability::NonNullable, 2).is_err());
497+
assert!(SequenceArray::try_new_typed(-128i8, -1i8, Nullability::NonNullable, 2).is_err());
478498
}
479499

480500
#[test]
481501
fn positive_multiplier_is_strict_sorted() -> VortexResult<()> {
482-
let arr = SequenceArray::typed_new(0i64, 3, Nullability::NonNullable, 4)?;
502+
let arr = SequenceArray::try_new_typed(0i64, 3, Nullability::NonNullable, 4)?;
483503

484504
let is_sorted = arr
485505
.statistics()
@@ -495,7 +515,7 @@ mod tests {
495515

496516
#[test]
497517
fn zero_multiplier_is_sorted_not_strict() -> VortexResult<()> {
498-
let arr = SequenceArray::typed_new(5i64, 0, Nullability::NonNullable, 4)?;
518+
let arr = SequenceArray::try_new_typed(5i64, 0, Nullability::NonNullable, 4)?;
499519

500520
let is_sorted = arr
501521
.statistics()
@@ -511,7 +531,7 @@ mod tests {
511531

512532
#[test]
513533
fn negative_multiplier_not_sorted() -> VortexResult<()> {
514-
let arr = SequenceArray::typed_new(10i64, -1, Nullability::NonNullable, 4)?;
534+
let arr = SequenceArray::try_new_typed(10i64, -1, Nullability::NonNullable, 4)?;
515535

516536
let is_sorted = arr
517537
.statistics()
@@ -530,7 +550,7 @@ mod tests {
530550
#[test]
531551
fn test_large_multiplier_sorted() -> VortexResult<()> {
532552
let large_multiplier = (i64::MAX as u64) + 1;
533-
let arr = SequenceArray::typed_new(0, large_multiplier, Nullability::NonNullable, 2)?;
553+
let arr = SequenceArray::try_new_typed(0, large_multiplier, Nullability::NonNullable, 2)?;
534554

535555
let is_sorted = arr
536556
.statistics()

encodings/sequence/src/compress.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ fn encode_primitive_array<P: NativePType + Into<PValue> + CheckedAdd + CheckedSu
4848
) -> VortexResult<Option<ArrayRef>> {
4949
if slice.len() == 1 {
5050
// The multiplier here can be any value, zero is chosen
51-
return SequenceArray::typed_new(slice[0], P::zero(), nullability, 1)
51+
return SequenceArray::try_new_typed(slice[0], P::zero(), nullability, 1)
5252
.map(|a| Some(a.to_array()));
5353
}
5454
let base = slice[0];
@@ -69,7 +69,7 @@ fn encode_primitive_array<P: NativePType + Into<PValue> + CheckedAdd + CheckedSu
6969
.windows(2)
7070
.all(|w| Some(w[1]) == w[0].checked_add(&multiplier))
7171
.then_some(
72-
SequenceArray::typed_new(base, multiplier, nullability, slice.len())
72+
SequenceArray::try_new_typed(base, multiplier, nullability, slice.len())
7373
.map(|a| a.to_array()),
7474
)
7575
.transpose()

encodings/sequence/src/compute/cast.rs

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ impl CastReduce for SequenceVTable {
3232
// For SequenceArray, we can just create a new one with the same parameters
3333
// but different nullability
3434
return Ok(Some(
35-
SequenceArray::new(
35+
SequenceArray::try_new(
3636
array.base(),
3737
array.multiplier(),
3838
*target_ptype,
@@ -71,7 +71,7 @@ impl CastReduce for SequenceVTable {
7171
.ok_or_else(|| vortex_err!("Cast resulted in null multiplier value"))?;
7272

7373
return Ok(Some(
74-
SequenceArray::new(
74+
SequenceArray::try_new(
7575
new_base,
7676
new_multiplier,
7777
*target_ptype,
@@ -102,7 +102,8 @@ mod tests {
102102

103103
#[test]
104104
fn test_cast_sequence_nullability() {
105-
let sequence = SequenceArray::typed_new(0u32, 1u32, Nullability::NonNullable, 4).unwrap();
105+
let sequence =
106+
SequenceArray::try_new_typed(0u32, 1u32, Nullability::NonNullable, 4).unwrap();
106107

107108
// Cast to nullable
108109
let casted = sequence
@@ -118,7 +119,7 @@ mod tests {
118119
#[test]
119120
fn test_cast_sequence_u32_to_i64() {
120121
let sequence =
121-
SequenceArray::typed_new(100u32, 10u32, Nullability::NonNullable, 4).unwrap();
122+
SequenceArray::try_new_typed(100u32, 10u32, Nullability::NonNullable, 4).unwrap();
122123

123124
let casted = sequence
124125
.to_array()
@@ -137,7 +138,8 @@ mod tests {
137138
#[test]
138139
fn test_cast_sequence_i16_to_i32_nullable() {
139140
// Test ptype change AND nullability change in one cast
140-
let sequence = SequenceArray::typed_new(5i16, 3i16, Nullability::NonNullable, 3).unwrap();
141+
let sequence =
142+
SequenceArray::try_new_typed(5i16, 3i16, Nullability::NonNullable, 3).unwrap();
141143

142144
let casted = sequence
143145
.to_array()
@@ -158,7 +160,8 @@ mod tests {
158160

159161
#[test]
160162
fn test_cast_sequence_to_float_delegates_to_canonical() {
161-
let sequence = SequenceArray::typed_new(0i32, 1i32, Nullability::NonNullable, 5).unwrap();
163+
let sequence =
164+
SequenceArray::try_new_typed(0i32, 1i32, Nullability::NonNullable, 5).unwrap();
162165

163166
// Cast to float should delegate to canonical (SequenceArray doesn't support float)
164167
let casted = sequence
@@ -180,15 +183,15 @@ mod tests {
180183
}
181184

182185
#[rstest]
183-
#[case::i32(SequenceArray::typed_new(0i32, 1i32, Nullability::NonNullable, 5).unwrap())]
184-
#[case::u64(SequenceArray::typed_new(1000u64, 100u64, Nullability::NonNullable, 4).unwrap())]
186+
#[case::i32(SequenceArray::try_new_typed(0i32, 1i32, Nullability::NonNullable, 5).unwrap())]
187+
#[case::u64(SequenceArray::try_new_typed(1000u64, 100u64, Nullability::NonNullable, 4).unwrap())]
185188
// TODO(DK): SequenceArray does not actually conform. You cannot cast this array to u8 even
186189
// though all its values are representable therein.
187190
//
188-
// #[case::negative_step(SequenceArray::typed_new(100i32, -10i32, Nullability::NonNullable,
191+
// #[case::negative_step(SequenceArray::try_new_typed(100i32, -10i32, Nullability::NonNullable,
189192
// 5).unwrap())]
190-
#[case::single(SequenceArray::typed_new(42i64, 0i64, Nullability::NonNullable, 1).unwrap())]
191-
#[case::constant(SequenceArray::typed_new(
193+
#[case::single(SequenceArray::try_new_typed(42i64, 0i64, Nullability::NonNullable, 1).unwrap())]
194+
#[case::constant(SequenceArray::try_new_typed(
192195
100i32,
193196
0i32, // multiplier of 0 means constant array
194197
Nullability::NonNullable,

0 commit comments

Comments
 (0)