Skip to content

Commit dabe5af

Browse files
authored
Basic Python encoding (#2551)
Make a few changes to support runtime-defined Python Vortex arrays. In the next PR, I plan to rename "encoding" back to "array" in PyVortex for consistency with the Rust APIs.
1 parent 165ce7a commit dabe5af

50 files changed

Lines changed: 439 additions & 132 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

docs/api/python/encodings.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,5 +108,5 @@ Compressed Encodings
108108
:members:
109109

110110

111-
.. autoclass:: vortex.FastLanesForEncoding
111+
.. autoclass:: vortex.FastLanesFoREncoding
112112
:members:

encodings/alp/src/alp/array.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use vortex_array::arrays::PrimitiveArray;
44
use vortex_array::patches::Patches;
55
use vortex_array::stats::{ArrayStats, StatsSetRef};
66
use vortex_array::variants::PrimitiveArrayTrait;
7-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
7+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
88
use vortex_array::{
99
Array, ArrayCanonicalImpl, ArrayExt, ArrayImpl, ArrayRef, ArrayStatisticsImpl,
1010
ArrayValidityImpl, ArrayVariantsImpl, Canonical, Encoding, EncodingId, SerdeMetadata,
@@ -27,11 +27,16 @@ pub struct ALPArray {
2727

2828
pub struct ALPEncoding;
2929
impl Encoding for ALPEncoding {
30-
const ID: EncodingId = EncodingId::new_ref("vortex.alp");
3130
type Array = ALPArray;
3231
type Metadata = SerdeMetadata<ALPMetadata>;
3332
}
3433

34+
impl EncodingVTable for ALPEncoding {
35+
fn id(&self) -> EncodingId {
36+
EncodingId::new_ref("vortex.alp")
37+
}
38+
}
39+
3540
impl ALPArray {
3641
// TODO(ngates): remove try_new and panic on wrong DType?
3742
pub fn try_new(

encodings/alp/src/alp_rd/array.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use vortex_array::arrays::PrimitiveArray;
44
use vortex_array::patches::Patches;
55
use vortex_array::stats::{ArrayStats, StatsSetRef};
66
use vortex_array::validity::Validity;
7-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
7+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
88
use vortex_array::{
99
Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
1010
Canonical, Encoding, EncodingId, SerdeMetadata, ToCanonical,
@@ -30,11 +30,16 @@ pub struct ALPRDArray {
3030

3131
pub struct ALPRDEncoding;
3232
impl Encoding for ALPRDEncoding {
33-
const ID: EncodingId = EncodingId::new_ref("vortex.alprd");
3433
type Array = ALPRDArray;
3534
type Metadata = SerdeMetadata<ALPRDMetadata>;
3635
}
3736

37+
impl EncodingVTable for ALPRDEncoding {
38+
fn id(&self) -> EncodingId {
39+
EncodingId::new_ref("vortex.alprd")
40+
}
41+
}
42+
3843
impl ALPRDArray {
3944
pub fn try_new(
4045
dtype: DType,

encodings/bytebool/src/array.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use vortex_array::arrays::BoolArray;
55
use vortex_array::stats::{ArrayStats, StatsSetRef};
66
use vortex_array::validity::Validity;
77
use vortex_array::variants::BoolArrayTrait;
8-
use vortex_array::vtable::VTableRef;
8+
use vortex_array::vtable::{EncodingVTable, VTableRef};
99
use vortex_array::{
1010
Array, ArrayCanonicalImpl, ArrayImpl, ArrayStatisticsImpl, ArrayValidityImpl,
1111
ArrayVariantsImpl, Canonical, EmptyMetadata, Encoding, EncodingId, try_from_array_ref,
@@ -27,11 +27,16 @@ try_from_array_ref!(ByteBoolArray);
2727

2828
pub struct ByteBoolEncoding;
2929
impl Encoding for ByteBoolEncoding {
30-
const ID: EncodingId = EncodingId::new_ref("vortex.bytebool");
3130
type Array = ByteBoolArray;
3231
type Metadata = EmptyMetadata;
3332
}
3433

34+
impl EncodingVTable for ByteBoolEncoding {
35+
fn id(&self) -> EncodingId {
36+
EncodingId::new_ref("vortex.bytebool")
37+
}
38+
}
39+
3540
impl ByteBoolArray {
3641
pub fn new(buffer: ByteBuffer, validity: Validity) -> Self {
3742
let length = buffer.len();

encodings/datetime-parts/src/array.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use vortex_array::compute::try_cast;
55
use vortex_array::stats::{ArrayStats, StatsSetRef};
66
use vortex_array::validity::Validity;
77
use vortex_array::variants::ExtensionArrayTrait;
8-
use vortex_array::vtable::VTableRef;
8+
use vortex_array::vtable::{EncodingVTable, VTableRef};
99
use vortex_array::{
1010
Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl, ArrayVariantsImpl,
1111
Encoding, EncodingId, RkyvMetadata,
@@ -27,11 +27,16 @@ pub struct DateTimePartsArray {
2727

2828
pub struct DateTimePartsEncoding;
2929
impl Encoding for DateTimePartsEncoding {
30-
const ID: EncodingId = EncodingId::new_ref("vortex.datetimeparts");
3130
type Array = DateTimePartsArray;
3231
type Metadata = RkyvMetadata<DateTimePartsMetadata>;
3332
}
3433

34+
impl EncodingVTable for DateTimePartsEncoding {
35+
fn id(&self) -> EncodingId {
36+
EncodingId::new_ref("vortex.datetimeparts")
37+
}
38+
}
39+
3540
impl DateTimePartsArray {
3641
pub fn try_new(
3742
dtype: DType,

encodings/dict/src/array.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use vortex_array::builders::ArrayBuilder;
55
use vortex_array::compute::{scalar_at, take, take_into, try_cast};
66
use vortex_array::stats::{ArrayStats, StatsSetRef};
77
use vortex_array::variants::PrimitiveArrayTrait;
8-
use vortex_array::vtable::VTableRef;
8+
use vortex_array::vtable::{EncodingVTable, VTableRef};
99
use vortex_array::{
1010
Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
1111
Canonical, Encoding, EncodingId, IntoArray, RkyvMetadata, ToCanonical,
@@ -25,11 +25,16 @@ pub struct DictArray {
2525

2626
pub struct DictEncoding;
2727
impl Encoding for DictEncoding {
28-
const ID: EncodingId = EncodingId::new_ref("vortex.dict");
2928
type Array = DictArray;
3029
type Metadata = RkyvMetadata<DictMetadata>;
3130
}
3231

32+
impl EncodingVTable for DictEncoding {
33+
fn id(&self) -> EncodingId {
34+
EncodingId::new_ref("vortex.dict")
35+
}
36+
}
37+
3338
impl DictArray {
3439
pub fn try_new(mut codes: ArrayRef, values: ArrayRef) -> VortexResult<Self> {
3540
if !codes.dtype().is_unsigned_int() {

encodings/fastlanes/src/bitpacking/mod.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use vortex_array::patches::Patches;
88
use vortex_array::stats::{ArrayStats, StatsSetRef};
99
use vortex_array::validity::Validity;
1010
use vortex_array::variants::PrimitiveArrayTrait;
11-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
11+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
1212
use vortex_array::{
1313
Array, ArrayCanonicalImpl, ArrayExt, ArrayImpl, ArrayStatisticsImpl, ArrayValidityImpl,
1414
ArrayVariantsImpl, Canonical, Encoding, EncodingId, RkyvMetadata, try_from_array_ref,
@@ -40,11 +40,16 @@ try_from_array_ref!(BitPackedArray);
4040

4141
pub struct BitPackedEncoding;
4242
impl Encoding for BitPackedEncoding {
43-
const ID: EncodingId = EncodingId::new_ref("fastlanes.bitpacked");
4443
type Array = BitPackedArray;
4544
type Metadata = RkyvMetadata<BitPackedMetadata>;
4645
}
4746

47+
impl EncodingVTable for BitPackedEncoding {
48+
fn id(&self) -> EncodingId {
49+
EncodingId::new_ref("fastlanes.bitpacked")
50+
}
51+
}
52+
4853
/// NB: All non-null values in the patches array are considered patches
4954
impl BitPackedArray {
5055
/// Create a new bitpacked array using a buffer of packed data.

encodings/fastlanes/src/delta/mod.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use vortex_array::arrays::PrimitiveArray;
55
use vortex_array::stats::{ArrayStats, StatsSetRef};
66
use vortex_array::validity::Validity;
77
use vortex_array::variants::PrimitiveArrayTrait;
8-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
8+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
99
use vortex_array::{
1010
Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
1111
ArrayVariantsImpl, Canonical, Encoding, EncodingId, RkyvMetadata,
@@ -34,11 +34,16 @@ pub struct DeltaArray {
3434

3535
pub struct DeltaEncoding;
3636
impl Encoding for DeltaEncoding {
37-
const ID: EncodingId = EncodingId::new_ref("fastlanes.delta");
3837
type Array = DeltaArray;
3938
type Metadata = RkyvMetadata<DeltaMetadata>;
4039
}
4140

41+
impl EncodingVTable for DeltaEncoding {
42+
fn id(&self) -> EncodingId {
43+
EncodingId::new_ref("fastlanes.delta")
44+
}
45+
}
46+
4247
/// A FastLanes-style delta-encoded array of primitive values.
4348
///
4449
/// A [`DeltaArray`] comprises a sequence of _chunks_ each representing 1,024 delta-encoded values,

encodings/fastlanes/src/for/mod.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::fmt::Debug;
33
pub use compress::*;
44
use vortex_array::stats::{ArrayStats, StatsSetRef};
55
use vortex_array::variants::PrimitiveArrayTrait;
6-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
6+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
77
use vortex_array::{
88
Array, ArrayCanonicalImpl, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl,
99
ArrayVariantsImpl, Canonical, Encoding, EncodingId,
@@ -28,11 +28,16 @@ pub struct FoRArray {
2828

2929
pub struct FoREncoding;
3030
impl Encoding for FoREncoding {
31-
const ID: EncodingId = EncodingId::new_ref("fastlanes.for");
3231
type Array = FoRArray;
3332
type Metadata = ScalarValueMetadata;
3433
}
3534

35+
impl EncodingVTable for FoREncoding {
36+
fn id(&self) -> EncodingId {
37+
EncodingId::new_ref("fastlanes.for")
38+
}
39+
}
40+
3641
impl FoRArray {
3742
pub fn try_new(encoded: ArrayRef, reference: Scalar) -> VortexResult<Self> {
3843
if reference.is_null() {

encodings/fsst/src/array.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use fsst::{Decompressor, Symbol};
22
use vortex_array::arrays::VarBinEncoding;
33
use vortex_array::stats::{ArrayStats, StatsSetRef};
44
use vortex_array::variants::{BinaryArrayTrait, Utf8ArrayTrait};
5-
use vortex_array::vtable::{StatisticsVTable, VTableRef};
5+
use vortex_array::vtable::{EncodingVTable, StatisticsVTable, VTableRef};
66
use vortex_array::{
77
Array, ArrayImpl, ArrayRef, ArrayStatisticsImpl, ArrayValidityImpl, ArrayVariantsImpl,
88
Encoding, EncodingId, SerdeMetadata, ToCanonical,
@@ -25,11 +25,16 @@ pub struct FSSTArray {
2525

2626
pub struct FSSTEncoding;
2727
impl Encoding for FSSTEncoding {
28-
const ID: EncodingId = EncodingId::new_ref("vortex.fsst");
2928
type Array = FSSTArray;
3029
type Metadata = SerdeMetadata<FSSTMetadata>;
3130
}
3231

32+
impl EncodingVTable for FSSTEncoding {
33+
fn id(&self) -> EncodingId {
34+
EncodingId::new_ref("vortex.fsst")
35+
}
36+
}
37+
3338
pub(crate) static SYMBOLS_DTYPE: DType = DType::Primitive(PType::U64, Nullability::NonNullable);
3439
pub(crate) static SYMBOL_LENS_DTYPE: DType = DType::Primitive(PType::U8, Nullability::NonNullable);
3540

@@ -74,7 +79,7 @@ impl FSSTArray {
7479
vortex_bail!(InvalidArgument: "uncompressed_lengths must have integer type and cannot be nullable, found {}", uncompressed_lengths.dtype());
7580
}
7681

77-
if codes.encoding() != VarBinEncoding::ID {
82+
if codes.encoding() != VarBinEncoding.id() {
7883
vortex_bail!(
7984
InvalidArgument: "codes must have varbin encoding, was {}",
8085
codes.encoding()

0 commit comments

Comments
 (0)