Skip to content

Commit 97bae96

Browse files
author
Leonid Ryzhyk
committed
[sql] Cleanup arc-strings.
* Fix up benchmarks.
* Implement rkyv traits for SqlString (the Arc version only).
* Feature-gate the internment dependency.
* Make the `Arc<>` version the default. The interned version is enabled by the `interned_string` feature.

Signed-off-by: Leonid Ryzhyk <leonid@feldera.com>
1 parent 0e69e27 commit 97bae96

File tree

6 files changed

+50
-81
lines changed

6 files changed

+50
-81
lines changed
Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,27 @@
11
use dashmap::DashMap;
22
use regex::Regex;
33
use std::sync::LazyLock;
4+
use feldera_sqllib::*;
45

5-
static REGEXS: LazyLock<DashMap<String, Regex>> = LazyLock::new(|| DashMap::new());
6+
static REGEXS: LazyLock<DashMap<SqlString, Regex>> = LazyLock::new(|| DashMap::new());
67

78
pub fn re_extract(
8-
s: Option<String>,
9-
p: Option<String>,
9+
s: Option<SqlString>,
10+
p: Option<SqlString>,
1011
group: Option<i32>,
11-
) -> Result<Option<String>, Box<dyn std::error::Error>> {
12+
) -> Result<Option<SqlString>, Box<dyn std::error::Error>> {
1213
Ok(do_re_extract(s, p, group))
1314
}
1415

15-
fn do_re_extract(s: Option<String>, p: Option<String>, group: Option<i32>) -> Option<String> {
16+
fn do_re_extract(s: Option<SqlString>, p: Option<SqlString>, group: Option<i32>) -> Option<SqlString> {
1617
let s = s?;
1718
let p = p?;
1819
let group = group?;
1920

2021
let re = REGEXS
2122
.entry(p.clone())
22-
.or_try_insert_with(|| Regex::new(&p))
23+
.or_try_insert_with(|| Regex::new(p.str()))
2324
.ok()?;
2425

25-
Some(re.captures(&s)?.get(group as usize)?.as_str().to_string())
26+
Some(SqlString::from_ref(re.captures(s.str())?.get(group as usize)?.as_str()))
2627
}

crates/dbsp/src/hash.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::hash::{Hash, Hasher};
44
use xxhash_rust::xxh3::Xxh3Default;
55

66
/// Default hashing function used to shard records across workers.
7-
pub fn default_hash<T: Hash>(x: &T) -> u64 {
7+
pub fn default_hash<T: Hash + ?Sized>(x: &T) -> u64 {
88
let mut hasher = Xxh3Default::new();
99
x.hash(&mut hasher);
1010
hasher.finish()

crates/sqllib/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ categories = ["database", "api-bindings", "network-programming"]
1111
publish = true
1212

1313
[features]
14-
arcstring = []
14+
default = []
15+
interned_string = ["internment"]
1516

1617
[dependencies]
1718
thiserror = "1.0"
@@ -37,7 +38,7 @@ flate2 = "1.0.28"
3738
metrics = { version = "0.23.0" }
3839
base64 = "0.22.1"
3940
uuid = { version = "1.11.0", features = ["v4", "std"] }
40-
internment = { version = "0.8.5", features = ["arc"] }
41+
internment = { version = "0.8.5", features = ["arc"], optional = true }
4142

4243
[dev-dependencies]
4344
serde_json = "1.0.107"

crates/sqllib/src/string.rs

Lines changed: 36 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
1-
//! SQL String operations
1+
//! SQL String operations.
2+
//!
3+
4+
// This module contains two implementations of an immutable reference-counted
5+
// string type: a version wrapped in `Arc` and an interned string. The former
6+
// is enabled by default. The latter lacks rkyv trait implementations and
7+
// has poor performance in preliminary benchmarks.
28

39
#![allow(non_snake_case)]
410
use crate::{
@@ -8,47 +14,62 @@ use crate::{
814

915
use core::fmt::Error;
1016
use feldera_types::{deserialize_without_context, serialize_without_context};
11-
#[cfg(not(feature = "arcstring"))]
17+
#[cfg(feature = "interned_string")]
1218
use internment::ArcIntern;
1319
use like::{Escape, Like};
1420
use regex::Regex;
15-
use rkyv::Fallible;
16-
use serde::{Deserialize, Deserializer, Serialize, Serializer};
21+
use serde::{Deserialize, Serialize};
1722
use size_of::{Context, SizeOf};
1823
use std::{
19-
borrow::Cow,
2024
cmp::{max, min},
2125
fmt::{Display, Formatter},
2226
sync::Arc,
2327
};
2428

25-
#[cfg(not(feature = "arcstring"))]
29+
#[cfg(feature = "interned_string")]
2630
type StringRef = ArcIntern<String>;
2731

28-
#[cfg(feature = "arcstring")]
32+
#[cfg(not(feature = "interned_string"))]
2933
type StringRef = Arc<String>;
3034

31-
#[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
35+
/// An immutable reference counted string.
36+
#[derive(
37+
Clone,
38+
Default,
39+
Debug,
40+
Eq,
41+
Hash,
42+
Ord,
43+
PartialEq,
44+
PartialOrd,
45+
Serialize,
46+
Deserialize,
47+
rkyv::Archive,
48+
rkyv::Serialize,
49+
rkyv::Deserialize,
50+
)]
51+
#[archive_attr(derive(Ord, Eq, PartialEq, PartialOrd))]
52+
#[serde(transparent)]
3253
pub struct SqlString(StringRef);
3354

3455
/// String representation used by the Feldera SQL runtime
3556
impl SqlString {
36-
#[cfg(feature = "arcstring")]
57+
#[cfg(not(feature = "interned_string"))]
3758
pub fn new() -> Self {
38-
SqlString(StringRef::new("".to_string()))
59+
Self::default()
3960
}
4061

41-
#[cfg(feature = "arcstring")]
62+
#[cfg(not(feature = "interned_string"))]
4263
pub fn from_ref(value: &str) -> Self {
43-
SqlString(StringRef::new(value))
64+
SqlString(StringRef::from(value.to_string()))
4465
}
4566

46-
#[cfg(not(feature = "arcstring"))]
67+
#[cfg(feature = "interned_string")]
4768
pub fn new() -> Self {
48-
SqlString(StringRef::new("".to_string()))
69+
Self::default()
4970
}
5071

51-
#[cfg(not(feature = "arcstring"))]
72+
#[cfg(feature = "interned_string")]
5273
pub fn from_ref(value: &str) -> Self {
5374
SqlString(StringRef::from_ref(value))
5475
}
@@ -66,25 +87,6 @@ impl SqlString {
6687
}
6788
}
6889

69-
impl Serialize for SqlString {
70-
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
71-
where
72-
S: Serializer,
73-
{
74-
self.str().serialize(serializer)
75-
}
76-
}
77-
78-
impl<'de> Deserialize<'de> for SqlString {
79-
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
80-
where
81-
D: Deserializer<'de>,
82-
{
83-
let str: Cow<'de, str> = Deserialize::deserialize(deserializer)?;
84-
Ok(Self::from_ref(&str))
85-
}
86-
}
87-
8890
serialize_without_context!(SqlString);
8991
deserialize_without_context!(SqlString);
9092

@@ -118,41 +120,6 @@ impl SizeOf for SqlString {
118120
}
119121
}
120122

121-
impl rkyv::Archive for SqlString {
122-
type Archived = ();
123-
type Resolver = ();
124-
unsafe fn resolve(&self, _pos: usize, _resolver: Self::Resolver, _out: *mut Self::Archived) {
125-
todo!()
126-
}
127-
}
128-
129-
impl<D> rkyv::Deserialize<SqlString, D> for ()
130-
where
131-
D: Fallible + ?Sized,
132-
{
133-
fn deserialize(&self, _deserializer: &mut D) -> Result<SqlString, D::Error> {
134-
todo!()
135-
}
136-
}
137-
138-
impl<D> rkyv::Deserialize<SqlString, D> for SqlString
139-
where
140-
D: Fallible + ?Sized,
141-
{
142-
fn deserialize(&self, _deserializer: &mut D) -> Result<SqlString, D::Error> {
143-
todo!()
144-
}
145-
}
146-
147-
impl<S> rkyv::Serialize<S> for SqlString
148-
where
149-
S: Fallible + ?Sized,
150-
{
151-
fn serialize(&self, _serializer: &mut S) -> Result<Self::Resolver, S::Error> {
152-
todo!()
153-
}
154-
}
155-
156123
///////////////////////////////
157124

158125
#[doc(hidden)]

crates/sqllib/src/timestamp.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1244,7 +1244,7 @@ some_polymorphic_function2!(datediff_day, Date, Date, Date, Date, i32);
12441244

12451245
#[doc(hidden)]
12461246
pub fn format_date__(format: SqlString, date: Date) -> SqlString {
1247-
return SqlString::from(date.to_dateTime().format(format.str()).to_string());
1247+
SqlString::from(date.to_dateTime().format(format.str()).to_string())
12481248
}
12491249

12501250
some_function2!(format_date, SqlString, Date, SqlString);

sql-to-dbsp-compiler/temp/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ publish = false
66

77
[features]
88
default = []
9-
arcstring = ["feldera-sqllib/arcstring"]
9+
interned_string = ["feldera-sqllib/interned_string"]
1010

1111
[dependencies]
1212
paste = { version = "1.0.12" }

0 commit comments

Comments
 (0)