Skip to content

Commit 4c1310a

Browse files
committed
[SQL] Support for OVER window queries with UNSIGNED and UUID NOT NULL sort orders
Signed-off-by: Mihai Budiu <mbudiu@feldera.com>
1 parent 741e654 commit 4c1310a

18 files changed

Lines changed: 1385 additions & 129 deletions

File tree

crates/sqllib/src/aggregates.rs

Lines changed: 803 additions & 13 deletions
Large diffs are not rendered by default.

crates/sqllib/src/binary.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,3 +518,50 @@ pub fn bin2utf8N(source: Option<ByteArray>) -> Option<SqlString> {
518518
Some(bytes) => bin2utf8_(bytes),
519519
}
520520
}
521+
522+
#[doc(hidden)]
523+
pub fn binary_to_u8(b: ByteArray) -> u8 {
524+
assert!(b.length() <= 1);
525+
b.data[0]
526+
}
527+
528+
#[doc(hidden)]
529+
pub fn binary_to_u16(b: ByteArray) -> u16 {
530+
assert!(b.length() <= 2);
531+
let mut buf = [0u8; 2];
532+
buf[2 - b.length()..].copy_from_slice(&b.data);
533+
u16::from_be_bytes(buf)
534+
}
535+
536+
#[doc(hidden)]
537+
pub fn binary_to_u32(b: ByteArray) -> u32 {
538+
assert!(b.length() <= 4);
539+
let mut buf = [0u8; 4];
540+
buf[4 - b.length()..].copy_from_slice(&b.data);
541+
u32::from_be_bytes(buf)
542+
}
543+
544+
#[doc(hidden)]
545+
pub fn binary_to_u64(b: ByteArray) -> u64 {
546+
assert!(b.length() <= 8);
547+
let mut buf = [0u8; 8];
548+
buf[8 - b.length()..].copy_from_slice(&b.data);
549+
u64::from_be_bytes(buf)
550+
}
551+
552+
#[doc(hidden)]
553+
pub fn binary_to_u128(b: ByteArray) -> u128 {
554+
assert!(b.length() <= 16);
555+
let mut buf = [0u8; 16];
556+
buf[16 - b.length()..].copy_from_slice(&b.data);
557+
u128::from_be_bytes(buf)
558+
}
559+
560+
#[test]
561+
pub fn testBinaryToInteger() {
562+
let bin = ByteArray::new(&[0x12, 0x34]);
563+
assert_eq!(0x1234, binary_to_u16(bin.clone()));
564+
assert_eq!(0x1234, binary_to_u32(bin.clone()));
565+
assert_eq!(0x1234, binary_to_u64(bin.clone()));
566+
assert_eq!(0x1234, binary_to_u128(bin));
567+
}

crates/sqllib/src/interval.rs

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -121,55 +121,55 @@ pub fn abs_ShortInterval(value: ShortInterval) -> ShortInterval {
121121
some_polymorphic_function1!(abs, ShortInterval, ShortInterval, ShortInterval);
122122

123123
#[doc(hidden)]
124-
/// This function is used in rolling window computations, which require all
125-
/// values to be expressed using unsigned types.
124+
/// This function is used in rolling window computations, to compute a window bound.
125+
/// Window bounds are always positive.
126126
pub fn to_bound_ShortInterval_ShortInterval_u128(value: &ShortInterval) -> u128 {
127127
value.microseconds as u128
128128
}
129129

130130
#[doc(hidden)]
131-
/// This function is used in rolling window computations, which require all
132-
/// values to be expressed using unsigned types.
131+
/// This function is used in rolling window computations, to compute a window bound.
132+
/// Window bounds are always positive.
133133
pub fn to_bound_ShortInterval_Date_u128(value: &ShortInterval) -> u128 {
134134
// express value in days
135135
(value.microseconds / 1_000_000_i64 / 86400) as u128
136136
}
137137

138138
#[doc(hidden)]
139-
/// This function is used in rolling window computations, which require all
140-
/// values to be expressed using unsigned types.
139+
/// This function is used in rolling window computations, to compute a window bound.
140+
/// Window bounds are always positive.
141141
pub fn to_bound_ShortInterval_Date_u64(value: &ShortInterval) -> u64 {
142142
// express value in days
143143
(value.microseconds / 1_000_000_i64 / 86400) as u64
144144
}
145145

146146
#[doc(hidden)]
147-
/// This function is used in rolling window computations, which require all
148-
/// values to be expressed using unsigned types.
147+
/// This function is used in rolling window computations, to compute a window bound.
148+
/// Window bounds are always positive.
149149
pub fn to_bound_ShortInterval_Timestamp_u128(value: &ShortInterval) -> u128 {
150150
// express value in milliseconds
151151
value.microseconds as u128
152152
}
153153

154154
#[doc(hidden)]
155-
/// This function is used in rolling window computations, which require all
156-
/// values to be expressed using unsigned types.
155+
/// This function is used in rolling window computations, to compute a window bound.
156+
/// Window bounds are always positive.
157157
pub fn to_bound_ShortInterval_Timestamp_u64(value: &ShortInterval) -> u64 {
158158
// express value in milliseconds
159159
value.microseconds as u64
160160
}
161161

162162
#[doc(hidden)]
163-
/// This function is used in rolling window computations, which require all
164-
/// values to be expressed using unsigned types.
163+
/// This function is used in rolling window computations, to compute a window bound.
164+
/// Window bounds are always positive.
165165
pub fn to_bound_ShortInterval_Time_u128(value: &ShortInterval) -> u128 {
166166
// express value in nanoseconds
167167
(value.microseconds * 1_000) as u128
168168
}
169169

170170
#[doc(hidden)]
171-
/// This function is used in rolling window computations, which require all
172-
/// values to be expressed using unsigned types.
171+
/// This function is used in rolling window computations, to compute a window bound.
172+
/// Window bounds are always positive.
173173
pub fn to_bound_ShortInterval_Time_u64(value: &ShortInterval) -> u64 {
174174
// express value in nanoseconds
175175
(value.microseconds * 1_000) as u64

crates/sqllib/src/uuid.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,13 @@ impl Display for Uuid {
171171
Display::fmt(&self.value, f)
172172
}
173173
}
174+
175+
#[doc(hidden)]
176+
pub fn uuid_to_u128(u: Uuid) -> u128 {
177+
u128::from_be_bytes(*u.to_bytes())
178+
}
179+
180+
#[doc(hidden)]
181+
pub fn u128_to_uuid(n: u128) -> Uuid {
182+
Uuid::from_bytes(n.to_be_bytes())
183+
}

sql-to-dbsp-compiler/SQL-compiler/src/main/java/org/dbsp/sqlCompiler/compiler/backend/rust/ToRustInnerVisitor.java

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -373,19 +373,23 @@ void codegen(DBSPUnsignedWrapExpression.TypeSequence sequence) {
373373
this.builder.append("<");
374374
// In the type parameter we do not put the Option<>
375375
sequence.dataType.withMayBeNull(false).accept(this);
376-
this.builder.append(", ");
377-
sequence.dataConvertedType.accept(this);
378-
this.builder.append(", ");
379-
sequence.intermediateType.accept(this);
380-
this.builder.append(", ");
381-
sequence.unsignedType.accept(this);
376+
if (sequence.dataConvertedType.signed) {
377+
this.builder.append(", ");
378+
sequence.dataConvertedType.accept(this);
379+
this.builder.append(", ");
380+
sequence.intermediateType.accept(this);
381+
}
382+
if (!sequence.unsignedType.sameType(sequence.dataType)) {
383+
this.builder.append(", ");
384+
sequence.unsignedType.accept(this);
385+
}
382386
this.builder.append(">");
383387
}
384388

385389
@Override
386390
public VisitDecision preorder(DBSPUnsignedWrapExpression expression) {
387391
this.push(expression);
388-
this.builder.append("UnsignedWrapper")
392+
this.builder.append(DBSPUnsignedWrapExpression.RUST_IMPLEMENTATION)
389393
.append("::")
390394
.append(expression.getMethod())
391395
.append("::");
@@ -415,7 +419,7 @@ public VisitDecision preorder(DBSPLazyExpression expression) {
415419
@Override
416420
public VisitDecision preorder(DBSPUnsignedUnwrapExpression expression) {
417421
this.push(expression);
418-
this.builder.append("UnsignedWrapper")
422+
this.builder.append(DBSPUnsignedWrapExpression.RUST_IMPLEMENTATION)
419423
.append("::")
420424
.append(expression.getMethod())
421425
.append("::");
@@ -1869,7 +1873,9 @@ public VisitDecision preorder(DBSPUnaryExpression expression) {
18691873
this.pop(expression);
18701874
return VisitDecision.STOP;
18711875
} else if (expression.opcode == DBSPOpcode.INTEGER_TO_SHORT_INTERVAL ||
1872-
expression.opcode == DBSPOpcode.SHORT_INTERVAL_TO_INTEGER) {
1876+
expression.opcode == DBSPOpcode.SHORT_INTERVAL_TO_INTEGER ||
1877+
expression.opcode == DBSPOpcode.INTEGER_TO_UUID ||
1878+
expression.opcode == DBSPOpcode.UUID_TO_INTEGER) {
18731879
this.builder.append(expression.opcode.toString())
18741880
.append("(");
18751881
expression.source.accept(this);

sql-to-dbsp-compiler/SQL-compiler/src/main/java/org/dbsp/sqlCompiler/compiler/frontend/ExpressionCompiler.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
import org.apache.calcite.util.DateString;
4242
import org.apache.calcite.util.TimeString;
4343
import org.apache.calcite.util.TimestampString;
44-
import org.dbsp.sqlCompiler.circuit.operator.DBSPConcreteAsofJoinOperator;
4544
import org.dbsp.sqlCompiler.compiler.DBSPCompiler;
4645
import org.dbsp.sqlCompiler.compiler.ICompilerComponent;
4746
import org.dbsp.sqlCompiler.compiler.errors.BaseCompilerException;

sql-to-dbsp-compiler/SQL-compiler/src/main/java/org/dbsp/sqlCompiler/compiler/frontend/aggregates/RangeAggregates.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,21 +41,19 @@
4141
import org.dbsp.sqlCompiler.ir.expression.literal.DBSPLiteral;
4242
import org.dbsp.sqlCompiler.ir.type.DBSPType;
4343
import org.dbsp.sqlCompiler.ir.type.DBSPTypeCode;
44-
import org.dbsp.sqlCompiler.ir.type.IsDateType;
4544
import org.dbsp.sqlCompiler.ir.type.IsNumericType;
4645
import org.dbsp.sqlCompiler.ir.type.IsTimeRelatedType;
4746
import org.dbsp.sqlCompiler.ir.type.derived.DBSPTypeRawTuple;
4847
import org.dbsp.sqlCompiler.ir.type.derived.DBSPTypeTuple;
4948
import org.dbsp.sqlCompiler.ir.type.primitive.DBSPTypeBaseType;
5049
import org.dbsp.sqlCompiler.ir.type.primitive.DBSPTypeDate;
5150
import org.dbsp.sqlCompiler.ir.type.primitive.DBSPTypeDecimal;
52-
import org.dbsp.sqlCompiler.ir.type.primitive.DBSPTypeDouble;
5351
import org.dbsp.sqlCompiler.ir.type.primitive.DBSPTypeInteger;
5452
import org.dbsp.sqlCompiler.ir.type.primitive.DBSPTypeLongInterval;
55-
import org.dbsp.sqlCompiler.ir.type.primitive.DBSPTypeReal;
5653
import org.dbsp.sqlCompiler.ir.type.primitive.DBSPTypeShortInterval;
5754
import org.dbsp.sqlCompiler.ir.type.primitive.DBSPTypeTime;
5855
import org.dbsp.sqlCompiler.ir.type.primitive.DBSPTypeTimestamp;
56+
import org.dbsp.sqlCompiler.ir.type.primitive.DBSPTypeUuid;
5957
import org.dbsp.sqlCompiler.ir.type.user.DBSPTypeIndexedZSet;
6058
import org.dbsp.util.Linq;
6159
import org.dbsp.util.Utilities;
@@ -101,6 +99,7 @@ String intervalConversionFunction(CalciteObject node, DBSPType unsignedType, DBS
10199

102100
// we ignore nullability because window bounds are constants and cannot be null
103101
// BEWARE: this function name structure is hardwired in the monotonicity analysis
102+
// NOTE: This function requires the arguments to be positive
104103
return "to_bound_" +
105104
deltaType.withMayBeNull(false).to(DBSPTypeBaseType.class).shortName() + "_" +
106105
sortType.withMayBeNull(false).to(DBSPTypeBaseType.class).shortName() + "_" +
@@ -196,6 +195,14 @@ public DBSPSimpleOperator implement(DBSPSimpleOperator input, DBSPSimpleOperator
196195
sortType = DBSPTypeInteger.getType(node, INT64, originalSortType.mayBeNull);
197196
convertToSigned = new DBSPUnaryExpression(
198197
this.node, sortType, DBSPOpcode.SHORT_INTERVAL_TO_INTEGER, var).closure(var);
198+
} else if (originalSortType.is(DBSPTypeUuid.class)) {
199+
if (originalSortType.mayBeNull) {
200+
// This is 128 bits, but we need one more to represent the NULL value.
201+
throw new UnimplementedException("OVER currently cannot sort on columns with type UUID NULL. Can you convert the type to UUID NOT NULL?", 457, node);
202+
}
203+
sortType = DBSPTypeInteger.getType(node, UINT128, originalSortType.mayBeNull);
204+
convertToSigned = new DBSPUnaryExpression(
205+
this.node, sortType, DBSPOpcode.UUID_TO_INTEGER, var.applyClone()).closure(var);
199206
} else {
200207
sortType = originalSortType;
201208
}
@@ -306,6 +313,9 @@ public DBSPSimpleOperator implement(DBSPSimpleOperator input, DBSPSimpleOperator
306313
} else if (originalSortType.is(DBSPTypeShortInterval.class)) {
307314
unwrap = new DBSPUnaryExpression(this.node, originalSortType,
308315
DBSPOpcode.INTEGER_TO_SHORT_INTERVAL, unwrap);
316+
} else if (originalSortType.is(DBSPTypeUuid.class)) {
317+
unwrap = new DBSPUnaryExpression(this.node, originalSortType,
318+
DBSPOpcode.INTEGER_TO_UUID, unwrap);
309319
}
310320

311321
DBSPExpression ixKey = var.field(0).deref();

sql-to-dbsp-compiler/SQL-compiler/src/main/java/org/dbsp/sqlCompiler/compiler/visitors/monotone/MonotoneTransferFunctions.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,9 @@ public void postorder(DBSPUnsignedUnwrapExpression expression) {
743743
static final Set<DBSPOpcode> monotoneUnary = Set.of(
744744
DBSPOpcode.UNARY_PLUS, DBSPOpcode.TYPEDBOX,
745745
DBSPOpcode.DECIMAL_TO_INTEGER, DBSPOpcode.INTEGER_TO_DECIMAL,
746-
DBSPOpcode.SHORT_INTERVAL_TO_INTEGER, DBSPOpcode.INTEGER_TO_SHORT_INTERVAL);
746+
DBSPOpcode.SHORT_INTERVAL_TO_INTEGER, DBSPOpcode.INTEGER_TO_SHORT_INTERVAL,
747+
DBSPOpcode.UUID_TO_INTEGER, DBSPOpcode.INTEGER_TO_UUID
748+
);
747749

748750
@Override
749751
public void postorder(DBSPUnaryExpression expression) {

sql-to-dbsp-compiler/SQL-compiler/src/main/java/org/dbsp/sqlCompiler/ir/expression/DBSPOpcode.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ public enum DBSPOpcode {
2525
// Lossless, order-preserving conversion between short interval and INTEGER, used for range aggregates
2626
SHORT_INTERVAL_TO_INTEGER("short_interval_to_integer", false),
2727
INTEGER_TO_SHORT_INTERVAL("integer_to_short_interval", false),
28+
// Lossless, order-preserving conversion between UUID and INTEGER, used for range aggregates
29+
UUID_TO_INTEGER("uuid_to_u128", false),
30+
INTEGER_TO_UUID("u128_to_uuid", false),
2831

2932
// Binary operations
3033
ADD("+", false),
@@ -130,7 +133,7 @@ public boolean isStrict() {
130133
AGG_MAX, AGG_XOR, AGG_OR, AGG_AND, IS_DISTINCT, CONCAT, MIN, MAX, OR, AND, IS_NOT_FALSE, IS_NOT_TRUE,
131134
AGG_MAX1, AGG_MIN1, INDICATOR -> false;
132135
case NEG, INTERVAL_DIV, INTERVAL_MUL, DECIMAL_TO_INTEGER, INTEGER_TO_DECIMAL,
133-
SHORT_INTERVAL_TO_INTEGER, INTEGER_TO_SHORT_INTERVAL,
136+
SHORT_INTERVAL_TO_INTEGER, INTEGER_TO_SHORT_INTERVAL, INTEGER_TO_UUID, UUID_TO_INTEGER,
134137
RUST_INDEX, VARIANT_INDEX, MAP_INDEX,
135138
SQL_INDEX, XOR, BW_OR, MUL_WEIGHT, BW_AND, GTE, LTE, GT, LT, NEQ, EQ, MOD, DIV_NULL, DIV, MUL, SUB,
136139
ADD, TYPEDBOX, IS_TRUE, IS_FALSE, NOT, UNARY_PLUS -> true;

sql-to-dbsp-compiler/SQL-compiler/src/main/java/org/dbsp/sqlCompiler/ir/expression/DBSPUnaryExpression.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@ public IIndentStream toString(IIndentStream builder) {
7979
if (this.opcode == DBSPOpcode.DECIMAL_TO_INTEGER ||
8080
this.opcode == DBSPOpcode.INTEGER_TO_DECIMAL ||
8181
this.opcode == DBSPOpcode.SHORT_INTERVAL_TO_INTEGER ||
82-
this.opcode == DBSPOpcode.INTEGER_TO_SHORT_INTERVAL) {
82+
this.opcode == DBSPOpcode.INTEGER_TO_SHORT_INTERVAL ||
83+
this.opcode == DBSPOpcode.INTEGER_TO_UUID ||
84+
this.opcode == DBSPOpcode.UUID_TO_INTEGER) {
8385
return builder.append(this.opcode.toString())
8486
.append("(")
8587
.append(this.source)

0 commit comments

Comments
 (0)