Skip to content

Commit 0c50685

Browse files
committed
[SQL] Reduce the user effort necessary for implementing linear aggregates
Signed-off-by: Mihai Budiu <mbudiu@feldera.com>
1 parent 34df16b commit 0c50685

File tree

8 files changed

+164
-112
lines changed

8 files changed

+164
-112
lines changed

docs.feldera.com/docs/changelog.md

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,17 @@ import TabItem from '@theme/TabItem';
1515

1616
## Unreleased
1717

18+
Simplified the way user-defined aggregates are defined -- the
19+
compiler now automates the handling of NULL values.
20+
21+
The following change doesn't affect the external Feldera API, only the
22+
pipeline's API available from a sidecar container. The `/status`
23+
endpoint no longer returns HTTP status 503 (SERVICE_UNAVAILABLE) while
24+
the pipeline is initializing. Instead it returns status OK with message
25+
body containing the "Initializing" string.
26+
27+
## 0.138.0
28+
1829
[Transaction (also known as huge-step) support](/pipelines/transactions).
1930

2031
TIMESTAMP is now the same as TIMESTAMP(3); TIME is now the same as
@@ -23,11 +34,7 @@ import TabItem from '@theme/TabItem';
2334
that differ from the default ones are ignored (and the compiler
2435
gives a warning).
2536

26-
The following change doesn't affect the external Feldera API, only the
27-
pipeline's API available from a sidecar container. The `/status`
28-
endpoint no longer returns HTTP status 503 (SERVICE_UNAVAILABLE) while
29-
the pipeline is initializing. Instead it returns status OK with message
30-
body containing the "Initializing" string.
37+
## 0.136.0
3138

3239
### Changes to Python SDK `feldera`:
3340
- `Pipeline.sync_checkpoint` will now raise a runtime error if `wait`

docs.feldera.com/docs/sql/udf.md

Lines changed: 16 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -499,14 +499,8 @@ traits. Most of the code is devoted to this task, and is relatively
499499
straightforward.
500500

501501
For our example the accumulator type that the user has to define is
502-
named `i128_sum_accumulator_type`. In our implementation the
503-
accumulator is a tuple with 3 fields:
504-
505-
- the partial sum computed, stored in an I256 value
506-
507-
- the count of non-null elements in the collection encountered
508-
509-
- the total count of elements in the collection
502+
named `i128_sum_accumulator_type`, holding the computed partial sum,
503+
stored in an I256 value.
510504

511505
The user would add the following implementation to the `udf.rs` file:
512506

@@ -554,7 +548,6 @@ impl MulByRef<Weight> for I256Wrapper {
554548
type Output = Self;
555549

556550
fn mul_by_ref(&self, other: &Weight) -> Self::Output {
557-
println!("Mul {:?} by {}", self, other);
558551
Self {
559552
data: self.data.checked_mul_i64(*other)
560553
.expect("Overflow during multiplication"),
@@ -616,29 +609,18 @@ impl<D: Fallible + ?Sized> rkyv::Deserialize<I256Wrapper, D> for ArchivedI256Wra
616609
}
617610
}
618611

619-
pub type i128_sum_accumulator_type = Tup3<I256Wrapper, i64, i64>;
612+
pub type i128_sum_accumulator_type = I256Wrapper;
620613

621-
pub fn i128_sum_map(val: Option<ByteArray>) -> i128_sum_accumulator_type {
622-
match val {
623-
None => Tup3::new(I256Wrapper::zero(), 0, 1),
624-
Some(val) => Tup3::new(
625-
I256Wrapper::from(val.as_slice()),
626-
1,
627-
1,
628-
),
629-
}
614+
pub fn i128_sum_map(val: ByteArray) -> i128_sum_accumulator_type {
615+
I256Wrapper::from(val.as_slice())
630616
}
631617

632-
pub fn i128_sum_post(val: i128_sum_accumulator_type) -> Option<ByteArray> {
633-
if val.1 == 0 {
634-
None
635-
} else {
636-
// Check for overflow
637-
if val.0.data < I256::from(i128::MIN) || val.0.data > I256::from(i128::MAX) {
638-
panic!("Result of aggregation {} does not fit in 128 bits", val.0.data);
639-
}
640-
Some(ByteArray::new(&val.0.data.to_be_bytes()[16..]))
618+
pub fn i128_sum_post(val: i128_sum_accumulator_type) -> ByteArray {
619+
// Check for overflow
620+
if val.data < I256::from(i128::MIN) || val.data > I256::from(i128::MAX) {
621+
panic!("Result of aggregation {} does not fit in 128 bits", val.data);
641622
}
623+
ByteArray::new(&val.data.to_be_bytes()[16..])
642624
}
643625
```
644626

@@ -647,15 +629,15 @@ The two functions needed to implement the aggregation are
647629

648630
`i128_sum_map` converts a `BINARY(16)` value into an accumulator
649631
value. Notice that in the SQL runtime library `BINARY(16)` is
650-
implemented as a `ByteArray`.
632+
implemented as a `ByteArray`. The argument of this function must be
633+
non-nullable.
651634

652635
`i128_sum_post` converts the accumulator value into the expected
653-
result type `BINARY(16)`.
636+
result type `BINARY(16)`. The result must be non-nullable.
654637

655-
We use the `Tup3` type from our SQL runtime library. This type
656-
implements `Add` and other required operations if all fields do.
657-
The addition of `Tup3` values is done field-wise, and the `Zero` trait
658-
for `Tup3` is a tuple with all fields zero.
638+
The handling of `NULL` is dictated by the SQL semantics, and cannot be
639+
changed: aggregating a collection containing only `NULL` values (or
640+
an empty collection) produces `NULL`.
659641

660642
### Creating user-defined non-linear aggregate functions
661643

Lines changed: 81 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ class TestUDA(unittest.TestCase):
88
def test_local(self):
99
sql = """
1010
CREATE LINEAR AGGREGATE I128_SUM(s BINARY(16)) RETURNS BINARY(16);
11-
CREATE TABLE T(x BINARY(16));
12-
CREATE MATERIALIZED VIEW V AS SELECT I128_SUM(x) AS S, COUNT(*) AS C FROM T;
11+
CREATE TABLE T(x BINARY(16), y BINARY(16) NOT NULL);
12+
CREATE MATERIALIZED VIEW V AS SELECT I128_SUM(x) AS S, I128_SUM(y) AS N, COUNT(*) AS C FROM T;
1313
"""
1414

1515
toml = """
@@ -123,29 +123,18 @@ def test_local(self):
123123
}
124124
}
125125
126-
pub type i128_sum_accumulator_type = Tup3<I256Wrapper, i64, i64>;
126+
pub type i128_sum_accumulator_type = I256Wrapper;
127127
128-
pub fn i128_sum_map(val: Option<ByteArray>) -> i128_sum_accumulator_type {
129-
match val {
130-
None => Tup3::new(I256Wrapper::zero(), 0, 1),
131-
Some(val) => Tup3::new(
132-
I256Wrapper::from(val.as_slice()),
133-
1,
134-
1,
135-
),
136-
}
128+
pub fn i128_sum_map(val: ByteArray) -> i128_sum_accumulator_type {
129+
I256Wrapper::from(val.as_slice())
137130
}
138131
139-
pub fn i128_sum_post(val: i128_sum_accumulator_type) -> Option<ByteArray> {
140-
if val.1 == 0 {
141-
None
142-
} else {
143-
// Check for overflow
144-
if val.0.data < I256::from(i128::MIN) || val.0.data > I256::from(i128::MAX) {
145-
panic!("Result of aggregation {} does not fit in 128 bits", val.0.data);
146-
}
147-
Some(ByteArray::new(&val.0.data.to_be_bytes()[16..]))
132+
pub fn i128_sum_post(val: i128_sum_accumulator_type) -> ByteArray {
133+
// Check for overflow
134+
if val.data < I256::from(i128::MIN) || val.data > I256::from(i128::MAX) {
135+
panic!("Result of aggregation {} does not fit in 128 bits", val.data);
148136
}
137+
ByteArray::new(&val.data.to_be_bytes()[16..])
149138
}
150139
"""
151140

@@ -160,14 +149,21 @@ def test_local(self):
160149
{
161150
"insert": {
162151
"x": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
152+
"y": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
163153
}
164154
}
165155
],
166156
update_format="insert_delete",
167157
)
168158
pipeline.wait_for_idle()
169159
output = list(pipeline.query("SELECT * FROM V;"))
170-
assert output == [{"s": "00000000000000000000000000000001", "c": 1}]
160+
assert output == [
161+
{
162+
"s": "00000000000000000000000000000001",
163+
"n": "00000000000000000000000000000001",
164+
"c": 1,
165+
}
166+
]
171167

172168
# Insert -1
173169
pipeline.input_json(
@@ -193,59 +189,100 @@ def test_local(self):
193189
255,
194190
255,
195191
],
192+
"y": [
193+
255,
194+
255,
195+
255,
196+
255,
197+
255,
198+
255,
199+
255,
200+
255,
201+
255,
202+
255,
203+
255,
204+
255,
205+
255,
206+
255,
207+
255,
208+
255,
209+
],
196210
}
197211
}
198212
],
199213
update_format="insert_delete",
200214
)
201215
pipeline.wait_for_idle()
202216
output = list(pipeline.query("SELECT * FROM V;"))
203-
assert output == [{"s": "00000000000000000000000000000000", "c": 2}]
217+
assert output == [
218+
{
219+
"s": "00000000000000000000000000000000",
220+
"n": "00000000000000000000000000000000",
221+
"c": 2,
222+
}
223+
]
204224

205225
pipeline.input_json(
206226
"t",
207227
[
208228
{
209229
"insert": {
210230
"x": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
231+
"y": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
211232
}
212233
}
213234
],
214235
update_format="insert_delete",
215236
)
216237
output = list(pipeline.query("SELECT * FROM V;"))
217-
assert output == [{"s": "00000000000000000000000000000002", "c": 3}]
238+
assert output == [
239+
{
240+
"s": "00000000000000000000000000000002",
241+
"n": "00000000000000000000000000000002",
242+
"c": 3,
243+
}
244+
]
218245

219246
pipeline.input_json(
220247
"t",
221248
[
222249
{
223250
"insert": {
224251
"x": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
252+
"y": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
225253
}
226254
}
227255
],
228256
update_format="insert_delete",
229257
)
230258
output = list(pipeline.query("SELECT * FROM V;"))
231-
assert output == [{"s": "00000000000000000000000000000005", "c": 4}]
259+
assert output == [
260+
{
261+
"s": "00000000000000000000000000000005",
262+
"n": "00000000000000000000000000000005",
263+
"c": 4,
264+
}
265+
]
232266

233267
pipeline.input_json(
234268
"t",
235269
[
236270
{
237271
"delete": {
238272
"x": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
273+
"y": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
239274
}
240275
},
241276
{
242277
"delete": {
243278
"x": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
279+
"y": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
244280
}
245281
},
246282
{
247283
"delete": {
248284
"x": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
285+
"y": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3],
249286
}
250287
},
251288
{
@@ -268,13 +305,31 @@ def test_local(self):
268305
255,
269306
1,
270307
],
308+
"y": [
309+
255,
310+
255,
311+
255,
312+
255,
313+
255,
314+
255,
315+
255,
316+
255,
317+
255,
318+
255,
319+
255,
320+
255,
321+
255,
322+
255,
323+
255,
324+
1,
325+
],
271326
}
272327
},
273328
],
274329
update_format="insert_delete",
275330
)
276331
output = list(pipeline.query("SELECT * FROM V;"))
277-
assert output == [{"s": None, "c": 0}]
332+
assert output == [{"s": None, "n": None, "c": 0}]
278333

279334
pipeline.stop(force=True)
280335

sql-to-dbsp-compiler/SQL-compiler/src/main/java/org/dbsp/sqlCompiler/compiler/frontend/CalciteToDBSPCompiler.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3108,17 +3108,18 @@ public DBSPNode compileCreateAggregate(CreateAggregateStatement aggregate) {
31083108
String name = uda.description.name.getSimple();
31093109
if (uda.isLinear()) {
31103110
// Add two functions that the user needs to define to the circuit declarations.
3111-
DBSPTypeUser accumulatorType = LinearAggregate.accumulatorType(node, name);
3111+
DBSPType accumulatorType = LinearAggregate.userAccumulatorType(node, name);
31123112
List<DBSPParameter> parameters = Linq.map(uda.description.parameterList,
3113-
p -> new DBSPParameter(p.getName(), this.convertType(node.getPositionRange(), p.getType(), false)));
3113+
p -> new DBSPParameter(p.getName(),
3114+
this.convertType(node.getPositionRange(), p.getType(), false).withMayBeNull(false)));
31143115
DBSPFunction mapFunction = new DBSPFunction(
31153116
node, LinearAggregate.userDefinedMapFunctionName(name), parameters, accumulatorType, null, Linq.list());
31163117
this.getCircuit().addDeclaration(new DBSPDeclaration(new DBSPFunctionItem(mapFunction)));
31173118

31183119
DBSPType resultType = this.convertType(node.getPositionRange(), uda.description.returnType, false);
31193120
DBSPFunction postFunction = new DBSPFunction(
31203121
node, LinearAggregate.userDefinedPostFunctionName(name),
3121-
Linq.list(new DBSPParameter("accumulator", accumulatorType)), resultType, null, Linq.list());
3122+
Linq.list(new DBSPParameter("accumulator", accumulatorType)), resultType.withMayBeNull(false), null, Linq.list());
31223123
this.getCircuit().addDeclaration(new DBSPDeclaration(new DBSPFunctionItem(postFunction)));
31233124
} else {
31243125
throw new UnimplementedException("Non-linear user-defined aggregation functions");

0 commit comments

Comments
 (0)