diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bf0d8695..e9a889f9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +## [25.7.0] - 2025-07-23 + +## [25.7.0-rc1] - 2025-07-18 + ### Added - airflow: check for correct permissions and ownerships in /stackable folder via @@ -60,6 +64,7 @@ All notable changes to this project will be documented in this file. - zookeeper: bump netty version for CVE-2025-24970 in 3.9.3 ([#1180]) - hadoop: backport HADOOP-19352, HADOOP-19335, HADOOP-19465, HADOOP-19456 and HADOOP-19225 to fix vulnerabilities in Hadoop `3.4.1` ([#1184]) - hadoop: Backport HADOOP-18583 to make OpenSSL 3.x work with the native hadoop libraries ([#1209]). +- spark: backport [SPARK-51311] Promote bcprov-jdk18on to compile scope ([#1212]). ### Changed @@ -223,6 +228,7 @@ All notable changes to this project will be documented in this file. [#1189]: https://github.com/stackabletech/docker-images/pull/1189 [#1197]: https://github.com/stackabletech/docker-images/pull/1197 [#1209]: https://github.com/stackabletech/docker-images/pull/1209 +[#1212]: https://github.com/stackabletech/docker-images/pull/1212 ## [25.3.0] - 2025-03-21 @@ -230,7 +236,7 @@ All notable changes to this project will be documented in this file. - omid: Added 1.1.3-SNAPSHOT to allow for easier scanning pre-release - airflow: Add OPA support to Airflow ([#978]). -- nifi: Activate `include-hadoop` profile for NiFi version 2.* ([#958]). +- nifi: Activate `include-hadoop` profile for NiFi version 2 ([#958]). - nifi: Add NiFi hadoop Azure and GCP libraries ([#943]). - superset: Add role mapping from OPA ([#979]). - base: Add containerdebug tool ([#928], [#959]). @@ -763,7 +769,7 @@ All notable changes to this project will be documented in this file. - BREAKING: Use RPM instead of tar.gz for Vector. Because of that, the location of the Vector executable changed, and the operator-rs version 0.45.0 or newer is required ([#429]). -- spark-k8s: Rework spark images to build on top of java-base image. This fixes the missing tzdata-java package in 0.0.0-dev versions ([#434]). +- spark-k8s: Rework spark images to build on top of java-base image. This fixes the missing tzdata-java package in 0.0.0-dev versions ([#434]). - airflow: Updated git-sync to 3.6.8 ([#431]). - airflow: Updated statsd-exporter to 0.24, this was accidentally moved to a very old version previously (0.3.0) ([#431]). diff --git a/spark-k8s/stackable/patches/3.5.5/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch b/spark-k8s/stackable/patches/3.5.5/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch new file mode 100644 index 000000000..08e9415b6 --- /dev/null +++ b/spark-k8s/stackable/patches/3.5.5/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch @@ -0,0 +1,83 @@ +From c4a23f0060f34a2e1c3b826b9698ad56a5ce7176 Mon Sep 17 00:00:00 2001 +From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> +Date: Tue, 22 Jul 2025 18:51:28 +0200 +Subject: Backport [SPARK-51311][BUILD] Promote bcprov-jdk18on to compile scope + +--- + LICENSE-binary | 1 + + assembly/pom.xml | 14 ++++++++++++++ + dev/deps/spark-deps-hadoop-3-hive-2.3 | 1 + + licenses-binary/LICENSE-bouncycastle.txt | 13 +++++++++++++ + 4 files changed, 29 insertions(+) + create mode 100644 licenses-binary/LICENSE-bouncycastle.txt + +diff --git a/LICENSE-binary b/LICENSE-binary +index 05645977a0..9834cf333f 100644 +--- a/LICENSE-binary ++++ b/LICENSE-binary +@@ -480,6 +480,7 @@ org.typelevel:algebra_2.12:jar + org.typelevel:cats-kernel_2.12 + org.typelevel:machinist_2.12 + net.razorvine:pickle ++org.bouncycastle:bcprov-jdk18on + org.slf4j:jcl-over-slf4j + org.slf4j:jul-to-slf4j + org.slf4j:slf4j-api +diff --git a/assembly/pom.xml b/assembly/pom.xml +index 2066bbeb7e..05edd80958 100644 +--- a/assembly/pom.xml ++++ b/assembly/pom.xml +@@ -85,8 +85,22 @@ + guava + ${hadoop.deps.scope} + ++ ++ ++ ++ org.bouncycastle ++ bcprov-jdk18on ++ ${hadoop.deps.scope} ++ ++ + + ++ + + + +diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 +index 4feea62dfe..df85dcb6f5 100644 +--- a/dev/deps/spark-deps-hadoop-3-hive-2.3 ++++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 +@@ -28,6 +28,7 @@ aws-java-sdk-bundle/1.12.262//aws-java-sdk-bundle-1.12.262.jar + azure-data-lake-store-sdk/2.3.9//azure-data-lake-store-sdk-2.3.9.jar + azure-keyvault-core/1.0.0//azure-keyvault-core-1.0.0.jar + azure-storage/7.0.1//azure-storage-7.0.1.jar ++bcprov-jdk18on/1.77//bcprov-jdk18on-1.77.jar + blas/3.0.3//blas-3.0.3.jar + bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar + breeze-macros_2.12/2.1.0//breeze-macros_2.12-2.1.0.jar +diff --git a/licenses-binary/LICENSE-bouncycastle.txt b/licenses-binary/LICENSE-bouncycastle.txt +new file mode 100644 +index 0000000000..277dcd1ebb +--- /dev/null ++++ b/licenses-binary/LICENSE-bouncycastle.txt +@@ -0,0 +1,13 @@ ++Copyright (c) 2000-2024 The Legion of the Bouncy Castle Inc. (https://www.bouncycastle.org). ++Permission is hereby granted, free of charge, to any person obtaining a copy of this software and ++associated documentation files (the "Software"), to deal in the Software without restriction, ++including without limitation the rights to use, copy, modify, merge, publish, distribute, ++sub license, and/or sell copies of the Software, and to permit persons to whom the Software is ++furnished to do so, subject to the following conditions: The above copyright notice and this ++permission notice shall be included in all copies or substantial portions of the Software. ++ ++**THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT ++NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, ++DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT ++OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.** diff --git a/spark-k8s/stackable/patches/3.5.6/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch b/spark-k8s/stackable/patches/3.5.6/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch new file mode 100644 index 000000000..1a7029a6f --- /dev/null +++ b/spark-k8s/stackable/patches/3.5.6/0002-Backport-SPARK-51311-BUILD-Promote-bcprov-jdk18on-to.patch @@ -0,0 +1,83 @@ +From 37d866706d952702effd640babf891fef349da7d Mon Sep 17 00:00:00 2001 +From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> +Date: Tue, 22 Jul 2025 17:34:03 +0200 +Subject: Backport [SPARK-51311][BUILD] Promote bcprov-jdk18on to compile scope + +--- + LICENSE-binary | 1 + + assembly/pom.xml | 14 ++++++++++++++ + dev/deps/spark-deps-hadoop-3-hive-2.3 | 1 + + licenses-binary/LICENSE-bouncycastle.txt | 13 +++++++++++++ + 4 files changed, 29 insertions(+) + create mode 100644 licenses-binary/LICENSE-bouncycastle.txt + +diff --git a/LICENSE-binary b/LICENSE-binary +index 05645977a0..9834cf333f 100644 +--- a/LICENSE-binary ++++ b/LICENSE-binary +@@ -480,6 +480,7 @@ org.typelevel:algebra_2.12:jar + org.typelevel:cats-kernel_2.12 + org.typelevel:machinist_2.12 + net.razorvine:pickle ++org.bouncycastle:bcprov-jdk18on + org.slf4j:jcl-over-slf4j + org.slf4j:jul-to-slf4j + org.slf4j:slf4j-api +diff --git a/assembly/pom.xml b/assembly/pom.xml +index dcc46b0b82..def40ad52e 100644 +--- a/assembly/pom.xml ++++ b/assembly/pom.xml +@@ -85,8 +85,22 @@ + guava + ${hadoop.deps.scope} + ++ ++ ++ ++ org.bouncycastle ++ bcprov-jdk18on ++ ${hadoop.deps.scope} ++ ++ + + ++ + + + +diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 +index dbf0cb34c5..689f50612b 100644 +--- a/dev/deps/spark-deps-hadoop-3-hive-2.3 ++++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 +@@ -28,6 +28,7 @@ aws-java-sdk-bundle/1.12.262//aws-java-sdk-bundle-1.12.262.jar + azure-data-lake-store-sdk/2.3.9//azure-data-lake-store-sdk-2.3.9.jar + azure-keyvault-core/1.0.0//azure-keyvault-core-1.0.0.jar + azure-storage/7.0.1//azure-storage-7.0.1.jar ++bcprov-jdk18on/1.77//bcprov-jdk18on-1.77.jar + blas/3.0.3//blas-3.0.3.jar + bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar + breeze-macros_2.12/2.1.0//breeze-macros_2.12-2.1.0.jar +diff --git a/licenses-binary/LICENSE-bouncycastle.txt b/licenses-binary/LICENSE-bouncycastle.txt +new file mode 100644 +index 0000000000..277dcd1ebb +--- /dev/null ++++ b/licenses-binary/LICENSE-bouncycastle.txt +@@ -0,0 +1,13 @@ ++Copyright (c) 2000-2024 The Legion of the Bouncy Castle Inc. (https://www.bouncycastle.org). ++Permission is hereby granted, free of charge, to any person obtaining a copy of this software and ++associated documentation files (the "Software"), to deal in the Software without restriction, ++including without limitation the rights to use, copy, modify, merge, publish, distribute, ++sub license, and/or sell copies of the Software, and to permit persons to whom the Software is ++furnished to do so, subject to the following conditions: The above copyright notice and this ++permission notice shall be included in all copies or substantial portions of the Software. ++ ++**THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT ++NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, ++DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT ++OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.** diff --git a/stackable-base/Dockerfile b/stackable-base/Dockerfile index 7ad3e5c65..9973b850e 100644 --- a/stackable-base/Dockerfile +++ b/stackable-base/Dockerfile @@ -153,7 +153,8 @@ chown ${STACKABLE_USER_UID}:0 /stackable/.curlrc # CVE-2023-37920: Remove "e-Tugra" root certificates # e-Tugra's root certificates were subject to an investigation prompted by reporting of security issues in their systems # Until they are removed by default from ca-certificates, we should remove them manually -EXPECTED_CERTS_PACKAGE="ca-certificates-2024.2.69_v8.0.303-91.4.el9_4.noarch" +# EXPECTED_CERTS_PACKAGE="ca-certificates-2024.2.69_v8.0.303-91.4.el9_4.noarch" +EXPECTED_CERTS_PACKAGE="ca-certificates-2025.2.80_v9.0.305-91.el9.noarch" ACTUAL_CERTS_PACKAGE="$(rpm -qa ca-certificates)" if [ "$ACTUAL_CERTS_PACKAGE" != "$EXPECTED_CERTS_PACKAGE" ]; then echo "The ca-certificates package was updated to $ACTUAL_CERTS_PACKAGE. Please check if the e-Tugra root certificates are present. \ diff --git a/stackable-devel/Dockerfile b/stackable-devel/Dockerfile index 962c6b312..f7f324b81 100644 --- a/stackable-devel/Dockerfile +++ b/stackable-devel/Dockerfile @@ -93,7 +93,7 @@ chown ${STACKABLE_USER_UID}:0 /stackable/.curlrc # WARNING (@NickLarsenNZ): We should pin the rustup version curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain "$RUST_DEFAULT_TOOLCHAIN_VERSION" -. "$HOME/.cargo/env" && cargo --quiet install cargo-cyclonedx@"$CARGO_CYCLONEDX_CRATE_VERSION" cargo-auditable@"$CARGO_AUDITABLE_CRATE_VERSION" && rustup toolchain install +. "$HOME/.cargo/env" && cargo --locked --quiet install cargo-cyclonedx@"$CARGO_CYCLONEDX_CRATE_VERSION" cargo-auditable@"$CARGO_AUDITABLE_CRATE_VERSION" && rustup toolchain install EOF diff --git a/trino/trino/Dockerfile b/trino/trino/Dockerfile index 89fb1d846..c715bb5de 100644 --- a/trino/trino/Dockerfile +++ b/trino/trino/Dockerfile @@ -5,6 +5,7 @@ FROM stackable/image/java-devel AS trino-builder ARG PRODUCT ARG RELEASE ARG STACKABLE_USER_UID +ARG RUN_TESTS=false WORKDIR /stackable @@ -54,6 +55,18 @@ fi -Ddep.presto-jdbc-under-test=${NEW_VERSION} \ --projects="$SKIP_PROJECTS" +if [ "$RUN_TESTS" = "true" ]; then + echo "Running full Trino test suite..." + ./mvnw \ + --batch-mode \ + --no-transfer-progress \ + verify \ + -Dcheckstyle.skip \ + -Dmaven.javadoc.skip=true \ + -Ddep.presto-jdbc-under-test=${NEW_VERSION} \ + --projects="$SKIP_PROJECTS" +fi + mkdir -p /stackable/patched-libs/maven/io cp -r /root/.m2/repository/io/trino /stackable/patched-libs/maven/io diff --git a/trino/trino/stackable/patches/451/0002-Workaround-for-column-index-filtering-corrupting-row.patch b/trino/trino/stackable/patches/451/0002-Workaround-for-column-index-filtering-corrupting-row.patch new file mode 100644 index 000000000..acfccf6cf --- /dev/null +++ b/trino/trino/stackable/patches/451/0002-Workaround-for-column-index-filtering-corrupting-row.patch @@ -0,0 +1,39 @@ +From 4b0ab3fb780e8d56cb0a56ab994f005bf26bfb7e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?S=C3=B6nke=20Liebau?= +Date: Thu, 26 Mar 2026 15:33:28 +0100 +Subject: Workaround for column index filtering corrupting row positions + +When Parquet column index filtering is active, pages whose min/max stats +don't overlap the predicate are skipped. This causes lastBatchStartRow() +to return positions relative to the filtered output rather than physical +file positions. The incorrect row numbers propagate to the $row_id +synthetic column used by Delta Lake (and other connectors) during +row-level deletes, resulting in wrong rows being deleted. + +Disable column index filtering when the row number column is requested +as a workaround until a proper fix that correctly maps filtered positions +back to physical file positions can be implemented. + +Fixes https://github.com/trinodb/trino/issues/28885 + +Co-Authored-By: Claude Opus 4.6 (1M context) +--- + .../trino/plugin/hive/parquet/ParquetPageSourceFactory.java | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java +index 82136a0ad0e..15ddf145bd2 100644 +--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java ++++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java +@@ -279,7 +279,10 @@ public class ParquetPageSourceFactory + finalDataSource, + timeZone, + memoryContext, +- options, ++ // Column index filtering causes lastBatchStartRow() to produce filtered-output ++ // positions rather than physical file positions, corrupting the row number column. ++ // Disable column index when row numbers are requested to ensure correct positions. ++ appendRowNumberColumn ? ParquetReaderOptions.builder(options).withUseColumnIndex(false).build() : options, + exception -> handleException(dataSourceId, exception), + // We avoid using disjuncts of parquetPredicate for page pruning in ParquetReader as currently column indexes + // are not present in the Parquet files which are read with disjunct predicates. diff --git a/trino/trino/stackable/patches/476/0001-Add-CycloneDX-plugin.patch b/trino/trino/stackable/patches/476/0001-Add-CycloneDX-plugin.patch index 52d992e1a..efece8f57 100644 --- a/trino/trino/stackable/patches/476/0001-Add-CycloneDX-plugin.patch +++ b/trino/trino/stackable/patches/476/0001-Add-CycloneDX-plugin.patch @@ -8,7 +8,7 @@ Subject: Add CycloneDX plugin 1 file changed, 18 insertions(+) diff --git a/pom.xml b/pom.xml -index 579d46b764..8e2d10ec16 100644 +index 579d46b7649..8e2d10ec166 100644 --- a/pom.xml +++ b/pom.xml @@ -2834,6 +2834,24 @@ diff --git a/trino/trino/stackable/patches/476/0002-Disable-web-ui-code-checking-because-flow-v0.241.0-f.patch b/trino/trino/stackable/patches/476/0002-Disable-web-ui-code-checking-because-flow-v0.241.0-f.patch index c8c080232..38d3d3e38 100644 --- a/trino/trino/stackable/patches/476/0002-Disable-web-ui-code-checking-because-flow-v0.241.0-f.patch +++ b/trino/trino/stackable/patches/476/0002-Disable-web-ui-code-checking-because-flow-v0.241.0-f.patch @@ -10,7 +10,7 @@ Subject: Disable web-ui code checking, because flow v0.241.0 for non-x86 1 file changed, 22 deletions(-) diff --git a/core/trino-web-ui/pom.xml b/core/trino-web-ui/pom.xml -index a783c8f989..bea233cfb9 100644 +index a783c8f9892..bea233cfb9e 100644 --- a/core/trino-web-ui/pom.xml +++ b/core/trino-web-ui/pom.xml @@ -103,28 +103,6 @@ diff --git a/trino/trino/stackable/patches/476/0003-Workaround-for-column-index-filtering-corrupting-row.patch b/trino/trino/stackable/patches/476/0003-Workaround-for-column-index-filtering-corrupting-row.patch new file mode 100644 index 000000000..050e12582 --- /dev/null +++ b/trino/trino/stackable/patches/476/0003-Workaround-for-column-index-filtering-corrupting-row.patch @@ -0,0 +1,39 @@ +From add29d24bcd21c592cb89e4b7738eb962f281118 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?S=C3=B6nke=20Liebau?= +Date: Thu, 26 Mar 2026 15:33:28 +0100 +Subject: Workaround for column index filtering corrupting row positions + +When Parquet column index filtering is active, pages whose min/max stats +don't overlap the predicate are skipped. This causes lastBatchStartRow() +to return positions relative to the filtered output rather than physical +file positions. The incorrect row numbers propagate to the $row_id +synthetic column used by Delta Lake (and other connectors) during +row-level deletes, resulting in wrong rows being deleted. + +Disable column index filtering when the row number column is requested +as a workaround until a proper fix that correctly maps filtered positions +back to physical file positions can be implemented. + +Fixes https://github.com/trinodb/trino/issues/28885 + +Co-Authored-By: Claude Opus 4.6 (1M context) +--- + .../trino/plugin/hive/parquet/ParquetPageSourceFactory.java | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java +index 4472496b07b..2522e29a0a3 100644 +--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java ++++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java +@@ -280,7 +280,10 @@ public class ParquetPageSourceFactory + finalDataSource, + timeZone, + memoryContext, +- options, ++ // Column index filtering causes lastBatchStartRow() to produce filtered-output ++ // positions rather than physical file positions, corrupting the row number column. ++ // Disable column index when row numbers are requested to ensure correct positions. ++ appendRowNumberColumn ? ParquetReaderOptions.builder(options).withUseColumnIndex(false).build() : options, + exception -> handleException(dataSourceId, exception), + // We avoid using disjuncts of parquetPredicate for page pruning in ParquetReader as currently column indexes + // are not present in the Parquet files which are read with disjunct predicates.