From 82d88232dc7a7afa518554eb01026a15b203effa Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Fri, 29 May 2026 14:53:00 -0400 Subject: [PATCH 1/2] Add sort_tpch SQL benchmark --- .../sort_tpch/benchmarks/q01.benchmark | 44 ++++++++++++++ .../sort_tpch/benchmarks/q02.benchmark | 44 ++++++++++++++ .../sort_tpch/benchmarks/q03.benchmark | 44 ++++++++++++++ .../sort_tpch/benchmarks/q04.benchmark | 44 ++++++++++++++ .../sort_tpch/benchmarks/q05.benchmark | 44 ++++++++++++++ .../sort_tpch/benchmarks/q06.benchmark | 44 ++++++++++++++ .../sort_tpch/benchmarks/q07.benchmark | 58 ++++++++++++++++++ .../sort_tpch/benchmarks/q08.benchmark | 44 ++++++++++++++ .../sort_tpch/benchmarks/q09.benchmark | 44 ++++++++++++++ .../sort_tpch/benchmarks/q10.benchmark | 59 +++++++++++++++++++ .../sort_tpch/benchmarks/q11.benchmark | 44 ++++++++++++++ .../sql_benchmarks/sort_tpch/init/load.sql | 3 + 12 files changed, 516 insertions(+) create mode 100644 benchmarks/sql_benchmarks/sort_tpch/benchmarks/q01.benchmark create mode 100644 benchmarks/sql_benchmarks/sort_tpch/benchmarks/q02.benchmark create mode 100644 benchmarks/sql_benchmarks/sort_tpch/benchmarks/q03.benchmark create mode 100644 benchmarks/sql_benchmarks/sort_tpch/benchmarks/q04.benchmark create mode 100644 benchmarks/sql_benchmarks/sort_tpch/benchmarks/q05.benchmark create mode 100644 benchmarks/sql_benchmarks/sort_tpch/benchmarks/q06.benchmark create mode 100644 benchmarks/sql_benchmarks/sort_tpch/benchmarks/q07.benchmark create mode 100644 benchmarks/sql_benchmarks/sort_tpch/benchmarks/q08.benchmark create mode 100644 benchmarks/sql_benchmarks/sort_tpch/benchmarks/q09.benchmark create mode 100644 benchmarks/sql_benchmarks/sort_tpch/benchmarks/q10.benchmark create mode 100644 benchmarks/sql_benchmarks/sort_tpch/benchmarks/q11.benchmark create mode 100644 benchmarks/sql_benchmarks/sort_tpch/init/load.sql diff --git a/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q01.benchmark 
b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q01.benchmark new file mode 100644 index 0000000000000..b6f1a37e3d03f --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q01.benchmark @@ -0,0 +1,44 @@ +echo Loading tpch items sorted: ${BENCH_SORTED:-false} + +# +# Sort queries with different characteristics: +# - Sort key with fixed length or variable length (VARCHAR) +# - Sort key with different cardinality +# - Different number of sort keys +# - Different number of payload columns (thin: 1 additional column other +# than sort keys; wide: all columns except sort keys) +# +# DataSet is `lineitem` table in TPCH dataset (16 columns, 6M rows for +# scale factor 1.0, cardinality is counted from SF1 dataset) +# +# Key Columns: +# - Column `l_linenumber`, type: `INTEGER`, cardinality: 7 +# - Column `l_suppkey`, type: `BIGINT`, cardinality: 10k +# - Column `l_orderkey`, type: `BIGINT`, cardinality: 1.5M +# - Column `l_comment`, type: `VARCHAR`, cardinality: 4.5M (len is ~26 chars) +# +# Payload Columns: +# - Thin variant: `l_partkey` column with `BIGINT` type (1 column) +# - Wide variant: all columns except for possible key columns (12 columns) + +name Q01 +group sort_tpch +subgroup sf${BENCH_SIZE:-1} + +echo Loading sort_tpch sf ${BENCH_SIZE:-1} data + +load sql_benchmarks/sort_tpch/init/load.sql + +assert I +SELECT COUNT(*) > 0 from lineitem; +---- +true + +run +-- Q1: 1 sort key (type: INTEGER, cardinality: 7) + 1 payload column +SELECT l_linenumber, l_partkey +FROM lineitem +ORDER BY l_linenumber +${LIMIT:-false|LIMIT 100| } + +result sql_benchmarks/sort_tpch/results/sf${BENCH_SIZE:-1}/q01.csv diff --git a/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q02.benchmark b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q02.benchmark new file mode 100644 index 0000000000000..1238beb00583a --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q02.benchmark @@ -0,0 +1,44 @@ +echo Loading tpch items sorted: ${BENCH_SORTED:-false} + +# +# Sort 
queries with different characteristics: +# - Sort key with fixed length or variable length (VARCHAR) +# - Sort key with different cardinality +# - Different number of sort keys +# - Different number of payload columns (thin: 1 additional column other +# than sort keys; wide: all columns except sort keys) +# +# DataSet is `lineitem` table in TPCH dataset (16 columns, 6M rows for +# scale factor 1.0, cardinality is counted from SF1 dataset) +# +# Key Columns: +# - Column `l_linenumber`, type: `INTEGER`, cardinality: 7 +# - Column `l_suppkey`, type: `BIGINT`, cardinality: 10k +# - Column `l_orderkey`, type: `BIGINT`, cardinality: 1.5M +# - Column `l_comment`, type: `VARCHAR`, cardinality: 4.5M (len is ~26 chars) +# +# Payload Columns: +# - Thin variant: `l_partkey` column with `BIGINT` type (1 column) +# - Wide variant: all columns except for possible key columns (12 columns) + +name Q02 +group sort_tpch +subgroup sf${BENCH_SIZE:-1} + +echo Loading sort_tpch sf ${BENCH_SIZE:-1} data + +load sql_benchmarks/sort_tpch/init/load.sql + +assert I +SELECT COUNT(*) > 0 from lineitem; +---- +true + +run +-- Q2: 1 sort key (type: BIGINT, cardinality: 1.5M) + 1 payload column +SELECT l_orderkey, l_partkey +FROM lineitem +ORDER BY l_orderkey +${LIMIT:-false|LIMIT 100| } + +result sql_benchmarks/sort_tpch/results/sf${BENCH_SIZE:-1}/q02.csv diff --git a/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q03.benchmark b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q03.benchmark new file mode 100644 index 0000000000000..aadbe86c61602 --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q03.benchmark @@ -0,0 +1,44 @@ +echo Loading tpch items sorted: ${BENCH_SORTED:-false} + +# +# Sort queries with different characteristics: +# - Sort key with fixed length or variable length (VARCHAR) +# - Sort key with different cardinality +# - Different number of sort keys +# - Different number of payload columns (thin: 1 additional column other +# than sort keys; wide: all columns 
except sort keys) +# +# DataSet is `lineitem` table in TPCH dataset (16 columns, 6M rows for +# scale factor 1.0, cardinality is counted from SF1 dataset) +# +# Key Columns: +# - Column `l_linenumber`, type: `INTEGER`, cardinality: 7 +# - Column `l_suppkey`, type: `BIGINT`, cardinality: 10k +# - Column `l_orderkey`, type: `BIGINT`, cardinality: 1.5M +# - Column `l_comment`, type: `VARCHAR`, cardinality: 4.5M (len is ~26 chars) +# +# Payload Columns: +# - Thin variant: `l_partkey` column with `BIGINT` type (1 column) +# - Wide variant: all columns except for possible key columns (12 columns) + +name Q03 +group sort_tpch +subgroup sf${BENCH_SIZE:-1} + +echo Loading sort_tpch sf ${BENCH_SIZE:-1} data + +load sql_benchmarks/sort_tpch/init/load.sql + +assert I +SELECT COUNT(*) > 0 from lineitem; +---- +true + +run +-- Q3: 1 sort key (type: VARCHAR, cardinality: 4.5M) + 1 payload column +SELECT l_comment, l_partkey +FROM lineitem +ORDER BY l_comment +${LIMIT:-false|LIMIT 100| } + +result sql_benchmarks/sort_tpch/results/sf${BENCH_SIZE:-1}/q03.csv diff --git a/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q04.benchmark b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q04.benchmark new file mode 100644 index 0000000000000..8119a6c51be33 --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q04.benchmark @@ -0,0 +1,44 @@ +echo Loading tpch items sorted: ${BENCH_SORTED:-false} + +# +# Sort queries with different characteristics: +# - Sort key with fixed length or variable length (VARCHAR) +# - Sort key with different cardinality +# - Different number of sort keys +# - Different number of payload columns (thin: 1 additional column other +# than sort keys; wide: all columns except sort keys) +# +# DataSet is `lineitem` table in TPCH dataset (16 columns, 6M rows for +# scale factor 1.0, cardinality is counted from SF1 dataset) +# +# Key Columns: +# - Column `l_linenumber`, type: `INTEGER`, cardinality: 7 +# - Column `l_suppkey`, type: `BIGINT`, cardinality: 10k +# 
- Column `l_orderkey`, type: `BIGINT`, cardinality: 1.5M +# - Column `l_comment`, type: `VARCHAR`, cardinality: 4.5M (len is ~26 chars) +# +# Payload Columns: +# - Thin variant: `l_partkey` column with `BIGINT` type (1 column) +# - Wide variant: all columns except for possible key columns (12 columns) + +name Q04 +group sort_tpch +subgroup sf${BENCH_SIZE:-1} + +echo Loading sort_tpch sf ${BENCH_SIZE:-1} data + +load sql_benchmarks/sort_tpch/init/load.sql + +assert I +SELECT COUNT(*) > 0 from lineitem; +---- +true + +run +-- Q4: 2 sort keys {(BIGINT, 1.5M), (INTEGER, 7)} + 1 payload column +SELECT l_orderkey, l_linenumber, l_partkey +FROM lineitem +ORDER BY l_orderkey, l_linenumber +${LIMIT:-false|LIMIT 100| } + +result sql_benchmarks/sort_tpch/results/sf${BENCH_SIZE:-1}/q04.csv diff --git a/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q05.benchmark b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q05.benchmark new file mode 100644 index 0000000000000..5ee9e610cc3bf --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q05.benchmark @@ -0,0 +1,44 @@ +echo Loading tpch items sorted: ${BENCH_SORTED:-false} + +# +# Sort queries with different characteristics: +# - Sort key with fixed length or variable length (VARCHAR) +# - Sort key with different cardinality +# - Different number of sort keys +# - Different number of payload columns (thin: 1 additional column other +# than sort keys; wide: all columns except sort keys) +# +# DataSet is `lineitem` table in TPCH dataset (16 columns, 6M rows for +# scale factor 1.0, cardinality is counted from SF1 dataset) +# +# Key Columns: +# - Column `l_linenumber`, type: `INTEGER`, cardinality: 7 +# - Column `l_suppkey`, type: `BIGINT`, cardinality: 10k +# - Column `l_orderkey`, type: `BIGINT`, cardinality: 1.5M +# - Column `l_comment`, type: `VARCHAR`, cardinality: 4.5M (len is ~26 chars) +# +# Payload Columns: +# - Thin variant: `l_partkey` column with `BIGINT` type (1 column) +# - Wide variant: all columns except 
for possible key columns (12 columns) + +name Q05 +group sort_tpch +subgroup sf${BENCH_SIZE:-1} + +echo Loading sort_tpch sf ${BENCH_SIZE:-1} data + +load sql_benchmarks/sort_tpch/init/load.sql + +assert I +SELECT COUNT(*) > 0 from lineitem; +---- +true + +run +-- Q5: 3 sort keys {(INTEGER, 7), (BIGINT, 10k), (BIGINT, 1.5M)} + no payload column +SELECT l_linenumber, l_suppkey, l_orderkey +FROM lineitem +ORDER BY l_linenumber, l_suppkey, l_orderkey +${LIMIT:-false|LIMIT 100| } + +result sql_benchmarks/sort_tpch/results/sf${BENCH_SIZE:-1}/q05.csv diff --git a/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q06.benchmark b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q06.benchmark new file mode 100644 index 0000000000000..54ce6fa44341d --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q06.benchmark @@ -0,0 +1,44 @@ +echo Loading tpch items sorted: ${BENCH_SORTED:-false} + +# +# Sort queries with different characteristics: +# - Sort key with fixed length or variable length (VARCHAR) +# - Sort key with different cardinality +# - Different number of sort keys +# - Different number of payload columns (thin: 1 additional column other +# than sort keys; wide: all columns except sort keys) +# +# DataSet is `lineitem` table in TPCH dataset (16 columns, 6M rows for +# scale factor 1.0, cardinality is counted from SF1 dataset) +# +# Key Columns: +# - Column `l_linenumber`, type: `INTEGER`, cardinality: 7 +# - Column `l_suppkey`, type: `BIGINT`, cardinality: 10k +# - Column `l_orderkey`, type: `BIGINT`, cardinality: 1.5M +# - Column `l_comment`, type: `VARCHAR`, cardinality: 4.5M (len is ~26 chars) +# +# Payload Columns: +# - Thin variant: `l_partkey` column with `BIGINT` type (1 column) +# - Wide variant: all columns except for possible key columns (12 columns) + +name Q06 +group sort_tpch +subgroup sf${BENCH_SIZE:-1} + +echo Loading sort_tpch sf ${BENCH_SIZE:-1} data + +load sql_benchmarks/sort_tpch/init/load.sql + +assert I +SELECT COUNT(*) > 0 from 
lineitem; +---- +true + +run +-- Q6: 3 sort keys {(INTEGER, 7), (BIGINT, 10k), (BIGINT, 1.5M)} + 1 payload column +SELECT l_linenumber, l_suppkey, l_orderkey, l_partkey +FROM lineitem +ORDER BY l_linenumber, l_suppkey, l_orderkey +${LIMIT:-false|LIMIT 100| } + +result sql_benchmarks/sort_tpch/results/sf${BENCH_SIZE:-1}/q06.csv diff --git a/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q07.benchmark b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q07.benchmark new file mode 100644 index 0000000000000..8932810cc1f97 --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q07.benchmark @@ -0,0 +1,58 @@ +echo Loading tpch items sorted: ${BENCH_SORTED:-false} + +# +# Sort queries with different characteristics: +# - Sort key with fixed length or variable length (VARCHAR) +# - Sort key with different cardinality +# - Different number of sort keys +# - Different number of payload columns (thin: 1 additional column other +# than sort keys; wide: all columns except sort keys) +# +# DataSet is `lineitem` table in TPCH dataset (16 columns, 6M rows for +# scale factor 1.0, cardinality is counted from SF1 dataset) +# +# Key Columns: +# - Column `l_linenumber`, type: `INTEGER`, cardinality: 7 +# - Column `l_suppkey`, type: `BIGINT`, cardinality: 10k +# - Column `l_orderkey`, type: `BIGINT`, cardinality: 1.5M +# - Column `l_comment`, type: `VARCHAR`, cardinality: 4.5M (len is ~26 chars) +# +# Payload Columns: +# - Thin variant: `l_partkey` column with `BIGINT` type (1 column) +# - Wide variant: all columns except for possible key columns (12 columns) + +name Q07 +group sort_tpch +subgroup sf${BENCH_SIZE:-1} + +echo Loading sort_tpch sf ${BENCH_SIZE:-1} data + +load sql_benchmarks/sort_tpch/init/load.sql + +assert I +SELECT COUNT(*) > 0 from lineitem; +---- +true + +run +-- Q7: 3 sort keys {(INTEGER, 7), (BIGINT, 10k), (BIGINT, 1.5M)} + 12 all other columns +SELECT l_linenumber, + l_suppkey, + l_orderkey, + l_partkey, + l_quantity, + l_extendedprice, + l_discount, + 
l_tax, + l_returnflag, + l_linestatus, + l_shipdate, + l_commitdate, + l_receiptdate, + l_shipinstruct, + l_shipmode +FROM lineitem +ORDER BY l_linenumber, l_suppkey, l_orderkey +${LIMIT:-false|LIMIT 100| } + +result sql_benchmarks/sort_tpch/results/sf${BENCH_SIZE:-1}/q07.csv diff --git a/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q08.benchmark b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q08.benchmark new file mode 100644 index 0000000000000..f09e6e9f72f21 --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q08.benchmark @@ -0,0 +1,44 @@ +echo Loading tpch items sorted: ${BENCH_SORTED:-false} + +# +# Sort queries with different characteristics: +# - Sort key with fixed length or variable length (VARCHAR) +# - Sort key with different cardinality +# - Different number of sort keys +# - Different number of payload columns (thin: 1 additional column other +# than sort keys; wide: all columns except sort keys) +# +# DataSet is `lineitem` table in TPCH dataset (16 columns, 6M rows for +# scale factor 1.0, cardinality is counted from SF1 dataset) +# +# Key Columns: +# - Column `l_linenumber`, type: `INTEGER`, cardinality: 7 +# - Column `l_suppkey`, type: `BIGINT`, cardinality: 10k +# - Column `l_orderkey`, type: `BIGINT`, cardinality: 1.5M +# - Column `l_comment`, type: `VARCHAR`, cardinality: 4.5M (len is ~26 chars) +# +# Payload Columns: +# - Thin variant: `l_partkey` column with `BIGINT` type (1 column) +# - Wide variant: all columns except for possible key columns (12 columns) + +name Q08 +group sort_tpch +subgroup sf${BENCH_SIZE:-1} + +echo Loading sort_tpch sf ${BENCH_SIZE:-1} data + +load sql_benchmarks/sort_tpch/init/load.sql + +assert I +SELECT COUNT(*) > 0 from lineitem; +---- +true + +run +-- Q8: 4 sort keys {(BIGINT, 1.5M), (BIGINT, 10k), (INTEGER, 7), (VARCHAR, 4.5M)} + no payload column +SELECT l_orderkey, l_suppkey, l_linenumber, l_comment +FROM lineitem +ORDER BY l_orderkey, l_suppkey, l_linenumber, l_comment +${LIMIT:-false|LIMIT 
100| } + +result sql_benchmarks/sort_tpch/results/sf${BENCH_SIZE:-1}/q08.csv diff --git a/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q09.benchmark b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q09.benchmark new file mode 100644 index 0000000000000..5e7a2ea63747a --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q09.benchmark @@ -0,0 +1,44 @@ +echo Loading tpch items sorted: ${BENCH_SORTED:-false} + +# +# Sort queries with different characteristics: +# - Sort key with fixed length or variable length (VARCHAR) +# - Sort key with different cardinality +# - Different number of sort keys +# - Different number of payload columns (thin: 1 additional column other +# than sort keys; wide: all columns except sort keys) +# +# DataSet is `lineitem` table in TPCH dataset (16 columns, 6M rows for +# scale factor 1.0, cardinality is counted from SF1 dataset) +# +# Key Columns: +# - Column `l_linenumber`, type: `INTEGER`, cardinality: 7 +# - Column `l_suppkey`, type: `BIGINT`, cardinality: 10k +# - Column `l_orderkey`, type: `BIGINT`, cardinality: 1.5M +# - Column `l_comment`, type: `VARCHAR`, cardinality: 4.5M (len is ~26 chars) +# +# Payload Columns: +# - Thin variant: `l_partkey` column with `BIGINT` type (1 column) +# - Wide variant: all columns except for possible key columns (12 columns) + +name Q09 +group sort_tpch +subgroup sf${BENCH_SIZE:-1} + +echo Loading sort_tpch sf ${BENCH_SIZE:-1} data + +load sql_benchmarks/sort_tpch/init/load.sql + +assert I +SELECT COUNT(*) > 0 from lineitem; +---- +true + +run +-- Q9: 4 sort keys {(BIGINT, 1.5M), (BIGINT, 10k), (INTEGER, 7), (VARCHAR, 4.5M)} + 1 payload column +SELECT l_orderkey, l_suppkey, l_linenumber, l_comment, l_partkey +FROM lineitem +ORDER BY l_orderkey, l_suppkey, l_linenumber, l_comment +${LIMIT:-false|LIMIT 100| } + +result sql_benchmarks/sort_tpch/results/sf${BENCH_SIZE:-1}/q09.csv diff --git a/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q10.benchmark 
b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q10.benchmark new file mode 100644 index 0000000000000..535393526147e --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q10.benchmark @@ -0,0 +1,59 @@ +echo Loading tpch items sorted: ${BENCH_SORTED:-false} + +# +# Sort queries with different characteristics: +# - Sort key with fixed length or variable length (VARCHAR) +# - Sort key with different cardinality +# - Different number of sort keys +# - Different number of payload columns (thin: 1 additional column other +# than sort keys; wide: all columns except sort keys) +# +# DataSet is `lineitem` table in TPCH dataset (16 columns, 6M rows for +# scale factor 1.0, cardinality is counted from SF1 dataset) +# +# Key Columns: +# - Column `l_linenumber`, type: `INTEGER`, cardinality: 7 +# - Column `l_suppkey`, type: `BIGINT`, cardinality: 10k +# - Column `l_orderkey`, type: `BIGINT`, cardinality: 1.5M +# - Column `l_comment`, type: `VARCHAR`, cardinality: 4.5M (len is ~26 chars) +# +# Payload Columns: +# - Thin variant: `l_partkey` column with `BIGINT` type (1 column) +# - Wide variant: all columns except for possible key columns (12 columns) + +name Q10 +group sort_tpch +subgroup sf${BENCH_SIZE:-1} + +echo Loading sort_tpch sf ${BENCH_SIZE:-1} data + +load sql_benchmarks/sort_tpch/init/load.sql + +assert I +SELECT COUNT(*) > 0 from lineitem; +---- +true + +run +-- Q10: 4 sort keys {(BIGINT, 1.5M), (BIGINT, 10k), (INTEGER, 7), (VARCHAR, 4.5M)} + 12 all other columns +SELECT l_orderkey, + l_suppkey, + l_linenumber, + l_comment, + l_partkey, + l_quantity, + l_extendedprice, + l_discount, + l_tax, + l_returnflag, + l_linestatus, + l_shipdate, + l_commitdate, + l_receiptdate, + l_shipinstruct, + l_shipmode +FROM lineitem +ORDER BY l_orderkey, l_suppkey, l_linenumber, l_comment +${LIMIT:-false|LIMIT 100| } + +result sql_benchmarks/sort_tpch/results/sf${BENCH_SIZE:-1}/q10.csv diff --git a/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q11.benchmark 
b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q11.benchmark new file mode 100644 index 0000000000000..efce2005f3beb --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/benchmarks/q11.benchmark @@ -0,0 +1,44 @@ +echo Loading tpch items sorted: ${BENCH_SORTED:-false} + +# +# Sort queries with different characteristics: +# - Sort key with fixed length or variable length (VARCHAR) +# - Sort key with different cardinality +# - Different number of sort keys +# - Different number of payload columns (thin: 1 additional column other +# than sort keys; wide: all columns except sort keys) +# +# DataSet is `lineitem` table in TPCH dataset (16 columns, 6M rows for +# scale factor 1.0, cardinality is counted from SF1 dataset) +# +# Key Columns: +# - Column `l_linenumber`, type: `INTEGER`, cardinality: 7 +# - Column `l_suppkey`, type: `BIGINT`, cardinality: 10k +# - Column `l_orderkey`, type: `BIGINT`, cardinality: 1.5M +# - Column `l_comment`, type: `VARCHAR`, cardinality: 4.5M (len is ~26 chars) +# +# Payload Columns: +# - Thin variant: `l_partkey` column with `BIGINT` type (1 column) +# - Wide variant: all columns except for possible key columns (12 columns) + +name Q11 +group sort_tpch +subgroup sf${BENCH_SIZE:-1} + +echo Loading sort_tpch sf ${BENCH_SIZE:-1} data + +load sql_benchmarks/sort_tpch/init/load.sql + +assert I +SELECT COUNT(*) > 0 from lineitem; +---- +true + +run +-- Q11: 1 sort key (type: VARCHAR, cardinality: 7) + 2 payload columns +SELECT l_shipmode, l_comment, l_partkey +FROM lineitem +ORDER BY l_shipmode +${LIMIT:-false|LIMIT 100| } + +result sql_benchmarks/sort_tpch/results/sf${BENCH_SIZE:-1}/q11.csv diff --git a/benchmarks/sql_benchmarks/sort_tpch/init/load.sql b/benchmarks/sql_benchmarks/sort_tpch/init/load.sql new file mode 100644 index 0000000000000..13e490e0f5678 --- /dev/null +++ b/benchmarks/sql_benchmarks/sort_tpch/init/load.sql @@ -0,0 +1,3 @@ +CREATE EXTERNAL TABLE lineitem_raw STORED AS PARQUET LOCATION 
'data/tpch_sf${BENCH_SIZE:-1}/lineitem/lineitem.1.parquet'; + +CREATE TABLE lineitem as (SELECT * FROM lineitem_raw ${BENCH_SORTED:-false|order by l_orderkey asc| }); \ No newline at end of file From cd5ef66edec814435142cd251353ef6e81c814ca Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sun, 7 Jun 2026 14:56:42 -0400 Subject: [PATCH 2/2] Use data_dir. --- benchmarks/sql_benchmarks/sort_tpch/init/load.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/sql_benchmarks/sort_tpch/init/load.sql b/benchmarks/sql_benchmarks/sort_tpch/init/load.sql index 13e490e0f5678..996baabdf1954 100644 --- a/benchmarks/sql_benchmarks/sort_tpch/init/load.sql +++ b/benchmarks/sql_benchmarks/sort_tpch/init/load.sql @@ -1,3 +1,3 @@ -CREATE EXTERNAL TABLE lineitem_raw STORED AS PARQUET LOCATION 'data/tpch_sf${BENCH_SIZE:-1}/lineitem/lineitem.1.parquet'; +CREATE EXTERNAL TABLE lineitem_raw STORED AS PARQUET LOCATION '${DATA_DIR:-data}/tpch_sf${BENCH_SIZE:-1}/lineitem/lineitem.1.parquet'; CREATE TABLE lineitem as (SELECT * FROM lineitem_raw ${BENCH_SORTED:-false|order by l_orderkey asc| }); \ No newline at end of file