Skip to content

Commit 4caa917

Browse files
chore[ci]: tpch-10 on ci action (#4498)
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk> --------- Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent a4ca38f commit 4caa917

3 files changed

Lines changed: 176 additions & 27 deletions

File tree

.github/workflows/bench-pr.yml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,10 @@ jobs:
109109
| grep $base_commit_sha \
110110
> base.json
111111
112-
echo '# Benchmarks: ${{ matrix.benchmark.id }}' > comment.md
113-
echo '<details>' >> comment.md
114-
echo '<summary>Table of Results</summary>' >> comment.md
112+
echo '# Benchmarks: ${{ matrix.benchmark.name }}' > comment.md
115113
echo '' >> comment.md
116-
uv run --no-project scripts/compare-benchmark-jsons.py base.json ${{ matrix.benchmark.id }}.json \
114+
uv run --no-project scripts/compare-benchmark-jsons.py base.json ${{ matrix.benchmark.id }}.json "${{ matrix.benchmark.name }}" \
117115
>> comment.md
118-
echo '</details>' >> comment.md
119116
120117
- name: Comment PR
121118
uses: thollander/actions-comment-pull-request@v3

.github/workflows/sql-benchmarks.yml

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,22 @@ on:
3838
"targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex",
3939
"scale_factor": "--scale-factor 1.0"
4040
},
41+
{
42+
"id": "tpch-nvme-10",
43+
"subcommand": "tpch",
44+
"name": "TPC-H SF=10 on NVME",
45+
"targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb",
46+
"scale_factor": "--scale-factor 10.0"
47+
},
48+
{
49+
"id": "tpch-s3-10",
50+
"subcommand": "tpch",
51+
"name": "TPC-H SF=10 on S3",
52+
"local_dir": "bench-vortex/data/tpch/10.0",
53+
"remote_storage": "s3://vortex-bench-dev-eu/${{github.ref_name}}/tpch/10.0/",
54+
"targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex",
55+
"scale_factor": "--scale-factor 10.0"
56+
},
4157
{
4258
"id": "tpcds-nvme",
4359
"subcommand": "tpcds",
@@ -195,12 +211,9 @@ jobs:
195211
> base.json
196212
197213
echo '# Benchmarks: ${{ matrix.name }}' > comment.md
198-
echo '<details>' >> comment.md
199-
echo '<summary>Table of Results</summary>' >> comment.md
200214
echo '' >> comment.md
201-
uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json \
215+
uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json "${{ matrix.name }}" \
202216
>> comment.md
203-
echo '</details>' >> comment.md
204217
205218
- name: Comment PR
206219
if: inputs.mode == 'pr'

scripts/compare-benchmark-jsons.py

Lines changed: 157 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,14 @@
99
# SPDX-License-Identifier: Apache-2.0
1010
# SPDX-FileCopyrightText: Copyright the Vortex contributors
1111

12+
import math
1213
import sys
1314

1415
import pandas as pd
1516

17+
# Check if benchmark name argument is provided (will be added from workflow)
18+
benchmark_name = sys.argv[3] if len(sys.argv) > 3 else ""
19+
1620
base = pd.read_json(sys.argv[1], lines=True)
1721
pr = pd.read_json(sys.argv[2], lines=True)
1822

@@ -24,30 +28,165 @@
2428
assert len(pr_commit_id) == 1, pr_commit_id
2529
pr_commit_id = next(iter(pr_commit_id))
2630

31+
# Handle missing storage field
2732
if "storage" not in base:
28-
# For whatever reason, the base lacks storage. Might be an old database of results. Might be a
29-
# database of results without any storage fields.
3033
base["storage"] = pd.NA
31-
3234
if "storage" not in pr:
33-
# Not all benchmarks have a "storage" key. If none of the JSON objects in the PR results file
34-
# had a "storage" key, then the PR DataFrame will lack that key and the join will fail.
3535
pr["storage"] = pd.NA
3636

37-
# NB: `pd.merge` considers two null key values to be equal, so benchmarks without storage keys will
38-
# match.
39-
df3 = pd.merge(base, pr, on=["name", "storage"], how="right", suffixes=("_base", "_pr"))
37+
38+
# Handle missing dataset field and create a dataset key for joining
39+
def extract_dataset_key(df):
40+
if "dataset" not in df.columns:
41+
df["dataset_key"] = pd.NA
42+
else:
43+
# Convert dataset dict to a string representation for joining
44+
df["dataset_key"] = df["dataset"].apply(
45+
lambda x: str(sorted(x.items())) if pd.notna(x) and isinstance(x, dict) else pd.NA
46+
)
47+
return df
48+
49+
50+
base = extract_dataset_key(base)
51+
pr = extract_dataset_key(pr)
52+
53+
# Join on name, storage, and dataset_key
54+
# NB: `pd.merge` considers two null key values to be equal, so benchmarks without these keys will match.
55+
df3 = pd.merge(base, pr, on=["name", "storage", "dataset_key"], how="right", suffixes=("_base", "_pr"))
4056

4157
# assert df3["unit_base"].equals(df3["unit_pr"]), (df3["unit_base"], df3["unit_pr"])
4258

43-
print(
44-
pd.DataFrame(
45-
{
46-
"name": df3["name"],
47-
f"PR {pr_commit_id[:8]}": df3["value_pr"],
48-
f"base {base_commit_id[:8]}": df3["value_base"],
49-
"ratio (PR/base)": df3["value_pr"] / df3["value_base"],
50-
"unit": df3["unit_base"],
51-
}
52-
).to_markdown(index=False)
59+
# Generate summary statistics
60+
df3["ratio"] = df3["value_pr"] / df3["value_base"]
61+
62+
# Filter for different target combinations for summary statistics
63+
vortex_df = df3[df3["name"].str.contains("vortex", case=False, na=False)]
64+
duckdb_vortex_df = df3[df3["name"].str.contains("duckdb.*vortex", case=False, na=False, regex=True)]
65+
datafusion_vortex_df = df3[df3["name"].str.contains("datafusion.*vortex", case=False, na=False, regex=True)]
66+
67+
68+
# Overall performance (all results)
69+
valid_positive_ratios = [r for r in df3["ratio"] if r > 0 and not pd.isna(r)]
70+
if len(valid_positive_ratios) > 0:
71+
geo_mean_ratio = math.exp(sum(math.log(r) for r in valid_positive_ratios) / len(valid_positive_ratios))
72+
else:
73+
geo_mean_ratio = float("nan")
74+
75+
76+
# Performance for different target combinations
77+
def calculate_geo_mean(df):
78+
valid_ratios = [r for r in df["ratio"] if r > 0 and not pd.isna(r)]
79+
if len(valid_ratios) > 0:
80+
return math.exp(sum(math.log(r) for r in valid_ratios) / len(valid_ratios))
81+
else:
82+
return float("nan")
83+
84+
85+
vortex_geo_mean_ratio = calculate_geo_mean(vortex_df)
86+
duckdb_vortex_geo_mean_ratio = calculate_geo_mean(duckdb_vortex_df)
87+
datafusion_vortex_geo_mean_ratio = calculate_geo_mean(datafusion_vortex_df)
88+
89+
# Find best and worst changes for vortex-only results
90+
vortex_valid_ratios = vortex_df["ratio"].dropna()
91+
if len(vortex_valid_ratios) > 0:
92+
# Best improvement: smallest ratio (< 1.0, fastest performance)
93+
improvements = vortex_valid_ratios[vortex_valid_ratios < 1.0]
94+
if len(improvements) > 0:
95+
best_idx = improvements.idxmin()
96+
best_improvement = f"{vortex_df.loc[best_idx, 'name']} ({vortex_df.loc[best_idx, 'ratio']:.3f}x)"
97+
else:
98+
best_improvement = "no improvements"
99+
100+
# Worst regression: largest ratio (> 1.0, slowest performance)
101+
regressions = vortex_valid_ratios[vortex_valid_ratios > 1.0]
102+
if len(regressions) > 0:
103+
worst_idx = regressions.idxmax()
104+
worst_regression = f"{vortex_df.loc[worst_idx, 'name']} ({vortex_df.loc[worst_idx, 'ratio']:.3f}x)"
105+
else:
106+
worst_regression = "no regressions"
107+
else:
108+
best_improvement = "no valid vortex comparisons"
109+
worst_regression = "no valid vortex comparisons"
110+
111+
# Determine threshold based on benchmark name
112+
# Use 30% threshold for S3 benchmarks, 10% for others
113+
is_s3_benchmark = "s3" in benchmark_name.lower()
114+
threshold_pct = 30 if is_s3_benchmark else 10
115+
improvement_threshold = 1.0 - (threshold_pct / 100.0) # e.g., 0.7 for 30%, 0.9 for 10%
116+
regression_threshold = 1.0 + (threshold_pct / 100.0) # e.g., 1.3 for 30%, 1.1 for 10%
117+
118+
# Count significant changes for vortex-only results
119+
significant_improvements = (vortex_df["ratio"] < improvement_threshold).sum()
120+
significant_regressions = (vortex_df["ratio"] > regression_threshold).sum()
121+
122+
123+
# Build summary
124+
def format_performance(ratio, target_name):
125+
if pd.isna(ratio):
126+
return f"no valid {target_name.lower()} comparisons available"
127+
else:
128+
return f"{ratio:.3f}x ({'better' if ratio < 1 else 'worse'} than base)"
129+
130+
131+
overall_performance = (
132+
"no valid comparisons available"
133+
if pd.isna(geo_mean_ratio)
134+
else f"{geo_mean_ratio:.3f}x ({'better' if geo_mean_ratio < 1 else 'worse'} than base)"
135+
)
136+
vortex_performance = format_performance(vortex_geo_mean_ratio, "vortex")
137+
duckdb_vortex_performance = format_performance(duckdb_vortex_geo_mean_ratio, "duckdb:vortex")
138+
datafusion_vortex_performance = format_performance(datafusion_vortex_geo_mean_ratio, "datafusion:vortex")
139+
140+
summary_lines = [
141+
"## Summary",
142+
"",
143+
f"- **overall performance (all targets)**: {overall_performance}",
144+
]
145+
146+
# Only add vortex-specific sections if we have vortex data
147+
if len(vortex_df) > 0:
148+
summary_lines.extend(
149+
[
150+
f"- **vortex performance**: {vortex_performance}",
151+
]
152+
)
153+
154+
# Only add duckdb:vortex section if we have that data
155+
if len(duckdb_vortex_df) > 0:
156+
summary_lines.append(f"- **duckdb:vortex performance**: {duckdb_vortex_performance}")
157+
158+
# Only add datafusion:vortex section if we have that data
159+
if len(datafusion_vortex_df) > 0:
160+
summary_lines.append(f"- **datafusion:vortex performance**: {datafusion_vortex_performance}")
161+
162+
# Only add best/worst if we have vortex data
163+
if len(vortex_df) > 0:
164+
summary_lines.extend(
165+
[
166+
f"- **best vortex improvement**: {best_improvement}",
167+
f"- **worst vortex regression**: {worst_regression}",
168+
f"- **significant vortex changes (>{threshold_pct}%)**:",
169+
f" - improvements: {significant_improvements} queries",
170+
f" - regressions: {significant_regressions} queries",
171+
]
172+
)
173+
174+
# Build table
175+
table_df = pd.DataFrame(
176+
{
177+
"name": df3["name"],
178+
f"PR {pr_commit_id[:8]}": df3["value_pr"],
179+
f"base {base_commit_id[:8]}": df3["value_base"],
180+
"ratio (PR/base)": df3["ratio"],
181+
"unit": df3["unit_base"],
182+
}
53183
)
184+
185+
# Output complete formatted markdown
186+
print("\n".join(summary_lines))
187+
print("")
188+
print("<details>")
189+
print("<summary>Detailed Results Table</summary>")
190+
print("")
191+
print(table_df.to_markdown(index=False))
192+
print("</details>")

0 commit comments

Comments (0)