Skip to content

Commit 95baf03

Browse files
authored
Improve benchmark output formatting (#6744)
## Summary

Introduces two small changes to how benchmark output is formatted:

1. Add the overall Parquet performance as a baseline, which helps identify "fast" or "slow" runs.
2. Display floating-point values to only two decimal places; anything beyond that is noise.

---------

Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 5da84fe commit 95baf03

1 file changed

Lines changed: 9 additions & 6 deletions

File tree

scripts/compare-benchmark-jsons.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@ def extract_dataset_key(df):
7777
vortex_df = df3[df3["name"].str.contains("vortex", case=False, na=False)]
7878
duckdb_vortex_df = df3[df3["name"].str.contains("duckdb.*vortex", case=False, na=False, regex=True)]
7979
datafusion_vortex_df = df3[df3["name"].str.contains("datafusion.*vortex", case=False, na=False, regex=True)]
80+
parquet_df = df3[df3["name"].str.contains("parquet", case=False, na=False)]
8081

8182

8283
# Overall performance (all results)
@@ -99,6 +100,7 @@ def calculate_geo_mean(df):
99100
vortex_geo_mean_ratio = calculate_geo_mean(vortex_df)
100101
duckdb_vortex_geo_mean_ratio = calculate_geo_mean(duckdb_vortex_df)
101102
datafusion_vortex_geo_mean_ratio = calculate_geo_mean(datafusion_vortex_df)
103+
parquet_geo_mean_ratio = calculate_geo_mean(parquet_df)
102104

103105
# Find best and worst changes for vortex-only results
104106
vortex_valid_ratios = vortex_df["ratio"].dropna()
@@ -146,6 +148,8 @@ def format_performance(ratio, target_name):
146148
vortex_performance = format_performance(vortex_geo_mean_ratio, "vortex")
147149
duckdb_vortex_performance = format_performance(duckdb_vortex_geo_mean_ratio, "duckdb:vortex")
148150
datafusion_vortex_performance = format_performance(datafusion_vortex_geo_mean_ratio, "datafusion:vortex")
151+
parquet_performance = format_performance(parquet_geo_mean_ratio, "parquet")
152+
149153

150154
summary_lines = [
151155
"## Summary",
@@ -155,11 +159,10 @@ def format_performance(ratio, target_name):
155159

156160
# Only add vortex-specific sections if we have vortex data
157161
if len(vortex_df) > 0:
158-
summary_lines.extend(
159-
[
160-
f"- **Vortex**: {vortex_performance}",
161-
]
162-
)
162+
summary_lines.extend([f"- **Vortex**: {vortex_performance}"])
163+
164+
if len(parquet_df) > 0:
165+
summary_lines.extend([f"- **Parquet**: {parquet_performance}"])
163166

164167
# Only add duckdb:vortex section if we have that data
165168
if len(duckdb_vortex_df) > 0:
@@ -197,5 +200,5 @@ def format_performance(ratio, target_name):
197200
print("<details>")
198201
print("<summary>Detailed Results Table</summary>")
199202
print("")
200-
print(table_df.to_markdown(index=False))
203+
print(table_df.to_markdown(index=False, tablefmt="github", floatfmt=".2f"))
201204
print("</details>")

0 commit comments

Comments
 (0)