-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathconcat_tables.py
More file actions
executable file
·177 lines (155 loc) · 6.33 KB
/
concat_tables.py
File metadata and controls
executable file
·177 lines (155 loc) · 6.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/usr/bin/env python3
"""
Concatenate multiple benchmark result tables into a single comprehensive table.
This script finds and combines related benchmark results from different datasets
(mesh, canada, uniform_01) into a single LaTeX table for easier comparison.
"""
import os
import re
import argparse
import pandas as pd
def parse_tex_table(filepath):
    """Parse a LaTeX benchmark table file into a pandas DataFrame.

    Reads only the rows between ``\\midrule`` and ``\\bottomrule``; each
    data row must have exactly four '&'-separated cells.

    Parameters
    ----------
    filepath : str
        Path to the .tex table file.

    Returns
    -------
    pandas.DataFrame
        Columns 'algorithm', 'ns/f', 'ins/f', 'ins/c' (empty frame if
        no data rows were found).
    """
    with open(filepath, 'r') as file:
        lines = file.readlines()
    data_start = False
    parsed = []
    for line in lines:
        if "\\midrule" in line:
            data_start = True
            continue
        if "\\bottomrule" in line:
            break
        if data_start and '&' in line:
            # Strip whitespace, then the LaTeX row terminator '\\', then
            # whitespace again: without the final strip a cell such as
            # '3.5 \\' keeps a trailing space once the backslashes are
            # removed (the space was interior before that point).
            row = [x.strip().strip('\\').strip() for x in line.split('&')]
            if len(row) == 4:
                parsed.append({
                    'algorithm': row[0],
                    'ns/f': row[1],
                    'ins/f': row[2],
                    'ins/c': row[3]
                })
    return pd.DataFrame(parsed)
def clean_cpu_name(cpu_name):
    """Clean a raw CPU identifier for display in table captions.

    Strips marketing/detail suffixes (core count, clock speed, '(R)',
    'Platinum'), converts underscores to spaces and collapses any runs
    of spaces that the removals leave behind.

    Parameters
    ----------
    cpu_name : str
        Raw CPU identifier, typically underscore-separated.

    Returns
    -------
    str : cleaned, human-readable CPU name.
    """
    cpu_cleaned = cpu_name.replace("Ryzen9900x", "Ryzen 9900X")
    cpu_cleaned = cpu_cleaned.replace("_Platinum", "")
    cpu_cleaned = re.sub(r"_\d+-Core_Processor", "", cpu_cleaned)
    cpu_cleaned = re.sub(r"_CPU__\d+\.\d+GHz", "", cpu_cleaned)
    cpu_cleaned = re.sub(r"\(R\)", "", cpu_cleaned)
    # The original ended with replace(" ", " "), a no-op; the evident
    # intent was to collapse doubled spaces produced when removed tokens
    # (e.g. '(R)') leave two underscores/spaces adjacent.
    cpu_cleaned = cpu_cleaned.replace("_", " ")
    return re.sub(r" {2,}", " ", cpu_cleaned).strip()
def format_latex_table(df, cpu_name, compiler, float_bits, microarch=None,
                       exclude_algos=None):
    """Render merged benchmark results as a complete LaTeX table.

    Parameters
    ----------
    df : pandas.DataFrame
        Merged results; must contain 'algorithm' plus the per-dataset
        metric columns ('ns/f_mesh', ..., 'ins/c_unit').
    cpu_name : str
        Raw CPU identifier; cleaned via clean_cpu_name for the caption.
    compiler : str
        Compiler name shown in the caption.
    float_bits : str
        Bit width ("32"/"64") shown in the caption.
    microarch : str, optional
        Microarchitecture tag appended to the caption when present.
    exclude_algos : set, optional
        Algorithm names to omit from the table body.

    Returns
    -------
    str : full LaTeX source for the table environment.
    """
    skip = set() if exclude_algos is None else exclude_algos
    caption = (
        f"{clean_cpu_name(cpu_name)} results ({compiler}, "
        f"{float_bits}-bit floats"
        + (f", {microarch}" if microarch else "")
        + ")"
    )
    label = f"tab:{re.sub(r'[^a-zA-Z0-9]+', '', cpu_name.lower())}results"
    # Accumulate fragments and join once instead of += concatenation.
    pieces = [
        "\\begin{table}\n"
        " \\centering\n"
        f" \\caption{{{caption}}}%\n"
        f" \\label{{{label}}}\n"
        " \\begin{tabular}{lccccccccc}\n"
        " \\toprule\n"
        " \\multirow{1}{*}{Name} & \\multicolumn{3}{c|}{mesh} & "
        "\\multicolumn{3}{c|}{canada} & \\multicolumn{3}{c}{unit} \\\\\n"
        " & {ns/f} & {ins/f} & {ins/c} & "
        "{ns/f} & {ins/f} & {ins/c} & {ns/f} & {ins/f} & {ins/c} \\\\ "
        "\\midrule\n"
    ]
    for _, rec in df.iterrows():
        if rec['algorithm'] in skip:
            continue
        pieces.append(
            f" {rec['algorithm']} & {rec['ns/f_mesh']} & "
            f"{rec['ins/f_mesh']} & {rec['ins/c_mesh']} & "
            f"{rec['ns/f_canada']} & {rec['ins/f_canada']} & "
            f"{rec['ins/c_canada']} & "
            f"{rec['ns/f_unit']} & {rec['ins/f_unit']} & "
            f"{rec['ins/c_unit']} \\\\\n"
        )
    pieces.append(
        " \\bottomrule\n"
        " \\end{tabular}\\restartrowcolors\n"
        "\\end{table}\n"
    )
    return "".join(pieces)
def find_combinations(root, pattern=None):
    """Locate benchmark .tex files that cover all three datasets.

    Walks *root* matching filenames of the form
    ``<cpu>_<compiler>_<dataset>_<variant>[_<microarch>].tex`` and groups
    them by (directory, cpu, compiler, variant, microarch).  Only groups
    containing every dataset ('mesh', 'canada', 'uniform_01') qualify.

    Parameters
    ----------
    root : str
        Directory tree to search.
    pattern : re.Pattern, optional
        Override for the default filename pattern.

    Returns
    -------
    list of (dirpath, cpu, compiler, variant, microarch, files) tuples,
    where *files* maps dataset name to its file path.
    """
    if pattern is None:
        pattern = re.compile(
            r"(.*?)_(g\+\+|clang\+\+)_(mesh|canada|uniform_01)_(none|s)"
            r"(?:_(x86-64|x86-64-v2|x86-64-v3|x86-64-v4|native))?\.tex"
        )
    required = {"mesh", "canada", "uniform_01"}
    # dirpath is part of the grouping key, so one flat dict suffices.
    groups = {}
    for dirpath, _, filenames in os.walk(root):
        for name in filenames:
            if not name.endswith('.tex'):
                continue
            match = pattern.match(name)
            if match is None:
                continue
            cpu, compiler, dataset, variant, microarch = match.groups()
            key = (dirpath, cpu, compiler, variant, microarch)
            groups.setdefault(key, {})[dataset] = os.path.join(dirpath, name)
    return [
        key + (files,)
        for key, files in groups.items()
        if required <= set(files)
    ]
def main():
    """CLI entry point: find, merge and emit combined benchmark tables.

    For every (cpu, compiler, variant, microarch) combination that has
    mesh, canada and uniform_01 result tables, parse the three tables,
    merge them on the algorithm name and write one combined LaTeX table
    to the output directory.
    """
    parser = argparse.ArgumentParser(
        description="Concatenate benchmark tables into comprehensive tables")
    parser.add_argument(
        "--input-dir", "-i", default="./outputs",
        help="Directory containing benchmark .tex files")
    parser.add_argument(
        "--output-dir", "-o",
        help="Output directory for combined tables (defaults to input directory)")
    parser.add_argument(
        "--exclude", "-e", nargs="+",
        default=["netlib", "teju\\_jagua", "yy\\_double", "snprintf", "abseil"],
        help="Algorithms to exclude from the output tables")
    args = parser.parse_args()
    input_dir = args.input_dir
    output_dir = args.output_dir if args.output_dir else input_dir
    exclude_algos = set(args.exclude)
    # exist_ok=True is race-free, unlike the check-then-create sequence
    # (another process could create the directory between the two calls).
    os.makedirs(output_dir, exist_ok=True)
    combos = find_combinations(input_dir)
    if not combos:
        print(f"No matching benchmark files found in {input_dir}")
        return
    print(f"Found {len(combos)} combinations to process")
    for dirpath, cpu, compiler, variant, microarch, paths in combos:
        df_mesh = parse_tex_table(paths['mesh'])
        df_canada = parse_tex_table(paths['canada'])
        df_unit = parse_tex_table(paths['uniform_01'])
        # The first merge suffixes the mesh/canada metric columns; the
        # uniform_01 columns collide with nothing on the second merge,
        # so they keep their bare names and are renamed explicitly.
        df_merged = df_mesh.merge(
            df_canada, on='algorithm', suffixes=('_mesh', '_canada'))
        df_merged = df_merged.merge(df_unit, on='algorithm')
        df_merged.rename(columns={
            'ns/f': 'ns/f_unit',
            'ins/f': 'ins/f_unit',
            'ins/c': 'ins/c_unit'
        }, inplace=True)
        float_bits = "32" if variant == "s" else "64"
        tex_code = format_latex_table(
            df_merged, cpu, compiler, float_bits, microarch, exclude_algos)
        suffix = f"_{microarch}" if microarch else ""
        out_path = os.path.join(
            output_dir, f"{cpu}_{compiler}_all_{variant}{suffix}.tex")
        with open(out_path, "w") as f:
            f.write(tex_code)
        print(f"[OK] {out_path}")
# Standard script entry guard: run only when executed directly.
if __name__ == "__main__":
    main()