-
Notifications
You must be signed in to change notification settings - Fork 268
Expand file tree
/
Copy pathrun_doc_benchmarks.py
More file actions
70 lines (56 loc) · 2.43 KB
/
run_doc_benchmarks.py
File metadata and controls
70 lines (56 loc) · 2.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python3
import argparse
import json
import subprocess
from pathlib import Path
def run_sbt(sbt_cmd: str, spark_version: str, jmh_args: str, repo_root: Path) -> None:
    """Run a single sbt invocation from ``repo_root`` and wait for it to finish.

    Args:
        sbt_cmd: Executable to launch (path or name of the sbt launcher).
        spark_version: Value forwarded as the ``-Dspark.version=...`` argument.
        jmh_args: Command text handed to sbt as ONE argument; it is not split
            on whitespace here.
        repo_root: Working directory for the child process.

    Raises:
        subprocess.CalledProcessError: If the process exits with a non-zero
            status (``check=True``).
    """
    # A list argv (no shell) keeps `jmh_args` intact as a single argument.
    argv = [sbt_cmd, f"-Dspark.version={spark_version}", jmh_args]
    subprocess.run(argv, cwd=repo_root, check=True)
def load_json_array(path: Path) -> list[dict]:
    """Read a UTF-8 JSON file whose top-level value must be an array.

    Only the top-level type is checked; the individual elements are returned
    as parsed, without validating that each one is an object.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The parsed top-level list.

    Raises:
        ValueError: If the top-level JSON value is not an array.
    """
    parsed = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(parsed, list):
        return parsed
    raise ValueError(f"Expected a JSON array in {path}")
def main() -> None:
    """Run the documentation benchmarks and merge their JSON results.

    Command-line options:
        --spark-version: forwarded to sbt as ``-Dspark.version`` (default
            ``4.0.0``).
        --sbt: sbt launcher to run (default ``<repo_root>/build/sbt``).

    Each benchmark is run through sbt with its own result file under
    ``<repo_root>/benchmarks/target/doc-jmh``. The arrays read back from those
    files are concatenated in run order and written to
    ``<repo_root>/benchmarks/jmh-result.json``.
    """
    # The repository root is the parent of the directory holding this script.
    repo_root = Path(__file__).resolve().parents[1]

    parser = argparse.ArgumentParser(description="Run benchmarks needed for docs generation")
    parser.add_argument("--spark-version", default="4.0.0")
    parser.add_argument("--sbt", default=str((repo_root / "build" / "sbt").resolve()))
    args = parser.parse_args()

    results_dir = repo_root.joinpath("benchmarks", "target", "doc-jmh").resolve()
    results_dir.mkdir(parents=True, exist_ok=True)

    # Arguments shared by every benchmark invocation.
    shared_args = (
        "benchmarks/Jmh/run -rf json -p graphName=wiki-Talk -p useLocalCheckpoints=true "
        "-p algorithm=graphframes,graphx"
    )
    # (result file name, benchmark-specific parameters, benchmark class)
    specs = (
        (
            "shortest-paths.json",
            "",
            "org.graphframes.benchmarks.ShortestPathsBenchmark",
        ),
        (
            "connected-components.json",
            "-p broadcastThreshold=-1",
            "org.graphframes.benchmarks.ConnectedComponentsBenchmark",
        ),
        (
            "label-propagation.json",
            "",
            "org.graphframes.benchmarks.LabelPropagationBenchmark",
        ),
    )

    merged: list[dict] = []
    for file_name, extra_params, benchmark_class in specs:
        result_path = results_dir / file_name
        # Empty parts are dropped so the pieces are always joined by one space.
        jmh_command = " ".join(
            part for part in (shared_args, extra_params, benchmark_class) if part
        )
        run_sbt(args.sbt, args.spark_version, f"{jmh_command} -rff {result_path}", repo_root)
        # Read the results back only after the sbt run has completed.
        merged += load_json_array(result_path)

    target_file = repo_root.joinpath("benchmarks", "jmh-result.json").resolve()
    target_file.write_text(json.dumps(merged, indent=2) + "\n", encoding="utf-8")
    print(f"Saved {len(merged)} benchmark entries to {target_file}")
if __name__ == "__main__":
    # Run the benchmarks only when executed as a script, not when imported.
    main()