forked from AliceO2Group/AliceO2
-
Notifications
You must be signed in to change notification settings - Fork 0
163 lines (146 loc) · 9.6 KB
/
standalone-benchmark.yml
File metadata and controls
163 lines (146 loc) · 9.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# Standalone Benchmark
#
# Builds the standalone target from GPU/GPUTracking/Standalone twice per matrix
# entry: first with GPUCA_DETERMINISTIC_MODE=GPU to run a reproducibility check
# (`cmp` over the *.out files in the standalone directory), then with
# GPUCA_DETERMINISTIC_MODE=OFF to time the `ca` binary on the 50kHz data set.
# GPU entries are additionally run under a vendor profiler. All resulting CSV
# files are uploaded as an artifact and, for GPU entries, rendered into the
# job's step summary next to a downloaded baseline.
name: Standalone Benchmark

on:
  workflow_dispatch:
  pull_request:
  push:
    branches:
      - '**'

jobs:
  benchmark:
    # Job display name; one job is created per matrix entry.
    name: ${{ matrix.name }}
    runs-on: ${{ matrix.runner }}
    # Image is pinned by digest so every run uses the same toolchain image.
    container: registry.cern.ch/alisw/slc9-gpu-builder@sha256:ea3443f9dfbc770e4b4bce0d1a9ecc0b7a7c16e9f76e416b796d170877220820
    strategy:
      # Keep the remaining targets running when one target fails.
      fail-fast: false
      matrix:
        name: [cpu, nvidia-h100, nvidia-l40s, amd-mi300x, amd-w7900]
        # Per-target settings, attached to the matching `name` above:
        #   runner          - self-hosted runner label the job is scheduled on
        #   cmake_args      - backend selection passed to the CMake configure
        #   profiler_runs   - `--runs` value for the run under the profiler
        #   standalone_runs - `--runs` value for the plain timing run
        #   cpu_gpu         - device selection flags passed to `ca`
        include:
          # The cpu entry uses the mi300x runner with both GPU backends off.
          - name: cpu
            runner: cern-nextgen-mi300x
            cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=0
            profiler_runs: 42
            standalone_runs: 42
            cpu_gpu: '-c'
          - name: nvidia-h100
            runner: cern-nextgen-h100
            cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
            profiler_runs: 21
            standalone_runs: 42
            cpu_gpu: '-g --memSize 20000000000'
          - name: nvidia-l40s
            runner: cern-nextgen-l40s
            cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
            profiler_runs: 42
            standalone_runs: 42
            cpu_gpu: '-g --memSize 20000000000'
          - name: amd-mi300x
            runner: cern-nextgen-mi300x
            cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
            profiler_runs: 42
            standalone_runs: 42
            cpu_gpu: '-g --memSize 20000000000'
          - name: amd-w7900
            runner: cern-nextgen-w7900
            cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
            profiler_runs: 42
            standalone_runs: 42
            cpu_gpu: '-g --memSize 20000000000'
    env:
      WORK_DIR: /cvmfs/alice.cern.ch
      ALIBUILD_ARCH_PREFIX: el9-x86_64/Packages
      MODULEPATH: /cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles
      # Install prefix of the standalone build; also holds the downloaded data.
      STANDALONE_DIR: /root/standalone
      # Raw per-target CSV names; derived summaries are `summary_<name>`.
      BENCHMARK_CSV: standalone_${{ matrix.name }}.csv
      PROFILER_CSV: profiler_${{ matrix.name }}.csv
      # Base timing command, run from ${STANDALONE_DIR}. Each caller appends
      # its own `--runs <n>` (e.g. `--runs 42` for benchmark runs).
      TIMING_CA: ./ca -e 50kHz ${{ matrix.cpu_gpu }} --seed 0 --sync --runsInit 0 --PROCresetTimers 1 --PROCdebugMarkdown 1 --debug 1
      LD_LIBRARY_PATH: /usr/local/cuda-13.0/compat
      # Pinned environment modules loaded by every step that builds or runs
      # the benchmark. Kept in one place so a version bump cannot leave the
      # steps out of sync. The folded scalar yields one space-separated line;
      # steps expand it unquoted so the shell splits it into module names.
      BENCHMARK_MODULES: >-
        ninja/fortran-v1.11.1.g9-15
        Vc/1.4.5-10
        boost/v1.83.0-alice2-57
        fmt/11.1.2-14
        CMake/v3.31.6-10
        ms_gsl/4.2.1-3
        Clang/v20.1.7-9
        TBB/v2022.3.0-3
        ROOT/v6-36-04-alice9-15
        ONNXRuntime/v1.22.0-71
        GLFW/3.3.2-25
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v6

      # Fetches o2-simple-GPU.out plus the o2-simple and 50kHz event archives
      # from CERNBox and unpacks the archives under ${STANDALONE_DIR}/events.
      - name: Download Files
        run: |
          mkdir -p ${STANDALONE_DIR}
          curl -fL --retry 3 -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
          mkdir -p ${STANDALONE_DIR}/events
          curl -fL --retry 3 -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
          tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
          curl -fL --retry 3 -o ${STANDALONE_DIR}/events/50kHz.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/50kHz.tar.xz
          tar -xf ${STANDALONE_DIR}/events/50kHz.tar.xz -C ${STANDALONE_DIR}/events

      # The build script is anchored (&build) and reused verbatim by the
      # non-deterministic build below; only DETERMINISTIC_MODE differs.
      - name: Build Deterministic
        run: &build |
          source /etc/profile.d/modules.sh
          module load ${BENCHMARK_MODULES}
          mkdir -p ${STANDALONE_DIR}
          cmake -B ${STANDALONE_DIR}/build ${{ matrix.cmake_args }} -DENABLE_OPENCL=0 -DGPUCA_BUILD_EVENT_DISPLAY=0 -DGPUCA_DETERMINISTIC_MODE=${DETERMINISTIC_MODE} -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
          cmake --build ${STANDALONE_DIR}/build --target install -j 8
        env:
          DETERMINISTIC_MODE: GPU

      # Runs the deterministic build once on the o2-simple data set, then
      # `cmp`s the *.out files in ${STANDALONE_DIR} (the downloaded
      # o2-simple-GPU.out and, presumably, the dump written by this run -
      # confirm against the `ca --debug 6` output naming). Afterwards the
      # dumps, the o2-simple events and the build tree are removed.
      - name: Test Track Reconstruction
        run: |
          source /etc/profile.d/modules.sh
          module load ${BENCHMARK_MODULES}
          cd ${STANDALONE_DIR}
          ${STANDALONE_DIR}/ca -e o2-simple ${{ matrix.cpu_gpu }} --seed 0 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptConstexpr 1 --RTCoptSpecialCode 1 --debug 6
          cmp ${STANDALONE_DIR}/*.out
          rm -rf ${STANDALONE_DIR}/*.out ${STANDALONE_DIR}/events/o2-simple ${STANDALONE_DIR}/build

      - name: Build Non-Deterministic
        run: *build
        env:
          # Quoted: a bare OFF is a boolean spelling in YAML 1.1 tooling, but
          # CMake must receive the literal string OFF.
          DETERMINISTIC_MODE: 'OFF'

      # Plain timing run; writes the raw CSV and a summary CSV under /root.
      - name: Benchmark Track Reconstruction
        run: |
          source /etc/profile.d/modules.sh
          module load ${BENCHMARK_MODULES}
          cd ${STANDALONE_DIR}
          ${TIMING_CA} --runs ${{ matrix.standalone_runs }} --PROCdebugCSV /root/${BENCHMARK_CSV}
          python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_standalone.py --discard 0 --input /root/${BENCHMARK_CSV} --output /root/summary_${BENCHMARK_CSV}

      # Exactly one of the three profiler steps below runs for a GPU entry
      # (none for cpu). Each writes /root/${PROFILER_CSV} and its summary, and
      # removes the 50kHz events and the build tree once profiling is done.
      - name: Profiler - Nsight Compute
        if: ${{ matrix.name == 'nvidia-h100' }}
        run: |
          dnf install -y cuda-nsight-compute-13-1
          source /etc/profile.d/modules.sh
          module load ${BENCHMARK_MODULES}
          cd ${STANDALONE_DIR}
          ncu --set none --metrics gpu__time_duration.avg --export ${{ matrix.name }} --clock-control none --force-overwrite ${TIMING_CA} --runs ${{ matrix.profiler_runs }} # Generates ${{ matrix.name }}.ncu-rep
          ncu --import ${STANDALONE_DIR}/${{ matrix.name }}.ncu-rep --print-units base --csv > /root/${PROFILER_CSV}
          rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
          python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_ncu.py --input /root/${PROFILER_CSV} --output /root/summary_${PROFILER_CSV}

      - name: Profiler - Nsight Systems
        if: ${{ matrix.name == 'nvidia-l40s' }}
        run: |
          dnf config-manager --add-repo "https://developer.download.nvidia.com/devtools/repos/rhel$(source /etc/os-release; echo ${VERSION_ID%%.*})/$(rpm --eval '%{_arch}' | sed s/aarch/arm/)/"
          dnf install --nogpgcheck -y nsight-systems-cli-2026.2.1
          source /etc/profile.d/modules.sh
          module load ${BENCHMARK_MODULES}
          cd ${STANDALONE_DIR}
          nsys profile -o ${{ matrix.name }} ${TIMING_CA} --runs ${{ matrix.profiler_runs }} # Generates ${{ matrix.name }}.nsys-rep
          nsys stats --report cuda_gpu_kern_sum --timeunit usec --force-export=true --format csv ${{ matrix.name }}.nsys-rep > /root/${PROFILER_CSV}
          rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
          python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_nsys.py --input /root/${PROFILER_CSV} --output /root/summary_${PROFILER_CSV}

      # Uses profiler_runs like the other profiler steps, and like the summary
      # step, which passes profiler_runs to csv_to_md.py for this CSV.
      - name: Profiler - rocprofv2
        if: ${{ matrix.name == 'amd-mi300x' || matrix.name == 'amd-w7900' }}
        run: |
          source /etc/profile.d/modules.sh
          module load ${BENCHMARK_MODULES}
          cd ${STANDALONE_DIR}
          rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} ${TIMING_CA} --runs ${{ matrix.profiler_runs }} # Generates results_${{ matrix.name }}.csv
          rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
          mv /root/results_${{ matrix.name }}.csv /root/${PROFILER_CSV}
          python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_rocprofv2.py --input /root/${PROFILER_CSV} --output /root/summary_${PROFILER_CSV}

      - name: Upload Artifact
        uses: actions/upload-artifact@v6
        with:
          name: ${{ matrix.name }}-artifact
          path: '/root/*.csv'

      # Downloads the baseline summaries and appends two tables (profiler,
      # then standalone timing) to the job's step summary. Skipped for cpu,
      # for which no profiler step runs and so no profiler summary exists.
      - name: Display table on GitHub web
        if: ${{ matrix.name != 'cpu' }}
        run: |
          source /etc/profile.d/modules.sh
          module load ${BENCHMARK_MODULES}
          mkdir -p ${STANDALONE_DIR}/baseline
          curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/summary_${PROFILER_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/summary_${PROFILER_CSV}
          curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/summary_${BENCHMARK_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/summary_${BENCHMARK_CSV}
          python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --runs ${{ matrix.profiler_runs }} --baseline ${STANDALONE_DIR}/baseline/summary_${PROFILER_CSV} --current /root/summary_${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
          echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
          python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --runs ${{ matrix.standalone_runs }} --baseline ${STANDALONE_DIR}/baseline/summary_${BENCHMARK_CSV} --current /root/summary_${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
          rm -rf ${STANDALONE_DIR}/baseline