-
Notifications
You must be signed in to change notification settings - Fork 704
160 lines (148 loc) · 5.29 KB
/
build.yml
File metadata and controls
160 lines (148 loc) · 5.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.
# A workflow to trigger TE build on GitHub
name: Build

# Triggered by every pull request, and on demand from the Actions tab / API.
on:
  pull_request:
  workflow_dispatch:

concurrency:
  # One group per workflow name + PR number; when the event carries no PR
  # number the ref is used instead. A newer run in the same group cancels
  # the one still in progress.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
core:
  # Framework-agnostic build (NVTE_FRAMEWORK=none) inside a CUDA devel container.
  name: Core
  runs-on: ubuntu-latest
  container:
    image: nvcr.io/nvidia/cuda:12.1.0-devel-ubuntu22.04
    options: --user root
  steps:
    # Toolchain first: git is installed here, before the checkout step runs.
    - name: Dependencies
      run: |
        apt-get update
        apt-get install -y git python3.9 pip cudnn9-cuda-12
        pip install cmake==3.21.0 pybind11[global] ninja

    - name: Checkout
      uses: actions/checkout@v3
      with:
        submodules: recursive

    # Sets up sccache; the build step below selects it via NVTE_CCACHE_BIN.
    - name: ccache
      uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad

    - name: Build
      env:
        SCCACHE_GHA_ENABLED: 'true'
        NVTE_FRAMEWORK: 'none'
        MAX_JOBS: '1'
      run: >-
        NVTE_USE_CCACHE=1 NVTE_CCACHE_BIN=sccache
        pip install --no-build-isolation . -v

    # Runs from / rather than the checkout directory
    # (presumably so the installed package is imported, not the source tree).
    - name: Sanity check
      working-directory: /
      run: python3 -c "import transformer_engine"
pytorch:
  # PyTorch build. Runs on the bare runner and drives a long-lived CUDA
  # container named "builder" through `docker exec`.
  name: 'PyTorch'
  runs-on: ubuntu-latest
  steps:
    # The maximize-build-space step mounts its build volume at /var/lib/docker/,
    # so Docker's data directory is moved aside first and restored afterwards.
    - name: Move /var/lib/docker/
      shell: bash -euxo pipefail {0}
      run: sudo mv /var/lib/docker/ "${GITHUB_WORKSPACE}/docker"
    - name: Maximize build space
      uses: easimon/maximize-build-space@c28619d8999a147d5e09c1199f84ff6af6ad5794
      with:
        root-reserve-mb: 5120
        temp-reserve-mb: 32
        swap-size-mb: 10240
        remove-dotnet: 'true'
        remove-android: 'true'
        remove-haskell: 'true'
        remove-codeql: 'true'
        build-mount-path: '/var/lib/docker/'
    - name: Restore /var/lib/docker/
      shell: bash -euxo pipefail {0}
      run: sudo sh -c "mv ${GITHUB_WORKSPACE}/docker/* /var/lib/docker"
    - name: 'Checkout'
      uses: actions/checkout@v3
      with:
        submodules: recursive
    # The workspace is bind-mounted at the same path and used as the working
    # directory, so later `docker exec` commands run from the checkout.
    - name: Start named container
      run: |
        docker run -v $(pwd):$(pwd) -w $(pwd) --name builder -d nvcr.io/nvidia/cuda:12.8.0-devel-ubuntu22.04 sleep infinity
    - name: 'Dependencies'
      # The version specifier must be quoted: an unquoted `>=1.0` is parsed by
      # bash as a redirection of stdout to a file named `=1.0`, which drops the
      # constraint and swallows pip's output.
      run: |
        docker exec builder bash -c '\
          apt-get update && \
          apt-get install -y git python3.9 pip cudnn9-cuda-12 && \
          pip install cmake torch ninja pydantic "importlib-metadata>=1.0" packaging pybind11 numpy einops onnxscript && \
          apt-get clean \
        '
    - name: 'Build'
      # Step-level `env` is set for the runner-side shell only; processes
      # started with `docker exec` do not inherit it. Each variable is therefore
      # forwarded into the container explicitly with -e.
      run: |
        docker exec \
          -e NVTE_FRAMEWORK="${NVTE_FRAMEWORK}" \
          -e MAX_JOBS="${MAX_JOBS}" \
          builder bash -c 'pip install --no-build-isolation . -v --no-deps'
      env:
        NVTE_FRAMEWORK: pytorch
        MAX_JOBS: 1
    - name: 'Sanity check'
      run: docker exec builder bash -c 'python3 tests/pytorch/test_sanity_import.py'
jax:
  # JAX build, executed directly inside the NVIDIA JAX container image.
  name: JAX
  runs-on: ubuntu-latest
  container:
    image: ghcr.io/nvidia/jax:jax
    options: --user root
  steps:
    - name: Dependencies
      run: pip install cmake==3.21.0 pybind11[global]

    - name: Checkout
      uses: actions/checkout@v3
      with:
        submodules: recursive

    # Sets up sccache; the build step below selects it via NVTE_CCACHE_BIN.
    - name: ccache
      uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad

    - name: Build
      env:
        SCCACHE_GHA_ENABLED: 'true'
        NVTE_FRAMEWORK: 'jax'
        MAX_JOBS: '1'
      run: >-
        NVTE_CCACHE_BIN=sccache NVTE_USE_CCACHE=1
        pip install --no-build-isolation . -v

    - name: Sanity check
      run: python3 tests/jax/test_sanity_import.py
all:
  # Combined build (NVTE_FRAMEWORK=all). Runs on the bare runner and drives a
  # long-lived JAX container named "builder" through `docker exec`; PyTorch is
  # installed into that container in the Dependencies step.
  name: 'All'
  runs-on: ubuntu-latest
  steps:
    # The maximize-build-space step mounts its build volume at /var/lib/docker/,
    # so Docker's data directory is moved aside first and restored afterwards.
    - name: Move /var/lib/docker/
      shell: bash -euxo pipefail {0}
      run: sudo mv /var/lib/docker/ "${GITHUB_WORKSPACE}/docker"
    - name: Maximize build space
      uses: easimon/maximize-build-space@c28619d8999a147d5e09c1199f84ff6af6ad5794
      with:
        root-reserve-mb: 5120
        temp-reserve-mb: 32
        swap-size-mb: 10240
        remove-dotnet: 'true'
        remove-android: 'true'
        remove-haskell: 'true'
        remove-codeql: 'true'
        build-mount-path: '/var/lib/docker/'
    - name: Restore /var/lib/docker/
      shell: bash -euxo pipefail {0}
      run: sudo sh -c "mv ${GITHUB_WORKSPACE}/docker/* /var/lib/docker"
    - name: 'Checkout'
      uses: actions/checkout@v3
      with:
        submodules: recursive
    # The workspace is bind-mounted at the same path and used as the working
    # directory, so later `docker exec` commands run from the checkout.
    - name: Start named container
      run: |
        docker run -v $(pwd):$(pwd) -w $(pwd) --name builder -d ghcr.io/nvidia/jax:jax sleep infinity
    - name: 'Dependencies'
      run: |
        docker exec builder bash -c '\
          pip install cmake==3.21.0 pybind11[global] einops onnxscript && \
          pip install torch --no-cache-dir --index-url https://download.pytorch.org/whl/cu130
        '
    - name: 'Build'
      # Step-level `env` is set for the runner-side shell only; processes
      # started with `docker exec` do not inherit it. Each variable is therefore
      # forwarded into the container explicitly with -e.
      run: |
        docker exec \
          -e NVTE_FRAMEWORK="${NVTE_FRAMEWORK}" \
          -e MAX_JOBS="${MAX_JOBS}" \
          builder bash -c 'pip install --no-cache-dir --no-build-isolation . -v --no-deps'
      env:
        NVTE_FRAMEWORK: all
        MAX_JOBS: 1
    - name: 'Sanity check'
      run: docker exec builder bash -c 'python3 tests/pytorch/test_sanity_import.py && python3 tests/jax/test_sanity_import.py'