Skip to content

Commit cda493d

Browse files
authored
Fix registry backfill with per-provider versions and Docker extraction (#65223)
Chain both extraction scripts in a single `uv run` invocation to avoid creating two ephemeral environments per version.
1 parent d1d313a commit cda493d

8 files changed

Lines changed: 244 additions & 92 deletions

File tree

.github/workflows/registry-backfill.yml

Lines changed: 68 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -29,23 +29,54 @@ on: # yamllint disable-line rule:truthy
2929
- staging
3030
- live
3131
default: staging
32-
providers:
32+
provider-versions:
3333
description: >
34-
Space-separated provider IDs
35-
(e.g. 'amazon google databricks')
36-
required: true
37-
type: string
38-
versions:
39-
description: >
40-
Space-separated versions to backfill
41-
(e.g. '9.15.0 9.14.0'). Applied to ALL providers.
34+
Space-separated provider/version pairs
35+
(e.g. 'amazon/9.24.0 google/21.0.0 celery/3.17.2').
36+
Multiple versions per provider are grouped into one job.
4237
required: true
4338
type: string
4439

4540
permissions:
4641
contents: read
42+
packages: read
4743

4844
jobs:
45+
build-ci-image:
46+
name: "Build CI image"
47+
uses: ./.github/workflows/ci-image-build.yml
48+
permissions:
49+
contents: read
50+
packages: write
51+
if: >
52+
contains(fromJSON('[
53+
"ashb",
54+
"bugraoz93",
55+
"eladkal",
56+
"ephraimbuddy",
57+
"jedcunningham",
58+
"jscheffl",
59+
"kaxil",
60+
"pierrejeambrun",
61+
"shahar1",
62+
"potiuk",
63+
"utkarsharma2",
64+
"vincbeck"
65+
]'), github.event.sender.login)
66+
with:
67+
runners: '["ubuntu-22.04"]'
68+
platform: "linux/amd64"
69+
push-image: "false"
70+
upload-image-artifact: "true"
71+
upload-mount-cache-artifact: "false"
72+
python-versions: '["3.12"]'
73+
branch: "main"
74+
constraints-branch: "constraints-main"
75+
use-uv: "true"
76+
upgrade-to-newer-dependencies: "false"
77+
docker-cache: "registry"
78+
disable-airflow-repo-cache: "false"
79+
4980
prepare:
5081
runs-on: ubuntu-latest
5182
outputs:
@@ -55,12 +86,19 @@ jobs:
5586
- name: "Build provider matrix"
5687
id: matrix
5788
env:
58-
PROVIDERS: ${{ inputs.providers }}
89+
PROVIDER_VERSIONS: ${{ inputs.provider-versions }}
5990
run: |
60-
MATRIX=$(echo "${PROVIDERS}" \
61-
| tr ' ' '\n' | jq -R . \
62-
| jq -cs '{"provider": .}')
91+
# Parse provider/version pairs, group by provider
92+
# Input: "amazon/9.24.0 google/21.0.0 amazon/9.23.0"
93+
# Output: {"include": [{"provider":"amazon","versions":"9.24.0 9.23.0"}, ...]}
94+
MATRIX=$(echo "${PROVIDER_VERSIONS}" | tr ' ' '\n' | grep '/' | \
95+
jq -R 'split("/") | {provider: .[0], version: .[1]}' | \
96+
jq -cs 'group_by(.provider) | map({
97+
provider: .[0].provider,
98+
versions: (map(.version) | join(" "))
99+
}) | {include: .}')
63100
echo "matrix=${MATRIX}" >> "${GITHUB_OUTPUT}"
101+
echo "Matrix: ${MATRIX}"
64102
65103
- name: "Determine S3 destination"
66104
id: destination
@@ -76,28 +114,16 @@ jobs:
76114
>> "${GITHUB_OUTPUT}"
77115
78116
backfill:
79-
needs: prepare
117+
needs: [prepare, build-ci-image]
80118
runs-on: ubuntu-latest
81119
timeout-minutes: 60
82120
strategy:
83121
fail-fast: false
84122
matrix: ${{ fromJSON(needs.prepare.outputs.matrix) }}
85-
name: "Backfill ${{ matrix.provider }}"
86-
if: >
87-
contains(fromJSON('[
88-
"ashb",
89-
"bugraoz93",
90-
"eladkal",
91-
"ephraimbuddy",
92-
"jedcunningham",
93-
"jscheffl",
94-
"kaxil",
95-
"pierrejeambrun",
96-
"shahar1",
97-
"potiuk",
98-
"utkarsharma2",
99-
"vincbeck"
100-
]'), github.event.sender.login)
123+
name: "Backfill ${{ matrix.provider }} (${{ matrix.versions }})"
124+
permissions:
125+
contents: read
126+
packages: read
101127
steps:
102128
- name: "Checkout repository"
103129
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -107,23 +133,23 @@ jobs:
107133

108134
- name: "Fetch provider tags"
109135
env:
110-
VERSIONS: ${{ inputs.versions }}
136+
VERSIONS: ${{ matrix.versions }}
111137
PROVIDER: ${{ matrix.provider }}
112138
run: |
113139
for VERSION in ${VERSIONS}; do
114140
TAG="providers-${PROVIDER}/${VERSION}"
115141
echo "Fetching tag: ${TAG}"
116142
git fetch origin tag "${TAG}" \
117-
2>/dev/null || echo "Tag not found"
143+
2>/dev/null || echo "Tag not found: ${TAG}"
118144
done
119145
120-
- name: "Install uv"
121-
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
122-
123-
- name: "Install Breeze"
124-
uses: ./.github/actions/breeze
146+
- name: "Prepare breeze & CI image"
147+
uses: ./.github/actions/prepare_breeze_and_image
125148
with:
126-
python-version: "3.12"
149+
python: "3.12"
150+
platform: "linux/amd64"
151+
use-uv: "true"
152+
make-mnt-writeable-and-cleanup: "true"
127153

128154
- name: "Install AWS CLI v2"
129155
run: |
@@ -152,7 +178,7 @@ jobs:
152178
153179
- name: "Extract version metadata from git tags"
154180
env:
155-
VERSIONS: ${{ inputs.versions }}
181+
VERSIONS: ${{ matrix.versions }}
156182
PROVIDER: ${{ matrix.provider }}
157183
run: |
158184
VERSION_ARGS=""
@@ -164,15 +190,15 @@ jobs:
164190
165191
- name: "Run breeze registry backfill"
166192
env:
167-
VERSIONS: ${{ inputs.versions }}
193+
VERSIONS: ${{ matrix.versions }}
168194
PROVIDER: ${{ matrix.provider }}
169195
run: |
170196
VERSION_ARGS=""
171197
for VERSION in ${VERSIONS}; do
172198
VERSION_ARGS="${VERSION_ARGS} --version ${VERSION}"
173199
done
174200
breeze registry backfill \
175-
--provider "${PROVIDER}" ${VERSION_ARGS}
201+
--provider "${PROVIDER}" --python 3.12 ${VERSION_ARGS}
176202
177203
- name: "Download data files from S3 for build"
178204
env:
@@ -211,7 +237,7 @@ jobs:
211237
env:
212238
S3_BUCKET: ${{ needs.prepare.outputs.bucket }}
213239
CACHE_CONTROL: "public, max-age=300"
214-
VERSIONS: ${{ inputs.versions }}
240+
VERSIONS: ${{ matrix.versions }}
215241
PROVIDER: ${{ matrix.provider }}
216242
run: |
217243
for VERSION in ${VERSIONS}; do

dev/breeze/doc/images/output_registry.svg

Lines changed: 2 additions & 2 deletions
Loading
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
b1c2694af08bb5e10ae6f2c3b9bb2479
1+
27b4df2c81ed8e0d4c566e552e13bb6a

0 commit comments

Comments
 (0)