-
Notifications
You must be signed in to change notification settings - Fork 19
Expand file tree
/
Copy pathbuild.py
More file actions
41 lines (33 loc) · 1.13 KB
/
build.py
File metadata and controls
41 lines (33 loc) · 1.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import logging
import docker
from datasets import load_dataset
from typing import Iterator
from commit0.harness.constants import RepoInstance, SPLIT
from commit0.harness.docker_build import build_repo_images
from commit0.harness.spec import make_spec
# Configure the root logger when this module is imported: INFO level, with
# each record rendered as "<time> - <logger name> - <level> - <message>".
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Logger for this module, named after the module itself.
logger = logging.getLogger(__name__)
def main(
    dataset_name: str,
    dataset_split: str,
    repo_split: str,
    num_workers: int,
    verbose: int,
) -> None:
    """Build the repo images for the selected dataset examples and tag them.

    Loads the dataset, keeps the examples whose repository name (the last
    ``/``-separated component of ``example["repo"]``) is listed in
    ``SPLIT[repo_split]`` -- or every example when ``repo_split`` is
    ``"all"`` -- and creates one spec per kept example with ``make_spec``.
    The specs are handed to ``build_repo_images``; afterwards each image is
    looked up by ``spec.repo_image_key`` and tagged with
    ``spec.repo_image_tag``.

    Args:
        dataset_name: Dataset identifier passed to ``load_dataset``.
        dataset_split: Value passed as ``split=`` to ``load_dataset``.
        repo_split: Key into ``SPLIT`` selecting which repositories to
            build, or ``"all"`` to skip the filter.
        num_workers: Forwarded unchanged to ``build_repo_images``.
        verbose: Forwarded unchanged to ``build_repo_images``.

    Raises:
        KeyError: If ``repo_split`` is neither ``"all"`` nor a key of
            ``SPLIT`` (raised when the first example is examined).
        ValueError: If a spec's ``repo_image_tag`` contains no ``:``.
    """
    dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split)  # type: ignore
    specs = []
    for example in dataset:
        # "owner/name" -> "name"; SPLIT is keyed by the bare repository name.
        repo_name = example["repo"].split("/")[-1]
        if repo_split != "all" and repo_name not in SPLIT[repo_split]:
            continue
        specs.append(make_spec(example))

    client = docker.from_env()
    build_repo_images(client, specs, num_workers, verbose)
    for spec in specs:
        image = client.images.get(spec.repo_image_key)
        # Split on the LAST colon only: an image reference may carry a
        # registry host with a port ("host:5000/name:tag"), and a plain
        # split(":") would then yield three parts and fail to unpack.
        repository, tag = spec.repo_image_tag.rsplit(":", 1)
        image.tag(repository, tag)
# Empty export list: `from <this module> import *` brings in no names.
__all__ = []