-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Expand file tree
/
Copy pathcheck_HF_repo.py
More file actions
141 lines (119 loc) · 3.97 KB
/
check_HF_repo.py
File metadata and controls
141 lines (119 loc) · 3.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""Library for the HuggingFace (HF) repositories.
Authors
* Mirco Ravanelli 2022
* Andreas Nautsch 2022, 2023
"""
import csv
import os
from speechbrain.utils.data_utils import download_file
from tests.consistency.test_recipe import __skip_list
def run_HF_check(
    recipe_folder="tests/recipes",
    field="HF_repo",
    output_folder="tests/tmp",
):
    """Checks if the code reported in the readme files of the HF repository is
    runnable. Note: the tests run the code marked as python in the readme file.

    The working directory is switched to `output_folder` while the repos are
    checked and is restored before returning, even if a check raises.

    Arguments
    ---------
    recipe_folder: path
        Path of the folder containing csv recipe files summarizing all the recipes in the repo.
    field: string
        Field of the csv recipe file containing the links to HF repos.
    output_folder: path
        Where to download the HF readme files.

    Returns
    -------
    check: bool
        True if all the code runs, False otherwise.
    """
    # Detect list of HF repositories. This happens before the working directory
    # changes, so a relative `recipe_folder` is resolved from the caller's
    # directory.
    HF_repos = repo_list(recipe_folder, field)

    # Set up output folder
    os.makedirs(output_folder, exist_ok=True)
    previous_dir = os.getcwd()
    os.chdir(output_folder)

    # Checking all detected repos
    check = True
    try:
        total = len(HF_repos)
        for i, repo in enumerate(HF_repos):
            print("(%i/%i) Checking %s..." % (i + 1, total, repo))
            if not check_repo(repo):
                check = False
    finally:
        # Leave the process where the caller started it; otherwise a second
        # call with relative paths would look for them inside output_folder.
        os.chdir(previous_dir)
    return check
def repo_list(recipe_folder="tests/recipes", field="HF_repo"):
    """Get the HF repos listed in the csv recipe files.

    Every entry of `recipe_folder` that is a regular file and is not named in
    `__skip_list` is parsed as a comma-separated recipe file. A cell of the
    `field` column may hold several whitespace-separated repo links.

    Arguments
    ---------
    recipe_folder: path
        Path of the folder containing csv recipe files summarizing all the recipes in the repo.
    field: string
        Field of the csv recipe file containing the links to HF repos.

    Returns
    -------
    HF_repos: set
        Set of the detected HF repos (each repo appears once).
    """
    HF_repos = set()

    # Loop over all recipe CSVs
    for recipe_csvfile in os.listdir(recipe_folder):
        if recipe_csvfile in __skip_list:
            continue
        csv_path = os.path.join(recipe_folder, recipe_csvfile)
        # os.listdir also returns sub-folders, which cannot be opened as files.
        if not os.path.isfile(csv_path):
            continue
        with open(csv_path, newline="", encoding="utf-8") as csvf:
            reader = csv.DictReader(csvf, delimiter=",", skipinitialspace=True)
            for row in reader:
                # The column can be absent from a file, and DictReader fills
                # the missing cells of short rows with None.
                entry = row.get(field) or ""
                # split() without arguments never yields empty items.
                HF_repos.update(entry.split())
    return HF_repos
def _extract_python_snippets(lines):
    """Collects the code of every ```python fenced block found in `lines`.

    Blank lines inside a block are dropped and local audio paths are rewritten
    from 'tests/samples' to '../samples'.

    Arguments
    ---------
    lines: iterable of str
        Lines of the README file (with their line endings).

    Returns
    -------
    snippets: list
        One string of source code per closed python block.
    """
    snippets = []
    current = None  # None while outside of a python block
    for line in lines:
        if "```python" in line:
            # A new opening fence always starts a fresh snippet.
            current = []
        elif current is not None and "```" in line:
            snippets.append("".join(current))
            current = None
        elif current is not None and line.strip():
            # adjust local audio paths 'tests/samples' -> '../samples'
            current.append(line.replace("tests/samples", "../samples"))
    return snippets


def check_repo(HF_repo):
    """Runs the code reported in the README file of the given HF_repo. It checks
    if the code runs without errors.

    The README is downloaded into the current working directory as
    `<repo name>.md`. Snippets of the same README are executed in order and
    share one namespace, so a later snippet may reuse names defined by an
    earlier one. Failures are printed together with the offending snippet.

    Arguments
    ---------
    HF_repo: string
        URL of the HF repository to check.

    Returns
    -------
    check: bool
        True if all the code runs, False otherwise.
    """
    # Drop trailing slashes first: os.path.basename returns "" for a URL that
    # ends with "/", which would store the README as ".md".
    repo_url = HF_repo.rstrip("/")
    exp_name = os.path.basename(repo_url)
    readme_file = repo_url + "/raw/main/README.md"
    dest_file = exp_name + ".md"
    download_file(readme_file, dest_file)

    with open(dest_file, encoding="utf-8") as f:
        code_snippets = _extract_python_snippets(f)

    # A single dict is used as globals so that functions defined in a snippet
    # can see the snippet's own imports and variables; with separate globals
    # and locals, exec runs the code as if it were a class body. It also keeps
    # this module's names out of the snippets.
    namespace = {"__name__": "__main__"}

    check = True
    for code in code_snippets:
        try:
            # SECURITY: this executes code downloaded from the given URL.
            # Only run this check against trusted repositories.
            exec(code, namespace)
        except Exception as e:
            print("\t" + str(e))
            check = False
            print("\tERROR: cannot run code snippet in %s" % (HF_repo))
            print("---\n" + code + "---\n")
    return check