Skip to content

Commit d4c9f39

Browse files

Authored commit d4c9f39 — "Add pydoclint (#2469)" — 1 parent: 90bbb23

461 files changed

Lines changed: 8027 additions & 4783 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.dict-speechbrain.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -602,6 +602,7 @@ puml
602602
punc
603603
pval
604604
pyctcdecode
605+
pydoclint
605606
pydub
606607
pygtrie
607608
pyin

.flake8

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
[flake8]
2-
ignore = E203, E266, E501, W503
2+
ignore = E203, E266, E501, W503, DOC105, DOC106, DOC107, DOC203, DOC403, DOC404, DOC405, DOC501, DOC502
33
# line length is intentionally set to 80 here because black uses Bugbear
44
# See https://github.com/psf/black/blob/master/README.md#line-length for more details
55
max-line-length = 80
66
max-complexity = 18
7-
select = B,C,E,F,W,T4,B9
8-
exclude = tools/kaldi_decoder
7+
select = B,C,E,F,W,T4,B9,DOC
8+
exclude = tests/tmp

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,19 +14,19 @@ repos:
1414
args: [--maxkb=1024]
1515

1616
- repo: https://github.com/psf/black
17-
rev: 19.10b0
17+
rev: 24.3.0
1818
hooks:
1919
- id: black
2020
types: [python]
21-
additional_dependencies: ['click==8.0.4']
21+
additional_dependencies: ['click==8.1.7']
2222
- repo: https://github.com/PyCQA/flake8
23-
rev: 3.7.9
23+
rev: 7.0.0
2424
hooks:
2525
- id: flake8
2626
types: [python]
2727

2828
- repo: https://github.com/adrienverge/yamllint
29-
rev: v1.23.0
29+
rev: v1.35.1
3030
hooks:
3131
- id: yamllint
3232

lint-requirements.txt

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
black==19.10b0
2-
click==8.0.4
3-
flake8==3.7.9
4-
pycodestyle==2.5.0
1+
black==24.3.0
2+
click==8.1.7
3+
flake8==7.0.0
4+
pycodestyle==2.11.0
5+
pydoclint==0.4.1
56
pytest==7.4.0
6-
yamllint==1.23.0
7+
yamllint==1.35.1

recipes/AISHELL-1/ASR/CTC/train_with_wav2vec.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,9 @@ def compute_objectives(self, predictions, batch, stage):
109109
if stage != sb.Stage.TRAIN:
110110
target_words_list = [list(wrd) for wrd in batch.wrd]
111111
self.cer_metric.append(
112-
ids=ids, predict=predicted_words_list, target=target_words_list,
112+
ids=ids,
113+
predict=predicted_words_list,
114+
target=target_words_list,
113115
)
114116

115117
return loss
@@ -154,7 +156,8 @@ def on_stage_end(self, stage, stage_loss, epoch):
154156
valid_stats=stage_stats,
155157
)
156158
self.checkpointer.save_and_keep_only(
157-
meta={"CER": stage_stats["CER"]}, min_keys=["CER"],
159+
meta={"CER": stage_stats["CER"]},
160+
min_keys=["CER"],
158161
)
159162
elif stage == sb.Stage.TEST:
160163
self.hparams.train_logger.log_stats(
@@ -205,11 +208,13 @@ def freeze_optimizers(self, optimizers):
205208

206209
def dataio_prepare(hparams):
207210
"""This function prepares the datasets to be used in the brain class.
208-
It also defines the data processing pipeline through user-defined functions."""
211+
It also defines the data processing pipeline through user-defined functions.
212+
"""
209213
data_folder = hparams["data_folder"]
210214

211215
train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
212-
csv_path=hparams["train_data"], replacements={"data_root": data_folder},
216+
csv_path=hparams["train_data"],
217+
replacements={"data_root": data_folder},
213218
)
214219

215220
if hparams["sorting"] == "ascending":
@@ -234,12 +239,14 @@ def dataio_prepare(hparams):
234239
)
235240

236241
valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
237-
csv_path=hparams["valid_data"], replacements={"data_root": data_folder},
242+
csv_path=hparams["valid_data"],
243+
replacements={"data_root": data_folder},
238244
)
239245
valid_data = valid_data.filtered_sorted(sort_key="duration")
240246

241247
test_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
242-
csv_path=hparams["test_data"], replacements={"data_root": data_folder},
248+
csv_path=hparams["test_data"],
249+
replacements={"data_root": data_folder},
243250
)
244251
test_data = test_data.filtered_sorted(sort_key="duration")
245252

@@ -272,7 +279,8 @@ def text_pipeline(wrd):
272279

273280
# 4. Set output:
274281
sb.dataio.dataset.set_output_keys(
275-
datasets, ["id", "sig", "wrd", "tokens"],
282+
datasets,
283+
["id", "sig", "wrd", "tokens"],
276284
)
277285

278286
# 5. If Dynamic Batching is used, we instantiate the needed samplers.
@@ -284,11 +292,15 @@ def text_pipeline(wrd):
284292
dynamic_hparams = hparams["dynamic_batch_sampler"]
285293

286294
train_batch_sampler = DynamicBatchSampler(
287-
train_data, **dynamic_hparams, length_func=lambda x: x["duration"],
295+
train_data,
296+
**dynamic_hparams,
297+
length_func=lambda x: x["duration"],
288298
)
289299

290300
valid_batch_sampler = DynamicBatchSampler(
291-
valid_data, **dynamic_hparams, length_func=lambda x: x["duration"],
301+
valid_data,
302+
**dynamic_hparams,
303+
length_func=lambda x: x["duration"],
292304
)
293305

294306
return (
@@ -302,7 +314,6 @@ def text_pipeline(wrd):
302314

303315

304316
if __name__ == "__main__":
305-
306317
# CLI:
307318
hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
308319
with open(hparams_file) as fin:

recipes/AISHELL-1/ASR/seq2seq/train.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,8 @@ def on_stage_end(self, stage, stage_loss, epoch):
131131
valid_stats=stage_stats,
132132
)
133133
self.checkpointer.save_and_keep_only(
134-
meta={"CER": stage_stats["CER"]}, min_keys=["CER"],
134+
meta={"CER": stage_stats["CER"]},
135+
min_keys=["CER"],
135136
)
136137
elif stage == sb.Stage.TEST:
137138
self.hparams.train_logger.log_stats(
@@ -144,11 +145,13 @@ def on_stage_end(self, stage, stage_loss, epoch):
144145

145146
def dataio_prepare(hparams):
146147
"""This function prepares the datasets to be used in the brain class.
147-
It also defines the data processing pipeline through user-defined functions."""
148+
It also defines the data processing pipeline through user-defined functions.
149+
"""
148150
data_folder = hparams["data_folder"]
149151

150152
train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
151-
csv_path=hparams["train_data"], replacements={"data_root": data_folder},
153+
csv_path=hparams["train_data"],
154+
replacements={"data_root": data_folder},
152155
)
153156

154157
if hparams["sorting"] == "ascending":
@@ -173,12 +176,14 @@ def dataio_prepare(hparams):
173176
)
174177

175178
valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
176-
csv_path=hparams["valid_data"], replacements={"data_root": data_folder},
179+
csv_path=hparams["valid_data"],
180+
replacements={"data_root": data_folder},
177181
)
178182
valid_data = valid_data.filtered_sorted(sort_key="duration")
179183

180184
test_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
181-
csv_path=hparams["test_data"], replacements={"data_root": data_folder},
185+
csv_path=hparams["test_data"],
186+
replacements={"data_root": data_folder},
182187
)
183188
test_data = test_data.filtered_sorted(sort_key="duration")
184189

@@ -216,7 +221,8 @@ def text_pipeline(wrd):
216221

217222
# 4. Set output:
218223
sb.dataio.dataset.set_output_keys(
219-
datasets, ["id", "sig", "wrd", "tokens_bos", "tokens_eos", "tokens"],
224+
datasets,
225+
["id", "sig", "wrd", "tokens_bos", "tokens_eos", "tokens"],
220226
)
221227

222228
# 5. If Dynamic Batching is used, we instantiate the needed samplers.
@@ -228,11 +234,15 @@ def text_pipeline(wrd):
228234
dynamic_hparams = hparams["dynamic_batch_sampler"]
229235

230236
train_batch_sampler = DynamicBatchSampler(
231-
train_data, **dynamic_hparams, length_func=lambda x: x["duration"],
237+
train_data,
238+
**dynamic_hparams,
239+
length_func=lambda x: x["duration"],
232240
)
233241

234242
valid_batch_sampler = DynamicBatchSampler(
235-
valid_data, **dynamic_hparams, length_func=lambda x: x["duration"],
243+
valid_data,
244+
**dynamic_hparams,
245+
length_func=lambda x: x["duration"],
236246
)
237247

238248
return (
@@ -246,7 +256,6 @@ def text_pipeline(wrd):
246256

247257

248258
if __name__ == "__main__":
249-
250259
# CLI:
251260
hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
252261
with open(hparams_file) as fin:

recipes/AISHELL-1/ASR/transformer/train.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def compute_forward(self, batch, stage):
7575
def compute_objectives(self, predictions, batch, stage):
7676
"""Computes the loss (CTC+NLL) given predictions and targets."""
7777

78-
(p_ctc, p_seq, wav_lens, hyps,) = predictions
78+
(p_ctc, p_seq, wav_lens, hyps) = predictions
7979

8080
ids = batch.id
8181
tokens_eos, tokens_eos_lens = batch.tokens_eos
@@ -169,7 +169,6 @@ def on_stage_end(self, stage, stage_loss, epoch):
169169

170170
# log stats and save checkpoint at end-of-epoch
171171
if stage == sb.Stage.VALID:
172-
173172
# report different epoch stages according current stage
174173
current_epoch = self.hparams.epoch_counter.current
175174
if current_epoch <= self.hparams.stage_one_epochs:
@@ -247,7 +246,6 @@ def on_fit_start(self):
247246

248247
# Load latest checkpoint to resume training if interrupted
249248
if self.checkpointer is not None:
250-
251249
# do not reload the weights if training is interrupted right before stage 2
252250
group = current_optimizer.param_groups[0]
253251
if "momentum" not in group:
@@ -263,7 +261,8 @@ def on_evaluate_start(self, max_key=None, min_key=None):
263261
max_key=max_key, min_key=min_key
264262
)
265263
ckpt = sb.utils.checkpoints.average_checkpoints(
266-
ckpts, recoverable_name="model",
264+
ckpts,
265+
recoverable_name="model",
267266
)
268267

269268
self.hparams.model.load_state_dict(ckpt, strict=True)
@@ -272,11 +271,13 @@ def on_evaluate_start(self, max_key=None, min_key=None):
272271

273272
def dataio_prepare(hparams):
274273
"""This function prepares the datasets to be used in the brain class.
275-
It also defines the data processing pipeline through user-defined functions."""
274+
It also defines the data processing pipeline through user-defined functions.
275+
"""
276276
data_folder = hparams["data_folder"]
277277

278278
train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
279-
csv_path=hparams["train_data"], replacements={"data_root": data_folder},
279+
csv_path=hparams["train_data"],
280+
replacements={"data_root": data_folder},
280281
)
281282

282283
if hparams["sorting"] == "ascending":
@@ -301,12 +302,14 @@ def dataio_prepare(hparams):
301302
)
302303

303304
valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
304-
csv_path=hparams["valid_data"], replacements={"data_root": data_folder},
305+
csv_path=hparams["valid_data"],
306+
replacements={"data_root": data_folder},
305307
)
306308
valid_data = valid_data.filtered_sorted(sort_key="duration")
307309

308310
test_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
309-
csv_path=hparams["test_data"], replacements={"data_root": data_folder},
311+
csv_path=hparams["test_data"],
312+
replacements={"data_root": data_folder},
310313
)
311314
test_data = test_data.filtered_sorted(sort_key="duration", reverse=True)
312315

@@ -344,7 +347,8 @@ def text_pipeline(wrd):
344347

345348
# 4. Set output:
346349
sb.dataio.dataset.set_output_keys(
347-
datasets, ["id", "sig", "wrd", "tokens_bos", "tokens_eos", "tokens"],
350+
datasets,
351+
["id", "sig", "wrd", "tokens_bos", "tokens_eos", "tokens"],
348352
)
349353

350354
# 5. If Dynamic Batching is used, we instantiate the needed samplers.
@@ -356,11 +360,11 @@ def text_pipeline(wrd):
356360
dynamic_hparams = hparams["dynamic_batch_sampler"]
357361

358362
train_batch_sampler = DynamicBatchSampler(
359-
train_data, **dynamic_hparams, length_func=lambda x: x["duration"],
363+
train_data, **dynamic_hparams, length_func=lambda x: x["duration"]
360364
)
361365

362366
valid_batch_sampler = DynamicBatchSampler(
363-
valid_data, **dynamic_hparams, length_func=lambda x: x["duration"],
367+
valid_data, **dynamic_hparams, length_func=lambda x: x["duration"]
364368
)
365369

366370
return (
@@ -374,7 +378,6 @@ def text_pipeline(wrd):
374378

375379

376380
if __name__ == "__main__":
377-
378381
# CLI:
379382
hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
380383
with open(hparams_file) as fin:

0 commit comments

Comments (0)