Skip to content

Commit abbfab9

Browse files
committed
test clean-ups: passes linters; doctests; unit & integration tests; load-yaml on cpu
1 parent 1b5811c commit abbfab9

File tree

15 files changed

+124
-42
lines changed

15 files changed

+124
-42
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ coverage.xml
5151
.hypothesis/
5252
.pytest_cache/
5353
cover/
54+
tests/tmp/
5455

5556
# Translations
5657
*.mo
@@ -117,6 +118,9 @@ ENV/
117118
env.bak/
118119
venv.bak/
119120

121+
# PyCharm project settings
122+
.idea
123+
120124
# Spyder project settings
121125
.spyderproject
122126
.spyproject

recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ __set_seed: !apply:torch.manual_seed [!ref <seed>]
1616
data_folder: !PLACEHOLDER # e.g. /localscratch/common_voice_kpd/
1717
output_folder: !ref results/ECAPA-TDNN/<seed>
1818
save_folder: !ref <output_folder>/save
19+
rir_folder: !ref <data_folder>
1920
train_log: !ref <output_folder>/train_log.txt
2021
device: 'cuda:0'
2122
skip_prep: False
@@ -51,7 +52,7 @@ test_dataloader_options:
5152
# Added noise and reverb come from OpenRIR dataset, automatically
5253
# downloaded and prepared with this Environmental Corruption class.
5354
env_corrupt: !new:speechbrain.lobes.augment.EnvCorrupt
54-
openrir_folder: !ref <data_folder>
55+
openrir_folder: !ref <rir_folder>
5556
openrir_max_noise_len: 3.0 # seconds
5657
babble_prob: 0.0
5758
reverb_prob: 1.0

recipes/CommonVoice/ASR/seq2seq/hparams/train_en_with_wav2vec.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
106106
source: !ref <wav2vec2_hub>
107107
output_norm: True
108108
freeze: !ref <freeze_wav2vec>
109-
save_path: !ref <save_folder>/wav2vec2_checkpoints
109+
save_path: !ref <save_folder>/wav2vec2_checkpoint
110110

111111
#####
112112
# Uncomment this block if you prefer to use a Fairseq pretrained model instead

recipes/CommonVoice/ASR/seq2seq/hparams/train_rw_with_wav2vec.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
106106
source: !ref <wav2vec2_hub>
107107
output_norm: True
108108
freeze: !ref <freeze_wav2vec>
109-
save_path: !ref <save_folder>/wav2vec2_checkpoints
109+
save_path: !ref <save_folder>/wav2vec2_checkpoint
110110

111111
#####
112112
# Uncomment this block if you prefer to use a Fairseq pretrained model instead

recipes/CommonVoice/self-supervised-learning/wav2vec2/hparams/wav2vec2_base.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
9393

9494
wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2Pretrain
9595
source: !ref <wav2vec2_hub>
96-
save_path: !ref <save_folder>/wav2vec2_checkpoints
96+
save_path: !ref <save_folder>/wav2vec2_checkpoint
9797
mask_prob: !ref <mask_prob>
9898
mask_length: !ref <mask_length>
9999

recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ output_folder: !ref results/epaca/<seed>
1010
save_folder: !ref <output_folder>/save
1111
train_log: !ref <output_folder>/train_log.txt
1212
data_folder: ./
13-
13+
rir_folder: !ref <data_folder>
1414

1515
shards_url: /data/voxlingua107_shards
1616
train_meta: !ref <shards_url>/train/meta.json
@@ -79,7 +79,7 @@ augment_speed: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
7979

8080

8181
add_rev_noise: !new:speechbrain.lobes.augment.EnvCorrupt
82-
openrir_folder: !ref <data_folder>
82+
openrir_folder: !ref <rir_folder>
8383
openrir_max_noise_len: 3.0 # seconds
8484
reverb_prob: 0.5
8585
noise_prob: 0.8

speechbrain/dataio/dataio.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ def read_audio(waveforms_obj):
189189
-------
190190
>>> dummywav = torch.rand(16000)
191191
>>> import os
192-
>>> tmpfile = os.path.join(str(getfixture('tmpdir')), "wave.wav")
192+
>>> tmpfile = str(getfixture('tmpdir') / "wave.wav")
193193
>>> write_audio(tmpfile, dummywav, 16000)
194194
>>> asr_example = { "wav": tmpfile, "spk_id": "foo", "words": "foo bar"}
195195
>>> loaded = read_audio(asr_example["wav"])
@@ -257,7 +257,7 @@ def read_audio_multichannel(waveforms_obj):
257257
-------
258258
>>> dummywav = torch.rand(16000, 2)
259259
>>> import os
260-
>>> tmpfile = os.path.join(str(getfixture('tmpdir')), "wave.wav")
260+
>>> tmpfile = str(getfixture('tmpdir') / "wave.wav")
261261
>>> write_audio(tmpfile, dummywav, 16000)
262262
>>> asr_example = { "wav": tmpfile, "spk_id": "foo", "words": "foo bar"}
263263
>>> loaded = read_audio(asr_example["wav"])
@@ -305,7 +305,7 @@ def write_audio(filepath, audio, samplerate):
305305
Example
306306
-------
307307
>>> import os
308-
>>> tmpfile = os.path.join(str(getfixture('tmpdir')), "wave.wav")
308+
>>> tmpfile = str(getfixture('tmpdir') / "wave.wav")
309309
>>> dummywav = torch.rand(16000, 2)
310310
>>> write_audio(tmpfile, dummywav, 16000)
311311
>>> loaded = read_audio(tmpfile)
@@ -605,7 +605,7 @@ def write_txt_file(data, filename, sampling_rate=None):
605605
-------
606606
>>> tmpdir = getfixture('tmpdir')
607607
>>> signal=torch.tensor([1,2,3,4])
608-
>>> write_txt_file(signal, os.path.join(tmpdir, 'example.txt'))
608+
>>> write_txt_file(signal, tmpdir / 'example.txt')
609609
"""
610610
del sampling_rate # Not used.
611611
# Check if the path of filename exists
@@ -642,7 +642,7 @@ def write_stdout(data, filename=None, sampling_rate=None):
642642
-------
643643
>>> tmpdir = getfixture('tmpdir')
644644
>>> signal = torch.tensor([[1,2,3,4]])
645-
>>> write_stdout(signal, tmpdir + '/example.txt')
645+
>>> write_stdout(signal, tmpdir / 'example.txt')
646646
[1, 2, 3, 4]
647647
"""
648648
# Managing Torch.Tensor
@@ -805,7 +805,7 @@ def save_md5(files, out_file):
805805
Example:
806806
>>> files = ['tests/samples/single-mic/example1.wav']
807807
>>> tmpdir = getfixture('tmpdir')
808-
>>> save_md5(files, os.path.join(tmpdir, "md5.pkl"))
808+
>>> save_md5(files, tmpdir / "md5.pkl")
809809
"""
810810
# Initialization of the dictionary
811811
md5_dict = {}
@@ -830,7 +830,7 @@ def save_pkl(obj, file):
830830
831831
Example
832832
-------
833-
>>> tmpfile = os.path.join(getfixture('tmpdir'), "example.pkl")
833+
>>> tmpfile = getfixture('tmpdir') / "example.pkl"
834834
>>> save_pkl([1, 2, 3, 4, 5], tmpfile)
835835
>>> load_pkl(tmpfile)
836836
[1, 2, 3, 4, 5]
@@ -983,7 +983,9 @@ def merge_csvs(data_folder, csv_lst, merged_csv):
983983
984984
Example
985985
-------
986-
>>> merge_csvs("tests/samples/annotation/",
986+
>>> tmpdir = getfixture('tmpdir')
987+
>>> os.symlink(os.path.realpath("tests/samples/annotation/speech.csv"), tmpdir / "speech.csv")
988+
>>> merge_csvs(tmpdir,
987989
... ["speech.csv", "speech.csv"],
988990
... "test_csv_merge.csv")
989991
"""

speechbrain/pretrained/interfaces.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -766,10 +766,10 @@ class EncoderClassifier(Pretrained):
766766
767767
>>> # Compute embeddings
768768
>>> signal, fs = torchaudio.load("tests/samples/single-mic/example1.wav")
769-
>>> embeddings = classifier.encode_batch(signal)
769+
>>> embeddings = classifier.encode_batch(signal)
770770
771771
>>> # Classification
772-
>>> prediction = classifier .classify_batch(signal)
772+
>>> prediction = classifier.classify_batch(signal)
773773
"""
774774

775775
MODULES_NEEDED = [
@@ -2344,7 +2344,8 @@ class GraphemeToPhoneme(Pretrained, EncodeDecodePipelineMixin):
23442344
>>> text = ("English is tough. It can be understood "
23452345
... "through thorough thought though")
23462346
>>> from speechbrain.pretrained import GraphemeToPhoneme
2347-
>>> g2p = GraphemeToPhoneme.from_hparams('path/to/model') # doctest: +SKIP
2347+
>>> tmpdir = getfixture('tmpdir')
2348+
>>> g2p = GraphemeToPhoneme.from_hparams('path/to/model', savedir=tmpdir) # doctest: +SKIP
23482349
>>> phonemes = g2p.g2p(text) # doctest: +SKIP
23492350
"""
23502351

@@ -2590,7 +2591,8 @@ class Tacotron2(Pretrained):
25902591
25912592
Example
25922593
-------
2593-
>>> tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir")
2594+
>>> tmpdir_vocoder = getfixture('tmpdir') / "vocoder"
2595+
>>> tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir=tmpdir_vocoder)
25942596
>>> mel_output, mel_length, alignment = tacotron2.encode_text("Mary had a little lamb")
25952597
>>> items = [
25962598
... "A quick brown fox jumped over the lazy dog",
@@ -2601,7 +2603,8 @@ class Tacotron2(Pretrained):
26012603
26022604
>>> # One can combine the TTS model with a vocoder (that generates the final waveform)
26032605
>>> # Initialize the Vocoder (HiFIGAN)
2604-
>>> hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
2606+
>>> tmpdir_tts = getfixture('tmpdir') / "tts"
2607+
>>> hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir=tmpdir_tts)
26052608
>>> # Running the TTS
26062609
>>> mel_output, mel_length, alignment = tacotron2.encode_text("Mary had a little lamb")
26072610
>>> # Running Vocoder (spectrogram-to-waveform)
@@ -2679,13 +2682,15 @@ class HIFIGAN(Pretrained):
26792682
26802683
Example
26812684
-------
2682-
>>> hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
2685+
>>> tmpdir_vocoder = getfixture('tmpdir') / "vocoder"
2686+
>>> hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir=tmpdir_vocoder)
26832687
>>> mel_specs = torch.rand(2, 80,298)
26842688
>>> waveforms = hifi_gan.decode_batch(mel_specs)
26852689
26862690
>>> # You can use the vocoder coupled with a TTS system
26872691
>>> # Initialize TTS (tacotron2)
2688-
>>> tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
2692+
>>> tmpdir_tts = getfixture('tmpdir') / "tts"
2693+
>>> tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir=tmpdir_tts)
26892694
>>> # Running the TTS
26902695
>>> mel_output, mel_length, alignment = tacotron2.encode_text("Mary had a little lamb")
26912696
>>> # Running Vocoder (spectrogram-to-waveform)
@@ -2737,7 +2742,7 @@ def decode_spectrogram(self, spectrogram):
27372742
audio can be saved by:
27382743
>>> waveform = torch.rand(1, 666666)
27392744
>>> sample_rate = 22050
2740-
>>> torchaudio.save("test.wav", waveform, sample_rate)
2745+
>>> torchaudio.save(str(getfixture('tmpdir') / "test.wav"), waveform, sample_rate)
27412746
"""
27422747
if self.first_call:
27432748
self.hparams.generator.remove_weight_norm()

speechbrain/tokenizers/SentencePiece.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,23 +84,21 @@ class SentencePiece:
8484
-------
8585
>>> import torch
8686
>>> dict_int2lab = {1: "HELLO", 2: "MORNING"}
87-
>>> model_dir = "tests/unittests/tokenizer_data/"
87+
>>> model_dir = getfixture('tmpdir') / "tokenizer_data"
8888
>>> # Example with csv
89-
>>> annotation_train = "tests/unittests/tokenizer_data/dev-clean.csv"
89+
>>> annotation_train = "tests/samples/annotation/dev-clean.csv"
9090
>>> annotation_read = "wrd"
9191
>>> model_type = "bpe"
92-
>>> bpe = SentencePiece(model_dir,100, annotation_train, annotation_read,
93-
... model_type)
92+
>>> bpe = SentencePiece(str(model_dir), 100, annotation_train, annotation_read, model_type)
9493
>>> batch_seq = torch.Tensor([[1, 2, 2, 1],[1, 2, 1, 0]])
9594
>>> batch_lens = torch.Tensor([1.0, 0.75])
9695
>>> encoded_seq_ids, encoded_seq_pieces = bpe(
9796
... batch_seq, batch_lens, dict_int2lab, task="encode"
9897
... )
9998
>>> # Example using JSON
100-
>>> annotation_train = "tests/unittests/tokenizer_data/dev-clean.json"
99+
>>> annotation_train = str(model_dir + "/dev-clean.json")
101100
>>> annotation_read = "wrd"
102-
>>> bpe = SentencePiece(model_dir,100, annotation_train, annotation_read,
103-
... model_type, annotation_format = 'json')
101+
>>> bpe = SentencePiece(model_dir, 100, annotation_train, annotation_read, model_type, annotation_format = 'json')
104102
>>> encoded_seq_ids, encoded_seq_pieces = bpe(
105103
... batch_seq, batch_lens, dict_int2lab, task="encode"
106104
... )
@@ -142,7 +140,12 @@ def __init__(
142140
if self.annotation_train is not None:
143141
ext = os.path.splitext(self.annotation_train)[1]
144142
if text_file is None:
145-
text_file = self.annotation_train.replace(ext, ".txt")
143+
text_file = os.path.join(
144+
model_dir,
145+
os.path.basename(self.annotation_train).replace(
146+
ext, ".txt"
147+
),
148+
)
146149
self.text_file = text_file
147150

148151
self.prefix_model_file = os.path.join(

speechbrain/utils/recipe_tests.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
Authors
44
* Mirco Ravanelli 2022
5+
* Andreas Nautsch 2022
56
"""
67
import os
78
import re
@@ -318,7 +319,7 @@ def run_recipe_tests(
318319
test_field="test_debug_flags",
319320
check_field="test_debug_checks",
320321
run_opts="--device=cpu",
321-
output_folder="tests/recipe_tests/",
322+
output_folder="tests/tmp/recipes/",
322323
filters_fields=[],
323324
filters=[],
324325
do_checks=True,
@@ -429,9 +430,19 @@ def load_yaml_test(
429430
avoid_list=[
430431
"templates/hyperparameter_optimization_speaker_id/train.yaml",
431432
"templates/speaker_id/train.yaml",
433+
# recipes creating errors if NVIDIA driver is not on one's system
434+
"recipes/timers-and-such/multistage/hparams/train_LS_LM.yaml",
435+
"recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml",
436+
"recipes/timers-and-such/direct/hparams/train.yaml",
437+
"recipes/timers-and-such/decoupled/hparams/train_LS_LM.yaml",
438+
"recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml",
439+
"recipes/fluent-speech-commands/direct/hparams/train.yaml",
440+
"recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml",
441+
"recipes/SLURP/direct/hparams/train.yaml",
432442
],
433-
data_folder="yaml_check_folder",
434-
output_folder="yaml_check_folder",
443+
rir_folder="tests/tmp/rir",
444+
data_folder="tests/tmp/yaml",
445+
output_folder="tests/tmp/yaml",
435446
):
436447
"""Tests if the yaml files can be loaded without errors.
437448
@@ -453,6 +464,8 @@ def load_yaml_test(
453464
See above.
454465
avoid_list: list
455466
List of hparam files not to check.
467+
rir_folder:
468+
This overrides rir_folder, rir_path, openrir_folder, open_rir_folder, and data_folder_rirs when they are specified in the hparam files.
456469
data_folder:
457470
This overrides the data_folder usually specified in the hparam files.
458471
output_folder:
@@ -470,19 +483,25 @@ def load_yaml_test(
470483
# Set data_folder and output_folder
471484
data_folder = os.path.join(cwd, data_folder)
472485
output_folder = os.path.join(cwd, output_folder)
486+
rir_folder = os.path.join(cwd, rir_folder)
473487

474488
# Additional overrides
475489
add_overrides = {
476490
"manual_annot_folder": data_folder,
477491
"musan_folder": data_folder,
478492
"tea_models_dir": data_folder,
479-
"rir_path": data_folder,
480493
"wsj_root": data_folder,
481494
"tokenizer_file": data_folder,
482495
"commonlanguage_folder": data_folder,
483496
"tea_infer_dir": data_folder,
484497
"original_data_folder": data_folder,
485498
"pretrain_st_dir": data_folder,
499+
# RIR folder specifications -> all point to the same zip file: one download destination
500+
"rir_path": rir_folder,
501+
"rir_folder": rir_folder,
502+
"openrir_folder": rir_folder,
503+
"open_rir_folder": rir_folder,
504+
"data_folder_rirs": rir_folder,
486505
}
487506

488507
# Read the csv recipe file and detect which tests we have to run
@@ -523,10 +542,10 @@ def load_yaml_test(
523542
# Append additional overrides when needed
524543
with open(hparam_file) as f:
525544
for line in f:
526-
for key in add_overrides.keys():
545+
for key, value in add_overrides.items():
527546
pattern = key + ":"
528547
if pattern in line and line.find(pattern) == 0:
529-
overrides.update({key: data_folder})
548+
overrides.update({key: value})
530549

531550
with open(hparam_file) as fin:
532551
try:

0 commit comments

Comments
 (0)