Skip to content

Commit 89cd54f

Browse files
Recipe test fixes (#2874)
Co-authored-by: Adel Moumen <adelmoumen.pro@gmail.com>
1 parent 9d27982 commit 89cd54f

File tree

12 files changed

+18
-18
lines changed

12 files changed

+18
-18
lines changed

.github/workflows/pythonapp.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
run: |
3131
pip install uv
3232
uv pip install --system ctc-segmentation sacrebleu # ctc-segmentation is funky with uv due to their oldest-supported-numpy dependency
33-
uv pip install --system -r requirements.txt torch==2.6.0+cpu torchaudio==2.6.0+cpu --extra-index-url https://download.pytorch.org/whl/cpu k2==1.24.4.dev20250307+cpu.torch2.6.0 --find-links https://k2-fsa.github.io/k2/cpu.html gensim==4.3.2 bitsandbytes==0.45.3 scikit-learn==1.6.1
33+
uv pip install --system -r requirements.txt torch==2.6.0+cpu torchaudio==2.6.0+cpu --extra-index-url https://download.pytorch.org/whl/cpu k2==1.24.4.dev20250307+cpu.torch2.6.0 --find-links https://k2-fsa.github.io/k2/cpu.html gensim==4.3.2 bitsandbytes==0.45.3 scikit-learn==1.6.1 accelerate
3434
uv pip install --system --editable . --no-deps # already installed pinned deps from requirements.txt, we're good
3535
- name: Install sox
3636
run: |

recipes/LJSpeech/quantization/train.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def dataio_prepare(hparams):
142142
"""
143143
train_data = sb.dataio.dataset.DynamicItemDataset.from_json(
144144
json_path=hparams["train_json"],
145-
replacements={"DATA_ROOT": hparams["data_folder"]},
145+
replacements={"data_root": hparams["data_folder"]},
146146
)
147147
# Sort training data to speed up training
148148
train_data = train_data.filtered_sorted(
@@ -153,7 +153,7 @@ def dataio_prepare(hparams):
153153

154154
valid_data = sb.dataio.dataset.DynamicItemDataset.from_json(
155155
json_path=hparams["valid_json"],
156-
replacements={"DATA_ROOT": hparams["data_folder"]},
156+
replacements={"data_root": hparams["data_folder"]},
157157
)
158158
# Sort validation data to speed up validation
159159
valid_data = valid_data.filtered_sorted(
@@ -164,7 +164,7 @@ def dataio_prepare(hparams):
164164

165165
test_data = sb.dataio.dataset.DynamicItemDataset.from_json(
166166
json_path=hparams["test_json"],
167-
replacements={"DATA_ROOT": hparams["data_folder"]},
167+
replacements={"data_root": hparams["data_folder"]},
168168
)
169169
# Sort the test data to speed up testing
170170
test_data = test_data.filtered_sorted(

recipes/Voicebank/dereverb/MetricGAN-U/train.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ def write_wavs(self, batch_id, wavs, score, lens):
401401
for i, (name, pred_wav, length) in enumerate(zip(batch_id, wavs, lens)):
402402
path = os.path.join(self.hparams.MetricGAN_folder, name + ".wav")
403403
data = torch.unsqueeze(pred_wav[: int(length)].cpu(), 0)
404-
torchaudio.save(path, data, self.hparams.Sample_rate)
404+
torchaudio.save(path, data.detach(), self.hparams.Sample_rate)
405405

406406
# Make record of path and score for historical training
407407
score = float(score[i][0])

recipes/Voicebank/enhance/MetricGAN-U/train.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ def write_wavs(self, batch_id, wavs, score, lens):
394394
for i, (name, pred_wav, length) in enumerate(zip(batch_id, wavs, lens)):
395395
path = os.path.join(self.hparams.MetricGAN_folder, name + ".wav")
396396
data = torch.unsqueeze(pred_wav[: int(length)].cpu(), 0)
397-
torchaudio.save(path, data, self.hparams.Sample_rate)
397+
torchaudio.save(path, data.detach(), self.hparams.Sample_rate)
398398

399399
# Make record of path and score for historical training
400400
score = float(score[i][0])

recipes/Voicebank/enhance/MetricGAN/train.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def write_wavs(self, batch_id, wavs, clean_paths, scores, lens):
280280
):
281281
path = os.path.join(self.hparams.MetricGAN_folder, name + ".wav")
282282
data = torch.unsqueeze(pred_wav[: int(length)].cpu(), 0)
283-
torchaudio.save(path, data, self.hparams.Sample_rate)
283+
torchaudio.save(path, data.detach(), self.hparams.Sample_rate)
284284

285285
# Make record of path and score for historical training
286286
score = float(scores[i][0])

recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ asr_model: !apply:speechbrain.inference.ASR.EncoderDecoderASR.from_hparams
7979
embedding_dim: 128
8080
dropout: 0.
8181
rnn_layers: 2
82-
rnn_neurons: 2048
82+
rnn_neurons: 1024
8383
dnn_blocks: 1
84-
dnn_neurons: 512
84+
dnn_neurons: 1024
8585
return_hidden: True
8686
pretrainer:
8787
paths:

recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,9 @@ asr_model: !apply:speechbrain.inference.ASR.EncoderDecoderASR.from_hparams
137137
embedding_dim: 128
138138
dropout: 0.
139139
rnn_layers: 2
140-
rnn_neurons: 2048
140+
rnn_neurons: 1024
141141
dnn_blocks: 1
142-
dnn_neurons: 512
142+
dnn_neurons: 1024
143143
return_hidden: True
144144
pretrainer:
145145
paths:

tests/recipes/GigaSpeech.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
Task,Dataset,Script_file,Hparam_file,Data_prep_file,Readme_file,Result_url,HF_repo,test_debug_flags,test_debug_checks,performance
22
ASR-CTC,GigaSpeech,recipes/GigaSpeech/ASR/CTC/train_with_wavlm.py,recipes/GigaSpeech/ASR/CTC/hparams/train_hf_wavlm.yaml,recipes/GigaSpeech/ASR/CTC/gigaspeech_prepare.py,recipes/GigaSpeech/ASR/CTC/README.md,,,--data_folder=tests/samples/ASR/ --train_csv=tests/samples/annotation/ASR_train.csv --valid_csv=tests/samples/annotation/ASR_train.csv --test_csv=tests/samples/annotation/ASR_train.csv --number_of_epochs=1 --skip_prep=True --wav2vec2_folder=tests/tmp/wav2vec2_checkpoint,
3-
ASR-Transducers,GigaSpeech,recipes/GigaSpeech/ASR/transducer/train.py,recipes/GigaSpeech/ASR/transducer/hparams/conformer_transducer.yaml,recipes/GigaSpeech/ASR/transducer/gigaspeech_prepare.py,recipes/GigaSpeech/ASR/transducer/README.md,,,--data_folder=tests/samples/ASR/ --train_csv=tests/samples/annotation/ASR_train.csv --valid_csv=tests/samples/annotation/ASR_train.csv --test_csv=tests/samples/annotation/ASR_train.csv --number_of_epochs=1 --skip_prep=True,
3+
ASR-Transducers,GigaSpeech,recipes/GigaSpeech/ASR/transducer/train.py,recipes/GigaSpeech/ASR/transducer/hparams/conformer_transducer.yaml,recipes/GigaSpeech/ASR/transducer/gigaspeech_prepare.py,recipes/GigaSpeech/ASR/transducer/README.md,,,--data_folder=tests/samples/ASR/ --train_csv=tests/samples/annotation/ASR_train.csv --valid_csv=tests/samples/annotation/ASR_train.csv --test_csv=tests/samples/annotation/ASR_train.csv --number_of_epochs=1 --output_neurons=29 --skip_prep=True --use_torchaudio=True,

tests/recipes/LJSpeech.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ TTS,LJSpeech,recipes/LJSpeech/TTS/tacotron2/train.py,recipes/LJSpeech/TTS/tacotr
55
TTS,LJSpeech,recipes/LJSpeech/TTS/vocoder/hifigan/train.py,recipes/LJSpeech/TTS/vocoder/hifigan/hparams/train.yaml,recipes/LJSpeech/ljspeech_prepare.py,recipes/LJSpeech/TTS/README.md,https://www.dropbox.com/sh/m2xrdssiroipn8g/AAD-TqPYLrSg6eNxUkcImeg4a?dl=0,https://huggingface.co/speechbrain/tts-hifigan-ljspeech,--epochs=2 --data_folder=tests/samples/ASR --train_json=tests/samples/annotation/ASR_train.json --valid_json=tests/samples/annotation/ASR_dev.json --test_json=tests/samples/annotation/ASR_dev.json --skip_prep=True --sample_rate=16000,"file_exists=[train_log.txt,log.txt,env.log,train.py,hyperparams.yaml,samples/1/synthesized.wav,samples/1/target.wav,samples/2/synthesized.wav,samples/2/target.wav]"
66
TTS,LJSpeech,recipes/LJSpeech/TTS/vocoder/diffwave/train.py,recipes/LJSpeech/TTS/vocoder/diffwave/hparams/train.yaml,recipes/LJSpeech/ljspeech_prepare.py,recipes/LJSpeech/TTS/README.md,,,--number_of_epochs=2 --data_folder=tests/samples/ASR --train_json=tests/samples/annotation/ASR_train.json --valid_json=tests/samples/annotation/ASR_dev.json --test_json=tests/samples/annotation/ASR_dev.json --skip_prep=True --sample_rate=16000 --num_workers 0,"file_exists=[train_log.txt,log.txt,env.log,train.py,hyperparams.yaml]"
77
TTS,LJSpeech,recipes/LJSpeech/TTS/vocoder/hifigan_discrete/train.py,recipes/LJSpeech/TTS/vocoder/hifigan_discrete/hparams/train.yaml,recipes/LJSpeech/ljspeech_prepare.py,recipes/LJSpeech/TTS/README.md,,,--batch_size=2 --epochs=2 --data_folder=tests/samples/TTS --train_json=tests/samples/annotation/TTS_train.json --valid_json=tests/samples/annotation/TTS_train.json --test_json=tests/samples/annotation/TTS_train.json --skip_prep=True --sample_rate=16000 --codes_save_folder=tests/samples/TTS/codes --skip_extract=True,"file_exists=[train_log.txt,log.txt,env.log,train.py,hyperparams.yaml,samples/1/synthesized.wav,samples/1/target.wav,samples/2/synthesized.wav,samples/2/target.wav]"
8-
quantization,LJSpeech,recipes/LJSpeech/quantization/train.py,recipes/LJSpeech/quantization/hparams/train_discrete_ssl.yaml,recipes/LJSpeech/quantization/ljspeech_prepare.py,recipes/LJSpeech/quantization/README.md,,https://huggingface.co/speechbrain/SSL_Quantization,--data_folder=tests/samples/TTS --train_json=tests/samples/annotation/TTS_train.json --valid_json=tests/samples/annotation/TTS_train.json --test_json=tests/samples/annotation/TTS_train.json --skip_prep=True,"file_exists=[train_log.txt,train.py,log.txt,env.log,hyperparams.yaml]"
8+
quantization,LJSpeech,recipes/LJSpeech/quantization/train.py,recipes/LJSpeech/quantization/hparams/train_discrete_ssl.yaml,recipes/LJSpeech/quantization/ljspeech_prepare.py,recipes/LJSpeech/quantization/README.md,,https://huggingface.co/speechbrain/SSL_Quantization,--data_folder=tests/samples/TTS --train_json=tests/samples/annotation/TTS_train.json --valid_json=tests/samples/annotation/TTS_train.json --test_json=tests/samples/annotation/TTS_train.json --skip_prep=True --n_clusters 10 --kmeans_batch_size 10,"file_exists=[train_log.txt,train.py,log.txt,env.log,hyperparams.yaml]"

tests/recipes/LibriSpeech.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,4 @@ ASR-CTC,LibriSpeech,recipes/LibriSpeech/ASR/CTC/train.py,recipes/LibriSpeech/ASR
4545
ASR-CTC,LibriSpeech,recipes/LibriSpeech/ASR/CTC/train.py,recipes/LibriSpeech/ASR/CTC/hparams/branchformer_large.yaml,recipes/LibriSpeech/ASR/CTC/librispeech_prepare.py,recipes/LibriSpeech/ASR/CTC/README.md,,,--data_folder=tests/samples/ASR/ --train_csv=tests/samples/annotation/ASR_train.csv --valid_csv=tests/samples/annotation/ASR_train.csv --test_csv=[tests/samples/annotation/ASR_train.csv] --number_of_epochs=2 --skip_prep=True --output_neurons=21,"file_exists=[env.log,hyperparams.yaml,log.txt,train_log.txt,train.py,wer_ASR_train.txt,save/21_char.model,save/21_char.vocab]",
4646
ASR-CTC,LibriSpeech,recipes/LibriSpeech/ASR/CTC/train_with_wav2vec_k2.py,recipes/LibriSpeech/ASR/CTC/hparams/train_hf_wav2vec_k2.yaml,recipes/LibriSpeech/ASR/CTC/librispeech_prepare.py,recipes/LibriSpeech/ASR/CTC/README.md,,,--data_folder=tests/samples/ASR/ --skip_prep=True --train_csv=tests/samples/annotation/ASR_train.csv --valid_csv=tests/samples/annotation/ASR_train.csv --test_csv=[tests/samples/annotation/ASR_train.csv] --number_of_epochs=2,"file_exists=[metric_ASR_train/wer_HL_1best.txt,train_log.txt,log.txt,train_with_wav2vec_k2.py,env.log,hyperparams.yaml]",
4747
LM,LibriSpeech,recipes/LibriSpeech/LM/train_ngram.py,recipes/LibriSpeech/LM/hparams/train_ngram.yaml,recipes/LibriSpeech/LM/librispeech_prepare.py,recipes/LibriSpeech/LM/README.md,,,--data_folder=tests/samples/ASR/ --skip_prep=True --train_csv=tests/samples/annotation/ASR_train.csv,"file_exists=[env.log,hyperparams.yaml,log.txt,lang/words.txt,libri_lm_corpus.txt,train_ngram.py]",
48-
quantization,LibriSpeech,recipes/LibriSpeech/quantization/train.py,recipes/LibriSpeech/quantization/hparams/train_discrete_ssl.yaml,recipes/LibriSpeech/quantization/librispeech_prepare.py,recipes/LibriSpeech/quantization/README.md,,,--data_folder=tests/samples/ASR/ --train_csv=tests/samples/annotation/ASR_train.csv --valid_csv=tests/samples/annotation/ASR_train.csv --test_csv=tests/samples/annotation/ASR_train.csv --skip_prep=True,"file_exists=[train_log.txt,train.py,log.txt,env.log,hyperparams.yaml]",
48+
quantization,LibriSpeech,recipes/LibriSpeech/quantization/train.py,recipes/LibriSpeech/quantization/hparams/train_discrete_ssl.yaml,recipes/LibriSpeech/quantization/librispeech_prepare.py,recipes/LibriSpeech/quantization/README.md,,,--data_folder=tests/samples/ASR/ --train_csv=tests/samples/annotation/ASR_train.csv --valid_csv=tests/samples/annotation/ASR_train.csv --test_csv=tests/samples/annotation/ASR_train.csv --skip_prep=True --n_clusters 10 --kmeans_batch_size 10,"file_exists=[train_log.txt,train.py,log.txt,env.log,hyperparams.yaml]",

0 commit comments

Comments (0)