fpaissan
diff --git a/‎.github/workflows/pythonapp.yml‎
Lines changed: 12 additions & 14 deletions b/‎.github/workflows/pythonapp.yml‎
Lines changed: 12 additions & 14 deletions
diff --git a/‎.github/workflows/verify-docs-gen.yml‎
Lines changed: 11 additions & 8 deletions b/‎.github/workflows/verify-docs-gen.yml‎
Lines changed: 11 additions & 8 deletions
diff --git a/‎conftest.py‎
Lines changed: 1 addition & 7 deletions b/‎conftest.py‎
Lines changed: 1 addition & 7 deletions
diff --git a/‎recipes/Aishell1Mix/separation/train.py‎
Lines changed: 2 additions & 2 deletions b/‎recipes/Aishell1Mix/separation/train.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎recipes/BinauralWSJ0Mix/separation/train.py‎
Lines changed: 2 additions & 2 deletions b/‎recipes/BinauralWSJ0Mix/separation/train.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎recipes/CVSS/S2ST/hparams/train_fr-en.yaml‎
Lines changed: 1 addition & 1 deletion b/‎recipes/CVSS/S2ST/hparams/train_fr-en.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎recipes/CommonVoice/ASR/transformer/README.md‎
Lines changed: 28 additions & 24 deletions b/‎recipes/CommonVoice/ASR/transformer/README.md‎
Lines changed: 28 additions & 24 deletions
@@ -17,10 +17,21 @@ jobs:
                 python-version: [3.8, 3.12]
         steps:
             - uses: actions/checkout@v2
+            - uses: actions/cache@v4
+              id: cache-uv
+              with:
+                  path: ~/.cache/uv
+                  key: ${{ runner.os }}-python-${{ matrix.python-version }}-uv
             - name: Set up Python ${{ matrix.python-version }}
-              uses: actions/setup-python@v1
+              uses: actions/setup-python@v5
               with:
                   python-version: ${{ matrix.python-version }}
+            - name: Full dependencies
+              run: |
+                  pip install uv
+                  uv pip install --system ctc-segmentation  # ctc-segmentation is funky with uv due to their oldest-supported-numpy dependency
+                  uv pip install --system -r requirements.txt torch==2.2.1+cpu torchaudio==2.2.1+cpu --extra-index-url https://download.pytorch.org/whl/cpu k2==1.24.4.dev20240223+cpu.torch2.2.1 --find-links https://k2-fsa.github.io/k2/cpu.html kaldilm==1.15.1 spacy==3.7.4 flair==0.13.1
+                  uv pip install --system --editable . --no-deps  # already installed pinned deps from requirements.txt, we're good
             - name: Install sox
               run: |
                   sudo apt-get update
@@ -33,19 +44,6 @@ jobs:
             #      sudo apt-get install -y ffmpeg
             - name: Display Python version
               run: python -c "import sys; print(sys.version)"
-            - name: Full dependencies
-              run: |
-                  sudo apt-get update
-                  # up to k2 compatible torch version
-                  pip install torch==2.2.1 torchaudio==2.2.1
-                  pip install -r requirements.txt
-                  pip install --editable .
-                  pip install ctc-segmentation
-                  pip install k2==1.24.4.dev20240223+cpu.torch2.2.1 -f https://k2-fsa.github.io/k2/cpu.html
-                  pip install protobuf
-                  pip install kaldilm==1.15.1
-                  pip install spacy==3.7.4
-                  pip install flair==0.13.1
             - name: Consistency tests with pytest
               run: |
                   pytest tests/consistency
 
@@ -11,19 +11,22 @@ jobs:
         runs-on: ubuntu-latest
         steps:
             - uses: actions/checkout@v2
+            - uses: actions/cache@v4
+              id: cache-uv
+              with:
+                  path: ~/.cache/uv
+                  key: ${{ runner.os }}-python-docs-uv
             - name: Setup Python 3.8
-              uses: actions/setup-python@v2
+              uses: actions/setup-python@v5
               with:
                   python-version: '3.8'
             - name: Full dependencies
               run: |
-                  # up to k2 compatible torch version
-                  pip install torch==2.1.2 torchaudio==2.1.2
-                  pip install -r requirements.txt
-                  pip install --editable .
-                  pip install -r docs/docs-requirements.txt
-                  pip install k2==1.24.4.dev20231220+cpu.torch2.1.2 -f https://k2-fsa.github.io/k2/cpu.html
+                  pip install uv
+                  uv pip install --system ctc-segmentation  # ctc-segmentation is funky with uv due to their oldest-supported-numpy dependency
+                  uv pip install --system -r requirements.txt -r docs/docs-requirements.txt torch==2.2.1+cpu torchaudio==2.2.1+cpu --extra-index-url https://download.pytorch.org/whl/cpu k2==1.24.4.dev20240223+cpu.torch2.2.1 --find-links https://k2-fsa.github.io/k2/cpu.html kaldilm==1.15.1 spacy==3.7.4 flair==0.13.1
+                  uv pip install --system --editable . --no-deps  # already installed pinned deps from requirements.txt, we're good
             - name: Generate docs
               run: |
                   cd docs
-                  make html
+                  SPHINXOPTS="-j=auto" make html
@@ -40,13 +40,7 @@ def pytest_generate_tests(metafunc):
 except ModuleNotFoundError:
     collect_ignore.append("speechbrain/utils/kmeans.py")
     collect_ignore.append(
-        "speechbrain/lobes/models/huggingface_transformers/discrete_hubert.py"
-    )
-    collect_ignore.append(
-        "speechbrain/lobes/models/huggingface_transformers/discrete_wav2vec2.py"
-    )
-    collect_ignore.append(
-        "speechbrain/lobes/models/huggingface_transformers/discrete_wavlm.py"
+        "speechbrain/lobes/models/huggingface_transformers/discrete_ssl.py"
     )
 try:
     import peft  # noqa: F401
 
@@ -165,7 +165,7 @@ def fit_batch(self, batch):
                             self.nonfinite_count
                         )
                     )
-                    loss.data = torch.tensor(0).to(self.device)
+                    loss.data = torch.tensor(0.0).to(self.device)
             else:
                 predictions, targets = self.compute_forward(
                     mixture, targets, sb.Stage.TRAIN, noise
@@ -197,7 +197,7 @@ def fit_batch(self, batch):
                             self.nonfinite_count
                         )
                     )
-                    loss.data = torch.tensor(0).to(self.device)
+                    loss.data = torch.tensor(0.0).to(self.device)
         self.optimizer.zero_grad()
 
         return loss.detach().cpu()
 
@@ -253,7 +253,7 @@ def fit_batch(self, batch):
                             self.nonfinite_count
                         )
                     )
-                    loss.data = torch.tensor(0).to(self.device)
+                    loss.data = torch.tensor(0.0).to(self.device)
             else:
                 predictions, targets = self.compute_forward(
                     mixture, targets, sb.Stage.TRAIN, noise
@@ -285,7 +285,7 @@ def fit_batch(self, batch):
                             self.nonfinite_count
                         )
                     )
-                    loss.data = torch.tensor(0).to(self.device)
+                    loss.data = torch.tensor(0.0).to(self.device)
         self.optimizer.zero_grad()
 
         return loss.detach().cpu()
 
@@ -203,7 +203,7 @@ epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
     save_file: !ref <train_log>
 
-valid_search: !new:speechbrain.decoders.seq2seq.S2STransformerGreedySearch
+valid_search: !new:speechbrain.decoders.seq2seq.S2STransformerGreedySearcher
     modules: [!ref <transformer>, !ref <seq_lin>, null]
     bos_index: !ref <bos_index>
     eos_index: !ref <eos_index>
 
@@ -21,9 +21,8 @@ It is important to note that CommonVoice initially offers mp3 audio files at 42H
 # Languages
 Here is a list of the different languages that we tested within the CommonVoice dataset
 with our transformers:
-- French
 - Italian
-- German
+- French
 
 For Whisper-large-v2 and medium finetuning, here is a list of the different languages that we tested within the CommonVoice.14_0 dataset:
 - Hindi
@@ -36,30 +35,35 @@ For Whisper-large-v2 and medium finetuning, here is list of the different langua
 
 
 # Results
-
-| Language | Release | hyperparams file | LM | Val. CER | Val. WER | Test CER | Test WER | Hugging Face link |  Model link | GPUs |
+## Transformer
+| Language | CV version | hyperparams file |  LM | Val. CER | Val. WER | Test CER | Test WER | Hugging Face link |  Model link | GPUs |
 | ------------- |:-------------:|:---------------------------:| -----:| -----:| -----:| -----:| -----:|:-----------:| :-----------:| :-----------:|
-| French | 2023-08-15 | train_fr.yaml | No | 5.41 | 16.00 | 5.41 | 17.61 | - | [model](https://www.dropbox.com/sh/zvu9h9pctksnuvp/AAD1kyS3-N0YtmcoMgjM-_Tba?dl=0) | 1xV100 32GB |
-| Italian | 2023-08-15 | train_it.yaml | No | 3.72 | 16.31 | 4.01 | 16.80 | - | [model](https://www.dropbox.com/sh/yy8du12jgbkm3qe/AACBHhTCM-cU-oGvAKJ9kTtaa?dl=0) | 1xV100 32GB |
-| German | 2023-08-15 | train_de.yaml | No | 3.60 | 15.33 | 4.22 | 16.76 |- | [model](https://www.dropbox.com/sh/umfq986o3d9o1px/AAARNF2BFYELOWx3xhIOEoZka?dl=0) | 1xV100 32GB |
+| Italian | 14.0 | conformer_large.yaml | No | 2.91 | 9.79 | 2.68 | 9.27 | - | [model](https://www.dropbox.com/scl/fo/tf44itp8f4icf2z5qlxpm/AIOYS_CMov5ss5Q9AonFEno?rlkey=xek5ikbhqoovcao31iniqimrr&dl=0) | 2xV100 32GB |
+| French | 14.0 | conformer_large.yaml | No | 2.64 | 7.62 | 3.55 | 9.48 | - | [model](https://www.dropbox.com/scl/fo/y862nl95zoe4sj3347095/ACxmT3_uw1ScLoYs0DSbGRM?rlkey=q66dk13w5nu1lkphtdinnnigm&dl=0) | 2xV100 32GB |
+
 
-## Whisper Finetuning Result:
-Following table contains whisper-finetuning results for 1 epoch using whisper_medium model, freezing encoder and finetuning decoder.
-| Language | Release | Model | hyperparams file | LM | Val. CER | Val. WER | Test CER | Test WER | HuggingFace link | Model link | GPUs |
-| ------------- |:-------------:| -----:|:---------------------------:| -----:| -----:| -----:| -----:| -----:| :-----------: |:-----------:| :-----------:|
-| Arabic | 2023-08-15 | large-v2 | train_ar_hf_whisper.yaml | No | 4.02 | 12.47 | 5.20 | 16.96 | [model](https://huggingface.co/speechbrain/asr-whisper-large-v2-commonvoice-ar) | [model](https://www.dropbox.com/sh/45o3xkxdheksdfi/AAAs1zxCw76mcAbudYEonzg0a?dl=0) | 1xV100 16GB |
-| Persian | 2023-08-15 | large-v2 | train_fa_hf_whisper.yaml | No | 6.91 | 25.30 | 9.38 | 31.75 | [model](https://huggingface.co/speechbrain/asr-whisper-large-v2-commonvoice-fa) | [model](https://www.dropbox.com/sh/a2vd6nn0icybdcz/AAC7z41jcheW1R9aNNK4-lHha?dl=0) | 1xV100 16GB |
-| Mongolian | 2023-08-15 | large-v2 | train_mn_hf_whisper.yaml | No | 24.05 | 62.37 | 25.73 | 64.92 | [model](https://huggingface.co/speechbrain/asr-whisper-large-v2-commonvoice-mn) | [model](https://www.dropbox.com/sh/2t0srpb2nt2wst5/AACRJQCwooRaLxPoLkmTvKq8a?dl=0) | 1xV100 16GB |
-| Hindi | 2023-08-15 | large-v2 | train_hi_hf_whisper.yaml | No | 4.54 | 10.46 | 7.00 | 15.27 | [model](https://huggingface.co/speechbrain/asr-whisper-large-v2-commonvoice-hi) | [model](https://www.dropbox.com/sh/qkcm86bzzb1y4sj/AABjA_ckw_hPwJCBzUiXLWrBa?dl=0) | 1xV100 16GB |
-| Serbian | 2023-08-15 | large-v2 | train_sr_hf_whisper.yaml | No | 8.92 | 27.12 |  7.60 | 23.63 | [model](https://huggingface.co/speechbrain/asr-whisper-large-v2-commonvoice-sr) | [model](https://www.dropbox.com/sh/a798gw3k2ezerp5/AADz7UxvQRQDOH4DnCJ4J4dja?dl=0) | 1xV100 16GB |
-| French | 2023-08-15 | large-v2 | train_fr_hf_whisper.yaml | No | 3.00 | 8.95 | 3.83 | 10.62 | [model](https://huggingface.co/speechbrain/asr-whisper-large-v2-commonvoice-fr) | [model](https://www.dropbox.com/sh/8c2lpa7m5amasjz/AAD5AZlD6OslhFc0W81D3nosa?dl=0) | 1xV100 16GB |
-| Arabic | 2023-08-15 | Medium | train_ar_hf_whisper.yaml | No | 4.95 | 14.82 | 6.51 | 20.24 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-ar) | [model](https://www.dropbox.com/sh/0e4vtvbg6hf2e13/AAD-tfzCZGUrh85aeAeJj8I9a?dl=0) | 1xV100 16GB |
-| Persian | 2023-08-15 | Medium | train_fa_hf_whisper.yaml | No | 8.58 | 35.48 | 11.27 | 35.48 |[model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-fa) | [model](https://www.dropbox.com/sh/w1urihacmtoulmi/AADMtK3qeAF5mLYk5LMHyiOra?dl=0) | 1xV100 16GB |
-| Mongolian | 2023-08-15 | Medium | train_mn_hf_whisper.yaml | No |  27.08 |  67.41 | 27.69 | 67.84 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-mn) | [model](https://www.dropbox.com/sh/6fbhmey7q1udykf/AAAiGObWTTe2cdXHt2Uv2VQXa?dl=0) | 1xV100 16GB |
-| Hindi | 2023-08-15 | Medium | train_hi_hf_whisper.yaml | No | 5.82 | 12.51 | 8.16 | 17.04 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-hi) | [model](https://www.dropbox.com/sh/z9vriyy3i6xqvif/AAB7ql-40yWTjKEQJiuhYUr5a?dl=0) | 1xV100 16GB |
-| Serbian | 2023-08-15 | Medium | train_sr_hf_whisper.yaml | No | 8.63 | 25.10 |  7.25 | 22.29 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-sr) | [model](https://www.dropbox.com/sh/5lhk230q45sd97z/AAD-U9b_Ws_vFPs-cazsbOY0a?dl=0) | 1xV100 16GB |
-| French | 2023-08-15 | Medium | train_fr_hf_whisper.yaml | No | 3.26 | 9.65 | 4.30 | 11.79 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-fr) | [model](https://www.dropbox.com/sh/7zlk07yxnslk4yy/AAANcI3EaG0ZFy6UrKk1Mm2Ga?dl=0) | 1xV100 16GB |
-| Italian | 2023-08-15 | Medium | train_it_hf_whisper.yaml | No | 2.42 | 8.26 | 3.03 | 9.63 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-it) | [model](https://www.dropbox.com/sh/u5tex3nvzzs5pex/AAD-J7cOBE_fNfBono8waTKCa?dl=0) | 1xV100 16GB |
+## Whisper Finetuning
+The following table contains finetuning results for 1 epoch using Whisper models, freezing the encoder and finetuning the decoder.
+| Language | Release | Model | commit hash | hyperparams file | LM | Val. CER | Val. WER | Test CER | Test WER | HuggingFace link | Model link | GPUs |
+| ------------- |:-------------:| -----:|-----:|:---------------------------:| -----:| -----:| -----:| -----:| -----:| :-----------: |:-----------:| :-----------:|
+| French | 2024-03-28 | large-v3 | [e4e2e13](https://github.com/speechbrain/speechbrain/pull/2450/commits/e4e2e135e9edafc6a26fc9aa4df9a94eaf86de41) | train_hf_whisper.yaml | No | 2.31% | 7.38% | 3.11% | 9.09% | x | [DropBox](https://www.dropbox.com/scl/fo/erwh83bg2jbzf3bf8v6ur/AHmQ5i8uWRaieXCOe5DSRUk?rlkey=kjivz2hx3o1pi7wbzadjznpid&dl=0) | 2xV100 32GB |
+| Italian | 2024-03-28 | large-v3 | [e4e2e13](https://github.com/speechbrain/speechbrain/pull/2450/commits/e4e2e135e9edafc6a26fc9aa4df9a94eaf86de41) | train_hf_whisper.yaml | No | 1.27% | 4.85% | 1.62% | 5.47% | x | [DropBox](https://www.dropbox.com/scl/fo/gtfo3qoz1ceg4xg0dfq1d/AIabz2J9NxkNAEbGF7rHCHU?rlkey=eokq2a2z07ke48scazqnn5v73&dl=0) | 2xV100 32GB |
+| French | 2024-03-28 | medium | [e4e2e13](https://github.com/speechbrain/speechbrain/pull/2450/commits/e4e2e135e9edafc6a26fc9aa4df9a94eaf86de41) | train_hf_whisper.yaml | No | 2.92% | 8.90% | 4.02% | 11.07% | x | [DropBox](https://www.dropbox.com/scl/fo/72aiaflc9w6168rk9jv6u/AGIVW5ml74wZYED7HUFjX-U?rlkey=nz7eo6i6gbze7rwv8la6sxobx&dl=0) | 2xV100 32GB |
+| Italian | 2024-03-28 | medium | [e4e2e13](https://github.com/speechbrain/speechbrain/pull/2450/commits/e4e2e135e9edafc6a26fc9aa4df9a94eaf86de41) | train_hf_whisper.yaml | No | 2.05% | 7.17% | 2.31% | 7.79% | x | [DropBox](https://www.dropbox.com/scl/fo/sso9k4n6hma9cub44oi2p/AKINkGK0XMCYND-JrMQh4LQ?rlkey=gywsgxle4k473z9c7tf4l1m7n&dl=0) | 2xV100 32GB |
+| French | 2024-03-28 | small | [e4e2e13](https://github.com/speechbrain/speechbrain/pull/2450/commits/e4e2e135e9edafc6a26fc9aa4df9a94eaf86de41) | train_hf_whisper.yaml | No | 4.34% | 12.57% | 5.89% | 15.46% | x | [DropBox](https://www.dropbox.com/scl/fo/h8idsgzp8xz5vsupqv0q8/ACS13H9awYU2G7DeTcyxiV0?rlkey=bbqpx0lbf5aify6ib029g2gn0&dl=0) | 2xV100 32GB |
+| Italian | 2024-03-28 | small | [e4e2e13](https://github.com/speechbrain/speechbrain/pull/2450/commits/e4e2e135e9edafc6a26fc9aa4df9a94eaf86de41) | train_hf_whisper.yaml | No | 3.20% | 11.40% | 3.71% | 12.25% | x | [DropBox](https://www.dropbox.com/scl/fo/o4objjm5c65c5hzy1vvk4/ABXA2V1Gy1GCg7FGS6Ty9yc?rlkey=4kbjmmljdznvureyxfip5tw8q&dl=0) | 2xV100 32GB |
+| Arabic | 2023-08-15 | large-v2 | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) | train_ar_hf_whisper.yaml | No | 4.02 | 12.47 | 5.20 | 16.96 | [model](https://huggingface.co/speechbrain/asr-whisper-large-v2-commonvoice-ar) | [model](https://www.dropbox.com/sh/45o3xkxdheksdfi/AAAs1zxCw76mcAbudYEonzg0a?dl=0) | 1xV100 16GB |
+| Persian | 2023-08-15 | large-v2 | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) |train_fa_hf_whisper.yaml | No | 6.91 | 25.30 | 9.38 | 31.75 | [model](https://huggingface.co/speechbrain/asr-whisper-large-v2-commonvoice-fa) | [model](https://www.dropbox.com/sh/a2vd6nn0icybdcz/AAC7z41jcheW1R9aNNK4-lHha?dl=0) | 1xV100 16GB |
+| Mongolian | 2023-08-15 | large-v2 | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) |train_mn_hf_whisper.yaml | No | 24.05 | 62.37 | 25.73 | 64.92 | [model](https://huggingface.co/speechbrain/asr-whisper-large-v2-commonvoice-mn) | [model](https://www.dropbox.com/sh/2t0srpb2nt2wst5/AACRJQCwooRaLxPoLkmTvKq8a?dl=0) | 1xV100 16GB |
+| Hindi | 2023-08-15 | large-v2 | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) |train_hi_hf_whisper.yaml | No | 4.54 | 10.46 | 7.00 | 15.27 | [model](https://huggingface.co/speechbrain/asr-whisper-large-v2-commonvoice-hi) | [model](https://www.dropbox.com/sh/qkcm86bzzb1y4sj/AABjA_ckw_hPwJCBzUiXLWrBa?dl=0) | 1xV100 16GB |
+| Serbian | 2023-08-15 | large-v2 | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) |train_sr_hf_whisper.yaml | No | 8.92 | 27.12 |  7.60 | 23.63 | [model](https://huggingface.co/speechbrain/asr-whisper-large-v2-commonvoice-sr) | [model](https://www.dropbox.com/sh/a798gw3k2ezerp5/AADz7UxvQRQDOH4DnCJ4J4dja?dl=0) | 1xV100 16GB |
+| Arabic | 2023-08-15 | Medium | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) |train_ar_hf_whisper.yaml | No | 4.95 | 14.82 | 6.51 | 20.24 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-ar) | [model](https://www.dropbox.com/sh/0e4vtvbg6hf2e13/AAD-tfzCZGUrh85aeAeJj8I9a?dl=0) | 1xV100 16GB |
+| Persian | 2023-08-15 | Medium | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) |train_fa_hf_whisper.yaml | No | 8.58 | 35.48 | 11.27 | 35.48 |[model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-fa) | [model](https://www.dropbox.com/sh/w1urihacmtoulmi/AADMtK3qeAF5mLYk5LMHyiOra?dl=0) | 1xV100 16GB |
+| Mongolian | 2023-08-15 | Medium | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) |train_mn_hf_whisper.yaml | No |  27.08 |  67.41 | 27.69 | 67.84 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-mn) | [model](https://www.dropbox.com/sh/6fbhmey7q1udykf/AAAiGObWTTe2cdXHt2Uv2VQXa?dl=0) | 1xV100 16GB |
+| Hindi | 2023-08-15 | Medium | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) |train_hi_hf_whisper.yaml | No | 5.82 | 12.51 | 8.16 | 17.04 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-hi) | [model](https://www.dropbox.com/sh/z9vriyy3i6xqvif/AAB7ql-40yWTjKEQJiuhYUr5a?dl=0) | 1xV100 16GB |
+| Serbian | 2023-08-15 | Medium | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) |train_sr_hf_whisper.yaml | No | 8.63 | 25.10 |  7.25 | 22.29 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-sr) | [model](https://www.dropbox.com/sh/5lhk230q45sd97z/AAD-U9b_Ws_vFPs-cazsbOY0a?dl=0) | 1xV100 16GB |
+| French | 2023-08-15 | Medium | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) |train_fr_hf_whisper.yaml | No | 3.26 | 9.65 | 4.30 | 11.79 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-fr) | [model](https://www.dropbox.com/sh/7zlk07yxnslk4yy/AAANcI3EaG0ZFy6UrKk1Mm2Ga?dl=0) | 1xV100 16GB |
+| Italian | 2023-08-15 | Medium | [b112860](https://github.com/speechbrain/speechbrain/pull/2254/commits/b1128604e040d43e80e9a3214c5116f34d5806db) |train_it_hf_whisper.yaml | No | 2.42 | 8.26 | 3.03 | 9.63 | [model](https://huggingface.co/speechbrain/asr-whisper-medium-commonvoice-it) | [model](https://www.dropbox.com/sh/u5tex3nvzzs5pex/AAD-J7cOBE_fNfBono8waTKCa?dl=0) | 1xV100 16GB |
 
 # **About SpeechBrain**
 - Website: https://speechbrain.github.io/
Original file line number	Diff line number	Diff line change
`@@ -165,7 +165,7 @@ def fit_batch(self, batch):`
`165`	`165`	`self.nonfinite_count`
`166`	`166`	`)`
`167`	`167`	`)`
`168`		`- loss.data = torch.tensor(0).to(self.device)`
	`168`	`+ loss.data = torch.tensor(0.0).to(self.device)`
`169`	`169`	`else:`
`170`	`170`	`predictions, targets = self.compute_forward(`
`171`	`171`	`mixture, targets, sb.Stage.TRAIN, noise`
`@@ -197,7 +197,7 @@ def fit_batch(self, batch):`
`197`	`197`	`self.nonfinite_count`
`198`	`198`	`)`
`199`	`199`	`)`
`200`		`- loss.data = torch.tensor(0).to(self.device)`
	`200`	`+ loss.data = torch.tensor(0.0).to(self.device)`
`201`	`201`	`self.optimizer.zero_grad()`
`202`	`202`
`203`	`203`	`return loss.detach().cpu()`
Original file line number	Diff line number	Diff line change
`@@ -253,7 +253,7 @@ def fit_batch(self, batch):`
`253`	`253`	`self.nonfinite_count`
`254`	`254`	`)`
`255`	`255`	`)`
`256`		`- loss.data = torch.tensor(0).to(self.device)`
	`256`	`+ loss.data = torch.tensor(0.0).to(self.device)`
`257`	`257`	`else:`
`258`	`258`	`predictions, targets = self.compute_forward(`
`259`	`259`	`mixture, targets, sb.Stage.TRAIN, noise`
`@@ -285,7 +285,7 @@ def fit_batch(self, batch):`
`285`	`285`	`self.nonfinite_count`
`286`	`286`	`)`
`287`	`287`	`)`
`288`		`- loss.data = torch.tensor(0).to(self.device)`
	`288`	`+ loss.data = torch.tensor(0.0).to(self.device)`
`289`	`289`	`self.optimizer.zero_grad()`
`290`	`290`
`291`	`291`	`return loss.detach().cpu()`