
Commit f9f21c6

asumagic and mravanelli authored
Streaming ASR interfaces (#2377)
* Implemented high level streaming interfaces More WIP Bunch of filter properties impl More WIP interfaces stuff Fix type annotation in filter_analysis more wip interfaces wip Fix wrong context var set wip thoughts Implement file transcription
* Renames and fixes
* Add transcribe_file_streaming
* Formatting fixes
* Revert accidentally introduced change to max_batch_len
* Reworking interface naming and docstring
* Use the searcher directly, forwarding extra args
* More docstrings
* Fix parameter order
* Add WIP StreamingTransducerASR example
* Formatting
* More docstrings and renames for interfaces
* Docstrings for context
* Merge the unnecessary wrapper
* Rename StreamingTransducerASR to StreamingASR
* Fix precommit
* Remove unused fea_extractor field
* Fix test error by commenting out inference stuff
* Add some docstrings to streamingfeaturewrapper
* More docs
* Formatting
* Feature extraction streaming wrapper docstrings
* Add missing file docstring for filter_analysis
* Tentative fix for docs gen error
* Fix some missing docstring args in ASR
* Allow using ffmpeg streaming with StreamingASR
* Extract stream logic into _get_audio_stream
* Docstring for _get_audio_stream
* Formatting
* Move out some streaming tokenizer logic
* Accept stupid suggestions from formatter
* Somewhat more generic StreamingASR
* Tokenizer-agnostic StreamingASR
* Add commented out tokenizer streaming hparams
* Add missing docstring
* Remove unused import from ASR
* CI and configuration fixes; use python 3.9 in CI
* Fix doctest using inconsistent left context size
* Clarify on tokenizer_context init
* Update HPARAMS_NEEDED for StreamingASR
* Improve transducer forward docs for extra args
* Fix code blocks in filter_analysis
* Linting
* fix broken indent in filter_analysis examples...
* Update author lists
* Remove currently unused has_overlap
* Clarify on fea_streaming_extractor properties
* Fix ASRStreamingContext doc wording
* Improve docstring for `get_chunk_size_frames`
* wip test
* Streaming feature wrapper test + better docs
* Improve StreamingFeatureWraper docstring
* Improve docstring and comments on spm streaming decode
* Fixed accidentally duplicated docstring
* Fix very stupid typo
* Add notice for trained streaming ASR inference
* Use LengthsCapableSequential instead of custom wrapper
* Precommit fix
* Added mechanism to inject zero chunks at the end to fix trunc
* Simplify apply in YAML
* Add decoding_function abstraction for StreamingASR
* Fix partial apply shenanigans

---------

Co-authored-by: Mirco Ravanelli <mirco.ravanelli@gmail.com>
1 parent f9b5473 commit f9f21c6

18 files changed: 1081 additions & 31 deletions


.github/workflows/pre-commit.yml

Lines changed: 1 addition & 1 deletion

@@ -12,5 +12,5 @@ jobs:
     - uses: actions/checkout@v2
     - uses: actions/setup-python@v2
       with:
-        python-version: '3.8'
+        python-version: '3.9'
     - uses: pre-commit/action@v2.0.3

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@ jobs:
         ref: main
     - uses: actions/setup-python@v2
       with:
-        python-version: 3.8
+        python-version: 3.9
     - name: Install pypa/build
      run: python -m pip install build --user
    - name: Build binary wheel and source tarball

.github/workflows/verify-docs-gen.yml

Lines changed: 2 additions & 2 deletions

@@ -11,10 +11,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
-      - name: Setup Python 3.8
+      - name: Setup Python 3.9
         uses: actions/setup-python@v2
         with:
-          python-version: '3.8'
+          python-version: '3.9'
       - name: Full dependencies
         run: |
           # up to k2 compatible torch version

docs/conf.py

Lines changed: 1 addition & 14 deletions

@@ -97,20 +97,6 @@ def run_apidoc(app):
     import better_apidoc

     better_apidoc.APP = app
-
-    better_apidoc.main(
-        [
-            "better-apidoc",
-            "-t",
-            "_apidoc_templates",
-            "--force",
-            "--no-toc",
-            "--separate",
-            "-o",
-            "API",
-            os.path.dirname(hyperpyyaml.__file__),
-        ]
-    )
     better_apidoc.main(
         [
             "better-apidoc",
@@ -122,6 +108,7 @@ def run_apidoc(app):
             "-o",
             "API",
             os.path.join("../", "speechbrain"),
+            os.path.dirname(hyperpyyaml.__file__),
         ]
     )

recipes/LibriSpeech/ASR/transducer/README.md

Lines changed: 12 additions & 0 deletions

@@ -64,6 +64,18 @@ may end up forming indirect dependencies to audio many seconds ago.
 | 4 | - | 3.12% | 3.13% | 3.37% | 3.51% | 3.80% |
 | 2 | - | 3.19% | 3.24% | 3.50% | 3.79% | 4.38% |

+### Inference
+
+Once your model is trained, a few manual steps are needed before you can use it with the high-level streaming interfaces (`speechbrain.inference.ASR.StreamingASR`):
+
+1. Create a new directory where you want to store the model.
+2. Copy `results/conformer_transducer/<seed>/lm.ckpt` (optional; rescoring LMs are currently unsupported for streaming) and `tokenizer.ckpt` to that directory.
+3. Copy `results/conformer_transducer/<seed>/save/CKPT+????/model.ckpt` and `normalizer.ckpt` to that directory.
+4. Copy your hyperparameters file to that directory. Uncomment the streaming-specific keys and remove any training-specific keys. Alternatively, grab the inference hyperparameters YAML for this model from HuggingFace and adapt it to any changes you may have made.
+5. You can now instantiate a `StreamingASR` for your model using `StreamingASR.from_hparams("/path/to/model/")`.
+
+The contents of that directory may be uploaded as a HuggingFace model, in which case the model source path can simply be specified as `youruser/yourmodel`.
+
 # **About SpeechBrain**
 - Website: https://speechbrain.github.io/
 - Code: https://github.com/speechbrain/speechbrain/
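The five numbered steps in the README addition above boil down to very little user code once the model directory exists. The sketch below illustrates the intended call shape with a toy stand-in class rather than the real `speechbrain.inference.ASR.StreamingASR` (so the yielded chunks and the exact `transcribe_file_streaming` signature are illustrative assumptions, not the actual API contract):

```python
from dataclasses import dataclass
from typing import Iterator

# Toy stand-in mimicking the call shape of StreamingASR; NOT the real
# speechbrain class. Real usage: StreamingASR.from_hparams("/path/to/model/")
# or a HuggingFace source such as "youruser/yourmodel".
@dataclass
class ToyStreamingASR:
    source: str

    @classmethod
    def from_hparams(cls, source: str) -> "ToyStreamingASR":
        # The real class would load hyperparams.yaml, model.ckpt,
        # tokenizer.ckpt and normalizer.ckpt from `source` here.
        return cls(source=source)

    def transcribe_file_streaming(self, path: str) -> Iterator[str]:
        # The real method decodes the file chunk by chunk and yields partial
        # transcripts; here we fake three chunks' worth of output.
        for piece in ["HELLO ", "WORLD ", "AGAIN"]:
            yield piece

asr = ToyStreamingASR.from_hparams("/path/to/model/")
transcript = "".join(asr.transcribe_file_streaming("test.wav"))
print(transcript)  # HELLO WORLD AGAIN
```

The point of the generator-style interface is that partial transcripts become available as soon as each chunk is decoded, instead of only after the whole file has been processed.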

recipes/LibriSpeech/ASR/transducer/hparams/conformer_transducer.yaml

Lines changed: 21 additions & 1 deletion

@@ -10,7 +10,7 @@

 # Seed needs to be set at top of yaml, before objects with parameters are made
 seed: 3407
-__set_seed: !!python/object/apply:torch.manual_seed [!ref <seed>]
+__set_seed: !apply:torch.manual_seed [!ref <seed>]
 output_folder: !ref results/conformer_transducer_large/<seed>
 output_wer_folder: !ref <output_folder>/
 save_folder: !ref <output_folder>/save
@@ -399,3 +399,23 @@ error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats

 cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
     split_tokens: True
+
+# for the inference hparams, you will need to include and uncomment something like this:
+
+# make_tokenizer_streaming_context: !name:speechbrain.tokenizers.SentencePiece.SentencePieceDecoderStreamingContext
+# tokenizer_decode_streaming: !name:speechbrain.tokenizers.SentencePiece.spm_decode_preserve_leading_space
+
+# make_decoder_streaming_context: !name:speechbrain.decoders.transducer.TransducerGreedySearcherStreamingContext # default constructor
+# decoding_function: !name:speechbrain.decoders.transducer.TransducerBeamSearcher.transducer_greedy_decode_streaming
+#     - !ref <Greedysearcher> # self
+
+# fea_streaming_extractor: !new:speechbrain.lobes.features.StreamingFeatureWrapper
+#     module: !new:speechbrain.nnet.containers.LengthsCapableSequential
+#         - !ref <compute_features>
+#         - !ref <normalize>
+#         - !ref <CNN>
+#     # don't consider normalization as part of the input filter chain.
+#     # normalization will operate at chunk level, which mismatches training
+#     # somewhat, but does not appear to result in noticeable degradation.
+#     properties: !apply:speechbrain.utils.filter_analysis.stack_filter_properties
+#         - [!ref <compute_features>, !ref <CNN>]
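The commented-out `properties` key above stacks the filter properties (window size, stride) of `compute_features` and the `CNN` frontend, so the streaming wrapper knows how much raw audio one chunk of output frames depends on. The composition rule for two chained sliding-window filters is standard receptive-field arithmetic; here is a self-contained sketch of that rule (an illustration only, not the actual `speechbrain.utils.filter_analysis.stack_filter_properties` implementation):

```python
from dataclasses import dataclass

@dataclass
class FilterProps:
    """Window size and stride of a sliding-window filter, in input samples."""
    window_size: int
    stride: int

def stack(a: FilterProps, b: FilterProps) -> FilterProps:
    # When b consumes a's output: one output frame of b sees
    # (b.window_size - 1) strides of a plus one full window of a,
    # and advances by the product of both strides.
    return FilterProps(
        window_size=a.window_size + (b.window_size - 1) * a.stride,
        stride=a.stride * b.stride,
    )

# e.g. 25 ms / 10 ms fbanks at 16 kHz followed by a stride-2, width-3 conv:
fbank = FilterProps(window_size=400, stride=160)
conv = FilterProps(window_size=3, stride=2)
stacked = stack(fbank, conv)
print(stacked)  # FilterProps(window_size=720, stride=320)
```

This is also why the YAML comment excludes `normalize` from the stacked properties: per-chunk normalization has no window/stride footprint of its own, so only `compute_features` and the `CNN` contribute to the chunk-size computation.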

speechbrain/decoders/transducer.py

Lines changed: 35 additions & 0 deletions

@@ -5,7 +5,19 @@
 Sung-Lin Yeh 2020
 """
 import torch
+from dataclasses import dataclass
 from functools import partial
+from typing import Optional, Any
+
+
+@dataclass
+class TransducerGreedySearcherStreamingContext:
+    """Simple wrapper for the hidden state of the transducer greedy searcher.
+    Used by :meth:`~TransducerBeamSearcher.transducer_greedy_decode_streaming`.
+    """
+
+    hidden: Optional[Any] = None
+    """Hidden state; typically a tensor or a tuple of tensors."""


 class TransducerBeamSearcher(torch.nn.Module):
@@ -255,6 +267,29 @@ def transducer_greedy_decode(

         return ret

+    def transducer_greedy_decode_streaming(
+        self, x: torch.Tensor, context: TransducerGreedySearcherStreamingContext
+    ):
+        """Tiny wrapper for
+        :meth:`~TransducerBeamSearcher.transducer_greedy_decode` with an API
+        that makes it suitable to be passed as a `decoding_function` for
+        streaming.
+
+        Arguments
+        ---------
+        x : torch.Tensor
+            Outputs of the transcription network (equivalent to `tn_output`)
+        context : TransducerGreedySearcherStreamingContext
+            Mutable streaming context object, which must be specified and
+            reused across calls when streaming.
+            You can obtain an initial context by initializing a default object.
+        """
+        (hyp, _scores, _, _, hidden) = self.transducer_greedy_decode(
+            x, context.hidden, return_hidden=True
+        )
+        context.hidden = hidden
+        return hyp
+
     def transducer_beam_search_decode(self, tn_output):
         """Transducer beam search decoder is a beam search decoder over batch which applies the Transducer rules:
         1- for each utterance:
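The wrapper added in this diff follows a simple pattern: all decoder state lives in a small mutable context object that the caller creates once and passes back in on every chunk. A self-contained sketch of that pattern, with a toy running-sum "decoder" standing in for the actual transducer greedy search:

```python
from dataclasses import dataclass
from typing import List, Optional, Tuple

@dataclass
class StreamingContext:
    # Mirrors TransducerGreedySearcherStreamingContext: the hidden state
    # starts as None and is threaded through successive chunk calls.
    hidden: Optional[int] = None

def decode_chunk(chunk: List[int], hidden: Optional[int]) -> Tuple[List[int], int]:
    # Stand-in for transducer_greedy_decode(..., return_hidden=True):
    # returns a "hypothesis" plus the updated hidden state.
    total = (hidden or 0) + sum(chunk)
    return [total], total

def decode_streaming(chunk: List[int], context: StreamingContext) -> List[int]:
    # Mirrors transducer_greedy_decode_streaming: call the stateless
    # decoder, stash the new hidden state in the context, return the hyp.
    hyp, hidden = decode_chunk(chunk, context.hidden)
    context.hidden = hidden
    return hyp

ctx = StreamingContext()
print(decode_streaming([1, 2], ctx))  # [3]
print(decode_streaming([4], ctx))     # [7] -- state carried across chunks
```

Because the context is just a default-constructible dataclass, hyperparameter files can produce fresh contexts via a bare `!name:` reference (the "default constructor" noted in the YAML), and resetting a stream is as simple as constructing a new context.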
