11"""
2- Data reading and writing
2+ Data reading and writing.
33
44Authors
55 * Mirco Ravanelli 2020
2525logger = logging .getLogger (__name__ )
2626
2727
28- def _recursive_format (data , replacements ):
29- # Data: dict or list, replacements : dict
30- # Replaces string keys in replacements by their values
31- # at all levels of data (in str values)
32- # Works in-place.
33- if isinstance (data , dict ):
34- for key , item in data .items ():
35- if isinstance (item , dict ) or isinstance (item , list ):
36- _recursive_format (item , replacements )
37- elif isinstance (item , str ):
38- data [key ] = item .format_map (replacements )
39- # If not dict, list or str, do nothing
40- if isinstance (data , list ):
41- for i , item in enumerate (data ):
42- if isinstance (item , dict ) or isinstance (item , list ):
43- _recursive_format (item , replacements )
44- elif isinstance (item , str ):
45- data [i ] = item .format_map (replacements )
46- # If not dict, list or str, do nothing
47-
48-
def load_data_json(json_path, replacements=None):
    """Loads JSON and recursively formats string values.

    Arguments
    ---------
    json_path : str
        Path to the JSON file to load.
    replacements : dict, optional
        Mapping used to format string values in the loaded data: each
        ``"{key}"`` placeholder found in a string value (at any nesting
        level) is replaced by ``replacements["key"]``.
        For example ``{"data_folder": "/home/data"}``.

    Returns
    -------
    dict
        The parsed JSON contents with all string values formatted.
    """
    # Fix: replace the mutable-default-argument anti-pattern
    # (replacements={}) with an explicit None sentinel. This is fully
    # backward compatible: omitting the argument still means
    # "no replacements", without sharing one dict across all calls.
    if replacements is None:
        replacements = {}
    with open(json_path, "r") as f:
        out_json = json.load(f)
    # Substitute "{key}" placeholders in place at all nesting levels.
    _recursive_format(out_json, replacements)
    return out_json
8866
8967
68+ def _recursive_format (data , replacements ):
69+ # Data: dict or list, replacements : dict
70+ # Replaces string keys in replacements by their values
71+ # at all levels of data (in str values)
72+ # Works in-place.
73+ if isinstance (data , dict ):
74+ for key , item in data .items ():
75+ if isinstance (item , dict ) or isinstance (item , list ):
76+ _recursive_format (item , replacements )
77+ elif isinstance (item , str ):
78+ data [key ] = item .format_map (replacements )
79+ # If not dict, list or str, do nothing
80+ if isinstance (data , list ):
81+ for i , item in enumerate (data ):
82+ if isinstance (item , dict ) or isinstance (item , list ):
83+ _recursive_format (item , replacements )
84+ elif isinstance (item , str ):
85+ data [i ] = item .format_map (replacements )
86+ # If not dict, list or str, do nothing
87+
88+
9089def load_data_csv (csv_path , replacements = {}):
9190 """Loads CSV and formats string values
9291
@@ -162,9 +161,9 @@ def load_data_csv(csv_path, replacements={}):
162161
163162
164163def read_audio (waveforms_obj ):
165- """General audio loading, based on custom notation
164+ """General audio loading, based on a custom notation
166165
167- Expected use case is specifically in conjunction with Datasets
166+ Expected use case is in conjunction with Datasets
168167 specified by JSON.
169168
170169 The custom notation:
@@ -215,9 +214,9 @@ def read_audio(waveforms_obj):
215214
216215
217216def read_audio_multichannel (waveforms_obj ):
218- """General audio loading, based on custom notation
217+ """General audio loading, based on a custom notation
219218
220- Expected use case is specifically in conjunction with Datasets
219+ Expected use case is in conjunction with Datasets
221220 specified by JSON.
222221
223222 The custom notation:
@@ -233,7 +232,7 @@ def read_audio_multichannel(waveforms_obj):
233232 ]
234233 }
235234
236- Or you can specify a single file more succintly :
235+ Or you can specify a single file more succinctly :
237236 {"files": "/path/to/wav2.wav"}
238237
239238 Offset number samples and stop number samples also can be specified to read
@@ -432,19 +431,19 @@ def convert_index_to_lab(batch, ind2lab):
432431
433432def relative_time_to_absolute (batch , relative_lens , rate ):
434433 """
435- Converts SpeechBrain style relative length to absolute duration
434+ Converts SpeechBrain style relative length to the absolute duration
436435
437436 Operates on batch level.
438437
439438 Arguments
440439 ---------
441440 batch : torch.tensor
442- Sequences to determine duration for.
441+ Sequences to determine the duration for.
443442 relative_lens : torch.tensor
444443 The relative length of each sequence in batch. The longest sequence in
445444 the batch needs to have relative length 1.0.
446445 rate : float
447- The rate at which sequence elements occur in real world time. Sample
 446+ The rate at which sequence elements occur in real-world time. Sample
448447 rate, if batch is raw wavs (recommended) or 1/frame_shift if batch is
449448 features. This has to have 1/s as the unit.
450449
@@ -594,58 +593,6 @@ def _expand_data_fields(data_fields):
594593 return expanded
595594
596595
def read_kaldi_lab(kaldi_ali, kaldi_lab_opts):
    """Read labels in Kaldi format.

    Uses Kaldi IO.

    Arguments
    ---------
    kaldi_ali : str
        Path to the directory where the Kaldi alignments are stored.
    kaldi_lab_opts : str
        A string that contains the options for reading the Kaldi
        alignments (e.g. ``'ali-to-pdf'``).

    Returns
    -------
    dict
        A dictionary containing the labels, keyed by utterance id.

    Note
    ----
    This depends on kaldi-io-for-python. Install it separately.
    See: https://github.com/vesis84/kaldi-io-for-python

    Example
    -------
    This example requires kaldi files.
    ```
    lab_folder = '/home/kaldi/egs/TIMIT/s5/exp/dnn4_pretrain-dbn_dnn_ali'
    read_kaldi_lab(lab_folder, 'ali-to-pdf')
    ```
    """
    # EXTRA TOOLS: optional dependency, imported lazily so this module
    # stays importable when kaldi_io is not installed.
    try:
        import kaldi_io
    except ImportError as err:
        # Fix: chain the original ImportError for easier debugging.
        raise ImportError(
            "Could not import kaldi_io. Install it to use this."
        ) from err
    # Pipe the gzipped alignment archives through the requested Kaldi
    # command (kaldi_lab_opts) and collect one int vector per utterance.
    lab = {
        k: v
        for k, v in kaldi_io.read_vec_int_ark(
            "gunzip -c "
            + kaldi_ali
            + "/ali*.gz | "
            + kaldi_lab_opts
            + " "
            + kaldi_ali
            + "/final.mdl ark:- ark:-|",
        )
    }
    return lab
648-
649596def write_txt_file (data , filename , sampling_rate = None ):
650597 """
651598 Write data in text format
@@ -732,7 +679,7 @@ def length_to_mask(length, max_len=None, dtype=None, device=None):
732679 length : torch.LongTensor
733680 Containing the length of each sequence in the batch. Must be 1D.
734681 max_len : int
735- Max length for the mask, also the size of second dimension.
682+ Max length for the mask, also the size of the second dimension.
736683 dtype : torch.dtype, default: None
737684 The dtype of the generated mask.
738685 device: torch.device, default: None
@@ -958,13 +905,13 @@ def merge_char(sequences, space="_"):
958905 Arguments
959906 ---------
960907 sequences : list
961- Each item contains a list, and this list contains character sequence.
908+ Each item contains a list, and this list contains a character sequence.
962909 space : string
963910 The token represents space. Default: _
964911
965912 Returns
966913 -------
967- The list contain word sequences for each sentence.
914+ The list contains word sequences for each sentence.
968915
969916 Example:
970917 >>> sequences = [["a", "b", "_", "c", "_", "d", "e"], ["e", "f", "g", "_", "h", "i"]]
@@ -1028,13 +975,13 @@ def split_word(sequences, space="_"):
1028975 Arguments
1029976 ---------
1030977 sequences : list
1031- Each item contains a list, and this list contains words sequence.
 978+ Each item contains a list, and this list contains a word sequence.
1032979 space : string
1033980 The token represents space. Default: _
1034981
1035982 Returns
1036983 -------
1037- The list contain word sequences for each sentence.
984+ The list contains word sequences for each sentence.
1038985
1039986 Example:
1040987 >>> sequences = [['ab', 'c', 'de'], ['efg', 'hi']]
0 commit comments