11"""
2- Data reading and writing
2+ Data reading and writing.
33
44Authors
55 * Mirco Ravanelli 2020
2525logger = logging .getLogger (__name__ )
2626
2727
28- def _recursive_format (data , replacements ):
29- # Data: dict or list, replacements : dict
30- # Replaces string keys in replacements by their values
31- # at all levels of data (in str values)
32- # Works in-place.
33- if isinstance (data , dict ):
34- for key , item in data .items ():
35- if isinstance (item , dict ) or isinstance (item , list ):
36- _recursive_format (item , replacements )
37- elif isinstance (item , str ):
38- data [key ] = item .format_map (replacements )
39- # If not dict, list or str, do nothing
40- if isinstance (data , list ):
41- for i , item in enumerate (data ):
42- if isinstance (item , dict ) or isinstance (item , list ):
43- _recursive_format (item , replacements )
44- elif isinstance (item , str ):
45- data [i ] = item .format_map (replacements )
46- # If not dict, list or str, do nothing
47-
48-
def load_data_json(json_path, replacements=None):
    """Loads JSON and recursively formats string values.

    Arguments
    ---------
    json_path : str
        Path to the JSON file to load.
    replacements : dict, optional
        Mapping used to format string values in the loaded data: each
        ``"{key}"`` placeholder found in a string value (at any nesting
        level) is replaced by ``replacements["key"]``.
        For example ``{"data_folder": "/home/data"}``.

    Returns
    -------
    dict
        The parsed JSON contents with all string values formatted.
    """
    # Fix: replace the mutable-default-argument anti-pattern
    # (replacements={}) with an explicit None sentinel. This is fully
    # backward compatible: omitting the argument still means
    # "no replacements", without sharing one dict across all calls.
    if replacements is None:
        replacements = {}
    with open(json_path, "r") as f:
        out_json = json.load(f)
    # Substitute "{key}" placeholders in place at all nesting levels.
    _recursive_format(out_json, replacements)
    return out_json
8866
8967
68+ def _recursive_format (data , replacements ):
69+ # Data: dict or list, replacements : dict
70+ # Replaces string keys in replacements by their values
71+ # at all levels of data (in str values)
72+ # Works in-place.
73+ if isinstance (data , dict ):
74+ for key , item in data .items ():
75+ if isinstance (item , dict ) or isinstance (item , list ):
76+ _recursive_format (item , replacements )
77+ elif isinstance (item , str ):
78+ data [key ] = item .format_map (replacements )
79+ # If not dict, list or str, do nothing
80+ if isinstance (data , list ):
81+ for i , item in enumerate (data ):
82+ if isinstance (item , dict ) or isinstance (item , list ):
83+ _recursive_format (item , replacements )
84+ elif isinstance (item , str ):
85+ data [i ] = item .format_map (replacements )
86+ # If not dict, list or str, do nothing
87+
88+
9089def load_data_csv (csv_path , replacements = {}):
9190 """Loads CSV and formats string values
9291
@@ -162,9 +161,9 @@ def load_data_csv(csv_path, replacements={}):
162161
163162
164163def read_audio (waveforms_obj ):
165- """General audio loading, based on custom notation
164+ """General audio loading, based on a custom notation
166165
167- Expected use case is specifically in conjunction with Datasets
166+ Expected use case is in conjunction with Datasets
168167 specified by JSON.
169168
170169 The custom notation:
@@ -215,9 +214,9 @@ def read_audio(waveforms_obj):
215214
216215
217216def read_audio_multichannel (waveforms_obj ):
218- """General audio loading, based on custom notation
217+ """General audio loading, based on a custom notation
219218
220- Expected use case is specifically in conjunction with Datasets
219+ Expected use case is in conjunction with Datasets
221220 specified by JSON.
222221
223222 The custom notation:
@@ -233,7 +232,7 @@ def read_audio_multichannel(waveforms_obj):
233232 ]
234233 }
235234
236- Or you can specify a single file more succintly :
235+ Or you can specify a single file more succinctly :
237236 {"files": "/path/to/wav2.wav"}
238237
239238 Offset number samples and stop number samples also can be specified to read
@@ -432,19 +431,19 @@ def convert_index_to_lab(batch, ind2lab):
432431
433432def relative_time_to_absolute (batch , relative_lens , rate ):
434433 """
435- Converts SpeechBrain style relative length to absolute duration
434+ Converts SpeechBrain style relative length to the absolute duration
436435
437436 Operates on batch level.
438437
439438 Arguments
440439 ---------
441440 batch : torch.tensor
442- Sequences to determine duration for.
441+ Sequences to determine the duration for.
443442 relative_lens : torch.tensor
444443 The relative length of each sequence in batch. The longest sequence in
445444 the batch needs to have relative length 1.0.
446445 rate : float
447- The rate at which sequence elements occur in real world time. Sample
 446+ The rate at which sequence elements occur in real-world time. Sample
448447 rate, if batch is raw wavs (recommended) or 1/frame_shift if batch is
449448 features. This has to have 1/s as the unit.
450449
@@ -594,58 +593,6 @@ def _expand_data_fields(data_fields):
594593 return expanded
595594
596595
def read_kaldi_lab(kaldi_ali, kaldi_lab_opts):
    """Read labels in Kaldi format.

    Uses Kaldi IO.

    Arguments
    ---------
    kaldi_ali : str
        Path to the directory where the Kaldi alignments are stored.
    kaldi_lab_opts : str
        A string that contains the options for reading the Kaldi
        alignments (e.g. ``'ali-to-pdf'``).

    Returns
    -------
    dict
        A dictionary containing the labels, keyed by utterance id.

    Note
    ----
    This depends on kaldi-io-for-python. Install it separately.
    See: https://github.com/vesis84/kaldi-io-for-python

    Example
    -------
    This example requires kaldi files.
    ```
    lab_folder = '/home/kaldi/egs/TIMIT/s5/exp/dnn4_pretrain-dbn_dnn_ali'
    read_kaldi_lab(lab_folder, 'ali-to-pdf')
    ```
    """
    # EXTRA TOOLS: optional dependency, imported lazily so this module
    # stays importable when kaldi_io is not installed.
    try:
        import kaldi_io
    except ImportError as err:
        # Fix: chain the original ImportError for easier debugging.
        raise ImportError(
            "Could not import kaldi_io. Install it to use this."
        ) from err
    # Pipe the gzipped alignment archives through the requested Kaldi
    # command (kaldi_lab_opts) and collect one int vector per utterance.
    lab = {
        k: v
        for k, v in kaldi_io.read_vec_int_ark(
            "gunzip -c "
            + kaldi_ali
            + "/ali*.gz | "
            + kaldi_lab_opts
            + " "
            + kaldi_ali
            + "/final.mdl ark:- ark:-|",
        )
    }
    return lab
648-
649596def write_txt_file (data , filename , sampling_rate = None ):
650597 """
651598 Write data in text format
@@ -732,7 +679,7 @@ def length_to_mask(length, max_len=None, dtype=None, device=None):
732679 length : torch.LongTensor
733680 Containing the length of each sequence in the batch. Must be 1D.
734681 max_len : int
735- Max length for the mask, also the size of second dimension.
682+ Max length for the mask, also the size of the second dimension.
736683 dtype : torch.dtype, default: None
737684 The dtype of the generated mask.
738685 device: torch.device, default: None
@@ -958,13 +905,13 @@ def merge_char(sequences, space="_"):
958905 Arguments
959906 ---------
960907 sequences : list
961- Each item contains a list, and this list contains character sequence.
908+ Each item contains a list, and this list contains a character sequence.
962909 space : string
963910 The token represents space. Default: _
964911
965912 Returns
966913 -------
967- The list contain word sequences for each sentence.
914+ The list contains word sequences for each sentence.
968915
969916 Example:
970917 >>> sequences = [["a", "b", "_", "c", "_", "d", "e"], ["e", "f", "g", "_", "h", "i"]]
@@ -1028,13 +975,13 @@ def split_word(sequences, space="_"):
1028975 Arguments
1029976 ---------
1030977 sequences : list
1031- Each item contains a list, and this list contains words sequence.
 978+ Each item contains a list, and this list contains a word sequence.
1032979 space : string
1033980 The token represents space. Default: _
1034981
1035982 Returns
1036983 -------
1037- The list contain word sequences for each sentence.
984+ The list contains word sequences for each sentence.
1038985
1039986 Example:
1040987 >>> sequences = [['ab', 'c', 'de'], ['efg', 'hi']]
0 commit comments