|
15 | 15 | import torchaudio |
16 | 16 | import numpy as np |
17 | 17 | from tqdm import tqdm |
| 18 | +from speechbrain.utils.data_utils import download_file |
18 | 19 | from speechbrain.dataio.dataio import load_pkl, save_pkl |
19 | 20 | import tgt |
20 | 21 | from speechbrain.pretrained import GraphemeToPhoneme |
@@ -125,10 +126,17 @@ def prepare_ljspeech( |
125 | 126 | # Setting up additional folders required for FastSpeech2 |
126 | 127 | if model_name == "FastSpeech2": |
127 | 128 | # This step requires phoneme alignments to be present in the data_folder |
| 129 | + # We automatically download the alignments from https://www.dropbox.com/s/v28x5ldqqa288pu/LJSpeech.zip |
128 | 130 | # Download and unzip LJSpeech phoneme alignments from here: https://drive.google.com/drive/folders/1DBRkALpPd6FL9gjHMmMEdHODmkgNIIK4 |
| 131 | + alignment_URL = ( |
| 132 | + "https://www.dropbox.com/s/v28x5ldqqa288pu/LJSpeech.zip?dl=1" |
| 133 | + ) |
129 | 134 | phoneme_alignments_folder = os.path.join( |
130 | 135 | data_folder, "TextGrid", "LJSpeech" |
131 | 136 | ) |
| 137 | + download_file( |
| 138 | + alignment_URL, data_folder + "/alligments.zip", unpack=True |
| 139 | + ) |
132 | 140 |
|
133 | 141 | duration_folder = os.path.join(data_folder, "durations") |
134 | 142 | if not os.path.exists(duration_folder): |
@@ -557,7 +565,6 @@ def get_alignment(tier, sampling_rate, hop_length, last_phoneme_flags): |
557 | 565 |
|
558 | 566 |
|
559 | 567 | def get_last_phoneme_info(words_seq, phones_seq): |
560 | | - |
561 | 568 | """This function takes word and phoneme tiers from a TextGrid file as input |
562 | 569 | and provides a list of tuples for the phoneme sequence indicating whether |
563 | 570 | each of the phonemes is the last phoneme of a word or not. |
|
0 commit comments