77# 1. added more training data for early stages
88# 2. removed SAT system (and later stages) on the 100k utterance training data
99# 3. reduced number of LM rescoring, only sw1_tg and sw1_fsh_fg remain
10- # 4. mapped swbd transcription to fisher style, instead of the other way around
10+ # 4. mapped swbd transcription to fisher style, instead of the other way around
1111
1212set -e # exit on error
1313has_fisher=true
@@ -22,7 +22,7 @@ local/swbd1_prepare_dict.sh
2222# which specifies the directory to Switchboard documentations. Specifically, if
2323# this argument is given, the script will look for the conv.tab file and correct
2424# speaker IDs to the actual speaker personal identification numbers released in
25- # the documentations. The documentations can be found here:
25+ # the documentations. The documentations can be found here:
2626# https://catalog.ldc.upenn.edu/docs/LDC97S62/
2727# Note: if you are using this link, make sure you rename conv_tab.csv to conv.tab
2828# after downloading.
@@ -37,7 +37,7 @@ utils/prepare_lang.sh data/local/dict_nosp \
3737 "<unk>" data/local/lang_nosp data/lang_nosp
3838
3939# Now train the language models. We are using SRILM and interpolating with an
40- # LM trained on the Fisher transcripts (part 2 disk is currently missing; so
40+ # LM trained on the Fisher transcripts (part 2 disk is currently missing; so
4141# only part 1 transcripts ~700hr are used)
4242
4343# If you have the Fisher data, you can set this "fisher_dir" variable.
@@ -79,7 +79,7 @@ mfccdir=mfcc
7979for x in train eval2000; do
8080 steps/make_mfcc.sh --nj 50 --cmd "$train_cmd" \
8181 data/$x exp/make_mfcc/$x $mfccdir
82- steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir
82+ steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir
8383 utils/fix_data_dir.sh data/$x
8484done
8585
@@ -91,10 +91,10 @@ utils/subset_data_dir.sh --first data/train 4000 data/train_dev # 5hr 6min
9191n=$[`cat data/train/segments | wc -l` - 4000]
9292utils/subset_data_dir.sh --last data/train $n data/train_nodev
9393
94- # Now-- there are 260k utterances (313hr 23min), and we want to start the
95- # monophone training on relatively short utterances (easier to align), but not
94+ # Now-- there are 260k utterances (313hr 23min), and we want to start the
95+ # monophone training on relatively short utterances (easier to align), but not
9696# only the shortest ones (mostly uh-huh). So take the 100k shortest ones;
97- # remove most of the repeated utterances (these are the uh-huh type ones), and
97+ # remove most of the repeated utterances (these are the uh-huh type ones), and
9898# then take 10k random utterances from those (about 4hr 40mins)
9999utils/subset_data_dir.sh --shortest data/train_nodev 100000 data/train_100kshort
100100utils/subset_data_dir.sh data/train_100kshort 30000 data/train_30kshort
@@ -108,13 +108,13 @@ local/remove_dup_utts.sh 200 data/train_100k data/train_100k_nodup # 110hr
108108local/remove_dup_utts.sh 300 data/train_nodev data/train_nodup # 286hr
109109# # Starting basic training on MFCC features
110110steps/train_mono.sh --nj 30 --cmd "$train_cmd" \
111- data/train_30kshort data/lang_nosp exp/mono
111+ data/train_30kshort data/lang_nosp exp/mono
112112
113113steps/align_si.sh --nj 30 --cmd "$train_cmd" \
114- data/train_100k_nodup data/lang_nosp exp/mono exp/mono_ali
114+ data/train_100k_nodup data/lang_nosp exp/mono exp/mono_ali
115115
116116steps/train_deltas.sh --cmd "$train_cmd" \
117- 3200 30000 data/train_100k_nodup data/lang_nosp exp/mono_ali exp/tri1
117+ 3200 30000 data/train_100k_nodup data/lang_nosp exp/mono_ali exp/tri1
118118
119119(
120120 graph_dir=exp/tri1/graph_nosp_sw1_tg
@@ -125,7 +125,7 @@ steps/train_deltas.sh --cmd "$train_cmd" \
125125) &
126126
127127steps/align_si.sh --nj 30 --cmd "$train_cmd" \
128- data/train_100k_nodup data/lang_nosp exp/tri1 exp/tri1_ali
128+ data/train_100k_nodup data/lang_nosp exp/tri1 exp/tri1_ali
129129
130130steps/train_deltas.sh --cmd "$train_cmd" \
131131 4000 70000 data/train_100k_nodup data/lang_nosp exp/tri1_ali exp/tri2
@@ -149,11 +149,11 @@ steps/align_si.sh --nj 30 --cmd "$train_cmd" \
149149# From now, we start using all of the data (except some duplicates of common
150150# utterances, which don't really contribute much).
151151steps/align_si.sh --nj 30 --cmd "$train_cmd" \
152- data/train_nodup data/lang_nosp exp/tri2 exp/tri2_ali_nodup
152+ data/train_nodup data/lang_nosp exp/tri2 exp/tri2_ali_nodup
153153
154154# Do another iteration of LDA+MLLT training, on all the data.
155155steps/train_lda_mllt.sh --cmd "$train_cmd" \
156- 6000 140000 data/train_nodup data/lang_nosp exp/tri2_ali_nodup exp/tri3
156+ 6000 140000 data/train_nodup data/lang_nosp exp/tri2_ali_nodup exp/tri3
157157
158158(
159159 graph_dir=exp/tri3/graph_nosp_sw1_tg
190190
191191# Train tri4, which is LDA+MLLT+SAT, on all the (nodup) data.
192192steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
193- data/train_nodup data/lang exp/tri3 exp/tri3_ali_nodup
193+ data/train_nodup data/lang exp/tri3 exp/tri3_ali_nodup
194194
195195
196196steps/train_sat.sh --cmd "$train_cmd" \
@@ -215,21 +215,21 @@ if $has_fisher; then
215215 exp/tri4/decode_eval2000_sw1_{tg,fsh_fg}
216216fi
217217
218- # MMI training starting from the LDA+MLLT+SAT systems on all the (nodup) data.
218+ # MMI training starting from the LDA+MLLT+SAT systems on all the (nodup) data.
219219steps/align_fmllr.sh --nj 50 --cmd "$train_cmd" \
220220 data/train_nodup data/lang exp/tri4 exp/tri4_ali_nodup
221221
222222steps/make_denlats.sh --nj 50 --cmd "$decode_cmd" \
223223 --config conf/decode.config --transform-dir exp/tri4_ali_nodup \
224- data/train_nodup data/lang exp/tri4 exp/tri4_denlats_nodup
224+ data/train_nodup data/lang exp/tri4 exp/tri4_denlats_nodup
225225
226226# 4 iterations of MMI seems to work well overall. The number of iterations is
227227# used as an explicit argument even though train_mmi.sh will use 4 iterations by
228228# default.
229229num_mmi_iters=4
230230steps/train_mmi.sh --cmd "$decode_cmd" \
231231 --boost 0.1 --num-iters $num_mmi_iters \
232- data/train_nodup data/lang exp/tri4_{ali,denlats}_nodup exp/tri4_mmi_b0.1
232+ data/train_nodup data/lang exp/tri4_{ali,denlats}_nodup exp/tri4_mmi_b0.1
233233
234234for iter in 1 2 3 4; do
235235 (
@@ -260,7 +260,7 @@ steps/train_diag_ubm.sh --silence-weight 0.5 --nj 50 --cmd "$train_cmd" \
260260steps/train_mmi_fmmi.sh --learning-rate 0.005 \
261261 --boost 0.1 --cmd "$train_cmd" \
262262 data/train_nodup data/lang exp/tri4_ali_nodup exp/tri4_dubm \
263- exp/tri4_denlats_nodup exp/tri4_fmmi_b0.1
263+ exp/tri4_denlats_nodup exp/tri4_fmmi_b0.1
264264
265265for iter in 4 5 6 7 8; do
266266 (
0 commit comments