Skip to content

Commit b5c8ee0

Browse files
committed
Merge pull request kaldi-asr#657 from naxingyu/add-fisher-swbd-nnet3-chain
add fisher_swbd nnet3 and chain recipe
2 parents 497ca79 + 02cf52a commit b5c8ee0

7 files changed

Lines changed: 1052 additions & 0 deletions

File tree

egs/fisher_swbd/s5/RESULTS

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,71 @@ for x in exp/nnet2_online/nnet_ms_a_online/decode_eval2000*_fg; do grep Sum $x/
4242
%WER 12.3 | 1831 21395 | 89.2 7.2 3.5 1.5 12.3 50.8 | exp/nnet2_online/nnet_ms_a_online/decode_eval2000_utt_fsh_sw1_fg/score_13/eval2000.ctm.swbd.filt.sys
4343
%WER 11.8 | 1831 21395 | 89.6 7.2 3.2 1.4 11.8 49.0 | exp/nnet2_online/nnet_ms_a_online/decode_eval2000_utt_offline_fsh_sw1_fg/score_11/eval2000.ctm.swbd.filt.sys
4444

45+
# nnet3 result on eval2000
46+
# BLSTM ran for about 760 hours, command:
47+
# local/nnet3/run_lstm.sh --affix bidirectional --lstm-delay " [-1,1] [-2,2] [-3,3] " --label-delay 0 \
48+
# --cell-dim 1024 --recurrent-projection-dim 128 --non-recurrent-projection-dim 128 \
49+
# --chunk-left-context 40 --chunk-right-context 40 \
50+
# --extra-left-context 50 --extra-right-context 50
51+
# use tri-gram
52+
for x in exp/nnet3/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
53+
%WER 15.8 | 4459 42989 | 86.1 9.7 4.1 1.9 15.8 52.6 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_tg/score_10_0.0/eval2000_hires.ctm.filt.sys
54+
%WER 14.8 | 4459 42989 | 86.6 9.2 4.3 1.4 14.8 54.3 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_tg/score_10_0.0/eval2000_hires.ctm.filt.sys
55+
# rescore with four-gram
56+
for x in exp/nnet3/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
57+
%WER 15.4 | 4459 42989 | 86.4 9.5 4.0 1.8 15.4 51.6 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_fg/score_10_0.0/eval2000_hires.ctm.filt.sys
58+
%WER 14.5 | 4459 42989 | 87.0 9.0 4.0 1.5 14.5 53.7 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_fg/score_8_0.0/eval2000_hires.ctm.filt.sys
59+
60+
# nnet3 result on eval2000 for swbd subset
61+
# use tri-gram
62+
for x in exp/nnet3/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
63+
%WER 11.6 | 1831 21395 | 89.7 7.3 3.0 1.3 11.6 47.7 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_tg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys
64+
%WER 10.7 | 1831 21395 | 90.3 6.7 3.0 1.0 10.7 45.9 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_tg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys
65+
# rescore with four-gram
66+
for x in exp/nnet3/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
67+
%WER 11.1 | 1831 21395 | 90.2 7.0 2.8 1.3 11.1 46.2 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_fg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys
68+
%WER 10.4 | 1831 21395 | 90.6 6.5 2.9 1.0 10.4 45.3 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_fg/score_10_0.0/eval2000_hires.ctm.swbd.filt.sys
69+
70+
# nnet3 result on eval2000 for callhm subset
71+
# use tri-gram
72+
for x in exp/nnet3/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.callhm.filt.sys | utils/best_wer.sh ; done
73+
%WER 19.9 | 2628 21594 | 82.6 12.1 5.3 2.6 19.9 56.0 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_tg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys
74+
%WER 18.8 | 2628 21594 | 83.1 11.7 5.2 1.9 18.8 60.2 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_tg/score_8_0.0/eval2000_hires.ctm.callhm.filt.sys
75+
# rescore with four-gram
76+
for x in exp/nnet3/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.callhm.filt.sys | utils/best_wer.sh ; done
77+
%WER 19.7 | 2628 21594 | 82.7 12.1 5.2 2.4 19.7 55.3 | exp/nnet3/tdnn_sp/decode_eval2000_fsh_sw1_fg/score_10_0.0/eval2000_hires.ctm.callhm.filt.sys
78+
%WER 18.6 | 2628 21594 | 83.3 11.5 5.2 1.9 18.6 59.6 | exp/nnet3/lstm_bidirectional_sp/decode_eval2000_fsh_sw1_fg/score_8_0.0/eval2000_hires.ctm.callhm.filt.sys
4579

80+
# chain result on eval2000
81+
# BLSTM ran for about 380 hours
82+
# use tri-gram
83+
for x in exp/chain/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
84+
%WER 13.6 | 4459 42989 | 88.2 7.9 3.9 1.8 13.6 51.0 | exp/chain/tdnn_7b_sp/decode_eval2000_fsh_sw1_tg/score_8_0.0/eval2000_hires.ctm.filt.sys
85+
%WER 12.1 | 4459 42989 | 89.7 6.8 3.5 1.8 12.1 50.2 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_tg/score_7_0.0/eval2000_hires.ctm.filt.sys
86+
# rescore with four-gram
87+
for x in exp/chain/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
88+
%WER 13.3 | 4459 42989 | 88.4 7.8 3.8 1.8 13.3 50.1 | exp/chain/tdnn_7b_sp/decode_eval2000_fsh_sw1_fg/score_8_0.0/eval2000_hires.ctm.filt.sys
89+
%WER 12.0 | 4459 42989 | 89.6 6.5 3.8 1.7 12.0 49.3 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_fg/score_8_0.5/eval2000_hires.ctm.filt.sys
90+
91+
# chain result on eval2000 for swbd subset
92+
# use tri-gram
93+
for x in exp/chain/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
94+
%WER 9.4 | 1831 21395 | 91.7 5.4 2.9 1.2 9.4 43.9 | exp/chain/tdnn_7b_sp/decode_eval2000_fsh_sw1_tg/score_10_0.5/eval2000_hires.ctm.swbd.filt.sys
95+
%WER 8.8 | 1831 21395 | 92.5 5.3 2.2 1.4 8.8 46.9 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_tg/score_7_1.0/eval2000_hires.ctm.swbd.filt.sys
96+
# rescore with four-gram
97+
for x in exp/chain/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
98+
%WER 9.2 | 1831 21395 | 92.1 5.6 2.3 1.3 9.2 42.4 | exp/chain/tdnn_7b_relu_sp/decode_eval2000_fsh_sw1_fg/score_9_0.0/eval2000_hires.ctm.swbd.filt.sys
99+
%WER 8.5 | 1831 21395 | 92.6 4.9 2.4 1.2 8.5 44.1 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_fg/score_9_1.0/eval2000_hires.ctm.swbd.filt.sys
46100

101+
# chain result on eval2000 for callhm subset
102+
# use tri-gram
103+
for x in exp/chain/*/decode_eval2000*tg; do grep Sum $x/score_*/*.ctm.callhm.filt.sys | utils/best_wer.sh ; done
104+
%WER 17.4 | 2628 21594 | 84.7 9.8 5.5 2.1 17.4 55.3 | exp/chain/tdnn_7b_relu_sp/decode_eval2000_fsh_sw1_tg/score_8_0.0/eval2000_hires.ctm.callhm.filt.sys
105+
%WER 15.3 | 2628 21594 | 86.9 8.3 4.8 2.2 15.3 52.4 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_tg/score_7_0.0/eval2000_hires.ctm.callhm.filt.sys
106+
# rescore with four-gram
107+
for x in exp/chain/*/decode_eval2000*fg; do grep Sum $x/score_*/*.ctm.callhm.filt.sys | utils/best_wer.sh ; done
108+
%WER 17.3 | 2628 21594 | 84.9 9.7 5.5 2.1 17.3 55.0 | exp/chain/tdnn_7b_relu_sp/decode_eval2000_fsh_sw1_fg/score_8_0.0/eval2000_hires.ctm.callhm.filt.sys
109+
%WER 15.3 | 2628 21594 | 87.0 8.6 4.4 2.4 15.3 52.1 | exp/chain/blstm_6h_sp/decode_eval2000_fsh_sw1_fg/score_6_0.5/eval2000_hires.ctm.callhm.filt.sys
47110

48111
# GMM and SGMM numbers reported on rt03
49112
for x in exp/*/decode_rt03*; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
@@ -89,3 +152,63 @@ for x in exp/nnet2_online/nnet_ms_a_online/decode_rt03*_fg; do grep Sum $x/scor
89152
%WER 20.2 | 3970 36721 | 88.3 8.1 3.6 8.5 20.2 74.3 | exp/nnet2_online/nnet_ms_a_online/decode_rt03_utt_fsh_sw1_fg/score_11/rt03.ctm.swbd.filt.sys
90153
%WER 19.1 | 3970 36721 | 88.8 7.8 3.4 7.9 19.1 72.2 | exp/nnet2_online/nnet_ms_a_online/decode_rt03_utt_offline_fsh_sw1_fg/score_11/rt03.ctm.swbd.filt.sys
91154

155+
# nnet3 result on rt03
156+
# use tri-gram
157+
for x in exp/nnet3/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
158+
%WER 14.7 | 8420 76157 | 86.8 8.9 4.3 1.5 14.7 45.9 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_tg/score_11_0.0/rt03_hires.ctm.filt.sys
159+
%WER 14.2 | 8420 76157 | 87.0 8.7 4.3 1.2 14.2 46.9 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_tg/score_8_0.0/rt03_hires.ctm.filt.sys
160+
# rescore with four-gram
161+
for x in exp/nnet3/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
162+
%WER 14.4 | 8420 76157 | 87.1 8.8 4.2 1.5 14.4 45.2 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_fg/score_11_0.0/rt03_hires.ctm.filt.sys
163+
%WER 13.9 | 8420 76157 | 87.2 8.4 4.3 1.2 13.9 46.0 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_fg/score_9_0.0/rt03_hires.ctm.filt.sys
164+
165+
# nnet3 result on rt03 for swbd subset
166+
# use tri-gram
167+
for x in exp/nnet3/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
168+
%WER 17.4 | 4450 39436 | 84.3 10.6 5.1 1.8 17.4 48.9 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_tg/score_11_0.5/rt03_hires.ctm.swbd.filt.sys
169+
%WER 16.6 | 4450 39436 | 84.7 10.0 5.3 1.3 16.6 49.6 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_tg/score_10_0.5/rt03_hires.ctm.swbd.filt.sys
170+
# rescore with four-gram
171+
for x in exp/nnet3/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
172+
%WER 17.1 | 4450 39436 | 84.6 10.3 5.1 1.8 17.1 48.2 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_fg/score_12_0.0/rt03_hires.ctm.swbd.filt.sys
173+
%WER 16.3 | 4450 39436 | 85.0 9.8 5.1 1.3 16.3 49.0 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_fg/score_10_0.0/rt03_hires.ctm.swbd.filt.sys
174+
175+
# nnet3 result on rt03 for fsh subset
176+
# use tri-gram
177+
for x in exp/nnet3/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.fsh.filt.sys | utils/best_wer.sh ; done
178+
%WER 11.8 | 3970 36721 | 89.4 7.2 3.5 1.2 11.8 42.5 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_tg/score_11_0.0/rt03_hires.ctm.fsh.filt.sys
179+
%WER 11.6 | 3970 36721 | 89.4 7.1 3.5 1.0 11.6 43.6 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_tg/score_7_0.0/rt03_hires.ctm.fsh.filt.sys
180+
# rescore with four-gram
181+
for x in exp/nnet3/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.fsh.filt.sys | utils/best_wer.sh ; done
182+
%WER 11.4 | 3970 36721 | 89.7 6.9 3.4 1.1 11.4 41.5 | exp/nnet3/tdnn_sp/decode_rt03_fsh_sw1_fg/score_11_0.0/rt03_hires.ctm.fsh.filt.sys
183+
%WER 11.4 | 3970 36721 | 89.5 6.7 3.8 1.0 11.4 42.6 | exp/nnet3/lstm_bidirectional_sp/decode_rt03_fsh_sw1_fg/score_10_0.0/rt03_hires.ctm.fsh.filt.sys
184+
185+
# chain result on rt03
186+
# BLSTM ran for about 380 hours
187+
# use tri-gram
188+
for x in exp/chain/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
189+
%WER 12.7 | 8420 76157 | 88.5 7.2 4.2 1.3 12.7 43.2 | exp/chain/tdnn_7b_sp/decode_rt03_fsh_sw1_tg/score_9_0.0/rt03_hires.ctm.filt.sys
190+
%WER 11.7 | 8420 76157 | 89.8 6.6 3.6 1.5 11.7 43.7 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_tg/score_7_0.0/rt03_hires.ctm.filt.sys
191+
# rescore with four-gram
192+
for x in exp/chain/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.filt.sys | utils/best_wer.sh ; done
193+
%WER 12.4 | 8420 76157 | 88.9 7.0 4.1 1.3 12.4 42.7 | exp/chain/tdnn_7b_sp/decode_rt03_fsh_sw1_fg/score_9_0.0/rt03_hires.ctm.filt.sys
194+
%WER 11.4 | 8420 76157 | 89.9 6.1 3.9 1.3 11.4 43.4 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_fg/score_8_0.0/rt03_hires.ctm.filt.sys
195+
196+
# chain result on rt03 for swbd subset
197+
# use tri-gram
198+
for x in exp/chain/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
199+
%WER 15.0 | 4450 39436 | 86.4 8.6 5.0 1.4 15.0 45.8 | exp/chain/tdnn_7b_sp/decode_rt03_fsh_sw1_tg/score_9_0.0/rt03_hires.ctm.swbd.filt.sys
200+
%WER 13.3 | 4450 39436 | 88.3 7.5 4.2 1.6 13.3 45.2 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_tg/score_8_0.0/rt03_hires.ctm.swbd.filt.sys
201+
# rescore with four-gram
202+
for x in exp/chain/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.swbd.filt.sys | utils/best_wer.sh ; done
203+
%WER 14.8 | 4450 39436 | 86.5 8.0 5.5 1.3 14.8 45.5 | exp/chain/tdnn_7b_sp/decode_rt03_fsh_sw1_fg/score_10_0.0/rt03_hires.ctm.swbd.filt.sys
204+
%WER 13.0 | 4450 39436 | 88.5 7.3 4.2 1.6 13.0 44.8 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_fg/score_8_0.0/rt03_hires.ctm.swbd.filt.sys
205+
206+
# chain result on rt03 for fsh subset
207+
# use tri-gram
208+
for x in exp/chain/*/decode_rt03*tg; do grep Sum $x/score_*/*.ctm.fsh.filt.sys | utils/best_wer.sh ; done
209+
%WER 10.2 | 3970 36721 | 91.1 6.0 3.0 1.2 10.2 40.2 | exp/chain/tdnn_7b_relu_sp/decode_rt03_fsh_sw1_tg/score_8_0.0/rt03_hires.ctm.fsh.filt.sys
210+
%WER 9.8 | 3970 36721 | 91.4 5.3 3.3 1.2 9.8 42.0 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_tg/score_7_0.0/rt03_hires.ctm.fsh.filt.sys
211+
# rescore with four-gram
212+
for x in exp/chain/*/decode_rt03*fg; do grep Sum $x/score_*/*.ctm.fsh.filt.sys | utils/best_wer.sh ; done
213+
%WER 9.8 | 3970 36721 | 91.4 5.8 2.8 1.2 9.8 39.6 | exp/chain/tdnn_7b_relu_sp/decode_rt03_fsh_sw1_fg/score_8_0.0/rt03_hires.ctm.fsh.filt.sys
214+
%WER 9.6 | 3970 36721 | 91.6 5.2 3.3 1.2 9.6 41.4 | exp/chain/blstm_6h_sp/decode_rt03_fsh_sw1_fg/score_7_0.0/rt03_hires.ctm.fsh.filt.sys
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
#!/bin/bash

# Chain BLSTM recipe; derived from run_tdnn_6h.sh.

# Stop at the first command that fails.
set -e

# ---- Begin configuration section -------------------------------------------
# Settings for the 'chain' recipe.
stage=12            # pipeline stage to start from (compared with -le below)
train_stage=-10     # forwarded to the trainer as --stage
get_egs_stage=-10   # forwarded to the trainer as --egs.stage
dir=exp/chain/blstm_6h
decode_iter=
decode_dir_affix=

# Training options.
num_epochs=4
remove_egs=false
common_egs_dir=
affix=
chunk_width=150
chunk_left_context=40
chunk_right_context=40
# ---- End configuration section. ---------------------------------------------

# Log the command line this script was started with.
echo "$0 $*"

# Pull in the site command wrappers, the PATH setup and the option parser.
# NOTE(review): parse_options.sh presumably lets "--name value" override the
# defaults above -- confirm against utils/parse_options.sh.
source ./cmd.sh
source ./path.sh
source ./utils/parse_options.sh
30+
31+
# Refuse to continue unless the `cuda-compiled` probe succeeds: print the
# explanation below and exit with status 1.
if ! cuda-compiled; then
  # The delimiter is quoted because the message contains nothing to expand.
  cat <<'EOF' && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi
38+
39+
# Output directory: append "_<affix>" only when an affix was supplied.
dir="${dir}${affix:+_${affix}}"

# Data set and directory names shared by the stages below.
train_set="train_nodup_sp"
ali_dir="exp/tri5a_ali_nodup"
treedir="exp/chain/tri6_tree_11000"
lang="data/lang_chain"

# iVector extraction and feature dumping are identical to the standard nnet3
# setup; if those steps have already been run they can be skipped by passing
# "--stage 8".
ivector_opts=(
  --stage $stage
  --speed-perturb true
  --generate-alignments false
)
local/nnet3/run_ivector_common.sh "${ivector_opts[@]}" || exit 1
51+
52+
if [ $stage -le 9 ]; then
  # Produce the alignments in lattice form (the original note says this gives
  # training more freedom); this directory is what the training stage passes
  # as --lat-dir.
  lat_dir=exp/tri5a_lats_nodup_sp

  # Reuse the job count recorded in the existing alignment directory.
  num_ali_jobs=$(cat $ali_dir/num_jobs) || exit 1

  steps/align_fmllr_lats.sh \
    --nj $num_ali_jobs \
    --cmd "$train_cmd" \
    data/$train_set data/lang exp/tri5a $lat_dir

  # Drop the per-job FST archives to save disk space.
  rm $lat_dir/fsts.*.gz
fi
60+
61+
if [ $stage -le 10 ]; then
  # Make a fresh copy of data/lang whose topo file has one state per phone.
  # [Strictly it has two states: the first is visited exactly once, the second
  # repeats zero or more times.]
  rm -rf $lang
  cp -r data/lang $lang

  # Phone lists read from the copied lang directory.
  sil_phones=$(cat $lang/phones/silence.csl) || exit 1
  nonsil_phones=$(cat $lang/phones/nonsilence.csl) || exit 1

  # Overwrite the copied topology with the special one; it may need tuning
  # later on.
  steps/nnet3/chain/gen_topo.py $nonsil_phones $sil_phones >$lang/topo
fi
73+
74+
if [ $stage -le 11 ]; then
  # Build the tree with the new topology from $lang, using the alignments in
  # $ali_dir; the result is written to $treedir.
  tree_opts=(
    --frame-subsampling-factor 3
    --leftmost-questions-truncate -1
    --cmd "$train_cmd"
  )
  steps/nnet3/chain/build_tree.sh "${tree_opts[@]}" \
    11000 data/$train_set $lang $ali_dir $treedir
fi
80+
81+
if [ $stage -le 12 ]; then
  echo "$0: creating neural net configs";

  # Options for the LSTM config generator, collected in one place; the
  # generated configs are written under $dir/configs.
  config_opts=(
    --feat-dir data/${train_set}_hires
    --ivector-dir exp/nnet3/ivectors_${train_set}
    --tree-dir $treedir
    --splice-indexes="-2,-1,0,1,2 0 0"
    --lstm-delay=" [-3,3] [-3,3] [-3,3] "
    --xent-regularize 0.1
    --include-log-softmax false
    --num-lstm-layers 3
    --cell-dim 1024
    --hidden-dim 1024
    --recurrent-projection-dim 256
    --non-recurrent-projection-dim 256
    --label-delay 0
    --self-repair-scale 0.00001
  )

  steps/nnet3/lstm/make_configs.py "${config_opts[@]}" $dir/configs || exit 1
fi
102+
103+
if [ "$stage" -le 13 ]; then
  # On the CLSP grid, spread the egs over several disks (only if that has not
  # been set up already).
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d "$dir/egs/storage" ]; then
    utils/create_split_dir.pl \
      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage "$dir/egs/storage"
  fi

  # $dir/egs only exists at this point if the split-dir step above created it
  # (or a previous run did). Create it explicitly so the touch below cannot
  # fail -- and, with `set -e`, abort the whole script -- on other hosts.
  mkdir -p "$dir/egs"
  touch "$dir/egs/.nodelete" # keep egs around when that run dies.

  # Train the chain model. Feature, tree and lattice inputs come from the
  # earlier stages; chunk sizes and the epoch count come from the
  # configuration section at the top of the script.
  steps/nnet3/chain/train.py --stage "$train_stage" \
    --cmd "$decode_cmd" \
    --feat.online-ivector-dir "exp/nnet3/ivectors_${train_set}" \
    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
    --chain.xent-regularize 0.1 \
    --chain.leaky-hmm-coefficient 0.1 \
    --chain.l2-regularize 0.00005 \
    --chain.apply-deriv-weights false \
    --chain.lm-opts="--num-extra-lm-states=2000" \
    --chain.left-deriv-truncate 0 \
    --trainer.num-chunk-per-minibatch 64 \
    --trainer.frames-per-iter 1200000 \
    --trainer.max-param-change 1.414 \
    --trainer.num-epochs "$num_epochs" \
    --trainer.optimization.shrink-value 0.99 \
    --trainer.optimization.num-jobs-initial 3 \
    --trainer.optimization.num-jobs-final 16 \
    --trainer.optimization.initial-effective-lrate 0.001 \
    --trainer.optimization.final-effective-lrate 0.0001 \
    --trainer.optimization.momentum 0.0 \
    --egs.stage "$get_egs_stage" \
    --egs.opts "--frames-overlap-per-eg 0" \
    --egs.chunk-width "$chunk_width" \
    --egs.chunk-left-context "$chunk_left_context" \
    --egs.chunk-right-context "$chunk_right_context" \
    --egs.dir "$common_egs_dir" \
    --cleanup.remove-egs "$remove_egs" \
    --feat-dir "data/${train_set}_hires" \
    --tree-dir "$treedir" \
    --lat-dir exp/tri5a_lats_nodup_sp \
    --dir "$dir" || exit 1
fi
143+
144+
if [ $stage -le 14 ]; then
  # The lang directory used here looks mismatched, and as far as its 'topo'
  # goes it is -- but this script does not read the 'topo' from the lang
  # directory, so that is harmless.
  tg_lang=data/lang_fsh_sw1_tg
  utils/mkgraph.sh --self-loop-scale 1.0 \
    $tg_lang $dir $dir/graph_fsh_sw1_tg
fi
150+
151+
# Decode both test sets with the tri-gram graph, then rescore the resulting
# lattices with the four-gram LM. The two test sets are processed in parallel
# background subshells.
decode_suff=fsh_sw1_tg
graph_dir=$dir/graph_fsh_sw1_tg
decode_pids=()
if [ "$stage" -le 15 ]; then
  # Only pass --iter when a specific iteration was requested.
  iter_opts=()
  if [ -n "$decode_iter" ]; then
    iter_opts=(--iter "$decode_iter")
  fi

  # decoding options: 10 frames more context than was used in training
  extra_left_context=$((chunk_left_context + 10))
  extra_right_context=$((chunk_right_context + 10))

  for decode_set in eval2000 rt03; do
    (
      # One decoding job per distinct speaker in the test set.
      num_jobs=$(cut -d' ' -f2 "data/${decode_set}_hires/utt2spk" | sort -u | wc -l)
      decode_base=$dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}

      # $num_jobs is deliberately unquoted: some `wc` implementations pad the
      # count with leading blanks, which word splitting strips.
      # shellcheck disable=SC2086
      steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
        --nj $num_jobs --cmd "$decode_cmd" "${iter_opts[@]}" \
        --extra-left-context "$extra_left_context" \
        --extra-right-context "$extra_right_context" \
        --frames-per-chunk "$chunk_width" \
        --online-ivector-dir "exp/nnet3/ivectors_${decode_set}" \
        "$graph_dir" "data/${decode_set}_hires" \
        "${decode_base}_${decode_suff}" || exit 1

      # Brace expansion yields the tg then fg variant of each path.
      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
        data/lang_fsh_sw1_{tg,fg} "data/${decode_set}_hires" \
        "${decode_base}"_fsh_sw1_{tg,fg} || exit 1
    ) &
    decode_pids+=("$!")
  done
fi

# `exit 1` inside a background subshell only ends that subshell, and a bare
# `wait` always returns 0, so collect each job's status explicitly and fail
# the script if any decode/rescore job failed.
decode_failed=0
for pid in "${decode_pids[@]}"; do
  wait "$pid" || decode_failed=1
done
if [ "$decode_failed" -ne 0 ]; then
  echo "$0: decoding or LM rescoring failed for at least one test set" >&2
  exit 1
fi
exit 0

0 commit comments

Comments
 (0)