Skip to content

Commit 8c77d2c

Browse files
hhadian and danpovey
authored and committed
[src,egs,scripts] Support frame-subsampling with non-chain nnet3 models (kaldi-asr#1238)
1 parent 483192e commit 8c77d2c

File tree

10 files changed

+740
-15
lines changed

10 files changed

+740
-15
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
tuning/run_tdnn_lfr1c.sh
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
#!/bin/bash

# "e" is as "c", but uses splicing similar to chain's without changing the
# number of layers.

# At this script level we don't support not running on GPU, as it would be
# painfully slow.  If you want to run without GPU you'd have to call
# train_tdnn.sh with --gpu false, --num-threads 16 and --minibatch-size 128.

# System                 tdnn_c     tdnn_e
# WER on train_dev(tg)   17.37      16.75
# WER on train_dev(fg)   15.94      15.34
# WER on eval2000(tg)    20.0       19.5
# WER on eval2000(fg)    18.2       18.0
# Final train prob      -1.43781   -1.40491
# Final valid prob      -1.56895   -1.55255

# Configuration; every variable below can be overridden from the command
# line (e.g. --stage 10) via utils/parse_options.sh.
stage=9
affix=                # optional suffix for the experiment directory name
train_stage=-10
has_fisher=true       # if true, rescore with the Fisher const-arpa 4-gram LM
speed_perturb=true
common_egs_dir=       # e.g. exp/nnet3/tdnn_c_sp/egs, to reuse previously dumped egs
reporting_email=
remove_egs=true

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

suffix=
if [ "$speed_perturb" == "true" ]; then
  suffix=_sp
fi
dir=exp/nnet3/tdnn_e
dir=$dir${affix:+_$affix}
dir=${dir}$suffix
train_set=train_nodup$suffix
ali_dir=exp/tri4_ali_nodup$suffix

local/nnet3/run_ivector_common.sh --stage "$stage" \
  --speed-perturb "$speed_perturb" || exit 1;

if [ "$stage" -le 9 ]; then
  echo "$0: creating neural net configs using the xconfig parser";

  # Number of output units = number of pdfs in the alignment tree.
  num_targets=$(tree-info "$ali_dir/tree" | grep num-pdfs | awk '{print $2}')

  mkdir -p "$dir/configs"
  cat <<EOF > $dir/configs/network.xconfig
  input dim=100 name=ivector
  input dim=40 name=input

  # please note that it is important to have input layer with the name=input
  # as the layer immediately preceding the fixed-affine-layer to enable
  # the use of short notation for the descriptor
  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

  # the first splicing is moved before the lda layer, so no splicing here
  relu-renorm-layer name=tdnn1 dim=1024
  relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024
  relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024
  relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
  relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024

  output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec
EOF
  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ "$stage" -le 10 ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    # Spread the egs over several filesystems (CLSP cluster only).
    utils/create_split_dir.pl \
      /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
  fi

  steps/nnet3/train_dnn.py --stage=$train_stage \
    --cmd="$decode_cmd" \
    --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \
    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
    --trainer.num-epochs 2 \
    --trainer.optimization.num-jobs-initial 3 \
    --trainer.optimization.num-jobs-final 16 \
    --trainer.optimization.initial-effective-lrate 0.0017 \
    --trainer.optimization.final-effective-lrate 0.00017 \
    --egs.dir "$common_egs_dir" \
    --cleanup.remove-egs $remove_egs \
    --cleanup.preserve-model-interval 100 \
    --use-gpu true \
    --feat-dir=data/${train_set}_hires \
    --ali-dir $ali_dir \
    --lang data/lang \
    --reporting.email="$reporting_email" \
    --dir=$dir || exit 1;
fi

graph_dir=exp/tri4/graph_sw1_tg
if [ "$stage" -le 11 ]; then
  for decode_set in train_dev eval2000; do
    (
    # One decoding job per speaker in the test set.
    num_jobs=$(cut -d' ' -f2 data/${decode_set}_hires/utt2spk | sort -u | wc -l)
    steps/nnet3/decode.sh --nj "$num_jobs" --cmd "$decode_cmd" \
      --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
      $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1;
    if $has_fisher; then
      # Rescore the trigram lattices with the Fisher const-arpa 4-gram LM.
      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
        data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \
        $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1;
    fi
    ) &
  done
fi
wait;
exit 0;
Lines changed: 161 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,161 @@
#!/bin/bash

# _lfr1a is as _c, but is LFR (low frame rate): it uses triphone chain topology
# with a frame subsampling factor of 3.

# At this script level we don't support not running on GPU, as it would be
# painfully slow.  If you want to run without GPU you'd have to call
# train_tdnn.sh with --gpu false, --num-threads 16 and --minibatch-size 128.

# System                 tdnn_c     tdnn_lfr1a
# WER on train_dev(tg)   17.37      17.25
# WER on train_dev(fg)   15.94      15.90
# WER on eval2000(tg)    20.0       20.1
# WER on eval2000(fg)    18.2       18.5
# Final train prob      -1.43781   -1.32434
# Final valid prob      -1.56895   -1.42206

# Configuration; every variable below can be overridden from the command
# line (e.g. --stage 10) via utils/parse_options.sh.
stage=11
affix=                # optional suffix for the experiment directory name
train_stage=-10
has_fisher=true       # if true, rescore with the Fisher const-arpa 4-gram LM
speed_perturb=true
common_egs_dir=       # set to reuse previously dumped egs
reporting_email=
remove_egs=true
leftmost_questions_truncate=-1

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

suffix=
if [ "$speed_perturb" == "true" ]; then
  suffix=_sp
fi
dir=exp/nnet3/tdnn_lfr1a
dir=$dir${affix:+_$affix}
dir=${dir}$suffix
train_set=train_nodup$suffix
ali_dir=exp/tri4_ali_nodup$suffix
# NOTE(review): the tree/lang directories below say 'lfr1b' inside an 'lfr1a'
# script -- looks like a copy-paste leftover; confirm against the companion
# run_tdnn_lfr1b script before renaming, since artifacts on disk use this name.
treedir=exp/nnet3/tdnn_lfr1b_tree$suffix
lang=data/lang_lfr1b

local/nnet3/run_ivector_common.sh --stage "$stage" \
  --speed-perturb "$speed_perturb" || exit 1;


if [ "$stage" -le 9 ]; then
  # Create a version of the lang/ directory that has one state per phone in the
  # topo file. [note, it really has two states.. the first one is only repeated
  # once, the second one has zero or more repeats.]
  rm -rf $lang
  cp -r data/lang $lang
  silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
  nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
  # Use our special topology... note that later on may have to tune this
  # topology.
  steps/nnet3/chain/gen_topo.py "$nonsilphonelist" "$silphonelist" >$lang/topo
fi

if [ "$stage" -le 10 ]; then
  # Build a tree using our new topology.  This is the critically different
  # step compared with other recipes.
  steps/nnet3/chain/build_tree.sh --repeat-frames true --frame-subsampling-factor 3 \
    --leftmost-questions-truncate $leftmost_questions_truncate \
    --cmd "$train_cmd" 8400 data/$train_set $lang $ali_dir $treedir
fi

if [ "$stage" -le 11 ]; then
  echo "$0: creating neural net configs using the xconfig parser";

  # Number of output units = number of pdfs in the newly built tree.
  num_targets=$(tree-info "$treedir/tree" | grep num-pdfs | awk '{print $2}')

  mkdir -p "$dir/configs"
  cat <<EOF > $dir/configs/network.xconfig
  input dim=100 name=ivector
  input dim=40 name=input

  # please note that it is important to have input layer with the name=input
  # as the layer immediately preceding the fixed-affine-layer to enable
  # the use of short notation for the descriptor
  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

  # the first splicing is moved before the lda layer, so no splicing here
  relu-renorm-layer name=tdnn1 dim=1024
  relu-renorm-layer name=tdnn2 input=Append(-1,2) dim=1024
  relu-renorm-layer name=tdnn3 input=Append(-3,3) dim=1024
  relu-renorm-layer name=tdnn4 input=Append(-7,2) dim=1024
  relu-renorm-layer name=tdnn5 dim=1024

  output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec
EOF
  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ "$stage" -le 12 ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    # Spread the egs over several filesystems (CLSP cluster only).
    utils/create_split_dir.pl \
      /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
  fi

  # NOTE(review): --lang is data/lang here even though $lang was built above;
  # presumably only the tree in $treedir matters for DNN training -- confirm.
  steps/nnet3/train_dnn.py --stage=$train_stage \
    --cmd="$decode_cmd" \
    --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \
    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
    --trainer.num-epochs 2 \
    --trainer.optimization.num-jobs-initial 3 \
    --trainer.optimization.num-jobs-final 16 \
    --trainer.optimization.initial-effective-lrate 0.0017 \
    --trainer.optimization.final-effective-lrate 0.00017 \
    --egs.dir "$common_egs_dir" \
    --cleanup.remove-egs $remove_egs \
    --cleanup.preserve-model-interval 100 \
    --use-gpu true \
    --feat-dir=data/${train_set}_hires \
    --ali-dir $treedir \
    --lang data/lang \
    --reporting.email="$reporting_email" \
    --dir=$dir || exit 1;
fi

# Record the frame subsampling factor so decoding scripts know about it.
echo 3 >$dir/frame_subsampling_factor
graph_dir=$dir/graph_sw1_tg
if [ "$stage" -le 13 ]; then
  # Note: it might appear that this $lang directory is mismatched, and it is as
  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
  # the lang directory.
  utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir
fi

if [ "$stage" -le 14 ]; then
  for decode_set in train_dev eval2000; do
    (
    # One decoding job per speaker in the test set.
    num_jobs=$(cut -d' ' -f2 data/${decode_set}_hires/utt2spk | sort -u | wc -l)
    # acwt/post-decode-acwt scaled for the frame-subsampling factor of 3.
    steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj "$num_jobs" --cmd "$decode_cmd" \
      --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
      $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1;
    if $has_fisher; then
      # Rescore the trigram lattices with the Fisher const-arpa 4-gram LM.
      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
        data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \
        $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1;
    fi
    ) &
  done
fi
wait;
exit 0;

0 commit comments

Comments (0)