11#! /bin/bash
22
3- # 1e is as 1b, but reducing decay-time from 40 to 20.
3+ # 1e is as 1d, but reducing decay-time from 40 to 20.
4+
5+ # The following table shows comparison of various decay-time values,
6+ # namely: [b:unset=infinity, f:80, d:40, e:20, g:10, g2:5].
7+ # note: the g2 script is not checked in.
8+ # There is no clear trend on the non-looped decoding, but looped decoding seems
9+ # to improve as decay-time is decreased. We end up recommending decay-time=20,
10+ # as by then we get all the improvement on looped decoding, and it's the
11+ # most conservative setting with which we can get this improvement (although
12+ # actually it seems fine to use an even smaller decay-time).
13+
14+ # local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{b,f,d,e,g,g2}_sp_bi
15+
16+ # local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1b_sp_bi exp/chain_cleaned/tdnn_lstm1f_sp_bi exp/chain_cleaned/tdnn_lstm1d_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1g_sp_bi exp/chain_cleaned/tdnn_lstm1g2_sp_bi
17+ # System tdnn_lstm1b_sp_bi tdnn_lstm1f_sp_bi tdnn_lstm1d_sp_bi tdnn_lstm1e_sp_bi tdnn_lstm1g_sp_bi tdnn_lstm1g2_sp_bi
18+ # WER on dev(orig) 9.1 8.8 9.0 9.0 9.0 9.4
19+ # [looped:] 9.4 9.3 9.2 9.0 8.9 9.4
20+ # WER on dev(rescored) 8.4 8.2 8.4 8.4 8.4 8.7
21+ # [looped:] 8.8 8.7 8.6 8.4 8.3 8.7
22+ # WER on test(orig) 8.9 9.0 8.9 8.8 8.8 9.3
23+ # [looped:] 9.3 9.3 9.0 8.8 8.8 9.2
24+ # WER on test(rescored) 8.4 8.6 8.3 8.4 8.4 8.9
25+ # [looped:] 8.7 8.9 8.5 8.3 8.4 8.8
26+ # Final train prob -0.0621 -0.0631 -0.0595 -0.0648 -0.0689 -0.0739
27+ # Final valid prob -0.0799 -0.0802 -0.0823 -0.0827 -0.0890 -0.0963
28+ # Final train prob (xent) -0.8300 -0.8295 -0.8129 -0.8372 -0.8610 -0.8792
29+ # Final valid prob (xent) -0.9500 -0.9662 -0.9589 -0.9497 -0.9982 -1.0256
30+
31+
32+ # the following table compares the 'online' decoding with regular and looped
33+ # decoding. online decoding is a little better than either (possibly due to
34+ # using slightly later iVectors).
35+ #
36+ # local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi{,_online} 2>/dev/null
37+ # local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi_online
38+ # System                  tdnn_lstm1e_sp_bi  tdnn_lstm1e_sp_bi_online
39+ # WER on dev(orig)               9.0                 8.8
40+ #             [looped:]          9.0
41+ # WER on dev(rescored)           8.4                 8.4
42+ #             [looped:]          8.4
43+ # WER on test(orig)              8.8                 8.8
44+ #             [looped:]          8.8
45+ # WER on test(rescored)          8.4                 8.4
46+ #             [looped:]          8.3
47+
448
549# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers.
650# TODO: it currently uses the egs from 1b; remember to remove that before committing.
@@ -77,6 +121,8 @@ tdnn_lstm_affix=1e #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we
# Tail of the configuration section: these are plain variable defaults set
# before the end-of-configuration marker below.
common_egs_dir=  # you can set this to use previously dumped egs.
remove_egs=true

# Gate for the online-decoding stage at the end of the script; the value is
# executed as a command there, so it must be literally "true" or "false".
test_online_decoding=false  # if true, it will run the last decoding stage.

# End configuration section.
echo "$0 $@"  # Print the command line for logging
82128
@@ -289,8 +335,10 @@ if [ $stage -le 21 ]; then
289335 # 'looped' decoding. we didn't write a -parallel version of this program yet,
290336 # so it will take a bit longer as the --num-threads option is not supported.
291337 # we just hardcode the --frames-per-chunk option as it doesn't have to
292- # match any value used in training, and it won't affect the results (unlike
293- # regular decoding).
338+ # match any value used in training, and it won't affect the results very much (unlike
339+ # regular decoding)... [it will affect them slightly due to differences in the
340+ # iVector extraction; probably smaller will be worse as it sees less of the future,
341+ # but in a real scenario, long chunks will introduce excessive latency].
294342 rm $dir /.error 2> /dev/null || true
295343 for dset in dev test ; do
296344 (
@@ -313,4 +361,35 @@ if [ $stage -le 21 ]; then
313361fi
314362
315363
# Stage 22 (optional): online decoding.
# Runs only when $test_online_decoding evaluates as the command "true" and
# $stage is <= 22.  It first builds the online-decoding directory
# ${dir}_online, then decodes and rescores the dev and test sets in parallel
# background subshells.  Any failure in a subshell is recorded by touching
# $dir/.error, which is checked after all jobs have finished.
if $test_online_decoding && [ "$stage" -le 22 ]; then
  # note: if the features change (e.g. you add pitch features), you will have to
  # change the options of the following command line.
  steps/online/nnet3/prepare_online_decoding.sh \
    --mfcc-config conf/mfcc_hires.conf \
    data/lang_chain "exp/nnet3${nnet3_affix}/extractor" "${dir}" "${dir}_online"

  # Clear any stale error marker from a previous run; it is fine if none exists.
  rm "$dir/.error" 2>/dev/null || true
  for dset in dev test; do
    (
      # note: we just give it "$dset" as it only uses the wav.scp, the
      # feature type does not matter.

      steps/online/nnet3/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
        --extra-left-context-initial "$extra_left_context_initial" \
        --acwt 1.0 --post-decode-acwt 10.0 \
        --scoring-opts "--min-lmwt 5" \
        "$dir/graph" "data/${dset}" "${dir}_online/decode_${dset}" || exit 1
      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
        "data/${dset}_hires" "${dir}_online/decode_${dset}" \
        "${dir}_online/decode_${dset}_rescore" || exit 1
    ) || touch "$dir/.error" &
  done
  # Barrier: wait for both background decoding jobs before checking for errors.
  wait
  if [ -f "$dir/.error" ]; then
    echo "$0: something went wrong in decoding"
    exit 1
  fi
fi
392+
393+
394+
316395exit 0
0 commit comments