Skip to content

Commit 16afe7a

Browse files
committed
[src,egs,scripts]: Replace online-nnet3 decoding setup with 'looped' decoding and give example script with TDNN+LSTM.
1 parent d9a5312 commit 16afe7a

24 files changed

+824
-662
lines changed

egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# and adding
77
# --egs.chunk-left-context-initial=0
88
# and --egs.chunk-right-context-final=0
9-
9+
# See 1e for summary of results.
1010

1111
# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi
1212
# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091)

egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it
44
# uses egs from 1b, remember to remove that before I commit.
5+
# See 1e for summary of results.
56

67
# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi
78
# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091)

egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,50 @@
11
#!/bin/bash
22

3-
# 1e is as 1b, but reducing decay-time from 40 to 20.
3+
# 1e is as 1d, but reducing decay-time from 40 to 20.
4+
5+
# The following table shows comparison of various decay-time values,
6+
# namely: [b:unset=infinity, f:80, d:40, e:20, g:10, g2:5].
7+
# note: the g2 script is not checked in.
8+
# There is no clear trend on the non-looped decoding, but looped decoding seems
9+
# to improve as decay-time is decreased. We end up recommending decay-time=20,
10+
# as by then we get all the improvement on looped decoding, and it's the
11+
# most conservative setting with which we can get this improvement (although
12+
# actually it seems fine to use an even smaller decay-time).
13+
14+
# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{b,f,d,e,g,g2}_sp_bi
15+
16+
# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1b_sp_bi exp/chain_cleaned/tdnn_lstm1f_sp_bi exp/chain_cleaned/tdnn_lstm1d_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1g_sp_bi exp/chain_cleaned/tdnn_lstm1g2_sp_bi
17+
# System tdnn_lstm1b_sp_bi tdnn_lstm1f_sp_bi tdnn_lstm1d_sp_bi tdnn_lstm1e_sp_bi tdnn_lstm1g_sp_bi tdnn_lstm1g2_sp_bi
18+
# WER on dev(orig) 9.1 8.8 9.0 9.0 9.0 9.4
19+
# [looped:] 9.4 9.3 9.2 9.0 8.9 9.4
20+
# WER on dev(rescored) 8.4 8.2 8.4 8.4 8.4 8.7
21+
# [looped:] 8.8 8.7 8.6 8.4 8.3 8.7
22+
# WER on test(orig) 8.9 9.0 8.9 8.8 8.8 9.3
23+
# [looped:] 9.3 9.3 9.0 8.8 8.8 9.2
24+
# WER on test(rescored) 8.4 8.6 8.3 8.4 8.4 8.9
25+
# [looped:] 8.7 8.9 8.5 8.3 8.4 8.8
26+
# Final train prob -0.0621 -0.0631 -0.0595 -0.0648 -0.0689 -0.0739
27+
# Final valid prob -0.0799 -0.0802 -0.0823 -0.0827 -0.0890 -0.0963
28+
# Final train prob (xent) -0.8300 -0.8295 -0.8129 -0.8372 -0.8610 -0.8792
29+
# Final valid prob (xent) -0.9500 -0.9662 -0.9589 -0.9497 -0.9982 -1.0256
30+
31+
32+
# the following table compares the 'online' decoding with regular and looped
33+
# decoding. online decoding is a little better than either (possibly due to
34+
# using slightly later iVectors).
35+
#
36+
# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi{,_online} 2>/dev/null
37+
# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi_online
38+
# System tdnn_lstm1e_sp_bi tdnn_lstm1e_sp_bi_online
39+
# WER on dev(orig) 9.0 8.8
40+
# [looped:] 9.0
41+
# WER on dev(rescored) 8.4 8.4
42+
# [looped:] 8.4
43+
# WER on test(orig) 8.8 8.8
44+
# [looped:] 8.8
45+
# WER on test(rescored) 8.4 8.4
46+
# [looped:] 8.3
47+
448

549
# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it
650
# uses egs from 1b, remember to remove that before I commit.
@@ -77,6 +121,8 @@ tdnn_lstm_affix=1e #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we
77121
common_egs_dir= # you can set this to use previously dumped egs.
78122
remove_egs=true
79123

124+
test_online_decoding=false # if true, it will run the last decoding stage.
125+
80126
# End configuration section.
81127
echo "$0 $@" # Print the command line for logging
82128

@@ -289,8 +335,10 @@ if [ $stage -le 21 ]; then
289335
# 'looped' decoding. we didn't write a -parallel version of this program yet,
290336
# so it will take a bit longer as the --num-threads option is not supported.
291337
# we just hardcode the --frames-per-chunk option as it doesn't have to
292-
# match any value used in training, and it won't affect the results (unlike
293-
# regular decoding).
338+
# match any value used in training, and it won't affect the results very much (unlike
339+
# regular decoding)... [it will affect them slightly due to differences in the
340+
# iVector extraction; probably smaller will be worse as it sees less of the future,
341+
# but in a real scenario, long chunks will introduce excessive latency].
294342
rm $dir/.error 2>/dev/null || true
295343
for dset in dev test; do
296344
(
@@ -313,4 +361,35 @@ if [ $stage -le 21 ]; then
313361
fi
314362

315363

364+
if $test_online_decoding && [ $stage -le 22 ]; then
365+
# note: if the features change (e.g. you add pitch features), you will have to
366+
# change the options of the following command line.
367+
steps/online/nnet3/prepare_online_decoding.sh \
368+
--mfcc-config conf/mfcc_hires.conf \
369+
data/lang_chain exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online
370+
371+
rm $dir/.error 2>/dev/null || true
372+
for dset in dev test; do
373+
(
374+
# note: we just give it "$dset" as it only uses the wav.scp, the
375+
# feature type does not matter.
376+
377+
steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
378+
--extra-left-context-initial $extra_left_context_initial \
379+
--acwt 1.0 --post-decode-acwt 10.0 \
380+
--scoring-opts "--min-lmwt 5 " \
381+
$dir/graph data/${dset} ${dir}_online/decode_${dset} || exit 1;
382+
steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
383+
data/${dset}_hires ${dir}_online/decode_${dset} ${dir}_online/decode_${dset}_rescore || exit 1
384+
) || touch $dir/.error &
385+
done
386+
wait
387+
if [ -f $dir/.error ]; then
388+
echo "$0: something went wrong in decoding"
389+
exit 1
390+
fi
391+
fi
392+
393+
394+
316395
exit 0

egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/bin/bash
22

3-
# 1f is as 1b, but increasing decay-time from 40 to 80. [see also 1e, at 20.]
3+
# 1f is as 1d, but increasing decay-time from 40 to 80. [see also 1e, at 20.]
4+
# see 1e for summary of results.
45

56
# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it
67
# uses egs from 1b, remember to remove that before I commit.

egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
#######################
44
# 1g is as 1e, but reducing decay-time further from 20 to 10.
5+
# see 1e for summary of results.
6+
57
# 1e is as 1b, but reducing decay-time from 40 to 20.
68

79
# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it

egs/wsj/s5/steps/online/nnet3/decode.sh

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
stage=0
99
nj=4
1010
cmd=run.pl
11+
frames_per_chunk=20
12+
extra_left_context_initial=0
1113
min_active=200
1214
max_active=7000
1315
beam=15.0
@@ -114,11 +116,6 @@ else
114116
fi
115117

116118

117-
decoder=online2-wav-nnet3-latgen-faster
118-
parallel_opts=
119-
opts="--online=$online"
120-
121-
122119
if [ "$post_decode_acwt" == 1.0 ]; then
123120
lat_wspecifier="ark:|gzip -c >$dir/lat.JOB.gz"
124121
else
@@ -132,8 +129,12 @@ if [ -f $srcdir/frame_subsampling_factor ]; then
132129
fi
133130

134131
if [ $stage -le 0 ]; then
135-
$cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
136-
$decoder $opts $silence_weighting_opts --do-endpointing=$do_endpointing $frame_subsampling_opt \
132+
$cmd JOB=1:$nj $dir/log/decode.JOB.log \
133+
online2-wav-nnet3-latgen-faster $silence_weighting_opts --do-endpointing=$do_endpointing \
134+
--frames-per-chunk=$frames_per_chunk \
135+
--extra-left-context-initial=$extra_left_context_initial \
136+
--online=$online \
137+
$frame_subsampling_opt \
137138
--config=$online_config \
138139
--min-active=$min_active --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
139140
--acoustic-scale=$acwt --word-symbol-table=$graphdir/words.txt \

src/itf/decodable-itf.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ class DecodableInterface {
112112

113113
/// Returns the number of states in the acoustic model
114114
/// (they will be indexed one-based, i.e. from 1 to NumIndices();
115-
/// this is for compatibility with OpenFst.
115+
/// this is for compatibility with OpenFst).
116116
virtual int32 NumIndices() const = 0;
117117

118118
virtual ~DecodableInterface() {}

src/nnet2/online-nnet2-decodable.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ int32 DecodableNnet2Online::NumFramesReady() const {
8080

8181
void DecodableNnet2Online::ComputeForFrame(int32 frame) {
8282
int32 features_ready = features_->NumFramesReady();
83-
bool input_finished = features_->IsLastFrame(features_ready - 1);
83+
bool input_finished = features_->IsLastFrame(features_ready - 1);
8484
KALDI_ASSERT(frame >= 0);
8585
if (frame >= begin_frame_ &&
8686
frame < begin_frame_ + scaled_loglikes_.NumRows())
@@ -112,20 +112,20 @@ void DecodableNnet2Online::ComputeForFrame(int32 frame) {
112112
t_modified = features_ready - 1;
113113
features_->GetFrame(t_modified, &row);
114114
}
115-
CuMatrix<BaseFloat> cu_features;
115+
CuMatrix<BaseFloat> cu_features;
116116
cu_features.Swap(&features); // Copy to GPU, if we're using one.
117-
117+
118118

119119
int32 num_frames_out = input_frame_end - input_frame_begin -
120120
left_context_ - right_context_;
121-
121+
122122
CuMatrix<BaseFloat> cu_posteriors(num_frames_out, num_pdfs_);
123-
123+
124124
// The "false" below tells it not to pad the input: we've already done
125125
// any padding that we needed to do.
126126
NnetComputation(nnet_.GetNnet(), cu_features,
127127
false, &cu_posteriors);
128-
128+
129129
cu_posteriors.ApplyFloor(1.0e-20); // Avoid log of zero which leads to NaN.
130130
cu_posteriors.ApplyLog();
131131
// subtract log-prior (divide by prior)

src/nnet3/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ OBJFILES = nnet-common.o nnet-compile.o nnet-component-itf.o \
2828
discriminative-supervision.o nnet-discriminative-example.o \
2929
nnet-discriminative-diagnostics.o \
3030
discriminative-training.o nnet-discriminative-training.o \
31-
online-nnet3-decodable-simple.o nnet-compile-looped.o \
32-
decodable-simple-looped.o
31+
nnet-compile-looped.o decodable-simple-looped.o \
32+
decodable-online-looped.o
3333

3434

3535
LIBNAME = kaldi-nnet3

0 commit comments

Comments (0)