feelingstack
diff --git a/‎egs/hkust/s5b/conf/decode_wide.config‎
Lines changed: 4 additions & 1 deletion b/‎egs/hkust/s5b/conf/decode_wide.config‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎egs/swbd/s5b/RESULTS‎
Lines changed: 24 additions & 0 deletions b/‎egs/swbd/s5b/RESULTS‎
Lines changed: 24 additions & 0 deletions
diff --git a/‎egs/swbd/s5b/local/run_resegment.sh‎
Lines changed: 126 additions & 0 deletions b/‎egs/swbd/s5b/local/run_resegment.sh‎
Lines changed: 126 additions & 0 deletions
diff --git a/‎egs/swbd/s5b/local/score_sclite.sh‎
Lines changed: 3 additions & 2 deletions b/‎egs/swbd/s5b/local/score_sclite.sh‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎egs/swbd/s5b/local/swbd1_train_lms.sh‎
Lines changed: 0 additions & 1 deletion b/‎egs/swbd/s5b/local/swbd1_train_lms.sh‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎egs/swbd/s5b/run.sh‎
Lines changed: 12 additions & 7 deletions b/‎egs/swbd/s5b/run.sh‎
Lines changed: 12 additions & 7 deletions
diff --git a/‎egs/wsj/s5/run.sh‎
Lines changed: 1 addition & 1 deletion b/‎egs/wsj/s5/run.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎egs/wsj/s5/steps/compute_cmvn_stats.sh‎
Lines changed: 17 additions & 3 deletions b/‎egs/wsj/s5/steps/compute_cmvn_stats.sh‎
Lines changed: 17 additions & 3 deletions
diff --git a/‎egs/wsj/s5/steps/decode_nnet_cpu.sh‎
Lines changed: 1 addition & 1 deletion b/‎egs/wsj/s5/steps/decode_nnet_cpu.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎egs/wsj/s5/steps/decode_si_ali.sh‎
Lines changed: 1 addition & 0 deletions b/‎egs/wsj/s5/steps/decode_si_ali.sh‎
Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,6 @@
 beam=18.0 # beam for decoding.  
-lat_beam=10.0 # lattice beam for decoding
+
+lattice_beam=10.0 # lattice beam for decoding
+lat_beam=10.0     # lattice beam for decoding (## This is the variable in older kaldi scripts, and has been replaced by "lattice_beam")
+
 first_beam=10.0 # beam for 1st-pass decoding in SAT.
@@ -34,3 +34,27 @@ exp/tri4b/decode_eval2000_sw1_tg/score_14/eval2000.ctm.swbd.filt.sys:     | Sum/
 exp/tri4b/decode_eval2000_sw1_tg.si/score_12/eval2000.ctm.swbd.filt.sys:     | Sum/Avg   | 1831  21395 | 73.6   19.3    7.1    3.6   30.0   66.2 |
 
 
+# some more recent results (Sep 25 2013), from tri4b and tri4c_reseg, to
+# see the effect of resegmentation.  Note: we're only looking at the "swbd" results here,
+# the callhome results or total results are terrible because of huge insertions, because
+# it seems that only some segments of the audio files are in the stm.  I'm not sure
+# where to get the start and end points in the files, that they intended us to 
+# decode.
+%WER 22.2 | 1831 21395 | 80.3 13.8 5.9 2.5 22.2 60.1 | exp/tri4b/decode_eval2000_sw1_fsh_tgpr/score_15/eval2000.ctm.swbd.filt.sys
+%WER 29.3 | 1831 21395 | 73.5 18.7 7.8 2.9 29.3 65.0 | exp/tri4b/decode_eval2000_sw1_fsh_tgpr.si/score_17/eval2000.ctm.swbd.filt.sys
+%WER 22.5 | 1831 21395 | 79.8 13.8 6.4 2.3 22.5 60.3 | exp/tri4b/decode_eval2000_sw1_tg/score_17/eval2000.ctm.swbd.filt.sys
+%WER 30.5 | 1831 21395 | 73.1 19.8 7.1 3.6 30.5 65.8 | exp/tri4b/decode_eval2000_sw1_tg.si/score_14/eval2000.ctm.swbd.filt.sys
+
+%WER 22.9 | 1831 21395 | 79.7 13.4 6.9 2.6 22.9 62.8 | exp/tri4c_reseg/decode_eval2000_sw1_fsh_tgpr/score_14/eval2000.ctm.swbd.filt.sys
+%WER 29.6 | 1831 21395 | 73.8 18.2 8.1 3.4 29.6 66.8 | exp/tri4c_reseg/decode_eval2000_sw1_fsh_tgpr.si/score_13/eval2000.ctm.swbd.filt.sys
+%WER 23.5 | 1831 21395 | 79.1 13.8 7.1 2.6 23.5 63.6 | exp/tri4c_reseg/decode_eval2000_sw1_tg/score_15/eval2000.ctm.swbd.filt.sys
+%WER 30.9 | 1831 21395 | 73.1 19.0 7.9 4.0 30.9 67.6 | exp/tri4c_reseg/decode_eval2000_sw1_tg.si/score_12/eval2000.ctm.swbd.filt.sys
+# so the resegmented one is about 0.3 to 1.0 worse, but the #sub is actually down, it's due to more deletions
+# and insertions.  This is kind of what we'd expect, since the reference segmentation is a kind of "oracle".
+
+# below are some partial results (only the SI decode finished so far), of a version where I 
+# kept the segments that the segmentation regarded as noise (e.g. cough, etc.)  Strangely, it
+# has (slightly) more deletions and fewer insertions than the baseline, which is the opposite
+# of what we would expect.
+%WER 29.7 | 1831 21395 | 73.6 18.3 8.0 3.4 29.7 67.0 | exp/tri4c_reseg/decode_eval2000_with_noise_sw1_fsh_tgpr.si/score_13/eval2000_with_noise.ctm.swbd.filt.sys
+%WER 30.8 | 1831 21395 | 72.9 19.0 8.1 3.7 30.8 67.5 | exp/tri4c_reseg/decode_eval2000_with_noise_sw1_tg.si/score_13/eval2000_with_noise.ctm.swbd.filt.sys
@@ -0,0 +1,126 @@
+#!/bin/bash
+
+. cmd.sh
+
+steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
+  data/train_30k_nodup data/lang exp/tri3b exp/tri3b_ali_30k_nodup || exit 1;
+
+
+steps/train_lda_mllt.sh --cmd "$train_cmd" --realign-iters "" \
+  1000 10000 data/train_30k_nodup data/lang exp/tri3b_ali_30k_nodup exp/tri4b_seg || exit 1;
+
+# Make the phone decoding-graph.
+steps/make_phone_graph.sh data/lang exp/tri3b_ali_all exp/tri4b_seg || exit 1;
+
+mkdir -p data_reseg
+
+for data in train_orig eval2000; do
+  cp -rT data/${data} data_reseg/${data}_orig; rm -r data_reseg/${data}_orig/split*
+  for f in text utt2spk spk2utt feats.scp cmvn.scp segments; do rm data_reseg/${data}_orig/$f; done
+  cat data_reseg/${data}_orig/wav.scp  | awk '{print $1, $1;}' | \
+    tee data_reseg/${data}_orig/spk2utt > data_reseg/${data}_orig/utt2spk
+  mfccdir=mfcc_reseg # don't use mfcc because of the way names are assigned within that
+                     # dir, we'll overwrite the old data.
+  mkdir -p mfcc_reseg
+  steps/make_mfcc.sh --compress true --nj 20 --cmd "$train_cmd" data_reseg/${data}_orig exp/make_mfcc/${data}_orig $mfccdir 
+  # caution: the new speakers don't correspond to the old ones, since they now have "sw0" at the start..
+  steps/compute_cmvn_stats.sh --two-channel data_reseg/${data}_orig exp/make_mfcc/${data}_orig $mfccdir 
+done
+
+
+
+steps/decode_si_ali.sh --cmd "$decode_cmd" --nj 60 --beam 7.0 --max-active 1000 \
+  exp/tri4b_seg/phone_graph data_reseg/train_orig exp/tri4b_seg/decode_train_orig2 
+
+steps/decode_si_ali.sh --cmd "$decode_cmd" --nj 10 --beam 7.0 --max-active 1000 \
+  exp/tri4b_seg/phone_graph data_reseg/eval2000_orig exp/tri4b_seg/decode_eval2000_orig
+
+
+# Here: resegment.
+# Note: it would be perfectly possible to use exp/tri3b_ali_train here instead
+# of exp/tri4b_seg/decode_train_orig.  In this case we'd be relying on the transcripts.
+# I chose not to do this for more consistency with what happens in test time.
+
+steps/resegment_data.sh --cmd "$train_cmd" data_reseg/train_orig data/lang \
+  exp/tri4b_seg/decode_train_orig data_reseg/train exp/tri4b_resegment_train
+
+steps/resegment_data.sh --cmd "$train_cmd" data_reseg/eval2000_orig data/lang \
+  exp/tri4b_seg/decode_eval2000_orig data_reseg/eval2000 exp/tri4b_resegment_eval2000
+
+# We need all the training data to be aligned (not just "train_nodup"), in order
+# to get the resegmented "text".
+steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
+  data/train data/lang exp/tri3b exp/tri3b_ali_train || exit 1;
+
+# Get the file data_reseg/train/text
+steps/resegment_text.sh --cmd "$train_cmd" data/train data/lang \
+  exp/tri3b_ali_train data_reseg/train exp/tri4b_resegment_train
+
+
+for data in train eval2000; do
+  utils/fix_data_dir.sh data_reseg/${data}
+  utils/validate_data_dir.sh --no-feats --no-text data_reseg/${data}
+  mfccdir=mfcc_reseg # don't use mfcc because of the way names are assigned within that
+                     # dir, we'll overwrite the old data.
+  steps/make_mfcc.sh --compress true --nj 40 --cmd "$train_cmd" data_reseg/${data} \
+    exp/make_mfcc/${data}_reseg $mfccdir  || exit 1;
+  steps/compute_cmvn_stats.sh data_reseg/${data} exp/make_mfcc/${data}_reseg $mfccdir  || exit 1;
+  utils/fix_data_dir.sh data_reseg/${data} || exit 1;
+done
+
+
+# Note: we'll be comparing tri4b, which was trained on train_nodup, with tri4c_reseg, which
+# was trained on *all* the resegmented data.  However, it's comparable because the actual hours
+# of data is less in tri4c_reseg: 265h, versus 284 in the nodup data.
+# cat data/train_nodup/segments | awk '{nf += $4 - $3; } END{print nf /3600;}'
+# 284.433
+# cat data_reseg/train/segments | awk '{nf += $4 - $3; } END{print nf /3600;}'
+# 265.154
+
+steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
+  data_reseg/train data/lang exp/tri3b exp/tri3b_ali_reseg || exit 1;
+
+steps/train_sat.sh  --cmd "$train_cmd" \
+  11500 200000 data_reseg/train data/lang exp/tri3b_ali_reseg exp/tri4c_reseg || exit 1;
+
+
+for lm_suffix in tg fsh_tgpr; do
+  (
+    graph_dir=exp/tri4c_reseg/graph_sw1_${lm_suffix}
+    $train_cmd $graph_dir/mkgraph.log \
+      utils/mkgraph.sh data/lang_sw1_${lm_suffix} exp/tri4c_reseg $graph_dir
+    steps/decode_fmllr.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
+       $graph_dir data_reseg/eval2000 exp/tri4c_reseg/decode_eval2000_sw1_${lm_suffix}
+  ) &
+done
+
+
+exit 0;
+
+# Below is experimental.
+# I'm figuring out whether we should keep the segments that the 1st pass designated as noise.
+steps/resegment_data.sh --cmd "$train_cmd" \
+   --segmentation-opts "--remove-noise-only-segments false" \
+  data_reseg/eval2000_orig data/lang \
+  exp/tri4b_seg/decode_eval2000_orig data_reseg/eval2000_with_noise exp/tri4b_resegment_eval2000_with_noise
+
+for data in eval2000_with_noise; do
+  utils/fix_data_dir.sh data_reseg/${data}
+  utils/validate_data_dir.sh --no-feats --no-text data_reseg/${data}
+  mfccdir=mfcc_reseg # don't use mfcc because of the way names are assigned within that
+                     # dir, we'll overwrite the old data.
+  steps/make_mfcc.sh --compress true --nj 40 --cmd "$train_cmd" data_reseg/${data} \
+    exp/make_mfcc/${data}_reseg $mfccdir  || exit 1;
+  steps/compute_cmvn_stats.sh data_reseg/${data} exp/make_mfcc/${data}_reseg $mfccdir  || exit 1;
+  utils/fix_data_dir.sh data_reseg/${data} || exit 1;
+done
+
+for lm_suffix in tg fsh_tgpr; do
+  (
+    graph_dir=exp/tri4c_reseg/graph_sw1_${lm_suffix}
+    $train_cmd $graph_dir/mkgraph.log \
+      utils/mkgraph.sh data/lang_sw1_${lm_suffix} exp/tri4c_reseg $graph_dir
+    steps/decode_fmllr.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
+       $graph_dir data_reseg/eval2000_with_noise exp/tri4c_reseg/decode_eval2000_with_noise_sw1_${lm_suffix}
+  ) &
+done
@@ -83,7 +83,7 @@ if [ $stage -le 2 ]; then
 fi
 
 # For eval2000 score the subsets
-if [ "$name" == "eval2000" ]; then
+case "$name" in eval2000* )
   # Score only the swbd part...
   if [ $stage -le 3 ]; then  
     $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.swbd.LMWT.log \
@@ -98,6 +98,7 @@ if [ "$name" == "eval2000" ]; then
       grep -v '^sw_' $dir/score_LMWT/${name}.ctm '>' $dir/score_LMWT/${name}.ctm.callhm '&&' \
       $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/score_LMWT/stm.callhm $dir/score_LMWT/${name}.ctm.callhm || exit 1;
   fi
-fi
+ ;;
+esac
 
 exit 0
@@ -122,4 +122,3 @@ fi
 ## The following takes about 11 minutes to download on Eddie: 
 # wget --no-check-certificate http://ssli.ee.washington.edu/data/191M_conversational_web-filt+periods.gz
 
-
@@ -5,7 +5,7 @@
 # This is supposed to be the "new" version of the switchboard recipe,
 # after the s5/ one became a bit messy.  It is not 100% checked-through yet.
 
-exit 1;
+#exit 1;
 # This is a shell script, but it's recommended that you run the commands one by
 # one by copying and pasting into the shell.
 # Caution: some of the graph creation steps use quite a bit of memory, so you
@@ -176,6 +176,7 @@ done
 steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
   data/train_100k_nodup data/lang exp/tri3b exp/tri3b_ali_100k_nodup || exit 1;
 
+
 steps/train_sat.sh  --cmd "$train_cmd" \
   5500 90000 data/train_100k_nodup data/lang exp/tri3b_ali_100k_nodup \
    exp/tri4a || exit 1;
@@ -190,14 +191,18 @@ for lm_suffix in tg fsh_tgpr; do
   ) &
 done
 
+
+#local/run_resegment.sh
+
 # Now train a LDA+MLLT+SAT model on the entire training data (train_nodup; 
 # 286 hours)
 # Train tri4b, which is LDA+MLLT+SAT, on train_nodup data.
 steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
-  data/train_nodup data/lang exp/tri3b exp/tri3b_ali_all || exit 1;
+  data/train_nodup data/lang exp/tri3b exp/tri3b_ali_nodup || exit 1;
+
 
 steps/train_sat.sh  --cmd "$train_cmd" \
-  11500 200000 data/train_nodup data/lang exp/tri3b_ali_all exp/tri4b || exit 1;
+  11500 200000 data/train_nodup data/lang exp/tri3b_ali_nodup exp/tri4b || exit 1;
 
 for lm_suffix in tg fsh_tgpr; do
   (
@@ -219,15 +224,15 @@ steps/align_fmllr.sh --nj 50 --cmd "$train_cmd" \
   data/train_100k_nodup data/lang exp/tri4a exp/tri4a_ali_100k_nodup || exit 1
 
 steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \
-  data/train_nodup data/lang exp/tri4b exp/tri4b_ali_all || exit 1
+  data/train_nodup data/lang exp/tri4b exp/tri4b_ali_nodup || exit 1
 
 steps/make_denlats.sh --nj 50 --cmd "$decode_cmd" --config conf/decode.config \
   --transform-dir exp/tri4a_ali_100k_nodup \
   data/train_100k_nodup data/lang exp/tri4a exp/tri4a_denlats_100k_nodup \
   || exit 1;
 
 steps/make_denlats.sh --nj 100 --cmd "$decode_cmd" --config conf/decode.config \
-  --transform-dir exp/tri4b_ali_all \
+  --transform-dir exp/tri4b_ali_nodup \
   data/train_nodup data/lang exp/tri4b exp/tri4b_denlats_all || exit 1;
 
 # 4 iterations of MMI seems to work well overall. The number of iterations is
@@ -274,14 +279,14 @@ steps/train_diag_ubm.sh --silence-weight 0.5 --nj 50 --cmd "$train_cmd" \
   700 data/train_100k_nodup data/lang exp/tri4a_ali_100k_nodup exp/tri4a_dubm
 
 steps/train_diag_ubm.sh --silence-weight 0.5 --nj 100 --cmd "$train_cmd" \
-  700 data/train_nodup data/lang exp/tri4b_ali_all exp/tri4b_dubm
+  700 data/train_nodup data/lang exp/tri4b_ali_nodup exp/tri4b_dubm
 
 steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \
   data/train_100k_nodup data/lang exp/tri4a_ali_100k_nodup exp/tri4a_dubm \
   exp/tri4a_denlats_100k_nodup exp/tri4a_fmmi_b0.1 || exit 1;
 
 steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \
-  data/train_nodup data/lang exp/tri4b_ali_all exp/tri4b_dubm \
+  data/train_nodup data/lang exp/tri4b_ali_nodup exp/tri4b_dubm \
   exp/tri4b_denlats_all exp/tri4b_fmmi_b0.1 || exit 1;
 
 for iter in 4 5 6 7 8; do
 
@@ -322,7 +322,7 @@ local/run_mmi_tri4b.sh
 local/run_sgmm2.sh
 
 # You probably want to run the hybrid recipe as it is complementary:
-local/run_hybrid.sh
+local/run_dnn.sh
 
 
 # Getting results [see RESULTS file]
 
@@ -21,14 +21,24 @@
 echo "$0 $@"  # Print the command line for logging
 
 fake=false
+two_channel=false
+
 if [ $1 == "--fake" ]; then
   fake=true
   shift
 fi
+if [ $1 == "--two-channel" ]; then
+  two_channel=true
+  shift
+fi
 
 if [ $# != 3 ]; then
-   echo "usage: compute_cmvn_stats.sh [--fake] <data-dir> <log-dir> <path-to-cmvn-dir>";
-   echo "(note: --fake gives you fake cmvn stats that do no normalization.)"
+   echo "usage: compute_cmvn_stats.sh [options] <data-dir> <log-dir> <path-to-cmvn-dir>";
+   echo "Options:"
+   echo " --fake          gives you fake cmvn stats that do no normalization."
+   echo " --two-channel   is for two-channel telephone data, there must be no segments "
+   echo "                 file and reco2file_and_channel must be present.  It will take"
+   echo "                 only frames that are louder than the other channel."
    exit 1;
 fi
 
@@ -63,7 +73,11 @@ if $fake; then
                                                         for (n=0; n < dim; n++) { printf("1 "); } print "0 ]";}' | \
     copy-matrix ark:- ark,scp:$cmvndir/cmvn_$name.ark,$cmvndir/cmvn_$name.scp && \
      echo "Error creating fake CMVN stats" && exit 1;
-else  
+elif $two_channel; then
+  ! compute-cmvn-stats-two-channel $data/reco2file_and_channel scp:$data/feats.scp \
+       ark,scp:$cmvndir/cmvn_$name.ark,$cmvndir/cmvn_$name.scp \
+    2> $logdir/cmvn_$name.log && echo "Error computing CMVN stats (using two-channel method)" && exit 1;
+else
   ! compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark,scp:$cmvndir/cmvn_$name.ark,$cmvndir/cmvn_$name.scp \
     2> $logdir/cmvn_$name.log && echo "Error computing CMVN stats" && exit 1;
 fi
 
@@ -16,7 +16,7 @@ cmd=run.pl
 beam=15.0
 max_active=7000
 
-#WARNING: This option is renamed lat_beam (it was renamed to follow the naming 
+#WARNING: This option is renamed from lat_beam (it was renamed to follow the naming 
 #         in the other scripts)
 lattice_beam=8.0 # Beam we use in lattice generation.
 iter=final
 
@@ -0,0 +1 @@
+decode.sh
Original file line number	Diff line number	Diff line change
`@@ -122,4 +122,3 @@ fi`
`122`	`122`	`## The following takes about 11 minutes to download on Eddie:`
`123`	`123`	`# wget --no-check-certificate http://ssli.ee.washington.edu/data/191M_conversational_web-filt+periods.gz`
`124`	`124`
`125`		`-`