Skip to content

Commit 8df46b8

Browse files
committed
master: merging most code changes from chain branch (but not the actual chain code)-- want to keep shared parts of the code the same.
1 parent b5fb3f9 commit 8df46b8

File tree

126 files changed

+5799
-2047
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

126 files changed

+5799
-2047
lines changed

egs/swbd/s5c/local/nnet3/run_ivector_common.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,20 @@ if [ "$speed_perturb" == "true" ]; then
2424
utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
2525
utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
2626
rm -r data/temp1 data/temp2
27-
27+
2828
mfccdir=mfcc_perturbed
2929
steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
3030
data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
3131
steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
3232
utils/fix_data_dir.sh data/${datadir}_tmp
33-
33+
3434
utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
3535
utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
3636
utils/fix_data_dir.sh data/${datadir}_sp
3737
rm -r data/temp0 data/${datadir}_tmp
3838
done
3939
fi
40-
40+
4141
if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
4242
#obtain the alignment of the perturbed data
4343
steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \

egs/swbd/s5c/local/online/run_nnet2_common.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22

33
. ./cmd.sh
4-
set -e
4+
set -e
55
stage=1
66
train_stage=-10
77

@@ -18,7 +18,7 @@ if [ $stage -le 1 ]; then
1818
fi
1919
utils/copy_data_dir.sh data/train data/train_scaled_hires
2020
utils/copy_data_dir.sh data/train data/train_hires
21-
21+
2222
data_dir=data/train_scaled_hires
2323
cat $data_dir/wav.scp | python -c "
2424
import sys, os, subprocess, re, random
@@ -34,12 +34,12 @@ for line in sys.stdin.readlines():
3434
--cmd "$train_cmd" data/train_scaled_hires exp/make_hires/train_scaled $mfccdir;
3535
steps/compute_cmvn_stats.sh data/train_scaled_hires exp/make_hires/train_scaled $mfccdir;
3636

37-
# we need these features for the run_nnet2_ms.sh
37+
# we need these features for the run_nnet2_ms.sh
3838
steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
3939
--cmd "$train_cmd" data/train_hires exp/make_hires/train $mfccdir;
4040
steps/compute_cmvn_stats.sh data/train_hires exp/make_hires/train $mfccdir;
4141

42-
# Remove the small number of utterances that couldn't be extracted for some
42+
# Remove the small number of utterances that couldn't be extracted for some
4343
# reason (e.g. too short; no such file).
4444
utils/fix_data_dir.sh data/train_scaled_hires;
4545
utils/fix_data_dir.sh data/train_hires;
@@ -50,7 +50,7 @@ for line in sys.stdin.readlines():
5050
data/eval2000_hires exp/make_hires/eval2000 $mfccdir;
5151
steps/compute_cmvn_stats.sh data/eval2000_hires exp/make_hires/eval2000 $mfccdir;
5252
utils/fix_data_dir.sh data/eval2000_hires # remove segments with problems
53-
53+
5454
# Use the first 4k sentences as dev set. Note: when we trained the LM, we used
5555
# the 1st 10k sentences as dev set, so the 1st 4k won't have been used in the
5656
# LM training data. However, they will be in the lexicon, plus speakers
@@ -84,7 +84,7 @@ if [ $stage -le 2 ]; then
8484
# We need to build a small system just because we need the LDA+MLLT transform
8585
# to train the diag-UBM on top of. We use --num-iters 13 because after we get
8686
# the transform (12th iter is the last), any further training is pointless.
87-
# this decision is based on fisher_english
87+
# this decision is based on fisher_english
8888
steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
8989
--splice-opts "--left-context=3 --right-context=3" \
9090
5500 90000 data/train_scaled_hires_100k_nodup \

egs/swbd/s5c/local/score_basic.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ dir=$3
2828

2929
model=$dir/../final.mdl # assume model one level up from decoding dir.
3030

31-
hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl
31+
hubscr=$KALDI_ROOT/tools/sctk/bin/hubscr.pl
3232
[ ! -f $hubscr ] && echo "Cannot find scoring program at $hubscr" && exit 1;
3333
hubdir=`dirname $hubscr`
3434

@@ -42,7 +42,7 @@ mkdir -p $dir/scoring/log
4242

4343

4444
function filter_text {
45-
perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; }
45+
perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; }
4646
while(<STDIN>) { @A = split(" ", $_); $id = shift @A; print "$id ";
4747
foreach $a (@A) { if (!defined $bad{$a}) { print "$a "; }} print "\n"; }' \
4848
'[NOISE]' '[LAUGHTER]' '[VOCALIZED-NOISE]' '<UNK>' '%HESITATION'

egs/swbd/s5c/run.sh

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# 1. added more training data for early stages
88
# 2. removed SAT system (and later stages) on the 100k utterance training data
99
# 3. reduced number of LM rescoring, only sw1_tg and sw1_fsh_fg remain
10-
# 4. mapped swbd transcription to fisher style, instead of the other way around
10+
# 4. mapped swbd transcription to fisher style, instead of the other way around
1111

1212
set -e # exit on error
1313
has_fisher=true
@@ -22,7 +22,7 @@ local/swbd1_prepare_dict.sh
2222
# which specifies the directory to Switchboard documentations. Specifically, if
2323
# this argument is given, the script will look for the conv.tab file and correct
2424
# speaker IDs to the actual speaker personal identification numbers released in
25-
# the documentations. The documentations can be found here:
25+
# the documentations. The documentations can be found here:
2626
# https://catalog.ldc.upenn.edu/docs/LDC97S62/
2727
# Note: if you are using this link, make sure you rename conv_tab.csv to conv.tab
2828
# after downloading.
@@ -37,7 +37,7 @@ utils/prepare_lang.sh data/local/dict_nosp \
3737
"<unk>" data/local/lang_nosp data/lang_nosp
3838

3939
# Now train the language models. We are using SRILM and interpolating with an
40-
# LM trained on the Fisher transcripts (part 2 disk is currently missing; so
40+
# LM trained on the Fisher transcripts (part 2 disk is currently missing; so
4141
# only part 1 transcripts ~700hr are used)
4242

4343
# If you have the Fisher data, you can set this "fisher_dir" variable.
@@ -79,7 +79,7 @@ mfccdir=mfcc
7979
for x in train eval2000; do
8080
steps/make_mfcc.sh --nj 50 --cmd "$train_cmd" \
8181
data/$x exp/make_mfcc/$x $mfccdir
82-
steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir
82+
steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir
8383
utils/fix_data_dir.sh data/$x
8484
done
8585

@@ -91,10 +91,10 @@ utils/subset_data_dir.sh --first data/train 4000 data/train_dev # 5hr 6min
9191
n=$[`cat data/train/segments | wc -l` - 4000]
9292
utils/subset_data_dir.sh --last data/train $n data/train_nodev
9393

94-
# Now-- there are 260k utterances (313hr 23min), and we want to start the
95-
# monophone training on relatively short utterances (easier to align), but not
94+
# Now-- there are 260k utterances (313hr 23min), and we want to start the
95+
# monophone training on relatively short utterances (easier to align), but not
9696
# only the shortest ones (mostly uh-huh). So take the 100k shortest ones;
97-
# remove most of the repeated utterances (these are the uh-huh type ones), and
97+
# remove most of the repeated utterances (these are the uh-huh type ones), and
9898
# then take 10k random utterances from those (about 4hr 40mins)
9999
utils/subset_data_dir.sh --shortest data/train_nodev 100000 data/train_100kshort
100100
utils/subset_data_dir.sh data/train_100kshort 30000 data/train_30kshort
@@ -108,13 +108,13 @@ local/remove_dup_utts.sh 200 data/train_100k data/train_100k_nodup # 110hr
108108
local/remove_dup_utts.sh 300 data/train_nodev data/train_nodup # 286hr
109109
## Starting basic training on MFCC features
110110
steps/train_mono.sh --nj 30 --cmd "$train_cmd" \
111-
data/train_30kshort data/lang_nosp exp/mono
111+
data/train_30kshort data/lang_nosp exp/mono
112112

113113
steps/align_si.sh --nj 30 --cmd "$train_cmd" \
114-
data/train_100k_nodup data/lang_nosp exp/mono exp/mono_ali
114+
data/train_100k_nodup data/lang_nosp exp/mono exp/mono_ali
115115

116116
steps/train_deltas.sh --cmd "$train_cmd" \
117-
3200 30000 data/train_100k_nodup data/lang_nosp exp/mono_ali exp/tri1
117+
3200 30000 data/train_100k_nodup data/lang_nosp exp/mono_ali exp/tri1
118118

119119
(
120120
graph_dir=exp/tri1/graph_nosp_sw1_tg
@@ -125,7 +125,7 @@ steps/train_deltas.sh --cmd "$train_cmd" \
125125
) &
126126

127127
steps/align_si.sh --nj 30 --cmd "$train_cmd" \
128-
data/train_100k_nodup data/lang_nosp exp/tri1 exp/tri1_ali
128+
data/train_100k_nodup data/lang_nosp exp/tri1 exp/tri1_ali
129129

130130
steps/train_deltas.sh --cmd "$train_cmd" \
131131
4000 70000 data/train_100k_nodup data/lang_nosp exp/tri1_ali exp/tri2
@@ -149,11 +149,11 @@ steps/align_si.sh --nj 30 --cmd "$train_cmd" \
149149
# From now, we start using all of the data (except some duplicates of common
150150
# utterances, which don't really contribute much).
151151
steps/align_si.sh --nj 30 --cmd "$train_cmd" \
152-
data/train_nodup data/lang_nosp exp/tri2 exp/tri2_ali_nodup
152+
data/train_nodup data/lang_nosp exp/tri2 exp/tri2_ali_nodup
153153

154154
# Do another iteration of LDA+MLLT training, on all the data.
155155
steps/train_lda_mllt.sh --cmd "$train_cmd" \
156-
6000 140000 data/train_nodup data/lang_nosp exp/tri2_ali_nodup exp/tri3
156+
6000 140000 data/train_nodup data/lang_nosp exp/tri2_ali_nodup exp/tri3
157157

158158
(
159159
graph_dir=exp/tri3/graph_nosp_sw1_tg
@@ -190,7 +190,7 @@ fi
190190

191191
# Train tri4, which is LDA+MLLT+SAT, on all the (nodup) data.
192192
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
193-
data/train_nodup data/lang exp/tri3 exp/tri3_ali_nodup
193+
data/train_nodup data/lang exp/tri3 exp/tri3_ali_nodup
194194

195195

196196
steps/train_sat.sh --cmd "$train_cmd" \
@@ -215,21 +215,21 @@ if $has_fisher; then
215215
exp/tri4/decode_eval2000_sw1_{tg,fsh_fg}
216216
fi
217217

218-
# MMI training starting from the LDA+MLLT+SAT systems on all the (nodup) data.
218+
# MMI training starting from the LDA+MLLT+SAT systems on all the (nodup) data.
219219
steps/align_fmllr.sh --nj 50 --cmd "$train_cmd" \
220220
data/train_nodup data/lang exp/tri4 exp/tri4_ali_nodup
221221

222222
steps/make_denlats.sh --nj 50 --cmd "$decode_cmd" \
223223
--config conf/decode.config --transform-dir exp/tri4_ali_nodup \
224-
data/train_nodup data/lang exp/tri4 exp/tri4_denlats_nodup
224+
data/train_nodup data/lang exp/tri4 exp/tri4_denlats_nodup
225225

226226
# 4 iterations of MMI seems to work well overall. The number of iterations is
227227
# used as an explicit argument even though train_mmi.sh will use 4 iterations by
228228
# default.
229229
num_mmi_iters=4
230230
steps/train_mmi.sh --cmd "$decode_cmd" \
231231
--boost 0.1 --num-iters $num_mmi_iters \
232-
data/train_nodup data/lang exp/tri4_{ali,denlats}_nodup exp/tri4_mmi_b0.1
232+
data/train_nodup data/lang exp/tri4_{ali,denlats}_nodup exp/tri4_mmi_b0.1
233233

234234
for iter in 1 2 3 4; do
235235
(
@@ -260,7 +260,7 @@ steps/train_diag_ubm.sh --silence-weight 0.5 --nj 50 --cmd "$train_cmd" \
260260
steps/train_mmi_fmmi.sh --learning-rate 0.005 \
261261
--boost 0.1 --cmd "$train_cmd" \
262262
data/train_nodup data/lang exp/tri4_ali_nodup exp/tri4_dubm \
263-
exp/tri4_denlats_nodup exp/tri4_fmmi_b0.1
263+
exp/tri4_denlats_nodup exp/tri4_fmmi_b0.1
264264

265265
for iter in 4 5 6 7 8; do
266266
(

egs/wsj/s5/local/nnet3/run_tdnn_baseline.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ if [ $stage -le 8 ]; then
3535
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
3636
fi
3737

38-
3938
steps/nnet3/train_tdnn.sh --stage $train_stage \
4039
--num-epochs 8 --num-jobs-initial 2 --num-jobs-final 14 \
4140
--splice-indexes "-1,0,1 -2,1 -4,2 0" \
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
#!/bin/bash
#
# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0

# Version of align_fmllr.sh that generates lattices (lat.*.gz) with
# alignments of alternative pronunciations in them.  Mainly intended
# as a precursor to CTC training for now.
#
# Pipeline (gated by --stage):
#   0: compile training graphs (with transition-probs baked in);
#   1: first-pass speaker-independent alignment;
#   2: estimate per-speaker fMLLR transforms from that alignment;
#   3: generate undeterminized lattices over the fMLLR features.
#
# Usage: steps/align_fmllr_lats.sh <data-dir> <lang-dir> <src-dir> <align-dir>

# Begin configuration section.
stage=0
nj=4                 # number of parallel jobs (data is split this many ways)
cmd=run.pl           # job-dispatch command (run.pl or queue.pl)
# Begin configuration.
scale_opts="--transition-scale=1.0 --self-loop-scale=0.1"
acoustic_scale=0.1
beam=10
retry_beam=40
final_beam=20  # For the lattice-generation phase there is no retry-beam.  This
               # is a limitation of gmm-latgen-faster.  We just use an
               # intermediate beam.  We'll lose a little data and it will be
               # slightly slower.  (however, the min-active of 200 that
               # gmm-latgen-faster defaults to may help.)
boost_silence=1.0 # factor by which to boost silence during alignment.
fmllr_update_type=full
# End configuration options.

echo "$0 $@"  # Print the command line for logging

[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;

if [ $# != 4 ]; then
   echo "usage: steps/align_fmllr_lats.sh <data-dir> <lang-dir> <src-dir> <align-dir>"
   echo "e.g.:  steps/align_fmllr_lats.sh data/train data/lang exp/tri1 exp/tri1_lats"
   echo "main options (for others, see top of script file)"
   echo "  --config <config-file>                           # config containing options"
   echo "  --nj <nj>                                        # number of parallel jobs"
   echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
   echo "  --fmllr-update-type (full|diag|offset|none)      # default full."
   exit 1;
fi

data=$1
lang=$2
srcdir=$3
dir=$4

oov=`cat $lang/oov.int` || exit 1;
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
sdata=$data/split$nj

mkdir -p $dir/log
echo $nj > $dir/num_jobs
# Re-split the data only if the existing split is missing or stale
# (feats.scp newer than the split directory).
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;

cp $srcdir/{tree,final.mdl} $dir || exit 1;
cp $srcdir/final.alimdl $dir 2>/dev/null  # optional: only SAT systems have an alignment model.
cp $srcdir/final.occs $dir;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null`
cp $srcdir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option.
delta_opts=`cat $srcdir/delta_opts 2>/dev/null`
cp $srcdir/delta_opts $dir 2>/dev/null

# Presence of final.mat implies an LDA(+MLLT) front-end; otherwise use deltas.
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"

# sifeats = speaker-independent feature pipeline (before any fMLLR).
case $feat_type in
  delta) sifeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas $delta_opts ark:- ark:- |";;
  lda) sifeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
    cp $srcdir/final.mat $dir
    cp $srcdir/full.mat $dir 2>/dev/null
   ;;
  *) echo "Invalid feature type $feat_type" && exit 1;
esac

## Set up model and alignment model.
mdl=$srcdir/final.mdl
if [ -f $srcdir/final.alimdl ]; then
  # Use the speaker-independent alignment model for pass-1 alignment if available.
  alimdl=$srcdir/final.alimdl
else
  alimdl=$srcdir/final.mdl
fi
[ ! -f $mdl ] && echo "$0: no such model $mdl" && exit 1;
# Both model "commands" boost silence likelihoods on the fly before use.
alimdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $alimdl - |"
mdl_cmd="gmm-boost-silence --boost=$boost_silence `cat $lang/phones/optional_silence.csl` $mdl - |"


## because gmm-latgen-faster doesn't support adding the transition-probs to the
## graph itself, we need to bake them into the compiled graphs.  This means we can't reuse previously compiled graphs,
## because the other scripts write them without transition probs.
if [ $stage -le 0 ]; then
  echo "$0: compiling training graphs"
  # Map each transcript word to an integer, sending OOVs to the <unk> symbol.
  tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
  $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log  \
    compile-train-graphs $scale_opts $dir/tree $dir/final.mdl  $lang/L.fst "$tra" \
    "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi


if [ $stage -le 1 ]; then
  # Note: we need to set --transition-scale=0.0 --self-loop-scale=0.0 because,
  # as explained above, we compiled the transition probs into the training
  # graphs.
  echo "$0: aligning data in $data using $alimdl and speaker-independent features."
  $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \
    gmm-align-compiled --transition-scale=0.0 --self-loop-scale=0.0 --acoustic-scale=$acoustic_scale \
    --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \
    "ark:gunzip -c $dir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1;
fi

if [ $stage -le 2 ]; then
  echo "$0: computing fMLLR transforms"
  if [ "$alimdl" != "$mdl" ]; then
    # Two-model case (SAT system): estimate fMLLR via Gaussian-level posteriors
    # from the alignment model, applied to the speaker-adapted model.
    $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
      ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:-  \| \
      weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
      gmm-post-to-gpost $alimdl "$sifeats" ark:- ark:- \| \
      gmm-est-fmllr-gpost --fmllr-update-type=$fmllr_update_type \
      --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \
      ark,s,cs:- ark:$dir/trans.JOB || exit 1;
  else
    # Single-model case: estimate fMLLR directly from the posteriors.
    $cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
      ali-to-post "ark:gunzip -c $dir/pre_ali.JOB.gz|" ark:-  \| \
      weight-silence-post 0.0 $silphonelist $alimdl ark:- ark:- \| \
      gmm-est-fmllr --fmllr-update-type=$fmllr_update_type \
      --spk2utt=ark:$sdata/JOB/spk2utt $mdl "$sifeats" \
      ark,s,cs:- ark:$dir/trans.JOB || exit 1;
  fi
fi

# Speaker-adapted features: apply the per-speaker fMLLR transforms on top of sifeats.
feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.JOB ark:- ark:- |"

if [ $stage -le 3 ]; then
  # Warning: gmm-latgen-faster doesn't support a retry-beam so you may get more
  # alignment errors (however, it does have a default min-active=200 so this
  # will tend to reduce alignment errors).
  # --allow_partial=false makes sure we reach the end of the decoding graph.
  # --word-determinize=false makes sure we retain the alternative pronunciations of
  #   words (including alternatives regarding optional silences).
  # --lattice-beam=$final_beam equals the decoding beam, which means we do no
  #   pruning of the lattice (lattices from a training transcription will be
  #   small anyway).
  echo "$0: generating lattices containing alternate pronunciations."
  $cmd JOB=1:$nj $dir/log/generate_lattices.JOB.log \
    gmm-latgen-faster --acoustic-scale=$acoustic_scale --beam=$final_beam \
    --lattice-beam=$final_beam --allow-partial=false --word-determinize=false \
    "$mdl_cmd" "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \
    "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
fi

# The pass-1 alignments were only needed to estimate the fMLLR transforms.
rm $dir/pre_ali.*.gz

echo "$0: done generating lattices from training transcripts."

utils/summarize_warnings.pl $dir/log

exit 0;

0 commit comments

Comments
 (0)