Skip to content

Commit 350d8b4

Browse files
committed
trunk: online-nnet2 decoding setup: adding scripts which make it possible to estimate the iVectors per speaker, excluding silence (so not-truly-online decoding). Some code changes for iVector which allow for scaling up the prior term when the data count exceeds a certain value (this seems to be important, for some reason). And misc. code fixes.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4865 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
1 parent 7d8ff21 commit 350d8b4

18 files changed

Lines changed: 584 additions & 115 deletions

egs/librispeech/s5/local/online/run_nnet2.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ if [ $stage -le 13 ]; then
147147
done
148148
fi
149149

150-
exit 0;
150+
#exit 0;
151151
###### Comment out the "exit 0" above to run the multi-threaded decoding. #####
152152

153153
if [ $stage -le 14 ]; then
@@ -166,8 +166,8 @@ if [ $stage -le 15 ]; then
166166
test=dev_clean
167167
steps/online/nnet2/decode.sh --threaded true --do-endpointing true \
168168
--config conf/decode.config --cmd "$decode_cmd" --nj 30 \
169-
--per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
170-
${dir}_online/decode_pp_${test}_tgsmall_utt_threaded_ep || exit 1;
169+
--per-utt true exp/tri6b/graph_tgsmall data/$test \
170+
${dir}_online/decode_${test}_tgsmall_utt_threaded_ep || exit 1;
171171
fi
172172

173173
exit 0;

egs/wsj/s5/local/online/run_nnet2.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,34 @@ if [ $stage -le 13 ]; then
134134
done
135135
fi
136136

137+
if [ $stage -le 14 ]; then
  # Offline decoding as in stage 10, but the iVectors are estimated per
  # speaker, excluding silence (based on alignments from a GMM decoding),
  # using a different script.  This is just to demonstrate that script.

  rm exp/nnet2_online/.error 2>/dev/null
  for test_set in eval92 dev93; do
    steps/online/nnet2/extract_ivectors.sh --cmd "$train_cmd" --nj 8 \
      data/test_${test_set}_hires data/lang exp/nnet2_online/extractor \
      exp/tri4b/decode_tgpr_${test_set} exp/nnet2_online/ivectors_spk_test_${test_set} || touch exp/nnet2_online/.error &
  done
  wait
  [ -f exp/nnet2_online/.error ] && echo "$0: Error getting iVectors" && exit 1;

  for lm in bd_tgpr; do # just use the bd decoding, to avoid wasting time.
    # Re-use graphs that were built earlier.
    graph=exp/tri4b/graph_${lm}
    for test_set in eval92 dev93; do
      steps/nnet2/decode.sh --nj 8 --cmd "$decode_cmd" \
        --online-ivector-dir exp/nnet2_online/ivectors_spk_test_${test_set} \
        ${graph} data/test_${test_set}_hires $dir/decode_${lm}_${test_set}_spk || touch exp/nnet2_online/.error &
    done
  done
  wait
  [ -f exp/nnet2_online/.error ] && echo "$0: Error decoding" && exit 1;
fi
163+
164+
137165

138166

139167
exit 0;
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
#!/bin/bash

# Copyright 2013  Daniel Povey
# Apache 2.0.


# This script computes iVectors in the same format as extract_ivectors_online.sh,
# except that they are actually not really computed online: they are first computed
# per speaker and just duplicated many times.
#
# This setup also makes it possible to use a previous decoding or alignment, to
# down-weight silence in the stats (default is --silence-weight 0.0).
#
# This is for when you use the "online-decoding" setup in an offline task, and
# you want the best possible results.


# Begin configuration section.
nj=30
cmd="run.pl"
stage=0
num_gselect=5 # Gaussian-selection using diagonal model: number of Gaussians to select
min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
ivector_period=10
posterior_scale=0.1 # Scale on the acoustic posteriors, intended to account for
                    # inter-frame correlations.  Making this small during iVector
                    # extraction is equivalent to scaling up the prior, and will
                    # tend to produce smaller iVectors where data-counts are
                    # small.  It's not so important that this match the value
                    # used when training the iVector extractor, but more important
                    # that this match the value used when you do real online decoding
                    # with the neural nets trained with these iVectors.
max_count=100 # Interpret this as a number of frames times posterior scale...
              # this config ensures that once the count exceeds this (i.e.
              # 1000 frames, or 10 seconds, by default), we start to scale
              # down the stats, accentuating the prior term.  This seems quite
              # important for some reason.
compress=true # If true, compress the iVectors stored on disk (it's lossy
              # compression, as used for feature matrices).
silence_weight=0.0
acwt=0.1  # used if input is a decode dir, to get best path from lattices.
mdl=final # change this if decode directory did not have ../final.mdl present.

# End configuration section.

echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;


if [ $# != 4 ] && [ $# != 5 ]; then
  echo "Usage: $0 [options] <data> <lang> <extractor-dir> [<alignment-dir>|<decode-dir>] <ivector-dir>"
  echo " e.g.: $0 data/test data/lang exp/nnet2_online/extractor exp/tri3/decode_test exp/nnet2_online/ivectors_test"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>                           # config containing options"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo "  --nj <n|30>                                      # Number of jobs"
  echo "  --stage <stage|0>                                # To control partial reruns"
  echo "  --num-gselect <n|5>                              # Number of Gaussians to select using"
  echo "                                                   # diagonal model."
  echo "  --min-post <float;default=0.025>                 # Pruning threshold for posteriors"
  echo "  --ivector-period <int;default=10>                # How often to extract an iVector (frames)"
  echo "  --silence-weight <float;default=0.0>             # Weight given to silence frames in the"
  echo "                                                   # iVector stats (if alignments/lattices supplied)"
  echo "  --max-count <int;default=100>                    # Once the data count (frames times posterior"
  echo "                                                   # scale) exceeds this, stats are scaled down."
  exit 1;
fi

if [ $# -eq 4 ]; then
  data=$1
  lang=$2
  srcdir=$3
  dir=$4
  # No alignment/decode dir supplied: silence is not down-weighted.  Set this
  # explicitly so a stray exported variable cannot change the script's behavior.
  ali_or_decode_dir=
else # 5 arguments
  data=$1
  lang=$2
  srcdir=$3
  ali_or_decode_dir=$4
  dir=$5
fi

for f in $data/feats.scp $srcdir/final.ie $srcdir/final.dubm $srcdir/global_cmvn.stats $srcdir/splice_opts \
    $lang/phones.txt $srcdir/online_cmvn.conf $srcdir/final.mat; do
  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
done

mkdir -p $dir/log
silphonelist=$(cat $lang/phones/silence.csl) || exit 1;

if [ ! -z "$ali_or_decode_dir" ]; then
  # Derive per-frame silence weights from the supplied alignments or lattices;
  # these will be used to down-weight silence frames in the iVector stats.

  nj_orig=$(cat $ali_or_decode_dir/num_jobs) || exit 1;

  if [ -f $ali_or_decode_dir/ali.1.gz ]; then
    # Alignment directory: the model lives directly inside it.
    if [ ! -f $ali_or_decode_dir/${mdl}.mdl ]; then
      echo "$0: expected $ali_or_decode_dir/${mdl}.mdl to exist."
      exit 1;
    fi

    if [ $stage -le 0 ]; then
      rm $dir/weights.*.gz 2>/dev/null

      # Use the same model we just checked for (${mdl}.mdl), rather than
      # hard-coding final.mdl, so --mdl works as documented.
      $cmd JOB=1:$nj_orig $dir/log/ali_to_post.JOB.log \
        gunzip -c $ali_or_decode_dir/ali.JOB.gz \| \
        ali-to-post ark:- ark:- \| \
        weight-silence-post $silence_weight $silphonelist $ali_or_decode_dir/${mdl}.mdl ark:- ark:- \| \
        post-to-weights ark:- "ark:|gzip -c >$dir/weights.JOB.gz" || exit 1;

      # put all the weights in one archive.
      for j in $(seq $nj_orig); do gunzip -c $dir/weights.$j.gz; done | gzip -c >$dir/weights.gz || exit 1;
      rm $dir/weights.*.gz || exit 1;
    fi

  elif [ -f $ali_or_decode_dir/lat.1.gz ]; then
    # Decode directory: the model lives one level up (e.g. exp/tri3/final.mdl).
    if [ ! -f $ali_or_decode_dir/../${mdl}.mdl ]; then
      echo "$0: expected $ali_or_decode_dir/../${mdl}.mdl to exist."
      exit 1;
    fi

    if [ $stage -le 0 ]; then
      rm $dir/weights.*.gz 2>/dev/null

      # Get the best path from the lattices, then silence weights as above.
      $cmd JOB=1:$nj_orig $dir/log/lat_to_post.JOB.log \
        lattice-best-path --acoustic-scale=$acwt "ark:gunzip -c $ali_or_decode_dir/lat.JOB.gz|" ark:/dev/null ark:- \| \
        ali-to-post ark:- ark:- \| \
        weight-silence-post $silence_weight $silphonelist $ali_or_decode_dir/../${mdl}.mdl ark:- ark:- \| \
        post-to-weights ark:- "ark:|gzip -c >$dir/weights.JOB.gz" || exit 1;

      # put all the weights in one archive.
      for j in $(seq $nj_orig); do gunzip -c $dir/weights.$j.gz; done | gzip -c >$dir/weights.gz || exit 1;
      rm $dir/weights.*.gz || exit 1;
    fi
  else
    echo "$0: expected ali.1.gz or lat.1.gz to exist in $ali_or_decode_dir";
    exit 1;
  fi

fi

# Now work out the per-speaker iVectors.

sdata=$data/split$nj;
utils/split_data.sh $data $nj || exit 1;

echo $ivector_period > $dir/ivector_period || exit 1;
splice_opts=$(cat $srcdir/splice_opts)

# gmm_feats: features matching how the UBM was trained (online CMVN), used only
# to get Gaussian posteriors; feats: un-normalized features used for the actual
# iVector-extractor stats.
gmm_feats="ark,s,cs:apply-cmvn-online --spk2utt=ark:$sdata/JOB/spk2utt --config=$srcdir/online_cmvn.conf $srcdir/global_cmvn.stats scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
feats="ark,s,cs:splice-feats $splice_opts scp:$sdata/JOB/feats.scp ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"


if [ $stage -le 1 ]; then
  if [ ! -z "$ali_or_decode_dir" ]; then
    # With silence weights: scale the posteriors by the per-frame weights
    # before accumulating iVector stats.
    $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \
      gmm-global-get-post --n=$num_gselect --min-post=$min_post $srcdir/final.dubm "$gmm_feats" ark:- \| \
      weight-post ark:- "ark,s,cs:gunzip -c $dir/weights.gz|" ark:- \| \
      ivector-extract --acoustic-weight=$posterior_scale --compute-objf-change=true \
        --max-count=$max_count --spk2utt=ark:$sdata/JOB/spk2utt \
        $srcdir/final.ie "$feats" ark,s,cs:- ark,t:$dir/ivectors_spk.JOB.ark || exit 1;
  else
    $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \
      gmm-global-get-post --n=$num_gselect --min-post=$min_post $srcdir/final.dubm "$gmm_feats" ark:- \| \
      ivector-extract --acoustic-weight=$posterior_scale --compute-objf-change=true \
        --max-count=$max_count --spk2utt=ark:$sdata/JOB/spk2utt \
        $srcdir/final.ie "$feats" ark,s,cs:- ark,t:$dir/ivectors_spk.JOB.ark || exit 1;
  fi
fi

# get an utterance-level set of iVectors (just duplicate the speaker-level ones).
if [ $stage -le 2 ]; then
  for j in $(seq $nj); do
    utils/apply_map.pl -f 2 $dir/ivectors_spk.$j.ark <$sdata/$j/utt2spk >$dir/ivectors_utt.$j.ark || exit 1;
  done
fi

# Each text-format line is "<spk> [ v1 ... vN ]"; subtract 3 for the key and
# the two brackets to get the iVector dimension.
ivector_dim=$[$(head -n 1 $dir/ivectors_spk.1.ark | wc -w) - 3] || exit 1;
echo  "$0: iVector dim is $ivector_dim"

base_feat_dim=$(feat-to-dim scp:$data/feats.scp -) || exit 1;

start_dim=$base_feat_dim
end_dim=$[$base_feat_dim+$ivector_dim-1]


if [ $stage -le 3 ]; then
  # here, we are just using the original features in $sdata/JOB/feats.scp for
  # their number of rows; we use the select-feats command to remove those
  # features and retain only the iVector features.
  $cmd JOB=1:$nj $dir/log/duplicate_feats.JOB.log \
    append-vector-to-feats scp:$sdata/JOB/feats.scp ark:$dir/ivectors_utt.JOB.ark ark:- \| \
    select-feats "$start_dim-$end_dim" ark:- ark:- \| \
    subsample-feats --n=$ivector_period ark:- ark:- \| \
    copy-feats --compress=$compress ark:- \
      ark,scp:$dir/ivector_online.JOB.ark,$dir/ivector_online.JOB.scp || exit 1;
fi

if [ $stage -le 4 ]; then
  echo "$0: combining iVectors across jobs"
  for j in $(seq $nj); do cat $dir/ivector_online.$j.scp; done >$dir/ivector_online.scp || exit 1;
fi

echo "$0: done extracting (pseudo-online) iVectors"

egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,6 @@ posterior_scale=0.1 # Scale on the acoustic posteriors, intended to account for
3232
# used when training the iVector extractor, but more important
3333
# that this match the value used when you do real online decoding
3434
# with the neural nets trained with these iVectors.
35-
#utts_per_spk_max=-1 # This option is no longer supported, you should use
36-
# steps/online/nnet2/copy_data_dir.sh with the --utts-per-spk-max
37-
# option to make a copy of the data dir.
3835
compress=true # If true, compress the iVectors stored on disk (it's lossy
3936
# compression, as used for feature matrices).
4037

@@ -58,10 +55,6 @@ if [ $# != 3 ]; then
5855
echo " # diagonal model."
5956
echo " --min-post <float;default=0.025> # Pruning threshold for posteriors"
6057
echo " --ivector-period <int;default=10> # How often to extract an iVector (frames)"
61-
echo " --utts-per-spk-max <int;default=-1> # Controls splitting into 'fake speakers'."
62-
echo " # Set to 1 if compatibility with utterance-by-utterance"
63-
echo " # decoding is the only factor, and to larger if you care "
64-
echo " # also about adaptation over several utterances."
6558
exit 1;
6659
fi
6760

@@ -71,7 +64,7 @@ dir=$3
7164

7265
for f in $data/feats.scp $srcdir/final.ie $srcdir/final.dubm $srcdir/global_cmvn.stats $srcdir/splice_opts \
7366
$srcdir/online_cmvn.conf $srcdir/final.mat; do
74-
[ ! -f $f ] && echo "No such file $f" && exit 1;
67+
[ ! -f $f ] && echo "$0: No such file $f" && exit 1;
7568
done
7669

7770
# Set various variables.
@@ -86,7 +79,7 @@ splice_opts=$(cat $srcdir/splice_opts)
8679
# the program ivector-extract-online2 does a bunch of stuff in memory and is
8780
# config-driven... this was easier in this case because the same code is
8881
# involved in online decoding. We need to create a config file for iVector
89-
# extration.
82+
# extraction.
9083

9184
ieconf=$dir/conf/ivector_extractor.conf
9285
echo -n >$ieconf
@@ -104,15 +97,6 @@ echo "--posterior-scale=$posterior_scale" >>$ieconf
10497
echo "--max-remembered-frames=1000" >>$ieconf # the default
10598

10699

107-
ns=$(wc -l <$data/spk2utt)
108-
if [ "$ns" == 1 -a "$utts_per_spk_max" != 1 -a "$utts_per_spk_max" != -1 ]; then
109-
echo "$0: you seem to have just one speaker in your database. This is probably not a good idea."
110-
echo " see http://kaldi.sourceforge.net/data_prep.html (search for 'bold') for why"
111-
echo " Setting --utts-per-spk-max to 1."
112-
utts_per_spk_max=1
113-
fi
114-
115-
116100

117101
for n in $(seq $nj); do
118102
# This will do nothing unless the directory $dir/storage exists;

src/bin/ali-to-post.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ int main(int argc, char *argv[]) {
3434
try {
3535
const char *usage =
3636
"Convert alignments to posteriors\n"
37-
"Usage: ali-to-post [options] alignments-rspecifier posteriors-wspecifier\n"
37+
"Usage: ali-to-post [options] <alignments-rspecifier> <posteriors-wspecifier>\n"
3838
"e.g.:\n"
3939
" ali-to-post ark:1.ali ark:1.post\n";
4040

src/bin/copy-matrix.cc

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,14 @@ int main(int argc, char *argv[]) {
3838
"See also: copy-feats\n";
3939

4040
bool binary = true;
41+
BaseFloat scale = 1.0;
4142
ParseOptions po(usage);
4243

43-
po.Register("binary", &binary, "Write in binary mode (only relevant if output is a wxfilename)");
44-
44+
po.Register("binary", &binary,
45+
"Write in binary mode (only relevant if output is a wxfilename)");
46+
po.Register("scale", &scale,
47+
"This option can be used to scale the matrices being copied.");
48+
4549
po.Read(argc, argv);
4650

4751
if (po.NumArgs() != 2) {
@@ -68,6 +72,7 @@ int main(int argc, char *argv[]) {
6872
if (!in_is_rspecifier) {
6973
Matrix<BaseFloat> mat;
7074
ReadKaldiObject(matrix_in_fn, &mat);
75+
if (scale != 1.0) mat.Scale(scale);
7176
Output ko(matrix_out_fn, binary);
7277
mat.Write(ko.Stream(), binary);
7378
KALDI_LOG << "Copied matrix to " << matrix_out_fn;
@@ -76,8 +81,15 @@ int main(int argc, char *argv[]) {
7681
int num_done = 0;
7782
BaseFloatMatrixWriter writer(matrix_out_fn);
7883
SequentialBaseFloatMatrixReader reader(matrix_in_fn);
79-
for (; !reader.Done(); reader.Next(), num_done++)
80-
writer.Write(reader.Key(), reader.Value());
84+
for (; !reader.Done(); reader.Next(), num_done++) {
85+
if (scale != 1.0) {
86+
Matrix<BaseFloat> mat(reader.Value());
87+
mat.Scale(scale);
88+
writer.Write(reader.Key(), mat);
89+
} else {
90+
writer.Write(reader.Key(), reader.Value());
91+
}
92+
}
8193
KALDI_LOG << "Copied " << num_done << " matrices.";
8294
return (num_done != 0 ? 0 : 1);
8395
}

src/featbin/append-feats.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ int main(int argc, char *argv[]) {
5050
exit(1);
5151
}
5252

53-
std::string rspecifier1 = po.GetArg(1);
54-
std::string rspecifier2 = po.GetArg(2);
55-
std::string wspecifier = po.GetArg(3);
53+
std::string rspecifier1 = po.GetArg(1),
54+
rspecifier2 = po.GetArg(2),
55+
wspecifier = po.GetArg(3);
5656

5757
BaseFloatMatrixWriter feats_writer(wspecifier);
5858
SequentialBaseFloatMatrixReader feats_reader1(rspecifier1);

src/featbin/paste-feats.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ int main(int argc, char *argv[]) {
7878
"Usage: paste-feats <in-rspecifier1> <in-rspecifier2> [<in-rspecifier3> ...] <out-wspecifier>\n"
7979
" or: paste-feats <in-rxfilename1> <in-rxfilename2> [<in-rxfilename3> ...] <out-wxfilename>\n"
8080
" e.g. paste-feats ark:feats1.ark \"ark:select-feats 0-3 ark:feats2.ark ark:- |\" ark:feats-out.ark\n"
81-
" or: paste-feats foo.mat bar.mat baz.mat\n";
82-
81+
" or: paste-feats foo.mat bar.mat baz.mat\n"
82+
"See also: copy-feats, copy-matrix, append-vector-to-feats, concat-feats\n";
8383

8484
ParseOptions po(usage);
8585

0 commit comments

Comments
 (0)