update for lattice output

mhanneman · mhanneman · commit 43136eb7549d · 2011-08-16T17:15:34.000Z
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@291 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
diff --git a/src/decoder/faster-decoder.h b/src/decoder/faster-decoder.h
@@ -23,19 +23,10 @@
 #include "util/hash-list.h"
 #include "fst/fstlib.h"
 #include "itf/decodable-itf.h"
+#include "lat/kaldi-lattice.h" // for {Compact}LatticeArc
 
 namespace kaldi {
 
-// macros to switch off all debugging messages without runtime cost
-#define DEBUG_CMD(x) x;
-#define DEBUG_OUT3(x) KALDI_VLOG(3) << x;
-#define DEBUG_OUT2(x) KALDI_VLOG(2) << x;
-#define DEBUG_OUT1(x) KALDI_VLOG(1) << x;
-//#define DEBUG_OUT1(x)
-//#define DEBUG_OUT2(x)
-//#define DEBUG_OUT3(x)
-//#define DEBUG_CMD(x)
-
 struct FasterDecoderOptions {
   BaseFloat beam;
   int32 max_active;
@@ -83,15 +74,11 @@ class FasterDecoder {
     // clean up from last time:
     ClearToks(toks_.Clear());
     StateId start_state = fst_.Start();
-    DEBUG_OUT2("Initial state: " << start_state)
     assert(start_state != fst::kNoStateId);
     Arc dummy_arc(0, 0, Weight::One(), start_state);
     toks_.Insert(start_state, new Token(dummy_arc, NULL));
     ProcessNonemitting(std::numeric_limits<float>::max());
     for (int32 frame = 0; !decodable->IsLastFrame(frame-1); frame++) {
-      DEBUG_OUT1("==== FRAME " << frame << " =====")
-      if ((frame%50) == 0)
-        KALDI_VLOG(2) << "==== FRAME " << frame << " =====";
       BaseFloat adaptive_beam = ProcessEmitting(decodable, frame);
       ProcessNonemitting(adaptive_beam);
     }
@@ -107,7 +94,7 @@ class FasterDecoder {
     return false;
   }
 
-  bool GetBestPath(fst::MutableFst<fst::StdArc> *fst_out) {
+  bool GetBestPath(fst::MutableFst<LatticeArc> *fst_out) {
     // GetBestPath gets the decoding output.  If is_final == true, it limits itself
     // to final states; otherwise it gets the most likely token not taking into
     // account final-probs.  fst_out will be empty (Start() == kNoStateId) if
@@ -124,7 +111,6 @@ class FasterDecoder {
       Weight best_weight = Weight::Zero();
       for (Elem *e = toks_.GetList(); e != NULL; e = e->tail) {
         Weight this_weight = Times(e->val->weight, fst_.Final(e->key));
-        if (this_weight != Weight::Zero()) DEBUG_OUT1("final state reached: " << e->key << " path weight:" << this_weight)
         if (this_weight != Weight::Zero() &&
            this_weight.Value() < best_weight.Value()) {
           best_weight = this_weight;
@@ -133,29 +119,37 @@ class FasterDecoder {
       }
     }
     if (best_tok == NULL) return false;  // No output.
-    DEBUG_OUT1("best final token:  path weight:" << best_tok->weight)
-
 
-    std::vector<Arc> arcs_reverse;  // arcs in reverse order.
+    std::vector<LatticeArc> arcs_reverse;  // arcs in reverse order.
     for (Token *tok = best_tok; tok != NULL; tok = tok->prev_) {
-      arcs_reverse.push_back(tok->arc_);
+      BaseFloat amscore = tok->weight_a.Value(),
+                lmscore = tok->arc_.weight.Value() - amscore;
+      LatticeArc l_arc(tok->arc_.ilabel,
+    	               tok->arc_.olabel,
+    	               LatticeWeight(lmscore, amscore),
+    	               tok->arc_.nextstate);
+      arcs_reverse.push_back(l_arc);
     }
     assert(arcs_reverse.back().nextstate == fst_.Start());
     arcs_reverse.pop_back();  // that was a "fake" token... gives no info.
 
     StateId cur_state = fst_out->AddState();
     fst_out->SetStart(cur_state);
     for (ssize_t i = static_cast<ssize_t>(arcs_reverse.size())-1; i >= 0; i--) {
-      Arc arc = arcs_reverse[i];
+      LatticeArc arc = arcs_reverse[i];
       arc.nextstate = fst_out->AddState();
       fst_out->AddArc(cur_state, arc);
-      DEBUG_OUT1("arc: " << arc.ilabel << " : " << arc.olabel)
       cur_state = arc.nextstate;
     }
-    if (is_final)
-      fst_out->SetFinal(cur_state, fst_.Final(best_tok->arc_.nextstate));
-    else
-      fst_out->SetFinal(cur_state, Weight::One());
+    if (is_final) {
+      fst_out->SetFinal(cur_state,
+          LatticeWeight(fst_.Final(best_tok->arc_.nextstate).Value(), 0.0));
+    } else {
+      fst_out->SetFinal(cur_state, LatticeWeight::One());
+    }
+    KALDI_LOG << "best path, final:" << fst_.Final(best_tok->arc_.nextstate);
+    //fst::FstPrinter<LatticeArc> fstprinter(*fst_out, NULL, NULL, NULL, false, true);
+    //fstprinter.Print(&std::cout, "standard output");
     RemoveEpsLocal(fst_out);
     return true;
   }
@@ -168,18 +162,15 @@ class FasterDecoder {
     Token *prev_;
     int32 ref_count_;
     Weight weight;
+    Weight weight_a;
     inline Token(Arc &arc, Token *prev): arc_(arc), prev_(prev), ref_count_(1) {
-      DEBUG_OUT2("advance: " << arc.nextstate << " " << arc.ilabel << ":"
-                 << arc.olabel << "/" << arc.weight)
-      DEBUG_OUT3("create t")
       if (prev) {
-        DEBUG_OUT3("inc t(" << prev->weight << "):" << prev->ref_count_ )
         prev->ref_count_++;
         weight = Times(prev->weight, arc.weight);
       } else {
         weight = arc.weight;
       }
-      DEBUG_OUT3("new weight t:" << weight)
+      weight_a = Weight::One();
     }
     inline bool operator < (const Token &other) {
       return weight.Value() > other.weight.Value();
@@ -192,11 +183,9 @@ class FasterDecoder {
     }
     inline static void TokenDelete(Token *tok) {
       if (tok->ref_count_ == 1) { 
-        DEBUG_OUT3( "kill t" )
         delete tok;
       } else {
         tok->ref_count_--;
-        DEBUG_OUT3("dec t:" << tok->ref_count_)
       }
     }
   };
@@ -206,7 +195,6 @@ class FasterDecoder {
   /// Gets the weight cutoff.  Also counts the active tokens.
   BaseFloat GetCutoff(Elem *list_head, size_t *tok_count,
                       BaseFloat *adaptive_beam, Elem **best_elem) {
-    DEBUG_OUT1("GetCufoff")
     BaseFloat best_weight = 1.0e+10;  // positive == high cost == bad.
     size_t count = 0;
     if (opts_.max_active == std::numeric_limits<int32>::max()) {
@@ -219,7 +207,6 @@ class FasterDecoder {
       }
       if (tok_count != NULL) *tok_count = count;
       if (adaptive_beam != NULL) *adaptive_beam = opts_.beam;
-      DEBUG_OUT1("count:" << *tok_count << " best:" << best_weight << " cutoff:" << best_weight + opts_.beam << " adaptive:" << *adaptive_beam)
       return best_weight + opts_.beam;
     } else {
       tmp_array_.clear();
@@ -234,7 +221,6 @@ class FasterDecoder {
       if (tok_count != NULL) *tok_count = count;
       if (tmp_array_.size() <= static_cast<size_t>(opts_.max_active)) {
         if (adaptive_beam) *adaptive_beam = opts_.beam;
-        DEBUG_OUT1("count:" << *tok_count << " best:" << best_weight << " cutoff:" << best_weight + opts_.beam << " adaptive:" << *adaptive_beam)
         return best_weight + opts_.beam;
       } else {
         // the lowest elements (lowest costs, highest likes)
@@ -248,7 +234,6 @@ class FasterDecoder {
         if (adaptive_beam)
           *adaptive_beam = std::min(opts_.beam,
                                     ans - best_weight + opts_.beam_delta);
-        DEBUG_OUT1("count:" << *tok_count << " best:" << best_weight << " cutoff:" << ans << " adaptive:" << *adaptive_beam)
         return ans;
       }
     }
@@ -259,13 +244,11 @@ class FasterDecoder {
                                         * opts_.hash_ratio);
     if (new_sz > toks_.Size()) {
       toks_.SetSize(new_sz);
-      DEBUG_OUT1("resize hash:" << new_sz)
     }
   }
 
   // ProcessEmitting returns the likelihood cutoff used.
   BaseFloat ProcessEmitting(DecodableInterface *decodable, int frame) {
-    DEBUG_OUT1("PropagateEmitting")
     Elem *last_toks = toks_.Clear();
     size_t tok_cnt;
     BaseFloat adaptive_beam;
@@ -308,7 +291,6 @@ class FasterDecoder {
       // because we delete "e" as we go.
       StateId state = e->key;
       Token *tok = e->val;
-      DEBUG_OUT2("get token: " << " state:" << state << " weight:" << tok->weight)
       if (tok->weight.Value() < weight_cutoff) {  // not pruned.
         // np++;
         assert(state == tok->arc_.nextstate);
@@ -317,32 +299,23 @@ class FasterDecoder {
             aiter.Next()) {
           Arc arc = aiter.Value();
           if (arc.ilabel != 0) {  // propagate..
-            arc.weight = Times(arc.weight,
-                               Weight(- decodable->LogLikelihood(frame, arc.ilabel)));
-            DEBUG_OUT2("acoustic: " << 
-              Weight(- decodable->LogLikelihood(frame, arc.ilabel)))
+            Weight amscore(- decodable->LogLikelihood(frame, arc.ilabel));
+            arc.weight = Times(arc.weight, amscore);
             BaseFloat new_weight = arc.weight.Value() + tok->weight.Value();
             if (new_weight < next_weight_cutoff) {  // not pruned..
               Token *new_tok = new Token(arc, tok);
+              new_tok->weight_a = amscore;
               Elem *e_found = toks_.Find(arc.nextstate);
               if (e_found == NULL) {
-                DEBUG_OUT2("insert to: " << arc.nextstate)
                 toks_.Insert(arc.nextstate, new_tok);
               } else {
-                DEBUG_OUT2("combine: " << arc.nextstate)
-                DEBUG_OUT2("combine: " << e_found->val->weight)
-                DEBUG_OUT2("with: " << new_tok->weight)
                 if ( *(e_found->val) < *new_tok ) {
-                  DEBUG_OUT2("delete first")
                   Token::TokenDelete(e_found->val);
                   e_found->val = new_tok;
                 } else {
-                  DEBUG_OUT2("delete second")
                   Token::TokenDelete(new_tok);
                 }
               }
-            } else {
-              DEBUG_OUT2("prune")
             }
           }
         }
@@ -358,24 +331,20 @@ class FasterDecoder {
   void ProcessNonemitting(BaseFloat adaptive_beam) {
     // Processes nonemitting arcs for one frame.  Propagates within
     // cur_toks_.
-    DEBUG_OUT1("PropagateEpsilon")
     assert(queue_.empty());
     float best_weight = 1.0e+10;
     for (Elem *e = toks_.GetList(); e != NULL;  e = e->tail) {
       queue_.push_back(e->key);
       best_weight = std::min(best_weight, e->val->weight.Value());
     }
     BaseFloat cutoff = best_weight + adaptive_beam;
-    DEBUG_OUT1("queue:" << queue_.size() << " best:" << best_weight << " cutoff:" << cutoff)
 
     while (!queue_.empty()) {
       StateId state = queue_.back();
       queue_.pop_back();
       Token *tok = toks_.Find(state)->val;  // would segfault if state not
-      DEBUG_OUT2("pop token: state:" << state << " weight:" << tok->weight)
       // in toks_ but this can't happen.
       if (tok->weight.Value() > cutoff) { // Don't bother processing successors.
-        DEBUG_OUT2("prune")
         continue;
       }
       assert(tok != NULL && state == tok->arc_.nextstate);
@@ -390,20 +359,14 @@ class FasterDecoder {
           } else {
             Elem *e_found = toks_.Find(arc.nextstate);
             if (e_found == NULL) {
-              DEBUG_OUT2("insert/queue to: " << arc.nextstate)
               toks_.Insert(arc.nextstate, new_tok);
               queue_.push_back(arc.nextstate);
             } else {
-              DEBUG_OUT2("combine: " << arc.nextstate)
-              DEBUG_OUT2("combine: " << e_found->val->weight)
-              DEBUG_OUT2("with: " << new_tok->weight)
               if ( *(e_found->val) < *new_tok ) {
-                DEBUG_OUT2("delete first")
                 Token::TokenDelete(e_found->val);
                 e_found->val = new_tok;
                 queue_.push_back(arc.nextstate);
               } else {
-                DEBUG_OUT2("delete second")
                 Token::TokenDelete(new_tok);
               }
             }
diff --git a/src/gmmbin/gmm-decode-faster.cc b/src/gmmbin/gmm-decode-faster.cc
@@ -24,6 +24,7 @@
 #include "decoder/faster-decoder.h"
 #include "decoder/decodable-am-diag-gmm.h"
 #include "util/timer.h"
+#include "lat/kaldi-lattice.h" // for {Compact}LatticeArc
 
 using namespace kaldi;
 
@@ -66,7 +67,7 @@ int main(int argc, char *argv[]) {
 
     const char *usage =
         "Decode features using GMM-based model.\n"
-        "Usage:  gmm-decode-faster [options] model-in fst-in features-rspecifier words-wspecifier [alignments-wspecifier]\n";
+        "Usage:  gmm-decode-faster [options] model-in fst-in features-rspecifier words-wspecifier [alignments-wspecifier lattice-wspecifier]\n";
     ParseOptions po(usage);
     bool allow_partial = true;
     BaseFloat acoustic_scale = 0.1;
@@ -82,7 +83,7 @@ int main(int argc, char *argv[]) {
                 "Produce output even when final state was not reached");
     po.Read(argc, argv);
 
-    if (po.NumArgs() < 4 || po.NumArgs() > 5) {
+    if (po.NumArgs() < 4 || po.NumArgs() > 6) {
       po.PrintUsage();
       exit(1);
     }
@@ -91,7 +92,8 @@ int main(int argc, char *argv[]) {
         fst_in_filename = po.GetArg(2),
         feature_rspecifier = po.GetArg(3),
         words_wspecifier = po.GetArg(4),
-        alignment_wspecifier = po.GetOptArg(5);
+        alignment_wspecifier = po.GetArg(5),
+        lattice_wspecifier = po.GetOptArg(6);
 
     TransitionModel trans_model;
     AmDiagGmm am_gmm;
@@ -106,6 +108,17 @@ int main(int argc, char *argv[]) {
 
     Int32VectorWriter alignment_writer(alignment_wspecifier);
 
+    LatticeWriter lattice_writer;
+    bool write_lattices = false;
+    if (lattice_wspecifier != "") {
+      if (lattice_writer.Open(lattice_wspecifier)) {
+        write_lattices = true;
+      } else {
+        KALDI_WARN << "Could not open table for writing lattices: "
+                   << lattice_wspecifier;
+      }
+    }
+
     fst::SymbolTable *word_syms = NULL;
     if (word_syms_filename != "") 
       if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
@@ -142,7 +155,7 @@ int main(int argc, char *argv[]) {
                                              acoustic_scale);
       decoder.Decode(&gmm_decodable);
 
-      fst::VectorFst<fst::StdArc> decoded;  // linear FST.
+      fst::VectorFst<LatticeArc> decoded;  // linear FST.
 
       if ( (allow_partial || decoder.ReachedFinal())
            && decoder.GetBestPath(&decoded) ) {
@@ -154,14 +167,21 @@ int main(int argc, char *argv[]) {
           KALDI_WARN << "Decoder did not reach end-state, outputting partial traceback.";
         std::vector<int32> alignment;
         std::vector<int32> words;
-        fst::StdArc::Weight weight;
+        LatticeWeight weight;
         frame_count += features.NumRows();
 
         GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
 
         words_writer.Write(key, words);
         if (alignment_writer.IsOpen())
           alignment_writer.Write(key, alignment);
+
+        if (write_lattices) {
+          //if (acoustic_scale != 0.0) // We'll write the lattice without acoustic scaling
+          //  fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &decoded);
+          lattice_writer.Write(key, decoded);
+        }
+
         if (word_syms != NULL) {
           std::cerr << key << ' ';
           for (size_t i = 0; i < words.size(); i++) {
@@ -172,7 +192,7 @@ int main(int argc, char *argv[]) {
           }
           std::cerr << '\n';
         }
-        BaseFloat like = -weight.Value();
+        BaseFloat like = - weight.Value1() - weight.Value2(); // Times
         tot_like += like;
         KALDI_LOG << "Log-like per frame for utterance " << key << " is "
                   << (like / features.NumRows()) << " over "