Skip to content

Commit 3bf9adc

Browse files
committed
trunk: various small extensions to programs and cosmetic bug fixes; code cleanup in online/ directory.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3240 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
1 parent 0acf744 commit 3bf9adc

16 files changed

+173
-63
lines changed

src/featbin/subsample-feats.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ int main(int argc, char *argv[]) {
6060
BaseFloatMatrixWriter feat_writer(wspecifier);
6161

6262
int32 num_done = 0, num_err = 0;
63+
int64 frames_in = 0, frames_out = 0;
6364

6465
// process all keys
6566
for (; !feat_reader.Done(); feat_reader.Next()) {
@@ -71,6 +72,9 @@ int main(int argc, char *argv[]) {
7172
int32 num_indexes = 0;
7273
for (int32 k = offset; k < feats.NumRows(); k += n)
7374
num_indexes++; // k is the index.
75+
76+
frames_in += feats.NumRows();
77+
frames_out += num_indexes;
7478

7579
if (num_indexes == 0) {
7680
KALDI_WARN << "For utterance " << utt << ", output would have no rows, "
@@ -88,8 +92,9 @@ int main(int argc, char *argv[]) {
8892
feat_writer.Write(utt, output);
8993
num_done++;
9094
}
91-
KALDI_LOG << "Sub-sampled " << num_done << " feats; " << num_err
95+
KALDI_LOG << "Sub-sampled " << num_done << " feature matrices; " << num_err
9296
<< " with errors.";
97+
KALDI_LOG << "Reduced " << frames_in << " frames to " << frames_out;
9398
return (num_done != 0 ? 0 : 1);
9499
} catch(const std::exception &e) {
95100
std::cerr << e.what();

src/fgmmbin/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ include ../kaldi.mk
66
BINFILES = fgmm-global-acc-stats fgmm-global-sum-accs fgmm-global-est \
77
fgmm-global-merge fgmm-global-to-gmm fgmm-gselect fgmm-global-get-frame-likes \
88
fgmm-global-acc-stats-twofeats fgmm-global-copy fgmm-global-mixdown \
9-
fgmm-global-gselect-to-post
9+
fgmm-global-gselect-to-post fgmm-global-info
1010

1111

1212
OBJFILES =

src/fgmmbin/fgmm-global-get-frame-likes.cc

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,19 @@ int main(int argc, char *argv[]) {
3232

3333
const char *usage =
3434
"Print out per-frame log-likelihoods for each utterance, as an archive\n"
35-
"of vectors of floats.\n"
35+
"of vectors of floats. If --average=true, prints out the average per-frame\n"
36+
"log-likelihood for each utterance, as a single float.\n"
3637
"Usage: fgmm-global-get-frame-likes [options] <model-in> <feature-rspecifier> "
3738
"<likes-out-wspecifier>\n"
3839
"e.g.: fgmm-global-get-frame-likes 1.mdl scp:train.scp ark:1.likes\n";
3940

4041
ParseOptions po(usage);
42+
bool average = false;
4143
std::string gselect_rspecifier;
4244
po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
4345
"to limit the #Gaussians accessed on each frame.");
46+
po.Register("average", &average, "If true, print out the average per-frame "
47+
"log-likelihood as a single float per utterance.");
4448
po.Read(argc, argv);
4549

4650
if (po.NumArgs() != 3) {
@@ -63,7 +67,8 @@ int main(int argc, char *argv[]) {
6367

6468
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
6569
RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
66-
BaseFloatVectorWriter likes_writer(likes_wspecifier);
70+
BaseFloatVectorWriter likes_writer(average ? "" : likes_wspecifier);
71+
BaseFloatWriter average_likes_writer(average ? likes_wspecifier : "");
6772
int32 num_done = 0, num_err = 0;
6873

6974
for (; !feature_reader.Done(); feature_reader.Next()) {
@@ -104,7 +109,10 @@ int main(int argc, char *argv[]) {
104109

105110
tot_like += likes.Sum();
106111
tot_frames += file_frames;
107-
likes_writer.Write(key, likes);
112+
if (average)
113+
average_likes_writer.Write(key, likes.Sum() / file_frames);
114+
else
115+
likes_writer.Write(key, likes);
108116
num_done++;
109117
}
110118
KALDI_LOG << "Done " << num_done << " files; " << num_err

src/fgmmbin/fgmm-global-info.cc

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// fgmmbin/fgmm-global-info.cc
2+
3+
// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
4+
5+
// See ../../COPYING for clarification regarding multiple authors
6+
//
7+
// Licensed under the Apache License, Version 2.0 (the "License");
8+
// you may not use this file except in compliance with the License.
9+
// You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15+
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16+
// MERCHANTABILITY OR NON-INFRINGEMENT.
17+
// See the Apache 2 License for the specific language governing permissions and
18+
// limitations under the License.
19+
20+
#include "base/kaldi-common.h"
21+
#include "util/common-utils.h"
22+
#include "gmm/full-gmm.h"
23+
#include "hmm/transition-model.h"
24+
25+
int main(int argc, char *argv[]) {
26+
try {
27+
using namespace kaldi;
28+
typedef kaldi::int32 int32;
29+
30+
const char *usage =
31+
"Write to standard output various properties of full-covariance GMM model\n"
32+
"This is for a single mixture of Gaussians, e.g. as used for a UBM.\n"
33+
"Usage: gmm-info [options] <gmm>\n"
34+
"e.g.:\n"
35+
" fgmm-info 1.ubm\n";
36+
37+
ParseOptions po(usage);
38+
39+
po.Read(argc, argv);
40+
41+
if (po.NumArgs() != 1) {
42+
po.PrintUsage();
43+
exit(1);
44+
}
45+
46+
std::string model_rxfilename = po.GetArg(1);
47+
48+
FullGmm gmm;
49+
ReadKaldiObject(model_rxfilename, &gmm);
50+
51+
std::cout << "number of gaussians " << gmm.NumGauss() << '\n';
52+
std::cout << "feature dimension " << gmm.Dim() << '\n';
53+
return 0;
54+
} catch(const std::exception &e) {
55+
std::cerr << e.what() << '\n';
56+
return -1;
57+
}
58+
}
59+
60+

src/gmmbin/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
2626
gmm-diff-accs gmm-basis-fmllr-accs gmm-basis-fmllr-training gmm-est-basis-fmllr \
2727
gmm-est-map gmm-adapt-map gmm-latgen-map gmm-basis-fmllr-accs-gpost \
2828
gmm-est-basis-fmllr-gpost gmm-latgen-tracking gmm-latgen-faster-parallel \
29-
gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats
29+
gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats \
30+
gmm-global-info
3031

3132
OBJFILES =
3233

src/gmmbin/gmm-global-get-frame-likes.cc

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,19 @@ int main(int argc, char *argv[]) {
3232

3333
const char *usage =
3434
"Print out per-frame log-likelihoods for each utterance, as an archive\n"
35-
"of vectors of floats.\n"
35+
"of vectors of floats. If --average=true, prints out the average per-frame\n"
36+
"log-likelihood for each utterance, as a single float.\n"
3637
"Usage: gmm-global-get-frame-likes [options] <model-in> <feature-rspecifier> "
3738
"<likes-out-wspecifier>\n"
3839
"e.g.: gmm-global-get-frame-likes 1.mdl scp:train.scp ark:1.likes\n";
3940

4041
ParseOptions po(usage);
42+
bool average = false;
4143
std::string gselect_rspecifier;
4244
po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
4345
"to limit the #Gaussians accessed on each frame.");
46+
po.Register("average", &average, "If true, print out the average per-frame "
47+
"log-likelihood as a single float per utterance.");
4448
po.Read(argc, argv);
4549

4650
if (po.NumArgs() != 3) {
@@ -63,7 +67,8 @@ int main(int argc, char *argv[]) {
6367

6468
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
6569
RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
66-
BaseFloatVectorWriter likes_writer(likes_wspecifier);
70+
BaseFloatVectorWriter likes_writer(average ? "" : likes_wspecifier);
71+
BaseFloatWriter average_likes_writer(average ? likes_wspecifier : "");
6772
int32 num_done = 0, num_err = 0;
6873

6974
for (; !feature_reader.Done(); feature_reader.Next()) {
@@ -104,7 +109,10 @@ int main(int argc, char *argv[]) {
104109

105110
tot_like += likes.Sum();
106111
tot_frames += file_frames;
107-
likes_writer.Write(key, likes);
112+
if (average)
113+
average_likes_writer.Write(key, likes.Sum() / file_frames);
114+
else
115+
likes_writer.Write(key, likes);
108116
num_done++;
109117
}
110118
KALDI_LOG << "Done " << num_done << " files; " << num_err

src/gmmbin/gmm-global-info.cc

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// gmmbin/gmm-global-info.cc
2+
3+
// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
4+
5+
// See ../../COPYING for clarification regarding multiple authors
6+
//
7+
// Licensed under the Apache License, Version 2.0 (the "License");
8+
// you may not use this file except in compliance with the License.
9+
// You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15+
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16+
// MERCHANTABILITY OR NON-INFRINGEMENT.
17+
// See the Apache 2 License for the specific language governing permissions and
18+
// limitations under the License.
19+
20+
#include "base/kaldi-common.h"
21+
#include "util/common-utils.h"
22+
#include "gmm/diag-gmm.h"
23+
#include "hmm/transition-model.h"
24+
25+
int main(int argc, char *argv[]) {
26+
try {
27+
using namespace kaldi;
28+
typedef kaldi::int32 int32;
29+
30+
const char *usage =
31+
"Write to standard output various properties of GMM model\n"
32+
"This is for a single diagonal GMM, e.g. as used for a UBM.\n"
33+
"Usage: gmm-info [options] <gmm>\n"
34+
"e.g.:\n"
35+
" gmm-info 1.dubm\n";
36+
37+
ParseOptions po(usage);
38+
39+
po.Read(argc, argv);
40+
41+
if (po.NumArgs() != 1) {
42+
po.PrintUsage();
43+
exit(1);
44+
}
45+
46+
std::string model_rxfilename = po.GetArg(1);
47+
48+
DiagGmm gmm;
49+
ReadKaldiObject(model_rxfilename, &gmm);
50+
51+
std::cout << "number of gaussians " << gmm.NumGauss() << '\n';
52+
std::cout << "feature dimension " << gmm.Dim() << '\n';
53+
return 0;
54+
} catch(const std::exception &e) {
55+
std::cerr << e.what() << '\n';
56+
return -1;
57+
}
58+
}
59+
60+

src/gmmbin/gmm-global-init-from-feats.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ int main(int argc, char *argv[]) {
194194
TrainOneIter(feats, gmm_opts, iter, num_threads, &gmm);
195195

196196
int32 next_num_gauss = std::min(num_gauss, cur_num_gauss + gauss_inc);
197-
if (next_num_gauss > cur_num_gauss) {
197+
if (next_num_gauss > gmm.NumGauss()) {
198198
KALDI_LOG << "Splitting to " << next_num_gauss << " Gaussians.";
199199
gmm.Split(next_num_gauss, 0.1);
200200
cur_num_gauss = next_num_gauss;

src/gmmbin/gmm-info.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ int main(int argc, char *argv[]) {
6161
<< trans_model.NumTransitionStates() << '\n';
6262
std::cout << "feature dimension " << am_gmm.Dim() << '\n';
6363
std::cout << "number of gaussians " << am_gmm.NumGauss() << '\n';
64+
return 0;
6465
} catch(const std::exception &e) {
6566
std::cerr << e.what() << '\n';
6667
return -1;

src/onlinebin/online-audio-client.cc

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,12 @@ int main(int argc, char** argv) {
6060
"e.g.: ./online-audio-client 192.168.50.12 9012 'scp:wav_files.scp'\n\n";
6161
ParseOptions po(usage);
6262

63-
bool htk = false, vtt = false, silent = false;
63+
bool htk = false, vtt = false;
6464
int32 channel = -1;
6565
int32 packet_size = 1024;
6666

6767
po.Register("htk", &htk, "Save the result to an HTK label file");
6868
po.Register("vtt", &vtt, "Save the result to a WebVTT subtitle file");
69-
po.Register("silent", &silent,
70-
"Don't print any output (except for errors)");
7169
po.Register(
7270
"channel", &channel,
7371
"Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right)");
@@ -116,19 +114,16 @@ int main(int argc, char** argv) {
116114
return -1;
117115
}
118116

119-
if (!silent) {
120-
std::cout << "Connected to KALDI server at host " << server_addr_str
121-
<< " port " << server_port << std::endl;
122-
}
117+
KALDI_VLOG(2) << "Connected to KALDI server at host " << server_addr_str
118+
<< " port " << server_port << std::endl;
123119

124120
char* pack_buffer = new char[packet_size];
125121

126122
SequentialTableReader<WaveHolder> reader(wav_rspecifier);
127123
for (; !reader.Done(); reader.Next()) {
128124
std::string wav_key = reader.Key();
129125

130-
if (!silent)
131-
std::cout << "File: " << wav_key << std::endl;
126+
KALDI_VLOG(2) << "File: " << wav_key << std::endl;
132127

133128
const WaveData &wav_data = reader.Value();
134129

@@ -260,10 +255,10 @@ int main(int argc, char** argv) {
260255
}
261256
}
262257

263-
if (!silent) {
258+
{
264259
float speed = total_input_dur / total_reco_dur;
265-
std::cout << "Recognized (" << speed << "xRT): " << reco_output
266-
<< std::endl;
260+
KALDI_VLOG(2) << "Recognized (" << speed << "xRT): " << reco_output
261+
<< std::endl;
267262
}
268263

269264
if (htk) {

0 commit comments

Comments
 (0)