Skip to content

Commit 50b7635

Browse files
committed
Fix a small bug in online-ivector-features (for the online-nnet2/nnet3 setup) in how silence-weighting is applied in iVector estimation. (thanks: xiang li)
1 parent 0dc411a commit 50b7635

File tree

1 file changed

+21
-21
lines changed

1 file changed

+21
-21
lines changed

src/online2/online-ivector-feature.cc

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ void OnlineIvectorExtractionInfo::Init(
4242
use_most_recent_ivector = true;
4343
}
4444
max_remembered_frames = config.max_remembered_frames;
45-
45+
4646
std::string note = "(note: this may be needed "
4747
"in the file supplied to --ivector-extractor-config)";
4848
if (config.lda_mat_rxfilename == "")
@@ -72,7 +72,7 @@ void OnlineIvectorExtractionInfo::Check() const {
7272
int32 base_feat_dim = global_cmvn_stats.NumCols() - 1,
7373
num_splice = splice_opts.left_context + 1 + splice_opts.right_context,
7474
spliced_input_dim = base_feat_dim * num_splice;
75-
75+
7676
KALDI_ASSERT(lda_mat.NumCols() == spliced_input_dim ||
7777
lda_mat.NumCols() == spliced_input_dim + 1);
7878
KALDI_ASSERT(lda_mat.NumRows() == diag_ubm.Dim());
@@ -113,7 +113,7 @@ void OnlineIvectorExtractorAdaptationState::LimitFrames(
113113
if (ivector_stats.Count() > max_remembered_frames_scaled) {
114114
ivector_stats.Scale(max_remembered_frames_scaled /
115115
ivector_stats.Count());
116-
}
116+
}
117117
}
118118

119119
void OnlineIvectorExtractorAdaptationState::Write(std::ostream &os, bool binary) const {
@@ -177,7 +177,7 @@ void OnlineIvectorFeature::UpdateFrameWeights(
177177

178178
void OnlineIvectorFeature::UpdateStatsForFrame(int32 t,
179179
BaseFloat weight) {
180-
int32 feat_dim = lda_normalized_->Dim();
180+
int32 feat_dim = lda_normalized_->Dim();
181181
Vector<BaseFloat> feat(feat_dim), // features given to iVector extractor
182182
log_likes(info_.diag_ubm.NumGauss());
183183
lda_normalized_->GetFrame(t, &feat);
@@ -197,10 +197,10 @@ void OnlineIvectorFeature::UpdateStatsUntilFrame(int32 frame) {
197197
KALDI_ASSERT(frame >= 0 && frame < this->NumFramesReady() &&
198198
!delta_weights_provided_);
199199
updated_with_no_delta_weights_ = true;
200-
200+
201201
int32 ivector_period = info_.ivector_period;
202202
int32 num_cg_iters = info_.num_cg_iters;
203-
203+
204204
for (; num_frames_stats_ <= frame; num_frames_stats_++) {
205205
int32 t = num_frames_stats_;
206206
UpdateStatsForFrame(t, 1.0);
@@ -268,7 +268,7 @@ void OnlineIvectorFeature::GetFrame(int32 frame,
268268
UpdateStatsUntilFrameWeighted(frame_to_update_until);
269269

270270
KALDI_ASSERT(feat->Dim() == this->Dim());
271-
271+
272272
if (info_.use_most_recent_ivector) {
273273
KALDI_VLOG(5) << "due to --use-most-recent-ivector=true, using iVector "
274274
<< "from frame " << num_frames_stats_ << " for frame "
@@ -299,7 +299,7 @@ void OnlineIvectorFeature::PrintDiagnostics() const {
299299

300300
Vector<BaseFloat> temp_ivector(current_ivector_);
301301
temp_ivector(0) -= info_.extractor.PriorOffset();
302-
302+
303303
KALDI_VLOG(3) << "By the end of the utterance, objf change/frame "
304304
<< "from estimating iVector (vs. default) was "
305305
<< ivector_stats_.ObjfChange(current_ivector_)
@@ -359,7 +359,7 @@ OnlineIvectorFeature::OnlineIvectorFeature(
359359

360360
// Set the iVector to its default value, [ prior_offset, 0, 0, ... ].
361361
current_ivector_.Resize(info_.extractor.IvectorDim());
362-
current_ivector_(0) = info_.extractor.PriorOffset();
362+
current_ivector_(0) = info_.extractor.PriorOffset();
363363
}
364364

365365
void OnlineIvectorFeature::SetAdaptationState(
@@ -409,7 +409,7 @@ void OnlineSilenceWeighting::ComputeCurrentTraceback(
409409
KALDI_ERR << "Number of frames decoded decreased"; // Likely bug
410410

411411
if (num_frames_decoded == 0)
412-
return;
412+
return;
413413
int32 frame = num_frames_decoded - 1;
414414
bool use_final_probs = false;
415415
LatticeFasterOnlineDecoder::BestPathIterator iter =
@@ -421,20 +421,20 @@ void OnlineSilenceWeighting::ComputeCurrentTraceback(
421421
iter = decoder.TraceBackBestPath(iter, &arc);
422422
// note, the iter.frame values are slightly unintuitively defined,
423423
// they are one less than you might expect.
424-
KALDI_ASSERT(iter.frame == frame - 1);
425-
424+
KALDI_ASSERT(iter.frame == frame - 1);
425+
426426
if (frame_info_[frame].token == iter.tok) {
427427
// we know that the traceback from this point back will be identical, so
428428
// no point tracing back further. Note: we are comparing memory addresses
429429
// of tokens of the decoder; this guarantees it's the same exact token
430430
// because tokens, once allocated on a frame, are only deleted, never
431431
// reallocated for that frame.
432-
break;
432+
break;
433433
}
434434

435435
if (num_frames_output_and_correct_ > frame)
436436
num_frames_output_and_correct_ = frame;
437-
437+
438438
frame_info_[frame].token = iter.tok;
439439
frame_info_[frame].transition_id = arc.ilabel;
440440
frame--;
@@ -454,7 +454,7 @@ int32 OnlineSilenceWeighting::GetBeginFrame() {
454454
// run of length greater than max_duration, since this would force it
455455
// to be treated as silence (note: typically a non-silence phone that's very
456456
// long is really silence, for example this can happen with the word "mm").
457-
457+
458458
int32 t_last_untouched = num_frames_output_and_correct_ - 1,
459459
t_end = frame_info_.size();
460460
int32 transition_id = frame_info_[t_last_untouched].transition_id;
@@ -463,17 +463,17 @@ int32 OnlineSilenceWeighting::GetBeginFrame() {
463463
int32 lower_search_bound = std::max(0, t_last_untouched - max_duration),
464464
upper_search_bound = std::min(t_last_untouched + max_duration, t_end - 1),
465465
t_lower, t_upper;
466-
466+
467467
// t_lower will be the first index in the run of equal transition-ids.
468468
for (t_lower = t_last_untouched;
469469
t_lower > lower_search_bound &&
470-
frame_info_[t_lower - 1].transition_id == transition_id; t_lower++);
470+
frame_info_[t_lower - 1].transition_id == transition_id; t_lower--);
471471

472472
// t_upper will be the last index in the run of equal transition-ids.
473473
for (t_upper = t_last_untouched;
474474
t_upper < upper_search_bound &&
475475
frame_info_[t_upper + 1].transition_id == transition_id; t_upper++);
476-
476+
477477
int32 run_length = t_upper - t_lower + 1;
478478
if (run_length <= max_duration) {
479479
// we wouldn't treat this run as being silence, as it's within
@@ -503,10 +503,10 @@ void OnlineSilenceWeighting::GetDeltaWeights(
503503
const BaseFloat silence_weight = config_.silence_weight;
504504

505505
delta_weights->clear();
506-
506+
507507
if (frame_info_.size() < static_cast<size_t>(num_frames_ready))
508508
frame_info_.resize(num_frames_ready);
509-
509+
510510
// we may have to make begin_frame earlier than num_frames_output_and_correct_
511511
// so that max_state_duration is properly enforced. GetBeginFrame() handles
512512
// this logic.
@@ -581,7 +581,7 @@ void OnlineSilenceWeighting::GetDeltaWeights(
581581
if (weight_diff != 0.0 || offset + 1 == frames_out)
582582
delta_weights->push_back(std::make_pair(frame, weight_diff));
583583
}
584-
584+
585585
}
586586

587587
} // namespace kaldi

0 commit comments

Comments
 (0)