@@ -119,6 +119,7 @@ SingleUtteranceNnet2DecoderThreaded::SingleUtteranceNnet2DecoderThreaded(
119119 config_ (config), am_nnet_(am_nnet), tmodel_(tmodel), sampling_rate_(0.0 ),
120120 num_samples_received_ (0 ), input_finished_(false ),
121121 feature_pipeline_ (feature_info),
122+ num_samples_discarded_ (0 ),
122123 silence_weighting_ (tmodel, feature_info.silence_weighting_config),
123124 decodable_ (tmodel),
124125 num_frames_decoded_ (0 ), decoder_(fst, config_.decoder_opts),
@@ -176,6 +177,10 @@ SingleUtteranceNnet2DecoderThreaded::~SingleUtteranceNnet2DecoderThreaded() {
176177 delete input_waveform_.front ();
177178 input_waveform_.pop_front ();
178179 }
180+ while (!processed_waveform_.empty ()) {
181+ delete processed_waveform_.front ();
182+ processed_waveform_.pop_front ();
183+ }
179184}
180185
181186void SingleUtteranceNnet2DecoderThreaded::AcceptWaveform (
@@ -200,6 +205,22 @@ void SingleUtteranceNnet2DecoderThreaded::AcceptWaveform(
200205 waveform_synchronizer_.UnlockSuccess (ThreadSynchronizer::kProducer );
201206}
202207
208+ int32 SingleUtteranceNnet2DecoderThreaded::NumWaveformPiecesPending () {
209+ // Note RE locking: what we really want here is just to lock the mutex. As a
210+ // side effect, because of the way the synchronizer code works, it will also
211+ // increment the semaphore and might wake up the consumer thread. This will
212+ // possibly make it do a little useless work (go around a loop once), but
213+ // won't really do any harm. Perhaps we should have implemented a version of
214+ // the Lock function that takes no arguments.
215+ if (!waveform_synchronizer_.Lock (ThreadSynchronizer::kProducer )) {
216+ KALDI_ERR << " Failure locking mutex: decoding aborted." ;
217+ }
218+ int32 ans = input_waveform_.size ();
219+ waveform_synchronizer_.UnlockSuccess (ThreadSynchronizer::kProducer );
220+ return ans;
221+ }
222+
223+
203224int32 SingleUtteranceNnet2DecoderThreaded::NumFramesReceivedApprox () const {
204225 return num_samples_received_ /
205226 (sampling_rate_ * feature_pipeline_.FrameShiftInSeconds ());
@@ -237,6 +258,55 @@ void SingleUtteranceNnet2DecoderThreaded::FinalizeDecoding() {
237258 decoder_.FinalizeDecoding ();
238259}
239260
261+ BaseFloat SingleUtteranceNnet2DecoderThreaded::GetRemainingWaveform (
262+ Vector<BaseFloat> *waveform) const {
263+ if (KALDI_PTHREAD_PTR (threads_[0 ]) != 0 ) {
264+ KALDI_ERR << " It is an error to call GetRemainingWaveform before Wait()." ;
265+ }
266+ int64 num_samples_stored = 0 ; // number of samples we still have.
267+ std::vector< Vector<BaseFloat>* > all_pieces;
268+ std::deque< Vector<BaseFloat>* >::const_iterator iter;
269+ for (iter = input_waveform_.begin (); iter != input_waveform_.end (); ++iter) {
270+ num_samples_stored += (*iter)->Dim ();
271+ all_pieces.push_back (*iter);
272+ }
273+ for (iter = processed_waveform_.begin (); iter != processed_waveform_.end ();
274+ ++iter) {
275+ num_samples_stored += (*iter)->Dim ();
276+ all_pieces.push_back (*iter);
277+ }
278+ // put the pieces in chronological order.
279+ std::reverse (all_pieces.begin (), all_pieces.end ());
280+ int64 samples_shift_per_frame =
281+ sampling_rate_ * feature_pipeline_.FrameShiftInSeconds ();
282+ int64 num_samples_to_discard = samples_shift_per_frame * num_frames_decoded_;
283+ KALDI_ASSERT (num_samples_to_discard >= num_samples_discarded_);
284+
285+ // num_samp_discard is how many samples we must discard from our stored
286+ // samples.
287+ int64 num_samp_discard = num_samples_to_discard - num_samples_discarded_,
288+ num_samp_keep = num_samples_stored - num_samp_discard;
289+ KALDI_ASSERT (num_samp_discard <= num_samples_stored && num_samp_keep >= 0 );
290+ waveform->Resize (num_samp_keep, kUndefined );
291+ int32 offset = 0 ; // offset in output waveform. assume output waveform is no
292+ // larger than int32.
293+ for (size_t i = 0 ; i < all_pieces.size (); i++) {
294+ Vector<BaseFloat> *this_piece = all_pieces[i];
295+ int32 this_dim = this_piece->Dim ();
296+ if (num_samp_discard >= this_dim) {
297+ num_samp_discard -= this_dim;
298+ } else {
299+ // normal case is num_samp_discard = 0.
300+ int32 this_dim_keep = this_dim - num_samp_discard;
301+ waveform->Range (offset, this_dim_keep).CopyFromVec (
302+ this_piece->Range (num_samp_discard, this_dim_keep));
303+ offset += this_dim_keep;
304+ num_samp_discard = 0 ;
305+ }
306+ }
307+ KALDI_ASSERT (offset == num_samp_keep && num_samp_discard == 0 );
308+ return sampling_rate_;
309+ }
240310
241311void SingleUtteranceNnet2DecoderThreaded::GetAdaptationState (
242312 OnlineIvectorExtractorAdaptationState *adaptation_state) {
@@ -413,11 +483,23 @@ bool SingleUtteranceNnet2DecoderThreaded::FeatureComputation(
413483 while (num_frames_usable < config_.nnet_batch_size &&
414484 !input_waveform_.empty ()) {
415485 feature_pipeline_.AcceptWaveform (sampling_rate_, *input_waveform_.front ());
416- delete input_waveform_.front ();
486+ processed_waveform_. push_back ( input_waveform_.front () );
417487 input_waveform_.pop_front ();
418488 num_frames_ready = feature_pipeline_.NumFramesReady ();
419489 num_frames_usable = num_frames_ready - num_frames_output;
420490 }
491+ // Delete already-processed pieces of waveform if we have already decoded
492+ // those frames. (If not already decoded, we keep them around for the
493+ // sake of GetRemainingWaveform()).
494+ int32 samples_shift_per_frame =
495+ sampling_rate_ * feature_pipeline_.FrameShiftInSeconds ();
496+ while (!processed_waveform_.empty () &&
497+ num_samples_discarded_ + processed_waveform_.front ()->Dim () <
498+ samples_shift_per_frame * num_frames_decoded_) {
499+ num_samples_discarded_ += processed_waveform_.front ()->Dim ();
500+ delete processed_waveform_.front ();
501+ processed_waveform_.pop_front ();
502+ }
421503 return waveform_synchronizer_.UnlockSuccess (ThreadSynchronizer::kConsumer );
422504 }
423505 }
@@ -605,4 +687,3 @@ bool SingleUtteranceNnet2DecoderThreaded::EndpointDetected(
605687
606688
607689} // namespace kaldi
608-
0 commit comments