forked from kaldi-asr/kaldi
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfeature-mfcc.h
More file actions
135 lines (111 loc) · 5.15 KB
/
feature-mfcc.h
File metadata and controls
135 lines (111 loc) · 5.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// feat/feature-mfcc.h
// Copyright 2009-2011 Karel Vesely; Petr Motlicek; Saarland University
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_FEAT_FEATURE_MFCC_H_
#define KALDI_FEAT_FEATURE_MFCC_H_
#include <map>
#include <string>
#include "feat/feature-functions.h"
namespace kaldi {
/// @addtogroup feat FeatureExtraction
/// @{
/// Basic configuration for MFCC feature computation.
///
/// Only settings that can be applied in a "stateless" way live here; energy
/// max-normalization and delta computation are not part of this struct.
struct MfccOptions {
  /// Frame-extraction options; registered via frame_opts.Register().
  FrameExtractionOptions frame_opts;

  /// Mel filterbank options; registered via mel_opts.Register().
  MelBanksOptions mel_opts;

  /// Number of cepstral coefficients, counting the zeroth one (e.g. 13).
  int32 num_ceps;

  /// If true, use energy; otherwise use C0.
  bool use_energy;

  /// Floor applied to the energy.  This is an absolute value, not in log
  /// scale; a small value such as 1.0e-10 would be typical.
  BaseFloat energy_floor;

  /// If true, compute energy before preemphasis and windowing.
  bool raw_energy;

  /// Scaling factor on the cepstra, for HTK compatibility.  A value of 0.0
  /// means no liftering is done.
  BaseFloat cepstral_lifter;

  /// If true, put energy/C0 last and introduce a factor of sqrt(2) on C0 so
  /// that the result is the same as HTK.
  bool htk_compat;

  /// Sets the default values.
  ///
  /// The number of mel banks defaults to 23 for MFCC computation: this seems
  /// to be common for 16khz-sampled data, but for 8khz-sampled data 15 may be
  /// better.  frame_opts is left to its own default construction.
  MfccOptions()
      : mel_opts(23),
        num_ceps(13),
        use_energy(true),
        energy_floor(0.0),
        raw_energy(true),
        cepstral_lifter(22.0),
        htk_compat(false) {}

  /// Registers every option with \p opts: first the nested frame and mel-bank
  /// options, then the MFCC-specific ones declared in this struct.
  void Register(OptionsItf *opts) {
    // Nested option groups register their own parameters.
    frame_opts.Register(opts);
    mel_opts.Register(opts);

    // MFCC-specific parameters.
    opts->Register(
        "num-ceps", &num_ceps,
        "Number of cepstra in MFCC computation (including C0)");
    opts->Register(
        "use-energy", &use_energy,
        "Use energy (not C0) in MFCC computation");
    opts->Register(
        "energy-floor", &energy_floor,
        "Floor on energy (absolute, not relative) in MFCC computation");
    opts->Register(
        "raw-energy", &raw_energy,
        "If true, compute energy before preemphasis and windowing");
    opts->Register(
        "cepstral-lifter", &cepstral_lifter,
        "Constant that controls scaling of MFCCs");
    opts->Register(
        "htk-compat", &htk_compat,
        "If true, put energy or C0 last and use a factor of sqrt(2) on "
        "C0. Warning: not sufficient to get HTK compatible features "
        "(need to change other parameters).");
  }
};
// Forward declaration: only pointers and references to MelBanks are used in
// this header.
class MelBanks;

/// Computes MFCC features; see \ref feat_mfcc for more information.
class Mfcc {
 public:
  /// The options type accepted by the constructor.
  typedef MfccOptions Options;

  explicit Mfcc(const MfccOptions &opts);
  ~Mfcc();

  /// Returns the configured number of cepstral coefficients
  /// (MfccOptions::num_ceps).
  int32 Dim() const { return opts_.num_ceps; }

  /// Computes features for \p wave and writes them to \p output.
  ///
  /// Throws an exception on failure (e.g. if the file is too short for even
  /// one frame).
  ///
  /// If supplied, \p wave_remainder receives the last frame or two of the
  /// waveform that would need to be included in the next call to Compute()
  /// for the same utterance.  It is not exactly the un-processed part (it may
  /// have been partly processed); it is the start of the next window that has
  /// not already been processed.
  ///
  /// NOTE(review): \p vtln_warp is presumably the VTLN warp factor used to
  /// select the mel banks (see GetMelBanks()) -- confirm in the .cc file.
  void Compute(const VectorBase<BaseFloat> &wave,
               BaseFloat vtln_warp,
               Matrix<BaseFloat> *output,
               Vector<BaseFloat> *wave_remainder = NULL);

  /// Const overload of Compute(); takes the same arguments.
  void Compute(const VectorBase<BaseFloat> &wave,
               BaseFloat vtln_warp,
               Matrix<BaseFloat> *output,
               Vector<BaseFloat> *wave_remainder = NULL) const;

 private:
  /// Helper taking an explicit MelBanks object instead of a warp factor.
  /// NOTE(review): presumably shared by both Compute() overloads -- the
  /// definition is not visible in this header.
  void ComputeInternal(const VectorBase<BaseFloat> &wave,
                       const MelBanks &mel_banks,
                       Matrix<BaseFloat> *output,
                       Vector<BaseFloat> *wave_remainder = NULL) const;

  /// Returns the MelBanks to use for the given warp factor (non-const
  /// overload).
  const MelBanks *GetMelBanks(BaseFloat vtln_warp);

  /// Const overload.
  /// NOTE(review): \p must_delete presumably tells the caller whether it has
  /// to delete the returned object -- confirm against the implementation.
  const MelBanks *GetMelBanks(BaseFloat vtln_warp,
                              bool *must_delete) const;

  MfccOptions opts_;
  Vector<BaseFloat> lifter_coeffs_;
  /// Matrix we left-multiply by to perform the DCT.
  Matrix<BaseFloat> dct_matrix_;
  BaseFloat log_energy_floor_;
  /// MelBanks objects, keyed by VTLN coefficient.
  std::map<BaseFloat, MelBanks*> mel_banks_;
  FeatureWindowFunction feature_window_function_;
  /// NOTE(review): raw pointer; ownership and lifetime are decided in the
  /// .cc file, which is not visible here.
  SplitRadixRealFft<BaseFloat> *srfft_;

  KALDI_DISALLOW_COPY_AND_ASSIGN(Mfcc);
};
/// @} End of "addtogroup feat"
} // namespace kaldi
#endif // KALDI_FEAT_FEATURE_MFCC_H_