Skip to content

Commit 3cd6a6d

Browse files
singalsu authored and lgirdwood committed
Audio: MFCC: Add Mel log spectra output mode when num_ceps is zero
Add a mode where cepstral coefficients are not computed and the Mel frequency logarithm values are passed directly to the sink buffer. The mode is activated when the sof_mfcc_config member num_ceps is set to zero.

When num_ceps is zero:
- DCT matrix and cepstral lifter are not allocated or initialized
- The Mel log spectra (num_mel_bins values) are output to the sink instead of cepstral coefficients
- A mel_only flag is added to mfcc_state for runtime path selection

This is useful for applications that need Mel spectrogram features without the DCT transform, such as some neural network audio front-ends.

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
1 parent 4afaf72 commit 3cd6a6d

3 files changed

Lines changed: 63 additions & 30 deletions

File tree

src/audio/mfcc/mfcc_common.c

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -119,16 +119,22 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_state *stat
119119
state->mel_spectra->data, mel_scale_shift);
120120
#endif
121121

122-
/* Multiply Mel spectra with DCT matrix to get cepstral coefficients */
123-
mat_init_16b(state->cepstral_coef, 1, state->dct.num_out, 7); /* Q8.7 */
124-
mat_multiply(state->mel_spectra, state->dct.matrix, state->cepstral_coef);
125-
126-
/* Apply cepstral lifter */
127-
if (state->lifter.cepstral_lifter != 0)
128-
mat_multiply_elementwise(state->cepstral_coef, state->lifter.matrix,
129-
state->cepstral_coef);
130-
131-
cc_count += state->dct.num_out;
122+
if (state->mel_only) {
123+
/* In Mel-only mode output Mel log spectra directly */
124+
cc_count += state->dct.num_in;
125+
} else {
126+
/* Multiply Mel spectra with DCT matrix to get cepstral coefficients */
127+
mat_init_16b(state->cepstral_coef, 1, state->dct.num_out, 7); /* Q8.7 */
128+
mat_multiply(state->mel_spectra, state->dct.matrix, state->cepstral_coef);
129+
130+
/* Apply cepstral lifter */
131+
if (state->lifter.cepstral_lifter != 0) {
132+
mat_multiply_elementwise(state->cepstral_coef, state->lifter.matrix,
133+
state->cepstral_coef);
134+
}
135+
136+
cc_count += state->dct.num_out;
137+
}
132138

133139
/* Output to sink buffer */
134140
}
@@ -205,9 +211,17 @@ void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer
205211
*/
206212
zero_samples = frames * audio_stream_get_channels(sink);
207213
if (num_ceps > 0) {
214+
int16_t *out_data;
215+
216+
if (state->mel_only) {
217+
out_data = state->mel_spectra->data;
218+
} else {
219+
out_data = state->cepstral_coef->data;
220+
}
221+
208222
zero_samples -= num_ceps + num_magic;
209223
w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_magic, (int16_t *)&magic);
210-
w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_ceps, state->cepstral_coef->data);
224+
w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_ceps, out_data);
211225
}
212226

213227
w_ptr = mfcc_sink_copy_zero_s16(sink, w_ptr, zero_samples);

src/audio/mfcc/mfcc_setup.c

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -248,23 +248,37 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
248248
goto free_fft_out;
249249
}
250250

251-
/* Setup DCT */
252-
dct->num_in = config->num_mel_bins;
253-
dct->num_out = config->num_ceps;
254-
dct->type = (enum dct_type)config->dct;
255-
dct->ortho = true;
256-
ret = mod_dct_initialize_16(mod, dct);
257-
if (ret < 0) {
258-
comp_err(dev, "Failed DCT init");
259-
goto free_melfb_data;
260-
}
261-
262-
state->lifter.num_ceps = config->num_ceps;
263-
state->lifter.cepstral_lifter = config->cepstral_lifter; /* Q7.9 max 64.0*/
264-
ret = mfcc_get_cepstral_lifter(mod, &state->lifter);
265-
if (ret < 0) {
266-
comp_err(dev, "Failed cepstral lifter");
267-
goto free_dct_matrix;
251+
/* Setup DCT and cepstral lifter only when num_ceps > 0.
252+
* When num_ceps is zero, skip DCT/lifter and output Mel
253+
* log spectra directly.
254+
*/
255+
if (config->num_ceps > 0) {
256+
dct->num_in = config->num_mel_bins;
257+
dct->num_out = config->num_ceps;
258+
dct->type = (enum dct_type)config->dct;
259+
dct->ortho = true;
260+
ret = mod_dct_initialize_16(mod, dct);
261+
if (ret < 0) {
262+
comp_err(dev, "Failed DCT init");
263+
goto free_melfb_data;
264+
}
265+
266+
state->lifter.num_ceps = config->num_ceps;
267+
state->lifter.cepstral_lifter = config->cepstral_lifter; /* Q7.9 max 64.0*/
268+
ret = mfcc_get_cepstral_lifter(mod, &state->lifter);
269+
if (ret < 0) {
270+
comp_err(dev, "Failed cepstral lifter");
271+
goto free_dct_matrix;
272+
}
273+
274+
state->mel_only = false;
275+
} else {
276+
comp_info(dev, "num_ceps is 0, Mel log spectra output mode");
277+
dct->num_in = config->num_mel_bins;
278+
dct->num_out = 0;
279+
dct->matrix = NULL;
280+
state->lifter.matrix = NULL;
281+
state->mel_only = true;
268282
}
269283

270284
/* Scratch overlay during runtime
@@ -288,8 +302,12 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
288302
/* Use FFT buffer as scratch for later computed data */
289303
state->power_spectra = (int32_t *)&fft->fft_buf[0];
290304
state->mel_spectra = (struct mat_matrix_16b *)&fft->fft_out[0];
291-
state->cepstral_coef = (struct mat_matrix_16b *)
292-
&state->mel_spectra->data[state->dct.num_in];
305+
if (!state->mel_only) {
306+
state->cepstral_coef =
307+
(struct mat_matrix_16b *)&state->mel_spectra->data[state->dct.num_in];
308+
} else {
309+
state->cepstral_coef = NULL;
310+
}
293311

294312
/* Set initial state for STFT */
295313
state->waiting_fill = true;

src/include/sof/audio/mfcc/mfcc_comp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ struct mfcc_state {
125125
int low_freq;
126126
int high_freq;
127127
int sample_rate;
128+
bool mel_only; /**< When true, output Mel spectra instead of cepstral coefficients */
128129
bool waiting_fill; /**< booleans */
129130
bool prev_samples_valid;
130131
size_t sample_buffers_size; /**< bytes */

0 commit comments

Comments
 (0)