forked from thesofproject/sof
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspeech.cc
More file actions
153 lines (125 loc) · 4.34 KB
/
Copy pathspeech.cc
File metadata and controls
153 lines (125 loc) · 4.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
// SPDX-License-Identifier: BSD-3-Clause
//
// Copyright(c) 2025 Intel Corporation. All rights reserved.
#include <algorithm>
#include <cstdint>
#include <iterator>
#include "tensorflow/lite/core/c/common.h"
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_log.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/micro/testing/micro_test.h"
#include "speech.h"
// hard code the model today
#include "micro_speech_quantized_model_data.h"
// The following values are derived from values used during model training.
// If you change the way you preprocess the input, update all these constants.
// NOTE(review): the TFLM_* macros are not defined in this file; presumably
// they come from speech.h - confirm.
//constexpr int kAudioSampleFrequency = TFLM_SAMPLE_RATE;
// Inner dimension of the Features array type.
static constexpr int kFeatureSize = TFLM_FEATURE_SIZE;
// Outer dimension of the Features array type.
static constexpr int kFeatureCount = TFLM_FEATURE_COUNT;
// Number of int8 elements copied into the model input tensor per inference;
// also compared against the innermost dimension of that tensor.
static constexpr int kFeatureElementCount = TFLM_FEATURE_ELEM_COUNT;
// The arena size starts out as a guesstimate and is then refined by calling
// MicroInterpreter::arena_used_bytes() on both the AudioPreprocessor and
// MicroSpeech models and using the larger of the two results.
static constexpr size_t kArenaSize = 28584; // xtensa p6
// Backing storage handed to the MicroInterpreter as its tensor arena;
// declared with 16-byte alignment.
alignas(16) static uint8_t g_arena[kArenaSize];
// Type for features: a 2-D int8 array of kFeatureCount rows by kFeatureSize
// columns. TF_ProcessClassify() views tfc->audio_features through this type.
using Features = int8_t[kFeatureCount][kFeatureSize];
// Inference state shared by the TF_* entry points in this file.
// Model selected by TF_SetModel().
static const tflite::Model *model;
// Input and output tensors, looked up from the interpreter by
// Init_Interpreter().
static TfLiteTensor *input;
static TfLiteTensor *output;
// Interpreter instance created by TF_InitOps().
static tflite::MicroInterpreter *interpreter;
// Resolver type; the template argument 4 matches the number of ops that
// RegisterOps() adds.
using MicroSpeechOpResolver = tflite::MicroMutableOpResolver<4>;
// Resolver instance created by TF_InitOps().
static MicroSpeechOpResolver *op_resolver;
// Registers the kernels used by the model (Reshape, FullyConnected,
// DepthwiseConv2D, Softmax) with the given resolver.
//
// Registration stops at the first kernel that fails and that failure status
// is returned; 0 is returned once all four kernels have been added.
//
// Adding more kernels is quite efficient. TODO add more (the resolver type is
// declared as MicroMutableOpResolver<4>, so that needs revisiting as well).
int RegisterOps(MicroSpeechOpResolver *op_resolver) {
	TfLiteStatus status = op_resolver->AddReshape();

	if (status == kTfLiteOk)
		status = op_resolver->AddFullyConnected();
	if (status == kTfLiteOk)
		status = op_resolver->AddDepthwiseConv2D();
	if (status == kTfLiteOk)
		status = op_resolver->AddSoftmax();

	return status == kTfLiteOk ? 0 : status;
}
int TF_InitOps(struct tf_classify *tfc)
{
op_resolver = new MicroSpeechOpResolver();
if (RegisterOps(op_resolver) != 0) {
tfc->error = "register ops failed";
return -EINVAL;
}
// create the interpreter
interpreter = new tflite::MicroInterpreter(model, *op_resolver,
g_arena, kArenaSize);
// and allocate the tensors
if (interpreter->AllocateTensors() != kTfLiteOk) {
tfc->error = "interpreter tensor allocate failed";
delete interpreter;
delete op_resolver;
interpreter = nullptr;
op_resolver = nullptr;
return -EINVAL;
}
return 0;
}
static int Init_Interpreter(struct tf_classify *tfc)
{
input = interpreter->input(0);
if (!input){
tfc->error = "input interpreter NULL";
return -EINVAL;
}
// check input shape is compatible with our feature data size
if (kFeatureElementCount != input->dims->data[input->dims->size - 1]){
tfc->error = "input interpreter shape incompatible";
return -EINVAL;
}
output = interpreter->output(0);
if (!output){
tfc->error = "output interpreter NULL";
return -EINVAL;
}
// check output shape is compatible with our number of prediction categories
if (tfc->categories != output->dims->data[output->dims->size - 1]) {
tfc->error = "output shape != categories";
return -EINVAL;
}
return 0;
}
int TF_SetModel(struct tf_classify *tfc, unsigned char *model_tflite)
{
// ignore passed in model today until we can load via binary kcontrol
// Map the model into a usable data structure. This doesn't involve any
// copying or parsing, it's a very lightweight operation.
model = tflite::GetModel(g_micro_speech_quantized_model_data);
if (model->version() != TFLITE_SCHEMA_VERSION) {
tfc->error = "failed to load model";
return -EINVAL;
}
return 0;
}
// Runs one inference over the feature block in tfc->audio_features and writes
// the dequantized scores to tfc->predictions.
//
// Steps: bind and validate the input/output tensors, copy
// kFeatureElementCount int8 features into the input tensor, invoke the
// interpreter, then convert each int8 output to float using the output
// tensor's zero point and scale.
//
// @param tfc  classifier context; reads audio_features and categories, writes
//             predictions[0..categories-1], and sets error on failure.
// @return 0 on success, or a negative error code if tensor setup or Invoke()
//         fails.
int TF_ProcessClassify(struct tf_classify *tfc)
{
	// initialise the interpreter for current feature block; bail out on
	// failure (a non-zero return) since input/output may not be usable
	const int ret = Init_Interpreter(tfc);
	if (ret)
		return ret;

	Features *features = reinterpret_cast<Features *>(tfc->audio_features);
	const float output_scale = output->params.scale;
	const int output_zero_point = output->params.zero_point;

	// copy features to input then invoke(); features[0][0] decays to a
	// pointer to the first int8 element of the feature block
	std::copy_n(features[0][0], kFeatureElementCount,
		    tflite::GetTensorData<int8_t>(input));

	// run the interpreter
	if (interpreter->Invoke() != kTfLiteOk) {
		tfc->error = "invoke failed";
		return -EINVAL;
	}

	// Dequantize output values: (q - zero_point) * scale
	const int8_t *quantized = tflite::GetTensorData<int8_t>(output);
	for (int i = 0; i < tfc->categories; i++)
		tfc->predictions[i] =
			(quantized[i] - output_zero_point) * output_scale;

	return 0;
}