Skip to content

Commit fdca431

Browse files
yoshi-automation authored and tseaver committed
Add MP3 to Audio Encoding and add boost to Speech Context (via synth). (googleapis#8109)
1 parent d9127d7 commit fdca431

File tree

4 files changed, with 92 additions and 41 deletions.

4 files changed, with 92 additions and 41 deletions.

speech/google/cloud/speech_v1p1beta1/gapic/enums.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,9 @@ class AudioEncoding(enum.IntEnum):
7171
number of bytes (octets) as specified in RFC 5574. In other words, each
7272
RTP header is replaced with a single byte containing the block length.
7373
Only Speex wideband is supported. ``sample_rate_hertz`` must be 16000.
74+
MP3 (int): MP3 audio. Support all standard MP3 bitrates (which range from 32-320
75+
kbps). When using this encoding, ``sample_rate_hertz`` can be optionally
76+
unset if not known.
7477
"""
7578

7679
ENCODING_UNSPECIFIED = 0
@@ -81,6 +84,7 @@ class AudioEncoding(enum.IntEnum):
8184
AMR_WB = 5
8285
OGG_OPUS = 6
8386
SPEEX_WITH_HEADER_BYTE = 7
87+
MP3 = 8
8488

8589

8690
class RecognitionMetadata(object):

speech/google/cloud/speech_v1p1beta1/proto/cloud_speech.proto

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,11 @@ message RecognitionConfig {
200200
// is replaced with a single byte containing the block length. Only Speex
201201
// wideband is supported. `sample_rate_hertz` must be 16000.
202202
SPEEX_WITH_HEADER_BYTE = 7;
203+
204+
// MP3 audio. Support all standard MP3 bitrates (which range from 32-320
205+
// kbps). When using this encoding, `sample_rate_hertz` can be optionally
206+
// unset if not known.
207+
MP3 = 8;
203208
}
204209

205210
// Encoding of audio data sent in all `RecognitionAudio` messages.
@@ -511,6 +516,16 @@ message SpeechContext {
511516
// to add additional words to the vocabulary of the recognizer. See
512517
// [usage limits](/speech-to-text/quotas#content).
513518
repeated string phrases = 1;
519+
520+
// Hint Boost. Positive value will increase the probability that a specific
521+
// phrase will be recognized over other similar sounding phrases. The higher
522+
// the boost, the higher the chance of false positive recognition as well.
523+
// Negative boost values would correspond to anti-biasing. Anti-biasing is not
524+
// enabled, so negative boost will simply be ignored. Though `boost` can
525+
// accept a wide range of positive values, most use cases are best served with
526+
// values between 0 and 20. We recommend using a binary search approach to
527+
// finding the optimal value for your use case.
528+
float boost = 4;
514529
}
515530

516531
// Contains audio data in the encoding specified in the `RecognitionConfig`.

0 commit comments

Comments
 (0)