Skip to content

Commit 2dc15c2

Browse files
yoshi-automation authored and crwilcox committed
Copy proto files alongside protoc versions. Remove unneeded utf-8 header.
1 parent 3b5fc7c commit 2dc15c2

File tree

4 files changed

+12
-216
lines changed

4 files changed

+12
-216
lines changed

speech/google/cloud/speech_v1/proto/cloud_speech.proto

Lines changed: 9 additions & 211 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
syntax = "proto3";
1717

18-
package google.cloud.speech.v1p1beta1;
18+
package google.cloud.speech.v1;
1919

2020
import "google/api/annotations.proto";
2121
import "google/longrunning/operations.proto";
@@ -26,10 +26,10 @@ import "google/protobuf/timestamp.proto";
2626
import "google/rpc/status.proto";
2727

2828
option cc_enable_arenas = true;
29-
option go_package = "google.golang.org/genproto/googleapis/cloud/speech/v1p1beta1;speech";
29+
option go_package = "google.golang.org/genproto/googleapis/cloud/speech/v1;speech";
3030
option java_multiple_files = true;
3131
option java_outer_classname = "SpeechProto";
32-
option java_package = "com.google.cloud.speech.v1p1beta1";
32+
option java_package = "com.google.cloud.speech.v1";
3333

3434

3535
// Service that implements Google Cloud Speech API.
@@ -38,7 +38,7 @@ service Speech {
3838
// has been sent and processed.
3939
rpc Recognize(RecognizeRequest) returns (RecognizeResponse) {
4040
option (google.api.http) = {
41-
post: "/v1p1beta1/speech:recognize"
41+
post: "/v1/speech:recognize"
4242
body: "*"
4343
};
4444
}
@@ -49,7 +49,7 @@ service Speech {
4949
// a `LongRunningRecognizeResponse` message.
5050
rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
5151
option (google.api.http) = {
52-
post: "/v1p1beta1/speech:longrunningrecognize"
52+
post: "/v1/speech:longrunningrecognize"
5353
body: "*"
5454
};
5555
}
@@ -203,7 +203,7 @@ message RecognitionConfig {
203203

204204
// Encoding of audio data sent in all `RecognitionAudio` messages.
205205
// This field is optional for `FLAC` and `WAV` audio files and required
206-
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
206+
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
207207
AudioEncoding encoding = 1;
208208

209209
// Sample rate in Hertz of the audio data sent in all
@@ -212,7 +212,7 @@ message RecognitionConfig {
212212
// source to 16000 Hz. If that's not possible, use the native sample rate of
213213
// the audio source (instead of re-sampling).
214214
// This field is optional for `FLAC` and `WAV` audio files and required
215-
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
215+
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
216216
int32 sample_rate_hertz = 2;
217217

218218
// *Optional* The number of channels in the input audio data.
@@ -226,7 +226,7 @@ message RecognitionConfig {
226226
// `enable_separate_recognition_per_channel` to 'true'.
227227
int32 audio_channel_count = 7;
228228

229-
// This needs to be set to true explicitly and `audio_channel_count` > 1
229+
// This needs to be set to `true` explicitly and `audio_channel_count` > 1
230230
// to get each channel recognized separately. The recognition result will
231231
// contain a `channel_tag` field to state which channel that result belongs
232232
// to. If this is not true, we will only recognize the first channel. The
@@ -241,20 +241,6 @@ message RecognitionConfig {
241241
// for a list of the currently supported language codes.
242242
string language_code = 3;
243243

244-
// *Optional* A list of up to 3 additional
245-
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tags,
246-
// listing possible alternative languages of the supplied audio.
247-
// See [Language Support](/speech-to-text/docs/languages)
248-
// for a list of the currently supported language codes.
249-
// If alternative languages are listed, recognition result will contain
250-
// recognition in the most likely language detected including the main
251-
// language_code. The recognition result will include the language tag
252-
// of the language detected in the audio.
253-
// Note: This feature is only supported for Voice Command and Voice Search
254-
// use cases and performance may vary for other use cases (e.g., phone call
255-
// transcription).
256-
repeated string alternative_language_codes = 18;
257-
258244
// *Optional* Maximum number of recognition hypotheses to be returned.
259245
// Specifically, the maximum number of `SpeechRecognitionAlternative` messages
260246
// within each `SpeechRecognitionResult`.
@@ -269,7 +255,7 @@ message RecognitionConfig {
269255
// won't be filtered out.
270256
bool profanity_filter = 5;
271257

272-
// *Optional* array of [SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext].
258+
// *Optional* array of [SpeechContext][google.cloud.speech.v1.SpeechContext].
273259
// A means to provide context to assist the speech recognition. For more
274260
// information, see [Phrase Hints](/speech-to-text/docs/basics#phrase-hints).
275261
repeated SpeechContext speech_contexts = 6;
@@ -280,11 +266,6 @@ message RecognitionConfig {
280266
// `false`.
281267
bool enable_word_time_offsets = 8;
282268

283-
// *Optional* If `true`, the top result includes a list of words and the
284-
// confidence for those words. If `false`, no word-level confidence
285-
// information is returned. The default is `false`.
286-
bool enable_word_confidence = 15;
287-
288269
// *Optional* If 'true', adds punctuation to recognition result hypotheses.
289270
// This feature is only available in select languages. Setting this for
290271
// requests in other languages has no effect at all.
@@ -294,26 +275,6 @@ message RecognitionConfig {
294275
// premium feature.
295276
bool enable_automatic_punctuation = 11;
296277

297-
// *Optional* If 'true', enables speaker detection for each recognized word in
298-
// the top alternative of the recognition result using a speaker_tag provided
299-
// in the WordInfo.
300-
// Note: When this is true, we send all the words from the beginning of the
301-
// audio for the top alternative in every consecutive STREAMING responses.
302-
// This is done in order to improve our speaker tags as our models learn to
303-
// identify the speakers in the conversation over time.
304-
// For non-streaming requests, the diarization results will be provided only
305-
// in the top alternative of the FINAL SpeechRecognitionResult.
306-
bool enable_speaker_diarization = 16;
307-
308-
// *Optional*
309-
// If set, specifies the estimated number of speakers in the conversation.
310-
// If not set, defaults to '2'.
311-
// Ignored unless enable_speaker_diarization is set to true."
312-
int32 diarization_speaker_count = 17;
313-
314-
// *Optional* Metadata regarding this request.
315-
RecognitionMetadata metadata = 9;
316-
317278
// *Optional* Which model to select for the given request. Select the model
318279
// best suited to your domain to get best results. If a model is not
319280
// explicitly specified, then we auto-select a model based on the parameters
@@ -366,137 +327,6 @@ message RecognitionConfig {
366327
bool use_enhanced = 14;
367328
}
368329

369-
// Description of audio data to be recognized.
370-
message RecognitionMetadata {
371-
// Use case categories that the audio recognition request can be described
372-
// by.
373-
enum InteractionType {
374-
// Use case is either unknown or is something other than one of the other
375-
// values below.
376-
INTERACTION_TYPE_UNSPECIFIED = 0;
377-
378-
// Multiple people in a conversation or discussion. For example in a
379-
// meeting with two or more people actively participating. Typically
380-
// all the primary people speaking would be in the same room (if not,
381-
// see PHONE_CALL)
382-
DISCUSSION = 1;
383-
384-
// One or more persons lecturing or presenting to others, mostly
385-
// uninterrupted.
386-
PRESENTATION = 2;
387-
388-
// A phone-call or video-conference in which two or more people, who are
389-
// not in the same room, are actively participating.
390-
PHONE_CALL = 3;
391-
392-
// A recorded message intended for another person to listen to.
393-
VOICEMAIL = 4;
394-
395-
// Professionally produced audio (eg. TV Show, Podcast).
396-
PROFESSIONALLY_PRODUCED = 5;
397-
398-
// Transcribe spoken questions and queries into text.
399-
VOICE_SEARCH = 6;
400-
401-
// Transcribe voice commands, such as for controlling a device.
402-
VOICE_COMMAND = 7;
403-
404-
// Transcribe speech to text to create a written document, such as a
405-
// text-message, email or report.
406-
DICTATION = 8;
407-
}
408-
409-
// Enumerates the types of capture settings describing an audio file.
410-
enum MicrophoneDistance {
411-
// Audio type is not known.
412-
MICROPHONE_DISTANCE_UNSPECIFIED = 0;
413-
414-
// The audio was captured from a closely placed microphone. Eg. phone,
415-
// dictaphone, or handheld microphone. Generally if there speaker is within
416-
// 1 meter of the microphone.
417-
NEARFIELD = 1;
418-
419-
// The speaker if within 3 meters of the microphone.
420-
MIDFIELD = 2;
421-
422-
// The speaker is more than 3 meters away from the microphone.
423-
FARFIELD = 3;
424-
}
425-
426-
// The original media the speech was recorded on.
427-
enum OriginalMediaType {
428-
// Unknown original media type.
429-
ORIGINAL_MEDIA_TYPE_UNSPECIFIED = 0;
430-
431-
// The speech data is an audio recording.
432-
AUDIO = 1;
433-
434-
// The speech data originally recorded on a video.
435-
VIDEO = 2;
436-
}
437-
438-
// The type of device the speech was recorded with.
439-
enum RecordingDeviceType {
440-
// The recording device is unknown.
441-
RECORDING_DEVICE_TYPE_UNSPECIFIED = 0;
442-
443-
// Speech was recorded on a smartphone.
444-
SMARTPHONE = 1;
445-
446-
// Speech was recorded using a personal computer or tablet.
447-
PC = 2;
448-
449-
// Speech was recorded over a phone line.
450-
PHONE_LINE = 3;
451-
452-
// Speech was recorded in a vehicle.
453-
VEHICLE = 4;
454-
455-
// Speech was recorded outdoors.
456-
OTHER_OUTDOOR_DEVICE = 5;
457-
458-
// Speech was recorded indoors.
459-
OTHER_INDOOR_DEVICE = 6;
460-
}
461-
462-
// The use case most closely describing the audio content to be recognized.
463-
InteractionType interaction_type = 1;
464-
465-
// The industry vertical to which this speech recognition request most
466-
// closely applies. This is most indicative of the topics contained
467-
// in the audio. Use the 6-digit NAICS code to identify the industry
468-
// vertical - see https://www.naics.com/search/.
469-
uint32 industry_naics_code_of_audio = 3;
470-
471-
// The audio type that most closely describes the audio being recognized.
472-
MicrophoneDistance microphone_distance = 4;
473-
474-
// The original media the speech was recorded on.
475-
OriginalMediaType original_media_type = 5;
476-
477-
// The type of device the speech was recorded with.
478-
RecordingDeviceType recording_device_type = 6;
479-
480-
// The device used to make the recording. Examples 'Nexus 5X' or
481-
// 'Polycom SoundStation IP 6000' or 'POTS' or 'VoIP' or
482-
// 'Cardioid Microphone'.
483-
string recording_device_name = 7;
484-
485-
// Mime type of the original audio file. For example `audio/m4a`,
486-
// `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
487-
// A list of possible audio mime types is maintained at
488-
// http://www.iana.org/assignments/media-types/media-types.xhtml#audio
489-
string original_mime_type = 8;
490-
491-
// Obfuscated (privacy-protected) ID of the user, to identify number of
492-
// unique users using the service.
493-
int64 obfuscated_id = 9;
494-
495-
// Description of the content. Eg. "Recordings of federal supreme court
496-
// hearings from 2012".
497-
string audio_topic = 10;
498-
}
499-
500330
// Provides "hints" to the speech recognizer to favor specific words and phrases
501331
// in the results.
502332
message SpeechContext {
@@ -670,20 +500,10 @@ message StreamingRecognitionResult {
670500
// The default of 0.0 is a sentinel value indicating `stability` was not set.
671501
float stability = 3;
672502

673-
// Output only. Time offset of the end of this result relative to the
674-
// beginning of the audio.
675-
google.protobuf.Duration result_end_time = 4;
676-
677503
// For multi-channel audio, this is the channel number corresponding to the
678504
// recognized result for the audio from that channel.
679505
// For audio_channel_count = N, its output values can range from '1' to 'N'.
680506
int32 channel_tag = 5;
681-
682-
// Output only. The
683-
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
684-
// language in this result. This language code was detected to have the most
685-
// likelihood of being spoken in the audio.
686-
string language_code = 6;
687507
}
688508

689509
// A speech recognition result corresponding to a portion of the audio.
@@ -698,12 +518,6 @@ message SpeechRecognitionResult {
698518
// recognized result for the audio from that channel.
699519
// For audio_channel_count = N, its output values can range from '1' to 'N'.
700520
int32 channel_tag = 2;
701-
702-
// Output only. The
703-
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
704-
// language in this result. This language code was detected to have the most
705-
// likelihood of being spoken in the audio.
706-
string language_code = 5;
707521
}
708522

709523
// Alternative hypotheses (a.k.a. n-best list).
@@ -746,20 +560,4 @@ message WordInfo {
746560

747561
// Output only. The word corresponding to this set of information.
748562
string word = 3;
749-
750-
// Output only. The confidence estimate between 0.0 and 1.0. A higher number
751-
// indicates an estimated greater likelihood that the recognized words are
752-
// correct. This field is set only for the top alternative of a non-streaming
753-
// result or, of a streaming result where `is_final=true`.
754-
// This field is not guaranteed to be accurate and users should not rely on it
755-
// to be always provided.
756-
// The default of 0.0 is a sentinel value indicating `confidence` was not set.
757-
float confidence = 4;
758-
759-
// Output only. A distinct integer value is assigned for every speaker within
760-
// the audio. This field specifies which one of those speakers was detected to
761-
// have spoken this word. Value ranges from '1' to diarization_speaker_count.
762-
// speaker_tag is set if enable_speaker_diarization = 'true' and only in the
763-
// top alternative.
764-
int32 speaker_tag = 5;
765563
}

speech/google/cloud/speech_v1/proto/cloud_speech_pb2.py

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

speech/google/cloud/speech_v1p1beta1/proto/cloud_speech_pb2.py

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

speech/synth.metadata

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"updateTime": "2019-01-31T00:53:35.360883Z",
2+
"updateTime": "2019-02-01T01:10:29.243119Z",
33
"sources": [
44
{
55
"generator": {
@@ -12,8 +12,8 @@
1212
"git": {
1313
"name": "googleapis",
1414
"remote": "https://github.com/googleapis/googleapis.git",
15-
"sha": "9607c39973de36d319ec8861ac39a826163e21de",
16-
"internalRef": "231680111"
15+
"sha": "acb5253cd11cd43cab93eb153d6e48ba0fa5303d",
16+
"internalRef": "231786007"
1717
}
1818
},
1919
{

0 commit comments

Comments
 (0)