googleapis
diff --git a/‎packages/google-cloud-python-speech/google/cloud/speech_v1/proto/cloud_speech.proto‎
Lines changed: 40 additions & 0 deletions b/‎packages/google-cloud-python-speech/google/cloud/speech_v1/proto/cloud_speech.proto‎
Lines changed: 40 additions & 0 deletions
@@ -276,6 +276,16 @@ message RecognitionConfig {
   // premium feature.
   bool enable_automatic_punctuation = 11;
 
+  // *Optional* Config to enable speaker diarization and set additional
+  // parameters to make diarization better suited for your application.
+  // Note: When this is enabled, we send all the words from the beginning of the
+  // audio for the top alternative in every consecutive STREAMING response.
+  // This is done in order to improve our speaker tags as our models learn to
+  // identify the speakers in the conversation over time.
+  // For non-streaming requests, the diarization results will be provided only
+  // in the top alternative of the FINAL SpeechRecognitionResult.
+  SpeakerDiarizationConfig diarization_config = 19;
+
   // *Optional* Metadata regarding this request.
   RecognitionMetadata metadata = 9;
 
@@ -324,6 +334,36 @@ message RecognitionConfig {
   bool use_enhanced = 14;
 }
 
+// *Optional* Settings that enable and tune speaker diarization.
+message SpeakerDiarizationConfig {
+  // *Optional* If 'true', enables speaker detection for each recognized word
+  // in the top alternative of the recognition result. The detected speaker is
+  // reported through the speaker_tag provided in the WordInfo.
+  bool enable_speaker_diarization = 1;
+
+  // Note: Set min_speaker_count equal to max_speaker_count to fix the number
+  // of speakers to be detected in the audio.
+
+  // *Optional*
+  // Minimum number of speakers in the conversation. A min/max range lets the
+  // system automatically determine the correct number of speakers. If not
+  // set, the default value is 2.
+  int32 min_speaker_count = 2;
+
+  // *Optional*
+  // Maximum number of speakers in the conversation. A min/max range lets the
+  // system automatically determine the correct number of speakers. If not
+  // set, the default value is 6.
+  int32 max_speaker_count = 3;
+
+  // Output only. Each speaker within the audio is assigned a distinct integer
+  // value; this field identifies which speaker was detected to have spoken
+  // this word. Values range from '1' to diarization_speaker_count (NOTE: not
+  // a field of this message; confirm where it is defined). Set only if
+  // enable_speaker_diarization = 'true', and only in the top alternative.
+  int32 speaker_tag = 5;
+}
+
 // Description of audio data to be recognized.
 message RecognitionMetadata {
   // Use case categories that the audio recognition request can be described