Skip to content

Commit b9927eb

Browse files
Google APIs copybara-github
authored and committed
feat: add the API of StreamingAnalyzeContent
PiperOrigin-RevId: 446850583
1 parent 34e5f85 commit b9927eb

5 files changed

Lines changed: 246 additions & 0 deletions

File tree

google/cloud/dialogflow/v2beta1/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ py_gapic_library(
210210
name = "dialogflow_py_gapic",
211211
srcs = [":dialogflow_proto"],
212212
grpc_service_config = "dialogflow_grpc_service_config.json",
213+
service_yaml = "dialogflow_v2beta1.yaml",
213214
)
214215

215216
# Open Source Packages

google/cloud/dialogflow/v2beta1/audio_config.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ option java_multiple_files = true;
2727
option java_outer_classname = "AudioConfigProto";
2828
option java_package = "com.google.cloud.dialogflow.v2beta1";
2929
option objc_class_prefix = "DF";
30+
option (google.api.resource_definition) = {
31+
type: "automl.googleapis.com/Model"
32+
pattern: "projects/{project}/locations/{location}/models/{model}"
33+
};
3034

3135
// Audio encoding of the audio content sent in the conversational query request.
3236
// Refer to the

google/cloud/dialogflow/v2beta1/dialogflow_v2beta1.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ backend:
9090
deadline: 60.0
9191
- selector: google.cloud.dialogflow.v2beta1.Participants.AnalyzeContent
9292
deadline: 220.0
93+
- selector: google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent
94+
deadline: 220.0
9395
- selector: 'google.cloud.dialogflow.v2beta1.SessionEntityTypes.*'
9496
deadline: 60.0
9597
- selector: google.cloud.dialogflow.v2beta1.Sessions.DetectIntent
@@ -104,6 +106,8 @@ backend:
104106
deadline: 60.0
105107
- selector: 'google.longrunning.Operations.*'
106108
deadline: 60.0
109+
- selector: google.longrunning.Operations.ListOperations
110+
deadline: 180.0
107111

108112
http:
109113
rules:

google/cloud/dialogflow/v2beta1/fulfillment.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,10 @@ message Fulfillment {
8181

8282
// Represents configuration for a generic web service.
8383
// Dialogflow supports two mechanisms for authentication:
84+
//
8485
// - Basic authentication with username and password.
8586
// - Authentication with additional authentication headers.
87+
//
8688
// More information could be found at:
8789
// https://cloud.google.com/dialogflow/docs/fulfillment-configure.
8890
message GenericWebService {
@@ -127,6 +129,7 @@ message Fulfillment {
127129

128130
// Required. The unique identifier of the fulfillment.
129131
// Supported formats:
132+
//
130133
// - `projects/<Project ID>/agent/fulfillment`
131134
// - `projects/<Project ID>/locations/<Location ID>/agent/fulfillment`
132135
//
@@ -155,6 +158,7 @@ message Fulfillment {
155158
message GetFulfillmentRequest {
156159
// Required. The name of the fulfillment.
157160
// Supported formats:
161+
//
158162
// - `projects/<Project ID>/agent/fulfillment`
159163
// - `projects/<Project ID>/locations/<Location ID>/agent/fulfillment`
160164
string name = 1 [

google/cloud/dialogflow/v2beta1/participant.proto

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,24 @@ service Participants {
109109
option (google.api.method_signature) = "participant,event_input";
110110
}
111111

112+
// Adds a text (e.g., chat) or audio (e.g., phone recording) message from a
113+
// participant into the conversation.
114+
// Note: This method is only available through the gRPC API (not REST).
115+
//
116+
// The top-level message sent to the client by the server is
117+
// `StreamingAnalyzeContentResponse`. Multiple response messages can be
118+
// returned in order. The first one or more messages contain the
119+
// `recognition_result` field. Each result represents a more complete
120+
// transcript of what the user said. The next message contains the
121+
// `reply_text` field, and potentially the `reply_audio` and/or the
122+
// `automated_agent_reply` fields.
123+
//
124+
// Note: Always use agent versions for production traffic
125+
// sent to virtual agents. See [Versions and
126+
// environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
127+
rpc StreamingAnalyzeContent(stream StreamingAnalyzeContentRequest) returns (stream StreamingAnalyzeContentResponse) {
128+
}
129+
112130
// Gets suggested articles for a participant based on specific historical
113131
// messages.
114132
//
@@ -543,6 +561,14 @@ message AnalyzeContentRequest {
543561
// Parameters for a human assist query.
544562
AssistQueryParameters assist_query_params = 14;
545563

564+
// Additional parameters to be put into Dialogflow CX session parameters. To
565+
// remove a parameter from the session, clients should explicitly set the
566+
// parameter value to null.
567+
//
568+
// Note: this field should only be used if you are connecting to a Dialogflow
569+
// CX agent.
570+
google.protobuf.Struct cx_parameters = 18;
571+
546572
// Optional. The send time of the message from end user or human agent's
547573
// perspective. It is used for identifying the same message under one
548574
// participant.
@@ -624,6 +650,182 @@ message AnalyzeContentResponse {
624650
DtmfParameters dtmf_parameters = 9;
625651
}
626652

653+
// Defines the language used in the input text.
654+
message InputTextConfig {
655+
// Required. The language of this conversational query. See [Language
656+
// Support](https://cloud.google.com/dialogflow/docs/reference/language)
657+
// for a list of the currently supported language codes.
658+
string language_code = 1;
659+
}
660+
661+
// The top-level message sent by the client to the
662+
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent] method.
663+
//
664+
// Multiple request messages should be sent in order:
665+
//
666+
// 1. The first message must contain
667+
// [participant][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.participant],
668+
// [config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] and optionally
669+
// [query_params][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.query_params]. If you want
670+
// to receive an audio response, it should also contain
671+
// [reply_audio_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.reply_audio_config].
672+
// The message must not contain
673+
// [input][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input].
674+
//
675+
// 2. If [config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] in the first message
676+
// was set to [audio_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.audio_config],
677+
// all subsequent messages must contain
678+
// [input_audio][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_audio] to continue
679+
// with Speech recognition.
680+
// If you decide to rather analyze text input after you already started
681+
// Speech recognition, please send a message with
682+
// [StreamingAnalyzeContentRequest.input_text][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_text].
683+
//
684+
// However, note that:
685+
//
686+
// * Dialogflow will bill you for the audio so far.
687+
// * Dialogflow discards all Speech recognition results in favor of the
688+
// text input.
689+
//
690+
// 3. If [StreamingAnalyzeContentRequest.config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] in the first message was set
691+
// to [StreamingAnalyzeContentRequest.text_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.text_config], then the second message
692+
// must contain only [input_text][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_text].
693+
// Moreover, you must not send more than two messages.
694+
//
695+
// After you have sent all input, you must half-close or abort the request stream.
696+
message StreamingAnalyzeContentRequest {
697+
// Required. The name of the participant this text comes from.
698+
// Format: `projects/<Project ID>/locations/<Location
699+
// ID>/conversations/<Conversation ID>/participants/<Participant ID>`.
700+
string participant = 1 [
701+
(google.api.field_behavior) = REQUIRED,
702+
(google.api.resource_reference) = {
703+
type: "dialogflow.googleapis.com/Participant"
704+
}
705+
];
706+
707+
// Required. The input config.
708+
oneof config {
709+
// Instructs the speech recognizer how to process the speech audio.
710+
InputAudioConfig audio_config = 2;
711+
712+
// The natural language text to be processed.
713+
InputTextConfig text_config = 3;
714+
}
715+
716+
// Speech synthesis configuration.
717+
// The speech synthesis settings for a virtual agent that may be configured
718+
// for the associated conversation profile are not used when calling
719+
// StreamingAnalyzeContent. If this configuration is not supplied, speech
720+
// synthesis is disabled.
721+
OutputAudioConfig reply_audio_config = 4;
722+
723+
// Required. The input.
724+
oneof input {
725+
// The input audio content to be recognized. Must be sent if `audio_config`
726+
// is set in the first message. The complete audio over all streaming
727+
// messages must not exceed 1 minute.
728+
bytes input_audio = 5;
729+
730+
// The UTF-8 encoded natural language text to be processed. Must be sent if
731+
// `text_config` is set in the first message. Text length must not exceed
732+
// 256 bytes. The `input_text` field can be only sent once.
733+
string input_text = 6;
734+
735+
// The DTMF digits used to invoke intent and fill in parameter value.
736+
//
737+
// This input is ignored if the previous response indicated that DTMF input
738+
// is not accepted.
739+
TelephonyDtmfEvents input_dtmf = 9;
740+
}
741+
742+
// Parameters for a Dialogflow virtual-agent query.
743+
QueryParameters query_params = 7;
744+
745+
// Parameters for a human assist query.
746+
AssistQueryParameters assist_query_params = 8;
747+
748+
// Additional parameters to be put into Dialogflow CX session parameters. To
749+
// remove a parameter from the session, clients should explicitly set the
750+
// parameter value to null.
751+
//
752+
// Note: this field should only be used if you are connecting to a Dialogflow
753+
// CX agent.
754+
google.protobuf.Struct cx_parameters = 13;
755+
756+
// Enable partial virtual agent responses. If this flag is not enabled,
757+
// response stream still contains only one final response even if some
758+
// `Fulfillment`s in Dialogflow virtual agent have been configured to return
759+
// partial responses.
760+
bool enable_partial_automated_agent_reply = 12;
761+
}
762+
763+
// The top-level message returned from the `StreamingAnalyzeContent` method.
764+
//
765+
// Multiple response messages can be returned in order:
766+
//
767+
// 1. If the input was set to streaming audio, the first one or more messages
768+
// contain `recognition_result`. Each `recognition_result` represents a more
769+
// complete transcript of what the user said. The last `recognition_result`
770+
// has `is_final` set to `true`.
771+
//
772+
// 2. In virtual agent stage: if `enable_partial_automated_agent_reply` is
773+
// true, the following N (currently 1 <= N <= 4) messages
774+
// contain `automated_agent_reply` and optionally `reply_audio`
775+
// returned by the virtual agent. The first (N-1)
776+
// `automated_agent_reply`s will have `automated_agent_reply_type` set to
777+
// `PARTIAL`. The last `automated_agent_reply` has
778+
// `automated_agent_reply_type` set to `FINAL`.
779+
// If `enable_partial_automated_agent_reply` is not enabled, response stream
780+
// only contains the final reply.
781+
//
782+
// In human assist stage: the following N (N >= 1) messages contain
783+
// `human_agent_suggestion_results`, `end_user_suggestion_results` or
784+
// `message`.
785+
message StreamingAnalyzeContentResponse {
786+
// The result of speech recognition.
787+
StreamingRecognitionResult recognition_result = 1;
788+
789+
// Optional. The output text content.
790+
// This field is set if an automated agent responded with a text for the user.
791+
string reply_text = 2;
792+
793+
// Optional. The audio data bytes encoded as specified in the request.
794+
// This field is set if:
795+
//
796+
// - The `reply_audio_config` field is specified in the request.
797+
// - The automated agent, which this output comes from, responded with audio.
798+
// In such case, the `reply_audio.config` field contains settings used to
799+
// synthesize the speech.
800+
//
801+
// In some scenarios, multiple output audio fields may be present in the
802+
// response structure. In these cases, only the top-most-level audio output
803+
// has content.
804+
OutputAudio reply_audio = 3;
805+
806+
// Optional. Only set if a Dialogflow automated agent has responded.
807+
// Note that: [AutomatedAgentReply.detect_intent_response.output_audio][]
808+
// and [AutomatedAgentReply.detect_intent_response.output_audio_config][]
809+
// are always empty, use [reply_audio][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentResponse.reply_audio] instead.
810+
AutomatedAgentReply automated_agent_reply = 4;
811+
812+
// Output only. Message analyzed by CCAI.
813+
Message message = 6;
814+
815+
// The suggestions for most recent human agent. The order is the same as
816+
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
817+
// [HumanAgentAssistantConfig.human_agent_suggestion_config][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.human_agent_suggestion_config].
818+
repeated SuggestionResult human_agent_suggestion_results = 7;
819+
820+
// The suggestions for end user. The order is the same as
821+
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
822+
// [HumanAgentAssistantConfig.end_user_suggestion_config][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.end_user_suggestion_config].
823+
repeated SuggestionResult end_user_suggestion_results = 8;
824+
825+
// Indicates the parameters of DTMF.
826+
DtmfParameters dtmf_parameters = 10;
827+
}
828+
627829
// Represents a part of a message possibly annotated with an entity. The part
628830
// can be an entity or purely a part of the message between two entities or
629831
// message start/end.
@@ -1103,6 +1305,33 @@ message ResponseMessage {
11031305

11041306
}
11051307

1308+
// Represents an audio message that is composed of both segments
1309+
// synthesized from the Dialogflow agent prompts and ones hosted externally
1310+
// at the specified URIs.
1311+
message MixedAudio {
1312+
// Represents one segment of audio.
1313+
message Segment {
1314+
// Content of the segment.
1315+
oneof content {
1316+
// Raw audio synthesized from the Dialogflow agent's response using
1317+
// the output config specified in the request.
1318+
bytes audio = 1;
1319+
1320+
// Client-specific URI that points to an audio clip accessible to the
1321+
// client.
1322+
string uri = 2;
1323+
}
1324+
1325+
// Whether the playback of this segment can be interrupted by the end
1326+
// user's speech and the client should then start the next Dialogflow
1327+
// request.
1328+
bool allow_playback_interruption = 3;
1329+
}
1330+
1331+
// Segments this audio response is composed of.
1332+
repeated Segment segments = 1;
1333+
}
1334+
11061335
// Represents the signal that tells the client to transfer the phone call
11071336
// connected to the agent to a third-party endpoint.
11081337
message TelephonyTransferCall {
@@ -1132,6 +1361,10 @@ message ResponseMessage {
11321361
// ended.
11331362
EndInteraction end_interaction = 4;
11341363

1364+
// An audio response message composed of both the synthesized Dialogflow
1365+
// agent responses and the audios hosted in places known to the client.
1366+
MixedAudio mixed_audio = 5;
1367+
11351368
// A signal that the client should transfer the phone call connected to
11361369
// this agent to a third-party endpoint.
11371370
TelephonyTransferCall telephony_transfer_call = 6;

0 commit comments

Comments
 (0)