@@ -109,6 +109,24 @@ service Participants {
109109 option (google.api.method_signature ) = "participant,event_input" ;
110110 }
111111
112+ // Adds a text (e.g., chat) or audio (e.g., phone recording) message from a
113+ // participant into the conversation.
114+ // Note: This method is only available through the gRPC API (not REST).
115+ //
116+ // The top-level message sent to the client by the server is
117+ // `StreamingAnalyzeContentResponse`. Multiple response messages can be
118+ // returned in order. The first one or more messages contain the
119+ // `recognition_result` field. Each result represents a more complete
120+ // transcript of what the user said. The next message contains the
121+ // `reply_text` field, and potentially the `reply_audio` and/or the
122+ // `automated_agent_reply` fields.
123+ //
124+ // Note: Always use agent versions for production traffic
125+ // sent to virtual agents. See [Versions and
126+ // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
127+ rpc StreamingAnalyzeContent (stream StreamingAnalyzeContentRequest ) returns (stream StreamingAnalyzeContentResponse ) {
128+ }
129+
112130 // Gets suggested articles for a participant based on specific historical
113131 // messages.
114132 //
@@ -543,6 +561,14 @@ message AnalyzeContentRequest {
543561 // Parameters for a human assist query.
544562 AssistQueryParameters assist_query_params = 14 ;
545563
564+ // Additional parameters to be put into Dialogflow CX session parameters. To
565+ // remove a parameter from the session, clients should explicitly set the
566+ // parameter value to null.
567+ //
568+ // Note: this field should only be used if you are connecting to a Dialogflow
569+ // CX agent.
570+ google.protobuf.Struct cx_parameters = 18 ;
571+
546572 // Optional. The send time of the message from end user or human agent's
547573 // perspective. It is used for identifying the same message under one
548574 // participant.
@@ -624,6 +650,182 @@ message AnalyzeContentResponse {
624650 DtmfParameters dtmf_parameters = 9 ;
625651}
626652
653+ // Defines the language used in the input text.
654+ message InputTextConfig {
655+ // Required. The language of this conversational query. See [Language
656+ // Support](https://cloud.google.com/dialogflow/docs/reference/language)
657+ // for a list of the currently supported language codes.
658+ string language_code = 1 ;
659+ }
660+
661+ // The top-level message sent by the client to the
662+ // [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent] method.
663+ //
664+ // Multiple request messages should be sent in order:
665+ //
666+ // 1. The first message must contain
667+ // [participant][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.participant],
668+ // [config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] and optionally
669+ // [query_params][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.query_params]. If you want
670+ // to receive an audio response, it should also contain
671+ // [reply_audio_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.reply_audio_config].
672+ // The message must not contain
673+ // [input][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input].
674+ //
675+ // 2. If [config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] in the first message
676+ // was set to [audio_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.audio_config],
677+ // all subsequent messages must contain
678+ // [input_audio][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_audio] to continue
679+ // with Speech recognition.
680+ // If you decide to analyze text input instead, after you have already
681+ // started Speech recognition, please send a message with
682+ // [StreamingAnalyzeContentRequest.input_text][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_text].
683+ //
684+ // However, note that:
685+ //
686+ // * Dialogflow will bill you for the audio so far.
687+ // * Dialogflow discards all Speech recognition results in favor of the
688+ // text input.
689+ //
690+ // 3. If [StreamingAnalyzeContentRequest.config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] in the first message was set
691+ // to [StreamingAnalyzeContentRequest.text_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.text_config], then the second message
692+ // must contain only [input_text][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_text].
693+ // Moreover, you must not send more than two messages.
694+ //
695+ // After you have sent all input, you must half-close or abort the request stream.
696+ message StreamingAnalyzeContentRequest {
697+ // Required. The name of the participant this text comes from.
698+ // Format: `projects/<Project ID>/locations/<Location
699+ // ID>/conversations/<Conversation ID>/participants/<Participant ID>`.
700+ string participant = 1 [
701+ (google.api.field_behavior ) = REQUIRED ,
702+ (google.api.resource_reference ) = {
703+ type : "dialogflow.googleapis.com/Participant"
704+ }
705+ ];
706+
707+ // Required. The input config.
708+ oneof config {
709+ // Instructs the speech recognizer how to process the speech audio.
710+ InputAudioConfig audio_config = 2 ;
711+
712+ // The natural language text to be processed.
713+ InputTextConfig text_config = 3 ;
714+ }
715+
716+ // Speech synthesis configuration.
717+ // The speech synthesis settings for a virtual agent that may be configured
718+ // for the associated conversation profile are not used when calling
719+ // StreamingAnalyzeContent. If this configuration is not supplied, speech
720+ // synthesis is disabled.
721+ OutputAudioConfig reply_audio_config = 4 ;
722+
723+ // Required. The input.
724+ oneof input {
725+ // The input audio content to be recognized. Must be sent if `audio_config`
726+ // is set in the first message. The complete audio over all streaming
727+ // messages must not exceed 1 minute.
728+ bytes input_audio = 5 ;
729+
730+ // The UTF-8 encoded natural language text to be processed. Must be sent if
731+ // `text_config` is set in the first message. Text length must not exceed
732+ // 256 bytes. The `input_text` field can be only sent once.
733+ string input_text = 6 ;
734+
735+ // The DTMF digits used to invoke intent and fill in parameter value.
736+ //
737+ // This input is ignored if the previous response indicated that DTMF input
738+ // is not accepted.
739+ TelephonyDtmfEvents input_dtmf = 9 ;
740+ }
741+
742+ // Parameters for a Dialogflow virtual-agent query.
743+ QueryParameters query_params = 7 ;
744+
745+ // Parameters for a human assist query.
746+ AssistQueryParameters assist_query_params = 8 ;
747+
748+ // Additional parameters to be put into Dialogflow CX session parameters. To
749+ // remove a parameter from the session, clients should explicitly set the
750+ // parameter value to null.
751+ //
752+ // Note: this field should only be used if you are connecting to a Dialogflow
753+ // CX agent.
754+ google.protobuf.Struct cx_parameters = 13 ;
755+
756+ // Enable partial virtual agent responses. If this flag is not enabled,
757+ // response stream still contains only one final response even if some
758+ // `Fulfillment`s in Dialogflow virtual agent have been configured to return
759+ // partial responses.
760+ bool enable_partial_automated_agent_reply = 12 ;
761+ }
762+
763+ // The top-level message returned from the `StreamingAnalyzeContent` method.
764+ //
765+ // Multiple response messages can be returned in order:
766+ //
767+ // 1. If the input was set to streaming audio, the first one or more messages
768+ // contain `recognition_result`. Each `recognition_result` represents a more
769+ // complete transcript of what the user said. The last `recognition_result`
770+ // has `is_final` set to `true`.
771+ //
772+ // 2. In virtual agent stage: if `enable_partial_automated_agent_reply` is
773+ // true, the following N (currently 1 <= N <= 4) messages
774+ // contain `automated_agent_reply` and optionally `reply_audio`
775+ // returned by the virtual agent. The first (N-1)
776+ // `automated_agent_reply`s will have `automated_agent_reply_type` set to
777+ // `PARTIAL`. The last `automated_agent_reply` has
778+ // `automated_agent_reply_type` set to `FINAL`.
779+ // If `enable_partial_automated_agent_reply` is not enabled, response stream
780+ // only contains the final reply.
781+ //
782+ // In human assist stage: the following N (N >= 1) messages contain
783+ // `human_agent_suggestion_results`, `end_user_suggestion_results` or
784+ // `message`.
785+ message StreamingAnalyzeContentResponse {
786+ // The result of speech recognition.
787+ StreamingRecognitionResult recognition_result = 1 ;
788+
789+ // Optional. The output text content.
790+ // This field is set if an automated agent responded with a text for the user.
791+ string reply_text = 2 ;
792+
793+ // Optional. The audio data bytes encoded as specified in the request.
794+ // This field is set if:
795+ //
796+ // - The `reply_audio_config` field is specified in the request.
797+ // - The automated agent, which this output comes from, responded with audio.
798+ // In such a case, the `reply_audio.config` field contains settings used to
799+ // synthesize the speech.
800+ //
801+ // In some scenarios, multiple output audio fields may be present in the
802+ // response structure. In these cases, only the top-most-level audio output
803+ // has content.
804+ OutputAudio reply_audio = 3 ;
805+
806+ // Optional. Only set if a Dialogflow automated agent has responded.
807+ // Note that: [AutomatedAgentReply.detect_intent_response.output_audio][]
808+ // and [AutomatedAgentReply.detect_intent_response.output_audio_config][]
809+ // are always empty, use [reply_audio][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentResponse.reply_audio] instead.
810+ AutomatedAgentReply automated_agent_reply = 4 ;
811+
812+ // Output only. Message analyzed by CCAI.
813+ Message message = 6 ;
814+
815+ // The suggestions for most recent human agent. The order is the same as
816+ // [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
817+ // [HumanAgentAssistantConfig.human_agent_suggestion_config][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.human_agent_suggestion_config].
818+ repeated SuggestionResult human_agent_suggestion_results = 7 ;
819+
820+ // The suggestions for end user. The order is the same as
821+ // [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
822+ // [HumanAgentAssistantConfig.end_user_suggestion_config][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.end_user_suggestion_config].
823+ repeated SuggestionResult end_user_suggestion_results = 8 ;
824+
825+ // Indicates the parameters of DTMF.
826+ DtmfParameters dtmf_parameters = 10 ;
827+ }
828+
627829// Represents a part of a message possibly annotated with an entity. The part
628830// can be an entity or purely a part of the message between two entities or
629831// message start/end.
@@ -1103,6 +1305,33 @@ message ResponseMessage {
11031305
11041306 }
11051307
1308+ // Represents an audio message that is composed of both segments
1309+ // synthesized from the Dialogflow agent prompts and ones hosted externally
1310+ // at the specified URIs.
1311+ message MixedAudio {
1312+ // Represents one segment of audio.
1313+ message Segment {
1314+ // Content of the segment.
1315+ oneof content {
1316+ // Raw audio synthesized from the Dialogflow agent's response using
1317+ // the output config specified in the request.
1318+ bytes audio = 1 ;
1319+
1320+ // Client-specific URI that points to an audio clip accessible to the
1321+ // client.
1322+ string uri = 2 ;
1323+ }
1324+
1325+ // Whether the playback of this segment can be interrupted by the end
1326+ // user's speech and the client should then start the next Dialogflow
1327+ // request.
1328+ bool allow_playback_interruption = 3 ;
1329+ }
1330+
1331+ // Segments this audio response is composed of.
1332+ repeated Segment segments = 1 ;
1333+ }
1334+
11061335 // Represents the signal that telles the client to transfer the phone call
11071336 // connected to the agent to a third-party endpoint.
11081337 message TelephonyTransferCall {
@@ -1132,6 +1361,10 @@ message ResponseMessage {
11321361 // ended.
11331362 EndInteraction end_interaction = 4 ;
11341363
1364+ // An audio response message composed of both the synthesized Dialogflow
1365+ // agent responses and the audios hosted in places known to the client.
1366+ MixedAudio mixed_audio = 5 ;
1367+
11351368 // A signal that the client should transfer the phone call connected to
11361369 // this agent to a third-party endpoint.
11371370 TelephonyTransferCall telephony_transfer_call = 6 ;
0 commit comments