Skip to content

Commit b9927eb

Browse files
Google APIs copybara-github
authored and committed
feat: add the API of StreamingAnalyzeContent
PiperOrigin-RevId: 446850583
1 parent 34e5f85 commit b9927eb

5 files changed

Lines changed: 246 additions & 0 deletions

File tree

google/cloud/dialogflow/v2beta1/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ py_gapic_library(
210210
name = "dialogflow_py_gapic",
211211
srcs = [":dialogflow_proto"],
212212
grpc_service_config = "dialogflow_grpc_service_config.json",
213+
service_yaml = "dialogflow_v2beta1.yaml",
213214
)
214215

215216
# Open Source Packages

google/cloud/dialogflow/v2beta1/audio_config.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ option java_multiple_files = true;
2727
option java_outer_classname = "AudioConfigProto";
2828
option java_package = "com.google.cloud.dialogflow.v2beta1";
2929
option objc_class_prefix = "DF";
30+
option (google.api.resource_definition) = {
31+
type: "automl.googleapis.com/Model"
32+
pattern: "projects/{project}/locations/{location}/models/{model}"
33+
};
3034

3135
// Audio encoding of the audio content sent in the conversational query request.
3236
// Refer to the

google/cloud/dialogflow/v2beta1/dialogflow_v2beta1.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ backend:
9090
deadline: 60.0
9191
- selector: google.cloud.dialogflow.v2beta1.Participants.AnalyzeContent
9292
deadline: 220.0
93+
- selector: google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent
94+
deadline: 220.0
9395
- selector: 'google.cloud.dialogflow.v2beta1.SessionEntityTypes.*'
9496
deadline: 60.0
9597
- selector: google.cloud.dialogflow.v2beta1.Sessions.DetectIntent
@@ -104,6 +106,8 @@ backend:
104106
deadline: 60.0
105107
- selector: 'google.longrunning.Operations.*'
106108
deadline: 60.0
109+
- selector: google.longrunning.Operations.ListOperations
110+
deadline: 180.0
107111

108112
http:
109113
rules:

google/cloud/dialogflow/v2beta1/fulfillment.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,10 @@ message Fulfillment {
8181

8282
// Represents configuration for a generic web service.
8383
// Dialogflow supports two mechanisms for authentication:
84+
//
8485
// - Basic authentication with username and password.
8586
// - Authentication with additional authentication headers.
87+
//
8688
// More information could be found at:
8789
// https://cloud.google.com/dialogflow/docs/fulfillment-configure.
8890
message GenericWebService {
@@ -127,6 +129,7 @@ message Fulfillment {
127129

128130
// Required. The unique identifier of the fulfillment.
129131
// Supported formats:
132+
//
130133
// - `projects/<Project ID>/agent/fulfillment`
131134
// - `projects/<Project ID>/locations/<Location ID>/agent/fulfillment`
132135
//
@@ -155,6 +158,7 @@ message Fulfillment {
155158
message GetFulfillmentRequest {
156159
// Required. The name of the fulfillment.
157160
// Supported formats:
161+
//
158162
// - `projects/<Project ID>/agent/fulfillment`
159163
// - `projects/<Project ID>/locations/<Location ID>/agent/fulfillment`
160164
string name = 1 [

google/cloud/dialogflow/v2beta1/participant.proto

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,24 @@ service Participants {
109109
option (google.api.method_signature) = "participant,event_input";
110110
}
111111

112+
// Adds a text (e.g., chat) or audio (e.g., phone recording) message from a
113+
// participant into the conversation.
114+
// Note: This method is only available through the gRPC API (not REST).
115+
//
116+
// The top-level message sent to the client by the server is
117+
// `StreamingAnalyzeContentResponse`. Multiple response messages can be
118+
// returned in order. The first one or more messages contain the
119+
// `recognition_result` field. Each result represents a more complete
120+
// transcript of what the user said. The next message contains the
121+
// `reply_text` field, and potentially the `reply_audio` and/or the
122+
// `automated_agent_reply` fields.
123+
//
124+
// Note: Always use agent versions for production traffic
125+
// sent to virtual agents. See [Versions and
126+
// environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
127+
rpc StreamingAnalyzeContent(stream StreamingAnalyzeContentRequest) returns (stream StreamingAnalyzeContentResponse) {
128+
}
129+
112130
// Gets suggested articles for a participant based on specific historical
113131
// messages.
114132
//
@@ -543,6 +561,14 @@ message AnalyzeContentRequest {
543561
// Parameters for a human assist query.
544562
AssistQueryParameters assist_query_params = 14;
545563

564+
// Additional parameters to be put into Dialogflow CX session parameters. To
565+
// remove a parameter from the session, clients should explicitly set the
566+
// parameter value to null.
567+
//
568+
// Note: this field should only be used if you are connecting to a Dialogflow
569+
// CX agent.
570+
google.protobuf.Struct cx_parameters = 18;
571+
546572
// Optional. The send time of the message from end user or human agent's
547573
// perspective. It is used for identifying the same message under one
548574
// participant.
@@ -624,6 +650,182 @@ message AnalyzeContentResponse {
624650
DtmfParameters dtmf_parameters = 9;
625651
}
626652

653+
// Defines the language used in the input text.
654+
message InputTextConfig {
655+
// Required. The language of this conversational query. See [Language
656+
// Support](https://cloud.google.com/dialogflow/docs/reference/language)
657+
// for a list of the currently supported language codes.
658+
string language_code = 1;
659+
}
660+
661+
// The top-level message sent by the client to the
662+
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent] method.
663+
//
664+
// Multiple request messages should be sent in order:
665+
//
666+
// 1. The first message must contain
667+
// [participant][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.participant],
668+
// [config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] and optionally
669+
// [query_params][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.query_params]. If you want
670+
// to receive an audio response, it should also contain
671+
// [reply_audio_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.reply_audio_config].
672+
// The message must not contain
673+
// [input][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input].
674+
//
675+
// 2. If [config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] in the first message
676+
// was set to [audio_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.audio_config],
677+
// all subsequent messages must contain
678+
// [input_audio][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_audio] to continue
679+
// with Speech recognition.
680+
// If you decide to rather analyze text input after you already started
681+
// Speech recognition, please send a message with
682+
// [StreamingAnalyzeContentRequest.input_text][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_text].
683+
//
684+
// However, note that:
685+
//
686+
// * Dialogflow will bill you for the audio so far.
687+
// * Dialogflow discards all Speech recognition results in favor of the
688+
// text input.
689+
//
690+
// 3. If [StreamingAnalyzeContentRequest.config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.config] in the first message was set
691+
// to [StreamingAnalyzeContentRequest.text_config][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.text_config], then the second message
692+
// must contain only [input_text][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentRequest.input_text].
693+
// Moreover, you must not send more than two messages.
694+
//
695+
// After you have sent all input, you must half-close or abort the request stream.
696+
message StreamingAnalyzeContentRequest {
697+
// Required. The name of the participant this text comes from.
698+
// Format: `projects/<Project ID>/locations/<Location
699+
// ID>/conversations/<Conversation ID>/participants/<Participant ID>`.
700+
string participant = 1 [
701+
(google.api.field_behavior) = REQUIRED,
702+
(google.api.resource_reference) = {
703+
type: "dialogflow.googleapis.com/Participant"
704+
}
705+
];
706+
707+
// Required. The input config.
708+
oneof config {
709+
// Instructs the speech recognizer how to process the speech audio.
710+
InputAudioConfig audio_config = 2;
711+
712+
// The natural language text to be processed.
713+
InputTextConfig text_config = 3;
714+
}
715+
716+
// Speech synthesis configuration.
717+
// The speech synthesis settings for a virtual agent that may be configured
718+
// for the associated conversation profile are not used when calling
719+
// StreamingAnalyzeContent. If this configuration is not supplied, speech
720+
// synthesis is disabled.
721+
OutputAudioConfig reply_audio_config = 4;
722+
723+
// Required. The input.
724+
oneof input {
725+
// The input audio content to be recognized. Must be sent if `audio_config`
726+
// is set in the first message. The complete audio over all streaming
727+
// messages must not exceed 1 minute.
728+
bytes input_audio = 5;
729+
730+
// The UTF-8 encoded natural language text to be processed. Must be sent if
731+
// `text_config` is set in the first message. Text length must not exceed
732+
// 256 bytes. The `input_text` field can be only sent once.
733+
string input_text = 6;
734+
735+
// The DTMF digits used to invoke intent and fill in parameter value.
736+
//
737+
// This input is ignored if the previous response indicated that DTMF input
738+
// is not accepted.
739+
TelephonyDtmfEvents input_dtmf = 9;
740+
}
741+
742+
// Parameters for a Dialogflow virtual-agent query.
743+
QueryParameters query_params = 7;
744+
745+
// Parameters for a human assist query.
746+
AssistQueryParameters assist_query_params = 8;
747+
748+
// Additional parameters to be put into Dialogflow CX session parameters. To
749+
// remove a parameter from the session, clients should explicitly set the
750+
// parameter value to null.
751+
//
752+
// Note: this field should only be used if you are connecting to a Dialogflow
753+
// CX agent.
754+
google.protobuf.Struct cx_parameters = 13;
755+
756+
// Enable partial virtual agent responses. If this flag is not enabled,
757+
// response stream still contains only one final response even if some
758+
// `Fulfillment`s in Dialogflow virtual agent have been configured to return
759+
// partial responses.
760+
bool enable_partial_automated_agent_reply = 12;
761+
}
762+
763+
// The top-level message returned from the `StreamingAnalyzeContent` method.
764+
//
765+
// Multiple response messages can be returned in order:
766+
//
767+
// 1. If the input was set to streaming audio, the first one or more messages
768+
// contain `recognition_result`. Each `recognition_result` represents a more
769+
// complete transcript of what the user said. The last `recognition_result`
770+
// has `is_final` set to `true`.
771+
//
772+
// 2. In virtual agent stage: if `enable_partial_automated_agent_reply` is
773+
// true, the following N (currently 1 <= N <= 4) messages
774+
// contain `automated_agent_reply` and optionally `reply_audio`
775+
// returned by the virtual agent. The first (N-1)
776+
// `automated_agent_reply`s will have `automated_agent_reply_type` set to
777+
// `PARTIAL`. The last `automated_agent_reply` has
778+
// `automated_agent_reply_type` set to `FINAL`.
779+
// If `enable_partial_automated_agent_reply` is not enabled, response stream
780+
// only contains the final reply.
781+
//
782+
// In human assist stage: the following N (N >= 1) messages contain
783+
// `human_agent_suggestion_results`, `end_user_suggestion_results` or
784+
// `message`.
785+
message StreamingAnalyzeContentResponse {
786+
// The result of speech recognition.
787+
StreamingRecognitionResult recognition_result = 1;
788+
789+
// Optional. The output text content.
790+
// This field is set if an automated agent responded with a text for the user.
791+
string reply_text = 2;
792+
793+
// Optional. The audio data bytes encoded as specified in the request.
794+
// This field is set if:
795+
//
796+
// - The `reply_audio_config` field is specified in the request.
797+
// - The automated agent, which this output comes from, responded with audio.
798+
// In such case, the `reply_audio.config` field contains settings used to
799+
// synthesize the speech.
800+
//
801+
// In some scenarios, multiple output audio fields may be present in the
802+
// response structure. In these cases, only the top-most-level audio output
803+
// has content.
804+
OutputAudio reply_audio = 3;
805+
806+
// Optional. Only set if a Dialogflow automated agent has responded.
807+
// Note that: [AutomatedAgentReply.detect_intent_response.output_audio][]
808+
// and [AutomatedAgentReply.detect_intent_response.output_audio_config][]
809+
// are always empty, use [reply_audio][google.cloud.dialogflow.v2beta1.StreamingAnalyzeContentResponse.reply_audio] instead.
810+
AutomatedAgentReply automated_agent_reply = 4;
811+
812+
// Output only. Message analyzed by CCAI.
813+
Message message = 6;
814+
815+
// The suggestions for most recent human agent. The order is the same as
816+
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
817+
// [HumanAgentAssistantConfig.human_agent_suggestion_config][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.human_agent_suggestion_config].
818+
repeated SuggestionResult human_agent_suggestion_results = 7;
819+
820+
// The suggestions for end user. The order is the same as
821+
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
822+
// [HumanAgentAssistantConfig.end_user_suggestion_config][google.cloud.dialogflow.v2beta1.HumanAgentAssistantConfig.end_user_suggestion_config].
823+
repeated SuggestionResult end_user_suggestion_results = 8;
824+
825+
// Indicates the parameters of DTMF.
826+
DtmfParameters dtmf_parameters = 10;
827+
}
828+
627829
// Represents a part of a message possibly annotated with an entity. The part
628830
// can be an entity or purely a part of the message between two entities or
629831
// message start/end.
@@ -1103,6 +1305,33 @@ message ResponseMessage {
11031305

11041306
}
11051307

1308+
// Represents an audio message that is composed of both segments
1309+
// synthesized from the Dialogflow agent prompts and ones hosted externally
1310+
// at the specified URIs.
1311+
message MixedAudio {
1312+
// Represents one segment of audio.
1313+
message Segment {
1314+
// Content of the segment.
1315+
oneof content {
1316+
// Raw audio synthesized from the Dialogflow agent's response using
1317+
// the output config specified in the request.
1318+
bytes audio = 1;
1319+
1320+
// Client-specific URI that points to an audio clip accessible to the
1321+
// client.
1322+
string uri = 2;
1323+
}
1324+
1325+
// Whether the playback of this segment can be interrupted by the end
1326+
// user's speech and the client should then start the next Dialogflow
1327+
// request.
1328+
bool allow_playback_interruption = 3;
1329+
}
1330+
1331+
// Segments this audio response is composed of.
1332+
repeated Segment segments = 1;
1333+
}
1334+
11061335
// Represents the signal that tells the client to transfer the phone call
11071336
// connected to the agent to a third-party endpoint.
11081337
message TelephonyTransferCall {
@@ -1132,6 +1361,10 @@ message ResponseMessage {
11321361
// ended.
11331362
EndInteraction end_interaction = 4;
11341363

1364+
// An audio response message composed of both the synthesized Dialogflow
1365+
// agent responses and the audios hosted in places known to the client.
1366+
MixedAudio mixed_audio = 5;
1367+
11351368
// A signal that the client should transfer the phone call connected to
11361369
// this agent to a third-party endpoint.
11371370
TelephonyTransferCall telephony_transfer_call = 6;

0 commit comments

Comments
 (0)