@@ -75,6 +75,29 @@ service PredictionService {
7575 option (google.api.method_signature ) = "endpoint,http_body" ;
7676 }
7777
78+ // Perform a unary online prediction request for Vertex first-party products
79+ // and frameworks.
80+ rpc DirectPredict (DirectPredictRequest ) returns (DirectPredictResponse ) {
81+ option (google.api.http ) = {
82+ post : "/v1/{endpoint=projects/*/locations/*/endpoints/*}:directPredict"
83+ body : "*"
84+ };
85+ }
86+
87+ // Perform a unary online prediction request through gRPC.
88+ rpc DirectRawPredict (DirectRawPredictRequest )
89+ returns (DirectRawPredictResponse ) {
90+ option (google.api.http ) = {
91+ post : "/v1/{endpoint=projects/*/locations/*/endpoints/*}:directRawPredict"
92+ body : "*"
93+ };
94+ }
95+
96+ // Perform a bidirectional streaming online prediction request for Vertex
97+ // first-party products and frameworks.
98+ rpc StreamingPredict (stream StreamingPredictRequest )
99+ returns (stream StreamingPredictResponse ) {}
100+
78101 // Perform a server-side streaming online prediction request for Vertex
79102 // LLM streaming.
80103 rpc ServerStreamingPredict (StreamingPredictRequest )
@@ -89,6 +112,10 @@ service PredictionService {
89112 };
90113 }
91114
115+ // Perform a bidirectional streaming online prediction request through gRPC.
116+ rpc StreamingRawPredict (stream StreamingRawPredictRequest )
117+ returns (stream StreamingRawPredictResponse ) {}
118+
92119 // Perform an online explanation.
93120 //
94121 // If
@@ -211,6 +238,69 @@ message RawPredictRequest {
211238 google.api.HttpBody http_body = 2 ;
212239}
213240
241+ // Request message for
242+ // [PredictionService.DirectPredict][google.cloud.aiplatform.v1.PredictionService.DirectPredict].
243+ message DirectPredictRequest {
244+ // Required. The name of the Endpoint requested to serve the prediction.
245+ // Format:
246+ // `projects/{project}/locations/{location}/endpoints/{endpoint}`
247+ string endpoint = 1 [
248+ (google.api.field_behavior ) = REQUIRED ,
249+ (google.api.resource_reference ) = {
250+ type : "aiplatform.googleapis.com/Endpoint"
251+ }
252+ ];
253+
254+ // The prediction input, as a list of tensors.
255+ repeated Tensor inputs = 2 ;
256+
257+ // The parameters that govern the prediction, as a single tensor.
258+ Tensor parameters = 3 ;
259+ }
260+
261+ // Response message for
262+ // [PredictionService.DirectPredict][google.cloud.aiplatform.v1.PredictionService.DirectPredict].
263+ message DirectPredictResponse {
264+ // The prediction output, as a list of tensors.
265+ repeated Tensor outputs = 1 ;
266+
267+ // The parameters that govern the prediction, as a single tensor.
268+ Tensor parameters = 2 ;
269+ }
270+
271+ // Request message for
272+ // [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1.PredictionService.DirectRawPredict].
273+ message DirectRawPredictRequest {
274+ // Required. The name of the Endpoint requested to serve the prediction.
275+ // Format:
276+ // `projects/{project}/locations/{location}/endpoints/{endpoint}`
277+ string endpoint = 1 [
278+ (google.api.field_behavior ) = REQUIRED ,
279+ (google.api.resource_reference ) = {
280+ type : "aiplatform.googleapis.com/Endpoint"
281+ }
282+ ];
283+
284+ // Fully qualified name of the API method being invoked to perform
285+ // predictions.
286+ //
287+ // Format:
288+ // `/namespace.Service/Method`
289+ // Example:
290+ // `/tensorflow.serving.PredictionService/Predict`
291+ string method_name = 2 ;
292+
293+ // The prediction input, as raw bytes.
294+ bytes input = 3 ;
295+ }
296+
297+ // Response message for
298+ // [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1.PredictionService.DirectRawPredict].
299+ message DirectRawPredictResponse {
300+ // The prediction output, as raw bytes.
301+ bytes output = 1 ;
302+ }
303+
214304// Request message for
215305// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
216306//
@@ -245,6 +335,50 @@ message StreamingPredictResponse {
245335 Tensor parameters = 2 ;
246336}
247337
338+ // Request message for
339+ // [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamingRawPredict].
340+ //
341+ // The first message must contain the
342+ // [endpoint][google.cloud.aiplatform.v1.StreamingRawPredictRequest.endpoint]
343+ // and
344+ // [method_name][google.cloud.aiplatform.v1.StreamingRawPredictRequest.method_name]
345+ // fields and, optionally,
346+ // [input][google.cloud.aiplatform.v1.StreamingRawPredictRequest.input]. The
347+ // subsequent messages must contain
348+ // [input][google.cloud.aiplatform.v1.StreamingRawPredictRequest.input].
349+ // [method_name][google.cloud.aiplatform.v1.StreamingRawPredictRequest.method_name]
350+ // in the subsequent messages has no effect.
351+ message StreamingRawPredictRequest {
352+ // Required. The name of the Endpoint requested to serve the prediction.
353+ // Format:
354+ // `projects/{project}/locations/{location}/endpoints/{endpoint}`
355+ string endpoint = 1 [
356+ (google.api.field_behavior ) = REQUIRED ,
357+ (google.api.resource_reference ) = {
358+ type : "aiplatform.googleapis.com/Endpoint"
359+ }
360+ ];
361+
362+ // Fully qualified name of the API method being invoked to perform
363+ // predictions.
364+ //
365+ // Format:
366+ // `/namespace.Service/Method`
367+ // Example:
368+ // `/tensorflow.serving.PredictionService/Predict`
369+ string method_name = 2 ;
370+
371+ // The prediction input, as raw bytes.
372+ bytes input = 3 ;
373+ }
374+
375+ // Response message for
376+ // [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamingRawPredict].
377+ message StreamingRawPredictResponse {
378+ // The prediction output, as raw bytes.
379+ bytes output = 1 ;
380+ }
381+
248382// Request message for
249383// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
250384message ExplainRequest {
0 commit comments