Skip to content

Commit 31c70c9

Browse files
Google APIs authored and copybara-github committed
feat: add grpc_ports to UploadModel ModelContainerSpec
feat: add DirectPredict to PredictionService. feat: add DirectRawPredict to PredictionService. feat: add StreamingPredict to PredictionService. feat: add StreamingRawPredict to PredictionService. PiperOrigin-RevId: 583502805
1 parent 2f91fd5 commit 31c70c9

2 files changed

Lines changed: 145 additions & 0 deletions

File tree

google/cloud/aiplatform/v1/model.proto

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,17 @@ message ModelContainerSpec {
692692
// variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables).)
693693
string health_route = 7 [(google.api.field_behavior) = IMMUTABLE];
694694

695+
// Immutable. List of ports to expose from the container. Vertex AI sends gRPC
696+
// prediction requests that it receives to the first port on this list. Vertex
697+
// AI also sends liveness and health checks to this port.
698+
//
699+
// If you do not specify this field, gRPC requests to the container will be
700+
// disabled.
701+
//
702+
// Vertex AI does not use ports other than the first one listed. This field
703+
// corresponds to the `ports` field of the Kubernetes Containers v1 core API.
704+
repeated Port grpc_ports = 9 [(google.api.field_behavior) = IMMUTABLE];
705+
695706
// Immutable. Deployment timeout.
696707
// Limit for deployment timeout is 2 hours.
697708
google.protobuf.Duration deployment_timeout = 10

google/cloud/aiplatform/v1/prediction_service.proto

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,29 @@ service PredictionService {
7575
option (google.api.method_signature) = "endpoint,http_body";
7676
}
7777

78+
// Perform a unary online prediction request for Vertex first-party products
79+
// and frameworks.
80+
rpc DirectPredict(DirectPredictRequest) returns (DirectPredictResponse) {
81+
option (google.api.http) = {
82+
post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:directPredict"
83+
body: "*"
84+
};
85+
}
86+
87+
// Perform an online prediction request through gRPC.
88+
rpc DirectRawPredict(DirectRawPredictRequest)
89+
returns (DirectRawPredictResponse) {
90+
option (google.api.http) = {
91+
post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:directRawPredict"
92+
body: "*"
93+
};
94+
}
95+
96+
// Perform a streaming online prediction request for Vertex first-party
97+
// products and frameworks.
98+
rpc StreamingPredict(stream StreamingPredictRequest)
99+
returns (stream StreamingPredictResponse) {}
100+
78101
// Perform a server-side streaming online prediction request for Vertex
79102
// LLM streaming.
80103
rpc ServerStreamingPredict(StreamingPredictRequest)
@@ -89,6 +112,10 @@ service PredictionService {
89112
};
90113
}
91114

115+
// Perform a streaming online prediction request through gRPC.
116+
rpc StreamingRawPredict(stream StreamingRawPredictRequest)
117+
returns (stream StreamingRawPredictResponse) {}
118+
92119
// Perform an online explanation.
93120
//
94121
// If
@@ -211,6 +238,69 @@ message RawPredictRequest {
211238
google.api.HttpBody http_body = 2;
212239
}
213240

241+
// Request message for
242+
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1.PredictionService.DirectPredict].
243+
message DirectPredictRequest {
244+
// Required. The name of the Endpoint requested to serve the prediction.
245+
// Format:
246+
// `projects/{project}/locations/{location}/endpoints/{endpoint}`
247+
string endpoint = 1 [
248+
(google.api.field_behavior) = REQUIRED,
249+
(google.api.resource_reference) = {
250+
type: "aiplatform.googleapis.com/Endpoint"
251+
}
252+
];
253+
254+
// The prediction input.
255+
repeated Tensor inputs = 2;
256+
257+
// The parameters that govern the prediction.
258+
Tensor parameters = 3;
259+
}
260+
261+
// Response message for
262+
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1.PredictionService.DirectPredict].
263+
message DirectPredictResponse {
264+
// The prediction output.
265+
repeated Tensor outputs = 1;
266+
267+
// The parameters that govern the prediction.
268+
Tensor parameters = 2;
269+
}
270+
271+
// Request message for
272+
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1.PredictionService.DirectRawPredict].
273+
message DirectRawPredictRequest {
274+
// Required. The name of the Endpoint requested to serve the prediction.
275+
// Format:
276+
// `projects/{project}/locations/{location}/endpoints/{endpoint}`
277+
string endpoint = 1 [
278+
(google.api.field_behavior) = REQUIRED,
279+
(google.api.resource_reference) = {
280+
type: "aiplatform.googleapis.com/Endpoint"
281+
}
282+
];
283+
284+
// Fully qualified name of the API method being invoked to perform
285+
// predictions.
286+
//
287+
// Format:
288+
// `/namespace.Service/Method`
289+
// Example:
290+
// `/tensorflow.serving.PredictionService/Predict`
291+
string method_name = 2;
292+
293+
// The prediction input.
294+
bytes input = 3;
295+
}
296+
297+
// Response message for
298+
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1.PredictionService.DirectRawPredict].
299+
message DirectRawPredictResponse {
300+
// The prediction output.
301+
bytes output = 1;
302+
}
303+
214304
// Request message for
215305
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
216306
//
@@ -245,6 +335,50 @@ message StreamingPredictResponse {
245335
Tensor parameters = 2;
246336
}
247337

338+
// Request message for
339+
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamingRawPredict].
340+
//
341+
// The first message must contain
342+
// [endpoint][google.cloud.aiplatform.v1.StreamingRawPredictRequest.endpoint]
343+
// and
344+
// [method_name][google.cloud.aiplatform.v1.StreamingRawPredictRequest.method_name]
345+
// fields and optionally
346+
// [input][google.cloud.aiplatform.v1.StreamingRawPredictRequest.input]. The
347+
// subsequent messages must contain
348+
// [input][google.cloud.aiplatform.v1.StreamingRawPredictRequest.input].
349+
// [method_name][google.cloud.aiplatform.v1.StreamingRawPredictRequest.method_name]
350+
// in the subsequent messages has no effect.
351+
message StreamingRawPredictRequest {
352+
// Required. The name of the Endpoint requested to serve the prediction.
353+
// Format:
354+
// `projects/{project}/locations/{location}/endpoints/{endpoint}`
355+
string endpoint = 1 [
356+
(google.api.field_behavior) = REQUIRED,
357+
(google.api.resource_reference) = {
358+
type: "aiplatform.googleapis.com/Endpoint"
359+
}
360+
];
361+
362+
// Fully qualified name of the API method being invoked to perform
363+
// predictions.
364+
//
365+
// Format:
366+
// `/namespace.Service/Method`
367+
// Example:
368+
// `/tensorflow.serving.PredictionService/Predict`
369+
string method_name = 2;
370+
371+
// The prediction input.
372+
bytes input = 3;
373+
}
374+
375+
// Response message for
376+
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamingRawPredict].
377+
message StreamingRawPredictResponse {
378+
// The prediction output.
379+
bytes output = 1;
380+
}
381+
248382
// Request message for
249383
// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
250384
message ExplainRequest {

0 commit comments

Comments
 (0)