Skip to content

Commit c53bf8d

Browse files
Google APIscopybara-github
authored andcommitted
feat: Added Training and Evaluation functions, request, responses and metadata to document_processor_service.proto
feat: Added evaluation.proto feat: Added latest_evaluation to processor.proto chore: removed deprecated flag from REPLACE in OperationType in document.proto PiperOrigin-RevId: 511230520
1 parent 84bbbc5 commit c53bf8d

7 files changed

Lines changed: 426 additions & 17 deletions

File tree

google/cloud/documentai/v1/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ proto_library(
2626
"document_io.proto",
2727
"document_processor_service.proto",
2828
"document_schema.proto",
29+
"evaluation.proto",
2930
"geometry.proto",
3031
"operation_metadata.proto",
3132
"processor.proto",

google/cloud/documentai/v1/document.proto

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -735,22 +735,29 @@ message Document {
735735
// Remove an element identified by `parent`.
736736
REMOVE = 2;
737737

738-
// Replace an element identified by `parent`.
738+
// Updates any fields within the given provenance scope of the message. It
739+
// 'overwrites' the fields rather than replacing them. This is
740+
// especially relevant when we just want to update a field value of an
741+
// entity without also affecting all the child properties.
742+
UPDATE = 7;
743+
744+
// Currently unused. Replace an element identified by `parent`.
739745
REPLACE = 3;
740746

741-
// Request human review for the element identified by `parent`.
742-
EVAL_REQUESTED = 4;
747+
// Deprecated. Request human review for the element identified by
748+
// `parent`.
749+
EVAL_REQUESTED = 4 [deprecated = true];
743750

744-
// Element is reviewed and approved at human review, confidence will be
745-
// set to 1.0.
746-
EVAL_APPROVED = 5;
751+
// Deprecated. Element is reviewed and approved at human review,
752+
// confidence will be set to 1.0.
753+
EVAL_APPROVED = 5 [deprecated = true];
747754

748-
// Element is skipped in the validation process.
749-
EVAL_SKIPPED = 6;
755+
// Deprecated. Element is skipped in the validation process.
756+
EVAL_SKIPPED = 6 [deprecated = true];
750757
}
751758

752759
// The index of the revision that produced this element.
753-
int32 revision = 1;
760+
int32 revision = 1 [deprecated = true];
754761

755762
// The Id of this operation. Needs to be unique within the scope of the
756763
// revision.
@@ -786,7 +793,8 @@ message Document {
786793
string processor = 5;
787794
}
788795

789-
// Id of the revision. Unique within the context of the document.
796+
// Id of the revision, internally generated by doc proto storage.
797+
// Unique within the context of the document.
790798
string id = 1;
791799

792800
// The revisions that this revision is based on. This can include one or
@@ -799,7 +807,8 @@ message Document {
799807
// `provenance.parent.revision` fields that index into this field.
800808
repeated string parent_ids = 7;
801809

802-
// The time that the revision was created.
810+
// The time that the revision was created, internally generated by
811+
// doc proto storage at the time of create.
803812
google.protobuf.Timestamp create_time = 3;
804813

805814
// Human Review information of this revision.

google/cloud/documentai/v1/document_processor_service.proto

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@ import "google/api/resource.proto";
2323
import "google/cloud/documentai/v1/document.proto";
2424
import "google/cloud/documentai/v1/document_io.proto";
2525
import "google/cloud/documentai/v1/document_schema.proto";
26+
import "google/cloud/documentai/v1/evaluation.proto";
2627
import "google/cloud/documentai/v1/operation_metadata.proto";
2728
import "google/cloud/documentai/v1/processor.proto";
2829
import "google/cloud/documentai/v1/processor_type.proto";
2930
import "google/longrunning/operations.proto";
31+
import "google/protobuf/empty.proto";
3032
import "google/protobuf/field_mask.proto";
3133
import "google/protobuf/timestamp.proto";
3234
import "google/rpc/status.proto";
@@ -131,6 +133,22 @@ service DocumentProcessorService {
131133
option (google.api.method_signature) = "name";
132134
}
133135

136+
// Trains a new processor version.
137+
// Operation metadata is returned as
138+
// [TrainProcessorVersionMetadata][google.cloud.documentai.v1.TrainProcessorVersionMetadata].
139+
rpc TrainProcessorVersion(TrainProcessorVersionRequest)
140+
returns (google.longrunning.Operation) {
141+
option (google.api.http) = {
142+
post: "/v1/{parent=projects/*/locations/*/processors/*}/processorVersions:train"
143+
body: "*"
144+
};
145+
option (google.api.method_signature) = "parent,processor_version";
146+
option (google.longrunning.operation_info) = {
147+
response_type: "TrainProcessorVersionResponse"
148+
metadata_type: "TrainProcessorVersionMetadata"
149+
};
150+
}
151+
134152
// Gets a processor version detail.
135153
rpc GetProcessorVersion(GetProcessorVersionRequest)
136154
returns (ProcessorVersion) {
@@ -272,6 +290,38 @@ service DocumentProcessorService {
272290
metadata_type: "ReviewDocumentOperationMetadata"
273291
};
274292
}
293+
294+
// Evaluates a ProcessorVersion against annotated documents, producing an
295+
// Evaluation.
296+
rpc EvaluateProcessorVersion(EvaluateProcessorVersionRequest)
297+
returns (google.longrunning.Operation) {
298+
option (google.api.http) = {
299+
post: "/v1/{processor_version=projects/*/locations/*/processors/*/processorVersions/*}:evaluateProcessorVersion"
300+
body: "*"
301+
};
302+
option (google.api.method_signature) = "processor_version";
303+
option (google.longrunning.operation_info) = {
304+
response_type: "EvaluateProcessorVersionResponse"
305+
metadata_type: "EvaluateProcessorVersionMetadata"
306+
};
307+
}
308+
309+
// Retrieves a specific evaluation.
310+
rpc GetEvaluation(GetEvaluationRequest) returns (Evaluation) {
311+
option (google.api.http) = {
312+
get: "/v1/{name=projects/*/locations/*/processors/*/processorVersions/*/evaluations/*}"
313+
};
314+
option (google.api.method_signature) = "name";
315+
}
316+
317+
// Retrieves a set of evaluations for a given processor version.
318+
rpc ListEvaluations(ListEvaluationsRequest)
319+
returns (ListEvaluationsResponse) {
320+
option (google.api.http) = {
321+
get: "/v1/{parent=projects/*/locations/*/processors/*/processorVersions/*}/evaluations"
322+
};
323+
option (google.api.method_signature) = "parent";
324+
}
275325
}
276326

277327
// Request message for the process document method.
@@ -758,6 +808,81 @@ message SetDefaultProcessorVersionMetadata {
758808
CommonOperationMetadata common_metadata = 1;
759809
}
760810

811+
// Request message for the create processor version method.
812+
message TrainProcessorVersionRequest {
813+
// The input data used to train a new `ProcessorVersion`.
814+
message InputData {
815+
// The documents used for training the new version.
816+
BatchDocumentsInputConfig training_documents = 3;
817+
818+
// The documents used for testing the trained version.
819+
BatchDocumentsInputConfig test_documents = 4;
820+
}
821+
822+
// Required. The parent (project, location and processor) to create the new
823+
// version for. Format:
824+
// `projects/{project}/locations/{location}/processors/{processor}`.
825+
string parent = 1 [
826+
(google.api.field_behavior) = REQUIRED,
827+
(google.api.resource_reference) = {
828+
type: "documentai.googleapis.com/Processor"
829+
}
830+
];
831+
832+
// Required. The processor version to be created.
833+
ProcessorVersion processor_version = 2
834+
[(google.api.field_behavior) = REQUIRED];
835+
836+
// Optional. The schema the processor version will be trained with.
837+
DocumentSchema document_schema = 10 [(google.api.field_behavior) = OPTIONAL];
838+
839+
// Optional. The input data used to train the `ProcessorVersion`.
840+
InputData input_data = 4 [(google.api.field_behavior) = OPTIONAL];
841+
842+
// Optional. The processor version to use as a base for training. This
843+
// processor version must be a child of `parent`. Format:
844+
// `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`.
845+
string base_processor_version = 8 [(google.api.field_behavior) = OPTIONAL];
846+
}
847+
848+
// The response for the TrainProcessorVersion method.
849+
message TrainProcessorVersionResponse {
850+
// The resource name of the processor version produced by training.
851+
string processor_version = 1;
852+
}
853+
854+
// The metadata that represents a processor version being created.
855+
message TrainProcessorVersionMetadata {
856+
// The dataset validation information.
857+
// This includes any and all errors with documents and the dataset.
858+
message DatasetValidation {
859+
// The total number of document errors.
860+
int32 document_error_count = 3;
861+
862+
// The total number of dataset errors.
863+
int32 dataset_error_count = 4;
864+
865+
// Error information pertaining to specific documents. A maximum of 10
866+
// document errors will be returned.
867+
// Any document with errors will not be used throughout training.
868+
repeated google.rpc.Status document_errors = 1;
869+
870+
// Error information for the dataset as a whole. A maximum of 10 dataset
871+
// errors will be returned.
872+
// A single dataset error is terminal for training.
873+
repeated google.rpc.Status dataset_errors = 2;
874+
}
875+
876+
// The basic metadata of the long running operation.
877+
CommonOperationMetadata common_metadata = 1;
878+
879+
// The training dataset validation information.
880+
DatasetValidation training_dataset_validation = 2;
881+
882+
// The test dataset validation information.
883+
DatasetValidation test_dataset_validation = 3;
884+
}
885+
761886
// Request message for review document method.
762887
message ReviewDocumentRequest {
763888
// The priority level of the human review task.
@@ -828,3 +953,80 @@ message ReviewDocumentOperationMetadata {
828953
// The Crowd Compute question ID.
829954
string question_id = 6;
830955
}
956+
957+
// Evaluates the given ProcessorVersion against the supplied documents.
958+
message EvaluateProcessorVersionRequest {
959+
// Required. The resource name of the
960+
// [ProcessorVersion][google.cloud.documentai.v1.ProcessorVersion] to
961+
// evaluate.
962+
// `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
963+
string processor_version = 1 [
964+
(google.api.field_behavior) = REQUIRED,
965+
(google.api.resource_reference) = {
966+
type: "documentai.googleapis.com/ProcessorVersion"
967+
}
968+
];
969+
970+
// Optional. The documents used in the evaluation. If unspecified, use the
971+
// processor's dataset as evaluation input.
972+
BatchDocumentsInputConfig evaluation_documents = 3
973+
[(google.api.field_behavior) = OPTIONAL];
974+
}
975+
976+
// Metadata of the EvaluateProcessorVersion method.
977+
message EvaluateProcessorVersionMetadata {
978+
// The basic metadata of the long running operation.
979+
CommonOperationMetadata common_metadata = 1;
980+
}
981+
982+
// Metadata of the EvaluateProcessorVersion method.
983+
message EvaluateProcessorVersionResponse {
984+
// The resource name of the created evaluation.
985+
string evaluation = 2;
986+
}
987+
988+
// Retrieves a specific Evaluation.
989+
message GetEvaluationRequest {
990+
// Required. The resource name of the
991+
// [Evaluation][google.cloud.documentai.v1.Evaluation] to get.
992+
// `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}/evaluations/{evaluation}`
993+
string name = 1 [
994+
(google.api.field_behavior) = REQUIRED,
995+
(google.api.resource_reference) = {
996+
type: "documentai.googleapis.com/Evaluation"
997+
}
998+
];
999+
}
1000+
1001+
// Retrieves a list of evaluations for a given ProcessorVersion.
1002+
message ListEvaluationsRequest {
1003+
// Required. The resource name of the
1004+
// [ProcessorVersion][google.cloud.documentai.v1.ProcessorVersion] to list
1005+
// evaluations for.
1006+
// `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
1007+
string parent = 1 [
1008+
(google.api.field_behavior) = REQUIRED,
1009+
(google.api.resource_reference) = {
1010+
type: "documentai.googleapis.com/ProcessorVersion"
1011+
}
1012+
];
1013+
1014+
// The standard list page size.
1015+
// If unspecified, at most 5 evaluations will be returned.
1016+
// The maximum value is 100; values above 100 will be coerced to 100.
1017+
int32 page_size = 2;
1018+
1019+
// A page token, received from a previous `ListEvaluations` call.
1020+
// Provide this to retrieve the subsequent page.
1021+
string page_token = 3;
1022+
}
1023+
1024+
// The response from ListEvaluations.
1025+
message ListEvaluationsResponse {
1026+
// The evaluations requested.
1027+
repeated Evaluation evaluations = 1;
1028+
1029+
// A token, which can be sent as `page_token` to retrieve the next page.
1030+
// If this field is omitted, there are no subsequent pages.
1031+
string next_page_token = 2;
1032+
}

google/cloud/documentai/v1/document_schema.proto

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,20 +38,28 @@ message DocumentSchema {
3838

3939
// Defines properties that can be part of the entity type.
4040
message Property {
41-
// Types of occurrences of the entity type in the document. Note: this
42-
// represents the number of instances of an entity types, not number of
43-
// mentions of a given entity instance.
41+
// Types of occurrences of the entity type in the document. This
42+
// represents the number of instances of an entity, not
43+
// number of mentions of an entity. For example, a bank statement may
44+
// only have one `account_number`, but this account number may be
45+
// mentioned in several places on the document. In this case the
46+
// `account_number` would be considered a `REQUIRED_ONCE` entity type. If,
47+
// on the other hand, we expect a bank statement to contain the status of
48+
// multiple different accounts for the customers, the occurrence type will
49+
// be set to `REQUIRED_MULTIPLE`.
4450
enum OccurrenceType {
4551
// Unspecified occurrence type.
4652
OCCURRENCE_TYPE_UNSPECIFIED = 0;
4753

48-
// There will be zero or one instance of this entity type.
54+
// There will be zero or one instance of this entity type. The same
55+
// entity instance may be mentioned multiple times.
4956
OPTIONAL_ONCE = 1;
5057

5158
// The entity type will appear zero or multiple times.
5259
OPTIONAL_MULTIPLE = 2;
5360

54-
// The entity type will only appear exactly once.
61+
// The entity type will only appear exactly once. The same
62+
// entity instance may be mentioned multiple times.
5563
REQUIRED_ONCE = 3;
5664

5765
// The entity type will appear once or more times.
@@ -103,7 +111,7 @@ message DocumentSchema {
103111
// one should be set.
104112
repeated string base_types = 2;
105113

106-
// Describing the nested structure, or composition of an entity.
114+
// Description the nested structure, or composition of an entity.
107115
repeated Property properties = 6;
108116
}
109117

google/cloud/documentai/v1/documentai_v1.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,14 @@ types:
1919
- name: google.cloud.documentai.v1.DisableProcessorResponse
2020
- name: google.cloud.documentai.v1.EnableProcessorMetadata
2121
- name: google.cloud.documentai.v1.EnableProcessorResponse
22+
- name: google.cloud.documentai.v1.EvaluateProcessorVersionMetadata
23+
- name: google.cloud.documentai.v1.EvaluateProcessorVersionResponse
2224
- name: google.cloud.documentai.v1.ReviewDocumentOperationMetadata
2325
- name: google.cloud.documentai.v1.ReviewDocumentResponse
2426
- name: google.cloud.documentai.v1.SetDefaultProcessorVersionMetadata
2527
- name: google.cloud.documentai.v1.SetDefaultProcessorVersionResponse
28+
- name: google.cloud.documentai.v1.TrainProcessorVersionMetadata
29+
- name: google.cloud.documentai.v1.TrainProcessorVersionResponse
2630
- name: google.cloud.documentai.v1.UndeployProcessorVersionMetadata
2731
- name: google.cloud.documentai.v1.UndeployProcessorVersionResponse
2832

0 commit comments

Comments
 (0)