Skip to content

Commit 280bed6

Browse files
Google APIs, copybara-github
authored and committed
feat: Added fields for image and table annotation output
feat: Update comments for proto fields PiperOrigin-RevId: 893028748
1 parent 775a3cb commit 280bed6

File tree

1 file changed

+132
-15
lines changed

1 file changed

+132
-15
lines changed

google/cloud/documentai/v1/document.proto

Lines changed: 132 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2025 Google LLC
1+
// Copyright 2026 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@ syntax = "proto3";
1717
package google.cloud.documentai.v1;
1818

1919
import "google/api/field_behavior.proto";
20+
import "google/api/resource.proto";
2021
import "google/cloud/documentai/v1/barcode.proto";
2122
import "google/cloud/documentai/v1/geometry.proto";
2223
import "google/protobuf/timestamp.proto";
@@ -180,9 +181,9 @@ message Document {
180181

181182
// Confidence of the current
182183
// [Layout][google.cloud.documentai.v1.Document.Page.Layout] within
183-
// context of the object this layout is for. e.g. confidence can be for a
184-
// single token, a table, a visual element, etc. depending on context.
185-
// Range `[0, 1]`.
184+
// context of the object this layout is for. For example, confidence can
185+
// be for a single token, a table, a visual element, etc. depending on
186+
// context. Range `[0, 1]`.
186187
float confidence = 2;
187188

188189
// The bounding polygon for the
@@ -340,8 +341,8 @@ message Document {
340341
repeated DetectedLanguage detected_languages = 2;
341342
}
342343

343-
// Detected non-text visual elements e.g. checkbox, signature etc. on the
344-
// page.
344+
// Detected non-text visual elements, for example, checkbox, signature etc.
345+
// on the page.
345346
message VisualElement {
346347
// [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
347348
// [VisualElement][google.cloud.documentai.v1.Document.Page.VisualElement].
@@ -400,7 +401,7 @@ message Document {
400401
message FormField {
401402
// [Layout][google.cloud.documentai.v1.Document.Page.Layout] for the
402403
// [FormField][google.cloud.documentai.v1.Document.Page.FormField] name.
403-
// e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
404+
// For example, `Address`, `Email`, `Grand total`, `Phone number`, etc.
404405
Layout field_name = 1;
405406

406407
// [Layout][google.cloud.documentai.v1.Document.Page.Layout] for the
@@ -526,7 +527,7 @@ message Document {
526527
// A list of visually detected tokens on the page.
527528
repeated Token tokens = 8;
528529

529-
// A list of detected non-text visual elements e.g. checkbox,
530+
// A list of detected non-text visual elements, for example, checkbox,
530531
// signature etc. on the page.
531532
repeated VisualElement visual_elements = 9;
532533

@@ -586,6 +587,8 @@ message Document {
586587
// Float value.
587588
float float_value = 8;
588589

590+
// A signature - a graphical representation of a person's name,
591+
// often used to sign a document.
589592
bool signature_value = 10;
590593
}
591594

@@ -622,10 +625,11 @@ message Document {
622625
// [Document.text][google.cloud.documentai.v1.Document.text].
623626
TextAnchor text_anchor = 1 [(google.api.field_behavior) = OPTIONAL];
624627

625-
// Required. Entity type from a schema e.g. `Address`.
628+
// Required. Entity type from a schema, for example, `Address`.
626629
string type = 2 [(google.api.field_behavior) = REQUIRED];
627630

628-
// Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`.
631+
// Optional. Text value of the entity, for example, `1600 Amphitheatre
632+
// Pkwy`.
629633
string mention_text = 3 [(google.api.field_behavior) = OPTIONAL];
630634

631635
// Optional. Deprecated. Use `id` field instead.
@@ -643,9 +647,9 @@ message Document {
643647
string id = 7 [(google.api.field_behavior) = OPTIONAL];
644648

645649
// Optional. Normalized entity value. Absent if the extracted value could
646-
// not be converted or the type (e.g. address) is not supported for certain
647-
// parsers. This field is also only populated for certain supported document
648-
// types.
650+
// not be converted or the type (for example, address) is not supported for
651+
// certain parsers. This field is also only populated for certain supported
652+
// document types.
649653
NormalizedValue normalized_value = 9
650654
[(google.api.field_behavior) = OPTIONAL];
651655

@@ -854,7 +858,7 @@ message Document {
854858
message Revision {
855859
// Human Review information of the document.
856860
message HumanReview {
857-
// Human review state. e.g. `requested`, `succeeded`, `rejected`.
861+
// Human review state. For example, `requested`, `succeeded`, `rejected`.
858862
string state = 1;
859863

860864
// A message providing more details about the current state of processing.
@@ -912,6 +916,12 @@ message Document {
912916
repeated Provenance provenance = 3 [deprecated = true];
913917
}
914918

919+
// Represents an annotation attached to a layout block or a chunk field.
920+
message Annotations {
921+
// Description of the content that carries this annotation.
922+
string description = 1;
923+
}
924+
915925
// Represents the parsed layout of a document as a collection of blocks that
916926
// the document is divided into.
917927
message DocumentLayout {
@@ -940,6 +950,9 @@ message Document {
940950
// A text block could further have child blocks.
941951
// Repeated blocks support further hierarchies and nested blocks.
942952
repeated DocumentLayoutBlock blocks = 3;
953+
954+
// Annotation of the text block.
955+
Annotations annotations = 4;
943956
}
944957

945958
// Represents a table type block.
@@ -952,6 +965,9 @@ message Document {
952965

953966
// Table caption/title.
954967
string caption = 3;
968+
969+
// Annotation of the table block.
970+
Annotations annotations = 4;
955971
}
956972

957973
// Represents a row in a table.
@@ -990,6 +1006,37 @@ message Document {
9901006
repeated DocumentLayoutBlock blocks = 1;
9911007
}
9921008

1009+
// Represents an image type block.
1010+
message LayoutImageBlock {
1011+
// Source of the image. At most one of the members below is set.
1012+
oneof image_source {
1013+
// Optional. Asset ID of the inline image. If set, find the image
1014+
// content in the `blob_assets` field.
1015+
string blob_asset_id = 4 [(google.api.field_behavior) = OPTIONAL];
1016+
1017+
// Optional. Google Cloud Storage URI of the image.
1018+
string gcs_uri = 5 [(google.api.field_behavior) = OPTIONAL];
1019+
1020+
// Optional. Data URI of the image.
1021+
// It is composed of four parts: a prefix (`data:`), a MIME type
1022+
// indicating the type of data, an optional `base64` token if
1023+
// non-textual, and the data itself:
1024+
// `data:[<mediatype>][;base64],<data>`
1025+
string data_uri = 6 [(google.api.field_behavior) = OPTIONAL];
1026+
}
1027+
1028+
// MIME type of the image. An IANA published [media type (MIME
1029+
// type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
1030+
string mime_type = 1;
1031+
1032+
// Text extracted from the image using OCR or alt text describing the
1033+
// image.
1034+
string image_text = 2;
1035+
1036+
// Annotation of the image block.
1037+
Annotations annotations = 3;
1038+
}
1039+
9931040
oneof block {
9941041
// Block consisting of text content.
9951042
LayoutTextBlock text_block = 2;
@@ -999,6 +1046,9 @@ message Document {
9991046

10001047
// Block consisting of list content/structure.
10011048
LayoutListBlock list_block = 4;
1049+
1050+
// Block consisting of image content.
1051+
LayoutImageBlock image_block = 7;
10021052
}
10031053

10041054
// ID of the block.
@@ -1046,6 +1096,48 @@ message Document {
10461096
ChunkPageSpan page_span = 2;
10471097
}
10481098

1099+
// An image-typed chunk field within a chunk.
1100+
message ImageChunkField {
1101+
// Source of the image. At most one of the members below is set.
1102+
oneof image_source {
1103+
// Optional. Asset ID of the inline image. If set, find the image
1104+
// content in the `blob_assets` field.
1105+
string blob_asset_id = 1 [(google.api.field_behavior) = OPTIONAL];
1106+
1107+
// Optional. Google Cloud Storage URI of the image.
1108+
string gcs_uri = 2 [(google.api.field_behavior) = OPTIONAL];
1109+
1110+
// Optional. Data URI of the image.
1111+
// It is composed of four parts: a prefix (`data:`), a MIME type
1112+
// indicating the type of data, an optional `base64` token if
1113+
// non-textual, and the data itself:
1114+
// `data:[<mediatype>][;base64],<data>`
1115+
string data_uri = 3 [(google.api.field_behavior) = OPTIONAL];
1116+
}
1117+
1118+
// Annotation of the image chunk field.
1119+
Annotations annotations = 4;
1120+
}
1121+
1122+
// A table-typed chunk field within a chunk.
1123+
message TableChunkField {
1124+
// Annotation of the table chunk field.
1125+
Annotations annotations = 1;
1126+
}
1127+
1128+
// A field within a chunk. A chunk field is one of the supported types
1129+
// (for example, image or table).
1130+
message ChunkField {
1131+
// The concrete type of the chunk field; at most one member is set.
1132+
oneof field_type {
1133+
// Set when the chunk field is an image.
1134+
ImageChunkField image_chunk_field = 1;
1135+
1136+
// Set when the chunk field is a table.
1137+
TableChunkField table_chunk_field = 2;
1138+
}
1139+
}
1140+
10491141
// ID of the chunk.
10501142
string chunk_id = 1;
10511143

@@ -1063,12 +1155,31 @@ message Document {
10631155

10641156
// Page footers associated with the chunk.
10651157
repeated ChunkPageFooter page_footers = 6;
1158+
1159+
// Chunk fields inside this chunk.
1160+
repeated ChunkField chunk_fields = 7;
10661161
}
10671162

10681163
// List of chunks.
10691164
repeated Chunk chunks = 1;
10701165
}
10711166

1167+
// Represents a blob asset. It's used to store the content of the inline blob
1168+
// in this document, for example, image bytes, such that it can be referenced
1169+
// by other fields in the document via asset ID.
1170+
message BlobAsset {
1171+
// Optional. The ID of the blob asset.
1172+
string asset_id = 1 [(google.api.field_behavior) = OPTIONAL];
1173+
1174+
// Optional. The content of the blob asset, for example, image bytes.
1175+
bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
1176+
1177+
// The MIME type of the blob asset.
1178+
// An IANA published [media type (MIME
1179+
// type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
1180+
string mime_type = 3;
1181+
}
1182+
10721183
// The output of the validation given the document and the validation rules.
10731184
message EntityValidationOutput {
10741185
// Validation result for a single validation rule.
@@ -1186,6 +1297,12 @@ message Document {
11861297
// Document chunked based on chunking config.
11871298
ChunkedDocument chunked_document = 18;
11881299

1300+
// Optional. The blob assets in this document. This is used to store the
1301+
// content of the inline blobs in this document, for example, image bytes,
1302+
// such that they can be referenced by other fields in the document via asset
1303+
// id.
1304+
repeated BlobAsset blob_assets = 19 [(google.api.field_behavior) = OPTIONAL];
1305+
11891306
// The entity validation output for the document. This is the validation
11901307
// output for `document.entities` field.
11911308
EntityValidationOutput entity_validation_output = 21;
@@ -1198,7 +1315,7 @@ message Document {
11981315
// The entity revision ID that `document.entities` field is based on.
11991316
// If this field is set and `entities_revisions` is not empty, the entities in
12001317
// `document.entities` field are the entities in the entity revision with this
1201-
// ID and `document.entity_validation_output` field is the
1318+
// id and `document.entity_validation_output` field is the
12021319
// `entity_validation_output` field in this entity revision.
12031320
string entities_revision_id = 23;
12041321
}

0 commit comments

Comments
 (0)