Skip to content

Commit e2d7865

Browse files
authored
docs(samples): Updated Doc AI Quickstart sample based on user feedback (GoogleCloudPlatform#10421)
* docs(samples): Updated Doc AI Quickstart sample based on user feedback
* fix: Added a specific processor version to the Form Parser tests and removed the check on the number of form fields extracted
* fix: Updated the Form Parser response test to read return values instead of printed output
1 parent 174c303 commit e2d7865

4 files changed

Lines changed: 19 additions & 27 deletions

File tree

documentai/snippets/handle_response_sample.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def process_document_form_sample(
142142
processor_version: str,
143143
file_path: str,
144144
mime_type: str,
145-
) -> None:
145+
) -> documentai.Document:
146146
# Online processing request to Document AI
147147
document = process_document(
148148
project_id, location, processor_id, processor_version, file_path, mime_type
@@ -162,9 +162,9 @@ def process_document_form_sample(
162162

163163
print(f"\nFound {len(page.tables)} table(s):")
164164
for table in page.tables:
165-
num_collumns = len(table.header_rows[0].cells)
165+
num_columns = len(table.header_rows[0].cells)
166166
num_rows = len(table.body_rows)
167-
print(f"Table with {num_collumns} columns and {num_rows} rows:")
167+
print(f"Table with {num_columns} columns and {num_rows} rows:")
168168

169169
# Print header rows
170170
print("Columns:")
@@ -179,6 +179,8 @@ def process_document_form_sample(
179179
value = layout_to_text(field.field_value, text)
180180
print(f" * {repr(name.strip())}: {repr(value.strip())}")
181181

182+
return document
183+
182184

183185
def print_table_rows(
184186
table_rows: Sequence[documentai.Document.Page.Table.TableRow], text: str

documentai/snippets/handle_response_sample_test.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,32 +43,26 @@ def test_process_document_ocr(capsys):
4343
assert "FakeDoc" in out
4444

4545

46-
def test_process_document_form(capsys):
46+
def test_process_document_form():
4747
location = "us"
4848
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
4949
processor_id = "90484cfdedb024f6"
50-
processor_version = "stable"
50+
processor_version = "pretrained-form-parser-v2.0-2022-11-10"
5151
file_path = "resources/invoice.pdf"
5252
mime_type = "application/pdf"
5353

54-
handle_response_sample.process_document_form_sample(
54+
document = handle_response_sample.process_document_form_sample(
5555
project_id=project_id,
5656
location=location,
5757
processor_id=processor_id,
5858
processor_version=processor_version,
5959
file_path=file_path,
6060
mime_type=mime_type,
6161
)
62-
out, _ = capsys.readouterr()
6362

64-
expected_strings = [
65-
"There are 1 page(s) in this document.",
66-
"Table with 4 columns and 6 rows",
67-
"Found 13 form field(s)",
68-
"'BALANCE DUE': '$2140.00'",
69-
]
70-
for expected_string in expected_strings:
71-
assert expected_string in out
63+
assert len(document.pages) == 1
64+
assert len(document.pages[0].tables[0].header_rows[0].cells) == 4
65+
assert len(document.pages[0].tables[0].body_rows) == 6
7266

7367

7468
def test_process_document_quality(capsys):

documentai/snippets/quickstart_sample.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,15 @@
2323
# TODO(developer): Uncomment these variables before running the sample.
2424
# project_id = "YOUR_PROJECT_ID"
2525
# location = "YOUR_PROCESSOR_LOCATION" # Format is "us" or "eu"
26-
# processor_display_name = "YOUR_PROCESSOR_DISPLAY_NAME" # Must be unique per project, e.g.: "My Processor"
27-
# processor_type = "YOUR_PROCESSOR_TYPE" # Use `fetch_processor_types()` to get available processor types
2826
# file_path = "/path/to/local/pdf"
29-
# mime_type = "application/pdf" # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types
27+
# processor_display_name = "YOUR_PROCESSOR_DISPLAY_NAME" # Must be unique per project, e.g.: "My Processor"
3028

3129

3230
def quickstart(
3331
project_id: str,
3432
location: str,
35-
processor_display_name: str,
36-
processor_type: str,
3733
file_path: str,
38-
mime_type: str,
34+
processor_display_name: str = "My Processor",
3935
):
4036
# You must set the `api_endpoint` if you use a location other than "us".
4137
opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")
@@ -50,7 +46,8 @@ def quickstart(
5046
processor = client.create_processor(
5147
parent=parent,
5248
processor=documentai.Processor(
53-
display_name=processor_display_name, type_=processor_type
49+
type_="OCR_PROCESSOR", # Refer to https://cloud.google.com/document-ai/docs/create-processor for how to get available processor types
50+
display_name=processor_display_name,
5451
),
5552
)
5653

@@ -62,7 +59,10 @@ def quickstart(
6259
image_content = image.read()
6360

6461
# Load binary data
65-
raw_document = documentai.RawDocument(content=image_content, mime_type=mime_type)
62+
raw_document = documentai.RawDocument(
63+
content=image_content,
64+
mime_type="application/pdf", # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types
65+
)
6666

6767
# Configure the process request
6868
# `processor.name` is the full resource name of the processor, e.g.:

documentai/snippets/quickstart_sample_test.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,15 @@
2626
location = "us"
2727
project_id = os.environ["GOOGLE_CLOUD_PROJECT"]
2828
processor_display_name = f"test-processor-{uuid4()}"
29-
processor_type = "OCR_PROCESSOR"
3029
file_path = "resources/invoice.pdf"
31-
mime_type = "application/pdf"
3230

3331

3432
def test_quickstart(capsys):
3533
processor = quickstart_sample.quickstart(
3634
project_id=project_id,
3735
location=location,
3836
processor_display_name=processor_display_name,
39-
processor_type=processor_type,
4037
file_path=file_path,
41-
mime_type=mime_type,
4238
)
4339
out, _ = capsys.readouterr()
4440

0 commit comments

Comments
 (0)