Skip to content
This repository was archived by the owner on Sep 20, 2023. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/.OwlBot.lock.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
docker:
image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
digest: sha256:4ee57a76a176ede9087c14330c625a71553cf9c72828b2c0ca12f5338171ba60
digest: sha256:74124fe59b8859f30143dcdea7b78300046d97de816dc53c0e381308a5f4f8bc
9 changes: 5 additions & 4 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
#
# For syntax help see:
# https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax
# Note: This file is autogenerated. To make changes to the codeowner team, please update .repo-metadata.json.

# The @googleapis/yoshi-python is the default owner for changes in this repo
* @googleapis/yoshi-python
# @googleapis/yoshi-python @googleapis/cdpe-cloudai are the default owners for changes in this repo
* @googleapis/yoshi-python @googleapis/cdpe-cloudai

# The python-samples-reviewers team is the default owner for samples changes
/samples/ @googleapis/python-samples-owners @googleapis/ml-apis
# @googleapis/python-samples-owners @googleapis/cdpe-cloudai are the default owners for samples changes
/samples/ @googleapis/python-samples-owners @googleapis/cdpe-cloudai
4 changes: 1 addition & 3 deletions samples/snippets/batch_process_documents_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,7 @@ def batch_process_documents(
# Location can be 'us' or 'eu'
name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"
request = documentai.types.document_processor_service.BatchProcessRequest(
name=name,
input_documents=input_config,
document_output_config=output_config,
name=name, input_documents=input_config, document_output_config=output_config,
)

operation = client.batch_process_documents(request)
Expand Down
17 changes: 9 additions & 8 deletions samples/snippets/process_document_form_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console
# file_path = '/path/to/local/pdf'


def process_document_form_sample(
project_id: str, location: str, processor_id: str, file_path: str
):
Expand Down Expand Up @@ -69,9 +70,9 @@ def process_document_form_sample(
for table in page.tables:
num_collumns = len(table.header_rows[0].cells)
num_rows = len(table.body_rows)
print(f'Table with {num_collumns} columns and {num_rows} rows:')
print(f"Table with {num_collumns} columns and {num_rows} rows:")
print_table_info(table, text)
print(f'Found {len(page.form_fields)} form fields:')
print(f"Found {len(page.form_fields)} form fields:")
for field in page.form_fields:
name = layout_to_text(field.field_name, text)
value = layout_to_text(field.field_value, text)
Expand All @@ -80,17 +81,17 @@ def process_document_form_sample(

def print_table_info(table: dict, text: str) -> None:
# Print header row
header_row_text = ''
header_row_text = ""
for header_cell in table.header_rows[0].cells:
header_cell_text = layout_to_text(header_cell.layout, text)
header_row_text += f'{repr(header_cell_text.strip())} | '
print(f'Collumns: {header_row_text[:-3]}')
header_row_text += f"{repr(header_cell_text.strip())} | "
print(f"Collumns: {header_row_text[:-3]}")
# Print first body row
body_row_text = ''
body_row_text = ""
for body_cell in table.body_rows[0].cells:
body_cell_text = layout_to_text(body_cell.layout, text)
body_row_text += f'{repr(body_cell_text.strip())} | '
print(f'First row data: {body_row_text[:-3]}\n')
body_row_text += f"{repr(body_cell_text.strip())} | "
print(f"First row data: {body_row_text[:-3]}\n")


def layout_to_text(layout: dict, text: str) -> str:
Expand Down
3 changes: 2 additions & 1 deletion samples/snippets/process_document_ocr_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console
# file_path = '/path/to/local/pdf'


def process_document_ocr_sample(
project_id: str, location: str, processor_id: str, file_path: str
) -> None:
Expand Down Expand Up @@ -78,7 +79,7 @@ def print_detected_langauges(detected_languages: dict) -> None:
print(" Detected languages:")
for lang in detected_languages:
code = lang.language_code
conf_percent = '{:.1%}'.format(lang.confidence)
conf_percent = "{:.1%}".format(lang.confidence)
print(f" {code} ({conf_percent} confidence)")


Expand Down
12 changes: 7 additions & 5 deletions samples/snippets/process_document_quality_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console
# file_path = '/path/to/local/pdf'


def process_document_quality_sample(
project_id: str, location: str, processor_id: str, file_path: str
):
Expand Down Expand Up @@ -60,17 +61,18 @@ def process_document_quality_sample(
# response.
document = result.document
for entity in document.entities:
conf_percent = '{:.1%}'.format(entity.confidence)
page_num = ''
conf_percent = "{:.1%}".format(entity.confidence)
page_num = ""
try:
page_num = str(int(entity.page_anchor.page_refs.page) + 1)
except AttributeError:
page_num = "1"

print(f'Page {page_num} has a quality score of {conf_percent}:')
print(f"Page {page_num} has a quality score of {conf_percent}:")

for prop in entity.properties:
conf_percent = '{:.1%}'.format(prop.confidence)
print(f' * {prop.type_} score of {conf_percent}')
conf_percent = "{:.1%}".format(prop.confidence)
print(f" * {prop.type_} score of {conf_percent}")


# [END documentai_process_quality_document]
2 changes: 1 addition & 1 deletion samples/snippets/process_document_quality_sample_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def test_process_documents(capsys):
expected_strings = [
"Page 1 has a quality score of",
"defect_blurry score of 9",
"defect_noisy"
"defect_noisy",
]
for expected_string in expected_strings:
assert expected_string in out
8 changes: 5 additions & 3 deletions samples/snippets/process_document_specialized_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console
# file_path = '/path/to/local/pdf'


def process_document_specialized_sample(
project_id: str, location: str, processor_id: str, file_path: str
):
Expand Down Expand Up @@ -62,7 +63,7 @@ def process_document_specialized_sample(
# Please see the OCR and other samples for how to parse other data in the
# response.
document = result.document
print(f'Found {len(document.entities)} entities:')
print(f"Found {len(document.entities)} entities:")
for entity in document.entities:
# Fields detected. For a full list of fields for each processor see
# the processor documentation:
Expand All @@ -71,7 +72,8 @@ def process_document_specialized_sample(
# some other value formats in addition to text are available
# e.g. dates: `entity.normalized_value.date_value.year`
text_value = entity.text_anchor.content
conf_percent = '{:.1%}'.format(entity.confidence)
print(f' * {repr(key)}: {repr(text_value)}({conf_percent} confident)')
conf_percent = "{:.1%}".format(entity.confidence)
print(f" * {repr(key)}: {repr(text_value)}({conf_percent} confident)")


# [END documentai_process_specialized_document]
24 changes: 13 additions & 11 deletions samples/snippets/process_document_splitter_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@
# processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console
# file_path = '/path/to/local/pdf'


def process_document_splitter_sample(
project_id: str,
location: str,
processor_id: str,
file_path: str):
project_id: str, location: str, processor_id: str, file_path: str
):
from google.cloud import documentai_v1beta3 as documentai

# You must set the api_endpoint if you use a location other than 'us', e.g.:
Expand Down Expand Up @@ -61,26 +60,29 @@ def process_document_splitter_sample(
# form elements, and entities please see other processors like the OCR, form,
# and specialized processors.
document = result.document
print(f'Found {len(document.entities)} subdocuments:')
print(f"Found {len(document.entities)} subdocuments:")
for entity in document.entities:
conf_percent = '{:.1%}'.format(entity.confidence)
conf_percent = "{:.1%}".format(entity.confidence)
pages_range = page_refs_to_string(entity.page_anchor.page_refs)
# Print subdocument type information, if available
try:
doctype = entity.type
print(f'{conf_percent} confident that {pages_range} a "{doctype}" subdocument.')
print(
f'{conf_percent} confident that {pages_range} a "{doctype}" subdocument.'
)
except AttributeError:
print(f'{conf_percent} confident that {pages_range} a subdocument.')
print(f"{conf_percent} confident that {pages_range} a subdocument.")


def page_refs_to_string(page_refs: dict) -> str:
''' Converts a page ref to a string describing the page or page range.'''
""" Converts a page ref to a string describing the page or page range."""
if len(page_refs) == 1:
num = str(int(page_refs[0].page) + 1)
return f'page {num} is'
return f"page {num} is"
else:
start = str(int(page_refs[0].page) + 1)
end = str(int(page_refs[1].page) + 1)
return f'pages {start} to {end} are'
return f"pages {start} to {end} are"


# [END documentai_process_splitter_document]