2323python beta_snippets.py object-localization-uri gs://...
2424python beta_snippets.py handwritten-ocr INPUT_IMAGE
2525python beta_snippets.py handwritten-ocr-uri gs://...
26+ python beta_snippets.py doc-features INPUT_PDF
27+ python beta_snippets.py doc-features-uri gs://...
2628
2729
2830For more information, the documentation at
@@ -174,6 +176,105 @@ def detect_handwritten_ocr_uri(uri):
174176# [END vision_handwritten_ocr_gcs_beta]
175177
176178
# [START vision_fulltext_detection_pdf_beta]
def detect_document_features(path):
    """Detects document features (full text) in a PDF/TIFF/GIF file.

    While your PDF file may have several pages,
    this API can process up to 5 pages only.

    Args:
        path: The path to the local file.
    """
    from google.cloud import vision_v1p4beta1 as vision
    client = vision.ImageAnnotatorClient()

    with open(path, 'rb') as pdf_file:
        content = pdf_file.read()

    # Other supported mime_types: 'image/tiff' or 'image/gif'
    mime_type = 'application/pdf'
    input_config = vision.types.InputConfig(
        content=content, mime_type=mime_type)

    feature = vision.types.Feature(
        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
    # Annotate the first two pages and the last one (max 5 pages)
    # First page starts at 1, and not 0. Last page is -1.
    pages = [1, 2, -1]

    request = vision.types.AnnotateFileRequest(
        input_config=input_config,
        features=[feature],
        pages=pages)

    response = client.batch_annotate_files(requests=[request])

    # One AnnotateImageResponse per requested page; walk the text
    # hierarchy: page -> block -> paragraph -> word -> symbol.
    for image_response in response.responses[0].responses:
        for page in image_response.full_text_annotation.pages:
            for block in page.blocks:
                print('\nBlock confidence: {}\n'.format(block.confidence))
                for par in block.paragraphs:
                    print('\tParagraph confidence: {}'.format(par.confidence))
                    for word in par.words:
                        symbol_texts = [symbol.text for symbol in word.symbols]
                        word_text = ''.join(symbol_texts)
                        print('\t\tWord text: {} (confidence: {})'.format(
                            word_text, word.confidence))
                        for symbol in word.symbols:
                            print('\t\t\tSymbol: {} (confidence: {})'.format(
                                symbol.text, symbol.confidence))
# [END vision_fulltext_detection_pdf_beta]
228+
229+
# [START vision_fulltext_detection_pdf_gcs_beta]
def detect_document_features_uri(gcs_uri):
    """Detects document features (full text) in a PDF/TIFF/GIF file.

    While your PDF file may have several pages,
    this API can process up to 5 pages only.

    Args:
        gcs_uri: The path to the file in Google Cloud Storage (gs://...)
    """
    from google.cloud import vision_v1p4beta1 as vision
    client = vision.ImageAnnotatorClient()

    # Other supported mime_types: 'image/tiff' or 'image/gif'
    mime_type = 'application/pdf'
    input_config = vision.types.InputConfig(
        gcs_source=vision.types.GcsSource(uri=gcs_uri), mime_type=mime_type)

    feature = vision.types.Feature(
        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
    # Annotate the first two pages and the last one (max 5 pages)
    # First page starts at 1, and not 0. Last page is -1.
    pages = [1, 2, -1]

    request = vision.types.AnnotateFileRequest(
        input_config=input_config,
        features=[feature],
        pages=pages)

    response = client.batch_annotate_files(requests=[request])

    # One AnnotateImageResponse per requested page; walk the text
    # hierarchy: page -> block -> paragraph -> word -> symbol.
    for image_response in response.responses[0].responses:
        for page in image_response.full_text_annotation.pages:
            for block in page.blocks:
                print('\nBlock confidence: {}\n'.format(block.confidence))
                for par in block.paragraphs:
                    print('\tParagraph confidence: {}'.format(par.confidence))
                    for word in par.words:
                        symbol_texts = [symbol.text for symbol in word.symbols]
                        word_text = ''.join(symbol_texts)
                        print('\t\tWord text: {} (confidence: {})'.format(
                            word_text, word.confidence))
                        for symbol in word.symbols:
                            print('\t\t\tSymbol: {} (confidence: {})'.format(
                                symbol.text, symbol.confidence))
# [END vision_fulltext_detection_pdf_gcs_beta]
276+
277+
177278if __name__ == '__main__' :
178279 parser = argparse .ArgumentParser (
179280 description = __doc__ ,
@@ -196,15 +297,27 @@ def detect_handwritten_ocr_uri(uri):
196297 'handwritten-ocr-uri' , help = detect_handwritten_ocr_uri .__doc__ )
197298 handwritten_uri_parser .add_argument ('uri' )
198299
300+ doc_features_parser = subparsers .add_parser (
301+ 'doc-features' , help = detect_document_features .__doc__ )
302+ doc_features_parser .add_argument ('path' )
303+
304+ doc_features_uri_parser = subparsers .add_parser (
305+ 'doc-features-uri' , help = detect_document_features_uri .__doc__ )
306+ doc_features_uri_parser .add_argument ('uri' )
307+
199308 args = parser .parse_args ()
200309
201310 if 'uri' in args .command :
202311 if 'object-localization-uri' in args .command :
203312 localize_objects_uri (args .uri )
204313 elif 'handwritten-ocr-uri' in args .command :
205314 detect_handwritten_ocr_uri (args .uri )
315+ elif 'doc-features' in args .command :
316+ detect_handwritten_ocr_uri (args .uri )
206317 else :
207318 if 'object-localization' in args .command :
208319 localize_objects (args .path )
209320 elif 'handwritten-ocr' in args .command :
210321 detect_handwritten_ocr (args .path )
322+ elif 'doc-features' in args .command :
323+ detect_handwritten_ocr (args .path )
0 commit comments