@@ -104,10 +104,32 @@ message DocumentOutputConfig {
104104
// Config for Document OCR.
message OcrConfig {
  // Hints for the OCR engine.
  message Hints {
    // List of BCP-47 language codes to use for OCR. In most cases, not
    // specifying it yields the best results since it enables automatic
    // language detection. For languages based on the Latin alphabet, setting
    // hints is not needed. In rare cases, when the language of the text in
    // the image is known, setting a hint will help get better results
    // (although it will be a significant hindrance if the hint is wrong).
    repeated string language_hints = 1;
  }

  // Hints for the OCR model.
  Hints hints = 2;

  // Enables special handling for PDFs with existing text information. Results
  // in better text extraction quality in such PDF inputs.
  bool enable_native_pdf_parsing = 3;

  // Enables intelligent document quality scores after OCR. Can help with
  // diagnosing why OCR responses are of poor quality for a given input.
  // Adds additional latency comparable to regular OCR to the process call.
  bool enable_image_quality_scores = 4;

  // A list of advanced OCR options to further fine-tune OCR behavior.
  repeated string advanced_ocr_options = 5;

  // Includes symbol-level OCR information if set to true.
  bool enable_symbol = 6;
}
0 commit comments