feat: add estimated physical file sizes to ReadAPI v1

Google APIs · copybara-github · commit a4ff1c210c20 · 2023-06-21T13:43:41.000-07:00
PiperOrigin-RevId: 542350532
diff --git a/google/cloud/bigquery/storage/v1/stream.proto b/google/cloud/bigquery/storage/v1/stream.proto
@@ -131,11 +131,11 @@ message ReadSession {
     }
 
     // Optional. Specifies a table sampling percentage. Specifically, the query
-    // planner will use TABLESAMPLE SYSTEM (sample_percentage PERCENT). This
-    // samples at the file-level. It will randomly choose for each file whether
-    // to include that file in the sample returned. Note, that if the table only
-    // has one file, then TABLESAMPLE SYSTEM will select that file and return
-    // all returnable rows contained within.
+    // planner will use TABLESAMPLE SYSTEM (sample_percentage PERCENT). The
+    // sampling percentage is applied at the data block granularity. It will
+    // randomly choose for each data block whether to read the rows in that data
+    // block. For more details, see
+    // https://cloud.google.com/bigquery/docs/table-sampling
     optional double sample_percentage = 5
         [(google.api.field_behavior) = OPTIONAL];
   }
@@ -194,6 +194,14 @@ message ReadSession {
   int64 estimated_total_bytes_scanned = 12
       [(google.api.field_behavior) = OUTPUT_ONLY];
 
+  // Output only. A pre-projected estimate of the total physical size (in bytes)
+  // of files this session will scan when all streams are completely consumed.
+  // This estimate does not depend on the selected columns and can be based on
+  // metadata from the table which might be incomplete or stale. Only set for
+  // BigLake tables.
+  int64 estimated_total_physical_file_size = 15
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
   // Output only. An estimate on the number of rows present in this session's
   // streams. This estimate is based on metadata from the table which might be
   // incomplete or stale.