Skip to content

Commit 5638e32

Browse files
committed
fix(bqjdbc): optimize meetsReadRatio latency to achieve faster page counting
1 parent 1c7cc0c commit 5638e32

4 files changed

Lines changed: 111 additions & 3 deletions

File tree

java-bigquery/google-cloud-bigquery-jdbc/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ drivers/**
22
target-it/**
33
*logs*/**
44
**/ITBigQueryJDBCLocalTest.java
5+
**/BigQueryStatementE2EBenchmark.java
56

67
tools/**/*.class
78
tools/**/*.jfr

java-bigquery/google-cloud-bigquery-jdbc/src/main/java/com/google/cloud/bigquery/jdbc/BigQueryJdbcUrlUtility.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ protected boolean removeEldestEntry(Map.Entry<String, Map<String, String>> eldes
7272
static final String QUERY_PROPERTIES_NAME = "QueryProperties";
7373
static final int DEFAULT_HTAPI_ACTIVATION_RATIO_VALUE = 2;
7474
static final String HTAPI_MIN_TABLE_SIZE_PROPERTY_NAME = "HighThroughputMinTableSize";
75-
static final int DEFAULT_HTAPI_MIN_TABLE_SIZE_VALUE = 100;
75+
static final int DEFAULT_HTAPI_MIN_TABLE_SIZE_VALUE = 10000;
7676
static final int DEFAULT_OAUTH_TYPE_VALUE = -1;
7777
static final String LOCATION_PROPERTY_NAME = "Location";
7878
static final String ENDPOINT_OVERRIDES_PROPERTY_NAME = "EndpointOverrides";

java-bigquery/google-cloud-bigquery-jdbc/src/main/java/com/google/cloud/bigquery/jdbc/BigQueryStatement.java

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,6 @@
5555
import com.google.cloud.bigquery.storage.v1.ReadSession;
5656
import com.google.common.annotations.VisibleForTesting;
5757
import com.google.common.collect.ImmutableList;
58-
import com.google.common.collect.Iterators;
5958
import com.google.common.util.concurrent.Uninterruptibles;
6059
import java.lang.ref.ReferenceQueue;
6160
import java.sql.Connection;
@@ -952,7 +951,23 @@ private boolean meetsReadRatio(TableResult results) {
952951
// below logic iterates and counts. This is inefficient and we may eventually want to expose
953952
// PageSize with TableResults
954953
// TODO(Obada): Scope for performance optimization.
955-
int pageSize = Iterators.size(results.getValues().iterator());
954+
int pageSize;
955+
Iterable<FieldValueList> values = results.getValues();
956+
if (values instanceof java.util.Collection) {
957+
pageSize = ((java.util.Collection<?>) values).size();
958+
} else {
959+
// O(1) Fast Page Size Approximation:
960+
// If the values iterable is not a collection, approximate the page size rather than
961+
// performing a slow O(N) iteration over the entire page of query results.
962+
pageSize = (int) Math.min(totalRows, querySettings.getMaxResultPerPage());
963+
}
964+
965+
// SAFEGUARD: If all data has already been retrieved in the first page,
966+
// NEVER switch to the Read API as it would discard in-memory data and cause a double-fetch.
967+
if (totalRows <= pageSize) {
968+
return false;
969+
}
970+
956971
return totalRows / pageSize > querySettings.getHighThroughputActivationRatio();
957972
}
958973

java-bigquery/google-cloud-bigquery-jdbc/src/test/java/com/google/cloud/bigquery/jdbc/BigQueryStatementTest.java

Lines changed: 92 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -494,4 +494,96 @@ public void testGetStatementType(boolean isReadOnlyTokenUsed) throws Exception {
494494
verify(bigquery, isReadOnlyTokenUsed ? Mockito.never() : Mockito.times(1))
495495
.create(any(JobInfo.class));
496496
}
497+
498+
@Test
499+
public void testUseReadAPI_SafeguardSmallDataset() throws SQLException {
500+
// Setup: totalRows <= pageSize, so it should not activate the Read API
501+
doReturn(true).when(bigQueryConnection).isEnableHighThroughputAPI();
502+
doReturn(100).when(bigQueryConnection).getHighThroughputMinTableSize();
503+
doReturn(2).when(bigQueryConnection).getHighThroughputActivationRatio();
504+
doReturn(1000L).when(bigQueryConnection).getMaxResults();
505+
506+
BigQueryStatement statement = new BigQueryStatement(bigQueryConnection);
507+
TableResult tableResult = mock(TableResult.class);
508+
doReturn(50L).when(tableResult).getTotalRows();
509+
510+
// Standard java collection in values
511+
java.util.List<com.google.cloud.bigquery.FieldValueList> valuesList =
512+
new java.util.ArrayList<>();
513+
for (int i = 0; i < 50; i++) {
514+
valuesList.add(mock(com.google.cloud.bigquery.FieldValueList.class));
515+
}
516+
doReturn(valuesList).when(tableResult).getValues();
517+
518+
boolean useReadApi = statement.useReadAPI(tableResult);
519+
assertThat(useReadApi).isFalse();
520+
}
521+
522+
@Test
523+
public void testUseReadAPI_MeetsRatioCollection() throws SQLException {
524+
// Setup: totalRows = 500, pageSize = 100, MinTableSize = 100, ActivationRatio = 2
525+
// ratio = 5 > 2, should activate Read API
526+
doReturn(true).when(bigQueryConnection).isEnableHighThroughputAPI();
527+
doReturn(100).when(bigQueryConnection).getHighThroughputMinTableSize();
528+
doReturn(2).when(bigQueryConnection).getHighThroughputActivationRatio();
529+
doReturn(1000L).when(bigQueryConnection).getMaxResults();
530+
531+
BigQueryStatement statement = new BigQueryStatement(bigQueryConnection);
532+
TableResult tableResult = mock(TableResult.class);
533+
doReturn(500L).when(tableResult).getTotalRows();
534+
535+
java.util.List<com.google.cloud.bigquery.FieldValueList> valuesList =
536+
new java.util.ArrayList<>();
537+
for (int i = 0; i < 100; i++) {
538+
valuesList.add(mock(com.google.cloud.bigquery.FieldValueList.class));
539+
}
540+
doReturn(valuesList).when(tableResult).getValues();
541+
542+
boolean useReadApi = statement.useReadAPI(tableResult);
543+
assertThat(useReadApi).isTrue();
544+
}
545+
546+
@Test
547+
public void testUseReadAPI_FailsMinTableSize() throws SQLException {
548+
// Setup: totalRows = 80 < MinTableSize (100)
549+
doReturn(true).when(bigQueryConnection).isEnableHighThroughputAPI();
550+
doReturn(100).when(bigQueryConnection).getHighThroughputMinTableSize();
551+
doReturn(2).when(bigQueryConnection).getHighThroughputActivationRatio();
552+
doReturn(1000L).when(bigQueryConnection).getMaxResults();
553+
554+
BigQueryStatement statement = new BigQueryStatement(bigQueryConnection);
555+
TableResult tableResult = mock(TableResult.class);
556+
doReturn(80L).when(tableResult).getTotalRows();
557+
558+
java.util.List<com.google.cloud.bigquery.FieldValueList> valuesList =
559+
new java.util.ArrayList<>();
560+
for (int i = 0; i < 20; i++) {
561+
valuesList.add(mock(com.google.cloud.bigquery.FieldValueList.class));
562+
}
563+
doReturn(valuesList).when(tableResult).getValues();
564+
565+
boolean useReadApi = statement.useReadAPI(tableResult);
566+
assertThat(useReadApi).isFalse();
567+
}
568+
569+
@Test
570+
public void testUseReadAPI_NonCollectionApproximation() throws SQLException {
571+
// Setup: totalRows = 500, MinTableSize = 100, ActivationRatio = 2, maxResultPerPage = 100
572+
// results.getValues() returns custom non-collection Iterable (ratio = 500/100 = 5 > 2)
573+
doReturn(true).when(bigQueryConnection).isEnableHighThroughputAPI();
574+
doReturn(100).when(bigQueryConnection).getHighThroughputMinTableSize();
575+
doReturn(2).when(bigQueryConnection).getHighThroughputActivationRatio();
576+
doReturn(100L).when(bigQueryConnection).getMaxResults(); // maxResultPerPage = 100
577+
578+
BigQueryStatement statement = new BigQueryStatement(bigQueryConnection);
579+
TableResult tableResult = mock(TableResult.class);
580+
doReturn(500L).when(tableResult).getTotalRows();
581+
582+
// Mock non-collection iterable
583+
Iterable<com.google.cloud.bigquery.FieldValueList> mockIterable = mock(Iterable.class);
584+
doReturn(mockIterable).when(tableResult).getValues();
585+
586+
boolean useReadApi = statement.useReadAPI(tableResult);
587+
assertThat(useReadApi).isTrue();
588+
}
497589
}

0 commit comments

Comments
 (0)