Skip to content

Commit 848245e

Browse files
happyhuman and pongad
authored and committed
Added a snippet to show how to read a newline-delimited-json file and store it in a Table (googleapis#2974)
1 parent a21cc36 commit 848245e

3 files changed

Lines changed: 114 additions & 50 deletions

File tree

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigQuery.java

Lines changed: 45 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ public int hashCode() {
522522
* } catch (BigQueryException e) {
523523
* // the dataset was not created
524524
* }
525-
* } </pre>
525+
* }</pre>
526526
*
527527
* @throws BigQueryException upon failure
528528
*/
@@ -538,7 +538,7 @@ public int hashCode() {
538538
* String fieldName = "string_field";
539539
* TableId tableId = TableId.of(datasetName, tableName);
540540
* // Table field definition
541-
* Field field = Field.of(fieldName, Field.Type.string());
541+
* Field field = Field.of(fieldName, LegacySQLTypeName.STRING);
542542
* // Table schema definition
543543
* Schema schema = Schema.of(field);
544544
* TableDefinition tableDefinition = StandardTableDefinition.of(schema);
@@ -553,6 +553,32 @@ public int hashCode() {
553553
/**
554554
* Creates a new job.
555555
*
556+
* <p>Example of loading a newline-delimited-json file with textual fields from GCS to a table.
557+
* <pre> {@code
558+
* String datasetName = "my_dataset_name";
559+
* String tableName = "my_table_name";
560+
* String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
561+
* TableId tableId = TableId.of(datasetName, tableName);
562+
* // Table field definition
563+
* Field[] fields = new Field[] {
564+
* Field.of("name", LegacySQLTypeName.STRING),
565+
* Field.of("post_abbr", LegacySQLTypeName.STRING)
566+
* };
567+
* // Table schema definition
568+
* Schema schema = Schema.of(fields);
569+
* LoadJobConfiguration configuration = LoadJobConfiguration.builder(tableId, sourceUri)
570+
* .setFormatOptions(FormatOptions.json())
571+
* .setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
572+
* .setSchema(schema)
573+
* .build();
574+
* // Load the table
575+
* Job remoteLoadJob = bigquery.create(JobInfo.of(configuration));
576+
* remoteLoadJob = remoteLoadJob.waitFor();
577+
* // Check the table
578+
* System.out.println("State: " + remoteLoadJob.getStatus().getState());
579+
* return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
580+
* }</pre>
581+
*
556582
* <p>Example of creating a query job.
557583
* <pre> {@code
558584
* String query = "SELECT field FROM my_dataset_name.my_table_name";
@@ -861,8 +887,7 @@ public int hashCode() {
861887
* Lists the table's rows.
862888
*
863889
* <p>Example of listing table rows, specifying the page size.
864-
*
865-
* <pre>{@code
890+
* <pre> {@code
866891
* String datasetName = "my_dataset_name";
867892
* String tableName = "my_table_name";
868893
* // This example reads the result 100 rows per RPC call. If there's no need to limit the number,
@@ -882,16 +907,15 @@ public int hashCode() {
882907
* Lists the table's rows.
883908
*
884909
* <p>Example of listing table rows, specifying the page size.
885-
*
886-
* <pre>{@code
910+
* <pre> {@code
887911
* String datasetName = "my_dataset_name";
888912
* String tableName = "my_table_name";
889913
* TableId tableIdObject = TableId.of(datasetName, tableName);
890914
* // This example reads the result 100 rows per RPC call. If there's no need to limit the number,
891915
* // simply omit the option.
892916
* TableResult tableData =
893917
* bigquery.listTableData(tableIdObject, TableDataListOption.pageSize(100));
894-
* for (FieldValueList row : rowIterator.hasNext()) {
918+
* for (FieldValueList row : tableData.iterateAll()) {
895919
* // do something with the row
896920
* }
897921
* }</pre>
@@ -904,17 +928,16 @@ public int hashCode() {
904928
* Lists the table's rows. If the {@code schema} is not {@code null}, it is available to the
905929
* {@link FieldValueList} iterated over.
906930
*
907-
* <p>Example of listing table rows.
908-
*
909-
* <pre>{@code
931+
* <p>Example of listing table rows with schema.
932+
* <pre> {@code
910933
* String datasetName = "my_dataset_name";
911934
* String tableName = "my_table_name";
912935
* Schema schema = ...;
913-
* String field = "my_field";
936+
* String field = "field";
914937
* TableResult tableData =
915938
* bigquery.listTableData(datasetName, tableName, schema);
916939
* for (FieldValueList row : tableData.iterateAll()) {
917-
* row.get(field)
940+
* row.get(field);
918941
* }
919942
* }</pre>
920943
*
@@ -927,9 +950,8 @@ TableResult listTableData(
927950
* Lists the table's rows. If the {@code schema} is not {@code null}, it is available to the
928951
* {@link FieldValueList} iterated over.
929952
*
930-
* <p>Example of listing table rows.
931-
*
932-
* <pre>{@code
953+
* <p>Example of listing table rows with schema.
954+
* <pre> {@code
933955
* Schema schema =
934956
* Schema.of(
935957
* Field.of("word", LegacySQLTypeName.STRING),
@@ -1047,28 +1069,21 @@ TableResult listTableData(
10471069
* queries. Since dry-run queries are not actually executed, there's no way to retrieve results.
10481070
*
10491071
* <p>Example of running a query.
1050-
*
1051-
* <pre>{@code
1052-
* String query = "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare`";
1053-
* QueryJobConfiguration queryConfig = QueryJobConfiguration.of(query);
1054-
*
1055-
* // To run the legacy syntax queries use the following code instead:
1056-
* // String query = "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]"
1057-
* // QueryJobConfiguration queryConfig =
1058-
* // QueryJobConfiguration.newBuilder(query).setUseLegacySql(true).build();
1059-
*
1072+
* <pre> {@code
1073+
* String query = "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]";
1074+
* QueryJobConfiguration queryConfig =
1075+
* QueryJobConfiguration.newBuilder(query).setUseLegacySql(true).build();
10601076
* for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
10611077
* // do something with the data
10621078
* }
10631079
* }</pre>
10641080
*
10651081
* <p>Example of running a query with query parameters.
1066-
*
1067-
* <pre>{@code
1068-
* String query =
1069-
* "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > ?";
1082+
* <pre> {@code
1083+
* String query = "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > @wordCount";
1084+
* // Note, standard SQL is required to use query parameters. Legacy SQL will not work.
10701085
* QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query)
1071-
* .addPositionalParameter(QueryParameterValue.int64(5))
1086+
* .addNamedParameter("wordCount", QueryParameterValue.int64(5))
10721087
* .build();
10731088
* for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
10741089
* // do something with the data
@@ -1092,18 +1107,6 @@ TableResult query(QueryJobConfiguration configuration, JobOption... options)
10921107
* <p>See {@link #query(QueryJobConfiguration, JobOption...)} for examples on populating a {@link
10931108
* QueryJobConfiguration}.
10941109
*
1095-
* <p>The recommended way to create a randomly generated JobId is the following:
1096-
*
1097-
* <pre>{@code
1098-
* JobId jobId = JobId.of();
1099-
* }</pre>
1100-
*
1101-
* For a user specified job id with an optional prefix use the following:
1102-
*
1103-
* <pre>{@code
1104-
* JobId jobId = JobId.of("my_prefix-my_unique_job_id");
1105-
* }</pre>
1106-
*
11071110
* @throws BigQueryException upon failure
11081111
* @throws InterruptedException if the current thread gets interrupted while waiting for the query
11091112
* to complete

google-cloud-examples/src/main/java/com/google/cloud/examples/bigquery/snippets/BigQuerySnippets.java

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import com.google.api.client.util.Charsets;
2626
import com.google.api.gax.paging.Page;
2727
import com.google.cloud.bigquery.BigQuery;
28+
import com.google.cloud.bigquery.JobInfo.CreateDisposition;
29+
import com.google.cloud.bigquery.LoadJobConfiguration;
2830
import com.google.cloud.bigquery.TableResult;
2931
import com.google.cloud.bigquery.BigQuery.DatasetDeleteOption;
3032
import com.google.cloud.bigquery.BigQuery.DatasetListOption;
@@ -378,6 +380,38 @@ public long writeFileToTable(String datasetName, String tableName, Path csvPath)
378380
// [END writeFileToTable]
379381
}
380382

383+
/**
384+
* Example of loading a newline-delimited-json file with textual fields from GCS to a table.
385+
*/
386+
// [TARGET create(JobInfo, JobOption...)]
387+
// [VARIABLE "my_dataset_name"]
388+
// [VARIABLE "my_table_name"]
389+
public Long writeRemoteFileToTable(String datasetName, String tableName)
390+
throws InterruptedException {
391+
// [START bigquery_load_table_gcs_json]
392+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
393+
TableId tableId = TableId.of(datasetName, tableName);
394+
// Table field definition
395+
Field[] fields = new Field[] {
396+
Field.of("name", LegacySQLTypeName.STRING),
397+
Field.of("post_abbr", LegacySQLTypeName.STRING)
398+
};
399+
// Table schema definition
400+
Schema schema = Schema.of(fields);
401+
LoadJobConfiguration configuration = LoadJobConfiguration.builder(tableId, sourceUri)
402+
.setFormatOptions(FormatOptions.json())
403+
.setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
404+
.setSchema(schema)
405+
.build();
406+
// Load the table
407+
Job remoteLoadJob = bigquery.create(JobInfo.of(configuration));
408+
remoteLoadJob = remoteLoadJob.waitFor();
409+
// Check the table
410+
System.out.println("State: " + remoteLoadJob.getStatus().getState());
411+
return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
412+
// [END bigquery_load_table_gcs_json]
413+
}
414+
381415
/**
382416
* Example of inserting rows into a table without running a load job.
383417
*/
@@ -470,7 +504,9 @@ public TableResult listTableDataFromId(String datasetName, String tableName) {
470504
return tableData;
471505
}
472506

473-
/** Example of listing table rows with schema. */
507+
/**
508+
* Example of listing table rows with schema.
509+
*/
474510
// [TARGET listTableData(String, String, Schema, TableDataListOption...)]
475511
// [VARIABLE "my_dataset_name"]
476512
// [VARIABLE "my_table_name"]
@@ -488,7 +524,9 @@ public TableResult listTableDataSchema(
488524
return tableData;
489525
}
490526

491-
/** Example of listing table rows with schema. */
527+
/**
528+
* Example of listing table rows with schema.
529+
*/
492530
// [TARGET listTableData(TableId, Schema, TableDataListOption...)]
493531
public FieldValueList listTableDataSchemaId() {
494532
// [START listTableDataSchemaId]
@@ -607,8 +645,10 @@ public boolean cancelJobFromId(String jobName) {
607645
return success;
608646
}
609647

610-
/** Example of running a query. */
611-
// [TARGET query(QueryJobConfiguration, QueryOption...)]
648+
/**
649+
* Example of running a query.
650+
*/
651+
// [TARGET query(QueryJobConfiguration, JobOption...)]
612652
// [VARIABLE "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]"]
613653
public void runQuery(String query) throws InterruptedException {
614654
// [START runQuery]
@@ -620,10 +660,11 @@ public void runQuery(String query) throws InterruptedException {
620660
// [END runQuery]
621661
}
622662

623-
/** Example of running a query with query parameters. */
624-
// [TARGET query(QueryJobConfiguration, QueryOption...)]
625-
// [VARIABLE "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where
626-
// word_count > @wordCount"]
663+
/**
664+
* Example of running a query with query parameters.
665+
*/
666+
// [TARGET query(QueryJobConfiguration, JobOption...)]
667+
// [VARIABLE "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > @wordCount"]
627668
public void runQueryWithParameters(String query) throws InterruptedException {
628669
// [START runQueryWithParameters]
629670
// Note, standard SQL is required to use query parameters. Legacy SQL will not work.

google-cloud-examples/src/test/java/com/google/cloud/examples/bigquery/snippets/ITBigQuerySnippets.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import static org.junit.Assert.assertEquals;
2121
import static org.junit.Assert.assertFalse;
2222
import static org.junit.Assert.assertNotNull;
23+
import static org.junit.Assert.assertNull;
2324
import static org.junit.Assert.assertTrue;
2425

2526
import com.google.api.gax.paging.Page;
@@ -48,6 +49,7 @@
4849
import java.net.URISyntaxException;
4950
import java.nio.file.Path;
5051
import java.nio.file.Paths;
52+
import java.util.ArrayList;
5153
import java.util.Iterator;
5254
import java.util.Set;
5355
import java.util.concurrent.ExecutionException;
@@ -197,6 +199,24 @@ public void testWriteAndListTableData()
197199
assertTrue(bigquerySnippets.deleteTable(DATASET, tableName));
198200
}
199201

202+
@Test
203+
public void testWriteRemoteJsonToTable() throws InterruptedException {
204+
String datasetName = "test_dataset";
205+
String tableName = "us_states";
206+
Table table = bigquery.getTable(datasetName, tableName);
207+
assertNull(table);
208+
209+
Long result = bigquerySnippets.writeRemoteFileToTable(datasetName, tableName);
210+
table = bigquery.getTable(datasetName, tableName);
211+
assertNotNull(table);
212+
ArrayList<String> tableFieldNames = new ArrayList<>();
213+
for (Field field: table.getDefinition().getSchema().getFields()) {
214+
tableFieldNames.add(field.getName());
215+
}
216+
bigquery.delete(table.getTableId());
217+
assertEquals(Long.valueOf(50), result);
218+
}
219+
200220
@Test
201221
public void testInsertAllAndListTableData() throws IOException, InterruptedException {
202222
String tableName = "test_insert_all_and_list_table_data";

0 commit comments

Comments (0)