Skip to content

Commit 848245e

Browse files
happyhuman and pongad
authored and committed
Added a snippet to show how to read a newline-delimited-json file and store it in a Table (googleapis#2974)
1 parent a21cc36 commit 848245e

3 files changed

Lines changed: 114 additions & 50 deletions

File tree

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigQuery.java

Lines changed: 45 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ public int hashCode() {
522522
* } catch (BigQueryException e) {
523523
* // the dataset was not created
524524
* }
525-
* } </pre>
525+
* }</pre>
526526
*
527527
* @throws BigQueryException upon failure
528528
*/
@@ -538,7 +538,7 @@ public int hashCode() {
538538
* String fieldName = "string_field";
539539
* TableId tableId = TableId.of(datasetName, tableName);
540540
* // Table field definition
541-
* Field field = Field.of(fieldName, Field.Type.string());
541+
* Field field = Field.of(fieldName, LegacySQLTypeName.STRING);
542542
* // Table schema definition
543543
* Schema schema = Schema.of(field);
544544
* TableDefinition tableDefinition = StandardTableDefinition.of(schema);
@@ -553,6 +553,32 @@ public int hashCode() {
553553
/**
554554
* Creates a new job.
555555
*
556+
* <p>Example of loading a newline-delimited-json file with textual fields from GCS to a table.
557+
* <pre> {@code
558+
* String datasetName = "my_dataset_name";
559+
* String tableName = "my_table_name";
560+
* String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
561+
* TableId tableId = TableId.of(datasetName, tableName);
562+
* // Table field definition
563+
* Field[] fields = new Field[] {
564+
* Field.of("name", LegacySQLTypeName.STRING),
565+
* Field.of("post_abbr", LegacySQLTypeName.STRING)
566+
* };
567+
* // Table schema definition
568+
* Schema schema = Schema.of(fields);
569+
* LoadJobConfiguration configuration = LoadJobConfiguration.builder(tableId, sourceUri)
570+
* .setFormatOptions(FormatOptions.json())
571+
* .setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
572+
* .setSchema(schema)
573+
* .build();
574+
* // Load the table
575+
* Job remoteLoadJob = bigquery.create(JobInfo.of(configuration));
576+
* remoteLoadJob = remoteLoadJob.waitFor();
577+
* // Check the table
578+
* System.out.println("State: " + remoteLoadJob.getStatus().getState());
579+
* return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
580+
* }</pre>
581+
*
556582
* <p>Example of creating a query job.
557583
* <pre> {@code
558584
* String query = "SELECT field FROM my_dataset_name.my_table_name";
@@ -861,8 +887,7 @@ public int hashCode() {
861887
* Lists the table's rows.
862888
*
863889
* <p>Example of listing table rows, specifying the page size.
864-
*
865-
* <pre>{@code
890+
* <pre> {@code
866891
* String datasetName = "my_dataset_name";
867892
* String tableName = "my_table_name";
868893
* // This example reads the result 100 rows per RPC call. If there's no need to limit the number,
@@ -882,16 +907,15 @@ public int hashCode() {
882907
* Lists the table's rows.
883908
*
884909
* <p>Example of listing table rows, specifying the page size.
885-
*
886-
* <pre>{@code
910+
* <pre> {@code
887911
* String datasetName = "my_dataset_name";
888912
* String tableName = "my_table_name";
889913
* TableId tableIdObject = TableId.of(datasetName, tableName);
890914
* // This example reads the result 100 rows per RPC call. If there's no need to limit the number,
891915
* // simply omit the option.
892916
* TableResult tableData =
893917
* bigquery.listTableData(tableIdObject, TableDataListOption.pageSize(100));
894-
* for (FieldValueList row : rowIterator.hasNext()) {
918+
* for (FieldValueList row : tableData.iterateAll()) {
895919
* // do something with the row
896920
* }
897921
* }</pre>
@@ -904,17 +928,16 @@ public int hashCode() {
904928
* Lists the table's rows. If the {@code schema} is not {@code null}, it is available to the
905929
* {@link FieldValueList} iterated over.
906930
*
907-
* <p>Example of listing table rows.
908-
*
909-
* <pre>{@code
931+
* <p>Example of listing table rows with schema.
932+
* <pre> {@code
910933
* String datasetName = "my_dataset_name";
911934
* String tableName = "my_table_name";
912935
* Schema schema = ...;
913-
* String field = "my_field";
936+
* String field = "field";
914937
* TableResult tableData =
915938
* bigquery.listTableData(datasetName, tableName, schema);
916939
* for (FieldValueList row : tableData.iterateAll()) {
917-
* row.get(field)
940+
* row.get(field);
918941
* }
919942
* }</pre>
920943
*
@@ -927,9 +950,8 @@ TableResult listTableData(
927950
* Lists the table's rows. If the {@code schema} is not {@code null}, it is available to the
928951
* {@link FieldValueList} iterated over.
929952
*
930-
* <p>Example of listing table rows.
931-
*
932-
* <pre>{@code
953+
* <p>Example of listing table rows with schema.
954+
* <pre> {@code
933955
* Schema schema =
934956
* Schema.of(
935957
* Field.of("word", LegacySQLTypeName.STRING),
@@ -1047,28 +1069,21 @@ TableResult listTableData(
10471069
* queries. Since dry-run queries are not actually executed, there's no way to retrieve results.
10481070
*
10491071
* <p>Example of running a query.
1050-
*
1051-
* <pre>{@code
1052-
* String query = "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare`";
1053-
* QueryJobConfiguration queryConfig = QueryJobConfiguration.of(query);
1054-
*
1055-
* // To run the legacy syntax queries use the following code instead:
1056-
* // String query = "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]"
1057-
* // QueryJobConfiguration queryConfig =
1058-
* // QueryJobConfiguration.newBuilder(query).setUseLegacySql(true).build();
1059-
*
1072+
* <pre> {@code
1073+
* String query = "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]";
1074+
* QueryJobConfiguration queryConfig =
1075+
* QueryJobConfiguration.newBuilder(query).setUseLegacySql(true).build();
10601076
* for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
10611077
* // do something with the data
10621078
* }
10631079
* }</pre>
10641080
*
10651081
* <p>Example of running a query with query parameters.
1066-
*
1067-
* <pre>{@code
1068-
* String query =
1069-
* "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > ?";
1082+
* <pre> {@code
1083+
* String query = "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > @wordCount";
1084+
* // Note, standard SQL is required to use query parameters. Legacy SQL will not work.
10701085
* QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query)
1071-
* .addPositionalParameter(QueryParameterValue.int64(5))
1086+
* .addNamedParameter("wordCount", QueryParameterValue.int64(5))
10721087
* .build();
10731088
* for (FieldValueList row : bigquery.query(queryConfig).iterateAll()) {
10741089
* // do something with the data
@@ -1092,18 +1107,6 @@ TableResult query(QueryJobConfiguration configuration, JobOption... options)
10921107
* <p>See {@link #query(QueryJobConfiguration, JobOption...)} for examples on populating a {@link
10931108
* QueryJobConfiguration}.
10941109
*
1095-
* <p>The recommended way to create a randomly generated JobId is the following:
1096-
*
1097-
* <pre>{@code
1098-
* JobId jobId = JobId.of();
1099-
* }</pre>
1100-
*
1101-
* For a user specified job id with an optional prefix use the following:
1102-
*
1103-
* <pre>{@code
1104-
* JobId jobId = JobId.of("my_prefix-my_unique_job_id");
1105-
* }</pre>
1106-
*
11071110
* @throws BigQueryException upon failure
11081111
* @throws InterruptedException if the current thread gets interrupted while waiting for the query
11091112
* to complete

google-cloud-examples/src/main/java/com/google/cloud/examples/bigquery/snippets/BigQuerySnippets.java

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import com.google.api.client.util.Charsets;
2626
import com.google.api.gax.paging.Page;
2727
import com.google.cloud.bigquery.BigQuery;
28+
import com.google.cloud.bigquery.JobInfo.CreateDisposition;
29+
import com.google.cloud.bigquery.LoadJobConfiguration;
2830
import com.google.cloud.bigquery.TableResult;
2931
import com.google.cloud.bigquery.BigQuery.DatasetDeleteOption;
3032
import com.google.cloud.bigquery.BigQuery.DatasetListOption;
@@ -378,6 +380,38 @@ public long writeFileToTable(String datasetName, String tableName, Path csvPath)
378380
// [END writeFileToTable]
379381
}
380382

383+
/**
384+
* Example of loading a newline-delimited-json file with textual fields from GCS to a table.
385+
*/
386+
// [TARGET create(JobInfo, JobOption...)]
387+
// [VARIABLE "my_dataset_name"]
388+
// [VARIABLE "my_table_name"]
389+
public Long writeRemoteFileToTable(String datasetName, String tableName)
390+
throws InterruptedException {
391+
// [START bigquery_load_table_gcs_json]
392+
String sourceUri = "gs://cloud-samples-data/bigquery/us-states/us-states.json";
393+
TableId tableId = TableId.of(datasetName, tableName);
394+
// Table field definition
395+
Field[] fields = new Field[] {
396+
Field.of("name", LegacySQLTypeName.STRING),
397+
Field.of("post_abbr", LegacySQLTypeName.STRING)
398+
};
399+
// Table schema definition
400+
Schema schema = Schema.of(fields);
401+
LoadJobConfiguration configuration = LoadJobConfiguration.builder(tableId, sourceUri)
402+
.setFormatOptions(FormatOptions.json())
403+
.setCreateDisposition(CreateDisposition.CREATE_IF_NEEDED)
404+
.setSchema(schema)
405+
.build();
406+
// Load the table
407+
Job remoteLoadJob = bigquery.create(JobInfo.of(configuration));
408+
remoteLoadJob = remoteLoadJob.waitFor();
409+
// Check the table
410+
System.out.println("State: " + remoteLoadJob.getStatus().getState());
411+
return ((StandardTableDefinition) bigquery.getTable(tableId).getDefinition()).getNumRows();
412+
// [END bigquery_load_table_gcs_json]
413+
}
414+
381415
/**
382416
* Example of inserting rows into a table without running a load job.
383417
*/
@@ -470,7 +504,9 @@ public TableResult listTableDataFromId(String datasetName, String tableName) {
470504
return tableData;
471505
}
472506

473-
/** Example of listing table rows with schema. */
507+
/**
508+
* Example of listing table rows with schema.
509+
*/
474510
// [TARGET listTableData(String, String, Schema, TableDataListOption...)]
475511
// [VARIABLE "my_dataset_name"]
476512
// [VARIABLE "my_table_name"]
@@ -488,7 +524,9 @@ public TableResult listTableDataSchema(
488524
return tableData;
489525
}
490526

491-
/** Example of listing table rows with schema. */
527+
/**
528+
* Example of listing table rows with schema.
529+
*/
492530
// [TARGET listTableData(TableId, Schema, TableDataListOption...)]
493531
public FieldValueList listTableDataSchemaId() {
494532
// [START listTableDataSchemaId]
@@ -607,8 +645,10 @@ public boolean cancelJobFromId(String jobName) {
607645
return success;
608646
}
609647

610-
/** Example of running a query. */
611-
// [TARGET query(QueryJobConfiguration, QueryOption...)]
648+
/**
649+
* Example of running a query.
650+
*/
651+
// [TARGET query(QueryJobConfiguration, JobOption...)]
612652
// [VARIABLE "SELECT unique(corpus) FROM [bigquery-public-data:samples.shakespeare]"]
613653
public void runQuery(String query) throws InterruptedException {
614654
// [START runQuery]
@@ -620,10 +660,11 @@ public void runQuery(String query) throws InterruptedException {
620660
// [END runQuery]
621661
}
622662

623-
/** Example of running a query with query parameters. */
624-
// [TARGET query(QueryJobConfiguration, QueryOption...)]
625-
// [VARIABLE "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where
626-
// word_count > @wordCount"]
663+
/**
664+
* Example of running a query with query parameters.
665+
*/
666+
// [TARGET query(QueryJobConfiguration, JobOption...)]
667+
// [VARIABLE "SELECT distinct(corpus) FROM `bigquery-public-data.samples.shakespeare` where word_count > @wordCount"]
627668
public void runQueryWithParameters(String query) throws InterruptedException {
628669
// [START runQueryWithParameters]
629670
// Note, standard SQL is required to use query parameters. Legacy SQL will not work.

google-cloud-examples/src/test/java/com/google/cloud/examples/bigquery/snippets/ITBigQuerySnippets.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import static org.junit.Assert.assertEquals;
2121
import static org.junit.Assert.assertFalse;
2222
import static org.junit.Assert.assertNotNull;
23+
import static org.junit.Assert.assertNull;
2324
import static org.junit.Assert.assertTrue;
2425

2526
import com.google.api.gax.paging.Page;
@@ -48,6 +49,7 @@
4849
import java.net.URISyntaxException;
4950
import java.nio.file.Path;
5051
import java.nio.file.Paths;
52+
import java.util.ArrayList;
5153
import java.util.Iterator;
5254
import java.util.Set;
5355
import java.util.concurrent.ExecutionException;
@@ -197,6 +199,24 @@ public void testWriteAndListTableData()
197199
assertTrue(bigquerySnippets.deleteTable(DATASET, tableName));
198200
}
199201

202+
@Test
203+
public void testWriteRemoteJsonToTable() throws InterruptedException {
204+
String datasetName = "test_dataset";
205+
String tableName = "us_states";
206+
Table table = bigquery.getTable(datasetName, tableName);
207+
assertNull(table);
208+
209+
Long result = bigquerySnippets.writeRemoteFileToTable(datasetName, tableName);
210+
table = bigquery.getTable(datasetName, tableName);
211+
assertNotNull(table);
212+
ArrayList<String> tableFieldNames = new ArrayList<>();
213+
for (Field field: table.getDefinition().getSchema().getFields()) {
214+
tableFieldNames.add(field.getName());
215+
}
216+
bigquery.delete(table.getTableId());
217+
assertEquals(Long.valueOf(50), result);
218+
}
219+
200220
@Test
201221
public void testInsertAllAndListTableData() throws IOException, InterruptedException {
202222
String tableName = "test_insert_all_and_list_table_data";

0 commit comments

Comments (0)