diff --git a/docs/bigquery/snippets.py b/docs/bigquery/snippets.py index f4811f4a3e7c..19892b1aa538 100644 --- a/docs/bigquery/snippets.py +++ b/docs/bigquery/snippets.py @@ -26,6 +26,7 @@ import os import time +import mock import pytest import six try: @@ -90,6 +91,40 @@ def delete(self): self._wrapped.close() +def test_create_client_default_credentials(): + """Create a BigQuery client with Application Default Credentials""" + + # [START bigquery_client_default_credentials] + from google.cloud import bigquery + + # If you don't specify credentials when constructing the client, the + # client library will look for credentials in the environment. + client = bigquery.Client() + # [END bigquery_client_default_credentials] + + assert client is not None + + +def test_create_client_json_credentials(): + """Create a BigQuery client with service account JSON credentials""" + with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS']) as creds_file: + creds_file_data = creds_file.read() + + open_mock = mock.mock_open(read_data=creds_file_data) + + with mock.patch('io.open', open_mock): + # [START bigquery_client_json_credentials] + from google.cloud import bigquery + + # Explicitly use service account credentials by specifying the private + # key file. All clients in google-cloud-python have this helper. + client = bigquery.Client.from_service_account_json( + 'path/to/service_account.json') + # [END bigquery_client_json_credentials] + + assert client is not None + + + def test_list_datasets(client): """List datasets for a project.""" # [START bigquery_list_datasets] @@ -816,7 +851,8 @@ def test_load_table_from_file(client, to_delete): job.result() # Waits for table load to complete. 
- print('Job {} completed.'.format(job.job_id)) + print('Loaded {} rows into {}:{}.'.format( + job.output_rows, dataset_id, table_id)) # [END bigquery_load_from_file] table = client.get_table(table_ref) @@ -1473,10 +1509,13 @@ def test_extract_table(client, to_delete): # from google.cloud import bigquery # client = bigquery.Client() # bucket_name = 'my-bucket' + project = 'bigquery-public-data' + dataset_id = 'samples' + table_id = 'shakespeare' destination_uri = 'gs://{}/{}'.format(bucket_name, 'shakespeare.csv') - dataset_ref = client.dataset('samples', project='bigquery-public-data') - table_ref = dataset_ref.table('shakespeare') + dataset_ref = client.dataset(dataset_id, project=project) + table_ref = dataset_ref.table(table_id) extract_job = client.extract_table( table_ref, @@ -1484,6 +1523,9 @@ def test_extract_table(client, to_delete): # Location must match that of the source table. location='US') # API request extract_job.result() # Waits for job to complete. + + print('Exported {}:{}.{} to {}'.format( + project, dataset_id, table_id, destination_uri)) # [END bigquery_extract_table] blob = bucket.get_blob('shakespeare.csv') @@ -1611,9 +1653,39 @@ def test_client_query(client): for row in query_job: # API request - fetches results # Row values can be accessed by field name or index assert row[0] == row.name == row['name'] + print(row) # [END bigquery_query] +def test_client_query_standard_sql(client): + """Run a query with Standard SQL explicitly set""" + # [START bigquery_query_standard] + # from google.cloud import bigquery + # client = bigquery.Client() + + query = ( + 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` ' + 'WHERE state = "TX" ' + 'LIMIT 100') + + # Set use_legacy_sql to False to use standard SQL syntax. + # Note that queries run through the Python Client Library are set to use + # standard SQL by default. 
+ job_config = bigquery.QueryJobConfig() + job_config.use_legacy_sql = False + + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request - starts the query + + # Print the results. + for row in query_job: # API request - fetches results + print(row) + # [END bigquery_query_standard] + + def test_client_query_destination_table(client, to_delete): """Run a query""" dataset_id = 'query_destination_table_{}'.format(_millis()) @@ -1638,7 +1710,7 @@ def test_client_query_destination_table(client, to_delete): # The write_disposition specifies the behavior when writing query results # to a table that already exists. With WRITE_TRUNCATE, any existing rows # in the table are overwritten by the query results. - job_config.write_disposition = 'WRITE_TRUNCATE' + job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE # Start the query, passing in the extra configuration. query_job = client.query( @@ -1709,10 +1781,10 @@ def test_client_query_destination_table_cmek(client, to_delete): # [END bigquery_query_destination_table_cmek] -def test_client_query_w_params(client): - """Run a query using a query parameter""" +def test_client_query_w_named_params(client, capsys): + """Run a query using named query parameters""" - # [START bigquery_query_params] + # [START bigquery_query_params_named] # from google.cloud import bigquery # client = bigquery.Client() @@ -1723,12 +1795,84 @@ def test_client_query_w_params(client): AND word_count >= @min_word_count ORDER BY word_count DESC; """ - corpus = 'hamlet' - min_word_count = 100 query_params = [ - bigquery.ScalarQueryParameter('corpus', 'STRING', corpus), + bigquery.ScalarQueryParameter('corpus', 'STRING', 'romeoandjuliet'), + bigquery.ScalarQueryParameter('min_word_count', 'INT64', 250) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query( + query, + # 
Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request - starts the query + + # Print the results + for row in query_job: + print('{}: \t{}'.format(row.word, row.word_count)) + + assert query_job.state == 'DONE' + # [END bigquery_query_params_named] + + out, _ = capsys.readouterr() + assert 'the' in out + + +def test_client_query_w_positional_params(client, capsys): + """Run a query using query parameters""" + + # [START bigquery_query_params_positional] + # from google.cloud import bigquery + # client = bigquery.Client() + + query = """ + SELECT word, word_count + FROM `bigquery-public-data.samples.shakespeare` + WHERE corpus = ? + AND word_count >= ? + ORDER BY word_count DESC; + """ + # Set the name to None to use positional parameters. + # Note that you cannot mix named and positional parameters. + query_params = [ + bigquery.ScalarQueryParameter(None, 'STRING', 'romeoandjuliet'), + bigquery.ScalarQueryParameter(None, 'INT64', 250) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. 
+ location='US', + job_config=job_config) # API request - starts the query + + # Print the results + for row in query_job: + print('{}: \t{}'.format(row.word, row.word_count)) + + assert query_job.state == 'DONE' + # [END bigquery_query_params_positional] + + out, _ = capsys.readouterr() + assert 'the' in out + + +def test_client_query_w_timestamp_params(client, capsys): + """Run a query using query parameters""" + + # [START bigquery_query_params_timestamps] + # from google.cloud import bigquery + # client = bigquery.Client() + + import datetime + import pytz + + query = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);' + query_params = [ bigquery.ScalarQueryParameter( - 'min_word_count', 'INT64', min_word_count) + 'ts_value', + 'TIMESTAMP', + datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC)) ] job_config = bigquery.QueryJobConfig() job_config.query_parameters = query_params @@ -1738,14 +1882,88 @@ def test_client_query_w_params(client): location='US', job_config=job_config) # API request - starts the query - # Waits for the query to finish - timeout = 30 # in seconds - iterator = query_job.result(timeout=timeout) - rows = list(iterator) + # Print the results + for row in query_job: + print(row) assert query_job.state == 'DONE' - assert len(rows) > 0 - # [END bigquery_query_params] + # [END bigquery_query_params_timestamps] + + out, _ = capsys.readouterr() + assert '2016, 12, 7, 9, 0' in out + + +def test_client_query_w_array_params(client, capsys): + """Run a query using array query parameters""" + # [START bigquery_query_params_arrays] + # from google.cloud import bigquery + # client = bigquery.Client() + + query = """ + SELECT name, sum(number) as count + FROM `bigquery-public-data.usa_names.usa_1910_2013` + WHERE gender = @gender + AND state IN UNNEST(@states) + GROUP BY name + ORDER BY count DESC + LIMIT 10; + """ + query_params = [ + bigquery.ScalarQueryParameter('gender', 'STRING', 'M'), + bigquery.ArrayQueryParameter( + 'states', 'STRING', ['WA', 
'WI', 'WV', 'WY']) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request - starts the query + + # Print the results + for row in query_job: + print('{}: \t{}'.format(row.name, row.count)) + + assert query_job.state == 'DONE' + # [END bigquery_query_params_arrays] + + out, _ = capsys.readouterr() + assert 'James' in out + + +def test_client_query_w_struct_params(client, capsys): + """Run a query using struct query parameters""" + # [START bigquery_query_params_structs] + # from google.cloud import bigquery + # client = bigquery.Client() + + query = 'SELECT @struct_value AS s;' + query_params = [ + bigquery.StructQueryParameter( + 'struct_value', + bigquery.ScalarQueryParameter('x', 'INT64', 1), + bigquery.ScalarQueryParameter('y', 'STRING', 'foo') + ) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query( + query, + # Location must match that of the dataset(s) referenced in the query. + location='US', + job_config=job_config) # API request - starts the query + + # Print the results + for row in query_job: + print(row.s) + + assert query_job.state == 'DONE' + # [END bigquery_query_params_structs] + + out, _ = capsys.readouterr() + assert '1' in out + assert 'foo' in out def test_client_query_dry_run(client): diff --git a/docs/bigquery/usage.rst b/docs/bigquery/usage.rst index ae94c402bd5e..a412ed94f721 100644 --- a/docs/bigquery/usage.rst +++ b/docs/bigquery/usage.rst @@ -344,8 +344,8 @@ See BigQuery documentation for more information on `parameterized queries `_. .. literalinclude:: snippets.py - :start-after: [START bigquery_query_params] - :end-before: [END bigquery_query_params] + :start-after: [START bigquery_query_params_named] + :end-before: [END bigquery_query_params_named] List jobs for a project