From a862bd664e1df7192a5006c33da7dd491c1867e4 Mon Sep 17 00:00:00 2001
From: Alix Hamilton
Date: Fri, 13 Apr 2018 10:56:08 -0700
Subject: [PATCH 1/2] sample update and region tag standardization

---
 bigquery/nox.py                |   2 +-
 bigquery/tests/data/people.csv |   3 +
 docs/bigquery/snippets.py      | 318 ++++++++++++++++-----------------
 docs/bigquery/usage.rst        |  55 ++----
 4 files changed, 171 insertions(+), 207 deletions(-)
 create mode 100644 bigquery/tests/data/people.csv

diff --git a/bigquery/nox.py b/bigquery/nox.py
index 07c5ea80408f..6ce3a89d9b3c 100644
--- a/bigquery/nox.py
+++ b/bigquery/nox.py
@@ -139,7 +139,7 @@ def snippets(session, py):
         os.path.join('..', 'storage'),
         os.path.join('..', 'test_utils'),
     )
-    session.install('-e', '.')
+    session.install('-e', '.[pandas]')
 
     # Run py.test against the system tests.
     session.run(
diff --git a/bigquery/tests/data/people.csv b/bigquery/tests/data/people.csv
new file mode 100644
index 000000000000..d3c7d063892a
--- /dev/null
+++ b/bigquery/tests/data/people.csv
@@ -0,0 +1,3 @@
+full_name,age
+Phred Phlyntstone,32
+Wylma Phlyntstone,29
\ No newline at end of file
diff --git a/docs/bigquery/snippets.py b/docs/bigquery/snippets.py
index a2fd50dfb97b..9088fb444a2a 100644
--- a/docs/bigquery/snippets.py
+++ b/docs/bigquery/snippets.py
@@ -23,6 +23,7 @@
 need to be deleted during teardown.
 """
 
+import os
 import time
 
 import pytest
@@ -92,7 +93,9 @@ def delete(self):
 def test_list_datasets(client):
     """List datasets for a project."""
     # [START bigquery_list_datasets]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
+
     datasets = list(client.list_datasets())
     project = client.project
 
@@ -110,6 +113,7 @@ def test_create_dataset(client, to_delete):
     dataset_id = 'create_dataset_{}'.format(_millis())
 
     # [START bigquery_create_dataset]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_id = 'my_dataset'
 
@@ -143,6 +147,7 @@ def test_get_dataset_information(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_get_dataset]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_id = 'my_dataset'
 
@@ -214,6 +219,7 @@ def test_update_dataset_description(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_update_dataset_description]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_ref = client.dataset('my_dataset')
     # dataset = client.get_dataset(dataset_ref)  # API request
@@ -235,6 +241,7 @@ def test_update_dataset_default_table_expiration(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_update_dataset_expiration]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_ref = client.dataset('my_dataset')
     # dataset = client.get_dataset(dataset_ref)  # API request
@@ -250,33 +257,27 @@ def test_update_dataset_default_table_expiration(client, to_delete):
     # [END bigquery_update_dataset_expiration]
 
 
-def test_update_dataset_multiple_properties(client, to_delete):
+def test_update_dataset_labels(client, to_delete):
     """Update a dataset's metadata."""
     dataset_id = 'update_dataset_multiple_properties_{}'.format(_millis())
     dataset = bigquery.Dataset(client.dataset(dataset_id))
     dataset = client.create_dataset(dataset)
     to_delete.append(dataset)
 
-    # [START bigquery_update_dataset_multiple_properties]
+    # [START bigquery_label_dataset]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_ref = client.dataset('my_dataset')
     # dataset = client.get_dataset(dataset_ref)  # API request
 
-    assert dataset.default_table_expiration_ms is None
     assert dataset.labels == {}
-    one_day_ms = 24 * 60 * 60 * 1000  # in milliseconds
     labels = {'color': 'green'}
 
     dataset.labels = labels
-    dataset.default_table_expiration_ms = one_day_ms
-    dataset = client.update_dataset(
-        dataset,
-        ['labels', 'default_table_expiration_ms']
-    )  # API request
+    dataset = client.update_dataset(dataset, ['labels'])  # API request
 
-    assert dataset.default_table_expiration_ms == one_day_ms
     assert dataset.labels == labels
-    # [END bigquery_update_dataset_multiple_properties]
+    # [END bigquery_label_dataset]
 
 
 def test_update_dataset_access(client, to_delete):
@@ -287,6 +288,7 @@ def test_update_dataset_access(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_update_dataset_access]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset = client.get_dataset(client.dataset('my_dataset'))
 
@@ -321,6 +323,7 @@ def test_delete_dataset(client):
     client.create_table(table)
 
     # [START bigquery_delete_dataset]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
 
     # Delete a dataset that does not contain any tables
@@ -351,6 +354,7 @@ def test_list_tables(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_list_tables]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_ref = client.dataset('my_dataset')
 
@@ -378,6 +382,7 @@ def test_create_table(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_create_table]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_ref = client.dataset('my_dataset')
 
@@ -404,6 +409,7 @@ def test_create_table_then_add_schema(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_create_table_without_schema]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_ref = client.dataset('my_dataset')
 
@@ -417,6 +423,7 @@ def test_create_table_then_add_schema(client, to_delete):
     to_delete.insert(0, table)
 
     # [START bigquery_add_schema_to_empty]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
 
     # dataset_id = 'my_dataset'
@@ -440,6 +447,9 @@ def test_create_table_cmek(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_create_table_cmek]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+
     table_ref = dataset.table('my_table')
     table = bigquery.Table(table_ref)
 
@@ -456,27 +466,6 @@ def test_create_table_cmek(client, to_delete):
     # [END bigquery_create_table_cmek]
 
 
-def test_get_table(client, to_delete):
-    """Reload a table's metadata."""
-    DATASET_ID = 'get_table_dataset_{}'.format(_millis())
-    TABLE_ID = 'get_table_table_{}'.format(_millis())
-    dataset = bigquery.Dataset(client.dataset(DATASET_ID))
-    dataset = client.create_dataset(dataset)
-    to_delete.append(dataset)
-
-    table = bigquery.Table(dataset.table(TABLE_ID), schema=SCHEMA)
-    table.description = ORIGINAL_DESCRIPTION
-    table = client.create_table(table)
-    to_delete.insert(0, table)
-
-    # [START get_table]
-    assert table.description == ORIGINAL_DESCRIPTION
-    table.description = LOCALLY_CHANGED_DESCRIPTION
-    table = client.get_table(table)  # API request
-    assert table.description == ORIGINAL_DESCRIPTION
-    # [END get_table]
-
-
 def test_get_table_information(client, to_delete):
     """Show a table's properties."""
     dataset_id = 'show_table_dataset_{}'.format(_millis())
@@ -492,6 +481,7 @@ def test_get_table_information(client, to_delete):
     to_delete.insert(0, table)
 
     # [START bigquery_get_table]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_id = 'my_dataset'
     # table_id = 'my_table'
@@ -565,6 +555,7 @@ def test_update_table_description(client, to_delete):
     to_delete.insert(0, table)
 
     # [START bigquery_update_table_description]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # table_ref = client.dataset('my_dataset').table('my_table')
     # table = client.get_table(table_ref)  # API request
@@ -591,13 +582,14 @@ def test_update_table_expiration(client, to_delete):
     to_delete.insert(0, table)
 
     # [START bigquery_update_table_expiration]
-    import datetime
-    import pytz
-
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # table_ref = client.dataset('my_dataset').table('my_table')
     # table = client.get_table(table_ref)  # API request
 
+    import datetime
+    import pytz
+
     assert table.expires is None
 
     # set table to expire 5 days from now
@@ -624,6 +616,7 @@ def test_add_empty_column(client, to_delete):
     to_delete.insert(0, table)
 
     # [START bigquery_add_empty_column]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_id = 'my_dataset'
     # table_id = 'my_table'
@@ -651,9 +644,11 @@ def test_relax_column(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_relax_column]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_id = 'my_dataset'
     # table_id = 'my_table'
+
     original_schema = [
         bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
         bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
@@ -678,42 +673,6 @@ def test_relax_column(client, to_delete):
     to_delete.insert(0, table)
 
 
-def test_update_table_multiple_properties(client, to_delete):
-    """Update a table's metadata."""
-    dataset_id = 'update_table_multiple_properties_dataset_{}'.format(
-        _millis())
-    table_id = 'update_table_multiple_properties_table_{}'.format(_millis())
-    dataset = bigquery.Dataset(client.dataset(dataset_id))
-    dataset.description = 'Original description'
-    client.create_dataset(dataset)
-    to_delete.append(dataset)
-
-    table = bigquery.Table(dataset.table(table_id), schema=SCHEMA)
-    table.friendly_name = 'Original friendly name'
-    table.description = 'Original description'
-    table = client.create_table(table)
-    to_delete.insert(0, table)
-
-    # [START bigquery_update_table_multiple_properties]
-    assert table.friendly_name == 'Original friendly name'
-    assert table.description == 'Original description'
-
-    new_schema = list(table.schema)
-    new_schema.append(bigquery.SchemaField('phone', 'STRING'))
-    table.friendly_name = 'Updated friendly name'
-    table.description = 'Updated description'
-    table.schema = new_schema
-    table = client.update_table(
-        table,
-        ['schema', 'friendly_name', 'description']
-    )  # API request
-
-    assert table.friendly_name == 'Updated friendly name'
-    assert table.description == 'Updated description'
-    assert table.schema == new_schema
-    # [END bigquery_update_table_multiple_properties]
-
-
 def test_update_table_cmek(client, to_delete):
     """Patch a table's metadata."""
     dataset_id = 'update_table_cmek_{}'.format(_millis())
@@ -732,6 +691,9 @@ def test_update_table_cmek(client, to_delete):
     to_delete.insert(0, table)
 
     # [START bigquery_update_table_cmek]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+
     assert table.encryption_configuration.kms_key_name == original_kms_key_name
 
     # Set a new encryption key to use for the destination.
@@ -754,7 +716,9 @@ def test_browse_table_data(client, to_delete, capsys):
     """Retreive selected row data from a table."""
 
     # [START bigquery_browse_table]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
+
     dataset_ref = client.dataset('samples', project='bigquery-public-data')
     table_ref = dataset_ref.table('shakespeare')
     table = client.get_table(table_ref)  # API call
@@ -803,6 +767,9 @@ def test_table_insert_rows(client, to_delete):
     to_delete.insert(0, table)
 
     # [START bigquery_table_insert_rows]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+
     rows_to_insert = [
         (u'Phred Phlyntstone', 32),
         (u'Wylma Phlyntstone', 29),
@@ -816,56 +783,47 @@ def test_table_insert_rows(client, to_delete):
 
 def test_load_table_from_file(client, to_delete):
     """Upload table data from a CSV file."""
-    DATASET_ID = 'table_upload_from_file_dataset_{}'.format(_millis())
-    TABLE_ID = 'table_upload_from_file_table_{}'.format(_millis())
-    dataset = bigquery.Dataset(client.dataset(DATASET_ID))
+    dataset_id = 'table_upload_from_file_dataset_{}'.format(_millis())
+    table_id = 'table_upload_from_file_table_{}'.format(_millis())
+    dataset = bigquery.Dataset(client.dataset(dataset_id))
     dataset.location = 'US'
     client.create_dataset(dataset)
     to_delete.append(dataset)
+    snippets_directory = os.path.abspath(os.path.dirname(__file__))
+    filename = os.path.join(
+        snippets_directory, '../../bigquery/tests/data', 'people.csv')
 
-    # [START load_table_from_file]
-    csv_file = six.BytesIO(b"""full_name,age
-Phred Phlyntstone,32
-Wylma Phlyntstone,29
-""")
+    # [START bigquery_load_from_file]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+    # filename = '/path/to/file.csv'
+    # dataset_id = 'my_dataset'
+    # table_id = 'my_table'
 
-    table_ref = dataset.table(TABLE_ID)
+    dataset_ref = client.dataset(dataset_id)
+    table_ref = dataset_ref.table(table_id)
     job_config = bigquery.LoadJobConfig()
-    job_config.source_format = 'CSV'
+    job_config.source_format = bigquery.SourceFormat.CSV
     job_config.skip_leading_rows = 1
     job_config.autodetect = True
-    job = client.load_table_from_file(
-        csv_file,
-        table_ref,
-        location='US',  # Location must match that of the destination dataset.
-        job_config=job_config)  # API request
-    job.result()  # Waits for table load to complete.
-    # [END load_table_from_file]
-
-    table = client.get_table(table_ref)
-    to_delete.insert(0, table)
-    found_rows = []
 
-    def do_something(row):
-        found_rows.append(row)
+    with open(filename, 'rb') as source_file:
+        job = client.load_table_from_file(
+            source_file,
+            table_ref,
+            location='US',  # Must match the destination dataset location.
+            job_config=job_config)  # API request
 
-    # [START table_list_rows]
-    for row in client.list_rows(table):  # API request
-        do_something(row)
-    # [END table_list_rows]
+    job.result()  # Waits for table load to complete.
-    assert len(found_rows) == 2
+    print('Job {} completed.'.format(job.job_id))
+    # [END bigquery_load_from_file]
 
-    # [START table_list_rows_iterator_properties]
-    iterator = client.list_rows(table)  # API request
-    page = six.next(iterator.pages)
-    rows = list(page)
-    total = iterator.total_rows
-    token = iterator.next_page_token
-    # [END table_list_rows_iterator_properties]
+    table = client.get_table(table_ref)
+    to_delete.insert(0, table)
+    rows = list(client.list_rows(table))  # API request
 
-    assert len(rows) == total == 2
-    assert token is None
+    assert len(rows) == 2
     # Order is not preserved, so compare individually
     row1 = bigquery.Row(('Wylma Phlyntstone', 29), {'full_name': 0, 'age': 1})
     assert row1 in rows
@@ -880,8 +838,10 @@ def test_load_table_from_uri_csv(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_load_table_gcs_csv]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_id = 'my_dataset'
+
     dataset_ref = client.dataset(dataset_id)
     job_config = bigquery.LoadJobConfig()
     job_config.schema = [
@@ -914,8 +874,10 @@ def test_load_table_from_uri_json(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_load_table_gcs_json]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_id = 'my_dataset'
+
     dataset_ref = client.dataset(dataset_id)
     job_config = bigquery.LoadJobConfig()
     job_config.schema = [
@@ -947,7 +909,10 @@ def test_load_table_from_uri_cmek(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_load_table_gcs_json_cmek]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
     # dataset_id = 'my_dataset'
+
     dataset_ref = client.dataset(dataset_id)
     job_config = bigquery.LoadJobConfig()
     job_config.autodetect = True
@@ -984,8 +949,10 @@ def test_load_table_from_uri_parquet(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_load_table_gcs_parquet]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_id = 'my_dataset'
+
     dataset_ref = client.dataset(dataset_id)
     job_config = bigquery.LoadJobConfig()
     job_config.source_format = bigquery.SourceFormat.PARQUET
@@ -1011,8 +978,10 @@ def test_load_table_from_uri_csv_autodetect(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_load_table_gcs_csv_autodetect]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_id = 'my_dataset'
+
     dataset_ref = client.dataset(dataset_id)
     job_config = bigquery.LoadJobConfig()
     job_config.autodetect = True
@@ -1042,8 +1011,10 @@ def test_load_table_from_uri_json_autodetect(client, to_delete):
     to_delete.append(dataset)
 
     # [START bigquery_load_table_gcs_json_autodetect]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_id = 'my_dataset'
+
     dataset_ref = client.dataset(dataset_id)
     job_config = bigquery.LoadJobConfig()
     job_config.autodetect = True
@@ -1081,8 +1052,10 @@ def test_load_table_from_uri_csv_append(client, to_delete):
         body, table_ref, job_config=job_config).result()
 
     # [START bigquery_load_table_gcs_csv_append]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # table_ref = client.dataset('my_dataset').table('existing_table')
+
     previous_rows = client.get_table(table_ref).num_rows
     assert previous_rows > 0
 
@@ -1129,8 +1102,10 @@ def test_load_table_from_uri_json_append(client, to_delete):
         job_config=job_config).result()
 
     # [START bigquery_load_table_gcs_json_append]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # table_ref = client.dataset('my_dataset').table('existing_table')
+
     previous_rows = client.get_table(table_ref).num_rows
     job_config = bigquery.LoadJobConfig()
     job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
@@ -1168,8 +1143,10 @@ def test_load_table_from_uri_parquet_append(client, to_delete):
         body, table_ref, job_config=job_config).result()
 
     # [START bigquery_load_table_gcs_parquet_append]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # table_ref = client.dataset('my_dataset').table('existing_table')
+
     previous_rows = client.get_table(table_ref).num_rows
     job_config = bigquery.LoadJobConfig()
     job_config.source_format = bigquery.SourceFormat.PARQUET
@@ -1207,8 +1184,10 @@ def test_load_table_from_uri_csv_truncate(client, to_delete):
         body, table_ref, job_config=job_config).result()
 
     # [START bigquery_load_table_gcs_csv_truncate]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # table_ref = client.dataset('my_dataset').table('existing_table')
+
     previous_rows = client.get_table(table_ref).num_rows
     assert previous_rows > 0
 
@@ -1253,8 +1232,10 @@ def test_load_table_from_uri_json_truncate(client, to_delete):
         job_config=job_config).result()
 
     # [START bigquery_load_table_gcs_json_truncate]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # table_ref = client.dataset('my_dataset').table('existing_table')
+
     previous_rows = client.get_table(table_ref).num_rows
     assert previous_rows > 0
 
@@ -1294,8 +1275,10 @@ def test_load_table_from_uri_parquet_truncate(client, to_delete):
         body, table_ref, job_config=job_config).result()
 
     # [START bigquery_load_table_gcs_parquet_truncate]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # table_ref = client.dataset('my_dataset').table('existing_table')
+
     previous_rows = client.get_table(table_ref).num_rows
     assert previous_rows > 0
 
@@ -1350,7 +1333,9 @@ def test_copy_table(client, to_delete):
     to_delete.append(dest_dataset)
 
     # [START bigquery_copy_table]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
+
     source_dataset = client.dataset('samples', project='bigquery-public-data')
     source_table_ref = source_dataset.table('shakespeare')
 
@@ -1407,9 +1392,11 @@ def test_copy_table_multiple_source(client, to_delete):
         job_config=job_config).result()
 
     # [START bigquery_copy_table_multiple_source]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # source_dataset_id = 'my_source_dataset'
     # dest_dataset_id = 'my_destination_dataset'
+
     table1_ref = client.dataset(source_dataset_id).table('table1')
     table2_ref = client.dataset(source_dataset_id).table('table2')
     dest_table_ref = client.dataset(dest_dataset_id).table('destination_table')
@@ -1438,6 +1425,9 @@ def test_copy_table_cmek(client, to_delete):
     to_delete.append(dest_dataset)
 
     # [START bigquery_copy_table_cmek]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+
     source_dataset = bigquery.DatasetReference(
         'bigquery-public-data', 'samples')
     source_table_ref = source_dataset.table('shakespeare')
@@ -1480,8 +1470,10 @@ def test_extract_table(client, to_delete):
     to_delete.append(bucket)
 
     # [START bigquery_extract_table]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # bucket_name = 'my-bucket'
+
     destination_uri = 'gs://{}/{}'.format(bucket_name, 'shakespeare.csv')
     dataset_ref = client.dataset('samples', project='bigquery-public-data')
     table_ref = dataset_ref.table('shakespeare')
@@ -1509,8 +1501,10 @@ def test_extract_table_json(client, to_delete):
     to_delete.append(bucket)
 
     # [START bigquery_extract_table_json]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # bucket_name = 'my-bucket'
+
     destination_uri = 'gs://{}/{}'.format(bucket_name, 'shakespeare.json')
     dataset_ref = client.dataset('samples', project='bigquery-public-data')
     table_ref = dataset_ref.table('shakespeare')
@@ -1542,8 +1536,10 @@ def test_extract_table_compressed(client, to_delete):
     to_delete.append(bucket)
 
     # [START bigquery_extract_table_compressed]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # bucket_name = 'my-bucket'
+
    destination_uri = 'gs://{}/{}'.format(bucket_name, 'shakespeare.csv.gz')
     dataset_ref = client.dataset('samples', project='bigquery-public-data')
     table_ref = dataset_ref.table('shakespeare')
@@ -1581,6 +1577,7 @@ def test_delete_table(client, to_delete):
     table = bigquery.Table(table_ref, schema=SCHEMA)
     client.create_table(table)
     # [START bigquery_delete_table]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
     # dataset_id = 'my_dataset'
     # table_id = 'my_table'
@@ -1595,48 +1592,26 @@ def test_delete_table(client, to_delete):
         client.get_table(table)  # API request
 
 
-def test_client_simple_query(client):
+def test_client_query(client):
     """Run a simple query."""
-    # [START client_simple_query]
-    QUERY = (
+    # [START bigquery_query]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+
+    query = (
         'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
         'WHERE state = "TX" '
         'LIMIT 100')
     query_job = client.query(
-        QUERY,
+        query,
         # Location must match that of the dataset(s) referenced in the query.
         location='US')  # API request - starts the query
 
     for row in query_job:  # API request - fetches results
         # Row values can be accessed by field name or index
         assert row[0] == row.name == row['name']
-    # [END client_simple_query]
-
-
-def test_client_query(client):
-    """Run a query"""
-
-    # [START client_query]
-    query_str = (
-        'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
-        'WHERE state = "TX" '
-        'LIMIT 100')
-    query_job = client.query(
-        query_str,
-        # Location must match that of the dataset(s) referenced in the query.
-        location='US')  # API request - starts the query
-
-    # Waits for the query to finish
-    timeout = 30  # in seconds
-    iterator = query_job.result(timeout=timeout)
-    rows = list(iterator)
-
-    assert query_job.state == 'DONE'
-    assert len(rows) == 100
-    row = rows[0]
-    assert row[0] == row.name == row['name']
-    # [END client_query]
+    # [END bigquery_query]
 
 
 def test_client_query_destination_table(client, to_delete):
@@ -1650,7 +1625,9 @@ def test_client_query_destination_table(client, to_delete):
     to_delete.insert(0, dataset_ref.table('your_table_id'))
 
     # [START bigquery_query_destination_table]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
+
     job_config = bigquery.QueryJobConfig()
 
     # Set the destination table. Here, dataset_id is a string, such as:
@@ -1699,6 +1676,9 @@ def test_client_query_destination_table_cmek(client, to_delete):
     to_delete.insert(0, dataset_ref.table('your_table_id'))
 
     # [START bigquery_query_destination_table_cmek]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+
     job_config = bigquery.QueryJobConfig()
 
     # Set the destination table. Here, dataset_id is a string, such as:
@@ -1729,20 +1709,31 @@ def test_client_query_destination_table_cmek(client, to_delete):
     # [END bigquery_query_destination_table_cmek]
 
 
-def test_client_query_w_param(client):
+def test_client_query_w_params(client):
     """Run a query using a query parameter"""
 
-    # [START client_query_w_param]
-    query_w_param = (
-        'SELECT name, state '
-        'FROM `bigquery-public-data.usa_names.usa_1910_2013` '
-        'WHERE state = @state '
-        'LIMIT 100')
-    param = bigquery.ScalarQueryParameter('state', 'STRING', 'TX')
+    # [START bigquery_query_params]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+
+    query = """
+        SELECT word, word_count
+        FROM `bigquery-public-data.samples.shakespeare`
+        WHERE corpus = @corpus
+        AND word_count >= @min_word_count
+        ORDER BY word_count DESC;
+    """
+    corpus = 'hamlet'
+    min_word_count = 100
+    query_params = [
+        bigquery.ScalarQueryParameter('corpus', 'STRING', corpus),
+        bigquery.ScalarQueryParameter(
+            'min_word_count', 'INT64', min_word_count)
+    ]
     job_config = bigquery.QueryJobConfig()
-    job_config.query_parameters = [param]
+    job_config.query_parameters = query_params
     query_job = client.query(
-        query_w_param,
+        query,
         # Location must match that of the dataset(s) referenced in the query.
         location='US',
         job_config=job_config)  # API request - starts the query
@@ -1753,11 +1744,8 @@ def test_client_query_w_param(client):
     rows = list(iterator)
 
     assert query_job.state == 'DONE'
-    assert len(rows) == 100
-    row = rows[0]
-    assert row[0] == row.name == row['name']
-    # [END client_query_w_param]
+    assert len(rows) > 0
+    # [END bigquery_query_params]
 
 
 def test_client_query_dry_run(client):
@@ -1766,6 +1754,7 @@ def test_client_query_dry_run(client):
     # [START bigquery_query_dry_run]
     # from google.cloud import bigquery
     # client = bigquery.Client()
+
     job_config = bigquery.QueryJobConfig()
     job_config.dry_run = True
     job_config.use_query_cache = False
@@ -1792,21 +1781,26 @@ def test_client_query_dry_run(client):
 
 def test_client_list_jobs(client):
     """List jobs for a project."""
-    def do_something_with(_):
-        pass
+    # [START bigquery_list_jobs]
+    # from google.cloud import bigquery
+    # client = bigquery.Client(project='my_project')
+
+    # List all accessible jobs in a project
+    for job in client.list_jobs():  # API request(s)
+        print(job.job_id)
 
-    # [START client_list_jobs]
-    job_iterator = client.list_jobs(
-        max_results=10)  # Optionally, limit the results to 10 jobs.
-    for job in job_iterator:  # API request(s) happen when iterating
-        do_something_with(job)
-    # [END client_list_jobs]
+    # Optionally, limit the results to 10 jobs
+    for job in client.list_jobs(max_results=10):  # API request(s)
+        print(job.job_id)
+    # [END bigquery_list_jobs]
 
 
 @pytest.mark.skipif(pandas is None, reason='Requires `pandas`')
 def test_query_results_as_dataframe(client):
     # [START bigquery_query_results_dataframe]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
+
     sql = """
         SELECT name, SUM(number) as count
         FROM `bigquery-public-data.usa_names.usa_1910_current`
@@ -1825,7 +1819,9 @@ def test_query_results_as_dataframe(client):
 @pytest.mark.skipif(pandas is None, reason='Requires `pandas`')
 def test_list_rows_as_dataframe(client):
     # [START bigquery_list_rows_dataframe]
+    # from google.cloud import bigquery
     # client = bigquery.Client()
+
     dataset_ref = client.dataset('samples', project='bigquery-public-data')
     table_ref = dataset_ref.table('shakespeare')
     table = client.get_table(table_ref)
diff --git a/docs/bigquery/usage.rst b/docs/bigquery/usage.rst
index b8008301b04c..ae94c402bd5e 100644
--- a/docs/bigquery/usage.rst
+++ b/docs/bigquery/usage.rst
@@ -114,12 +114,6 @@ Update a property in a dataset's metadata:
    :start-after: [START bigquery_update_dataset_description]
    :end-before: [END bigquery_update_dataset_description]
 
-Update multiple properties in a dataset's metadata:
-
-.. literalinclude:: snippets.py
-   :start-after: [START bigquery_update_dataset_multiple_properties]
-   :end-before: [END bigquery_update_dataset_multiple_properties]
-
 Modify user permissions on a dataset:
 
 .. literalinclude:: snippets.py
@@ -165,30 +159,12 @@ Update a property in a table's metadata:
    :start-after: [START bigquery_update_table_description]
    :end-before: [END bigquery_update_table_description]
 
-Update multiple properties in a table's metadata:
-
-.. literalinclude:: snippets.py
-   :start-after: [START bigquery_update_table_multiple_properties]
-   :end-before: [END bigquery_update_table_multiple_properties]
-
-Get rows from a table's data:
-
-.. literalinclude:: snippets.py
-   :start-after: [START table_list_rows]
-   :end-before: [END table_list_rows]
-
 Browse selected rows in a table:
 
 .. literalinclude:: snippets.py
    :start-after: [START bigquery_browse_table]
    :end-before: [END bigquery_browse_table]
 
-Utilize iterator properties returned with row data:
-
-.. literalinclude:: snippets.py
-   :start-after: [START table_list_rows_iterator_properties]
-   :end-before: [END table_list_rows_iterator_properties]
-
 Insert rows into a table's data:
 
 .. literalinclude:: snippets.py
@@ -216,8 +192,8 @@ Delete a table:
 Upload table data from a file:
 
 .. literalinclude:: snippets.py
-   :start-after: [START load_table_from_file]
-   :end-before: [END load_table_from_file]
+   :start-after: [START bigquery_load_from_file]
+   :end-before: [END bigquery_load_from_file]
 
 Load table data from Google Cloud Storage
 *****************************************
@@ -332,27 +308,15 @@ Queries
 -------
 
-Run a simple query
+Querying data
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Run a query and wait for it to finish:
 
 .. literalinclude:: snippets.py
-   :start-after: [START client_simple_query]
-   :end-before: [END client_simple_query]
-
-
-Querying data
-~~~~~~~~~~~~~
-
-.. literalinclude:: snippets.py
-   :start-after: [START client_query]
-   :end-before: [END client_query]
+   :start-after: [START bigquery_query]
+   :end-before: [END bigquery_query]
 
-.. note::
-
-    Use of the ``timeout`` parameter is optional. The query will continue to
-    run in the background even if it takes longer the timeout allowed.
 
 Run a dry run query
 ~~~~~~~~~~~~~~~~~~~
@@ -361,6 +325,7 @@ Run a dry run query
    :start-after: [START bigquery_query_dry_run]
    :end-before: [END bigquery_query_dry_run]
 
+
 Writing query results to a destination table
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -379,8 +344,8 @@ See BigQuery documentation for more information on
 `parameterized queries
 <https://cloud.google.com/bigquery/docs/parameterized-queries>`_.
 
 .. literalinclude:: snippets.py
-   :start-after: [START client_query_w_param]
-   :end-before: [END client_query_w_param]
+   :start-after: [START bigquery_query_params]
+   :end-before: [END bigquery_query_params]
 
 
 List jobs for a project
@@ -394,8 +359,8 @@ Jobs describe actions performed on data in BigQuery tables:
 - Copy a table
 
 .. literalinclude:: snippets.py
-   :start-after: [START client_list_jobs]
-   :end-before: [END client_list_jobs]
+   :start-after: [START bigquery_list_jobs]
+   :end-before: [END bigquery_list_jobs]
 
 
 Using BigQuery with Pandas

From d7dc4827a81bc7f57fa25f2ddffd6715843c7081 Mon Sep 17 00:00:00 2001
From: Alix Hamilton
Date: Fri, 13 Apr 2018 12:36:40 -0700
Subject: [PATCH 2/2] fixes per comments

---
 docs/bigquery/snippets.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/bigquery/snippets.py b/docs/bigquery/snippets.py
index 9088fb444a2a..f4811f4a3e7c 100644
--- a/docs/bigquery/snippets.py
+++ b/docs/bigquery/snippets.py
@@ -582,14 +582,14 @@ def test_update_table_expiration(client, to_delete):
     to_delete.insert(0, table)
 
     # [START bigquery_update_table_expiration]
+    import datetime
+    import pytz
+
     # from google.cloud import bigquery
     # client = bigquery.Client()
     # table_ref = client.dataset('my_dataset').table('my_table')
     # table = client.get_table(table_ref)  # API request
 
-    import datetime
-    import pytz
-
     assert table.expires is None
 
     # set table to expire 5 days from now
@@ -789,9 +789,9 @@ def test_load_table_from_file(client, to_delete):
     dataset.location = 'US'
     client.create_dataset(dataset)
     to_delete.append(dataset)
-    snippets_directory = os.path.abspath(os.path.dirname(__file__))
+    snippets_dir = os.path.abspath(os.path.dirname(__file__))
     filename = os.path.join(
-        snippets_directory, '../../bigquery/tests/data', 'people.csv')
+        snippets_dir, '..', '..', 'bigquery', 'tests', 'data', 'people.csv')
 
     # [START bigquery_load_from_file]
     # from google.cloud import bigquery