Skip to content

Commit a0b5c13

Browse files
authored
BigQuery: Adds samples from python-docs-samples (googleapis#5199)
* updates parameterized query variables to be consistent with command-line sample
* adds query parameter samples
* adds auth snippets
* update extract and load per python-docs-samples
* updates query samples per python-docs-samples
1 parent 9c745f2 commit a0b5c13

2 files changed

Lines changed: 237 additions & 19 deletions

File tree

docs/bigquery/snippets.py

Lines changed: 235 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import os
2727
import time
2828

29+
import mock
2930
import pytest
3031
import six
3132
try:
@@ -90,6 +91,40 @@ def delete(self):
9091
self._wrapped.close()
9192

9293

94+
def test_create_client_default_credentials():
95+
"""Create a BigQuery client with Application Default Credentials"""
96+
97+
# [START bigquery_client_default_credentials]
98+
from google.cloud import bigquery
99+
100+
# If you don't specify credentials when constructing the client, the
101+
# client library will look for credentials in the environment.
102+
client = bigquery.Client()
103+
# [END bigquery_client_default_credentials]
104+
105+
assert client is not None
106+
107+
108+
def test_create_client_json_credentials():
109+
"""Create a BigQuery client with Application Default Credentials"""
110+
with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS']) as creds_file:
111+
creds_file_data = creds_file.read()
112+
113+
open_mock = mock.mock_open(read_data=creds_file_data)
114+
115+
with mock.patch('io.open', open_mock):
116+
# [START bigquery_client_json_credentials]
117+
from google.cloud import bigquery
118+
119+
# Explicitly use service account credentials by specifying the private
120+
# key file. All clients in google-cloud-python have this helper.
121+
client = bigquery.Client.from_service_account_json(
122+
'path/to/service_account.json')
123+
# [END bigquery_client_json_credentials]
124+
125+
assert client is not None
126+
127+
93128
def test_list_datasets(client):
94129
"""List datasets for a project."""
95130
# [START bigquery_list_datasets]
@@ -816,7 +851,8 @@ def test_load_table_from_file(client, to_delete):
816851

817852
job.result() # Waits for table load to complete.
818853

819-
print('Job {} completed.'.format(job.job_id))
854+
print('Loaded {} rows into {}:{}.'.format(
855+
job.output_rows, dataset_id, table_id))
820856
# [END bigquery_load_from_file]
821857

822858
table = client.get_table(table_ref)
@@ -1473,17 +1509,23 @@ def test_extract_table(client, to_delete):
14731509
# from google.cloud import bigquery
14741510
# client = bigquery.Client()
14751511
# bucket_name = 'my-bucket'
1512+
project = 'bigquery-public-data'
1513+
dataset_id = 'samples'
1514+
table_id = 'shakespeare'
14761515

14771516
destination_uri = 'gs://{}/{}'.format(bucket_name, 'shakespeare.csv')
1478-
dataset_ref = client.dataset('samples', project='bigquery-public-data')
1479-
table_ref = dataset_ref.table('shakespeare')
1517+
dataset_ref = client.dataset(dataset_id, project=project)
1518+
table_ref = dataset_ref.table(table_id)
14801519

14811520
extract_job = client.extract_table(
14821521
table_ref,
14831522
destination_uri,
14841523
# Location must match that of the source table.
14851524
location='US') # API request
14861525
extract_job.result() # Waits for job to complete.
1526+
1527+
print('Exported {}:{}.{} to {}'.format(
1528+
project, dataset_id, table_id, destination_uri))
14871529
# [END bigquery_extract_table]
14881530

14891531
blob = bucket.get_blob('shakespeare.csv')
@@ -1611,9 +1653,39 @@ def test_client_query(client):
16111653
for row in query_job: # API request - fetches results
16121654
# Row values can be accessed by field name or index
16131655
assert row[0] == row.name == row['name']
1656+
print(row)
16141657
# [END bigquery_query]
16151658

16161659

1660+
def test_client_query_standard_sql(client):
1661+
"""Run a query with Standard SQL explicitly set"""
1662+
# [START bigquery_query_standard]
1663+
# from google.cloud import bigquery
1664+
# client = bigquery.Client()
1665+
1666+
query = (
1667+
'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
1668+
'WHERE state = "TX" '
1669+
'LIMIT 100')
1670+
1671+
# Set use_legacy_sql to False to use standard SQL syntax.
1672+
# Note that queries run through the Python Client Library are set to use
1673+
# standard SQL by default.
1674+
job_config = bigquery.QueryJobConfig()
1675+
job_config.use_legacy_sql = False
1676+
1677+
query_job = client.query(
1678+
query,
1679+
# Location must match that of the dataset(s) referenced in the query.
1680+
location='US',
1681+
job_config=job_config) # API request - starts the query
1682+
1683+
# Print the results.
1684+
for row in query_job: # API request - fetches results
1685+
print(row)
1686+
# [END bigquery_query_standard]
1687+
1688+
16171689
def test_client_query_destination_table(client, to_delete):
16181690
"""Run a query"""
16191691
dataset_id = 'query_destination_table_{}'.format(_millis())
@@ -1638,7 +1710,7 @@ def test_client_query_destination_table(client, to_delete):
16381710
# The write_disposition specifies the behavior when writing query results
16391711
# to a table that already exists. With WRITE_TRUNCATE, any existing rows
16401712
# in the table are overwritten by the query results.
1641-
job_config.write_disposition = 'WRITE_TRUNCATE'
1713+
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
16421714

16431715
# Start the query, passing in the extra configuration.
16441716
query_job = client.query(
@@ -1709,10 +1781,10 @@ def test_client_query_destination_table_cmek(client, to_delete):
17091781
# [END bigquery_query_destination_table_cmek]
17101782

17111783

1712-
def test_client_query_w_params(client):
1713-
"""Run a query using a query parameter"""
1784+
def test_client_query_w_named_params(client, capsys):
1785+
"""Run a query using named query parameters"""
17141786

1715-
# [START bigquery_query_params]
1787+
# [START bigquery_query_params_named]
17161788
# from google.cloud import bigquery
17171789
# client = bigquery.Client()
17181790

@@ -1723,12 +1795,84 @@ def test_client_query_w_params(client):
17231795
AND word_count >= @min_word_count
17241796
ORDER BY word_count DESC;
17251797
"""
1726-
corpus = 'hamlet'
1727-
min_word_count = 100
17281798
query_params = [
1729-
bigquery.ScalarQueryParameter('corpus', 'STRING', corpus),
1799+
bigquery.ScalarQueryParameter('corpus', 'STRING', 'romeoandjuliet'),
1800+
bigquery.ScalarQueryParameter('min_word_count', 'INT64', 250)
1801+
]
1802+
job_config = bigquery.QueryJobConfig()
1803+
job_config.query_parameters = query_params
1804+
query_job = client.query(
1805+
query,
1806+
# Location must match that of the dataset(s) referenced in the query.
1807+
location='US',
1808+
job_config=job_config) # API request - starts the query
1809+
1810+
# Print the results
1811+
for row in query_job:
1812+
print('{}: \t{}'.format(row.word, row.word_count))
1813+
1814+
assert query_job.state == 'DONE'
1815+
# [END bigquery_query_params_named]
1816+
1817+
out, _ = capsys.readouterr()
1818+
assert 'the' in out
1819+
1820+
1821+
def test_client_query_w_positional_params(client, capsys):
1822+
"""Run a query using query parameters"""
1823+
1824+
# [START bigquery_query_params_positional]
1825+
# from google.cloud import bigquery
1826+
# client = bigquery.Client()
1827+
1828+
query = """
1829+
SELECT word, word_count
1830+
FROM `bigquery-public-data.samples.shakespeare`
1831+
WHERE corpus = ?
1832+
AND word_count >= ?
1833+
ORDER BY word_count DESC;
1834+
"""
1835+
# Set the name to None to use positional parameters.
1836+
# Note that you cannot mix named and positional parameters.
1837+
query_params = [
1838+
bigquery.ScalarQueryParameter(None, 'STRING', 'romeoandjuliet'),
1839+
bigquery.ScalarQueryParameter(None, 'INT64', 250)
1840+
]
1841+
job_config = bigquery.QueryJobConfig()
1842+
job_config.query_parameters = query_params
1843+
query_job = client.query(
1844+
query,
1845+
# Location must match that of the dataset(s) referenced in the query.
1846+
location='US',
1847+
job_config=job_config) # API request - starts the query
1848+
1849+
# Print the results
1850+
for row in query_job:
1851+
print('{}: \t{}'.format(row.word, row.word_count))
1852+
1853+
assert query_job.state == 'DONE'
1854+
# [END bigquery_query_params_positional]
1855+
1856+
out, _ = capsys.readouterr()
1857+
assert 'the' in out
1858+
1859+
1860+
def test_client_query_w_timestamp_params(client, capsys):
1861+
"""Run a query using query parameters"""
1862+
1863+
# [START bigquery_query_params_timestamps]
1864+
# from google.cloud import bigquery
1865+
# client = bigquery.Client()
1866+
1867+
import datetime
1868+
import pytz
1869+
1870+
query = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);'
1871+
query_params = [
17301872
bigquery.ScalarQueryParameter(
1731-
'min_word_count', 'INT64', min_word_count)
1873+
'ts_value',
1874+
'TIMESTAMP',
1875+
datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC))
17321876
]
17331877
job_config = bigquery.QueryJobConfig()
17341878
job_config.query_parameters = query_params
@@ -1738,14 +1882,88 @@ def test_client_query_w_params(client):
17381882
location='US',
17391883
job_config=job_config) # API request - starts the query
17401884

1741-
# Waits for the query to finish
1742-
timeout = 30 # in seconds
1743-
iterator = query_job.result(timeout=timeout)
1744-
rows = list(iterator)
1885+
# Print the results
1886+
for row in query_job:
1887+
print(row)
17451888

17461889
assert query_job.state == 'DONE'
1747-
assert len(rows) > 0
1748-
# [END bigquery_query_params]
1890+
# [END bigquery_query_params_timestamps]
1891+
1892+
out, _ = capsys.readouterr()
1893+
assert '2016, 12, 7, 9, 0' in out
1894+
1895+
1896+
def test_client_query_w_array_params(client, capsys):
1897+
"""Run a query using array query parameters"""
1898+
# [START bigquery_query_params_arrays]
1899+
# from google.cloud import bigquery
1900+
# client = bigquery.Client()
1901+
1902+
query = """
1903+
SELECT name, sum(number) as count
1904+
FROM `bigquery-public-data.usa_names.usa_1910_2013`
1905+
WHERE gender = @gender
1906+
AND state IN UNNEST(@states)
1907+
GROUP BY name
1908+
ORDER BY count DESC
1909+
LIMIT 10;
1910+
"""
1911+
query_params = [
1912+
bigquery.ScalarQueryParameter('gender', 'STRING', 'M'),
1913+
bigquery.ArrayQueryParameter(
1914+
'states', 'STRING', ['WA', 'WI', 'WV', 'WY'])
1915+
]
1916+
job_config = bigquery.QueryJobConfig()
1917+
job_config.query_parameters = query_params
1918+
query_job = client.query(
1919+
query,
1920+
# Location must match that of the dataset(s) referenced in the query.
1921+
location='US',
1922+
job_config=job_config) # API request - starts the query
1923+
1924+
# Print the results
1925+
for row in query_job:
1926+
print('{}: \t{}'.format(row.name, row.count))
1927+
1928+
assert query_job.state == 'DONE'
1929+
# [END bigquery_query_params_arrays]
1930+
1931+
out, _ = capsys.readouterr()
1932+
assert 'James' in out
1933+
1934+
1935+
def test_client_query_w_struct_params(client, capsys):
1936+
"""Run a query using struct query parameters"""
1937+
# [START bigquery_query_params_structs]
1938+
# from google.cloud import bigquery
1939+
# client = bigquery.Client()
1940+
1941+
query = 'SELECT @struct_value AS s;'
1942+
query_params = [
1943+
bigquery.StructQueryParameter(
1944+
'struct_value',
1945+
bigquery.ScalarQueryParameter('x', 'INT64', 1),
1946+
bigquery.ScalarQueryParameter('y', 'STRING', 'foo')
1947+
)
1948+
]
1949+
job_config = bigquery.QueryJobConfig()
1950+
job_config.query_parameters = query_params
1951+
query_job = client.query(
1952+
query,
1953+
# Location must match that of the dataset(s) referenced in the query.
1954+
location='US',
1955+
job_config=job_config) # API request - starts the query
1956+
1957+
# Print the results
1958+
for row in query_job:
1959+
print(row.s)
1960+
1961+
assert query_job.state == 'DONE'
1962+
# [END bigquery_query_params_structs]
1963+
1964+
out, _ = capsys.readouterr()
1965+
assert '1' in out
1966+
assert 'foo' in out
17491967

17501968

17511969
def test_client_query_dry_run(client):

docs/bigquery/usage.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,8 +344,8 @@ See BigQuery documentation for more information on
344344
`parameterized queries <https://cloud.google.com/bigquery/docs/parameterized-queries>`_.
345345

346346
.. literalinclude:: snippets.py
347-
:start-after: [START bigquery_query_params]
348-
:end-before: [END bigquery_query_params]
347+
:start-after: [START bigquery_query_params_named]
348+
:end-before: [END bigquery_query_params_named]
349349

350350

351351
List jobs for a project

0 commit comments

Comments (0)