Commit 872bff3
BigQuery: adds load partitioned table sample and refactors existing partition samples (googleapis#5368)
1 parent f6bc038 commit 872bff3

1 file changed

Lines changed: 68 additions & 29 deletions

docs/bigquery/snippets.py
@@ -514,21 +514,15 @@ def test_create_partitioned_table(client, to_delete):
 
     table_ref = dataset_ref.table('my_partitioned_table')
     schema = [
-        bigquery.SchemaField('requested_url', 'STRING', mode='REQUIRED'),
-        bigquery.SchemaField('request_ts', 'TIMESTAMP', mode='REQUIRED'),
-        bigquery.SchemaField(
-            'response_info', 'RECORD', fields=[
-                bigquery.SchemaField('backend_server', 'STRING'),
-                bigquery.SchemaField('response_ms', 'INTEGER'),
-            ]
-        ),
+        bigquery.SchemaField('name', 'STRING'),
+        bigquery.SchemaField('post_abbr', 'STRING'),
+        bigquery.SchemaField('date', 'DATE')
     ]
     table = bigquery.Table(table_ref, schema=schema)
-    time_partitioning = bigquery.TimePartitioning(
+    table.time_partitioning = bigquery.TimePartitioning(
         type_=bigquery.TimePartitioningType.DAY,
-        field='request_ts',  # name of column to use for partitioning
+        field='date',  # name of column to use for partitioning
         expiration_ms=7776000000)  # 90 days
-    table.time_partitioning = time_partitioning
 
     table = client.create_table(table)
 
@@ -537,45 +531,90 @@ def test_create_partitioned_table(client, to_delete):
     # [END bigquery_create_table_partitioned]
 
     assert table.time_partitioning.type_ == 'DAY'
-    assert table.time_partitioning.field == 'request_ts'
+    assert table.time_partitioning.field == 'date'
     assert table.time_partitioning.expiration_ms == 7776000000
 
 
-def test_query_partitioned_table(client, to_delete):
+def test_load_and_query_partitioned_table(client, to_delete):
+    dataset_id = 'load_partitioned_table_dataset_{}'.format(_millis())
+    dataset = bigquery.Dataset(client.dataset(dataset_id))
+    client.create_dataset(dataset)
+    to_delete.append(dataset)
+
+    # [START bigquery_load_table_partitioned]
+    # from google.cloud import bigquery
+    # client = bigquery.Client()
+    # dataset_id = 'my_dataset'
+    table_id = 'us_states_by_date'
+
+    dataset_ref = client.dataset(dataset_id)
+    job_config = bigquery.LoadJobConfig()
+    job_config.schema = [
+        bigquery.SchemaField('name', 'STRING'),
+        bigquery.SchemaField('post_abbr', 'STRING'),
+        bigquery.SchemaField('date', 'DATE')
+    ]
+    job_config.skip_leading_rows = 1
+    job_config.time_partitioning = bigquery.TimePartitioning(
+        type_=bigquery.TimePartitioningType.DAY,
+        field='date',  # name of column to use for partitioning
+        expiration_ms=7776000000)  # 90 days
+    uri = 'gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv'
+
+    load_job = client.load_table_from_uri(
+        uri,
+        dataset_ref.table(table_id),
+        job_config=job_config)  # API request
+
+    assert load_job.job_type == 'load'
+
+    load_job.result()  # Waits for table load to complete.
+
+    table = client.get_table(dataset_ref.table(table_id))
+    print("Loaded {} rows to table {}".format(table.num_rows, table_id))
+    # [END bigquery_load_table_partitioned]
+    assert table.num_rows == 50
+
+    project_id = client.project
+
     # [START bigquery_query_partitioned_table]
     import datetime
-    import pytz
     # from google.cloud import bigquery
     # client = bigquery.Client()
+    # project_id = 'my-project'
+    # dataset_id = 'my_dataset'
+    table_id = 'us_states_by_date'
 
-    sql = """
+    sql_template = """
         SELECT *
-        FROM `bigquery-partition-samples.samples.stackoverflow_comments`
-        WHERE creation_date > @mytime
+        FROM `{}.{}.{}`
+        WHERE date BETWEEN @start_date AND @end_date
     """
-    query_parameters = [
+    sql = sql_template.format(project_id, dataset_id, table_id)
+    job_config = bigquery.QueryJobConfig()
+    job_config.query_parameters = [
         bigquery.ScalarQueryParameter(
-            'mytime',
-            'TIMESTAMP',
-            datetime.datetime(2016, 1, 1, 0, 0, tzinfo=pytz.UTC)
+            'start_date',
+            'DATE',
+            datetime.date(1800, 1, 1)
+        ),
+        bigquery.ScalarQueryParameter(
+            'end_date',
+            'DATE',
+            datetime.date(1899, 12, 31)
         )
     ]
-    job_config = bigquery.QueryJobConfig()
-    job_config.query_parameters = query_parameters
-    job_config.dry_run = True
 
     query_job = client.query(
         sql,
         # Location must match that of the dataset(s) referenced in the query.
         location='US',
         job_config=job_config)  # API request
 
-    # A dry run query completes immediately.
-    assert query_job.state == 'DONE'
-
-    print("This query will process {} bytes.".format(
-        query_job.total_bytes_processed))
+    rows = list(query_job)
+    print("{} states were admitted to the US in the 1800s".format(len(rows)))
     # [END bigquery_query_partitioned_table]
+    assert len(rows) == 29
 
 
 def test_get_table_information(client, to_delete):
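
For anyone adapting the two new snippets outside the pytest harness, here is a minimal standalone sketch of the load-then-query flow, assuming the 1.x google-cloud-bigquery API used in this diff and an already-existing dataset named my_dataset (a placeholder, not part of the commit):

import datetime

from google.cloud import bigquery

client = bigquery.Client()
dataset_ref = client.dataset('my_dataset')  # placeholder; dataset must exist
table_id = 'us_states_by_date'

# Load the sample CSV into a table partitioned by day on the `date` column.
job_config = bigquery.LoadJobConfig()
job_config.schema = [
    bigquery.SchemaField('name', 'STRING'),
    bigquery.SchemaField('post_abbr', 'STRING'),
    bigquery.SchemaField('date', 'DATE'),
]
job_config.skip_leading_rows = 1  # the CSV has a header row
job_config.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.DAY,
    field='date',
    expiration_ms=7776000000)  # partitions expire after 90 days
uri = 'gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv'
client.load_table_from_uri(
    uri, dataset_ref.table(table_id), job_config=job_config).result()

# Filter on the partitioning column so BigQuery can prune partitions,
# passing the bounds as DATE-typed query parameters.
sql = """
    SELECT name, post_abbr, date
    FROM `{}.{}.{}`
    WHERE date BETWEEN @start_date AND @end_date
""".format(client.project, 'my_dataset', table_id)
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = [
    bigquery.ScalarQueryParameter('start_date', 'DATE', datetime.date(1800, 1, 1)),
    bigquery.ScalarQueryParameter('end_date', 'DATE', datetime.date(1899, 12, 31)),
]
rows = list(client.query(sql, location='US', job_config=job_config))
print('{} states were admitted to the US in the 1800s'.format(len(rows)))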
