@@ -38,12 +38,27 @@ Using the API
3838
3939Querying massive datasets can be time consuming and expensive without the
4040right hardware and infrastructure. Google `BigQuery `_ (`BigQuery API docs `_)
41- solves this problem by enabling super-fast, SQL-like queries against
42- append-only tables, using the processing power of Google's infrastructure.
41+ solves this problem by enabling super-fast, SQL queries against
42+ append-mostly tables, using the processing power of Google's infrastructure.
4343
4444.. _BigQuery : https://cloud.google.com/bigquery/what-is-bigquery
4545.. _BigQuery API docs : https://cloud.google.com/bigquery/docs/reference/v2/
4646
47+ Create a dataset
48+ ~~~~~~~~~~~~~~~~
49+
50+ .. code:: python
51+
52+ from google.cloud import bigquery
53+ from google.cloud.bigquery import Dataset
54+
55+ client = bigquery.Client()
56+
57+ dataset_ref = client.dataset('dataset_name')
58+ dataset = Dataset(dataset_ref)
59+ dataset.description = 'my dataset'
60+ dataset = client.create_dataset(dataset)  # API request
61+
4762 Load data from CSV
4863~~~~~~~~~~~~~~~~~~
4964
@@ -52,39 +67,43 @@ Load data from CSV
5267 import csv
5368
5469 from google.cloud import bigquery
70+ from google.cloud.bigquery import LoadJobConfig
5571 from google.cloud.bigquery import SchemaField
5672
5773 client = bigquery.Client()
5874
59- dataset = client.dataset(' dataset_name' )
60- dataset.create() # API request
61-
6275 SCHEMA = [
6376 SchemaField('full_name', 'STRING', mode='required'),
6477 SchemaField('age', 'INTEGER', mode='required'),
6578 ]
66- table = dataset.table(' table_name' , SCHEMA )
67- table.create()
79+ table_ref = client.dataset('dataset_name').table('table_name')
80+
81+ load_config = LoadJobConfig()
82+ load_config.skip_leading_rows = 1
83+ load_config.schema = SCHEMA
6884
69- with open (' csv_file' , ' rb' ) as readable:
70- table.upload_from_file(
71- readable, source_format = ' CSV' , skip_leading_rows = 1 )
85+ # Contents of csv_file.csv:
86+ # Name,Age
87+ # Tim,99
88+ with open('csv_file.csv', 'rb') as readable:
89+ client.load_table_from_file(
90+ readable, table_ref, job_config=load_config)  # API request
7291
73- Perform a synchronous query
74- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
92+ Perform a query
93+ ~~~~~~~~~~~~~~~
7594
7695.. code:: python
7796
78- # Perform a synchronous query.
97+ # Perform a query.
7998 QUERY = (
80- ' SELECT name FROM [ bigquery-public-data: usa_names.usa_1910_2013] '
81- ' WHERE state = "TX"' )
82- query = client.run_sync_query( ' %s LIMIT 100' % QUERY )
83- query.timeout_ms = TIMEOUT_MS
84- query.run()
85-
86- for row in query. rows:
87- print (row)
99+ 'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` '
100+ 'WHERE state = "TX" '
101+ 'LIMIT 100')
102+ query_job = client.query(QUERY)  # API request
103+ rows = query_job.result()  # Waits for query to finish
104+
105+ for row in rows:
106+ print(row.name)
88107
89108
90109 See the ``google-cloud-python `` API `BigQuery documentation `_ to learn how
0 commit comments