Skip to content

Commit 38ba166

Browse files
authored
BigQuery: Adds samples for to_dataframe() (googleapis#5051)
1 parent 6e85c95 commit 38ba166

2 files changed

Lines changed: 71 additions & 0 deletions

File tree

docs/bigquery/snippets.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727

2828
import pytest
2929
import six
30+
try:
31+
import pandas
32+
except ImportError:
33+
pandas = None
3034

3135
from google.cloud import bigquery
3236

@@ -1545,5 +1549,39 @@ def do_something_with(_):
15451549
# [END client_list_jobs]
15461550

15471551

1552+
@pytest.mark.skipif(pandas is None, reason='Requires `pandas`')
def test_query_results_as_dataframe(client):
    """Run a top-10 names query and load the result into a DataFrame.

    The statements between the START/END markers are pulled into the
    documentation via ``literalinclude``; the checks after the END
    marker only validate the snippet's output.
    """
    # [START bigquery_query_results_dataframe]
    # client = bigquery.Client()
    sql = """
        SELECT name, SUM(number) as count
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        GROUP BY name
        ORDER BY count DESC
        LIMIT 10
    """

    query_job = client.query(sql)
    df = query_job.to_dataframe()
    # [END bigquery_query_results_dataframe]
    assert isinstance(df, pandas.DataFrame)
    column_labels = list(df)  # iterating a DataFrame yields column labels
    row_count = len(df)
    assert len(column_labels) == 2  # `name` and `count`
    assert row_count == 10  # matches the LIMIT clause
1569+
1570+
1571+
@pytest.mark.skipif(pandas is None, reason='Requires `pandas`')
def test_list_rows_as_dataframe(client):
    """List the rows of a public table and load them into a DataFrame.

    The statements between the START/END markers are pulled into the
    documentation via ``literalinclude``; the checks after the END
    marker only validate the snippet's output.
    """
    # [START bigquery_list_rows_dataframe]
    # client = bigquery.Client()
    dataset_ref = client.dataset('samples', project='bigquery-public-data')
    table_ref = dataset_ref.table('shakespeare')
    table = client.get_table(table_ref)

    df = client.list_rows(table).to_dataframe()
    # [END bigquery_list_rows_dataframe]
    assert isinstance(df, pandas.DataFrame)
    # list_rows() is called without max_results or selected_fields, so the
    # frame holds the whole table. Compare against the table's own metadata
    # instead of hard-coded sizes borrowed from the query sample.
    assert len(list(df)) == len(table.schema)  # verify the number of columns
    assert len(df) == table.num_rows  # verify the number of rows
1584+
1585+
15481586
# When this module is executed directly as a script, invoke pytest.main().
if __name__ == '__main__':
15491587
pytest.main()

docs/bigquery/usage.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,3 +364,36 @@ Jobs describe actions performed on data in BigQuery tables:
364364
.. literalinclude:: snippets.py
365365
:start-after: [START client_list_jobs]
366366
:end-before: [END client_list_jobs]
367+
368+
369+
Using BigQuery with Pandas
370+
~~~~~~~~~~~~~~~~~~~~~~~~~~
371+
372+
As of version 0.29.0, you can use the
373+
:meth:`~google.cloud.bigquery.table.RowIterator.to_dataframe` method to
374+
retrieve query results or table rows as a :class:`pandas.DataFrame`.
375+
376+
First, ensure that the :mod:`pandas` library is installed by running:
377+
378+
.. code-block:: bash
379+
380+
pip install --upgrade pandas
381+
382+
Alternatively, you can install the BigQuery Python client library with
383+
:mod:`pandas` by running:
384+
385+
.. code-block:: bash
386+
387+
pip install --upgrade google-cloud-bigquery[pandas]
388+
389+
To retrieve query results as a :class:`pandas.DataFrame`:
390+
391+
.. literalinclude:: snippets.py
392+
:start-after: [START bigquery_query_results_dataframe]
393+
:end-before: [END bigquery_query_results_dataframe]
394+
395+
To retrieve table rows as a :class:`pandas.DataFrame`:
396+
397+
.. literalinclude:: snippets.py
398+
:start-after: [START bigquery_list_rows_dataframe]
399+
:end-before: [END bigquery_list_rows_dataframe]

0 commit comments

Comments
 (0)