Skip to content

Commit de7af59

Browse files
committed
Merge remote-tracking branch 'upstream/master' into b140579733-bq-to_dataframe-part-2
2 parents 2e35f78 + 240f381 commit de7af59

File tree

3 files changed

+99
-8
lines changed

3 files changed

+99
-8
lines changed

bigquery/google/cloud/bigquery/table.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,11 +1565,44 @@ def to_arrow(
15651565
arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema)
15661566
return pyarrow.Table.from_batches(record_batches, schema=arrow_schema)
15671567

1568-
def _to_dataframe_iterable(self, bqstorage_client=None, dtypes=None):
1568+
def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None):
15691569
"""Create an iterable of pandas DataFrames, to process the table as a stream.
15701570
1571-
See ``to_dataframe`` for argument descriptions.
1571+
Args:
1572+
bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient):
1573+
**Beta Feature** Optional. A BigQuery Storage API client. If
1574+
supplied, use the faster BigQuery Storage API to fetch rows
1575+
from BigQuery.
1576+
1577+
This method requires the ``pyarrow`` and
1578+
``google-cloud-bigquery-storage`` libraries.
1579+
1580+
Reading from a specific partition or snapshot is not
1581+
currently supported by this method.
1582+
1583+
**Caution**: There is a known issue reading small anonymous
1584+
query result tables with the BQ Storage API. When a problem
1585+
is encountered reading a table, the tabledata.list method
1586+
from the BigQuery API is used instead.
1587+
dtypes (Map[str, Union[str, pandas.Series.dtype]]):
1588+
Optional. A dictionary mapping column names to pandas ``dtype``s. The
1589+
provided ``dtype`` is used when constructing the series for
1590+
the column specified. Otherwise, the default pandas behavior
1591+
is used.
1592+
1593+
Returns:
1594+
Iterator[pandas.DataFrame]:
1595+
A generator of :class:`~pandas.DataFrame`.
1596+
1597+
Raises:
1598+
ValueError:
1599+
If the :mod:`pandas` library cannot be imported.
15721600
"""
1601+
if pandas is None:
1602+
raise ValueError(_NO_PANDAS_ERROR)
1603+
if dtypes is None:
1604+
dtypes = {}
1605+
15731606
column_names = [field.name for field in self._schema]
15741607
bqstorage_download = functools.partial(
15751608
_pandas_helpers.download_dataframe_bqstorage,
@@ -1705,7 +1738,7 @@ def to_dataframe(
17051738
progress_bar = self._get_progress_bar(progress_bar_type)
17061739

17071740
frames = []
1708-
for frame in self._to_dataframe_iterable(dtypes=dtypes):
1741+
for frame in self.to_dataframe_iterable(dtypes=dtypes):
17091742
frames.append(frame)
17101743

17111744
if progress_bar is not None:

bigquery/tests/unit/test_table.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2054,6 +2054,68 @@ def test_to_arrow_w_pyarrow_none(self):
20542054
with self.assertRaises(ValueError):
20552055
row_iterator.to_arrow()
20562056

2057+
@unittest.skipIf(pandas is None, "Requires `pandas`")
2058+
def test_to_dataframe_iterable(self):
2059+
from google.cloud.bigquery.schema import SchemaField
2060+
import types
2061+
2062+
schema = [
2063+
SchemaField("name", "STRING", mode="REQUIRED"),
2064+
SchemaField("age", "INTEGER", mode="REQUIRED"),
2065+
]
2066+
2067+
path = "/foo"
2068+
api_request = mock.Mock(
2069+
side_effect=[
2070+
{
2071+
"rows": [{"f": [{"v": "Bengt"}, {"v": "32"}]}],
2072+
"pageToken": "NEXTPAGE",
2073+
},
2074+
{"rows": [{"f": [{"v": "Sven"}, {"v": "33"}]}]},
2075+
]
2076+
)
2077+
2078+
row_iterator = self._make_one(
2079+
_mock_client(), api_request, path, schema, page_size=1, max_results=5
2080+
)
2081+
dfs = row_iterator.to_dataframe_iterable()
2082+
2083+
self.assertIsInstance(dfs, types.GeneratorType)
2084+
2085+
df_1 = next(dfs)
2086+
self.assertIsInstance(df_1, pandas.DataFrame)
2087+
self.assertEqual(df_1.name.dtype.name, "object")
2088+
self.assertEqual(df_1.age.dtype.name, "int64")
2089+
self.assertEqual(len(df_1), 1) # verify the number of rows
2090+
self.assertEqual(
2091+
df_1["name"][0], "Bengt"
2092+
) # verify the first value of 'name' column
2093+
self.assertEqual(df_1["age"][0], 32) # verify the first value of 'age' column
2094+
2095+
df_2 = next(dfs)
2096+
self.assertEqual(len(df_2), 1) # verify the number of rows
2097+
self.assertEqual(df_2["name"][0], "Sven")
2098+
self.assertEqual(df_2["age"][0], 33)
2099+
2100+
@mock.patch("google.cloud.bigquery.table.pandas", new=None)
2101+
def test_to_dataframe_iterable_error_if_pandas_is_none(self):
2102+
from google.cloud.bigquery.schema import SchemaField
2103+
2104+
schema = [
2105+
SchemaField("name", "STRING", mode="REQUIRED"),
2106+
SchemaField("age", "INTEGER", mode="REQUIRED"),
2107+
]
2108+
rows = [
2109+
{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
2110+
{"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
2111+
]
2112+
path = "/foo"
2113+
api_request = mock.Mock(return_value={"rows": rows})
2114+
row_iterator = self._make_one(_mock_client(), api_request, path, schema)
2115+
2116+
with pytest.raises(ValueError, match="pandas"):
2117+
row_iterator.to_dataframe_iterable()
2118+
20572119
@unittest.skipIf(pandas is None, "Requires `pandas`")
20582120
def test_to_dataframe(self):
20592121
from google.cloud.bigquery.schema import SchemaField

storage/google/cloud/storage/blob.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -546,15 +546,11 @@ def delete(self, client=None):
546546
:param client: Optional. The client to use. If not passed, falls back
547547
to the ``client`` stored on the blob's bucket.
548548
549-
:rtype: :class:`Blob`
550-
:returns: The blob that was just deleted.
551549
:raises: :class:`google.cloud.exceptions.NotFound`
552550
(propagated from
553551
:meth:`google.cloud.storage.bucket.Bucket.delete_blob`).
554552
"""
555-
return self.bucket.delete_blob(
556-
self.name, client=client, generation=self.generation
557-
)
553+
self.bucket.delete_blob(self.name, client=client, generation=self.generation)
558554

559555
def _get_transport(self, client):
560556
"""Return the client's transport.

0 commit comments

Comments
 (0)