From d3092121f6558049c87df0fa4f98c1f5a6118b2a Mon Sep 17 00:00:00 2001 From: John Paton Date: Sat, 23 Mar 2019 15:07:48 +0100 Subject: [PATCH 01/11] Add tqdm progressbar for bigquery downloads --- bigquery/google/cloud/bigquery/table.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index 35f950e7d94a..eec09ead9834 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -1334,8 +1334,21 @@ def _to_dataframe_tabledata_list(self, dtypes): """Use (slower, but free) tabledata.list to construct a DataFrame.""" column_names = [field.name for field in self.schema] frames = [] + + # report progress if tqdm installed + try: + from tqdm import tqdm + pbar = tqdm(desc="Downloading", total=self.total_rows, unit="rows") + except ImportError: + pbar = None + for page in iter(self.pages): frames.append(self._to_dataframe_dtypes(page, column_names, dtypes)) + + if pbar is not None: + # update progress bar with number of rows in last frame + pbar.update(len(frames[-1])) + return pandas.concat(frames) def _to_dataframe_bqstorage(self, bqstorage_client, dtypes): From c6efe1b8fe392c6cdeaf248800036d5e99d71fd3 Mon Sep 17 00:00:00 2001 From: John Paton Date: Sat, 23 Mar 2019 15:16:22 +0100 Subject: [PATCH 02/11] Catch any tqdm errors during progress bar construction --- bigquery/google/cloud/bigquery/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index eec09ead9834..b2a93a8e0c39 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -1339,7 +1339,7 @@ def _to_dataframe_tabledata_list(self, dtypes): try: from tqdm import tqdm pbar = tqdm(desc="Downloading", total=self.total_rows, unit="rows") - except ImportError: + except (ImportError, KeyError, TypeError): pbar = None for page in 
iter(self.pages): From b23d581198e747b53f7a163012ef937d249cc32e Mon Sep 17 00:00:00 2001 From: John Paton Date: Mon, 25 Mar 2019 11:12:13 +0100 Subject: [PATCH 03/11] Update total rows in progress bar during download --- bigquery/google/cloud/bigquery/table.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index b2a93a8e0c39..e2a2b416357f 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -1346,6 +1346,7 @@ def _to_dataframe_tabledata_list(self, dtypes): frames.append(self._to_dataframe_dtypes(page, column_names, dtypes)) if pbar is not None: + pbar.total = pbar.total or self.total_rows # update progress bar with number of rows in last frame pbar.update(len(frames[-1])) From 3c1f0081c2ebc25063933a6fb74a2e2a63b1d602 Mon Sep 17 00:00:00 2001 From: John Paton Date: Wed, 27 Mar 2019 14:29:00 +0100 Subject: [PATCH 04/11] tqdm import to pandas pattern --- bigquery/google/cloud/bigquery/table.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index e2a2b416357f..65c16087d045 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -30,6 +30,11 @@ except ImportError: # pragma: NO COVER pandas = None +try: + import tqdm +except ImportError: # pragma: NO COVER + tqdm = None + from google.api_core.page_iterator import HTTPIterator import google.cloud._helpers @@ -1336,11 +1341,15 @@ def _to_dataframe_tabledata_list(self, dtypes): frames = [] # report progress if tqdm installed - try: - from tqdm import tqdm - pbar = tqdm(desc="Downloading", total=self.total_rows, unit="rows") - except (ImportError, KeyError, TypeError): - pbar = None + pbar = None + if tqdm is not None: + try: + pbar = tqdm.tqdm( + desc="Downloading", total=self.total_rows, unit="rows" + ) + except (KeyError, 
TypeError): + # tqdm error + pass for page in iter(self.pages): frames.append(self._to_dataframe_dtypes(page, column_names, dtypes)) From c0e3a39869e7899c512d192f75522885c1e13462 Mon Sep 17 00:00:00 2001 From: John Paton Date: Wed, 27 Mar 2019 14:29:10 +0100 Subject: [PATCH 05/11] unit tests for download progress bar --- bigquery/tests/unit/test_table.py | 77 +++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py index d9ba9db3f05d..a4a8df984984 100644 --- a/bigquery/tests/unit/test_table.py +++ b/bigquery/tests/unit/test_table.py @@ -29,6 +29,11 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + from tqdm import tqdm +except (ImportError, AttributeError): # pragma: NO COVER + tqdm = None + from google.cloud.bigquery.dataset import DatasetReference @@ -1413,6 +1418,78 @@ def test_to_dataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "int64") + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(tqdm is None, "Requires `tqdm`") + @mock.patch('tqdm.tqdm') + def test_to_dataframe_progress_bar(self, tqdm_mock): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) + df = row_iterator.to_dataframe() + + tqdm_mock.assert_called() + tqdm_mock().update.assert_called() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + 
@mock.patch("google.cloud.bigquery.table.tqdm", new=None) + def test_to_dataframe_no_tqdm(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) + df = row_iterator.to_dataframe() + + self.assertFalse(len(df) == 0) # all should be well + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(tqdm is None, "Requires `tqdm`") + @mock.patch("tqdm.tqdm", new=None) # will raise TypeError on call + def test_to_dataframe_tqdm_error(self): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) + df = row_iterator.to_dataframe() + + self.assertFalse(len(df) == 0) # all should be well + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): from google.cloud.bigquery.table import RowIterator From 0693789508e4fa63fade7a76e702d9d2fe8e1607 Mon Sep 17 00:00:00 2001 From: John Paton Date: Wed, 27 Mar 2019 14:32:54 +0100 Subject: [PATCH 06/11] Add tqdm to setup.py 
extras --- bigquery/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bigquery/setup.py b/bigquery/setup.py index 7cd901917e4c..5174007c0e16 100644 --- a/bigquery/setup.py +++ b/bigquery/setup.py @@ -39,6 +39,7 @@ # Exclude PyArrow dependency from Windows Python 2.7. 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': 'pyarrow>=0.4.1', + 'tqdm': 'tqdm >= 4.31.1', } From a7d9d591215b8125a5ad6e78b7b4db8bf146eb23 Mon Sep 17 00:00:00 2001 From: John Paton Date: Wed, 27 Mar 2019 17:44:38 +0100 Subject: [PATCH 07/11] Make progress bar tests more specific --- bigquery/tests/unit/test_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py index a4a8df984984..b115b72c5f88 100644 --- a/bigquery/tests/unit/test_table.py +++ b/bigquery/tests/unit/test_table.py @@ -1464,7 +1464,7 @@ def test_to_dataframe_no_tqdm(self): row_iterator = RowIterator(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe() - self.assertFalse(len(df) == 0) # all should be well + self.assertEqual(len(df), 4) # all should be well @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @@ -1488,7 +1488,7 @@ def test_to_dataframe_tqdm_error(self): row_iterator = RowIterator(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe() - self.assertFalse(len(df) == 0) # all should be well + self.assertEqual(len(df), 4) # all should be well @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): From 25f4e136bed4cb5bebba00297ff90e745eed8339 Mon Sep 17 00:00:00 2001 From: John Paton Date: Wed, 27 Mar 2019 17:45:05 +0100 Subject: [PATCH 08/11] Style adjustments to improve clarity --- bigquery/google/cloud/bigquery/table.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/bigquery/google/cloud/bigquery/table.py 
b/bigquery/google/cloud/bigquery/table.py index 65c16087d045..827713e20586 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -1341,23 +1341,28 @@ def _to_dataframe_tabledata_list(self, dtypes): frames = [] # report progress if tqdm installed - pbar = None + progress_bar = None if tqdm is not None: try: - pbar = tqdm.tqdm( + progress_bar = tqdm.tqdm( desc="Downloading", total=self.total_rows, unit="rows" ) except (KeyError, TypeError): - # tqdm error + # Protect ourselves from any tqdm errors. In case of + # unexpected tqdm behavior, just fall back to showing + # no progress bar. pass for page in iter(self.pages): - frames.append(self._to_dataframe_dtypes(page, column_names, dtypes)) - - if pbar is not None: - pbar.total = pbar.total or self.total_rows - # update progress bar with number of rows in last frame - pbar.update(len(frames[-1])) + current_frame = self._to_dataframe_dtypes(page, column_names, dtypes) + frames.append(current_frame) + + if progress_bar is not None: + # In some cases, the number of total rows is not populated + # until the first page of rows is fetched. Update the + # progress bar's total to keep an accurate count. 
+ progress_bar.total = progress_bar.total or self.total_rows + progress_bar.update(len(current_frame)) return pandas.concat(frames) From 4aafefece9970f4e1a2d4ff095ff1f406c5380dd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 28 Mar 2019 10:43:03 -0700 Subject: [PATCH 09/11] Add progress_bar_type argument to to_dataframe --- bigquery/docs/conf.py | 3 +- bigquery/google/cloud/bigquery/job.py | 23 ++++++++- bigquery/google/cloud/bigquery/table.py | 68 +++++++++++++++++++------ docs/conf.py | 5 +- 4 files changed, 79 insertions(+), 20 deletions(-) diff --git a/bigquery/docs/conf.py b/bigquery/docs/conf.py index 62815ae73b38..0810a99ba2ca 100644 --- a/bigquery/docs/conf.py +++ b/bigquery/docs/conf.py @@ -326,9 +326,10 @@ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - "python": ("http://python.readthedocs.org/en/latest/", None), "gax": ("https://gax-python.readthedocs.org/en/latest/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "python": ("http://python.readthedocs.org/en/latest/", None), + 'tqdm': ('https://tqdm.github.io/', None), } # Napoleon settings diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 4df7a92ba63c..d03420ed7f2e 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -2778,7 +2778,7 @@ def result(self, timeout=None, retry=DEFAULT_RETRY): dest_table = Table(dest_table_ref, schema=schema) return self._client.list_rows(dest_table, retry=retry) - def to_dataframe(self, bqstorage_client=None, dtypes=None): + def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Return a pandas DataFrame from a QueryJob Args: @@ -2805,6 +2805,27 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None): provided ``dtype`` is used when constructing the series for the column specified. Otherwise, the default pandas behavior is used. 
+ progress_bar_type (Optional[str]): + If set, use the `tqdm <https://tqdm.github.io/>`_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :object:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + [Experimental] Use the :func:`tqdm.tqdm_gui` function to + display a progress bar as a graphical dialog box. Requires + matplotlib. + + ..versionadded:: 1.11.0 Returns: A :class:`~pandas.DataFrame` populated with row data and column diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index 827713e20586..a0bd7ec2c5f7 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -1335,24 +1335,11 @@ def _to_dataframe_dtypes(self, page, column_names, dtypes): columns[column] = pandas.Series(columns[column], dtype=dtypes[column]) return pandas.DataFrame(columns, columns=column_names) - def _to_dataframe_tabledata_list(self, dtypes): + def _to_dataframe_tabledata_list(self, dtypes, progress_bar=None): """Use (slower, but free) tabledata.list to construct a DataFrame.""" column_names = [field.name for field in self.schema] frames = [] - # report progress if tqdm installed - progress_bar = None - if tqdm is not None: - try: - progress_bar = tqdm.tqdm( - desc="Downloading", total=self.total_rows, unit="rows" - ) - except (KeyError, TypeError): - # Protect ourselves from any tqdm errors. In case of - # unexpected tqdm behavior, just fall back to showing - # no progress bar. 
- pass - for page in iter(self.pages): current_frame = self._to_dataframe_dtypes(page, column_names, dtypes) frames.append(current_frame) @@ -1413,7 +1400,34 @@ def get_dataframe(stream): # the end using manually-parsed schema. return pandas.concat(frames)[columns] - def to_dataframe(self, bqstorage_client=None, dtypes=None): + def _get_progress_bar(self, progress_bar_type): + """Construct a tqdm progress bar object, if tqdm is installed.""" + if tqdm is None: + return None + + try: + if progress_bar_type == "tqdm": + return tqdm.tqdm( + desc="Downloading", total=self.total_rows, unit="rows" + ) + elif progress_bar_type == "tqdm_notebook": + return tqdm.tqdm_notebook( + desc="Downloading", total=self.total_rows, unit="rows" + ) + elif progress_bar_type == "tqdm_gui": + return tqdm.tqdm_gui( + desc="Downloading", total=self.total_rows, unit="rows" + ) + except (KeyError, TypeError): + # Protect ourselves from any tqdm errors. In case of + # unexpected tqdm behavior, just fall back to showing + # no progress bar. + pass + return None + + def to_dataframe( + self, bqstorage_client=None, dtypes=None, progress_bar_type=None + ): """Create a pandas DataFrame by loading all pages of a query. @@ -1441,6 +1455,26 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None): provided ``dtype`` is used when constructing the series for the column specified. Otherwise, the default pandas behavior is used. + progress_bar_type (Optional[str]): + If set, use the `tqdm <https://tqdm.github.io/>`_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :object:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. 
+ ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + .. versionadded:: 1.11.0 Returns: pandas.DataFrame: @@ -1457,10 +1491,12 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None): if dtypes is None: dtypes = {} + progress_bar = self._get_progress_bar(progress_bar_type) + if bqstorage_client is not None: return self._to_dataframe_bqstorage(bqstorage_client, dtypes) else: - return self._to_dataframe_tabledata_list(dtypes) + return self._to_dataframe_tabledata_list(dtypes, progress_bar=progress_bar) class _EmptyRowIterator(object): diff --git a/docs/conf.py b/docs/conf.py index 584ce42e952a..b405b7e61019 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -317,13 +317,14 @@ # Configuration for intersphinx: intersphinx_mapping = { + 'fastavro': ('https://fastavro.readthedocs.io/en/stable/', None), 'google-auth': ('https://google-auth.readthedocs.io/en/stable', None), 'google-gax': ('https://gax-python.readthedocs.io/en/latest/', None), 'grpc': ('https://grpc.io/grpc/python/', None), - 'requests': ('http://docs.python-requests.org/en/master/', None), - 'fastavro': ('https://fastavro.readthedocs.io/en/stable/', None), 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), 'python': ('https://docs.python.org/3', None), + 'requests': ('http://docs.python-requests.org/en/master/', None), + 'tqdm': ('https://tqdm.github.io/', None), } # Static HTML pages, e.g. to support redirects From 62e3d5868145555f37d50c9de33d8f2209fda33f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 28 Mar 2019 13:42:21 -0700 Subject: [PATCH 10/11] Fix lint, coverage, and docs errors. 
--- bigquery/docs/conf.py | 1 - bigquery/google/cloud/bigquery/job.py | 17 +---- bigquery/google/cloud/bigquery/table.py | 19 +++--- bigquery/noxfile.py | 4 +- bigquery/setup.py | 2 +- bigquery/tests/unit/test_table.py | 83 +++++++++++++++++++------ docs/conf.py | 1 - 7 files changed, 79 insertions(+), 48 deletions(-) diff --git a/bigquery/docs/conf.py b/bigquery/docs/conf.py index 0810a99ba2ca..c9ff82d8e72b 100644 --- a/bigquery/docs/conf.py +++ b/bigquery/docs/conf.py @@ -329,7 +329,6 @@ "gax": ("https://gax-python.readthedocs.org/en/latest/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), "python": ("http://python.readthedocs.org/en/latest/", None), - 'tqdm': ('https://tqdm.github.io/', None), } # Napoleon settings diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index d03420ed7f2e..35b2932580a4 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -2810,20 +2810,9 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non display a progress bar while the data downloads. Install the ``tqdm`` package to use this feature. - Possible values of ``progress_bar_type`` include: - - ``None`` - No progress bar. - ``'tqdm'`` - Use the :func:`tqdm.tqdm` function to print a progress bar - to :object:`sys.stderr`. - ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a - progress bar as a Jupyter notebook widget. - ``'tqdm_gui'`` - [Experimental] Use the :func:`tqdm.tqdm_gui` function to - display a progress bar as a graphical dialog box. Requires - matplotlib. + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. 
..versionadded:: 1.11.0 diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index a0bd7ec2c5f7..9cf3234500a3 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -49,6 +49,10 @@ "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) +_NO_TQDM_ERROR = ( + "A progress bar was requested, but there was an error loading the tqdm " + "library. Please install tqdm to use the progress bar functionality." +) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' _MARKER = object() @@ -1403,13 +1407,13 @@ def get_dataframe(stream): def _get_progress_bar(self, progress_bar_type): """Construct a tqdm progress bar object, if tqdm is installed.""" if tqdm is None: + if progress_bar_type is not None: + warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) return None try: if progress_bar_type == "tqdm": - return tqdm.tqdm( - desc="Downloading", total=self.total_rows, unit="rows" - ) + return tqdm.tqdm(desc="Downloading", total=self.total_rows, unit="rows") elif progress_bar_type == "tqdm_notebook": return tqdm.tqdm_notebook( desc="Downloading", total=self.total_rows, unit="rows" @@ -1422,15 +1426,12 @@ def _get_progress_bar(self, progress_bar_type): # Protect ourselves from any tqdm errors. In case of # unexpected tqdm behavior, just fall back to showing # no progress bar. - pass + warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) return None - def to_dataframe( - self, bqstorage_client=None, dtypes=None, progress_bar_type=None - ): + def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=None): """Create a pandas DataFrame by loading all pages of a query. - Args: bqstorage_client ( \ google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient \ @@ -1466,7 +1467,7 @@ def to_dataframe( No progress bar. 
``'tqdm'`` Use the :func:`tqdm.tqdm` function to print a progress bar - to :object:`sys.stderr`. + to :data:`sys.stderr`. ``'tqdm_notebook'`` Use the :func:`tqdm.tqdm_notebook` function to display a progress bar as a Jupyter notebook widget. diff --git a/bigquery/noxfile.py b/bigquery/noxfile.py index 82846604306e..2c11f5b67056 100644 --- a/bigquery/noxfile.py +++ b/bigquery/noxfile.py @@ -44,9 +44,9 @@ def default(session): # Pyarrow does not support Python 3.7 if session.python == '3.7': - dev_install = '.[pandas]' + dev_install = '.[pandas, tqdm]' else: - dev_install = '.[pandas, pyarrow]' + dev_install = '.[pandas, pyarrow, tqdm]' session.install('-e', dev_install) # IPython does not support Python 2 after version 5.x diff --git a/bigquery/setup.py b/bigquery/setup.py index d50b8cd86bb8..696e093cd6ff 100644 --- a/bigquery/setup.py +++ b/bigquery/setup.py @@ -39,7 +39,7 @@ # Exclude PyArrow dependency from Windows Python 2.7. 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': 'pyarrow>=0.4.1', - 'tqdm': 'tqdm >= 4.31.1', + 'tqdm': 'tqdm >= 4.0.0, <5.0.0dev', 'fastparquet': ['fastparquet', 'python-snappy'], } diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py index b115b72c5f88..4500856ec2a4 100644 --- a/bigquery/tests/unit/test_table.py +++ b/bigquery/tests/unit/test_table.py @@ -15,6 +15,7 @@ import itertools import json import unittest +import warnings import mock import pytest @@ -906,7 +907,6 @@ def test_time_partitioning_setter_none(self): self.assertIsNone(table.time_partitioning) def test_partitioning_type_setter(self): - import warnings from google.cloud.bigquery.table import TimePartitioningType dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -925,7 +925,6 @@ def test_partitioning_type_setter(self): self.assertIs(warning.category, PendingDeprecationWarning) def test_partitioning_type_setter_w_time_partitioning_set(self): - import warnings from google.cloud.bigquery.table import TimePartitioning 
dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -943,7 +942,6 @@ def test_partitioning_type_setter_w_time_partitioning_set(self): self.assertIs(warning.category, PendingDeprecationWarning) def test_partitioning_expiration_setter_w_time_partitioning_set(self): - import warnings from google.cloud.bigquery.table import TimePartitioning dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -961,8 +959,6 @@ def test_partitioning_expiration_setter_w_time_partitioning_set(self): self.assertIs(warning.category, PendingDeprecationWarning) def test_partition_expiration_setter(self): - import warnings - dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) @@ -1117,8 +1113,6 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor(self): - import warnings - project = "test-project" dataset_id = "test_dataset" table_id = "coffee_table" @@ -1196,8 +1190,6 @@ def test_ctor_view(self): self.assertTrue(table.view_use_legacy_sql) def test_ctor_missing_properties(self): - import warnings - resource = { "tableReference": { "projectId": "testproject", @@ -1420,8 +1412,45 @@ def test_to_dataframe(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch('tqdm.tqdm') - def test_to_dataframe_progress_bar(self, tqdm_mock): + @mock.patch("tqdm.tqdm_gui") + @mock.patch("tqdm.tqdm_notebook") + @mock.patch("tqdm.tqdm") + def test_to_dataframe_progress_bar( + self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock + ): + from google.cloud.bigquery.table import RowIterator + from google.cloud.bigquery.table import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + 
{"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + + progress_bars = ( + ("tqdm", tqdm_mock), + ("tqdm_notebook", tqdm_notebook_mock), + ("tqdm_gui", tqdm_gui_mock), + ) + + for progress_bar_type, progress_bar_mock in progress_bars: + row_iterator = RowIterator(_mock_client(), api_request, path, schema) + df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) + + progress_bar_mock.assert_called() + progress_bar_mock().update.assert_called() + self.assertEqual(len(df), 4) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @mock.patch("google.cloud.bigquery.table.tqdm", new=None) + def test_to_dataframe_no_tqdm_no_progress_bar(self): from google.cloud.bigquery.table import RowIterator from google.cloud.bigquery.table import SchemaField @@ -1438,10 +1467,12 @@ def test_to_dataframe_progress_bar(self, tqdm_mock): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) row_iterator = RowIterator(_mock_client(), api_request, path, schema) - df = row_iterator.to_dataframe() - tqdm_mock.assert_called() - tqdm_mock().update.assert_called() + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe() + + self.assertEqual(len(warned), 0) + self.assertEqual(len(df), 4) @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.tqdm", new=None) @@ -1462,12 +1493,22 @@ def test_to_dataframe_no_tqdm(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) row_iterator = RowIterator(_mock_client(), api_request, path, schema) - df = row_iterator.to_dataframe() - self.assertEqual(len(df), 4) # all should be well + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe(progress_bar_type="tqdm") + + self.assertEqual(len(warned), 1) + for warning in warned: + self.assertIs(warning.category, UserWarning) + + # Even though the progress bar won't show, downloading the 
dataframe + # should still work. + self.assertEqual(len(df), 4) @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") + @mock.patch("tqdm.tqdm_gui", new=None) # will raise TypeError on call + @mock.patch("tqdm.tqdm_notebook", new=None) # will raise TypeError on call @mock.patch("tqdm.tqdm", new=None) # will raise TypeError on call def test_to_dataframe_tqdm_error(self): from google.cloud.bigquery.table import RowIterator @@ -1484,11 +1525,13 @@ def test_to_dataframe_tqdm_error(self): {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, ] path = "/foo" - api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = RowIterator(_mock_client(), api_request, path, schema) - df = row_iterator.to_dataframe() - self.assertEqual(len(df), 4) # all should be well + for progress_bar_type in ("tqdm", "tqdm_notebook", "tqdm_gui"): + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = RowIterator(_mock_client(), api_request, path, schema) + df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) + + self.assertEqual(len(df), 4) # all should be well @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_empty_results(self): diff --git a/docs/conf.py b/docs/conf.py index b405b7e61019..1ff88dbae6f6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -324,7 +324,6 @@ 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), 'python': ('https://docs.python.org/3', None), 'requests': ('http://docs.python-requests.org/en/master/', None), - 'tqdm': ('https://tqdm.github.io/', None), } # Static HTML pages, e.g. to support redirects From bed06948e5d4dde6a01b8d6da3d4fb671751416d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 28 Mar 2019 13:55:55 -0700 Subject: [PATCH 11/11] Add shared variable for common tqdm description. 
--- bigquery/google/cloud/bigquery/table.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index 9cf3234500a3..3d487d9be3f2 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -1411,16 +1411,19 @@ def _get_progress_bar(self, progress_bar_type): warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) return None + description = "Downloading" + unit = "rows" + try: if progress_bar_type == "tqdm": - return tqdm.tqdm(desc="Downloading", total=self.total_rows, unit="rows") + return tqdm.tqdm(desc=description, total=self.total_rows, unit=unit) elif progress_bar_type == "tqdm_notebook": return tqdm.tqdm_notebook( - desc="Downloading", total=self.total_rows, unit="rows" + desc=description, total=self.total_rows, unit=unit ) elif progress_bar_type == "tqdm_gui": return tqdm.tqdm_gui( - desc="Downloading", total=self.total_rows, unit="rows" + desc=description, total=self.total_rows, unit=unit ) except (KeyError, TypeError): # Protect ourselves from any tqdm errors. In case of