From c4528e38e98ba343aee357fe57a5a723f7cd8698 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 28 Jul 2020 17:05:48 -0400 Subject: [PATCH 01/20] tests: avoid use of systest envvars in unit tests (#195) Fixes #194 --- tests/unit/test_client.py | 4 ++-- tests/unit/test_magics.py | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2c4c1342c..1f4d584b9 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -221,7 +221,7 @@ def test__call_api_applying_custom_retry_on_timeout(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY - client = self._make_one() + client = self._make_one(project=self.PROJECT) api_request_patcher = mock.patch.object( client._connection, "api_request", side_effect=[TimeoutError, "result"], @@ -674,7 +674,7 @@ def test_create_bqstorage_client(self): mock_client.assert_called_once_with(credentials=creds) def test_create_bqstorage_client_missing_dependency(self): - client = self._make_one() + client = self._make_one(project=self.PROJECT) def fail_bqstorage_import(name, globals, locals, fromlist, level): # NOTE: *very* simplified, assuming a straightforward absolute import diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index a42592e3c..bd52f3f53 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -772,9 +772,16 @@ def test_bigquery_magic_w_missing_query(): ip.extension_manager.load_extension("google.cloud.bigquery") magics.context._project = None + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + cell_body = " \n \n \t\t \n " - with io.capture_output() as captured_io: + with io.capture_output() as captured_io, default_patch: ip.run_cell_magic("bigquery", "df", cell_body) output = 
captured_io.stderr From eeeb88595492a6e2f852b06ce9d9e23e2b1ff0ba Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 28 Jul 2020 16:44:03 -0500 Subject: [PATCH 02/20] doc: update CHANGELOG for version 1.10.0 (#167) I kept getting frustrated that I couldn't find when `exists_ok` was added without looking at the commit history. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7286e9bf..a209dbaf5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -482,7 +482,7 @@ ### New Features -- Add options to ignore errors when creating/deleting datasets/tables. ([#7491](https://github.com/googleapis/google-cloud-python/pull/7491)) +- Add `exists_ok` and `not_found_ok` options to ignore errors when creating/deleting datasets/tables. ([#7491](https://github.com/googleapis/google-cloud-python/pull/7491)) - Accept a string in Table and Dataset constructors. ([#7483](https://github.com/googleapis/google-cloud-python/pull/7483)) ### Documentation From 04f20945f9631b4bdb50957964b92157aad7ea45 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 29 Jul 2020 00:26:04 +0200 Subject: [PATCH 03/20] chore(deps): update dependency google-cloud-bigquery to v1.26.1 (#180) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==1.25.0` -> `==1.26.1` | --- ### Release Notes
googleapis/python-bigquery ### [`v1.26.1`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​1261-httpswwwgithubcomgoogleapispython-bigquerycomparev1260v1261-2020-07-25) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v1.26.0...v1.26.1) ### [`v1.26.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​1260-httpswwwgithubcomgoogleapispython-bigquerycomparev1250v1260-2020-07-20) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v1.25.0...v1.26.0) ##### Features - use BigQuery Storage client by default (if dependencies available) ([#​55](https://www.github.com/googleapis/python-bigquery/issues/55)) ([e75ff82](https://www.github.com/googleapis/python-bigquery/commit/e75ff8297c65981545b097f75a17cf9e78ac6772)), closes [#​91](https://www.github.com/googleapis/python-bigquery/issues/91) - **bigquery:** add **eq** method for class PartitionRange and RangePartitioning ([#​162](https://www.github.com/googleapis/python-bigquery/issues/162)) ([0d2a88d](https://www.github.com/googleapis/python-bigquery/commit/0d2a88d8072154cfc9152afd6d26a60ddcdfbc73)) - **bigquery:** expose date_as_object parameter to users ([#​150](https://www.github.com/googleapis/python-bigquery/issues/150)) ([a2d5ce9](https://www.github.com/googleapis/python-bigquery/commit/a2d5ce9e97992318d7dc85c51c053cab74e25a11)) - **bigquery:** expose date_as_object parameter to users ([#​150](https://www.github.com/googleapis/python-bigquery/issues/150)) ([cbd831e](https://www.github.com/googleapis/python-bigquery/commit/cbd831e08024a67148723afd49e1db085e0a862c)) ##### Bug Fixes - dry run queries with DB API cursor ([#​128](https://www.github.com/googleapis/python-bigquery/issues/128)) ([bc33a67](https://www.github.com/googleapis/python-bigquery/commit/bc33a678a765f0232615aa2038b8cc67c88468a0)) - omit `NaN` values when uploading from `insert_rows_from_dataframe` 
([#​170](https://www.github.com/googleapis/python-bigquery/issues/170)) ([f9f2f45](https://www.github.com/googleapis/python-bigquery/commit/f9f2f45bc009c03cd257441bd4b6beb1754e2177)) ##### Documentation - **bigquery:** add client thread-safety documentation ([#​132](https://www.github.com/googleapis/python-bigquery/issues/132)) ([fce76b3](https://www.github.com/googleapis/python-bigquery/commit/fce76b3776472b1da798df862a3405e659e35bab)) - **bigquery:** add docstring for conflict exception ([#​171](https://www.github.com/googleapis/python-bigquery/issues/171)) ([9c3409b](https://www.github.com/googleapis/python-bigquery/commit/9c3409bb06218bf499620544f8e92802df0cce47)) - **bigquery:** consistent use of optional keyword ([#​153](https://www.github.com/googleapis/python-bigquery/issues/153)) ([79d8c61](https://www.github.com/googleapis/python-bigquery/commit/79d8c61064cca18b596a24b6f738c7611721dd5c)) - **bigquery:** fix the broken docs ([#​139](https://www.github.com/googleapis/python-bigquery/issues/139)) ([3235255](https://www.github.com/googleapis/python-bigquery/commit/3235255cc5f483949f34d2e8ef13b372e8713782))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 5de21f7e9..f67eb3587 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.25.0 +google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.26.1 google-auth-oauthlib==0.4.1 ipython==7.16.1 matplotlib==3.3.0 From 7a6f71988375d0573fd506507877a39f4209c5a2 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 29 Jul 2020 05:06:04 +0200 Subject: [PATCH 04/20] chore(deps): update dependency llvmlite to <=0.33.0 for python >= 3.6 (#185) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Update | Change | |---|---|---| | [llvmlite](http://llvmlite.pydata.org) ([source](https://togithub.com/numba/llvmlite)) | minor | ` <= 0.31.0` -> `<=0.33.0` | --- ### Release Notes
numba/llvmlite ### [`v0.33.0`](https://togithub.com/numba/llvmlite/blob/master/CHANGE_LOG#v0330-June-10-2020) [Compare Source](https://togithub.com/numba/llvmlite/compare/v0.32.1...v0.33.0) This release upgrades to LLVM 9 and drops support for older LLVM versions. Pull requests: - PR [#​593](https://togithub.com/numba/llvmlite/issues/593): Fix CUDA with LLVM9 - PR [#​592](https://togithub.com/numba/llvmlite/issues/592): Fix meta.yaml - PR [#​591](https://togithub.com/numba/llvmlite/issues/591): buildscripts: Unpin wheel - PR [#​590](https://togithub.com/numba/llvmlite/issues/590): add python_requires to setup.py - PR [#​582](https://togithub.com/numba/llvmlite/issues/582): Adds override for LLVM version check, re-formats docs. - PR [#​581](https://togithub.com/numba/llvmlite/issues/581): Add FAQ entry on LLVM version support. - PR [#​580](https://togithub.com/numba/llvmlite/issues/580): Trove classifiers may be out of date. - PR [#​577](https://togithub.com/numba/llvmlite/issues/577): llvmlite wheel building fixes - PR [#​575](https://togithub.com/numba/llvmlite/issues/575): Update the release date - PR [#​548](https://togithub.com/numba/llvmlite/issues/548): Upgrade to LLVM9 - PR [#​521](https://togithub.com/numba/llvmlite/issues/521): Allow instructions to be removed from blocks Authors: - Graham Markall - Jan Vesely - Siu Kwan Lam (core dev) - Stuart Archibald (core dev) - Tim Babb - Valentin Haenel (core dev) ### [`v0.32.1`](https://togithub.com/numba/llvmlite/blob/master/CHANGE_LOG#v0321-May-7-2020) [Compare Source](https://togithub.com/numba/llvmlite/compare/v0.32.0...v0.32.1) This is a small patch release that addresses some packaging issues: Pull requests: - PR 580: Trove classifiers may be out of date. - PR 581: Add FAQ entry on LLVM version support. - PR 582: Adds override for LLVM version check, re-formats docs. 
Authors: - Stuart Archibald (core dev) - Valentin Haenel (core dev) ### [`v0.32.0`](https://togithub.com/numba/llvmlite/blob/master/CHANGE_LOG#v0320-Apr-16-2020) [Compare Source](https://togithub.com/numba/llvmlite/compare/v0.31.0...v0.32.0) The main changes in this release are the removal of specific code for Python 2 and Python <3.6, and making the code base PEP8 compliant. Pull requests: - PR [#​577](https://togithub.com/numba/llvmlite/issues/577): llvmlite wheel building fixes - PR [#​560](https://togithub.com/numba/llvmlite/issues/560): ENH: Better error message - PR [#​558](https://togithub.com/numba/llvmlite/issues/558): update install docs - PR [#​556](https://togithub.com/numba/llvmlite/issues/556): binding: Allow empty features list - PR [#​555](https://togithub.com/numba/llvmlite/issues/555): travis: Cleanup - PR [#​554](https://togithub.com/numba/llvmlite/issues/554): azure-pipelines: Bump VM images. - PR [#​552](https://togithub.com/numba/llvmlite/issues/552): Add paragraph on installing from sdist and on non-traditional platforms. - PR [#​551](https://togithub.com/numba/llvmlite/issues/551): Remove python 2, python < 3.6, fix up, add flake8 - PR [#​549](https://togithub.com/numba/llvmlite/issues/549): Miscalled method and missing parameter in the documentation - PR [#​547](https://togithub.com/numba/llvmlite/issues/547): Permit building on Visual Studio 2017 - PR [#​543](https://togithub.com/numba/llvmlite/issues/543): Update error message in LLVM version check. - PR [#​540](https://togithub.com/numba/llvmlite/issues/540): update to final release date for 0.31.0 Authors: - Arik Funke - Eric Larson - Jan Vesely - Shan Sikdar - Siu Kwan Lam (core dev) - Stan Seibert (core dev) - Stuart Archibald (core dev) - Vladislav Hrčka
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#googleapis/python-bigquery). --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 61e836a73..f391143d3 100644 --- a/setup.py +++ b/setup.py @@ -59,7 +59,8 @@ # llvmlite >= 0.32.0 cannot be installed on Python 3.5 and below # (building the wheel fails), thus needs to be restricted. # See: https://github.com/googleapis/python-bigquery/issues/78 - "llvmlite <= 0.31.0", + "llvmlite <= 0.33.0;python_version>='3.6'", + "llvmlite <= 0.31.0;python_version<'3.6'", ], } From f59fc9a482d9f9ae63e2b2bfc80b9a3481d09bde Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Thu, 30 Jul 2020 10:55:33 -0700 Subject: [PATCH 05/20] feat: add support for getting and setting table IAM policy (#144) --- google/cloud/bigquery/client.py | 58 +++++++++ google/cloud/bigquery/iam.py | 38 ++++++ tests/system.py | 49 ++++++++ tests/unit/test_client.py | 210 ++++++++++++++++++++++++++++++++ 4 files changed, 355 insertions(+) create mode 100644 google/cloud/bigquery/iam.py diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index a3d1b8846..651f0263e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -46,6 +46,7 @@ import google.api_core.client_options import google.api_core.exceptions +from google.api_core.iam import Policy 
from google.api_core import page_iterator import google.cloud._helpers from google.cloud import exceptions @@ -605,6 +606,63 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): ) return Dataset.from_api_repr(api_response) + def get_iam_policy( + self, table, requested_policy_version=1, retry=DEFAULT_RETRY, timeout=None, + ): + if not isinstance(table, (Table, TableReference)): + raise TypeError("table must be a Table or TableReference") + + if requested_policy_version != 1: + raise ValueError("only IAM policy version 1 is supported") + + body = {"options": {"requestedPolicyVersion": 1}} + + path = "{}:getIamPolicy".format(table.path) + + response = self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) + + return Policy.from_api_repr(response) + + def set_iam_policy( + self, table, policy, updateMask=None, retry=DEFAULT_RETRY, timeout=None, + ): + if not isinstance(table, (Table, TableReference)): + raise TypeError("table must be a Table or TableReference") + + if not isinstance(policy, (Policy)): + raise TypeError("policy must be a Policy") + + body = {"policy": policy.to_api_repr()} + + if updateMask is not None: + body["updateMask"] = updateMask + + path = "{}:setIamPolicy".format(table.path) + + response = self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) + + return Policy.from_api_repr(response) + + def test_iam_permissions( + self, table, permissions, retry=DEFAULT_RETRY, timeout=None, + ): + if not isinstance(table, (Table, TableReference)): + raise TypeError("table must be a Table or TableReference") + + body = {"permissions": permissions} + + path = "{}:testIamPermissions".format(table.path) + + response = self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) + + return response + def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): """[Beta] Fetch the model referenced by ``model_ref``. 
diff --git a/google/cloud/bigquery/iam.py b/google/cloud/bigquery/iam.py new file mode 100644 index 000000000..df9db36b7 --- /dev/null +++ b/google/cloud/bigquery/iam.py @@ -0,0 +1,38 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""BigQuery API IAM policy definitions + +For all allowed roles and permissions, see: + +https://cloud.google.com/bigquery/docs/access-control +""" + +# BigQuery-specific IAM roles available for tables and views + +BIGQUERY_DATA_EDITOR_ROLE = "roles/bigquery.dataEditor" +"""When applied to a table or view, this role provides permissions to +read and update data and metadata for the table or view.""" + +BIGQUERY_DATA_OWNER_ROLE = "roles/bigquery.dataOwner" +"""When applied to a table or view, this role provides permissions to +read and update data and metadata for the table or view, share the +table/view, and delete the table/view.""" + +BIGQUERY_DATA_VIEWER_ROLE = "roles/bigquery.dataViewer" +"""When applied to a table or view, this role provides permissions to +read data and metadata from the table or view.""" + +BIGQUERY_METADATA_VIEWER_ROLE = "roles/bigquery.metadataViewer" +"""When applied to a table or view, this role provides persmissions to +read metadata from the table or view.""" diff --git a/tests/system.py b/tests/system.py index cd5454a87..50e2dc7de 100644 --- a/tests/system.py +++ b/tests/system.py @@ -71,6 +71,7 @@ from google.api_core.exceptions import InternalServerError from 
google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests +from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset @@ -1407,6 +1408,54 @@ def test_copy_table(self): got_rows = self._fetch_single_page(dest_table) self.assertTrue(len(got_rows) > 0) + def test_get_set_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_id = "test_table" + table_ref = Table(dataset.table(table_id)) + self.assertFalse(_table_exists(table_ref)) + + table = retry_403(Config.CLIENT.create_table)(table_ref) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + + member = "serviceAccount:{}".format(Config.CLIENT.get_service_account_email()) + BINDING = { + "role": BIGQUERY_DATA_VIEWER_ROLE, + "members": {member}, + } + + policy = Config.CLIENT.get_iam_policy(table) + self.assertIsInstance(policy, Policy) + self.assertEqual(policy.bindings, []) + + policy.bindings.append(BINDING) + returned_policy = Config.CLIENT.set_iam_policy(table, policy) + self.assertEqual(returned_policy.bindings, policy.bindings) + + def test_test_iam_permissions(self): + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_id = "test_table" + table_ref = Table(dataset.table(table_id)) + self.assertFalse(_table_exists(table_ref)) + + table = retry_403(Config.CLIENT.create_table)(table_ref) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + + # Test some default permissions. 
+ permissions = [ + "bigquery.tables.get", + "bigquery.tables.getData", + "bigquery.tables.update", + ] + + response = Config.CLIENT.test_iam_permissions(table, [permissions]) + self.assertEqual(set(response["permissions"]), set(permissions)) + def test_job_cancel(self): DATASET_ID = _make_dataset_id("job_cancel") JOB_ID_PREFIX = "fetch_" + DATASET_ID diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 1f4d584b9..5687a27ec 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1748,6 +1748,216 @@ def test_get_table_sets_user_agent(self): ) self.assertIn("my-application/1.2.3", expected_user_agent) + def test_get_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy + + PATH = "/projects/{}/datasets/{}/tables/{}:getIamPolicy".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, + ) + BODY = {"options": {"requestedPolicyVersion": 1}} + ETAG = "CARDI" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + RETURNED = { + "resourceId": PATH, + "etag": ETAG, + "version": VERSION, + "bindings": [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ], + } + EXPECTED = { + binding["role"]: set(binding["members"]) for binding in RETURNED["bindings"] + } + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + policy = 
client.get_iam_policy(self.TABLE_REF, timeout=7.5) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + + self.assertIsInstance(policy, Policy) + self.assertEqual(policy.etag, RETURNED["etag"]) + self.assertEqual(policy.version, RETURNED["version"]) + self.assertEqual(dict(policy), EXPECTED) + + def test_get_iam_policy_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + table_resource_string = "projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, + ) + + with self.assertRaises(TypeError): + client.get_iam_policy(table_resource_string) + + def test_get_iam_policy_w_invalid_version(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with self.assertRaises(ValueError): + client.get_iam_policy(self.TABLE_REF, requested_policy_version=2) + + def test_set_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + ETAG = "foo" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + BINDINGS = [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ] + MASK = "bindings,etag" + RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS} + 
+ policy = Policy() + for binding in BINDINGS: + policy[binding["role"]] = binding["members"] + + BODY = {"policy": policy.to_api_repr(), "updateMask": MASK} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + returned_policy = client.set_iam_policy( + self.TABLE_REF, policy, updateMask=MASK, timeout=7.5 + ) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + self.assertEqual(returned_policy.etag, ETAG) + self.assertEqual(returned_policy.version, VERSION) + self.assertEqual(dict(returned_policy), dict(policy)) + + def test_set_iam_policy_no_mask(self): + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + RETURNED = {"etag": "foo", "version": 1, "bindings": []} + + policy = Policy() + BODY = {"policy": policy.to_api_repr()} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + client.set_iam_policy(self.TABLE_REF, policy, timeout=7.5) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + + def test_set_iam_policy_invalid_policy(self): + from google.api_core.iam import Policy + + policy = Policy() + invalid_policy_repr = policy.to_api_repr() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with self.assertRaises(TypeError): + client.set_iam_policy(self.TABLE_REF, invalid_policy_repr) + + def test_set_iam_policy_w_invalid_table(self): + from google.api_core.iam import Policy + + policy = Policy() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, 
_http=http) + + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + + with self.assertRaises(TypeError): + client.set_iam_policy(table_resource_string, policy) + + def test_test_iam_permissions(self): + PATH = "/projects/%s/datasets/%s/tables/%s:testIamPermissions" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + BODY = {"permissions": PERMISSIONS} + RETURNED = {"permissions": PERMISSIONS} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + client.test_iam_permissions(self.TABLE_REF, PERMISSIONS, timeout=7.5) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + + def test_test_iam_permissions_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + + with self.assertRaises(TypeError): + client.test_iam_permissions(table_resource_string, PERMISSIONS) + def test_update_dataset_w_invalid_field(self): from google.cloud.bigquery.dataset import Dataset From 8fe725429541eed34ddc01cffc8b1ee846c14162 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 30 Jul 2020 22:08:47 +0200 Subject: [PATCH 06/20] fix: raise error if inserting rows with unknown fields (#163) Co-authored-by: Tres Seaver --- google/cloud/bigquery/_helpers.py | 31 +++++++++++++++++++++++- tests/unit/test__helpers.py | 40 +++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 
d814eec8c..47851d42c 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -19,6 +19,7 @@ import datetime import decimal import re +import six from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -419,9 +420,23 @@ def _record_field_to_json(fields, row_value): Returns: Mapping[str, Any]: A JSON-serializable dictionary. """ - record = {} isdict = isinstance(row_value, dict) + # If row is passed as a tuple, make the length sanity check to avoid either + # uninformative index errors a few lines below or silently omitting some of + # the values from the result (we cannot know exactly which fields are missing + # or redundant, since we don't have their names). + if not isdict and len(row_value) != len(fields): + msg = "The number of row fields ({}) does not match schema length ({}).".format( + len(row_value), len(fields) + ) + raise ValueError(msg) + + record = {} + + if isdict: + processed_fields = set() + for subindex, subfield in enumerate(fields): subname = subfield.name subvalue = row_value.get(subname) if isdict else row_value[subindex] @@ -430,6 +445,20 @@ def _record_field_to_json(fields, row_value): if subvalue is not None: record[subname] = _field_to_json(subfield, subvalue) + if isdict: + processed_fields.add(subname) + + # Unknown fields should not be silently dropped, include them. Since there + # is no schema information available for them, include them as strings + # to make them JSON-serializable. 
+ if isdict: + not_processed = set(row_value.keys()) - processed_fields + + for field_name in not_processed: + value = row_value[field_name] + if value is not None: + record[field_name] = six.text_type(value) + return record diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index fa6d27c98..28ebe8144 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -18,6 +18,7 @@ import unittest import mock +import six class Test_not_null(unittest.TestCase): @@ -847,6 +848,26 @@ def test_w_non_empty_list(self): converted = self._call_fut(fields, original) self.assertEqual(converted, {"one": "42", "two": "two"}) + def test_w_list_missing_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = [42] + + with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + self._call_fut(fields, original) + + def test_w_list_too_many_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = [42, "two", "three"] + + with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + self._call_fut(fields, original) + def test_w_non_empty_dict(self): fields = [ _make_field("INT64", name="one", mode="NULLABLE"), @@ -890,6 +911,25 @@ def test_w_explicit_none_value(self): # None values should be dropped regardless of the field type self.assertEqual(converted, {"one": "42"}) + def test_w_dict_unknown_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = { + "whoami": datetime.date(2020, 7, 20), + "one": 111, + "two": "222", + "void": None, + } + + converted = self._call_fut(fields, original) + + # Unknown fields should be included (if not None), but converted as strings. 
+ self.assertEqual( + converted, {"whoami": "2020-07-20", "one": "111", "two": "222"}, + ) + class Test_field_to_json(unittest.TestCase): def _call_fut(self, field, value): From 8360487582f245d20893acbe8cbf92d0fc4cd9d9 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 30 Jul 2020 17:54:05 -0400 Subject: [PATCH 07/20] tests: remove warning spew (#197) Fixes: #196 --- tests/unit/test__pandas_helpers.py | 21 +++++++++++-- tests/unit/test_client.py | 25 +++++++++++----- tests/unit/test_job.py | 46 ++++++++++++++++++++--------- tests/unit/test_magics.py | 6 ++-- tests/unit/test_table.py | 47 ++++++++++++++++++++++++++---- 5 files changed, 112 insertions(+), 33 deletions(-) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 4f4b5f447..e229e04a2 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -20,6 +20,7 @@ import warnings import mock +import six try: import pandas @@ -299,7 +300,10 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): ) ) assert pyarrow.types.is_struct(actual) - assert actual.num_children == len(fields) + try: + assert actual.num_fields == len(fields) + except AttributeError: # py27 + assert actual.num_children == len(fields) assert actual.equals(expected) @@ -344,7 +348,10 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): ) assert pyarrow.types.is_list(actual) assert pyarrow.types.is_struct(actual.value_type) - assert actual.value_type.num_children == len(fields) + try: + assert actual.value_type.num_fields == len(fields) + except AttributeError: # py27 + assert actual.value_type.num_children == len(fields) assert actual.value_type.equals(expected_value_type) @@ -542,9 +549,17 @@ def test_bq_to_arrow_schema_w_unknown_type(module_under_test): # instead. 
schema.SchemaField("field3", "UNKNOWN_TYPE"), ) - actual = module_under_test.bq_to_arrow_schema(fields) + with warnings.catch_warnings(record=True) as warned: + actual = module_under_test.bq_to_arrow_schema(fields) assert actual is None + if six.PY3: + assert len(warned) == 1 + warning = warned[0] + assert "field3" in str(warning) + else: + assert len(warned) == 0 + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_get_column_or_index_not_found(module_under_test): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 5687a27ec..198708281 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -221,7 +221,8 @@ def test__call_api_applying_custom_retry_on_timeout(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY - client = self._make_one(project=self.PROJECT) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) api_request_patcher = mock.patch.object( client._connection, "api_request", side_effect=[TimeoutError, "result"], @@ -674,7 +675,8 @@ def test_create_bqstorage_client(self): mock_client.assert_called_once_with(credentials=creds) def test_create_bqstorage_client_missing_dependency(self): - client = self._make_one(project=self.PROJECT) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) def fail_bqstorage_import(name, globals, locals, fromlist, level): # NOTE: *very* simplified, assuming a straightforward absolute import @@ -7680,17 +7682,24 @@ def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): ) with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch as to_parquet_spy: - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - location=self.LOCATION, - parquet_compression="gzip", - ) + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + 
location=self.LOCATION, + parquet_compression="gzip", + ) call_args = to_parquet_spy.call_args assert call_args is not None assert call_args.kwargs.get("compression") == "gzip" + assert len(warned) == 2 + warning = warned[0] + assert "Loading dataframe data without pyarrow" in str(warning) + warning = warned[1] + assert "Please install the pyarrow package" in str(warning) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index 733445337..9cd3631e1 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -17,6 +17,7 @@ import json import textwrap import unittest +import warnings import freezegun import mock @@ -1834,26 +1835,34 @@ def test_time_partitioning_hit(self): "expirationMs": str(year_ms), "requirePartitionFilter": False, } - expected = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) + with warnings.catch_warnings(record=True) as warned: + expected = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) self.assertEqual(config.time_partitioning, expected) + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + def test_time_partitioning_setter(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType field = "creation_date" year_ms = 86400 * 1000 * 365 - time_partitioning = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) + + with warnings.catch_warnings(record=True) as warned: + time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + 
require_partition_filter=False, + ) + config = self._get_target_class()() config.time_partitioning = time_partitioning expected = { @@ -1864,6 +1873,10 @@ def test_time_partitioning_setter(self): } self.assertEqual(config._properties["load"]["timePartitioning"], expected) + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + def test_time_partitioning_setter_w_none(self): from google.cloud.bigquery.table import TimePartitioningType @@ -5595,7 +5608,10 @@ def test_to_dataframe_column_date_dtypes_wo_pyarrow(self): job = self._make_one(self.JOB_ID, self.QUERY, client) with mock.patch("google.cloud.bigquery.table.pyarrow", None): - df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) + with warnings.catch_warnings(record=True) as warned: + df = job.to_dataframe( + date_as_object=False, create_bqstorage_client=False + ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 1) # verify the number of rows @@ -5604,6 +5620,10 @@ def test_to_dataframe_column_date_dtypes_wo_pyarrow(self): self.assertEqual(df.date.dtype.name, "object") + assert len(warned) == 1 + warning = warned[0] + assert "without pyarrow" in str(warning) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm") diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index bd52f3f53..7b07626ad 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -399,7 +399,7 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): # Set up the context with monkeypatch so that it's reset for subsequent # tests. - monkeypatch.setattr(magics.context, "credentials", mock_credentials) + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. 
bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) @@ -560,7 +560,7 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): # Set up the context with monkeypatch so that it's reset for subsequent # tests. - monkeypatch.setattr(magics.context, "credentials", mock_credentials) + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) @@ -624,7 +624,7 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): # Set up the context with monkeypatch so that it's reset for subsequent # tests. - monkeypatch.setattr(magics.context, "credentials", mock_credentials) + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 2c9d0f64e..28575bd43 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1778,7 +1778,8 @@ def test_to_arrow_w_unknown_type(self): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one(_mock_client(), api_request, path, schema) - tbl = row_iterator.to_arrow(create_bqstorage_client=False) + with warnings.catch_warnings(record=True) as warned: + tbl = row_iterator.to_arrow(create_bqstorage_client=False) self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) @@ -1799,6 +1800,10 @@ def test_to_arrow_w_unknown_type(self): self.assertEqual(ages, [33, 29]) self.assertEqual(sports, ["volleyball", "basketball"]) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("sport" in str(warning)) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): from google.cloud.bigquery.schema import SchemaField @@ -2370,13 +2375,18 @@ def 
test_to_dataframe_progress_bar_wo_pyarrow( for progress_bar_type, progress_bar_mock in progress_bars: row_iterator = self._make_one(_mock_client(), api_request, path, schema) with mock.patch("google.cloud.bigquery.table.pyarrow", None): - df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) progress_bar_mock.assert_called() progress_bar_mock().update.assert_called() progress_bar_mock().close.assert_called_once() self.assertEqual(len(df), 4) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): @@ -2499,12 +2509,17 @@ def test_to_dataframe_w_empty_results_wo_pyarrow(self): api_request = mock.Mock(return_value={"rows": []}) row_iterator = self._make_one(_mock_client(), api_request, schema=schema) - df = row_iterator.to_dataframe() + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe() self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_no_results_wo_pyarrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2522,12 +2537,17 @@ def empty_iterable(dtypes=None): row_iterator.to_dataframe_iterable = empty_iterable - df = row_iterator.to_dataframe() + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe() self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), 
["name", "age"]) # verify the column names + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): import datetime @@ -2787,11 +2807,19 @@ def test_to_dataframe_w_bqstorage_v1beta1_no_streams(self): table=mut.TableReference.from_string("proj.dset.tbl"), ) - got = row_iterator.to_dataframe(bqstorage_client) + with warnings.catch_warnings(record=True) as warned: + got = row_iterator.to_dataframe(bqstorage_client) + column_names = ["colA", "colC", "colB"] self.assertEqual(list(got), column_names) self.assertTrue(got.empty) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue( + "Support for BigQuery Storage v1beta1 clients is deprecated" in str(warning) + ) + @unittest.skipIf( bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @@ -3493,7 +3521,10 @@ def test_to_dataframe_concat_categorical_dtype_wo_pyarrow(self): row_iterator = self._make_one(_mock_client(), api_request, path, schema) - with mock.patch("google.cloud.bigquery.table.pyarrow", None): + mock_pyarrow = mock.patch("google.cloud.bigquery.table.pyarrow", None) + catch_warnings = warnings.catch_warnings(record=True) + + with mock_pyarrow, catch_warnings as warned: got = row_iterator.to_dataframe( dtypes={ "col_category": pandas.core.dtypes.dtypes.CategoricalDtype( @@ -3522,6 +3553,10 @@ def test_to_dataframe_concat_categorical_dtype_wo_pyarrow(self): ["low", "medium", "low", "medium", "high", "low"], ) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From fee2ba80e338d093ee61565359268da91a5c9913 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 3 Aug 2020 10:17:00 +0530 Subject: [PATCH 08/20] feat: add support and 
tests for struct fields (#146) * feat(bigquery): add support and tests for struct fields * feat(bigquery): bump pyarrow version for python3 * feat(bigquery): nit --- google/cloud/bigquery/_pandas_helpers.py | 15 +++--- setup.py | 6 +-- tests/system.py | 44 ++++++++++++++++++ tests/unit/test_client.py | 58 ++++++++++++++++++++---- 4 files changed, 102 insertions(+), 21 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index ff6525399..953b7d0fe 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -287,13 +287,14 @@ def dataframe_to_bq_schema(dataframe, bq_schema): """ if bq_schema: bq_schema = schema._to_schema_fields(bq_schema) - for field in bq_schema: - if field.field_type in schema._STRUCT_TYPES: - raise ValueError( - "Uploading dataframes with struct (record) column types " - "is not supported. See: " - "https://github.com/googleapis/google-cloud-python/issues/8191" - ) + if six.PY2: + for field in bq_schema: + if field.field_type in schema._STRUCT_TYPES: + raise ValueError( + "Uploading dataframes with struct (record) column types " + "is not supported under Python2. See: " + "https://github.com/googleapis/python-bigquery/issues/21" + ) bq_schema_index = {field.name: field for field in bq_schema} bq_schema_unused = set(bq_schema_index.keys()) else: diff --git a/setup.py b/setup.py index f391143d3..b00b2cbe5 100644 --- a/setup.py +++ b/setup.py @@ -47,10 +47,8 @@ ], "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. - 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': [ - # Bad Linux release for 0.14.0. 
- # https://issues.apache.org/jira/browse/ARROW-5868 - "pyarrow>=0.4.1, != 0.14.0" + 'pyarrow: platform_system != "Windows" or python_version >= "3.5"': [ + "pyarrow>=0.17.0" ], "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], "fastparquet": [ diff --git a/tests/system.py b/tests/system.py index 50e2dc7de..be79a6d20 100644 --- a/tests/system.py +++ b/tests/system.py @@ -131,6 +131,8 @@ PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0") PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0") +PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version def _has_rows(result): @@ -1075,6 +1077,48 @@ def test_load_table_from_dataframe_w_explicit_schema(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 3) + @unittest.skipIf( + pyarrow is None or PYARROW_INSTALLED_VERSION < PYARROW_MINIMUM_VERSION, + "Only `pyarrow version >=0.17.0` is supported", + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_w_struct_datatype(self): + """Test that a DataFrame with struct datatype can be uploaded if a + BigQuery schema is specified. 
+ + https://github.com/googleapis/python-bigquery/issues/21 + """ + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_struct_datatype".format( + Config.CLIENT.project, dataset_id + ) + table_schema = [ + bigquery.SchemaField( + "bar", + "RECORD", + fields=[ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + mode="REQUIRED", + ), + ] + table = retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + df_data = [{"id": 1, "age": 21}, {"id": 2, "age": 22}, {"id": 2, "age": 23}] + dataframe = pandas.DataFrame(data={"bar": df_data}, columns=["bar"]) + + load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) + load_job.result() + + table = Config.CLIENT.get_table(table_id) + self.assertEqual(table.schema, table_schema) + self.assertEqual(table.num_rows, 3) + def test_load_table_from_json_basic_use(self): table_schema = ( bigquery.SchemaField("name", "STRING", mode="REQUIRED"), diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 198708281..8b63f7e57 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7373,19 +7373,22 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_struct_fields_error(self): + def test_load_table_from_dataframe_struct_fields(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField client = self._make_client() - records = [{"float_column": 3.14, "struct_column": [{"foo": 1}, {"bar": -1}]}] - dataframe = pandas.DataFrame(data=records) + records = [(3.14, {"foo": 1, "bar": 1})] + dataframe = pandas.DataFrame( 
+ data=records, columns=["float_column", "struct_column"] + ) schema = [ SchemaField("float_column", "FLOAT"), SchemaField( - "agg_col", + "struct_column", "RECORD", fields=[SchemaField("foo", "INTEGER"), SchemaField("bar", "INTEGER")], ), @@ -7396,14 +7399,49 @@ def test_load_table_from_dataframe_struct_fields_error(self): "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - with pytest.raises(ValueError) as exc_info, load_patch: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + if six.PY2: + with pytest.raises(ValueError) as exc_info, load_patch: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + job_config=job_config, + location=self.LOCATION, + ) + + err_msg = str(exc_info.value) + assert "struct" in err_msg + assert "not support" in err_msg + + else: + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + job_config=job_config, + location=self.LOCATION, + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, ) - err_msg = str(exc_info.value) - assert "struct" in err_msg - assert "not support" in err_msg + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") From b842fb5d1cf659a98cb317f1962804f1498a4799 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Tue, 4 Aug 2020 09:39:42 -0700 Subject: [PATCH 
09/20] testing: split system tests into separate builds (#207) --- .gitignore | 3 +- .kokoro/build.sh | 8 +- .kokoro/docker/docs/Dockerfile | 98 ++++ .kokoro/docker/docs/fetch_gpg_keys.sh | 45 ++ .kokoro/docs/common.cfg | 21 +- .kokoro/docs/docs-presubmit.cfg | 17 + .kokoro/presubmit/presubmit.cfg | 8 +- .kokoro/presubmit/system-2.7.cfg | 7 + .kokoro/presubmit/system-3.8.cfg | 7 + .kokoro/publish-docs.sh | 39 +- .kokoro/trampoline_v2.sh | 487 ++++++++++++++++++ .trampolinerc | 51 ++ docs/conf.py | 4 + .../proto/encryption_config_pb2.py | 2 +- google/cloud/bigquery_v2/proto/model_pb2.py | 2 +- .../bigquery_v2/proto/model_reference_pb2.py | 2 +- .../bigquery_v2/proto/standard_sql_pb2.py | 2 +- noxfile.py | 37 ++ synth.metadata | 18 +- synth.py | 2 +- 20 files changed, 823 insertions(+), 37 deletions(-) create mode 100644 .kokoro/docker/docs/Dockerfile create mode 100755 .kokoro/docker/docs/fetch_gpg_keys.sh create mode 100644 .kokoro/docs/docs-presubmit.cfg create mode 100644 .kokoro/presubmit/system-2.7.cfg create mode 100644 .kokoro/presubmit/system-3.8.cfg create mode 100755 .kokoro/trampoline_v2.sh create mode 100644 .trampolinerc diff --git a/.gitignore b/.gitignore index b87e1ed58..b9daa52f1 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,7 @@ pip-log.txt # Built documentation docs/_build bigquery/docs/generated +docs.metadata # Virtual environment env/ @@ -57,4 +58,4 @@ system_tests/local_test_setup # Make sure a generated file isn't accidentally committed. pylintrc -pylintrc.test \ No newline at end of file +pylintrc.test diff --git a/.kokoro/build.sh b/.kokoro/build.sh index d3749e290..0e71e2aca 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -36,4 +36,10 @@ python3.6 -m pip uninstall --yes --quiet nox-automation python3.6 -m pip install --upgrade --quiet nox python3.6 -m nox --version -python3.6 -m nox +# If NOX_SESSION is set, it only runs the specified session, +# otherwise run all the sessions. 
+if [[ -n "${NOX_SESSION:-}" ]]; then + python3.6 -m nox -s "${NOX_SESSION:-}" +else + python3.6 -m nox +fi diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile new file mode 100644 index 000000000..412b0b56a --- /dev/null +++ b/.kokoro/docker/docs/Dockerfile @@ -0,0 +1,98 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ubuntu:20.04 + +ENV DEBIAN_FRONTEND noninteractive + +# Ensure local Python is preferred over distribution Python. +ENV PATH /usr/local/bin:$PATH + +# Install dependencies. +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + apt-transport-https \ + build-essential \ + ca-certificates \ + curl \ + dirmngr \ + git \ + gpg-agent \ + graphviz \ + libbz2-dev \ + libdb5.3-dev \ + libexpat1-dev \ + libffi-dev \ + liblzma-dev \ + libreadline-dev \ + libsnappy-dev \ + libssl-dev \ + libsqlite3-dev \ + portaudio19-dev \ + redis-server \ + software-properties-common \ + ssh \ + sudo \ + tcl \ + tcl-dev \ + tk \ + tk-dev \ + uuid-dev \ + wget \ + zlib1g-dev \ + && add-apt-repository universe \ + && apt-get update \ + && apt-get -y install jq \ + && apt-get clean autoclean \ + && apt-get autoremove -y \ + && rm -rf /var/lib/apt/lists/* \ + && rm -f /var/cache/apt/archives/*.deb + + +COPY fetch_gpg_keys.sh /tmp +# Install the desired versions of Python. 
+RUN set -ex \ + && export GNUPGHOME="$(mktemp -d)" \ + && echo "disable-ipv6" >> "${GNUPGHOME}/dirmngr.conf" \ + && /tmp/fetch_gpg_keys.sh \ + && for PYTHON_VERSION in 3.7.8 3.8.5; do \ + wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \ + && wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \ + && gpg --batch --verify python-${PYTHON_VERSION}.tar.xz.asc python-${PYTHON_VERSION}.tar.xz \ + && rm -r python-${PYTHON_VERSION}.tar.xz.asc \ + && mkdir -p /usr/src/python-${PYTHON_VERSION} \ + && tar -xJC /usr/src/python-${PYTHON_VERSION} --strip-components=1 -f python-${PYTHON_VERSION}.tar.xz \ + && rm python-${PYTHON_VERSION}.tar.xz \ + && cd /usr/src/python-${PYTHON_VERSION} \ + && ./configure \ + --enable-shared \ + # This works only on Python 2.7 and throws a warning on every other + # version, but seems otherwise harmless. + --enable-unicode=ucs4 \ + --with-system-ffi \ + --without-ensurepip \ + && make -j$(nproc) \ + && make install \ + && ldconfig \ + ; done \ + && rm -rf "${GNUPGHOME}" \ + && rm -rf /usr/src/python* \ + && rm -rf ~/.cache/ + +RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ + && python3.7 /tmp/get-pip.py \ + && python3.8 /tmp/get-pip.py \ + && rm /tmp/get-pip.py + +CMD ["python3.7"] diff --git a/.kokoro/docker/docs/fetch_gpg_keys.sh b/.kokoro/docker/docs/fetch_gpg_keys.sh new file mode 100755 index 000000000..d653dd868 --- /dev/null +++ b/.kokoro/docker/docs/fetch_gpg_keys.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A script to fetch gpg keys with retry. +# Avoid jinja parsing the file. +# + +function retry { + if [[ "${#}" -le 1 ]]; then + echo "Usage: ${0} retry_count commands.." + exit 1 + fi + local retries=${1} + local command="${@:2}" + until [[ "${retries}" -le 0 ]]; do + $command && return 0 + if [[ $? -ne 0 ]]; then + echo "command failed, retrying" + ((retries--)) + fi + done + return 1 +} + +# 3.6.9, 3.7.5 (Ned Deily) +retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ + 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D + +# 3.8.0 (Łukasz Langa) +retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ + E3FF2839C048B25C084DEBE9B26995E310250568 + +# diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg index 229abf075..8f9807f72 100644 --- a/.kokoro/docs/common.cfg +++ b/.kokoro/docs/common.cfg @@ -11,12 +11,12 @@ action { gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" # Configure the docker image for kokoro-trampoline. env_vars: { key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" + value: "gcr.io/cloud-devrel-kokoro-resources/python-lib-docs" } env_vars: { key: "TRAMPOLINE_BUILD_FILE" @@ -28,6 +28,23 @@ env_vars: { value: "docs-staging" } +env_vars: { + key: "V2_STAGING_BUCKET" + value: "docs-staging-v2-staging" +} + +# It will upload the docker image after successful builds. 
+env_vars: { + key: "TRAMPOLINE_IMAGE_UPLOAD" + value: "true" +} + +# It will always build the docker image. +env_vars: { + key: "TRAMPOLINE_DOCKERFILE" + value: ".kokoro/docker/docs/Dockerfile" +} + # Fetch the token needed for reporting release status to GitHub before_action { fetch_keystore { diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg new file mode 100644 index 000000000..111810782 --- /dev/null +++ b/.kokoro/docs/docs-presubmit.cfg @@ -0,0 +1,17 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "STAGING_BUCKET" + value: "gcloud-python-test" +} + +env_vars: { + key: "V2_STAGING_BUCKET" + value: "gcloud-python-test" +} + +# We only upload the image in the main `docs` build. +env_vars: { + key: "TRAMPOLINE_IMAGE_UPLOAD" + value: "false" +} diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index 8f43917d9..b158096f0 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -1 +1,7 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file +# Format: //devtools/kokoro/config/proto/build.proto + +# Disable system tests. +env_vars: { + key: "RUN_SYSTEM_TESTS" + value: "false" +} diff --git a/.kokoro/presubmit/system-2.7.cfg b/.kokoro/presubmit/system-2.7.cfg new file mode 100644 index 000000000..3b6523a19 --- /dev/null +++ b/.kokoro/presubmit/system-2.7.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "system-2.7" +} \ No newline at end of file diff --git a/.kokoro/presubmit/system-3.8.cfg b/.kokoro/presubmit/system-3.8.cfg new file mode 100644 index 000000000..f4bcee3db --- /dev/null +++ b/.kokoro/presubmit/system-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. 
+env_vars: { + key: "NOX_SESSION" + value: "system-3.8" +} \ No newline at end of file diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 309212789..8acb14e80 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -18,26 +18,16 @@ set -eo pipefail # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 -cd github/python-bigquery - -# Remove old nox -python3.6 -m pip uninstall --yes --quiet nox-automation +export PATH="${HOME}/.local/bin:${PATH}" # Install nox -python3.6 -m pip install --upgrade --quiet nox -python3.6 -m nox --version +python3 -m pip install --user --upgrade --quiet nox +python3 -m nox --version # build docs nox -s docs -python3 -m pip install gcp-docuploader - -# install a json parser -sudo apt-get update -sudo apt-get -y install software-properties-common -sudo add-apt-repository universe -sudo apt-get update -sudo apt-get -y install jq +python3 -m pip install --user gcp-docuploader # create metadata python3 -m docuploader create-metadata \ @@ -52,4 +42,23 @@ python3 -m docuploader create-metadata \ cat docs.metadata # upload docs -python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket docs-staging +python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" + + +# docfx yaml files +nox -s docfx + +# create metadata. 
+python3 -m docuploader create-metadata \ + --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ + --version=$(python3 setup.py --version) \ + --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ + --distribution-name=$(python3 setup.py --name) \ + --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ + --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ + --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) + +cat docs.metadata + +# upload docs +python3 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh new file mode 100755 index 000000000..719bcd5ba --- /dev/null +++ b/.kokoro/trampoline_v2.sh @@ -0,0 +1,487 @@ +#!/usr/bin/env bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# trampoline_v2.sh +# +# This script does 3 things. +# +# 1. Prepare the Docker image for the test +# 2. Run the Docker with appropriate flags to run the test +# 3. Upload the newly built Docker image +# +# in a way that is somewhat compatible with trampoline_v1. +# +# To run this script, first download few files from gcs to /dev/shm. +# (/dev/shm is passed into the container as KOKORO_GFILE_DIR). 
+# +# gsutil cp gs://cloud-devrel-kokoro-resources/python-docs-samples/secrets_viewer_service_account.json /dev/shm +# gsutil cp gs://cloud-devrel-kokoro-resources/python-docs-samples/automl_secrets.txt /dev/shm +# +# Then run the script. +# .kokoro/trampoline_v2.sh +# +# These environment variables are required: +# TRAMPOLINE_IMAGE: The docker image to use. +# TRAMPOLINE_DOCKERFILE: The location of the Dockerfile. +# +# You can optionally change these environment variables: +# TRAMPOLINE_IMAGE_UPLOAD: +# (true|false): Whether to upload the Docker image after the +# successful builds. +# TRAMPOLINE_BUILD_FILE: The script to run in the docker container. +# TRAMPOLINE_WORKSPACE: The workspace path in the docker container. +# Defaults to /workspace. +# Potentially there are some repo specific envvars in .trampolinerc in +# the project root. + + +set -euo pipefail + +TRAMPOLINE_VERSION="2.0.5" + +if command -v tput >/dev/null && [[ -n "${TERM:-}" ]]; then + readonly IO_COLOR_RED="$(tput setaf 1)" + readonly IO_COLOR_GREEN="$(tput setaf 2)" + readonly IO_COLOR_YELLOW="$(tput setaf 3)" + readonly IO_COLOR_RESET="$(tput sgr0)" +else + readonly IO_COLOR_RED="" + readonly IO_COLOR_GREEN="" + readonly IO_COLOR_YELLOW="" + readonly IO_COLOR_RESET="" +fi + +function function_exists { + [ $(LC_ALL=C type -t $1)"" == "function" ] +} + +# Logs a message using the given color. The first argument must be one +# of the IO_COLOR_* variables defined above, such as +# "${IO_COLOR_YELLOW}". The remaining arguments will be logged in the +# given color. The log message will also have an RFC-3339 timestamp +# prepended (in UTC). You can disable the color output by setting +# TERM=vt100. 
+function log_impl() { + local color="$1" + shift + local timestamp="$(date -u "+%Y-%m-%dT%H:%M:%SZ")" + echo "================================================================" + echo "${color}${timestamp}:" "$@" "${IO_COLOR_RESET}" + echo "================================================================" +} + +# Logs the given message with normal coloring and a timestamp. +function log() { + log_impl "${IO_COLOR_RESET}" "$@" +} + +# Logs the given message in green with a timestamp. +function log_green() { + log_impl "${IO_COLOR_GREEN}" "$@" +} + +# Logs the given message in yellow with a timestamp. +function log_yellow() { + log_impl "${IO_COLOR_YELLOW}" "$@" +} + +# Logs the given message in red with a timestamp. +function log_red() { + log_impl "${IO_COLOR_RED}" "$@" +} + +readonly tmpdir=$(mktemp -d -t ci-XXXXXXXX) +readonly tmphome="${tmpdir}/h" +mkdir -p "${tmphome}" + +function cleanup() { + rm -rf "${tmpdir}" +} +trap cleanup EXIT + +RUNNING_IN_CI="${RUNNING_IN_CI:-false}" + +# The workspace in the container, defaults to /workspace. +TRAMPOLINE_WORKSPACE="${TRAMPOLINE_WORKSPACE:-/workspace}" + +pass_down_envvars=( + # TRAMPOLINE_V2 variables. + # Tells scripts whether they are running as part of CI or not. + "RUNNING_IN_CI" + # Indicates which CI system we're in. + "TRAMPOLINE_CI" + # Indicates the version of the script. + "TRAMPOLINE_VERSION" +) + +log_yellow "Building with Trampoline ${TRAMPOLINE_VERSION}" + +# Detect which CI systems we're in. If we're in any of the CI systems +# we support, `RUNNING_IN_CI` will be true and `TRAMPOLINE_CI` will be +# the name of the CI system. Both envvars will be passing down to the +# container for telling which CI system we're in. +if [[ -n "${KOKORO_BUILD_ID:-}" ]]; then + # descriptive env var for indicating it's on CI. + RUNNING_IN_CI="true" + TRAMPOLINE_CI="kokoro" + if [[ "${TRAMPOLINE_USE_LEGACY_SERVICE_ACCOUNT:-}" == "true" ]]; then + if [[ ! 
-f "${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json" ]]; then + log_red "${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json does not exist. Did you forget to mount cloud-devrel-kokoro-resources/trampoline? Aborting." + exit 1 + fi + # This service account will be activated later. + TRAMPOLINE_SERVICE_ACCOUNT="${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json" + else + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + gcloud auth list + fi + log_yellow "Configuring Container Registry access" + gcloud auth configure-docker --quiet + fi + pass_down_envvars+=( + # KOKORO dynamic variables. + "KOKORO_BUILD_NUMBER" + "KOKORO_BUILD_ID" + "KOKORO_JOB_NAME" + "KOKORO_GIT_COMMIT" + "KOKORO_GITHUB_COMMIT" + "KOKORO_GITHUB_PULL_REQUEST_NUMBER" + "KOKORO_GITHUB_PULL_REQUEST_COMMIT" + # For Build Cop Bot + "KOKORO_GITHUB_COMMIT_URL" + "KOKORO_GITHUB_PULL_REQUEST_URL" + ) +elif [[ "${TRAVIS:-}" == "true" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="travis" + pass_down_envvars+=( + "TRAVIS_BRANCH" + "TRAVIS_BUILD_ID" + "TRAVIS_BUILD_NUMBER" + "TRAVIS_BUILD_WEB_URL" + "TRAVIS_COMMIT" + "TRAVIS_COMMIT_MESSAGE" + "TRAVIS_COMMIT_RANGE" + "TRAVIS_JOB_NAME" + "TRAVIS_JOB_NUMBER" + "TRAVIS_JOB_WEB_URL" + "TRAVIS_PULL_REQUEST" + "TRAVIS_PULL_REQUEST_BRANCH" + "TRAVIS_PULL_REQUEST_SHA" + "TRAVIS_PULL_REQUEST_SLUG" + "TRAVIS_REPO_SLUG" + "TRAVIS_SECURE_ENV_VARS" + "TRAVIS_TAG" + ) +elif [[ -n "${GITHUB_RUN_ID:-}" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="github-workflow" + pass_down_envvars+=( + "GITHUB_WORKFLOW" + "GITHUB_RUN_ID" + "GITHUB_RUN_NUMBER" + "GITHUB_ACTION" + "GITHUB_ACTIONS" + "GITHUB_ACTOR" + "GITHUB_REPOSITORY" + "GITHUB_EVENT_NAME" + "GITHUB_EVENT_PATH" + "GITHUB_SHA" + "GITHUB_REF" + "GITHUB_HEAD_REF" + "GITHUB_BASE_REF" + ) +elif [[ "${CIRCLECI:-}" == "true" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="circleci" + pass_down_envvars+=( + "CIRCLE_BRANCH" + "CIRCLE_BUILD_NUM" + "CIRCLE_BUILD_URL" + "CIRCLE_COMPARE_URL" + 
"CIRCLE_JOB" + "CIRCLE_NODE_INDEX" + "CIRCLE_NODE_TOTAL" + "CIRCLE_PREVIOUS_BUILD_NUM" + "CIRCLE_PROJECT_REPONAME" + "CIRCLE_PROJECT_USERNAME" + "CIRCLE_REPOSITORY_URL" + "CIRCLE_SHA1" + "CIRCLE_STAGE" + "CIRCLE_USERNAME" + "CIRCLE_WORKFLOW_ID" + "CIRCLE_WORKFLOW_JOB_ID" + "CIRCLE_WORKFLOW_UPSTREAM_JOB_IDS" + "CIRCLE_WORKFLOW_WORKSPACE_ID" + ) +fi + +# Configure the service account for pulling the docker image. +function repo_root() { + local dir="$1" + while [[ ! -d "${dir}/.git" ]]; do + dir="$(dirname "$dir")" + done + echo "${dir}" +} + +# Detect the project root. In CI builds, we assume the script is in +# the git tree and traverse from there, otherwise, traverse from `pwd` +# to find `.git` directory. +if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then + PROGRAM_PATH="$(realpath "$0")" + PROGRAM_DIR="$(dirname "${PROGRAM_PATH}")" + PROJECT_ROOT="$(repo_root "${PROGRAM_DIR}")" +else + PROJECT_ROOT="$(repo_root $(pwd))" +fi + +log_yellow "Changing to the project root: ${PROJECT_ROOT}." +cd "${PROJECT_ROOT}" + +# To support relative path for `TRAMPOLINE_SERVICE_ACCOUNT`, we need +# to use this environment variable in `PROJECT_ROOT`. +if [[ -n "${TRAMPOLINE_SERVICE_ACCOUNT:-}" ]]; then + + mkdir -p "${tmpdir}/gcloud" + gcloud_config_dir="${tmpdir}/gcloud" + + log_yellow "Using isolated gcloud config: ${gcloud_config_dir}." + export CLOUDSDK_CONFIG="${gcloud_config_dir}" + + log_yellow "Using ${TRAMPOLINE_SERVICE_ACCOUNT} for authentication." + gcloud auth activate-service-account \ + --key-file "${TRAMPOLINE_SERVICE_ACCOUNT}" + log_yellow "Configuring Container Registry access" + gcloud auth configure-docker --quiet +fi + +required_envvars=( + # The basic trampoline configurations. + "TRAMPOLINE_IMAGE" + "TRAMPOLINE_BUILD_FILE" +) + +if [[ -f "${PROJECT_ROOT}/.trampolinerc" ]]; then + source "${PROJECT_ROOT}/.trampolinerc" +fi + +log_yellow "Checking environment variables." 
+for e in "${required_envvars[@]}" +do + if [[ -z "${!e:-}" ]]; then + log "Missing ${e} env var. Aborting." + exit 1 + fi +done + +# We want to support legacy style TRAMPOLINE_BUILD_FILE used with V1 +# script: e.g. "github/repo-name/.kokoro/run_tests.sh" +TRAMPOLINE_BUILD_FILE="${TRAMPOLINE_BUILD_FILE#github/*/}" +log_yellow "Using TRAMPOLINE_BUILD_FILE: ${TRAMPOLINE_BUILD_FILE}" + +# ignore error on docker operations and test execution +set +e + +log_yellow "Preparing Docker image." +# We only download the docker image in CI builds. +if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then + # Download the docker image specified by `TRAMPOLINE_IMAGE` + + # We may want to add --max-concurrent-downloads flag. + + log_yellow "Start pulling the Docker image: ${TRAMPOLINE_IMAGE}." + if docker pull "${TRAMPOLINE_IMAGE}"; then + log_green "Finished pulling the Docker image: ${TRAMPOLINE_IMAGE}." + has_image="true" + else + log_red "Failed pulling the Docker image: ${TRAMPOLINE_IMAGE}." + has_image="false" + fi +else + # For local run, check if we have the image. + if docker images "${TRAMPOLINE_IMAGE}:latest" | grep "${TRAMPOLINE_IMAGE}"; then + has_image="true" + else + has_image="false" + fi +fi + + +# The default user for a Docker container has uid 0 (root). To avoid +# creating root-owned files in the build directory we tell docker to +# use the current user ID. +user_uid="$(id -u)" +user_gid="$(id -g)" +user_name="$(id -un)" + +# To allow docker in docker, we add the user to the docker group in +# the host os. +docker_gid=$(cut -d: -f3 < <(getent group docker)) + +update_cache="false" +if [[ "${TRAMPOLINE_DOCKERFILE:-none}" != "none" ]]; then + # Build the Docker image from the source. 
+    context_dir=$(dirname "${TRAMPOLINE_DOCKERFILE}")
+    docker_build_flags=(
+        "-f" "${TRAMPOLINE_DOCKERFILE}"
+        "-t" "${TRAMPOLINE_IMAGE}"
+        "--build-arg" "UID=${user_uid}"
+        "--build-arg" "USERNAME=${user_name}"
+    )
+    if [[ "${has_image}" == "true" ]]; then
+        docker_build_flags+=("--cache-from" "${TRAMPOLINE_IMAGE}")
+    fi
+
+    log_yellow "Start building the docker image."
+    if [[ "${TRAMPOLINE_VERBOSE:-false}" == "true" ]]; then
+        echo "docker build" "${docker_build_flags[@]}" "${context_dir}"
+    fi
+
+    # ON CI systems, we want to suppress docker build logs, only
+    # output the logs when it fails.
+    if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then
+        if docker build "${docker_build_flags[@]}" "${context_dir}" \
+               > "${tmpdir}/docker_build.log" 2>&1; then
+            if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then
+                cat "${tmpdir}/docker_build.log"
+            fi
+
+            log_green "Finished building the docker image."
+            update_cache="true"
+        else
+            log_red "Failed to build the Docker image, aborting."
+            log_yellow "Dumping the build logs:"
+            cat "${tmpdir}/docker_build.log"
+            exit 1
+        fi
+    else
+        if docker build "${docker_build_flags[@]}" "${context_dir}"; then
+            log_green "Finished building the docker image."
+            update_cache="true"
+        else
+            log_red "Failed to build the Docker image, aborting."
+            exit 1
+        fi
+    fi
+else
+    if [[ "${has_image}" != "true" ]]; then
+        log_red "We do not have ${TRAMPOLINE_IMAGE} locally, aborting."
+        exit 1
+    fi
+fi
+
+# We use an array for the flags so they are easier to document.
+docker_flags=(
+    # Remove the container after it exits.
+    "--rm"
+
+    # Use the host network.
+    "--network=host"
+
+    # Run in privileged mode. We are not using docker for sandboxing or
+    # isolation, just for packaging our dev tools.
+    "--privileged"
+
+    # Run the docker script with the user id. Because the docker image gets to
+    # write in ${PWD} you typically want this to be your user id.
+    # To allow docker in docker, we need to use docker gid on the host.
+ "--user" "${user_uid}:${docker_gid}" + + # Pass down the USER. + "--env" "USER=${user_name}" + + # Mount the project directory inside the Docker container. + "--volume" "${PROJECT_ROOT}:${TRAMPOLINE_WORKSPACE}" + "--workdir" "${TRAMPOLINE_WORKSPACE}" + "--env" "PROJECT_ROOT=${TRAMPOLINE_WORKSPACE}" + + # Mount the temporary home directory. + "--volume" "${tmphome}:/h" + "--env" "HOME=/h" + + # Allow docker in docker. + "--volume" "/var/run/docker.sock:/var/run/docker.sock" + + # Mount the /tmp so that docker in docker can mount the files + # there correctly. + "--volume" "/tmp:/tmp" + # Pass down the KOKORO_GFILE_DIR and KOKORO_KEYSTORE_DIR + # TODO(tmatsuo): This part is not portable. + "--env" "TRAMPOLINE_SECRET_DIR=/secrets" + "--volume" "${KOKORO_GFILE_DIR:-/dev/shm}:/secrets/gfile" + "--env" "KOKORO_GFILE_DIR=/secrets/gfile" + "--volume" "${KOKORO_KEYSTORE_DIR:-/dev/shm}:/secrets/keystore" + "--env" "KOKORO_KEYSTORE_DIR=/secrets/keystore" +) + +# Add an option for nicer output if the build gets a tty. +if [[ -t 0 ]]; then + docker_flags+=("-it") +fi + +# Passing down env vars +for e in "${pass_down_envvars[@]}" +do + if [[ -n "${!e:-}" ]]; then + docker_flags+=("--env" "${e}=${!e}") + fi +done + +# If arguments are given, all arguments will become the commands run +# in the container, otherwise run TRAMPOLINE_BUILD_FILE. +if [[ $# -ge 1 ]]; then + log_yellow "Running the given commands '" "${@:1}" "' in the container." + readonly commands=("${@:1}") + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + echo docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" "${commands[@]}" + fi + docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" "${commands[@]}" +else + log_yellow "Running the tests in a Docker container." 
+ docker_flags+=("--entrypoint=${TRAMPOLINE_BUILD_FILE}") + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + echo docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" + fi + docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" +fi + + +test_retval=$? + +if [[ ${test_retval} -eq 0 ]]; then + log_green "Build finished with ${test_retval}" +else + log_red "Build finished with ${test_retval}" +fi + +# Only upload it when the test passes. +if [[ "${update_cache}" == "true" ]] && \ + [[ $test_retval == 0 ]] && \ + [[ "${TRAMPOLINE_IMAGE_UPLOAD:-false}" == "true" ]]; then + log_yellow "Uploading the Docker image." + if docker push "${TRAMPOLINE_IMAGE}"; then + log_green "Finished uploading the Docker image." + else + log_red "Failed uploading the Docker image." + fi + # Call trampoline_after_upload_hook if it's defined. + if function_exists trampoline_after_upload_hook; then + trampoline_after_upload_hook + fi + +fi + +exit "${test_retval}" diff --git a/.trampolinerc b/.trampolinerc new file mode 100644 index 000000000..995ee2911 --- /dev/null +++ b/.trampolinerc @@ -0,0 +1,51 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Template for .trampolinerc + +# Add required env vars here. +required_envvars+=( + "STAGING_BUCKET" + "V2_STAGING_BUCKET" +) + +# Add env vars which are passed down into the container here. +pass_down_envvars+=( + "STAGING_BUCKET" + "V2_STAGING_BUCKET" +) + +# Prevent unintentional override on the default image. 
+if [[ "${TRAMPOLINE_IMAGE_UPLOAD:-false}" == "true" ]] && \ + [[ -z "${TRAMPOLINE_IMAGE:-}" ]]; then + echo "Please set TRAMPOLINE_IMAGE if you want to upload the Docker image." + exit 1 +fi + +# Define the default value if it makes sense. +if [[ -z "${TRAMPOLINE_IMAGE_UPLOAD:-}" ]]; then + TRAMPOLINE_IMAGE_UPLOAD="" +fi + +if [[ -z "${TRAMPOLINE_IMAGE:-}" ]]; then + TRAMPOLINE_IMAGE="" +fi + +if [[ -z "${TRAMPOLINE_DOCKERFILE:-}" ]]; then + TRAMPOLINE_DOCKERFILE="" +fi + +if [[ -z "${TRAMPOLINE_BUILD_FILE:-}" ]]; then + TRAMPOLINE_BUILD_FILE="" +fi diff --git a/docs/conf.py b/docs/conf.py index 251e1f4ca..155606c97 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,6 +20,10 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("..")) +# For plugins that can not read conf.py. +# See also: https://github.com/docascode/sphinx-docfx-yaml/issues/85 +sys.path.insert(0, os.path.abspath(".")) + __version__ = "" # -- General configuration ------------------------------------------------ diff --git a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/google/cloud/bigquery_v2/proto/encryption_config_pb2.py index 5ae21ea6f..5147743b6 100644 --- a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py +++ b/google/cloud/bigquery_v2/proto/encryption_config_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: google/cloud/bigquery_v2/proto/encryption_config.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/google/cloud/bigquery_v2/proto/model_pb2.py b/google/cloud/bigquery_v2/proto/model_pb2.py index 7b66be8f7..f485c4568 100644 --- a/google/cloud/bigquery_v2/proto/model_pb2.py +++ b/google/cloud/bigquery_v2/proto/model_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/google/cloud/bigquery_v2/proto/model_reference_pb2.py index 2411c4863..07d7e4c4b 100644 --- a/google/cloud/bigquery_v2/proto/model_reference_pb2.py +++ b/google/cloud/bigquery_v2/proto/model_reference_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model_reference.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/google/cloud/bigquery_v2/proto/standard_sql_pb2.py index bfe77f934..15f6715a2 100644 --- a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ b/google/cloud/bigquery_v2/proto/standard_sql_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: google/cloud/bigquery_v2/proto/standard_sql.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/noxfile.py b/noxfile.py index bb6a10e1e..4664278f1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -73,6 +73,10 @@ def unit(session): def system(session): """Run the system test suite.""" + # Check the value of `RUN_SYSTEM_TESTS` env var. It defaults to true. + if os.environ.get("RUN_SYSTEM_TESTS", "true") == "false": + session.skip("RUN_SYSTEM_TESTS is set to false, skipping") + # Sanity check: Only run system tests if the environment variable is set. if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable.") @@ -190,3 +194,36 @@ def docs(session): os.path.join("docs", ""), os.path.join("docs", "_build", "html", ""), ) + + +@nox.session(python="3.8") +def docfx(session): + """Build the docfx yaml files for this library.""" + + session.install("-e", ".") + session.install("sphinx", "alabaster", "recommonmark", "sphinx-docfx-yaml") + + shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run( + "sphinx-build", + "-T", # show full traceback on exception + "-N", # no colors + "-D", + ( + "extensions=sphinx.ext.autodoc," + "sphinx.ext.autosummary," + "docfx_yaml.extension," + "sphinx.ext.intersphinx," + "sphinx.ext.coverage," + "sphinx.ext.napoleon," + "sphinx.ext.todo," + "sphinx.ext.viewcode," + "recommonmark" + ), + "-b", + "html", + "-d", + os.path.join("docs", "_build", "doctrees", ""), + os.path.join("docs", ""), + os.path.join("docs", "_build", "html", ""), + ) diff --git a/synth.metadata b/synth.metadata index b7e46157b..46c63367d 100644 --- a/synth.metadata +++ b/synth.metadata @@ -3,22 +3,16 @@ { "git": { "name": ".", - "remote": "git@github.com:googleapis/python-bigquery.git", - "sha": 
"416c0daf40e481c80fb5327b48baa915f0e7aa2f" + "remote": "git@github.com:tmatsuo/python-bigquery.git", + "sha": "5ed817523a85a6f332951e10c0bf7dbb86d7e1cf" } }, { "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "b8ff6a41e195cda91bbfb20d9f11d5e58b7cc218" - } - }, - { - "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "b8ff6a41e195cda91bbfb20d9f11d5e58b7cc218" + "name": "googleapis", + "remote": "https://github.com/googleapis/googleapis.git", + "sha": "868615a5c1c1059c636bb3d82a555edb1d5a251e", + "internalRef": "324294521" } } ], diff --git a/synth.py b/synth.py index 5125c398e..ac20c9aec 100644 --- a/synth.py +++ b/synth.py @@ -59,7 +59,7 @@ # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- -templated_files = common.py_library(cov_level=100, samples=True) +templated_files = common.py_library(cov_level=100, samples=True, split_system_tests=True) # BigQuery has a custom multiprocessing note s.move(templated_files, excludes=["noxfile.py", "docs/multiprocessing.rst"]) From af2c987fedca61d2b6d0ccdcef64455002ad684f Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 5 Aug 2020 23:53:41 +0200 Subject: [PATCH 10/20] chore(deps): pin ipython to v7.17.0 for samples, python >= 3.7 (#208) Co-authored-by: Tres Seaver Co-authored-by: Takashi Matsuo --- samples/snippets/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f67eb3587..ea84f6bac 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,6 @@ google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.26.1 google-auth-oauthlib==0.4.1 -ipython==7.16.1 +ipython==7.16.1; python_version < '3.7' +ipython==7.17.0; python_version >= '3.7' 
matplotlib==3.3.0 pytz==2020.1 From bae233770463db697df63dc66f9a6f54bd10d437 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Mon, 10 Aug 2020 15:50:26 -0400 Subject: [PATCH 11/20] chore: fix docs build broken by Sphinx 3.2.0 (#218) Closes #217. --- google/cloud/bigquery/query.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 0f4c80686..f2ed6337e 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -86,8 +86,8 @@ class ScalarQueryParameter(_AbstractQueryParameter): 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - value (Union[str, int, float, decimal.Decimal, bool, - datetime.datetime, datetime.date]): The scalar parameter value. + value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + The scalar parameter value. """ def __init__(self, name, type_, value): @@ -105,9 +105,8 @@ def positional(cls, type_, value): 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - value (Union[str, int, float, decimal.Decimal, bool, - datetime.datetime, - datetime.date]): The scalar parameter value. + value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + The scalar parameter value. 
Returns: google.cloud.bigquery.query.ScalarQueryParameter: Instance without name From 478597a38167fa57b60ae7f65b581f3fe75ddc7c Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Fri, 14 Aug 2020 10:24:19 +0530 Subject: [PATCH 12/20] feat(bigquery): add client_options to base class (#216) * feat(bigquery): add client_options to base class * chore: bump g-c-c to 1.4.1 --- google/cloud/bigquery/client.py | 5 ++++- setup.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 651f0263e..52ddffe7d 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -177,7 +177,10 @@ def __init__( client_options=None, ): super(Client, self).__init__( - project=project, credentials=credentials, _http=_http + project=project, + credentials=credentials, + client_options=client_options, + _http=_http, ) kw_args = {"client_info": client_info} diff --git a/setup.py b/setup.py index b00b2cbe5..fe6143557 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ dependencies = [ 'enum34; python_version < "3.4"', "google-api-core >= 1.21.0, < 2.0dev", - "google-cloud-core >= 1.1.0, < 2.0dev", + "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.5.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", ] From 8209203e967f0624ad306166c0af6f6f1027c550 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 15 Aug 2020 14:20:04 +0200 Subject: [PATCH 13/20] fix: converting to dataframe with out of bounds timestamps (#209) Fixes #168. This PR fixes the problem when converting query results to Pandas with `pyarrow` when data contains timestamps that would fall out of `pyarrow`'s nanoseconds precision. The fix requires `pyarrow>=1.0.0`, thus it only works on Python 3. ### PR checklist - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- google/cloud/bigquery/table.py | 31 ++++++++++++++++- setup.py | 4 ++- tests/unit/test_table.py | 63 ++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 10b4198d3..d9e5f7773 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -21,6 +21,7 @@ import functools import logging import operator +import pytz import warnings import six @@ -1726,7 +1727,35 @@ def to_dataframe( bqstorage_client=bqstorage_client, create_bqstorage_client=create_bqstorage_client, ) - df = record_batch.to_pandas(date_as_object=date_as_object) + + # When converting timestamp values to nanosecond precision, the result + # can be out of pyarrow bounds. To avoid the error when converting to + # Pandas, we set the timestamp_as_object parameter to True, if necessary. + # + # NOTE: Python 3+ only, as timestamp_as_object parameter is only supported + # in pyarrow>=1.0, but the latter is not compatible with Python 2. 
+ if six.PY2: + extra_kwargs = {} + else: + types_to_check = { + pyarrow.timestamp("us"), + pyarrow.timestamp("us", tz=pytz.UTC), + } + + for column in record_batch: + if column.type in types_to_check: + try: + column.cast("timestamp[ns]") + except pyarrow.lib.ArrowInvalid: + timestamp_as_object = True + break + else: + timestamp_as_object = False + + extra_kwargs = {"timestamp_as_object": timestamp_as_object} + + df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) + for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) return df diff --git a/setup.py b/setup.py index fe6143557..389517277 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,9 @@ "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. 'pyarrow: platform_system != "Windows" or python_version >= "3.5"': [ - "pyarrow>=0.17.0" + "pyarrow>=1.0.0, <2.0dev; python_version>='3.4'", + # Pyarrow >= 0.17.0 is not compatible with Python 2 anymore. + "pyarrow < 0.17.0; python_version < '3.0'", ], "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], "fastparquet": [ diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 28575bd43..80223e8e1 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime as dt import itertools import logging import time @@ -2271,6 +2272,68 @@ def test_to_dataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "int64") + @pytest.mark.xfail( + six.PY2, + reason=( + "Requires pyarrow>-1.0 to work, but the latter is not compatible " + "with Python 2 anymore." 
+ ), + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField("some_timestamp", "TIMESTAMP")] + rows = [ + {"f": [{"v": "81953424000.0"}]}, # 4567-01-01 00:00:00 UTC + {"f": [{"v": "253402214400.0"}]}, # 9999-12-31 00:00:00 UTC + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe(create_bqstorage_client=False) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 2) # verify the number of rows + self.assertEqual(list(df.columns), ["some_timestamp"]) + self.assertEqual( + list(df["some_timestamp"]), + [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], + ) + + @pytest.mark.xfail( + six.PY2, + reason=( + "Requires pyarrow>-1.0 to work, but the latter is not compatible " + "with Python 2 anymore." 
+ ), + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField("some_datetime", "DATETIME")] + rows = [ + {"f": [{"v": "4567-01-01T00:00:00"}]}, + {"f": [{"v": "9999-12-31T00:00:00"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe(create_bqstorage_client=False) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 2) # verify the number of rows + self.assertEqual(list(df.columns), ["some_datetime"]) + self.assertEqual( + list(df["some_datetime"]), + [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_warning_wo_pyarrow(self): from google.cloud.bigquery.client import PyarrowMissingWarning From f947bed3f6a025cdec635e6b33c679c317a3a074 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 17 Aug 2020 15:32:14 -0400 Subject: [PATCH 14/20] chore: release 1.27.0 (#210) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 15 +++++++++++++++ setup.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a209dbaf5..fb41f761b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [1.27.0](https://www.github.com/googleapis/python-bigquery/compare/v1.26.1...v1.27.0) (2020-08-15) + + +### Features + +* add support and tests for struct fields ([#146](https://www.github.com/googleapis/python-bigquery/issues/146)) 
([fee2ba8](https://www.github.com/googleapis/python-bigquery/commit/fee2ba80e338d093ee61565359268da91a5c9913)) +* add support for getting and setting table IAM policy ([#144](https://www.github.com/googleapis/python-bigquery/issues/144)) ([f59fc9a](https://www.github.com/googleapis/python-bigquery/commit/f59fc9a482d9f9ae63e2b2bfc80b9a3481d09bde)) +* **bigquery:** add client_options to base class ([#216](https://www.github.com/googleapis/python-bigquery/issues/216)) ([478597a](https://www.github.com/googleapis/python-bigquery/commit/478597a38167fa57b60ae7f65b581f3fe75ddc7c)) + + +### Bug Fixes + +* converting to dataframe with out of bounds timestamps ([#209](https://www.github.com/googleapis/python-bigquery/issues/209)) ([8209203](https://www.github.com/googleapis/python-bigquery/commit/8209203e967f0624ad306166c0af6f6f1027c550)), closes [#168](https://www.github.com/googleapis/python-bigquery/issues/168) +* raise error if inserting rows with unknown fields ([#163](https://www.github.com/googleapis/python-bigquery/issues/163)) ([8fe7254](https://www.github.com/googleapis/python-bigquery/commit/8fe725429541eed34ddc01cffc8b1ee846c14162)) + ### [1.26.1](https://www.github.com/googleapis/python-bigquery/compare/v1.26.0...v1.26.1) (2020-07-25) ### Documentation diff --git a/setup.py b/setup.py index 389517277..0a8f6685e 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.26.1" +version = "1.27.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 8bf853377edfa701dfcff6f67c06318b794e41a1 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 17 Aug 2020 22:08:06 +0200 Subject: [PATCH 15/20] chore(deps): update dependency matplotlib to v3.3.1 (#224) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [matplotlib](https://matplotlib.org) ([source](https://togithub.com/matplotlib/matplotlib)) 
| patch | `==3.3.0` -> `==3.3.1` | --- ### Release Notes
matplotlib/matplotlib ### [`v3.3.1`](https://togithub.com/matplotlib/matplotlib/releases/v3.3.1) [Compare Source](https://togithub.com/matplotlib/matplotlib/compare/v3.3.0...v3.3.1) This is the first bugfix release of the 3.3.x series. This release contains several critical bug-fixes: - fix docstring import issues when running Python with optimization - fix `hist` with categorical data, such as with Pandas - fix install on BSD systems - fix nbagg compatibility with Chrome 84+ - fix ordering of scatter marker size in 3D plots - fix performance regression when plotting `Path`s - fix reading from URL in `imread` - fix several regressions with new date epoch handling - fix some bad constrained and tight layout interactions with colorbars - fix use of customized toolbars in TkAgg and WXAgg backends
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index ea84f6bac..7fe839119 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -2,5 +2,5 @@ google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.26.1 google-auth-oauthlib==0.4.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.0 +matplotlib==3.3.1 pytz==2020.1 From 6754a76bc6e8bbf25c5eccd6f21ca6cdc8100724 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 17 Aug 2020 22:35:57 +0200 Subject: [PATCH 16/20] chore(deps): update dependency llvmlite to <=0.34.0 (#223) Co-authored-by: Tres Seaver --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 0a8f6685e..77f32044b 100644 --- a/setup.py +++ b/setup.py @@ -59,8 +59,8 @@ # llvmlite >= 0.32.0 cannot be installed on Python 3.5 and below # (building the wheel fails), thus needs to be restricted. 
# See: https://github.com/googleapis/python-bigquery/issues/78 - "llvmlite <= 0.33.0;python_version>='3.6'", - "llvmlite <= 0.31.0;python_version<'3.6'", + "llvmlite<=0.34.0;python_version>='3.6'", + "llvmlite<=0.31.0;python_version<'3.6'", ], } From c15efbd1ee4488898fc862768eef701443f492f6 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 18 Aug 2020 07:01:24 -0400 Subject: [PATCH 17/20] fix: tweak pyarrow extra to soothe PyPI (#230) Release-As: 1.27.1 --- setup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 77f32044b..32d150774 100644 --- a/setup.py +++ b/setup.py @@ -47,8 +47,11 @@ ], "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. - 'pyarrow: platform_system != "Windows" or python_version >= "3.5"': [ - "pyarrow>=1.0.0, <2.0dev; python_version>='3.4'", + 'pyarrow: platform_system == "Windows"': [ + "pyarrow>=1.0.0, <2.0dev; python_version>='3.5'", + ], + 'pyarrow: platform_system != "Windows"': [ + "pyarrow>=1.0.0, <2.0dev; python_version>='3.5'", # Pyarrow >= 0.17.0 is not compatible with Python 2 anymore. "pyarrow < 0.17.0; python_version < '3.0'", ], From 06a581545040f961a21e92fbe9db640a47829ea2 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 18 Aug 2020 11:28:06 +0000 Subject: [PATCH 18/20] chore: release 1.27.1 (#231) :robot: I have created a release \*beep\* \*boop\* --- ### [1.27.1](https://www.github.com/googleapis/python-bigquery/compare/v1.27.0...v1.27.1) (2020-08-18) ### Bug Fixes * tweak pyarrow extra to soothe PyPI ([#230](https://www.github.com/googleapis/python-bigquery/issues/230)) ([c15efbd](https://www.github.com/googleapis/python-bigquery/commit/c15efbd1ee4488898fc862768eef701443f492f6)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). 
--- CHANGELOG.md | 7 +++++++ setup.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb41f761b..41bbda18a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [1.27.1](https://www.github.com/googleapis/python-bigquery/compare/v1.27.0...v1.27.1) (2020-08-18) + + +### Bug Fixes + +* tweak pyarrow extra to soothe PyPI ([#230](https://www.github.com/googleapis/python-bigquery/issues/230)) ([c15efbd](https://www.github.com/googleapis/python-bigquery/commit/c15efbd1ee4488898fc862768eef701443f492f6)) + ## [1.27.0](https://www.github.com/googleapis/python-bigquery/compare/v1.26.1...v1.27.0) (2020-08-15) diff --git a/setup.py b/setup.py index 32d150774..02a89f607 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.27.0" +version = "1.27.1" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From c9a0567f59491b769a9e2fd535430423e39d4fa8 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 18 Aug 2020 16:15:17 -0400 Subject: [PATCH 19/20] fix: rationalize platform constraints for 'pyarrow' extra (#235) Release-As: 1.27.2 --- setup.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 02a89f607..22bc6a874 100644 --- a/setup.py +++ b/setup.py @@ -47,13 +47,10 @@ ], "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from Windows Python 2.7. - 'pyarrow: platform_system == "Windows"': [ - "pyarrow>=1.0.0, <2.0dev; python_version>='3.5'", - ], - 'pyarrow: platform_system != "Windows"': [ - "pyarrow>=1.0.0, <2.0dev; python_version>='3.5'", + "pyarrow": [ + "pyarrow >= 1.0.0, < 2.0dev; python_version >= '3.5'", # Pyarrow >= 0.17.0 is not compatible with Python 2 anymore. 
- "pyarrow < 0.17.0; python_version < '3.0'", + "pyarrow < 0.17.0; python_version < '3.0' and platform_system != 'Windows'", ], "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], "fastparquet": [ From 1c601aad7fc08ff2244554e21e73118783ff88e5 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 18 Aug 2020 20:44:02 +0000 Subject: [PATCH 20/20] chore: release 1.27.2 (#236) :robot: I have created a release \*beep\* \*boop\* --- ### [1.27.2](https://www.github.com/googleapis/python-bigquery/compare/v1.27.1...v1.27.2) (2020-08-18) ### Bug Fixes * rationalize platform constraints for 'pyarrow' extra ([#235](https://www.github.com/googleapis/python-bigquery/issues/235)) ([c9a0567](https://www.github.com/googleapis/python-bigquery/commit/c9a0567f59491b769a9e2fd535430423e39d4fa8)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). --- CHANGELOG.md | 7 +++++++ setup.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41bbda18a..5ef22e8d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [1.27.2](https://www.github.com/googleapis/python-bigquery/compare/v1.27.1...v1.27.2) (2020-08-18) + + +### Bug Fixes + +* rationalize platform constraints for 'pyarrow' extra ([#235](https://www.github.com/googleapis/python-bigquery/issues/235)) ([c9a0567](https://www.github.com/googleapis/python-bigquery/commit/c9a0567f59491b769a9e2fd535430423e39d4fa8)) + ### [1.27.1](https://www.github.com/googleapis/python-bigquery/compare/v1.27.0...v1.27.1) (2020-08-18) diff --git a/setup.py b/setup.py index 22bc6a874..18bb78926 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.27.1" +version = "1.27.2" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status 
:: 4 - Beta'