From 7cf4f32abde1436fbe6c0848647b4ff703e60f85 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 6 Nov 2020 09:44:27 -0800 Subject: [PATCH 01/22] chore: re-generated to pick up changes from synthtool (#95) * changes without context autosynth cannot find the source of changes triggered by earlier changes in this repository, or by version upgrades to tools such as linters. * fix(python_library): fix external unit test dependencies I recently submitted https://github.com/googleapis/synthtool/pull/811/files, allowing external dependencies for unit tests. This fixes a small missing comma bug Source-Author: Daniel Sanche Source-Date: Thu Oct 29 16:58:01 2020 -0700 Source-Repo: googleapis/synthtool Source-Sha: 6542bd723403513626f61642fc02ddca528409aa Source-Link: https://github.com/googleapis/synthtool/commit/6542bd723403513626f61642fc02ddca528409aa * chore: add type hint check Source-Author: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Source-Date: Wed Nov 4 17:36:32 2020 -0800 Source-Repo: googleapis/synthtool Source-Sha: 3d3e94c4e02370f307a9a200b0c743c3d8d19f29 Source-Link: https://github.com/googleapis/synthtool/commit/3d3e94c4e02370f307a9a200b0c743c3d8d19f29 --- noxfile.py | 6 +++++- samples/quickstart/noxfile.py | 8 +++++++- samples/to_dataframe/noxfile.py | 8 +++++++- synth.metadata | 6 +++--- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/noxfile.py b/noxfile.py index 7f37b788..8d5df594 100644 --- a/noxfile.py +++ b/noxfile.py @@ -72,7 +72,11 @@ def default(session): # Install all test dependencies, then install this package in-place. session.install("asyncmock", "pytest-asyncio") - session.install("mock", "pytest", "pytest-cov") + session.install( + "mock", "pytest", "pytest-cov", + ) + session.install("-e", ".") + session.install("-e", ".[fastavro,pandas,pyarrow]") # Run py.test against the unit tests. diff --git a/samples/quickstart/noxfile.py b/samples/quickstart/noxfile.py index f3a90583..9be2c922 100644 --- a/samples/quickstart/noxfile.py +++ b/samples/quickstart/noxfile.py @@ -38,6 +38,9 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. "ignored_versions": ["2.7"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string @@ -130,7 +133,10 @@ def _determine_local_import_names(start_dir): @nox.session def lint(session): - session.install("flake8", "flake8-import-order") + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8", "flake8-import-order") + else: + session.install("flake8", "flake8-import-order", "flake8-annotations") local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ diff --git a/samples/to_dataframe/noxfile.py b/samples/to_dataframe/noxfile.py index f3a90583..9be2c922 100644 --- a/samples/to_dataframe/noxfile.py +++ b/samples/to_dataframe/noxfile.py @@ -38,6 +38,9 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. "ignored_versions": ["2.7"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. 
You can also use your own string @@ -130,7 +133,10 @@ def _determine_local_import_names(start_dir): @nox.session def lint(session): - session.install("flake8", "flake8-import-order") + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8", "flake8-import-order") + else: + session.install("flake8", "flake8-import-order", "flake8-annotations") local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ diff --git a/synth.metadata b/synth.metadata index ea2bcb4e..65ac4f5b 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery-storage.git", - "sha": "20ed21d40cd4f89c3d4ae5d8db7ed3c6b801cc4c" + "sha": "6ff112158afef87200d01fd73a252bff35e5665d" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "ea52b8a0bd560f72f376efcf45197fb7c8869120" + "sha": "3d3e94c4e02370f307a9a200b0c743c3d8d19f29" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "ea52b8a0bd560f72f376efcf45197fb7c8869120" + "sha": "3d3e94c4e02370f307a9a200b0c743c3d8d19f29" } } ], From e9f53eff9cfc340baaae7ebd91b5a1fc63b41db6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 10 Nov 2020 11:50:11 -0600 Subject: [PATCH 02/22] chore: update synth.py to latest noxfile template (#93) Follow-up to https://github.com/googleapis/python-bigquery-storage/pull/86 where I manually reverted a change to `noxfile.py` --- noxfile.py | 2 -- synth.py | 18 ++++-------------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/noxfile.py b/noxfile.py index 8d5df594..34936e27 100644 --- a/noxfile.py +++ b/noxfile.py @@ -75,8 +75,6 @@ def default(session): session.install( "mock", "pytest", "pytest-cov", ) - session.install("-e", ".") - session.install("-e", ".[fastavro,pandas,pyarrow]") # Run py.test against the unit tests. diff --git a/synth.py b/synth.py index 863ba860..92cf5c29 100644 --- a/synth.py +++ b/synth.py @@ -74,7 +74,8 @@ templated_files = common.py_library( microgenerator=True, samples=True, - unit_test_dependencies=optional_deps, + unit_test_local_dependencies=optional_deps, + system_test_local_dependencies=optional_deps, cov_level=95, ) s.move( @@ -201,19 +202,8 @@ # redundant to install the library twice. s.replace( "noxfile.py", - ( - r'session\.install\("-e", "\."\)\n ' - r'(?=session\.install\("-e", "\.\[fastavro)' # in unit tests session - ), - "", -) -s.replace( - "noxfile.py", - ( - r'(?<=google-cloud-testutils", \)\n)' - r' session\.install\("-e", "\."\)\n' # in system tests session - ), - ' session.install("-e", ".[fastavro,pandas,pyarrow]")\n', + r'\)\s*session\.install\("-e", "\."\)\n', + ")\n", ) # Fix test coverage plugin paths. 
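For readers unfamiliar with the tooling in the patch above: synth.py drives synthtool, which regenerates the templated files (noxfile.py among them) and then applies regular-expression rewrites via s.replace to drop the now-redundant editable install. A minimal sketch of that flow, using an illustrative pattern rather than the exact one in this repository's synth.py:

import synthtool as s
from synthtool import gcp

common = gcp.CommonTemplates()

# Regenerate the shared templates (noxfile.py, CI config, and so on),
# mirroring the py_library() call shown in the synth.py hunk above.
templated_files = common.py_library(microgenerator=True, samples=True)
s.move(templated_files)

# Post-process the generated noxfile.py: remove the plain editable
# install so only the install that pulls in the optional extras remains.
# Illustrative pattern; the real one appears in the synth.py diff above.
s.replace(
    "noxfile.py",
    r'session\.install\("-e", "\."\)\n',
    "",
)

Running python synth.py in a checkout with synthtool installed regenerates and rewrites these files in place, which is why the noxfile.py changes in this series track the synthtool commits recorded in synth.metadata.
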
From 5e59476d0b998645a6c2e7fbe7b64e5acf31aa59 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 12 Nov 2020 18:46:21 +0100 Subject: [PATCH 03/22] chore(deps): update dependency pyarrow to v2 (#90) Co-authored-by: Tim Swast Co-authored-by: Takashi Matsuo --- samples/to_dataframe/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/to_dataframe/requirements.txt b/samples/to_dataframe/requirements.txt index 14c1784c..2c36d997 100644 --- a/samples/to_dataframe/requirements.txt +++ b/samples/to_dataframe/requirements.txt @@ -1,7 +1,7 @@ google-auth==1.23.0 google-cloud-bigquery-storage==2.0.0 google-cloud-bigquery==2.2.0 -pyarrow==1.0.1 +pyarrow==2.0.0 ipython==7.10.2; python_version > '3.0' ipython==5.9.0; python_version < '3.0' pandas==0.25.3; python_version > '3.0' From 68ebc2d43f2bd0fd76a1aff614070ba416192fc1 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 12 Nov 2020 19:13:08 +0100 Subject: [PATCH 04/22] chore(deps): update dependency google-cloud-bigquery-storage to v2.0.1 (#88) * chore(deps): update dependency google-cloud-bigquery-storage to v2.0.1 * test: make system test more robust I was a bit worried that since WA appeared in the request as part of the filter, the test might pass even in certain error conditions. Co-authored-by: Tim Swast Co-authored-by: Takashi Matsuo --- samples/quickstart/quickstart.py | 2 +- samples/quickstart/quickstart_test.py | 4 ++-- samples/quickstart/requirements.txt | 2 +- samples/to_dataframe/requirements.txt | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/quickstart/quickstart.py b/samples/quickstart/quickstart.py index 4372c22d..7dda6bbf 100644 --- a/samples/quickstart/quickstart.py +++ b/samples/quickstart/quickstart.py @@ -79,7 +79,7 @@ def main(project_id="your-project-id", snapshot_millis=0): names.add(row["name"]) states.add(row["state"]) - print("Got {} unique names in states: {}".format(len(names), states)) + print("Got {} unique names in states: {}".format(len(names), ", ".join(states))) # [END bigquerystorage_quickstart] diff --git a/samples/quickstart/quickstart_test.py b/samples/quickstart/quickstart_test.py index 33494cca..23f3c350 100644 --- a/samples/quickstart/quickstart_test.py +++ b/samples/quickstart/quickstart_test.py @@ -35,10 +35,10 @@ def project_id(): def test_quickstart_wo_snapshot(capsys, project_id): quickstart.main(project_id) out, _ = capsys.readouterr() - assert "WA" in out + assert "unique names in states: WA" in out def test_quickstart_with_snapshot(capsys, project_id): quickstart.main(project_id, now_millis() - 5000) out, _ = capsys.readouterr() - assert "WA" in out + assert "unique names in states: WA" in out diff --git a/samples/quickstart/requirements.txt b/samples/quickstart/requirements.txt index 31c61a34..83912611 100644 --- a/samples/quickstart/requirements.txt +++ b/samples/quickstart/requirements.txt @@ -1,2 +1,2 @@ fastavro -google-cloud-bigquery-storage==1.0.0 +google-cloud-bigquery-storage==2.0.1 diff --git a/samples/to_dataframe/requirements.txt b/samples/to_dataframe/requirements.txt index 2c36d997..076b9203 100644 --- a/samples/to_dataframe/requirements.txt +++ b/samples/to_dataframe/requirements.txt @@ -1,5 +1,5 @@ google-auth==1.23.0 -google-cloud-bigquery-storage==2.0.0 +google-cloud-bigquery-storage==2.0.1 google-cloud-bigquery==2.2.0 pyarrow==2.0.0 ipython==7.10.2; python_version > '3.0' From b2a2018c3b5111d442822da80adeb935166d0cf5 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 12 Nov 2020 
11:42:28 -0800 Subject: [PATCH 05/22] chore: add blacken to template (#96) Source-Author: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Source-Date: Thu Nov 5 15:22:03 2020 -0800 Source-Repo: googleapis/synthtool Source-Sha: 1f1148d3c7a7a52f0c98077f976bd9b3c948ee2b Source-Link: https://github.com/googleapis/synthtool/commit/1f1148d3c7a7a52f0c98077f976bd9b3c948ee2b Co-authored-by: Tim Swast --- samples/quickstart/noxfile.py | 13 +++++++++++++ samples/to_dataframe/noxfile.py | 13 +++++++++++++ synth.metadata | 6 +++--- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/samples/quickstart/noxfile.py b/samples/quickstart/noxfile.py index 9be2c922..ab2c4922 100644 --- a/samples/quickstart/noxfile.py +++ b/samples/quickstart/noxfile.py @@ -147,6 +147,19 @@ def lint(session): session.run("flake8", *args) +# +# Black +# + + +@nox.session +def blacken(session): + session.install("black") + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + # # Sample Tests # diff --git a/samples/to_dataframe/noxfile.py b/samples/to_dataframe/noxfile.py index 9be2c922..ab2c4922 100644 --- a/samples/to_dataframe/noxfile.py +++ b/samples/to_dataframe/noxfile.py @@ -147,6 +147,19 @@ def lint(session): session.run("flake8", *args) +# +# Black +# + + +@nox.session +def blacken(session): + session.install("black") + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + # # Sample Tests # diff --git a/synth.metadata b/synth.metadata index 65ac4f5b..22c8acb8 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery-storage.git", - "sha": "6ff112158afef87200d01fd73a252bff35e5665d" + "sha": "7cf4f32abde1436fbe6c0848647b4ff703e60f85" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "3d3e94c4e02370f307a9a200b0c743c3d8d19f29" + "sha": "1f1148d3c7a7a52f0c98077f976bd9b3c948ee2b" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "3d3e94c4e02370f307a9a200b0c743c3d8d19f29" + "sha": "1f1148d3c7a7a52f0c98077f976bd9b3c948ee2b" } } ], From c4f7d705d050f33aac1d4cf3577e79f97e0f1da2 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 13 Nov 2020 01:34:02 +0100 Subject: [PATCH 06/22] chore(deps): update dependency google-cloud-bigquery-storage to v2.1.0 (#98) --- samples/quickstart/requirements.txt | 2 +- samples/to_dataframe/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/quickstart/requirements.txt b/samples/quickstart/requirements.txt index 83912611..6361f957 100644 --- a/samples/quickstart/requirements.txt +++ b/samples/quickstart/requirements.txt @@ -1,2 +1,2 @@ fastavro -google-cloud-bigquery-storage==2.0.1 +google-cloud-bigquery-storage==2.1.0 diff --git a/samples/to_dataframe/requirements.txt b/samples/to_dataframe/requirements.txt index 076b9203..f7fb919d 100644 --- a/samples/to_dataframe/requirements.txt +++ b/samples/to_dataframe/requirements.txt @@ -1,5 +1,5 @@ google-auth==1.23.0 -google-cloud-bigquery-storage==2.0.1 +google-cloud-bigquery-storage==2.1.0 google-cloud-bigquery==2.2.0 pyarrow==2.0.0 ipython==7.10.2; python_version > '3.0' From 994a7c1cb1f8008e630d2325a9c168001e5081b4 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 16 Nov 2020 19:05:56 +0100 Subject: [PATCH 07/22] chore(deps): update 
dependency google-cloud-bigquery to v2.3.1 (#97) --- samples/to_dataframe/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/to_dataframe/requirements.txt b/samples/to_dataframe/requirements.txt index f7fb919d..7542e362 100644 --- a/samples/to_dataframe/requirements.txt +++ b/samples/to_dataframe/requirements.txt @@ -1,6 +1,6 @@ google-auth==1.23.0 google-cloud-bigquery-storage==2.1.0 -google-cloud-bigquery==2.2.0 +google-cloud-bigquery==2.3.1 pyarrow==2.0.0 ipython==7.10.2; python_version > '3.0' ipython==5.9.0; python_version < '3.0' From 8e2e445c5c15919f8e981c8cf44888073a14ac0e Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 7 Dec 2020 23:43:02 +0100 Subject: [PATCH 08/22] chore(deps): update dependency google-cloud-bigquery to v2.5.0 (#100) --- samples/to_dataframe/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/to_dataframe/requirements.txt b/samples/to_dataframe/requirements.txt index 7542e362..0bc709e7 100644 --- a/samples/to_dataframe/requirements.txt +++ b/samples/to_dataframe/requirements.txt @@ -1,6 +1,6 @@ google-auth==1.23.0 google-cloud-bigquery-storage==2.1.0 -google-cloud-bigquery==2.3.1 +google-cloud-bigquery==2.5.0 pyarrow==2.0.0 ipython==7.10.2; python_version > '3.0' ipython==5.9.0; python_version < '3.0' From f38d1c7a5eab4e38d92a214bf272607625ef7131 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 8 Dec 2020 04:17:56 +0100 Subject: [PATCH 09/22] chore(deps): update dependency google-cloud-bigquery to v2.6.0 (#103) --- samples/to_dataframe/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/to_dataframe/requirements.txt b/samples/to_dataframe/requirements.txt index 0bc709e7..71bf59f5 100644 --- a/samples/to_dataframe/requirements.txt +++ b/samples/to_dataframe/requirements.txt @@ -1,6 +1,6 @@ google-auth==1.23.0 google-cloud-bigquery-storage==2.1.0 -google-cloud-bigquery==2.5.0 +google-cloud-bigquery==2.6.0 pyarrow==2.0.0 ipython==7.10.2; python_version > '3.0' ipython==5.9.0; python_version < '3.0' From cef5d4a6794b5f5bd12dc019877d76faa467ada4 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 8 Dec 2020 15:06:03 -0700 Subject: [PATCH 10/22] chore: require samples checks (#101) Make samples kokoro sessions required --- .github/sync-repo-settings.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .github/sync-repo-settings.yaml diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml new file mode 100644 index 00000000..af599353 --- /dev/null +++ b/.github/sync-repo-settings.yaml @@ -0,0 +1,13 @@ +# https://github.com/googleapis/repo-automation-bots/tree/master/packages/sync-repo-settings +# Rules for master branch protection +branchProtectionRules: +# Identifies the protection rule pattern. Name of the branch to be protected. +# Defaults to `master` +- pattern: master + requiredStatusCheckContexts: + - 'Kokoro' + - 'cla/google' + - 'Samples - Lint' + - 'Samples - Python 3.6' + - 'Samples - Python 3.7' + - 'Samples - Python 3.8' From 3218893c61185c2186499b1db0890d300b22c944 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 22 Dec 2020 09:33:33 -0600 Subject: [PATCH 11/22] test: add system test SQL to run tests locally (#74) * test: add system test SQL to run tests locally I was missing the `public_samples_copy` dataset when trying to run the system tests locally. 
Also, it is unnecessary to use the `mock` backfill package now that Python 3.6+ is required. * revert change to use mock built-in needs AsyncMock backfill --- tests/system/assets/public_samples_copy.sql | 17 +++++++++++++++++ tests/unit/test_client_v1.py | 3 ++- tests/unit/test_reader_v1.py | 2 +- 3 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 tests/system/assets/public_samples_copy.sql diff --git a/tests/system/assets/public_samples_copy.sql b/tests/system/assets/public_samples_copy.sql new file mode 100644 index 00000000..0709fa09 --- /dev/null +++ b/tests/system/assets/public_samples_copy.sql @@ -0,0 +1,17 @@ +-- Copyright 2020 Google LLC +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- https://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +CREATE TABLE public_samples_copy.shakespeare +AS +SELECT * FROM `bigquery-public-data.samples.shakespeare` diff --git a/tests/unit/test_client_v1.py b/tests/unit/test_client_v1.py index 9ef3378d..63f9f086 100644 --- a/tests/unit/test_client_v1.py +++ b/tests/unit/test_client_v1.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock + from google.api_core.gapic_v1 import client_info -import mock import pytest from google.cloud.bigquery_storage import types diff --git a/tests/unit/test_reader_v1.py b/tests/unit/test_reader_v1.py index 4922ab47..7d9708cc 100644 --- a/tests/unit/test_reader_v1.py +++ b/tests/unit/test_reader_v1.py @@ -18,10 +18,10 @@ import decimal import itertools import json +from unittest import mock import fastavro import pyarrow -import mock import pandas import pandas.testing import pytest From bcec3ea1da2c3f90268b9a4f99c4dcd25a4a6feb Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 5 Jan 2021 19:23:06 +0100 Subject: [PATCH 12/22] chore(deps): update dependency google-auth to v1.24.0 (#108) --- samples/to_dataframe/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/to_dataframe/requirements.txt b/samples/to_dataframe/requirements.txt index 71bf59f5..0d87ca53 100644 --- a/samples/to_dataframe/requirements.txt +++ b/samples/to_dataframe/requirements.txt @@ -1,4 +1,4 @@ -google-auth==1.23.0 +google-auth==1.24.0 google-cloud-bigquery-storage==2.1.0 google-cloud-bigquery==2.6.0 pyarrow==2.0.0 From 89ba292281970cbdee5bb43b45a9dac69e29ff0a Mon Sep 17 00:00:00 2001 From: arithmetic1728 <58957152+arithmetic1728@users.noreply.github.com> Date: Tue, 5 Jan 2021 12:50:33 -0800 Subject: [PATCH 13/22] fix: skip some system tests for mtls testing (#106) --- tests/system/v1/conftest.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tests/system/v1/conftest.py b/tests/system/v1/conftest.py index cb8c5c19..f8ac01f5 100644 --- a/tests/system/v1/conftest.py +++ b/tests/system/v1/conftest.py @@ -33,9 +33,20 @@ def project_id(): @pytest.fixture(scope="session") -def credentials(): +def use_mtls(): + return "always" == os.environ.get("GOOGLE_API_USE_MTLS_ENDPOINT", "never") + + 
+@pytest.fixture(scope="session") +def credentials(use_mtls): + import google.auth from google.oauth2 import service_account + if use_mtls: + # mTLS test uses user credentials instead of service account credentials + creds, _ = google.auth.default() + return creds + # NOTE: the test config in noxfile checks that the env variable is indeed set filename = os.environ["GOOGLE_APPLICATION_CREDENTIALS"] return service_account.Credentials.from_service_account_file(filename) @@ -59,7 +70,11 @@ def small_table_reference(): @pytest.fixture(scope="session") -def local_shakespeare_table_reference(project_id): +def local_shakespeare_table_reference(project_id, use_mtls): + if use_mtls: + pytest.skip( + "Skip it for mTLS testing since the table does not exist for mTLS project" + ) return _TABLE_FORMAT.format(project_id, "public_samples_copy", "shakespeare") @@ -100,7 +115,9 @@ def table(project_id, dataset, bq_client): @pytest.fixture(scope="session") -def bq_client(credentials): +def bq_client(credentials, use_mtls): + if use_mtls: + pytest.skip("Skip it for mTLS testing since bigquery does not support mTLS") from google.cloud import bigquery return bigquery.Client(credentials=credentials) From d20d041d39fb54d19650a56117b624e637405ebb Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Wed, 6 Jan 2021 16:15:48 -0700 Subject: [PATCH 14/22] chore: add constraints file (#107) * chore: add comnstraints file * chore: add comnstraints file * chore: add comnstraints file * chore: add comnstraints file * chore: add comnstraints file * chore: add comnstraints file --- testing/constraints-3.10.txt | 0 testing/constraints-3.11.txt | 0 testing/constraints-3.6.txt | 13 +++++++++++++ testing/constraints-3.7.txt | 0 testing/constraints-3.8.txt | 0 testing/constraints-3.9.txt | 0 6 files changed, 13 insertions(+) create mode 100644 testing/constraints-3.10.txt create mode 100644 testing/constraints-3.11.txt create mode 100644 testing/constraints-3.6.txt create mode 100644 testing/constraints-3.7.txt create mode 100644 testing/constraints-3.8.txt create mode 100644 testing/constraints-3.9.txt diff --git a/testing/constraints-3.10.txt b/testing/constraints-3.10.txt new file mode 100644 index 00000000..e69de29b diff --git a/testing/constraints-3.11.txt b/testing/constraints-3.11.txt new file mode 100644 index 00000000..e69de29b diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt new file mode 100644 index 00000000..60b1b798 --- /dev/null +++ b/testing/constraints-3.6.txt @@ -0,0 +1,13 @@ +# This constraints file is used to check that lower bounds +# are correct in setup.py +# List *all* library dependencies and extras in this file. +# Pin the version to the lower bound. 
+# +# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", +# Then this file should have foo==1.14.0 +google-api-core==1.22.2 +proto-plus==1.4.0 +libcst==0.2.5 +fastavro==0.21.2 +pandas==0.17.1 +pyarrow==0.15.0 \ No newline at end of file diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt new file mode 100644 index 00000000..e69de29b diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt new file mode 100644 index 00000000..e69de29b diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt new file mode 100644 index 00000000..e69de29b From 4842e99a178e59036dbb72f23c3cb05d51c88a26 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 8 Jan 2021 21:07:51 +0100 Subject: [PATCH 15/22] chore(deps): update dependency google-cloud-bigquery to v2.6.1 (#109) --- samples/to_dataframe/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/to_dataframe/requirements.txt b/samples/to_dataframe/requirements.txt index 0d87ca53..6c9dc681 100644 --- a/samples/to_dataframe/requirements.txt +++ b/samples/to_dataframe/requirements.txt @@ -1,6 +1,6 @@ google-auth==1.24.0 google-cloud-bigquery-storage==2.1.0 -google-cloud-bigquery==2.6.0 +google-cloud-bigquery==2.6.1 pyarrow==2.0.0 ipython==7.10.2; python_version > '3.0' ipython==5.9.0; python_version < '3.0' From 936e0a5777a201c49fb76f7638a69cfd2569c2e7 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 11 Jan 2021 07:41:33 -0800 Subject: [PATCH 16/22] test: test on Python 3.9 (#99) * changes without context autosynth cannot find the source of changes triggered by earlier changes in this repository, or by version upgrades to tools such as linters. * docs(python): update intersphinx for grpc and auth * docs(python): update intersphinx for grpc and auth * use https for python intersphinx Co-authored-by: Tim Swast Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Wed Nov 18 14:37:25 2020 -0700 Source-Repo: googleapis/synthtool Source-Sha: 9a7d9fbb7045c34c9d3d22c1ff766eeae51f04c9 Source-Link: https://github.com/googleapis/synthtool/commit/9a7d9fbb7045c34c9d3d22c1ff766eeae51f04c9 * remove unnecessary index.html * add temporary hack to remove pyarrow from 3.9 builds * typo * test: separate pyarrow tests from other tests * test: lint errors * test: add missing fixtures Co-authored-by: Tim Swast --- docs/conf.py | 6 +- noxfile.py | 12 +- synth.metadata | 6 +- tests/unit/__init__.py | 0 tests/unit/helpers.py | 73 ++++++ tests/unit/test_reader_v1.py | 351 +---------------------------- tests/unit/test_reader_v1_arrow.py | 351 +++++++++++++++++++++++++++++ 7 files changed, 440 insertions(+), 359 deletions(-) create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/helpers.py create mode 100644 tests/unit/test_reader_v1_arrow.py diff --git a/docs/conf.py b/docs/conf.py index e3efb9fd..96d96148 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -345,10 +345,10 @@ # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { - "python": ("http://python.readthedocs.org/en/latest/", None), - "google-auth": ("https://google-auth.readthedocs.io/en/stable", None), + "python": ("https://python.readthedocs.org/en/latest/", None), + "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), - "grpc": ("https://grpc.io/grpc/python/", None), + "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), } diff --git a/noxfile.py b/noxfile.py index 34936e27..c6d9258f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -28,7 +28,7 @@ DEFAULT_PYTHON_VERSION = "3.8" SYSTEM_TEST_PYTHON_VERSIONS = ["3.8"] -UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8"] +UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] @nox.session(python=DEFAULT_PYTHON_VERSION) @@ -75,7 +75,10 @@ def default(session): session.install( "mock", "pytest", "pytest-cov", ) - session.install("-e", ".[fastavro,pandas,pyarrow]") + extras = "fastavro,pandas,pyarrow" + if session.python == "3.9": + extras = "fastavro,pandas" + session.install("-e", f".[{extras}]") # Run py.test against the unit tests. session.run( @@ -129,7 +132,10 @@ def system(session): session.install( "mock", "pytest", "google-cloud-testutils", ) - session.install("-e", ".[fastavro,pandas,pyarrow]") + extras = "fastavro,pandas,pyarrow" + if session.python == "3.9": + extras = "fastavro,pandas" + session.install("-e", f".[{extras}]") # Run py.test against the system tests. if system_test_exists: diff --git a/synth.metadata b/synth.metadata index 22c8acb8..831ec1b1 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery-storage.git", - "sha": "7cf4f32abde1436fbe6c0848647b4ff703e60f85" + "sha": "994a7c1cb1f8008e630d2325a9c168001e5081b4" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "1f1148d3c7a7a52f0c98077f976bd9b3c948ee2b" + "sha": "9a7d9fbb7045c34c9d3d22c1ff766eeae51f04c9" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "1f1148d3c7a7a52f0c98077f976bd9b3c948ee2b" + "sha": "9a7d9fbb7045c34c9d3d22c1ff766eeae51f04c9" } } ], diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py new file mode 100644 index 00000000..712850ff --- /dev/null +++ b/tests/unit/helpers.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +import decimal + +import pytz + + +SCALAR_COLUMNS = [ + {"name": "int_col", "type": "int64"}, + {"name": "float_col", "type": "float64"}, + {"name": "num_col", "type": "numeric"}, + {"name": "bool_col", "type": "bool"}, + {"name": "str_col", "type": "string"}, + {"name": "bytes_col", "type": "bytes"}, + {"name": "date_col", "type": "date"}, + {"name": "time_col", "type": "time"}, + {"name": "ts_col", "type": "timestamp"}, +] +SCALAR_COLUMN_NAMES = [field["name"] for field in SCALAR_COLUMNS] +SCALAR_BLOCKS = [ + [ + { + "int_col": 123, + "float_col": 3.14, + "num_col": decimal.Decimal("9.99"), + "bool_col": True, + "str_col": "hello world", + "bytes_col": b"ascii bytes", + "date_col": datetime.date(1998, 9, 4), + "time_col": datetime.time(12, 0), + "ts_col": datetime.datetime(2000, 1, 1, 5, 0, tzinfo=pytz.utc), + }, + { + "int_col": 456, + "float_col": 2.72, + "num_col": decimal.Decimal("0.99"), + "bool_col": False, + "str_col": "hallo welt", + "bytes_col": b"\xbb\xee\xff", + "date_col": datetime.date(1995, 3, 2), + "time_col": datetime.time(13, 37), + "ts_col": datetime.datetime(1965, 4, 3, 2, 1, tzinfo=pytz.utc), + }, + ], + [ + { + "int_col": 789, + "float_col": 1.23, + "num_col": decimal.Decimal("5.67"), + "bool_col": True, + "str_col": u"こんにちは世界", + "bytes_col": b"\x54\x69\x6d", + "date_col": datetime.date(1970, 1, 1), + "time_col": datetime.time(16, 20), + "ts_col": datetime.datetime(1991, 8, 25, 20, 57, 8, tzinfo=pytz.utc), + } + ], +] diff --git a/tests/unit/test_reader_v1.py b/tests/unit/test_reader_v1.py index 7d9708cc..fcefbca7 100644 --- a/tests/unit/test_reader_v1.py +++ b/tests/unit/test_reader_v1.py @@ -14,22 +14,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime -import decimal import itertools import json from unittest import mock import fastavro -import pyarrow import pandas import pandas.testing import pytest -import pytz import six import google.api_core.exceptions from google.cloud.bigquery_storage import types +from .helpers import SCALAR_COLUMNS, SCALAR_COLUMN_NAMES, SCALAR_BLOCKS PROJECT = "my-project" @@ -45,71 +42,6 @@ "time": {"type": "long", "logicalType": "time-micros"}, "timestamp": {"type": "long", "logicalType": "timestamp-micros"}, } -# This dictionary is duplicated in bigquery/google/cloud/bigquery/_pandas_helpers.py -# When modifying it be sure to update it there as well. 
-BQ_TO_ARROW_TYPES = { - "int64": pyarrow.int64(), - "float64": pyarrow.float64(), - "bool": pyarrow.bool_(), - "numeric": pyarrow.decimal128(38, 9), - "string": pyarrow.utf8(), - "bytes": pyarrow.binary(), - "date": pyarrow.date32(), # int32 days since epoch - "datetime": pyarrow.timestamp("us"), - "time": pyarrow.time64("us"), - "timestamp": pyarrow.timestamp("us", tz="UTC"), -} -SCALAR_COLUMNS = [ - {"name": "int_col", "type": "int64"}, - {"name": "float_col", "type": "float64"}, - {"name": "num_col", "type": "numeric"}, - {"name": "bool_col", "type": "bool"}, - {"name": "str_col", "type": "string"}, - {"name": "bytes_col", "type": "bytes"}, - {"name": "date_col", "type": "date"}, - {"name": "time_col", "type": "time"}, - {"name": "ts_col", "type": "timestamp"}, -] -SCALAR_COLUMN_NAMES = [field["name"] for field in SCALAR_COLUMNS] -SCALAR_BLOCKS = [ - [ - { - "int_col": 123, - "float_col": 3.14, - "num_col": decimal.Decimal("9.99"), - "bool_col": True, - "str_col": "hello world", - "bytes_col": b"ascii bytes", - "date_col": datetime.date(1998, 9, 4), - "time_col": datetime.time(12, 0), - "ts_col": datetime.datetime(2000, 1, 1, 5, 0, tzinfo=pytz.utc), - }, - { - "int_col": 456, - "float_col": 2.72, - "num_col": decimal.Decimal("0.99"), - "bool_col": False, - "str_col": "hallo welt", - "bytes_col": b"\xbb\xee\xff", - "date_col": datetime.date(1995, 3, 2), - "time_col": datetime.time(13, 37), - "ts_col": datetime.datetime(1965, 4, 3, 2, 1, tzinfo=pytz.utc), - }, - ], - [ - { - "int_col": 789, - "float_col": 1.23, - "num_col": decimal.Decimal("5.67"), - "bool_col": True, - "str_col": u"こんにちは世界", - "bytes_col": b"\x54\x69\x6d", - "date_col": datetime.date(1970, 1, 1), - "time_col": datetime.time(16, 20), - "ts_col": datetime.datetime(1991, 8, 25, 20, 57, 8, tzinfo=pytz.utc), - } - ], -] @pytest.fixture() @@ -145,35 +77,6 @@ def _bq_to_avro_blocks(bq_blocks, avro_schema_json): return avro_blocks -def _bq_to_arrow_batch_objects(bq_blocks, arrow_schema): - arrow_batches = [] - for block in bq_blocks: - arrays = [] - for name in arrow_schema.names: - arrays.append( - pyarrow.array( - (row[name] for row in block), - type=arrow_schema.field(name).type, - size=len(block), - ) - ) - arrow_batches.append( - pyarrow.RecordBatch.from_arrays(arrays, schema=arrow_schema) - ) - return arrow_batches - - -def _bq_to_arrow_batches(bq_blocks, arrow_schema): - arrow_batches = [] - for record_batch in _bq_to_arrow_batch_objects(bq_blocks, arrow_schema): - response = types.ReadRowsResponse() - response.arrow_record_batch.serialized_record_batch = ( - record_batch.serialize().to_pybytes() - ) - arrow_batches.append(response) - return arrow_batches - - def _pages_w_nonresumable_internal_error(avro_blocks): for block in avro_blocks: yield block @@ -207,12 +110,6 @@ def _generate_avro_read_session(avro_schema_json): return types.ReadSession(avro_schema={"schema": schema}) -def _generate_arrow_read_session(arrow_schema): - return types.ReadSession( - arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()} - ) - - def _bq_to_avro_schema(bq_columns): fields = [] avro_schema = {"type": "record", "name": "__root__", "fields": fields} @@ -231,20 +128,6 @@ def _bq_to_avro_schema(bq_columns): return avro_schema -def _bq_to_arrow_schema(bq_columns): - def bq_col_as_field(column): - metadata = None - if column.get("description") is not None: - metadata = {"description": column.get("description")} - name = column["name"] - type_ = BQ_TO_ARROW_TYPES[column["type"]] - mode = column.get("mode", 
"nullable").lower() - - return pyarrow.field(name, type_, mode == "nullable", metadata) - - return pyarrow.schema(bq_col_as_field(c) for c in bq_columns) - - def _get_avro_bytes(rows, avro_schema): avro_file = six.BytesIO() for row in rows: @@ -266,20 +149,6 @@ def test_avro_rows_raises_import_error( reader.rows(read_session) -def test_pyarrow_rows_raises_import_error( - mut, class_under_test, mock_gapic_client, monkeypatch -): - monkeypatch.setattr(mut, "pyarrow", None) - reader = class_under_test([], mock_gapic_client, "", 0, {}) - - bq_columns = [{"name": "int_col", "type": "int64"}] - arrow_schema = _bq_to_arrow_schema(bq_columns) - read_session = _generate_arrow_read_session(arrow_schema) - - with pytest.raises(ImportError): - reader.rows(read_session) - - def test_rows_no_schema_set_raises_type_error( mut, class_under_test, mock_gapic_client, monkeypatch ): @@ -300,16 +169,6 @@ def test_rows_w_empty_stream(class_under_test, mock_gapic_client): assert tuple(got) == () -def test_rows_w_empty_stream_arrow(class_under_test, mock_gapic_client): - bq_columns = [{"name": "int_col", "type": "int64"}] - arrow_schema = _bq_to_arrow_schema(bq_columns) - read_session = _generate_arrow_read_session(arrow_schema) - reader = class_under_test([], mock_gapic_client, "", 0, {}) - - got = reader.rows(read_session) - assert tuple(got) == () - - def test_rows_w_scalars(class_under_test, mock_gapic_client): avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS) read_session = _generate_avro_read_session(avro_schema) @@ -322,21 +181,6 @@ def test_rows_w_scalars(class_under_test, mock_gapic_client): assert got == expected -def test_rows_w_scalars_arrow(class_under_test, mock_gapic_client): - arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) - read_session = _generate_arrow_read_session(arrow_schema) - arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - - reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) - got = tuple( - dict((key, value.as_py()) for key, value in row_dict.items()) - for row_dict in reader.rows(read_session) - ) - - expected = tuple(itertools.chain.from_iterable(SCALAR_BLOCKS)) - assert got == expected - - def test_rows_w_timeout(class_under_test, mock_gapic_client): bq_columns = [{"name": "int_col", "type": "int64"}] avro_schema = _bq_to_avro_schema(bq_columns) @@ -481,37 +325,6 @@ def test_rows_w_reconnect_by_page(class_under_test, mock_gapic_client): assert page_4.remaining == 0 -def test_to_arrow_no_pyarrow_raises_import_error( - mut, class_under_test, mock_gapic_client, monkeypatch -): - monkeypatch.setattr(mut, "pyarrow", None) - arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) - read_session = _generate_arrow_read_session(arrow_schema) - arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) - - with pytest.raises(ImportError): - reader.to_arrow(read_session) - - with pytest.raises(ImportError): - reader.rows(read_session).to_arrow() - - with pytest.raises(ImportError): - next(reader.rows(read_session).pages).to_arrow() - - -def test_to_arrow_w_scalars_arrow(class_under_test): - arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) - read_session = _generate_arrow_read_session(arrow_schema) - arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) - actual_table = reader.to_arrow(read_session) - expected_table = pyarrow.Table.from_batches( - _bq_to_arrow_batch_objects(SCALAR_BLOCKS, 
arrow_schema) - ) - assert actual_table == expected_table - - def test_to_dataframe_no_pandas_raises_import_error( mut, class_under_test, mock_gapic_client, monkeypatch ): @@ -572,24 +385,6 @@ def test_to_dataframe_w_scalars(class_under_test): ) -def test_to_dataframe_w_scalars_arrow(class_under_test): - arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) - read_session = _generate_arrow_read_session(arrow_schema) - arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) - - reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) - got = reader.to_dataframe(read_session) - - expected = pandas.DataFrame( - list(itertools.chain.from_iterable(SCALAR_BLOCKS)), columns=SCALAR_COLUMN_NAMES - ) - - pandas.testing.assert_frame_equal( - got.reset_index(drop=True), # reset_index to ignore row labels - expected.reset_index(drop=True), - ) - - def test_to_dataframe_w_dtypes(class_under_test): avro_schema = _bq_to_avro_schema( [ @@ -620,36 +415,6 @@ def test_to_dataframe_w_dtypes(class_under_test): ) -def test_to_dataframe_w_dtypes_arrow(class_under_test): - arrow_schema = _bq_to_arrow_schema( - [ - {"name": "bigfloat", "type": "float64"}, - {"name": "lilfloat", "type": "float64"}, - ] - ) - read_session = _generate_arrow_read_session(arrow_schema) - blocks = [ - [{"bigfloat": 1.25, "lilfloat": 30.5}, {"bigfloat": 2.5, "lilfloat": 21.125}], - [{"bigfloat": 3.75, "lilfloat": 11.0}], - ] - arrow_batches = _bq_to_arrow_batches(blocks, arrow_schema) - - reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) - got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"}) - - expected = pandas.DataFrame( - { - "bigfloat": [1.25, 2.5, 3.75], - "lilfloat": pandas.Series([30.5, 21.125, 11.0], dtype="float16"), - }, - columns=["bigfloat", "lilfloat"], - ) - pandas.testing.assert_frame_equal( - got.reset_index(drop=True), # reset_index to ignore row labels - expected.reset_index(drop=True), - ) - - def test_to_dataframe_empty_w_scalars_avro(class_under_test): avro_schema = _bq_to_avro_schema(SCALAR_COLUMNS) read_session = _generate_avro_read_session(avro_schema) @@ -670,26 +435,6 @@ def test_to_dataframe_empty_w_scalars_avro(class_under_test): ) -def test_to_dataframe_empty_w_scalars_arrow(class_under_test): - arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) - read_session = _generate_arrow_read_session(arrow_schema) - arrow_batches = _bq_to_arrow_batches([], arrow_schema) - reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) - - got = reader.to_dataframe(read_session) - - expected = pandas.DataFrame([], columns=SCALAR_COLUMN_NAMES) - expected["int_col"] = expected["int_col"].astype("int64") - expected["float_col"] = expected["float_col"].astype("float64") - expected["bool_col"] = expected["bool_col"].astype("bool") - expected["ts_col"] = expected["ts_col"].astype("datetime64[ns, UTC]") - - pandas.testing.assert_frame_equal( - got.reset_index(drop=True), # reset_index to ignore row labels - expected.reset_index(drop=True), - ) - - def test_to_dataframe_empty_w_dtypes_avro(class_under_test, mock_gapic_client): avro_schema = _bq_to_avro_schema( [ @@ -713,29 +458,6 @@ def test_to_dataframe_empty_w_dtypes_avro(class_under_test, mock_gapic_client): ) -def test_to_dataframe_empty_w_dtypes_arrow(class_under_test, mock_gapic_client): - arrow_schema = _bq_to_arrow_schema( - [ - {"name": "bigfloat", "type": "float64"}, - {"name": "lilfloat", "type": "float64"}, - ] - ) - read_session = _generate_arrow_read_session(arrow_schema) - arrow_batches = 
_bq_to_arrow_batches([], arrow_schema) - reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) - - got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"}) - - expected = pandas.DataFrame([], columns=["bigfloat", "lilfloat"]) - expected["bigfloat"] = expected["bigfloat"].astype("float64") - expected["lilfloat"] = expected["lilfloat"].astype("float16") - - pandas.testing.assert_frame_equal( - got.reset_index(drop=True), # reset_index to ignore row labels - expected.reset_index(drop=True), - ) - - def test_to_dataframe_by_page(class_under_test, mock_gapic_client): bq_columns = [ {"name": "int_col", "type": "int64"}, @@ -797,74 +519,3 @@ def test_to_dataframe_by_page(class_under_test, mock_gapic_client): drop=True ), ) - - -def test_to_dataframe_by_page_arrow(class_under_test, mock_gapic_client): - bq_columns = [ - {"name": "int_col", "type": "int64"}, - {"name": "bool_col", "type": "bool"}, - ] - arrow_schema = _bq_to_arrow_schema(bq_columns) - read_session = _generate_arrow_read_session(arrow_schema) - - bq_block_1 = [ - {"int_col": 123, "bool_col": True}, - {"int_col": 234, "bool_col": False}, - ] - bq_block_2 = [ - {"int_col": 345, "bool_col": True}, - {"int_col": 456, "bool_col": False}, - ] - bq_block_3 = [ - {"int_col": 567, "bool_col": True}, - {"int_col": 789, "bool_col": False}, - ] - bq_block_4 = [{"int_col": 890, "bool_col": True}] - # Break blocks into two groups to test that iteration continues across - # reconnection. - bq_blocks_1 = [bq_block_1, bq_block_2] - bq_blocks_2 = [bq_block_3, bq_block_4] - batch_1 = _bq_to_arrow_batches(bq_blocks_1, arrow_schema) - batch_2 = _bq_to_arrow_batches(bq_blocks_2, arrow_schema) - - mock_gapic_client.read_rows.return_value = batch_2 - - reader = class_under_test( - _pages_w_unavailable(batch_1), mock_gapic_client, "", 0, {} - ) - got = reader.rows(read_session) - pages = iter(got.pages) - - page_1 = next(pages) - pandas.testing.assert_frame_equal( - page_1.to_dataframe( - dtypes={"int_col": "int64", "bool_col": "bool"} - ).reset_index(drop=True), - pandas.DataFrame(bq_block_1, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) - - page_2 = next(pages) - pandas.testing.assert_frame_equal( - page_2.to_dataframe().reset_index(drop=True), - pandas.DataFrame(bq_block_2, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) - - page_3 = next(pages) - pandas.testing.assert_frame_equal( - page_3.to_dataframe().reset_index(drop=True), - pandas.DataFrame(bq_block_3, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) - - page_4 = next(pages) - pandas.testing.assert_frame_equal( - page_4.to_dataframe().reset_index(drop=True), - pandas.DataFrame(bq_block_4, columns=["int_col", "bool_col"]).reset_index( - drop=True - ), - ) diff --git a/tests/unit/test_reader_v1_arrow.py b/tests/unit/test_reader_v1_arrow.py new file mode 100644 index 00000000..202e0d81 --- /dev/null +++ b/tests/unit/test_reader_v1_arrow.py @@ -0,0 +1,351 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +from unittest import mock + +import pandas +import pandas.testing +import pytest + +import google.api_core.exceptions +from google.cloud.bigquery_storage import types +from .helpers import SCALAR_COLUMNS, SCALAR_COLUMN_NAMES, SCALAR_BLOCKS + + +pyarrow = pytest.importorskip("pyarrow") + + +# This dictionary is duplicated in bigquery/google/cloud/bigquery/_pandas_helpers.py +# When modifying it be sure to update it there as well. +BQ_TO_ARROW_TYPES = { + "int64": pyarrow.int64(), + "float64": pyarrow.float64(), + "bool": pyarrow.bool_(), + "numeric": pyarrow.decimal128(38, 9), + "string": pyarrow.utf8(), + "bytes": pyarrow.binary(), + "date": pyarrow.date32(), # int32 days since epoch + "datetime": pyarrow.timestamp("us"), + "time": pyarrow.time64("us"), + "timestamp": pyarrow.timestamp("us", tz="UTC"), +} + + +@pytest.fixture() +def mut(): + from google.cloud.bigquery_storage_v1 import reader + + return reader + + +@pytest.fixture() +def class_under_test(mut): + return mut.ReadRowsStream + + +@pytest.fixture() +def mock_gapic_client(): + from google.cloud.bigquery_storage_v1.services import big_query_read + + return mock.create_autospec(big_query_read.BigQueryReadClient) + + +def _bq_to_arrow_batch_objects(bq_blocks, arrow_schema): + arrow_batches = [] + for block in bq_blocks: + arrays = [] + for name in arrow_schema.names: + arrays.append( + pyarrow.array( + (row[name] for row in block), + type=arrow_schema.field(name).type, + size=len(block), + ) + ) + arrow_batches.append( + pyarrow.RecordBatch.from_arrays(arrays, schema=arrow_schema) + ) + return arrow_batches + + +def _bq_to_arrow_batches(bq_blocks, arrow_schema): + arrow_batches = [] + for record_batch in _bq_to_arrow_batch_objects(bq_blocks, arrow_schema): + response = types.ReadRowsResponse() + response.arrow_record_batch.serialized_record_batch = ( + record_batch.serialize().to_pybytes() + ) + arrow_batches.append(response) + return arrow_batches + + +def _bq_to_arrow_schema(bq_columns): + def bq_col_as_field(column): + metadata = None + if column.get("description") is not None: + metadata = {"description": column.get("description")} + name = column["name"] + type_ = BQ_TO_ARROW_TYPES[column["type"]] + mode = column.get("mode", "nullable").lower() + + return pyarrow.field(name, type_, mode == "nullable", metadata) + + return pyarrow.schema(bq_col_as_field(c) for c in bq_columns) + + +def _generate_arrow_read_session(arrow_schema): + return types.ReadSession( + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()} + ) + + +def _pages_w_unavailable(pages): + for page in pages: + yield page + raise google.api_core.exceptions.ServiceUnavailable("test: please reconnect") + + +def test_pyarrow_rows_raises_import_error( + mut, class_under_test, mock_gapic_client, monkeypatch +): + monkeypatch.setattr(mut, "pyarrow", None) + reader = class_under_test([], mock_gapic_client, "", 0, {}) + + bq_columns = [{"name": "int_col", "type": "int64"}] + arrow_schema = _bq_to_arrow_schema(bq_columns) + read_session = _generate_arrow_read_session(arrow_schema) + + with pytest.raises(ImportError): + reader.rows(read_session) + + +def test_to_arrow_no_pyarrow_raises_import_error( + mut, class_under_test, mock_gapic_client, monkeypatch +): + monkeypatch.setattr(mut, "pyarrow", None) + arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) + read_session = _generate_arrow_read_session(arrow_schema) + arrow_batches = 
_bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) + + with pytest.raises(ImportError): + reader.to_arrow(read_session) + + with pytest.raises(ImportError): + reader.rows(read_session).to_arrow() + + with pytest.raises(ImportError): + next(reader.rows(read_session).pages).to_arrow() + + +def test_to_arrow_w_scalars_arrow(class_under_test): + arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) + read_session = _generate_arrow_read_session(arrow_schema) + arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) + actual_table = reader.to_arrow(read_session) + expected_table = pyarrow.Table.from_batches( + _bq_to_arrow_batch_objects(SCALAR_BLOCKS, arrow_schema) + ) + assert actual_table == expected_table + + +def test_to_dataframe_w_scalars_arrow(class_under_test): + arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) + read_session = _generate_arrow_read_session(arrow_schema) + arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) + + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) + got = reader.to_dataframe(read_session) + + expected = pandas.DataFrame( + list(itertools.chain.from_iterable(SCALAR_BLOCKS)), columns=SCALAR_COLUMN_NAMES + ) + + pandas.testing.assert_frame_equal( + got.reset_index(drop=True), # reset_index to ignore row labels + expected.reset_index(drop=True), + ) + + +def test_rows_w_empty_stream_arrow(class_under_test, mock_gapic_client): + bq_columns = [{"name": "int_col", "type": "int64"}] + arrow_schema = _bq_to_arrow_schema(bq_columns) + read_session = _generate_arrow_read_session(arrow_schema) + reader = class_under_test([], mock_gapic_client, "", 0, {}) + + got = reader.rows(read_session) + assert tuple(got) == () + + +def test_rows_w_scalars_arrow(class_under_test, mock_gapic_client): + arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) + read_session = _generate_arrow_read_session(arrow_schema) + arrow_batches = _bq_to_arrow_batches(SCALAR_BLOCKS, arrow_schema) + + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) + got = tuple( + dict((key, value.as_py()) for key, value in row_dict.items()) + for row_dict in reader.rows(read_session) + ) + + expected = tuple(itertools.chain.from_iterable(SCALAR_BLOCKS)) + assert got == expected + + +def test_to_dataframe_w_dtypes_arrow(class_under_test): + arrow_schema = _bq_to_arrow_schema( + [ + {"name": "bigfloat", "type": "float64"}, + {"name": "lilfloat", "type": "float64"}, + ] + ) + read_session = _generate_arrow_read_session(arrow_schema) + blocks = [ + [{"bigfloat": 1.25, "lilfloat": 30.5}, {"bigfloat": 2.5, "lilfloat": 21.125}], + [{"bigfloat": 3.75, "lilfloat": 11.0}], + ] + arrow_batches = _bq_to_arrow_batches(blocks, arrow_schema) + + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) + got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"}) + + expected = pandas.DataFrame( + { + "bigfloat": [1.25, 2.5, 3.75], + "lilfloat": pandas.Series([30.5, 21.125, 11.0], dtype="float16"), + }, + columns=["bigfloat", "lilfloat"], + ) + pandas.testing.assert_frame_equal( + got.reset_index(drop=True), # reset_index to ignore row labels + expected.reset_index(drop=True), + ) + + +def test_to_dataframe_empty_w_scalars_arrow(class_under_test): + arrow_schema = _bq_to_arrow_schema(SCALAR_COLUMNS) + read_session = _generate_arrow_read_session(arrow_schema) + arrow_batches = _bq_to_arrow_batches([], 
arrow_schema) + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) + + got = reader.to_dataframe(read_session) + + expected = pandas.DataFrame([], columns=SCALAR_COLUMN_NAMES) + expected["int_col"] = expected["int_col"].astype("int64") + expected["float_col"] = expected["float_col"].astype("float64") + expected["bool_col"] = expected["bool_col"].astype("bool") + expected["ts_col"] = expected["ts_col"].astype("datetime64[ns, UTC]") + + pandas.testing.assert_frame_equal( + got.reset_index(drop=True), # reset_index to ignore row labels + expected.reset_index(drop=True), + ) + + +def test_to_dataframe_empty_w_dtypes_arrow(class_under_test, mock_gapic_client): + arrow_schema = _bq_to_arrow_schema( + [ + {"name": "bigfloat", "type": "float64"}, + {"name": "lilfloat", "type": "float64"}, + ] + ) + read_session = _generate_arrow_read_session(arrow_schema) + arrow_batches = _bq_to_arrow_batches([], arrow_schema) + reader = class_under_test(arrow_batches, mock_gapic_client, "", 0, {}) + + got = reader.to_dataframe(read_session, dtypes={"lilfloat": "float16"}) + + expected = pandas.DataFrame([], columns=["bigfloat", "lilfloat"]) + expected["bigfloat"] = expected["bigfloat"].astype("float64") + expected["lilfloat"] = expected["lilfloat"].astype("float16") + + pandas.testing.assert_frame_equal( + got.reset_index(drop=True), # reset_index to ignore row labels + expected.reset_index(drop=True), + ) + + +def test_to_dataframe_by_page_arrow(class_under_test, mock_gapic_client): + bq_columns = [ + {"name": "int_col", "type": "int64"}, + {"name": "bool_col", "type": "bool"}, + ] + arrow_schema = _bq_to_arrow_schema(bq_columns) + read_session = _generate_arrow_read_session(arrow_schema) + + bq_block_1 = [ + {"int_col": 123, "bool_col": True}, + {"int_col": 234, "bool_col": False}, + ] + bq_block_2 = [ + {"int_col": 345, "bool_col": True}, + {"int_col": 456, "bool_col": False}, + ] + bq_block_3 = [ + {"int_col": 567, "bool_col": True}, + {"int_col": 789, "bool_col": False}, + ] + bq_block_4 = [{"int_col": 890, "bool_col": True}] + # Break blocks into two groups to test that iteration continues across + # reconnection. 
+ bq_blocks_1 = [bq_block_1, bq_block_2] + bq_blocks_2 = [bq_block_3, bq_block_4] + batch_1 = _bq_to_arrow_batches(bq_blocks_1, arrow_schema) + batch_2 = _bq_to_arrow_batches(bq_blocks_2, arrow_schema) + + mock_gapic_client.read_rows.return_value = batch_2 + + reader = class_under_test( + _pages_w_unavailable(batch_1), mock_gapic_client, "", 0, {} + ) + got = reader.rows(read_session) + pages = iter(got.pages) + + page_1 = next(pages) + pandas.testing.assert_frame_equal( + page_1.to_dataframe( + dtypes={"int_col": "int64", "bool_col": "bool"} + ).reset_index(drop=True), + pandas.DataFrame(bq_block_1, columns=["int_col", "bool_col"]).reset_index( + drop=True + ), + ) + + page_2 = next(pages) + pandas.testing.assert_frame_equal( + page_2.to_dataframe().reset_index(drop=True), + pandas.DataFrame(bq_block_2, columns=["int_col", "bool_col"]).reset_index( + drop=True + ), + ) + + page_3 = next(pages) + pandas.testing.assert_frame_equal( + page_3.to_dataframe().reset_index(drop=True), + pandas.DataFrame(bq_block_3, columns=["int_col", "bool_col"]).reset_index( + drop=True + ), + ) + + page_4 = next(pages) + pandas.testing.assert_frame_equal( + page_4.to_dataframe().reset_index(drop=True), + pandas.DataFrame(bq_block_4, columns=["int_col", "bool_col"]).reset_index( + drop=True + ), + ) From c6c7a1a5e9c1616c0e133be104557447fcf7967f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 12 Jan 2021 10:42:05 -0600 Subject: [PATCH 17/22] chore: remove manual workaround for response size validation (#112) * chore: remove manual workaround for response size validation * test: revert extras changes to fix 3.9 build --- .coveragerc | 31 +++-------- .flake8 | 1 + .kokoro/build.sh | 16 +++--- .kokoro/docs/docs-presubmit.cfg | 11 ++++ .pre-commit-config.yaml | 17 ++++++ .trampolinerc | 2 + CONTRIBUTING.rst | 21 +++++--- LICENSE | 7 +-- docs/_static/custom.css | 7 ++- docs/bigquery_storage_v1/big_query_read.rst | 6 +++ docs/bigquery_storage_v1/services.rst | 6 +-- docs/bigquery_storage_v1/types.rst | 1 + .../services/big_query_read/async_client.py | 24 +++++---- .../services/big_query_read/client.py | 53 +++++++++++++------ .../big_query_read/transports/__init__.py | 1 - .../big_query_read/transports/grpc.py | 10 ++-- .../big_query_read/transports/grpc_asyncio.py | 8 +-- .../bigquery_storage_v1/types/__init__.py | 3 +- .../bigquery_storage_v1/types/storage.py | 16 +++--- .../cloud/bigquery_storage_v1/types/stream.py | 16 +++--- noxfile.py | 15 ++++-- samples/quickstart/noxfile.py | 19 ++++--- samples/to_dataframe/noxfile.py | 19 ++++--- synth.metadata | 14 ++--- synth.py | 38 ------------- .../test_big_query_read.py | 41 ++++++++++---- 26 files changed, 231 insertions(+), 172 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 docs/bigquery_storage_v1/big_query_read.rst diff --git a/.coveragerc b/.coveragerc index dd39c854..593c73d9 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,35 +1,18 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# Generated by synthtool. DO NOT EDIT! [run] branch = True [report] fail_under = 100 show_missing = True +omit = + google/cloud/bigquery_storage/__init__.py exclude_lines = # Re-enable the standard pragma pragma: NO COVER # Ignore debug-only repr def __repr__ - # Ignore abstract methods - raise NotImplementedError -omit = - */gapic/*.py - */proto/*.py - */core/*.py - */site-packages/*.py \ No newline at end of file + # Ignore pkg_resources exceptions. + # This is added at the module level as a safeguard for if someone + # generates the code and tries to run it without pip installing. This + # makes it virtually impossible to test properly. + except pkg_resources.DistributionNotFound diff --git a/.flake8 b/.flake8 index ed931638..29227d4c 100644 --- a/.flake8 +++ b/.flake8 @@ -26,6 +26,7 @@ exclude = *_pb2.py # Standard linting exemptions. + **/.nox/** __pycache__, .git, *.pyc, diff --git a/.kokoro/build.sh b/.kokoro/build.sh index a7968df7..9d2f7d14 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -15,7 +15,11 @@ set -eo pipefail -cd github/python-bigquery-storage +if [[ -z "${PROJECT_ROOT:-}" ]]; then + PROJECT_ROOT="github/python-bigquery-storage" +fi + +cd "${PROJECT_ROOT}" # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 @@ -30,16 +34,16 @@ export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") # Remove old nox -python3.6 -m pip uninstall --yes --quiet nox-automation +python3 -m pip uninstall --yes --quiet nox-automation # Install nox -python3.6 -m pip install --upgrade --quiet nox -python3.6 -m nox --version +python3 -m pip install --upgrade --quiet nox +python3 -m nox --version # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. if [[ -n "${NOX_SESSION:-}" ]]; then - python3.6 -m nox -s "${NOX_SESSION:-}" + python3 -m nox -s ${NOX_SESSION:-} else - python3.6 -m nox + python3 -m nox fi diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg index 11181078..d59ee418 100644 --- a/.kokoro/docs/docs-presubmit.cfg +++ b/.kokoro/docs/docs-presubmit.cfg @@ -15,3 +15,14 @@ env_vars: { key: "TRAMPOLINE_IMAGE_UPLOAD" value: "false" } + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery-storage/.kokoro/build.sh" +} + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "docs docfx" +} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..a9024b15 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,17 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml +- repo: https://github.com/psf/black + rev: 19.10b0 + hooks: + - id: black +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 diff --git a/.trampolinerc b/.trampolinerc index 995ee291..c7d663ae 100644 --- a/.trampolinerc +++ b/.trampolinerc @@ -18,12 +18,14 @@ required_envvars+=( "STAGING_BUCKET" "V2_STAGING_BUCKET" + "NOX_SESSION" ) # Add env vars which are passed down into the container here. 
pass_down_envvars+=( "STAGING_BUCKET" "V2_STAGING_BUCKET" + "NOX_SESSION" ) # Prevent unintentional override on the default image. diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index cc2641d2..2ee34e24 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -21,8 +21,8 @@ In order to add a feature: - The feature must be documented in both the API and narrative documentation. -- The feature must work fully on the following CPython versions: 2.7, - 3.5, 3.6, 3.7 and 3.8 on both UNIX and Windows. +- The feature must work fully on the following CPython versions: + 3.6, 3.7, 3.8 and 3.9 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -111,6 +111,16 @@ Coding Style should point to the official ``googleapis`` checkout and the the branch should be the main branch on that remote (``master``). +- This repository contains configuration for the + `pre-commit `__ tool, which automates checking + our linters during a commit. If you have it installed on your ``$PATH``, + you can enable enforcing those checks via: + +.. code-block:: bash + + $ pre-commit install + pre-commit installed at .git/hooks/pre-commit + Exceptions to PEP8: - Many unit tests use a helper method, ``_call_fut`` ("FUT" is short for @@ -192,25 +202,24 @@ Supported Python Versions We support: -- `Python 3.5`_ - `Python 3.6`_ - `Python 3.7`_ - `Python 3.8`_ +- `Python 3.9`_ -.. _Python 3.5: https://docs.python.org/3.5/ .. _Python 3.6: https://docs.python.org/3.6/ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ +.. _Python 3.9: https://docs.python.org/3.9/ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery-storage/blob/master/noxfile.py -Python 2.7 support is deprecated. All code changes should maintain Python 2.7 compatibility until January 1, 2020. We also explicitly decided to support Python 3 beginning with version -3.5. Reasons for this include: +3.6. Reasons for this include: - Encouraging use of newest versions of Python 3 - Taking the lead of `prominent`_ open-source `projects`_ diff --git a/LICENSE b/LICENSE index a8ee855d..d6456956 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ - Apache License + + Apache License Version 2.0, January 2004 - https://www.apache.org/licenses/ + http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION @@ -192,7 +193,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at - https://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 0abaf229..bcd37bbd 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,4 +1,9 @@ div#python2-eol { border-color: red; border-width: medium; -} \ No newline at end of file +} + +/* Ensure minimum width for 'Parameters' / 'Returns' column */ +dl.field-list > dt { + min-width: 100px +} diff --git a/docs/bigquery_storage_v1/big_query_read.rst b/docs/bigquery_storage_v1/big_query_read.rst new file mode 100644 index 00000000..74f0a5fa --- /dev/null +++ b/docs/bigquery_storage_v1/big_query_read.rst @@ -0,0 +1,6 @@ +BigQueryRead +------------------------------ + +.. 
automodule:: google.cloud.bigquery_storage_v1.services.big_query_read + :members: + :inherited-members: diff --git a/docs/bigquery_storage_v1/services.rst b/docs/bigquery_storage_v1/services.rst index 5d0f9532..3f6cff3d 100644 --- a/docs/bigquery_storage_v1/services.rst +++ b/docs/bigquery_storage_v1/services.rst @@ -1,6 +1,6 @@ Services for Google Cloud Bigquery Storage v1 API ================================================= +.. toctree:: + :maxdepth: 2 -.. automodule:: google.cloud.bigquery_storage_v1.services.big_query_read - :members: - :inherited-members: + big_query_read diff --git a/docs/bigquery_storage_v1/types.rst b/docs/bigquery_storage_v1/types.rst index 3f722c57..28b5db72 100644 --- a/docs/bigquery_storage_v1/types.rst +++ b/docs/bigquery_storage_v1/types.rst @@ -3,4 +3,5 @@ Types for Google Cloud Bigquery Storage v1 API .. automodule:: google.cloud.bigquery_storage_v1.types :members: + :undoc-members: :show-inheritance: diff --git a/google/cloud/bigquery_storage_v1/services/big_query_read/async_client.py b/google/cloud/bigquery_storage_v1/services/big_query_read/async_client.py index 7108ffd0..5363e60f 100644 --- a/google/cloud/bigquery_storage_v1/services/big_query_read/async_client.py +++ b/google/cloud/bigquery_storage_v1/services/big_query_read/async_client.py @@ -81,6 +81,7 @@ class BigQueryReadAsyncClient: BigQueryReadClient.parse_common_location_path ) + from_service_account_info = BigQueryReadClient.from_service_account_info from_service_account_file = BigQueryReadClient.from_service_account_file from_service_account_json = from_service_account_file @@ -181,16 +182,17 @@ async def create_read_session( caller. Args: - request (:class:`~.storage.CreateReadSessionRequest`): + request (:class:`google.cloud.bigquery_storage_v1.types.CreateReadSessionRequest`): The request object. Request message for `CreateReadSession`. parent (:class:`str`): Required. The request project that owns the session, in the form of ``projects/{project_id}``. + This corresponds to the ``parent`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - read_session (:class:`~.stream.ReadSession`): + read_session (:class:`google.cloud.bigquery_storage_v1.types.ReadSession`): Required. Session to be created. This corresponds to the ``read_session`` field on the ``request`` instance; if ``request`` is provided, this @@ -210,6 +212,7 @@ async def create_read_session( Streams must be read starting from offset 0. + This corresponds to the ``max_stream_count`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -221,7 +224,7 @@ async def create_read_session( sent along with the request as metadata. Returns: - ~.stream.ReadSession: + google.cloud.bigquery_storage_v1.types.ReadSession: Information about the ReadSession. """ # Create or coerce a protobuf request object. @@ -296,7 +299,7 @@ def read_rows( reflecting the current state of the stream. Args: - request (:class:`~.storage.ReadRowsRequest`): + request (:class:`google.cloud.bigquery_storage_v1.types.ReadRowsRequest`): The request object. Request message for `ReadRows`. read_stream (:class:`str`): Required. Stream to read rows from. @@ -309,6 +312,7 @@ def read_rows( Requesting a larger offset is undefined. If not specified, start reading from offset zero. + This corresponds to the ``offset`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -320,9 +324,9 @@ def read_rows( sent along with the request as metadata. 
Returns: - AsyncIterable[~.storage.ReadRowsResponse]: - Response from calling ``ReadRows`` may include row data, - progress and throttling information. + AsyncIterable[google.cloud.bigquery_storage_v1.types.ReadRowsResponse]: + Response from calling ReadRows may include row data, progress and + throttling information. """ # Create or coerce a protobuf request object. @@ -396,7 +400,7 @@ async def split_read_stream( once the streams have been read to completion. Args: - request (:class:`~.storage.SplitReadStreamRequest`): + request (:class:`google.cloud.bigquery_storage_v1.types.SplitReadStreamRequest`): The request object. Request message for `SplitReadStream`. @@ -407,8 +411,8 @@ async def split_read_stream( sent along with the request as metadata. Returns: - ~.storage.SplitReadStreamResponse: - Response message for ``SplitReadStream``. + google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse: + Response message for SplitReadStream. """ # Create or coerce a protobuf request object. diff --git a/google/cloud/bigquery_storage_v1/services/big_query_read/client.py b/google/cloud/bigquery_storage_v1/services/big_query_read/client.py index 3f04760f..3cb3f026 100644 --- a/google/cloud/bigquery_storage_v1/services/big_query_read/client.py +++ b/google/cloud/bigquery_storage_v1/services/big_query_read/client.py @@ -113,6 +113,22 @@ def _get_default_mtls_endpoint(api_endpoint): DEFAULT_ENDPOINT ) + @classmethod + def from_service_account_info(cls, info: dict, *args, **kwargs): + """Creates an instance of this client using the provided credentials info. + + Args: + info (dict): The service account private key info. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + BigQueryReadClient: The constructed client. + """ + credentials = service_account.Credentials.from_service_account_info(info) + kwargs["credentials"] = credentials + return cls(*args, **kwargs) + @classmethod def from_service_account_file(cls, filename: str, *args, **kwargs): """Creates an instance of this client using the provided credentials @@ -125,7 +141,7 @@ def from_service_account_file(cls, filename: str, *args, **kwargs): kwargs: Additional arguments to pass to the constructor. Returns: - {@api.name}: The constructed client. + BigQueryReadClient: The constructed client. """ credentials = service_account.Credentials.from_service_account_file(filename) kwargs["credentials"] = credentials @@ -267,10 +283,10 @@ def __init__( credentials identify the application to the service; if none are specified, the client will attempt to ascertain the credentials from the environment. - transport (Union[str, ~.BigQueryReadTransport]): The + transport (Union[str, BigQueryReadTransport]): The transport to use. If set to None, a transport is chosen automatically. - client_options (client_options_lib.ClientOptions): Custom options for the + client_options (google.api_core.client_options.ClientOptions): Custom options for the client. It won't take effect if a ``transport`` instance is provided. (1) The ``api_endpoint`` property can be used to override the default endpoint provided by the client. GOOGLE_API_USE_MTLS_ENDPOINT @@ -404,21 +420,22 @@ def create_read_session( caller. Args: - request (:class:`~.storage.CreateReadSessionRequest`): + request (google.cloud.bigquery_storage_v1.types.CreateReadSessionRequest): The request object. Request message for `CreateReadSession`. - parent (:class:`str`): + parent (str): Required. 
The request project that owns the session, in the form of ``projects/{project_id}``. + This corresponds to the ``parent`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - read_session (:class:`~.stream.ReadSession`): + read_session (google.cloud.bigquery_storage_v1.types.ReadSession): Required. Session to be created. This corresponds to the ``read_session`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - max_stream_count (:class:`int`): + max_stream_count (int): Max initial number of streams. If unset or zero, the server will provide a value of streams so as to produce @@ -433,6 +450,7 @@ def create_read_session( Streams must be read starting from offset 0. + This corresponds to the ``max_stream_count`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -444,7 +462,7 @@ def create_read_session( sent along with the request as metadata. Returns: - ~.stream.ReadSession: + google.cloud.bigquery_storage_v1.types.ReadSession: Information about the ReadSession. """ # Create or coerce a protobuf request object. @@ -512,19 +530,20 @@ def read_rows( reflecting the current state of the stream. Args: - request (:class:`~.storage.ReadRowsRequest`): + request (google.cloud.bigquery_storage_v1.types.ReadRowsRequest): The request object. Request message for `ReadRows`. - read_stream (:class:`str`): + read_stream (str): Required. Stream to read rows from. This corresponds to the ``read_stream`` field on the ``request`` instance; if ``request`` is provided, this should not be set. - offset (:class:`int`): + offset (int): The offset requested must be less than the last row read from Read. Requesting a larger offset is undefined. If not specified, start reading from offset zero. + This corresponds to the ``offset`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -536,9 +555,9 @@ def read_rows( sent along with the request as metadata. Returns: - Iterable[~.storage.ReadRowsResponse]: - Response from calling ``ReadRows`` may include row data, - progress and throttling information. + Iterable[google.cloud.bigquery_storage_v1.types.ReadRowsResponse]: + Response from calling ReadRows may include row data, progress and + throttling information. """ # Create or coerce a protobuf request object. @@ -607,7 +626,7 @@ def split_read_stream( once the streams have been read to completion. Args: - request (:class:`~.storage.SplitReadStreamRequest`): + request (google.cloud.bigquery_storage_v1.types.SplitReadStreamRequest): The request object. Request message for `SplitReadStream`. @@ -618,8 +637,8 @@ def split_read_stream( sent along with the request as metadata. Returns: - ~.storage.SplitReadStreamResponse: - Response message for ``SplitReadStream``. + google.cloud.bigquery_storage_v1.types.SplitReadStreamResponse: + Response message for SplitReadStream. """ # Create or coerce a protobuf request object. 
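(Aside, not part of the diff above: a minimal usage sketch of the newly added from_service_account_info classmethod, assuming a service account key already available on disk; the key-file path is hypothetical. The same classmethod is exposed on the async client via the alias added in async_client.py.)

    import json

    from google.cloud.bigquery_storage_v1.services.big_query_read import BigQueryReadClient

    # Load a service account key into a dict and construct the client from it,
    # instead of passing a filename to from_service_account_file().
    with open("service-account.json") as fp:  # hypothetical key file
        info = json.load(fp)

    client = BigQueryReadClient.from_service_account_info(info)
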
diff --git a/google/cloud/bigquery_storage_v1/services/big_query_read/transports/__init__.py b/google/cloud/bigquery_storage_v1/services/big_query_read/transports/__init__.py index 2e9fe066..87e56323 100644 --- a/google/cloud/bigquery_storage_v1/services/big_query_read/transports/__init__.py +++ b/google/cloud/bigquery_storage_v1/services/big_query_read/transports/__init__.py @@ -28,7 +28,6 @@ _transport_registry["grpc"] = BigQueryReadGrpcTransport _transport_registry["grpc_asyncio"] = BigQueryReadGrpcAsyncIOTransport - __all__ = ( "BigQueryReadTransport", "BigQueryReadGrpcTransport", diff --git a/google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc.py b/google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc.py index 041854b9..95fac6e5 100644 --- a/google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc.py +++ b/google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc.py @@ -148,10 +148,10 @@ def __init__( ssl_credentials=ssl_credentials, scopes=scopes or self.AUTH_SCOPES, quota_project_id=quota_project_id, - options=( + options=[ ("grpc.max_send_message_length", -1), ("grpc.max_receive_message_length", -1), - ), + ], ) self._ssl_channel_credentials = ssl_credentials else: @@ -170,10 +170,10 @@ def __init__( ssl_credentials=ssl_channel_credentials, scopes=scopes or self.AUTH_SCOPES, quota_project_id=quota_project_id, - options=( + options=[ ("grpc.max_send_message_length", -1), ("grpc.max_receive_message_length", -1), - ), + ], ) self._stubs = {} # type: Dict[str, Callable] @@ -200,7 +200,7 @@ def create_channel( ) -> grpc.Channel: """Create and return a gRPC channel object. Args: - address (Optionsl[str]): The host for the channel to use. + address (Optional[str]): The host for the channel to use. credentials (Optional[~.Credentials]): The authorization credentials to attach to requests. These credentials identify this application to the service. If diff --git a/google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc_asyncio.py b/google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc_asyncio.py index 3e08afdd..34ec72ad 100644 --- a/google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc_asyncio.py +++ b/google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc_asyncio.py @@ -193,10 +193,10 @@ def __init__( ssl_credentials=ssl_credentials, scopes=scopes or self.AUTH_SCOPES, quota_project_id=quota_project_id, - options=( + options=[ ("grpc.max_send_message_length", -1), ("grpc.max_receive_message_length", -1), - ), + ], ) self._ssl_channel_credentials = ssl_credentials else: @@ -215,10 +215,10 @@ def __init__( ssl_credentials=ssl_channel_credentials, scopes=scopes or self.AUTH_SCOPES, quota_project_id=quota_project_id, - options=( + options=[ ("grpc.max_send_message_length", -1), ("grpc.max_receive_message_length", -1), - ), + ], ) # Run the base constructor. 
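(Aside, not part of the diff above: a minimal sketch of what the channel options touched in grpc.py and grpc_asyncio.py do when a channel is created directly with grpc; the endpoint is the default host used elsewhere in this patch.)

    import grpc

    # A value of -1 disables gRPC's per-message size limits (the receive limit
    # otherwise defaults to 4 MiB, which large ReadRows responses can exceed).
    # grpc accepts the options as any sequence of (name, value) pairs, so the
    # switch from a tuple to a list in this patch does not change behavior.
    channel = grpc.secure_channel(
        "bigquerystorage.googleapis.com:443",
        grpc.ssl_channel_credentials(),
        options=[
            ("grpc.max_send_message_length", -1),
            ("grpc.max_receive_message_length", -1),
        ],
    )
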
diff --git a/google/cloud/bigquery_storage_v1/types/__init__.py b/google/cloud/bigquery_storage_v1/types/__init__.py index 346ce9cf..14fc7096 100644 --- a/google/cloud/bigquery_storage_v1/types/__init__.py +++ b/google/cloud/bigquery_storage_v1/types/__init__.py @@ -27,6 +27,7 @@ DataFormat, ReadSession, ReadStream, + DataFormat, ) from .storage import ( CreateReadSessionRequest, @@ -38,7 +39,6 @@ SplitReadStreamResponse, ) - __all__ = ( "ArrowSchema", "ArrowRecordBatch", @@ -47,6 +47,7 @@ "DataFormat", "ReadSession", "ReadStream", + "DataFormat", "CreateReadSessionRequest", "ReadRowsRequest", "ThrottleState", diff --git a/google/cloud/bigquery_storage_v1/types/storage.py b/google/cloud/bigquery_storage_v1/types/storage.py index 1b9c9d35..367d3bb3 100644 --- a/google/cloud/bigquery_storage_v1/types/storage.py +++ b/google/cloud/bigquery_storage_v1/types/storage.py @@ -44,7 +44,7 @@ class CreateReadSessionRequest(proto.Message): parent (str): Required. The request project that owns the session, in the form of ``projects/{project_id}``. - read_session (~.stream.ReadSession): + read_session (google.cloud.bigquery_storage_v1.types.ReadSession): Required. Session to be created. max_stream_count (int): Max initial number of streams. If unset or @@ -102,7 +102,7 @@ class StreamStats(proto.Message): r"""Estimated stream statistics for a given Stream. Attributes: - progress (~.storage.StreamStats.Progress): + progress (google.cloud.bigquery_storage_v1.types.StreamStats.Progress): Represents the progress of the current stream. """ @@ -141,16 +141,16 @@ class ReadRowsResponse(proto.Message): and throttling information. Attributes: - avro_rows (~.avro.AvroRows): + avro_rows (google.cloud.bigquery_storage_v1.types.AvroRows): Serialized row data in AVRO format. - arrow_record_batch (~.arrow.ArrowRecordBatch): + arrow_record_batch (google.cloud.bigquery_storage_v1.types.ArrowRecordBatch): Serialized row data in Arrow RecordBatch format. row_count (int): Number of serialized rows in the rows block. - stats (~.storage.StreamStats): + stats (google.cloud.bigquery_storage_v1.types.StreamStats): Statistics for the stream. - throttle_state (~.storage.ThrottleState): + throttle_state (google.cloud.bigquery_storage_v1.types.ThrottleState): Throttling state. If unset, the latest response still describes the current throttling status. @@ -201,11 +201,11 @@ class SplitReadStreamResponse(proto.Message): r"""Response message for ``SplitReadStream``. Attributes: - primary_stream (~.stream.ReadStream): + primary_stream (google.cloud.bigquery_storage_v1.types.ReadStream): Primary stream, which contains the beginning portion of \|original_stream|. An empty value indicates that the original stream can no longer be split. - remainder_stream (~.stream.ReadStream): + remainder_stream (google.cloud.bigquery_storage_v1.types.ReadStream): Remainder stream, which contains the tail of \|original_stream|. An empty value indicates that the original stream can no longer be split. diff --git a/google/cloud/bigquery_storage_v1/types/stream.py b/google/cloud/bigquery_storage_v1/types/stream.py index eeec7a88..34b865eb 100644 --- a/google/cloud/bigquery_storage_v1/types/stream.py +++ b/google/cloud/bigquery_storage_v1/types/stream.py @@ -43,28 +43,28 @@ class ReadSession(proto.Message): name (str): Output only. Unique identifier for the session, in the form ``projects/{project_id}/locations/{location}/sessions/{session_id}``. - expire_time (~.timestamp.Timestamp): + expire_time (google.protobuf.timestamp_pb2.Timestamp): Output only. 
Time at which the session becomes invalid. After this time, subsequent requests to read this Session will return errors. The expire_time is automatically assigned and currently cannot be specified or updated. - data_format (~.stream.DataFormat): + data_format (google.cloud.bigquery_storage_v1.types.DataFormat): Immutable. Data format of the output data. - avro_schema (~.avro.AvroSchema): + avro_schema (google.cloud.bigquery_storage_v1.types.AvroSchema): Output only. Avro schema. - arrow_schema (~.arrow.ArrowSchema): + arrow_schema (google.cloud.bigquery_storage_v1.types.ArrowSchema): Output only. Arrow schema. table (str): Immutable. Table that this ReadSession is reading from, in the form ``projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`` - table_modifiers (~.stream.ReadSession.TableModifiers): + table_modifiers (google.cloud.bigquery_storage_v1.types.ReadSession.TableModifiers): Optional. Any modifiers which are applied when reading from the specified table. - read_options (~.stream.ReadSession.TableReadOptions): + read_options (google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions): Optional. Read options for this session (e.g. column selection, filters). - streams (Sequence[~.stream.ReadStream]): + streams (Sequence[google.cloud.bigquery_storage_v1.types.ReadStream]): Output only. A list of streams created with the session. At least one stream is created with the session. In the @@ -78,7 +78,7 @@ class TableModifiers(proto.Message): r"""Additional attributes when reading a table. Attributes: - snapshot_time (~.timestamp.Timestamp): + snapshot_time (google.protobuf.timestamp_pb2.Timestamp): The snapshot time of the table. If not set, interpreted as now. """ diff --git a/noxfile.py b/noxfile.py index c6d9258f..c5baa18c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -30,6 +30,17 @@ SYSTEM_TEST_PYTHON_VERSIONS = ["3.8"] UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] +# 'docfx' is excluded since it only needs to run in 'docs-presubmit' +nox.options.sessions = [ + "unit", + "system", + "cover", + "lint", + "lint_setup_py", + "blacken", + "docs", +] + @nox.session(python=DEFAULT_PYTHON_VERSION) def lint(session): @@ -84,9 +95,7 @@ def default(session): session.run( "py.test", "--quiet", - "--cov=google.cloud.bigquery_storage", - "--cov=google.cloud.bigquery_storage_v1", - "--cov=google.cloud", + "--cov=google/cloud", "--cov=tests/unit", "--cov-append", "--cov-config=.coveragerc", diff --git a/samples/quickstart/noxfile.py b/samples/quickstart/noxfile.py index ab2c4922..bbd25fcd 100644 --- a/samples/quickstart/noxfile.py +++ b/samples/quickstart/noxfile.py @@ -17,6 +17,7 @@ import os from pathlib import Path import sys +from typing import Callable, Dict, List, Optional import nox @@ -65,7 +66,7 @@ TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) -def get_pytest_env_vars(): +def get_pytest_env_vars() -> Dict[str, str]: """Returns a dict for pytest invocation.""" ret = {} @@ -94,7 +95,7 @@ def get_pytest_env_vars(): # -def _determine_local_import_names(start_dir): +def _determine_local_import_names(start_dir: str) -> List[str]: """Determines all import names that should be considered "local". 
This is used when running the linter to insure that import order is @@ -132,7 +133,7 @@ def _determine_local_import_names(start_dir): @nox.session -def lint(session): +def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: session.install("flake8", "flake8-import-order") else: @@ -153,7 +154,7 @@ def lint(session): @nox.session -def blacken(session): +def blacken(session: nox.sessions.Session) -> None: session.install("black") python_files = [path for path in os.listdir(".") if path.endswith(".py")] @@ -168,7 +169,9 @@ def blacken(session): PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] -def _session_tests(session, post_install=None): +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): session.install("-r", "requirements.txt") @@ -194,7 +197,7 @@ def _session_tests(session, post_install=None): @nox.session(python=ALL_VERSIONS) -def py(session): +def py(session: nox.sessions.Session) -> None: """Runs py.test for a sample using the specified version of Python.""" if session.python in TESTED_VERSIONS: _session_tests(session) @@ -209,7 +212,7 @@ def py(session): # -def _get_repo_root(): +def _get_repo_root() -> Optional[str]: """ Returns the root folder of the project. """ # Get root of this repository. Assume we don't have directories nested deeper than 10 items. p = Path(os.getcwd()) @@ -232,7 +235,7 @@ def _get_repo_root(): @nox.session @nox.parametrize("path", GENERATED_READMES) -def readmegen(session, path): +def readmegen(session: nox.sessions.Session, path: str) -> None: """(Re-)generates the readme for a sample.""" session.install("jinja2", "pyyaml") dir_ = os.path.dirname(path) diff --git a/samples/to_dataframe/noxfile.py b/samples/to_dataframe/noxfile.py index ab2c4922..bbd25fcd 100644 --- a/samples/to_dataframe/noxfile.py +++ b/samples/to_dataframe/noxfile.py @@ -17,6 +17,7 @@ import os from pathlib import Path import sys +from typing import Callable, Dict, List, Optional import nox @@ -65,7 +66,7 @@ TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) -def get_pytest_env_vars(): +def get_pytest_env_vars() -> Dict[str, str]: """Returns a dict for pytest invocation.""" ret = {} @@ -94,7 +95,7 @@ def get_pytest_env_vars(): # -def _determine_local_import_names(start_dir): +def _determine_local_import_names(start_dir: str) -> List[str]: """Determines all import names that should be considered "local". 
This is used when running the linter to insure that import order is @@ -132,7 +133,7 @@ def _determine_local_import_names(start_dir): @nox.session -def lint(session): +def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: session.install("flake8", "flake8-import-order") else: @@ -153,7 +154,7 @@ def lint(session): @nox.session -def blacken(session): +def blacken(session: nox.sessions.Session) -> None: session.install("black") python_files = [path for path in os.listdir(".") if path.endswith(".py")] @@ -168,7 +169,9 @@ def blacken(session): PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] -def _session_tests(session, post_install=None): +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): session.install("-r", "requirements.txt") @@ -194,7 +197,7 @@ def _session_tests(session, post_install=None): @nox.session(python=ALL_VERSIONS) -def py(session): +def py(session: nox.sessions.Session) -> None: """Runs py.test for a sample using the specified version of Python.""" if session.python in TESTED_VERSIONS: _session_tests(session) @@ -209,7 +212,7 @@ def py(session): # -def _get_repo_root(): +def _get_repo_root() -> Optional[str]: """ Returns the root folder of the project. """ # Get root of this repository. Assume we don't have directories nested deeper than 10 items. p = Path(os.getcwd()) @@ -232,7 +235,7 @@ def _get_repo_root(): @nox.session @nox.parametrize("path", GENERATED_READMES) -def readmegen(session, path): +def readmegen(session: nox.sessions.Session, path: str) -> None: """(Re-)generates the readme for a sample.""" session.install("jinja2", "pyyaml") dir_ = os.path.dirname(path) diff --git a/synth.metadata b/synth.metadata index 831ec1b1..dc973ce0 100644 --- a/synth.metadata +++ b/synth.metadata @@ -3,30 +3,30 @@ { "git": { "name": ".", - "remote": "https://github.com/googleapis/python-bigquery-storage.git", - "sha": "994a7c1cb1f8008e630d2325a9c168001e5081b4" + "remote": "git@github.com:tswast/python-bigquery-storage.git", + "sha": "936e0a5777a201c49fb76f7638a69cfd2569c2e7" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "07d41a7e5cade45aba6f0d277c89722b48f2c956", - "internalRef": "339292950" + "sha": "38cb5ce59453ce509773afcdfc72764441a4b531", + "internalRef": "351190966" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "9a7d9fbb7045c34c9d3d22c1ff766eeae51f04c9" + "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "9a7d9fbb7045c34c9d3d22c1ff766eeae51f04c9" + "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" } } ], @@ -136,4 +136,4 @@ "tests/unit/gapic/bigquery_storage_v1/__init__.py", "tests/unit/gapic/bigquery_storage_v1/test_big_query_read.py" ] -} \ No newline at end of file +} diff --git a/synth.py b/synth.py index 92cf5c29..fffa438f 100644 --- a/synth.py +++ b/synth.py @@ -97,44 +97,6 @@ '\g<0>\n\n session.install("google-cloud-bigquery")', ) -# Remove client-side validation of message length. 
-# https://github.com/googleapis/python-bigquery-storage/issues/78 -s.replace( - [ - "google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc.py", - "google/cloud/bigquery_storage_v1/services/big_query_read/transports/grpc_asyncio.py", - ], - ( - r"type\(self\).create_channel\(\s*" - r"host,\s*" - r"credentials=credentials,\s*" - r"credentials_file=credentials_file,\s*" - r"ssl_credentials=ssl_[a-z_]*credentials,\s*" - r"scopes=scopes or self.AUTH_SCOPES,\s*" - r"quota_project_id=quota_project_id" - ), - """\g<0>, - options=( - ('grpc.max_send_message_length', -1), - ('grpc.max_receive_message_length', -1) - )""", -) -s.replace( - "tests/unit/gapic/bigquery_storage_v1/test_big_query_read.py", - ( - r"grpc_create_channel\.assert_called_once_with\([^()]+" - r"scopes=\([^()]+\),\s*" - r"ssl_credentials=[a-z_]+,\s*" - r"quota_project_id=None" - ), - """\g<0>, - options=( - ('grpc.max_send_message_length', -1), - ('grpc.max_receive_message_length', -1) - )""", -) - - # We don't want the generated client to be accessible through # "google.cloud.bigquery_storage", replace it with the hand written client that # wraps it. diff --git a/tests/unit/gapic/bigquery_storage_v1/test_big_query_read.py b/tests/unit/gapic/bigquery_storage_v1/test_big_query_read.py index 1c3cfafb..3823109b 100644 --- a/tests/unit/gapic/bigquery_storage_v1/test_big_query_read.py +++ b/tests/unit/gapic/bigquery_storage_v1/test_big_query_read.py @@ -86,7 +86,20 @@ def test__get_default_mtls_endpoint(): assert BigQueryReadClient._get_default_mtls_endpoint(non_googleapi) == non_googleapi -@pytest.mark.parametrize("client_class", [BigQueryReadClient, BigQueryReadAsyncClient]) +def test_big_query_read_client_from_service_account_info(): + creds = credentials.AnonymousCredentials() + with mock.patch.object( + service_account.Credentials, "from_service_account_info" + ) as factory: + factory.return_value = creds + info = {"valid": True} + client = BigQueryReadClient.from_service_account_info(info) + assert client.transport._credentials == creds + + assert client.transport._host == "bigquerystorage.googleapis.com:443" + + +@pytest.mark.parametrize("client_class", [BigQueryReadClient, BigQueryReadAsyncClient,]) def test_big_query_read_client_from_service_account_file(client_class): creds = credentials.AnonymousCredentials() with mock.patch.object( @@ -104,7 +117,10 @@ def test_big_query_read_client_from_service_account_file(client_class): def test_big_query_read_client_get_transport_class(): transport = BigQueryReadClient.get_transport_class() - assert transport == transports.BigQueryReadGrpcTransport + available_transports = [ + transports.BigQueryReadGrpcTransport, + ] + assert transport in available_transports transport = BigQueryReadClient.get_transport_class("grpc") assert transport == transports.BigQueryReadGrpcTransport @@ -1051,7 +1067,10 @@ def test_transport_get_channel(): @pytest.mark.parametrize( "transport_class", - [transports.BigQueryReadGrpcTransport, transports.BigQueryReadGrpcAsyncIOTransport], + [ + transports.BigQueryReadGrpcTransport, + transports.BigQueryReadGrpcAsyncIOTransport, + ], ) def test_transport_adc(transport_class): # Test default credentials are used if not provided. @@ -1186,7 +1205,7 @@ def test_big_query_read_host_with_port(): def test_big_query_read_grpc_transport_channel(): - channel = grpc.insecure_channel("http://localhost/") + channel = grpc.secure_channel("http://localhost/", grpc.local_channel_credentials()) # Check that channel is used if provided. 
transport = transports.BigQueryReadGrpcTransport( @@ -1198,7 +1217,7 @@ def test_big_query_read_grpc_transport_channel(): def test_big_query_read_grpc_asyncio_transport_channel(): - channel = aio.insecure_channel("http://localhost/") + channel = aio.secure_channel("http://localhost/", grpc.local_channel_credentials()) # Check that channel is used if provided. transport = transports.BigQueryReadGrpcAsyncIOTransport( @@ -1218,7 +1237,7 @@ def test_big_query_read_transport_channel_mtls_with_client_cert_source(transport "grpc.ssl_channel_credentials", autospec=True ) as grpc_ssl_channel_cred: with mock.patch.object( - transport_class, "create_channel", autospec=True + transport_class, "create_channel" ) as grpc_create_channel: mock_ssl_cred = mock.Mock() grpc_ssl_channel_cred.return_value = mock_ssl_cred @@ -1251,10 +1270,10 @@ def test_big_query_read_transport_channel_mtls_with_client_cert_source(transport ), ssl_credentials=mock_ssl_cred, quota_project_id=None, - options=( + options=[ ("grpc.max_send_message_length", -1), ("grpc.max_receive_message_length", -1), - ), + ], ) assert transport.grpc_channel == mock_grpc_channel assert transport._ssl_channel_credentials == mock_ssl_cred @@ -1272,7 +1291,7 @@ def test_big_query_read_transport_channel_mtls_with_adc(transport_class): ssl_credentials=mock.PropertyMock(return_value=mock_ssl_cred), ): with mock.patch.object( - transport_class, "create_channel", autospec=True + transport_class, "create_channel" ) as grpc_create_channel: mock_grpc_channel = mock.Mock() grpc_create_channel.return_value = mock_grpc_channel @@ -1297,10 +1316,10 @@ def test_big_query_read_transport_channel_mtls_with_adc(transport_class): ), ssl_credentials=mock_ssl_cred, quota_project_id=None, - options=( + options=[ ("grpc.max_send_message_length", -1), ("grpc.max_receive_message_length", -1), - ), + ], ) assert transport.grpc_channel == mock_grpc_channel From e5f6198262cf9a593c62219cf5f6632c5a2a811e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 13 Jan 2021 13:50:02 -0600 Subject: [PATCH 18/22] feat: add clients for v1beta2 endpoint (#113) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is 100% autogenerated code. Subsequent PRs will cover manual classes. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-storage/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 TODO: - [x] docs build successful - [x] unit tests pass (need to remove `test_append_rows_flattened_*` tests, as there are no flattened arguments for `append_rows`) --- .../big_query_read.rst | 6 + .../big_query_write.rst | 6 + docs/bigquery_storage_v1beta2/services.rst | 7 + docs/bigquery_storage_v1beta2/types.rst | 7 + docs/index.rst | 2 + .../proto/arrow.proto | 57 + .../bigquery_storage_v1beta2/proto/avro.proto | 35 + .../proto/protobuf.proto | 41 + .../proto/storage.proto | 538 +++++ .../proto/stream.proto | 189 ++ .../proto/table.proto | 102 + .../cloud/bigquery_storage_v1beta2/py.typed | 2 + .../services/__init__.py | 16 + .../services/big_query_read/__init__.py | 24 + .../services/big_query_read/async_client.py | 462 ++++ .../services/big_query_read/client.py | 681 ++++++ .../big_query_read/transports/__init__.py | 35 + .../big_query_read/transports/base.py | 186 ++ .../big_query_read/transports/grpc.py | 364 +++ .../big_query_read/transports/grpc_asyncio.py | 370 +++ .../services/big_query_write/__init__.py | 24 + .../services/big_query_write/async_client.py | 671 ++++++ .../services/big_query_write/client.py | 843 +++++++ .../big_query_write/transports/__init__.py | 35 + .../big_query_write/transports/base.py | 261 +++ .../big_query_write/transports/grpc.py | 440 ++++ .../transports/grpc_asyncio.py | 445 ++++ .../types/__init__.py | 96 + .../bigquery_storage_v1beta2/types/arrow.py | 71 + .../bigquery_storage_v1beta2/types/avro.py | 50 + .../types/protobuf.py | 64 + .../bigquery_storage_v1beta2/types/storage.py | 487 ++++ .../bigquery_storage_v1beta2/types/stream.py | 203 ++ .../bigquery_storage_v1beta2/types/table.py | 94 + synth.py | 30 +- .../bigquery_storage_v1beta2/__init__.py | 1 + .../test_big_query_read.py | 1525 ++++++++++++ .../test_big_query_write.py | 2043 +++++++++++++++++ 38 files changed, 10498 insertions(+), 15 deletions(-) create mode 100644 docs/bigquery_storage_v1beta2/big_query_read.rst create mode 100644 docs/bigquery_storage_v1beta2/big_query_write.rst create mode 100644 docs/bigquery_storage_v1beta2/services.rst create mode 100644 docs/bigquery_storage_v1beta2/types.rst create mode 100644 google/cloud/bigquery_storage_v1beta2/proto/arrow.proto create mode 100644 google/cloud/bigquery_storage_v1beta2/proto/avro.proto create mode 100644 google/cloud/bigquery_storage_v1beta2/proto/protobuf.proto create mode 100644 google/cloud/bigquery_storage_v1beta2/proto/storage.proto create mode 100644 google/cloud/bigquery_storage_v1beta2/proto/stream.proto create mode 100644 google/cloud/bigquery_storage_v1beta2/proto/table.proto create mode 100644 google/cloud/bigquery_storage_v1beta2/py.typed create mode 100644 google/cloud/bigquery_storage_v1beta2/services/__init__.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_read/__init__.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_read/async_client.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_read/client.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/__init__.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/base.py create mode 100644 
google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/grpc.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/grpc_asyncio.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_write/__init__.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_write/async_client.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_write/client.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/__init__.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/base.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/grpc.py create mode 100644 google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/grpc_asyncio.py create mode 100644 google/cloud/bigquery_storage_v1beta2/types/__init__.py create mode 100644 google/cloud/bigquery_storage_v1beta2/types/arrow.py create mode 100644 google/cloud/bigquery_storage_v1beta2/types/avro.py create mode 100644 google/cloud/bigquery_storage_v1beta2/types/protobuf.py create mode 100644 google/cloud/bigquery_storage_v1beta2/types/storage.py create mode 100644 google/cloud/bigquery_storage_v1beta2/types/stream.py create mode 100644 google/cloud/bigquery_storage_v1beta2/types/table.py create mode 100644 tests/unit/gapic/bigquery_storage_v1beta2/__init__.py create mode 100644 tests/unit/gapic/bigquery_storage_v1beta2/test_big_query_read.py create mode 100644 tests/unit/gapic/bigquery_storage_v1beta2/test_big_query_write.py diff --git a/docs/bigquery_storage_v1beta2/big_query_read.rst b/docs/bigquery_storage_v1beta2/big_query_read.rst new file mode 100644 index 00000000..10b3714b --- /dev/null +++ b/docs/bigquery_storage_v1beta2/big_query_read.rst @@ -0,0 +1,6 @@ +BigQueryRead +------------------------------ + +.. automodule:: google.cloud.bigquery_storage_v1beta2.services.big_query_read + :members: + :inherited-members: diff --git a/docs/bigquery_storage_v1beta2/big_query_write.rst b/docs/bigquery_storage_v1beta2/big_query_write.rst new file mode 100644 index 00000000..c685994d --- /dev/null +++ b/docs/bigquery_storage_v1beta2/big_query_write.rst @@ -0,0 +1,6 @@ +BigQueryWrite +------------------------------- + +.. automodule:: google.cloud.bigquery_storage_v1beta2.services.big_query_write + :members: + :inherited-members: diff --git a/docs/bigquery_storage_v1beta2/services.rst b/docs/bigquery_storage_v1beta2/services.rst new file mode 100644 index 00000000..67dae0ab --- /dev/null +++ b/docs/bigquery_storage_v1beta2/services.rst @@ -0,0 +1,7 @@ +Services for Google Cloud Bigquery Storage v1beta2 API +====================================================== +.. toctree:: + :maxdepth: 2 + + big_query_read + big_query_write diff --git a/docs/bigquery_storage_v1beta2/types.rst b/docs/bigquery_storage_v1beta2/types.rst new file mode 100644 index 00000000..995806da --- /dev/null +++ b/docs/bigquery_storage_v1beta2/types.rst @@ -0,0 +1,7 @@ +Types for Google Cloud Bigquery Storage v1beta2 API +=================================================== + +.. 
automodule:: google.cloud.bigquery_storage_v1beta2.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/index.rst b/docs/index.rst index 618e74d7..6892b30c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -21,6 +21,8 @@ API Reference bigquery_storage_v1/library bigquery_storage_v1/services bigquery_storage_v1/types + bigquery_storage_v1beta2/services + bigquery_storage_v1beta2/types Migration Guide diff --git a/google/cloud/bigquery_storage_v1beta2/proto/arrow.proto b/google/cloud/bigquery_storage_v1beta2/proto/arrow.proto new file mode 100644 index 00000000..bc2e4eb1 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/proto/arrow.proto @@ -0,0 +1,57 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.bigquery.storage.v1beta2; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage"; +option java_multiple_files = true; +option java_outer_classname = "ArrowProto"; +option java_package = "com.google.cloud.bigquery.storage.v1beta2"; + +// Arrow schema as specified in +// https://arrow.apache.org/docs/python/api/datatypes.html +// and serialized to bytes using IPC: +// https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc +// +// See code samples on how this message can be deserialized. +message ArrowSchema { + // IPC serialized Arrow schema. + bytes serialized_schema = 1; +} + +// Arrow RecordBatch. +message ArrowRecordBatch { + // IPC-serialized Arrow RecordBatch. + bytes serialized_record_batch = 1; +} + +// Contains options specific to Arrow Serialization. +message ArrowSerializationOptions { + // The IPC format to use when serializing Arrow streams. + enum Format { + // If unspecied the IPC format as of 0.15 release will be used. + FORMAT_UNSPECIFIED = 0; + + // Use the legacy IPC message format as of Apache Arrow Release 0.14. + ARROW_0_14 = 1; + + // Use the message format as of Apache Arrow Release 0.15. + ARROW_0_15 = 2; + } + + // The Arrow IPC format to use. + Format format = 1; +} diff --git a/google/cloud/bigquery_storage_v1beta2/proto/avro.proto b/google/cloud/bigquery_storage_v1beta2/proto/avro.proto new file mode 100644 index 00000000..109ec86a --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/proto/avro.proto @@ -0,0 +1,35 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; + +package google.cloud.bigquery.storage.v1beta2; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage"; +option java_multiple_files = true; +option java_outer_classname = "AvroProto"; +option java_package = "com.google.cloud.bigquery.storage.v1beta2"; + +// Avro schema. +message AvroSchema { + // Json serialized schema, as described at + // https://avro.apache.org/docs/1.8.1/spec.html. + string schema = 1; +} + +// Avro rows. +message AvroRows { + // Binary serialized rows in a block. + bytes serialized_binary_rows = 1; +} diff --git a/google/cloud/bigquery_storage_v1beta2/proto/protobuf.proto b/google/cloud/bigquery_storage_v1beta2/proto/protobuf.proto new file mode 100644 index 00000000..741e7d11 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/proto/protobuf.proto @@ -0,0 +1,41 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.bigquery.storage.v1beta2; + +import "google/protobuf/descriptor.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage"; +option java_multiple_files = true; +option java_outer_classname = "ProtoBufProto"; +option java_package = "com.google.cloud.bigquery.storage.v1beta2"; + +// Protobuf schema is an API presentation the proto buffer schema. +message ProtoSchema { + // Descriptor for input message. The descriptor has to be self contained, + // including all the nested types, excepted for proto buffer well known types + // (https://developers.google.com/protocol-buffers/docs/reference/google.protobuf). + google.protobuf.DescriptorProto proto_descriptor = 1; +} + +// Protobuf rows. +message ProtoRows { + // A sequence of rows serialized as a Protocol Buffer. + // + // See https://developers.google.com/protocol-buffers/docs/overview for more + // information on deserializing this field. + repeated bytes serialized_rows = 1; +} diff --git a/google/cloud/bigquery_storage_v1beta2/proto/storage.proto b/google/cloud/bigquery_storage_v1beta2/proto/storage.proto new file mode 100644 index 00000000..5538e29f --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/proto/storage.proto @@ -0,0 +1,538 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; + +package google.cloud.bigquery.storage.v1beta2; + +import "google/api/annotations.proto"; +import "google/api/client.proto"; +import "google/api/field_behavior.proto"; +import "google/api/resource.proto"; +import "google/cloud/bigquery/storage/v1beta2/arrow.proto"; +import "google/cloud/bigquery/storage/v1beta2/avro.proto"; +import "google/cloud/bigquery/storage/v1beta2/protobuf.proto"; +import "google/cloud/bigquery/storage/v1beta2/stream.proto"; +import "google/cloud/bigquery/storage/v1beta2/table.proto"; +import "google/protobuf/timestamp.proto"; +import "google/protobuf/wrappers.proto"; +import "google/rpc/status.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage"; +option java_multiple_files = true; +option java_outer_classname = "StorageProto"; +option java_package = "com.google.cloud.bigquery.storage.v1beta2"; + +// BigQuery Read API. +// +// The Read API can be used to read data from BigQuery. +// +// New code should use the v1 Read API going forward, if they don't use Write +// API at the same time. +service BigQueryRead { + option (google.api.default_host) = "bigquerystorage.googleapis.com"; + option (google.api.oauth_scopes) = + "https://www.googleapis.com/auth/bigquery," + "https://www.googleapis.com/auth/bigquery.readonly," + "https://www.googleapis.com/auth/cloud-platform"; + + // Creates a new read session. A read session divides the contents of a + // BigQuery table into one or more streams, which can then be used to read + // data from the table. The read session also specifies properties of the + // data to be read, such as a list of columns or a push-down filter describing + // the rows to be returned. + // + // A particular row can be read by at most one stream. When the caller has + // reached the end of each stream in the session, then all the data in the + // table has been read. + // + // Data is assigned to each stream such that roughly the same number of + // rows can be read from each stream. Because the server-side unit for + // assigning data is collections of rows, the API does not guarantee that + // each stream will return the same number or rows. Additionally, the + // limits are enforced based on the number of pre-filtered rows, so some + // filters can lead to lopsided assignments. + // + // Read sessions automatically expire 24 hours after they are created and do + // not require manual clean-up by the caller. + rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession) { + option (google.api.http) = { + post: "/v1beta2/{read_session.table=projects/*/datasets/*/tables/*}" + body: "*" + }; + option (google.api.method_signature) = + "parent,read_session,max_stream_count"; + } + + // Reads rows from the stream in the format prescribed by the ReadSession. + // Each response contains one or more table rows, up to a maximum of 100 MiB + // per response; read requests which attempt to read individual rows larger + // than 100 MiB will fail. + // + // Each request also returns a set of stream statistics reflecting the current + // state of the stream. + rpc ReadRows(ReadRowsRequest) returns (stream ReadRowsResponse) { + option (google.api.http) = { + get: "/v1beta2/{read_stream=projects/*/locations/*/sessions/*/streams/*}" + }; + option (google.api.method_signature) = "read_stream,offset"; + } + + // Splits a given `ReadStream` into two `ReadStream` objects. These + // `ReadStream` objects are referred to as the primary and the residual + // streams of the split. 
The original `ReadStream` can still be read from in + // the same manner as before. Both of the returned `ReadStream` objects can + // also be read from, and the rows returned by both child streams will be + // the same as the rows read from the original stream. + // + // Moreover, the two child streams will be allocated back-to-back in the + // original `ReadStream`. Concretely, it is guaranteed that for streams + // original, primary, and residual, that original[0-j] = primary[0-j] and + // original[j-n] = residual[0-m] once the streams have been read to + // completion. + rpc SplitReadStream(SplitReadStreamRequest) + returns (SplitReadStreamResponse) { + option (google.api.http) = { + get: "/v1beta2/{name=projects/*/locations/*/sessions/*/streams/*}" + }; + } +} + +// BigQuery Write API. +// +// The Write API can be used to write data to BigQuery. +service BigQueryWrite { + option (google.api.default_host) = "bigquerystorage.googleapis.com"; + option (google.api.oauth_scopes) = + "https://www.googleapis.com/auth/bigquery," + "https://www.googleapis.com/auth/bigquery.insertdata," + "https://www.googleapis.com/auth/cloud-platform"; + + // Creates a write stream to the given table. + // Additionally, every table has a special COMMITTED stream named '_default' + // to which data can be written. This stream doesn't need to be created using + // CreateWriteStream. It is a stream that can be used simultaneously by any + // number of clients. Data written to this stream is considered committed as + // soon as an acknowledgement is received. + rpc CreateWriteStream(CreateWriteStreamRequest) returns (WriteStream) { + option (google.api.http) = { + post: "/v1beta2/{parent=projects/*/datasets/*/tables/*}" + body: "write_stream" + }; + option (google.api.method_signature) = "parent,write_stream"; + } + + // Appends data to the given stream. + // + // If `offset` is specified, the `offset` is checked against the end of + // stream. The server returns `OUT_OF_RANGE` in `AppendRowsResponse` if an + // attempt is made to append to an offset beyond the current end of the stream + // or `ALREADY_EXISTS` if user provids an `offset` that has already been + // written to. User can retry with adjusted offset within the same RPC + // stream. If `offset` is not specified, append happens at the end of the + // stream. + // + // The response contains the offset at which the append happened. Responses + // are received in the same order in which requests are sent. There will be + // one response for each successful request. If the `offset` is not set in + // response, it means append didn't happen due to some errors. If one request + // fails, all the subsequent requests will also fail until a success request + // is made again. + // + // If the stream is of `PENDING` type, data will only be available for read + // operations after the stream is committed. + rpc AppendRows(stream AppendRowsRequest) returns (stream AppendRowsResponse) { + option (google.api.http) = { + post: "/v1beta2/{write_stream=projects/*/datasets/*/tables/*/streams/*}" + body: "*" + }; + option (google.api.method_signature) = "write_stream"; + } + + // Gets a write stream. + rpc GetWriteStream(GetWriteStreamRequest) returns (WriteStream) { + option (google.api.http) = { + post: "/v1beta2/{name=projects/*/datasets/*/tables/*/streams/*}" + body: "*" + }; + option (google.api.method_signature) = "name"; + } + + // Finalize a write stream so that no new data can be appended to the + // stream. Finalize is not supported on the '_default' stream. 
+ rpc FinalizeWriteStream(FinalizeWriteStreamRequest) + returns (FinalizeWriteStreamResponse) { + option (google.api.http) = { + post: "/v1beta2/{name=projects/*/datasets/*/tables/*/streams/*}" + body: "*" + }; + option (google.api.method_signature) = "name"; + } + + // Atomically commits a group of `PENDING` streams that belong to the same + // `parent` table. + // Streams must be finalized before commit and cannot be committed multiple + // times. Once a stream is committed, data in the stream becomes available + // for read operations. + rpc BatchCommitWriteStreams(BatchCommitWriteStreamsRequest) + returns (BatchCommitWriteStreamsResponse) { + option (google.api.http) = { + get: "/v1beta2/{parent=projects/*/datasets/*/tables/*}" + }; + option (google.api.method_signature) = "parent"; + } + + // Flushes rows to a BUFFERED stream. + // If users are appending rows to BUFFERED stream, flush operation is + // required in order for the rows to become available for reading. A + // Flush operation flushes up to any previously flushed offset in a BUFFERED + // stream, to the offset specified in the request. + // Flush is not supported on the _default stream, since it is not BUFFERED. + rpc FlushRows(FlushRowsRequest) returns (FlushRowsResponse) { + option (google.api.http) = { + post: "/v1beta2/{write_stream=projects/*/datasets/*/tables/*/streams/*}" + body: "*" + }; + option (google.api.method_signature) = "write_stream"; + } +} + +// Request message for `CreateReadSession`. +message CreateReadSessionRequest { + // Required. The request project that owns the session, in the form of + // `projects/{project_id}`. + string parent = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "cloudresourcemanager.googleapis.com/Project" + } + ]; + + // Required. Session to be created. + ReadSession read_session = 2 [(google.api.field_behavior) = REQUIRED]; + + // Max initial number of streams. If unset or zero, the server will + // provide a value of streams so as to produce reasonable throughput. Must be + // non-negative. The number of streams may be lower than the requested number, + // depending on the amount parallelism that is reasonable for the table. Error + // will be returned if the max count is greater than the current system + // max limit of 1,000. + // + // Streams must be read starting from offset 0. + int32 max_stream_count = 3; +} + +// Request message for `ReadRows`. +message ReadRowsRequest { + // Required. Stream to read rows from. + string read_stream = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "bigquerystorage.googleapis.com/ReadStream" + } + ]; + + // The offset requested must be less than the last row read from Read. + // Requesting a larger offset is undefined. If not specified, start reading + // from offset zero. + int64 offset = 2; +} + +// Information on if the current connection is being throttled. +message ThrottleState { + // How much this connection is being throttled. Zero means no throttling, + // 100 means fully throttled. + int32 throttle_percent = 1; +} + +// Estimated stream statistics for a given Stream. +message StreamStats { + message Progress { + // The fraction of rows assigned to the stream that have been processed by + // the server so far, not including the rows in the current response + // message. 
+ // + // This value, along with `at_response_end`, can be used to interpolate + // the progress made as the rows in the message are being processed using + // the following formula: `at_response_start + (at_response_end - + // at_response_start) * rows_processed_from_response / rows_in_response`. + // + // Note that if a filter is provided, the `at_response_end` value of the + // previous response may not necessarily be equal to the + // `at_response_start` value of the current response. + double at_response_start = 1; + + // Similar to `at_response_start`, except that this value includes the + // rows in the current response. + double at_response_end = 2; + } + + // Represents the progress of the current stream. + Progress progress = 2; +} + +// Response from calling `ReadRows` may include row data, progress and +// throttling information. +message ReadRowsResponse { + // Row data is returned in format specified during session creation. + oneof rows { + // Serialized row data in AVRO format. + AvroRows avro_rows = 3; + + // Serialized row data in Arrow RecordBatch format. + ArrowRecordBatch arrow_record_batch = 4; + } + + // Number of serialized rows in the rows block. + int64 row_count = 6; + + // Statistics for the stream. + StreamStats stats = 2; + + // Throttling state. If unset, the latest response still describes + // the current throttling status. + ThrottleState throttle_state = 5; +} + +// Request message for `SplitReadStream`. +message SplitReadStreamRequest { + // Required. Name of the stream to split. + string name = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "bigquerystorage.googleapis.com/ReadStream" + } + ]; + + // A value in the range (0.0, 1.0) that specifies the fractional point at + // which the original stream should be split. The actual split point is + // evaluated on pre-filtered rows, so if a filter is provided, then there is + // no guarantee that the division of the rows between the new child streams + // will be proportional to this fractional value. Additionally, because the + // server-side unit for assigning data is collections of rows, this fraction + // will always map to a data storage boundary on the server side. + double fraction = 2; +} + +message SplitReadStreamResponse { + // Primary stream, which contains the beginning portion of + // |original_stream|. An empty value indicates that the original stream can no + // longer be split. + ReadStream primary_stream = 1; + + // Remainder stream, which contains the tail of |original_stream|. An empty + // value indicates that the original stream can no longer be split. + ReadStream remainder_stream = 2; +} + +// Request message for `CreateWriteStream`. +message CreateWriteStreamRequest { + // Required. Reference to the table to which the stream belongs, in the format + // of `projects/{project}/datasets/{dataset}/tables/{table}`. + string parent = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { type: "bigquery.googleapis.com/Table" } + ]; + + // Required. Stream to be created. + WriteStream write_stream = 2 [(google.api.field_behavior) = REQUIRED]; +} + +// Request message for `AppendRows`. +message AppendRowsRequest { + // Proto schema and data. + message ProtoData { + // Proto schema used to serialize the data. + ProtoSchema writer_schema = 1; + + // Serialized row data in protobuf message format. + ProtoRows rows = 2; + } + + // Required. The stream that is the target of the append operation. 
This value + // must be specified for the initial request. If subsequent requests specify + // the stream name, it must equal to the value provided in the first request. + // To write to the _default stream, populate this field with a string in the + // format `projects/{project}/datasets/{dataset}/tables/{table}/_default`. + string write_stream = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "bigquerystorage.googleapis.com/WriteStream" + } + ]; + + // If present, the write is only performed if the next append offset is same + // as the provided value. If not present, the write is performed at the + // current end of stream. Specifying a value for this field is not allowed + // when calling AppendRows for the '_default' stream. + google.protobuf.Int64Value offset = 2; + + // Input rows. The `writer_schema` field must be specified at the initial + // request and currently, it will be ignored if specified in following + // requests. Following requests must have data in the same format as the + // initial request. + oneof rows { + // Rows in proto format. + ProtoData proto_rows = 4; + } + + // Id set by client to annotate its identity. Only initial request setting is + // respected. + string trace_id = 6; +} + +// Response message for `AppendRows`. +message AppendRowsResponse { + // A success append result. + message AppendResult { + // The row offset at which the last append occurred. The offset will not be + // set if appending using default streams. + google.protobuf.Int64Value offset = 1; + } + + oneof response { + // Result if the append is successful. + AppendResult append_result = 1; + + // Error in case of request failed. If set, it means rows are not accepted + // into the system. Users can retry or continue with other requests within + // the same connection. + // ALREADY_EXISTS: happens when offset is specified, it means the entire + // request is already appended, it is safe to ignore this error. + // OUT_OF_RANGE: happens when offset is specified, it means the specified + // offset is beyond the end of the stream. + // INVALID_ARGUMENT: error caused by malformed request or data. + // RESOURCE_EXHAUSTED: request rejected due to throttling. Only happens when + // append without offset. + // ABORTED: request processing is aborted because of prior failures, request + // can be retried if previous failure is fixed. + // INTERNAL: server side errors that can be retried. + google.rpc.Status error = 2; + } + + // If backend detects a schema update, pass it to user so that user can + // use it to input new type of message. It will be empty when there is no + // schema updates. + TableSchema updated_schema = 3; +} + +// Request message for `GetWriteStreamRequest`. +message GetWriteStreamRequest { + // Required. Name of the stream to get, in the form of + // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`. + string name = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "bigquerystorage.googleapis.com/WriteStream" + } + ]; +} + +// Request message for `BatchCommitWriteStreams`. +message BatchCommitWriteStreamsRequest { + // Required. Parent table that all the streams should belong to, in the form + // of `projects/{project}/datasets/{dataset}/tables/{table}`. + string parent = 1 [(google.api.field_behavior) = REQUIRED]; + + // Required. The group of streams that will be committed atomically. 
+ repeated string write_streams = 2 [(google.api.field_behavior) = REQUIRED]; +} + +// Response message for `BatchCommitWriteStreams`. +message BatchCommitWriteStreamsResponse { + // The time at which streams were committed in microseconds granularity. + // This field will only exist when there is no stream errors. + google.protobuf.Timestamp commit_time = 1; + + // Stream level error if commit failed. Only streams with error will be in + // the list. + repeated StorageError stream_errors = 2; +} + +// Request message for invoking `FinalizeWriteStream`. +message FinalizeWriteStreamRequest { + // Required. Name of the stream to finalize, in the form of + // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`. + string name = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "bigquerystorage.googleapis.com/WriteStream" + } + ]; +} + +// Response message for `FinalizeWriteStream`. +message FinalizeWriteStreamResponse { + // Number of rows in the finalized stream. + int64 row_count = 1; +} + +// Request message for `FlushRows`. +message FlushRowsRequest { + // Required. The stream that is the target of the flush operation. + string write_stream = 1 [ + (google.api.field_behavior) = REQUIRED, + (google.api.resource_reference) = { + type: "bigquerystorage.googleapis.com/WriteStream" + } + ]; + + // Ending offset of the flush operation. Rows before this offset(including + // this offset) will be flushed. + google.protobuf.Int64Value offset = 2; +} + +// Respond message for `FlushRows`. +message FlushRowsResponse { + // The rows before this offset (including this offset) are flushed. + int64 offset = 1; +} + +// Structured custom BigQuery Storage error message. The error can be attached +// as error details in the returned rpc Status. User can use the info to process +// errors in a structural way, rather than having to parse error messages. +message StorageError { + // Error code for `StorageError`. + enum StorageErrorCode { + // Default error. + STORAGE_ERROR_CODE_UNSPECIFIED = 0; + + // Table is not found in the system. + TABLE_NOT_FOUND = 1; + + // Stream is already committed. + STREAM_ALREADY_COMMITTED = 2; + + // Stream is not found. + STREAM_NOT_FOUND = 3; + + // Invalid Stream type. + // For example, you try to commit a stream that is not pending. + INVALID_STREAM_TYPE = 4; + + // Invalid Stream state. + // For example, you try to commit a stream that is not fianlized or is + // garbaged. + INVALID_STREAM_STATE = 5; + } + + // BigQuery Storage specific error code. + StorageErrorCode code = 1; + + // Name of the failed entity. + string entity = 2; + + // Message that describes the error. + string error_message = 3; +} diff --git a/google/cloud/bigquery_storage_v1beta2/proto/stream.proto b/google/cloud/bigquery_storage_v1beta2/proto/stream.proto new file mode 100644 index 00000000..2b0a58c9 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/proto/stream.proto @@ -0,0 +1,189 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.bigquery.storage.v1beta2; + +import "google/api/field_behavior.proto"; +import "google/api/resource.proto"; +import "google/cloud/bigquery/storage/v1beta2/arrow.proto"; +import "google/cloud/bigquery/storage/v1beta2/avro.proto"; +import "google/cloud/bigquery/storage/v1beta2/table.proto"; +import "google/protobuf/timestamp.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage"; +option java_multiple_files = true; +option java_outer_classname = "StreamProto"; +option java_package = "com.google.cloud.bigquery.storage.v1beta2"; +option (google.api.resource_definition) = { + type: "bigquery.googleapis.com/Table" + pattern: "projects/{project}/datasets/{dataset}/tables/{table}" +}; + +// Data format for input or output data. +enum DataFormat { + DATA_FORMAT_UNSPECIFIED = 0; + + // Avro is a standard open source row based file format. + // See https://avro.apache.org/ for more details. + AVRO = 1; + + // Arrow is a standard open source column-based message format. + // See https://arrow.apache.org/ for more details. + ARROW = 2; +} + +// Information about the ReadSession. +message ReadSession { + option (google.api.resource) = { + type: "bigquerystorage.googleapis.com/ReadSession" + pattern: "projects/{project}/locations/{location}/sessions/{session}" + }; + + // Additional attributes when reading a table. + message TableModifiers { + // The snapshot time of the table. If not set, interpreted as now. + google.protobuf.Timestamp snapshot_time = 1; + } + + // Options dictating how we read a table. + message TableReadOptions { + // Names of the fields in the table that should be read. If empty, all + // fields will be read. If the specified field is a nested field, all + // the sub-fields in the field will be selected. The output field order is + // unrelated to the order of fields in selected_fields. + repeated string selected_fields = 1; + + // SQL text filtering statement, similar to a WHERE clause in a query. + // Aggregates are not supported. + // + // Examples: "int_field > 5" + // "date_field = CAST('2014-9-27' as DATE)" + // "nullable_field is not NULL" + // "st_equals(geo_field, st_geofromtext("POINT(2, 2)"))" + // "numeric_field BETWEEN 1.0 AND 5.0" + string row_restriction = 2; + + // Optional. Options specific to the Apache Arrow output format. + ArrowSerializationOptions arrow_serialization_options = 3 [(google.api.field_behavior) = OPTIONAL]; + } + + // Output only. Unique identifier for the session, in the form + // `projects/{project_id}/locations/{location}/sessions/{session_id}`. + string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Output only. Time at which the session becomes invalid. After this time, subsequent + // requests to read this Session will return errors. The expire_time is + // automatically assigned and currently cannot be specified or updated. + google.protobuf.Timestamp expire_time = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Immutable. Data format of the output data. + DataFormat data_format = 3 [(google.api.field_behavior) = IMMUTABLE]; + + // The schema for the read. If read_options.selected_fields is set, the + // schema may be different from the table schema as it will only contain + // the selected fields. + oneof schema { + // Output only. Avro schema. 
+ AvroSchema avro_schema = 4 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Output only. Arrow schema. + ArrowSchema arrow_schema = 5 [(google.api.field_behavior) = OUTPUT_ONLY]; + } + + // Immutable. Table that this ReadSession is reading from, in the form + // `projects/{project_id}/datasets/{dataset_id}/tables/{table_id} + string table = 6 [ + (google.api.field_behavior) = IMMUTABLE, + (google.api.resource_reference) = { + type: "bigquery.googleapis.com/Table" + } + ]; + + // Optional. Any modifiers which are applied when reading from the specified table. + TableModifiers table_modifiers = 7 [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Read options for this session (e.g. column selection, filters). + TableReadOptions read_options = 8 [(google.api.field_behavior) = OPTIONAL]; + + // Output only. A list of streams created with the session. + // + // At least one stream is created with the session. In the future, larger + // request_stream_count values *may* result in this list being unpopulated, + // in that case, the user will need to use a List method to get the streams + // instead, which is not yet available. + repeated ReadStream streams = 10 [(google.api.field_behavior) = OUTPUT_ONLY]; +} + +// Information about a single stream that gets data out of the storage system. +// Most of the information about `ReadStream` instances is aggregated, making +// `ReadStream` lightweight. +message ReadStream { + option (google.api.resource) = { + type: "bigquerystorage.googleapis.com/ReadStream" + pattern: "projects/{project}/locations/{location}/sessions/{session}/streams/{stream}" + }; + + // Output only. Name of the stream, in the form + // `projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}`. + string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; +} + +// Information about a single stream that gets data inside the storage system. +message WriteStream { + option (google.api.resource) = { + type: "bigquerystorage.googleapis.com/WriteStream" + pattern: "projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}" + }; + + // Type enum of the stream. + enum Type { + // Unknown type. + TYPE_UNSPECIFIED = 0; + + // Data will commit automatically and appear as soon as the write is + // acknowledged. + COMMITTED = 1; + + // Data is invisible until the stream is committed. + PENDING = 2; + + // Data is only visible up to the offset to which it was flushed. + BUFFERED = 3; + } + + // Output only. Name of the stream, in the form + // `projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}`. + string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Immutable. Type of the stream. + Type type = 2 [(google.api.field_behavior) = IMMUTABLE]; + + // Output only. Create time of the stream. For the _default stream, this is the + // creation_time of the table. + google.protobuf.Timestamp create_time = 3 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Output only. Commit time of the stream. + // If a stream is of `COMMITTED` type, then it will have a commit_time same as + // `create_time`. If the stream is of `PENDING` type, commit_time being empty + // means it is not committed. + google.protobuf.Timestamp commit_time = 4 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Output only. The schema of the destination table. It is only returned in + // `CreateWriteStream` response. Caller should generate data that's + // compatible with this schema to send in initial `AppendRowsRequest`. 
+ // The table schema could go out of date during the life time of the stream. + TableSchema table_schema = 5 [(google.api.field_behavior) = OUTPUT_ONLY]; +} diff --git a/google/cloud/bigquery_storage_v1beta2/proto/table.proto b/google/cloud/bigquery_storage_v1beta2/proto/table.proto new file mode 100644 index 00000000..fd8a0a75 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/proto/table.proto @@ -0,0 +1,102 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.bigquery.storage.v1beta2; + +import "google/api/field_behavior.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2;storage"; +option java_multiple_files = true; +option java_outer_classname = "TableProto"; +option java_package = "com.google.cloud.bigquery.storage.v1beta2"; + +// Schema of a table +message TableSchema { + // Describes the fields in a table. + repeated TableFieldSchema fields = 1; +} + +// A field in TableSchema +message TableFieldSchema { + enum Type { + // Illegal value + TYPE_UNSPECIFIED = 0; + + // 64K, UTF8 + STRING = 1; + + // 64-bit signed + INT64 = 2; + + // 64-bit IEEE floating point + DOUBLE = 3; + + // Aggregate type + STRUCT = 4; + + // 64K, Binary + BYTES = 5; + + // 2-valued + BOOL = 6; + + // 64-bit signed usec since UTC epoch + TIMESTAMP = 7; + + // Civil date - Year, Month, Day + DATE = 8; + + // Civil time - Hour, Minute, Second, Microseconds + TIME = 9; + + // Combination of civil date and civil time + DATETIME = 10; + + // Geography object + GEOGRAPHY = 11; + + // Numeric value + NUMERIC = 12; + } + + enum Mode { + // Illegal value + MODE_UNSPECIFIED = 0; + + NULLABLE = 1; + + REQUIRED = 2; + + REPEATED = 3; + } + + // Required. The field name. The name must contain only letters (a-z, A-Z), + // numbers (0-9), or underscores (_), and must start with a letter or + // underscore. The maximum length is 128 characters. + string name = 1 [(google.api.field_behavior) = REQUIRED]; + + // Required. The field data type. + Type type = 2 [(google.api.field_behavior) = REQUIRED]; + + // Optional. The field mode. The default value is NULLABLE. + Mode mode = 3 [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Describes the nested schema fields if the type property is set to STRUCT. + repeated TableFieldSchema fields = 4 [(google.api.field_behavior) = OPTIONAL]; + + // Optional. The field description. The maximum length is 1,024 characters. + string description = 6 [(google.api.field_behavior) = OPTIONAL]; +} diff --git a/google/cloud/bigquery_storage_v1beta2/py.typed b/google/cloud/bigquery_storage_v1beta2/py.typed new file mode 100644 index 00000000..e71b4749 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-cloud-bigquery-storage package uses inline types. 
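The proto files added above define the v1beta2 Read API surface (CreateReadSession, ReadRows, SplitReadStream) that the generated clients introduced later in this patch expose with flattened keyword arguments. The following is a minimal, illustrative sketch of how that surface might be exercised once the patch is applied; the project ID, table path, and column names are hypothetical placeholders, and the exact locations of `BigQueryReadClient`, the `stream` types module, and the `DataFormat` enum are assumed from the `google.cloud.bigquery_storage_v1beta2` package layout shown in this series.

    # Illustrative sketch only -- assumes the generated v1beta2 package layout
    # from this patch series; "my-project", the table path, and the column
    # names are hypothetical placeholders.
    from google.cloud.bigquery_storage_v1beta2.services.big_query_read import (
        BigQueryReadClient,
    )
    from google.cloud.bigquery_storage_v1beta2.types import stream

    client = BigQueryReadClient()

    # Describe what to read: the table, the wire format, and an optional
    # column projection / row filter (see TableReadOptions in stream.proto).
    read_session = stream.ReadSession(
        table="projects/my-project/datasets/my_dataset/tables/my_table",
        data_format=stream.DataFormat.AVRO,
    )
    read_session.read_options.selected_fields.append("int_field")
    read_session.read_options.row_restriction = "int_field > 5"

    # CreateReadSession divides the table into one or more streams; sessions
    # expire automatically 24 hours after creation.
    session = client.create_read_session(
        parent="projects/my-project",
        read_session=read_session,
        max_stream_count=1,
    )

    # ReadRows streams back ReadRowsResponse messages (row data plus stream
    # statistics) for a single ReadStream, starting at offset 0.
    for response in client.read_rows(read_stream=session.streams[0].name, offset=0):
        print(response.row_count, response.stats.progress.at_response_end)

SplitReadStream and the BigQueryWrite RPCs follow the same pattern in the generated clients: either pass a fully populated request object or the flattened fields named in each method's signature, but not both.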
diff --git a/google/cloud/bigquery_storage_v1beta2/services/__init__.py b/google/cloud/bigquery_storage_v1beta2/services/__init__.py new file mode 100644 index 00000000..42ffdf2b --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/__init__.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_read/__init__.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/__init__.py new file mode 100644 index 00000000..2105a1a6 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .client import BigQueryReadClient +from .async_client import BigQueryReadAsyncClient + +__all__ = ( + "BigQueryReadClient", + "BigQueryReadAsyncClient", +) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_read/async_client.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/async_client.py new file mode 100644 index 00000000..69b6ebe2 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/async_client.py @@ -0,0 +1,462 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from collections import OrderedDict +import functools +import re +from typing import Dict, AsyncIterable, Awaitable, Sequence, Tuple, Type, Union +import pkg_resources + +import google.api_core.client_options as ClientOptions # type: ignore +from google.api_core import exceptions # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google.api_core import retry as retries # type: ignore +from google.auth import credentials # type: ignore +from google.oauth2 import service_account # type: ignore + +from google.cloud.bigquery_storage_v1beta2.types import arrow +from google.cloud.bigquery_storage_v1beta2.types import avro +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream +from google.protobuf import timestamp_pb2 as timestamp # type: ignore + +from .transports.base import BigQueryReadTransport, DEFAULT_CLIENT_INFO +from .transports.grpc_asyncio import BigQueryReadGrpcAsyncIOTransport +from .client import BigQueryReadClient + + +class BigQueryReadAsyncClient: + """BigQuery Read API. + The Read API can be used to read data from BigQuery. + New code should use the v1 Read API going forward, if they don't + use Write API at the same time. + """ + + _client: BigQueryReadClient + + DEFAULT_ENDPOINT = BigQueryReadClient.DEFAULT_ENDPOINT + DEFAULT_MTLS_ENDPOINT = BigQueryReadClient.DEFAULT_MTLS_ENDPOINT + + read_session_path = staticmethod(BigQueryReadClient.read_session_path) + parse_read_session_path = staticmethod(BigQueryReadClient.parse_read_session_path) + read_stream_path = staticmethod(BigQueryReadClient.read_stream_path) + parse_read_stream_path = staticmethod(BigQueryReadClient.parse_read_stream_path) + table_path = staticmethod(BigQueryReadClient.table_path) + parse_table_path = staticmethod(BigQueryReadClient.parse_table_path) + + common_billing_account_path = staticmethod( + BigQueryReadClient.common_billing_account_path + ) + parse_common_billing_account_path = staticmethod( + BigQueryReadClient.parse_common_billing_account_path + ) + + common_folder_path = staticmethod(BigQueryReadClient.common_folder_path) + parse_common_folder_path = staticmethod(BigQueryReadClient.parse_common_folder_path) + + common_organization_path = staticmethod(BigQueryReadClient.common_organization_path) + parse_common_organization_path = staticmethod( + BigQueryReadClient.parse_common_organization_path + ) + + common_project_path = staticmethod(BigQueryReadClient.common_project_path) + parse_common_project_path = staticmethod( + BigQueryReadClient.parse_common_project_path + ) + + common_location_path = staticmethod(BigQueryReadClient.common_location_path) + parse_common_location_path = staticmethod( + BigQueryReadClient.parse_common_location_path + ) + + from_service_account_info = BigQueryReadClient.from_service_account_info + from_service_account_file = BigQueryReadClient.from_service_account_file + from_service_account_json = from_service_account_file + + @property + def transport(self) -> BigQueryReadTransport: + """Return the transport used by the client instance. + + Returns: + BigQueryReadTransport: The transport used by the client instance. 
+ """ + return self._client.transport + + get_transport_class = functools.partial( + type(BigQueryReadClient).get_transport_class, type(BigQueryReadClient) + ) + + def __init__( + self, + *, + credentials: credentials.Credentials = None, + transport: Union[str, BigQueryReadTransport] = "grpc_asyncio", + client_options: ClientOptions = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the big query read client. + + Args: + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + transport (Union[str, ~.BigQueryReadTransport]): The + transport to use. If set to None, a transport is chosen + automatically. + client_options (ClientOptions): Custom options for the client. It + won't take effect if a ``transport`` instance is provided. + (1) The ``api_endpoint`` property can be used to override the + default endpoint provided by the client. GOOGLE_API_USE_MTLS_ENDPOINT + environment variable can also be used to override the endpoint: + "always" (always use the default mTLS endpoint), "never" (always + use the default regular endpoint) and "auto" (auto switch to the + default mTLS endpoint if client certificate is present, this is + the default value). However, the ``api_endpoint`` property takes + precedence if provided. + (2) If GOOGLE_API_USE_CLIENT_CERTIFICATE environment variable + is "true", then the ``client_cert_source`` property can be used + to provide client certificate for mutual TLS transport. If + not provided, the default SSL client certificate will be used if + present. If GOOGLE_API_USE_CLIENT_CERTIFICATE is "false" or not + set, no client certificate will be used. + + Raises: + google.auth.exceptions.MutualTlsChannelError: If mutual TLS transport + creation failed for any reason. + """ + + self._client = BigQueryReadClient( + credentials=credentials, + transport=transport, + client_options=client_options, + client_info=client_info, + ) + + async def create_read_session( + self, + request: storage.CreateReadSessionRequest = None, + *, + parent: str = None, + read_session: stream.ReadSession = None, + max_stream_count: int = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> stream.ReadSession: + r"""Creates a new read session. A read session divides + the contents of a BigQuery table into one or more + streams, which can then be used to read data from the + table. The read session also specifies properties of the + data to be read, such as a list of columns or a push- + down filter describing the rows to be returned. + + A particular row can be read by at most one stream. When + the caller has reached the end of each stream in the + session, then all the data in the table has been read. + + Data is assigned to each stream such that roughly the + same number of rows can be read from each stream. + Because the server-side unit for assigning data is + collections of rows, the API does not guarantee that + each stream will return the same number or rows. + Additionally, the limits are enforced based on the + number of pre-filtered rows, so some filters can lead to + lopsided assignments. + + Read sessions automatically expire 24 hours after they + are created and do not require manual clean-up by the + caller. 
+ + Args: + request (:class:`google.cloud.bigquery_storage_v1beta2.types.CreateReadSessionRequest`): + The request object. Request message for + `CreateReadSession`. + parent (:class:`str`): + Required. The request project that owns the session, in + the form of ``projects/{project_id}``. + + This corresponds to the ``parent`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + read_session (:class:`google.cloud.bigquery_storage_v1beta2.types.ReadSession`): + Required. Session to be created. + This corresponds to the ``read_session`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + max_stream_count (:class:`int`): + Max initial number of streams. If + unset or zero, the server will provide a + value of streams so as to produce + reasonable throughput. Must be non- + negative. The number of streams may be + lower than the requested number, + depending on the amount parallelism that + is reasonable for the table. Error will + be returned if the max count is greater + than the current system max limit of + 1,000. + + Streams must be read starting from + offset 0. + + This corresponds to the ``max_stream_count`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.ReadSession: + Information about the ReadSession. + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([parent, read_session, max_stream_count]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = storage.CreateReadSessionRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if parent is not None: + request.parent = parent + if read_session is not None: + request.read_session = read_session + if max_stream_count is not None: + request.max_stream_count = max_stream_count + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.create_read_session, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("read_session.table", request.read_session.table),) + ), + ) + + # Send the request. + response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. 
+ return response + + def read_rows( + self, + request: storage.ReadRowsRequest = None, + *, + read_stream: str = None, + offset: int = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> Awaitable[AsyncIterable[storage.ReadRowsResponse]]: + r"""Reads rows from the stream in the format prescribed + by the ReadSession. Each response contains one or more + table rows, up to a maximum of 100 MiB per response; + read requests which attempt to read individual rows + larger than 100 MiB will fail. + + Each request also returns a set of stream statistics + reflecting the current state of the stream. + + Args: + request (:class:`google.cloud.bigquery_storage_v1beta2.types.ReadRowsRequest`): + The request object. Request message for `ReadRows`. + read_stream (:class:`str`): + Required. Stream to read rows from. + This corresponds to the ``read_stream`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + offset (:class:`int`): + The offset requested must be less + than the last row read from Read. + Requesting a larger offset is undefined. + If not specified, start reading from + offset zero. + + This corresponds to the ``offset`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + AsyncIterable[google.cloud.bigquery_storage_v1beta2.types.ReadRowsResponse]: + Response from calling ReadRows may include row data, progress and + throttling information. + + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([read_stream, offset]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = storage.ReadRowsRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if read_stream is not None: + request.read_stream = read_stream + if offset is not None: + request.offset = offset + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.read_rows, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type(exceptions.ServiceUnavailable,), + ), + default_timeout=86400.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("read_stream", request.read_stream),) + ), + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + async def split_read_stream( + self, + request: storage.SplitReadStreamRequest = None, + *, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> storage.SplitReadStreamResponse: + r"""Splits a given ``ReadStream`` into two ``ReadStream`` objects. 
+ These ``ReadStream`` objects are referred to as the primary and + the residual streams of the split. The original ``ReadStream`` + can still be read from in the same manner as before. Both of the + returned ``ReadStream`` objects can also be read from, and the + rows returned by both child streams will be the same as the rows + read from the original stream. + + Moreover, the two child streams will be allocated back-to-back + in the original ``ReadStream``. Concretely, it is guaranteed + that for streams original, primary, and residual, that + original[0-j] = primary[0-j] and original[j-n] = residual[0-m] + once the streams have been read to completion. + + Args: + request (:class:`google.cloud.bigquery_storage_v1beta2.types.SplitReadStreamRequest`): + The request object. Request message for + `SplitReadStream`. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.SplitReadStreamResponse: + + """ + # Create or coerce a protobuf request object. + + request = storage.SplitReadStreamRequest(request) + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.split_read_stream, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)), + ) + + # Send the request. + response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + +try: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo( + gapic_version=pkg_resources.get_distribution( + "google-cloud-bigquery-storage", + ).version, + ) +except pkg_resources.DistributionNotFound: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo() + + +__all__ = ("BigQueryReadAsyncClient",) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_read/client.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/client.py new file mode 100644 index 00000000..110dfd5f --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/client.py @@ -0,0 +1,681 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from collections import OrderedDict +from distutils import util +import os +import re +from typing import Callable, Dict, Optional, Iterable, Sequence, Tuple, Type, Union +import pkg_resources + +from google.api_core import client_options as client_options_lib # type: ignore +from google.api_core import exceptions # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google.api_core import retry as retries # type: ignore +from google.auth import credentials # type: ignore +from google.auth.transport import mtls # type: ignore +from google.auth.transport.grpc import SslCredentials # type: ignore +from google.auth.exceptions import MutualTLSChannelError # type: ignore +from google.oauth2 import service_account # type: ignore + +from google.cloud.bigquery_storage_v1beta2.types import arrow +from google.cloud.bigquery_storage_v1beta2.types import avro +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream +from google.protobuf import timestamp_pb2 as timestamp # type: ignore + +from .transports.base import BigQueryReadTransport, DEFAULT_CLIENT_INFO +from .transports.grpc import BigQueryReadGrpcTransport +from .transports.grpc_asyncio import BigQueryReadGrpcAsyncIOTransport + + +class BigQueryReadClientMeta(type): + """Metaclass for the BigQueryRead client. + + This provides class-level methods for building and retrieving + support objects (e.g. transport) without polluting the client instance + objects. + """ + + _transport_registry = OrderedDict() # type: Dict[str, Type[BigQueryReadTransport]] + _transport_registry["grpc"] = BigQueryReadGrpcTransport + _transport_registry["grpc_asyncio"] = BigQueryReadGrpcAsyncIOTransport + + def get_transport_class(cls, label: str = None,) -> Type[BigQueryReadTransport]: + """Return an appropriate transport class. + + Args: + label: The name of the desired transport. If none is + provided, then the first transport in the registry is used. + + Returns: + The transport class to use. + """ + # If a specific transport is requested, return that one. + if label: + return cls._transport_registry[label] + + # No transport is requested; return the default (that is, the first one + # in the dictionary). + return next(iter(cls._transport_registry.values())) + + +class BigQueryReadClient(metaclass=BigQueryReadClientMeta): + """BigQuery Read API. + The Read API can be used to read data from BigQuery. + New code should use the v1 Read API going forward, if they don't + use Write API at the same time. + """ + + @staticmethod + def _get_default_mtls_endpoint(api_endpoint): + """Convert api endpoint to mTLS endpoint. + Convert "*.sandbox.googleapis.com" and "*.googleapis.com" to + "*.mtls.sandbox.googleapis.com" and "*.mtls.googleapis.com" respectively. + Args: + api_endpoint (Optional[str]): the api endpoint to convert. + Returns: + str: converted mTLS api endpoint. + """ + if not api_endpoint: + return api_endpoint + + mtls_endpoint_re = re.compile( + r"(?P[^.]+)(?P\.mtls)?(?P\.sandbox)?(?P\.googleapis\.com)?" 
+ ) + + m = mtls_endpoint_re.match(api_endpoint) + name, mtls, sandbox, googledomain = m.groups() + if mtls or not googledomain: + return api_endpoint + + if sandbox: + return api_endpoint.replace( + "sandbox.googleapis.com", "mtls.sandbox.googleapis.com" + ) + + return api_endpoint.replace(".googleapis.com", ".mtls.googleapis.com") + + DEFAULT_ENDPOINT = "bigquerystorage.googleapis.com" + DEFAULT_MTLS_ENDPOINT = _get_default_mtls_endpoint.__func__( # type: ignore + DEFAULT_ENDPOINT + ) + + @classmethod + def from_service_account_info(cls, info: dict, *args, **kwargs): + """Creates an instance of this client using the provided credentials info. + + Args: + info (dict): The service account private key info. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + BigQueryReadClient: The constructed client. + """ + credentials = service_account.Credentials.from_service_account_info(info) + kwargs["credentials"] = credentials + return cls(*args, **kwargs) + + @classmethod + def from_service_account_file(cls, filename: str, *args, **kwargs): + """Creates an instance of this client using the provided credentials + file. + + Args: + filename (str): The path to the service account private key json + file. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + BigQueryReadClient: The constructed client. + """ + credentials = service_account.Credentials.from_service_account_file(filename) + kwargs["credentials"] = credentials + return cls(*args, **kwargs) + + from_service_account_json = from_service_account_file + + @property + def transport(self) -> BigQueryReadTransport: + """Return the transport used by the client instance. + + Returns: + BigQueryReadTransport: The transport used by the client instance. 
+ """ + return self._transport + + @staticmethod + def read_session_path(project: str, location: str, session: str,) -> str: + """Return a fully-qualified read_session string.""" + return "projects/{project}/locations/{location}/sessions/{session}".format( + project=project, location=location, session=session, + ) + + @staticmethod + def parse_read_session_path(path: str) -> Dict[str, str]: + """Parse a read_session path into its component segments.""" + m = re.match( + r"^projects/(?P.+?)/locations/(?P.+?)/sessions/(?P.+?)$", + path, + ) + return m.groupdict() if m else {} + + @staticmethod + def read_stream_path( + project: str, location: str, session: str, stream: str, + ) -> str: + """Return a fully-qualified read_stream string.""" + return "projects/{project}/locations/{location}/sessions/{session}/streams/{stream}".format( + project=project, location=location, session=session, stream=stream, + ) + + @staticmethod + def parse_read_stream_path(path: str) -> Dict[str, str]: + """Parse a read_stream path into its component segments.""" + m = re.match( + r"^projects/(?P.+?)/locations/(?P.+?)/sessions/(?P.+?)/streams/(?P.+?)$", + path, + ) + return m.groupdict() if m else {} + + @staticmethod + def table_path(project: str, dataset: str, table: str,) -> str: + """Return a fully-qualified table string.""" + return "projects/{project}/datasets/{dataset}/tables/{table}".format( + project=project, dataset=dataset, table=table, + ) + + @staticmethod + def parse_table_path(path: str) -> Dict[str, str]: + """Parse a table path into its component segments.""" + m = re.match( + r"^projects/(?P.+?)/datasets/(?P.+?)/tables/(?P.+?)$", + path, + ) + return m.groupdict() if m else {} + + @staticmethod + def common_billing_account_path(billing_account: str,) -> str: + """Return a fully-qualified billing_account string.""" + return "billingAccounts/{billing_account}".format( + billing_account=billing_account, + ) + + @staticmethod + def parse_common_billing_account_path(path: str) -> Dict[str, str]: + """Parse a billing_account path into its component segments.""" + m = re.match(r"^billingAccounts/(?P.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_folder_path(folder: str,) -> str: + """Return a fully-qualified folder string.""" + return "folders/{folder}".format(folder=folder,) + + @staticmethod + def parse_common_folder_path(path: str) -> Dict[str, str]: + """Parse a folder path into its component segments.""" + m = re.match(r"^folders/(?P.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_organization_path(organization: str,) -> str: + """Return a fully-qualified organization string.""" + return "organizations/{organization}".format(organization=organization,) + + @staticmethod + def parse_common_organization_path(path: str) -> Dict[str, str]: + """Parse a organization path into its component segments.""" + m = re.match(r"^organizations/(?P.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_project_path(project: str,) -> str: + """Return a fully-qualified project string.""" + return "projects/{project}".format(project=project,) + + @staticmethod + def parse_common_project_path(path: str) -> Dict[str, str]: + """Parse a project path into its component segments.""" + m = re.match(r"^projects/(?P.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_location_path(project: str, location: str,) -> str: + """Return a fully-qualified location string.""" + return 
"projects/{project}/locations/{location}".format( + project=project, location=location, + ) + + @staticmethod + def parse_common_location_path(path: str) -> Dict[str, str]: + """Parse a location path into its component segments.""" + m = re.match(r"^projects/(?P.+?)/locations/(?P.+?)$", path) + return m.groupdict() if m else {} + + def __init__( + self, + *, + credentials: Optional[credentials.Credentials] = None, + transport: Union[str, BigQueryReadTransport, None] = None, + client_options: Optional[client_options_lib.ClientOptions] = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the big query read client. + + Args: + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + transport (Union[str, BigQueryReadTransport]): The + transport to use. If set to None, a transport is chosen + automatically. + client_options (google.api_core.client_options.ClientOptions): Custom options for the + client. It won't take effect if a ``transport`` instance is provided. + (1) The ``api_endpoint`` property can be used to override the + default endpoint provided by the client. GOOGLE_API_USE_MTLS_ENDPOINT + environment variable can also be used to override the endpoint: + "always" (always use the default mTLS endpoint), "never" (always + use the default regular endpoint) and "auto" (auto switch to the + default mTLS endpoint if client certificate is present, this is + the default value). However, the ``api_endpoint`` property takes + precedence if provided. + (2) If GOOGLE_API_USE_CLIENT_CERTIFICATE environment variable + is "true", then the ``client_cert_source`` property can be used + to provide client certificate for mutual TLS transport. If + not provided, the default SSL client certificate will be used if + present. If GOOGLE_API_USE_CLIENT_CERTIFICATE is "false" or not + set, no client certificate will be used. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + + Raises: + google.auth.exceptions.MutualTLSChannelError: If mutual TLS transport + creation failed for any reason. + """ + if isinstance(client_options, dict): + client_options = client_options_lib.from_dict(client_options) + if client_options is None: + client_options = client_options_lib.ClientOptions() + + # Create SSL credentials for mutual TLS if needed. + use_client_cert = bool( + util.strtobool(os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE", "false")) + ) + + ssl_credentials = None + is_mtls = False + if use_client_cert: + if client_options.client_cert_source: + import grpc # type: ignore + + cert, key = client_options.client_cert_source() + ssl_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + is_mtls = True + else: + creds = SslCredentials() + is_mtls = creds.is_mtls + ssl_credentials = creds.ssl_credentials if is_mtls else None + + # Figure out which api endpoint to use. 
+ if client_options.api_endpoint is not None: + api_endpoint = client_options.api_endpoint + else: + use_mtls_env = os.getenv("GOOGLE_API_USE_MTLS_ENDPOINT", "auto") + if use_mtls_env == "never": + api_endpoint = self.DEFAULT_ENDPOINT + elif use_mtls_env == "always": + api_endpoint = self.DEFAULT_MTLS_ENDPOINT + elif use_mtls_env == "auto": + api_endpoint = ( + self.DEFAULT_MTLS_ENDPOINT if is_mtls else self.DEFAULT_ENDPOINT + ) + else: + raise MutualTLSChannelError( + "Unsupported GOOGLE_API_USE_MTLS_ENDPOINT value. Accepted values: never, auto, always" + ) + + # Save or instantiate the transport. + # Ordinarily, we provide the transport, but allowing a custom transport + # instance provides an extensibility point for unusual situations. + if isinstance(transport, BigQueryReadTransport): + # transport is a BigQueryReadTransport instance. + if credentials or client_options.credentials_file: + raise ValueError( + "When providing a transport instance, " + "provide its credentials directly." + ) + if client_options.scopes: + raise ValueError( + "When providing a transport instance, " + "provide its scopes directly." + ) + self._transport = transport + else: + Transport = type(self).get_transport_class(transport) + self._transport = Transport( + credentials=credentials, + credentials_file=client_options.credentials_file, + host=api_endpoint, + scopes=client_options.scopes, + ssl_channel_credentials=ssl_credentials, + quota_project_id=client_options.quota_project_id, + client_info=client_info, + ) + + def create_read_session( + self, + request: storage.CreateReadSessionRequest = None, + *, + parent: str = None, + read_session: stream.ReadSession = None, + max_stream_count: int = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> stream.ReadSession: + r"""Creates a new read session. A read session divides + the contents of a BigQuery table into one or more + streams, which can then be used to read data from the + table. The read session also specifies properties of the + data to be read, such as a list of columns or a push- + down filter describing the rows to be returned. + + A particular row can be read by at most one stream. When + the caller has reached the end of each stream in the + session, then all the data in the table has been read. + + Data is assigned to each stream such that roughly the + same number of rows can be read from each stream. + Because the server-side unit for assigning data is + collections of rows, the API does not guarantee that + each stream will return the same number or rows. + Additionally, the limits are enforced based on the + number of pre-filtered rows, so some filters can lead to + lopsided assignments. + + Read sessions automatically expire 24 hours after they + are created and do not require manual clean-up by the + caller. + + Args: + request (google.cloud.bigquery_storage_v1beta2.types.CreateReadSessionRequest): + The request object. Request message for + `CreateReadSession`. + parent (str): + Required. The request project that owns the session, in + the form of ``projects/{project_id}``. + + This corresponds to the ``parent`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + read_session (google.cloud.bigquery_storage_v1beta2.types.ReadSession): + Required. Session to be created. + This corresponds to the ``read_session`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. 
+ max_stream_count (int): + Max initial number of streams. If + unset or zero, the server will provide a + value of streams so as to produce + reasonable throughput. Must be non- + negative. The number of streams may be + lower than the requested number, + depending on the amount parallelism that + is reasonable for the table. Error will + be returned if the max count is greater + than the current system max limit of + 1,000. + + Streams must be read starting from + offset 0. + + This corresponds to the ``max_stream_count`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.ReadSession: + Information about the ReadSession. + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([parent, read_session, max_stream_count]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a storage.CreateReadSessionRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, storage.CreateReadSessionRequest): + request = storage.CreateReadSessionRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if parent is not None: + request.parent = parent + if read_session is not None: + request.read_session = read_session + if max_stream_count is not None: + request.max_stream_count = max_stream_count + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.create_read_session] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("read_session.table", request.read_session.table),) + ), + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + def read_rows( + self, + request: storage.ReadRowsRequest = None, + *, + read_stream: str = None, + offset: int = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> Iterable[storage.ReadRowsResponse]: + r"""Reads rows from the stream in the format prescribed + by the ReadSession. Each response contains one or more + table rows, up to a maximum of 100 MiB per response; + read requests which attempt to read individual rows + larger than 100 MiB will fail. + + Each request also returns a set of stream statistics + reflecting the current state of the stream. + + Args: + request (google.cloud.bigquery_storage_v1beta2.types.ReadRowsRequest): + The request object. Request message for `ReadRows`. + read_stream (str): + Required. Stream to read rows from. 
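# Usage sketch: create_read_session with the flattened arguments. Project,
# dataset, and table names are hypothetical; the arguments are folded into a
# CreateReadSessionRequest exactly as the method body above does.
from google.cloud.bigquery_storage_v1beta2.services.big_query_read import BigQueryReadClient
from google.cloud.bigquery_storage_v1beta2.types import stream

client = BigQueryReadClient()
session = client.create_read_session(
    parent="projects/my-project",
    read_session=stream.ReadSession(
        table="projects/my-project/datasets/my_dataset/tables/my_table",
    ),
    max_stream_count=1,  # the server may allocate fewer streams than requested
)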
+ This corresponds to the ``read_stream`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + offset (int): + The offset requested must be less + than the last row read from Read. + Requesting a larger offset is undefined. + If not specified, start reading from + offset zero. + + This corresponds to the ``offset`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + Iterable[google.cloud.bigquery_storage_v1beta2.types.ReadRowsResponse]: + Response from calling ReadRows may include row data, progress and + throttling information. + + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([read_stream, offset]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a storage.ReadRowsRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, storage.ReadRowsRequest): + request = storage.ReadRowsRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if read_stream is not None: + request.read_stream = read_stream + if offset is not None: + request.offset = offset + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.read_rows] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("read_stream", request.read_stream),) + ), + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + def split_read_stream( + self, + request: storage.SplitReadStreamRequest = None, + *, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> storage.SplitReadStreamResponse: + r"""Splits a given ``ReadStream`` into two ``ReadStream`` objects. + These ``ReadStream`` objects are referred to as the primary and + the residual streams of the split. The original ``ReadStream`` + can still be read from in the same manner as before. Both of the + returned ``ReadStream`` objects can also be read from, and the + rows returned by both child streams will be the same as the rows + read from the original stream. + + Moreover, the two child streams will be allocated back-to-back + in the original ``ReadStream``. Concretely, it is guaranteed + that for streams original, primary, and residual, that + original[0-j] = primary[0-j] and original[j-n] = residual[0-m] + once the streams have been read to completion. + + Args: + request (google.cloud.bigquery_storage_v1beta2.types.SplitReadStreamRequest): + The request object. Request message for + `SplitReadStream`. 
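# Usage sketch: read_rows over a stream from the session created in the sketch
# above. Assumes the ReadSession message exposes its allocated streams on a
# ``streams`` field and that ReadRowsResponse carries a ``row_count`` field,
# both as in the v1 surface; a per-call timeout overrides the wrapped default.
reader = client.read_rows(
    read_stream=session.streams[0].name,
    offset=0,  # start at the beginning of the stream
    timeout=3600.0,
)
for response in reader:
    # Each response holds at most 100 MiB of serialized rows plus stream
    # statistics; decoding depends on the session's data format.
    print(response.row_count)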
+ + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.SplitReadStreamResponse: + + """ + # Create or coerce a protobuf request object. + + # Minor optimization to avoid making a copy if the user passes + # in a storage.SplitReadStreamRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, storage.SplitReadStreamRequest): + request = storage.SplitReadStreamRequest(request) + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.split_read_stream] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)), + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + +try: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo( + gapic_version=pkg_resources.get_distribution( + "google-cloud-bigquery-storage", + ).version, + ) +except pkg_resources.DistributionNotFound: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo() + + +__all__ = ("BigQueryReadClient",) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/__init__.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/__init__.py new file mode 100644 index 00000000..87e56323 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/__init__.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from collections import OrderedDict +from typing import Dict, Type + +from .base import BigQueryReadTransport +from .grpc import BigQueryReadGrpcTransport +from .grpc_asyncio import BigQueryReadGrpcAsyncIOTransport + + +# Compile a registry of transports. 
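# Usage sketch: split_read_stream. There are no flattened arguments, so the
# request object is passed directly; its ``name`` field (the stream to split)
# is the same field used for the routing header above. The stream name is
# assumed to come from the session sketched earlier.
from google.cloud.bigquery_storage_v1beta2.types import storage

response = client.split_read_stream(
    request=storage.SplitReadStreamRequest(name=session.streams[0].name),
)
# The response describes the primary and residual halves of the split stream.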
+_transport_registry = OrderedDict() # type: Dict[str, Type[BigQueryReadTransport]] +_transport_registry["grpc"] = BigQueryReadGrpcTransport +_transport_registry["grpc_asyncio"] = BigQueryReadGrpcAsyncIOTransport + +__all__ = ( + "BigQueryReadTransport", + "BigQueryReadGrpcTransport", + "BigQueryReadGrpcAsyncIOTransport", +) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/base.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/base.py new file mode 100644 index 00000000..72f43ab7 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/base.py @@ -0,0 +1,186 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import abc +import typing +import pkg_resources + +from google import auth # type: ignore +from google.api_core import exceptions # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google.api_core import retry as retries # type: ignore +from google.auth import credentials # type: ignore + +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream + + +try: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo( + gapic_version=pkg_resources.get_distribution( + "google-cloud-bigquery-storage", + ).version, + ) +except pkg_resources.DistributionNotFound: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo() + + +class BigQueryReadTransport(abc.ABC): + """Abstract transport class for BigQueryRead.""" + + AUTH_SCOPES = ( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ) + + def __init__( + self, + *, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: typing.Optional[str] = None, + scopes: typing.Optional[typing.Sequence[str]] = AUTH_SCOPES, + quota_project_id: typing.Optional[str] = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + **kwargs, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is mutually exclusive with credentials. + scope (Optional[Sequence[str]]): A list of scopes. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. 
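# Usage sketch: supplying a credentials file instead of in-memory credentials.
# The two are mutually exclusive, as documented above. The path is hypothetical,
# and this assumes the installed google-api-core exposes ``credentials_file`` on
# ClientOptions, which the client forwards to the transport.
from google.api_core.client_options import ClientOptions
from google.cloud.bigquery_storage_v1beta2.services.big_query_read import BigQueryReadClient

client = BigQueryReadClient(
    client_options=ClientOptions(credentials_file="service-account.json"),
)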
+ Generally, you only need to set this if you're developing + your own client library. + """ + # Save the hostname. Default to port 443 (HTTPS) if none is specified. + if ":" not in host: + host += ":443" + self._host = host + + # If no credentials are provided, then determine the appropriate + # defaults. + if credentials and credentials_file: + raise exceptions.DuplicateCredentialArgs( + "'credentials_file' and 'credentials' are mutually exclusive" + ) + + if credentials_file is not None: + credentials, _ = auth.load_credentials_from_file( + credentials_file, scopes=scopes, quota_project_id=quota_project_id + ) + + elif credentials is None: + credentials, _ = auth.default( + scopes=scopes, quota_project_id=quota_project_id + ) + + # Save the credentials. + self._credentials = credentials + + # Lifted into its own function so it can be stubbed out during tests. + self._prep_wrapped_messages(client_info) + + def _prep_wrapped_messages(self, client_info): + # Precompute the wrapped methods. + self._wrapped_methods = { + self.create_read_session: gapic_v1.method.wrap_method( + self.create_read_session, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=client_info, + ), + self.read_rows: gapic_v1.method.wrap_method( + self.read_rows, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type(exceptions.ServiceUnavailable,), + ), + default_timeout=86400.0, + client_info=client_info, + ), + self.split_read_stream: gapic_v1.method.wrap_method( + self.split_read_stream, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=client_info, + ), + } + + @property + def create_read_session( + self, + ) -> typing.Callable[ + [storage.CreateReadSessionRequest], + typing.Union[stream.ReadSession, typing.Awaitable[stream.ReadSession]], + ]: + raise NotImplementedError() + + @property + def read_rows( + self, + ) -> typing.Callable[ + [storage.ReadRowsRequest], + typing.Union[ + storage.ReadRowsResponse, typing.Awaitable[storage.ReadRowsResponse] + ], + ]: + raise NotImplementedError() + + @property + def split_read_stream( + self, + ) -> typing.Callable[ + [storage.SplitReadStreamRequest], + typing.Union[ + storage.SplitReadStreamResponse, + typing.Awaitable[storage.SplitReadStreamResponse], + ], + ]: + raise NotImplementedError() + + +__all__ = ("BigQueryReadTransport",) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/grpc.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/grpc.py new file mode 100644 index 00000000..50d3c3fd --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/grpc.py @@ -0,0 +1,364 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
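# Usage sketch: overriding the wrapped defaults per call. The defaults above
# give create_read_session and split_read_stream a 600 s timeout with retry on
# DeadlineExceeded and ServiceUnavailable, and read_rows an 86400 s timeout
# with retry on ServiceUnavailable; callers may substitute their own Retry and
# timeout. Values below are hypothetical.
from google.api_core import exceptions
from google.api_core import retry as retries
from google.cloud.bigquery_storage_v1beta2.services.big_query_read import BigQueryReadClient
from google.cloud.bigquery_storage_v1beta2.types import stream

client = BigQueryReadClient()
session = client.create_read_session(
    parent="projects/my-project",
    read_session=stream.ReadSession(
        table="projects/my-project/datasets/my_dataset/tables/my_table",
    ),
    retry=retries.Retry(
        initial=0.1,
        maximum=30.0,
        multiplier=2.0,
        predicate=retries.if_exception_type(exceptions.ServiceUnavailable),
    ),
    timeout=120.0,
)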
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import warnings +from typing import Callable, Dict, Optional, Sequence, Tuple + +from google.api_core import grpc_helpers # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google import auth # type: ignore +from google.auth import credentials # type: ignore +from google.auth.transport.grpc import SslCredentials # type: ignore + +import grpc # type: ignore + +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream + +from .base import BigQueryReadTransport, DEFAULT_CLIENT_INFO + + +class BigQueryReadGrpcTransport(BigQueryReadTransport): + """gRPC backend transport for BigQueryRead. + + BigQuery Read API. + The Read API can be used to read data from BigQuery. + New code should use the v1 Read API going forward, if they don't + use Write API at the same time. + + This class defines the same methods as the primary client, so the + primary client can load the underlying transport implementation + and call it. + + It sends protocol buffers over the wire using gRPC (which is built on + top of HTTP/2); the ``grpcio`` package must be installed. + """ + + _stubs: Dict[str, Callable] + + def __init__( + self, + *, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: str = None, + scopes: Sequence[str] = None, + channel: grpc.Channel = None, + api_mtls_endpoint: str = None, + client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, + ssl_channel_credentials: grpc.ChannelCredentials = None, + quota_project_id: Optional[str] = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is ignored if ``channel`` is provided. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional(Sequence[str])): A list of scopes. This argument is + ignored if ``channel`` is provided. + channel (Optional[grpc.Channel]): A ``Channel`` instance through + which to make calls. + api_mtls_endpoint (Optional[str]): Deprecated. The mutual TLS endpoint. + If provided, it overrides the ``host`` argument and tries to create + a mutual TLS channel with client SSL credentials from + ``client_cert_source`` or applicatin default SSL credentials. + client_cert_source (Optional[Callable[[], Tuple[bytes, bytes]]]): + Deprecated. A callback to provide client SSL certificate bytes and + private key bytes, both in PEM format. It is ignored if + ``api_mtls_endpoint`` is None. + ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials + for grpc channel. It is ignored if ``channel`` is provided. 
+ quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + + Raises: + google.auth.exceptions.MutualTLSChannelError: If mutual TLS transport + creation failed for any reason. + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + self._ssl_channel_credentials = ssl_channel_credentials + + if channel: + # Sanity check: Ensure that channel and credentials are not both + # provided. + credentials = False + + # If a channel was explicitly provided, set it. + self._grpc_channel = channel + self._ssl_channel_credentials = None + elif api_mtls_endpoint: + warnings.warn( + "api_mtls_endpoint and client_cert_source are deprecated", + DeprecationWarning, + ) + + host = ( + api_mtls_endpoint + if ":" in api_mtls_endpoint + else api_mtls_endpoint + ":443" + ) + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. + if client_cert_source: + cert, key = client_cert_source() + ssl_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + ssl_credentials = SslCredentials().ssl_credentials + + # create a new channel. The provided one is ignored. + self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + self._ssl_channel_credentials = ssl_credentials + else: + host = host if ":" in host else host + ":443" + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # create a new channel. The provided one is ignored. + self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_channel_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + self._stubs = {} # type: Dict[str, Callable] + + # Run the base constructor. + super().__init__( + host=host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + client_info=client_info, + ) + + @classmethod + def create_channel( + cls, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: str = None, + scopes: Optional[Sequence[str]] = None, + quota_project_id: Optional[str] = None, + **kwargs, + ) -> grpc.Channel: + """Create and return a gRPC channel object. + Args: + address (Optional[str]): The host for the channel to use. + credentials (Optional[~.Credentials]): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. 
+ credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is mutually exclusive with credentials. + scopes (Optional[Sequence[str]]): A optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + kwargs (Optional[dict]): Keyword arguments, which are passed to the + channel creation. + Returns: + grpc.Channel: A gRPC channel object. + + Raises: + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + scopes = scopes or cls.AUTH_SCOPES + return grpc_helpers.create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + **kwargs, + ) + + @property + def grpc_channel(self) -> grpc.Channel: + """Return the channel designed to connect to this service. + """ + return self._grpc_channel + + @property + def create_read_session( + self, + ) -> Callable[[storage.CreateReadSessionRequest], stream.ReadSession]: + r"""Return a callable for the create read session method over gRPC. + + Creates a new read session. A read session divides + the contents of a BigQuery table into one or more + streams, which can then be used to read data from the + table. The read session also specifies properties of the + data to be read, such as a list of columns or a push- + down filter describing the rows to be returned. + + A particular row can be read by at most one stream. When + the caller has reached the end of each stream in the + session, then all the data in the table has been read. + + Data is assigned to each stream such that roughly the + same number of rows can be read from each stream. + Because the server-side unit for assigning data is + collections of rows, the API does not guarantee that + each stream will return the same number or rows. + Additionally, the limits are enforced based on the + number of pre-filtered rows, so some filters can lead to + lopsided assignments. + + Read sessions automatically expire 24 hours after they + are created and do not require manual clean-up by the + caller. + + Returns: + Callable[[~.CreateReadSessionRequest], + ~.ReadSession]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "create_read_session" not in self._stubs: + self._stubs["create_read_session"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryRead/CreateReadSession", + request_serializer=storage.CreateReadSessionRequest.serialize, + response_deserializer=stream.ReadSession.deserialize, + ) + return self._stubs["create_read_session"] + + @property + def read_rows( + self, + ) -> Callable[[storage.ReadRowsRequest], storage.ReadRowsResponse]: + r"""Return a callable for the read rows method over gRPC. + + Reads rows from the stream in the format prescribed + by the ReadSession. Each response contains one or more + table rows, up to a maximum of 100 MiB per response; + read requests which attempt to read individual rows + larger than 100 MiB will fail. 
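# Usage sketch: supplying a pre-built gRPC channel. create_channel above wraps
# grpc_helpers.create_channel, and a channel built with it (or any grpc.Channel)
# can be handed to the transport; credentials and scopes are then ignored, as
# the constructor documents. The client accepts the transport instance directly.
from google.cloud.bigquery_storage_v1beta2.services.big_query_read import BigQueryReadClient
from google.cloud.bigquery_storage_v1beta2.services.big_query_read.transports import (
    BigQueryReadGrpcTransport,
)

channel = BigQueryReadGrpcTransport.create_channel()
transport = BigQueryReadGrpcTransport(channel=channel)
client = BigQueryReadClient(transport=transport)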
+ + Each request also returns a set of stream statistics + reflecting the current state of the stream. + + Returns: + Callable[[~.ReadRowsRequest], + ~.ReadRowsResponse]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "read_rows" not in self._stubs: + self._stubs["read_rows"] = self.grpc_channel.unary_stream( + "/google.cloud.bigquery.storage.v1beta2.BigQueryRead/ReadRows", + request_serializer=storage.ReadRowsRequest.serialize, + response_deserializer=storage.ReadRowsResponse.deserialize, + ) + return self._stubs["read_rows"] + + @property + def split_read_stream( + self, + ) -> Callable[[storage.SplitReadStreamRequest], storage.SplitReadStreamResponse]: + r"""Return a callable for the split read stream method over gRPC. + + Splits a given ``ReadStream`` into two ``ReadStream`` objects. + These ``ReadStream`` objects are referred to as the primary and + the residual streams of the split. The original ``ReadStream`` + can still be read from in the same manner as before. Both of the + returned ``ReadStream`` objects can also be read from, and the + rows returned by both child streams will be the same as the rows + read from the original stream. + + Moreover, the two child streams will be allocated back-to-back + in the original ``ReadStream``. Concretely, it is guaranteed + that for streams original, primary, and residual, that + original[0-j] = primary[0-j] and original[j-n] = residual[0-m] + once the streams have been read to completion. + + Returns: + Callable[[~.SplitReadStreamRequest], + ~.SplitReadStreamResponse]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "split_read_stream" not in self._stubs: + self._stubs["split_read_stream"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryRead/SplitReadStream", + request_serializer=storage.SplitReadStreamRequest.serialize, + response_deserializer=storage.SplitReadStreamResponse.deserialize, + ) + return self._stubs["split_read_stream"] + + +__all__ = ("BigQueryReadGrpcTransport",) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/grpc_asyncio.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/grpc_asyncio.py new file mode 100644 index 00000000..9d2a7aa5 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_read/transports/grpc_asyncio.py @@ -0,0 +1,370 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import warnings +from typing import Awaitable, Callable, Dict, Optional, Sequence, Tuple + +from google.api_core import gapic_v1 # type: ignore +from google.api_core import grpc_helpers_async # type: ignore +from google import auth # type: ignore +from google.auth import credentials # type: ignore +from google.auth.transport.grpc import SslCredentials # type: ignore + +import grpc # type: ignore +from grpc.experimental import aio # type: ignore + +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream + +from .base import BigQueryReadTransport, DEFAULT_CLIENT_INFO +from .grpc import BigQueryReadGrpcTransport + + +class BigQueryReadGrpcAsyncIOTransport(BigQueryReadTransport): + """gRPC AsyncIO backend transport for BigQueryRead. + + BigQuery Read API. + The Read API can be used to read data from BigQuery. + New code should use the v1 Read API going forward, if they don't + use Write API at the same time. + + This class defines the same methods as the primary client, so the + primary client can load the underlying transport implementation + and call it. + + It sends protocol buffers over the wire using gRPC (which is built on + top of HTTP/2); the ``grpcio`` package must be installed. + """ + + _grpc_channel: aio.Channel + _stubs: Dict[str, Callable] = {} + + @classmethod + def create_channel( + cls, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: Optional[str] = None, + scopes: Optional[Sequence[str]] = None, + quota_project_id: Optional[str] = None, + **kwargs, + ) -> aio.Channel: + """Create and return a gRPC AsyncIO channel object. + Args: + address (Optional[str]): The host for the channel to use. + credentials (Optional[~.Credentials]): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional[Sequence[str]]): A optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + kwargs (Optional[dict]): Keyword arguments, which are passed to the + channel creation. + Returns: + aio.Channel: A gRPC AsyncIO channel object. + """ + scopes = scopes or cls.AUTH_SCOPES + return grpc_helpers_async.create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + **kwargs, + ) + + def __init__( + self, + *, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: Optional[str] = None, + scopes: Optional[Sequence[str]] = None, + channel: aio.Channel = None, + api_mtls_endpoint: str = None, + client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, + ssl_channel_credentials: grpc.ChannelCredentials = None, + quota_project_id=None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. 
+ credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is ignored if ``channel`` is provided. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional[Sequence[str]]): A optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + channel (Optional[aio.Channel]): A ``Channel`` instance through + which to make calls. + api_mtls_endpoint (Optional[str]): Deprecated. The mutual TLS endpoint. + If provided, it overrides the ``host`` argument and tries to create + a mutual TLS channel with client SSL credentials from + ``client_cert_source`` or applicatin default SSL credentials. + client_cert_source (Optional[Callable[[], Tuple[bytes, bytes]]]): + Deprecated. A callback to provide client SSL certificate bytes and + private key bytes, both in PEM format. It is ignored if + ``api_mtls_endpoint`` is None. + ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials + for grpc channel. It is ignored if ``channel`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + + Raises: + google.auth.exceptions.MutualTlsChannelError: If mutual TLS transport + creation failed for any reason. + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + self._ssl_channel_credentials = ssl_channel_credentials + + if channel: + # Sanity check: Ensure that channel and credentials are not both + # provided. + credentials = False + + # If a channel was explicitly provided, set it. + self._grpc_channel = channel + self._ssl_channel_credentials = None + elif api_mtls_endpoint: + warnings.warn( + "api_mtls_endpoint and client_cert_source are deprecated", + DeprecationWarning, + ) + + host = ( + api_mtls_endpoint + if ":" in api_mtls_endpoint + else api_mtls_endpoint + ":443" + ) + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. + if client_cert_source: + cert, key = client_cert_source() + ssl_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + ssl_credentials = SslCredentials().ssl_credentials + + # create a new channel. The provided one is ignored. 
+ self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + self._ssl_channel_credentials = ssl_credentials + else: + host = host if ":" in host else host + ":443" + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # create a new channel. The provided one is ignored. + self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_channel_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + # Run the base constructor. + super().__init__( + host=host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + client_info=client_info, + ) + + self._stubs = {} + + @property + def grpc_channel(self) -> aio.Channel: + """Create the channel designed to connect to this service. + + This property caches on the instance; repeated calls return + the same channel. + """ + # Return the channel from cache. + return self._grpc_channel + + @property + def create_read_session( + self, + ) -> Callable[[storage.CreateReadSessionRequest], Awaitable[stream.ReadSession]]: + r"""Return a callable for the create read session method over gRPC. + + Creates a new read session. A read session divides + the contents of a BigQuery table into one or more + streams, which can then be used to read data from the + table. The read session also specifies properties of the + data to be read, such as a list of columns or a push- + down filter describing the rows to be returned. + + A particular row can be read by at most one stream. When + the caller has reached the end of each stream in the + session, then all the data in the table has been read. + + Data is assigned to each stream such that roughly the + same number of rows can be read from each stream. + Because the server-side unit for assigning data is + collections of rows, the API does not guarantee that + each stream will return the same number or rows. + Additionally, the limits are enforced based on the + number of pre-filtered rows, so some filters can lead to + lopsided assignments. + + Read sessions automatically expire 24 hours after they + are created and do not require manual clean-up by the + caller. + + Returns: + Callable[[~.CreateReadSessionRequest], + Awaitable[~.ReadSession]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
+ if "create_read_session" not in self._stubs: + self._stubs["create_read_session"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryRead/CreateReadSession", + request_serializer=storage.CreateReadSessionRequest.serialize, + response_deserializer=stream.ReadSession.deserialize, + ) + return self._stubs["create_read_session"] + + @property + def read_rows( + self, + ) -> Callable[[storage.ReadRowsRequest], Awaitable[storage.ReadRowsResponse]]: + r"""Return a callable for the read rows method over gRPC. + + Reads rows from the stream in the format prescribed + by the ReadSession. Each response contains one or more + table rows, up to a maximum of 100 MiB per response; + read requests which attempt to read individual rows + larger than 100 MiB will fail. + + Each request also returns a set of stream statistics + reflecting the current state of the stream. + + Returns: + Callable[[~.ReadRowsRequest], + Awaitable[~.ReadRowsResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "read_rows" not in self._stubs: + self._stubs["read_rows"] = self.grpc_channel.unary_stream( + "/google.cloud.bigquery.storage.v1beta2.BigQueryRead/ReadRows", + request_serializer=storage.ReadRowsRequest.serialize, + response_deserializer=storage.ReadRowsResponse.deserialize, + ) + return self._stubs["read_rows"] + + @property + def split_read_stream( + self, + ) -> Callable[ + [storage.SplitReadStreamRequest], Awaitable[storage.SplitReadStreamResponse] + ]: + r"""Return a callable for the split read stream method over gRPC. + + Splits a given ``ReadStream`` into two ``ReadStream`` objects. + These ``ReadStream`` objects are referred to as the primary and + the residual streams of the split. The original ``ReadStream`` + can still be read from in the same manner as before. Both of the + returned ``ReadStream`` objects can also be read from, and the + rows returned by both child streams will be the same as the rows + read from the original stream. + + Moreover, the two child streams will be allocated back-to-back + in the original ``ReadStream``. Concretely, it is guaranteed + that for streams original, primary, and residual, that + original[0-j] = primary[0-j] and original[j-n] = residual[0-m] + once the streams have been read to completion. + + Returns: + Callable[[~.SplitReadStreamRequest], + Awaitable[~.SplitReadStreamResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
+ if "split_read_stream" not in self._stubs: + self._stubs["split_read_stream"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryRead/SplitReadStream", + request_serializer=storage.SplitReadStreamRequest.serialize, + response_deserializer=storage.SplitReadStreamResponse.deserialize, + ) + return self._stubs["split_read_stream"] + + +__all__ = ("BigQueryReadGrpcAsyncIOTransport",) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_write/__init__.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/__init__.py new file mode 100644 index 00000000..89e849a9 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .client import BigQueryWriteClient +from .async_client import BigQueryWriteAsyncClient + +__all__ = ( + "BigQueryWriteClient", + "BigQueryWriteAsyncClient", +) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_write/async_client.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/async_client.py new file mode 100644 index 00000000..378bd9ef --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/async_client.py @@ -0,0 +1,671 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
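# Usage sketch: the write-side surface exported above. Both clients resolve
# credentials from the environment; the asynchronous variant defined below
# wraps the synchronous BigQueryWriteClient and defaults to the grpc_asyncio
# transport.
from google.cloud.bigquery_storage_v1beta2.services.big_query_write import (
    BigQueryWriteAsyncClient,
    BigQueryWriteClient,
)

sync_client = BigQueryWriteClient()
async_client = BigQueryWriteAsyncClient()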
+# + +from collections import OrderedDict +import functools +import re +from typing import ( + Dict, + AsyncIterable, + Awaitable, + AsyncIterator, + Sequence, + Tuple, + Type, + Union, +) +import pkg_resources + +import google.api_core.client_options as ClientOptions # type: ignore +from google.api_core import exceptions # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google.api_core import retry as retries # type: ignore +from google.auth import credentials # type: ignore +from google.oauth2 import service_account # type: ignore + +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream +from google.cloud.bigquery_storage_v1beta2.types import table +from google.protobuf import timestamp_pb2 as timestamp # type: ignore +from google.rpc import status_pb2 as status # type: ignore + +from .transports.base import BigQueryWriteTransport, DEFAULT_CLIENT_INFO +from .transports.grpc_asyncio import BigQueryWriteGrpcAsyncIOTransport +from .client import BigQueryWriteClient + + +class BigQueryWriteAsyncClient: + """BigQuery Write API. + The Write API can be used to write data to BigQuery. + """ + + _client: BigQueryWriteClient + + DEFAULT_ENDPOINT = BigQueryWriteClient.DEFAULT_ENDPOINT + DEFAULT_MTLS_ENDPOINT = BigQueryWriteClient.DEFAULT_MTLS_ENDPOINT + + table_path = staticmethod(BigQueryWriteClient.table_path) + parse_table_path = staticmethod(BigQueryWriteClient.parse_table_path) + write_stream_path = staticmethod(BigQueryWriteClient.write_stream_path) + parse_write_stream_path = staticmethod(BigQueryWriteClient.parse_write_stream_path) + + common_billing_account_path = staticmethod( + BigQueryWriteClient.common_billing_account_path + ) + parse_common_billing_account_path = staticmethod( + BigQueryWriteClient.parse_common_billing_account_path + ) + + common_folder_path = staticmethod(BigQueryWriteClient.common_folder_path) + parse_common_folder_path = staticmethod( + BigQueryWriteClient.parse_common_folder_path + ) + + common_organization_path = staticmethod( + BigQueryWriteClient.common_organization_path + ) + parse_common_organization_path = staticmethod( + BigQueryWriteClient.parse_common_organization_path + ) + + common_project_path = staticmethod(BigQueryWriteClient.common_project_path) + parse_common_project_path = staticmethod( + BigQueryWriteClient.parse_common_project_path + ) + + common_location_path = staticmethod(BigQueryWriteClient.common_location_path) + parse_common_location_path = staticmethod( + BigQueryWriteClient.parse_common_location_path + ) + + from_service_account_info = BigQueryWriteClient.from_service_account_info + from_service_account_file = BigQueryWriteClient.from_service_account_file + from_service_account_json = from_service_account_file + + @property + def transport(self) -> BigQueryWriteTransport: + """Return the transport used by the client instance. + + Returns: + BigQueryWriteTransport: The transport used by the client instance. + """ + return self._client.transport + + get_transport_class = functools.partial( + type(BigQueryWriteClient).get_transport_class, type(BigQueryWriteClient) + ) + + def __init__( + self, + *, + credentials: credentials.Credentials = None, + transport: Union[str, BigQueryWriteTransport] = "grpc_asyncio", + client_options: ClientOptions = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the big query write client. 
+ + Args: + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + transport (Union[str, ~.BigQueryWriteTransport]): The + transport to use. If set to None, a transport is chosen + automatically. + client_options (ClientOptions): Custom options for the client. It + won't take effect if a ``transport`` instance is provided. + (1) The ``api_endpoint`` property can be used to override the + default endpoint provided by the client. GOOGLE_API_USE_MTLS_ENDPOINT + environment variable can also be used to override the endpoint: + "always" (always use the default mTLS endpoint), "never" (always + use the default regular endpoint) and "auto" (auto switch to the + default mTLS endpoint if client certificate is present, this is + the default value). However, the ``api_endpoint`` property takes + precedence if provided. + (2) If GOOGLE_API_USE_CLIENT_CERTIFICATE environment variable + is "true", then the ``client_cert_source`` property can be used + to provide client certificate for mutual TLS transport. If + not provided, the default SSL client certificate will be used if + present. If GOOGLE_API_USE_CLIENT_CERTIFICATE is "false" or not + set, no client certificate will be used. + + Raises: + google.auth.exceptions.MutualTlsChannelError: If mutual TLS transport + creation failed for any reason. + """ + + self._client = BigQueryWriteClient( + credentials=credentials, + transport=transport, + client_options=client_options, + client_info=client_info, + ) + + async def create_write_stream( + self, + request: storage.CreateWriteStreamRequest = None, + *, + parent: str = None, + write_stream: stream.WriteStream = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> stream.WriteStream: + r"""Creates a write stream to the given table. Additionally, every + table has a special COMMITTED stream named '_default' to which + data can be written. This stream doesn't need to be created + using CreateWriteStream. It is a stream that can be used + simultaneously by any number of clients. Data written to this + stream is considered committed as soon as an acknowledgement is + received. + + Args: + request (:class:`google.cloud.bigquery_storage_v1beta2.types.CreateWriteStreamRequest`): + The request object. Request message for + `CreateWriteStream`. + parent (:class:`str`): + Required. Reference to the table to which the stream + belongs, in the format of + ``projects/{project}/datasets/{dataset}/tables/{table}``. + + This corresponds to the ``parent`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + write_stream (:class:`google.cloud.bigquery_storage_v1beta2.types.WriteStream`): + Required. Stream to be created. + This corresponds to the ``write_stream`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.WriteStream: + Information about a single stream + that gets data inside the storage + system. 
+ + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([parent, write_stream]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = storage.CreateWriteStreamRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if parent is not None: + request.parent = parent + if write_stream is not None: + request.write_stream = write_stream + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.create_write_stream, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, + exceptions.ResourceExhausted, + exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("parent", request.parent),)), + ) + + # Send the request. + response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + def append_rows( + self, + requests: AsyncIterator[storage.AppendRowsRequest] = None, + *, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> Awaitable[AsyncIterable[storage.AppendRowsResponse]]: + r"""Appends data to the given stream. + + If ``offset`` is specified, the ``offset`` is checked against + the end of stream. The server returns ``OUT_OF_RANGE`` in + ``AppendRowsResponse`` if an attempt is made to append to an + offset beyond the current end of the stream or + ``ALREADY_EXISTS`` if user provids an ``offset`` that has + already been written to. User can retry with adjusted offset + within the same RPC stream. If ``offset`` is not specified, + append happens at the end of the stream. + + The response contains the offset at which the append happened. + Responses are received in the same order in which requests are + sent. There will be one response for each successful request. If + the ``offset`` is not set in response, it means append didn't + happen due to some errors. If one request fails, all the + subsequent requests will also fail until a success request is + made again. + + If the stream is of ``PENDING`` type, data will only be + available for read operations after the stream is committed. + + Args: + requests (AsyncIterator[`google.cloud.bigquery_storage_v1beta2.types.AppendRowsRequest`]): + The request object AsyncIterator. Request message for `AppendRows`. + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + AsyncIterable[google.cloud.bigquery_storage_v1beta2.types.AppendRowsResponse]: + Response message for AppendRows. + """ + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. 
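# Usage sketch: creating a PENDING write stream with the async client. Resource
# names are hypothetical, and the WriteStream ``type_`` field with its PENDING
# enum value is assumed from the stream types module (referenced but not
# defined in this hunk).
import asyncio

from google.cloud.bigquery_storage_v1beta2.services.big_query_write import BigQueryWriteAsyncClient
from google.cloud.bigquery_storage_v1beta2.types import stream


async def make_pending_stream() -> stream.WriteStream:
    client = BigQueryWriteAsyncClient()
    return await client.create_write_stream(
        parent="projects/my-project/datasets/my_dataset/tables/my_table",
        write_stream=stream.WriteStream(type_=stream.WriteStream.Type.PENDING),
    )


pending_stream = asyncio.run(make_pending_stream())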
+ rpc = gapic_v1.method_async.wrap_method( + self._client._transport.append_rows, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.ResourceExhausted, exceptions.ServiceUnavailable, + ), + ), + default_timeout=86400.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + (gapic_v1.routing_header.to_grpc_metadata(()),) + + # Send the request. + response = rpc(requests, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + async def get_write_stream( + self, + request: storage.GetWriteStreamRequest = None, + *, + name: str = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> stream.WriteStream: + r"""Gets a write stream. + + Args: + request (:class:`google.cloud.bigquery_storage_v1beta2.types.GetWriteStreamRequest`): + The request object. Request message for + `GetWriteStreamRequest`. + name (:class:`str`): + Required. Name of the stream to get, in the form of + ``projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}``. + + This corresponds to the ``name`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.WriteStream: + Information about a single stream + that gets data inside the storage + system. + + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([name]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = storage.GetWriteStreamRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if name is not None: + request.name = name + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.get_write_stream, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)), + ) + + # Send the request. + response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. 
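# Usage sketch: append_rows is a bidirectional stream. The caller supplies an
# async iterator of AppendRowsRequest messages and, following the annotation
# above, awaits the call to obtain an async iterable of responses. The row
# payload (offset, serialized rows, writer schema) is deliberately left out
# here, so a real writer would populate more of each request.
from google.cloud.bigquery_storage_v1beta2.types import storage


async def append(client, stream_name):
    # client is a BigQueryWriteAsyncClient; stream_name identifies the target
    # write stream (for example, the PENDING stream created in the sketch above).
    async def requests():
        yield storage.AppendRowsRequest(write_stream=stream_name)

    responses = await client.append_rows(requests())
    async for response in responses:
        print(response)  # each response reports where the append landed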
+ return response + + async def finalize_write_stream( + self, + request: storage.FinalizeWriteStreamRequest = None, + *, + name: str = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> storage.FinalizeWriteStreamResponse: + r"""Finalize a write stream so that no new data can be appended to + the stream. Finalize is not supported on the '_default' stream. + + Args: + request (:class:`google.cloud.bigquery_storage_v1beta2.types.FinalizeWriteStreamRequest`): + The request object. Request message for invoking + `FinalizeWriteStream`. + name (:class:`str`): + Required. Name of the stream to finalize, in the form of + ``projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}``. + + This corresponds to the ``name`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.FinalizeWriteStreamResponse: + Response message for FinalizeWriteStream. + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([name]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = storage.FinalizeWriteStreamRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if name is not None: + request.name = name + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.finalize_write_stream, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)), + ) + + # Send the request. + response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + async def batch_commit_write_streams( + self, + request: storage.BatchCommitWriteStreamsRequest = None, + *, + parent: str = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> storage.BatchCommitWriteStreamsResponse: + r"""Atomically commits a group of ``PENDING`` streams that belong to + the same ``parent`` table. Streams must be finalized before + commit and cannot be committed multiple times. Once a stream is + committed, data in the stream becomes available for read + operations. + + Args: + request (:class:`google.cloud.bigquery_storage_v1beta2.types.BatchCommitWriteStreamsRequest`): + The request object. Request message for + `BatchCommitWriteStreams`. + parent (:class:`str`): + Required. 
Parent table that all the streams should + belong to, in the form of + ``projects/{project}/datasets/{dataset}/tables/{table}``. + + This corresponds to the ``parent`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.BatchCommitWriteStreamsResponse: + Response message for BatchCommitWriteStreams. + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([parent]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = storage.BatchCommitWriteStreamsRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if parent is not None: + request.parent = parent + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.batch_commit_write_streams, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("parent", request.parent),)), + ) + + # Send the request. + response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + async def flush_rows( + self, + request: storage.FlushRowsRequest = None, + *, + write_stream: str = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> storage.FlushRowsResponse: + r"""Flushes rows to a BUFFERED stream. If users are appending rows + to BUFFERED stream, flush operation is required in order for the + rows to become available for reading. A Flush operation flushes + up to any previously flushed offset in a BUFFERED stream, to the + offset specified in the request. Flush is not supported on the + \_default stream, since it is not BUFFERED. + + Args: + request (:class:`google.cloud.bigquery_storage_v1beta2.types.FlushRowsRequest`): + The request object. Request message for `FlushRows`. + write_stream (:class:`str`): + Required. The stream that is the + target of the flush operation. + + This corresponds to the ``write_stream`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.FlushRowsResponse: + Respond message for FlushRows. + """ + # Create or coerce a protobuf request object. 
+ # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([write_stream]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = storage.FlushRowsRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if write_stream is not None: + request.write_stream = write_stream + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.flush_rows, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("write_stream", request.write_stream),) + ), + ) + + # Send the request. + response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + +try: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo( + gapic_version=pkg_resources.get_distribution( + "google-cloud-bigquery-storage", + ).version, + ) +except pkg_resources.DistributionNotFound: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo() + + +__all__ = ("BigQueryWriteAsyncClient",) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_write/client.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/client.py new file mode 100644 index 00000000..73cc2c15 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/client.py @@ -0,0 +1,843 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from collections import OrderedDict +from distutils import util +import os +import re +from typing import ( + Callable, + Dict, + Optional, + Iterable, + Iterator, + Sequence, + Tuple, + Type, + Union, +) +import pkg_resources + +from google.api_core import client_options as client_options_lib # type: ignore +from google.api_core import exceptions # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google.api_core import retry as retries # type: ignore +from google.auth import credentials # type: ignore +from google.auth.transport import mtls # type: ignore +from google.auth.transport.grpc import SslCredentials # type: ignore +from google.auth.exceptions import MutualTLSChannelError # type: ignore +from google.oauth2 import service_account # type: ignore + +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream +from google.cloud.bigquery_storage_v1beta2.types import table +from google.protobuf import timestamp_pb2 as timestamp # type: ignore +from google.rpc import status_pb2 as status # type: ignore + +from .transports.base import BigQueryWriteTransport, DEFAULT_CLIENT_INFO +from .transports.grpc import BigQueryWriteGrpcTransport +from .transports.grpc_asyncio import BigQueryWriteGrpcAsyncIOTransport + + +class BigQueryWriteClientMeta(type): + """Metaclass for the BigQueryWrite client. + + This provides class-level methods for building and retrieving + support objects (e.g. transport) without polluting the client instance + objects. + """ + + _transport_registry = OrderedDict() # type: Dict[str, Type[BigQueryWriteTransport]] + _transport_registry["grpc"] = BigQueryWriteGrpcTransport + _transport_registry["grpc_asyncio"] = BigQueryWriteGrpcAsyncIOTransport + + def get_transport_class(cls, label: str = None,) -> Type[BigQueryWriteTransport]: + """Return an appropriate transport class. + + Args: + label: The name of the desired transport. If none is + provided, then the first transport in the registry is used. + + Returns: + The transport class to use. + """ + # If a specific transport is requested, return that one. + if label: + return cls._transport_registry[label] + + # No transport is requested; return the default (that is, the first one + # in the dictionary). + return next(iter(cls._transport_registry.values())) + + +class BigQueryWriteClient(metaclass=BigQueryWriteClientMeta): + """BigQuery Write API. + The Write API can be used to write data to BigQuery. + """ + + @staticmethod + def _get_default_mtls_endpoint(api_endpoint): + """Convert api endpoint to mTLS endpoint. + Convert "*.sandbox.googleapis.com" and "*.googleapis.com" to + "*.mtls.sandbox.googleapis.com" and "*.mtls.googleapis.com" respectively. + Args: + api_endpoint (Optional[str]): the api endpoint to convert. + Returns: + str: converted mTLS api endpoint. + """ + if not api_endpoint: + return api_endpoint + + mtls_endpoint_re = re.compile( + r"(?P<name>[^.]+)(?P<mtls>\.mtls)?(?P<sandbox>\.sandbox)?(?P<googledomain>\.googleapis\.com)?"
+ ) + + m = mtls_endpoint_re.match(api_endpoint) + name, mtls, sandbox, googledomain = m.groups() + if mtls or not googledomain: + return api_endpoint + + if sandbox: + return api_endpoint.replace( + "sandbox.googleapis.com", "mtls.sandbox.googleapis.com" + ) + + return api_endpoint.replace(".googleapis.com", ".mtls.googleapis.com") + + DEFAULT_ENDPOINT = "bigquerystorage.googleapis.com" + DEFAULT_MTLS_ENDPOINT = _get_default_mtls_endpoint.__func__( # type: ignore + DEFAULT_ENDPOINT + ) + + @classmethod + def from_service_account_info(cls, info: dict, *args, **kwargs): + """Creates an instance of this client using the provided credentials info. + + Args: + info (dict): The service account private key info. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + BigQueryWriteClient: The constructed client. + """ + credentials = service_account.Credentials.from_service_account_info(info) + kwargs["credentials"] = credentials + return cls(*args, **kwargs) + + @classmethod + def from_service_account_file(cls, filename: str, *args, **kwargs): + """Creates an instance of this client using the provided credentials + file. + + Args: + filename (str): The path to the service account private key json + file. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + BigQueryWriteClient: The constructed client. + """ + credentials = service_account.Credentials.from_service_account_file(filename) + kwargs["credentials"] = credentials + return cls(*args, **kwargs) + + from_service_account_json = from_service_account_file + + @property + def transport(self) -> BigQueryWriteTransport: + """Return the transport used by the client instance. + + Returns: + BigQueryWriteTransport: The transport used by the client instance. + """ + return self._transport + + @staticmethod + def table_path(project: str, dataset: str, table: str,) -> str: + """Return a fully-qualified table string.""" + return "projects/{project}/datasets/{dataset}/tables/{table}".format( + project=project, dataset=dataset, table=table, + ) + + @staticmethod + def parse_table_path(path: str) -> Dict[str, str]: + """Parse a table path into its component segments.""" + m = re.match( + r"^projects/(?P<project>.+?)/datasets/(?P<dataset>.+?)/tables/(?P<table>.+?)$",
+ path, + ) + return m.groupdict() if m else {} + + @staticmethod + def write_stream_path(project: str, dataset: str, table: str, stream: str,) -> str: + """Return a fully-qualified write_stream string.""" + return "projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}".format( + project=project, dataset=dataset, table=table, stream=stream, + ) + + @staticmethod + def parse_write_stream_path(path: str) -> Dict[str, str]: + """Parse a write_stream path into its component segments.""" + m = re.match( + r"^projects/(?P<project>.+?)/datasets/(?P<dataset>.+?)/tables/(?P<table>.+?)/streams/(?P<stream>.+?)$",
+ path, + ) + return m.groupdict() if m else {} + + @staticmethod + def common_billing_account_path(billing_account: str,) -> str: + """Return a fully-qualified billing_account string.""" + return "billingAccounts/{billing_account}".format( + billing_account=billing_account, + ) + + @staticmethod + def parse_common_billing_account_path(path: str) -> Dict[str, str]: + """Parse a billing_account path into its component segments.""" + m = re.match(r"^billingAccounts/(?P<billing_account>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_folder_path(folder: str,) -> str: + """Return a fully-qualified folder string.""" + return "folders/{folder}".format(folder=folder,) + + @staticmethod + def parse_common_folder_path(path: str) -> Dict[str, str]: + """Parse a folder path into its component segments.""" + m = re.match(r"^folders/(?P<folder>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_organization_path(organization: str,) -> str: + """Return a fully-qualified organization string.""" + return "organizations/{organization}".format(organization=organization,) + + @staticmethod + def parse_common_organization_path(path: str) -> Dict[str, str]: + """Parse a organization path into its component segments.""" + m = re.match(r"^organizations/(?P<organization>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_project_path(project: str,) -> str: + """Return a fully-qualified project string.""" + return "projects/{project}".format(project=project,) + + @staticmethod + def parse_common_project_path(path: str) -> Dict[str, str]: + """Parse a project path into its component segments.""" + m = re.match(r"^projects/(?P<project>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_location_path(project: str, location: str,) -> str: + """Return a fully-qualified location string.""" + return "projects/{project}/locations/{location}".format( + project=project, location=location, + ) + + @staticmethod + def parse_common_location_path(path: str) -> Dict[str, str]: + """Parse a location path into its component segments.""" + m = re.match(r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)$", path) + return m.groupdict() if m else {} + + def __init__( + self, + *, + credentials: Optional[credentials.Credentials] = None, + transport: Union[str, BigQueryWriteTransport, None] = None, + client_options: Optional[client_options_lib.ClientOptions] = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the big query write client. + + Args: + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + transport (Union[str, BigQueryWriteTransport]): The + transport to use. If set to None, a transport is chosen + automatically. + client_options (google.api_core.client_options.ClientOptions): Custom options for the + client. It won't take effect if a ``transport`` instance is provided. + (1) The ``api_endpoint`` property can be used to override the + default endpoint provided by the client.
GOOGLE_API_USE_MTLS_ENDPOINT + environment variable can also be used to override the endpoint: + "always" (always use the default mTLS endpoint), "never" (always + use the default regular endpoint) and "auto" (auto switch to the + default mTLS endpoint if client certificate is present, this is + the default value). However, the ``api_endpoint`` property takes + precedence if provided. + (2) If GOOGLE_API_USE_CLIENT_CERTIFICATE environment variable + is "true", then the ``client_cert_source`` property can be used + to provide client certificate for mutual TLS transport. If + not provided, the default SSL client certificate will be used if + present. If GOOGLE_API_USE_CLIENT_CERTIFICATE is "false" or not + set, no client certificate will be used. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + + Raises: + google.auth.exceptions.MutualTLSChannelError: If mutual TLS transport + creation failed for any reason. + """ + if isinstance(client_options, dict): + client_options = client_options_lib.from_dict(client_options) + if client_options is None: + client_options = client_options_lib.ClientOptions() + + # Create SSL credentials for mutual TLS if needed. + use_client_cert = bool( + util.strtobool(os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE", "false")) + ) + + ssl_credentials = None + is_mtls = False + if use_client_cert: + if client_options.client_cert_source: + import grpc # type: ignore + + cert, key = client_options.client_cert_source() + ssl_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + is_mtls = True + else: + creds = SslCredentials() + is_mtls = creds.is_mtls + ssl_credentials = creds.ssl_credentials if is_mtls else None + + # Figure out which api endpoint to use. + if client_options.api_endpoint is not None: + api_endpoint = client_options.api_endpoint + else: + use_mtls_env = os.getenv("GOOGLE_API_USE_MTLS_ENDPOINT", "auto") + if use_mtls_env == "never": + api_endpoint = self.DEFAULT_ENDPOINT + elif use_mtls_env == "always": + api_endpoint = self.DEFAULT_MTLS_ENDPOINT + elif use_mtls_env == "auto": + api_endpoint = ( + self.DEFAULT_MTLS_ENDPOINT if is_mtls else self.DEFAULT_ENDPOINT + ) + else: + raise MutualTLSChannelError( + "Unsupported GOOGLE_API_USE_MTLS_ENDPOINT value. Accepted values: never, auto, always" + ) + + # Save or instantiate the transport. + # Ordinarily, we provide the transport, but allowing a custom transport + # instance provides an extensibility point for unusual situations. + if isinstance(transport, BigQueryWriteTransport): + # transport is a BigQueryWriteTransport instance. + if credentials or client_options.credentials_file: + raise ValueError( + "When providing a transport instance, " + "provide its credentials directly." + ) + if client_options.scopes: + raise ValueError( + "When providing a transport instance, " + "provide its scopes directly." 
+ ) + self._transport = transport + else: + Transport = type(self).get_transport_class(transport) + self._transport = Transport( + credentials=credentials, + credentials_file=client_options.credentials_file, + host=api_endpoint, + scopes=client_options.scopes, + ssl_channel_credentials=ssl_credentials, + quota_project_id=client_options.quota_project_id, + client_info=client_info, + ) + + def create_write_stream( + self, + request: storage.CreateWriteStreamRequest = None, + *, + parent: str = None, + write_stream: stream.WriteStream = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> stream.WriteStream: + r"""Creates a write stream to the given table. Additionally, every + table has a special COMMITTED stream named '_default' to which + data can be written. This stream doesn't need to be created + using CreateWriteStream. It is a stream that can be used + simultaneously by any number of clients. Data written to this + stream is considered committed as soon as an acknowledgement is + received. + + Args: + request (google.cloud.bigquery_storage_v1beta2.types.CreateWriteStreamRequest): + The request object. Request message for + `CreateWriteStream`. + parent (str): + Required. Reference to the table to which the stream + belongs, in the format of + ``projects/{project}/datasets/{dataset}/tables/{table}``. + + This corresponds to the ``parent`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + write_stream (google.cloud.bigquery_storage_v1beta2.types.WriteStream): + Required. Stream to be created. + This corresponds to the ``write_stream`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.WriteStream: + Information about a single stream + that gets data inside the storage + system. + + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([parent, write_stream]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a storage.CreateWriteStreamRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, storage.CreateWriteStreamRequest): + request = storage.CreateWriteStreamRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if parent is not None: + request.parent = parent + if write_stream is not None: + request.write_stream = write_stream + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.create_write_stream] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("parent", request.parent),)), + ) + + # Send the request. 
+ response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + def append_rows( + self, + requests: Iterator[storage.AppendRowsRequest] = None, + *, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> Iterable[storage.AppendRowsResponse]: + r"""Appends data to the given stream. + + If ``offset`` is specified, the ``offset`` is checked against + the end of stream. The server returns ``OUT_OF_RANGE`` in + ``AppendRowsResponse`` if an attempt is made to append to an + offset beyond the current end of the stream or + ``ALREADY_EXISTS`` if user provids an ``offset`` that has + already been written to. User can retry with adjusted offset + within the same RPC stream. If ``offset`` is not specified, + append happens at the end of the stream. + + The response contains the offset at which the append happened. + Responses are received in the same order in which requests are + sent. There will be one response for each successful request. If + the ``offset`` is not set in response, it means append didn't + happen due to some errors. If one request fails, all the + subsequent requests will also fail until a success request is + made again. + + If the stream is of ``PENDING`` type, data will only be + available for read operations after the stream is committed. + + Args: + requests (Iterator[google.cloud.bigquery_storage_v1beta2.types.AppendRowsRequest]): + The request object iterator. Request message for `AppendRows`. + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + Iterable[google.cloud.bigquery_storage_v1beta2.types.AppendRowsResponse]: + Response message for AppendRows. + """ + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.append_rows] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + (gapic_v1.routing_header.to_grpc_metadata(()),) + + # Send the request. + response = rpc(requests, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + def get_write_stream( + self, + request: storage.GetWriteStreamRequest = None, + *, + name: str = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> stream.WriteStream: + r"""Gets a write stream. + + Args: + request (google.cloud.bigquery_storage_v1beta2.types.GetWriteStreamRequest): + The request object. Request message for + `GetWriteStreamRequest`. + name (str): + Required. Name of the stream to get, in the form of + ``projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}``. + + This corresponds to the ``name`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. 
+ + Returns: + google.cloud.bigquery_storage_v1beta2.types.WriteStream: + Information about a single stream + that gets data inside the storage + system. + + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([name]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a storage.GetWriteStreamRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, storage.GetWriteStreamRequest): + request = storage.GetWriteStreamRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if name is not None: + request.name = name + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.get_write_stream] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)), + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + def finalize_write_stream( + self, + request: storage.FinalizeWriteStreamRequest = None, + *, + name: str = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> storage.FinalizeWriteStreamResponse: + r"""Finalize a write stream so that no new data can be appended to + the stream. Finalize is not supported on the '_default' stream. + + Args: + request (google.cloud.bigquery_storage_v1beta2.types.FinalizeWriteStreamRequest): + The request object. Request message for invoking + `FinalizeWriteStream`. + name (str): + Required. Name of the stream to finalize, in the form of + ``projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}``. + + This corresponds to the ``name`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.FinalizeWriteStreamResponse: + Response message for FinalizeWriteStream. + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([name]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a storage.FinalizeWriteStreamRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. 
+ if not isinstance(request, storage.FinalizeWriteStreamRequest): + request = storage.FinalizeWriteStreamRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if name is not None: + request.name = name + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.finalize_write_stream] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)), + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + def batch_commit_write_streams( + self, + request: storage.BatchCommitWriteStreamsRequest = None, + *, + parent: str = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> storage.BatchCommitWriteStreamsResponse: + r"""Atomically commits a group of ``PENDING`` streams that belong to + the same ``parent`` table. Streams must be finalized before + commit and cannot be committed multiple times. Once a stream is + committed, data in the stream becomes available for read + operations. + + Args: + request (google.cloud.bigquery_storage_v1beta2.types.BatchCommitWriteStreamsRequest): + The request object. Request message for + `BatchCommitWriteStreams`. + parent (str): + Required. Parent table that all the streams should + belong to, in the form of + ``projects/{project}/datasets/{dataset}/tables/{table}``. + + This corresponds to the ``parent`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.BatchCommitWriteStreamsResponse: + Response message for BatchCommitWriteStreams. + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([parent]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a storage.BatchCommitWriteStreamsRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, storage.BatchCommitWriteStreamsRequest): + request = storage.BatchCommitWriteStreamsRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if parent is not None: + request.parent = parent + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[ + self._transport.batch_commit_write_streams + ] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("parent", request.parent),)), + ) + + # Send the request. 
+ response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. + return response + + def flush_rows( + self, + request: storage.FlushRowsRequest = None, + *, + write_stream: str = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> storage.FlushRowsResponse: + r"""Flushes rows to a BUFFERED stream. If users are appending rows + to BUFFERED stream, flush operation is required in order for the + rows to become available for reading. A Flush operation flushes + up to any previously flushed offset in a BUFFERED stream, to the + offset specified in the request. Flush is not supported on the + \_default stream, since it is not BUFFERED. + + Args: + request (google.cloud.bigquery_storage_v1beta2.types.FlushRowsRequest): + The request object. Request message for `FlushRows`. + write_stream (str): + Required. The stream that is the + target of the flush operation. + + This corresponds to the ``write_stream`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.bigquery_storage_v1beta2.types.FlushRowsResponse: + Respond message for FlushRows. + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([write_stream]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + # Minor optimization to avoid making a copy if the user passes + # in a storage.FlushRowsRequest. + # There's no risk of modifying the input as we've already verified + # there are no flattened fields. + if not isinstance(request, storage.FlushRowsRequest): + request = storage.FlushRowsRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if write_stream is not None: + request.write_stream = write_stream + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = self._transport._wrapped_methods[self._transport.flush_rows] + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("write_stream", request.write_stream),) + ), + ) + + # Send the request. + response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. 
+ return response + + +try: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo( + gapic_version=pkg_resources.get_distribution( + "google-cloud-bigquery-storage", + ).version, + ) +except pkg_resources.DistributionNotFound: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo() + + +__all__ = ("BigQueryWriteClient",) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/__init__.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/__init__.py new file mode 100644 index 00000000..33e48de0 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/__init__.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from collections import OrderedDict +from typing import Dict, Type + +from .base import BigQueryWriteTransport +from .grpc import BigQueryWriteGrpcTransport +from .grpc_asyncio import BigQueryWriteGrpcAsyncIOTransport + + +# Compile a registry of transports. +_transport_registry = OrderedDict() # type: Dict[str, Type[BigQueryWriteTransport]] +_transport_registry["grpc"] = BigQueryWriteGrpcTransport +_transport_registry["grpc_asyncio"] = BigQueryWriteGrpcAsyncIOTransport + +__all__ = ( + "BigQueryWriteTransport", + "BigQueryWriteGrpcTransport", + "BigQueryWriteGrpcAsyncIOTransport", +) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/base.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/base.py new file mode 100644 index 00000000..6b9b6a18 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/base.py @@ -0,0 +1,261 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import abc +import typing +import pkg_resources + +from google import auth # type: ignore +from google.api_core import exceptions # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google.api_core import retry as retries # type: ignore +from google.auth import credentials # type: ignore + +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream + + +try: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo( + gapic_version=pkg_resources.get_distribution( + "google-cloud-bigquery-storage", + ).version, + ) +except pkg_resources.DistributionNotFound: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo() + + +class BigQueryWriteTransport(abc.ABC): + """Abstract transport class for BigQueryWrite.""" + + AUTH_SCOPES = ( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.insertdata", + "https://www.googleapis.com/auth/cloud-platform", + ) + + def __init__( + self, + *, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: typing.Optional[str] = None, + scopes: typing.Optional[typing.Sequence[str]] = AUTH_SCOPES, + quota_project_id: typing.Optional[str] = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + **kwargs, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is mutually exclusive with credentials. + scope (Optional[Sequence[str]]): A list of scopes. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + """ + # Save the hostname. Default to port 443 (HTTPS) if none is specified. + if ":" not in host: + host += ":443" + self._host = host + + # If no credentials are provided, then determine the appropriate + # defaults. + if credentials and credentials_file: + raise exceptions.DuplicateCredentialArgs( + "'credentials_file' and 'credentials' are mutually exclusive" + ) + + if credentials_file is not None: + credentials, _ = auth.load_credentials_from_file( + credentials_file, scopes=scopes, quota_project_id=quota_project_id + ) + + elif credentials is None: + credentials, _ = auth.default( + scopes=scopes, quota_project_id=quota_project_id + ) + + # Save the credentials. + self._credentials = credentials + + # Lifted into its own function so it can be stubbed out during tests. + self._prep_wrapped_messages(client_info) + + def _prep_wrapped_messages(self, client_info): + # Precompute the wrapped methods. 
+ self._wrapped_methods = { + self.create_write_stream: gapic_v1.method.wrap_method( + self.create_write_stream, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, + exceptions.ResourceExhausted, + exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=client_info, + ), + self.append_rows: gapic_v1.method.wrap_method( + self.append_rows, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.ResourceExhausted, exceptions.ServiceUnavailable, + ), + ), + default_timeout=86400.0, + client_info=client_info, + ), + self.get_write_stream: gapic_v1.method.wrap_method( + self.get_write_stream, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=client_info, + ), + self.finalize_write_stream: gapic_v1.method.wrap_method( + self.finalize_write_stream, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=client_info, + ), + self.batch_commit_write_streams: gapic_v1.method.wrap_method( + self.batch_commit_write_streams, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=client_info, + ), + self.flush_rows: gapic_v1.method.wrap_method( + self.flush_rows, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + ), + default_timeout=600.0, + client_info=client_info, + ), + } + + @property + def create_write_stream( + self, + ) -> typing.Callable[ + [storage.CreateWriteStreamRequest], + typing.Union[stream.WriteStream, typing.Awaitable[stream.WriteStream]], + ]: + raise NotImplementedError() + + @property + def append_rows( + self, + ) -> typing.Callable[ + [storage.AppendRowsRequest], + typing.Union[ + storage.AppendRowsResponse, typing.Awaitable[storage.AppendRowsResponse] + ], + ]: + raise NotImplementedError() + + @property + def get_write_stream( + self, + ) -> typing.Callable[ + [storage.GetWriteStreamRequest], + typing.Union[stream.WriteStream, typing.Awaitable[stream.WriteStream]], + ]: + raise NotImplementedError() + + @property + def finalize_write_stream( + self, + ) -> typing.Callable[ + [storage.FinalizeWriteStreamRequest], + typing.Union[ + storage.FinalizeWriteStreamResponse, + typing.Awaitable[storage.FinalizeWriteStreamResponse], + ], + ]: + raise NotImplementedError() + + @property + def batch_commit_write_streams( + self, + ) -> typing.Callable[ + [storage.BatchCommitWriteStreamsRequest], + typing.Union[ + storage.BatchCommitWriteStreamsResponse, + typing.Awaitable[storage.BatchCommitWriteStreamsResponse], + ], + ]: + raise NotImplementedError() + + @property + def flush_rows( + self, + ) -> typing.Callable[ + [storage.FlushRowsRequest], + typing.Union[ + storage.FlushRowsResponse, typing.Awaitable[storage.FlushRowsResponse] + ], + ]: + raise NotImplementedError() + + +__all__ = ("BigQueryWriteTransport",) diff --git 
a/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/grpc.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/grpc.py new file mode 100644 index 00000000..d4638404 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/grpc.py @@ -0,0 +1,440 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import warnings +from typing import Callable, Dict, Optional, Sequence, Tuple + +from google.api_core import grpc_helpers # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google import auth # type: ignore +from google.auth import credentials # type: ignore +from google.auth.transport.grpc import SslCredentials # type: ignore + +import grpc # type: ignore + +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream + +from .base import BigQueryWriteTransport, DEFAULT_CLIENT_INFO + + +class BigQueryWriteGrpcTransport(BigQueryWriteTransport): + """gRPC backend transport for BigQueryWrite. + + BigQuery Write API. + The Write API can be used to write data to BigQuery. + + This class defines the same methods as the primary client, so the + primary client can load the underlying transport implementation + and call it. + + It sends protocol buffers over the wire using gRPC (which is built on + top of HTTP/2); the ``grpcio`` package must be installed. + """ + + _stubs: Dict[str, Callable] + + def __init__( + self, + *, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: str = None, + scopes: Sequence[str] = None, + channel: grpc.Channel = None, + api_mtls_endpoint: str = None, + client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, + ssl_channel_credentials: grpc.ChannelCredentials = None, + quota_project_id: Optional[str] = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is ignored if ``channel`` is provided. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional(Sequence[str])): A list of scopes. This argument is + ignored if ``channel`` is provided. + channel (Optional[grpc.Channel]): A ``Channel`` instance through + which to make calls. + api_mtls_endpoint (Optional[str]): Deprecated. The mutual TLS endpoint. 
+ If provided, it overrides the ``host`` argument and tries to create + a mutual TLS channel with client SSL credentials from + ``client_cert_source`` or applicatin default SSL credentials. + client_cert_source (Optional[Callable[[], Tuple[bytes, bytes]]]): + Deprecated. A callback to provide client SSL certificate bytes and + private key bytes, both in PEM format. It is ignored if + ``api_mtls_endpoint`` is None. + ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials + for grpc channel. It is ignored if ``channel`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + + Raises: + google.auth.exceptions.MutualTLSChannelError: If mutual TLS transport + creation failed for any reason. + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + self._ssl_channel_credentials = ssl_channel_credentials + + if channel: + # Sanity check: Ensure that channel and credentials are not both + # provided. + credentials = False + + # If a channel was explicitly provided, set it. + self._grpc_channel = channel + self._ssl_channel_credentials = None + elif api_mtls_endpoint: + warnings.warn( + "api_mtls_endpoint and client_cert_source are deprecated", + DeprecationWarning, + ) + + host = ( + api_mtls_endpoint + if ":" in api_mtls_endpoint + else api_mtls_endpoint + ":443" + ) + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. + if client_cert_source: + cert, key = client_cert_source() + ssl_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + ssl_credentials = SslCredentials().ssl_credentials + + # create a new channel. The provided one is ignored. + self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + self._ssl_channel_credentials = ssl_credentials + else: + host = host if ":" in host else host + ":443" + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # create a new channel. The provided one is ignored. + self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_channel_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + self._stubs = {} # type: Dict[str, Callable] + + # Run the base constructor. 
+ super().__init__( + host=host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + client_info=client_info, + ) + + @classmethod + def create_channel( + cls, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: str = None, + scopes: Optional[Sequence[str]] = None, + quota_project_id: Optional[str] = None, + **kwargs, + ) -> grpc.Channel: + """Create and return a gRPC channel object. + Args: + address (Optional[str]): The host for the channel to use. + credentials (Optional[~.Credentials]): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is mutually exclusive with credentials. + scopes (Optional[Sequence[str]]): A optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + kwargs (Optional[dict]): Keyword arguments, which are passed to the + channel creation. + Returns: + grpc.Channel: A gRPC channel object. + + Raises: + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + scopes = scopes or cls.AUTH_SCOPES + return grpc_helpers.create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + **kwargs, + ) + + @property + def grpc_channel(self) -> grpc.Channel: + """Return the channel designed to connect to this service. + """ + return self._grpc_channel + + @property + def create_write_stream( + self, + ) -> Callable[[storage.CreateWriteStreamRequest], stream.WriteStream]: + r"""Return a callable for the create write stream method over gRPC. + + Creates a write stream to the given table. Additionally, every + table has a special COMMITTED stream named '_default' to which + data can be written. This stream doesn't need to be created + using CreateWriteStream. It is a stream that can be used + simultaneously by any number of clients. Data written to this + stream is considered committed as soon as an acknowledgement is + received. + + Returns: + Callable[[~.CreateWriteStreamRequest], + ~.WriteStream]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "create_write_stream" not in self._stubs: + self._stubs["create_write_stream"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/CreateWriteStream", + request_serializer=storage.CreateWriteStreamRequest.serialize, + response_deserializer=stream.WriteStream.deserialize, + ) + return self._stubs["create_write_stream"] + + @property + def append_rows( + self, + ) -> Callable[[storage.AppendRowsRequest], storage.AppendRowsResponse]: + r"""Return a callable for the append rows method over gRPC. + + Appends data to the given stream. 
+ + If ``offset`` is specified, the ``offset`` is checked against + the end of stream. The server returns ``OUT_OF_RANGE`` in + ``AppendRowsResponse`` if an attempt is made to append to an + offset beyond the current end of the stream or + ``ALREADY_EXISTS`` if user provids an ``offset`` that has + already been written to. User can retry with adjusted offset + within the same RPC stream. If ``offset`` is not specified, + append happens at the end of the stream. + + The response contains the offset at which the append happened. + Responses are received in the same order in which requests are + sent. There will be one response for each successful request. If + the ``offset`` is not set in response, it means append didn't + happen due to some errors. If one request fails, all the + subsequent requests will also fail until a success request is + made again. + + If the stream is of ``PENDING`` type, data will only be + available for read operations after the stream is committed. + + Returns: + Callable[[~.AppendRowsRequest], + ~.AppendRowsResponse]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "append_rows" not in self._stubs: + self._stubs["append_rows"] = self.grpc_channel.stream_stream( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/AppendRows", + request_serializer=storage.AppendRowsRequest.serialize, + response_deserializer=storage.AppendRowsResponse.deserialize, + ) + return self._stubs["append_rows"] + + @property + def get_write_stream( + self, + ) -> Callable[[storage.GetWriteStreamRequest], stream.WriteStream]: + r"""Return a callable for the get write stream method over gRPC. + + Gets a write stream. + + Returns: + Callable[[~.GetWriteStreamRequest], + ~.WriteStream]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "get_write_stream" not in self._stubs: + self._stubs["get_write_stream"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/GetWriteStream", + request_serializer=storage.GetWriteStreamRequest.serialize, + response_deserializer=stream.WriteStream.deserialize, + ) + return self._stubs["get_write_stream"] + + @property + def finalize_write_stream( + self, + ) -> Callable[ + [storage.FinalizeWriteStreamRequest], storage.FinalizeWriteStreamResponse + ]: + r"""Return a callable for the finalize write stream method over gRPC. + + Finalize a write stream so that no new data can be appended to + the stream. Finalize is not supported on the '_default' stream. + + Returns: + Callable[[~.FinalizeWriteStreamRequest], + ~.FinalizeWriteStreamResponse]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
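# ----------------------------------------------------------------------------
# Editor's illustrative aside (not part of the generated file or this patch):
# a sketch of driving the ``AppendRows`` semantics documented above through
# the generated ``BigQueryWriteClient`` rather than the raw transport. The
# parent path is a placeholder, and ``proto_descriptor``/``serialized_rows``
# are assumed to be prepared by the caller (see the ProtoSchema/ProtoRows
# types added later in this patch).
from google.cloud.bigquery_storage_v1beta2 import types
from google.cloud.bigquery_storage_v1beta2.services.big_query_write import (
    BigQueryWriteClient,
)


def append_serialized_rows(parent, proto_descriptor, serialized_rows):
    client = BigQueryWriteClient()
    write_stream = client.create_write_stream(
        parent=parent,  # e.g. "projects/{project}/datasets/{dataset}/tables/{table}"
        write_stream=types.WriteStream(type_=types.WriteStream.Type.PENDING),
    )

    def requests():
        # The first request names the target stream and carries the writer schema.
        yield types.AppendRowsRequest(
            write_stream=write_stream.name,
            proto_rows=types.AppendRowsRequest.ProtoData(
                writer_schema=types.ProtoSchema(proto_descriptor=proto_descriptor),
                rows=types.ProtoRows(serialized_rows=serialized_rows),
            ),
        )

    # Responses arrive in the same order the requests were sent.
    for response in client.append_rows(requests()):
        print("appended at offset", response.append_result.offset.value)
    return write_stream
# ----------------------------------------------------------------------------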
+ if "finalize_write_stream" not in self._stubs: + self._stubs["finalize_write_stream"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/FinalizeWriteStream", + request_serializer=storage.FinalizeWriteStreamRequest.serialize, + response_deserializer=storage.FinalizeWriteStreamResponse.deserialize, + ) + return self._stubs["finalize_write_stream"] + + @property + def batch_commit_write_streams( + self, + ) -> Callable[ + [storage.BatchCommitWriteStreamsRequest], + storage.BatchCommitWriteStreamsResponse, + ]: + r"""Return a callable for the batch commit write streams method over gRPC. + + Atomically commits a group of ``PENDING`` streams that belong to + the same ``parent`` table. Streams must be finalized before + commit and cannot be committed multiple times. Once a stream is + committed, data in the stream becomes available for read + operations. + + Returns: + Callable[[~.BatchCommitWriteStreamsRequest], + ~.BatchCommitWriteStreamsResponse]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "batch_commit_write_streams" not in self._stubs: + self._stubs["batch_commit_write_streams"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/BatchCommitWriteStreams", + request_serializer=storage.BatchCommitWriteStreamsRequest.serialize, + response_deserializer=storage.BatchCommitWriteStreamsResponse.deserialize, + ) + return self._stubs["batch_commit_write_streams"] + + @property + def flush_rows( + self, + ) -> Callable[[storage.FlushRowsRequest], storage.FlushRowsResponse]: + r"""Return a callable for the flush rows method over gRPC. + + Flushes rows to a BUFFERED stream. If users are appending rows + to BUFFERED stream, flush operation is required in order for the + rows to become available for reading. A Flush operation flushes + up to any previously flushed offset in a BUFFERED stream, to the + offset specified in the request. Flush is not supported on the + \_default stream, since it is not BUFFERED. + + Returns: + Callable[[~.FlushRowsRequest], + ~.FlushRowsResponse]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "flush_rows" not in self._stubs: + self._stubs["flush_rows"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/FlushRows", + request_serializer=storage.FlushRowsRequest.serialize, + response_deserializer=storage.FlushRowsResponse.deserialize, + ) + return self._stubs["flush_rows"] + + +__all__ = ("BigQueryWriteGrpcTransport",) diff --git a/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/grpc_asyncio.py b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/grpc_asyncio.py new file mode 100644 index 00000000..c41a7daa --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/services/big_query_write/transports/grpc_asyncio.py @@ -0,0 +1,445 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import warnings +from typing import Awaitable, Callable, Dict, Optional, Sequence, Tuple + +from google.api_core import gapic_v1 # type: ignore +from google.api_core import grpc_helpers_async # type: ignore +from google import auth # type: ignore +from google.auth import credentials # type: ignore +from google.auth.transport.grpc import SslCredentials # type: ignore + +import grpc # type: ignore +from grpc.experimental import aio # type: ignore + +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream + +from .base import BigQueryWriteTransport, DEFAULT_CLIENT_INFO +from .grpc import BigQueryWriteGrpcTransport + + +class BigQueryWriteGrpcAsyncIOTransport(BigQueryWriteTransport): + """gRPC AsyncIO backend transport for BigQueryWrite. + + BigQuery Write API. + The Write API can be used to write data to BigQuery. + + This class defines the same methods as the primary client, so the + primary client can load the underlying transport implementation + and call it. + + It sends protocol buffers over the wire using gRPC (which is built on + top of HTTP/2); the ``grpcio`` package must be installed. + """ + + _grpc_channel: aio.Channel + _stubs: Dict[str, Callable] = {} + + @classmethod + def create_channel( + cls, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: Optional[str] = None, + scopes: Optional[Sequence[str]] = None, + quota_project_id: Optional[str] = None, + **kwargs, + ) -> aio.Channel: + """Create and return a gRPC AsyncIO channel object. + Args: + address (Optional[str]): The host for the channel to use. + credentials (Optional[~.Credentials]): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional[Sequence[str]]): A optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + kwargs (Optional[dict]): Keyword arguments, which are passed to the + channel creation. + Returns: + aio.Channel: A gRPC AsyncIO channel object. 
+ """ + scopes = scopes or cls.AUTH_SCOPES + return grpc_helpers_async.create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + **kwargs, + ) + + def __init__( + self, + *, + host: str = "bigquerystorage.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: Optional[str] = None, + scopes: Optional[Sequence[str]] = None, + channel: aio.Channel = None, + api_mtls_endpoint: str = None, + client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, + ssl_channel_credentials: grpc.ChannelCredentials = None, + quota_project_id=None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is ignored if ``channel`` is provided. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional[Sequence[str]]): A optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + channel (Optional[aio.Channel]): A ``Channel`` instance through + which to make calls. + api_mtls_endpoint (Optional[str]): Deprecated. The mutual TLS endpoint. + If provided, it overrides the ``host`` argument and tries to create + a mutual TLS channel with client SSL credentials from + ``client_cert_source`` or applicatin default SSL credentials. + client_cert_source (Optional[Callable[[], Tuple[bytes, bytes]]]): + Deprecated. A callback to provide client SSL certificate bytes and + private key bytes, both in PEM format. It is ignored if + ``api_mtls_endpoint`` is None. + ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials + for grpc channel. It is ignored if ``channel`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + + Raises: + google.auth.exceptions.MutualTlsChannelError: If mutual TLS transport + creation failed for any reason. + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + self._ssl_channel_credentials = ssl_channel_credentials + + if channel: + # Sanity check: Ensure that channel and credentials are not both + # provided. + credentials = False + + # If a channel was explicitly provided, set it. 
+ self._grpc_channel = channel + self._ssl_channel_credentials = None + elif api_mtls_endpoint: + warnings.warn( + "api_mtls_endpoint and client_cert_source are deprecated", + DeprecationWarning, + ) + + host = ( + api_mtls_endpoint + if ":" in api_mtls_endpoint + else api_mtls_endpoint + ":443" + ) + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. + if client_cert_source: + cert, key = client_cert_source() + ssl_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + ssl_credentials = SslCredentials().ssl_credentials + + # create a new channel. The provided one is ignored. + self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + self._ssl_channel_credentials = ssl_credentials + else: + host = host if ":" in host else host + ":443" + + if credentials is None: + credentials, _ = auth.default( + scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id + ) + + # create a new channel. The provided one is ignored. + self._grpc_channel = type(self).create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + ssl_credentials=ssl_channel_credentials, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + # Run the base constructor. + super().__init__( + host=host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes or self.AUTH_SCOPES, + quota_project_id=quota_project_id, + client_info=client_info, + ) + + self._stubs = {} + + @property + def grpc_channel(self) -> aio.Channel: + """Create the channel designed to connect to this service. + + This property caches on the instance; repeated calls return + the same channel. + """ + # Return the channel from cache. + return self._grpc_channel + + @property + def create_write_stream( + self, + ) -> Callable[[storage.CreateWriteStreamRequest], Awaitable[stream.WriteStream]]: + r"""Return a callable for the create write stream method over gRPC. + + Creates a write stream to the given table. Additionally, every + table has a special COMMITTED stream named '_default' to which + data can be written. This stream doesn't need to be created + using CreateWriteStream. It is a stream that can be used + simultaneously by any number of clients. Data written to this + stream is considered committed as soon as an acknowledgement is + received. + + Returns: + Callable[[~.CreateWriteStreamRequest], + Awaitable[~.WriteStream]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
+ if "create_write_stream" not in self._stubs: + self._stubs["create_write_stream"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/CreateWriteStream", + request_serializer=storage.CreateWriteStreamRequest.serialize, + response_deserializer=stream.WriteStream.deserialize, + ) + return self._stubs["create_write_stream"] + + @property + def append_rows( + self, + ) -> Callable[[storage.AppendRowsRequest], Awaitable[storage.AppendRowsResponse]]: + r"""Return a callable for the append rows method over gRPC. + + Appends data to the given stream. + + If ``offset`` is specified, the ``offset`` is checked against + the end of stream. The server returns ``OUT_OF_RANGE`` in + ``AppendRowsResponse`` if an attempt is made to append to an + offset beyond the current end of the stream or + ``ALREADY_EXISTS`` if user provids an ``offset`` that has + already been written to. User can retry with adjusted offset + within the same RPC stream. If ``offset`` is not specified, + append happens at the end of the stream. + + The response contains the offset at which the append happened. + Responses are received in the same order in which requests are + sent. There will be one response for each successful request. If + the ``offset`` is not set in response, it means append didn't + happen due to some errors. If one request fails, all the + subsequent requests will also fail until a success request is + made again. + + If the stream is of ``PENDING`` type, data will only be + available for read operations after the stream is committed. + + Returns: + Callable[[~.AppendRowsRequest], + Awaitable[~.AppendRowsResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "append_rows" not in self._stubs: + self._stubs["append_rows"] = self.grpc_channel.stream_stream( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/AppendRows", + request_serializer=storage.AppendRowsRequest.serialize, + response_deserializer=storage.AppendRowsResponse.deserialize, + ) + return self._stubs["append_rows"] + + @property + def get_write_stream( + self, + ) -> Callable[[storage.GetWriteStreamRequest], Awaitable[stream.WriteStream]]: + r"""Return a callable for the get write stream method over gRPC. + + Gets a write stream. + + Returns: + Callable[[~.GetWriteStreamRequest], + Awaitable[~.WriteStream]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "get_write_stream" not in self._stubs: + self._stubs["get_write_stream"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/GetWriteStream", + request_serializer=storage.GetWriteStreamRequest.serialize, + response_deserializer=stream.WriteStream.deserialize, + ) + return self._stubs["get_write_stream"] + + @property + def finalize_write_stream( + self, + ) -> Callable[ + [storage.FinalizeWriteStreamRequest], + Awaitable[storage.FinalizeWriteStreamResponse], + ]: + r"""Return a callable for the finalize write stream method over gRPC. + + Finalize a write stream so that no new data can be appended to + the stream. 
Finalize is not supported on the '_default' stream. + + Returns: + Callable[[~.FinalizeWriteStreamRequest], + Awaitable[~.FinalizeWriteStreamResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "finalize_write_stream" not in self._stubs: + self._stubs["finalize_write_stream"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/FinalizeWriteStream", + request_serializer=storage.FinalizeWriteStreamRequest.serialize, + response_deserializer=storage.FinalizeWriteStreamResponse.deserialize, + ) + return self._stubs["finalize_write_stream"] + + @property + def batch_commit_write_streams( + self, + ) -> Callable[ + [storage.BatchCommitWriteStreamsRequest], + Awaitable[storage.BatchCommitWriteStreamsResponse], + ]: + r"""Return a callable for the batch commit write streams method over gRPC. + + Atomically commits a group of ``PENDING`` streams that belong to + the same ``parent`` table. Streams must be finalized before + commit and cannot be committed multiple times. Once a stream is + committed, data in the stream becomes available for read + operations. + + Returns: + Callable[[~.BatchCommitWriteStreamsRequest], + Awaitable[~.BatchCommitWriteStreamsResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "batch_commit_write_streams" not in self._stubs: + self._stubs["batch_commit_write_streams"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/BatchCommitWriteStreams", + request_serializer=storage.BatchCommitWriteStreamsRequest.serialize, + response_deserializer=storage.BatchCommitWriteStreamsResponse.deserialize, + ) + return self._stubs["batch_commit_write_streams"] + + @property + def flush_rows( + self, + ) -> Callable[[storage.FlushRowsRequest], Awaitable[storage.FlushRowsResponse]]: + r"""Return a callable for the flush rows method over gRPC. + + Flushes rows to a BUFFERED stream. If users are appending rows + to BUFFERED stream, flush operation is required in order for the + rows to become available for reading. A Flush operation flushes + up to any previously flushed offset in a BUFFERED stream, to the + offset specified in the request. Flush is not supported on the + \_default stream, since it is not BUFFERED. + + Returns: + Callable[[~.FlushRowsRequest], + Awaitable[~.FlushRowsResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
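# ----------------------------------------------------------------------------
# Editor's illustrative aside (not part of the generated file or this patch):
# a sketch of the finalize-then-commit flow for PENDING streams described in
# the docstrings above. ``parent`` and ``stream_name`` are supplied by the
# caller; the flattened ``name=`` argument is assumed from the generated
# client's method signature.
from google.cloud.bigquery_storage_v1beta2 import types
from google.cloud.bigquery_storage_v1beta2.services.big_query_write import (
    BigQueryWriteClient,
)


def commit_pending_stream(client: BigQueryWriteClient, parent: str, stream_name: str):
    # After finalization no further appends are accepted on the stream.
    finalize_response = client.finalize_write_stream(name=stream_name)
    print("rows in finalized stream:", finalize_response.row_count)

    # Committing makes the appended data visible to read operations.
    commit_response = client.batch_commit_write_streams(
        types.BatchCommitWriteStreamsRequest(
            parent=parent, write_streams=[stream_name],
        )
    )
    if commit_response.stream_errors:
        raise RuntimeError("commit failed: %s" % commit_response.stream_errors)
    return commit_response.commit_time
# ----------------------------------------------------------------------------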
+ if "flush_rows" not in self._stubs: + self._stubs["flush_rows"] = self.grpc_channel.unary_unary( + "/google.cloud.bigquery.storage.v1beta2.BigQueryWrite/FlushRows", + request_serializer=storage.FlushRowsRequest.serialize, + response_deserializer=storage.FlushRowsResponse.deserialize, + ) + return self._stubs["flush_rows"] + + +__all__ = ("BigQueryWriteGrpcAsyncIOTransport",) diff --git a/google/cloud/bigquery_storage_v1beta2/types/__init__.py b/google/cloud/bigquery_storage_v1beta2/types/__init__.py new file mode 100644 index 00000000..ba1cac28 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/types/__init__.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .arrow import ( + ArrowSchema, + ArrowRecordBatch, + ArrowSerializationOptions, +) +from .avro import ( + AvroSchema, + AvroRows, +) +from .protobuf import ( + ProtoSchema, + ProtoRows, +) +from .table import ( + TableSchema, + TableFieldSchema, +) +from .stream import ( + DataFormat, + ReadSession, + ReadStream, + WriteStream, + DataFormat, +) +from .storage import ( + CreateReadSessionRequest, + ReadRowsRequest, + ThrottleState, + StreamStats, + ReadRowsResponse, + SplitReadStreamRequest, + SplitReadStreamResponse, + CreateWriteStreamRequest, + AppendRowsRequest, + AppendRowsResponse, + GetWriteStreamRequest, + BatchCommitWriteStreamsRequest, + BatchCommitWriteStreamsResponse, + FinalizeWriteStreamRequest, + FinalizeWriteStreamResponse, + FlushRowsRequest, + FlushRowsResponse, + StorageError, +) + +__all__ = ( + "ArrowSchema", + "ArrowRecordBatch", + "ArrowSerializationOptions", + "AvroSchema", + "AvroRows", + "ProtoSchema", + "ProtoRows", + "TableSchema", + "TableFieldSchema", + "DataFormat", + "ReadSession", + "ReadStream", + "WriteStream", + "DataFormat", + "CreateReadSessionRequest", + "ReadRowsRequest", + "ThrottleState", + "StreamStats", + "ReadRowsResponse", + "SplitReadStreamRequest", + "SplitReadStreamResponse", + "CreateWriteStreamRequest", + "AppendRowsRequest", + "AppendRowsResponse", + "GetWriteStreamRequest", + "BatchCommitWriteStreamsRequest", + "BatchCommitWriteStreamsResponse", + "FinalizeWriteStreamRequest", + "FinalizeWriteStreamResponse", + "FlushRowsRequest", + "FlushRowsResponse", + "StorageError", +) diff --git a/google/cloud/bigquery_storage_v1beta2/types/arrow.py b/google/cloud/bigquery_storage_v1beta2/types/arrow.py new file mode 100644 index 00000000..0ebbbe8a --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/types/arrow.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.storage.v1beta2", + manifest={"ArrowSchema", "ArrowRecordBatch", "ArrowSerializationOptions",}, +) + + +class ArrowSchema(proto.Message): + r"""Arrow schema as specified in + https://arrow.apache.org/docs/python/api/datatypes.html and + serialized to bytes using IPC: + https://arrow.apache.org/docs/format/Columnar.html#serialization- + and-interprocess-communication-ipc + See code samples on how this message can be deserialized. + + Attributes: + serialized_schema (bytes): + IPC serialized Arrow schema. + """ + + serialized_schema = proto.Field(proto.BYTES, number=1) + + +class ArrowRecordBatch(proto.Message): + r"""Arrow RecordBatch. + + Attributes: + serialized_record_batch (bytes): + IPC-serialized Arrow RecordBatch. + """ + + serialized_record_batch = proto.Field(proto.BYTES, number=1) + + +class ArrowSerializationOptions(proto.Message): + r"""Contains options specific to Arrow Serialization. + + Attributes: + format_ (google.cloud.bigquery_storage_v1beta2.types.ArrowSerializationOptions.Format): + The Arrow IPC format to use. + """ + + class Format(proto.Enum): + r"""The IPC format to use when serializing Arrow streams.""" + FORMAT_UNSPECIFIED = 0 + ARROW_0_14 = 1 + ARROW_0_15 = 2 + + format_ = proto.Field(proto.ENUM, number=1, enum=Format,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_storage_v1beta2/types/avro.py b/google/cloud/bigquery_storage_v1beta2/types/avro.py new file mode 100644 index 00000000..49aa017d --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/types/avro.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.storage.v1beta2", + manifest={"AvroSchema", "AvroRows",}, +) + + +class AvroSchema(proto.Message): + r"""Avro schema. + + Attributes: + schema (str): + Json serialized schema, as described at + https://avro.apache.org/docs/1.8.1/spec.html. + """ + + schema = proto.Field(proto.STRING, number=1) + + +class AvroRows(proto.Message): + r"""Avro rows. + + Attributes: + serialized_binary_rows (bytes): + Binary serialized rows in a block. 
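# ----------------------------------------------------------------------------
# Editor's illustrative aside (not part of the generated file or this patch):
# one way the IPC-serialized Arrow payloads described above could be decoded,
# assuming the optional ``pyarrow`` dependency is installed and that
# ``read_session``/``read_rows_response`` come from the read API.
import pyarrow


def record_batch_from_response(read_session, read_rows_response):
    # The session carries the IPC-serialized Arrow schema.
    arrow_schema = pyarrow.ipc.read_schema(
        pyarrow.py_buffer(read_session.arrow_schema.serialized_schema)
    )
    # Each response's RecordBatch is decoded against that schema.
    return pyarrow.ipc.read_record_batch(
        pyarrow.py_buffer(
            read_rows_response.arrow_record_batch.serialized_record_batch
        ),
        arrow_schema,
    )
# ----------------------------------------------------------------------------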
+ """ + + serialized_binary_rows = proto.Field(proto.BYTES, number=1) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_storage_v1beta2/types/protobuf.py b/google/cloud/bigquery_storage_v1beta2/types/protobuf.py new file mode 100644 index 00000000..99c0543c --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/types/protobuf.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +from google.protobuf import descriptor_pb2 as descriptor # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.storage.v1beta2", + manifest={"ProtoSchema", "ProtoRows",}, +) + + +class ProtoSchema(proto.Message): + r"""Protobuf schema is an API presentation the proto buffer + schema. + + Attributes: + proto_descriptor (google.protobuf.descriptor_pb2.DescriptorProto): + Descriptor for input message. The descriptor + has to be self contained, including all the + nested types, excepted for proto buffer well + known types + (https://developers.google.com/protocol- + buffers/docs/reference/google.protobuf). + """ + + proto_descriptor = proto.Field( + proto.MESSAGE, number=1, message=descriptor.DescriptorProto, + ) + + +class ProtoRows(proto.Message): + r"""Protobuf rows. + + Attributes: + serialized_rows (Sequence[bytes]): + A sequence of rows serialized as a Protocol + Buffer. + See https://developers.google.com/protocol- + buffers/docs/overview for more information on + deserializing this field. + """ + + serialized_rows = proto.RepeatedField(proto.BYTES, number=1) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_storage_v1beta2/types/storage.py b/google/cloud/bigquery_storage_v1beta2/types/storage.py new file mode 100644 index 00000000..85369388 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/types/storage.py @@ -0,0 +1,487 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import proto # type: ignore + + +from google.cloud.bigquery_storage_v1beta2.types import arrow +from google.cloud.bigquery_storage_v1beta2.types import avro +from google.cloud.bigquery_storage_v1beta2.types import protobuf +from google.cloud.bigquery_storage_v1beta2.types import stream +from google.cloud.bigquery_storage_v1beta2.types import table +from google.protobuf import timestamp_pb2 as timestamp # type: ignore +from google.protobuf import wrappers_pb2 as wrappers # type: ignore +from google.rpc import status_pb2 as status # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.storage.v1beta2", + manifest={ + "CreateReadSessionRequest", + "ReadRowsRequest", + "ThrottleState", + "StreamStats", + "ReadRowsResponse", + "SplitReadStreamRequest", + "SplitReadStreamResponse", + "CreateWriteStreamRequest", + "AppendRowsRequest", + "AppendRowsResponse", + "GetWriteStreamRequest", + "BatchCommitWriteStreamsRequest", + "BatchCommitWriteStreamsResponse", + "FinalizeWriteStreamRequest", + "FinalizeWriteStreamResponse", + "FlushRowsRequest", + "FlushRowsResponse", + "StorageError", + }, +) + + +class CreateReadSessionRequest(proto.Message): + r"""Request message for ``CreateReadSession``. + + Attributes: + parent (str): + Required. The request project that owns the session, in the + form of ``projects/{project_id}``. + read_session (google.cloud.bigquery_storage_v1beta2.types.ReadSession): + Required. Session to be created. + max_stream_count (int): + Max initial number of streams. If unset or + zero, the server will provide a value of streams + so as to produce reasonable throughput. Must be + non-negative. The number of streams may be lower + than the requested number, depending on the + amount parallelism that is reasonable for the + table. Error will be returned if the max count + is greater than the current system max limit of + 1,000. + + Streams must be read starting from offset 0. + """ + + parent = proto.Field(proto.STRING, number=1) + + read_session = proto.Field(proto.MESSAGE, number=2, message=stream.ReadSession,) + + max_stream_count = proto.Field(proto.INT32, number=3) + + +class ReadRowsRequest(proto.Message): + r"""Request message for ``ReadRows``. + + Attributes: + read_stream (str): + Required. Stream to read rows from. + offset (int): + The offset requested must be less than the + last row read from Read. Requesting a larger + offset is undefined. If not specified, start + reading from offset zero. + """ + + read_stream = proto.Field(proto.STRING, number=1) + + offset = proto.Field(proto.INT64, number=2) + + +class ThrottleState(proto.Message): + r"""Information on if the current connection is being throttled. + + Attributes: + throttle_percent (int): + How much this connection is being throttled. + Zero means no throttling, 100 means fully + throttled. + """ + + throttle_percent = proto.Field(proto.INT32, number=1) + + +class StreamStats(proto.Message): + r"""Estimated stream statistics for a given Stream. + + Attributes: + progress (google.cloud.bigquery_storage_v1beta2.types.StreamStats.Progress): + Represents the progress of the current + stream. + """ + + class Progress(proto.Message): + r""" + + Attributes: + at_response_start (float): + The fraction of rows assigned to the stream that have been + processed by the server so far, not including the rows in + the current response message. 
+ + This value, along with ``at_response_end``, can be used to + interpolate the progress made as the rows in the message are + being processed using the following formula: + ``at_response_start + (at_response_end - at_response_start) * rows_processed_from_response / rows_in_response``. + + Note that if a filter is provided, the ``at_response_end`` + value of the previous response may not necessarily be equal + to the ``at_response_start`` value of the current response. + at_response_end (float): + Similar to ``at_response_start``, except that this value + includes the rows in the current response. + """ + + at_response_start = proto.Field(proto.DOUBLE, number=1) + + at_response_end = proto.Field(proto.DOUBLE, number=2) + + progress = proto.Field(proto.MESSAGE, number=2, message=Progress,) + + +class ReadRowsResponse(proto.Message): + r"""Response from calling ``ReadRows`` may include row data, progress + and throttling information. + + Attributes: + avro_rows (google.cloud.bigquery_storage_v1beta2.types.AvroRows): + Serialized row data in AVRO format. + arrow_record_batch (google.cloud.bigquery_storage_v1beta2.types.ArrowRecordBatch): + Serialized row data in Arrow RecordBatch + format. + row_count (int): + Number of serialized rows in the rows block. + stats (google.cloud.bigquery_storage_v1beta2.types.StreamStats): + Statistics for the stream. + throttle_state (google.cloud.bigquery_storage_v1beta2.types.ThrottleState): + Throttling state. If unset, the latest + response still describes the current throttling + status. + """ + + avro_rows = proto.Field( + proto.MESSAGE, number=3, oneof="rows", message=avro.AvroRows, + ) + + arrow_record_batch = proto.Field( + proto.MESSAGE, number=4, oneof="rows", message=arrow.ArrowRecordBatch, + ) + + row_count = proto.Field(proto.INT64, number=6) + + stats = proto.Field(proto.MESSAGE, number=2, message="StreamStats",) + + throttle_state = proto.Field(proto.MESSAGE, number=5, message="ThrottleState",) + + +class SplitReadStreamRequest(proto.Message): + r"""Request message for ``SplitReadStream``. + + Attributes: + name (str): + Required. Name of the stream to split. + fraction (float): + A value in the range (0.0, 1.0) that + specifies the fractional point at which the + original stream should be split. The actual + split point is evaluated on pre-filtered rows, + so if a filter is provided, then there is no + guarantee that the division of the rows between + the new child streams will be proportional to + this fractional value. Additionally, because the + server-side unit for assigning data is + collections of rows, this fraction will always + map to a data storage boundary on the server + side. + """ + + name = proto.Field(proto.STRING, number=1) + + fraction = proto.Field(proto.DOUBLE, number=2) + + +class SplitReadStreamResponse(proto.Message): + r""" + + Attributes: + primary_stream (google.cloud.bigquery_storage_v1beta2.types.ReadStream): + Primary stream, which contains the beginning portion of + \|original_stream|. An empty value indicates that the + original stream can no longer be split. + remainder_stream (google.cloud.bigquery_storage_v1beta2.types.ReadStream): + Remainder stream, which contains the tail of + \|original_stream|. An empty value indicates that the + original stream can no longer be split. 
+ """ + + primary_stream = proto.Field(proto.MESSAGE, number=1, message=stream.ReadStream,) + + remainder_stream = proto.Field(proto.MESSAGE, number=2, message=stream.ReadStream,) + + +class CreateWriteStreamRequest(proto.Message): + r"""Request message for ``CreateWriteStream``. + + Attributes: + parent (str): + Required. Reference to the table to which the stream + belongs, in the format of + ``projects/{project}/datasets/{dataset}/tables/{table}``. + write_stream (google.cloud.bigquery_storage_v1beta2.types.WriteStream): + Required. Stream to be created. + """ + + parent = proto.Field(proto.STRING, number=1) + + write_stream = proto.Field(proto.MESSAGE, number=2, message=stream.WriteStream,) + + +class AppendRowsRequest(proto.Message): + r"""Request message for ``AppendRows``. + + Attributes: + write_stream (str): + Required. The stream that is the target of the append + operation. This value must be specified for the initial + request. If subsequent requests specify the stream name, it + must equal to the value provided in the first request. To + write to the \_default stream, populate this field with a + string in the format + ``projects/{project}/datasets/{dataset}/tables/{table}/_default``. + offset (google.protobuf.wrappers_pb2.Int64Value): + If present, the write is only performed if the next append + offset is same as the provided value. If not present, the + write is performed at the current end of stream. Specifying + a value for this field is not allowed when calling + AppendRows for the '_default' stream. + proto_rows (google.cloud.bigquery_storage_v1beta2.types.AppendRowsRequest.ProtoData): + Rows in proto format. + trace_id (str): + Id set by client to annotate its identity. + Only initial request setting is respected. + """ + + class ProtoData(proto.Message): + r"""Proto schema and data. + + Attributes: + writer_schema (google.cloud.bigquery_storage_v1beta2.types.ProtoSchema): + Proto schema used to serialize the data. + rows (google.cloud.bigquery_storage_v1beta2.types.ProtoRows): + Serialized row data in protobuf message + format. + """ + + writer_schema = proto.Field( + proto.MESSAGE, number=1, message=protobuf.ProtoSchema, + ) + + rows = proto.Field(proto.MESSAGE, number=2, message=protobuf.ProtoRows,) + + write_stream = proto.Field(proto.STRING, number=1) + + offset = proto.Field(proto.MESSAGE, number=2, message=wrappers.Int64Value,) + + proto_rows = proto.Field(proto.MESSAGE, number=4, oneof="rows", message=ProtoData,) + + trace_id = proto.Field(proto.STRING, number=6) + + +class AppendRowsResponse(proto.Message): + r"""Response message for ``AppendRows``. + + Attributes: + append_result (google.cloud.bigquery_storage_v1beta2.types.AppendRowsResponse.AppendResult): + Result if the append is successful. + error (google.rpc.status_pb2.Status): + Error in case of request failed. If set, it means rows are + not accepted into the system. Users can retry or continue + with other requests within the same connection. + ALREADY_EXISTS: happens when offset is specified, it means + the entire request is already appended, it is safe to ignore + this error. OUT_OF_RANGE: happens when offset is specified, + it means the specified offset is beyond the end of the + stream. INVALID_ARGUMENT: error caused by malformed request + or data. RESOURCE_EXHAUSTED: request rejected due to + throttling. Only happens when append without offset. + ABORTED: request processing is aborted because of prior + failures, request can be retried if previous failure is + fixed. 
INTERNAL: server side errors that can be retried. + updated_schema (google.cloud.bigquery_storage_v1beta2.types.TableSchema): + If backend detects a schema update, pass it + to user so that user can use it to input new + type of message. It will be empty when there is + no schema updates. + """ + + class AppendResult(proto.Message): + r"""A success append result. + + Attributes: + offset (google.protobuf.wrappers_pb2.Int64Value): + The row offset at which the last append + occurred. The offset will not be set if + appending using default streams. + """ + + offset = proto.Field(proto.MESSAGE, number=1, message=wrappers.Int64Value,) + + append_result = proto.Field( + proto.MESSAGE, number=1, oneof="response", message=AppendResult, + ) + + error = proto.Field( + proto.MESSAGE, number=2, oneof="response", message=status.Status, + ) + + updated_schema = proto.Field(proto.MESSAGE, number=3, message=table.TableSchema,) + + +class GetWriteStreamRequest(proto.Message): + r"""Request message for ``GetWriteStreamRequest``. + + Attributes: + name (str): + Required. Name of the stream to get, in the form of + ``projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}``. + """ + + name = proto.Field(proto.STRING, number=1) + + +class BatchCommitWriteStreamsRequest(proto.Message): + r"""Request message for ``BatchCommitWriteStreams``. + + Attributes: + parent (str): + Required. Parent table that all the streams should belong + to, in the form of + ``projects/{project}/datasets/{dataset}/tables/{table}``. + write_streams (Sequence[str]): + Required. The group of streams that will be + committed atomically. + """ + + parent = proto.Field(proto.STRING, number=1) + + write_streams = proto.RepeatedField(proto.STRING, number=2) + + +class BatchCommitWriteStreamsResponse(proto.Message): + r"""Response message for ``BatchCommitWriteStreams``. + + Attributes: + commit_time (google.protobuf.timestamp_pb2.Timestamp): + The time at which streams were committed in + microseconds granularity. This field will only + exist when there is no stream errors. + stream_errors (Sequence[google.cloud.bigquery_storage_v1beta2.types.StorageError]): + Stream level error if commit failed. Only + streams with error will be in the list. + """ + + commit_time = proto.Field(proto.MESSAGE, number=1, message=timestamp.Timestamp,) + + stream_errors = proto.RepeatedField( + proto.MESSAGE, number=2, message="StorageError", + ) + + +class FinalizeWriteStreamRequest(proto.Message): + r"""Request message for invoking ``FinalizeWriteStream``. + + Attributes: + name (str): + Required. Name of the stream to finalize, in the form of + ``projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}``. + """ + + name = proto.Field(proto.STRING, number=1) + + +class FinalizeWriteStreamResponse(proto.Message): + r"""Response message for ``FinalizeWriteStream``. + + Attributes: + row_count (int): + Number of rows in the finalized stream. + """ + + row_count = proto.Field(proto.INT64, number=1) + + +class FlushRowsRequest(proto.Message): + r"""Request message for ``FlushRows``. + + Attributes: + write_stream (str): + Required. The stream that is the target of + the flush operation. + offset (google.protobuf.wrappers_pb2.Int64Value): + Ending offset of the flush operation. Rows + before this offset(including this offset) will + be flushed. 
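# ----------------------------------------------------------------------------
# Editor's illustrative aside (not part of the generated file or this patch):
# a sketch of inspecting the ``AppendRowsResponse`` oneof documented above.
# A production caller would follow the retry guidance in that docstring, for
# example treating ALREADY_EXISTS (offset already written) as success.
from google.cloud.bigquery_storage_v1beta2 import types


def offset_or_raise(response: types.AppendRowsResponse) -> int:
    # ``append_result`` and ``error`` are a oneof; a populated error means
    # none of the rows in the corresponding request were accepted.
    if response.error.code:
        raise RuntimeError(
            "append rejected (%d): %s" % (response.error.code, response.error.message)
        )
    return response.append_result.offset.value
# ----------------------------------------------------------------------------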
+ """ + + write_stream = proto.Field(proto.STRING, number=1) + + offset = proto.Field(proto.MESSAGE, number=2, message=wrappers.Int64Value,) + + +class FlushRowsResponse(proto.Message): + r"""Respond message for ``FlushRows``. + + Attributes: + offset (int): + The rows before this offset (including this + offset) are flushed. + """ + + offset = proto.Field(proto.INT64, number=1) + + +class StorageError(proto.Message): + r"""Structured custom BigQuery Storage error message. The error + can be attached as error details in the returned rpc Status. + User can use the info to process errors in a structural way, + rather than having to parse error messages. + + Attributes: + code (google.cloud.bigquery_storage_v1beta2.types.StorageError.StorageErrorCode): + BigQuery Storage specific error code. + entity (str): + Name of the failed entity. + error_message (str): + Message that describes the error. + """ + + class StorageErrorCode(proto.Enum): + r"""Error code for ``StorageError``.""" + STORAGE_ERROR_CODE_UNSPECIFIED = 0 + TABLE_NOT_FOUND = 1 + STREAM_ALREADY_COMMITTED = 2 + STREAM_NOT_FOUND = 3 + INVALID_STREAM_TYPE = 4 + INVALID_STREAM_STATE = 5 + + code = proto.Field(proto.ENUM, number=1, enum=StorageErrorCode,) + + entity = proto.Field(proto.STRING, number=2) + + error_message = proto.Field(proto.STRING, number=3) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_storage_v1beta2/types/stream.py b/google/cloud/bigquery_storage_v1beta2/types/stream.py new file mode 100644 index 00000000..77fb444c --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/types/stream.py @@ -0,0 +1,203 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +from google.cloud.bigquery_storage_v1beta2.types import arrow +from google.cloud.bigquery_storage_v1beta2.types import avro +from google.cloud.bigquery_storage_v1beta2.types import table as gcbs_table +from google.protobuf import timestamp_pb2 as timestamp # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.storage.v1beta2", + manifest={"DataFormat", "ReadSession", "ReadStream", "WriteStream",}, +) + + +class DataFormat(proto.Enum): + r"""Data format for input or output data.""" + DATA_FORMAT_UNSPECIFIED = 0 + AVRO = 1 + ARROW = 2 + + +class ReadSession(proto.Message): + r"""Information about the ReadSession. + + Attributes: + name (str): + Output only. Unique identifier for the session, in the form + ``projects/{project_id}/locations/{location}/sessions/{session_id}``. + expire_time (google.protobuf.timestamp_pb2.Timestamp): + Output only. Time at which the session becomes invalid. + After this time, subsequent requests to read this Session + will return errors. The expire_time is automatically + assigned and currently cannot be specified or updated. + data_format (google.cloud.bigquery_storage_v1beta2.types.DataFormat): + Immutable. Data format of the output data. 
+ avro_schema (google.cloud.bigquery_storage_v1beta2.types.AvroSchema): + Output only. Avro schema. + arrow_schema (google.cloud.bigquery_storage_v1beta2.types.ArrowSchema): + Output only. Arrow schema. + table (str): + Immutable. Table that this ReadSession is reading from, in + the form + \`projects/{project_id}/datasets/{dataset_id}/tables/{table_id} + table_modifiers (google.cloud.bigquery_storage_v1beta2.types.ReadSession.TableModifiers): + Optional. Any modifiers which are applied + when reading from the specified table. + read_options (google.cloud.bigquery_storage_v1beta2.types.ReadSession.TableReadOptions): + Optional. Read options for this session (e.g. + column selection, filters). + streams (Sequence[google.cloud.bigquery_storage_v1beta2.types.ReadStream]): + Output only. A list of streams created with the session. + + At least one stream is created with the session. In the + future, larger request_stream_count values *may* result in + this list being unpopulated, in that case, the user will + need to use a List method to get the streams instead, which + is not yet available. + """ + + class TableModifiers(proto.Message): + r"""Additional attributes when reading a table. + + Attributes: + snapshot_time (google.protobuf.timestamp_pb2.Timestamp): + The snapshot time of the table. If not set, + interpreted as now. + """ + + snapshot_time = proto.Field( + proto.MESSAGE, number=1, message=timestamp.Timestamp, + ) + + class TableReadOptions(proto.Message): + r"""Options dictating how we read a table. + + Attributes: + selected_fields (Sequence[str]): + Names of the fields in the table that should be read. If + empty, all fields will be read. If the specified field is a + nested field, all the sub-fields in the field will be + selected. The output field order is unrelated to the order + of fields in selected_fields. + row_restriction (str): + SQL text filtering statement, similar to a WHERE clause in a + query. Aggregates are not supported. + + Examples: "int_field > 5" "date_field = CAST('2014-9-27' as + DATE)" "nullable_field is not NULL" "st_equals(geo_field, + st_geofromtext("POINT(2, 2)"))" "numeric_field BETWEEN 1.0 + AND 5.0". + arrow_serialization_options (google.cloud.bigquery_storage_v1beta2.types.ArrowSerializationOptions): + Optional. Options specific to the Apache + Arrow output format. + """ + + selected_fields = proto.RepeatedField(proto.STRING, number=1) + + row_restriction = proto.Field(proto.STRING, number=2) + + arrow_serialization_options = proto.Field( + proto.MESSAGE, number=3, message=arrow.ArrowSerializationOptions, + ) + + name = proto.Field(proto.STRING, number=1) + + expire_time = proto.Field(proto.MESSAGE, number=2, message=timestamp.Timestamp,) + + data_format = proto.Field(proto.ENUM, number=3, enum="DataFormat",) + + avro_schema = proto.Field( + proto.MESSAGE, number=4, oneof="schema", message=avro.AvroSchema, + ) + + arrow_schema = proto.Field( + proto.MESSAGE, number=5, oneof="schema", message=arrow.ArrowSchema, + ) + + table = proto.Field(proto.STRING, number=6) + + table_modifiers = proto.Field(proto.MESSAGE, number=7, message=TableModifiers,) + + read_options = proto.Field(proto.MESSAGE, number=8, message=TableReadOptions,) + + streams = proto.RepeatedField(proto.MESSAGE, number=10, message="ReadStream",) + + +class ReadStream(proto.Message): + r"""Information about a single stream that gets data out of the storage + system. Most of the information about ``ReadStream`` instances is + aggregated, making ``ReadStream`` lightweight. 
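# ----------------------------------------------------------------------------
# Editor's illustrative aside (not part of the generated file or this patch):
# a minimal ``ReadSession`` using the ``TableReadOptions`` documented above to
# select two (hypothetical) columns and apply a row filter. The table path is
# a placeholder.
from google.cloud.bigquery_storage_v1beta2 import types

requested_session = types.ReadSession(
    table="projects/my-project/datasets/my_dataset/tables/my_table",
    data_format=types.DataFormat.ARROW,
    read_options=types.ReadSession.TableReadOptions(
        selected_fields=["name", "age"],
        row_restriction="age > 18",
    ),
)
# ----------------------------------------------------------------------------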
+ + Attributes: + name (str): + Output only. Name of the stream, in the form + ``projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}``. + """ + + name = proto.Field(proto.STRING, number=1) + + +class WriteStream(proto.Message): + r"""Information about a single stream that gets data inside the + storage system. + + Attributes: + name (str): + Output only. Name of the stream, in the form + ``projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}``. + type_ (google.cloud.bigquery_storage_v1beta2.types.WriteStream.Type): + Immutable. Type of the stream. + create_time (google.protobuf.timestamp_pb2.Timestamp): + Output only. Create time of the stream. For the \_default + stream, this is the creation_time of the table. + commit_time (google.protobuf.timestamp_pb2.Timestamp): + Output only. Commit time of the stream. If a stream is of + ``COMMITTED`` type, then it will have a commit_time same as + ``create_time``. If the stream is of ``PENDING`` type, + commit_time being empty means it is not committed. + table_schema (google.cloud.bigquery_storage_v1beta2.types.TableSchema): + Output only. The schema of the destination table. It is only + returned in ``CreateWriteStream`` response. Caller should + generate data that's compatible with this schema to send in + initial ``AppendRowsRequest``. The table schema could go out + of date during the life time of the stream. + """ + + class Type(proto.Enum): + r"""Type enum of the stream.""" + TYPE_UNSPECIFIED = 0 + COMMITTED = 1 + PENDING = 2 + BUFFERED = 3 + + name = proto.Field(proto.STRING, number=1) + + type_ = proto.Field(proto.ENUM, number=2, enum=Type,) + + create_time = proto.Field(proto.MESSAGE, number=3, message=timestamp.Timestamp,) + + commit_time = proto.Field(proto.MESSAGE, number=4, message=timestamp.Timestamp,) + + table_schema = proto.Field(proto.MESSAGE, number=5, message=gcbs_table.TableSchema,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_storage_v1beta2/types/table.py b/google/cloud/bigquery_storage_v1beta2/types/table.py new file mode 100644 index 00000000..f1a209e0 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/types/table.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.storage.v1beta2", + manifest={"TableSchema", "TableFieldSchema",}, +) + + +class TableSchema(proto.Message): + r"""Schema of a table + + Attributes: + fields (Sequence[google.cloud.bigquery_storage_v1beta2.types.TableFieldSchema]): + Describes the fields in a table. + """ + + fields = proto.RepeatedField(proto.MESSAGE, number=1, message="TableFieldSchema",) + + +class TableFieldSchema(proto.Message): + r"""A field in TableSchema + + Attributes: + name (str): + Required. The field name. 
The name must contain only letters + (a-z, A-Z), numbers (0-9), or underscores (_), and must + start with a letter or underscore. The maximum length is 128 + characters. + type_ (google.cloud.bigquery_storage_v1beta2.types.TableFieldSchema.Type): + Required. The field data type. + mode (google.cloud.bigquery_storage_v1beta2.types.TableFieldSchema.Mode): + Optional. The field mode. The default value + is NULLABLE. + fields (Sequence[google.cloud.bigquery_storage_v1beta2.types.TableFieldSchema]): + Optional. Describes the nested schema fields + if the type property is set to STRUCT. + description (str): + Optional. The field description. The maximum + length is 1,024 characters. + """ + + class Type(proto.Enum): + r"""""" + TYPE_UNSPECIFIED = 0 + STRING = 1 + INT64 = 2 + DOUBLE = 3 + STRUCT = 4 + BYTES = 5 + BOOL = 6 + TIMESTAMP = 7 + DATE = 8 + TIME = 9 + DATETIME = 10 + GEOGRAPHY = 11 + NUMERIC = 12 + + class Mode(proto.Enum): + r"""""" + MODE_UNSPECIFIED = 0 + NULLABLE = 1 + REQUIRED = 2 + REPEATED = 3 + + name = proto.Field(proto.STRING, number=1) + + type_ = proto.Field(proto.ENUM, number=2, enum=Type,) + + mode = proto.Field(proto.ENUM, number=3, enum=Mode,) + + fields = proto.RepeatedField(proto.MESSAGE, number=4, message="TableFieldSchema",) + + description = proto.Field(proto.STRING, number=6) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/synth.py b/synth.py index fffa438f..21fb8b3a 100644 --- a/synth.py +++ b/synth.py @@ -38,6 +38,8 @@ "docs/conf.py", "docs/index.rst", f"google/cloud/bigquery_storage_{version}/__init__.py", + # v1beta2 was first generated after the microgenerator migration. + "scripts/fixup_bigquery_storage_v1beta2_keywords.py", "README.rst", "nox*.py", "setup.py", @@ -150,16 +152,27 @@ # The DataFormat enum is not exposed in bigquery_storage_v1/types, add it there. s.replace( - "google/cloud/bigquery_storage_v1/types/__init__.py", + "google/cloud/bigquery_storage_v1*/types/__init__.py", r"from \.stream import \(", "\g<0>\n DataFormat,", ) s.replace( - "google/cloud/bigquery_storage_v1/types/__init__.py", + "google/cloud/bigquery_storage_v1*/types/__init__.py", r"""["']ReadSession["']""", '"DataFormat",\n \g<0>', ) +# The append_rows method doesn't contain keyword arguments that build request +# objects, so flattened tests are not needed and break with TypeError. +s.replace( + 'tests/unit/gapic/bigquery_storage_v1*/test_big_query_write.py', + r"(@[a-z.()\n]*\n)?(async )?" + r"def test_append_rows_flattened[_a-z]*\(\):\n" + r"( {4}.*|\n)+", + '\n', +) + + # Fix library installations in nox sessions (unit and system tests) - it's # redundant to install the library twice. s.replace( @@ -168,19 +181,6 @@ ")\n", ) -# Fix test coverage plugin paths. 
-s.replace( - "noxfile.py", - r'"--cov=google\.cloud\.bigquerystorage"', - ( - '"--cov=google.cloud.bigquery_storage",\n' - ' "--cov=google.cloud.bigquery_storage_v1"' - ), -) -s.replace( - "noxfile.py", r"--cov=tests\.unit", "--cov=tests/unit", -) - # TODO(busunkim): Use latest sphinx after microgenerator transition s.replace("noxfile.py", """['"]sphinx['"]""", '"sphinx<3.0.0"') diff --git a/tests/unit/gapic/bigquery_storage_v1beta2/__init__.py b/tests/unit/gapic/bigquery_storage_v1beta2/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/unit/gapic/bigquery_storage_v1beta2/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/unit/gapic/bigquery_storage_v1beta2/test_big_query_read.py b/tests/unit/gapic/bigquery_storage_v1beta2/test_big_query_read.py new file mode 100644 index 00000000..c83a2593 --- /dev/null +++ b/tests/unit/gapic/bigquery_storage_v1beta2/test_big_query_read.py @@ -0,0 +1,1525 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import mock + +import grpc +from grpc.experimental import aio +import math +import pytest +from proto.marshal.rules.dates import DurationRule, TimestampRule + +from google import auth +from google.api_core import client_options +from google.api_core import exceptions +from google.api_core import gapic_v1 +from google.api_core import grpc_helpers +from google.api_core import grpc_helpers_async +from google.auth import credentials +from google.auth.exceptions import MutualTLSChannelError +from google.cloud.bigquery_storage_v1beta2.services.big_query_read import ( + BigQueryReadAsyncClient, +) +from google.cloud.bigquery_storage_v1beta2.services.big_query_read import ( + BigQueryReadClient, +) +from google.cloud.bigquery_storage_v1beta2.services.big_query_read import transports +from google.cloud.bigquery_storage_v1beta2.types import arrow +from google.cloud.bigquery_storage_v1beta2.types import avro +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream +from google.oauth2 import service_account +from google.protobuf import timestamp_pb2 as timestamp # type: ignore + + +def client_cert_source_callback(): + return b"cert bytes", b"key bytes" + + +# If default endpoint is localhost, then default mtls endpoint will be the same. +# This method modifies the default endpoint so the client can produce a different +# mtls endpoint for endpoint testing purposes. 
+def modify_default_endpoint(client): + return ( + "foo.googleapis.com" + if ("localhost" in client.DEFAULT_ENDPOINT) + else client.DEFAULT_ENDPOINT + ) + + +def test__get_default_mtls_endpoint(): + api_endpoint = "example.googleapis.com" + api_mtls_endpoint = "example.mtls.googleapis.com" + sandbox_endpoint = "example.sandbox.googleapis.com" + sandbox_mtls_endpoint = "example.mtls.sandbox.googleapis.com" + non_googleapi = "api.example.com" + + assert BigQueryReadClient._get_default_mtls_endpoint(None) is None + assert ( + BigQueryReadClient._get_default_mtls_endpoint(api_endpoint) == api_mtls_endpoint + ) + assert ( + BigQueryReadClient._get_default_mtls_endpoint(api_mtls_endpoint) + == api_mtls_endpoint + ) + assert ( + BigQueryReadClient._get_default_mtls_endpoint(sandbox_endpoint) + == sandbox_mtls_endpoint + ) + assert ( + BigQueryReadClient._get_default_mtls_endpoint(sandbox_mtls_endpoint) + == sandbox_mtls_endpoint + ) + assert BigQueryReadClient._get_default_mtls_endpoint(non_googleapi) == non_googleapi + + +def test_big_query_read_client_from_service_account_info(): + creds = credentials.AnonymousCredentials() + with mock.patch.object( + service_account.Credentials, "from_service_account_info" + ) as factory: + factory.return_value = creds + info = {"valid": True} + client = BigQueryReadClient.from_service_account_info(info) + assert client.transport._credentials == creds + + assert client.transport._host == "bigquerystorage.googleapis.com:443" + + +@pytest.mark.parametrize("client_class", [BigQueryReadClient, BigQueryReadAsyncClient,]) +def test_big_query_read_client_from_service_account_file(client_class): + creds = credentials.AnonymousCredentials() + with mock.patch.object( + service_account.Credentials, "from_service_account_file" + ) as factory: + factory.return_value = creds + client = client_class.from_service_account_file("dummy/file/path.json") + assert client.transport._credentials == creds + + client = client_class.from_service_account_json("dummy/file/path.json") + assert client.transport._credentials == creds + + assert client.transport._host == "bigquerystorage.googleapis.com:443" + + +def test_big_query_read_client_get_transport_class(): + transport = BigQueryReadClient.get_transport_class() + available_transports = [ + transports.BigQueryReadGrpcTransport, + ] + assert transport in available_transports + + transport = BigQueryReadClient.get_transport_class("grpc") + assert transport == transports.BigQueryReadGrpcTransport + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + (BigQueryReadClient, transports.BigQueryReadGrpcTransport, "grpc"), + ( + BigQueryReadAsyncClient, + transports.BigQueryReadGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +@mock.patch.object( + BigQueryReadClient, "DEFAULT_ENDPOINT", modify_default_endpoint(BigQueryReadClient) +) +@mock.patch.object( + BigQueryReadAsyncClient, + "DEFAULT_ENDPOINT", + modify_default_endpoint(BigQueryReadAsyncClient), +) +def test_big_query_read_client_client_options( + client_class, transport_class, transport_name +): + # Check that if channel is provided we won't create a new one. + with mock.patch.object(BigQueryReadClient, "get_transport_class") as gtc: + transport = transport_class(credentials=credentials.AnonymousCredentials()) + client = client_class(transport=transport) + gtc.assert_not_called() + + # Check that if channel is provided via str we will create a new one. 
+ with mock.patch.object(BigQueryReadClient, "get_transport_class") as gtc: + client = client_class(transport=transport_name) + gtc.assert_called() + + # Check the case api_endpoint is provided. + options = client_options.ClientOptions(api_endpoint="squid.clam.whelk") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host="squid.clam.whelk", + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT is + # "never". + with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "never"}): + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT is + # "always". + with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "always"}): + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_MTLS_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT has + # unsupported value. + with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "Unsupported"}): + with pytest.raises(MutualTLSChannelError): + client = client_class() + + # Check the case GOOGLE_API_USE_CLIENT_CERTIFICATE has unsupported value. 
+ with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": "Unsupported"} + ): + with pytest.raises(ValueError): + client = client_class() + + # Check the case quota_project_id is provided + options = client_options.ClientOptions(quota_project_id="octopus") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id="octopus", + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name,use_client_cert_env", + [ + (BigQueryReadClient, transports.BigQueryReadGrpcTransport, "grpc", "true"), + ( + BigQueryReadAsyncClient, + transports.BigQueryReadGrpcAsyncIOTransport, + "grpc_asyncio", + "true", + ), + (BigQueryReadClient, transports.BigQueryReadGrpcTransport, "grpc", "false"), + ( + BigQueryReadAsyncClient, + transports.BigQueryReadGrpcAsyncIOTransport, + "grpc_asyncio", + "false", + ), + ], +) +@mock.patch.object( + BigQueryReadClient, "DEFAULT_ENDPOINT", modify_default_endpoint(BigQueryReadClient) +) +@mock.patch.object( + BigQueryReadAsyncClient, + "DEFAULT_ENDPOINT", + modify_default_endpoint(BigQueryReadAsyncClient), +) +@mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "auto"}) +def test_big_query_read_client_mtls_env_auto( + client_class, transport_class, transport_name, use_client_cert_env +): + # This tests the endpoint autoswitch behavior. Endpoint is autoswitched to the default + # mtls endpoint, if GOOGLE_API_USE_CLIENT_CERTIFICATE is "true" and client cert exists. + + # Check the case client_cert_source is provided. Whether client cert is used depends on + # GOOGLE_API_USE_CLIENT_CERTIFICATE value. + with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + options = client_options.ClientOptions( + client_cert_source=client_cert_source_callback + ) + with mock.patch.object(transport_class, "__init__") as patched: + ssl_channel_creds = mock.Mock() + with mock.patch( + "grpc.ssl_channel_credentials", return_value=ssl_channel_creds + ): + patched.return_value = None + client = client_class(client_options=options) + + if use_client_cert_env == "false": + expected_ssl_channel_creds = None + expected_host = client.DEFAULT_ENDPOINT + else: + expected_ssl_channel_creds = ssl_channel_creds + expected_host = client.DEFAULT_MTLS_ENDPOINT + + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=expected_host, + scopes=None, + ssl_channel_credentials=expected_ssl_channel_creds, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case ADC client cert is provided. Whether client cert is used depends on + # GOOGLE_API_USE_CLIENT_CERTIFICATE value. 
+ with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + with mock.patch.object(transport_class, "__init__") as patched: + with mock.patch( + "google.auth.transport.grpc.SslCredentials.__init__", return_value=None + ): + with mock.patch( + "google.auth.transport.grpc.SslCredentials.is_mtls", + new_callable=mock.PropertyMock, + ) as is_mtls_mock: + with mock.patch( + "google.auth.transport.grpc.SslCredentials.ssl_credentials", + new_callable=mock.PropertyMock, + ) as ssl_credentials_mock: + if use_client_cert_env == "false": + is_mtls_mock.return_value = False + ssl_credentials_mock.return_value = None + expected_host = client.DEFAULT_ENDPOINT + expected_ssl_channel_creds = None + else: + is_mtls_mock.return_value = True + ssl_credentials_mock.return_value = mock.Mock() + expected_host = client.DEFAULT_MTLS_ENDPOINT + expected_ssl_channel_creds = ( + ssl_credentials_mock.return_value + ) + + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=expected_host, + scopes=None, + ssl_channel_credentials=expected_ssl_channel_creds, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case client_cert_source and ADC client cert are not provided. + with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + with mock.patch.object(transport_class, "__init__") as patched: + with mock.patch( + "google.auth.transport.grpc.SslCredentials.__init__", return_value=None + ): + with mock.patch( + "google.auth.transport.grpc.SslCredentials.is_mtls", + new_callable=mock.PropertyMock, + ) as is_mtls_mock: + is_mtls_mock.return_value = False + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + (BigQueryReadClient, transports.BigQueryReadGrpcTransport, "grpc"), + ( + BigQueryReadAsyncClient, + transports.BigQueryReadGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +def test_big_query_read_client_client_options_scopes( + client_class, transport_class, transport_name +): + # Check the case scopes are provided. + options = client_options.ClientOptions(scopes=["1", "2"],) + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=["1", "2"], + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + (BigQueryReadClient, transports.BigQueryReadGrpcTransport, "grpc"), + ( + BigQueryReadAsyncClient, + transports.BigQueryReadGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +def test_big_query_read_client_client_options_credentials_file( + client_class, transport_class, transport_name +): + # Check the case credentials file is provided. 
+ options = client_options.ClientOptions(credentials_file="credentials.json") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file="credentials.json", + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +def test_big_query_read_client_client_options_from_dict(): + with mock.patch( + "google.cloud.bigquery_storage_v1beta2.services.big_query_read.transports.BigQueryReadGrpcTransport.__init__" + ) as grpc_transport: + grpc_transport.return_value = None + client = BigQueryReadClient(client_options={"api_endpoint": "squid.clam.whelk"}) + grpc_transport.assert_called_once_with( + credentials=None, + credentials_file=None, + host="squid.clam.whelk", + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +def test_create_read_session( + transport: str = "grpc", request_type=storage.CreateReadSessionRequest +): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_read_session), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = stream.ReadSession( + name="name_value", + data_format=stream.DataFormat.AVRO, + table="table_value", + avro_schema=avro.AvroSchema(schema="schema_value"), + ) + + response = client.create_read_session(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.CreateReadSessionRequest() + + # Establish that the response is the type that we expect. + + assert isinstance(response, stream.ReadSession) + + assert response.name == "name_value" + + assert response.data_format == stream.DataFormat.AVRO + + assert response.table == "table_value" + + +def test_create_read_session_from_dict(): + test_create_read_session(request_type=dict) + + +@pytest.mark.asyncio +async def test_create_read_session_async( + transport: str = "grpc_asyncio", request_type=storage.CreateReadSessionRequest +): + client = BigQueryReadAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_read_session), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + stream.ReadSession( + name="name_value", + data_format=stream.DataFormat.AVRO, + table="table_value", + ) + ) + + response = await client.create_read_session(request) + + # Establish that the underlying gRPC stub method was called. 
+ assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.CreateReadSessionRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, stream.ReadSession) + + assert response.name == "name_value" + + assert response.data_format == stream.DataFormat.AVRO + + assert response.table == "table_value" + + +@pytest.mark.asyncio +async def test_create_read_session_async_from_dict(): + await test_create_read_session_async(request_type=dict) + + +def test_create_read_session_field_headers(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.CreateReadSessionRequest() + request.read_session.table = "read_session.table/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_read_session), "__call__" + ) as call: + call.return_value = stream.ReadSession() + + client.create_read_session(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "read_session.table=read_session.table/value", + ) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_create_read_session_field_headers_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.CreateReadSessionRequest() + request.read_session.table = "read_session.table/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_read_session), "__call__" + ) as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(stream.ReadSession()) + + await client.create_read_session(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "read_session.table=read_session.table/value", + ) in kw["metadata"] + + +def test_create_read_session_flattened(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_read_session), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = stream.ReadSession() + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.create_read_session( + parent="parent_value", + read_session=stream.ReadSession(name="name_value"), + max_stream_count=1721, + ) + + # Establish that the underlying call was made with the expected + # request object values. 
+ assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].parent == "parent_value" + + assert args[0].read_session == stream.ReadSession(name="name_value") + + assert args[0].max_stream_count == 1721 + + +def test_create_read_session_flattened_error(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.create_read_session( + storage.CreateReadSessionRequest(), + parent="parent_value", + read_session=stream.ReadSession(name="name_value"), + max_stream_count=1721, + ) + + +@pytest.mark.asyncio +async def test_create_read_session_flattened_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_read_session), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = stream.ReadSession() + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(stream.ReadSession()) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.create_read_session( + parent="parent_value", + read_session=stream.ReadSession(name="name_value"), + max_stream_count=1721, + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].parent == "parent_value" + + assert args[0].read_session == stream.ReadSession(name="name_value") + + assert args[0].max_stream_count == 1721 + + +@pytest.mark.asyncio +async def test_create_read_session_flattened_error_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.create_read_session( + storage.CreateReadSessionRequest(), + parent="parent_value", + read_session=stream.ReadSession(name="name_value"), + max_stream_count=1721, + ) + + +def test_read_rows(transport: str = "grpc", request_type=storage.ReadRowsRequest): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.read_rows), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = iter([storage.ReadRowsResponse()]) + + response = client.read_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.ReadRowsRequest() + + # Establish that the response is the type that we expect. 
+ for message in response: + assert isinstance(message, storage.ReadRowsResponse) + + +def test_read_rows_from_dict(): + test_read_rows(request_type=dict) + + +@pytest.mark.asyncio +async def test_read_rows_async( + transport: str = "grpc_asyncio", request_type=storage.ReadRowsRequest +): + client = BigQueryReadAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.read_rows), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + call.return_value.read = mock.AsyncMock( + side_effect=[storage.ReadRowsResponse()] + ) + + response = await client.read_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.ReadRowsRequest() + + # Establish that the response is the type that we expect. + message = await response.read() + assert isinstance(message, storage.ReadRowsResponse) + + +@pytest.mark.asyncio +async def test_read_rows_async_from_dict(): + await test_read_rows_async(request_type=dict) + + +def test_read_rows_field_headers(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.ReadRowsRequest() + request.read_stream = "read_stream/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.read_rows), "__call__") as call: + call.return_value = iter([storage.ReadRowsResponse()]) + + client.read_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "read_stream=read_stream/value",) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_read_rows_field_headers_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.ReadRowsRequest() + request.read_stream = "read_stream/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.read_rows), "__call__") as call: + call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + call.return_value.read = mock.AsyncMock( + side_effect=[storage.ReadRowsResponse()] + ) + + await client.read_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "read_stream=read_stream/value",) in kw["metadata"] + + +def test_read_rows_flattened(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object(type(client.transport.read_rows), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = iter([storage.ReadRowsResponse()]) + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.read_rows( + read_stream="read_stream_value", offset=647, + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].read_stream == "read_stream_value" + + assert args[0].offset == 647 + + +def test_read_rows_flattened_error(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.read_rows( + storage.ReadRowsRequest(), read_stream="read_stream_value", offset=647, + ) + + +@pytest.mark.asyncio +async def test_read_rows_flattened_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.read_rows), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = iter([storage.ReadRowsResponse()]) + + call.return_value = mock.Mock(aio.UnaryStreamCall, autospec=True) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.read_rows(read_stream="read_stream_value", offset=647,) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].read_stream == "read_stream_value" + + assert args[0].offset == 647 + + +@pytest.mark.asyncio +async def test_read_rows_flattened_error_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.read_rows( + storage.ReadRowsRequest(), read_stream="read_stream_value", offset=647, + ) + + +def test_split_read_stream( + transport: str = "grpc", request_type=storage.SplitReadStreamRequest +): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.split_read_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = storage.SplitReadStreamResponse() + + response = client.split_read_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.SplitReadStreamRequest() + + # Establish that the response is the type that we expect. 
+ + assert isinstance(response, storage.SplitReadStreamResponse) + + +def test_split_read_stream_from_dict(): + test_split_read_stream(request_type=dict) + + +@pytest.mark.asyncio +async def test_split_read_stream_async( + transport: str = "grpc_asyncio", request_type=storage.SplitReadStreamRequest +): + client = BigQueryReadAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.split_read_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.SplitReadStreamResponse() + ) + + response = await client.split_read_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.SplitReadStreamRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, storage.SplitReadStreamResponse) + + +@pytest.mark.asyncio +async def test_split_read_stream_async_from_dict(): + await test_split_read_stream_async(request_type=dict) + + +def test_split_read_stream_field_headers(): + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.SplitReadStreamRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.split_read_stream), "__call__" + ) as call: + call.return_value = storage.SplitReadStreamResponse() + + client.split_read_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_split_read_stream_field_headers_async(): + client = BigQueryReadAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.SplitReadStreamRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.split_read_stream), "__call__" + ) as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.SplitReadStreamResponse() + ) + + await client.split_read_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +def test_credentials_transport_error(): + # It is an error to provide credentials and a transport instance. 
+ transport = transports.BigQueryReadGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # It is an error to provide a credentials file and a transport instance. + transport = transports.BigQueryReadGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = BigQueryReadClient( + client_options={"credentials_file": "credentials.json"}, + transport=transport, + ) + + # It is an error to provide scopes and a transport instance. + transport = transports.BigQueryReadGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = BigQueryReadClient( + client_options={"scopes": ["1", "2"]}, transport=transport, + ) + + +def test_transport_instance(): + # A client may be instantiated with a custom transport instance. + transport = transports.BigQueryReadGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + client = BigQueryReadClient(transport=transport) + assert client.transport is transport + + +def test_transport_get_channel(): + # A client may be instantiated with a custom transport instance. + transport = transports.BigQueryReadGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + channel = transport.grpc_channel + assert channel + + transport = transports.BigQueryReadGrpcAsyncIOTransport( + credentials=credentials.AnonymousCredentials(), + ) + channel = transport.grpc_channel + assert channel + + +@pytest.mark.parametrize( + "transport_class", + [ + transports.BigQueryReadGrpcTransport, + transports.BigQueryReadGrpcAsyncIOTransport, + ], +) +def test_transport_adc(transport_class): + # Test default credentials are used if not provided. + with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + transport_class() + adc.assert_called_once() + + +def test_transport_grpc_default(): + # A client should use the gRPC transport by default. + client = BigQueryReadClient(credentials=credentials.AnonymousCredentials(),) + assert isinstance(client.transport, transports.BigQueryReadGrpcTransport,) + + +def test_big_query_read_base_transport_error(): + # Passing both a credentials object and credentials_file should raise an error + with pytest.raises(exceptions.DuplicateCredentialArgs): + transport = transports.BigQueryReadTransport( + credentials=credentials.AnonymousCredentials(), + credentials_file="credentials.json", + ) + + +def test_big_query_read_base_transport(): + # Instantiate the base transport. + with mock.patch( + "google.cloud.bigquery_storage_v1beta2.services.big_query_read.transports.BigQueryReadTransport.__init__" + ) as Transport: + Transport.return_value = None + transport = transports.BigQueryReadTransport( + credentials=credentials.AnonymousCredentials(), + ) + + # Every method on the transport should just blindly + # raise NotImplementedError. 
+ methods = ( + "create_read_session", + "read_rows", + "split_read_stream", + ) + for method in methods: + with pytest.raises(NotImplementedError): + getattr(transport, method)(request=object()) + + +def test_big_query_read_base_transport_with_credentials_file(): + # Instantiate the base transport with a credentials file + with mock.patch.object( + auth, "load_credentials_from_file" + ) as load_creds, mock.patch( + "google.cloud.bigquery_storage_v1beta2.services.big_query_read.transports.BigQueryReadTransport._prep_wrapped_messages" + ) as Transport: + Transport.return_value = None + load_creds.return_value = (credentials.AnonymousCredentials(), None) + transport = transports.BigQueryReadTransport( + credentials_file="credentials.json", quota_project_id="octopus", + ) + load_creds.assert_called_once_with( + "credentials.json", + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ), + quota_project_id="octopus", + ) + + +def test_big_query_read_base_transport_with_adc(): + # Test the default credentials are used if credentials and credentials_file are None. + with mock.patch.object(auth, "default") as adc, mock.patch( + "google.cloud.bigquery_storage_v1beta2.services.big_query_read.transports.BigQueryReadTransport._prep_wrapped_messages" + ) as Transport: + Transport.return_value = None + adc.return_value = (credentials.AnonymousCredentials(), None) + transport = transports.BigQueryReadTransport() + adc.assert_called_once() + + +def test_big_query_read_auth_adc(): + # If no credentials are provided, we should use ADC credentials. + with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + BigQueryReadClient() + adc.assert_called_once_with( + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ), + quota_project_id=None, + ) + + +def test_big_query_read_transport_auth_adc(): + # If credentials and host are not provided, the transport class should use + # ADC credentials. + with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + transports.BigQueryReadGrpcTransport( + host="squid.clam.whelk", quota_project_id="octopus" + ) + adc.assert_called_once_with( + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ), + quota_project_id="octopus", + ) + + +def test_big_query_read_host_no_port(): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), + client_options=client_options.ClientOptions( + api_endpoint="bigquerystorage.googleapis.com" + ), + ) + assert client.transport._host == "bigquerystorage.googleapis.com:443" + + +def test_big_query_read_host_with_port(): + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), + client_options=client_options.ClientOptions( + api_endpoint="bigquerystorage.googleapis.com:8000" + ), + ) + assert client.transport._host == "bigquerystorage.googleapis.com:8000" + + +def test_big_query_read_grpc_transport_channel(): + channel = grpc.secure_channel("http://localhost/", grpc.local_channel_credentials()) + + # Check that channel is used if provided. 
+ transport = transports.BigQueryReadGrpcTransport( + host="squid.clam.whelk", channel=channel, + ) + assert transport.grpc_channel == channel + assert transport._host == "squid.clam.whelk:443" + assert transport._ssl_channel_credentials == None + + +def test_big_query_read_grpc_asyncio_transport_channel(): + channel = aio.secure_channel("http://localhost/", grpc.local_channel_credentials()) + + # Check that channel is used if provided. + transport = transports.BigQueryReadGrpcAsyncIOTransport( + host="squid.clam.whelk", channel=channel, + ) + assert transport.grpc_channel == channel + assert transport._host == "squid.clam.whelk:443" + assert transport._ssl_channel_credentials == None + + +@pytest.mark.parametrize( + "transport_class", + [transports.BigQueryReadGrpcTransport, transports.BigQueryReadGrpcAsyncIOTransport], +) +def test_big_query_read_transport_channel_mtls_with_client_cert_source(transport_class): + with mock.patch( + "grpc.ssl_channel_credentials", autospec=True + ) as grpc_ssl_channel_cred: + with mock.patch.object( + transport_class, "create_channel" + ) as grpc_create_channel: + mock_ssl_cred = mock.Mock() + grpc_ssl_channel_cred.return_value = mock_ssl_cred + + mock_grpc_channel = mock.Mock() + grpc_create_channel.return_value = mock_grpc_channel + + cred = credentials.AnonymousCredentials() + with pytest.warns(DeprecationWarning): + with mock.patch.object(auth, "default") as adc: + adc.return_value = (cred, None) + transport = transport_class( + host="squid.clam.whelk", + api_mtls_endpoint="mtls.squid.clam.whelk", + client_cert_source=client_cert_source_callback, + ) + adc.assert_called_once() + + grpc_ssl_channel_cred.assert_called_once_with( + certificate_chain=b"cert bytes", private_key=b"key bytes" + ) + grpc_create_channel.assert_called_once_with( + "mtls.squid.clam.whelk:443", + credentials=cred, + credentials_file=None, + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ), + ssl_credentials=mock_ssl_cred, + quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + assert transport.grpc_channel == mock_grpc_channel + assert transport._ssl_channel_credentials == mock_ssl_cred + + +@pytest.mark.parametrize( + "transport_class", + [transports.BigQueryReadGrpcTransport, transports.BigQueryReadGrpcAsyncIOTransport], +) +def test_big_query_read_transport_channel_mtls_with_adc(transport_class): + mock_ssl_cred = mock.Mock() + with mock.patch.multiple( + "google.auth.transport.grpc.SslCredentials", + __init__=mock.Mock(return_value=None), + ssl_credentials=mock.PropertyMock(return_value=mock_ssl_cred), + ): + with mock.patch.object( + transport_class, "create_channel" + ) as grpc_create_channel: + mock_grpc_channel = mock.Mock() + grpc_create_channel.return_value = mock_grpc_channel + mock_cred = mock.Mock() + + with pytest.warns(DeprecationWarning): + transport = transport_class( + host="squid.clam.whelk", + credentials=mock_cred, + api_mtls_endpoint="mtls.squid.clam.whelk", + client_cert_source=None, + ) + + grpc_create_channel.assert_called_once_with( + "mtls.squid.clam.whelk:443", + credentials=mock_cred, + credentials_file=None, + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.readonly", + "https://www.googleapis.com/auth/cloud-platform", + ), + ssl_credentials=mock_ssl_cred, + quota_project_id=None, + options=[ + 
("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + assert transport.grpc_channel == mock_grpc_channel + + +def test_read_session_path(): + project = "squid" + location = "clam" + session = "whelk" + + expected = "projects/{project}/locations/{location}/sessions/{session}".format( + project=project, location=location, session=session, + ) + actual = BigQueryReadClient.read_session_path(project, location, session) + assert expected == actual + + +def test_parse_read_session_path(): + expected = { + "project": "octopus", + "location": "oyster", + "session": "nudibranch", + } + path = BigQueryReadClient.read_session_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryReadClient.parse_read_session_path(path) + assert expected == actual + + +def test_read_stream_path(): + project = "cuttlefish" + location = "mussel" + session = "winkle" + stream = "nautilus" + + expected = "projects/{project}/locations/{location}/sessions/{session}/streams/{stream}".format( + project=project, location=location, session=session, stream=stream, + ) + actual = BigQueryReadClient.read_stream_path(project, location, session, stream) + assert expected == actual + + +def test_parse_read_stream_path(): + expected = { + "project": "scallop", + "location": "abalone", + "session": "squid", + "stream": "clam", + } + path = BigQueryReadClient.read_stream_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryReadClient.parse_read_stream_path(path) + assert expected == actual + + +def test_table_path(): + project = "whelk" + dataset = "octopus" + table = "oyster" + + expected = "projects/{project}/datasets/{dataset}/tables/{table}".format( + project=project, dataset=dataset, table=table, + ) + actual = BigQueryReadClient.table_path(project, dataset, table) + assert expected == actual + + +def test_parse_table_path(): + expected = { + "project": "nudibranch", + "dataset": "cuttlefish", + "table": "mussel", + } + path = BigQueryReadClient.table_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryReadClient.parse_table_path(path) + assert expected == actual + + +def test_common_billing_account_path(): + billing_account = "winkle" + + expected = "billingAccounts/{billing_account}".format( + billing_account=billing_account, + ) + actual = BigQueryReadClient.common_billing_account_path(billing_account) + assert expected == actual + + +def test_parse_common_billing_account_path(): + expected = { + "billing_account": "nautilus", + } + path = BigQueryReadClient.common_billing_account_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryReadClient.parse_common_billing_account_path(path) + assert expected == actual + + +def test_common_folder_path(): + folder = "scallop" + + expected = "folders/{folder}".format(folder=folder,) + actual = BigQueryReadClient.common_folder_path(folder) + assert expected == actual + + +def test_parse_common_folder_path(): + expected = { + "folder": "abalone", + } + path = BigQueryReadClient.common_folder_path(**expected) + + # Check that the path construction is reversible. 
+ actual = BigQueryReadClient.parse_common_folder_path(path) + assert expected == actual + + +def test_common_organization_path(): + organization = "squid" + + expected = "organizations/{organization}".format(organization=organization,) + actual = BigQueryReadClient.common_organization_path(organization) + assert expected == actual + + +def test_parse_common_organization_path(): + expected = { + "organization": "clam", + } + path = BigQueryReadClient.common_organization_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryReadClient.parse_common_organization_path(path) + assert expected == actual + + +def test_common_project_path(): + project = "whelk" + + expected = "projects/{project}".format(project=project,) + actual = BigQueryReadClient.common_project_path(project) + assert expected == actual + + +def test_parse_common_project_path(): + expected = { + "project": "octopus", + } + path = BigQueryReadClient.common_project_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryReadClient.parse_common_project_path(path) + assert expected == actual + + +def test_common_location_path(): + project = "oyster" + location = "nudibranch" + + expected = "projects/{project}/locations/{location}".format( + project=project, location=location, + ) + actual = BigQueryReadClient.common_location_path(project, location) + assert expected == actual + + +def test_parse_common_location_path(): + expected = { + "project": "cuttlefish", + "location": "mussel", + } + path = BigQueryReadClient.common_location_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryReadClient.parse_common_location_path(path) + assert expected == actual + + +def test_client_withDEFAULT_CLIENT_INFO(): + client_info = gapic_v1.client_info.ClientInfo() + + with mock.patch.object( + transports.BigQueryReadTransport, "_prep_wrapped_messages" + ) as prep: + client = BigQueryReadClient( + credentials=credentials.AnonymousCredentials(), client_info=client_info, + ) + prep.assert_called_once_with(client_info) + + with mock.patch.object( + transports.BigQueryReadTransport, "_prep_wrapped_messages" + ) as prep: + transport_class = BigQueryReadClient.get_transport_class() + transport = transport_class( + credentials=credentials.AnonymousCredentials(), client_info=client_info, + ) + prep.assert_called_once_with(client_info) diff --git a/tests/unit/gapic/bigquery_storage_v1beta2/test_big_query_write.py b/tests/unit/gapic/bigquery_storage_v1beta2/test_big_query_write.py new file mode 100644 index 00000000..98355b8a --- /dev/null +++ b/tests/unit/gapic/bigquery_storage_v1beta2/test_big_query_write.py @@ -0,0 +1,2043 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import mock + +import grpc +from grpc.experimental import aio +import math +import pytest +from proto.marshal.rules.dates import DurationRule, TimestampRule + +from google import auth +from google.api_core import client_options +from google.api_core import exceptions +from google.api_core import gapic_v1 +from google.api_core import grpc_helpers +from google.api_core import grpc_helpers_async +from google.auth import credentials +from google.auth.exceptions import MutualTLSChannelError +from google.cloud.bigquery_storage_v1beta2.services.big_query_write import ( + BigQueryWriteAsyncClient, +) +from google.cloud.bigquery_storage_v1beta2.services.big_query_write import ( + BigQueryWriteClient, +) +from google.cloud.bigquery_storage_v1beta2.services.big_query_write import transports +from google.cloud.bigquery_storage_v1beta2.types import protobuf +from google.cloud.bigquery_storage_v1beta2.types import storage +from google.cloud.bigquery_storage_v1beta2.types import stream +from google.cloud.bigquery_storage_v1beta2.types import table +from google.oauth2 import service_account +from google.protobuf import descriptor_pb2 as descriptor # type: ignore +from google.protobuf import timestamp_pb2 as timestamp # type: ignore +from google.protobuf import wrappers_pb2 as wrappers # type: ignore +from google.rpc import status_pb2 as status # type: ignore + + +def client_cert_source_callback(): + return b"cert bytes", b"key bytes" + + +# If default endpoint is localhost, then default mtls endpoint will be the same. +# This method modifies the default endpoint so the client can produce a different +# mtls endpoint for endpoint testing purposes. +def modify_default_endpoint(client): + return ( + "foo.googleapis.com" + if ("localhost" in client.DEFAULT_ENDPOINT) + else client.DEFAULT_ENDPOINT + ) + + +def test__get_default_mtls_endpoint(): + api_endpoint = "example.googleapis.com" + api_mtls_endpoint = "example.mtls.googleapis.com" + sandbox_endpoint = "example.sandbox.googleapis.com" + sandbox_mtls_endpoint = "example.mtls.sandbox.googleapis.com" + non_googleapi = "api.example.com" + + assert BigQueryWriteClient._get_default_mtls_endpoint(None) is None + assert ( + BigQueryWriteClient._get_default_mtls_endpoint(api_endpoint) + == api_mtls_endpoint + ) + assert ( + BigQueryWriteClient._get_default_mtls_endpoint(api_mtls_endpoint) + == api_mtls_endpoint + ) + assert ( + BigQueryWriteClient._get_default_mtls_endpoint(sandbox_endpoint) + == sandbox_mtls_endpoint + ) + assert ( + BigQueryWriteClient._get_default_mtls_endpoint(sandbox_mtls_endpoint) + == sandbox_mtls_endpoint + ) + assert ( + BigQueryWriteClient._get_default_mtls_endpoint(non_googleapi) == non_googleapi + ) + + +def test_big_query_write_client_from_service_account_info(): + creds = credentials.AnonymousCredentials() + with mock.patch.object( + service_account.Credentials, "from_service_account_info" + ) as factory: + factory.return_value = creds + info = {"valid": True} + client = BigQueryWriteClient.from_service_account_info(info) + assert client.transport._credentials == creds + + assert client.transport._host == "bigquerystorage.googleapis.com:443" + + +@pytest.mark.parametrize( + "client_class", [BigQueryWriteClient, BigQueryWriteAsyncClient,] +) +def test_big_query_write_client_from_service_account_file(client_class): + creds = credentials.AnonymousCredentials() + with mock.patch.object( + service_account.Credentials, "from_service_account_file" + ) as factory: + factory.return_value = creds + client = 
client_class.from_service_account_file("dummy/file/path.json") + assert client.transport._credentials == creds + + client = client_class.from_service_account_json("dummy/file/path.json") + assert client.transport._credentials == creds + + assert client.transport._host == "bigquerystorage.googleapis.com:443" + + +def test_big_query_write_client_get_transport_class(): + transport = BigQueryWriteClient.get_transport_class() + available_transports = [ + transports.BigQueryWriteGrpcTransport, + ] + assert transport in available_transports + + transport = BigQueryWriteClient.get_transport_class("grpc") + assert transport == transports.BigQueryWriteGrpcTransport + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + (BigQueryWriteClient, transports.BigQueryWriteGrpcTransport, "grpc"), + ( + BigQueryWriteAsyncClient, + transports.BigQueryWriteGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +@mock.patch.object( + BigQueryWriteClient, + "DEFAULT_ENDPOINT", + modify_default_endpoint(BigQueryWriteClient), +) +@mock.patch.object( + BigQueryWriteAsyncClient, + "DEFAULT_ENDPOINT", + modify_default_endpoint(BigQueryWriteAsyncClient), +) +def test_big_query_write_client_client_options( + client_class, transport_class, transport_name +): + # Check that if channel is provided we won't create a new one. + with mock.patch.object(BigQueryWriteClient, "get_transport_class") as gtc: + transport = transport_class(credentials=credentials.AnonymousCredentials()) + client = client_class(transport=transport) + gtc.assert_not_called() + + # Check that if channel is provided via str we will create a new one. + with mock.patch.object(BigQueryWriteClient, "get_transport_class") as gtc: + client = client_class(transport=transport_name) + gtc.assert_called() + + # Check the case api_endpoint is provided. + options = client_options.ClientOptions(api_endpoint="squid.clam.whelk") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host="squid.clam.whelk", + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT is + # "never". + with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "never"}): + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT is + # "always". + with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "always"}): + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_MTLS_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT has + # unsupported value. 
+ with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "Unsupported"}): + with pytest.raises(MutualTLSChannelError): + client = client_class() + + # Check the case GOOGLE_API_USE_CLIENT_CERTIFICATE has unsupported value. + with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": "Unsupported"} + ): + with pytest.raises(ValueError): + client = client_class() + + # Check the case quota_project_id is provided + options = client_options.ClientOptions(quota_project_id="octopus") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id="octopus", + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name,use_client_cert_env", + [ + (BigQueryWriteClient, transports.BigQueryWriteGrpcTransport, "grpc", "true"), + ( + BigQueryWriteAsyncClient, + transports.BigQueryWriteGrpcAsyncIOTransport, + "grpc_asyncio", + "true", + ), + (BigQueryWriteClient, transports.BigQueryWriteGrpcTransport, "grpc", "false"), + ( + BigQueryWriteAsyncClient, + transports.BigQueryWriteGrpcAsyncIOTransport, + "grpc_asyncio", + "false", + ), + ], +) +@mock.patch.object( + BigQueryWriteClient, + "DEFAULT_ENDPOINT", + modify_default_endpoint(BigQueryWriteClient), +) +@mock.patch.object( + BigQueryWriteAsyncClient, + "DEFAULT_ENDPOINT", + modify_default_endpoint(BigQueryWriteAsyncClient), +) +@mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "auto"}) +def test_big_query_write_client_mtls_env_auto( + client_class, transport_class, transport_name, use_client_cert_env +): + # This tests the endpoint autoswitch behavior. Endpoint is autoswitched to the default + # mtls endpoint, if GOOGLE_API_USE_CLIENT_CERTIFICATE is "true" and client cert exists. + + # Check the case client_cert_source is provided. Whether client cert is used depends on + # GOOGLE_API_USE_CLIENT_CERTIFICATE value. + with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + options = client_options.ClientOptions( + client_cert_source=client_cert_source_callback + ) + with mock.patch.object(transport_class, "__init__") as patched: + ssl_channel_creds = mock.Mock() + with mock.patch( + "grpc.ssl_channel_credentials", return_value=ssl_channel_creds + ): + patched.return_value = None + client = client_class(client_options=options) + + if use_client_cert_env == "false": + expected_ssl_channel_creds = None + expected_host = client.DEFAULT_ENDPOINT + else: + expected_ssl_channel_creds = ssl_channel_creds + expected_host = client.DEFAULT_MTLS_ENDPOINT + + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=expected_host, + scopes=None, + ssl_channel_credentials=expected_ssl_channel_creds, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case ADC client cert is provided. Whether client cert is used depends on + # GOOGLE_API_USE_CLIENT_CERTIFICATE value. 
+ with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + with mock.patch.object(transport_class, "__init__") as patched: + with mock.patch( + "google.auth.transport.grpc.SslCredentials.__init__", return_value=None + ): + with mock.patch( + "google.auth.transport.grpc.SslCredentials.is_mtls", + new_callable=mock.PropertyMock, + ) as is_mtls_mock: + with mock.patch( + "google.auth.transport.grpc.SslCredentials.ssl_credentials", + new_callable=mock.PropertyMock, + ) as ssl_credentials_mock: + if use_client_cert_env == "false": + is_mtls_mock.return_value = False + ssl_credentials_mock.return_value = None + expected_host = client.DEFAULT_ENDPOINT + expected_ssl_channel_creds = None + else: + is_mtls_mock.return_value = True + ssl_credentials_mock.return_value = mock.Mock() + expected_host = client.DEFAULT_MTLS_ENDPOINT + expected_ssl_channel_creds = ( + ssl_credentials_mock.return_value + ) + + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=expected_host, + scopes=None, + ssl_channel_credentials=expected_ssl_channel_creds, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case client_cert_source and ADC client cert are not provided. + with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + with mock.patch.object(transport_class, "__init__") as patched: + with mock.patch( + "google.auth.transport.grpc.SslCredentials.__init__", return_value=None + ): + with mock.patch( + "google.auth.transport.grpc.SslCredentials.is_mtls", + new_callable=mock.PropertyMock, + ) as is_mtls_mock: + is_mtls_mock.return_value = False + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + (BigQueryWriteClient, transports.BigQueryWriteGrpcTransport, "grpc"), + ( + BigQueryWriteAsyncClient, + transports.BigQueryWriteGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +def test_big_query_write_client_client_options_scopes( + client_class, transport_class, transport_name +): + # Check the case scopes are provided. + options = client_options.ClientOptions(scopes=["1", "2"],) + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=["1", "2"], + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + (BigQueryWriteClient, transports.BigQueryWriteGrpcTransport, "grpc"), + ( + BigQueryWriteAsyncClient, + transports.BigQueryWriteGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +def test_big_query_write_client_client_options_credentials_file( + client_class, transport_class, transport_name +): + # Check the case credentials file is provided. 
+ options = client_options.ClientOptions(credentials_file="credentials.json") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file="credentials.json", + host=client.DEFAULT_ENDPOINT, + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +def test_big_query_write_client_client_options_from_dict(): + with mock.patch( + "google.cloud.bigquery_storage_v1beta2.services.big_query_write.transports.BigQueryWriteGrpcTransport.__init__" + ) as grpc_transport: + grpc_transport.return_value = None + client = BigQueryWriteClient( + client_options={"api_endpoint": "squid.clam.whelk"} + ) + grpc_transport.assert_called_once_with( + credentials=None, + credentials_file=None, + host="squid.clam.whelk", + scopes=None, + ssl_channel_credentials=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +def test_create_write_stream( + transport: str = "grpc", request_type=storage.CreateWriteStreamRequest +): + client = BigQueryWriteClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_write_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = stream.WriteStream( + name="name_value", type_=stream.WriteStream.Type.COMMITTED, + ) + + response = client.create_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.CreateWriteStreamRequest() + + # Establish that the response is the type that we expect. + + assert isinstance(response, stream.WriteStream) + + assert response.name == "name_value" + + assert response.type_ == stream.WriteStream.Type.COMMITTED + + +def test_create_write_stream_from_dict(): + test_create_write_stream(request_type=dict) + + +@pytest.mark.asyncio +async def test_create_write_stream_async( + transport: str = "grpc_asyncio", request_type=storage.CreateWriteStreamRequest +): + client = BigQueryWriteAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_write_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + stream.WriteStream( + name="name_value", type_=stream.WriteStream.Type.COMMITTED, + ) + ) + + response = await client.create_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.CreateWriteStreamRequest() + + # Establish that the response is the type that we expect. 
+ assert isinstance(response, stream.WriteStream) + + assert response.name == "name_value" + + assert response.type_ == stream.WriteStream.Type.COMMITTED + + +@pytest.mark.asyncio +async def test_create_write_stream_async_from_dict(): + await test_create_write_stream_async(request_type=dict) + + +def test_create_write_stream_field_headers(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.CreateWriteStreamRequest() + request.parent = "parent/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_write_stream), "__call__" + ) as call: + call.return_value = stream.WriteStream() + + client.create_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "parent=parent/value",) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_create_write_stream_field_headers_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.CreateWriteStreamRequest() + request.parent = "parent/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_write_stream), "__call__" + ) as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(stream.WriteStream()) + + await client.create_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "parent=parent/value",) in kw["metadata"] + + +def test_create_write_stream_flattened(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_write_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = stream.WriteStream() + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.create_write_stream( + parent="parent_value", write_stream=stream.WriteStream(name="name_value"), + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].parent == "parent_value" + + assert args[0].write_stream == stream.WriteStream(name="name_value") + + +def test_create_write_stream_flattened_error(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. 
+ with pytest.raises(ValueError): + client.create_write_stream( + storage.CreateWriteStreamRequest(), + parent="parent_value", + write_stream=stream.WriteStream(name="name_value"), + ) + + +@pytest.mark.asyncio +async def test_create_write_stream_flattened_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.create_write_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = stream.WriteStream() + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(stream.WriteStream()) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.create_write_stream( + parent="parent_value", write_stream=stream.WriteStream(name="name_value"), + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].parent == "parent_value" + + assert args[0].write_stream == stream.WriteStream(name="name_value") + + +@pytest.mark.asyncio +async def test_create_write_stream_flattened_error_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.create_write_stream( + storage.CreateWriteStreamRequest(), + parent="parent_value", + write_stream=stream.WriteStream(name="name_value"), + ) + + +def test_append_rows(transport: str = "grpc", request_type=storage.AppendRowsRequest): + client = BigQueryWriteClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + requests = [request] + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.append_rows), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = iter([storage.AppendRowsResponse()]) + + response = client.append_rows(iter(requests)) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert next(args[0]) == request + + # Establish that the response is the type that we expect. + for message in response: + assert isinstance(message, storage.AppendRowsResponse) + + +def test_append_rows_from_dict(): + test_append_rows(request_type=dict) + + +@pytest.mark.asyncio +async def test_append_rows_async( + transport: str = "grpc_asyncio", request_type=storage.AppendRowsRequest +): + client = BigQueryWriteAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + requests = [request] + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.append_rows), "__call__") as call: + # Designate an appropriate return value for the call. 
+ call.return_value = mock.Mock(aio.StreamStreamCall, autospec=True) + call.return_value.read = mock.AsyncMock( + side_effect=[storage.AppendRowsResponse()] + ) + + response = await client.append_rows(iter(requests)) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert next(args[0]) == request + + # Establish that the response is the type that we expect. + message = await response.read() + assert isinstance(message, storage.AppendRowsResponse) + + +@pytest.mark.asyncio +async def test_append_rows_async_from_dict(): + await test_append_rows_async(request_type=dict) + + +def test_get_write_stream( + transport: str = "grpc", request_type=storage.GetWriteStreamRequest +): + client = BigQueryWriteClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.get_write_stream), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = stream.WriteStream( + name="name_value", type_=stream.WriteStream.Type.COMMITTED, + ) + + response = client.get_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.GetWriteStreamRequest() + + # Establish that the response is the type that we expect. + + assert isinstance(response, stream.WriteStream) + + assert response.name == "name_value" + + assert response.type_ == stream.WriteStream.Type.COMMITTED + + +def test_get_write_stream_from_dict(): + test_get_write_stream(request_type=dict) + + +@pytest.mark.asyncio +async def test_get_write_stream_async( + transport: str = "grpc_asyncio", request_type=storage.GetWriteStreamRequest +): + client = BigQueryWriteAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.get_write_stream), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + stream.WriteStream( + name="name_value", type_=stream.WriteStream.Type.COMMITTED, + ) + ) + + response = await client.get_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.GetWriteStreamRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, stream.WriteStream) + + assert response.name == "name_value" + + assert response.type_ == stream.WriteStream.Type.COMMITTED + + +@pytest.mark.asyncio +async def test_get_write_stream_async_from_dict(): + await test_get_write_stream_async(request_type=dict) + + +def test_get_write_stream_field_headers(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. 
+ request = storage.GetWriteStreamRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.get_write_stream), "__call__") as call: + call.return_value = stream.WriteStream() + + client.get_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_get_write_stream_field_headers_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.GetWriteStreamRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.get_write_stream), "__call__") as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(stream.WriteStream()) + + await client.get_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +def test_get_write_stream_flattened(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.get_write_stream), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = stream.WriteStream() + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.get_write_stream(name="name_value",) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].name == "name_value" + + +def test_get_write_stream_flattened_error(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.get_write_stream( + storage.GetWriteStreamRequest(), name="name_value", + ) + + +@pytest.mark.asyncio +async def test_get_write_stream_flattened_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.get_write_stream), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = stream.WriteStream() + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(stream.WriteStream()) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.get_write_stream(name="name_value",) + + # Establish that the underlying call was made with the expected + # request object values. 
+ assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].name == "name_value" + + +@pytest.mark.asyncio +async def test_get_write_stream_flattened_error_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.get_write_stream( + storage.GetWriteStreamRequest(), name="name_value", + ) + + +def test_finalize_write_stream( + transport: str = "grpc", request_type=storage.FinalizeWriteStreamRequest +): + client = BigQueryWriteClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.finalize_write_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = storage.FinalizeWriteStreamResponse(row_count=992,) + + response = client.finalize_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.FinalizeWriteStreamRequest() + + # Establish that the response is the type that we expect. + + assert isinstance(response, storage.FinalizeWriteStreamResponse) + + assert response.row_count == 992 + + +def test_finalize_write_stream_from_dict(): + test_finalize_write_stream(request_type=dict) + + +@pytest.mark.asyncio +async def test_finalize_write_stream_async( + transport: str = "grpc_asyncio", request_type=storage.FinalizeWriteStreamRequest +): + client = BigQueryWriteAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.finalize_write_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.FinalizeWriteStreamResponse(row_count=992,) + ) + + response = await client.finalize_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.FinalizeWriteStreamRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, storage.FinalizeWriteStreamResponse) + + assert response.row_count == 992 + + +@pytest.mark.asyncio +async def test_finalize_write_stream_async_from_dict(): + await test_finalize_write_stream_async(request_type=dict) + + +def test_finalize_write_stream_field_headers(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.FinalizeWriteStreamRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object( + type(client.transport.finalize_write_stream), "__call__" + ) as call: + call.return_value = storage.FinalizeWriteStreamResponse() + + client.finalize_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_finalize_write_stream_field_headers_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.FinalizeWriteStreamRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.finalize_write_stream), "__call__" + ) as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.FinalizeWriteStreamResponse() + ) + + await client.finalize_write_stream(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +def test_finalize_write_stream_flattened(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.finalize_write_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = storage.FinalizeWriteStreamResponse() + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.finalize_write_stream(name="name_value",) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].name == "name_value" + + +def test_finalize_write_stream_flattened_error(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.finalize_write_stream( + storage.FinalizeWriteStreamRequest(), name="name_value", + ) + + +@pytest.mark.asyncio +async def test_finalize_write_stream_flattened_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.finalize_write_stream), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = storage.FinalizeWriteStreamResponse() + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.FinalizeWriteStreamResponse() + ) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.finalize_write_stream(name="name_value",) + + # Establish that the underlying call was made with the expected + # request object values. 
+ assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].name == "name_value" + + +@pytest.mark.asyncio +async def test_finalize_write_stream_flattened_error_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.finalize_write_stream( + storage.FinalizeWriteStreamRequest(), name="name_value", + ) + + +def test_batch_commit_write_streams( + transport: str = "grpc", request_type=storage.BatchCommitWriteStreamsRequest +): + client = BigQueryWriteClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_commit_write_streams), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = storage.BatchCommitWriteStreamsResponse() + + response = client.batch_commit_write_streams(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.BatchCommitWriteStreamsRequest() + + # Establish that the response is the type that we expect. + + assert isinstance(response, storage.BatchCommitWriteStreamsResponse) + + +def test_batch_commit_write_streams_from_dict(): + test_batch_commit_write_streams(request_type=dict) + + +@pytest.mark.asyncio +async def test_batch_commit_write_streams_async( + transport: str = "grpc_asyncio", request_type=storage.BatchCommitWriteStreamsRequest +): + client = BigQueryWriteAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_commit_write_streams), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.BatchCommitWriteStreamsResponse() + ) + + response = await client.batch_commit_write_streams(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.BatchCommitWriteStreamsRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, storage.BatchCommitWriteStreamsResponse) + + +@pytest.mark.asyncio +async def test_batch_commit_write_streams_async_from_dict(): + await test_batch_commit_write_streams_async(request_type=dict) + + +def test_batch_commit_write_streams_field_headers(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.BatchCommitWriteStreamsRequest() + request.parent = "parent/value" + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object( + type(client.transport.batch_commit_write_streams), "__call__" + ) as call: + call.return_value = storage.BatchCommitWriteStreamsResponse() + + client.batch_commit_write_streams(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "parent=parent/value",) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_batch_commit_write_streams_field_headers_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.BatchCommitWriteStreamsRequest() + request.parent = "parent/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_commit_write_streams), "__call__" + ) as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.BatchCommitWriteStreamsResponse() + ) + + await client.batch_commit_write_streams(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "parent=parent/value",) in kw["metadata"] + + +def test_batch_commit_write_streams_flattened(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_commit_write_streams), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = storage.BatchCommitWriteStreamsResponse() + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.batch_commit_write_streams(parent="parent_value",) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].parent == "parent_value" + + +def test_batch_commit_write_streams_flattened_error(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.batch_commit_write_streams( + storage.BatchCommitWriteStreamsRequest(), parent="parent_value", + ) + + +@pytest.mark.asyncio +async def test_batch_commit_write_streams_flattened_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_commit_write_streams), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = storage.BatchCommitWriteStreamsResponse() + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.BatchCommitWriteStreamsResponse() + ) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. 
+ response = await client.batch_commit_write_streams(parent="parent_value",) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].parent == "parent_value" + + +@pytest.mark.asyncio +async def test_batch_commit_write_streams_flattened_error_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.batch_commit_write_streams( + storage.BatchCommitWriteStreamsRequest(), parent="parent_value", + ) + + +def test_flush_rows(transport: str = "grpc", request_type=storage.FlushRowsRequest): + client = BigQueryWriteClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.flush_rows), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = storage.FlushRowsResponse(offset=647,) + + response = client.flush_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.FlushRowsRequest() + + # Establish that the response is the type that we expect. + + assert isinstance(response, storage.FlushRowsResponse) + + assert response.offset == 647 + + +def test_flush_rows_from_dict(): + test_flush_rows(request_type=dict) + + +@pytest.mark.asyncio +async def test_flush_rows_async( + transport: str = "grpc_asyncio", request_type=storage.FlushRowsRequest +): + client = BigQueryWriteAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.flush_rows), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.FlushRowsResponse(offset=647,) + ) + + response = await client.flush_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == storage.FlushRowsRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, storage.FlushRowsResponse) + + assert response.offset == 647 + + +@pytest.mark.asyncio +async def test_flush_rows_async_from_dict(): + await test_flush_rows_async(request_type=dict) + + +def test_flush_rows_field_headers(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.FlushRowsRequest() + request.write_stream = "write_stream/value" + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object(type(client.transport.flush_rows), "__call__") as call: + call.return_value = storage.FlushRowsResponse() + + client.flush_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "write_stream=write_stream/value",) in kw[ + "metadata" + ] + + +@pytest.mark.asyncio +async def test_flush_rows_field_headers_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = storage.FlushRowsRequest() + request.write_stream = "write_stream/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.flush_rows), "__call__") as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.FlushRowsResponse() + ) + + await client.flush_rows(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "write_stream=write_stream/value",) in kw[ + "metadata" + ] + + +def test_flush_rows_flattened(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.flush_rows), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = storage.FlushRowsResponse() + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.flush_rows(write_stream="write_stream_value",) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].write_stream == "write_stream_value" + + +def test_flush_rows_flattened_error(): + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.flush_rows( + storage.FlushRowsRequest(), write_stream="write_stream_value", + ) + + +@pytest.mark.asyncio +async def test_flush_rows_flattened_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.flush_rows), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = storage.FlushRowsResponse() + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + storage.FlushRowsResponse() + ) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.flush_rows(write_stream="write_stream_value",) + + # Establish that the underlying call was made with the expected + # request object values. 
+ assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].write_stream == "write_stream_value" + + +@pytest.mark.asyncio +async def test_flush_rows_flattened_error_async(): + client = BigQueryWriteAsyncClient(credentials=credentials.AnonymousCredentials(),) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.flush_rows( + storage.FlushRowsRequest(), write_stream="write_stream_value", + ) + + +def test_credentials_transport_error(): + # It is an error to provide credentials and a transport instance. + transport = transports.BigQueryWriteGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = BigQueryWriteClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # It is an error to provide a credentials file and a transport instance. + transport = transports.BigQueryWriteGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = BigQueryWriteClient( + client_options={"credentials_file": "credentials.json"}, + transport=transport, + ) + + # It is an error to provide scopes and a transport instance. + transport = transports.BigQueryWriteGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = BigQueryWriteClient( + client_options={"scopes": ["1", "2"]}, transport=transport, + ) + + +def test_transport_instance(): + # A client may be instantiated with a custom transport instance. + transport = transports.BigQueryWriteGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + client = BigQueryWriteClient(transport=transport) + assert client.transport is transport + + +def test_transport_get_channel(): + # A client may be instantiated with a custom transport instance. + transport = transports.BigQueryWriteGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + channel = transport.grpc_channel + assert channel + + transport = transports.BigQueryWriteGrpcAsyncIOTransport( + credentials=credentials.AnonymousCredentials(), + ) + channel = transport.grpc_channel + assert channel + + +@pytest.mark.parametrize( + "transport_class", + [ + transports.BigQueryWriteGrpcTransport, + transports.BigQueryWriteGrpcAsyncIOTransport, + ], +) +def test_transport_adc(transport_class): + # Test default credentials are used if not provided. + with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + transport_class() + adc.assert_called_once() + + +def test_transport_grpc_default(): + # A client should use the gRPC transport by default. + client = BigQueryWriteClient(credentials=credentials.AnonymousCredentials(),) + assert isinstance(client.transport, transports.BigQueryWriteGrpcTransport,) + + +def test_big_query_write_base_transport_error(): + # Passing both a credentials object and credentials_file should raise an error + with pytest.raises(exceptions.DuplicateCredentialArgs): + transport = transports.BigQueryWriteTransport( + credentials=credentials.AnonymousCredentials(), + credentials_file="credentials.json", + ) + + +def test_big_query_write_base_transport(): + # Instantiate the base transport. 
+ with mock.patch( + "google.cloud.bigquery_storage_v1beta2.services.big_query_write.transports.BigQueryWriteTransport.__init__" + ) as Transport: + Transport.return_value = None + transport = transports.BigQueryWriteTransport( + credentials=credentials.AnonymousCredentials(), + ) + + # Every method on the transport should just blindly + # raise NotImplementedError. + methods = ( + "create_write_stream", + "append_rows", + "get_write_stream", + "finalize_write_stream", + "batch_commit_write_streams", + "flush_rows", + ) + for method in methods: + with pytest.raises(NotImplementedError): + getattr(transport, method)(request=object()) + + +def test_big_query_write_base_transport_with_credentials_file(): + # Instantiate the base transport with a credentials file + with mock.patch.object( + auth, "load_credentials_from_file" + ) as load_creds, mock.patch( + "google.cloud.bigquery_storage_v1beta2.services.big_query_write.transports.BigQueryWriteTransport._prep_wrapped_messages" + ) as Transport: + Transport.return_value = None + load_creds.return_value = (credentials.AnonymousCredentials(), None) + transport = transports.BigQueryWriteTransport( + credentials_file="credentials.json", quota_project_id="octopus", + ) + load_creds.assert_called_once_with( + "credentials.json", + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.insertdata", + "https://www.googleapis.com/auth/cloud-platform", + ), + quota_project_id="octopus", + ) + + +def test_big_query_write_base_transport_with_adc(): + # Test the default credentials are used if credentials and credentials_file are None. + with mock.patch.object(auth, "default") as adc, mock.patch( + "google.cloud.bigquery_storage_v1beta2.services.big_query_write.transports.BigQueryWriteTransport._prep_wrapped_messages" + ) as Transport: + Transport.return_value = None + adc.return_value = (credentials.AnonymousCredentials(), None) + transport = transports.BigQueryWriteTransport() + adc.assert_called_once() + + +def test_big_query_write_auth_adc(): + # If no credentials are provided, we should use ADC credentials. + with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + BigQueryWriteClient() + adc.assert_called_once_with( + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.insertdata", + "https://www.googleapis.com/auth/cloud-platform", + ), + quota_project_id=None, + ) + + +def test_big_query_write_transport_auth_adc(): + # If credentials and host are not provided, the transport class should use + # ADC credentials. 
+ with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + transports.BigQueryWriteGrpcTransport( + host="squid.clam.whelk", quota_project_id="octopus" + ) + adc.assert_called_once_with( + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.insertdata", + "https://www.googleapis.com/auth/cloud-platform", + ), + quota_project_id="octopus", + ) + + +def test_big_query_write_host_no_port(): + client = BigQueryWriteClient( + credentials=credentials.AnonymousCredentials(), + client_options=client_options.ClientOptions( + api_endpoint="bigquerystorage.googleapis.com" + ), + ) + assert client.transport._host == "bigquerystorage.googleapis.com:443" + + +def test_big_query_write_host_with_port(): + client = BigQueryWriteClient( + credentials=credentials.AnonymousCredentials(), + client_options=client_options.ClientOptions( + api_endpoint="bigquerystorage.googleapis.com:8000" + ), + ) + assert client.transport._host == "bigquerystorage.googleapis.com:8000" + + +def test_big_query_write_grpc_transport_channel(): + channel = grpc.secure_channel("http://localhost/", grpc.local_channel_credentials()) + + # Check that channel is used if provided. + transport = transports.BigQueryWriteGrpcTransport( + host="squid.clam.whelk", channel=channel, + ) + assert transport.grpc_channel == channel + assert transport._host == "squid.clam.whelk:443" + assert transport._ssl_channel_credentials == None + + +def test_big_query_write_grpc_asyncio_transport_channel(): + channel = aio.secure_channel("http://localhost/", grpc.local_channel_credentials()) + + # Check that channel is used if provided. + transport = transports.BigQueryWriteGrpcAsyncIOTransport( + host="squid.clam.whelk", channel=channel, + ) + assert transport.grpc_channel == channel + assert transport._host == "squid.clam.whelk:443" + assert transport._ssl_channel_credentials == None + + +@pytest.mark.parametrize( + "transport_class", + [ + transports.BigQueryWriteGrpcTransport, + transports.BigQueryWriteGrpcAsyncIOTransport, + ], +) +def test_big_query_write_transport_channel_mtls_with_client_cert_source( + transport_class, +): + with mock.patch( + "grpc.ssl_channel_credentials", autospec=True + ) as grpc_ssl_channel_cred: + with mock.patch.object( + transport_class, "create_channel" + ) as grpc_create_channel: + mock_ssl_cred = mock.Mock() + grpc_ssl_channel_cred.return_value = mock_ssl_cred + + mock_grpc_channel = mock.Mock() + grpc_create_channel.return_value = mock_grpc_channel + + cred = credentials.AnonymousCredentials() + with pytest.warns(DeprecationWarning): + with mock.patch.object(auth, "default") as adc: + adc.return_value = (cred, None) + transport = transport_class( + host="squid.clam.whelk", + api_mtls_endpoint="mtls.squid.clam.whelk", + client_cert_source=client_cert_source_callback, + ) + adc.assert_called_once() + + grpc_ssl_channel_cred.assert_called_once_with( + certificate_chain=b"cert bytes", private_key=b"key bytes" + ) + grpc_create_channel.assert_called_once_with( + "mtls.squid.clam.whelk:443", + credentials=cred, + credentials_file=None, + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.insertdata", + "https://www.googleapis.com/auth/cloud-platform", + ), + ssl_credentials=mock_ssl_cred, + quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + assert transport.grpc_channel == mock_grpc_channel + assert 
transport._ssl_channel_credentials == mock_ssl_cred + + +@pytest.mark.parametrize( + "transport_class", + [ + transports.BigQueryWriteGrpcTransport, + transports.BigQueryWriteGrpcAsyncIOTransport, + ], +) +def test_big_query_write_transport_channel_mtls_with_adc(transport_class): + mock_ssl_cred = mock.Mock() + with mock.patch.multiple( + "google.auth.transport.grpc.SslCredentials", + __init__=mock.Mock(return_value=None), + ssl_credentials=mock.PropertyMock(return_value=mock_ssl_cred), + ): + with mock.patch.object( + transport_class, "create_channel" + ) as grpc_create_channel: + mock_grpc_channel = mock.Mock() + grpc_create_channel.return_value = mock_grpc_channel + mock_cred = mock.Mock() + + with pytest.warns(DeprecationWarning): + transport = transport_class( + host="squid.clam.whelk", + credentials=mock_cred, + api_mtls_endpoint="mtls.squid.clam.whelk", + client_cert_source=None, + ) + + grpc_create_channel.assert_called_once_with( + "mtls.squid.clam.whelk:443", + credentials=mock_cred, + credentials_file=None, + scopes=( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/bigquery.insertdata", + "https://www.googleapis.com/auth/cloud-platform", + ), + ssl_credentials=mock_ssl_cred, + quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + assert transport.grpc_channel == mock_grpc_channel + + +def test_table_path(): + project = "squid" + dataset = "clam" + table = "whelk" + + expected = "projects/{project}/datasets/{dataset}/tables/{table}".format( + project=project, dataset=dataset, table=table, + ) + actual = BigQueryWriteClient.table_path(project, dataset, table) + assert expected == actual + + +def test_parse_table_path(): + expected = { + "project": "octopus", + "dataset": "oyster", + "table": "nudibranch", + } + path = BigQueryWriteClient.table_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryWriteClient.parse_table_path(path) + assert expected == actual + + +def test_write_stream_path(): + project = "cuttlefish" + dataset = "mussel" + table = "winkle" + stream = "nautilus" + + expected = "projects/{project}/datasets/{dataset}/tables/{table}/streams/{stream}".format( + project=project, dataset=dataset, table=table, stream=stream, + ) + actual = BigQueryWriteClient.write_stream_path(project, dataset, table, stream) + assert expected == actual + + +def test_parse_write_stream_path(): + expected = { + "project": "scallop", + "dataset": "abalone", + "table": "squid", + "stream": "clam", + } + path = BigQueryWriteClient.write_stream_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryWriteClient.parse_write_stream_path(path) + assert expected == actual + + +def test_common_billing_account_path(): + billing_account = "whelk" + + expected = "billingAccounts/{billing_account}".format( + billing_account=billing_account, + ) + actual = BigQueryWriteClient.common_billing_account_path(billing_account) + assert expected == actual + + +def test_parse_common_billing_account_path(): + expected = { + "billing_account": "octopus", + } + path = BigQueryWriteClient.common_billing_account_path(**expected) + + # Check that the path construction is reversible. 
+ actual = BigQueryWriteClient.parse_common_billing_account_path(path) + assert expected == actual + + +def test_common_folder_path(): + folder = "oyster" + + expected = "folders/{folder}".format(folder=folder,) + actual = BigQueryWriteClient.common_folder_path(folder) + assert expected == actual + + +def test_parse_common_folder_path(): + expected = { + "folder": "nudibranch", + } + path = BigQueryWriteClient.common_folder_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryWriteClient.parse_common_folder_path(path) + assert expected == actual + + +def test_common_organization_path(): + organization = "cuttlefish" + + expected = "organizations/{organization}".format(organization=organization,) + actual = BigQueryWriteClient.common_organization_path(organization) + assert expected == actual + + +def test_parse_common_organization_path(): + expected = { + "organization": "mussel", + } + path = BigQueryWriteClient.common_organization_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryWriteClient.parse_common_organization_path(path) + assert expected == actual + + +def test_common_project_path(): + project = "winkle" + + expected = "projects/{project}".format(project=project,) + actual = BigQueryWriteClient.common_project_path(project) + assert expected == actual + + +def test_parse_common_project_path(): + expected = { + "project": "nautilus", + } + path = BigQueryWriteClient.common_project_path(**expected) + + # Check that the path construction is reversible. + actual = BigQueryWriteClient.parse_common_project_path(path) + assert expected == actual + + +def test_common_location_path(): + project = "scallop" + location = "abalone" + + expected = "projects/{project}/locations/{location}".format( + project=project, location=location, + ) + actual = BigQueryWriteClient.common_location_path(project, location) + assert expected == actual + + +def test_parse_common_location_path(): + expected = { + "project": "squid", + "location": "clam", + } + path = BigQueryWriteClient.common_location_path(**expected) + + # Check that the path construction is reversible. 
+ actual = BigQueryWriteClient.parse_common_location_path(path) + assert expected == actual + + +def test_client_withDEFAULT_CLIENT_INFO(): + client_info = gapic_v1.client_info.ClientInfo() + + with mock.patch.object( + transports.BigQueryWriteTransport, "_prep_wrapped_messages" + ) as prep: + client = BigQueryWriteClient( + credentials=credentials.AnonymousCredentials(), client_info=client_info, + ) + prep.assert_called_once_with(client_info) + + with mock.patch.object( + transports.BigQueryWriteTransport, "_prep_wrapped_messages" + ) as prep: + transport_class = BigQueryWriteClient.get_transport_class() + transport = transport_class( + credentials=credentials.AnonymousCredentials(), client_info=client_info, + ) + prep.assert_called_once_with(client_info) From 35186247018b0c93a4af1fcde52fa739efa803c4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 13 Jan 2021 15:33:59 -0600 Subject: [PATCH 19/22] docs: request only a single stream in dataframe example (#114) * docs: request only a single stream in dataframe example * blacken --- samples/to_dataframe/main_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/to_dataframe/main_test.py b/samples/to_dataframe/main_test.py index 46820578..bda6d601 100644 --- a/samples/to_dataframe/main_test.py +++ b/samples/to_dataframe/main_test.py @@ -124,7 +124,7 @@ def test_session_to_dataframe(capsys, clients): read_options=read_options, ) read_session = bqstorageclient.create_read_session( - parent=parent, read_session=requested_session + parent=parent, read_session=requested_session, max_stream_count=1, ) # This example reads from only a single stream. Read from multiple streams From d9691f1714bf34b3119d4e457293a723c2fb9120 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 14 Jan 2021 10:18:03 -0600 Subject: [PATCH 20/22] docs: add note about Arrow blocks to README (#73) Suggest Arrow as the preferred data format when downloading to a pandas dataframe, as this is several times faster. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-storage/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- README.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index ad8a5e99..27d3f4fd 100644 --- a/README.rst +++ b/README.rst @@ -86,15 +86,21 @@ Optional Dependencies Several features of ``google-cloud-bigquery-storage`` require additional dependencies. +* Parse Arrow blocks in a ``read_rows()`` stream using `pyarrow + `_. + + ``pip install 'google-cloud-bigquery-storage[pyarrow]'`` + + * Parse Avro blocks in a ``read_rows()`` stream using `fastavro `_. ``pip install google-cloud-bigquery-storage[fastavro]`` -* Write rows to a `pandas `_ +* Download rows to a `pandas `_ dataframe.
- ``pip install google-cloud-bigquery-storage[pandas,fastavro]`` + ``pip install 'google-cloud-bigquery-storage[pandas,pyarrow]'`` Next Steps ~~~~~~~~~~ From 798cd341fbe0734f99b9c2ac3c50ae09886d1c90 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 22 Jan 2021 15:08:22 -0600 Subject: [PATCH 21/22] feat: add manual wrapper for v1beta2 read client (#117) * feat: add manual wrapper for v1beta2 read client * add missing v1beta2 * sort versions so v1 is last * tests: unify v1 and v1beta2 system tests * tests: use proto object for DataFormat checks * blacken * docs: add handwritten class to docs --- docs/bigquery_storage_v1beta2/library.rst | 6 + docs/index.rst | 1 + .../bigquery_storage_v1beta2/__init__.py | 39 +++++ .../cloud/bigquery_storage_v1beta2/client.py | 137 +++++++++++++++++ synth.py | 2 +- tests/system/{v1 => }/conftest.py | 9 +- tests/system/reader/conftest.py | 38 +++++ .../test_reader.py} | 142 +++++++++--------- .../test_reader_dataframe.py} | 12 +- 9 files changed, 303 insertions(+), 83 deletions(-) create mode 100644 docs/bigquery_storage_v1beta2/library.rst create mode 100644 google/cloud/bigquery_storage_v1beta2/__init__.py create mode 100644 google/cloud/bigquery_storage_v1beta2/client.py rename tests/system/{v1 => }/conftest.py (97%) create mode 100644 tests/system/reader/conftest.py rename tests/system/{v1/test_reader_v1.py => reader/test_reader.py} (78%) rename tests/system/{v1/test_reader_dataframe_v1.py => reader/test_reader_dataframe.py} (91%) diff --git a/docs/bigquery_storage_v1beta2/library.rst b/docs/bigquery_storage_v1beta2/library.rst new file mode 100644 index 00000000..4e25d9d8 --- /dev/null +++ b/docs/bigquery_storage_v1beta2/library.rst @@ -0,0 +1,6 @@ +Bigquery Storage v1beta2 API Library +==================================== + +.. automodule:: google.cloud.bigquery_storage_v1beta2.client + :members: + :inherited-members: diff --git a/docs/index.rst b/docs/index.rst index 6892b30c..802cdca2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -21,6 +21,7 @@ API Reference bigquery_storage_v1/library bigquery_storage_v1/services bigquery_storage_v1/types + bigquery_storage_v1beta2/library bigquery_storage_v1beta2/services bigquery_storage_v1beta2/types diff --git a/google/cloud/bigquery_storage_v1beta2/__init__.py b/google/cloud/bigquery_storage_v1beta2/__init__.py new file mode 100644 index 00000000..6d0b34e1 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/__init__.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import + +import pkg_resources + +__version__ = pkg_resources.get_distribution( + "google-cloud-bigquery-storage" +).version # noqa + +from google.cloud.bigquery_storage_v1beta2 import client +from google.cloud.bigquery_storage_v1beta2 import types + + +class BigQueryReadClient(client.BigQueryReadClient): + __doc__ = client.BigQueryReadClient.__doc__ + + +__all__ = ( + # google.cloud.bigquery_storage_v1beta2 + "__version__", + "types", + # google.cloud.bigquery_storage_v1beta2.client + "BigQueryReadClient", +) diff --git a/google/cloud/bigquery_storage_v1beta2/client.py b/google/cloud/bigquery_storage_v1beta2/client.py new file mode 100644 index 00000000..f2776a20 --- /dev/null +++ b/google/cloud/bigquery_storage_v1beta2/client.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Parent client for calling the Cloud BigQuery Storage API. + +This is the base from which all interactions with the API occur. +""" + +from __future__ import absolute_import + +import google.api_core.gapic_v1.method + +from google.cloud.bigquery_storage_v1 import reader +from google.cloud.bigquery_storage_v1beta2.services import big_query_read + + +_SCOPES = ( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/cloud-platform", +) + + +class BigQueryReadClient(big_query_read.BigQueryReadClient): + """Client for interacting with BigQuery Storage API. + + The BigQuery storage API can be used to read data stored in BigQuery. + """ + + def read_rows( + self, + name, + offset=0, + retry=google.api_core.gapic_v1.method.DEFAULT, + timeout=google.api_core.gapic_v1.method.DEFAULT, + metadata=(), + ): + """ + Reads rows from the table in the format prescribed by the read + session. Each response contains one or more table rows, up to a + maximum of 10 MiB per response; read requests which attempt to read + individual rows larger than this will fail. + + Each request also returns a set of stream statistics reflecting the + estimated total number of rows in the read stream. This number is + computed based on the total table size and the number of active + streams in the read session, and may change as other streams continue + to read data. + + Example: + >>> from google.cloud import bigquery_storage + >>> + >>> client = bigquery_storage.BigQueryReadClient() + >>> + >>> # TODO: Initialize ``table``: + >>> table = "projects/{}/datasets/{}/tables/{}".format( + ... 'project_id': 'your-data-project-id', + ... 'dataset_id': 'your_dataset_id', + ... 'table_id': 'your_table_id', + ... ) + >>> + >>> # TODO: Initialize `parent`: + >>> parent = 'projects/your-billing-project-id' + >>> + >>> requested_session = bigquery_storage.types.ReadSession( + ... table=table, + ... data_format=bigquery_storage.types.DataFormat.AVRO, + ... ) + >>> session = client.create_read_session( + ... parent=parent, read_session=requested_session + ... 
) + >>> + >>> stream = session.streams[0], # TODO: Also read any other streams. + >>> read_rows_stream = client.read_rows(stream.name) + >>> + >>> for element in read_rows_stream.rows(session): + ... # process element + ... pass + + Args: + name (str): + Required. Name of the stream to start + reading from, of the form + `projects/{project_id}/locations/{location}/sessions/{session_id}/streams/{stream_id}` + offset (Optional[int]): + The starting offset from which to begin reading rows from + in the stream. The offset requested must be less than the last + row read from ReadRows. Requesting a larger offset is + undefined. + retry (Optional[google.api_core.retry.Retry]): A retry object used + to retry requests. If ``None`` is specified, requests will not + be retried. + timeout (Optional[float]): The amount of time, in seconds, to wait + for the request to complete. Note that if ``retry`` is + specified, the timeout applies to each individual attempt. + metadata (Optional[Sequence[Tuple[str, str]]]): Additional metadata + that is provided to the method. + + Returns: + ~google.cloud.bigquery_storage_v1.reader.ReadRowsStream: + An iterable of + :class:`~google.cloud.bigquery_storage_v1.types.ReadRowsResponse`. + + Raises: + google.api_core.exceptions.GoogleAPICallError: If the request + failed for any reason. + google.api_core.exceptions.RetryError: If the request failed due + to a retryable error and retry attempts failed. + ValueError: If the parameters are invalid. + """ + gapic_client = super(BigQueryReadClient, self) + stream = gapic_client.read_rows( + read_stream=name, + offset=offset, + retry=retry, + timeout=timeout, + metadata=metadata, + ) + return reader.ReadRowsStream( + stream, + gapic_client, + name, + offset, + {"retry": retry, "timeout": timeout, "metadata": metadata}, + ) diff --git a/synth.py b/synth.py index 21fb8b3a..f9abef9e 100644 --- a/synth.py +++ b/synth.py @@ -22,7 +22,7 @@ gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() -versions = ["v1"] +versions = ["v1beta2", "v1"] for version in versions: library = gapic.py_library( diff --git a/tests/system/v1/conftest.py b/tests/system/conftest.py similarity index 97% rename from tests/system/v1/conftest.py rename to tests/system/conftest.py index f8ac01f5..dd42e736 100644 --- a/tests/system/v1/conftest.py +++ b/tests/system/conftest.py @@ -20,11 +20,9 @@ import pytest -from google.cloud import bigquery_storage - _TABLE_FORMAT = "projects/{}/datasets/{}/tables/{}" -_ASSETS_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "../assets") +_ASSETS_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "assets") @pytest.fixture(scope="session") @@ -52,11 +50,6 @@ def credentials(use_mtls): return service_account.Credentials.from_service_account_file(filename) -@pytest.fixture(scope="session") -def client(credentials): - return bigquery_storage.BigQueryReadClient(credentials=credentials) - - @pytest.fixture() def table_reference(): return _TABLE_FORMAT.format("bigquery-public-data", "usa_names", "usa_1910_2013") diff --git a/tests/system/reader/conftest.py b/tests/system/reader/conftest.py new file mode 100644 index 00000000..c27bd771 --- /dev/null +++ b/tests/system/reader/conftest.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""System tests for reading rows from tables.""" + +import pytest + +from google.cloud import bigquery_storage +from google.cloud import bigquery_storage_v1beta2 + + +@pytest.fixture(scope="session") +def client_v1(credentials): + return bigquery_storage.BigQueryReadClient(credentials=credentials) + + +@pytest.fixture(scope="session") +def client_v1beta2(credentials): + return bigquery_storage_v1beta2.BigQueryReadClient(credentials=credentials) + + +@pytest.fixture(scope="session", params=["v1", "v1beta2"]) +def client_and_types(request, client_v1, client_v1beta2): + if request.param == "v1": + return client_v1, bigquery_storage.types + return client_v1beta2, bigquery_storage_v1beta2.types diff --git a/tests/system/v1/test_reader_v1.py b/tests/system/reader/test_reader.py similarity index 78% rename from tests/system/v1/test_reader_v1.py rename to tests/system/reader/test_reader.py index a77b65f7..d0328041 100644 --- a/tests/system/v1/test_reader_v1.py +++ b/tests/system/reader/test_reader.py @@ -24,7 +24,6 @@ import pytz from google.cloud import bigquery -from google.cloud.bigquery_storage import types def _to_bq_table_ref(table_name_string, partition_suffix=""): @@ -54,12 +53,16 @@ def _to_bq_table_ref(table_name_string, partition_suffix=""): @pytest.mark.parametrize( "data_format,expected_schema_type", - ((types.DataFormat.AVRO, "avro_schema"), (types.DataFormat.ARROW, "arrow_schema")), + (("AVRO", "avro_schema"), ("ARROW", "arrow_schema")), ) def test_read_rows_as_blocks_full_table( - client, project_id, small_table_reference, data_format, expected_schema_type + client_and_types, + project_id, + small_table_reference, + data_format, + expected_schema_type, ): - + client, types = client_and_types read_session = types.ReadSession() read_session.table = small_table_reference read_session.data_format = data_format @@ -81,14 +84,11 @@ def test_read_rows_as_blocks_full_table( assert len(blocks) > 0 -@pytest.mark.parametrize( - "data_format,expected_schema_type", - ((types.DataFormat.AVRO, "avro_schema"), (types.DataFormat.ARROW, "arrow_schema")), -) +@pytest.mark.parametrize("data_format", ("AVRO", "ARROW")) def test_read_rows_as_rows_full_table( - client, project_id, small_table_reference, data_format, expected_schema_type + client_and_types, project_id, small_table_reference, data_format ): - + client, types = client_and_types read_session = types.ReadSession() read_session.table = small_table_reference read_session.data_format = data_format @@ -107,10 +107,11 @@ def test_read_rows_as_rows_full_table( assert len(rows) > 0 -@pytest.mark.parametrize( - "data_format", ((types.DataFormat.AVRO), (types.DataFormat.ARROW)) -) -def test_basic_nonfiltered_read(client, project_id, table_with_data_ref, data_format): +@pytest.mark.parametrize("data_format", ("AVRO", "ARROW")) +def test_basic_nonfiltered_read( + client_and_types, project_id, table_with_data_ref, data_format +): + client, types = client_and_types read_session = types.ReadSession() read_session.table = table_with_data_ref read_session.data_format = data_format @@ -129,7 +130,8 @@ def test_basic_nonfiltered_read(client, 
project_id, table_with_data_ref, data_fo assert len(rows) == 5 # all table rows -def test_filtered_rows_read(client, project_id, table_with_data_ref): +def test_filtered_rows_read(client_and_types, project_id, table_with_data_ref): + client, types = client_and_types read_session = types.ReadSession() read_session.table = table_with_data_ref read_session.data_format = types.DataFormat.AVRO @@ -149,11 +151,11 @@ def test_filtered_rows_read(client, project_id, table_with_data_ref): assert len(rows) == 2 -@pytest.mark.parametrize( - "data_format", ((types.DataFormat.AVRO), (types.DataFormat.ARROW)) -) -def test_column_selection_read(client, project_id, table_with_data_ref, data_format): - +@pytest.mark.parametrize("data_format", ("AVRO", "ARROW")) +def test_column_selection_read( + client_and_types, project_id, table_with_data_ref, data_format +): + client, types = client_and_types read_session = types.ReadSession() read_session.table = table_with_data_ref read_session.data_format = data_format @@ -175,14 +177,14 @@ def test_column_selection_read(client, project_id, table_with_data_ref, data_for assert sorted(row.keys()) == ["age", "first_name"] -def test_snapshot(client, project_id, table_with_data_ref, bq_client): - before_new_data = types.Timestamp() - before_new_data.GetCurrentTime() +def test_snapshot(client_and_types, project_id, table_with_data_ref, bq_client): + client, types = client_and_types + before_new_data = dt.datetime.now(tz=dt.timezone.utc) # load additional data into the table new_data = [ - {u"first_name": u"NewGuyFoo", u"last_name": u"Smith", u"age": 46}, - {u"first_name": u"NewGuyBar", u"last_name": u"Jones", u"age": 30}, + {"first_name": "NewGuyFoo", "last_name": "Smith", "age": 46}, + {"first_name": "NewGuyBar", "last_name": "Jones", "age": 30}, ] destination = _to_bq_table_ref(table_with_data_ref) @@ -214,8 +216,9 @@ def test_snapshot(client, project_id, table_with_data_ref, bq_client): def test_column_partitioned_table( - client, project_id, col_partition_table_ref, bq_client + client_and_types, project_id, col_partition_table_ref, bq_client ): + client, types = client_and_types data = [ {"description": "Tracking established.", "occurred": "2017-02-15"}, {"description": "Look, a solar eclipse!", "occurred": "2018-02-15"}, @@ -256,12 +259,11 @@ def test_column_partitioned_table( assert row["description"] in expected_descriptions -@pytest.mark.parametrize( - "data_format", ((types.DataFormat.AVRO), (types.DataFormat.ARROW)) -) +@pytest.mark.parametrize("data_format", ("AVRO", "ARROW")) def test_ingestion_time_partitioned_table( - client, project_id, ingest_partition_table_ref, bq_client, data_format + client_and_types, project_id, ingest_partition_table_ref, bq_client, data_format ): + client, types = client_and_types data = [{"shape": "cigar", "altitude": 1200}, {"shape": "disc", "altitude": 750}] destination = _to_bq_table_ref( ingest_partition_table_ref, partition_suffix="$20190809" @@ -306,37 +308,38 @@ def test_ingestion_time_partitioned_table( rows = list(client.read_rows(stream).rows(session)) assert len(rows) == 2 + data_format = getattr(types.DataFormat, data_format) if data_format == types.DataFormat.AVRO: actual_items = {(row["shape"], row["altitude"]) for row in rows} - else: - assert data_format == types.DataFormat.ARROW + elif data_format == types.DataFormat.ARROW: actual_items = {(row["shape"].as_py(), row["altitude"].as_py()) for row in rows} + else: + raise AssertionError(f"got unexpected data_format: {data_format}") expected_items = {("sphere", 3500), 
("doughnut", 100)} assert actual_items == expected_items -@pytest.mark.parametrize( - "data_format", ((types.DataFormat.AVRO), (types.DataFormat.ARROW)) -) +@pytest.mark.parametrize("data_format", ("AVRO", "ARROW")) def test_decoding_data_types( - client, project_id, all_types_table_ref, bq_client, data_format + client_and_types, project_id, all_types_table_ref, bq_client, data_format ): + client, types = client_and_types data = [ { - u"string_field": u"Price: € 9.95.", - u"bytes_field": bigquery._helpers._bytes_to_json(b"byteees"), - u"int64_field": -1085, - u"float64_field": -42.195, - u"numeric_field": "1.4142", - u"bool_field": True, - u"geography_field": '{"type": "Point", "coordinates": [-49.3028, 69.0622]}', - u"person_struct_field": {u"name": u"John", u"age": 42}, - u"timestamp_field": 1565357902.017896, # 2019-08-09T13:38:22.017896 - u"date_field": u"1995-03-17", - u"time_field": u"16:24:51", - u"datetime_field": u"2005-10-26T19:49:41", - u"string_array_field": [u"foo", u"bar", u"baz"], + "string_field": "Price: € 9.95.", + "bytes_field": bigquery._helpers._bytes_to_json(b"byteees"), + "int64_field": -1085, + "float64_field": -42.195, + "numeric_field": "1.4142", + "bool_field": True, + "geography_field": '{"type": "Point", "coordinates": [-49.3028, 69.0622]}', + "person_struct_field": {"name": "John", "age": 42}, + "timestamp_field": 1565357902.017896, # 2019-08-09T13:38:22.017896 + "date_field": "1995-03-17", + "time_field": "16:24:51", + "datetime_field": "2005-10-26T19:49:41", + "string_array_field": ["foo", "bar", "baz"], } ] @@ -386,28 +389,30 @@ def test_decoding_data_types( stream = session.streams[0].name + data_format = getattr(types.DataFormat, data_format) if data_format == types.DataFormat.AVRO: rows = list(client.read_rows(stream).rows(session)) - else: - assert data_format == types.DataFormat.ARROW + elif data_format == types.DataFormat.ARROW: rows = list( dict((key, value.as_py()) for key, value in row_dict.items()) for row_dict in client.read_rows(stream).rows(session) ) + else: + raise AssertionError(f"got unexpected data_format: {data_format}") expected_result = { - u"string_field": u"Price: € 9.95.", - u"bytes_field": b"byteees", - u"int64_field": -1085, - u"float64_field": -42.195, - u"numeric_field": decimal.Decimal("1.4142"), - u"bool_field": True, - u"geography_field": "POINT(-49.3028 69.0622)", - u"person_struct_field": {u"name": u"John", u"age": 42}, - u"timestamp_field": dt.datetime(2019, 8, 9, 13, 38, 22, 17896, tzinfo=pytz.UTC), - u"date_field": dt.date(1995, 3, 17), - u"time_field": dt.time(16, 24, 51), - u"string_array_field": [u"foo", u"bar", u"baz"], + "string_field": "Price: € 9.95.", + "bytes_field": b"byteees", + "int64_field": -1085, + "float64_field": -42.195, + "numeric_field": decimal.Decimal("1.4142"), + "bool_field": True, + "geography_field": "POINT(-49.3028 69.0622)", + "person_struct_field": {"name": "John", "age": 42}, + "timestamp_field": dt.datetime(2019, 8, 9, 13, 38, 22, 17896, tzinfo=pytz.UTC), + "date_field": dt.date(1995, 3, 17), + "time_field": dt.time(16, 24, 51), + "string_array_field": ["foo", "bar", "baz"], } result_copy = copy.copy(rows[0]) @@ -421,13 +426,11 @@ def test_decoding_data_types( assert expected_pattern.match(str(rows[0]["datetime_field"])) -@pytest.mark.parametrize( - "data_format", ((types.DataFormat.AVRO), (types.DataFormat.ARROW)) -) +@pytest.mark.parametrize("data_format", ("AVRO", "ARROW")) def test_resuming_read_from_offset( - client, project_id, data_format, local_shakespeare_table_reference + 
client_and_types, project_id, data_format, local_shakespeare_table_reference ): - + client, types = client_and_types read_session = types.ReadSession() read_session.table = local_shakespeare_table_reference read_session.data_format = data_format @@ -463,9 +466,10 @@ def test_resuming_read_from_offset( assert actual_len == expected_len -def test_read_rows_to_dataframe_with_wide_table(client, project_id): +def test_read_rows_to_dataframe_with_wide_table(client_and_types, project_id): # Use a wide table to boost the chance of getting a large message size. # https://github.com/googleapis/python-bigquery-storage/issues/78 + client, types = client_and_types read_session = types.ReadSession() read_session.table = "projects/{}/datasets/{}/tables/{}".format( "bigquery-public-data", "geo_census_tracts", "us_census_tracts_national" diff --git a/tests/system/v1/test_reader_dataframe_v1.py b/tests/system/reader/test_reader_dataframe.py similarity index 91% rename from tests/system/v1/test_reader_dataframe_v1.py rename to tests/system/reader/test_reader_dataframe.py index ce1a46ee..7defe888 100644 --- a/tests/system/v1/test_reader_dataframe_v1.py +++ b/tests/system/reader/test_reader_dataframe.py @@ -19,10 +19,9 @@ import pyarrow.types import pytest -from google.cloud.bigquery_storage import types - -def test_read_v1(client, project_id): +def test_read_rows_to_arrow(client_and_types, project_id): + client, types = client_and_types read_session = types.ReadSession() read_session.table = "projects/{}/datasets/{}/tables/{}".format( "bigquery-public-data", "new_york_citibike", "citibike_stations" @@ -60,9 +59,12 @@ def test_read_v1(client, project_id): @pytest.mark.parametrize( "data_format,expected_schema_type", - ((types.DataFormat.AVRO, "avro_schema"), (types.DataFormat.ARROW, "arrow_schema")), + (("AVRO", "avro_schema"), ("ARROW", "arrow_schema")), ) -def test_read_rows_to_dataframe(client, project_id, data_format, expected_schema_type): +def test_read_rows_to_dataframe( + client_and_types, project_id, data_format, expected_schema_type +): + client, types = client_and_types read_session = types.ReadSession() read_session.table = "projects/{}/datasets/{}/tables/{}".format( "bigquery-public-data", "new_york_citibike", "citibike_stations" From 9ad82d14893dafcb558b28902356aef772264274 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 25 Jan 2021 09:17:33 -0600 Subject: [PATCH 22/22] chore: release 2.2.0 (#116) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 19 +++++++++++++++++++ setup.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de1c6918..7f95378f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery-storage/#history +## [2.2.0](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.1.0...v2.2.0) (2021-01-22) + + +### Features + +* add clients for v1beta2 endpoint ([#113](https://www.github.com/googleapis/python-bigquery-storage/issues/113)) ([e5f6198](https://www.github.com/googleapis/python-bigquery-storage/commit/e5f6198262cf9a593c62219cf5f6632c5a2a811e)) +* add manual wrapper for v1beta2 read client ([#117](https://www.github.com/googleapis/python-bigquery-storage/issues/117)) ([798cd34](https://www.github.com/googleapis/python-bigquery-storage/commit/798cd341fbe0734f99b9c2ac3c50ae09886d1c90)) + + +### Bug Fixes + +* skip some system tests 
for mtls testing ([#106](https://www.github.com/googleapis/python-bigquery-storage/issues/106)) ([89ba292](https://www.github.com/googleapis/python-bigquery-storage/commit/89ba292281970cbdee5bb43b45a9dac69e29ff0a)) + + +### Documentation + +* add note about Arrow blocks to README ([#73](https://www.github.com/googleapis/python-bigquery-storage/issues/73)) ([d9691f1](https://www.github.com/googleapis/python-bigquery-storage/commit/d9691f1714bf34b3119d4e457293a723c2fb9120)) +* request only a single stream in dataframe example ([#114](https://www.github.com/googleapis/python-bigquery-storage/issues/114)) ([3518624](https://www.github.com/googleapis/python-bigquery-storage/commit/35186247018b0c93a4af1fcde52fa739efa803c4)) + ## [2.1.0](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.0.1...v2.1.0) (2020-11-04) diff --git a/setup.py b/setup.py index 1ccf3b83..d8a8fd37 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ name = "google-cloud-bigquery-storage" description = "BigQuery Storage API API client library" -version = "2.1.0" +version = "2.2.0" release_status = "Development Status :: 5 - Production/Stable" dependencies = [ "google-api-core[grpc] >= 1.22.2, < 2.0.0dev",