Skip to content

Commit 2ab105b

Browse files
emar-kar authored and Gurov Ilya committed
BigQuery: Add support to Dataset for project_ids with org prefix. (#8877)
1 parent fcf99ce commit 2ab105b

2 files changed

Lines changed: 36 additions & 5 deletions

File tree

bigquery/google/cloud/bigquery/dataset.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818

1919
import six
2020
import copy
21+
import re
2122

2223
import google.cloud._helpers
2324
from google.cloud.bigquery import _helpers
@@ -26,6 +27,14 @@
2627
from google.cloud.bigquery.table import TableReference
2728

2829

30+
_PROJECT_PREFIX_PATTERN = re.compile(
31+
r"""
32+
(?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)$
33+
""",
34+
re.VERBOSE,
35+
)
36+
37+
2938
def _get_table_reference(self, table_id):
3039
"""Constructs a TableReference.
3140
@@ -269,7 +278,7 @@ def from_string(cls, dataset_id, default_project=None):
269278
Args:
270279
dataset_id (str):
271280
A dataset ID in standard SQL format. If ``default_project``
272-
is not specified, this must included both the project ID and
281+
is not specified, this must include both the project ID and
273282
the dataset ID, separated by ``.``.
274283
default_project (str):
275284
Optional. The project ID to use when ``dataset_id`` does not
@@ -290,13 +299,19 @@ def from_string(cls, dataset_id, default_project=None):
290299
"""
291300
output_dataset_id = dataset_id
292301
output_project_id = default_project
293-
parts = dataset_id.split(".")
302+
with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id)
303+
if with_prefix is None:
304+
parts = dataset_id.split(".")
305+
else:
306+
project_id = with_prefix.group("project_id")
307+
dataset_id = with_prefix.group("dataset_id")
308+
parts = [project_id, dataset_id]
294309

295310
if len(parts) == 1 and not default_project:
296311
raise ValueError(
297312
"When default_project is not set, dataset_id must be a "
298-
"fully-qualified dataset ID in standard SQL format. "
299-
'e.g. "project.dataset_id", got {}'.format(dataset_id)
313+
"fully-qualified dataset ID in standard SQL format, "
314+
'e.g., "project.dataset_id" got {}'.format(dataset_id)
300315
)
301316
elif len(parts) == 2:
302317
output_project_id, output_dataset_id = parts
@@ -554,7 +569,7 @@ def from_string(cls, full_dataset_id):
554569
Args:
555570
full_dataset_id (str):
556571
A fully-qualified dataset ID in standard SQL format. Must
557-
included both the project ID and the dataset ID, separated by
572+
include both the project ID and the dataset ID, separated by
558573
``.``.
559574
560575
Returns:

bigquery/tests/unit/test_dataset.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -186,11 +186,27 @@ def test_from_string(self):
186186
self.assertEqual(got.project, "string-project")
187187
self.assertEqual(got.dataset_id, "string_dataset")
188188

189+
def test_from_string_w_prefix(self):
190+
cls = self._get_target_class()
191+
got = cls.from_string("google.com:string-project.string_dataset")
192+
self.assertEqual(got.project, "google.com:string-project")
193+
self.assertEqual(got.dataset_id, "string_dataset")
194+
189195
def test_from_string_legacy_string(self):
190196
cls = self._get_target_class()
191197
with self.assertRaises(ValueError):
192198
cls.from_string("string-project:string_dataset")
193199

200+
def test_from_string_w_incorrect_prefix(self):
201+
cls = self._get_target_class()
202+
with self.assertRaises(ValueError):
203+
cls.from_string("google.com.string-project.dataset_id")
204+
205+
def test_from_string_w_prefix_and_too_many_parts(self):
206+
cls = self._get_target_class()
207+
with self.assertRaises(ValueError):
208+
cls.from_string("google.com:string-project.dataset_id.table_id")
209+
194210
def test_from_string_not_fully_qualified(self):
195211
cls = self._get_target_class()
196212
with self.assertRaises(ValueError):

0 commit comments

Comments
 (0)