BigQuery: Add support to Dataset for project_ids with org prefix. (#8877)

emar-kar · Gurov Ilya · commit 2ab105b5189b · 2019-08-22T17:41:57.000+03:00
diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
@@ -18,6 +18,7 @@
 
 import six
 import copy
+import re
 
 import google.cloud._helpers
 from google.cloud.bigquery import _helpers
@@ -26,6 +27,14 @@
 from google.cloud.bigquery.table import TableReference
 
 
+_PROJECT_PREFIX_PATTERN = re.compile(
+    r"""
+    (?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)$
+""",
+    re.VERBOSE,
+)
+
+
 def _get_table_reference(self, table_id):
     """Constructs a TableReference.
 
@@ -269,7 +278,7 @@ def from_string(cls, dataset_id, default_project=None):
         Args:
             dataset_id (str):
                 A dataset ID in standard SQL format. If ``default_project``
-                is not specified, this must included both the project ID and
+                is not specified, this must include both the project ID and
                 the dataset ID, separated by ``.``.
             default_project (str):
                 Optional. The project ID to use when ``dataset_id`` does not
@@ -290,13 +299,19 @@ def from_string(cls, dataset_id, default_project=None):
         """
         output_dataset_id = dataset_id
         output_project_id = default_project
-        parts = dataset_id.split(".")
+        with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id)
+        if with_prefix is None:
+            parts = dataset_id.split(".")
+        else:
+            project_id = with_prefix.group("project_id")
+            dataset_id = with_prefix.group("dataset_id")
+            parts = [project_id, dataset_id]
 
         if len(parts) == 1 and not default_project:
             raise ValueError(
                 "When default_project is not set, dataset_id must be a "
-                "fully-qualified dataset ID in standard SQL format. "
-                'e.g. "project.dataset_id", got {}'.format(dataset_id)
+                "fully-qualified dataset ID in standard SQL format, "
+                'e.g., "project.dataset_id" got {}'.format(dataset_id)
             )
         elif len(parts) == 2:
             output_project_id, output_dataset_id = parts
@@ -554,7 +569,7 @@ def from_string(cls, full_dataset_id):
         Args:
             full_dataset_id (str):
                 A fully-qualified dataset ID in standard SQL format. Must
-                included both the project ID and the dataset ID, separated by
+                include both the project ID and the dataset ID, separated by
                 ``.``.
 
         Returns:
diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py
@@ -186,11 +186,27 @@ def test_from_string(self):
         self.assertEqual(got.project, "string-project")
         self.assertEqual(got.dataset_id, "string_dataset")
 
+    def test_from_string_w_prefix(self):
+        cls = self._get_target_class()
+        got = cls.from_string("google.com:string-project.string_dataset")
+        self.assertEqual(got.project, "google.com:string-project")
+        self.assertEqual(got.dataset_id, "string_dataset")
+
     def test_from_string_legacy_string(self):
         cls = self._get_target_class()
         with self.assertRaises(ValueError):
             cls.from_string("string-project:string_dataset")
 
+    def test_from_string_w_incorrect_prefix(self):
+        cls = self._get_target_class()
+        with self.assertRaises(ValueError):
+            cls.from_string("google.com.string-project.dataset_id")
+
+    def test_from_string_w_prefix_and_too_many_parts(self):
+        cls = self._get_target_class()
+        with self.assertRaises(ValueError):
+            cls.from_string("google.com:string-project.dataset_id.table_id")
+
     def test_from_string_not_fully_qualified(self):
         cls = self._get_target_class()
         with self.assertRaises(ValueError):