From 74520c62aeebb25480b6f43a40d740ffde966271 Mon Sep 17 00:00:00 2001 From: David del Real Sifuentes Date: Tue, 16 Jun 2026 23:33:57 +0000 Subject: [PATCH 1/9] feat(dataplex): add global data quality scan sample - Add new `data_quality_scan_global.py` sample demonstrating how to create a global data quality scan - Add `requirements.txt` for the new sample --- dataplex/scan/data_quality_scan_global.py | 62 +++++++++++++++++++++++ dataplex/scan/requirements.txt | 1 + 2 files changed, 63 insertions(+) create mode 100644 dataplex/scan/data_quality_scan_global.py create mode 100644 dataplex/scan/requirements.txt diff --git a/dataplex/scan/data_quality_scan_global.py b/dataplex/scan/data_quality_scan_global.py new file mode 100644 index 0000000000..cd743a224e --- /dev/null +++ b/dataplex/scan/data_quality_scan_global.py @@ -0,0 +1,62 @@ +from google.cloud import dataplex_v1 +import google.api_core.exceptions + + +def create_data_quality_scan_global( + project_id: str, + dataset_id: str, + table_id: str, + location: str, + column_id_1: str, + column_id_2: str +) -> dataplex_v1.DataScan: + """Creates a Dataplex Data Quality Scan using global API endpoint routing. + + A bigquery table with at least 2 columns is expected. + """ + client = dataplex_v1.DataScanServiceClient() + + parent = client.common_location_path(project=project_id, location=location) + + bigquery_table = f"//bigquery.googleapis.com/projects/{project_id}/datasets/{dataset_id}/tables/{table_id}" + + data_quality_spec = dataplex_v1.DataQualitySpec( + rules=[ + dataplex_v1.DataQualityRule( + name="global-null-assertion", + dimension="COMPLETENESS", + description="Fails if any row contains a null value", + sql_assertion=dataplex_v1.DataQualityRule.SqlAssertion( + # Use ${data()} as the placeholder for the table Dataplex is scanning + sql_statement="SELECT * FROM ${data()}" f"WHERE {column_id_1} IS NULL OR {column_id_2} IS NULL" + ) + ) + ] + ) + + data_scan = dataplex_v1.DataScan( + display_name="Global Data Quality Scan", + data=dataplex_v1.DataSource(resource=bigquery_table), + data_quality_spec=data_quality_spec, + ) + + request = dataplex_v1.CreateDataScanRequest( + parent=parent, + data_scan=data_scan + ) + + try: + operation = client.create_data_scan(request=request) + print(operation) + return operation.result() + except google.api_core.exceptions.AlreadyExists: + print("A scan with this ID already Exists.") + except google.api_core.exceptions.InvalidArgument as e: + print(f"Your scan configuration is invalid: {e}") + except google.api_core.exceptions.GoogleAPIError as e: + print(f"Unexpected exception: {e}") + + +# TODO remove before PR +if __name__ == "__main__": + create_data_quality_scan_global(project_id="samples-xwf-01", dataset_id="test_dataset_01", table_id="test_table_01", location="us-central1", column_id_1="test_field_01", column_id_2="test_field_02") diff --git a/dataplex/scan/requirements.txt b/dataplex/scan/requirements.txt new file mode 100644 index 0000000000..66cd4575bf --- /dev/null +++ b/dataplex/scan/requirements.txt @@ -0,0 +1 @@ +google-cloud-dataplex==2.20.0 \ No newline at end of file From 1cd27279c5ab7c379597dfd53461e98bb6e99951 Mon Sep 17 00:00:00 2001 From: David del Real Sifuentes Date: Wed, 17 Jun 2026 17:27:14 +0000 Subject: [PATCH 2/9] Formatted. --- dataplex/scan/data_quality_scan_global.py | 31 ++++++++++------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/dataplex/scan/data_quality_scan_global.py b/dataplex/scan/data_quality_scan_global.py index cd743a224e..0d691ecff3 100644 --- a/dataplex/scan/data_quality_scan_global.py +++ b/dataplex/scan/data_quality_scan_global.py @@ -8,17 +8,20 @@ def create_data_quality_scan_global( table_id: str, location: str, column_id_1: str, - column_id_2: str -) -> dataplex_v1.DataScan: + column_id_2: str, +) -> None: """Creates a Dataplex Data Quality Scan using global API endpoint routing. - A bigquery table with at least 2 columns is expected. + A bigquery table with at least 2 columns is expected. """ client = dataplex_v1.DataScanServiceClient() parent = client.common_location_path(project=project_id, location=location) - bigquery_table = f"//bigquery.googleapis.com/projects/{project_id}/datasets/{dataset_id}/tables/{table_id}" + bigquery_table = ( + f"//bigquery.googleapis.com/projects/{project_id}" + f"/datasets/{dataset_id}/tables/{table_id}" + ) data_quality_spec = dataplex_v1.DataQualitySpec( rules=[ @@ -28,8 +31,11 @@ def create_data_quality_scan_global( description="Fails if any row contains a null value", sql_assertion=dataplex_v1.DataQualityRule.SqlAssertion( # Use ${data()} as the placeholder for the table Dataplex is scanning - sql_statement="SELECT * FROM ${data()}" f"WHERE {column_id_1} IS NULL OR {column_id_2} IS NULL" - ) + sql_statement=( + "SELECT * FROM ${data()} " + f"WHERE {column_id_1} IS NULL OR {column_id_2} IS NULL" + ) + ), ) ] ) @@ -40,23 +46,14 @@ def create_data_quality_scan_global( data_quality_spec=data_quality_spec, ) - request = dataplex_v1.CreateDataScanRequest( - parent=parent, - data_scan=data_scan - ) + request = dataplex_v1.CreateDataScanRequest(parent=parent, data_scan=data_scan) try: operation = client.create_data_scan(request=request) - print(operation) - return operation.result() + print(operation.result()) except google.api_core.exceptions.AlreadyExists: print("A scan with this ID already Exists.") except google.api_core.exceptions.InvalidArgument as e: print(f"Your scan configuration is invalid: {e}") except google.api_core.exceptions.GoogleAPIError as e: print(f"Unexpected exception: {e}") - - -# TODO remove before PR -if __name__ == "__main__": - create_data_quality_scan_global(project_id="samples-xwf-01", dataset_id="test_dataset_01", table_id="test_table_01", location="us-central1", column_id_1="test_field_01", column_id_2="test_field_02") From 01f96a68c97199c2c893f10b094481813da713bc Mon Sep 17 00:00:00 2001 From: David del Real Sifuentes Date: Wed, 17 Jun 2026 17:31:40 +0000 Subject: [PATCH 3/9] Added header. --- dataplex/scan/data_quality_scan_global.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/dataplex/scan/data_quality_scan_global.py b/dataplex/scan/data_quality_scan_global.py index 0d691ecff3..2b08f4e3b7 100644 --- a/dataplex/scan/data_quality_scan_global.py +++ b/dataplex/scan/data_quality_scan_global.py @@ -1,3 +1,17 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from google.cloud import dataplex_v1 import google.api_core.exceptions From 2ef926afa5e86efe8d707adcc576873aad56b345 Mon Sep 17 00:00:00 2001 From: David del Real Sifuentes Date: Wed, 17 Jun 2026 17:34:16 +0000 Subject: [PATCH 4/9] Missing white line. --- dataplex/scan/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataplex/scan/requirements.txt b/dataplex/scan/requirements.txt index 66cd4575bf..07ce35f203 100644 --- a/dataplex/scan/requirements.txt +++ b/dataplex/scan/requirements.txt @@ -1 +1 @@ -google-cloud-dataplex==2.20.0 \ No newline at end of file +google-cloud-dataplex==2.20.0 From 17e041586110d7af8561f418fcdb36b5e456053c Mon Sep 17 00:00:00 2001 From: David del Real Sifuentes Date: Wed, 17 Jun 2026 17:40:01 +0000 Subject: [PATCH 5/9] Rearrange imports. --- dataplex/scan/data_quality_scan_global.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataplex/scan/data_quality_scan_global.py b/dataplex/scan/data_quality_scan_global.py index 2b08f4e3b7..50c07b93fc 100644 --- a/dataplex/scan/data_quality_scan_global.py +++ b/dataplex/scan/data_quality_scan_global.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.cloud import dataplex_v1 import google.api_core.exceptions +from google.cloud import dataplex_v1 def create_data_quality_scan_global( From 74c3c311bf447b1a04339f4a87544a4c4912686d Mon Sep 17 00:00:00 2001 From: David del Real Sifuentes Date: Wed, 17 Jun 2026 17:53:55 +0000 Subject: [PATCH 6/9] Added new region tag. --- dataplex/scan/data_quality_scan_global.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dataplex/scan/data_quality_scan_global.py b/dataplex/scan/data_quality_scan_global.py index 50c07b93fc..0f1ff85747 100644 --- a/dataplex/scan/data_quality_scan_global.py +++ b/dataplex/scan/data_quality_scan_global.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +# [START dataplex_data_quality_scan_global] import google.api_core.exceptions from google.cloud import dataplex_v1 @@ -71,3 +72,5 @@ def create_data_quality_scan_global( print(f"Your scan configuration is invalid: {e}") except google.api_core.exceptions.GoogleAPIError as e: print(f"Unexpected exception: {e}") + +# [END dataplex_data_quality_scan_global] From 5f1a1f866237180c6c31e544fefa21aa1fe40b04 Mon Sep 17 00:00:00 2001 From: David del Real Sifuentes Date: Wed, 17 Jun 2026 20:46:47 +0000 Subject: [PATCH 7/9] typo fix --- dataplex/scan/data_quality_scan_global.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataplex/scan/data_quality_scan_global.py b/dataplex/scan/data_quality_scan_global.py index 0f1ff85747..bc972492eb 100644 --- a/dataplex/scan/data_quality_scan_global.py +++ b/dataplex/scan/data_quality_scan_global.py @@ -67,7 +67,7 @@ def create_data_quality_scan_global( operation = client.create_data_scan(request=request) print(operation.result()) except google.api_core.exceptions.AlreadyExists: - print("A scan with this ID already Exists.") + print("A scan with this ID already exists.") except google.api_core.exceptions.InvalidArgument as e: print(f"Your scan configuration is invalid: {e}") except google.api_core.exceptions.GoogleAPIError as e: From df5a2b1dae8a43f0cbf79f128a42a641b6a5a271 Mon Sep 17 00:00:00 2001 From: David del Real Sifuentes Date: Wed, 17 Jun 2026 22:58:10 +0000 Subject: [PATCH 8/9] relocated sample and renamed. --- dataplex/scan/requirements.txt | 1 - .../create_data_quality_scan_global.py} | 10 ++++------ 2 files changed, 4 insertions(+), 7 deletions(-) delete mode 100644 dataplex/scan/requirements.txt rename dataplex/{scan/data_quality_scan_global.py => snippets/create_data_quality_scan_global.py} (93%) diff --git a/dataplex/scan/requirements.txt b/dataplex/scan/requirements.txt deleted file mode 100644 index 07ce35f203..0000000000 --- a/dataplex/scan/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -google-cloud-dataplex==2.20.0 diff --git a/dataplex/scan/data_quality_scan_global.py b/dataplex/snippets/create_data_quality_scan_global.py similarity index 93% rename from dataplex/scan/data_quality_scan_global.py rename to dataplex/snippets/create_data_quality_scan_global.py index bc972492eb..3cd3aab8a9 100644 --- a/dataplex/scan/data_quality_scan_global.py +++ b/dataplex/snippets/create_data_quality_scan_global.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# [START dataplex_data_quality_scan_global] +# [START dataplex_create_data_quality_scan_global] import google.api_core.exceptions from google.cloud import dataplex_v1 @@ -25,14 +25,12 @@ def create_data_quality_scan_global( column_id_1: str, column_id_2: str, ) -> None: - """Creates a Dataplex Data Quality Scan using global API endpoint routing. - - A bigquery table with at least 2 columns is expected. - """ + """Creates a Dataplex Data Quality Scan using global API endpoint routing.""" client = dataplex_v1.DataScanServiceClient() parent = client.common_location_path(project=project_id, location=location) + # A bigquery table with at least 2 columns is assumed. bigquery_table = ( f"//bigquery.googleapis.com/projects/{project_id}" f"/datasets/{dataset_id}/tables/{table_id}" @@ -73,4 +71,4 @@ def create_data_quality_scan_global( except google.api_core.exceptions.GoogleAPIError as e: print(f"Unexpected exception: {e}") -# [END dataplex_data_quality_scan_global] +# [END dataplex_create_data_quality_scan_global] From 02331fba91652c66ae9f38784caa0f0e098f951f Mon Sep 17 00:00:00 2001 From: David del Real Sifuentes Date: Wed, 17 Jun 2026 22:59:42 +0000 Subject: [PATCH 9/9] Updated requirements file. --- dataplex/snippets/requirements-test.txt | 2 +- dataplex/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dataplex/snippets/requirements-test.txt b/dataplex/snippets/requirements-test.txt index c9e154ba44..b3b2b3ca4c 100644 --- a/dataplex/snippets/requirements-test.txt +++ b/dataplex/snippets/requirements-test.txt @@ -1 +1 @@ -pytest==9.0.3; python_version >= "3.10" +pytest==9.0.3 diff --git a/dataplex/snippets/requirements.txt b/dataplex/snippets/requirements.txt index abaf6c843d..07ce35f203 100644 --- a/dataplex/snippets/requirements.txt +++ b/dataplex/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-dataplex==2.4.0 +google-cloud-dataplex==2.20.0