From 62d83f3d89b5603396831353682018626098e7cd Mon Sep 17 00:00:00 2001
From: Jennifer Davis <sigje@google.com>
Date: Tue, 31 Mar 2026 17:26:00 -0700
Subject: [PATCH] feat(bigquery/dataframes): add BigFrames samples and test
 configuration

Signed-off-by: Jennifer Davis <sigje@google.com>
---
 bigquery/dataframes/noxfile_config.py         | 31 +++++++++
 bigquery/dataframes/read_gbq_function.py      | 51 +++++++++++++++
 bigquery/dataframes/read_gbq_function_test.py | 64 +++++++++++++++++++
 bigquery/dataframes/requirements-test.txt     |  5 ++
 bigquery/dataframes/requirements.txt          |  3 +
 bigquery/dataframes/sql_scalar.py             | 50 +++++++++++++++
 bigquery/dataframes/sql_scalar_test.py        | 30 +++++++++
 7 files changed, 234 insertions(+)
 create mode 100644 bigquery/dataframes/noxfile_config.py
 create mode 100644 bigquery/dataframes/read_gbq_function.py
 create mode 100644 bigquery/dataframes/read_gbq_function_test.py
 create mode 100644 bigquery/dataframes/requirements-test.txt
 create mode 100644 bigquery/dataframes/requirements.txt
 create mode 100644 bigquery/dataframes/sql_scalar.py
 create mode 100644 bigquery/dataframes/sql_scalar_test.py

diff --git a/bigquery/dataframes/noxfile_config.py b/bigquery/dataframes/noxfile_config.py
new file mode 100644
index 00000000000..ae46e2a97f5
--- /dev/null
+++ b/bigquery/dataframes/noxfile_config.py
@@ -0,0 +1,31 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+TEST_CONFIG_OVERRIDE = {
+    # Opt out of testing on specific Python versions. 3.9 is skipped due to a
+    # pyarrow compilation failure. TODO: document why 3.11 is also skipped.
+    "ignored_versions": ["2.7", "3.6", "3.9", "3.11"],
+    # Old samples are opted out of enforcing Python type hints.
+    # All new samples should feature them, so enforcement is on here.
+    "enforce_type_hints": True,
+    # An envvar key for determining the project id to use. Change it
+    # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in to using
+    # a build-specific Cloud project. You can also supply your own
+    # envvar name to point the tests at your own Cloud project.
+    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
+    # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
+    # A dictionary you want to inject into your test. Don't put any
+    # secrets here. These values will override predefined values.
+    "envs": {},
+}
diff --git a/bigquery/dataframes/read_gbq_function.py b/bigquery/dataframes/read_gbq_function.py
new file mode 100644
index 00000000000..38e323ef72a
--- /dev/null
+++ b/bigquery/dataframes/read_gbq_function.py
@@ -0,0 +1,51 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Registers and applies existing BigQuery User-Defined Functions (UDFs) to DataFrames.
+
+Enables the reuse of existing BigQuery SQL or JavaScript UDFs as callable
+objects within a BigQuery DataFrames session.
+"""
+
+# [START bigquery_dataframes_read_gbq_function]
+import bigframes.pandas as bpd
+
+
+def use_read_gbq_function(project_id: str, function_id: str) -> None:
+    bpd.options.bigquery.project = project_id
+    bpd.options.bigquery.location = "US"
+
+    # Register an existing BigQuery UDF.
+    # The function must have an explicit return type in its BigQuery definition.
+    # In production, use functions deployed to your own project for stability.
+    extract_title = bpd.read_gbq_function(function_id)
+
+    df = bpd.DataFrame(
+        {
+            "book_xml": [
+                "<book><title>The Great Gatsby</title></book>",
+                "<book><title>1984</title></book>",
+                "<book><title>Brave New World</title></book>",
+            ]
+        }
+    )
+
+    # Use apply to call the registered BigQuery function for each row.
+    # This executes the logic in BigQuery rather than locally.
+    df["title"] = df["book_xml"].apply(extract_title)
+
+    print(df[["title"]].to_pandas())
+
+
+# [END bigquery_dataframes_read_gbq_function]
diff --git a/bigquery/dataframes/read_gbq_function_test.py b/bigquery/dataframes/read_gbq_function_test.py
new file mode 100644
index 00000000000..63c8e148a6e
--- /dev/null
+++ b/bigquery/dataframes/read_gbq_function_test.py
@@ -0,0 +1,64 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import Generator
+
+from google.cloud import bigquery
+import pytest
+import test_utils.prefixer
+
+import read_gbq_function
+
+PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
+prefixer = test_utils.prefixer.Prefixer("python-docs-samples", "bigquery/dataframes")
+
+
+@pytest.fixture(scope="module")
+def bq_client() -> bigquery.Client:
+    return bigquery.Client(project=PROJECT_ID)
+
+
+@pytest.fixture(scope="module")
+def dataset_id(bq_client: bigquery.Client) -> Generator[str, None, None]:
+    dataset_name = prefixer.create_prefix().replace("-", "_")
+    dataset_id = f"{PROJECT_ID}.{dataset_name}"
+    dataset = bigquery.Dataset(dataset_id)
+    dataset.location = "US"
+    bq_client.create_dataset(dataset)
+    yield dataset_id
+    bq_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)
+
+
+@pytest.fixture(scope="module")
+def udf_id(bq_client: bigquery.Client, dataset_id: str) -> str:
+    function_id = f"{dataset_id}.extract_title"
+    query = f"""
+    CREATE OR REPLACE FUNCTION `{function_id}`(xml STRING) RETURNS STRING AS (
+      SAFE.REGEXP_EXTRACT(xml, r'<title>(.*?)</title>')
+    );
+    """
+    bq_client.query(query).result()
+    return function_id
+
+
+def test_use_read_gbq_function(
+    capsys: pytest.CaptureFixture[str], udf_id: str
+) -> None:
+    read_gbq_function.use_read_gbq_function(PROJECT_ID, udf_id)
+    out, _ = capsys.readouterr()
+
+    assert "The Great Gatsby" in out
+    assert "1984" in out
+    assert "Brave New World" in out
diff --git a/bigquery/dataframes/requirements-test.txt b/bigquery/dataframes/requirements-test.txt
new file mode 100644
index 00000000000..abb3c5a680c
--- /dev/null
+++ b/bigquery/dataframes/requirements-test.txt
@@ -0,0 +1,5 @@
+pytest
+pytest-asyncio
+google-cloud-bigquery
+bigframes
+google-cloud-testutils
diff --git a/bigquery/dataframes/requirements.txt b/bigquery/dataframes/requirements.txt
new file mode 100644
index 00000000000..241dae3a997
--- /dev/null
+++ b/bigquery/dataframes/requirements.txt
@@ -0,0 +1,3 @@
+google-cloud-bigquery-storage
+bigframes
+google-cloud-bigquery
diff --git a/bigquery/dataframes/sql_scalar.py b/bigquery/dataframes/sql_scalar.py
new file mode 100644
index 00000000000..cf77d10f9ba
--- /dev/null
+++ b/bigquery/dataframes/sql_scalar.py
@@ -0,0 +1,50 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Extracts data from XML strings using SQL scalar functions in BigQuery DataFrames.
+
+Demonstrates using BigQuery SQL expressions directly within a DataFrame
+transformation for efficient server-side processing.
+"""
+
+# [START bigquery_dataframes_sql_scalar]
+import bigframes.bigquery as bbq
+import bigframes.pandas as bpd
+
+
+def create_sql_scalar_extraction(project_id: str) -> None:
+    bpd.options.bigquery.project = project_id
+    bpd.options.bigquery.location = "US"
+
+    df = bpd.DataFrame(
+        {
+            "book_xml": [
+                "<book><title>The Great Gatsby</title></book>",
+                "<book><title>1984</title></book>",
+                "<book><title>Brave New World</title></book>",
+            ]
+        }
+    )
+
+    # Use bbq.sql_scalar to execute arbitrary SQL expressions directly in BigQuery.
+    # The {0} placeholder refers to the first Series in the list: df["book_xml"].
+    df["title"] = bbq.sql_scalar(
+        "SAFE.REGEXP_EXTRACT({0}, r'<title>(.*?)</title>')",
+        [df["book_xml"]],
+    )
+
+    print(df[["title"]].to_pandas())
+
+
+# [END bigquery_dataframes_sql_scalar]
diff --git a/bigquery/dataframes/sql_scalar_test.py b/bigquery/dataframes/sql_scalar_test.py
new file mode 100644
index 00000000000..8509e5b992b
--- /dev/null
+++ b/bigquery/dataframes/sql_scalar_test.py
@@ -0,0 +1,30 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import pytest
+
+import sql_scalar
+
+PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
+
+
+def test_create_sql_scalar_extraction(capsys: pytest.CaptureFixture[str]) -> None:
+    sql_scalar.create_sql_scalar_extraction(PROJECT_ID)
+    out, _ = capsys.readouterr()
+
+    assert "The Great Gatsby" in out
+    assert "1984" in out
+    assert "Brave New World" in out