Skip to content

Commit 037a761

Browse files
feat: added --destination_table parameter to bigquery magic
1 parent 48359eb commit 037a761

File tree

2 files changed

+143
-0
lines changed

2 files changed

+143
-0
lines changed

bigquery/google/cloud/bigquery/magics.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@
3131
this parameter is used. If an error occurs during the query execution,
3232
the corresponding ``QueryJob`` instance (if available) is stored in
3333
the variable instead.
34+
* ``--destination_table`` (optional, line argument):
35+
A dataset and table to store the query results. If the table does not exist,
36+
it will be created. If table already exists, its data will be overwritten.
37+
Variable should be in the format <dataset_id>.<table_id>.
3438
* ``--project <project>`` (optional, line argument):
3539
Project to use for running the query. Defaults to the context
3640
:attr:`~google.cloud.bigquery.magics.Context.project`.
@@ -145,6 +149,7 @@
145149
raise ImportError("This module can only be loaded in IPython.")
146150

147151
from google.api_core import client_info
152+
from google.api_core.exceptions import NotFound
148153
import google.auth
149154
from google.cloud import bigquery
150155
from google.cloud.bigquery.dbapi import _helpers
@@ -336,12 +341,44 @@ def _run_query(client, query, job_config=None):
336341
return query_job
337342

338343

344+
def _create_dataset_if_necessary(client, dataset_id):
    """Create a dataset in the current project if it doesn't exist.

    Args:
        client (google.cloud.bigquery.client.Client):
            Client to bundle configuration needed for API requests.
        dataset_id (str):
            Dataset id.
    """
    dataset_reference = bigquery.dataset.DatasetReference(client.project, dataset_id)
    try:
        # If the dataset already exists there is nothing to do.
        # (Fixed: the looked-up dataset was previously bound to an unused
        # local variable.)
        client.get_dataset(dataset_reference)
        return
    except NotFound:
        pass
    dataset = bigquery.Dataset(dataset_reference)
    # Create the dataset in the same location the client is configured for.
    dataset.location = client.location
    print("Creating dataset: {}".format(dataset_id))
    # (Fixed: the created dataset was previously re-bound to an unused
    # local variable.)
    client.create_dataset(dataset)
364+
339365
@magic_arguments.magic_arguments()
340366
@magic_arguments.argument(
341367
"destination_var",
342368
nargs="?",
343369
help=("If provided, save the output to this variable instead of displaying it."),
344370
)
371+
@magic_arguments.argument(
372+
"--destination_table",
373+
type=str,
374+
default=None,
375+
help=(
376+
"If provided, save the output of the query to a new BigQuery table. "
377+
"Variable should be in a format <dataset_id>.<table_id>. "
378+
"If table does not exists, it will be created. "
379+
"If table already exists, its data will be overwritten."
380+
),
381+
)
345382
@magic_arguments.argument(
346383
"--project",
347384
type=str,
@@ -485,6 +522,21 @@ def _cell_magic(line, query):
485522
job_config.use_legacy_sql = args.use_legacy_sql
486523
job_config.dry_run = args.dry_run
487524

525+
if args.destination_table:
526+
split = args.destination_table.split(".")
527+
if len(split) != 2:
528+
raise ValueError(
529+
"--destination_table should be in a <dataset_id>.<table_id> format."
530+
)
531+
dataset_id, table_id = split
532+
job_config.allow_large_results = True
533+
dataset_ref = client.dataset(dataset_id)
534+
destination_table_ref = dataset_ref.table(table_id)
535+
job_config.destination = destination_table_ref
536+
job_config.create_disposition = "CREATE_IF_NEEDED"
537+
job_config.write_disposition = "WRITE_TRUNCATE"
538+
_create_dataset_if_necessary(client, dataset_id)
539+
488540
if args.maximum_bytes_billed == "None":
489541
job_config.maximum_bytes_billed = 0
490542
elif args.maximum_bytes_billed is not None:

bigquery/tests/unit/test_magics.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from google.cloud import bigquery_storage_v1beta1
4040
except ImportError: # pragma: NO COVER
4141
bigquery_storage_v1beta1 = None
42+
from google.cloud import bigquery
4243
from google.cloud.bigquery import job
4344
from google.cloud.bigquery import table
4445
from google.cloud.bigquery import magics
@@ -336,6 +337,37 @@ def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib):
336337
assert "grpcio" in str(exc_context.value)
337338

338339

340+
def test__create_dataset_if_necessary_exists():
    """When the dataset already exists, no creation call must be made."""
    project = "project_id"
    dataset_id = "dataset_id"
    dataset_reference = bigquery.dataset.DatasetReference(project, dataset_id)
    dataset = bigquery.Dataset(dataset_reference)
    client_patch = mock.patch(
        "google.cloud.bigquery.magics.bigquery.Client", autospec=True
    )
    with client_patch as client_mock:
        client = client_mock()
        client.project = project
        # Fixed: ``return_value`` (not the typo ``result_value``) is the mock
        # attribute that makes get_dataset report an existing dataset; the
        # typo only set a stray attribute and the test passed by accident.
        client.get_dataset.return_value = dataset
        magics._create_dataset_if_necessary(client, dataset_id)
        client.create_dataset.assert_not_called()
354+
355+
356+
def test__create_dataset_if_necessary_not_exist():
    """A NotFound from get_dataset should trigger dataset creation."""
    client_factory_patch = mock.patch(
        "google.cloud.bigquery.magics.bigquery.Client", autospec=True
    )
    with client_factory_patch as client_factory:
        mocked_client = client_factory()
        mocked_client.location = "us"
        mocked_client.project = "project_id"
        # Simulate a missing dataset so the helper falls through to creation.
        mocked_client.get_dataset.side_effect = exceptions.NotFound(
            "dataset not found"
        )

        magics._create_dataset_if_necessary(mocked_client, "dataset_id")

        mocked_client.create_dataset.assert_called_once()
369+
370+
339371
@pytest.mark.usefixtures("ipython_interactive")
340372
def test_extension_load():
341373
ip = IPython.get_ipython()
@@ -1199,3 +1231,62 @@ def test_bigquery_magic_omits_tracebacks_from_error_message():
11991231
assert "400 Syntax error in SQL query" in output
12001232
assert "Traceback (most recent call last)" not in output
12011233
assert "Syntax error" not in captured_io.stdout
1234+
1235+
1236+
@pytest.mark.usefixtures("ipython_interactive")
def test_bigquery_magic_w_destination_table_invalid_format():
    """A --destination_table value without a dot must raise ValueError."""
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension("google.cloud.bigquery")
    magics.context._project = None

    mock_credentials = mock.create_autospec(
        google.auth.credentials.Credentials, instance=True
    )
    patch_default = mock.patch(
        "google.auth.default", return_value=(mock_credentials, "general-project")
    )
    patch_client = mock.patch(
        "google.cloud.bigquery.magics.bigquery.Client", autospec=True
    )

    with patch_client, patch_default, pytest.raises(ValueError) as exc_context:
        ip.run_cell_magic(
            "bigquery", "--destination_table dataset", "SELECT foo FROM WHERE LIMIT bar"
        )

    expected_fragment = (
        "--destination_table should be in a <dataset_id>.<table_id> format."
    )
    assert expected_fragment in str(exc_context.value)
1262+
1263+
1264+
@pytest.mark.usefixtures("ipython_interactive")
def test_bigquery_magic_w_destination_table():
    """A valid --destination_table value wires up the expected job_config."""
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension("google.cloud.bigquery")
    magics.context.credentials = mock.create_autospec(
        google.auth.credentials.Credentials, instance=True
    )

    patch_create_dataset = mock.patch(
        "google.cloud.bigquery.magics._create_dataset_if_necessary", autospec=True
    )
    patch_run_query = mock.patch(
        "google.cloud.bigquery.magics._run_query", autospec=True
    )

    with patch_create_dataset, patch_run_query as mocked_run_query:
        ip.run_cell_magic(
            "bigquery",
            "--destination_table dataset_id.table_id",
            "SELECT foo FROM WHERE LIMIT bar",
        )

    # _run_query is invoked with job_config as a keyword argument; inspect
    # the first call's kwargs to verify the destination configuration.
    _, call_kwargs = mocked_run_query.call_args_list[0]
    used_config = call_kwargs["job_config"]
    assert used_config.allow_large_results is True
    assert used_config.create_disposition == "CREATE_IF_NEEDED"
    assert used_config.write_disposition == "WRITE_TRUNCATE"
    assert used_config.destination.dataset_id == "dataset_id"
    assert used_config.destination.table_id == "table_id"

0 commit comments

Comments
 (0)