Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Commit 2313644

Browse files
authored
Add 'clustering_fields' properties. (#5630)
* Add 'Table.clustering_fields' property. * Add 'clustering_fields' support for load / query jobs.
1 parent b57bf86 commit 2313644

7 files changed

Lines changed: 270 additions & 0 deletions

File tree

google/cloud/bigquery/_helpers.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,42 @@ def _set_sub_prop(container, keys, value):
417417
sub_val[keys[-1]] = value
418418

419419

420+
def _del_sub_prop(container, keys):
421+
"""Remove a nested key fro a dictionary.
422+
423+
Arguments:
424+
container (dict):
425+
A dictionary which may contain other dictionaries as values.
426+
keys (iterable):
427+
A sequence of keys to attempt to clear the value for. Each item in
428+
the sequence represents a deeper nesting. The first key is for
429+
the top level. If there is a dictionary there, the second key
430+
attempts to get the value within that, and so on.
431+
432+
Examples:
433+
Remove a top-level value (equivalent to ``del container['key']``).
434+
435+
>>> container = {'key': 'value'}
436+
>>> _del_sub_prop(container, ['key'])
437+
>>> container
438+
{}
439+
440+
Remove a nested value.
441+
442+
>>> container = {'key': {'subkey': 'value'}}
443+
>>> _del_sub_prop(container, ['key', 'subkey'])
444+
>>> container
445+
{'key': {}}
446+
"""
447+
sub_val = container
448+
for key in keys[:-1]:
449+
if key not in sub_val:
450+
sub_val[key] = {}
451+
sub_val = sub_val[key]
452+
if keys[-1] in sub_val:
453+
del sub_val[keys[-1]]
454+
455+
420456
def _int_or_none(value):
421457
"""Helper: deserialize int value from JSON string."""
422458
if isinstance(value, int):

google/cloud/bigquery/job.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,27 @@ def _set_sub_prop(self, key, value):
781781
"""
782782
_helpers._set_sub_prop(self._properties, [self._job_type, key], value)
783783

784+
def _del_sub_prop(self, key):
    """Remove ``key`` from the ``self._properties[self._job_type]`` dict.

    Most job properties are inside the dictionary related to the job type
    (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear
    those properties::

        self._del_sub_prop('useLegacySql')

    This is equivalent to using the ``_helpers._del_sub_prop`` function::

        _helpers._del_sub_prop(
            self._properties, ['query', 'useLegacySql'])

    Arguments:
        key (str):
            Key to remove in the ``self._properties[self._job_type]``
            dictionary.
    """
    # Full path to the property: the job-type section first, then the key.
    key_path = [self._job_type, key]
    _helpers._del_sub_prop(self._properties, key_path)
804+
784805
def to_api_repr(self):
785806
"""Build an API representation of the job config.
786807
@@ -1051,6 +1072,34 @@ def time_partitioning(self, value):
10511072
api_repr = value.to_api_repr()
10521073
self._set_sub_prop('timePartitioning', api_repr)
10531074

1075+
@property
def clustering_fields(self):
    """Union[List[str], None]: Fields defining clustering for the table.

    (Defaults to :data:`None`).

    Clustering fields are immutable after table creation.

    .. note::

       As of 2018-06-29, clustering fields cannot be set on a table
       which does not also have time partitioning defined.
    """
    clustering = self._get_sub_prop('clustering')
    if clustering is None:
        return None
    # Hand back a fresh list so callers never receive the stored sequence.
    return list(clustering.get('fields', ()))


@clustering_fields.setter
def clustering_fields(self, value):
    """Union[List[str], None]: Fields defining clustering for the table.

    Setting :data:`None` removes the ``'clustering'`` sub-property.
    """
    if value is None:
        self._del_sub_prop('clustering')
        return
    self._set_sub_prop('clustering', {'fields': value})
1102+
10541103
@property
10551104
def schema_update_options(self):
10561105
"""List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies
@@ -1217,6 +1266,13 @@ def time_partitioning(self):
12171266
"""
12181267
return self._configuration.time_partitioning
12191268

1269+
@property
def clustering_fields(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`.
    """
    # Read-only view: the value lives on the job's configuration object.
    config = self._configuration
    return config.clustering_fields
1275+
12201276
@property
12211277
def schema_update_options(self):
12221278
"""See
@@ -2037,6 +2093,34 @@ def time_partitioning(self, value):
20372093
api_repr = value.to_api_repr()
20382094
self._set_sub_prop('timePartitioning', api_repr)
20392095

2096+
@property
def clustering_fields(self):
    """Union[List[str], None]: Fields defining clustering for the table.

    (Defaults to :data:`None`).

    Clustering fields are immutable after table creation.

    .. note::

       As of 2018-06-29, clustering fields cannot be set on a table
       which does not also have time partitioning defined.
    """
    clustering = self._get_sub_prop('clustering')
    if clustering is None:
        return None
    # Hand back a fresh list so callers never receive the stored sequence.
    return list(clustering.get('fields', ()))


@clustering_fields.setter
def clustering_fields(self, value):
    """Union[List[str], None]: Fields defining clustering for the table.

    Setting :data:`None` removes the ``'clustering'`` sub-property.
    """
    if value is None:
        self._del_sub_prop('clustering')
        return
    self._set_sub_prop('clustering', {'fields': value})
2123+
20402124
@property
20412125
def schema_update_options(self):
20422126
"""List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies
@@ -2227,6 +2311,13 @@ def time_partitioning(self):
22272311
"""
22282312
return self._configuration.time_partitioning
22292313

2314+
@property
def clustering_fields(self):
    """See
    :attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`.
    """
    # Read-only view: the value lives on the job's configuration object.
    config = self._configuration
    return config.clustering_fields
2320+
22302321
@property
22312322
def schema_update_options(self):
22322323
"""See

google/cloud/bigquery/table.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,36 @@ def partition_expiration(self, value):
537537
'type': TimePartitioningType.DAY}
538538
self._properties['timePartitioning']['expirationMs'] = str(value)
539539

540+
@property
def clustering_fields(self):
    """Union[List[str], None]: Fields defining clustering for the table.

    (Defaults to :data:`None`).

    Clustering fields are immutable after table creation.

    .. note::

       As of 2018-06-29, clustering fields cannot be set on a table
       which does not also have time partitioning defined.
    """
    clustering = self._properties.get('clustering')
    if clustering is None:
        return None
    # Hand back a fresh list so callers never receive the stored sequence.
    return list(clustering.get('fields', ()))


@clustering_fields.setter
def clustering_fields(self, value):
    """Union[List[str], None]: Fields defining clustering for the table.

    Setting :data:`None` removes the ``'clustering'`` entry from the
    table properties (a no-op when it is not present).
    """
    if value is None:
        self._properties.pop('clustering', None)
        return
    # Reuse an existing 'clustering' mapping so its other keys are kept.
    self._properties.setdefault('clustering', {})['fields'] = value
569+
540570
@property
541571
def description(self):
542572
"""Union[str, None]: Description of the table (defaults to

tests/system.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,19 @@
6969
bigquery.SchemaField('full_name', 'STRING', mode='REQUIRED'),
7070
bigquery.SchemaField('age', 'INTEGER', mode='REQUIRED'),
7171
]
72+
# Schema for the table created by the time-partitioning + clustering system
# test: four required scalar columns plus a repeated ``items`` record.
TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA = [
    bigquery.SchemaField('transaction_time', 'TIMESTAMP', mode='REQUIRED'),
    bigquery.SchemaField('transaction_id', 'INTEGER', mode='REQUIRED'),
    bigquery.SchemaField('user_email', 'STRING', mode='REQUIRED'),
    bigquery.SchemaField('store_code', 'STRING', mode='REQUIRED'),
    bigquery.SchemaField(
        'items',
        'RECORD',
        mode='REPEATED',
        fields=[
            bigquery.SchemaField('item_code', 'STRING', mode='REQUIRED'),
            bigquery.SchemaField('quantity', 'INTEGER', mode='REQUIRED'),
            bigquery.SchemaField('comments', 'STRING', mode='NULLABLE'),
            bigquery.SchemaField('expiration_date', 'DATE', mode='REQUIRED'),
        ],
    ),
]
7285

7386

7487
def _has_rows(result):
@@ -245,6 +258,31 @@ def test_create_table(self):
245258
self.assertTrue(_table_exists(table))
246259
self.assertEqual(table.table_id, table_id)
247260

261+
def test_create_table_w_time_partitioning_w_clustering_fields(self):
    from google.cloud.bigquery.table import TimePartitioning
    from google.cloud.bigquery.table import TimePartitioningType

    table_id = 'test_table'
    partition_field = 'transaction_time'
    cluster_by = ['user_email', 'store_code']

    dataset = self.temp_dataset(_make_dataset_id('create_table_tp_cf'))
    table_arg = Table(
        dataset.table(table_id),
        schema=TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA)
    # The table must not exist before this test creates it.
    self.assertFalse(_table_exists(table_arg))

    table_arg.time_partitioning = TimePartitioning(field=partition_field)
    table_arg.clustering_fields = cluster_by

    create_table = retry_403(Config.CLIENT.create_table)
    table = create_table(table_arg)
    # Schedule cleanup ahead of anything registered earlier.
    self.to_delete.insert(0, table)

    self.assertTrue(_table_exists(table))
    self.assertEqual(table.table_id, table_id)
    partitioning = table.time_partitioning
    self.assertEqual(partitioning.type_, TimePartitioningType.DAY)
    self.assertEqual(partitioning.field, partition_field)
    self.assertEqual(table.clustering_fields, cluster_by)
285+
248286
def test_delete_dataset_delete_contents_true(self):
249287
dataset_id = _make_dataset_id('delete_table_true')
250288
dataset = retry_403(Config.CLIENT.create_dataset)(

tests/unit/test__helpers.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,29 @@ def test_w_nested_keys_existing_value(self):
860860
self.assertEqual(container, {'key1': {'key2': {'key3': 'after'}}})
861861

862862

863+
class Test__del_sub_prop(unittest.TestCase):

    def _call_fut(self, container, keys):
        # Imported lazily so an import failure is reported per test.
        from google.cloud.bigquery._helpers import _del_sub_prop

        return _del_sub_prop(container, keys)

    def test_w_single_key(self):
        props = {'key1': 'value'}
        self._call_fut(props, ['key1'])
        self.assertEqual(props, {})

    def test_w_empty_container_nested_keys(self):
        # Missing intermediate levels are created; only the leaf is absent.
        props = {}
        key_path = ['key1', 'key2', 'key3']
        self._call_fut(props, key_path)
        self.assertEqual(props, {'key1': {'key2': {}}})

    def test_w_existing_value_nested_keys(self):
        props = {'key1': {'key2': {'key3': 'value'}}}
        key_path = ['key1', 'key2', 'key3']
        self._call_fut(props, key_path)
        self.assertEqual(props, {'key1': {'key2': {}}})
884+
885+
863886
class Test__int_or_none(unittest.TestCase):
864887

865888
def _call_fut(self, value):

tests/unit/test_job.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,6 +1141,15 @@ def test_time_partitioning(self):
11411141
config.time_partitioning = None
11421142
self.assertIsNone(config.time_partitioning)
11431143

1144+
def test_clustering_fields(self):
    expected = ['email', 'postal_code']
    klass = self._get_target_class()
    job_config = klass()

    # Round-trip a list of field names through the property.
    job_config.clustering_fields = expected
    self.assertEqual(job_config.clustering_fields, expected)

    # Assigning None clears the setting again.
    job_config.clustering_fields = None
    self.assertIsNone(job_config.clustering_fields)
1152+
11441153
def test_api_repr(self):
11451154
resource = self._make_resource()
11461155
config = self._get_target_class().from_api_repr(resource)
@@ -1347,6 +1356,7 @@ def test_ctor(self):
13471356
self.assertIsNone(job.write_disposition)
13481357
self.assertIsNone(job.destination_encryption_configuration)
13491358
self.assertIsNone(job.time_partitioning)
1359+
self.assertIsNone(job.clustering_fields)
13501360
self.assertIsNone(job.schema_update_options)
13511361

13521362
def test_ctor_w_config(self):
@@ -2787,6 +2797,15 @@ def test_time_partitioning(self):
27872797
config.time_partitioning = None
27882798
self.assertIsNone(config.time_partitioning)
27892799

2800+
def test_clustering_fields(self):
    expected = ['email', 'postal_code']
    klass = self._get_target_class()
    job_config = klass()

    # Round-trip a list of field names through the property.
    job_config.clustering_fields = expected
    self.assertEqual(job_config.clustering_fields, expected)

    # Assigning None clears the setting again.
    job_config.clustering_fields = None
    self.assertIsNone(job_config.clustering_fields)
2808+
27902809
def test_from_api_repr_empty(self):
27912810
klass = self._get_target_class()
27922811
config = klass.from_api_repr({})
@@ -3076,6 +3095,7 @@ def test_ctor_defaults(self):
30763095
self.assertIsNone(job.table_definitions)
30773096
self.assertIsNone(job.destination_encryption_configuration)
30783097
self.assertIsNone(job.time_partitioning)
3098+
self.assertIsNone(job.clustering_fields)
30793099
self.assertIsNone(job.schema_update_options)
30803100

30813101
def test_ctor_w_udf_resources(self):

tests/unit/test_table.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,8 @@ def test_ctor(self):
398398
self.assertIsNone(table.external_data_configuration)
399399
self.assertEquals(table.labels, {})
400400
self.assertIsNone(table.encryption_configuration)
401+
self.assertIsNone(table.time_partitioning)
402+
self.assertIsNone(table.clustering_fields)
401403

402404
def test_ctor_w_schema(self):
403405
from google.cloud.bigquery.table import SchemaField
@@ -859,6 +861,36 @@ def test_partition_expiration_setter(self):
859861

860862
assert warn_patch.called
861863

864+
def test_clustering_fields_setter_w_fields(self):
    expected = ['email', 'phone']
    dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
    table = self._make_one(dataset_ref.table(self.TABLE_NAME))

    table.clustering_fields = expected

    # Both the public property and the raw resource reflect the value.
    self.assertEqual(table.clustering_fields, expected)
    self.assertEqual(
        table._properties['clustering'], {'fields': expected})
873+
874+
def test_clustering_fields_setter_w_none(self):
    dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
    table = self._make_one(dataset_ref.table(self.TABLE_NAME))
    # Seed the raw resource directly so only the None branch is exercised.
    table._properties['clustering'] = {'fields': ['email', 'phone']}

    table.clustering_fields = None

    self.assertIsNone(table.clustering_fields)
    self.assertNotIn('clustering', table._properties)
884+
885+
def test_clustering_fields_setter_w_none_noop(self):
    dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
    table = self._make_one(dataset_ref.table(self.TABLE_NAME))

    # Clearing a value that was never set must not raise or add a key.
    table.clustering_fields = None

    self.assertIsNone(table.clustering_fields)
    self.assertNotIn('clustering', table._properties)
893+
862894
def test_encryption_configuration_setter(self):
863895
from google.cloud.bigquery.table import EncryptionConfiguration
864896
dataset = DatasetReference(self.PROJECT, self.DS_ID)

0 commit comments

Comments
 (0)