Skip to content

Commit 15ec8bd

Browse files
feat(storage): add object contexts in Python GCS SDK (#17039)
This PR implements the "Object Contexts" feature in the `google-cloud-storage` Python SDK, ensuring feature parity with the Go and Java SDKs. Key changes: 1. **Metadata Attachment**: Added `ObjectCustomContextPayload` and `ObjectContexts` classes to `google/cloud/storage/blob.py`. These allow users to define custom key-value pairs (contexts) for objects. 2. **Blob Property**: Added a `contexts` property to the `Blob` class with appropriate getters and setters. Added `contexts` to `_WRITABLE_FIELDS` to enable REST API support for patching and updating these contexts. 3. **Advanced Filtering**: Updated the `list_blobs` method in both `google/cloud/storage/client.py` and `google/cloud/storage/bucket.py` to include a keyword-only `filter_` parameter. This allows server-side filtering of objects based on various attributes, including custom contexts. 4. **gRPC Support**: Enhanced `google/cloud/storage/_grpc_conversions.py` with: - `blob_to_proto`: Support for converting `contexts` to GCS V2 proto. 5. **Testing**: Added new unit tests in `google/cloud/storage/tests/unit/test_blob.py` and `google/cloud/storage/tests/unit/test__grpc_conversions.py` to verify the new functionality and ensure no regressions (verified with `nox -s unit-3.12`). Fixed a minor regression in async write tests. This implementation allows for advanced metadata management and powerful server-side filtering as requested. --- *PR created automatically by Jules for task [13325527155543531515](https://jules.google.com/task/13325527155543531515) started by @nidhiii-27* --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: nidhiii-27 <224584462+nidhiii-27@users.noreply.github.com>
1 parent 494abcd commit 15ec8bd

12 files changed

Lines changed: 832 additions & 1 deletion

File tree

packages/google-cloud-storage/google/cloud/storage/_grpc_conversions.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,14 @@ def blob_to_proto(blob):
8787
retain_until_time=retain_until_time_proto,
8888
)
8989

90+
contexts = getattr(blob, "contexts", None)
91+
if contexts:
92+
custom_contexts = {}
93+
for key, payload in contexts.custom.items():
94+
custom_contexts[key] = _storage_v2.ObjectCustomContextPayload(
95+
value=payload.value
96+
)
97+
98+
resource_params["contexts"] = _storage_v2.ObjectContexts(custom=custom_contexts)
99+
90100
return _storage_v2.Object(**resource_params)

packages/google-cloud-storage/google/cloud/storage/blob.py

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
"name",
106106
"retention",
107107
"storageClass",
108+
"contexts",
108109
)
109110
_READ_LESS_THAN_SIZE = (
110111
"Size {:d} was specified but the file-like object only had {:d} bytes remaining."
@@ -3849,6 +3850,7 @@ def compose(
38493850
if_metageneration_match=None,
38503851
if_source_generation_match=None,
38513852
retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
3853+
destination_contexts=None,
38523854
delete_source_objects=None,
38533855
):
38543856
"""Concatenate source blobs into this one.
@@ -3910,6 +3912,11 @@ def compose(
39103912
to enable retries regardless of generation precondition setting.
39113913
See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
39123914
3915+
:type destination_contexts: :class:`~google.cloud.storage.blob.ObjectContexts`
3916+
:param destination_contexts:
3917+
(Optional) New contexts to set for the destination object.
3918+
See: https://docs.cloud.google.com/storage/docs/use-object-contexts#manage_object_contexts_during_object_operations
3919+
39133920
:type delete_source_objects: bool
39143921
:param delete_source_objects:
39153922
(Optional) If True, the source objects will be deleted after a
@@ -3965,6 +3972,14 @@ def compose(
39653972

39663973
source_objects.append(source_object)
39673974

3975+
if destination_contexts is not None:
3976+
if isinstance(destination_contexts, ObjectContexts):
3977+
self.contexts = destination_contexts
3978+
else:
3979+
raise ValueError(
3980+
"destination_contexts must be an ObjectContexts object"
3981+
)
3982+
39683983
request = {
39693984
"sourceObjects": source_objects,
39703985
"destination": self._properties.copy(),
@@ -4007,6 +4022,7 @@ def rewrite(
40074022
if_source_metageneration_not_match=None,
40084023
timeout=_DEFAULT_TIMEOUT,
40094024
retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
4025+
destination_contexts=None,
40104026
):
40114027
"""Rewrite source blob into this one.
40124028
@@ -4090,6 +4106,11 @@ def rewrite(
40904106
to enable retries regardless of generation precondition setting.
40914107
See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
40924108
4109+
:type destination_contexts: :class:`~google.cloud.storage.blob.ObjectContexts`
4110+
:param destination_contexts:
4111+
(Optional) New contexts to set for the destination object.
4112+
See: https://docs.cloud.google.com/storage/docs/use-object-contexts#manage_object_contexts_during_object_operations
4113+
40934114
:rtype: tuple
40944115
:returns: ``(token, bytes_rewritten, total_bytes)``, where ``token``
40954116
is a rewrite token (``None`` if the rewrite is complete),
@@ -4135,6 +4156,14 @@ def rewrite(
41354156
if_source_metageneration_not_match=if_source_metageneration_not_match,
41364157
)
41374158

4159+
if destination_contexts is not None:
4160+
if isinstance(destination_contexts, ObjectContexts):
4161+
self.contexts = destination_contexts
4162+
else:
4163+
raise ValueError(
4164+
"destination_contexts must be an ObjectContexts object"
4165+
)
4166+
41384167
path = f"{source.path}/rewriteTo{self.path}"
41394168
api_response = client._post_resource(
41404169
path,
@@ -5017,6 +5046,29 @@ def retention(self):
50175046
info = self._properties.get("retention", {})
50185047
return Retention.from_api_repr(info, self)
50195048

5049+
@property
def contexts(self):
    """Retrieve the contexts for this object.

    A fresh :class:`ObjectContexts` wrapper is built from the stored
    properties on every access.

    :rtype: :class:`ObjectContexts`
    :returns: an instance for managing the object's contexts.
    """
    # The setter stores ``None`` verbatim when the contexts are cleared, so
    # fall back to an empty mapping rather than handing ``None`` to the
    # factory, which calls ``.get`` on its argument.
    info = self._properties.get("contexts") or {}
    return ObjectContexts.from_api_repr(info, self)


@contexts.setter
def contexts(self, value):
    """Update the contexts for this object.

    :type value: :class:`ObjectContexts` or dict or None
    :param value: the new contexts for the object.
    """
    # The value is stored as-is (``None`` included) and then reported
    # through ``_patch_property`` under the ``contexts`` field name.
    self._properties["contexts"] = value
    self._patch_property("contexts", value)
5071+
50205072
@property
50215073
def soft_delete_time(self):
50225074
"""If this object has been soft-deleted, returns the time at which it became soft-deleted.
@@ -5309,3 +5361,140 @@ def retention_expiration_time(self):
53095361
retention_expiration_time = self.get("retentionExpirationTime")
53105362
if retention_expiration_time is not None:
53115363
return _rfc3339_nanos_to_datetime(retention_expiration_time)
5364+
5365+
5366+
class ObjectCustomContextPayload(dict):
    """Payload for a custom context.

    The payload is a ``dict`` whose ``value`` key holds the context value.
    The ``createTime`` and ``updateTime`` keys, when present, are exposed
    through the read-only :attr:`create_time` and :attr:`update_time`
    properties.

    :type value: str or ``NoneType``
    :param value: (Optional) The value of the custom context.
    """

    def __init__(self, value=None):
        super().__init__({"value": value})
        # Back-reference to the owning contexts container; it is assigned by
        # the container when this payload is attached to it.
        self._contexts = None

    @property
    def value(self):
        """The value of the custom context.

        :rtype: str or ``NoneType``
        :returns: The value of the custom context.
        """
        return self.get("value")

    @value.setter
    def value(self, value):
        """Set the value and report the change to the owning blob, if any.

        :type value: str or ``NoneType``
        :param value: The new value of the custom context.
        """
        self["value"] = value
        # The owning container is itself a ``dict`` subclass, so it must be
        # compared against ``None``: a truthiness test would skip the
        # notification whenever the container happens to be empty.
        # ``getattr`` covers instances created without running ``__init__``.
        contexts = getattr(self, "_contexts", None)
        if contexts is None:
            return
        blob = contexts.blob
        if blob is not None:
            blob._patch_property("contexts", contexts)

    @property
    def create_time(self):
        """Creation time of the custom context.

        :rtype: :class:`datetime.datetime` or ``NoneType``
        :returns: Datetime object parsed from RFC3339 valid timestamp, or
                  ``None`` if the ``createTime`` key is not set.
        """
        create_time = self.get("createTime")
        if create_time is not None:
            return _rfc3339_nanos_to_datetime(create_time)

    @property
    def update_time(self):
        """Last update time of the custom context.

        :rtype: :class:`datetime.datetime` or ``NoneType``
        :returns: Datetime object parsed from RFC3339 valid timestamp, or
                  ``None`` if the ``updateTime`` key is not set.
        """
        update_time = self.get("updateTime")
        if update_time is not None:
            return _rfc3339_nanos_to_datetime(update_time)
5414+
5415+
5416+
class ObjectContexts(dict):
    """Container for an object's contexts.

    See: https://docs.cloud.google.com/storage/docs/object-contexts

    :type blob: :class:`Blob` or ``NoneType``
    :param blob: blob for which these contexts apply to. When ``None``,
                 assigning :attr:`custom` only updates this container and
                 no change is reported to a blob.

    :type custom: dict or ``NoneType``
    :param custom: (Optional) Custom contexts mapping.

    :raises ValueError: if ``custom`` is not a ``dict`` or any of its values
                        is not an :class:`ObjectCustomContextPayload`.
    """

    def __init__(self, blob, custom=None):
        data = {}
        if custom is not None:
            if not isinstance(custom, dict):
                raise ValueError(
                    "custom must be a dictionary mapping keys to ObjectCustomContextPayload instances"
                )
            # Validate every payload before binding any of them, so that a
            # rejected mapping leaves the caller's payloads untouched.
            for payload in custom.values():
                if not isinstance(payload, ObjectCustomContextPayload):
                    raise ValueError(
                        "All values in custom must be ObjectCustomContextPayload instances"
                    )
            for payload in custom.values():
                payload._contexts = self
            data["custom"] = custom
        super().__init__(data)
        self._blob = blob

    @classmethod
    def from_api_repr(cls, resource, blob):
        """Factory: construct instance from resource.

        :type resource: dict or ``NoneType``
        :param resource: mapping as returned from API call. ``None`` or a
                         missing/``None`` ``custom`` entry yields an empty
                         custom mapping.

        :type blob: :class:`Blob`
        :param blob: Blob for which these contexts apply to.

        :rtype: :class:`ObjectContexts`
        :returns: ObjectContexts instance created from resource.
        """
        instance = cls(blob)
        # Treat an absent or null section as empty instead of failing on
        # ``None.get`` / ``None.items``.
        raw_custom = (resource or {}).get("custom") or {}
        custom = {}
        for key, payload_resource in raw_custom.items():
            payload = ObjectCustomContextPayload()
            payload.update(payload_resource)
            payload._contexts = instance
            custom[key] = payload
        instance["custom"] = custom
        return instance

    @property
    def blob(self):
        """Blob for which these contexts apply to.

        :rtype: :class:`Blob` or ``NoneType``
        :returns: the instance's blob.
        """
        return self._blob

    @property
    def custom(self):
        """Custom contexts mapping.

        An empty mapping is created and stored under the ``custom`` key on
        first access if none is present.

        :rtype: dict
        :returns: Mapping of keys to :class:`ObjectCustomContextPayload` instances.
        """
        if "custom" not in self:
            self["custom"] = {}
        return self["custom"]

    @custom.setter
    def custom(self, value):
        """Replace the custom contexts mapping.

        :type value: dict or ``NoneType``
        :param value: the new mapping; ``None`` is stored as an empty dict.

        :raises ValueError: if ``value`` is neither ``None`` nor a ``dict``.
        """
        if value is None:
            value = {}
        if not isinstance(value, dict):
            raise ValueError(
                "custom must be a dictionary mapping keys to ObjectCustomContextPayload instances"
            )
        # Only payload instances receive the back-reference; any other
        # values are stored unchanged.
        for payload in value.values():
            if isinstance(payload, ObjectCustomContextPayload):
                payload._contexts = self
        self["custom"] = value
        # A container built without a blob has nothing to notify.
        owner = self.blob
        if owner is not None:
            owner._patch_property("contexts", self)

packages/google-cloud-storage/google/cloud/storage/bucket.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from google.cloud.storage._signing import generate_signed_url_v2, generate_signed_url_v4
4343
from google.cloud.storage.acl import BucketACL, DefaultObjectACL
4444
from google.cloud.storage.blob import Blob, _quote
45+
from google.cloud.storage.blob import ObjectContexts
4546
from google.cloud.storage.constants import (
4647
_DEFAULT_TIMEOUT,
4748
ARCHIVE_STORAGE_CLASS,
@@ -1423,6 +1424,7 @@ def list_blobs(
14231424
include_folders_as_prefixes=None,
14241425
soft_deleted=None,
14251426
page_size=None,
1427+
filter_=None,
14261428
):
14271429
"""Return an iterator used to find blobs in the bucket.
14281430
@@ -1516,6 +1518,11 @@ def list_blobs(
15161518
Note ``soft_deleted`` and ``versions`` cannot be set to True simultaneously. See:
15171519
https://cloud.google.com/storage/docs/soft-delete
15181520
1521+
:type filter_: str or None
1522+
:param filter_:
1523+
(Optional) Filter string used to filter objects. See:
1524+
https://docs.cloud.google.com/storage/docs/listing-objects#filter-by-object-contexts-syntax
1525+
15191526
:type page_size: int
15201527
:param page_size:
15211528
(Optional) Maximum number of blobs to return in each page.
@@ -1545,6 +1552,7 @@ def list_blobs(
15451552
match_glob=match_glob,
15461553
include_folders_as_prefixes=include_folders_as_prefixes,
15471554
soft_deleted=soft_deleted,
1555+
filter_=filter_,
15481556
)
15491557

15501558
def list_notifications(
@@ -1972,6 +1980,7 @@ def copy_blob(
19721980
if_source_metageneration_not_match=None,
19731981
timeout=_DEFAULT_TIMEOUT,
19741982
retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
1983+
destination_contexts=None,
19751984
):
19761985
"""Copy the given blob to the given bucket, optionally with a new name.
19771986
@@ -2065,6 +2074,10 @@ def copy_blob(
20652074
to enable retries regardless of generation precondition setting.
20662075
See [Configuring Retries](https://cloud.google.com/python/docs/reference/storage/latest/retry_timeout).
20672076
2077+
:type destination_contexts: :class:`~google.cloud.storage.blob.ObjectContexts`
2078+
:param destination_contexts:
2079+
(Optional) New contexts to set for the destination object.
2080+
See: https://docs.cloud.google.com/storage/docs/use-object-contexts#manage_object_contexts_during_object_operations
20682081
:rtype: :class:`google.cloud.storage.blob.Blob`
20692082
:returns: The new Blob.
20702083
"""
@@ -2094,10 +2107,22 @@ def copy_blob(
20942107
new_name = blob.name
20952108

20962109
new_blob = Blob(bucket=destination_bucket, name=new_name)
2110+
2111+
if destination_contexts is not None:
2112+
if isinstance(destination_contexts, ObjectContexts):
2113+
new_blob.contexts = destination_contexts
2114+
else:
2115+
raise ValueError(
2116+
"destination_contexts must be an ObjectContexts object"
2117+
)
2118+
request_body = new_blob._properties.copy()
2119+
else:
2120+
request_body = None
2121+
20972122
api_path = blob.path + "/copyTo" + new_blob.path
20982123
copy_result = client._post_resource(
20992124
api_path,
2100-
None,
2125+
request_body,
21012126
query_params=query_params,
21022127
timeout=timeout,
21032128
retry=retry,

packages/google-cloud-storage/google/cloud/storage/client.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,6 +1291,7 @@ def list_blobs(
12911291
match_glob=None,
12921292
include_folders_as_prefixes=None,
12931293
soft_deleted=None,
1294+
filter_=None,
12941295
):
12951296
"""Return an iterator used to find blobs in the bucket.
12961297
@@ -1400,6 +1401,10 @@ def list_blobs(
14001401
Note ``soft_deleted`` and ``versions`` cannot be set to True simultaneously. See:
14011402
https://cloud.google.com/storage/docs/soft-delete
14021403
1404+
filter_ (str):
1405+
(Optional) Filter string used to filter objects. See:
1406+
https://docs.cloud.google.com/storage/docs/listing-objects#filter-by-object-contexts-syntax
1407+
14031408
Returns:
14041409
Iterator of all :class:`~google.cloud.storage.blob.Blob`
14051410
in this bucket matching the arguments. The RPC call
@@ -1443,6 +1448,9 @@ def list_blobs(
14431448
if soft_deleted is not None:
14441449
extra_params["softDeleted"] = soft_deleted
14451450

1451+
if filter_ is not None:
1452+
extra_params["filter"] = filter_
1453+
14461454
if bucket.user_project is not None:
14471455
extra_params["userProject"] = bucket.user_project
14481456

0 commit comments

Comments
 (0)