From ad8a555eba1f46ea7fec0a1cde669f43c07bc305 Mon Sep 17 00:00:00 2001 From: Nidhi Nandwani Date: Thu, 11 Jun 2026 09:39:56 +0000 Subject: [PATCH 1/2] feat(storage): log additional bytes received from GCS in read path It has been found that GCS can occasionally send additional bytes while reading from stream. This scenario should be logged properly for debugging and tracking purposes. Fixes: 475824752 [Generated-by: AI] --- .../google/cloud/storage/_media/_download.py | 25 ++++++++++++ .../google/cloud/storage/blob.py | 26 ++++++++++++- .../tests/unit/test_blob.py | 38 +++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-storage/google/cloud/storage/_media/_download.py b/packages/google-cloud-storage/google/cloud/storage/_media/_download.py index 593a152c0a17..9ca4527ea286 100644 --- a/packages/google-cloud-storage/google/cloud/storage/_media/_download.py +++ b/packages/google-cloud-storage/google/cloud/storage/_media/_download.py @@ -66,6 +66,8 @@ def __init__( end=None, headers=None, retry=DEFAULT_RETRY, + client_info_bucket_name=None, + client_info_object_name=None, ): self.media_url = media_url self._stream = stream @@ -76,6 +78,8 @@ def __init__( self._headers = headers self._finished = False self._retry_strategy = retry + self.client_info_bucket_name = client_info_bucket_name + self.client_info_object_name = client_info_object_name @property def finished(self): @@ -487,6 +491,27 @@ def _process_response(self, response): # Write the response body to the stream. self._stream.write(response_body) + if self._finished: + requested_length = None + if self.start is not None and self.start < 0 and self.end is None: + requested_length = -self.start + elif self.start is not None and self.end is not None: + requested_length = self.end - self.start + 1 + elif self.start is None and self.end is not None: + requested_length = self.end + 1 + + if requested_length is not None and self._bytes_downloaded > requested_length: + if headers.get("x-goog-stored-content-encoding") != "gzip": + import logging + logger = logging.getLogger(__name__) + bucket_name = getattr(self, "client_info_bucket_name", "unknown") + object_name = getattr(self, "client_info_object_name", "unknown") + diff = self._bytes_downloaded - requested_length + logger.warning( + f'storage: received {diff} more bytes than requested from GCS ' + f'for bucket "{bucket_name}", object "{object_name}"' + ) + def consume_next_chunk(self, transport, timeout=None): """Consume the next chunk of the resource to be downloaded. diff --git a/packages/google-cloud-storage/google/cloud/storage/blob.py b/packages/google-cloud-storage/google/cloud/storage/blob.py index 87d97ef739cd..d998808b7511 100644 --- a/packages/google-cloud-storage/google/cloud/storage/blob.py +++ b/packages/google-cloud-storage/google/cloud/storage/blob.py @@ -1120,8 +1120,32 @@ def _do_download( attributes=extra_attributes, api_request=args, ): + response = None while not download.finished: - download.consume_next_chunk(transport, timeout=timeout) + response = download.consume_next_chunk(transport, timeout=timeout) + + requested_length = None + if start is not None and start < 0 and end is None: + requested_length = -start + elif start is not None and end is not None: + requested_length = end - start + 1 + elif start is None and end is not None: + requested_length = end + 1 + + if requested_length is not None and requested_length >= 0: + received_bytes = getattr(download, "_bytes_downloaded", 0) + if isinstance(received_bytes, int) and received_bytes > requested_length: + from google.cloud.storage._media import _helpers as media_helpers + + if response is not None and not media_helpers._is_decompressive_transcoding( + response, download._get_headers + ): + _logger.warning( + "storage: received %d more bytes than requested from GCS for bucket %r, object %r", + received_bytes - requested_length, + self.bucket.name, + self.name, + ) def download_to_file( self, diff --git a/packages/google-cloud-storage/tests/unit/test_blob.py b/packages/google-cloud-storage/tests/unit/test_blob.py index 0d2f11339628..ee2b35dc5650 100644 --- a/packages/google-cloud-storage/tests/unit/test_blob.py +++ b/packages/google-cloud-storage/tests/unit/test_blob.py @@ -1374,6 +1374,44 @@ def test__do_download_wo_chunks_w_range_w_raw_w_headers(self): w_range=True, raw_download=True, headers={"If-Match": "kittens"} ) + @mock.patch("google.cloud.storage.blob._logger") + def test__do_download_log_extra_bytes(self, mock_logger): + blob_name = "blob-name" + client = self._make_client() + bucket = _Bucket(client) + blob = self._make_one(blob_name, bucket=bucket) + blob.chunk_size = None + + transport = object() + file_obj = io.BytesIO() + download_url = "http://test.invalid" + + patch = mock.patch("google.cloud.storage.blob.Download") + with patch as patched: + download = patched.return_value + download._bytes_downloaded = 10 + + mock_response = mock.Mock() + mock_response.headers = {} + download.consume.return_value = mock_response + download._get_headers.return_value = {} + + blob._do_download( + transport, + file_obj, + download_url, + {}, + start=0, + end=4, + ) + + mock_logger.warning.assert_called_once_with( + "storage: received %d more bytes than requested from GCS for bucket %r, object %r", + 5, + "name", + "blob-name", + ) + def test__do_download_wo_chunks_w_custom_timeout(self): self._do_download_helper_wo_chunks( w_range=False, raw_download=False, timeout=9.58 From bb8cd28a72c46c32a0fb1e7e84a50c79b385bb13 Mon Sep 17 00:00:00 2001 From: Nidhi Nandwani Date: Thu, 11 Jun 2026 10:29:56 +0000 Subject: [PATCH 2/2] Fix python formatting --- .../google/cloud/storage/_media/_download.py | 18 +++++++++++++----- .../google/cloud/storage/blob.py | 7 +++++-- .../tests/unit/test_blob.py | 5 +++-- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/packages/google-cloud-storage/google/cloud/storage/_media/_download.py b/packages/google-cloud-storage/google/cloud/storage/_media/_download.py index 9ca4527ea286..eea0eaf708db 100644 --- a/packages/google-cloud-storage/google/cloud/storage/_media/_download.py +++ b/packages/google-cloud-storage/google/cloud/storage/_media/_download.py @@ -500,15 +500,23 @@ def _process_response(self, response): elif self.start is None and self.end is not None: requested_length = self.end + 1 - if requested_length is not None and self._bytes_downloaded > requested_length: - if headers.get("x-goog-stored-content-encoding") != "gzip": + if ( + requested_length is not None + and self._bytes_downloaded > requested_length + ): + if response.headers.get("x-goog-stored-content-encoding") != "gzip": import logging + logger = logging.getLogger(__name__) - bucket_name = getattr(self, "client_info_bucket_name", "unknown") - object_name = getattr(self, "client_info_object_name", "unknown") + bucket_name = ( + getattr(self, "client_info_bucket_name", None) or "unknown" + ) + object_name = ( + getattr(self, "client_info_object_name", None) or "unknown" + ) diff = self._bytes_downloaded - requested_length logger.warning( - f'storage: received {diff} more bytes than requested from GCS ' + f"storage: received {diff} more bytes than requested from GCS " f'for bucket "{bucket_name}", object "{object_name}"' ) diff --git a/packages/google-cloud-storage/google/cloud/storage/blob.py b/packages/google-cloud-storage/google/cloud/storage/blob.py index d998808b7511..44573209e813 100644 --- a/packages/google-cloud-storage/google/cloud/storage/blob.py +++ b/packages/google-cloud-storage/google/cloud/storage/blob.py @@ -1137,8 +1137,11 @@ def _do_download( if isinstance(received_bytes, int) and received_bytes > requested_length: from google.cloud.storage._media import _helpers as media_helpers - if response is not None and not media_helpers._is_decompressive_transcoding( - response, download._get_headers + if ( + response is not None + and not media_helpers._is_decompressive_transcoding( + response, download._get_headers + ) ): _logger.warning( "storage: received %d more bytes than requested from GCS for bucket %r, object %r", diff --git a/packages/google-cloud-storage/tests/unit/test_blob.py b/packages/google-cloud-storage/tests/unit/test_blob.py index ee2b35dc5650..bf0ed876f563 100644 --- a/packages/google-cloud-storage/tests/unit/test_blob.py +++ b/packages/google-cloud-storage/tests/unit/test_blob.py @@ -1390,10 +1390,11 @@ def test__do_download_log_extra_bytes(self, mock_logger): with patch as patched: download = patched.return_value download._bytes_downloaded = 10 - + type(download).finished = mock.PropertyMock(side_effect=[False, True]) + mock_response = mock.Mock() mock_response.headers = {} - download.consume.return_value = mock_response + download.consume_next_chunk.return_value = mock_response download._get_headers.return_value = {} blob._do_download(