diff --git a/packages/google-cloud-storage/google/cloud/storage/_media/_download.py b/packages/google-cloud-storage/google/cloud/storage/_media/_download.py index 593a152c0a17..eea0eaf708db 100644 --- a/packages/google-cloud-storage/google/cloud/storage/_media/_download.py +++ b/packages/google-cloud-storage/google/cloud/storage/_media/_download.py @@ -66,6 +66,8 @@ def __init__( end=None, headers=None, retry=DEFAULT_RETRY, + client_info_bucket_name=None, + client_info_object_name=None, ): self.media_url = media_url self._stream = stream @@ -76,6 +78,8 @@ def __init__( self._headers = headers self._finished = False self._retry_strategy = retry + self.client_info_bucket_name = client_info_bucket_name + self.client_info_object_name = client_info_object_name @property def finished(self): @@ -487,6 +491,35 @@ def _process_response(self, response): # Write the response body to the stream. self._stream.write(response_body) + if self._finished: + requested_length = None + if self.start is not None and self.start < 0 and self.end is None: + requested_length = -self.start + elif self.start is not None and self.end is not None: + requested_length = self.end - self.start + 1 + elif self.start is None and self.end is not None: + requested_length = self.end + 1 + + if ( + requested_length is not None + and self._bytes_downloaded > requested_length + ): + if response.headers.get("x-goog-stored-content-encoding") != "gzip": + import logging + + logger = logging.getLogger(__name__) + bucket_name = ( + getattr(self, "client_info_bucket_name", None) or "unknown" + ) + object_name = ( + getattr(self, "client_info_object_name", None) or "unknown" + ) + diff = self._bytes_downloaded - requested_length + logger.warning( + f"storage: received {diff} more bytes than requested from GCS " + f'for bucket "{bucket_name}", object "{object_name}"' + ) + def consume_next_chunk(self, transport, timeout=None): """Consume the next chunk of the resource to be downloaded. diff --git a/packages/google-cloud-storage/google/cloud/storage/blob.py b/packages/google-cloud-storage/google/cloud/storage/blob.py index 87d97ef739cd..44573209e813 100644 --- a/packages/google-cloud-storage/google/cloud/storage/blob.py +++ b/packages/google-cloud-storage/google/cloud/storage/blob.py @@ -1120,8 +1120,35 @@ def _do_download( attributes=extra_attributes, api_request=args, ): + response = None while not download.finished: - download.consume_next_chunk(transport, timeout=timeout) + response = download.consume_next_chunk(transport, timeout=timeout) + + requested_length = None + if start is not None and start < 0 and end is None: + requested_length = -start + elif start is not None and end is not None: + requested_length = end - start + 1 + elif start is None and end is not None: + requested_length = end + 1 + + if requested_length is not None and requested_length >= 0: + received_bytes = getattr(download, "_bytes_downloaded", 0) + if isinstance(received_bytes, int) and received_bytes > requested_length: + from google.cloud.storage._media import _helpers as media_helpers + + if ( + response is not None + and not media_helpers._is_decompressive_transcoding( + response, download._get_headers + ) + ): + _logger.warning( + "storage: received %d more bytes than requested from GCS for bucket %r, object %r", + received_bytes - requested_length, + self.bucket.name, + self.name, + ) def download_to_file( self, diff --git a/packages/google-cloud-storage/tests/unit/test_blob.py b/packages/google-cloud-storage/tests/unit/test_blob.py index 0d2f11339628..bf0ed876f563 100644 --- a/packages/google-cloud-storage/tests/unit/test_blob.py +++ b/packages/google-cloud-storage/tests/unit/test_blob.py @@ -1374,6 +1374,45 @@ def test__do_download_wo_chunks_w_range_w_raw_w_headers(self): w_range=True, raw_download=True, headers={"If-Match": "kittens"} ) + @mock.patch("google.cloud.storage.blob._logger") + def test__do_download_log_extra_bytes(self, mock_logger): + blob_name = "blob-name" + client = self._make_client() + bucket = _Bucket(client) + blob = self._make_one(blob_name, bucket=bucket) + blob.chunk_size = None + + transport = object() + file_obj = io.BytesIO() + download_url = "http://test.invalid" + + patch = mock.patch("google.cloud.storage.blob.Download") + with patch as patched: + download = patched.return_value + download._bytes_downloaded = 10 + type(download).finished = mock.PropertyMock(side_effect=[False, True]) + + mock_response = mock.Mock() + mock_response.headers = {} + download.consume_next_chunk.return_value = mock_response + download._get_headers.return_value = {} + + blob._do_download( + transport, + file_obj, + download_url, + {}, + start=0, + end=4, + ) + + mock_logger.warning.assert_called_once_with( + "storage: received %d more bytes than requested from GCS for bucket %r, object %r", + 5, + "name", + "blob-name", + ) + def test__do_download_wo_chunks_w_custom_timeout(self): self._do_download_helper_wo_chunks( w_range=False, raw_download=False, timeout=9.58