Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ def __init__(
end=None,
headers=None,
retry=DEFAULT_RETRY,
client_info_bucket_name=None,
client_info_object_name=None,
):
self.media_url = media_url
self._stream = stream
Expand All @@ -76,6 +78,8 @@ def __init__(
self._headers = headers
self._finished = False
self._retry_strategy = retry
self.client_info_bucket_name = client_info_bucket_name
self.client_info_object_name = client_info_object_name

@property
def finished(self):
Expand Down Expand Up @@ -487,6 +491,35 @@ def _process_response(self, response):
# Write the response body to the stream.
self._stream.write(response_body)

if self._finished:
requested_length = None
if self.start is not None and self.start < 0 and self.end is None:
requested_length = -self.start
elif self.start is not None and self.end is not None:
requested_length = self.end - self.start + 1
elif self.start is None and self.end is not None:
requested_length = self.end + 1

if (
requested_length is not None
and self._bytes_downloaded > requested_length
):
if response.headers.get("x-goog-stored-content-encoding") != "gzip":
import logging

logger = logging.getLogger(__name__)
bucket_name = (
getattr(self, "client_info_bucket_name", None) or "unknown"
)
object_name = (
getattr(self, "client_info_object_name", None) or "unknown"
)
diff = self._bytes_downloaded - requested_length
logger.warning(
f"storage: received {diff} more bytes than requested from GCS "
f'for bucket "{bucket_name}", object "{object_name}"'
)

def consume_next_chunk(self, transport, timeout=None):
"""Consume the next chunk of the resource to be downloaded.

Expand Down
29 changes: 28 additions & 1 deletion packages/google-cloud-storage/google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -1120,8 +1120,35 @@ def _do_download(
attributes=extra_attributes,
api_request=args,
):
response = None
while not download.finished:
download.consume_next_chunk(transport, timeout=timeout)
response = download.consume_next_chunk(transport, timeout=timeout)

requested_length = None
if start is not None and start < 0 and end is None:
requested_length = -start
elif start is not None and end is not None:
requested_length = end - start + 1
elif start is None and end is not None:
requested_length = end + 1
Comment on lines +1127 to +1133

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For consistency with the logic in _media/_download.py, blob.py should also handle the case where start is negative and end is None (which requests the last -start bytes of the blob).

Suggested change
requested_length = None
if start is not None and end is not None:
requested_length = end - start + 1
elif start is None and end is not None:
requested_length = end + 1
requested_length = None
if start is not None and start < 0 and end is None:
requested_length = -start
elif start is not None and end is not None:
requested_length = end - start + 1
elif start is None and end is not None:
requested_length = end + 1


if requested_length is not None and requested_length >= 0:
received_bytes = getattr(download, "_bytes_downloaded", 0)
if isinstance(received_bytes, int) and received_bytes > requested_length:
from google.cloud.storage._media import _helpers as media_helpers

if (
response is not None
and not media_helpers._is_decompressive_transcoding(
response, download._get_headers
)
):
_logger.warning(
"storage: received %d more bytes than requested from GCS for bucket %r, object %r",
received_bytes - requested_length,
self.bucket.name,
self.name,
)

def download_to_file(
self,
Expand Down
39 changes: 39 additions & 0 deletions packages/google-cloud-storage/tests/unit/test_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -1374,6 +1374,45 @@ def test__do_download_wo_chunks_w_range_w_raw_w_headers(self):
w_range=True, raw_download=True, headers={"If-Match": "kittens"}
)

@mock.patch("google.cloud.storage.blob._logger")
def test__do_download_log_extra_bytes(self, mock_logger):
blob_name = "blob-name"
client = self._make_client()
bucket = _Bucket(client)
blob = self._make_one(blob_name, bucket=bucket)
blob.chunk_size = None

transport = object()
file_obj = io.BytesIO()
download_url = "http://test.invalid"

patch = mock.patch("google.cloud.storage.blob.Download")
with patch as patched:
download = patched.return_value
download._bytes_downloaded = 10
type(download).finished = mock.PropertyMock(side_effect=[False, True])

mock_response = mock.Mock()
mock_response.headers = {}
download.consume_next_chunk.return_value = mock_response
download._get_headers.return_value = {}
Comment on lines +1391 to +1398

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The test does not actually execute the while not download.finished: loop because download.finished is a Mock object (which is truthy, making not download.finished evaluate to False). Additionally, the test mocks download.consume instead of download.consume_next_chunk.

To make the test effective and ensure the loop runs once, you should mock download.finished to return False then True, and mock download.consume_next_chunk instead of download.consume.

Suggested change
download = patched.return_value
download._bytes_downloaded = 10
mock_response = mock.Mock()
mock_response.headers = {}
download.consume.return_value = mock_response
download._get_headers.return_value = {}
download = patched.return_value
download._bytes_downloaded = 10
type(download).finished = mock.PropertyMock(side_effect=[False, True])
mock_response = mock.Mock()
mock_response.headers = {}
download.consume_next_chunk.return_value = mock_response
download._get_headers.return_value = {}


blob._do_download(
transport,
file_obj,
download_url,
{},
start=0,
end=4,
)

mock_logger.warning.assert_called_once_with(
"storage: received %d more bytes than requested from GCS for bucket %r, object %r",
5,
"name",
"blob-name",
)

def test__do_download_wo_chunks_w_custom_timeout(self):
self._do_download_helper_wo_chunks(
w_range=False, raw_download=False, timeout=9.58
Expand Down
Loading