Skip to content

Commit 7c770c8

Browse files
mfschwartzdhermes
authored andcommitted
Catch checksum validation failure from download, delete corrupt file (googleapis#4133)
1 parent 5cc463f commit 7c770c8

3 files changed

Lines changed: 83 additions & 3 deletions

File tree

storage/google/cloud/storage/blob.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,8 +503,13 @@ def download_to_filename(self, filename, client=None):
503503
504504
:raises: :class:`google.cloud.exceptions.NotFound`
505505
"""
506-
with open(filename, 'wb') as file_obj:
507-
self.download_to_file(file_obj, client=client)
506+
try:
507+
with open(filename, 'wb') as file_obj:
508+
self.download_to_file(file_obj, client=client)
509+
except resumable_media.DataCorruption as exc:
510+
# Delete the corrupt downloaded file.
511+
os.remove(filename)
512+
raise
508513

509514
updated = self.updated
510515
if updated is not None:

storage/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
REQUIREMENTS = [
5454
'google-cloud-core >= 0.27.0, < 0.28dev',
5555
'google-auth >= 1.0.0',
56-
'google-resumable-media >= 0.2.3',
56+
'google-resumable-media >= 0.3.0',
5757
'requests >= 2.18.0',
5858
]
5959

storage/tests/unit/test_blob.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import base64
1516
import datetime
17+
import hashlib
1618
import io
1719
import json
1820
import os
21+
import tempfile
1922
import unittest
2023

2124
import mock
@@ -665,6 +668,78 @@ def test_download_to_filename(self):
665668
def test_download_to_filename_wo_updated(self):
666669
self._download_to_filename_helper()
667670

671+
def test_download_to_filename_corrupted(self):
672+
from google.resumable_media import DataCorruption
673+
from google.resumable_media.requests.download import _CHECKSUM_MISMATCH
674+
675+
blob_name = 'blob-name'
676+
transport = mock.Mock(spec=['request'])
677+
empty_hash = base64.b64encode(
678+
hashlib.md5(b'').digest()).decode(u'utf-8')
679+
headers = {'x-goog-hash': 'md5=' + empty_hash}
680+
response = mock.MagicMock(
681+
headers=headers,
682+
status_code=http_client.OK,
683+
spec=[
684+
'__enter__',
685+
'__exit__',
686+
'headers',
687+
'iter_content',
688+
'status_code',
689+
],
690+
)
691+
# i.e. context manager returns ``self``.
692+
response.__enter__.return_value = response
693+
response.__exit__.return_value = None
694+
chunks = (b'noms1', b'coooookies2')
695+
response.iter_content.return_value = iter(chunks)
696+
697+
transport.request.return_value = response
698+
# Create a fake client/bucket and use them in the Blob() constructor.
699+
client = mock.Mock(_http=transport, spec=['_http'])
700+
bucket = mock.Mock(
701+
client=client,
702+
user_project=None,
703+
spec=['client', 'user_project'],
704+
)
705+
media_link = 'http://example.com/media/'
706+
properties = {'mediaLink': media_link}
707+
blob = self._make_one(blob_name, bucket=bucket, properties=properties)
708+
# Make sure the download is **not** chunked.
709+
self.assertIsNone(blob.chunk_size)
710+
711+
# Make sure the hash will be wrong.
712+
content = b''.join(chunks)
713+
expected_hash = base64.b64encode(
714+
hashlib.md5(content).digest()).decode(u'utf-8')
715+
self.assertNotEqual(empty_hash, expected_hash)
716+
717+
# Try to download into a temporary file (don't use
718+
# `_NamedTemporaryFile` it will try to remove after the file is
719+
# already removed)
720+
filehandle, filename = tempfile.mkstemp()
721+
os.close(filehandle)
722+
with self.assertRaises(DataCorruption) as exc_info:
723+
blob.download_to_filename(filename)
724+
725+
msg = _CHECKSUM_MISMATCH.format(media_link, empty_hash, expected_hash)
726+
self.assertEqual(exc_info.exception.args, (msg,))
727+
# Make sure the file was cleaned up.
728+
self.assertFalse(os.path.exists(filename))
729+
730+
# Check the mocks.
731+
response.__enter__.assert_called_once_with()
732+
response.__exit__.assert_called_once_with(None, None, None)
733+
response.iter_content.assert_called_once_with(
734+
chunk_size=8192, decode_unicode=False)
735+
transport.request.assert_called_once_with(
736+
'GET',
737+
media_link,
738+
data=None,
739+
headers={'accept-encoding': 'gzip'},
740+
stream=True,
741+
)
742+
668743
def test_download_to_filename_w_key(self):
669744
import os
670745
import time

0 commit comments

Comments
 (0)