Skip to content

Commit 32df26a

Browse files
committed
Disable MD5 image checksums
MD5 image checksums have long been supersceeded by the use of a ``os_hash_algo`` and ``os_hash_value`` field as part of the properties of an image. In the process of doing this, we determined that checksum via URL usage was non-trivial and determined that an appropriate path was to allow the checksum type to be determined as needed. Change-Id: I26ba8f8c37d663096f558e83028ff463d31bd4e6
1 parent 0304c73 commit 32df26a

File tree

6 files changed

+456
-77
lines changed

6 files changed

+456
-77
lines changed

doc/source/admin/how_it_works.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,3 +213,36 @@ fields:
213213
.. note::
214214
This is most likely to be set by the DHCP server. Could be localhost
215215
if the DHCP server does not set it.
216+
217+
Image Checksums
218+
---------------
219+
220+
As part of the process of downloading images to be written to disk as part of
221+
image deployment, a series of fields are utilized to determine if the
222+
image which has been downloaded matches what the user stated as the expected
223+
image checksum utilizing the ``instance_info/image_checksum`` value.
224+
225+
OpenStack, as a whole, has replaced the "legacy" ``checksum`` field with
226+
``os_hash_value`` and ``os_hash_algo`` fields, which allows for an image
227+
checksum and value to be asserted. An advantage of this is a variety of
228+
algorithms are available, if a user/operator is so-inclined.
229+
230+
For the purposes of Ironic, we continue to support the pass-through checksum
231+
field as we support the checksum being retrieved via a URL.
232+
233+
We also support determining the checksum by length.
234+
235+
The field can be utilized to designate:
236+
237+
* A URL to retreive a checksum from.
238+
* MD5 (Disabled by default, see ``[DEFAULT]md5_enabled`` in the agent
239+
configuration file.)
240+
* SHA-2 based SHA256
241+
* SHA-2 based SHA512
242+
243+
SHA-3 based checksums are not supported for auto-determination as they can
244+
have a variable length checksum result. At of when this documentation was
245+
added, SHA-2 based checksum algorithms have not been withdrawn from from
246+
approval. If you need to force use of SHA-3 based checksums, you *must*
247+
utilize the ``os_hash_algo`` setting along with the ``os_hash_value``
248+
setting.

ironic_python_agent/agent.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,8 @@ def process_lookup_data(self, content):
408408
if config.get('metrics_statsd'):
409409
for opt, val in config.items():
410410
setattr(cfg.CONF.metrics_statsd, opt, val)
411+
if config.get('agent_md5_checksum_enable'):
412+
cfg.set_override('md5_enabled', True)
411413
if config.get('agent_token_required'):
412414
self.agent_token_required = True
413415
token = config.get('agent_token')

ironic_python_agent/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,9 @@
326326
'cleaning from inadvertently destroying a running '
327327
'cluster which may be visible over a storage fabric '
328328
'such as FibreChannel.'),
329+
cfg.BoolOpt('md5_enabled',
330+
default=False,
331+
help='If the MD5 algorithm is enabled for file checksums.'),
329332
]
330333

331334
CONF.register_cli_opts(cli_opts)

ironic_python_agent/extensions/standby.py

Lines changed: 90 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,17 @@ def _download_with_proxy(image_info, url, image_id):
9999
return resp
100100

101101

102+
def _is_checksum_url(checksum):
103+
"""Identify if checksum is not a url"""
104+
if (checksum.startswith('http://') or checksum.startswith('https://')):
105+
return True
106+
else:
107+
return False
108+
109+
102110
def _fetch_checksum(checksum, image_info):
103111
"""Fetch checksum from remote location, if needed."""
104-
if not (checksum.startswith('http://') or checksum.startswith('https://')):
112+
if not _is_checksum_url(checksum):
105113
# Not a remote checksum, return as it is.
106114
return checksum
107115

@@ -263,6 +271,47 @@ def _message_format(msg, image_info, device, partition_uuids):
263271
return message
264272

265273

274+
def _get_algorithm_by_length(checksum):
275+
"""Determine the SHA-2 algorithm by checksum length.
276+
277+
:param checksum: The requested checksum.
278+
:returns: A hashlib object based upon the checksum
279+
or ValueError if the algorthm could not be
280+
identified.
281+
"""
282+
# NOTE(TheJulia): This is all based on SHA-2 lengths.
283+
# SHA-3 would require a hint, thus ValueError because
284+
# it may not be a fixed length. That said, SHA-2 is not
285+
# as of this not being added, being withdrawn standards wise.
286+
checksum_len = len(checksum)
287+
if checksum_len == 128:
288+
# Sha512 is 512 bits, or 128 characters
289+
return hashlib.new('sha512')
290+
elif checksum_len == 64:
291+
# SHA256 is 256 bits, or 64 characters
292+
return hashlib.new('sha256')
293+
elif checksum_len == 32:
294+
check_md5_enabled()
295+
# This is not super great, but opt-in only.
296+
return hashlib.new('md5') # nosec
297+
else:
298+
# Previously, we would have just assumed the value was
299+
# md5 by default. This way we are a little smarter and
300+
# gracefully handle things better when md5 is explicitly
301+
# disabled.
302+
raise ValueError('Unable to identify checksum algorithm '
303+
'used, and a value is not specified in '
304+
'the os_hash_algo setting.')
305+
306+
307+
def check_md5_enabled():
308+
"""Checks if md5 is permitted, otherwise raises ValueError."""
309+
if not CONF.md5_enabled:
310+
raise ValueError('MD5 support is disabled, and support '
311+
'will be removed in a 2024 version of '
312+
'Ironic.')
313+
314+
266315
class ImageDownload(object):
267316
"""Helper class that opens a HTTP connection to download an image.
268317
@@ -292,6 +341,8 @@ def __init__(self, image_info, time_obj=None):
292341
self._time = time_obj or time.time()
293342
self._image_info = image_info
294343
self._request = None
344+
checksum = image_info.get('checksum')
345+
retrieved_checksum = False
295346

296347
# Determine the hash algorithm and value will be used for calculation
297348
# and verification, fallback to md5 if algorithm is not set or not
@@ -300,18 +351,37 @@ def __init__(self, image_info, time_obj=None):
300351
if algo and algo in hashlib.algorithms_available:
301352
self._hash_algo = hashlib.new(algo)
302353
self._expected_hash_value = image_info.get('os_hash_value')
303-
elif image_info.get('checksum'):
354+
elif checksum and _is_checksum_url(checksum):
355+
# Treat checksum urls as first class request citizens, else
356+
# fallback to legacy handling.
357+
self._expected_hash_value = _fetch_checksum(
358+
checksum,
359+
image_info)
360+
retrieved_checksum = True
361+
if not algo:
362+
# Override algorithm not suppied as os_hash_algo
363+
self._hash_algo = _get_algorithm_by_length(
364+
self._expected_hash_value)
365+
elif checksum:
366+
# Fallback to md5 path.
304367
try:
305-
self._hash_algo = hashlib.md5()
368+
new_algo = _get_algorithm_by_length(checksum)
369+
370+
if not new_algo:
371+
# Realistically, this should never happen, but for
372+
# compatability...
373+
# TODO(TheJulia): Remove for a 2024 release.
374+
self._hash_algo = hashlib.new('md5')
375+
else:
376+
self._hash_algo = new_algo
306377
except ValueError as e:
307-
message = ('Unable to proceed with image {} as the legacy '
308-
'checksum indicator has been used, which makes use '
309-
'the MD5 algorithm. This algorithm failed to load '
310-
'due to the underlying operating system. Error: '
378+
message = ('Unable to proceed with image {} as the '
379+
'checksum indicator has been used but the '
380+
'algorithm could not be identified. Error: '
311381
'{}').format(image_info['id'], str(e))
312382
LOG.error(message)
313383
raise errors.RESTError(details=message)
314-
self._expected_hash_value = image_info['checksum']
384+
self._expected_hash_value = checksum
315385
else:
316386
message = ('Unable to verify image {} with available checksums. '
317387
'Please make sure the specified \'os_hash_algo\' '
@@ -322,8 +392,12 @@ def __init__(self, image_info, time_obj=None):
322392
LOG.error(message)
323393
raise errors.RESTError(details=message)
324394

325-
self._expected_hash_value = _fetch_checksum(self._expected_hash_value,
326-
image_info)
395+
if not retrieved_checksum:
396+
# Fallback to retrieve the checksum if we didn't retrieve it
397+
# earlier on.
398+
self._expected_hash_value = _fetch_checksum(
399+
self._expected_hash_value,
400+
image_info)
327401

328402
details = []
329403
for url in image_info['urls']:
@@ -363,7 +437,10 @@ def __iter__(self):
363437
# this code.
364438
if chunk:
365439
self._last_chunk_time = time.time()
366-
self._hash_algo.update(chunk)
440+
if isinstance(chunk, str):
441+
self._hash_algo.update(chunk.encode())
442+
else:
443+
self._hash_algo.update(chunk)
367444
yield chunk
368445
elif (time.time() - self._last_chunk_time
369446
> CONF.image_download_connection_timeout):
@@ -476,7 +553,8 @@ def _validate_image_info(ext, image_info=None, **kwargs):
476553
or not image_info['checksum']):
477554
raise errors.InvalidCommandParamsError(
478555
'Image \'checksum\' must be a non-empty string.')
479-
md5sum_avail = True
556+
if CONF.md5_enabled:
557+
md5sum_avail = True
480558

481559
os_hash_algo = image_info.get('os_hash_algo')
482560
os_hash_value = image_info.get('os_hash_value')

0 commit comments

Comments
 (0)