Skip to content

Commit 1e13eb0

Browse files
committed
- Issue #13703: oCERT-2011-003: add -R command-line option and PYTHONHASHSEED
environment variable, to provide an opt-in way to protect against denial of service attacks due to hash collisions within the dict and set types. Patch by David Malcolm, based on work by Victor Stinner.
1 parent f5a5beb commit 1e13eb0

27 files changed

+705
-151
lines changed

Doc/library/sys.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,11 @@ always available.
289289
+------------------------------+------------------------------------------+
290290
| :const:`bytes_warning` | -b |
291291
+------------------------------+------------------------------------------+
292+
+------------------------------+------------------------------------------+
293+
| :const:`hash_randomization` | -R |
294+
| | |
295+
| | .. versionadded:: 2.6.8 |
296+
+------------------------------+------------------------------------------+
292297

293298
.. versionadded:: 2.6
294299

Doc/reference/datamodel.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,6 +1273,8 @@ Basic customization
12731273
modules are still available at the time when the :meth:`__del__` method is
12741274
called.
12751275

1276+
See also the :option:`-R` command-line option.
1277+
12761278

12771279
.. method:: object.__repr__(self)
12781280

Doc/using/cmdline.rst

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Command line
2121

2222
When invoking Python, you may specify any of these options::
2323

24-
python [-BdEiOQsStuUvVWxX3?] [-c command | -m module-name | script | - ] [args]
24+
python [-BdEiOQsRStuUvVWxX3?] [-c command | -m module-name | script | - ] [args]
2525

2626
The most common use case is, of course, a simple invocation of a script::
2727

@@ -239,6 +239,29 @@ Miscellaneous options
239239
:pep:`238` -- Changing the division operator
240240

241241

242+
.. cmdoption:: -R
243+
244+
Turn on hash randomization, so that the :meth:`__hash__` values of str,
245+
bytes, unicode, buffer and datetime objects are "salted" with an unpredictable random value.
246+
Although they remain constant within an individual Python process, they are
247+
not predictable between repeated invocations of Python.
248+
249+
This is intended to provide protection against a denial-of-service caused by
250+
carefully-chosen inputs that exploit the worst case performance of a dict
251+
construction, O(n^2) complexity. See
252+
http://www.ocert.org/advisories/ocert-2011-003.html for details.
253+
254+
Changing hash values affects the order in which keys are retrieved from a
255+
dict. Although Python has never made guarantees about this ordering (and it
256+
typically varies between 32-bit and 64-bit builds), enough real-world code
257+
implicitly relies on this non-guaranteed behavior that the randomization is
258+
disabled by default.
259+
260+
See also :envvar:`PYTHONHASHSEED`.
261+
262+
.. versionadded:: 2.6.8
263+
264+
242265
.. cmdoption:: -s
243266

244267
Don't add user site directory to sys.path
@@ -501,6 +524,27 @@ These environment variables influence Python's behavior.
501524

502525
.. versionadded:: 2.6
503526

527+
.. envvar:: PYTHONHASHSEED
528+
529+
If this variable is set to ``random``, the effect is the same as specifying
530+
the :option:`-R` option: a random value is used to seed the hashes of str,
531+
bytes, unicode, buffer and datetime objects.
532+
533+
If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a
534+
fixed seed for generating the hash() of the types covered by the hash
535+
randomization.
536+
537+
Its purpose is to allow repeatable hashing, such as for selftests for the
538+
interpreter itself, or to allow a cluster of Python processes to share hash
539+
values.
540+
541+
The integer must be a decimal number in the range [0,4294967295].
542+
Specifying the value 0 will lead to the same hash values as when hash
543+
randomization is disabled.
544+
545+
.. versionadded:: 2.6.8
546+
547+
504548
.. envvar:: PYTHONIOENCODING
505549

506550
Overrides the encoding used for stdin/stdout/stderr, in the syntax

Include/object.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,12 @@ PyAPI_FUNC(void) Py_ReprLeave(PyObject *);
506506
PyAPI_FUNC(long) _Py_HashDouble(double);
507507
PyAPI_FUNC(long) _Py_HashPointer(void*);
508508

509+
typedef struct {
510+
long prefix;
511+
long suffix;
512+
} _Py_HashSecret_t;
513+
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
514+
509515
/* Helper for passing objects to printf and the like */
510516
#define PyObject_REPR(obj) PyString_AS_STRING(PyObject_Repr(obj))
511517

Include/pydebug.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ PyAPI_DATA(int) Py_NoUserSiteDirectory;
2626
PyAPI_DATA(int) _Py_QnewFlag;
2727
/* Warn about 3.x issues */
2828
PyAPI_DATA(int) Py_Py3kWarningFlag;
29+
PyAPI_DATA(int) Py_HashRandomizationFlag;
2930

3031
/* this is a wrapper around getenv() that pays attention to
3132
Py_IgnoreEnvironmentFlag. It should be used for getting variables like

Include/pythonrun.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ typedef void (*PyOS_sighandler_t)(int);
168168
PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int);
169169
PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t);
170170

171+
/* Random */
172+
PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size);
171173

172174
#ifdef __cplusplus
173175
}

Lib/os.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -742,22 +742,3 @@ def _pickle_statvfs_result(sr):
742742
_make_statvfs_result)
743743
except NameError: # statvfs_result may not exist
744744
pass
745-
746-
if not _exists("urandom"):
747-
def urandom(n):
748-
"""urandom(n) -> str
749-
750-
Return a string of n random bytes suitable for cryptographic use.
751-
752-
"""
753-
try:
754-
_urandomfd = open("/dev/urandom", O_RDONLY)
755-
except (OSError, IOError):
756-
raise NotImplementedError("/dev/urandom (or equivalent) not found")
757-
try:
758-
bs = b""
759-
while n - len(bs) >= 1:
760-
bs += read(_urandomfd, n - len(bs))
761-
finally:
762-
close(_urandomfd)
763-
return bs

Lib/test/test_cmd_line.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,20 @@ def test_run_code(self):
103103
self.exit_code('-c', 'pass'),
104104
0)
105105

106+
def test_hash_randomization(self):
107+
# Verify that -R enables hash randomization:
108+
self.verify_valid_flag('-R')
109+
hashes = []
110+
for i in range(2):
111+
code = 'print(hash("spam"))'
112+
data = self.start_python('-R', '-c', code)
113+
hashes.append(data)
114+
self.assertNotEqual(hashes[0], hashes[1])
115+
116+
# Verify that sys.flags contains hash_randomization
117+
code = 'import sys; print sys.flags'
118+
data = self.start_python('-R', '-c', code)
119+
self.assertTrue('hash_randomization=1' in data)
106120

107121
def test_main():
108122
test.test_support.run_unittest(CmdLineTest)

Lib/test/test_hash.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,18 @@
33
#
44
# Also test that hash implementations are inherited as expected
55

6+
import os
7+
import sys
8+
import struct
9+
import datetime
610
import unittest
11+
import subprocess
12+
713
from test import test_support
814
from collections import Hashable
915

16+
IS_64BIT = (struct.calcsize('l') == 8)
17+
1018

1119
class HashEqualityTestCase(unittest.TestCase):
1220

@@ -133,10 +141,100 @@ def test_hashes(self):
133141
for obj in self.hashes_to_check:
134142
self.assertEqual(hash(obj), _default_hash(obj))
135143

144+
class HashRandomizationTests(unittest.TestCase):
145+
146+
# Each subclass should define a field "repr_", containing the repr() of
147+
# an object to be tested
148+
149+
def get_hash_command(self, repr_):
150+
return 'print(hash(%s))' % repr_
151+
152+
def get_hash(self, repr_, seed=None):
153+
env = os.environ.copy()
154+
if seed is not None:
155+
env['PYTHONHASHSEED'] = str(seed)
156+
else:
157+
env.pop('PYTHONHASHSEED', None)
158+
cmd_line = [sys.executable, '-c', self.get_hash_command(repr_)]
159+
p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,
160+
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
161+
env=env)
162+
out, err = p.communicate()
163+
out = test_support.strip_python_stderr(out)
164+
return int(out.strip())
165+
166+
def test_randomized_hash(self):
167+
# two runs should return different hashes
168+
run1 = self.get_hash(self.repr_, seed='random')
169+
run2 = self.get_hash(self.repr_, seed='random')
170+
self.assertNotEqual(run1, run2)
171+
172+
class StringlikeHashRandomizationTests(HashRandomizationTests):
173+
def test_null_hash(self):
174+
# PYTHONHASHSEED=0 disables the randomized hash
175+
if IS_64BIT:
176+
known_hash_of_obj = 1453079729188098211
177+
else:
178+
known_hash_of_obj = -1600925533
179+
180+
# Randomization is disabled by default:
181+
self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj)
182+
183+
# It can also be disabled by setting the seed to 0:
184+
self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
185+
186+
def test_fixed_hash(self):
187+
# test a fixed seed for the randomized hash
188+
# Note that all types share the same values:
189+
if IS_64BIT:
190+
h = -4410911502303878509
191+
else:
192+
h = -206076799
193+
self.assertEqual(self.get_hash(self.repr_, seed=42), h)
194+
195+
class StrHashRandomizationTests(StringlikeHashRandomizationTests):
196+
repr_ = repr('abc')
197+
198+
def test_empty_string(self):
199+
self.assertEqual(hash(""), 0)
200+
201+
class UnicodeHashRandomizationTests(StringlikeHashRandomizationTests):
202+
repr_ = repr(u'abc')
203+
204+
def test_empty_string(self):
205+
self.assertEqual(hash(u""), 0)
206+
207+
class BufferHashRandomizationTests(StringlikeHashRandomizationTests):
208+
repr_ = 'buffer("abc")'
209+
210+
def test_empty_string(self):
211+
self.assertEqual(hash(buffer("")), 0)
212+
213+
class DatetimeTests(HashRandomizationTests):
214+
def get_hash_command(self, repr_):
215+
return 'import datetime; print(hash(%s))' % repr_
216+
217+
class DatetimeDateTests(DatetimeTests):
218+
repr_ = repr(datetime.date(1066, 10, 14))
219+
220+
class DatetimeDatetimeTests(DatetimeTests):
221+
repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))
222+
223+
class DatetimeTimeTests(DatetimeTests):
224+
repr_ = repr(datetime.time(0))
225+
226+
136227
def test_main():
137228
test_support.run_unittest(HashEqualityTestCase,
138229
HashInheritanceTestCase,
139-
HashBuiltinsTestCase)
230+
HashBuiltinsTestCase,
231+
StrHashRandomizationTests,
232+
UnicodeHashRandomizationTests,
233+
BufferHashRandomizationTests,
234+
DatetimeDateTests,
235+
DatetimeDatetimeTests,
236+
DatetimeTimeTests)
237+
140238

141239

142240
if __name__ == "__main__":

Lib/test/test_os.py

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import unittest
77
import warnings
88
import sys
9+
import subprocess
10+
911
from test import test_support
1012

1113
warnings.filterwarnings("ignore", "tempnam", RuntimeWarning, __name__)
@@ -499,18 +501,46 @@ def test_devnull(self):
499501

500502
class URandomTests (unittest.TestCase):
501503
def test_urandom(self):
502-
try:
503-
with test_support.check_warnings():
504-
self.assertEqual(len(os.urandom(1)), 1)
505-
self.assertEqual(len(os.urandom(10)), 10)
506-
self.assertEqual(len(os.urandom(100)), 100)
507-
self.assertEqual(len(os.urandom(1000)), 1000)
508-
# see http://bugs.python.org/issue3708
509-
self.assertEqual(len(os.urandom(0.9)), 0)
510-
self.assertEqual(len(os.urandom(1.1)), 1)
511-
self.assertEqual(len(os.urandom(2.0)), 2)
512-
except NotImplementedError:
513-
pass
504+
with test_support.check_warnings():
505+
self.assertEqual(len(os.urandom(1)), 1)
506+
self.assertEqual(len(os.urandom(10)), 10)
507+
self.assertEqual(len(os.urandom(100)), 100)
508+
self.assertEqual(len(os.urandom(1000)), 1000)
509+
# see http://bugs.python.org/issue3708
510+
self.assertEqual(len(os.urandom(0.9)), 0)
511+
self.assertEqual(len(os.urandom(1.1)), 1)
512+
self.assertEqual(len(os.urandom(2.0)), 2)
513+
514+
def test_urandom_length(self):
515+
self.assertEqual(len(os.urandom(0)), 0)
516+
self.assertEqual(len(os.urandom(1)), 1)
517+
self.assertEqual(len(os.urandom(10)), 10)
518+
self.assertEqual(len(os.urandom(100)), 100)
519+
self.assertEqual(len(os.urandom(1000)), 1000)
520+
521+
def test_urandom_value(self):
522+
data1 = os.urandom(16)
523+
data2 = os.urandom(16)
524+
self.assertNotEqual(data1, data2)
525+
526+
def get_urandom_subprocess(self, count):
527+
code = '\n'.join((
528+
'import os, sys',
529+
'data = os.urandom(%s)' % count,
530+
'sys.stdout.write(data)',
531+
'sys.stdout.flush()'))
532+
cmd_line = [sys.executable, '-c', code]
533+
p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,
534+
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
535+
out, err = p.communicate()
536+
out = test_support.strip_python_stderr(out)
537+
self.assertEqual(len(out), count)
538+
return out
539+
540+
def test_urandom_subprocess(self):
541+
data1 = self.get_urandom_subprocess(16)
542+
data2 = self.get_urandom_subprocess(16)
543+
self.assertNotEqual(data1, data2)
514544

515545
class Win32ErrorTests(unittest.TestCase):
516546
def test_rename(self):

0 commit comments

Comments
 (0)