Skip to content

Commit aee9dfb

Browse files
committed
merge 2.6 with hash randomization fix
2 parents 34b345b + b19fb24 commit aee9dfb

26 files changed

+2502
-139
lines changed

Doc/library/sys.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ always available.
286286
:const:`verbose` :option:`-v`
287287
:const:`unicode` :option:`-U`
288288
:const:`bytes_warning` :option:`-b`
289+
:const:`hash_randomization` :option:`-R`
289290
============================= ===================================
290291

291292
.. versionadded:: 2.6

Doc/reference/datamodel.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,6 +1282,8 @@ Basic customization
12821282
modules are still available at the time when the :meth:`__del__` method is
12831283
called.
12841284

1285+
See also the :option:`-R` command-line option.
1286+
12851287

12861288
.. method:: object.__repr__(self)
12871289

Doc/using/cmdline.rst

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Command line
2424

2525
When invoking Python, you may specify any of these options::
2626

27-
python [-BdEiOQsStuUvVWxX3?] [-c command | -m module-name | script | - ] [args]
27+
python [-BdEiOQsRStuUvVWxX3?] [-c command | -m module-name | script | - ] [args]
2828

2929
The most common use case is, of course, a simple invocation of a script::
3030

@@ -253,6 +253,29 @@ Miscellaneous options
253253
:pep:`238` -- Changing the division operator
254254

255255

256+
.. cmdoption:: -R
257+
258+
Turn on hash randomization, so that the :meth:`__hash__` values of str,
259+
bytes and datetime objects are "salted" with an unpredictable random value.
260+
Although they remain constant within an individual Python process, they are
261+
not predictable between repeated invocations of Python.
262+
263+
This is intended to provide protection against a denial-of-service caused by
264+
carefully-chosen inputs that exploit the worst case performance of a dict
265+
construction, O(n^2) complexity. See
266+
http://www.ocert.org/advisories/ocert-2011-003.html for details.
267+
268+
Changing hash values affects the order in which keys are retrieved from a
269+
dict. Although Python has never made guarantees about this ordering (and it
270+
typically varies between 32-bit and 64-bit builds), enough real-world code
271+
implicitly relies on this non-guaranteed behavior that the randomization is
272+
disabled by default.
273+
274+
See also :envvar:`PYTHONHASHSEED`.
275+
276+
.. versionadded:: 2.6.8
277+
278+
256279
.. cmdoption:: -s
257280

258281
Don't add the :data:`user site-packages directory <site.USER_SITE>` to
@@ -522,6 +545,27 @@ These environment variables influence Python's behavior.
522545

523546
.. versionadded:: 2.6
524547

548+
.. envvar:: PYTHONHASHSEED
549+
550+
If this variable is set to ``random``, the effect is the same as specifying
551+
the :option:`-R` option: a random value is used to seed the hashes of str,
552+
bytes and datetime objects.
553+
554+
If :envvar:`PYTHONHASHSEED` is set to an integer value, it is used as a
555+
fixed seed for generating the hash() of the types covered by the hash
556+
randomization.
557+
558+
Its purpose is to allow repeatable hashing, such as for selftests for the
559+
interpreter itself, or to allow a cluster of Python processes to share hash
560+
values.
561+
562+
The integer must be a decimal number in the range [0,4294967295].
563+
Specifying the value 0 will lead to the same hash values as when hash
564+
randomization is disabled.
565+
566+
.. versionadded:: 2.6.8
567+
568+
525569
.. envvar:: PYTHONIOENCODING
526570

527571
Overrides the encoding used for stdin/stdout/stderr, in the syntax

Include/object.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,12 @@ PyAPI_FUNC(void) Py_ReprLeave(PyObject *);
517517
PyAPI_FUNC(long) _Py_HashDouble(double);
518518
PyAPI_FUNC(long) _Py_HashPointer(void*);
519519

520+
typedef struct {
521+
long prefix;
522+
long suffix;
523+
} _Py_HashSecret_t;
524+
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
525+
520526
/* Helper for passing objects to printf and the like */
521527
#define PyObject_REPR(obj) PyString_AS_STRING(PyObject_Repr(obj))
522528

Include/pydebug.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ PyAPI_DATA(int) Py_NoUserSiteDirectory;
2626
PyAPI_DATA(int) _Py_QnewFlag;
2727
/* Warn about 3.x issues */
2828
PyAPI_DATA(int) Py_Py3kWarningFlag;
29+
PyAPI_DATA(int) Py_HashRandomizationFlag;
2930

3031
/* this is a wrapper around getenv() that pays attention to
3132
Py_IgnoreEnvironmentFlag. It should be used for getting variables like

Include/pythonrun.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ typedef void (*PyOS_sighandler_t)(int);
171171
PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int);
172172
PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t);
173173

174+
/* Random */
175+
PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size);
174176

175177
#ifdef __cplusplus
176178
}

Lib/os.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -738,22 +738,3 @@ def _pickle_statvfs_result(sr):
738738
_make_statvfs_result)
739739
except NameError: # statvfs_result may not exist
740740
pass
741-
742-
if not _exists("urandom"):
743-
def urandom(n):
744-
"""urandom(n) -> str
745-
746-
Return a string of n random bytes suitable for cryptographic use.
747-
748-
"""
749-
try:
750-
_urandomfd = open("/dev/urandom", O_RDONLY)
751-
except (OSError, IOError):
752-
raise NotImplementedError("/dev/urandom (or equivalent) not found")
753-
try:
754-
bs = b""
755-
while n > len(bs):
756-
bs += read(_urandomfd, n - len(bs))
757-
finally:
758-
close(_urandomfd)
759-
return bs

Lib/test/test_cmd_line.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,20 @@ def test_run_code(self):
8686
self.exit_code('-c', 'pass'),
8787
0)
8888

89+
def test_hash_randomization(self):
90+
# Verify that -R enables hash randomization:
91+
self.verify_valid_flag('-R')
92+
hashes = []
93+
for i in range(2):
94+
code = 'print(hash("spam"))'
95+
data = self.start_python('-R', '-c', code)
96+
hashes.append(data)
97+
self.assertNotEqual(hashes[0], hashes[1])
98+
99+
# Verify that sys.flags contains hash_randomization
100+
code = 'import sys; print sys.flags'
101+
data = self.start_python('-R', '-c', code)
102+
self.assertTrue('hash_randomization=1' in data)
89103

90104
def test_main():
91105
test.test_support.run_unittest(CmdLineTest)

Lib/test/test_hash.py

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,18 @@
33
#
44
# Also test that hash implementations are inherited as expected
55

6+
import os
7+
import sys
8+
import struct
9+
import datetime
610
import unittest
11+
import subprocess
12+
713
from test import test_support
814
from collections import Hashable
915

16+
IS_64BIT = (struct.calcsize('l') == 8)
17+
1018

1119
class HashEqualityTestCase(unittest.TestCase):
1220

@@ -134,10 +142,100 @@ def test_hashes(self):
134142
for obj in self.hashes_to_check:
135143
self.assertEqual(hash(obj), _default_hash(obj))
136144

145+
class HashRandomizationTests(unittest.TestCase):
146+
147+
# Each subclass should define a field "repr_", containing the repr() of
148+
# an object to be tested
149+
150+
def get_hash_command(self, repr_):
151+
return 'print(hash(%s))' % repr_
152+
153+
def get_hash(self, repr_, seed=None):
154+
env = os.environ.copy()
155+
if seed is not None:
156+
env['PYTHONHASHSEED'] = str(seed)
157+
else:
158+
env.pop('PYTHONHASHSEED', None)
159+
cmd_line = [sys.executable, '-c', self.get_hash_command(repr_)]
160+
p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,
161+
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
162+
env=env)
163+
out, err = p.communicate()
164+
out = test_support.strip_python_stderr(out)
165+
return int(out.strip())
166+
167+
def test_randomized_hash(self):
168+
# two runs should return different hashes
169+
run1 = self.get_hash(self.repr_, seed='random')
170+
run2 = self.get_hash(self.repr_, seed='random')
171+
self.assertNotEqual(run1, run2)
172+
173+
class StringlikeHashRandomizationTests(HashRandomizationTests):
174+
def test_null_hash(self):
175+
# PYTHONHASHSEED=0 disables the randomized hash
176+
if IS_64BIT:
177+
known_hash_of_obj = 1453079729188098211
178+
else:
179+
known_hash_of_obj = -1600925533
180+
181+
# Randomization is disabled by default:
182+
self.assertEqual(self.get_hash(self.repr_), known_hash_of_obj)
183+
184+
# It can also be disabled by setting the seed to 0:
185+
self.assertEqual(self.get_hash(self.repr_, seed=0), known_hash_of_obj)
186+
187+
def test_fixed_hash(self):
188+
# test a fixed seed for the randomized hash
189+
# Note that all types share the same values:
190+
if IS_64BIT:
191+
h = -4410911502303878509
192+
else:
193+
h = -206076799
194+
self.assertEqual(self.get_hash(self.repr_, seed=42), h)
195+
196+
class StrHashRandomizationTests(StringlikeHashRandomizationTests):
197+
repr_ = repr('abc')
198+
199+
def test_empty_string(self):
200+
self.assertEqual(hash(""), 0)
201+
202+
class UnicodeHashRandomizationTests(StringlikeHashRandomizationTests):
203+
repr_ = repr(u'abc')
204+
205+
def test_empty_string(self):
206+
self.assertEqual(hash(u""), 0)
207+
208+
class BufferHashRandomizationTests(StringlikeHashRandomizationTests):
209+
repr_ = 'buffer("abc")'
210+
211+
def test_empty_string(self):
212+
self.assertEqual(hash(buffer("")), 0)
213+
214+
class DatetimeTests(HashRandomizationTests):
215+
def get_hash_command(self, repr_):
216+
return 'import datetime; print(hash(%s))' % repr_
217+
218+
class DatetimeDateTests(DatetimeTests):
219+
repr_ = repr(datetime.date(1066, 10, 14))
220+
221+
class DatetimeDatetimeTests(DatetimeTests):
222+
repr_ = repr(datetime.datetime(1, 2, 3, 4, 5, 6, 7))
223+
224+
class DatetimeTimeTests(DatetimeTests):
225+
repr_ = repr(datetime.time(0))
226+
227+
137228
def test_main():
138229
test_support.run_unittest(HashEqualityTestCase,
139230
HashInheritanceTestCase,
140-
HashBuiltinsTestCase)
231+
HashBuiltinsTestCase,
232+
StrHashRandomizationTests,
233+
UnicodeHashRandomizationTests,
234+
BufferHashRandomizationTests,
235+
DatetimeDateTests,
236+
DatetimeDatetimeTests,
237+
DatetimeTimeTests)
238+
141239

142240

143241
if __name__ == "__main__":

Lib/test/test_os.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import signal
1111
import subprocess
1212
import time
13+
1314
from test import test_support
1415
import mmap
1516
import uuid
@@ -536,8 +537,41 @@ def test_urandom(self):
536537
self.assertRaises(TypeError, os.urandom, 0.9)
537538
self.assertRaises(TypeError, os.urandom, 1.1)
538539
self.assertRaises(TypeError, os.urandom, 2.0)
539-
except NotImplementedError:
540-
pass
540+
self.assertEqual(len(os.urandom(0.9)), 0)
541+
self.assertEqual(len(os.urandom(1.1)), 1)
542+
self.assertEqual(len(os.urandom(2.0)), 2)
543+
544+
def test_urandom_length(self):
545+
self.assertEqual(len(os.urandom(0)), 0)
546+
self.assertEqual(len(os.urandom(1)), 1)
547+
self.assertEqual(len(os.urandom(10)), 10)
548+
self.assertEqual(len(os.urandom(100)), 100)
549+
self.assertEqual(len(os.urandom(1000)), 1000)
550+
551+
def test_urandom_value(self):
552+
data1 = os.urandom(16)
553+
data2 = os.urandom(16)
554+
self.assertNotEqual(data1, data2)
555+
556+
def get_urandom_subprocess(self, count):
557+
code = '\n'.join((
558+
'import os, sys',
559+
'data = os.urandom(%s)' % count,
560+
'sys.stdout.write(data)',
561+
'sys.stdout.flush()'))
562+
cmd_line = [sys.executable, '-c', code]
563+
p = subprocess.Popen(cmd_line, stdin=subprocess.PIPE,
564+
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
565+
out, err = p.communicate()
566+
out = test_support.strip_python_stderr(out)
567+
self.assertEqual(len(out), count)
568+
return out
569+
570+
def test_urandom_subprocess(self):
571+
data1 = self.get_urandom_subprocess(16)
572+
data2 = self.get_urandom_subprocess(16)
573+
self.assertNotEqual(data1, data2)
574+
541575

542576
def test_execvpe_with_bad_arglist(self):
543577
self.assertRaises(ValueError, os.execvpe, 'notepad', [], None)

0 commit comments

Comments
 (0)