From 92281cef811f8207859cdda57e425a8281bbca3e Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Thu, 8 Nov 2018 13:14:32 -0800 Subject: [PATCH 1/6] Fix an off by one error in the RETURN_VALUE case. By doing a bounds check within find_op so it can return before going past the end as a safety measure. https://github.com/python/cpython/commit/7db3c488335168993689ddae5914a28e16188447#diff-a33329ae6ae0bb295d742f0caf93c137 introduced this off by one error while fixing another one nearby. --- Python/peephole.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/Python/peephole.c b/Python/peephole.c index 16fd5009bf1d8c..77d134939f8de9 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -50,9 +50,9 @@ lastn_const_start(const _Py_CODEUNIT *codestr, Py_ssize_t i, Py_ssize_t n) /* Scans through EXTENDED ARGs, seeking the index of the effective opcode */ static Py_ssize_t -find_op(const _Py_CODEUNIT *codestr, Py_ssize_t i) +find_op(const _Py_CODEUNIT *codestr, Py_ssize_t codelen, Py_ssize_t i) { - while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) { + while (i < codelen && _Py_OPCODE(codestr[i]) == EXTENDED_ARG) { i++; } return i; @@ -128,8 +128,9 @@ copy_op_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned char op, Called with codestr pointing to the first LOAD_CONST. */ static Py_ssize_t -fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start, - Py_ssize_t opcode_end, PyObject *consts, int n) +fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t codelen, + Py_ssize_t c_start, Py_ssize_t opcode_end, + PyObject *consts, int n) { /* Pre-conditions */ assert(PyList_CheckExact(consts)); @@ -142,7 +143,7 @@ fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start, for (Py_ssize_t i = 0, pos = c_start; i < n; i++, pos++) { assert(pos < opcode_end); - pos = find_op(codestr, pos); + pos = find_op(codestr, codelen, pos); assert(_Py_OPCODE(codestr[pos]) == LOAD_CONST); unsigned int arg = get_arg(codestr, pos); @@ -265,7 +266,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, goto exitError; assert(PyList_Check(consts)); - for (i=find_op(codestr, 0) ; i= 1 && _Py_OPCODE(codestr[op_start-1]) == EXTENDED_ARG) { @@ -303,7 +304,8 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, if (j > 0 && lastlc >= j) { h = lastn_const_start(codestr, op_start, j); if (ISBASICBLOCK(blocks, h, op_start)) { - h = fold_tuple_on_constants(codestr, h, i+1, consts, j); + h = fold_tuple_on_constants(codestr, codelen, + h, i+1, consts, j); break; } } @@ -340,7 +342,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, case JUMP_IF_FALSE_OR_POP: case JUMP_IF_TRUE_OR_POP: h = get_arg(codestr, i) / sizeof(_Py_CODEUNIT); - tgt = find_op(codestr, h); + tgt = find_op(codestr, codelen, h); j = _Py_OPCODE(codestr[tgt]); if (CONDITIONAL_JUMP(j)) { @@ -374,7 +376,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, case JUMP_FORWARD: case JUMP_ABSOLUTE: h = GETJUMPTGT(codestr, i); - tgt = find_op(codestr, h); + tgt = find_op(codestr, codelen, h); /* Replace JUMP_* to a RETURN into just a RETURN */ if (UNCONDITIONAL_JUMP(opcode) && _Py_OPCODE(codestr[tgt]) == RETURN_VALUE) { @@ -417,7 +419,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, } if (h > i + 1) { fill_nops(codestr, i + 1, h); - nexti = find_op(codestr, h); + nexti = find_op(codestr, codelen, h); } break; } From 4b0f93fdf21b9ced09036746a8bf6a0f00357551 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Thu, 8 Nov 2018 15:01:07 -0800 Subject: [PATCH 2/6] Add a NEWS entry. --- .../Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst b/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst new file mode 100644 index 00000000000000..db481f71ac86b0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst @@ -0,0 +1,3 @@ +Fix an off by one error in the bytecode peephole optimizer where it could +access one byte beyond the end of bounds of a bytecode array when removing +unreachable operations after a return. From fb357b6d3b947b1cc0c01c969c97a451a9af8a18 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Thu, 8 Nov 2018 16:29:02 -0800 Subject: [PATCH 3/6] Reword the NEWS entry. --- .../2018-11-08-15-00-58.bpo-35193.HzPS6R.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst b/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst index db481f71ac86b0..be0639450ed908 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst @@ -1,3 +1,4 @@ -Fix an off by one error in the bytecode peephole optimizer where it could -access one byte beyond the end of bounds of a bytecode array when removing -unreachable operations after a return. +Fix an off by one error in the bytecode peephole optimizer where it could read +bytes beyond the end of bounds of a bytecode array when removing unreachable +code after a return. This did not always crash an interpreter, but could. +An interpreter compiled using the clang memory sanitizer always failed. From 9580c5d54a13bb2cd674bd96304e7393255a2385 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Thu, 8 Nov 2018 16:30:12 -0800 Subject: [PATCH 4/6] Add a unittest that will fail under clang msan. I don't see how to test for the presence of this out of bounds memory access issue itself from the level of the CPython interpreter, but this test will reliably fail (crash) when the interpreter is built using the clang memory sanitizer. --- Lib/test/test_compile.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 6b45a24334381c..da19a902d326ef 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -1,3 +1,4 @@ +import dis import math import os import unittest @@ -621,6 +622,24 @@ def check_same_constant(const): self.check_constant(f1, frozenset({0})) self.assertTrue(f1(0)) + # This is a regression test for a CPython specific peephole optimizer + # implementation bug present in a few releases. It's assertion verifies + # that peephole optimization was actually done though that isn't an + # indication of the bugs presence or not (crashing is). + @support.cpython_only + def test_peephole_opt_unreachable_code_array_access_in_bounds(self): + """Regression test for issue35193 when run under clang msan.""" + def unused_code_at_end(): + return 3 + raise RuntimeError("unreachable") + # The above function definition will trigger the out of bounds + # bug in the peephole optimizer as it scans opcodes past the + # RETURN_VALUE opcode. This does not always crash an interpreter. + # When you build with the clang memory sanitizer it reliably aborts. + self.assertEqual( + 'RETURN_VALUE', + list(dis.get_instructions(unused_code_at_end))[-1].opname) + def test_dont_merge_constants(self): # Issue #25843: compile() must not merge constants which are equal # but have a different type. From 5d79253b8ad0f5c95de46e410d25a7a3381e2fbb Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Thu, 8 Nov 2018 16:52:37 -0800 Subject: [PATCH 5/6] Mention the releases that this bug is in. --- .../Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst b/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst index be0639450ed908..c4848a88bf6129 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst @@ -2,3 +2,4 @@ Fix an off by one error in the bytecode peephole optimizer where it could read bytes beyond the end of bounds of a bytecode array when removing unreachable code after a return. This did not always crash an interpreter, but could. An interpreter compiled using the clang memory sanitizer always failed. +This bug was present in every release of Python 3.6 and 3.7 until now. From d400140d8e145e2ced96ca3e62a8c3eb15216b73 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Thu, 8 Nov 2018 17:04:58 -0800 Subject: [PATCH 6/6] Shorten the news entry text. --- .../2018-11-08-15-00-58.bpo-35193.HzPS6R.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst b/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst index c4848a88bf6129..dddebe16708086 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2018-11-08-15-00-58.bpo-35193.HzPS6R.rst @@ -1,5 +1,3 @@ Fix an off by one error in the bytecode peephole optimizer where it could read -bytes beyond the end of bounds of a bytecode array when removing unreachable -code after a return. This did not always crash an interpreter, but could. -An interpreter compiled using the clang memory sanitizer always failed. +bytes beyond the end of bounds of an array when removing unreachable code. This bug was present in every release of Python 3.6 and 3.7 until now.