Issue 24226: Fix parsing of many sequential one-line 'def' statements.

Yury Selivanov · Yury Selivanov · commit 8085b80c1878 · 2015-05-18T12:50:52.000-04:00
diff --git a/Lib/test/test_coroutines.py b/Lib/test/test_coroutines.py
@@ -1,4 +1,5 @@
 import contextlib
+import inspect
 import sys
 import types
 import unittest
@@ -87,6 +88,28 @@ def test_badsyntax_9(self):
             import test.badsyntax_async9
 
 
+class TokenizerRegrTest(unittest.TestCase):
+
+    def test_oneline_defs(self):
+        buf = []
+        for i in range(500):
+            buf.append('def i{i}(): return {i}'.format(i=i))
+        buf = '\n'.join(buf)  # single source string, one def per line
+
+        # Test that 500 consecutive one-line defs are OK
+        ns = {}
+        exec(buf, ns, ns)
+        self.assertEqual(ns['i499'](), 499)
+
+        # Test that 500 consecutive one-line defs *and*
+        # one 'async def' following them are OK
+        buf += '\nasync def foo():\n    return'
+        ns = {}  # fresh namespace: nothing carried over from the first exec
+        exec(buf, ns, ns)
+        self.assertEqual(ns['i499'](), 499)
+        self.assertTrue(inspect.iscoroutinefunction(ns['foo']))
+
+
 class CoroutineTest(unittest.TestCase):
 
     def test_gen_1(self):
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
@@ -1289,6 +1289,17 @@ def mock_readline():
 
         self.assertTrue(encoding_used, encoding)
 
+    def test_oneline_defs(self):
+        buf = []
+        for i in range(500):
+            buf.append('def i{i}(): return {i}'.format(i=i))
+        buf.append('OK')  # sentinel line after the last def, checked below
+        buf = '\n'.join(buf)
+
+        # Test that 500 consecutive one-line defs are OK
+        toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
+        self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER
+
     def assertExactTypeEqual(self, opstr, *optypes):
         tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
         num_optypes = len(optypes)
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
@@ -1501,17 +1501,20 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
 
         tok_len = tok->cur - tok->start;
         if (tok_len == 3 && memcmp(tok->start, "def", 3) == 0) {
-
-            if (tok->def + 1 >= MAXINDENT) {
-                tok->done = E_TOODEEP;
-                tok->cur = tok->inp;
-                return ERRORTOKEN;
-            }
-
             if (tok->def && tok->deftypestack[tok->def] == 3) {
                 tok->deftypestack[tok->def] = 2;
             }
-            else {
+            else if (tok->defstack[tok->def] < tok->indent) {
+                /* We advance defs stack only when we see "def" *and*
+                   the indentation level was increased relative to the
+                   previous "def". */
+
+                if (tok->def + 1 >= MAXINDENT) {
+                    tok->done = E_TOODEEP;
+                    tok->cur = tok->inp;
+                    return ERRORTOKEN;
+                }
+
                 tok->def++;
                 tok->defstack[tok->def] = tok->indent;
                 tok->deftypestack[tok->def] = 1;
@@ -1528,6 +1531,12 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
                         ahead_tok.cur - ahead_tok.start == 3 &&
                         memcmp(ahead_tok.start, "def", 3) == 0) {
 
+                    if (tok->def + 1 >= MAXINDENT) {
+                        tok->done = E_TOODEEP;
+                        tok->cur = tok->inp;
+                        return ERRORTOKEN;
+                    }
+
                     tok->def++;
                     tok->defstack[tok->def] = tok->indent;
                     tok->deftypestack[tok->def] = 3;