Skip to content

Commit c2843c9

Browse files
committed
bpo-36143: Regenerate Lib/keyword.py from the Grammar and Tokens file using pgen
1 parent 0d765e3 commit c2843c9

5 files changed

Lines changed: 92 additions & 161 deletions

File tree

Lib/keyword.py

100755100644
Lines changed: 9 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,30 @@
11
#! /usr/bin/env python3
22

3-
"""Keywords (from "graminit.c")
3+
"""Keywords (from "Grammar/Grammar")
44
55
This file is automatically generated; please don't muck it up!
66
77
To update the symbols in this file, 'cd' to the top directory of
8-
the python source tree after building the interpreter and run:
8+
the python source tree and run:
99
10-
./python Lib/keyword.py
10+
python -m Parser.pgen.keywordgen ./Grammar/Grammar \
11+
./Grammar/Tokens \
12+
./Lib/keyword.py
13+
14+
Alternatively, you can run 'make regen-keyword'.
1115
"""
1216

1317
__all__ = ["iskeyword", "kwlist"]
1418

1519
kwlist = [
16-
#--start keywords--
1720
'False',
1821
'None',
1922
'True',
2023
'and',
2124
'as',
2225
'assert',
26+
'async',
27+
'await',
2328
'break',
2429
'class',
2530
'continue',
@@ -47,52 +52,6 @@
4752
'while',
4853
'with',
4954
'yield',
50-
#--end keywords--
5155
]
5256

53-
kwlist.append('async')
54-
kwlist.append('await')
55-
kwlist.sort()
56-
5757
iskeyword = frozenset(kwlist).__contains__
58-
59-
def main():
60-
import sys, re
61-
62-
args = sys.argv[1:]
63-
iptfile = args and args[0] or "Python/graminit.c"
64-
if len(args) > 1: optfile = args[1]
65-
else: optfile = "Lib/keyword.py"
66-
67-
# load the output skeleton from the target, taking care to preserve its
68-
# newline convention.
69-
with open(optfile, newline='') as fp:
70-
format = fp.readlines()
71-
nl = format[0][len(format[0].strip()):] if format else '\n'
72-
73-
# scan the source file for keywords
74-
with open(iptfile) as fp:
75-
strprog = re.compile('"([^"]+)"')
76-
lines = []
77-
for line in fp:
78-
if '{1, "' in line:
79-
match = strprog.search(line)
80-
if match:
81-
lines.append(" '" + match.group(1) + "'," + nl)
82-
lines.sort()
83-
84-
# insert the lines of keywords into the skeleton
85-
try:
86-
start = format.index("#--start keywords--" + nl) + 1
87-
end = format.index("#--end keywords--" + nl)
88-
format[start:end] = lines
89-
except ValueError:
90-
sys.stderr.write("target does not contain format markers\n")
91-
sys.exit(1)
92-
93-
# write the output file
94-
with open(optfile, 'w', newline='') as fp:
95-
fp.writelines(format)
96-
97-
if __name__ == "__main__":
98-
main()

Lib/test/test_keyword.py

Lines changed: 9 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,5 @@
11
import keyword
22
import unittest
3-
from test import support
4-
import filecmp
5-
import os
6-
import sys
7-
import subprocess
8-
import shutil
9-
import textwrap
10-
11-
KEYWORD_FILE = support.findfile('keyword.py')
12-
GRAMMAR_FILE = os.path.join(os.path.split(__file__)[0],
13-
'..', '..', 'Python', 'graminit.c')
14-
TEST_PY_FILE = 'keyword_test.py'
15-
GRAMMAR_TEST_FILE = 'graminit_test.c'
16-
PY_FILE_WITHOUT_KEYWORDS = 'minimal_keyword.py'
17-
NONEXISTENT_FILE = 'not_here.txt'
183

194

205
class Test_iskeyword(unittest.TestCase):
@@ -35,103 +20,17 @@ def test_changing_the_kwlist_does_not_affect_iskeyword(self):
3520
keyword.kwlist = ['its', 'all', 'eggs', 'beans', 'and', 'a', 'slice']
3621
self.assertFalse(keyword.iskeyword('eggs'))
3722

23+
def test_all_keywords_fail_to_be_used_as_names(self):
24+
for key in keyword.kwlist:
25+
with self.assertRaises(SyntaxError):
26+
exec(f"{key} = 42")
3827

39-
class TestKeywordGeneration(unittest.TestCase):
40-
41-
def _copy_file_without_generated_keywords(self, source_file, dest_file):
42-
with open(source_file, 'rb') as fp:
43-
lines = fp.readlines()
44-
nl = lines[0][len(lines[0].strip()):]
45-
with open(dest_file, 'wb') as fp:
46-
fp.writelines(lines[:lines.index(b"#--start keywords--" + nl) + 1])
47-
fp.writelines(lines[lines.index(b"#--end keywords--" + nl):])
48-
49-
def _generate_keywords(self, grammar_file, target_keyword_py_file):
50-
proc = subprocess.Popen([sys.executable,
51-
KEYWORD_FILE,
52-
grammar_file,
53-
target_keyword_py_file], stderr=subprocess.PIPE)
54-
stderr = proc.communicate()[1]
55-
return proc.returncode, stderr
56-
57-
@unittest.skipIf(not os.path.exists(GRAMMAR_FILE),
58-
'test only works from source build directory')
59-
def test_real_grammar_and_keyword_file(self):
60-
self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
61-
self.addCleanup(support.unlink, TEST_PY_FILE)
62-
self.assertFalse(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))
63-
self.assertEqual((0, b''), self._generate_keywords(GRAMMAR_FILE,
64-
TEST_PY_FILE))
65-
self.assertTrue(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))
66-
67-
def test_grammar(self):
68-
self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
69-
self.addCleanup(support.unlink, TEST_PY_FILE)
70-
with open(GRAMMAR_TEST_FILE, 'w') as fp:
71-
# Some of these are probably implementation accidents.
72-
fp.writelines(textwrap.dedent("""\
73-
{2, 1},
74-
{11, "encoding_decl", 0, 2, states_79,
75-
"\000\000\040\000\000\000\000\000\000\000\000\000"
76-
"\000\000\000\000\000\000\000\000\000"},
77-
{1, "jello"},
78-
{326, 0},
79-
{1, "turnip"},
80-
\t{1, "This one is tab indented"
81-
{278, 0},
82-
{1, "crazy but legal"
83-
"also legal" {1, "
84-
{1, "continue"},
85-
{1, "lemon"},
86-
{1, "tomato"},
87-
{1, "wigii"},
88-
{1, 'no good'}
89-
{283, 0},
90-
{1, "too many spaces"}"""))
91-
self.addCleanup(support.unlink, GRAMMAR_TEST_FILE)
92-
self._generate_keywords(GRAMMAR_TEST_FILE, TEST_PY_FILE)
93-
expected = [
94-
" 'This one is tab indented',",
95-
" 'also legal',",
96-
" 'continue',",
97-
" 'crazy but legal',",
98-
" 'jello',",
99-
" 'lemon',",
100-
" 'tomato',",
101-
" 'turnip',",
102-
" 'wigii',",
103-
]
104-
with open(TEST_PY_FILE) as fp:
105-
lines = fp.read().splitlines()
106-
start = lines.index("#--start keywords--") + 1
107-
end = lines.index("#--end keywords--")
108-
actual = lines[start:end]
109-
self.assertEqual(actual, expected)
110-
111-
def test_empty_grammar_results_in_no_keywords(self):
112-
self._copy_file_without_generated_keywords(KEYWORD_FILE,
113-
PY_FILE_WITHOUT_KEYWORDS)
114-
self.addCleanup(support.unlink, PY_FILE_WITHOUT_KEYWORDS)
115-
shutil.copyfile(KEYWORD_FILE, TEST_PY_FILE)
116-
self.addCleanup(support.unlink, TEST_PY_FILE)
117-
self.assertEqual((0, b''), self._generate_keywords(os.devnull,
118-
TEST_PY_FILE))
119-
self.assertTrue(filecmp.cmp(TEST_PY_FILE, PY_FILE_WITHOUT_KEYWORDS))
120-
121-
def test_keywords_py_without_markers_produces_error(self):
122-
rc, stderr = self._generate_keywords(os.devnull, os.devnull)
123-
self.assertNotEqual(rc, 0)
124-
self.assertRegex(stderr, b'does not contain format markers')
125-
126-
def test_missing_grammar_file_produces_error(self):
127-
rc, stderr = self._generate_keywords(NONEXISTENT_FILE, KEYWORD_FILE)
128-
self.assertNotEqual(rc, 0)
129-
self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
28+
def test_async_and_await_are_keywords(self):
29+
self.assertIn("async", keyword.kwlist)
30+
self.assertIn("await", keyword.kwlist)
13031

131-
def test_missing_keywords_py_file_produces_error(self):
132-
rc, stderr = self._generate_keywords(os.devnull, NONEXISTENT_FILE)
133-
self.assertNotEqual(rc, 0)
134-
self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
32+
def test_keywords_are_sorted(self):
33+
self.assertListEqual(sorted(keyword.kwlist), keyword.kwlist)
13534

13635

13736
if __name__ == "__main__":

Makefile.pre.in

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,7 @@ regen-importlib: Programs/_freeze_importlib
724724
# Regenerate all generated files
725725

726726
regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar \
727-
regen-token regen-symbol regen-ast regen-importlib clinic
727+
regen-token regen-keyword regen-symbol regen-ast regen-importlib clinic
728728

729729
############################################################################
730730
# Special rules for object files
@@ -843,6 +843,15 @@ regen-token:
843843
$(srcdir)/Grammar/Tokens \
844844
$(srcdir)/Lib/token.py
845845

846+
.PHONY: regen-keyword
847+
regen-keyword:
848+
# Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens
849+
# using Parser/pgen
850+
$(PYTHON_FOR_REGEN) -m Parser.pgen.keywordgen $(srcdir)/Grammar/Grammar \
851+
$(srcdir)/Grammar/Tokens \
852+
$(srcdir)/Lib/keyword.py.new
853+
$(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new
854+
846855
.PHONY: regen-symbol
847856
regen-symbol: $(srcdir)/Include/graminit.h
848857
# Regenerate Lib/symbol.py from Include/graminit.h
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Regenerate :mod:`keyword` from the Grammar and Tokens files using pgen. Patch
2+
by Pablo Galindo.

Parser/pgen/keywordgen.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen"""
2+
3+
import argparse
4+
5+
from .pgen import ParserGenerator
6+
7+
TEMPLATE = '''\
8+
#! /usr/bin/env python3
9+
10+
"""Keywords (from "Grammar/Grammar")
11+
12+
This file is automatically generated; please don't muck it up!
13+
14+
To update the symbols in this file, 'cd' to the top directory of
15+
the python source tree and run:
16+
17+
python -m Parser.pgen.keywordgen ./Grammar/Grammar \\
18+
./Grammar/Tokens \\
19+
./Lib/keyword.py
20+
21+
Alternatively, you can run 'make regen-keyword'.
22+
"""
23+
24+
__all__ = ["iskeyword", "kwlist"]
25+
26+
kwlist = [
27+
{keywords}
28+
]
29+
30+
iskeyword = frozenset(kwlist).__contains__
31+
'''
32+
33+
EXTRA_KEYWORDS = ["async", "await"]
34+
35+
36+
def main():
    """Write the keyword module source generated from the grammar.

    Command-line arguments (parsed from ``sys.argv``):

    grammar
        Path of the grammar definition file in EBNF format.
    tokens
        Path of the token definitions file.
    keyword_file
        Path of the file that receives the rendered ``TEMPLATE``.

    The keyword list is the union of the keywords reported by the grammar
    and ``EXTRA_KEYWORDS``, sorted, with one quoted entry per line.
    """
    parser = argparse.ArgumentParser(
        description="Generate the Lib/keyword.py file from the grammar."
    )
    parser.add_argument(
        "grammar", type=str, help="The file with the grammar definition in EBNF format"
    )
    parser.add_argument(
        "tokens", type=str, help="The file with the token definitions"
    )
    parser.add_argument(
        "keyword_file",
        type=argparse.FileType('w'),
        help="The path to write the keyword definitions",
    )
    args = parser.parse_args()

    # argparse has already opened the output file at this point, so keep all
    # remaining work inside the ``with`` block: the handle is then closed
    # even if grammar generation raises.
    with args.keyword_file as thefile:
        p = ParserGenerator(args.grammar, args.tokens)
        grammar = p.make_grammar()

        # Use a set union rather than list concatenation so that a keyword
        # present in both the grammar and EXTRA_KEYWORDS is emitted once.
        all_keywords = sorted(set(grammar.keywords) | set(EXTRA_KEYWORDS))

        keywords = "\n".join("    '{}',".format(keyword)
                             for keyword in all_keywords)
        thefile.write(TEMPLATE.format(keywords=keywords))
60+
61+
if __name__ == "__main__":
62+
main()

0 commit comments

Comments
 (0)