Skip to content

Commit 5da6039

Browse files
committed
Refactor to isolate HTML encoding step from the parsing step.
1 parent 3575f91 commit 5da6039

1 file changed

Lines changed: 21 additions & 16 deletions

File tree

Tools/scripts/highlight.py

Lines changed: 21 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -10,24 +10,23 @@ def is_builtin(s):
1010
'Return True if s is the name of a builtin'
1111
return s in vars(__builtins__)
1212

13-
def escape_range(lines, start, end):
14-
'Return escaped content from a range of lines between start and end'
13+
def combine_range(lines, start, end):
14+
'Join content from a range of lines between start and end'
1515
(srow, scol), (erow, ecol) = start, end
1616
if srow == erow:
1717
rows = [lines[srow-1][scol:ecol]]
1818
else:
1919
rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]]
20-
return cgi.escape(''.join(rows)), end
20+
return ''.join(rows), end
2121

22-
def colorize(source):
23-
'Convert Python source code to an HTML fragment with colorized markup'
22+
def isolate_tokens(source):
23+
'Generate chunks of source and identify chunks to be highlighted'
2424
lines = source.splitlines(True)
2525
lines.append('')
2626
readline = functools.partial(next, iter(lines), '')
2727
kind = tok_str = ''
2828
tok_type = tokenize.COMMENT
2929
written = (1, 0)
30-
result = []
3130
for tok in tokenize.generate_tokens(readline):
3231
prev_tok_type, prev_tok_str = tok_type, tok_str
3332
tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok
@@ -49,23 +48,29 @@ def colorize(source):
4948
kind = 'keyword'
5049
elif is_builtin(tok_str) and prev_tok_str != '.':
5150
kind = 'builtin'
51+
line_upto_token, written = combine_range(lines, written, (srow, scol))
52+
line_thru_token, written = combine_range(lines, written, (erow, ecol))
53+
yield kind, line_upto_token, line_thru_token
54+
55+
def colorize(source):
56+
'Convert Python source code to an HTML fragment with colorized markup'
57+
result = ['<pre class="python">\n']
58+
for kind, line_upto_token, line_thru_token in isolate_tokens(source):
5259
if kind:
53-
line_upto_token, written = escape_range(lines, written, (srow, scol))
54-
line_thru_token, written = escape_range(lines, written, (erow, ecol))
55-
result += [line_upto_token, '<span class="%s">' % kind,
56-
line_thru_token, '</span>']
60+
result += [cgi.escape(line_upto_token),
61+
'<span class="%s">' % kind,
62+
cgi.escape(line_thru_token),
63+
'</span>']
5764
else:
58-
line_thru_token, written = escape_range(lines, written, (erow, ecol))
59-
result += [line_thru_token]
60-
61-
result.insert(0, '<pre class="python">\n')
62-
result.append('</pre>\n')
65+
result += [cgi.escape(line_upto_token),
66+
cgi.escape(line_thru_token)]
67+
result += ['</pre>\n']
6368
return ''.join(result)
6469

6570
default_css = {
6671
'.comment': '{color: crimson;}',
6772
'.string': '{color: forestgreen;}',
68-
'.docstring': '{color: forestgreen; font-style:italic}',
73+
'.docstring': '{color: forestgreen; font-style:italic;}',
6974
'.keyword': '{color: darkorange;}',
7075
'.builtin': '{color: purple;}',
7176
'.definition': '{color: darkorange; font-weight:bold;}',

0 commit comments

Comments
 (0)