Skip to content

Commit f2cc352

Browse files
committed
Do HTML escaping after the tokenization step.
1 parent 98979b8 commit f2cc352

1 file changed

Lines changed: 23 additions & 14 deletions

File tree

Tools/scripts/pycolorize.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,29 @@
66

77
import keyword, tokenize, cgi, functools
88

9-
def insert(s, i, text):
    'Return a copy of s with text spliced in at index i'
    # Split at the insertion point, then glue the pieces around the new text.
    head, tail = s[:i], s[i:]
    return head + text + tail
12-
139
def is_builtin(s):
    """Return True if s is the name of a builtin.

    The lookup goes through the ``builtins`` module rather than the
    ``__builtins__`` global: the latter is a CPython implementation detail
    that is a module in ``__main__`` but a plain dict in imported modules,
    where ``vars(__builtins__)`` raises TypeError.
    """
    import builtins  # local import keeps this block self-contained
    return s in vars(builtins)
1612

13+
def escape_range(lines, start, end):
    """Return escaped content from a range of lines between start and end.

    lines is a list of source lines (line endings included); start and end
    are (row, col) pairs with 1-based rows and 0-based columns, the same
    coordinates that the tokenize module reports.  The text from start up
    to (but not including) end is joined and HTML-escaped.

    Returns a 2-tuple (escaped_text, end) so the caller can use the second
    item as its new "written up to here" position.
    """
    # cgi.escape() was deprecated in 3.2 and removed in Python 3.8;
    # html.escape() is its replacement.  Imported locally so this function
    # does not depend on the module-level import list.
    import html

    (srow, scol), (erow, ecol) = start, end
    if srow == erow:
        rows = [lines[srow-1][scol:ecol]]
    else:
        # Tail of the first line, every whole line in between, then the
        # head of the last line.
        rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]]
    # quote=False matches the old cgi.escape() default: only &, < and >
    # are translated, quotes are left as they are.
    return html.escape(''.join(rows), quote=False), end
21+
1722
def colorize(source):
1823
'Convert Python source code to an HTML fragment with colorized markup'
19-
text = cgi.escape(source)
20-
lines = text.splitlines(True)
24+
lines = source.splitlines(True)
25+
lines.append('')
2126
readline = functools.partial(next, iter(lines), '')
2227
actions = []
2328
kind = tok_str = ''
2429
tok_type = tokenize.COMMENT
30+
written = (1, 0)
31+
result = []
2532
for tok in tokenize.generate_tokens(readline):
2633
prev_tok_type, prev_tok_str = tok_type, tok_str
2734
tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok
@@ -44,15 +51,17 @@ def colorize(source):
4451
elif is_builtin(tok_str) and prev_tok_str != '.':
4552
kind = 'builtin'
4653
if kind:
47-
actions.append(((srow, scol), (erow, ecol), kind))
48-
49-
for (srow, scol), (erow, ecol), kind in reversed(actions):
50-
lines[erow-1] = insert(lines[erow-1], ecol, '</span>')
51-
lines[srow-1] = insert(lines[srow-1], scol, '<span class="%s">' % kind)
54+
line_upto_token, written = escape_range(lines, written, (srow, scol))
55+
line_thru_token, written = escape_range(lines, written, (erow, ecol))
56+
result += [line_upto_token, '<span class="%s">' % kind,
57+
line_thru_token, '</span>']
58+
else:
59+
line_thru_token, written = escape_range(lines, written, (erow, ecol))
60+
result += [line_thru_token]
5261

53-
lines.insert(0, '<pre class="python">\n')
54-
lines.append('</pre>\n')
55-
return ''.join(lines)
62+
result.insert(0, '<pre class="python">\n')
63+
result.append('</pre>\n')
64+
return ''.join(result)
5665

5766
default_css = {
5867
'.comment': '{color: crimson;}',

0 commit comments

Comments
 (0)