Skip to content

Commit 6296a6c

Browse files
committed
tools: make js2c.py strip comments from sources
In order to allow using Unicode characters inside comments of built-in JavaScript libraries without forcing them to be stored as UTF-16 data in Node's binary, update the tooling to strip comments during the build process. All line breaks are preserved so that line numbers in stack traces aren't broken. Refs: #11129 Refs: #11371 (comment)
1 parent 00c86cc commit 6296a6c

File tree

1 file changed

+83
-1
lines changed

1 file changed

+83
-1
lines changed

tools/js2c.py

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def ReadFile(filename):
5353
lines = file.read()
5454
finally:
5555
file.close()
56-
return lines
56+
return StripComments(lines)
5757

5858

5959
def ReadLines(filename):
@@ -67,6 +67,88 @@ def ReadLines(filename):
6767
return result
6868

6969

70+
def StripComments(source):
  """Remove JavaScript comments from `source`, keeping its line breaks.

  `//` comments are dropped up to (but not including) the terminating
  newline. `/* */` comments are replaced by as many newlines as they
  contained, so line numbers are unchanged; a block comment without any
  newline is replaced by a single space so the tokens on either side of it
  are not fused together (`return/*x*/y` must not become `returny`).

  Text inside '...', "..." and `...` literals is left untouched.

  Known limitations:
    * Comments inside template literal placeholders are not stripped.
    * Regular expression literals are not recognised. Only the backslash
      workaround described in the loop below is applied.

  Args:
    source: JavaScript source, either UTF-8 encoded bytes or text.

  Returns:
    The stripped source, of the same kind as `source`: UTF-8 encoded bytes
    for bytes input, text for text input.
  """
  # Only decode (and re-encode on the way out) when given bytes. Text
  # input has no usable decode() on Python 3.
  is_bytes = isinstance(source, bytes)
  chars = source.decode('utf-8') if is_bytes else source
  size = len(chars)
  result = []
  index = 0
  string_mode = None  # None or one of ('\'', '"', '`')
  escape_mode = False

  while index < size:
    char = chars[index]

    # TODO(aqrln): support comments inside placeholders
    # of ES6 template literals
    if char in ('\'', '"', '`'):
      if char == string_mode and not escape_mode:
        string_mode = None
      elif string_mode is None:
        string_mode = char

    # Skip comments only if they are not preceded by odd number of backslashes
    # (which is indicated by escape_mode). This is needed for some regular
    # expressions that have endings mistakenly considered to be comments by
    # IsCommentStart function, e.g.: /\//
    # Regex detection is rather complicated, so here's a workaround for it.
    # And it really doesn't matter much for situations when a backslash is
    # followed by a real comment (whether //-style or /* */-style) since they
    # are syntax errors anyway, so let it crash with the proper exception at
    # run time as it would when there was no comment stripping.
    if not string_mode and not escape_mode and IsCommentStart(chars, index):
      # IsCommentStart guarantees that chars[index + 1] exists.
      is_block_comment = chars[index + 1] == '*'
      index, line_breaks = SkipComments(chars, index)
      if line_breaks:
        result.append('\n' * line_breaks)
      elif is_block_comment:
        # A comment acts as white space in JavaScript, so keep a separator
        # between the tokens that surrounded it.
        result.append(' ')
    else:
      result.append(char)
      index += 1

    # If we encounter a sequence of backslashes, toggle the escape mode
    # for each backslash and turn it off when the sequence is over.
    if char == '\\':
      escape_mode = not escape_mode
    else:
      escape_mode = False

  stripped = u''.join(result)
  return stripped.encode('utf-8') if is_bytes else stripped
112+
113+
114+
def IsCommentStart(chars, index):
  """Tell whether a `//` or `/*` comment opener begins at chars[index].

  Args:
    chars: The source text being scanned.
    index: Position in `chars` to test.

  Returns:
    True if chars[index] is '/' and is immediately followed by '/' or '*';
    False otherwise, including when fewer than two characters remain at
    `index`.
  """
  # Check the bounds first so that an index at (or past) the end of the
  # input yields False instead of raising IndexError.
  return (index + 1 < len(chars) and
          chars[index] == '/' and
          chars[index + 1] in ('/', '*'))
118+
119+
120+
def SkipComments(chars, index):
  """Skip over the comment whose opener begins at chars[index].

  The character after chars[index] selects the comment style: '/' is
  handled by SkipToLineEnd and '*' by SkipMultilineComment.

  Args:
    chars: The source text being scanned.
    index: Position of the comment opener; chars[index + 1] must exist.

  Returns:
    A pair (new_index, line_breaks): where scanning should resume and the
    number of line breaks reported for the skipped comment (always 0 for a
    `//` comment). When the following character is neither '/' nor '*',
    returns (index, 0) unchanged.
  """
  marker = chars[index + 1]
  if marker == '*':
    return SkipMultilineComment(chars, index)
  if marker == '/':
    return SkipToLineEnd(chars, index), 0
  return index, 0
128+
129+
130+
def SkipToLineEnd(chars, index):
  """Find the end of the line that contains position `index`.

  Args:
    chars: The source text being scanned.
    index: Non-negative position to start searching from.

  Returns:
    The position of the first '\\n' at or after `index`. The newline itself
    is not skipped. If there is no such newline, returns len(chars), or
    `index` itself when it already lies past the end.
  """
  newline_pos = chars.find('\n', index)
  if newline_pos == -1:
    return max(index, len(chars))
  return newline_pos
134+
135+
136+
def SkipMultilineComment(chars, index):
  """Skip the `/* ... */` comment whose opener begins at chars[index].

  Args:
    chars: The source text being scanned.
    index: Non-negative position of the opening '/*'.

  Returns:
    A pair (new_index, line_breaks). `new_index` is the position just past
    the closing '*/', and `line_breaks` is the number of '\\n' characters
    between the opener and that terminator. For an unterminated comment the
    rest of the input is consumed: `new_index` is the end of the input and
    every remaining '\\n' is counted.
  """
  # Start searching after the opener so that '/*/' is not mistaken for a
  # complete comment.
  body_start = index + 2
  terminator = chars.find('*/', body_start)
  if terminator == -1:
    return max(body_start, len(chars)), chars.count('\n', body_start)
  return terminator + 2, chars.count('\n', body_start, terminator)
150+
151+
70152
def ExpandConstants(lines, constants):
71153
for key, value in constants.items():
72154
lines = lines.replace(key, str(value))

0 commit comments

Comments
 (0)