Skip to content

Commit 0de05b5

Browse files
committed
Harden regexp parsing a bit
1 parent 7c8670a commit 0de05b5

File tree

7 files changed

+54
-18
lines changed

7 files changed

+54
-18
lines changed

src/ast.ts

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -240,11 +240,11 @@ export abstract class Node {
240240
return expr;
241241
}
242242

243-
static createRegexpLiteralExpression(pattern: string, modifiers: string, range: Range): RegexpLiteralExpression {
243+
static createRegexpLiteralExpression(pattern: string, flags: string, range: Range): RegexpLiteralExpression {
244244
var expr = new RegexpLiteralExpression();
245245
expr.range = range;
246246
expr.pattern = pattern;
247-
expr.modifiers = modifiers;
247+
expr.patternFlags = flags;
248248
return expr;
249249
}
250250

@@ -1048,14 +1048,14 @@ export class RegexpLiteralExpression extends LiteralExpression {
10481048

10491049
/** Regular expression pattern. */
10501050
pattern: string;
1051-
/** Regular expression modifiers. */
1052-
modifiers: string;
1051+
/** Regular expression flags. */
1052+
patternFlags: string;
10531053

10541054
serialize(sb: string[]): void {
10551055
sb.push("/");
10561056
sb.push(this.pattern);
10571057
sb.push("/");
1058-
sb.push(this.modifiers);
1058+
sb.push(this.patternFlags);
10591059
}
10601060
}
10611061

src/diagnosticMessages.generated.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ export enum DiagnosticCode {
1010
Type_0_cannot_be_changed_to_type_1 = 106,
1111
Structs_cannot_extend_classes_and_vice_versa = 107,
1212
Structs_cannot_implement_interfaces = 108,
13+
Invalid_regular_expression_flags = 109,
1314
Unterminated_string_literal = 1002,
1415
Identifier_expected = 1003,
1516
_0_expected = 1005,
@@ -95,6 +96,7 @@ export function diagnosticCodeToString(code: DiagnosticCode): string {
9596
case 106: return "Type '{0}' cannot be changed to type '{1}'.";
9697
case 107: return "Structs cannot extend classes and vice-versa.";
9798
case 108: return "Structs cannot implement interfaces.";
99+
case 109: return "Invalid regular expression flags.";
98100
case 1002: return "Unterminated string literal.";
99101
case 1003: return "Identifier expected.";
100102
case 1005: return "'{0}' expected.";

src/diagnosticMessages.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
"Type '{0}' cannot be changed to type '{1}'.": 106,
99
"Structs cannot extend classes and vice-versa.": 107,
1010
"Structs cannot implement interfaces.": 108,
11+
"Invalid regular expression flags.": 109,
1112

1213
"Unterminated string literal.": 1002,
1314
"Identifier expected.": 1003,

src/parser.ts

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1426,8 +1426,11 @@ export class Parser extends DiagnosticEmitter {
14261426
return Node.createNewExpression((<CallExpression>operand).expression, (<CallExpression>operand).typeArguments, (<CallExpression>operand).arguments, tn.range(startPos, tn.pos));
14271427
this.error(DiagnosticCode.Operation_not_supported, tn.range());
14281428
return null;
1429-
} else
1429+
} else {
14301430
operand = this.parseExpression(tn, p);
1431+
if (!operand)
1432+
return null;
1433+
}
14311434

14321435
// UnaryPrefixExpression
14331436
if (token == Token.PLUS_PLUS || token == Token.MINUS_MINUS)
@@ -1513,15 +1516,14 @@ export class Parser extends DiagnosticEmitter {
15131516
return Node.createFloatLiteralExpression(tn.readFloat(), tn.range(startPos, tn.pos));
15141517

15151518
// RegexpLiteralExpression
1519+
// note that this also continues on invalid ones so the surrounding AST remains intact
15161520
case Token.SLASH:
1517-
var regexpPattern = tn.readRegexpPattern();
1518-
if (regexpPattern == null)
1519-
return null;
1521+
var regexpPattern = tn.readRegexpPattern(); // also reports
15201522
if (!tn.skip(Token.SLASH)) {
15211523
this.error(DiagnosticCode._0_expected, tn.range(), "/");
15221524
return null;
15231525
}
1524-
return Node.createRegexpLiteralExpression(regexpPattern, tn.readRegexpModifiers(), tn.range(startPos, tn.pos));
1526+
return Node.createRegexpLiteralExpression(regexpPattern, tn.readRegexpFlags() /* also reports */, tn.range(startPos, tn.pos));
15251527

15261528
default:
15271529
this.error(DiagnosticCode.Expression_expected, tn.range());

src/tokenizer.ts

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -901,14 +901,14 @@ export class Tokenizer extends DiagnosticEmitter {
901901
}
902902
}
903903

904-
readRegexpPattern(): string | null {
904+
readRegexpPattern(): string {
905905
var text = this.source.text;
906906
var start = this.pos;
907907
var escaped = false;
908908
while (true) {
909909
if (this.pos >= this.end) {
910910
this.error(DiagnosticCode.Unterminated_regular_expression_literal, this.range(start, this.end));
911-
return null;
911+
break;
912912
}
913913
if (text.charCodeAt(this.pos) == CharCode.BACKSLASH) {
914914
++this.pos;
@@ -920,31 +920,43 @@ export class Tokenizer extends DiagnosticEmitter {
920920
break;
921921
if (isLineBreak(c)) {
922922
this.error(DiagnosticCode.Unterminated_regular_expression_literal, this.range(start, this.pos));
923-
return null;
923+
break;
924924
}
925925
++this.pos;
926926
escaped = false;
927927
}
928928
return text.substring(start, this.pos);
929929
}
930930

931-
readRegexpModifiers(): string {
931+
readRegexpFlags(): string {
932932
var text = this.source.text;
933933
var start = this.pos;
934-
/a/
934+
var flags = 0;
935935
while (this.pos < this.end) {
936-
switch (text.charCodeAt(this.pos)) {
936+
var c: i32 = text.charCodeAt(this.pos);
937+
if (!isIdentifierPart(c))
938+
break;
939+
++this.pos;
940+
switch (c) {
937941

942+
// make sure each supported flag is unique
938943
case CharCode.g:
944+
flags |= select<i32>(1, -1, !(flags & 1));
945+
break;
939946
case CharCode.i:
947+
flags |= select<i32>(2, -1, !(flags & 2));
948+
break;
940949
case CharCode.m:
941-
++this.pos;
950+
flags |= select<i32>(4, -1, !(flags & 4));
942951
break;
943952

944953
default:
945-
return text.substring(start, this.pos);
954+
flags = -1;
955+
break;
946956
}
947957
}
958+
if (flags == -1)
959+
this.error(DiagnosticCode.Invalid_regular_expression_flags, this.range(start, this.pos));
948960
return text.substring(start, this.pos);
949961
}
950962

tests/parser/regexp.ts

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,3 +16,15 @@ b/ig;
1616

1717
// just a comment
1818
//ig;
19+
20+
// duplicate flags
21+
22+
/(abc)\//iig;
23+
24+
// invalid flags
25+
26+
/(abc)\//iX;
27+
28+
// surrounding AST remains intact
29+
30+
false && /abc/gX.test(someString) || true;

tests/parser/regexp.ts.fixture.ts

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,11 @@
33
let re = /(abc)\//ig;
44
let noRe = !/(abc)\//i;
55
b / ig;
6+
/(abc)\//iig;
7+
/(abc)\//iX;
8+
false && /abc/gX.test(someString) || true;
69
// ERROR 1161: "Unterminated regular expression literal." in regexp.ts @ 75,76
10+
// ERROR 1005: "'/' expected." in regexp.ts @ 74,76
11+
// ERROR 109: "Invalid regular expression flags." in regexp.ts @ 95,98
12+
// ERROR 109: "Invalid regular expression flags." in regexp.ts @ 111,113
13+
// ERROR 109: "Invalid regular expression flags." in regexp.ts @ 131,133

0 commit comments

Comments
 (0)