Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'main' into regex-non-bmp-flags
  • Loading branch information
graphemecluster committed May 30, 2024
commit f91f83fe65f8704e291a0d60631d919812b6c6e9
120 changes: 81 additions & 39 deletions src/compiler/scanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2504,46 +2504,88 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
}
pos++;
}
const isUnterminated = !!(tokenFlags & TokenFlags.Unterminated);
const endOfBody = p - (isUnterminated ? 0 : 1);
let regExpFlags = RegularExpressionFlags.None;
while (true) {
const ch = codePointChecked(p);
if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) {
break;
}
const size = charSize(ch);
if (reportErrors) {
const flag = characterToRegularExpressionFlag(utf16EncodeAsString(ch));
if (flag === undefined) {
error(Diagnostics.Unknown_regular_expression_flag, p, size);
const endOfRegExpBody = pos;
if (tokenFlags & TokenFlags.Unterminated) {
// Search for the nearest unbalanced bracket for better recovery. Since the expression is
// invalid anyway, we also take nested square brackets into account to make the best guess.
pos = startOfRegExpBody;
inEscape = false;
let characterClassDepth = 0;
let inDecimalQuantifier = false;
let groupDepth = 0;
while (pos < endOfRegExpBody) {
const ch = charCodeUnchecked(pos);
if (inEscape) {
inEscape = false;
}
else if (ch === CharacterCodes.backslash) {
inEscape = true;
}
else if (ch === CharacterCodes.openBracket) {
characterClassDepth++;
}
else if (ch === CharacterCodes.closeBracket && characterClassDepth) {
characterClassDepth--;
}
else if (!characterClassDepth) {
if (ch === CharacterCodes.openBrace) {
inDecimalQuantifier = true;
}
else if (ch === CharacterCodes.closeBrace && inDecimalQuantifier) {
inDecimalQuantifier = false;
}
else if (!inDecimalQuantifier) {
if (ch === CharacterCodes.openParen) {
groupDepth++;
}
else if (ch === CharacterCodes.closeParen && groupDepth) {
groupDepth--;
}
else if (ch === CharacterCodes.closeParen || ch === CharacterCodes.closeBracket || ch === CharacterCodes.closeBrace) {
// We encountered an unbalanced bracket outside a character class. Treat this position as the end of the regex.
break;
}
}
}
else if (regExpFlags & flag) {
error(Diagnostics.Duplicate_regular_expression_flag, p, size);
pos++;
}
// Trailing whitespace and semicolons are unlikely to be part of the regex, so back up over them
while (isWhiteSpaceLike(charCodeChecked(pos - 1)) || charCodeChecked(pos - 1) === CharacterCodes.semicolon) pos--;
error(Diagnostics.Unterminated_regular_expression_literal, tokenStart, pos - tokenStart);
}
else {
// Consume the slash character
pos++;
let regExpFlags = RegularExpressionFlags.None;
while (true) {
const ch = codePointChecked(pos);
if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) {
break;
}
else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) {
error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, p, size);
const size = charSize(ch);
if (reportErrors) {
const flag = characterToRegularExpressionFlag(utf16EncodeAsString(ch));
if (flag === undefined) {
error(Diagnostics.Unknown_regular_expression_flag, pos, size);
}
else if (regExpFlags & flag) {
error(Diagnostics.Duplicate_regular_expression_flag, pos, size);
}
else if (((regExpFlags | flag) & RegularExpressionFlags.AnyUnicodeMode) === RegularExpressionFlags.AnyUnicodeMode) {
error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, size);
}
else {
regExpFlags |= flag;
checkRegularExpressionFlagAvailability(flag, size);
}
}
else {
regExpFlags |= flag;
checkRegularExpressionFlagAvailability(flag, p, size);
}
}
p += size;
}
pos = p;
if (reportErrors) {
const saveTokenStart = tokenStart;
const saveTokenFlags = tokenFlags;
const savePos = pos;
const saveEnd = end;
pos = tokenStart + 1;
end = endOfBody;
scanRegularExpressionWorker(regExpFlags, isUnterminated, /*annexB*/ true);
tokenStart = saveTokenStart;
tokenFlags = saveTokenFlags;
pos = savePos;
end = saveEnd;
pos += size;
}
if (reportErrors) {
scanRange(startOfRegExpBody, endOfRegExpBody - startOfRegExpBody, () => {
scanRegularExpressionWorker(regExpFlags, /*annexB*/ true, namedCaptureGroups);
});
}
}
tokenValue = text.substring(tokenStart, pos);
token = SyntaxKind.RegularExpressionLiteral;
Expand Down Expand Up @@ -2819,7 +2861,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
}
else {
currFlags |= flag;
checkRegularExpressionFlagAvailability(flag, pos, size);
checkRegularExpressionFlagAvailability(flag, size);
}
pos += size;
}
Expand Down Expand Up @@ -3543,7 +3585,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
});
}

function checkRegularExpressionFlagAvailability(flag: RegularExpressionFlags, pos: number, size: number) {
function checkRegularExpressionFlagAvailability(flag: RegularExpressionFlags, size: number) {
const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag) as ScriptTarget | undefined;
if (availableFrom && languageVersion < availableFrom) {
error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, size, getNameOfScriptTarget(availableFrom));
Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.