Merge from main

microsoft · rbuckton · May 29, 2024 · Apr 25, 2024 · Apr 25, 2024 · Apr 27, 2024
commit 603c3cf18ce89adab5c35a3275934f3ae9324a2c
diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts
@@ -2675,215 +2675,46 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                         const digitsStart = pos;
                         scanDigits();
                         const min = tokenValue;
+                        if (annexB && !min) {
+                            isPreviousTermQuantifiable = true;
+                            break;
+                        }
                         if (charCodeChecked(pos) === CharacterCodes.comma) {
                             pos++;
                             scanDigits();
-                            const min = tokenValue;
-                            if (annexB && !min) {
-                                isPreviousTermQuantifiable = true;
-                                break;
-                            }
-                            if (text.charCodeAt(pos) === CharacterCodes.comma) {
-                                pos++;
-                                scanDigits();
-                                const max = tokenValue;
-                                if (!min) {
-                                    if (max || text.charCodeAt(pos) === CharacterCodes.closeBrace) {
-                                        error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0);
-                                    }
-                                    else {
-                                        error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch));
-                                        isPreviousTermQuantifiable = true;
-                                        break;
-                                    }
+                            const max = tokenValue;
+                            if (!min) {
+                                if (max || charCodeChecked(pos) === CharacterCodes.closeBrace) {
+                                    error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0);
                                 }
-                                else if (max && Number.parseInt(min) > Number.parseInt(max) && (!annexB || text.charCodeAt(pos) === CharacterCodes.closeBrace)) {
-                                    error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart);
-                                }
-                            }
-                            else if (!min) {
-                                if (!annexB) {
+                                else {
                                     error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch));
-                                }
-                                isPreviousTermQuantifiable = true;
-                                break;
-                            }
-                            if (text.charCodeAt(pos) !== CharacterCodes.closeBrace) {
-                                if (annexB) {
                                     isPreviousTermQuantifiable = true;
                                     break;
                                 }
-                                else {
-                                    error(Diagnostics._0_expected, pos, 0, String.fromCharCode(CharacterCodes.closeBrace));
-                                    pos--;
-                                }
                             }
-                        // falls through
-                        case CharacterCodes.asterisk:
-                        case CharacterCodes.plus:
-                        case CharacterCodes.question:
-                            pos++;
-                            if (text.charCodeAt(pos) === CharacterCodes.question) {
-                                // Non-greedy
-                                pos++;
+                            else if (max && Number.parseInt(min) > Number.parseInt(max) && (!annexB || text.charCodeAt(pos) === CharacterCodes.closeBrace)) {
+                                error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart);
                             }
-                            isPreviousTermQuantifiable = true;
-                            break;
-                        case CharacterCodes.openBracket:
-                            pos++;
-                            if (unicodeSetsMode) {
-                                scanClassSetExpression();
-                            }
-                            else {
-                                scanClassRanges();
+                        }
+                        else if (!min) {
+                            if (!annexB) {
+                                error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch));
                             }
-                            scanExpectedChar(CharacterCodes.closeBracket);
                             isPreviousTermQuantifiable = true;
                             break;
-                        case CharacterCodes.closeParen:
-                            if (isInGroup) {
-                                return;
-                            }
-                        // falls through
-                        case CharacterCodes.closeBracket:
-                        case CharacterCodes.closeBrace:
-                            if (isUnterminated && !isInGroup) {
-                                // Assume what starting from the character to be outside of the regex
-                                return;
+                        }
+                        if (charCodeChecked(pos) !== CharacterCodes.closeBrace) {
+                            if (annexB) {
+                                isPreviousTermQuantifiable = true;
+                                break;
                             }
-                            if (!annexB || ch === CharacterCodes.closeParen) {
-                                error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
+                            else {
+                                error(Diagnostics._0_expected, pos, 0, String.fromCharCode(CharacterCodes.closeBrace));
+                                pos--;
                             }
-                            pos++;
-                            isPreviousTermQuantifiable = true;
-                            break;
-                        case CharacterCodes.slash:
-                        case CharacterCodes.bar:
-                            return;
-                        default:
-                            scanSourceCharacter();
-                            isPreviousTermQuantifiable = true;
-                            break;
-                    }
-                }
-            }
-
-            function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags {
-                while (pos < end) {
-                    const ch = text.charCodeAt(pos);
-                    if (!isIdentifierPart(ch, languageVersion)) {
-                        break;
-                    }
-                    const flag = characterToRegularExpressionFlag(String.fromCharCode(ch));
-                    if (flag === undefined) {
-                        error(Diagnostics.Unknown_regular_expression_flag, pos, 1);
-                    }
-                    else if (currFlags & flag) {
-                        error(Diagnostics.Duplicate_regular_expression_flag, pos, 1);
-                    }
-                    else if (!(flag & RegularExpressionFlags.Modifiers)) {
-                        error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, 1);
-                    }
-                    else {
-                        currFlags |= flag;
-                        const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!;
-                        if (languageVersion < availableFrom) {
-                            error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom));
                         }
-                    }
-                    pos++;
-                }
-                return currFlags;
-            }
-
-            // AtomEscape ::=
-            //     | DecimalEscape
-            //     | CharacterClassEscape
-            //     | CharacterEscape
-            //     | 'k<' RegExpIdentifierName '>'
-            function scanAtomEscape() {
-                Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash);
-                switch (text.charCodeAt(pos)) {
-                    case CharacterCodes.k:
-                        pos++;
-                        if (text.charCodeAt(pos) === CharacterCodes.lessThan) {
-                            pos++;
-                            scanGroupName(/*isReference*/ true);
-                            scanExpectedChar(CharacterCodes.greaterThan);
-                        }
-                        else {
-                            // This is actually allowed in Annex B if there are no named capturing groups in the regex,
-                            // but if we were going to suppress these errors, we would have to record the positions of all '\k's
-                            // and defer the errors until after the scanning to know if the regex has any named capturing groups.
-                            error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2);
-                        }
-                        break;
-                    case CharacterCodes.q:
-                        if (unicodeSetsMode) {
-                            pos++;
-                            error(Diagnostics.q_is_only_available_inside_character_class, pos - 2, 2);
-                            break;
-                        }
-                        scanExpectedChar(CharacterCodes.closeBrace);
-                        pos--;
                     // falls through
-                    default:
-                        // The scanEscapeSequence call in scanCharacterEscape must return non-empty strings
-                        // since there must not be line breaks in a regex literal
-                        Debug.assert(scanCharacterClassEscape() || scanDecimalEscape() || scanCharacterEscape(/*atomEscape*/ true));
-                        break;
-                }
-            }
-
-            // DecimalEscape ::= [1-9] [0-9]*
-            function scanDecimalEscape(): boolean {
-                Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash);
-                const ch = text.charCodeAt(pos);
-                if (ch >= CharacterCodes._1 && ch <= CharacterCodes._9) {
-                    const start = pos;
-                    scanDigits();
-                    decimalEscapes.push({ pos: start, end: pos, value: +tokenValue });
-                    return true;
-                }
-                return false;
-            }
-
-            // CharacterEscape ::=
-            //     | `c` ControlLetter
-            //     | IdentityEscape
-            //     | (Other sequences handled by `scanEscapeSequence`)
-            // IdentityEscape ::=
-            //     | '^' | '$' | '/' | '\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|'
-            //     | [~UnicodeMode] (any other non-identifier characters)
-            function scanCharacterEscape(atomEscape: boolean): string {
-                Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash);
-                let ch = text.charCodeAt(pos);
-                switch (ch) {
-                    case CharacterCodes.c:
-                        pos++;
-                        ch = text.charCodeAt(pos);
-                        if (isASCIILetter(ch)) {
-                            pos++;
-                            return String.fromCharCode(ch & 0x1f);
-                        }
-                        if (!annexB) {
-                            error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2);
-                        }
-                        else if (atomEscape) {
-                            // Annex B treats
-                            //
-                            //  ExtendedAtom : `\` [lookahead = `c`]
-                            //
-                            // as the single character `\` when `c` isn't followed by a valid control character
-                            pos--;
-                            return "\\";
-                        }
-                        return String.fromCharCode(ch);
-                    case CharacterCodes.caret:
-                    case CharacterCodes.$:
-                    case CharacterCodes.slash:
-                    case CharacterCodes.backslash:
-                    case CharacterCodes.dot:
                     case CharacterCodes.asterisk:
                     case CharacterCodes.plus:
                     case CharacterCodes.question:
@@ -2923,7 +2754,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                             // Assume what starting from the character to be outside of the regex
                             return;
                         }
-                        if (unicodeMode || ch === CharacterCodes.closeParen) {
+                        if (!annexB || ch === CharacterCodes.closeParen) {
                             error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
                         }
                         pos++;
@@ -2980,7 +2811,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                         scanGroupName(/*isReference*/ true);
                         scanExpectedChar(CharacterCodes.greaterThan);
                     }
-                    else if (unicodeMode) {
+                    else {
+                        // This is actually allowed in Annex B if there are no named capturing groups in the regex,
+                        // but if we were going to suppress these errors, we would have to record the positions of all '\k's
+                        // and defer the errors until after the scanning to know if the regex has any named capturing groups.
                         error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2);
                     }
                     break;
@@ -3030,10 +2864,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                         pos++;
                         return String.fromCharCode(ch & 0x1f);
                     }
-                    if (unicodeMode) {
+                    if (!annexB) {
                         error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2);
                     }
-                    else if (atomEscape && annexB) {
+                    else if (atomEscape) {
                         // Annex B treats
                         //
                         //  ExtendedAtom : `\` [lookahead = `c`]
@@ -3588,15 +3422,39 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                                 }
                             }
                         }
-                        else if (annexB) {
-                            pos--;
-                            return false;
-                        }
                         else {
-                            error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch));
+                            if (pos === propertyNameOrValueStart) {
+                                error(Diagnostics.Expected_a_Unicode_property_name_or_value);
+                            }
+                            else if (binaryUnicodePropertiesOfStrings.has(propertyNameOrValue)) {
+                                if (!unicodeSetsMode) {
+                                    error(Diagnostics.Any_Unicode_property_that_would_possibly_match_more_than_a_single_character_is_only_available_when_the_Unicode_Sets_v_flag_is_set, propertyNameOrValueStart, pos - propertyNameOrValueStart);
+                                }
+                                else if (isCharacterComplement) {
+                                    error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, propertyNameOrValueStart, pos - propertyNameOrValueStart);
+                                }
+                                else {
+                                    mayContainStrings = true;
+                                }
+                            }
+                            else if (!valuesOfNonBinaryUnicodeProperties.General_Category.has(propertyNameOrValue) && !binaryUnicodeProperties.has(propertyNameOrValue)) {
+                                error(Diagnostics.Unknown_Unicode_property_name_or_value, propertyNameOrValueStart, pos - propertyNameOrValueStart);
+                                const suggestion = getSpellingSuggestion(propertyNameOrValue, [...valuesOfNonBinaryUnicodeProperties.General_Category, ...binaryUnicodeProperties, ...binaryUnicodePropertiesOfStrings], identity);
+                                if (suggestion) {
+                                    error(Diagnostics.Did_you_mean_0, propertyNameOrValueStart, pos - propertyNameOrValueStart, suggestion);
+                                }
+                            }
+                        }
+                        scanExpectedChar(CharacterCodes.closeBrace);
+                        if (!unicodeMode) {
+                            error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start);
                         }
                     }
-                    else if (unicodeMode) {
+                    else if (annexB) {
+                        pos--;
+                        return false;
+                    }
+                    else {
                         error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch));
                     }
                     return true;