@@ -2426,6 +2426,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24262426 // Quickly get to the end of regex such that we know the flags
24272427 let p = tokenStart + 1 ;
24282428 let inEscape = false ;
2429+ let namedCaptureGroups = false ;
24292430 // Although nested character classes are allowed in Unicode Sets mode,
24302431 // an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
24312432 // Additionally, parsing nested character classes will misinterpret regexes like `/[[]/`
@@ -2469,6 +2470,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24692470 else if ( ch === CharacterCodes . closeBracket ) {
24702471 inCharacterClass = false ;
24712472 }
2473+ else if (
2474+ ! inCharacterClass && ch === CharacterCodes . openParen // a '(' inside a character class is a literal, so `[(?<x>]` must not count as a named group
2475+ && charCodeUnchecked ( p + 1 ) === CharacterCodes . question
2476+ && charCodeUnchecked ( p + 2 ) === CharacterCodes . lessThan
2477+ && charCodeUnchecked ( p + 3 ) !== CharacterCodes . equals
2478+ && charCodeUnchecked ( p + 3 ) !== CharacterCodes . exclamation
2479+ ) {
2480+ namedCaptureGroups = true ; // `(?<` not followed by `=` / `!` (lookbehind) opens a named capturing group
2481+ }
24722482 p ++ ;
24732483 }
24742484 const isUnterminated = ! ! ( tokenFlags & TokenFlags . Unterminated ) ;
@@ -2505,7 +2515,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25052515 const saveEnd = end ;
25062516 pos = tokenStart + 1 ;
25072517 end = endOfBody ;
2508- scanRegularExpressionWorker ( regExpFlags , isUnterminated , /*annexB*/ true ) ;
2518+ scanRegularExpressionWorker ( regExpFlags , isUnterminated , /*annexB*/ true , namedCaptureGroups ) ;
25092519 tokenStart = saveTokenStart ;
25102520 tokenFlags = saveTokenFlags ;
25112521 pos = savePos ;
@@ -2517,7 +2527,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25172527 return token ;
25182528 }
25192529
2520- function scanRegularExpressionWorker ( regExpFlags : RegularExpressionFlags , isUnterminated : boolean , annexB : boolean ) {
2530+ function scanRegularExpressionWorker ( regExpFlags : RegularExpressionFlags , isUnterminated : boolean , annexB : boolean , namedCaptureGroups : boolean ) {
25212531 // Why var? It avoids TDZ checks in the runtime which can be costly.
25222532 // See: https://github.com/microsoft/TypeScript/issues/52924
25232533 /* eslint-disable no-var */
@@ -2527,10 +2537,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25272537 /** Grammar parameter */
25282538 var unicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeMode ) ;
25292539
2530- if ( unicodeMode ) {
2531- // Annex B treats any unicode mode as the strict syntax.
2532- annexB = false ;
2533- }
2540+ // Annex B treats any unicode mode as the strict syntax.
2541+ var anyUnicodeModeOrNonAnnexB = unicodeMode || ! annexB ;
25342542
25352543 /** @see {scanClassSetExpression} */
25362544 var mayContainStrings = false ;
@@ -2626,7 +2634,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26262634 case CharacterCodes . exclamation :
26272635 pos ++ ;
26282636 // In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
2629- isPreviousTermQuantifiable = annexB ;
2637+ isPreviousTermQuantifiable = ! anyUnicodeModeOrNonAnnexB ;
26302638 break ;
26312639 case CharacterCodes . lessThan :
26322640 const groupNameStart = pos ;
@@ -2675,7 +2683,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26752683 const digitsStart = pos ;
26762684 scanDigits ( ) ;
26772685 const min = tokenValue ;
2678- if ( annexB && ! min ) {
2686+ if ( ! anyUnicodeModeOrNonAnnexB && ! min ) {
26792687 isPreviousTermQuantifiable = true ;
26802688 break ;
26812689 }
@@ -2693,26 +2701,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26932701 break ;
26942702 }
26952703 }
2696- else if ( max && Number . parseInt ( min ) > Number . parseInt ( max ) && ( ! annexB || text . charCodeAt ( pos ) === CharacterCodes . closeBrace ) ) {
2704+ else if ( max && Number . parseInt ( min ) > Number . parseInt ( max ) && ( anyUnicodeModeOrNonAnnexB || text . charCodeAt ( pos ) === CharacterCodes . closeBrace ) ) {
26972705 error ( Diagnostics . Numbers_out_of_order_in_quantifier , digitsStart , pos - digitsStart ) ;
26982706 }
26992707 }
27002708 else if ( ! min ) {
2701- if ( ! annexB ) {
2709+ if ( anyUnicodeModeOrNonAnnexB ) {
27022710 error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , start , 1 , String . fromCharCode ( ch ) ) ;
27032711 }
27042712 isPreviousTermQuantifiable = true ;
27052713 break ;
27062714 }
27072715 if ( charCodeChecked ( pos ) !== CharacterCodes . closeBrace ) {
2708- if ( annexB ) {
2709- isPreviousTermQuantifiable = true ;
2710- break ;
2711- }
2712- else {
2716+ if ( anyUnicodeModeOrNonAnnexB ) {
27132717 error ( Diagnostics . _0_expected , pos , 0 , String . fromCharCode ( CharacterCodes . closeBrace ) ) ;
27142718 pos -- ;
27152719 }
2720+ else {
2721+ isPreviousTermQuantifiable = true ;
2722+ break ;
2723+ }
27162724 }
27172725 // falls through
27182726 case CharacterCodes . asterisk :
@@ -2754,7 +2762,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
27542762 // Assume that everything from this character onward is outside of the regex
27552763 return ;
27562764 }
2757- if ( ! annexB || ch === CharacterCodes . closeParen ) {
2765+ if ( anyUnicodeModeOrNonAnnexB || ch === CharacterCodes . closeParen ) {
27582766 error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , pos , 1 , String . fromCharCode ( ch ) ) ;
27592767 }
27602768 pos ++ ;
@@ -2811,10 +2819,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28112819 scanGroupName ( /*isReference*/ true ) ;
28122820 scanExpectedChar ( CharacterCodes . greaterThan ) ;
28132821 }
2814- else {
2815- // This is actually allowed in Annex B if there are no named capturing groups in the regex,
2816- // but if we were going to suppress these errors, we would have to record the positions of all '\k's
2817- // and defer the errors until after the scanning to know if the regex has any named capturing groups.
2822+ else if ( namedCaptureGroups ) {
28182823 error ( Diagnostics . k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets , pos - 2 , 2 ) ;
28192824 }
28202825 break ;
@@ -2864,7 +2869,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28642869 pos ++ ;
28652870 return String . fromCharCode ( ch & 0x1f ) ;
28662871 }
2867- if ( ! annexB ) {
2872+ if ( anyUnicodeModeOrNonAnnexB ) {
28682873 error ( Diagnostics . c_must_be_followed_by_an_ASCII_letter , pos - 2 , 2 ) ;
28692874 }
28702875 else if ( atomEscape ) {
@@ -2900,7 +2905,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
29002905 return "\\" ;
29012906 }
29022907 pos -- ;
2903- return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ annexB ? "annex-b" : true ) ;
2908+ return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ anyUnicodeModeOrNonAnnexB || "annex-b" ) ; // "annex-b" only when neither a unicode mode nor strict (non-Annex B) syntax applies
29042909 }
29052910 }
29062911
@@ -2949,12 +2954,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
29492954 if ( isClassContentExit ( ch ) ) {
29502955 return ;
29512956 }
2952- if ( ! minCharacter && ! annexB ) {
2957+ if ( ! minCharacter && anyUnicodeModeOrNonAnnexB ) {
29532958 error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , minStart , pos - 1 - minStart ) ;
29542959 }
29552960 const maxStart = pos ;
29562961 const maxCharacter = scanClassAtom ( ) ;
2957- if ( ! maxCharacter && ! annexB ) {
2962+ if ( ! maxCharacter && anyUnicodeModeOrNonAnnexB ) {
29582963 error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , maxStart , pos - maxStart ) ;
29592964 continue ;
29602965 }
@@ -3450,12 +3455,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
34503455 error ( Diagnostics . Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set , start , pos - start ) ;
34513456 }
34523457 }
3453- else if ( annexB ) {
3454- pos -- ;
3455- return false ;
3458+ else if ( anyUnicodeModeOrNonAnnexB ) {
3459+ error ( Diagnostics . _0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces , pos - 2 , 2 , String . fromCharCode ( ch ) ) ;
34563460 }
34573461 else {
3458- error ( Diagnostics . _0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces , pos - 2 , 2 , String . fromCharCode ( ch ) ) ;
3462+ pos -- ;
3463+ return false ;
34593464 }
34603465 return true ;
34613466 }
@@ -3500,7 +3505,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
35003505 forEach ( decimalEscapes , escape => {
35013506 // in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as
35023507 // either a LegacyOctalEscapeSequence or IdentityEscape
3503- if ( ! annexB && escape . value > numberOfCapturingGroups ) {
3508+ if ( anyUnicodeModeOrNonAnnexB && escape . value > numberOfCapturingGroups ) {
35043509 if ( numberOfCapturingGroups ) {
35053510 error ( Diagnostics . This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression , escape . pos , escape . end - escape . pos , numberOfCapturingGroups ) ;
35063511 }
0 commit comments