@@ -76,6 +76,8 @@ export interface Scanner {
7676 getTokenFlags ( ) : TokenFlags ;
7777 reScanGreaterToken ( ) : SyntaxKind ;
7878 reScanSlashToken ( ) : SyntaxKind ;
79+ /** @internal */
80+ reScanSlashToken ( reportErrors ?: boolean ) : SyntaxKind ; // eslint-disable-line @typescript-eslint/unified-signatures
7981 reScanAsteriskEqualsToken ( ) : SyntaxKind ;
8082 reScanTemplateToken ( isTaggedTemplate : boolean ) : SyntaxKind ;
8183 /** @deprecated use {@link reScanTemplateToken}(false) */
@@ -1484,7 +1486,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
14841486 // | [0-3] [0-7] [0-7]?
14851487 // | [4-7] [0-7]
14861488 // NonOctalDecimalEscapeSequence ::= [89]
1487- function scanEscapeSequence ( shouldEmitInvalidEscapeError : boolean , isRegularExpression : boolean ) : string {
1489+ function scanEscapeSequence ( shouldEmitInvalidEscapeError : boolean , isRegularExpression : boolean | "annex-b" ) : string {
14881490 const start = pos ;
14891491 pos ++ ;
14901492 if ( pos >= end ) {
@@ -1523,7 +1525,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
15231525 tokenFlags |= TokenFlags . ContainsInvalidEscape ;
15241526 if ( isRegularExpression || shouldEmitInvalidEscapeError ) {
15251527 const code = parseInt ( text . substring ( start + 1 , pos ) , 8 ) ;
1526- error ( Diagnostics . Octal_escape_sequences_are_not_allowed_Use_the_syntax_0 , start , pos - start , "\\x" + code . toString ( 16 ) . padStart ( 2 , "0" ) ) ;
1528+ if ( isRegularExpression !== "annex-b" ) {
1529+ error ( Diagnostics . Octal_escape_sequences_are_not_allowed_Use_the_syntax_0 , start , pos - start , "\\x" + code . toString ( 16 ) . padStart ( 2 , "0" ) ) ;
1530+ }
15271531 return String . fromCharCode ( code ) ;
15281532 }
15291533 return text . substring ( start , pos ) ;
@@ -1559,7 +1563,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
15591563 ) {
15601564 // '\u{DDDDDD}'
15611565 pos -= 2 ;
1562- return scanExtendedUnicodeEscape ( isRegularExpression || shouldEmitInvalidEscapeError ) ;
1566+ return scanExtendedUnicodeEscape ( ! ! isRegularExpression || shouldEmitInvalidEscapeError ) ;
15631567 }
15641568 // '\uDDDD'
15651569 for ( ; pos < start + 6 ; pos ++ ) {
@@ -1623,7 +1627,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
16231627 case CharacterCodes . paragraphSeparator :
16241628 return "" ;
16251629 default :
1626- if ( isRegularExpression && ( shouldEmitInvalidEscapeError || isIdentifierPart ( ch , languageVersion ) ) ) {
1630+ if ( isRegularExpression === true && ( shouldEmitInvalidEscapeError || isIdentifierPart ( ch , languageVersion ) ) ) {
16271631 error ( Diagnostics . This_character_cannot_be_escaped_in_a_regular_expression , pos - 2 , 2 ) ;
16281632 }
16291633 return String . fromCharCode ( ch ) ;
@@ -2386,7 +2390,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
23862390 return token = SyntaxKind . EqualsToken ;
23872391 }
23882392
2389- function reScanSlashToken ( ) : SyntaxKind {
2393+ function reScanSlashToken ( reportErrors ?: boolean ) : SyntaxKind {
23902394 if ( token === SyntaxKind . SlashToken || token === SyntaxKind . SlashEqualsToken ) {
23912395 // Quickly get to the end of regex such that we know the flags
23922396 let p = tokenStart + 1 ;
@@ -2444,44 +2448,57 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24442448 if ( ! isIdentifierPart ( ch , languageVersion ) ) {
24452449 break ;
24462450 }
2447- const flag = characterToRegularExpressionFlag ( String . fromCharCode ( ch ) ) ;
2448- if ( flag === undefined ) {
2449- error ( Diagnostics . Unknown_regular_expression_flag , p , 1 ) ;
2450- }
2451- else if ( regExpFlags & flag ) {
2452- error ( Diagnostics . Duplicate_regular_expression_flag , p , 1 ) ;
2453- }
2454- else if ( ( ( regExpFlags | flag ) & RegularExpressionFlags . UnicodeMode ) === RegularExpressionFlags . UnicodeMode ) {
2455- error ( Diagnostics . The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously , p , 1 ) ;
2456- }
2457- else {
2458- regExpFlags |= flag ;
2459- const availableFrom = regExpFlagToFirstAvailableLanguageVersion . get ( flag ) ! ;
2460- if ( languageVersion < availableFrom ) {
2461- error ( Diagnostics . This_regular_expression_flag_is_only_available_when_targeting_0_or_later , p , 1 , getNameOfScriptTarget ( availableFrom ) ) ;
2451+ if ( reportErrors ) {
2452+ const flag = characterToRegularExpressionFlag ( String . fromCharCode ( ch ) ) ;
2453+ if ( flag === undefined ) {
2454+ error ( Diagnostics . Unknown_regular_expression_flag , p , 1 ) ;
2455+ }
2456+ else if ( regExpFlags & flag ) {
2457+ error ( Diagnostics . Duplicate_regular_expression_flag , p , 1 ) ;
2458+ }
2459+ else if ( ( ( regExpFlags | flag ) & RegularExpressionFlags . UnicodeMode ) === RegularExpressionFlags . UnicodeMode ) {
2460+ error ( Diagnostics . The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously , p , 1 ) ;
2461+ }
2462+ else {
2463+ regExpFlags |= flag ;
2464+ const availableFrom = regExpFlagToFirstAvailableLanguageVersion . get ( flag ) ! ;
2465+ if ( languageVersion < availableFrom ) {
2466+ error ( Diagnostics . This_regular_expression_flag_is_only_available_when_targeting_0_or_later , p , 1 , getNameOfScriptTarget ( availableFrom ) ) ;
2467+ }
24622468 }
24632469 }
24642470 p ++ ;
24652471 }
2466- pos = tokenStart + 1 ;
2467- const saveTokenPos = tokenStart ;
2468- const saveTokenFlags = tokenFlags ;
2469- scanRegularExpressionWorker ( text , endOfBody , regExpFlags , isUnterminated ) ;
2470- if ( ! isUnterminated ) {
2472+ if ( reportErrors ) {
2473+ pos = tokenStart + 1 ;
2474+ const saveTokenPos = tokenStart ;
2475+ const saveTokenFlags = tokenFlags ;
2476+ scanRegularExpressionWorker ( text , endOfBody , regExpFlags , isUnterminated , /*annexB*/ true ) ;
2477+ if ( ! isUnterminated ) {
2478+ pos = p ;
2479+ }
2480+ tokenStart = saveTokenPos ;
2481+ tokenFlags = saveTokenFlags ;
2482+ }
2483+ else {
24712484 pos = p ;
24722485 }
2473- tokenStart = saveTokenPos ;
2474- tokenFlags = saveTokenFlags ;
24752486 tokenValue = text . substring ( tokenStart , pos ) ;
24762487 token = SyntaxKind . RegularExpressionLiteral ;
24772488 }
24782489 return token ;
24792490
2480- function scanRegularExpressionWorker ( text : string , end : number , regExpFlags : RegularExpressionFlags , isUnterminated : boolean ) {
2481- /** Grammar parameter */
2482- const unicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeMode ) ;
2491+ function scanRegularExpressionWorker ( text : string , end : number , regExpFlags : RegularExpressionFlags , isUnterminated : boolean , annexB : boolean ) {
24832492 /** Grammar parameter */
24842493 const unicodeSetsMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeSets ) ;
2494+ /** Grammar parameter */
2495+ const unicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeMode ) ;
2496+
2497+ if ( unicodeMode ) {
2498+ // Annex B treats any unicode mode as the strict syntax.
2499+ annexB = false ;
2500+ }
2501+
24852502 /** @see {scanClassSetExpression} */
24862503 let mayContainStrings = false ;
24872504
@@ -2571,7 +2588,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25712588 case CharacterCodes . equals :
25722589 case CharacterCodes . exclamation :
25732590 pos ++ ;
2574- isPreviousTermQuantifiable = false ;
2591+ // In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
2592+ isPreviousTermQuantifiable = annexB ;
25752593 break ;
25762594 case CharacterCodes . lessThan :
25772595 const groupNameStart = pos ;
@@ -2763,7 +2781,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
27632781 default :
27642782 // The scanEscapeSequence call in scanCharacterEscape must return non-empty strings
27652783 // since there must not be line breaks in a regex literal
2766- Debug . assert ( scanCharacterClassEscape ( ) || scanDecimalEscape ( ) || scanCharacterEscape ( ) ) ;
2784+ Debug . assert ( scanCharacterClassEscape ( ) || scanDecimalEscape ( ) || scanCharacterEscape ( /*atomEscape*/ true ) ) ;
27672785 break ;
27682786 }
27692787 }
@@ -2788,7 +2806,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
27882806 // IdentityEscape ::=
27892807 // | '^' | '$' | '/' | '\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|'
27902808 // | [~UnicodeMode] (any other non-identifier characters)
2791- function scanCharacterEscape ( ) : string {
2809+ function scanCharacterEscape ( atomEscape : boolean ) : string {
27922810 Debug . assertEqual ( text . charCodeAt ( pos - 1 ) , CharacterCodes . backslash ) ;
27932811 let ch = text . charCodeAt ( pos ) ;
27942812 switch ( ch ) {
@@ -2802,6 +2820,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28022820 if ( unicodeMode ) {
28032821 error ( Diagnostics . c_must_be_followed_by_an_ASCII_letter , pos - 2 , 2 ) ;
28042822 }
2823+ else if ( atomEscape && annexB ) {
2824+ // Annex B treats
2825+ //
2826+ // ExtendedAtom : `\` [lookahead = `c`]
2827+ //
2828+ // as the single character `\` when `c` isn't followed by a valid control character
2829+ pos -- ;
2830+ return "\\" ;
2831+ }
28052832 return String . fromCharCode ( ch ) ;
28062833 case CharacterCodes . caret :
28072834 case CharacterCodes . $ :
@@ -2826,7 +2853,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28262853 return "\\" ;
28272854 }
28282855 pos -- ;
2829- return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ true ) ;
2856+ return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ annexB ? "annex-b" : true ) ;
28302857 }
28312858 }
28322859
@@ -2873,12 +2900,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28732900 if ( isClassContentExit ( ch ) ) {
28742901 return ;
28752902 }
2876- if ( ! minCharacter ) {
2903+ if ( ! minCharacter && ! annexB ) {
28772904 error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , minStart , pos - 1 - minStart ) ;
28782905 }
28792906 const maxStart = pos ;
28802907 const maxCharacter = scanClassAtom ( ) ;
2881- if ( ! maxCharacter ) {
2908+ if ( ! maxCharacter && ! annexB ) {
28822909 error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , maxStart , pos - maxStart ) ;
28832910 continue ;
28842911 }
@@ -3208,7 +3235,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
32083235 pos ++ ;
32093236 return String . fromCharCode ( ch ) ;
32103237 default :
3211- return scanCharacterEscape ( ) ;
3238+ return scanCharacterEscape ( /*atomEscape*/ false ) ;
32123239 }
32133240 }
32143241 else if ( ch === text . charCodeAt ( pos + 1 ) ) {
@@ -3275,7 +3302,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
32753302 if ( scanCharacterClassEscape ( ) ) {
32763303 return "" ;
32773304 }
3278- return scanCharacterEscape ( ) ;
3305+ return scanCharacterEscape ( /*atomEscape*/ false ) ;
32793306 }
32803307 }
32813308 else {
@@ -3407,7 +3434,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
34073434 }
34083435 } ) ;
34093436 forEach ( decimalEscapes , escape => {
3410- if ( escape . value > numberOfCapturingGroups ) {
3437+ // In Annex B, if a DecimalEscape is greater than the number of capturing groups then it is treated as
3438+ // either a LegacyOctalEscapeSequence or an IdentityEscape
3439+ if ( ! annexB && escape . value > numberOfCapturingGroups ) {
34113440 if ( numberOfCapturingGroups ) {
34123441 error ( Diagnostics . A_decimal_escape_must_refer_to_an_existent_capturing_group_There_are_only_0_capturing_groups_in_this_regular_expression , escape . pos , escape . end - escape . pos , numberOfCapturingGroups ) ;
34133442 }
0 commit comments