Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
c44a057
Type Inference for Regular Expressions
graphemecluster Oct 17, 2024
c435526
Fix Incorrect Disjunction Alternative Visibility
graphemecluster Nov 4, 2024
24c16f9
Add Test Cases for Duplicate Capturing Group Name
graphemecluster Nov 4, 2024
13cfe15
Performance optimization: Reduce type creation
graphemecluster Nov 6, 2024
6aeac05
Fix incorrect type for `/{0?/` etc. in non-Unicode mode
graphemecluster Nov 7, 2024
b828249
Fix: `i` modifier is not removed if no flags are added
graphemecluster Nov 7, 2024
bd74623
Remove more redundant literals esp. `"" | string`
graphemecluster Nov 7, 2024
a6e0b98
Fix: Incorrect characters included for `/\c/` & `/[\c]/` in Annex B
graphemecluster Nov 8, 2024
a31ddcb
Refactor: Rename variables and modify type of reduced union
graphemecluster Nov 8, 2024
42a6568
Refactor: Fast path character classes
graphemecluster Nov 9, 2024
9ce54aa
Refine types for cases where the cross product size is too large
graphemecluster Nov 9, 2024
bb0e808
Refactor: Type `RegularExpressionAnyString` as a unique symbol for be…
graphemecluster Nov 9, 2024
524f291
Expand test cases in `regularExpressionLiteralTypes.ts`
graphemecluster Nov 9, 2024
48f86ff
Fix: missing `"-"` in `/[+-]/`
graphemecluster Nov 9, 2024
3ddc5da
Mark `RegExpDigits` as character class (for fast path)
graphemecluster Nov 9, 2024
9dc953c
Correct lib types & Refine type checking test case
graphemecluster Nov 10, 2024
0cf763d
Separate type checking test case into 2 files which tests for differe…
graphemecluster Nov 10, 2024
53425e5
Add test case for `String#matchAll`
graphemecluster Nov 10, 2024
12e875b
Collapse consecutive string types in template literals
graphemecluster Nov 10, 2024
5dfdb56
Merge remote-tracking branch 'upstream/main' into regex-type-infer
graphemecluster Nov 10, 2024
09ad9c3
Fix up all baselines
graphemecluster Nov 10, 2024
61fc428
Fix up all self check errors
graphemecluster Nov 10, 2024
402760c
Separate the type checking test case further
graphemecluster Nov 10, 2024
99355c0
Temporarily exclude `RegExp` & `RegExpExecArray` from `checkNoTypeArg…
graphemecluster Nov 10, 2024
80dd5d0
Revert "Temporarily exclude `RegExp` & `RegExpExecArray` from `checkN…
graphemecluster Nov 10, 2024
b251992
Fix self check in a more ugly way due to build issue
graphemecluster Nov 10, 2024
5bd7951
Temporarily slience a few `no-unnecessary-type-assertion` lint lines …
graphemecluster Nov 10, 2024
037ef8e
Fix minor incorrect fix in self check
graphemecluster Nov 10, 2024
e51e144
Temporarily shrink type checking test case due to timeout
graphemecluster Nov 10, 2024
2989429
Inline `RegularExpressionAnyString` to avoid `export` keywords in `ty…
graphemecluster Nov 12, 2024
8cc6840
Keep `RegularExpressionAnyString` but export as internal
graphemecluster Nov 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
Next Next commit
Type Inference for Regular Expressions
  • Loading branch information
graphemecluster committed Oct 17, 2024
commit c44a0572d7c767d4c4cf279224f5b61be6129a0e
144 changes: 114 additions & 30 deletions src/compiler/checker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,10 @@ import {
rangeOfTypeParameters,
ReadonlyKeyword,
reduceLeft,
RegExpAnyString,
RegularExpressionFlags,
RegularExpressionLiteral,
RegularExpressionPatternUnion,
RelationComparisonResult,
relativeComplement,
removeExtension,
Expand Down Expand Up @@ -1304,6 +1307,17 @@ const typeofNEFacts: ReadonlyMap<string, TypeFacts> = new Map(Object.entries({
function: TypeFacts.TypeofNEFunction,
}));

const regExpFlagToPropertyName: ReadonlyMap<RegularExpressionFlags, __String> = new Map([
[RegularExpressionFlags.HasIndices, "hasIndices" as __String],
[RegularExpressionFlags.Global, "global" as __String],
[RegularExpressionFlags.IgnoreCase, "ignoreCase" as __String],
[RegularExpressionFlags.Multiline, "multiline" as __String],
[RegularExpressionFlags.DotAll, "dotAll" as __String],
[RegularExpressionFlags.Unicode, "unicode" as __String],
[RegularExpressionFlags.UnicodeSets, "unicodeSets" as __String],
[RegularExpressionFlags.Sticky, "sticky" as __String],
]);

type TypeSystemEntity = Node | Symbol | Type | Signature;

const enum TypeSystemPropertyName {
Expand Down Expand Up @@ -2087,7 +2101,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
var stringNumberSymbolType = getUnionType([stringType, numberType, esSymbolType]);
var numberOrBigIntType = getUnionType([numberType, bigintType]);
var templateConstraintType = getUnionType([stringType, numberType, booleanType, bigintType, nullType, undefinedType]) as UnionType;
var numericStringType = getTemplateLiteralType(["", ""], [numberType]); // The `${number}` type
var numericStringType = getTemplateLiteralType(/*texts*/ undefined, [numberType]); // The `${number}` type

var restrictiveMapper: TypeMapper = makeFunctionTypeMapper(t => t.flags & TypeFlags.TypeParameter ? getRestrictiveTypeParameter(t as TypeParameter) : t, () => "(restrictive mapper)");
var permissiveMapper: TypeMapper = makeFunctionTypeMapper(t => t.flags & TypeFlags.TypeParameter ? wildcardType : t, () => "(permissive mapper)");
Expand Down Expand Up @@ -2230,7 +2244,6 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
var globalStringType: ObjectType;
var globalNumberType: ObjectType;
var globalBooleanType: ObjectType;
var globalRegExpType: ObjectType;
var globalThisType: GenericType;
var anyArrayType: Type;
var autoArrayType: Type;
Expand Down Expand Up @@ -2269,6 +2282,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
var deferredGlobalImportAttributesType: ObjectType | undefined;
var deferredGlobalDisposableType: ObjectType | undefined;
var deferredGlobalAsyncDisposableType: ObjectType | undefined;
var deferredGlobalRegExpSymbol: Symbol | undefined;
var deferredGlobalExtractSymbol: Symbol | undefined;
var deferredGlobalOmitSymbol: Symbol | undefined;
var deferredGlobalAwaitedSymbol: Symbol | undefined;
Expand Down Expand Up @@ -16914,6 +16928,12 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
return symbol && getTypeOfGlobalSymbol(symbol, arity) as GenericType;
}

function getGlobalRegExpSymbol(): Symbol | undefined {
// We always report an error, so cache a result in the event we could not resolve the symbol to prevent reporting it multiple times
deferredGlobalRegExpSymbol ||= getGlobalTypeAliasSymbol("RegExp" as __String, /*arity*/ 3, /*reportErrors*/ true) || unknownSymbol;
return deferredGlobalRegExpSymbol === unknownSymbol ? undefined : deferredGlobalRegExpSymbol;
}

function getGlobalExtractSymbol(): Symbol | undefined {
// We always report an error, so cache a result in the event we could not resolve the symbol to prevent reporting it multiple times
deferredGlobalExtractSymbol ||= getGlobalTypeAliasSymbol("Extract" as __String, /*arity*/ 2, /*reportErrors*/ true) || unknownSymbol;
Expand Down Expand Up @@ -18072,8 +18092,11 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
return reduceLeft(types, (n, t) => t.flags & TypeFlags.Union ? n * (t as UnionType).types.length : t.flags & TypeFlags.Never ? 0 : n, 1);
}

function checkCrossProductUnion(types: readonly Type[]) {
function checkCrossProductUnion(types: readonly Type[], isRegularExpression?: boolean) {
const size = getCrossProductUnionSize(types);
if (isRegularExpression) {
return size < 10000;
}
if (size >= 100000) {
tracing?.instant(tracing.Phase.CheckTypes, "checkCrossProductUnion_DepthLimit", { typeIds: types.map(t => t.id), size });
error(currentNode, Diagnostics.Expression_produces_a_union_type_that_is_too_complex_to_represent);
Expand Down Expand Up @@ -18329,19 +18352,19 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
return links.resolvedType;
}

function getTemplateLiteralType(texts: readonly string[], types: readonly Type[]): Type {
function getTemplateLiteralType(texts: readonly string[] | undefined, types: readonly Type[], isRegularExpression?: boolean): Type {
const unionIndex = findIndex(types, t => !!(t.flags & (TypeFlags.Never | TypeFlags.Union)));
if (unionIndex >= 0) {
return checkCrossProductUnion(types) ?
mapType(types[unionIndex], t => getTemplateLiteralType(texts, replaceElement(types, unionIndex, t))) :
errorType;
return checkCrossProductUnion(types, isRegularExpression) ?
mapType(types[unionIndex], t => getTemplateLiteralType(texts, replaceElement(types, unionIndex, t), isRegularExpression)) :
isRegularExpression ? stringType : errorType;
}
if (contains(types, wildcardType)) {
return wildcardType;
}
const newTypes: Type[] = [];
const newTexts: string[] = [];
let text = texts[0];
let text = texts ? texts[0] : "";
if (!addSpans(texts, types)) {
return stringType;
}
Expand All @@ -18365,22 +18388,27 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
}
return type;

function addSpans(texts: readonly string[], types: readonly Type[]): boolean {
function addSpans(texts: readonly string[] | undefined, types: readonly Type[]): boolean {
for (let i = 0; i < types.length; i++) {
const t = types[i];
if (t.flags & (TypeFlags.Literal | TypeFlags.Null | TypeFlags.Undefined)) {
text += getTemplateStringForType(t) || "";
text += texts[i + 1];
if (texts) text += texts[i + 1];
}
else if (t.flags & TypeFlags.TemplateLiteral) {
text += (t as TemplateLiteralType).texts[0];
if (!addSpans((t as TemplateLiteralType).texts, (t as TemplateLiteralType).types)) return false;
text += texts[i + 1];
if (texts) text += texts[i + 1];
}
else if (isGenericIndexType(t) || isPatternLiteralPlaceholderType(t)) {
newTypes.push(t);
newTexts.push(text);
text = texts[i + 1];
if (!text && lastOrUndefined(newTypes) === stringType && t === stringType) {
// Quickly collapse consecutive `${string}${string}` for calls from regular expressions
}
else {
newTypes.push(t);
newTexts.push(text);
}
text = texts ? texts[i + 1] : "";
}
else {
return false;
Expand Down Expand Up @@ -18413,7 +18441,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
type.flags & TypeFlags.StringMapping && symbol === type.symbol ? type :
type.flags & (TypeFlags.Any | TypeFlags.String | TypeFlags.StringMapping) || isGenericIndexType(type) ? getStringMappingTypeForGenericType(symbol, type) :
// This handles Mapping<`${number}`> and Mapping<`${bigint}`>
isPatternLiteralPlaceholderType(type) ? getStringMappingTypeForGenericType(symbol, getTemplateLiteralType(["", ""], [type])) :
isPatternLiteralPlaceholderType(type) ? getStringMappingTypeForGenericType(symbol, getTemplateLiteralType(/*texts*/ undefined, [type])) :
type;
}

Expand Down Expand Up @@ -25897,7 +25925,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
}

function getStringLikeTypeForType(type: Type) {
return type.flags & (TypeFlags.Any | TypeFlags.StringLike) ? type : getTemplateLiteralType(["", ""], [type]);
return type.flags & (TypeFlags.Any | TypeFlags.StringLike) ? type : getTemplateLiteralType(/*texts*/ undefined, [type]);
}

// This function infers from the text parts and type parts of a source literal to a target template literal. The number
Expand Down Expand Up @@ -32398,9 +32426,10 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
}
}

function checkGrammarRegularExpressionLiteral(node: RegularExpressionLiteral) {
const sourceFile = getSourceFileOfNode(node);
if (!hasParseDiagnostics(sourceFile) && !node.isUnterminated) {
function checkRegularExpressionLiteral(node: RegularExpressionLiteral) {
const regExpTypeAlias = getGlobalRegExpSymbol();
if (regExpTypeAlias) {
const sourceFile = getSourceFileOfNode(node);
let lastError: DiagnosticWithLocation | undefined;
scanner ??= createScanner(ScriptTarget.ESNext, /*skipTrivia*/ true);
scanner.setScriptTarget(sourceFile.languageVersion);
Expand All @@ -32421,23 +32450,79 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
try {
scanner.scan();
Debug.assert(scanner.reScanSlashToken(/*reportErrors*/ true) === SyntaxKind.RegularExpressionLiteral, "Expected scanner to rescan RegularExpressionLiteral");
return !!lastError;
}
finally {
scanner.setText("");
scanner.setOnError(/*onError*/ undefined);
}
}
return false;
}

function checkRegularExpressionLiteral(node: RegularExpressionLiteral) {
const nodeLinks = getNodeLinks(node);
if (!(nodeLinks.flags & NodeCheckFlags.TypeChecked)) {
nodeLinks.flags |= NodeCheckFlags.TypeChecked;
addLazyDiagnostic(() => checkGrammarRegularExpressionLiteral(node));
const regExpCapturingGroups = scanner.getRegExpCapturingGroups();
const regExpCapturingGroupSpecifiers = scanner.getRegExpCapturingGroupSpecifiers();
const patternUnionTypeCache = new WeakMap<RegularExpressionPatternUnion, Type>();

const capturingGroupsType = createTupleType(map(regExpCapturingGroups, patternUnion => {
const patternUnionType = getTypeFromPatternUnion(patternUnion);
return patternUnion.isPossiblyUndefined ? getUnionType([patternUnionType, undefinedType]) : patternUnionType;
}));

let namedCapturingGroupsType: Type;
if (regExpCapturingGroupSpecifiers?.size) {
const namedCapturingGroupsTypeMembers = createSymbolTable();
for (const [groupName, capturingGroups] of regExpCapturingGroupSpecifiers) {
const escapedGroupName = escapeLeadingUnderscores(groupName);
const types = map(capturingGroups, getTypeFromPatternUnion);
if (some(capturingGroups, patternUnion => patternUnion.isPossiblyUndefined!)) {
types.push(undefinedType);
}
const groupType = getUnionType(types);
namedCapturingGroupsTypeMembers.set(escapedGroupName, createProperty(escapedGroupName, groupType));
}
namedCapturingGroupsType = createAnonymousType(/*symbol*/ undefined, namedCapturingGroupsTypeMembers, emptyArray, emptyArray, emptyArray);
}
else {
namedCapturingGroupsType = undefinedType;
}

const regExpFlags = scanner.getRegExpFlags();
const flagsTypeMembers = createSymbolTable();
for (const [flag, propertyName] of regExpFlagToPropertyName) {
flagsTypeMembers.set(propertyName, createProperty(propertyName, regExpFlags & flag ? trueType : falseType));
}
const flagsType = createAnonymousType(/*symbol*/ undefined, flagsTypeMembers, emptyArray, emptyArray, emptyArray);

return getTypeAliasInstantiation(regExpTypeAlias, [capturingGroupsType, namedCapturingGroupsType, flagsType]);

function getTypeFromPatternUnion(patternUnion: RegularExpressionPatternUnion): Type {
let patternUnionType = patternUnionTypeCache.get(patternUnion);
if (!patternUnionType) {
const types = arrayFrom(patternUnion, pattern => {
if (typeof pattern === "string") {
return getStringLiteralType(pattern);
}
return getTemplateLiteralType(
/*texts*/ undefined,
/*types*/ map(pattern, content => {
if (typeof content === "string") {
return getStringLiteralType(content);
}
if (content === RegExpAnyString) {
return stringType;
}
if (content instanceof Set) {
return getTypeFromPatternUnion(content);
}
Debug.fail();
}),
/*isRegularExpression*/ true,
);
});
patternUnionType = getUnionType(types);
patternUnionTypeCache.set(patternUnion, patternUnionType);
}
return patternUnionType;
}
}
return globalRegExpType;
return emptyGenericType;
}

function checkSpreadExpression(node: SpreadElement, checkMode?: CheckMode): Type {
Expand Down Expand Up @@ -50415,7 +50500,6 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker {
globalStringType = getGlobalType("String" as __String, /*arity*/ 0, /*reportErrors*/ true);
globalNumberType = getGlobalType("Number" as __String, /*arity*/ 0, /*reportErrors*/ true);
globalBooleanType = getGlobalType("Boolean" as __String, /*arity*/ 0, /*reportErrors*/ true);
globalRegExpType = getGlobalType("RegExp" as __String, /*arity*/ 0, /*reportErrors*/ true);
anyArrayType = createArrayType(anyType);

autoArrayType = createArrayType(autoType);
Expand Down
6 changes: 6 additions & 0 deletions src/compiler/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,12 @@ export function last<T>(array: readonly T[]): T {
return array[array.length - 1];
}

/** @internal */
export function setLast<T>(array: T[], value: T): T {
Debug.assert(array.length !== 0);
return array[array.length - 1] = value;
}

/**
* Returns the only element of an array if it contains only one element, `undefined` otherwise.
*
Expand Down
8 changes: 4 additions & 4 deletions src/compiler/diagnosticMessages.json
Original file line number Diff line number Diff line change
Expand Up @@ -1705,10 +1705,6 @@
"category": "Error",
"code": 1512
},
"Undetermined character escape.": {
"category": "Error",
"code": 1513
},
"Expected a capturing group name.": {
"category": "Error",
"code": 1514
Expand Down Expand Up @@ -1834,6 +1830,10 @@
"category": "Error",
"code": 1544
},
"'\\k' is only available outside character class.": {
"category": "Error",
"code": 1545
},
Comment on lines +1833 to +1836
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to confirm, a new message can’t take the place of an old, unused one right?
(TS1513 never actually appears as TS1161 would have been emitted at the same position if a RegExp is unterminated, so replacing the message shouldn’t cause any problems.)


"The types of '{0}' are incompatible between these types.": {
"category": "Error",
Expand Down
Loading