From 36ba8cae1a3bdd44fffabf2c8d2e13a11d8ac7fe Mon Sep 17 00:00:00 2001 From: Jake Bailey <5341706+jakebailey@users.noreply.github.com> Date: Fri, 5 Jun 2026 10:55:09 -0700 Subject: [PATCH] Pull off full code points in template literal inference --- src/compiler/checker.ts | 5 +- ...templateLiteralTypesInferenceRunes.symbols | 76 +++++++++++++++++++ .../templateLiteralTypesInferenceRunes.types | 74 ++++++++++++++++++ .../templateLiteralTypesInferenceRunes.ts | 28 +++++++ 4 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 tests/baselines/reference/templateLiteralTypesInferenceRunes.symbols create mode 100644 tests/baselines/reference/templateLiteralTypesInferenceRunes.types create mode 100644 tests/cases/compiler/templateLiteralTypesInferenceRunes.ts diff --git a/src/compiler/checker.ts b/src/compiler/checker.ts index 0567712f11da3..b2419fbe90cf3 100644 --- a/src/compiler/checker.ts +++ b/src/compiler/checker.ts @@ -26744,7 +26744,10 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker { pos += delim.length; } else if (pos < getSourceText(seg).length) { - addMatch(seg, pos + 1); + // Consume a whole code point (rune) so that surrogate pairs (e.g. emoji) + // are not split in half. A lone surrogate consumes a single code unit. + const ch = getSourceText(seg).codePointAt(pos)!; + addMatch(seg, pos + (ch >= 0x10000 ? 
2 : 1)); } else if (seg < lastSourceIndex) { addMatch(seg + 1, 0); diff --git a/tests/baselines/reference/templateLiteralTypesInferenceRunes.symbols b/tests/baselines/reference/templateLiteralTypesInferenceRunes.symbols new file mode 100644 index 0000000000000..295e75b8e54aa --- /dev/null +++ b/tests/baselines/reference/templateLiteralTypesInferenceRunes.symbols @@ -0,0 +1,76 @@ +//// [tests/cases/compiler/templateLiteralTypesInferenceRunes.ts] //// + +=== templateLiteralTypesInferenceRunes.ts === +// Template literal type inference should pull off whole code points (runes) +// rather than splitting surrogate pairs (e.g. emoji) into lone surrogates. +// https://github.com/microsoft/TypeScript/issues/63533 + +type Heads<S> = S extends `${infer C}${infer R}` ? [C, R] : never; +>Heads : Symbol(Heads, Decl(templateLiteralTypesInferenceRunes.ts, 0, 0)) +>S : Symbol(S, Decl(templateLiteralTypesInferenceRunes.ts, 4, 11)) +>S : Symbol(S, Decl(templateLiteralTypesInferenceRunes.ts, 4, 11)) +>C : Symbol(C, Decl(templateLiteralTypesInferenceRunes.ts, 4, 34)) +>R : Symbol(R, Decl(templateLiteralTypesInferenceRunes.ts, 4, 44)) +>C : Symbol(C, Decl(templateLiteralTypesInferenceRunes.ts, 4, 34)) +>R : Symbol(R, Decl(templateLiteralTypesInferenceRunes.ts, 4, 44)) + +type A = Heads<"😀abc">; +>A : Symbol(A, Decl(templateLiteralTypesInferenceRunes.ts, 4, 66)) +>Heads : Symbol(Heads, Decl(templateLiteralTypesInferenceRunes.ts, 0, 0)) + +declare let a: A; +>a : Symbol(a, Decl(templateLiteralTypesInferenceRunes.ts, 7, 11)) +>A : Symbol(A, Decl(templateLiteralTypesInferenceRunes.ts, 4, 66)) + +const chk: ["😀", "abc"] = a; +>chk : Symbol(chk, Decl(templateLiteralTypesInferenceRunes.ts, 8, 5)) +>a : Symbol(a, Decl(templateLiteralTypesInferenceRunes.ts, 7, 11)) + +// Multiple surrogate-pair characters in a row +type B = Heads<"😀😁">; +>B : Symbol(B, Decl(templateLiteralTypesInferenceRunes.ts, 8, 29)) +>Heads : Symbol(Heads, Decl(templateLiteralTypesInferenceRunes.ts, 0, 0)) + +declare let b: B; 
+>b : Symbol(b, Decl(templateLiteralTypesInferenceRunes.ts, 12, 11)) +>B : Symbol(B, Decl(templateLiteralTypesInferenceRunes.ts, 8, 29)) + +const chk2: ["😀", "😁"] = b; +>chk2 : Symbol(chk2, Decl(templateLiteralTypesInferenceRunes.ts, 13, 5)) +>b : Symbol(b, Decl(templateLiteralTypesInferenceRunes.ts, 12, 11)) + +// Surrogate pair followed by a BMP character +type Pair<S> = S extends `${infer C1}${infer C2}` ? [C1, C2] : never; +>Pair : Symbol(Pair, Decl(templateLiteralTypesInferenceRunes.ts, 13, 29)) +>S : Symbol(S, Decl(templateLiteralTypesInferenceRunes.ts, 16, 10)) +>S : Symbol(S, Decl(templateLiteralTypesInferenceRunes.ts, 16, 10)) +>C1 : Symbol(C1, Decl(templateLiteralTypesInferenceRunes.ts, 16, 33)) +>C2 : Symbol(C2, Decl(templateLiteralTypesInferenceRunes.ts, 16, 44)) +>C1 : Symbol(C1, Decl(templateLiteralTypesInferenceRunes.ts, 16, 33)) +>C2 : Symbol(C2, Decl(templateLiteralTypesInferenceRunes.ts, 16, 44)) + +type C = Pair<"😀x">; +>C : Symbol(C, Decl(templateLiteralTypesInferenceRunes.ts, 16, 69)) +>Pair : Symbol(Pair, Decl(templateLiteralTypesInferenceRunes.ts, 13, 29)) + +declare let c: C; +>c : Symbol(c, Decl(templateLiteralTypesInferenceRunes.ts, 18, 11)) +>C : Symbol(C, Decl(templateLiteralTypesInferenceRunes.ts, 16, 69)) + +const chk3: ["😀", "x"] = c; +>chk3 : Symbol(chk3, Decl(templateLiteralTypesInferenceRunes.ts, 19, 5)) +>c : Symbol(c, Decl(templateLiteralTypesInferenceRunes.ts, 18, 11)) + +// A leading BMP character then a surrogate pair +type D = Pair<"x😀">; +>D : Symbol(D, Decl(templateLiteralTypesInferenceRunes.ts, 19, 28)) +>Pair : Symbol(Pair, Decl(templateLiteralTypesInferenceRunes.ts, 13, 29)) + +declare let d: D; +>d : Symbol(d, Decl(templateLiteralTypesInferenceRunes.ts, 23, 11)) +>D : Symbol(D, Decl(templateLiteralTypesInferenceRunes.ts, 19, 28)) + +const chk4: ["x", "😀"] = d; +>chk4 : Symbol(chk4, Decl(templateLiteralTypesInferenceRunes.ts, 24, 5)) +>d : Symbol(d, Decl(templateLiteralTypesInferenceRunes.ts, 23, 11)) + diff --git 
a/tests/baselines/reference/templateLiteralTypesInferenceRunes.types b/tests/baselines/reference/templateLiteralTypesInferenceRunes.types new file mode 100644 index 0000000000000..df19dc47a5029 --- /dev/null +++ b/tests/baselines/reference/templateLiteralTypesInferenceRunes.types @@ -0,0 +1,74 @@ +//// [tests/cases/compiler/templateLiteralTypesInferenceRunes.ts] //// + +=== templateLiteralTypesInferenceRunes.ts === +// Template literal type inference should pull off whole code points (runes) +// rather than splitting surrogate pairs (e.g. emoji) into lone surrogates. +// https://github.com/microsoft/TypeScript/issues/63533 + +type Heads<S> = S extends `${infer C}${infer R}` ? [C, R] : never; +>Heads : Heads<S> +> : ^^^^^^^^ + +type A = Heads<"😀abc">; +>A : ["😀", "abc"] +> : ^^^^^^^^^^^^^ + +declare let a: A; +>a : ["😀", "abc"] +> : ^^^^^^^^^^^^^ + +const chk: ["😀", "abc"] = a; +>chk : ["😀", "abc"] +> : ^^^^^^^^^^^^^ +>a : ["😀", "abc"] +> : ^^^^^^^^^^^^^ + +// Multiple surrogate-pair characters in a row +type B = Heads<"😀😁">; +>B : ["😀", "😁"] +> : ^^^^^^^^^^^^ + +declare let b: B; +>b : ["😀", "😁"] +> : ^^^^^^^^^^^^ + +const chk2: ["😀", "😁"] = b; +>chk2 : ["😀", "😁"] +> : ^^^^^^^^^^^^ +>b : ["😀", "😁"] +> : ^^^^^^^^^^^^ + +// Surrogate pair followed by a BMP character +type Pair<S> = S extends `${infer C1}${infer C2}` ? 
[C1, C2] : never; +>Pair : Pair<S> +> : ^^^^^^^ + +type C = Pair<"😀x">; +>C : ["😀", "x"] +> : ^^^^^^^^^^^ + +declare let c: C; +>c : ["😀", "x"] +> : ^^^^^^^^^^^ + +const chk3: ["😀", "x"] = c; +>chk3 : ["😀", "x"] +> : ^^^^^^^^^^^ +>c : ["😀", "x"] +> : ^^^^^^^^^^^ + +// A leading BMP character then a surrogate pair +type D = Pair<"x😀">; +>D : ["x", "😀"] +> : ^^^^^^^^^^^ + +declare let d: D; +>d : ["x", "😀"] +> : ^^^^^^^^^^^ + +const chk4: ["x", "😀"] = d; +>chk4 : ["x", "😀"] +> : ^^^^^^^^^^^ +>d : ["x", "😀"] +> : ^^^^^^^^^^^ + diff --git a/tests/cases/compiler/templateLiteralTypesInferenceRunes.ts b/tests/cases/compiler/templateLiteralTypesInferenceRunes.ts new file mode 100644 index 0000000000000..6e9bb913300e7 --- /dev/null +++ b/tests/cases/compiler/templateLiteralTypesInferenceRunes.ts @@ -0,0 +1,28 @@ +// @strict: true +// @noEmit: true + +// Template literal type inference should pull off whole code points (runes) +// rather than splitting surrogate pairs (e.g. emoji) into lone surrogates. +// https://github.com/microsoft/TypeScript/issues/63533 + +type Heads<S> = S extends `${infer C}${infer R}` ? [C, R] : never; + +type A = Heads<"😀abc">; +declare let a: A; +const chk: ["😀", "abc"] = a; + +// Multiple surrogate-pair characters in a row +type B = Heads<"😀😁">; +declare let b: B; +const chk2: ["😀", "😁"] = b; + +// Surrogate pair followed by a BMP character +type Pair<S> = S extends `${infer C1}${infer C2}` ? [C1, C2] : never; +type C = Pair<"😀x">; +declare let c: C; +const chk3: ["😀", "x"] = c; + +// A leading BMP character then a surrogate pair +type D = Pair<"x😀">; +declare let d: D; +const chk4: ["x", "😀"] = d;