Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
fix(compiler): correctly compile long numeric HTML entities
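Fixes an issue where long numeric HTML entities (e.g. `&#x1F6C8;`, 🛈) were incorrectly compiled due to the use of 4-digit (16-bit) character codes: the lexer decoded them with `String.fromCharCode`, which cannot represent code points above U+FFFF, and now uses `String.fromCodePoint` instead.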
  • Loading branch information
SkyZeroZx committed Oct 8, 2025
commit 9335833a11011a876396f8cb0c4ff2c4095fb7f3
2 changes: 1 addition & 1 deletion packages/compiler/src/ml_parser/lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ class _Tokenizer {
this._cursor.advance();
try {
const charCode = parseInt(strNum, isHex ? 16 : 10);
this._endToken([String.fromCharCode(charCode), this._cursor.getChars(start)]);
this._endToken([String.fromCodePoint(charCode), this._cursor.getChars(start)]);
Comment thread
JeanMeche marked this conversation as resolved.
} catch {
throw this._createError(
_unknownEntityErrorMsg(this._cursor.getChars(start)),
Expand Down
51 changes: 51 additions & 0 deletions packages/compiler/test/ml_parser/html_parser_spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,22 @@ describe('HtmlParser', () => {
]);
});

it('should parse text nodes with HTML entities (5+ hex digits)', () => {
  // `&#x1F6C8;` is the hexadecimal reference for U+1F6C8 (🛈, Circled Information Source),
  // a code point that needs more than four hex digits.
  const template = '<div>&#x1F6C8;</div>';
  const humanized = humanizeDom(parser.parse(template, 'TestComp'));

  // The text node is expected to hold the decoded character, with the entity part
  // listing the decoded value next to the original entity text.
  const expected = [
    [html.Element, 'div', 0],
    [html.Text, '\u{1F6C8}', 1, [''], ['\u{1F6C8}', '&#x1F6C8;'], ['']],
  ];
  expect(humanized).toEqual(expected);
});

it('should parse text nodes with decimal HTML entities (5+ digits)', () => {
  // Decimal 128712 is the same code point as U+1F6C8 (🛈, Circled Information Source).
  const template = '<div>&#128712;</div>';
  const humanized = humanizeDom(parser.parse(template, 'TestComp'));

  // Same decoded character as the hexadecimal form; only the recorded entity text differs.
  const expected = [
    [html.Element, 'div', 0],
    [html.Text, '\u{1F6C8}', 1, [''], ['\u{1F6C8}', '&#128712;'], ['']],
  ];
  expect(humanized).toEqual(expected);
});

it('should normalize line endings within CDATA', () => {
const parsed = parser.parse('<![CDATA[ line 1 \r\n line 2 ]]>', 'TestComp');
expect(humanizeDom(parsed)).toEqual([
Expand Down Expand Up @@ -326,6 +342,22 @@ describe('HtmlParser', () => {
]);
});

it('should parse attributes containing encoded entities (5+ hex digits)', () => {
  // `&#x1F6C8;` is the hexadecimal reference for U+1F6C8 (🛈, Circled Information Source),
  // placed here inside a quoted attribute value.
  const template = '<div foo="&#x1F6C8;"></div>';
  const humanized = humanizeDom(parser.parse(template, 'TestComp'));

  // The attribute value is expected to be the decoded character, with the entity part
  // listing the decoded value next to the original entity text.
  const expected = [
    [html.Element, 'div', 0],
    [html.Attribute, 'foo', '\u{1F6C8}', [''], ['\u{1F6C8}', '&#x1F6C8;'], ['']],
  ];
  expect(humanized).toEqual(expected);
});

it('should parse attributes containing encoded decimal entities (5+ digits)', () => {
  // Decimal 128712 is the same code point as U+1F6C8 (🛈, Circled Information Source),
  // placed here inside a quoted attribute value.
  const template = '<div foo="&#128712;"></div>';
  const humanized = humanizeDom(parser.parse(template, 'TestComp'));

  // Same decoded character as the hexadecimal form; only the recorded entity text differs.
  const expected = [
    [html.Element, 'div', 0],
    [html.Attribute, 'foo', '\u{1F6C8}', [''], ['\u{1F6C8}', '&#128712;'], ['']],
  ];
  expect(humanized).toEqual(expected);
});

it('should parse attributes containing unquoted interpolation', () => {
expect(humanizeDom(parser.parse('<div foo={{message}}></div>', 'TestComp'))).toEqual([
[html.Element, 'div', 0],
Expand Down Expand Up @@ -1632,6 +1664,25 @@ describe('HtmlParser', () => {
]);
});

it('should decode HTML entities with 5+ hex digits in interpolations', () => {
  // Two adjacent interpolations referencing U+1F6C8 (🛈, Circled Information Source):
  // the first uses the hexadecimal form, the second the decimal form (128712).
  const template = '{{&#x1F6C8;}}' + '{{&#128712;}}';
  const humanized = humanizeDomSourceSpans(parser.parse(template, 'TestComp'));

  // A single text node is expected: its value has both entities decoded, the
  // interpolation parts keep the raw entity text, and the final element is the
  // untouched source text.
  const expectedTextNode = [
    html.Text,
    '{{\u{1F6C8}}}' + '{{\u{1F6C8}}}',
    0,
    [''],
    ['{{', '&#x1F6C8;', '}}'],
    [''],
    ['{{', '&#128712;', '}}'],
    [''],
    '{{&#x1F6C8;}}' + '{{&#128712;}}',
  ];
  expect(humanized).toEqual([expectedTextNode]);
});

it('should support interpolations in text', () => {
expect(
humanizeDomSourceSpans(parser.parse('<div> pre {{ value }} post </div>', 'TestComp')),
Expand Down
20 changes: 20 additions & 0 deletions packages/compiler/test/ml_parser/lexer_spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2136,6 +2136,26 @@ describe('HtmlLexer', () => {
]);
});

it('should parse entities with more than 4 hex digits', () => {
  // `&#x1F6C8;` (🛈, Circled Information Source) uses five hexadecimal digits.
  const input = '&#x1F6C8;';
  const tokens = tokenizeAndHumanizeParts(input);

  // Expected stream: an empty text token on each side of the entity token, which pairs
  // the decoded character with the original entity text, followed by EOF.
  const expectedTokens = [
    [TokenType.TEXT, ''],
    [TokenType.ENCODED_ENTITY, '\u{1F6C8}', '&#x1F6C8;'],
    [TokenType.TEXT, ''],
    [TokenType.EOF],
  ];
  expect(tokens).toEqual(expectedTokens);
});

it('should parse entities with more than 4 decimal digits', () => {
  // `&#128712;` is the decimal reference for U+1F6C8 (🛈, Circled Information Source).
  const input = '&#128712;';
  const tokens = tokenizeAndHumanizeParts(input);

  // Expected stream: an empty text token on each side of the entity token, which pairs
  // the decoded character with the original entity text, followed by EOF.
  const expectedTokens = [
    [TokenType.TEXT, ''],
    [TokenType.ENCODED_ENTITY, '\u{1F6C8}', '&#128712;'],
    [TokenType.TEXT, ''],
    [TokenType.EOF],
  ];
  expect(tokens).toEqual(expectedTokens);
});

it('should store the locations', () => {
expect(tokenizeAndHumanizeSourceSpans('a&amp;b')).toEqual([
[TokenType.TEXT, 'a'],
Expand Down