refactor(compiler): trim insignificant whitespace in expressions when…

… `preserveSignificantWhitespace === false`. This parses and reserializes expressions to normalize their whitespace formatting and make them more durable to insignificant changes in whitespace which might otherwise alter message IDs despite no translator-meaningful change being made.
angular · dgp1130 · Oct 12, 2024 · Oct 12, 2024 · Oct 12, 2024 · Oct 12, 2024
commit 52d47d31d89e290bffc7df685e7f0a1d12756bf7
diff --git a/packages/compiler/src/i18n/extractor_merger.ts b/packages/compiler/src/i18n/extractor_merger.ts
@@ -352,7 +352,8 @@ class _Visitor implements html.Visitor {
       // When dropping significant whitespace we need to retain whitespace tokens or
       // else we won't be able to reuse source spans because empty tokens would be
       // removed and cause a mismatch.
-      !this._preserveSignificantWhitespace /* retainEmptyTokens */,
+      /* retainEmptyTokens */ !this._preserveSignificantWhitespace,
+      /* preserveExpressionWhitespace */ this._preserveSignificantWhitespace,
     );
   }
 

diff --git a/packages/compiler/src/i18n/i18n_parser.ts b/packages/compiler/src/i18n/i18n_parser.ts
@@ -8,10 +8,17 @@
 
 import {Lexer as ExpressionLexer} from '../expression_parser/lexer';
 import {Parser as ExpressionParser} from '../expression_parser/parser';
+import {serialize as serializeExpression} from '../expression_parser/serializer';
 import * as html from '../ml_parser/ast';
 import {InterpolationConfig} from '../ml_parser/defaults';
 import {getHtmlTagDefinition} from '../ml_parser/html_tags';
-import {InterpolatedAttributeToken, InterpolatedTextToken, TokenType} from '../ml_parser/tokens';
+import {
+  AttributeValueInterpolationToken,
+  InterpolatedAttributeToken,
+  InterpolatedTextToken,
+  InterpolationToken,
+  TokenType,
+} from '../ml_parser/tokens';
 import {ParseSourceSpan} from '../parse_util';
 
 import * as i18n from './i18n_ast';
@@ -38,12 +45,14 @@ export function createI18nMessageFactory(
   interpolationConfig: InterpolationConfig,
   containerBlocks: Set<string>,
   retainEmptyTokens: boolean,
+  preserveExpressionWhitespace: boolean,
 ): I18nMessageFactory {
   const visitor = new _I18nVisitor(
     _expParser,
     interpolationConfig,
     containerBlocks,
     retainEmptyTokens,
+    preserveExpressionWhitespace,
   );
   return (nodes, meaning, description, customId, visitNodeFn) =>
     visitor.toI18nMessage(nodes, meaning, description, customId, visitNodeFn);
@@ -68,6 +77,7 @@ class _I18nVisitor implements html.Visitor {
     private _interpolationConfig: InterpolationConfig,
     private _containerBlocks: Set<string>,
     private readonly _retainEmptyTokens: boolean,
+    private readonly _preserveExpressionWhitespace: boolean,
   ) {}
 
   public toI18nMessage(
@@ -274,14 +284,24 @@ class _I18nVisitor implements html.Visitor {
         case TokenType.INTERPOLATION:
         case TokenType.ATTR_VALUE_INTERPOLATION:
           hasInterpolation = true;
-          const expression = token.parts[1];
+          const [startMarker, expression, endMarker] = token.parts;
           const baseName = extractPlaceholderName(expression) || 'INTERPOLATION';
           const phName = context.placeholderRegistry.getPlaceholderName(baseName, expression);
-          context.placeholderToContent[phName] = {
-            text: token.parts.join(''),
-            sourceSpan: token.sourceSpan,
-          };
-          nodes.push(new i18n.Placeholder(expression, phName, token.sourceSpan));
+
+          if (this._preserveExpressionWhitespace) {
+            context.placeholderToContent[phName] = {
+              text: token.parts.join(''),
+              sourceSpan: token.sourceSpan,
+            };
+            nodes.push(new i18n.Placeholder(expression, phName, token.sourceSpan));
+          } else {
+            const normalized = this.normalizeExpression(token);
+            context.placeholderToContent[phName] = {
+              text: `${startMarker}${normalized}${endMarker}`,
+              sourceSpan: token.sourceSpan,
+            };
+            nodes.push(new i18n.Placeholder(normalized, phName, token.sourceSpan));
+          }
           break;
         default:
           // Try to merge text tokens with previous tokens. We do this even for all tokens
@@ -332,6 +352,19 @@ class _I18nVisitor implements html.Visitor {
       return nodes[0];
     }
   }
+
+  // Normalize expression whitespace by parsing and re-serializing it. This makes
+  // message IDs more durable to insignificant whitespace changes.
+  normalizeExpression(token: InterpolationToken | AttributeValueInterpolationToken): string {
+    const expression = token.parts[1];
+    const expr = this._expressionParser.parseBinding(
+      expression,
+      /* location */ token.sourceSpan.start.toString(),
+      /* absoluteOffset */ token.sourceSpan.start.offset,
+      this._interpolationConfig,
+    );
+    return serializeExpression(expr);
+  }
 }
 
 /**

diff --git a/packages/compiler/src/render3/view/i18n/meta.ts b/packages/compiler/src/render3/view/i18n/meta.ts
@@ -90,6 +90,7 @@ export class I18nMetaVisitor implements html.Visitor {
       this.interpolationConfig,
       this.containerBlocks,
       this.retainEmptyTokens,
+      /* preserveExpressionWhitespace */ this.preserveSignificantWhitespace,
     );
     const message = createI18nMessage(nodes, meaning, description, customId, visitNodeFn);
     this._setMessageId(message, meta);

diff --git a/packages/compiler/test/i18n/i18n_ast_spec.ts b/packages/compiler/test/i18n/i18n_ast_spec.ts
@@ -16,6 +16,7 @@ describe('Message', () => {
     DEFAULT_INTERPOLATION_CONFIG,
     DEFAULT_CONTAINER_BLOCKS,
     /* retainEmptyTokens */ false,
+    /* preserveExpressionWhitespace */ true,
   );
   describe('messageText()', () => {
     it('should serialize simple text', () => {

diff --git a/packages/compiler/test/i18n/i18n_parser_spec.ts b/packages/compiler/test/i18n/i18n_parser_spec.ts
@@ -227,6 +227,34 @@ describe('I18nParser', () => {
         [['{count, plural, =0 {[{sex, select, male {[m]}}]}}'], '', '', ''],
       ]);
     });
+
+    it('should preserve whitespace when preserving significant whitespace', () => {
+      const html = '<div i18n="m|d">{count, plural, =0 {{{   foo   }}}}</div>';
+      expect(
+        _humanizeMessages(
+          html,
+          /* implicitTags */ undefined,
+          /* implicitAttrs */ undefined,
+          /* preserveSignificantWhitespace */ true,
+        ),
+      ).toEqual([
+        [['{count, plural, =0 {[[<ph name="INTERPOLATION">   foo   </ph>]]}}'], 'm', 'd', ''],
+      ]);
+    });
+
+    it('should normalize whitespace when not preserving significant whitespace', () => {
+      const html = '<div i18n="m|d">{count, plural, =0 {{{   foo   }}}}</div>';
+      expect(
+        _humanizeMessages(
+          html,
+          /* implicitTags */ undefined,
+          /* implicitAttrs */ undefined,
+          /* preserveSignificantWhitespace */ false,
+        ),
+      ).toEqual([
+        [['{count, plural, =0 {[[, <ph name="INTERPOLATION">foo</ph>, ]]}}'], 'm', 'd', ''],
+      ]);
+    });
   });
 
   describe('implicit elements', () => {
@@ -318,50 +346,77 @@ describe('I18nParser', () => {
         '',
       ]);
     });
+
+    it('should preserve whitespace when preserving significant whitespace', () => {
+      const html = '<div i18n="m|d">{{   foo   }}</div>';
+      expect(
+        _humanizeMessages(
+          html,
+          /* implicitTags */ undefined,
+          /* implicitAttrs */ undefined,
+          /* preserveSignificantWhitespace */ true,
+        ),
+      ).toEqual([[['[<ph name="INTERPOLATION">   foo   </ph>]'], 'm', 'd', '']]);
+    });
+
+    it('should normalize whitespace when not preserving significant whitespace', () => {
+      const html = '<div i18n="m|d">{{   foo   }}</div>';
+      expect(
+        _humanizeMessages(
+          html,
+          /* implicitTags */ undefined,
+          /* implicitAttrs */ undefined,
+          /* preserveSignificantWhitespace */ false,
+        ),
+      ).toEqual([[['[, <ph name="INTERPOLATION">foo</ph>, ]'], 'm', 'd', '']]);
+    });
   });
 });
 
 export function _humanizeMessages(
   html: string,
   implicitTags: string[] = [],
   implicitAttrs: {[k: string]: string[]} = {},
+  preserveSignificantWhitespace = true,
 ): [string[], string, string, string][] {
-  return _extractMessages(html, implicitTags, implicitAttrs).map((message) => [
-    serializeNodes(message.nodes),
-    message.meaning,
-    message.description,
-    message.id,
-  ]) as [string[], string, string, string][];
+  return _extractMessages(html, implicitTags, implicitAttrs, preserveSignificantWhitespace).map(
+    (message) => [serializeNodes(message.nodes), message.meaning, message.description, message.id],
+  ) as [string[], string, string, string][];
 }
 
 function _humanizePlaceholders(
   html: string,
   implicitTags: string[] = [],
   implicitAttrs: {[k: string]: string[]} = {},
+  preserveSignificantWhitespace = true,
 ): string[] {
-  return _extractMessages(html, implicitTags, implicitAttrs).map((msg) =>
-    Object.keys(msg.placeholders)
-      .map((name) => `${name}=${msg.placeholders[name].text}`)
-      .join(', '),
+  return _extractMessages(html, implicitTags, implicitAttrs, preserveSignificantWhitespace).map(
+    (msg) =>
+      Object.keys(msg.placeholders)
+        .map((name) => `${name}=${msg.placeholders[name].text}`)
+        .join(', '),
   );
 }
 
 function _humanizePlaceholdersToMessage(
   html: string,
   implicitTags: string[] = [],
   implicitAttrs: {[k: string]: string[]} = {},
+  preserveSignificantWhitespace = true,
 ): string[] {
-  return _extractMessages(html, implicitTags, implicitAttrs).map((msg) =>
-    Object.keys(msg.placeholderToMessage)
-      .map((k) => `${k}=${digest(msg.placeholderToMessage[k])}`)
-      .join(', '),
+  return _extractMessages(html, implicitTags, implicitAttrs, preserveSignificantWhitespace).map(
+    (msg) =>
+      Object.keys(msg.placeholderToMessage)
+        .map((k) => `${k}=${digest(msg.placeholderToMessage[k])}`)
+        .join(', '),
   );
 }
 
 export function _extractMessages(
   html: string,
   implicitTags: string[] = [],
   implicitAttrs: {[k: string]: string[]} = {},
+  preserveSignificantWhitespace = true,
 ): Message[] {
   const htmlParser = new HtmlParser();
   const parseResult = htmlParser.parse(html, 'extractor spec', {tokenizeExpansionForms: true});
@@ -374,6 +429,6 @@ export function _extractMessages(
     DEFAULT_INTERPOLATION_CONFIG,
     implicitTags,
     implicitAttrs,
-    /* preserveSignificantWhitespace */ true,
+    preserveSignificantWhitespace,
   ).messages;
 }