util: graduate TextEncoder/TextDecoder, tests

Add tests ported from Web Platform Tests. Graduate TextEncoder / TextDecoder from experimental
nodejs · jasnell · Oct 2, 2017 · Oct 20, 2017 · Oct 23, 2017 · Oct 24, 2017
commit 87922733c091d38a0919972574426fd7e68fcb90
diff --git a/doc/api/util.md b/doc/api/util.md
@@ -551,8 +551,6 @@ see [Custom promisified functions][].
 added: v8.3.0
 -->
 
-> Stability: 1 - Experimental
-
 An implementation of the [WHATWG Encoding Standard][] `TextDecoder` API.
 
 ```js
@@ -690,8 +688,6 @@ mark.
 added: v8.3.0
 -->
 
-> Stability: 1 - Experimental
-
 An implementation of the [WHATWG Encoding Standard][] `TextEncoder` API. All
 instances of `TextEncoder` only support UTF-8 encoding.
 

diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js
@@ -10,11 +10,6 @@ const kEncoding = Symbol('encoding');
 const kDecoder = Symbol('decoder');
 const kEncoder = Symbol('encoder');
 
-let warned = false;
-const experimental =
-  'The WHATWG Encoding Standard implementation is an experimental API. It ' +
-  'should not yet be used in production applications.';
-
 const {
   getConstructorOf,
   customInspectSymbol: inspect
@@ -289,11 +284,6 @@ function getEncodingFromLabel(label) {
 
 class TextEncoder {
   constructor() {
-    if (!warned) {
-      warned = true;
-      process.emitWarning(experimental, 'ExperimentalWarning');
-    }
-
     this[kEncoder] = true;
   }
 
@@ -353,11 +343,6 @@ function makeTextDecoderICU() {
 
   class TextDecoder {
     constructor(encoding = 'utf-8', options = {}) {
-      if (!warned) {
-        warned = true;
-        process.emitWarning(experimental, 'ExperimentalWarning');
-      }
-
       encoding = `${encoding}`;
       if (typeof options !== 'object')
         throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
@@ -430,11 +415,6 @@ function makeTextDecoderJS() {
 
   class TextDecoder {
     constructor(encoding = 'utf-8', options = {}) {
-      if (!warned) {
-        warned = true;
-        process.emitWarning(experimental, 'ExperimentalWarning');
-      }
-
       encoding = `${encoding}`;
       if (typeof options !== 'object')
         throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');

diff --git a/test/parallel/test-whatwg-encoding-fatal-streaming.js b/test/parallel/test-whatwg-encoding-fatal-streaming.js
@@ -0,0 +1,84 @@
+'use strict';
+
+// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/textdecoder-fatal-streaming.html
+
+const common = require('../common');
+
+// The `fatal` option and utf-16be need an ICU-enabled build; skip otherwise.
+if (!common.hasIntl)
+  common.skip('missing Intl');
+
+const assert = require('assert');
+const {
+  TextDecoder
+} = require('util');
+
+
+// A truncated sequence throws in fatal mode and decodes to U+FFFD otherwise.
+{
+  [
+    { encoding: 'utf-8', sequence: [0xC0] },
+    { encoding: 'utf-16le', sequence: [0x00] },
+    { encoding: 'utf-16be', sequence: [0x00] }
+  ].forEach((testCase) => {
+    // Pass the byte list directly: wrapping it in another array only works by
+    // accident, through array-to-number coercion of one-element lists.
+    const data = new Uint8Array(testCase.sequence);
+    common.expectsError(
+      () => {
+        const decoder = new TextDecoder(testCase.encoding, { fatal: true });
+        decoder.decode(data);
+      }, {
+        code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
+        type: TypeError,
+        message:
+          `The encoded data was not valid for encoding ${testCase.encoding}`
+      }
+    );
+
+    assert.strictEqual(
+      new TextDecoder(testCase.encoding).decode(data),
+      '\uFFFD'
+    );
+  });
+}
+
+// Fatal flag, streaming cases: one decoder is reused for every step below.
+{
+  const decoder = new TextDecoder('utf-16le', { fatal: true });
+  const odd = new Uint8Array([0x00]);
+  const even = new Uint8Array([0x00, 0x00]);
+
+  // Two streamed halves of one code unit combine into U+0000.
+  assert.strictEqual(decoder.decode(odd, { stream: true }), '');
+  assert.strictEqual(decoder.decode(odd), '\u0000');
+
+  // Both orders below add up to three bytes, so the final decode() must throw.
+  common.expectsError(
+    () => {
+      decoder.decode(even, { stream: true });
+      decoder.decode(odd);
+    }, {
+      code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
+      type: TypeError,
+      message:
+        'The encoded data was not valid for encoding utf-16le'
+    }
+  );
+
+  common.expectsError(
+    () => {
+      decoder.decode(odd, { stream: true });
+      decoder.decode(even);
+    }, {
+      code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
+      type: TypeError,
+      message:
+        'The encoded data was not valid for encoding utf-16le'
+    }
+  );
+
+  // Complete code units still decode after the failures above.
+  assert.strictEqual(decoder.decode(even, { stream: true }), '\u0000');
+  assert.strictEqual(decoder.decode(even), '\u0000');
+}
diff --git a/test/parallel/test-whatwg-encoding-surrogates-utf8.js b/test/parallel/test-whatwg-encoding-surrogates-utf8.js
@@ -0,0 +1,57 @@
+'use strict';
+
+// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/api-surrogates-utf8.html
+
+require('../common');
+const assert = require('assert');
+const {
+  TextDecoder,
+  TextEncoder
+} = require('util');
+
+// Unpaired surrogates must encode to the UTF-8 bytes of U+FFFD (EF BF BD) and
+// decode back to U+FFFD; each case lists the input, bytes and decoded string.
+const badStrings = [
+  {
+    input: 'abc123',
+    expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33],
+    decoded: 'abc123',
+    name: 'Sanity check'
+  },
+  {
+    input: '\uD800',
+    expected: [0xef, 0xbf, 0xbd],
+    decoded: '\uFFFD',
+    name: 'Surrogate half (low)'
+  },
+  {
+    input: '\uDC00',
+    expected: [0xef, 0xbf, 0xbd],
+    decoded: '\uFFFD',
+    name: 'Surrogate half (high)'
+  },
+  {
+    input: 'abc\uD800123',
+    expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
+    decoded: 'abc\uFFFD123',
+    name: 'Surrogate half (low), in a string'
+  },
+  {
+    input: 'abc\uDC00123',
+    expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
+    decoded: 'abc\uFFFD123',
+    name: 'Surrogate half (high), in a string'
+  },
+  {
+    input: '\uDC00\uD800',
+    expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd],
+    decoded: '\uFFFD\uFFFD',
+    name: 'Wrong order'
+  }
+];
+
+for (const { input, expected, decoded } of badStrings) {
+  const encoded = new TextEncoder().encode(input);
+  assert.deepStrictEqual(Array.from(encoded), expected);
+  assert.strictEqual(new TextDecoder('utf-8').decode(encoded), decoded);
+}
diff --git a/test/parallel/test-whatwg-encoding-textdecoder-fatal.js b/test/parallel/test-whatwg-encoding-textdecoder-fatal.js
@@ -0,0 +1,97 @@
+'use strict';
+
+// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/textdecoder-fatal.html
+
+const common = require('../common');
+
+// The `fatal` option needs an ICU-enabled build (Intl); skip otherwise.
+if (!common.hasIntl)
+  common.skip('missing Intl');
+
+const assert = require('assert');
+const {
+  TextDecoder
+} = require('util');
+
+// Byte sequences that are invalid in the given encoding.
+const bad = [
+  { encoding: 'utf-8', input: [0xFF], name: 'invalid code' },
+  { encoding: 'utf-8', input: [0xC0], name: 'ends early' },
+  { encoding: 'utf-8', input: [0xE0], name: 'ends early 2' },
+  { encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' },
+  { encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail 2' },
+  { encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail 3' },
+  { encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail 4' },
+  { encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail 5' },
+  { encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail 6' },
+  { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80],
+    name: '> 0x10FFFF' },
+  { encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80],
+    name: 'obsolete lead byte' },
+  // Overlong encodings
+  { encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' },
+  { encoding: 'utf-8', input: [0xE0, 0x80, 0x80],
+    name: 'overlong U+0000 - 3 bytes' },
+  { encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80],
+    name: 'overlong U+0000 - 4 bytes' },
+  { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80],
+    name: 'overlong U+0000 - 5 bytes' },
+  { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80],
+    name: 'overlong U+0000 - 6 bytes' },
+  { encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' },
+  { encoding: 'utf-8', input: [0xE0, 0x81, 0xBF],
+    name: 'overlong U+007F - 3 bytes' },
+  { encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF],
+    name: 'overlong U+007F - 4 bytes' },
+  { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF],
+    name: 'overlong U+007F - 5 bytes' },
+  { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF],
+    name: 'overlong U+007F - 6 bytes' },
+  { encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF],
+    name: 'overlong U+07FF - 3 bytes' },
+  { encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF],
+    name: 'overlong U+07FF - 4 bytes' },
+  { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF],
+    name: 'overlong U+07FF - 5 bytes' },
+  { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF],
+    name: 'overlong U+07FF - 6 bytes' },
+  { encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF],
+    name: 'overlong U+FFFF - 4 bytes' },
+  { encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF],
+    name: 'overlong U+FFFF - 5 bytes' },
+  { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF],
+    name: 'overlong U+FFFF - 6 bytes' },
+  { encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF],
+    name: 'overlong U+10FFFF - 5 bytes' },
+  { encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF],
+    name: 'overlong U+10FFFF - 6 bytes' },
+  // UTF-16 surrogates encoded as code points in UTF-8
+  { encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' },
+  { encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' },
+  { encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80],
+    name: 'surrogate pair' },
+  { encoding: 'utf-16le', input: [0x00], name: 'truncated code unit' },
+  // Mismatched UTF-16 surrogates are exercised in utf16-surrogates.html
+  // FIXME: Add legacy encoding cases
+];
+
+// Every invalid sequence must make a fatal decoder throw.
+bad.forEach((t) => {
+  common.expectsError(
+    () => {
+      new TextDecoder(t.encoding, { fatal: true })
+        .decode(new Uint8Array(t.input));
+    }, {
+      code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
+      type: TypeError
+    }
+  );
+});
+
+// The `fatal` attribute is a boolean that defaults to false.
+{
+  assert('fatal' in new TextDecoder());
+  assert.strictEqual(typeof new TextDecoder().fatal, 'boolean');
+  assert(!new TextDecoder().fatal);
+  assert(new TextDecoder('utf-8', { fatal: true }).fatal);
+}
diff --git a/test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js b/test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js
@@ -0,0 +1,50 @@
+'use strict';
+
+// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/textdecoder-ignorebom.html
+
+const common = require('../common');
+
+// utf-16be needs an ICU-enabled build (Intl); skip otherwise.
+if (!common.hasIntl)
+  common.skip('missing Intl');
+
+const assert = require('assert');
+const {
+  TextDecoder
+} = require('util');
+
+// "abc" preceded by a byte order mark, in each encoding under test.
+const cases = [
+  {
+    encoding: 'utf-8',
+    bytes: [0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63]
+  },
+  {
+    encoding: 'utf-16le',
+    bytes: [0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00]
+  },
+  {
+    encoding: 'utf-16be',
+    bytes: [0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63]
+  }
+];
+
+// With ignoreBOM the mark stays in the output; otherwise it is stripped.
+cases.forEach((testCase) => {
+  const BOM = '\uFEFF';
+  let decoder = new TextDecoder(testCase.encoding, { ignoreBOM: true });
+  const bytes = new Uint8Array(testCase.bytes);
+  assert.strictEqual(decoder.decode(bytes), `${BOM}abc`);
+  decoder = new TextDecoder(testCase.encoding, { ignoreBOM: false });
+  assert.strictEqual(decoder.decode(bytes), 'abc');
+  decoder = new TextDecoder(testCase.encoding);
+  assert.strictEqual(decoder.decode(bytes), 'abc');
+});
+
+// The `ignoreBOM` attribute is a boolean that defaults to false.
+{
+  assert('ignoreBOM' in new TextDecoder());
+  assert.strictEqual(typeof new TextDecoder().ignoreBOM, 'boolean');
+  assert(!new TextDecoder().ignoreBOM);
+  assert(new TextDecoder('utf-8', { ignoreBOM: true }).ignoreBOM);
+}
diff --git a/test/parallel/test-whatwg-encoding-textdecoder-streaming.js b/test/parallel/test-whatwg-encoding-textdecoder-streaming.js
@@ -0,0 +1,50 @@
+'use strict';
+
+// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/textdecoder-streaming.html
+
+const common = require('../common');
+
+// utf-16be needs an ICU-enabled build (Intl); skip otherwise.
+if (!common.hasIntl)
+  common.skip('missing Intl');
+
+const assert = require('assert');
+const {
+  TextDecoder
+} = require('util');
+
+// Reference string and its encoded form in each encoding under test.
+const string =
+  '\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF';
+const octets = {
+  'utf-8': [
+    0x00, 0x31, 0x32, 0x33, 0x41, 0x42, 0x43, 0x61, 0x62, 0x63, 0xc2, 0x80,
+    0xc3, 0xbf, 0xc4, 0x80, 0xe1, 0x80, 0x80, 0xef, 0xbf, 0xbd, 0xf0, 0x90,
+    0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf],
+  'utf-16le': [
+    0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42, 0x00,
+    0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xFF, 0x00,
+    0x00, 0x01, 0x00, 0x10, 0xFD, 0xFF, 0x00, 0xD8, 0x00, 0xDC, 0xFF, 0xDB,
+    0xFF, 0xDF],
+  'utf-16be': [
+    0x00, 0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42,
+    0x00, 0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xFF,
+    0x01, 0x00, 0x10, 0x00, 0xFF, 0xFD, 0xD8, 0x00, 0xDC, 0x00, 0xDB, 0xFF,
+    0xDF, 0xFF]
+};
+
+// Feed the bytes in chunks of 1 to 5 so that multi-byte sequences get split
+// across decode() calls, then flush and compare with the reference string.
+Object.keys(octets).forEach((encoding) => {
+  for (let len = 1; len <= 5; ++len) {
+    const encoded = octets[encoding];
+    const decoder = new TextDecoder(encoding);
+    let out = '';
+    for (let i = 0; i < encoded.length; i += len) {
+      const sub = encoded.slice(i, i + len);
+      out += decoder.decode(new Uint8Array(sub), { stream: true });
+    }
+    out += decoder.decode();
+    assert.strictEqual(out, string);
+  }
+});