Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
util: graduate TextEncoder/TextDecoder, tests
Add tests ported from Web Platform Tests.

Graduate TextEncoder / TextDecoder from experimental
  • Loading branch information
jasnell committed Oct 23, 2017
commit 87922733c091d38a0919972574426fd7e68fcb90
4 changes: 0 additions & 4 deletions doc/api/util.md
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,6 @@ see [Custom promisified functions][].
added: v8.3.0
-->

> Stability: 1 - Experimental

An implementation of the [WHATWG Encoding Standard][] `TextDecoder` API.

```js
Expand Down Expand Up @@ -690,8 +688,6 @@ mark.
added: v8.3.0
-->

> Stability: 1 - Experimental

An implementation of the [WHATWG Encoding Standard][] `TextEncoder` API. All
instances of `TextEncoder` only support UTF-8 encoding.

Expand Down
20 changes: 0 additions & 20 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,6 @@ const kEncoding = Symbol('encoding');
const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');

let warned = false;
const experimental =
'The WHATWG Encoding Standard implementation is an experimental API. It ' +
'should not yet be used in production applications.';

const {
getConstructorOf,
customInspectSymbol: inspect
Expand Down Expand Up @@ -289,11 +284,6 @@ function getEncodingFromLabel(label) {

class TextEncoder {
constructor() {
if (!warned) {
warned = true;
process.emitWarning(experimental, 'ExperimentalWarning');
}

this[kEncoder] = true;
}

Expand Down Expand Up @@ -353,11 +343,6 @@ function makeTextDecoderICU() {

class TextDecoder {
constructor(encoding = 'utf-8', options = {}) {
if (!warned) {
warned = true;
process.emitWarning(experimental, 'ExperimentalWarning');
}

encoding = `${encoding}`;
if (typeof options !== 'object')
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
Expand Down Expand Up @@ -430,11 +415,6 @@ function makeTextDecoderJS() {

class TextDecoder {
constructor(encoding = 'utf-8', options = {}) {
if (!warned) {
warned = true;
process.emitWarning(experimental, 'ExperimentalWarning');
}

encoding = `${encoding}`;
if (typeof options !== 'object')
throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object');
Expand Down
72 changes: 72 additions & 0 deletions test/parallel/test-whatwg-encoding-fatal-streaming.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/textdecoder-fatal-streaming.html
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the URL tests use a completely different format. These had to be ported over to work with our test suite.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you put the link to a specific sha instead of blob/master? Otherwise we lose track of the version of the test and if the upstream deletes a file it would be 404


const common = require('../common');
const assert = require('assert');
const {
TextDecoder
} = require('util');


// A truncated byte sequence must throw when the decoder is created with
// `fatal: true`, and must decode to a single U+FFFD replacement character
// when it is not.
{
  const truncatedSequences = [
    { encoding: 'utf-8', sequence: [0xC0] },
    { encoding: 'utf-16le', sequence: [0x00] },
    { encoding: 'utf-16be', sequence: [0x00] }
  ];

  for (const { encoding, sequence } of truncatedSequences) {
    // Hand the byte list to the typed array directly. Wrapping it in another
    // array (`[sequence]`) only appeared to work because a one-element array
    // coerces to its single number; any longer sequence would have coerced
    // to NaN and been stored as a lone 0 byte.
    const data = new Uint8Array(sequence);

    common.expectsError(
      () => {
        const decoder = new TextDecoder(encoding, { fatal: true });
        decoder.decode(data);
      }, {
        code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
        type: TypeError,
        message:
          `The encoded data was not valid for encoding ${encoding}`
      }
    );

    // Without `fatal`, the same bytes yield the replacement character.
    assert.strictEqual(
      new TextDecoder(encoding).decode(data),
      '\uFFFD'
    );
  }
}

// Streaming behaviour of one fatal UTF-16LE decoder. The same decoder
// instance is reused for every step, so the order of the calls below matters.
{
  const decoder = new TextDecoder('utf-16le', { fatal: true });
  // One byte (half of a 16-bit code unit) and two bytes (one full code unit).
  const odd = new Uint8Array([0x00]);
  const even = new Uint8Array([0x00, 0x00]);

  // Streams `streamed`, then finishes with `final`, and expects the pair to
  // be rejected as invalid UTF-16LE data.
  const expectInvalid = (streamed, final) => {
    common.expectsError(
      () => {
        decoder.decode(streamed, { stream: true });
        decoder.decode(final);
      }, {
        code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
        type: TypeError,
        message:
          'The encoded data was not valid for encoding utf-16le'
      }
    );
  };

  // A lone streamed byte produces no output until a second byte completes
  // the code unit.
  assert.strictEqual(decoder.decode(odd, { stream: true }), '');
  assert.strictEqual(decoder.decode(odd), '\u0000');

  // Either ordering leaves a dangling byte when the stream ends.
  expectInvalid(even, odd);
  expectInvalid(odd, even);

  // The decoder still handles whole code units after the failures above.
  assert.strictEqual(decoder.decode(even, { stream: true }), '\u0000');
  assert.strictEqual(decoder.decode(even), '\u0000');
}
55 changes: 55 additions & 0 deletions test/parallel/test-whatwg-encoding-surrogates-utf8.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/api-surrogates-utf8.html

require('../common');
const assert = require('assert');
const {
TextDecoder,
TextEncoder
} = require('util');

// Strings containing unpaired surrogates, the UTF-8 bytes TextEncoder must
// produce for them (each lone surrogate becomes U+FFFD, i.e. EF BF BD), and
// the string obtained by decoding those bytes again.
//
// NOTE: the `name` labels are kept exactly as in the upstream test. In Unicode
// terminology U+D800 is a high (lead) surrogate and U+DC00 is a low (trail)
// surrogate, so "low"/"high" below refer to the numeric value of the code
// unit rather than to the Unicode category.
const badStrings = [
  {
    input: 'abc123',
    expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33],
    decoded: 'abc123',
    name: 'Sanity check'
  },
  {
    input: '\uD800',
    expected: [0xef, 0xbf, 0xbd],
    decoded: '\uFFFD',
    name: 'Surrogate half (low)'
  },
  {
    input: '\uDC00',
    expected: [0xef, 0xbf, 0xbd],
    decoded: '\uFFFD',
    name: 'Surrogate half (high)'
  },
  {
    input: 'abc\uD800123',
    expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
    decoded: 'abc\uFFFD123',
    name: 'Surrogate half (low), in a string'
  },
  {
    input: 'abc\uDC00123',
    expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
    decoded: 'abc\uFFFD123',
    name: 'Surrogate half (high), in a string'
  },
  {
    input: '\uDC00\uD800',
    expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd],
    decoded: '\uFFFD\uFFFD',
    name: 'Wrong order'
  }
];

// Encode each input, compare the raw bytes, then round-trip through a UTF-8
// decoder.
for (const { input, expected, decoded } of badStrings) {
  const encoded = new TextEncoder().encode(input);
  // Copy into a plain array so the comparison is array-to-array.
  const actualBytes = Array.from(encoded);
  assert.deepStrictEqual(actualBytes, expected);

  const roundTripped = new TextDecoder('utf-8').decode(encoded);
  assert.strictEqual(roundTripped, decoded);
}
89 changes: 89 additions & 0 deletions test/parallel/test-whatwg-encoding-textdecoder-fatal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/textdecoder-fatal.html

const common = require('../common');
const assert = require('assert');
const {
TextDecoder
} = require('util');

// Malformed byte sequences used as inputs for fatal-mode decoding.
// Each entry has:
//   encoding - the encoding label handed to the TextDecoder constructor
//   input    - the raw bytes to decode
//   name     - a short human-readable description of why the bytes are invalid
const bad = [
{ encoding: 'utf-8', input: [0xFF], name: 'invalid code' },
{ encoding: 'utf-8', input: [0xC0], name: 'ends early' },
{ encoding: 'utf-8', input: [0xE0], name: 'ends early 2' },
{ encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' },
{ encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail 2' },
{ encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail 3' },
{ encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail 4' },
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail 5' },
{ encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail 6' },
// NOTE(review): these bytes are identical to the 'overlong U+0000 - 6 bytes'
// entry further down; only the label differs.
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80],
name: '> 0x10FFFF' },
{ encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80],
name: 'obsolete lead byte' },
// Overlong encodings
{ encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' },
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x80],
name: 'overlong U+0000 - 3 bytes' },
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80],
name: 'overlong U+0000 - 4 bytes' },
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80],
name: 'overlong U+0000 - 5 bytes' },
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80],
name: 'overlong U+0000 - 6 bytes' },
{ encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' },
{ encoding: 'utf-8', input: [0xE0, 0x81, 0xBF],
name: 'overlong U+007F - 3 bytes' },
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF],
name: 'overlong U+007F - 4 bytes' },
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF],
name: 'overlong U+007F - 5 bytes' },
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF],
name: 'overlong U+007F - 6 bytes' },
{ encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF],
name: 'overlong U+07FF - 3 bytes' },
{ encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF],
name: 'overlong U+07FF - 4 bytes' },
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF],
name: 'overlong U+07FF - 5 bytes' },
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF],
name: 'overlong U+07FF - 6 bytes' },
{ encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF],
name: 'overlong U+FFFF - 4 bytes' },
{ encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF],
name: 'overlong U+FFFF - 5 bytes' },
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF],
name: 'overlong U+FFFF - 6 bytes' },
{ encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF],
name: 'overlong U+10FFFF - 5 bytes' },
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF],
name: 'overlong U+10FFFF - 6 bytes' },
// UTF-16 surrogates encoded as code points in UTF-8
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' },
{ encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' },
{ encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80],
name: 'surrogate pair' },
// The only non-UTF-8 entry: a single byte cannot form a 16-bit code unit.
{ encoding: 'utf-16le', input: [0x00], name: 'truncated code unit' },
// Mismatched UTF-16 surrogates are exercised in utf16-surrogates.html
// FIXME: Add legacy encoding cases
];

// Every entry in `bad` must make a decoder created with `fatal: true` throw
// ERR_ENCODING_INVALID_ENCODED_DATA as a TypeError.
for (const { encoding, input } of bad) {
  const decodeFatally = () => {
    const decoder = new TextDecoder(encoding, { fatal: true });
    decoder.decode(new Uint8Array(input));
  };

  common.expectsError(decodeFatally, {
    code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
    type: TypeError
  });
}

// The `fatal` attribute must exist, be a boolean, default to false, and
// reflect the option passed to the constructor.
{
  const byDefault = new TextDecoder();
  const strict = new TextDecoder('utf-8', { fatal: true });

  assert('fatal' in byDefault);
  assert.strictEqual(typeof byDefault.fatal, 'boolean');
  assert(!byDefault.fatal);
  assert(strict.fatal);
}
42 changes: 42 additions & 0 deletions test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/textdecoder-ignorebom.html

require('../common');
const assert = require('assert');
const {
TextDecoder
} = require('util');

// The text "abc" preceded by a byte order mark, in each supported Unicode
// encoding.
const cases = [
  {
    encoding: 'utf-8',
    bytes: [0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63]
  },
  {
    encoding: 'utf-16le',
    bytes: [0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00]
  },
  {
    encoding: 'utf-16be',
    bytes: [0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63]
  }
];

// With `ignoreBOM: true` the BOM is kept in the output; with
// `ignoreBOM: false`, or with no options at all, it is stripped.
for (const { encoding, bytes: rawBytes } of cases) {
  const BOM = '\uFEFF';

  const keepingBOM = new TextDecoder(encoding, { ignoreBOM: true });
  const input = new Uint8Array(rawBytes);
  assert.strictEqual(keepingBOM.decode(input), `${BOM}abc`);

  const strippingBOM = new TextDecoder(encoding, { ignoreBOM: false });
  assert.strictEqual(strippingBOM.decode(input), 'abc');

  const withDefaults = new TextDecoder(encoding);
  assert.strictEqual(withDefaults.decode(input), 'abc');
}

// The `ignoreBOM` attribute must exist, be a boolean, default to false, and
// reflect the option passed to the constructor.
{
  const byDefault = new TextDecoder();
  const keepingBOM = new TextDecoder('utf-8', { ignoreBOM: true });

  assert('ignoreBOM' in byDefault);
  assert.strictEqual(typeof byDefault.ignoreBOM, 'boolean');
  assert(!byDefault.ignoreBOM);
  assert(keepingBOM.ignoreBOM);
}
44 changes: 44 additions & 0 deletions test/parallel/test-whatwg-encoding-textdecoder-streaming.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
'use strict';

// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/textdecoder-streaming.html

require('../common');
const assert = require('assert');
const {
TextDecoder
} = require('util');

// Reference text, covering ASCII, Latin-1, BMP and supplementary-plane
// characters, and its byte encoding in each supported Unicode encoding.
const string =
  '\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF';
const octets = {
  'utf-8': [
    0x00, 0x31, 0x32, 0x33, 0x41, 0x42, 0x43, 0x61, 0x62, 0x63, 0xc2, 0x80,
    0xc3, 0xbf, 0xc4, 0x80, 0xe1, 0x80, 0x80, 0xef, 0xbf, 0xbd, 0xf0, 0x90,
    0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf],
  'utf-16le': [
    0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42, 0x00,
    0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xFF, 0x00,
    0x00, 0x01, 0x00, 0x10, 0xFD, 0xFF, 0x00, 0xD8, 0x00, 0xDC, 0xFF, 0xDB,
    0xFF, 0xDF],
  'utf-16be': [
    0x00, 0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42,
    0x00, 0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xFF,
    0x01, 0x00, 0x10, 0x00, 0xFF, 0xFD, 0xD8, 0x00, 0xDC, 0x00, 0xDB, 0xFF,
    0xDF, 0xFF]
};

// Feed each encoding's bytes to a streaming decoder in chunks of 1 to 5
// bytes. Whatever the chunk size, the concatenated output must equal the
// reference text.
for (const encoding of Object.keys(octets)) {
  const encoded = octets[encoding];

  for (let len = 1; len <= 5; ++len) {
    const decoder = new TextDecoder(encoding);
    let out = '';

    for (let start = 0; start < encoded.length; start += len) {
      // `slice` clamps at the end of the array, so the last chunk may be
      // shorter than `len`.
      const chunk = new Uint8Array(encoded.slice(start, start + len));
      out += decoder.decode(chunk, { stream: true });
    }

    // A final call without input flushes the decoder and ends the stream.
    out += decoder.decode();
    assert.strictEqual(out, string);
  }
}
Loading