Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
lib: improve normalize encoding performance
This focuses on the common case by making sure they are prioritized.
It also changes some typeof checks to test for undefined since
that is faster and it adds a benchmark.
  • Loading branch information
BridgeAR committed Feb 22, 2018
commit f8249bb3e2d6ae50facd833e6297668539262f88
43 changes: 43 additions & 0 deletions benchmark/buffers/buffer-normalize-encoding.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
'use strict';

const common = require('../common.js');

const bench = common.createBenchmark(main, {
encoding: [
'ascii',
'ASCII',
'base64',
'BASE64',
'binary',
'BINARY',
'hex',
'HEX',
'latin1',
'LATIN1',
'ucs-2',
'UCS-2',
'ucs2',
'UCS2',
'utf-16le',
'UTF-16LE',
'utf-8',
'UTF-8',
'utf16le',
'UTF16LE',
'utf8',
'UTF8'
],
n: [1e6]
}, {
flags: ['--expose-internals']
});

function main({ encoding, n }) {
const { normalizeEncoding } = require('internal/util');

bench.start();
for (var i = 0; i < n; i++) {
normalizeEncoding(encoding);
}
bench.end(n);
}
4 changes: 2 additions & 2 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ function assertSize(size) {
err = new errors.RangeError('ERR_INVALID_OPT_VALUE', 'size', size);
}

if (err) {
if (err !== null) {
Error.captureStackTrace(err, assertSize);
throw err;
}
Expand Down Expand Up @@ -428,7 +428,7 @@ Buffer.compare = function compare(a, b) {

Buffer.isEncoding = function isEncoding(encoding) {
return typeof encoding === 'string' &&
typeof normalizeEncoding(encoding) === 'string';
normalizeEncoding(encoding) !== undefined;
};
Buffer[kIsEncodingSymbol] = Buffer.isEncoding;

Expand Down
77 changes: 50 additions & 27 deletions lib/internal/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,36 +96,59 @@ function assertCrypto() {
throw new errors.Error('ERR_NO_CRYPTO');
}

// The loop should only run at most twice, retrying with lowercased enc
// if there is no match in the first pass.
// We use a loop instead of branching to retry with a helper
// function in order to avoid the performance hit.
// Return undefined if there is no match.
// Move the "slow cases" to a separate function to make sure this function gets
// inlined properly. That prioritizes the common case.
function normalizeEncoding(enc) {
if (enc == null || enc === '') return 'utf8';
let retried;
while (true) {
switch (enc) {
case 'utf8':
case 'utf-8':
return 'utf8';
case 'ucs2':
case 'ucs-2':
case 'utf16le':
case 'utf-16le':
if (enc == null || enc === 'utf8' || enc === 'utf-8') return 'utf8';
return slowCases(enc);
}

function slowCases(enc) {
switch (enc.length) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised this improves performance noticeably.

case 4:
if (enc === 'UTF8') return 'utf8';
if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
enc = `${enc}`.toLowerCase();
if (enc === 'utf8') return 'utf8';
if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
break;
case 3:
if (enc === 'hex' || enc === 'HEX' || `${enc}`.toLowerCase() === 'hex')
return 'hex';
break;
case 5:
if (enc === 'ascii') return 'ascii';
if (enc === 'ucs-2') return 'utf16le';
if (enc === 'UTF-8') return 'utf8';
if (enc === 'ASCII') return 'ascii';
if (enc === 'UCS-2') return 'utf16le';
enc = `${enc}`.toLowerCase();
if (enc === 'utf-8') return 'utf8';
if (enc === 'ascii') return 'ascii';
if (enc === 'usc-2') return 'utf16le';
break;
case 6:
if (enc === 'base64') return 'base64';
if (enc === 'latin1' || enc === 'binary') return 'latin1';
if (enc === 'BASE64') return 'base64';
if (enc === 'LATIN1' || enc === 'BINARY') return 'latin1';
enc = `${enc}`.toLowerCase();
if (enc === 'base64') return 'base64';
if (enc === 'latin1' || enc === 'binary') return 'latin1';
break;
case 7:
if (enc === 'utf16le' || enc === 'UTF16LE' ||
`${enc}`.toLowerCase() === 'utf16le')
return 'utf16le';
case 'latin1':
case 'binary':
return 'latin1';
case 'base64':
case 'ascii':
case 'hex':
return enc;
default:
if (retried) return; // undefined
enc = ('' + enc).toLowerCase();
retried = true;
}
break;
case 8:
if (enc === 'utf-16le' || enc === 'UTF-16LE' ||
`${enc}`.toLowerCase() === 'utf-16le')
return 'utf16le';
break;
default:
if (enc === '') return 'utf8';
}
}

Expand Down
10 changes: 6 additions & 4 deletions lib/string_decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ const kNativeDecoder = Symbol('kNativeDecoder');
// modules monkey-patch it to support additional encodings
function normalizeEncoding(enc) {
const nenc = internalUtil.normalizeEncoding(enc);
if (typeof nenc !== 'string' &&
(Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
return nenc || enc;
if (nenc === undefined) {
if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
return enc;
}
return nenc;
}

const encodingsMap = {};
Expand Down