Skip to content

Commit e12a2ae

Browse files
committed
buffer: speed up Buffer.isEncoding() method
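Use a generated automaton (a switch on the string length followed by per-character code comparisons) to avoid calling toLowerCase(). This is faster, but the code is less clean.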
1 parent f431984 commit e12a2ae

3 files changed

Lines changed: 111 additions & 22 deletions

File tree

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
'use strict';
2+
3+
const common = require('../common.js');
4+
const v8 = require('v8');
5+
6+
// Benchmark configuration. The `encoding` parameter lists, in order:
//   - ten encoding names in lower case,
//   - the same ten names in upper case,
//   - six strings that are not encoding names ('utf9' ... 'new gnu gun').
// NOTE(review): presumably createBenchmark() runs main() once per listed
// value, passing it as conf.encoding — confirm against ../common.js.
const bench = common.createBenchmark(main, {
7+
encoding: [
8+
'hex',
9+
'utf8',
10+
'utf-8',
11+
'ascii',
12+
'binary',
13+
'base64',
14+
'ucs2',
15+
'ucs-2',
16+
'utf16le',
17+
'utf-16le',
18+
'HEX',
19+
'UTF8',
20+
'UTF-8',
21+
'ASCII',
22+
'BINARY',
23+
'BASE64',
24+
'UCS2',
25+
'UCS-2',
26+
'UTF16LE',
27+
'UTF-16LE',
28+
'utf9',
29+
'utf-7',
30+
'utf17le',
31+
'utf-17le',
32+
'Unicode-FTW',
33+
'new gnu gun'
34+
]
35+
});
36+
37+
/**
 * Benchmark body: times 1024 * 1024 calls of Buffer.isEncoding() on a single
 * input string.
 *
 * @param {Object} conf - Benchmark parameters; only `conf.encoding` is read.
 */
function main(conf) {
38+
var encoding = conf.encoding;
39+
// Force optimization before starting the benchmark
40+
Buffer.isEncoding(encoding);
41+
// %OptimizeFunctionOnNextCall is V8 natives syntax, so the flag is enabled
// at runtime and the call goes through eval() so that this file itself still
// parses without the flag.
v8.setFlagsFromString('--allow_natives_syntax');
42+
eval('%OptimizeFunctionOnNextCall(Buffer.isEncoding)');
43+
Buffer.isEncoding(encoding);
44+
45+
bench.start();
46+
var n = 1024 * 1024;
47+
for (let i = 0; i < n; i++) {
48+
Buffer.isEncoding(encoding);
49+
}
50+
// The iteration count is handed to bench.end().
bench.end(n);
51+
}

lib/buffer.js

Lines changed: 55 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -183,32 +183,67 @@ Buffer.compare = function compare(a, b) {
183183
return binding.compare(a, b);
184184
};
185185

186+
/**
 * Builds a case-insensitive membership test for a fixed list of names.
 *
 * The returned function is generated source code: it switches on the length
 * of its argument and then compares character codes one by one, so no
 * lower-cased copy of the argument is ever allocated.
 *
 * Matching rules of the generated function:
 *   - a lower-case ASCII letter (a-z) in a list entry matches that letter in
 *     either case;
 *   - every other character in a list entry must match exactly;
 *   - a non-string argument is first coerced with `'' + value`, so
 *     `undefined` and `null` yield `false` instead of throwing.
 *
 * @param {string[]} list - Names to recognize, normally written in lower case.
 * @returns {function(*): boolean} Predicate that is `true` only for an
 *     argument matching one of the entries of `list`.
 */
var make = function (list) {
  var tpl = '';
  var append = function (line) {
    tpl += line + '\n';
  };

  // Distinct entry lengths in ascending order; each becomes one `case`.
  var lengths = [];
  list.forEach(function (item) {
    if (lengths.indexOf(item.length) === -1)
      lengths.push(item.length);
  });
  lengths.sort(function (a, b) {
    return a - b;
  });

  // Reading `.length` of undefined/null would throw, and other non-strings
  // would be compared by their own `length`; coerce first so the predicate
  // answers for any argument that can be converted to a string.
  append(`if (typeof encoding !== 'string') encoding = '' + encoding;`);
  append(`switch (encoding.length) {`);
  lengths.forEach(function (length) {
    append(`  case ${length}:`);

    // Read every character code once per case; all entries of this length
    // share the reads.
    for (var pos = 0; pos < length; pos++) {
      append(`    var code${pos} = encoding.charCodeAt(${pos});`);
    }

    list.filter(function (item) {
      return item.length === length;
    }).forEach(function (word) {
      // Only numeric character codes are written into the generated source;
      // the text of `word` itself is never interpolated.
      var conditions = [];
      for (var k = 0; k < word.length; k++) {
        var code = word.charCodeAt(k);
        if (code >= 97 && code <= 122) {
          // a-z: also accept the upper-case letter, which is 32 lower.
          conditions.push(
            `(code${k} === ${code} || code${k} === ${code - 32})`);
        } else {
          conditions.push(`code${k} === ${code}`);
        }
      }

      // An empty entry has no characters to compare; the length check alone
      // decides, and `if ()` would not parse.
      append(`    if (${conditions.join(' && ') || 'true'}) {`);
      append(`      return true;`);
      append(`    }`);
      append(``);
    });

    append(`    break;`);
  });
  append(`}`);

  append(`return false;`);
  // NOTE(review): new Function() evaluates generated source. That is safe
  // here because only numbers derived from `list` are emitted, but `list`
  // should still stay a fixed, internal constant.
  return new Function('encoding', tpl);
};
211240

241+
// Encoding names, written in lower case, that Buffer.isEncoding() accepts.
// make() turns the list into the predicate; it matches each a-z letter in
// either case.
var cases = [
242+
'hex', 'utf8', 'utf-8', 'ascii', 'binary', 'base64',
243+
'ucs2', 'ucs-2', 'utf16le', 'utf-16le'
244+
];
245+
246+
Buffer.isEncoding = make(cases);
212247

213248
Buffer.concat = function(list, length) {
214249
if (!Array.isArray(list))

test/parallel/test-buffer.js

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -998,14 +998,17 @@ Buffer(Buffer(0), 0, 0);
998998
'ucs-2',
999999
'utf16le',
10001000
'utf-16le' ].forEach(function(enc) {
1001-
assert.equal(Buffer.isEncoding(enc), true);
1001+
assert.equal(Buffer.isEncoding(enc), true, `${enc} should be an encoding`);
10021002
});
10031003

10041004
// None of these strings names an encoding, so Buffer.isEncoding() must
// return false for each; the message identifies the offending input.
[ 'utf9',
  'utf-7',
  'utf17le',
  'utf-17le',
  'Unicode-FTW',
  'new gnu gun' ].forEach(function(enc) {
  assert.equal(Buffer.isEncoding(enc), false,
               `${enc} should not be an encoding`);
});
10101013

10111014

0 commit comments

Comments
 (0)