Skip to content

Commit c107406

Browse files
committed
implement the whole string encoder in wasm
1 parent e36f068 commit c107406

File tree

11 files changed

+330
-89
lines changed

11 files changed

+330
-89
lines changed

.eslintrc.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ module.exports = {
2424
// "prefer-const": "warn", // TODO: AssemblyScript has different semantics.
2525
"guard-for-in": "warn",
2626
"curly": "warn",
27+
"no-param-reassign": "warn",
2728

2829
"@typescript-eslint/no-unused-vars":"warn",
2930
"@typescript-eslint/array-type": ["error", "generic"],

assembly/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@
44
// [input][output]
55
export { utf8DecodeToUint16Array } from "./utf8DecodeToUint16Array";
66
export { utf8CountUint16Array } from "./utf8CountUint16Array";
7+
export { utf8EncodeUint16Array } from "./utf8EncodeUint16Array";
78
export { malloc, free } from "./memory";

assembly/memoryBE.ts

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Load/store helpers that read and write values in big-endian byte order.
//
// WebAssembly linear memory is little-endian, so every multi-byte access is
// byte-swapped with bswap(); single-byte accesses need no swap and exist only
// so that callers have a uniform API.
//
// Every helper takes `byteOffset`, an absolute address in linear memory.

/** Loads a big-endian 32-bit float from `byteOffset`. */
@inline
export function loadFloat32BE(byteOffset: usize): f32 {
  // Swap the raw bits as an integer first, then reinterpret them as a float.
  return reinterpret<f32>(bswap<u32>(load<u32>(byteOffset)));
}

/**
 * Misspelled original name of `loadFloat32BE`, kept so existing callers
 * continue to compile.
 *
 * @deprecated Use `loadFloat32BE` instead.
 */
@inline
export function loadFload32BE(byteOffset: usize): f32 {
  return loadFloat32BE(byteOffset);
}

/** Loads a big-endian 64-bit float from `byteOffset`. */
@inline
export function loadFloat64BE(byteOffset: usize): f64 {
  return reinterpret<f64>(bswap<u64>(load<u64>(byteOffset)));
}

/** Loads a signed 8-bit integer from `byteOffset` (no byte swap needed). */
@inline
export function loadInt8BE(byteOffset: usize): i8 {
  return load<i8>(byteOffset);
}

/** Loads a big-endian signed 16-bit integer from `byteOffset`. */
@inline
export function loadInt16BE(byteOffset: usize): i16 {
  return bswap<i16>(load<i16>(byteOffset));
}

/** Loads a big-endian signed 32-bit integer from `byteOffset`. */
@inline
export function loadInt32BE(byteOffset: usize): i32 {
  return bswap<i32>(load<i32>(byteOffset));
}

/** Loads a big-endian signed 64-bit integer from `byteOffset`. */
@inline
export function loadInt64BE(byteOffset: usize): i64 {
  return bswap<i64>(load<i64>(byteOffset));
}

/** Loads an unsigned 8-bit integer from `byteOffset` (no byte swap needed). */
@inline
export function loadUint8BE(byteOffset: usize): u8 {
  return load<u8>(byteOffset);
}

/** Loads a big-endian unsigned 16-bit integer from `byteOffset`. */
@inline
export function loadUint16BE(byteOffset: usize): u16 {
  return bswap<u16>(load<u16>(byteOffset));
}

/** Loads a big-endian unsigned 32-bit integer from `byteOffset`. */
@inline
export function loadUint32BE(byteOffset: usize): u32 {
  return bswap<u32>(load<u32>(byteOffset));
}

/** Loads a big-endian unsigned 64-bit integer from `byteOffset`. */
@inline
export function loadUint64BE(byteOffset: usize): u64 {
  return bswap<u64>(load<u64>(byteOffset));
}

/** Stores `value` at `byteOffset` as a big-endian 32-bit float. */
@inline
export function storeFloat32BE(byteOffset: usize, value: f32): void {
  // Reinterpret the float's bits as an integer so they can be byte-swapped.
  store<u32>(byteOffset, bswap<u32>(reinterpret<u32>(value)));
}

/** Stores `value` at `byteOffset` as a big-endian 64-bit float. */
@inline
export function storeFloat64BE(byteOffset: usize, value: f64): void {
  store<u64>(byteOffset, bswap<u64>(reinterpret<u64>(value)));
}

/** Stores `value` at `byteOffset` as a signed 8-bit integer (no byte swap needed). */
@inline
export function storeInt8BE(byteOffset: usize, value: i8): void {
  store<i8>(byteOffset, value);
}

/** Stores `value` at `byteOffset` as a big-endian signed 16-bit integer. */
@inline
export function storeInt16BE(byteOffset: usize, value: i16): void {
  store<i16>(byteOffset, bswap<i16>(value));
}

/** Stores `value` at `byteOffset` as a big-endian signed 32-bit integer. */
@inline
export function storeInt32BE(byteOffset: usize, value: i32): void {
  store<i32>(byteOffset, bswap<i32>(value));
}

/** Stores `value` at `byteOffset` as a big-endian signed 64-bit integer. */
@inline
export function storeInt64BE(byteOffset: usize, value: i64): void {
  store<i64>(byteOffset, bswap<i64>(value));
}

/** Stores `value` at `byteOffset` as an unsigned 8-bit integer (no byte swap needed). */
@inline
export function storeUint8BE(byteOffset: usize, value: u8): void {
  store<u8>(byteOffset, value);
}

/** Stores `value` at `byteOffset` as a big-endian unsigned 16-bit integer. */
@inline
export function storeUint16BE(byteOffset: usize, value: u16): void {
  store<u16>(byteOffset, bswap<u16>(value));
}

/** Stores `value` at `byteOffset` as a big-endian unsigned 32-bit integer. */
@inline
export function storeUint32BE(byteOffset: usize, value: u32): void {
  store<u32>(byteOffset, bswap<u32>(value));
}

/** Stores `value` at `byteOffset` as a big-endian unsigned 64-bit integer. */
@inline
export function storeUint64BE(byteOffset: usize, value: u64): void {
  store<u64>(byteOffset, bswap<u64>(value));
}

assembly/utf8CountUint16Array.ts

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
1-
export function utf8CountUint16Array(inputPtr: usize, strLength: usize): usize {
1+
import { loadUint16BE } from "./memoryBE";
2+
3+
// inputPtr: u16*
4+
export function utf8CountUint16Array(inputPtr: usize, inputLength: usize): usize {
25
const u16s = sizeof<u16>();
36

47
let byteLength: usize = 0;
58
let pos: usize = inputPtr;
6-
let end = inputPtr + strLength * u16s;
9+
let end = inputPtr + inputLength * u16s;
710
while (pos < end) {
8-
let value = load<u16>(pos);
11+
let value: u32 = loadUint16BE(pos);
912
pos += u16s;
1013

1114
if (value >= 0xd800 && value <= 0xdbff) {
1215
// high surrogate
13-
if (pos < strLength) {
14-
let extra = load<u16>(pos);
16+
if (pos < end) {
17+
let extra: u32 = loadUint16BE(pos);
1518
if ((extra & 0xfc00) === 0xdc00) {
1619
pos += u16s;
1720
value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000;

assembly/utf8EncodeUint16Array.ts

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import { utf8CountUint16Array } from "./utf8CountUint16Array";
2+
import { storeUint8BE, storeUint16BE, storeUint32BE, loadUint16BE } from "./memoryBE";
3+
4+
/**
 * Writes the MessagePack `str` header for a payload of `utf8ByteLength` bytes
 * at `outputPtr`, choosing the smallest format that can hold the length.
 *
 * @param outputPtr address (u8*) where the header starts
 * @param utf8ByteLength length of the UTF-8 payload in bytes
 * @returns the address immediately after the header, where the payload begins
 * @throws Error when the length does not fit in 32 bits
 */
function storeStringHeader(outputPtr: usize, utf8ByteLength: usize): usize {
  if (utf8ByteLength < 32) {
    // fixstr: the length is folded into the single type byte
    storeUint8BE(outputPtr, 0xa0 + (utf8ByteLength as u8));
    return outputPtr + 1;
  }

  if (utf8ByteLength < 0x100) {
    // str 8: type byte followed by a 1-byte length
    storeUint8BE(outputPtr, 0xd9);
    storeUint8BE(outputPtr + 1, utf8ByteLength as u8);
    return outputPtr + 2;
  }

  if (utf8ByteLength < 0x10000) {
    // str 16: type byte followed by a big-endian 2-byte length
    storeUint8BE(outputPtr, 0xda);
    storeUint16BE(outputPtr + 1, utf8ByteLength as u16);
    return outputPtr + 1 + sizeof<u16>();
  }

  if ((utf8ByteLength as u64) < 0x100000000) {
    // str 32: type byte followed by a big-endian 4-byte length
    storeUint8BE(outputPtr, 0xdb);
    storeUint32BE(outputPtr + 1, utf8ByteLength as u32);
    return outputPtr + 1 + sizeof<u32>();
  }

  throw new Error(`Too long string: ${utf8ByteLength} bytes in UTF-8`);
}
28+
29+
// outputPtr: u8*
30+
// inputPtr: u16*
31+
// It adds MessagePack str head bytes to the output
32+
/**
 * Encodes a sequence of UTF-16 code units as a MessagePack string: a `str`
 * header followed by the UTF-8 bytes.
 *
 * @param outputPtr address (u8*) where the header, then the UTF-8 bytes, are written
 * @param inputPtr address (u16*) of the first UTF-16 code unit; units are read
 *   with loadUint16BE, i.e. as byte-swapped 16-bit loads
 * @param inputLength number of UTF-16 code units (not bytes) to read
 * @returns total number of bytes written starting at outputPtr, header included
 */
export function utf8EncodeUint16Array(outputPtr: usize, inputPtr: usize, inputLength: usize): usize {
33+
// The header needs the payload length up front, so the input is scanned once
// to count bytes and then a second time below to emit them.
let utf8ByteLength = utf8CountUint16Array(inputPtr, inputLength);
34+
let strHeaderOffset = storeStringHeader(outputPtr, utf8ByteLength);
35+
36+
const u16s = sizeof<u16>();
37+
let inputOffset = inputPtr;
38+
let inputEnd = inputPtr + inputLength * u16s;
39+
// Payload bytes start right after the header.
let outputOffset = strHeaderOffset;
40+
while (inputOffset < inputEnd) {
41+
let value: u32 = loadUint16BE(inputOffset);
42+
inputOffset += u16s;
43+
if (value >= 0xd800 && value <= 0xdbff) {
44+
// high surrogate
45+
if (inputOffset < inputEnd) {
46+
let extra: u32 = loadUint16BE(inputOffset);
47+
if ((extra & 0xfc00) === 0xdc00) {
48+
// Followed by a low surrogate: consume it and combine the pair into one
// code point at or above 0x10000.
inputOffset += u16s;
49+
value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000;
50+
}
51+
}
52+
// Still in the high-surrogate range means no low surrogate followed.
// NOTE(review): only unpaired HIGH surrogates are dropped here; an unpaired
// low surrogate (0xdc00-0xdfff) never enters this branch and is emitted by
// the 3-byte path below. The header length comes from utf8CountUint16Array,
// so both functions must agree on which units are dropped - verify.
if (value >= 0xd800 && value <= 0xdbff) {
53+
continue; // drop lone surrogate
54+
}
55+
}
56+
57+
// Each branch below emits only the leading byte(s) of the sequence; the
// final continuation byte is written once, after the chain. The 1-byte case
// has no continuation byte, so it skips that store with `continue`.
if ((value & 0xffffff80) === 0) {
58+
// 1-byte
59+
store<u8>(outputOffset++, value);
60+
continue;
61+
} else if ((value & 0xfffff800) === 0) {
62+
// 2-bytes
63+
store<u8>(outputOffset++, ((value >> 6) & 0x1f) | 0xc0);
64+
} else if ((value & 0xffff0000) === 0) {
65+
// 3-byte
66+
store<u8>(outputOffset++, ((value >> 12) & 0x0f) | 0xe0);
67+
store<u8>(outputOffset++, ((value >> 6) & 0x3f) | 0x80);
68+
} else if ((value & 0xffe00000) === 0) {
69+
// 4-byte
70+
store<u8>(outputOffset++, ((value >> 18) & 0x07) | 0xf0);
71+
store<u8>(outputOffset++, ((value >> 12) & 0x3f) | 0x80);
72+
store<u8>(outputOffset++, ((value >> 6) & 0x3f) | 0x80);
73+
} else {
74+
// A 16-bit unit or a combined surrogate pair is at most 0x10ffff, so no
// value can have bits set above bit 20.
unreachable();
75+
}
76+
77+
// Final continuation byte (low 6 bits), shared by the 2-, 3- and 4-byte forms.
store<u8>(outputOffset++, (value & 0x3f) | 0x80);
78+
}
79+
80+
// outputOffset started after the header, so this difference includes the header bytes.
return outputOffset - outputPtr;
81+
}

package-lock.json

Lines changed: 30 additions & 23 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77
"types": "./dist/index.d.ts",
88
"scripts": {
99
"build": "npm publish --dry-run",
10-
"prepare": "npm run clean && npm run asbuild && tsc -p tsconfig.dist.json && webpack",
10+
"prepare": "npm run clean && npm run asbuild:production && tsc -p tsconfig.dist.json && webpack",
1111
"prepublishOnly": "TEST_DIST=true npm run test",
1212
"clean": "rimraf build dist dist.*",
1313
"test": "mocha 'test/**/*.test.ts'",
14+
"test:wasm": "MSGPACK_WASM=force mocha 'test/**/*.test.ts'",
1415
"test:cover": "npx nyc mocha 'test/**/*.test.ts'",
1516
"test:browser": "karma start --single-run",
1617
"test:browser:firefox": "karma start --single-run --browsers FirefoxHeadless",
@@ -24,9 +25,8 @@
2425
"profile:encode": "rimraf isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-encode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer",
2526
"profile:decode": "rimraf isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-decode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer",
2627
"benchmark": "ts-node benchmark/benchmark-from-msgpack-lite.ts",
27-
"asbuild:untouched": "asc assembly/index.ts -b dist/wasm/untouched.wasm -t dist/wasm/untouched.wat --sourceMap --validate --debug",
28-
"asbuild:optimized": "asc assembly/index.ts -b dist/wasm/optimized.wasm -t dist/wasm/optimized.wat --sourceMap --validate -O3",
29-
"asbuild": "npm run asbuild:untouched && npm run asbuild:optimized && ts-node tools/pack-wasm.ts"
28+
"asbuild:development": "asc assembly/index.ts -b dist/wasm/msgpack.wasm -t dist/wasm/msgpack.wat --sourceMap --validate --debug && ts-node tools/pack-wasm.ts",
29+
"asbuild:production": "asc assembly/index.ts -b dist/wasm/msgpack.wasm -t dist/wasm/msgpack.wat --sourceMap --validate -O3 && ts-node tools/pack-wasm.ts"
3030
},
3131
"repository": {
3232
"type": "git",

src/Decoder.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { ExtensionCodec } from "./ExtensionCodec";
33
import { getInt64, getUint64 } from "./utils/int";
44
import { utf8Decode } from "./utils/utf8";
55
import { createDataView, ensureUint8Array } from "./utils/typedArrays";
6+
import { WASM_AVAILABLE, WASM_STR_THRESHOLD, utf8DecodeWasm } from "./wasmFunctions";
67

78
enum State {
89
ARRAY,
@@ -373,13 +374,17 @@ export class Decoder {
373374
});
374375
}
375376

376-
decodeUtf8String(byteLength: number, headOffset: number): string {
377-
if (this.bytes.byteLength < this.pos + headOffset + byteLength) {
377+
decodeUtf8String(byteLength: number, headerOffset: number): string {
378+
if (this.bytes.byteLength < this.pos + headerOffset + byteLength) {
378379
throw MORE_DATA;
379380
}
380381

381-
const object = utf8Decode(this.bytes, this.pos + headOffset, byteLength);
382-
this.pos += headOffset + byteLength;
382+
const offset = this.pos + headerOffset;
383+
const object =
384+
WASM_AVAILABLE && byteLength > WASM_STR_THRESHOLD
385+
? utf8DecodeWasm(this.bytes, offset, byteLength)
386+
: utf8Decode(this.bytes, offset, byteLength);
387+
this.pos += headerOffset + byteLength;
383388
return object;
384389
}
385390

0 commit comments

Comments
 (0)