Skip to content

Commit 8a663c4

Browse files
committed
use memory allocator in wasm functions
1 parent 6066940 commit 8a663c4

File tree

7 files changed

+106
-78
lines changed

7 files changed

+106
-78
lines changed

assembly/index.ts

Lines changed: 2 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2,46 +2,5 @@
22

33
// memory is assumed:
44
// [input][output]
5-
6-
/**
 * Decodes UTF-8 bytes into UTF-16 code units inside linear memory.
 *
 * Input is read from addresses [0, byteLength); each decoded code unit is
 * written as a u16 starting at `outputOffset`. Returns the byte address just
 * past the last write, not an element count.
 *
 * NOTE(review): continuation bytes are read without checking `byteLength` or
 * the 10xxxxxx marker, so malformed or truncated input is not rejected.
 */
export function utf8ToUtf16(byteLength: i32, outputOffset: i32): i32 {
7-
let inputOffset: i32 = 0;
8-
while (inputOffset < byteLength) {
9-
let byte1: u16 = load<u8>(inputOffset++);
10-
if ((byte1 & 0x80) === 0) {
11-
// 1 byte
12-
store<u16>(outputOffset, byte1);
13-
outputOffset += 2;
14-
} else if ((byte1 & 0xe0) === 0xc0) {
15-
// 2 bytes
16-
let byte2: u16 = load<u8>(inputOffset++) & 0x3f;
17-
// FIXME: consider endians
18-
store<u16>(outputOffset, ((byte1 & 0x1f) << 6) | byte2);
19-
outputOffset += 2;
20-
} else if ((byte1 & 0xf0) === 0xe0) {
21-
// 3 bytes
22-
let byte2: u16 = load<u8>(inputOffset++) & 0x3f;
23-
let byte3: u16 = load<u8>(inputOffset++) & 0x3f;
24-
store<u16>(outputOffset, ((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3);
25-
outputOffset += 2;
26-
} else if ((byte1 & 0xf8) === 0xf0) {
27-
// 4 bytes
28-
let byte2 = load<u8>(inputOffset++) & 0x3f;
29-
let byte3 = load<u8>(inputOffset++) & 0x3f;
30-
let byte4 = load<u8>(inputOffset++) & 0x3f;
31-
let codepoint: i32 = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4;
32-
// Code points above U+FFFF are emitted as a surrogate pair: the high
// surrogate is stored here, the low surrogate by the common store below.
if (codepoint > 0xffff) {
33-
codepoint -= 0x10000;
34-
store<u16>(outputOffset, ((codepoint >>> 10) & 0x3ff) | 0xd800);
35-
outputOffset += 2;
36-
codepoint = 0xdc00 | (codepoint & 0x3ff);
37-
}
38-
store<u16>(outputOffset, codepoint);
39-
outputOffset += 2;
40-
} else {
41-
// invalid UTF-8
42-
// NOTE(review): `outputOffset++` followed by `+= 2` advances 3 bytes, so
// every later u16 store lands on an odd address.
store<u16>(outputOffset++, byte1);
43-
outputOffset += 2;
44-
}
45-
}
46-
return outputOffset;
47-
}
5+
export { utf8DecodeToUint16Array } from "./utf8DecodeToUint16Array";
6+
export { malloc, free } from "./memory";

assembly/memory.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import "allocator/tlsf";
2+
3+
/**
 * Reserves `size` bytes through the imported allocator and hands the address
 * of the new region back to the host.
 */
export function malloc(size: usize): usize {
  const ptr = memory.allocate(size);
  return ptr;
}
6+
7+
/**
 * Returns the region at `ptr` to the allocator.
 * `ptr` is presumably an address previously obtained from `malloc` (confirm with callers).
 */
export function free(ptr: usize): void {
  const address = ptr;
  memory.free(address);
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/**
 * Decodes a UTF-8 byte sequence held in linear memory into UTF-16 code units.
 *
 * Reads `byteLength` bytes starting at `inputPtr` and writes one `u16` per
 * code unit starting at `outputPtr`. For complete sequences every input byte
 * yields at most one code unit (a 4-byte sequence yields two), so an output
 * buffer of `byteLength * sizeof<u16>()` bytes is sufficient.
 *
 * A byte that is not a valid lead byte is passed through as a single code unit.
 *
 * NOTE(review): continuation bytes are read without checking `inputOffsetEnd`
 * or the 10xxxxxx marker, so a truncated trailing sequence reads past the
 * input; callers are presumably expected to pass complete sequences.
 *
 * @param outputPtr address of the u16 output buffer
 * @param inputPtr address of the first UTF-8 byte
 * @param byteLength number of UTF-8 bytes to decode
 * @returns the number of u16 code units written (not a byte count)
 */
export function utf8DecodeToUint16Array(outputPtr: usize, inputPtr: usize, byteLength: usize): usize {
  let inputOffset = inputPtr;
  let outputOffset = outputPtr;
  let inputOffsetEnd = inputOffset + byteLength;
  const u16s = sizeof<u16>();

  while (inputOffset < inputOffsetEnd) {
    let byte1: u16 = load<u8>(inputOffset++);
    if ((byte1 & 0x80) === 0) {
      // 1 byte
      store<u16>(outputOffset, byte1);
      outputOffset += u16s;
    } else if ((byte1 & 0xe0) === 0xc0) {
      // 2 bytes
      let byte2: u16 = load<u8>(inputOffset++) & 0x3f;
      // FIXME: consider endians
      store<u16>(outputOffset, ((byte1 & 0x1f) << 6) | byte2);
      outputOffset += u16s;
    } else if ((byte1 & 0xf0) === 0xe0) {
      // 3 bytes
      let byte2: u16 = load<u8>(inputOffset++) & 0x3f;
      let byte3: u16 = load<u8>(inputOffset++) & 0x3f;
      store<u16>(outputOffset, ((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3);
      outputOffset += u16s;
    } else if ((byte1 & 0xf8) === 0xf0) {
      // 4 bytes
      let byte2 = load<u8>(inputOffset++) & 0x3f;
      let byte3 = load<u8>(inputOffset++) & 0x3f;
      let byte4 = load<u8>(inputOffset++) & 0x3f;
      let codepoint: i32 = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4;
      if (codepoint > 0xffff) {
        // Beyond the BMP: emit the high surrogate here; the low surrogate
        // is written by the common store below.
        codepoint -= 0x10000;
        store<u16>(outputOffset, ((codepoint >>> 10) & 0x3ff) | 0xd800);
        outputOffset += u16s;
        codepoint = 0xdc00 | (codepoint & 0x3ff);
      }
      store<u16>(outputOffset, codepoint);
      outputOffset += u16s;
    } else {
      // invalid UTF-8: pass the byte through as one code unit.
      // The offset must advance by exactly one u16; advancing further would
      // misalign every later store and break the element count returned below.
      store<u16>(outputOffset, byte1);
      outputOffset += u16s;
    }
  }
  return (outputOffset - outputPtr) / u16s;
}

benchmark/string.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
/* eslint-disable no-console */
12
import { encode, decode } from "../src";
23

34
const data = "Hello, 🌏\n".repeat(10000);

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
"profile:encode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-encode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer",
2525
"profile:decode": "rm -f isolate-*.log ; node --prof --require ts-node/register -e 'require(\"./benchmark/profile-decode\")' && node --prof-process --preprocess -j isolate-*.log | npx flamebearer",
2626
"benchmark": "ts-node benchmark/benchmark-from-msgpack-lite.ts",
27-
"asbuild:untouched": "asc assembly/index.ts -b build/wasm/untouched.wasm -t build/wasm/untouched.wat --sourceMap --validate --debug --measure",
28-
"asbuild:optimized": "asc assembly/index.ts -b build/wasm/optimized.wasm -t build/wasm/optimized.wat --sourceMap --validate -O3 --measure",
27+
"asbuild:untouched": "asc assembly/index.ts -b build/wasm/untouched.wasm -t build/wasm/untouched.wat --sourceMap --validate --debug",
28+
"asbuild:optimized": "asc assembly/index.ts -b build/wasm/optimized.wasm -t build/wasm/optimized.wat --sourceMap --validate -O3",
2929
"asbuild": "rm -rf build/wasm && npm run asbuild:untouched && npm run asbuild:optimized && ts-node tools/pack-wasm.ts"
3030
},
3131
"repository": {

src/utils/utf8.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { prettyByte } from "./prettyByte";
2-
import { WASM_AVAILABLE, utf8DecodeWasm } from "../wasmFunctions";
2+
import { WASM_AVAILABLE, WASM_DEBUG, utf8DecodeWasm } from "../wasmFunctions";
33

4-
const WASM_THRESHOLD = 0x100;
4+
const WASM_THRESHOLD = WASM_DEBUG ? 0 : 0x100;
55

66
export function utf8Count(str: string): number {
77
const strLength = str.length;

src/wasmFunctions.ts

Lines changed: 44 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,64 @@
1+
/* eslint-disable no-console */
2+
3+
// TODO: Use TypeScript built-in type
4+
declare const WebAssembly: any;
5+
6+
/**
 * True when the environment variable WASM_DEBUG is exactly "true".
 *
 * `process` is probed with `typeof` because referencing an undeclared global
 * (e.g. in a browser without a `process` shim) throws a ReferenceError.
 */
export const WASM_DEBUG = !!(typeof process !== "undefined" && process.env && process.env.WASM_DEBUG === "true");
7+
18
// Packed WebAssembly module; stays undefined when loading fails.
// The debug flag selects the untouched build over the optimized one. Both
// paths stay as literal strings so each `require` remains statically visible.
let wasmModule: any;
try {
  wasmModule = (WASM_DEBUG
    ? require("../build/wasm/untouched.wasm.js")
    : require("../build/wasm/optimized.wasm.js")
  ).wasmModule;
} catch (e) {
  // WebAssembly is not supported.
  if (WASM_DEBUG) {
    console.error(e);
  }
}
721

8-
declare var WebAssembly: any;
9-
const WASM_MEMORY_PAGE_SIZE = 0x10000; // 64KiB
22+
/**
 * Host implementation of the `env.abort` import required by the wasm module.
 *
 * AssemblyScript invokes it as `abort(message, fileName, line, column)`, so the
 * message pointer must be accepted first; otherwise it is mistaken for the
 * filename and line/column are shifted by one position.
 *
 * @param _message pointer to the message string in wasm memory (not decoded here)
 * @param filename pointer to the source file name in wasm memory (reported as a raw number)
 * @param line source line of the abort
 * @param column source column of the abort
 * @throws Error always
 */
function abort(_message: number, filename: number, line: number, column: number): void {
  throw new Error(`abort called at ${filename}:${line}:${column}`);
}
1025

11-
const defaultWasmInstance = wasmModule && new WebAssembly.Instance(wasmModule);
26+
// Shared instance used when callers do not supply their own; stays falsy when
// the module could not be loaded.
const defaultWasmInstance =
  wasmModule &&
  new WebAssembly.Instance(wasmModule, {
    env: {
      abort,
    },
  });

/**
 * True when the wasm module was loaded and the environment variable NO_WASM is
 * not "true". `process` is probed with `typeof` so that environments without a
 * `process` global do not throw a ReferenceError here.
 */
export const WASM_AVAILABLE =
  !!wasmModule && !(typeof process !== "undefined" && process.env && process.env.NO_WASM === "true");
1435

15-
function copyArrayBuffer(dest: ArrayBuffer, src: Uint8Array) {
16-
const destView = new Uint8Array(dest);
36+
type pointer = number;
37+
38+
/**
 * Copies `src` into the instance's exported memory at `destPtr`, through a
 * byte view limited to `size` bytes.
 */
function setMemory(wasm: any, destPtr: pointer, src: Uint8Array, size: number) {
  new Uint8Array(wasm.exports.memory.buffer, destPtr, size).set(src);
}
1942

2043
/**
 * Decodes `byteLength` UTF-8 bytes of `bytes`, starting at `offset`, into a
 * string using the wasm module's `utf8DecodeToUint16Array` export.
 *
 * Both scratch buffers are obtained from the module's `malloc` and are always
 * released, including when decoding or the second allocation throws.
 *
 * @param bytes source buffer
 * @param offset index of the first byte to decode
 * @param byteLength number of bytes to decode
 * @param wasm instance to use; defaults to the shared module instance
 * @returns the decoded string
 * @throws Error when no wasm instance is available
 */
export function utf8DecodeWasm(
  bytes: Uint8Array,
  offset: number,
  byteLength: number,
  wasm = defaultWasmInstance,
): string {
  if (!wasm) {
    throw new Error("No WebAssembly available");
  }
  if (byteLength === 0) {
    // Nothing to decode; avoids zero-sized allocations.
    return "";
  }

  // Upper bound on arguments per `apply` call: engines limit argument counts,
  // so long outputs are converted in slices.
  const MAX_APPLY_ARGS = 0x1000;

  const inputPtr: pointer = wasm.exports.malloc(byteLength);
  try {
    // The output buffer is sized at one u16 (2 bytes) per input byte.
    const outputPtr: pointer = wasm.exports.malloc(byteLength * 2);
    try {
      setMemory(wasm, inputPtr, bytes.subarray(offset, offset + byteLength), byteLength);

      const outputArraySize: number = wasm.exports.utf8DecodeToUint16Array(outputPtr, inputPtr, byteLength);
      // `memory.buffer` is read again here, after both allocations, instead of
      // reusing an earlier reference.
      const codepoints = new Uint16Array(wasm.exports.memory.buffer, outputPtr, outputArraySize);

      // Slices may split a surrogate pair; concatenation rejoins the code units.
      let result = "";
      for (let start = 0; start < outputArraySize; start += MAX_APPLY_ARGS) {
        result += String.fromCharCode.apply(String, codepoints.subarray(start, start + MAX_APPLY_ARGS) as any);
      }
      return result;
    } finally {
      wasm.exports.free(outputPtr);
    }
  } finally {
    wasm.exports.free(inputPtr);
  }
}

0 commit comments

Comments
 (0)