Skip to content

Commit 9a3c7bf

Browse files
committed
fix large string decode issues
1 parent 78ceac5 commit 9a3c7bf

File tree

3 files changed

+26
-9
lines changed

3 files changed

+26
-9
lines changed

src/utils/utf8.ts

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import { prettyByte } from "./prettyByte";
2-
31
export function utf8Count(str: string): number {
42
const strLength = str.length;
53

@@ -89,6 +87,23 @@ export function utf8Encode(str: string, output: DataView, outputOffset: number):
8987
}
9088
}
9189

90+
// Maximum number of UTF-16 code units passed to a single
// `String.fromCharCode.apply()` call. Each unit becomes one call argument, and
// JS engines cap the argument count (exceeding it throws
// "RangeError: Maximum call stack size exceeded").
const CHUNK_SIZE = 0x10_000;

/**
 * Converts a sequence of UTF-16 code units into a string without passing
 * more than `CHUNK_SIZE` arguments to a single `String.fromCharCode` call.
 *
 * @param units - UTF-16 code units, as a plain array or a `Uint16Array`.
 * @returns The string made of `units`, in order; `""` for empty input.
 */
export function safeStringFromCharCode(units: Array<number> | Uint16Array): string {
  if (units.length <= CHUNK_SIZE) {
    // `String.fromCharCode.apply()` is faster than `String.fromCharCode(...units)`
    // in case `units` is a typed array
    return String.fromCharCode.apply(String, units as any);
  }

  let result = "";
  // Advance by whole chunks: one iteration per chunk, not per code unit.
  // Iterating per unit would still yield the right string (slices past the end
  // are empty) but would perform `units.length` slice/apply calls.
  for (let start = 0; start < units.length; start += CHUNK_SIZE) {
    const chunk = units.slice(start, start + CHUNK_SIZE);
    result += String.fromCharCode.apply(String, chunk as any);
  }
  return result;
}
106+
92107
export function utf8Decode(bytes: Uint8Array, outputOffset: number, byteLength: number): string {
93108
let offset = outputOffset;
94109
const out: Array<number> = [];
@@ -123,5 +138,6 @@ export function utf8Decode(bytes: Uint8Array, outputOffset: number, byteLength:
123138
out.push(byte1);
124139
}
125140
}
126-
return String.fromCharCode(...out);
141+
142+
return safeStringFromCharCode(out);
127143
}

src/wasmFunctions.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import { safeStringFromCharCode } from "./utils/utf8";
2+
13
// WASM=never - disable WASM functions
24
// WASM=force - force to use WASM functions
35
const WASM: string = process.env.MSGPACK_WASM || process.env.WASM || "";
@@ -70,10 +72,8 @@ export function utf8DecodeWasm(bytes: Uint8Array, offset: number, byteLength: nu
7072
setMemoryU8(inputPtr, bytes.subarray(offset, offset + byteLength), byteLength);
7173

7274
const outputArraySize = wm.utf8DecodeToUint16Array(outputPtr, inputPtr, byteLength);
73-
const codepoints = new Uint16Array(wm.memory.buffer, outputPtr, outputArraySize);
74-
75-
// FIXME: split codepoints if it is too long (the maximum size depends on the JS engine, though).
76-
return String.fromCharCode.apply(String, codepoints as any);
75+
const units = new Uint16Array(wm.memory.buffer, outputPtr, outputArraySize);
76+
return safeStringFromCharCode(units);
7777
} finally {
7878
wm.free(inputPtr);
7979
wm.free(outputPtr);

test/msgpack-test-suite.test.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,9 @@ describe("msgpack-test-suite", () => {
9191
FLOAT64_POSITIVE_INF: Number.POSITIVE_INFINITY,
9292
FLOAT64_NEGATIVE_INF: Number.NEGATIVE_INFINITY,
9393
FLOAT64_NAN: Number.NaN,
94-
STR16: "x".repeat(0x100),
95-
STR32: "x".repeat(0x10000),
94+
STR16: "a".repeat(0x100),
95+
STR32: "b".repeat(0x10_000),
96+
STR32LARGE: "c".repeat(0x100_000), // may cause "RangeError: Maximum call stack size exceeded" in simple implementations
9697
BIN16: new Uint8Array(0x100).fill(0xff),
9798
BIN32: new Uint8Array(0x10000).fill(0xff),
9899
ARRAY16: new Array<boolean>(0x100).fill(true),

0 commit comments

Comments
 (0)