Skip to content

Commit 3ed980b

Browse files
authored
Merge pull request #26 from msgpack/assemblyscript
Experimentally implement some functions in AssemblyScript
2 parents b8453db + b8025a5 commit 3ed980b

22 files changed

Lines changed: 717 additions & 125 deletions

.eslintrc.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@ module.exports = {
2121
"no-var": "warn",
2222
"valid-typeof": "warn", // "bigint" is not yet supported
2323
"no-return-await": "warn",
24-
"prefer-const": "warn",
24+
// "prefer-const": "warn", // TODO: AssemblyScript has different semantics.
2525
"guard-for-in": "warn",
2626
"curly": "warn",
27+
"no-param-reassign": "warn",
2728

2829
"@typescript-eslint/no-unused-vars":"warn",
2930
"@typescript-eslint/array-type": ["error", "generic"],
@@ -34,7 +35,6 @@ module.exports = {
3435
"@typescript-eslint/prefer-includes": "warn",
3536
"@typescript-eslint/prefer-string-starts-ends-with": "warn",
3637
"@typescript-eslint/no-use-before-define": "warn",
37-
"@typescript-eslint/restrict-plus-operands": "error",
3838
"@typescript-eslint/await-thenable": "error",
3939
"@typescript-eslint/no-for-in-array": "error",
4040

.nycrc.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"include": ["src/**/*.ts"],
33
"extension": [".ts"],
4-
"reporter": ["text-summary", "html", "lcov"],
4+
"reporter": [],
55
"sourceMap": true,
66
"instrument": true
77
}

assembly/be.ts

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// Load/store helpers for big-endian values.
//
// Every multi-byte helper pairs the `load<T>` / `store<T>` builtin with
// `bswap<T>` to reverse the byte order of the value. The single-byte helpers
// need no swap; they exist so callers get a uniform set of names.
// Floats are moved as their raw bit pattern via `reinterpret`.

/** Reads the 32-bit float stored in big-endian order at `byteOffset`. */
export function loadFloat32BE(byteOffset: usize): f32 {
  return reinterpret<f32>(bswap<u32>(load<u32>(byteOffset)));
}

/**
 * Original, misspelled name of `loadFloat32BE`; kept so existing callers
 * continue to work.
 *
 * @deprecated Use `loadFloat32BE` instead.
 */
export function loadFload32BE(byteOffset: usize): f32 {
  return loadFloat32BE(byteOffset);
}

/** Reads the 64-bit float stored in big-endian order at `byteOffset`. */
export function loadFloat64BE(byteOffset: usize): f64 {
  return reinterpret<f64>(bswap<u64>(load<u64>(byteOffset)));
}

/** Reads the signed byte at `byteOffset` (no byte swap needed). */
export function loadInt8BE(byteOffset: usize): i8 {
  return load<i8>(byteOffset);
}

/** Reads the big-endian signed 16-bit integer at `byteOffset`. */
export function loadInt16BE(byteOffset: usize): i16 {
  return bswap<i16>(load<i16>(byteOffset));
}

/** Reads the big-endian signed 32-bit integer at `byteOffset`. */
export function loadInt32BE(byteOffset: usize): i32 {
  return bswap<i32>(load<i32>(byteOffset));
}

/** Reads the big-endian signed 64-bit integer at `byteOffset`. */
export function loadInt64BE(byteOffset: usize): i64 {
  return bswap<i64>(load<i64>(byteOffset));
}

/** Reads the unsigned byte at `byteOffset` (no byte swap needed). */
export function loadUint8BE(byteOffset: usize): u8 {
  return load<u8>(byteOffset);
}

/** Reads the big-endian unsigned 16-bit integer at `byteOffset`. */
export function loadUint16BE(byteOffset: usize): u16 {
  return bswap<u16>(load<u16>(byteOffset));
}

/** Reads the big-endian unsigned 32-bit integer at `byteOffset`. */
export function loadUint32BE(byteOffset: usize): u32 {
  return bswap<u32>(load<u32>(byteOffset));
}

/** Reads the big-endian unsigned 64-bit integer at `byteOffset`. */
export function loadUint64BE(byteOffset: usize): u64 {
  return bswap<u64>(load<u64>(byteOffset));
}

/** Writes `value` as a big-endian 32-bit float at `byteOffset`. */
export function storeFloat32BE(byteOffset: usize, value: f32): void {
  store<u32>(byteOffset, bswap<u32>(reinterpret<u32>(value)));
}

/** Writes `value` as a big-endian 64-bit float at `byteOffset`. */
export function storeFloat64BE(byteOffset: usize, value: f64): void {
  store<u64>(byteOffset, bswap<u64>(reinterpret<u64>(value)));
}

/** Writes the signed byte `value` at `byteOffset` (no byte swap needed). */
export function storeInt8BE(byteOffset: usize, value: i8): void {
  store<i8>(byteOffset, value);
}

/** Writes `value` as a big-endian signed 16-bit integer at `byteOffset`. */
export function storeInt16BE(byteOffset: usize, value: i16): void {
  store<i16>(byteOffset, bswap<i16>(value));
}

/** Writes `value` as a big-endian signed 32-bit integer at `byteOffset`. */
export function storeInt32BE(byteOffset: usize, value: i32): void {
  store<i32>(byteOffset, bswap<i32>(value));
}

/** Writes `value` as a big-endian signed 64-bit integer at `byteOffset`. */
export function storeInt64BE(byteOffset: usize, value: i64): void {
  store<i64>(byteOffset, bswap<i64>(value));
}

/** Writes the unsigned byte `value` at `byteOffset` (no byte swap needed). */
export function storeUint8BE(byteOffset: usize, value: u8): void {
  store<u8>(byteOffset, value);
}

/** Writes `value` as a big-endian unsigned 16-bit integer at `byteOffset`. */
export function storeUint16BE(byteOffset: usize, value: u16): void {
  store<u16>(byteOffset, bswap<u16>(value));
}

/** Writes `value` as a big-endian unsigned 32-bit integer at `byteOffset`. */
export function storeUint32BE(byteOffset: usize, value: u32): void {
  store<u32>(byteOffset, bswap<u32>(value));
}

/** Writes `value` as a big-endian unsigned 64-bit integer at `byteOffset`. */
export function storeUint64BE(byteOffset: usize, value: u64): void {
  store<u64>(byteOffset, bswap<u64>(value));
}

assembly/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
export { utf8DecodeToUint16Array } from "./utf8DecodeToUint16Array";
2+
export { utf8EncodeUint16Array } from "./utf8EncodeUint16Array";
3+
export { malloc, free } from "./memory";

assembly/memory.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import "allocator/tlsf";
2+
3+
/**
 * Exported allocation entry point: forwards to `memory.allocate`.
 *
 * @param size number of bytes requested
 * @returns the value returned by `memory.allocate(size)`
 */
export function malloc(size: usize): usize {
  let ptr: usize = memory.allocate(size);
  return ptr;
}
6+
7+
/**
 * Exported deallocation entry point: forwards to `memory.free`.
 *
 * @param ptr pointer to hand to `memory.free`; presumably one previously
 *            returned by `malloc` (confirm against callers)
 */
export function free(ptr: usize): void {
  memory.free(ptr);
}

assembly/tsconfig.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"extends": "../node_modules/assemblyscript/std/assembly.json",
3+
"include": [
4+
"./**/*.ts"
5+
]
6+
}

assembly/utf8CountUint16Array.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import { loadUint16BE } from "./be";
2+
3+
// inputPtr: u16*
/**
 * Counts how many UTF-8 bytes are needed to encode `inputLength` UTF-16 code
 * units, each read from `inputPtr` with `loadUint16BE`.
 *
 * A high surrogate immediately followed by a low surrogate is counted as one
 * 4-byte sequence; every other unit at or above 0x800 (unpaired surrogates
 * included) is counted as a 3-byte sequence.
 *
 * @returns the total UTF-8 byte length
 */
export function utf8CountUint16Array(inputPtr: usize, inputLength: usize): usize {
  const unitSize = sizeof<u16>();

  let limit = inputPtr + inputLength * unitSize;
  let cursor: usize = inputPtr;
  let total: usize = 0;

  while (cursor < limit) {
    let unit: u32 = loadUint16BE(cursor);
    cursor += unitSize;

    if (unit < 0x80) {
      // 1-byte
      total += 1;
      continue;
    }
    if (unit < 0x800) {
      // 2-bytes
      total += 2;
      continue;
    }

    // Look ahead for the low half of a surrogate pair; consume it if present.
    let paired = false;
    if (unit >= 0xd800 && unit <= 0xdbff && cursor < limit) {
      let next: u32 = loadUint16BE(cursor);
      if ((next & 0xfc00) === 0xdc00) {
        cursor += unitSize;
        paired = true;
      }
    }

    if (paired) {
      // 4-byte (code point above 0xffff)
      total += 4;
    } else {
      // 3-byte
      total += 3;
    }
  }

  return total;
}
assembly/utf8DecodeToUint16Array.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/**
 * Decodes `byteLength` bytes of UTF-8 starting at `inputPtr` and writes the
 * resulting UTF-16 code units as `u16` values starting at `outputPtr`.
 *
 * Code points above 0xffff are written as a surrogate pair. A byte that is not
 * a valid UTF-8 lead byte is copied through as a single code unit.
 *
 * NOTE(review): continuation bytes are read without checking them against the
 * end of the input, so a truncated sequence reads past `inputPtr + byteLength`.
 *
 * @param outputPtr address of the first output u16
 * @param inputPtr address of the first input byte
 * @param byteLength number of input bytes to decode
 * @returns the number of u16 code units written
 */
export function utf8DecodeToUint16Array(outputPtr: usize, inputPtr: usize, byteLength: usize): usize {
  let inputOffset = inputPtr;
  let outputOffset = outputPtr;
  let inputOffsetEnd = inputOffset + byteLength;
  const u16s = sizeof<u16>();

  while (inputOffset < inputOffsetEnd) {
    let byte1: u16 = load<u8>(inputOffset++);
    if ((byte1 & 0x80) === 0) {
      // 1 byte
      store<u16>(outputOffset, byte1);
      outputOffset += u16s;
    } else if ((byte1 & 0xe0) === 0xc0) {
      // 2 bytes
      let byte2: u16 = load<u8>(inputOffset++) & 0x3f;
      // FIXME: consider endians
      store<u16>(outputOffset, ((byte1 & 0x1f) << 6) | byte2);
      outputOffset += u16s;
    } else if ((byte1 & 0xf0) === 0xe0) {
      // 3 bytes
      let byte2: u16 = load<u8>(inputOffset++) & 0x3f;
      let byte3: u16 = load<u8>(inputOffset++) & 0x3f;
      store<u16>(outputOffset, ((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3);
      outputOffset += u16s;
    } else if ((byte1 & 0xf8) === 0xf0) {
      // 4 bytes
      // The code point needs up to 21 bits, so every part is widened to i32
      // before shifting; shifting u8/u16 values by 12 or 18 bits would
      // presumably be truncated to the narrow type.
      let lead: i32 = byte1 & 0x07;
      let byte2: i32 = load<u8>(inputOffset++) & 0x3f;
      let byte3: i32 = load<u8>(inputOffset++) & 0x3f;
      let byte4: i32 = load<u8>(inputOffset++) & 0x3f;
      let codepoint: i32 = (lead << 0x12) | (byte2 << 0x0c) | (byte3 << 0x06) | byte4;
      if (codepoint > 0xffff) {
        // Split into a surrogate pair: emit the high half here, the low half below.
        codepoint -= 0x10000;
        store<u16>(outputOffset, ((codepoint >>> 10) & 0x3ff) | 0xd800);
        outputOffset += u16s;
        codepoint = 0xdc00 | (codepoint & 0x3ff);
      }
      store<u16>(outputOffset, codepoint);
      outputOffset += u16s;
    } else {
      // invalid UTF-8: pass the byte through as one code unit.
      // The offset must advance by exactly one u16 (no extra post-increment),
      // otherwise later stores are misaligned and the returned count is wrong.
      store<u16>(outputOffset, byte1);
      outputOffset += u16s;
    }
  }
  return (outputOffset - outputPtr) / u16s;
}

assembly/utf8EncodeUint16Array.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import { utf8CountUint16Array } from "./utf8CountUint16Array";
2+
import { storeUint8BE, storeUint16BE, storeUint32BE, loadUint16BE } from "./be";
3+
4+
/**
 * Writes the MessagePack string header for a payload of `utf8ByteLength`
 * bytes at `outputPtr`, choosing the smallest format that fits
 * (fixstr, str 8, str 16, str 32).
 *
 * @returns the address just past the header, where the payload starts
 * @throws Error when the length does not fit in 32 bits
 */
function storeStringHeader(outputPtr: usize, utf8ByteLength: usize): usize {
  if (utf8ByteLength < 32) {
    // fixstr: length is folded into the type byte
    storeUint8BE(outputPtr, 0xa0 + (utf8ByteLength as u8));
    return outputPtr + 1;
  }

  if (utf8ByteLength < 0x100) {
    // str 8: type byte + 1-byte length
    storeUint8BE(outputPtr, 0xd9);
    storeUint8BE(outputPtr + 1, utf8ByteLength as u8);
    return outputPtr + 2;
  }

  if (utf8ByteLength < 0x10000) {
    // str 16: type byte + 2-byte big-endian length
    storeUint8BE(outputPtr, 0xda);
    storeUint16BE(outputPtr + 1, utf8ByteLength as u16);
    return outputPtr + 1 + sizeof<u16>();
  }

  if ((utf8ByteLength as u64) < 0x100000000) {
    // str 32: type byte + 4-byte big-endian length
    storeUint8BE(outputPtr, 0xdb);
    storeUint32BE(outputPtr + 1, utf8ByteLength as u32);
    return outputPtr + 1 + sizeof<u32>();
  }

  throw new Error(`Too long string: ${utf8ByteLength} bytes in UTF-8`);
}
28+
29+
// outputPtr: u8*
// inputPtr: u16*
/**
 * Encodes `inputLength` UTF-16 code units (read from `inputPtr` with
 * `loadUint16BE`) as a MessagePack string at `outputPtr`: first the str
 * header sized by `utf8CountUint16Array`, then the UTF-8 bytes.
 *
 * A high surrogate followed by a low surrogate becomes one 4-byte sequence;
 * an unpaired surrogate is encoded as a 3-byte sequence.
 *
 * @returns the total number of bytes written, header included
 */
export function utf8EncodeUint16Array(outputPtr: usize, inputPtr: usize, inputLength: usize): usize {
  const unitSize = sizeof<u16>();

  let payloadLength = utf8CountUint16Array(inputPtr, inputLength);
  let dst = storeStringHeader(outputPtr, payloadLength);

  let src = inputPtr;
  let srcEnd = inputPtr + inputLength * unitSize;

  while (src < srcEnd) {
    let cp: u32 = loadUint16BE(src);
    src += unitSize;

    if (cp < 0x80) {
      // 1-byte: written as-is, no continuation byte
      store<u8>(dst, cp);
      dst += 1;
      continue;
    }

    if (cp < 0x800) {
      // 2-bytes: lead byte only, the trailing byte is written below
      store<u8>(dst, ((cp >> 6) & 0x1f) | 0xc0);
      dst += 1;
    } else {
      // Merge a high surrogate with the low surrogate that follows it.
      if (cp >= 0xd800 && cp <= 0xdbff && src < srcEnd) {
        let low: u32 = loadUint16BE(src);
        if ((low & 0xfc00) === 0xdc00) {
          src += unitSize;
          cp = ((cp & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
        }
      }

      if (cp <= 0xffff) {
        // 3-byte: lead + first continuation
        store<u8>(dst, ((cp >> 12) & 0x0f) | 0xe0);
        store<u8>(dst + 1, ((cp >> 6) & 0x3f) | 0x80);
        dst += 2;
      } else {
        // 4-byte: lead + first two continuations
        store<u8>(dst, ((cp >> 18) & 0x07) | 0xf0);
        store<u8>(dst + 1, ((cp >> 12) & 0x3f) | 0x80);
        store<u8>(dst + 2, ((cp >> 6) & 0x3f) | 0x80);
        dst += 3;
      }
    }

    // Final continuation byte shared by every multi-byte form.
    store<u8>(dst, (cp & 0x3f) | 0x80);
    dst += 1;
  }

  return dst - outputPtr;
}

benchmark/string.ts

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/* eslint-disable no-console */
2+
import { encode, decode } from "../src";
3+
import { WASM_AVAILABLE } from "../src/wasmFunctions";
4+
5+
console.log(`WASM_AVAILABLE=${WASM_AVAILABLE}`);

const ascii = "A".repeat(40000);
const emoji = "🌏".repeat(20000);

// Number of timed encode (and decode) calls per data set.
const ITERATIONS = 1000;

/**
 * Benchmarks `encode` and `decode` for one string.
 *
 * Does one untimed warm-up round, logs the input and encoded sizes, then
 * times ITERATIONS encode calls and ITERATIONS decode calls under the
 * console timers `encode <label>` and `decode <label>`.
 *
 * @param label name of the data set, used in the log line and timer labels
 * @param data the string to encode
 */
function runBenchmark(label: string, data: string): void {
  // warm up
  const encoded = encode(data);
  decode(encoded);
  console.log(`encode / decode ${label} data.length=${data.length} encoded.byteLength=${encoded.byteLength}`);

  // run
  console.time(`encode ${label}`);
  for (let i = 0; i < ITERATIONS; i++) {
    encode(data);
  }
  console.timeEnd(`encode ${label}`);

  console.time(`decode ${label}`);
  for (let i = 0; i < ITERATIONS; i++) {
    decode(encoded);
  }
  console.timeEnd(`decode ${label}`);
}

runBenchmark("ascii", ascii);
runBenchmark("emoji", emoji);

0 commit comments

Comments
 (0)