Skip to content

Commit 8be4057

Browse files
committed
fix CachedKeyDecoder's remaining issues
1 parent 7050b48 commit 8be4057

File tree

4 files changed

+171
-143
lines changed

4 files changed

+171
-143
lines changed

src/CachedKeyDecoder.ts

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import { utf8DecodeJs } from "./utils/utf8";
2+
3+
interface KeyCacheRecord {
4+
readonly bytes: Uint8Array;
5+
readonly value: string;
6+
hits: number;
7+
}
8+
9+
const DEFAULT_MAX_KEY_LENGTH = 16;
10+
const DEFAULT_MAX_LENGTH_PER_KEY = 32;
11+
12+
export class CachedKeyDecoder {
13+
private readonly caches: Array<Array<KeyCacheRecord>>;
14+
15+
constructor(
16+
private readonly maxKeyLength = DEFAULT_MAX_KEY_LENGTH,
17+
private readonly maxLengthPerKey = DEFAULT_MAX_LENGTH_PER_KEY,
18+
) {
19+
// avoid `new Array(N)` to create a non-sparse array for performance.
20+
this.caches = [];
21+
for (let i = 0; i < this.maxKeyLength; i++) {
22+
this.caches.push([]);
23+
}
24+
}
25+
26+
public canBeCached(byteLength: number) {
27+
return byteLength > 0 && byteLength <= this.maxKeyLength;
28+
}
29+
30+
private get(bytes: Uint8Array, inputOffset: number, byteLength: number): string | null {
31+
const records = this.caches[byteLength - 1];
32+
const recordsLength = records.length;
33+
34+
FIND_CHUNK: for (let i = 0; i < recordsLength; i++) {
35+
const record = records[i];
36+
37+
for (let j = 0; j < byteLength; j++) {
38+
if (record.bytes[j] !== bytes[inputOffset + j]) {
39+
continue FIND_CHUNK;
40+
}
41+
}
42+
43+
record.hits++;
44+
return record.value;
45+
}
46+
return null;
47+
}
48+
49+
private store(bytes: Uint8Array, value: string) {
50+
const records = this.caches[bytes.length - 1];
51+
const hits = 1;
52+
records.unshift({ bytes, value, hits });
53+
if (records.length > this.maxLengthPerKey) {
54+
records.pop();
55+
}
56+
}
57+
58+
public decode(bytes: Uint8Array, inputOffset: number, byteLength: number): string {
59+
const cachedValue = this.get(bytes, inputOffset, byteLength);
60+
if (cachedValue) {
61+
return cachedValue;
62+
}
63+
64+
const value = utf8DecodeJs(bytes, inputOffset, byteLength);
65+
const byteSlice = bytes.slice(inputOffset, inputOffset + byteLength);
66+
this.store(byteSlice, value);
67+
return value;
68+
}
69+
}

src/Decoder.ts

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ import { getInt64, getUint64 } from "./utils/int";
44
import { utf8DecodeJs, TEXT_ENCODING_AVAILABLE, TEXT_DECODER_THRESHOLD, utf8DecodeTD } from "./utils/utf8";
55
import { createDataView, ensureUint8Array } from "./utils/typedArrays";
66
import { WASM_AVAILABLE, WASM_STR_THRESHOLD, utf8DecodeWasm } from "./wasmFunctions";
7+
import { CachedKeyDecoder } from "./CachedKeyDecoder";
78

8-
export enum State {
9+
enum State {
910
ARRAY,
1011
MAP_KEY,
1112
MAP_VALUE,
@@ -50,6 +51,8 @@ const MORE_DATA = new DataViewIndexOutOfBoundsError("Insufficient data");
5051

5152
const DEFAULT_MAX_LENGTH = 0xffff_ffff; // uint32_max
5253

54+
const sharedCachedKeyDecoder = new CachedKeyDecoder();
55+
5356
export class Decoder {
5457
totalPos = 0;
5558
pos = 0;
@@ -59,6 +62,9 @@ export class Decoder {
5962
headByte = HEAD_BYTE_REQUIRED;
6063
readonly stack: Array<StackState> = [];
6164

65+
// TODO: parameterize this property.
66+
readonly cachedKeyDecoder = sharedCachedKeyDecoder;
67+
6268
constructor(
6369
readonly extensionCodec = ExtensionCodec.defaultCodec,
6470
readonly maxStrLength = DEFAULT_MAX_LENGTH,
@@ -482,7 +488,9 @@ export class Decoder {
482488

483489
const offset = this.pos + headerOffset;
484490
let object: string;
485-
if (TEXT_ENCODING_AVAILABLE && byteLength > TEXT_DECODER_THRESHOLD) {
491+
if (this.stateIsMapKey() && this.cachedKeyDecoder.canBeCached(byteLength)) {
492+
object = this.cachedKeyDecoder.decode(this.bytes, offset, byteLength);
493+
} else if (TEXT_ENCODING_AVAILABLE && byteLength > TEXT_DECODER_THRESHOLD) {
486494
object = utf8DecodeTD(this.bytes, offset, byteLength);
487495
} else if (WASM_AVAILABLE && byteLength > WASM_STR_THRESHOLD) {
488496
object = utf8DecodeWasm(this.bytes, offset, byteLength);
@@ -493,6 +501,14 @@ export class Decoder {
493501
return object;
494502
}
495503

504+
stateIsMapKey(): boolean {
505+
if (this.stack.length > 0) {
506+
const state = this.stack[this.stack.length - 1];
507+
return state.type === State.MAP_KEY;
508+
}
509+
return false;
510+
}
511+
496512
decodeBinary(byteLength: number, headOffset: number): Uint8Array {
497513
if (byteLength > this.maxBinLength) {
498514
throw new Error(`Max length exceeded: bin length (${byteLength}) > maxBinLength (${this.maxBinLength})`);

src/decode.ts

Lines changed: 9 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import { ExtensionCodecType } from "./ExtensionCodec";
2-
import { Decoder, State } from "./Decoder";
3-
import { utf8DecodeJs } from "./utils/utf8";
2+
import { Decoder } from "./Decoder";
43

54
export type DecodeOptions = Partial<
65
Readonly<{
@@ -36,133 +35,6 @@ export type DecodeOptions = Partial<
3635

3736
export const defaultDecodeOptions: DecodeOptions = {};
3837

39-
interface KeyCacheRecord {
40-
readonly bytes: Uint8Array;
41-
readonly key: string;
42-
hits: number;
43-
}
44-
45-
class CachedKeyDecoder {
46-
private readonly caches: Array<Array<KeyCacheRecord>>;
47-
48-
constructor(private readonly maxKeyLength: number = 32) {
49-
this.caches = new Array<Array<KeyCacheRecord>>(this.maxKeyLength + 1);
50-
}
51-
52-
public get(bytes: Uint8Array, inputOffset: number, byteLength: number): string | null {
53-
const chunks = this.caches[byteLength];
54-
55-
if (chunks) {
56-
return this.findKey(bytes, inputOffset, byteLength, chunks);
57-
} else {
58-
return null;
59-
}
60-
}
61-
62-
private findKey(
63-
bytes: Uint8Array,
64-
inputOffset: number,
65-
byteLength: number,
66-
chunks: Array<KeyCacheRecord>,
67-
): string | null {
68-
let prevHits = 0;
69-
const chunksLength = chunks.length;
70-
const halfLength = byteLength / 2;
71-
const endPosition = inputOffset + byteLength;
72-
FIND_CHUNK: for (let i = 0; i < chunksLength; i++) {
73-
const chunk = chunks[i];
74-
75-
if (i > 0 && prevHits < chunk.hits) {
76-
// Sort chunks by number of hits
77-
// in order to improve search speed for most used keys
78-
const prevChunk = chunks[i - 1];
79-
chunks[i] = prevChunk;
80-
chunks[i - 1] = chunk;
81-
prevHits = prevChunk.hits;
82-
} else {
83-
prevHits = chunk.hits;
84-
}
85-
86-
for (let j = 0; j < halfLength; j++) {
87-
if (chunk.bytes[j] !== bytes[inputOffset + j]) {
88-
continue FIND_CHUNK;
89-
}
90-
91-
if (chunk.bytes[byteLength - j - 1] !== bytes[endPosition - j - 1]) {
92-
continue FIND_CHUNK;
93-
}
94-
}
95-
96-
chunk.hits++;
97-
98-
return chunk.key;
99-
}
100-
101-
return null;
102-
}
103-
104-
public cache(bytes: Uint8Array, value: string) {
105-
let chunks: Array<KeyCacheRecord> = this.caches[bytes.length];
106-
107-
if (!chunks) {
108-
chunks = [];
109-
this.caches[bytes.length] = chunks;
110-
}
111-
112-
chunks.push({
113-
bytes: bytes,
114-
key: value,
115-
hits: 1,
116-
});
117-
}
118-
119-
public decode(bytes: Uint8Array, inputOffset: number, byteLength: number): string {
120-
let value = this.get(bytes, inputOffset, byteLength);
121-
122-
if (!value) {
123-
value = utf8DecodeJs(bytes, inputOffset, byteLength);
124-
const stringsBytes = bytes.slice(inputOffset, inputOffset + byteLength);
125-
this.cache(stringsBytes, value);
126-
}
127-
128-
return value;
129-
}
130-
}
131-
132-
class CustomDecoder extends Decoder {
133-
private readonly maxCachedKeyLength = 32;
134-
public cachedKeyDecoder = new CachedKeyDecoder(this.maxCachedKeyLength);
135-
136-
public decodeUtf8String(byteLength: number, headerOffset: number): string {
137-
let isKey = false;
138-
const canBeDecodedAsKey = byteLength > 0 && byteLength < this.maxCachedKeyLength;
139-
140-
if (canBeDecodedAsKey && this.stack.length > 0) {
141-
const state = this.stack[this.stack.length - 1];
142-
143-
isKey = state.type === State.MAP_KEY;
144-
}
145-
146-
if (isKey && canBeDecodedAsKey) {
147-
const offset = this.pos + headerOffset;
148-
const value = this.cachedKeyDecoder.decode(this.bytes, offset, byteLength);
149-
this.pos += headerOffset + byteLength;
150-
return value;
151-
} else {
152-
return super.decodeUtf8String(byteLength, headerOffset);
153-
}
154-
}
155-
}
156-
157-
const sharedDecoder = new CustomDecoder(
158-
defaultDecodeOptions.extensionCodec,
159-
defaultDecodeOptions.maxStrLength,
160-
defaultDecodeOptions.maxBinLength,
161-
defaultDecodeOptions.maxArrayLength,
162-
defaultDecodeOptions.maxMapLength,
163-
defaultDecodeOptions.maxExtLength,
164-
);
165-
16638
/**
16739
* It decodes a MessagePack-encoded buffer.
16840
*
@@ -172,18 +44,14 @@ export function decode(
17244
buffer: ArrayLike<number> | ArrayBuffer,
17345
options: DecodeOptions = defaultDecodeOptions,
17446
): unknown {
175-
const decoder =
176-
options === defaultDecodeOptions
177-
? sharedDecoder
178-
: new CustomDecoder(
179-
options.extensionCodec,
180-
options.maxStrLength,
181-
options.maxBinLength,
182-
options.maxArrayLength,
183-
options.maxMapLength,
184-
options.maxExtLength,
185-
);
186-
47+
const decoder = new Decoder(
48+
options.extensionCodec,
49+
options.maxStrLength,
50+
options.maxBinLength,
51+
options.maxArrayLength,
52+
options.maxMapLength,
53+
options.maxExtLength,
54+
);
18755
decoder.setBuffer(buffer); // decodeSync() requires only one buffer
18856
return decoder.decodeOneSync();
18957
}

test/CachedKeyDecoder.test.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import assert from "assert";
2+
import { CachedKeyDecoder } from "../src/CachedKeyDecoder";
3+
import { utf8EncodeJs, utf8Count } from "../src/utils/utf8";
4+
5+
function tryDecode(decoder: CachedKeyDecoder, str: string): string {
6+
const byteLength = utf8Count(str);
7+
const bytes = new Uint8Array(byteLength);
8+
utf8EncodeJs(str, bytes, 0);
9+
if (!decoder.canBeCached(byteLength)) {
10+
throw new Error("Unexpected precondition");
11+
}
12+
return decoder.decode(bytes, 0, byteLength);
13+
}
14+
15+
describe("CachedKeyDecoder", () => {
16+
context("basic behavior", () => {
17+
it("decodes a string", () => {
18+
const decoder = new CachedKeyDecoder();
19+
20+
assert.deepStrictEqual(tryDecode(decoder, "foo"), "foo");
21+
assert.deepStrictEqual(tryDecode(decoder, "foo"), "foo");
22+
assert.deepStrictEqual(tryDecode(decoder, "foo"), "foo");
23+
24+
// console.dir(decoder, { depth: 100 });
25+
});
26+
27+
it("decodes strings", () => {
28+
const decoder = new CachedKeyDecoder();
29+
30+
assert.deepStrictEqual(tryDecode(decoder, "foo"), "foo");
31+
assert.deepStrictEqual(tryDecode(decoder, "bar"), "bar");
32+
assert.deepStrictEqual(tryDecode(decoder, "foo"), "foo");
33+
34+
// console.dir(decoder, { depth: 100 });
35+
});
36+
37+
it("decodes strings with purging records", () => {
38+
const decoder = new CachedKeyDecoder(16, 2);
39+
40+
assert.deepStrictEqual(tryDecode(decoder, "foo"), "foo");
41+
assert.deepStrictEqual(tryDecode(decoder, "bar"), "bar");
42+
43+
// the next `tryDecode()` should purge the cache of "foo"
44+
assert.deepStrictEqual(tryDecode(decoder, "baz"), "baz");
45+
46+
// with newly created an internal cache record
47+
assert.deepStrictEqual(tryDecode(decoder, "foo"), "foo");
48+
});
49+
});
50+
51+
context("edge cases", () => {
52+
// len=0 is not supported because it is just an empty string
53+
it("decodes str with len=1", () => {
54+
const decoder = new CachedKeyDecoder();
55+
56+
assert.deepStrictEqual(tryDecode(decoder, "f"), "f");
57+
assert.deepStrictEqual(tryDecode(decoder, "a"), "a");
58+
assert.deepStrictEqual(tryDecode(decoder, "f"), "f");
59+
assert.deepStrictEqual(tryDecode(decoder, "a"), "a");
60+
61+
//console.dir(decoder, { depth: 100 });
62+
});
63+
64+
it("decodes str with len=maxKeyLength", () => {
65+
const decoder = new CachedKeyDecoder(1);
66+
67+
assert.deepStrictEqual(tryDecode(decoder, "f"), "f");
68+
assert.deepStrictEqual(tryDecode(decoder, "a"), "a");
69+
assert.deepStrictEqual(tryDecode(decoder, "f"), "f");
70+
assert.deepStrictEqual(tryDecode(decoder, "a"), "a");
71+
72+
//console.dir(decoder, { depth: 100 });
73+
});
74+
});
75+
});

0 commit comments

Comments
 (0)