-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Expand file tree
/
Copy pathssformat.go
More file actions
384 lines (328 loc) · 11.7 KB
/
ssformat.go
File metadata and controls
384 lines (328 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package spanner
import (
"bytes"
"encoding/binary"
"fmt"
"math"
)
// ssformat provides encoding utilities for Sortable String Format (ssformat) keys.
// This encoding scheme converts database values into byte sequences that preserve
// lexicographic ordering when compared byte-by-byte.
//
// The encoding supports both ascending (increasing) and descending (decreasing)
// sort orders for all data types.
const (
// isKey is the bit set in the header byte to indicate this is a key encoding.
ssIsKey = 0x80
// Unsigned integers (variable length 1-9 bytes)
ssTypeUint1 = 0
ssTypeDecreasingUint1 = 40
// Signed integers (variable length 1-8 bytes)
ssTypeNegInt1 = 16
ssTypePosInt1 = 17
ssTypeDecreasingNegInt1 = 48
ssTypeDecreasingPosInt1 = 49
// Strings
ssTypeString = 25
ssTypeDecreasingString = 57
// Nullable markers
ssTypeNullOrderedFirst = 27
ssTypeNullableNotNullNullOrderedFirst = 28
ssTypeNullableNotNullNullOrderedLast = 59
ssTypeNullOrderedLast = 60
// Doubles (variable length 1-8 bytes, encoded as transformed int64)
ssTypeNegDouble1 = 73
ssTypePosDouble1 = 74
ssTypeDecreasingNegDouble1 = 89
ssTypeDecreasingPosDouble1 = 90
// Escape characters for string/bytes encoding
ssAscendingZeroEscape byte = 0xF0
ssAscendingFFEscape byte = 0x10
ssSep byte = 0x78
// Tag encoding constants
ssObjectExistenceTag = 0x7E
ssMaxFieldTag = 0xFFFF
// Offset to make negative timestamp seconds sort correctly
ssTimestampSecondsOffset = 1 << 63
)
// makePrefixSuccessor returns the smallest possible key that is larger than
// the input key and that does not have the input key as a prefix.
//
// This is done by setting the least significant bit of the last byte.
// Returns nil if the input is empty or nil.
func makePrefixSuccessor(key []byte) []byte {
if len(key) == 0 {
return nil
}
result := make([]byte, len(key))
copy(result, key)
result[len(result)-1] |= 1
return result
}
// appendCompositeTag appends a composite tag for a table identifier to the buffer.
// Tags must be in the range [1, 65535] and not equal to 0x7E (object existence tag).
//
// Tag encoding uses variable length:
// - Tags 1-15: 1 byte
// - Tags 16-4095: 2 bytes
// - Tags 4096-65535: 3 bytes
func appendCompositeTag(buf []byte, tag int) ([]byte, error) {
if tag == ssObjectExistenceTag || tag <= 0 || tag > ssMaxFieldTag {
return buf, fmt.Errorf("ssformat: invalid tag value %d (must be 1-%d, excluding %d)",
tag, ssMaxFieldTag, ssObjectExistenceTag)
}
if tag < 16 {
// Short tag: encode as (tag << 1)
return append(buf, byte(tag<<1)), nil
}
// Long tag: reserve one bit for the prefix successor bit.
// Header format: 0 NN TTTTT where NN is num_extra_bytes (01=1, 10=2)
// and TTTTT are the high bits of the shifted tag.
shiftedTag := tag << 1
if shiftedTag < (1 << 13) { // Original tag < 4096
// 2-byte encoding: num_extra_bytes=1, header byte is 001xxxxx (0x20 | tag_bits)
return append(buf,
byte((1<<5)|(shiftedTag>>8)),
byte(shiftedTag&0xFF),
), nil
}
// 3-byte encoding: num_extra_bytes=2, header byte is 010xxxxx (0x40 | tag_bits)
return append(buf,
byte((2<<5)|(shiftedTag>>16)),
byte((shiftedTag>>8)&0xFF),
byte(shiftedTag&0xFF),
), nil
}
// appendNullOrderedFirst appends a NULL marker that sorts before all non-NULL values.
func appendNullOrderedFirst(buf []byte) []byte {
return append(buf, byte(ssIsKey|ssTypeNullOrderedFirst), 0)
}
// appendNullOrderedLast appends a NULL marker that sorts after all non-NULL values.
func appendNullOrderedLast(buf []byte) []byte {
return append(buf, byte(ssIsKey|ssTypeNullOrderedLast), 0)
}
// appendNotNullMarkerNullOrderedFirst appends a non-NULL marker when using NULLS FIRST ordering.
func appendNotNullMarkerNullOrderedFirst(buf []byte) []byte {
return append(buf, byte(ssIsKey|ssTypeNullableNotNullNullOrderedFirst))
}
// appendNotNullMarkerNullOrderedLast appends a non-NULL marker when using NULLS LAST ordering.
func appendNotNullMarkerNullOrderedLast(buf []byte) []byte {
return append(buf, byte(ssIsKey|ssTypeNullableNotNullNullOrderedLast))
}
// appendUint64Increasing appends an unsigned 64-bit integer value in ascending sort order.
func appendUint64Increasing(buf []byte, val uint64) []byte {
var payload [9]byte // Max 9 bytes for value payload
payloadLen := 0
tempVal := val
payload[8-payloadLen] = byte((tempVal & 0x7F) << 1) // LSB is prefix-successor bit (0)
tempVal >>= 7
payloadLen++
for tempVal > 0 {
payload[8-payloadLen] = byte(tempVal & 0xFF)
tempVal >>= 8
payloadLen++
}
buf = append(buf, byte(ssIsKey|(ssTypeUint1+payloadLen-1)))
return append(buf, payload[9-payloadLen:]...)
}
// appendUint64Decreasing appends an unsigned 64-bit integer value in descending sort order.
func appendUint64Decreasing(buf []byte, val uint64) []byte {
var payload [9]byte
payloadLen := 0
tempVal := val
payload[8-payloadLen] = byte((^(tempVal & 0x7F) & 0x7F) << 1)
tempVal >>= 7
payloadLen++
for tempVal > 0 {
payload[8-payloadLen] = byte(^(tempVal & 0xFF))
tempVal >>= 8
payloadLen++
}
buf = append(buf, byte(ssIsKey|(ssTypeDecreasingUint1-payloadLen+1)))
return append(buf, payload[9-payloadLen:]...)
}
// appendIntInternal is the internal implementation for signed integer and double encoding.
func appendIntInternal(buf []byte, val int64, decreasing, isDouble bool) []byte {
if decreasing {
val = ^val
}
var payload [8]byte // Max 8 bytes for payload
payloadLen := 0
tempVal := val
payload[7-payloadLen] = byte((tempVal & 0x7F) << 1)
tempVal >>= 7
payloadLen++
// For positive numbers, loop until all bits are 0s.
// For negative numbers, loop until all bits are 1s (sign extension).
loopEndVal := int64(0)
if tempVal < 0 {
loopEndVal = -1
}
for tempVal != loopEndVal {
payload[7-payloadLen] = byte(tempVal & 0xFF)
tempVal >>= 8
payloadLen++
}
var typeVal int
if val >= 0 {
switch {
case !decreasing && !isDouble:
typeVal = ssTypePosInt1 + payloadLen - 1
case !decreasing && isDouble:
typeVal = ssTypePosDouble1 + payloadLen - 1
case decreasing && !isDouble:
typeVal = ssTypeDecreasingPosInt1 + payloadLen - 1
default: // decreasing && isDouble
typeVal = ssTypeDecreasingPosDouble1 + payloadLen - 1
}
} else {
switch {
case !decreasing && !isDouble:
typeVal = ssTypeNegInt1 - payloadLen + 1
case !decreasing && isDouble:
typeVal = ssTypeNegDouble1 - payloadLen + 1
case decreasing && !isDouble:
typeVal = ssTypeDecreasingNegInt1 - payloadLen + 1
default: // decreasing && isDouble
typeVal = ssTypeDecreasingNegDouble1 - payloadLen + 1
}
}
buf = append(buf, byte(ssIsKey|typeVal))
return append(buf, payload[8-payloadLen:]...)
}
// appendIntIncreasing appends a signed integer value in ascending sort order.
func appendIntIncreasing(buf []byte, value int64) []byte {
return appendIntInternal(buf, value, false, false)
}
// appendIntDecreasing appends a signed integer value in descending sort order.
func appendIntDecreasing(buf []byte, value int64) []byte {
return appendIntInternal(buf, value, true, false)
}
// appendDoubleInternal is the internal implementation for double encoding.
// The double is transformed to maintain lexicographic ordering:
// negative doubles (bit 63 = 1) are transformed via (MinInt64 - enc).
func appendDoubleInternal(buf []byte, value float64, decreasing bool) []byte {
enc := int64(math.Float64bits(value))
if enc < 0 {
enc = math.MinInt64 - enc
}
return appendIntInternal(buf, enc, decreasing, true)
}
// appendDoubleIncreasing appends a double value in ascending sort order.
func appendDoubleIncreasing(buf []byte, value float64) []byte {
return appendDoubleInternal(buf, value, false)
}
// appendDoubleDecreasing appends a double value in descending sort order.
func appendDoubleDecreasing(buf []byte, value float64) []byte {
return appendDoubleInternal(buf, value, true)
}
// appendByteSequence is the internal implementation for string and bytes encoding.
func appendByteSequence(buf, data []byte, decreasing bool) []byte {
if decreasing {
buf = append(buf, byte(ssIsKey|ssTypeDecreasingString))
} else {
buf = append(buf, byte(ssIsKey|ssTypeString))
}
for _, b := range data {
currentByte := b
if decreasing {
currentByte = ^b
}
switch currentByte {
case 0x00:
// Escape sequence for 0x00: write 0x00 followed by 0xF0
buf = append(buf, 0x00, ssAscendingZeroEscape)
case 0xFF:
// Escape sequence for 0xFF: write 0xFF followed by 0x10
buf = append(buf, 0xFF, ssAscendingFFEscape)
default:
buf = append(buf, currentByte)
}
}
// Terminator
if decreasing {
buf = append(buf, 0xFF, ssSep)
} else {
buf = append(buf, 0x00, ssSep)
}
return buf
}
// appendStringIncreasing appends a UTF-8 string value in ascending sort order.
func appendStringIncreasing(buf []byte, value string) []byte {
return appendByteSequence(buf, []byte(value), false)
}
// appendStringDecreasing appends a UTF-8 string value in descending sort order.
func appendStringDecreasing(buf []byte, value string) []byte {
return appendByteSequence(buf, []byte(value), true)
}
// appendBytesIncreasing appends a byte slice value in ascending sort order.
func appendBytesIncreasing(buf []byte, value []byte) []byte {
return appendByteSequence(buf, value, false)
}
// appendBytesDecreasing appends a byte slice value in descending sort order.
func appendBytesDecreasing(buf []byte, value []byte) []byte {
return appendByteSequence(buf, value, true)
}
// encodeTimestamp encodes a timestamp as 12 bytes: 8 bytes for seconds since epoch
// (with offset to handle negative), 4 bytes for nanoseconds.
func encodeTimestamp(seconds int64, nanos int32) []byte {
offsetSeconds := uint64(seconds) + ssTimestampSecondsOffset
buf := make([]byte, 12)
binary.BigEndian.PutUint64(buf[0:8], offsetSeconds)
binary.BigEndian.PutUint32(buf[8:12], uint32(nanos))
return buf
}
// encodeUUID encodes a UUID (128-bit) as 16 bytes in big-endian order.
func encodeUUID(high, low int64) []byte {
buf := make([]byte, 16)
binary.BigEndian.PutUint64(buf[0:8], uint64(high))
binary.BigEndian.PutUint64(buf[8:16], uint64(low))
return buf
}
// targetRange represents a key range with start and limit boundaries for routing.
type targetRange struct {
start []byte
limit []byte
approximate bool
}
// newTargetRange creates a new targetRange with the given boundaries.
func newTargetRange(start, limit []byte, approximate bool) *targetRange {
return &targetRange{
start: start,
limit: limit,
approximate: approximate,
}
}
// isPoint returns true if this range represents a single key (point lookup).
func (r *targetRange) isPoint() bool {
return len(r.limit) == 0
}
// mergeFrom merges another targetRange into this one. The resulting range
// will be the union of the two ranges, taking the minimum start key and
// maximum limit key.
func (r *targetRange) mergeFrom(other *targetRange) {
if bytes.Compare(other.start, r.start) < 0 {
r.start = other.start
}
if other.isPoint() {
if bytes.Compare(other.start, r.limit) >= 0 {
r.limit = makePrefixSuccessor(other.start)
}
} else if bytes.Compare(other.limit, r.limit) > 0 {
r.limit = other.limit
}
r.approximate = r.approximate || other.approximate
}