Skip to content

Commit c11eefc

Browse files
committed
More cleaning.
1 parent 0e48041 commit c11eefc

File tree

12 files changed

+199
-56
lines changed

12 files changed

+199
-56
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ Goal: Speed up the parsing of JSON per se.
1212
/...
1313

1414
const char * filename = ... //
15-
simdjsonstring p = get_corpus(filename);
15+
std::string_view p = get_corpus(filename);
1616
ParsedJson pj;
1717
size_t maxdepth = 1024; // support documents with nesting "depth" up to 1024
1818
pj.allocateCapacity(p.size(), maxdepth); // allocate memory for parsing up to p.size() bytes
19-
bool is_ok = json_parse(p.first, p.second, pj); // do the parsing, return false on error
19+
bool is_ok = json_parse(p, pj); // do the parsing, return false on error
2020
// parsing is done!
2121
// pj can be reused with other json_parse calls.
2222
```

benchmark/minifiercompetition.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ int main(int argc, char *argv[]) {
6161
exit(1);
6262
}
6363
const char * filename = argv[optind];
64-
simdjsonstring p;
64+
std::string_view p;
6565
try {
6666
p = get_corpus(filename);
6767
} catch (const std::exception& e) { // caught by reference to base
@@ -79,20 +79,20 @@ int main(int argc, char *argv[]) {
7979
std::cout << std::endl;
8080
}
8181
char *buffer = allocate_aligned_buffer(p.size() + 1);
82-
memcpy(buffer, p.c_str(), p.size());
82+
memcpy(buffer, p.data(), p.size());
8383
buffer[p.size()] = '\0';
8484

8585
int repeat = 10;
8686
int volume = p.size();
8787

88-
size_t strlength = rapidstringme((char *)p.c_str()).size();
88+
size_t strlength = rapidstringme((char *)p.data()).size();
8989
if (verbose)
9090
std::cout << "input length is " << p.size() << " stringified length is "
9191
<< strlength << std::endl;
92-
BEST_TIME_NOCHECK("despacing with RapidJSON", rapidstringme((char *)p.c_str()), , repeat, volume, true);
92+
BEST_TIME_NOCHECK("despacing with RapidJSON", rapidstringme((char *)p.data()), , repeat, volume, true);
9393
BEST_TIME_NOCHECK("despacing with RapidJSON Insitu", rapidstringmeInsitu((char *)buffer),
94-
memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
95-
memcpy(buffer, p.c_str(), p.size());
94+
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
95+
memcpy(buffer, p.data(), p.size());
9696

9797
size_t outlength =
9898
jsonminify((const uint8_t *)buffer, p.size(), (uint8_t *)buffer);
@@ -101,18 +101,18 @@ int main(int argc, char *argv[]) {
101101

102102
uint8_t *cbuffer = (uint8_t *)buffer;
103103
BEST_TIME("jsonminify", jsonminify(cbuffer, p.size(), cbuffer), outlength,
104-
memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
104+
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
105105
printf("minisize = %zu, original size = %zu (minified down to %.2f percent of original) \n", outlength, p.size(), outlength * 100.0 / p.size());
106106

107107
/***
108108
* Is it worth it to minify before parsing?
109109
***/
110110
rapidjson::Document d;
111111
BEST_TIME("RapidJSON Insitu orig", d.ParseInsitu(buffer).HasParseError(), false,
112-
memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
112+
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
113113

114114
char *minibuffer = allocate_aligned_buffer(p.size() + 1);
115-
size_t minisize = jsonminify((const uint8_t *)p.c_str(), p.size(), (uint8_t*) minibuffer);
115+
size_t minisize = jsonminify((const uint8_t *)p.data(), p.size(), (uint8_t*) minibuffer);
116116
minibuffer[minisize] = '\0';
117117

118118
BEST_TIME("RapidJSON Insitu despaced", d.ParseInsitu(buffer).HasParseError(), false,
@@ -122,14 +122,14 @@ int main(int argc, char *argv[]) {
122122
size_t astbuffersize = p.size() * 2;
123123
size_t * ast_buffer = (size_t *) malloc(astbuffersize * sizeof(size_t));
124124

125-
BEST_TIME("sajson orig", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
125+
BEST_TIME("sajson orig", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
126126

127127

128128
BEST_TIME("sajson despaced", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(minisize, buffer)).is_valid(), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, true);
129129

130130
ParsedJson pj;
131131
pj.allocateCapacity(p.size(), 1024);
132-
BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.size(), pj), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
132+
BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.size(), pj), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
133133

134134
ParsedJson pj2;
135135
pj2.allocateCapacity(p.size(), 1024);

benchmark/parse.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ int main(int argc, char *argv[]) {
6565
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
6666
}
6767
if(verbose) cout << "[verbose] loading " << filename << endl;
68-
simdjsonstring p;
68+
std::string_view p;
6969
try {
7070
p = get_corpus(filename);
7171
} catch (const std::exception& e) { // caught by reference to base
@@ -118,7 +118,7 @@ int main(int argc, char *argv[]) {
118118
#ifndef SQUASH_COUNTERS
119119
unified.start();
120120
#endif
121-
isok = find_structural_bits(p.c_str(), p.size(), pj);
121+
isok = find_structural_bits(p.data(), p.size(), pj);
122122
#ifndef SQUASH_COUNTERS
123123
unified.end(results);
124124
cy1 += results[0];
@@ -147,7 +147,7 @@ int main(int argc, char *argv[]) {
147147
unified.start();
148148
#endif
149149

150-
isok = isok && unified_machine(p.c_str(), p.size(), pj);
150+
isok = isok && unified_machine(p.data(), p.size(), pj);
151151
#ifndef SQUASH_COUNTERS
152152
unified.end(results);
153153
cy3 += results[0];

benchmark/parsingcompetition.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ int main(int argc, char *argv[]) {
6161
if(optind + 1 < argc) {
6262
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
6363
}
64-
simdjsonstring p;
64+
std::string_view p;
6565
try {
6666
p = get_corpus(filename);
6767
} catch (const std::exception& e) { // caught by reference to base
@@ -93,32 +93,32 @@ int main(int argc, char *argv[]) {
9393
rapidjson::Document d;
9494

9595
char *buffer = (char *)malloc(p.size() + 1);
96-
memcpy(buffer, p.c_str(), p.size());
96+
memcpy(buffer, p.data(), p.size());
9797
buffer[p.size()] = '\0';
9898

9999
BEST_TIME("RapidJSON",
100100
d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError(),
101-
false, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
101+
false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
102102
BEST_TIME("RapidJSON Insitu", d.ParseInsitu<kParseValidateEncodingFlag>(buffer).HasParseError(), false,
103-
memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
103+
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
104104

105-
BEST_TIME("sajson (dynamic mem)", sajson::parse(sajson::dynamic_allocation(), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
105+
BEST_TIME("sajson (dynamic mem)", sajson::parse(sajson::dynamic_allocation(), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
106106

107107
size_t astbuffersize = p.size();
108108
size_t * ast_buffer = (size_t *) malloc(astbuffersize * sizeof(size_t));
109109

110-
BEST_TIME("sajson (static alloc)", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
110+
BEST_TIME("sajson (static alloc)", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
111111
std::string json11err;
112-
if(all) BEST_TIME("dropbox (json11) ", (( json11::Json::parse(buffer,json11err).is_null() ) || ( ! json11err.empty() )), false, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
112+
if(all) BEST_TIME("dropbox (json11) ", (( json11::Json::parse(buffer,json11err).is_null() ) || ( ! json11err.empty() )), false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
113113

114-
if(all) BEST_TIME("fastjson ", fastjson_parse(buffer), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
114+
if(all) BEST_TIME("fastjson ", fastjson_parse(buffer), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
115115
JsonValue value;
116116
JsonAllocator allocator;
117117
char *endptr;
118-
if(all) BEST_TIME("gason ", jsonParse(buffer, &endptr, &value, allocator), JSON_OK, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
118+
if(all) BEST_TIME("gason ", jsonParse(buffer, &endptr, &value, allocator), JSON_OK, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
119119
void *state;
120-
if(all) BEST_TIME("ultrajson ", (UJDecode(buffer, p.size(), NULL, &state) == NULL), false, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
121-
BEST_TIME("memcpy ", (memcpy(buffer, p.c_str(), p.size()) == buffer), true, , repeat, volume, true);
120+
if(all) BEST_TIME("ultrajson ", (UJDecode(buffer, p.size(), NULL, &state) == NULL), false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
121+
BEST_TIME("memcpy ", (memcpy(buffer, p.data(), p.size()) == buffer), true, , repeat, volume, true);
122122
free(ast_buffer);
123123
free(buffer);
124124
}

include/simdjson/jsonminifier.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@ static inline size_t jsonminify(const char *buf, size_t len, char *out) {
1414
}
1515

1616

17-
static inline size_t jsonminify(const simdjsonstring & p, char *out) {
18-
return jsonminify(p.c_str(), p.size(), out);
17+
static inline size_t jsonminify(const std::string_view & p, char *out) {
18+
return jsonminify(p.data(), p.size(), out);
1919
}

src/jsonioutil.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,6 @@ char * allocate_aligned_buffer(size_t length) {
1010
if (posix_memalign((void **)&aligned_buffer, 64, totalpaddedlength)) {
1111
throw std::runtime_error("Could not allocate sufficient memory");
1212
};
13-
aligned_buffer[length] = '\0';
14-
for(size_t i = length + 1; i < totalpaddedlength; i++) aligned_buffer[i] = 0x20;
15-
//aligned_buffer[paddedlength] = '\0';
16-
//memset(aligned_buffer + length, 0x20, paddedlength - length);
1713
return aligned_buffer;
1814
}
1915

@@ -29,6 +25,7 @@ std::string_view get_corpus(std::string filename) {
2925
}
3026
std::rewind(fp);
3127
std::fread(buf, 1, len, fp);
28+
buf[len] = '\0';
3229
std::fclose(fp);
3330
return std::string_view(buf,len);
3431
}

src/jsonparser.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@ bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {
1111
bool isok = find_structural_bits(buf, len, pj);
1212
if (isok) {
1313
isok = flatten_indexes(len, pj);
14+
} else {
15+
return false;
1416
}
1517
if (isok) {
1618
isok = unified_machine(buf, len, pj);
19+
} else {
20+
return false;
1721
}
1822
return isok;
1923
}

src/stage1_find_marks.cpp

Lines changed: 156 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,9 @@ WARN_UNUSED
6161
// effectively the very first char is considered to follow "whitespace" for the
6262
// purposes of pseudo-structural character detection
6363
u64 prev_iter_ends_pseudo_pred = 1ULL;
64-
65-
for (size_t idx = 0; idx < len; idx += 64) {
64+
size_t lenminus64 = len + 1 < 64 ? 0 : len + 1 - 64; // len + 1 because of the NULL termination
65+
size_t idx = 0;
66+
for (; idx < lenminus64; idx += 64) {
6667
__builtin_prefetch(buf + idx + 128);
6768
#ifdef DEBUG
6869
cout << "Idx is " << idx << "\n";
@@ -249,21 +250,163 @@ WARN_UNUSED
249250
"final structurals and pseudo structurals after close quote removal");
250251
*(u64 *)(pj.structurals + idx / 8) = structurals;
251252
}
253+
254+
////////////////
255+
/// we use a giant copy-paste which is ugly.
256+
/// but otherwise the string needs to be properly padded or else we
257+
/// risk invalidating the UTF-8 checks.
258+
////////////
259+
if (idx < len + 1) { // +1 due to NULL termination
260+
u8 tmpbuf[64];
261+
memset(tmpbuf,0x20,64);
262+
memcpy(tmpbuf,buf+idx,len - idx + 1);// +1 due to NULL termination
263+
m256 input_lo = _mm256_loadu_si256((const m256 *)(tmpbuf + 0));
264+
m256 input_hi = _mm256_loadu_si256((const m256 *)(tmpbuf + 32));
265+
#ifdef UTF8VALIDATE
266+
m256 highbit = _mm256_set1_epi8(0x80);
267+
if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
268+
// it is ascii, we just check continuation
269+
has_error = _mm256_or_si256(
270+
_mm256_cmpgt_epi8(previous.carried_continuations,
271+
_mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
272+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
273+
9, 9, 9, 9, 9, 9, 9, 1)),has_error);
274+
275+
} else {
276+
// it is not ascii so we have to do heavy work
277+
previous = avxcheckUTF8Bytes(input_lo, &previous, &has_error);
278+
previous = avxcheckUTF8Bytes(input_hi, &previous, &has_error);
279+
}
280+
#endif
281+
////////////////////////////////////////////////////////////////////////////////////////////
282+
// Step 1: detect odd sequences of backslashes
283+
////////////////////////////////////////////////////////////////////////////////////////////
284+
285+
u64 bs_bits =
286+
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('\\'));
287+
u64 start_edges = bs_bits & ~(bs_bits << 1);
288+
// flip lowest if we have an odd-length run at the end of the prior
289+
// iteration
290+
u64 even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
291+
u64 even_starts = start_edges & even_start_mask;
292+
u64 odd_starts = start_edges & ~even_start_mask;
293+
u64 even_carries = bs_bits + even_starts;
294+
295+
u64 odd_carries;
296+
// must record the carry-out of our odd-carries out of bit 63; this
297+
// indicates whether the sense of any edge going to the next iteration
298+
// should be flipped
299+
bool iter_ends_odd_backslash =
300+
__builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries);
301+
302+
odd_carries |=
303+
prev_iter_ends_odd_backslash; // push in bit zero as a potential end
304+
// if we had an odd-numbered run at the
305+
// end of the previous iteration
306+
prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
307+
u64 even_carry_ends = even_carries & ~bs_bits;
308+
u64 odd_carry_ends = odd_carries & ~bs_bits;
309+
u64 even_start_odd_end = even_carry_ends & odd_bits;
310+
u64 odd_start_even_end = odd_carry_ends & even_bits;
311+
u64 odd_ends = even_start_odd_end | odd_start_even_end;
312+
313+
////////////////////////////////////////////////////////////////////////////////////////////
314+
// Step 2: detect insides of quote pairs
315+
////////////////////////////////////////////////////////////////////////////////////////////
316+
317+
u64 quote_bits =
318+
cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"'));
319+
quote_bits = quote_bits & ~odd_ends;
320+
u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
321+
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
322+
quote_mask ^= prev_iter_inside_quote;
323+
prev_iter_inside_quote = (u64)((s64)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20
324+
325+
// How do we build up a user traversable data structure
326+
// first, do a 'shufti' to detect structural JSON characters
327+
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
328+
// these go into the first 3 buckets of the comparison (1/2/4)
329+
330+
// we are also interested in the four whitespace characters
331+
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
332+
// these go into the next 2 buckets of the comparison (8/16)
333+
const m256 low_nibble_mask = _mm256_setr_epi8(
334+
// 0 9 a b c d
335+
16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0,
336+
0, 0, 8, 12, 1, 2, 9, 0, 0);
337+
const m256 high_nibble_mask = _mm256_setr_epi8(
338+
// 0 2 3 5 7
339+
8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
340+
1, 0, 0, 0, 3, 2, 1, 0, 0);
341+
342+
m256 structural_shufti_mask = _mm256_set1_epi8(0x7);
343+
m256 whitespace_shufti_mask = _mm256_set1_epi8(0x18);
344+
345+
m256 v_lo = _mm256_and_si256(
346+
_mm256_shuffle_epi8(low_nibble_mask, input_lo),
347+
_mm256_shuffle_epi8(high_nibble_mask,
348+
_mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
349+
_mm256_set1_epi8(0x7f))));
350+
351+
m256 v_hi = _mm256_and_si256(
352+
_mm256_shuffle_epi8(low_nibble_mask, input_hi),
353+
_mm256_shuffle_epi8(high_nibble_mask,
354+
_mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
355+
_mm256_set1_epi8(0x7f))));
356+
m256 tmp_lo = _mm256_cmpeq_epi8(
357+
_mm256_and_si256(v_lo, structural_shufti_mask), _mm256_set1_epi8(0));
358+
m256 tmp_hi = _mm256_cmpeq_epi8(
359+
_mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
360+
361+
u64 structural_res_0 = (u32)_mm256_movemask_epi8(tmp_lo);
362+
u64 structural_res_1 = _mm256_movemask_epi8(tmp_hi);
363+
u64 structurals = ~(structural_res_0 | (structural_res_1 << 32));
364+
365+
// this additional mask and transfer is non-trivially expensive,
366+
// unfortunately
367+
m256 tmp_ws_lo = _mm256_cmpeq_epi8(
368+
_mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
369+
m256 tmp_ws_hi = _mm256_cmpeq_epi8(
370+
_mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
371+
372+
u64 ws_res_0 = (u32)_mm256_movemask_epi8(tmp_ws_lo);
373+
u64 ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
374+
u64 whitespace = ~(ws_res_0 | (ws_res_1 << 32));
375+
376+
377+
// mask off anything inside quotes
378+
structurals &= ~quote_mask;
379+
380+
// add the real quote bits back into our bitmask as well, so we can
381+
// quickly traverse the strings we've spent all this trouble gathering
382+
structurals |= quote_bits;
383+
384+
// Now, establish "pseudo-structural characters". These are non-whitespace
385+
// characters that are (a) outside quotes and (b) have a predecessor that's
386+
// either whitespace or a structural character. This means that subsequent
387+
// passes will get a chance to encounter the first character of every string
388+
// of non-whitespace and, if we're parsing an atom like true/false/null or a
389+
// number we can stop at the first whitespace or structural character
390+
// following it.
391+
392+
// a qualified predecessor is something that can happen 1 position before a
393+
// pseudo-structural character
394+
u64 pseudo_pred = structurals | whitespace;
395+
u64 shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
396+
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
397+
u64 pseudo_structurals =
398+
shifted_pseudo_pred & (~whitespace) & (~quote_mask);
399+
structurals |= pseudo_structurals;
400+
401+
// now, we've used our close quotes all we need to. So let's switch them off
402+
// they will be off in the quote mask and on in quote bits.
403+
structurals &= ~(quote_bits & ~quote_mask);
404+
*(u64 *)(pj.structurals + idx / 8) = structurals;
405+
}
252406
if(buf[len] != '\0') {
253407
std::cerr << "Your string should be NULL terminated." << std::endl;
254408
return false;
255409
}
256-
// we are going to zero out everything after len:
257-
size_t count_last_64bits = len % 64;
258-
if(count_last_64bits != 0) { // we have a "final" word where only count_last_64bits matter
259-
u64 lastword = *(u64 *)(pj.structurals + len / 8);
260-
printf("last word %zu \n", lastword);
261-
printf("count_last_64bits%zu \n", count_last_64bits);
262-
lastword &= ( UINT64_C(1) << count_last_64bits) - 1;
263-
*(u64 *)(pj.structurals + len / 8) = lastword;
264-
}
265-
266-
//pj.structural_indexes[pj.n_structural_indexes++] = len; // the final NULL is used as a pseudo-structural character
267410
#ifdef UTF8VALIDATE
268411
return _mm256_testz_si256(has_error, has_error);
269412
#else

0 commit comments

Comments
 (0)