Skip to content

Commit d7d568e

Browse files
committed
Trying something else.
1 parent 3a2f746 commit d7d568e

1 file changed

Lines changed: 33 additions & 15 deletions

File tree

src/stage1_find_marks.cpp

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#endif
1313

1414
#define SET_BIT(i) \
15-
base_ptr[base + i] = (uint32_t)idx + trailingzeroes(structurals); \
15+
base_ptr[base + i] = (uint32_t)idx - 64 + trailingzeroes(structurals); \
1616
structurals = structurals & (structurals - 1);
1717

1818
#define SET_BIT1 SET_BIT(0)
@@ -89,6 +89,7 @@ WARN_UNUSED
8989
uint64_t prev_iter_ends_pseudo_pred = 1ULL;
9090
size_t lenminus64 = len < 64 ? 0 : len - 64;
9191
size_t idx = 0;
92+
uint64_t structurals = 0;
9293
for (; idx < lenminus64; idx += 64) {
9394
#ifndef _MSC_VER
9495
__builtin_prefetch(buf + idx + 128);
@@ -152,6 +153,21 @@ WARN_UNUSED
152153
quote_bits = quote_bits & ~odd_ends;
153154
uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
154155
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
156+
157+
158+
159+
uint32_t cnt = hamming(structurals);
160+
uint32_t next_base = base + cnt;
161+
while (structurals) {
162+
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
163+
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
164+
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
165+
s = s & (s - 1);
166+
}*/
167+
base += NO_PDEP_WIDTH;
168+
}
169+
base = next_base;
170+
155171
quote_mask ^= prev_iter_inside_quote;
156172
prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regehr from Utah U. says this is fine code
157173

@@ -193,7 +209,7 @@ WARN_UNUSED
193209

194210
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
195211
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
196-
uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32));
212+
structurals = ~(structural_res_0 | (structural_res_1 << 32));
197213

198214
// this additional mask and transfer is non-trivially expensive,
199215
// unfortunately
@@ -233,17 +249,6 @@ WARN_UNUSED
233249
// they will be off in the quote mask and on in quote bits.
234250
structurals &= ~(quote_bits & ~quote_mask);
235251

236-
uint32_t cnt = hamming(structurals);
237-
uint32_t next_base = base + cnt;
238-
while (structurals) {
239-
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
240-
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
241-
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
242-
s = s & (s - 1);
243-
}*/
244-
base += NO_PDEP_WIDTH;
245-
}
246-
base = next_base;
247252
//*(uint64_t *)(pj.structurals + idx / 8) = structurals;
248253
}
249254

@@ -318,6 +323,17 @@ WARN_UNUSED
318323
quote_mask ^= prev_iter_inside_quote;
319324
//prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20
320325

326+
uint32_t cnt = hamming(structurals);
327+
uint32_t next_base = base + cnt;
328+
while (structurals) {
329+
CALL(SET_BITLOOPN, NO_PDEP_WIDTH)
330+
/*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
331+
base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
332+
s = s & (s - 1);
333+
}*/
334+
base += NO_PDEP_WIDTH;
335+
}
336+
base = next_base;
321337
// How do we build up a user traversable data structure
322338
// first, do a 'shufti' to detect structural JSON characters
323339
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
@@ -356,7 +372,7 @@ WARN_UNUSED
356372

357373
uint64_t structural_res_0 = (uint32_t)_mm256_movemask_epi8(tmp_lo);
358374
uint64_t structural_res_1 = _mm256_movemask_epi8(tmp_hi);
359-
uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32));
375+
structurals = ~(structural_res_0 | (structural_res_1 << 32));
360376

361377
// this additional mask and transfer is non-trivially expensive,
362378
// unfortunately
@@ -398,6 +414,8 @@ WARN_UNUSED
398414
// they will be off in the quote mask and on in quote bits.
399415
structurals &= ~(quote_bits & ~quote_mask);
400416
//*(uint64_t *)(pj.structurals + idx / 8) = structurals;
417+
idx += 64;
418+
}
401419
uint32_t cnt = hamming(structurals);
402420
uint32_t next_base = base + cnt;
403421
while (structurals) {
@@ -409,7 +427,7 @@ WARN_UNUSED
409427
base += NO_PDEP_WIDTH;
410428
}
411429
base = next_base;
412-
}
430+
413431
pj.n_structural_indexes = base;
414432
if(base_ptr[pj.n_structural_indexes-1] > len) {
415433
fprintf( stderr,"Internal bug\n");

0 commit comments

Comments
 (0)