1212#endif
1313
// SET_BIT(i): writes one structural position into base_ptr[base + i] and then
// clears the lowest set bit of `structurals` via x & (x - 1).
// The position is idx (minus 64 in the `+` line) plus
// trailingzeroes(structurals) -- presumably the bit index of the lowest set
// bit; confirm against the helper's definition.
// Why "- 64": in this change the flattening loop runs at the top of the next
// iteration (and once more after a final idx += 64), so `structurals` still
// holds the mask of the previous 64-byte block while idx has already advanced.
1414#define SET_BIT (i ) \
15- base_ptr[base + i] = (uint32_t )idx + trailingzeroes(structurals); \
15+ base_ptr[base + i] = (uint32_t )idx - 64 + trailingzeroes(structurals); \
1616 structurals = structurals & (structurals - 1 );
1717
1818#define SET_BIT1 SET_BIT (0 )
@@ -89,6 +89,7 @@ WARN_UNUSED
8989 uint64_t prev_iter_ends_pseudo_pred = 1ULL ;
9090 size_t lenminus64 = len < 64 ? 0 : len - 64 ;
9191 size_t idx = 0 ;
92+ uint64_t structurals = 0 ;
9293 for (; idx < lenminus64; idx += 64 ) {
9394#ifndef _MSC_VER
9495 __builtin_prefetch (buf + idx + 128 );
@@ -152,6 +153,21 @@ WARN_UNUSED
152153 quote_bits = quote_bits & ~odd_ends;
153154 uint64_t quote_mask = _mm_cvtsi128_si64 (_mm_clmulepi64_si128 (
154155 _mm_set_epi64x (0ULL , quote_bits), _mm_set1_epi8 (0xFF ), 0 ));
156+
157+
158+
159+ uint32_t cnt = hamming (structurals);
160+ uint32_t next_base = base + cnt;
161+ while (structurals) {
162+ CALL (SET_BITLOOPN, NO_PDEP_WIDTH)
163+ /* for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
164+ base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
165+ s = s & (s - 1);
166+ }*/
167+ base += NO_PDEP_WIDTH;
168+ }
169+ base = next_base;
170+
155171 quote_mask ^= prev_iter_inside_quote;
156172 prev_iter_inside_quote = (uint64_t )((int64_t )quote_mask >> 63 ); // right shift of a signed value expected to be well-defined and standard compliant as of C++20, John Regehr from Utah U. says this is fine code
157173
@@ -193,7 +209,7 @@ WARN_UNUSED
193209
194210 uint64_t structural_res_0 = (uint32_t )_mm256_movemask_epi8 (tmp_lo);
195211 uint64_t structural_res_1 = _mm256_movemask_epi8 (tmp_hi);
196- uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32 ));
212+ structurals = ~(structural_res_0 | (structural_res_1 << 32 ));
197213
198214 // this additional mask and transfer is non-trivially expensive,
199215 // unfortunately
@@ -233,17 +249,6 @@ WARN_UNUSED
233249 // they will be off in the quote mask and on in quote bits.
234250 structurals &= ~(quote_bits & ~quote_mask);
235251
236- uint32_t cnt = hamming (structurals);
237- uint32_t next_base = base + cnt;
238- while (structurals) {
239- CALL (SET_BITLOOPN, NO_PDEP_WIDTH)
240- /* for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
241- base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
242- s = s & (s - 1);
243- }*/
244- base += NO_PDEP_WIDTH;
245- }
246- base = next_base;
247252 // *(uint64_t *)(pj.structurals + idx / 8) = structurals;
248253 }
249254
@@ -318,6 +323,17 @@ WARN_UNUSED
318323 quote_mask ^= prev_iter_inside_quote;
319324 // prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20
320325
326+ uint32_t cnt = hamming (structurals);
327+ uint32_t next_base = base + cnt;
328+ while (structurals) {
329+ CALL (SET_BITLOOPN, NO_PDEP_WIDTH)
330+ /* for(size_t i = 0; i < NO_PDEP_WIDTH; i++) {
331+ base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s);
332+ s = s & (s - 1);
333+ }*/
334+ base += NO_PDEP_WIDTH;
335+ }
336+ base = next_base;
321337 // How do we build up a user traversable data structure
322338 // first, do a 'shufti' to detect structural JSON characters
323339 // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
@@ -356,7 +372,7 @@ WARN_UNUSED
356372
357373 uint64_t structural_res_0 = (uint32_t )_mm256_movemask_epi8 (tmp_lo);
358374 uint64_t structural_res_1 = _mm256_movemask_epi8 (tmp_hi);
359- uint64_t structurals = ~(structural_res_0 | (structural_res_1 << 32 ));
375+ structurals = ~(structural_res_0 | (structural_res_1 << 32 ));
360376
361377 // this additional mask and transfer is non-trivially expensive,
362378 // unfortunately
@@ -398,6 +414,8 @@ WARN_UNUSED
398414 // they will be off in the quote mask and on in quote bits.
399415 structurals &= ~(quote_bits & ~quote_mask);
400416 // *(uint64_t *)(pj.structurals + idx / 8) = structurals;
417+ idx += 64 ;
418+ }
401419 uint32_t cnt = hamming (structurals);
402420 uint32_t next_base = base + cnt;
403421 while (structurals) {
@@ -409,7 +427,7 @@ WARN_UNUSED
409427 base += NO_PDEP_WIDTH;
410428 }
411429 base = next_base;
412- }
430+
413431 pj.n_structural_indexes = base;
414432 if (base_ptr[pj.n_structural_indexes -1 ] > len) {
415433 fprintf ( stderr," Internal bug\n " );
0 commit comments