|
7 | 7 | #define SIMDJSON_UTF8VALIDATE |
8 | 8 | #endif |
9 | 9 |
|
10 | | -#ifndef NO_PDEP_WIDTH |
11 | | -#define NO_PDEP_WIDTH 8 |
12 | | -#endif |
13 | | - |
14 | | -#define SET_BIT(i) \ |
15 | | - base_ptr[base + i] = (uint32_t)idx - 64 + trailingzeroes(structurals); \ |
16 | | - structurals = structurals & (structurals - 1); |
17 | | - |
18 | | -#define SET_BIT1 SET_BIT(0) |
19 | | -#define SET_BIT2 SET_BIT1 SET_BIT(1) |
20 | | -#define SET_BIT3 SET_BIT2 SET_BIT(2) |
21 | | -#define SET_BIT4 SET_BIT3 SET_BIT(3) |
22 | | -#define SET_BIT5 SET_BIT4 SET_BIT(4) |
23 | | -#define SET_BIT6 SET_BIT5 SET_BIT(5) |
24 | | -#define SET_BIT7 SET_BIT6 SET_BIT(6) |
25 | | -#define SET_BIT8 SET_BIT7 SET_BIT(7) |
26 | | -#define SET_BIT9 SET_BIT8 SET_BIT(8) |
27 | | -#define SET_BIT10 SET_BIT9 SET_BIT(9) |
28 | | -#define SET_BIT11 SET_BIT10 SET_BIT(10) |
29 | | -#define SET_BIT12 SET_BIT11 SET_BIT(11) |
30 | | -#define SET_BIT13 SET_BIT12 SET_BIT(12) |
31 | | -#define SET_BIT14 SET_BIT13 SET_BIT(13) |
32 | | -#define SET_BIT15 SET_BIT14 SET_BIT(14) |
33 | | -#define SET_BIT16 SET_BIT15 SET_BIT(15) |
34 | | - |
35 | | -#define CALL(macro, ...) macro(__VA_ARGS__) |
36 | | - |
37 | | -#define SET_BITLOOPN(n) SET_BIT##n |
38 | | - |
39 | 10 | // It seems that many parsers do UTF-8 validation. |
40 | 11 | // RapidJSON does not do it by default, but a flag |
41 | 12 | // allows it. |
|
44 | 15 | #endif |
45 | 16 | using namespace std; |
46 | 17 |
|
| 18 | + |
47 | 19 | // a straightforward comparison of a mask against input. 5 uops; would be |
48 | 20 | // cheaper in AVX512. |
49 | 21 | really_inline uint64_t cmp_mask_against_input(__m256i input_lo, __m256i input_hi, |
@@ -159,12 +131,23 @@ WARN_UNUSED |
159 | 131 | uint32_t cnt = hamming(structurals); |
160 | 132 | uint32_t next_base = base + cnt; |
161 | 133 | while (structurals) { |
162 | | - CALL(SET_BITLOOPN, NO_PDEP_WIDTH) |
163 | | - /*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) { |
164 | | - base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s); |
165 | | - s = s & (s - 1); |
166 | | - }*/ |
167 | | - base += NO_PDEP_WIDTH; |
| 134 | + base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 135 | + structurals = structurals & (structurals - 1); |
| 136 | + base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 137 | + structurals = structurals & (structurals - 1); |
| 138 | + base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 139 | + structurals = structurals & (structurals - 1); |
| 140 | + base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 141 | + structurals = structurals & (structurals - 1); |
| 142 | + base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 143 | + structurals = structurals & (structurals - 1); |
| 144 | + base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 145 | + structurals = structurals & (structurals - 1); |
| 146 | + base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 147 | + structurals = structurals & (structurals - 1); |
| 148 | + base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 149 | + structurals = structurals & (structurals - 1); |
| 150 | + base += 8; |
168 | 151 | } |
169 | 152 | base = next_base; |
170 | 153 |
|
@@ -326,12 +309,23 @@ WARN_UNUSED |
326 | 309 | uint32_t cnt = hamming(structurals); |
327 | 310 | uint32_t next_base = base + cnt; |
328 | 311 | while (structurals) { |
329 | | - CALL(SET_BITLOOPN, NO_PDEP_WIDTH) |
330 | | - /*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) { |
331 | | - base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s); |
332 | | - s = s & (s - 1); |
333 | | - }*/ |
334 | | - base += NO_PDEP_WIDTH; |
| 312 | + base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 313 | + structurals = structurals & (structurals - 1); |
| 314 | + base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 315 | + structurals = structurals & (structurals - 1); |
| 316 | + base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 317 | + structurals = structurals & (structurals - 1); |
| 318 | + base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 319 | + structurals = structurals & (structurals - 1); |
| 320 | + base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 321 | + structurals = structurals & (structurals - 1); |
| 322 | + base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 323 | + structurals = structurals & (structurals - 1); |
| 324 | + base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 325 | + structurals = structurals & (structurals - 1); |
| 326 | + base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 327 | + structurals = structurals & (structurals - 1); |
| 328 | + base += 8; |
335 | 329 | } |
336 | 330 | base = next_base; |
337 | 331 | // How do we build up a user traversable data structure |
@@ -419,12 +413,23 @@ WARN_UNUSED |
419 | 413 | uint32_t cnt = hamming(structurals); |
420 | 414 | uint32_t next_base = base + cnt; |
421 | 415 | while (structurals) { |
422 | | - CALL(SET_BITLOOPN, NO_PDEP_WIDTH) |
423 | | - /*for(size_t i = 0; i < NO_PDEP_WIDTH; i++) { |
424 | | - base_ptr[base+i] = (uint32_t)idx + trailingzeroes(s); |
425 | | - s = s & (s - 1); |
426 | | - }*/ |
427 | | - base += NO_PDEP_WIDTH; |
| 416 | + base_ptr[base + 0] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 417 | + structurals = structurals & (structurals - 1); |
| 418 | + base_ptr[base + 1] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 419 | + structurals = structurals & (structurals - 1); |
| 420 | + base_ptr[base + 2] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 421 | + structurals = structurals & (structurals - 1); |
| 422 | + base_ptr[base + 3] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 423 | + structurals = structurals & (structurals - 1); |
| 424 | + base_ptr[base + 4] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 425 | + structurals = structurals & (structurals - 1); |
| 426 | + base_ptr[base + 5] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 427 | + structurals = structurals & (structurals - 1); |
| 428 | + base_ptr[base + 6] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 429 | + structurals = structurals & (structurals - 1); |
| 430 | + base_ptr[base + 7] = (uint32_t)idx - 64 + trailingzeroes(structurals); |
| 431 | + structurals = structurals & (structurals - 1); |
| 432 | + base += 8; |
428 | 433 | } |
429 | 434 | base = next_base; |
430 | 435 |
|
|
0 commit comments