Skip to content

Commit 4beb2ed

Browse files
authored
Make simd8x64 uncopyable and other Visual Studio optimizations (simdjson#1031)
* Working on making simd8x64 immutable
* Even less invasive
1 parent 0ff6833 commit 4beb2ed

22 files changed

Lines changed: 220 additions & 210 deletions

cmake/simdjson-flags.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ else()
8686
endif()
8787
if(SIMDJSON_VISUAL_STUDIO_BUILD_WITH_DEBUG_INFO_FOR_PROFILING)
8888
target_link_options(simdjson-flags INTERFACE /DEBUG )
89-
target_compile_options(simdjson-flags INTERFACE /Zi )
89+
target_compile_options(simdjson-flags INTERFACE /Zi)
9090
endif()
9191
else()
9292
target_compile_options(simdjson-internal-flags INTERFACE -fPIC)

singleheader/amalgamate_demo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on Tue 21 Jul 2020 16:54:49 EDT. Do not edit! */
1+
/* auto-generated on Tue 21 Jul 2020 17:54:23 EDT. Do not edit! */
22

33
#include <iostream>
44
#include "simdjson.h"

singleheader/simdjson.cpp

Lines changed: 107 additions & 107 deletions
Large diffs are not rendered by default.

singleheader/simdjson.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on Tue 21 Jul 2020 16:54:49 EDT. Do not edit! */
1+
/* auto-generated on Tue 21 Jul 2020 17:54:23 EDT. Do not edit! */
22
/* begin file include/simdjson.h */
33
#ifndef SIMDJSON_H
44
#define SIMDJSON_H

src/arm64/dom_parser_implementation.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ namespace arm64 {
1515
using namespace simd;
1616

1717
struct json_character_block {
18-
static really_inline json_character_block classify(const simd::simd8x64<uint8_t> in);
18+
static really_inline json_character_block classify(const simd::simd8x64<uint8_t>& in);
1919

2020
really_inline uint64_t whitespace() const { return _whitespace; }
2121
really_inline uint64_t op() const { return _op; }
@@ -25,7 +25,7 @@ struct json_character_block {
2525
uint64_t _op;
2626
};
2727

28-
really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t> in) {
28+
really_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) {
2929
// Functional programming causes trouble with Visual Studio.
3030
// Keeping this version in comments since it is much nicer:
3131
// auto v = in.map<uint8_t>([&](simd8<uint8_t> chunk) {
@@ -38,7 +38,7 @@ really_inline json_character_block json_character_block::classify(const simd::si
3838
const simd8<uint8_t> table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
3939
const simd8<uint8_t> table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
4040

41-
auto v = simd8x64<uint8_t>(
41+
simd8x64<uint8_t> v(
4242
(in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2),
4343
(in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2),
4444
(in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2),
@@ -79,12 +79,12 @@ really_inline json_character_block json_character_block::classify(const simd::si
7979
return { whitespace, op };
8080
}
8181

82-
really_inline bool is_ascii(simd8x64<uint8_t> input) {
83-
simd8<uint8_t> bits = (input.chunks[0] | input.chunks[1]) | (input.chunks[2] | input.chunks[3]);
82+
really_inline bool is_ascii(const simd8x64<uint8_t>& input) {
83+
simd8<uint8_t> bits = input.reduce_or();
8484
return bits.max() < 0b10000000u;
8585
}
8686

87-
really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8_t> prev2, simd8<uint8_t> prev3) {
87+
really_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
8888
simd8<bool> is_second_byte = prev1 >= uint8_t(0b11000000u);
8989
simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u);
9090
simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u);
@@ -96,7 +96,7 @@ really_inline simd8<bool> must_be_continuation(simd8<uint8_t> prev1, simd8<uint8
9696
return is_second_byte ^ is_third_byte ^ is_fourth_byte;
9797
}
9898

99-
really_inline simd8<bool> must_be_2_3_continuation(simd8<uint8_t> prev2, simd8<uint8_t> prev3) {
99+
really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
100100
simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u);
101101
simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u);
102102
return is_third_byte ^ is_fourth_byte;

src/arm64/simd.h

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -413,10 +413,14 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
413413

414414
template<typename T>
415415
struct simd8x64 {
416-
static const int NUM_CHUNKS = 64 / sizeof(simd8<T>);
416+
static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
417+
static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block.");
417418
const simd8<T> chunks[NUM_CHUNKS];
418419

419-
really_inline simd8x64() : chunks{simd8<T>(), simd8<T>(), simd8<T>(), simd8<T>()} {}
420+
simd8x64(const simd8x64<T>& o) = delete; // no copy allowed
421+
simd8x64<T>& operator=(const simd8<T> other) = delete; // no assignment allowed
422+
simd8x64() = delete; // no default constructor allowed
423+
420424
really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1, const simd8<T> chunk2, const simd8<T> chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {}
421425
really_inline simd8x64(const T ptr[64]) : chunks{simd8<T>::load(ptr), simd8<T>::load(ptr+16), simd8<T>::load(ptr+32), simd8<T>::load(ptr+48)} {}
422426

@@ -427,21 +431,18 @@ really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_
427431
this->chunks[3].store(ptr+sizeof(simd8<T>)*3);
428432
}
429433

434+
really_inline simd8<T> reduce_or() const {
435+
return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]);
436+
}
437+
438+
430439
really_inline void compress(uint64_t mask, T * output) const {
431440
this->chunks[0].compress(uint16_t(mask), output);
432441
this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF));
433442
this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF));
434443
this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF));
435444
}
436445

437-
template <typename F>
438-
static really_inline void each_index(F const& each) {
439-
each(0);
440-
each(1);
441-
each(2);
442-
each(3);
443-
}
444-
445446
really_inline uint64_t to_bitmask() const {
446447
#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO
447448
const uint8x16_t bit_mask = make_uint8x16_t(

src/generic/stage1/buf_block_reader.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ UNUSED static char * format_input_text_64(const uint8_t *text) {
3535
}
3636

3737
// Routines to print masks and text for debugging bitmask operations
38-
UNUSED static char * format_input_text(const simd8x64<uint8_t> in) {
38+
UNUSED static char * format_input_text(const simd8x64<uint8_t>& in) {
3939
static char *buf = (char*)malloc(sizeof(simd8x64<uint8_t>) + 1);
4040
in.store((uint8_t*)buf);
4141
for (size_t i=0; i<sizeof(simd8x64<uint8_t>); i++) {

src/generic/stage1/json_minifier.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@ class json_minifier {
1616
{}
1717
template<size_t STEP_SIZE>
1818
really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
19-
really_inline void next(simd::simd8x64<uint8_t> in, json_block block);
19+
really_inline void next(const simd::simd8x64<uint8_t>& in, json_block block);
2020
really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
2121
json_scanner scanner{};
2222
uint8_t *dst;
2323
};
2424

25-
really_inline void json_minifier::next(simd::simd8x64<uint8_t> in, json_block block) {
25+
really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, json_block block) {
2626
uint64_t mask = block.whitespace();
2727
in.compress(mask, dst);
2828
dst += 64 - count_ones(mask);

src/generic/stage1/json_scanner.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct json_block {
4949
class json_scanner {
5050
public:
5151
json_scanner() {}
52-
really_inline json_block next(const simd::simd8x64<uint8_t> in);
52+
really_inline json_block next(const simd::simd8x64<uint8_t>& in);
5353
really_inline error_code finish(bool streaming);
5454

5555
private:
@@ -86,7 +86,7 @@ really_inline uint64_t follows(const uint64_t match, const uint64_t filler, uint
8686
return result;
8787
}
8888

89-
really_inline json_block json_scanner::next(const simd::simd8x64<uint8_t> in) {
89+
really_inline json_block json_scanner::next(const simd::simd8x64<uint8_t>& in) {
9090
json_string_block strings = string_scanner.next(in);
9191
json_character_block characters = json_character_block::classify(in);
9292
uint64_t follows_scalar = follows(characters.scalar(), prev_scalar);

src/generic/stage1/json_string_scanner.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ struct json_string_block {
3333
// Scans blocks for string characters, storing the state necessary to do so
3434
class json_string_scanner {
3535
public:
36-
really_inline json_string_block next(const simd::simd8x64<uint8_t> in);
36+
really_inline json_string_block next(const simd::simd8x64<uint8_t>& in);
3737
really_inline error_code finish(bool streaming);
3838

3939
private:
@@ -99,7 +99,7 @@ really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t bac
9999
//
100100
// Backslash sequences outside of quotes will be detected in stage 2.
101101
//
102-
really_inline json_string_block json_string_scanner::next(const simd::simd8x64<uint8_t> in) {
102+
really_inline json_string_block json_string_scanner::next(const simd::simd8x64<uint8_t>& in) {
103103
const uint64_t backslash = in.eq('\\');
104104
const uint64_t escaped = find_escaped(backslash);
105105
const uint64_t quote = in.eq('"') & ~escaped;

0 commit comments

Comments
 (0)