11#ifndef SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
22#define SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
33
4+ #include " simdjson/simd_input_arm64.h"
45#include " simdjson/simdutf8check_arm64.h"
56#include " simdjson/stage1_find_marks.h"
67
78#ifdef IS_ARM64
89namespace simdjson {
// ARM64 view of one 64-byte chunk of input, split across four 16-byte
// NEON registers (loaded from offsets 0, 16, 32 and 48 by fill_input).
template <> struct simd_input<Architecture::ARM64> {
  uint8x16_t i0; // input bytes 0..15
  uint8x16_t i1; // input bytes 16..31
  uint8x16_t i2; // input bytes 32..47
  uint8x16_t i3; // input bytes 48..63
};
15-
// Load 64 consecutive bytes starting at `ptr` into the four NEON
// registers of an ARM64 simd_input. The caller must guarantee that at
// least 64 readable bytes are available at `ptr`.
template <>
really_inline simd_input<Architecture::ARM64>
fill_input<Architecture::ARM64>(const uint8_t *ptr) {
  simd_input<Architecture::ARM64> chunk;
  chunk.i0 = vld1q_u8(ptr);
  chunk.i1 = vld1q_u8(ptr + 16);
  chunk.i2 = vld1q_u8(ptr + 32);
  chunk.i3 = vld1q_u8(ptr + 48);
  return chunk;
}
26-
// Emulate x86's pmovmskb on NEON: produce a 16-bit mask whose bit i is
// the most-significant bit of input lane i. Intended for comparison
// results where each lane is 0x00 or 0xFF. NEON has no single movemask
// instruction, so each lane is ANDed with a per-position weight and the
// weights are folded together with three pairwise horizontal adds.
really_inline uint16_t neon_movemask(uint8x16_t input) {
  const uint8x16_t lane_weights = {0x01, 0x02, 0x04, 0x08,
                                   0x10, 0x20, 0x40, 0x80,
                                   0x01, 0x02, 0x04, 0x08,
                                   0x10, 0x20, 0x40, 0x80};
  uint8x16_t weighted = vandq_u8(input, lane_weights);
  // Three rounds of pairwise adds collapse 16 weighted bytes into the
  // low two bytes (low half of the mask, then high half).
  uint8x16_t folded = vpaddq_u8(weighted, weighted);
  folded = vpaddq_u8(folded, folded);
  folded = vpaddq_u8(folded, folded);
  return vgetq_lane_u16(vreinterpretq_u16_u8(folded), 0);
}
36-
// Movemask over a full 64-byte chunk: collapse the most-significant bit
// of each lane of four 16-byte vectors into one 64-bit mask (p0 supplies
// bits 0..15, p1 bits 16..31, p2 bits 32..47, p3 bits 48..63). Intended
// for comparison results where lanes are 0x00 or 0xFF.
really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1,
                                          uint8x16_t p2, uint8x16_t p3) {
  const uint8x16_t lane_weights = {0x01, 0x02, 0x04, 0x08,
                                   0x10, 0x20, 0x40, 0x80,
                                   0x01, 0x02, 0x04, 0x08,
                                   0x10, 0x20, 0x40, 0x80};
  // Weight each lane by its bit position within a 16-bit group.
  const uint8x16_t w0 = vandq_u8(p0, lane_weights);
  const uint8x16_t w1 = vandq_u8(p1, lane_weights);
  const uint8x16_t w2 = vandq_u8(p2, lane_weights);
  const uint8x16_t w3 = vandq_u8(p3, lane_weights);
  // Tree of pairwise horizontal adds funnels all 64 weighted bytes into
  // the low eight bytes of one register.
  uint8x16_t folded = vpaddq_u8(vpaddq_u8(w0, w1), vpaddq_u8(w2, w3));
  folded = vpaddq_u8(folded, folded);
  return vgetq_lane_u64(vreinterpretq_u64_u8(folded), 0);
}
5110
5211template <>
5312really_inline uint64_t
@@ -59,7 +18,8 @@ compute_quote_mask<Architecture::ARM64>(uint64_t quote_bits) {
5918#endif
6019}
6120
// Running state for the incremental ARM64 UTF-8 validator.
template <>
struct utf8_checking_state<Architecture::ARM64> {
  // Sticky error accumulator; checked at the end by check_utf8_errors.
  // NOTE(review): presumably any nonzero lane means invalid UTF-8 was
  // seen — confirm against the checker implementation.
  int8x16_t has_error{};
  // Carry-over from the previously processed bytes, so multi-byte
  // sequences that straddle a 64-byte chunk boundary validate correctly.
  processed_utf_bytes previous{};
};
@@ -115,28 +75,6 @@ really_inline ErrorValues check_utf8_errors<Architecture::ARM64>(
11575 : simdjson::SUCCESS;
11676}
11777
// Compare every byte of the 64-byte input against the byte `m`.
// Returns a 64-bit mask with bit i set iff input byte i equals m.
template <>
really_inline uint64_t cmp_mask_against_input<Architecture::ARM64>(
    simd_input<Architecture::ARM64> in, uint8_t m) {
  const uint8x16_t splat = vmovq_n_u8(m);
  // vceqq_u8 yields 0xFF/0x00 per lane; neon_movemask_bulk packs the
  // four comparison vectors into one bit per input byte.
  return neon_movemask_bulk(vceqq_u8(in.i0, splat), vceqq_u8(in.i1, splat),
                            vceqq_u8(in.i2, splat), vceqq_u8(in.i3, splat));
}
128-
// Unsigned byte-wise `<=` test of the 64-byte input against `m`.
// Returns a 64-bit mask with bit i set iff input byte i <= m.
template <>
really_inline uint64_t unsigned_lteq_against_input<Architecture::ARM64>(
    simd_input<Architecture::ARM64> in, uint8_t m) {
  const uint8x16_t splat = vmovq_n_u8(m);
  // vcleq_u8 yields 0xFF/0x00 per lane; neon_movemask_bulk packs the
  // four comparison vectors into one bit per input byte.
  return neon_movemask_bulk(vcleq_u8(in.i0, splat), vcleq_u8(in.i1, splat),
                            vcleq_u8(in.i2, splat), vcleq_u8(in.i3, splat));
}
139-
14078template <>
14179really_inline void find_whitespace_and_structurals<Architecture::ARM64>(
14280 simd_input<Architecture::ARM64> in, uint64_t &whitespace,
0 commit comments