Skip to content

Commit e3a4b41

Browse files
committed
Cleaning.
1 parent c11eefc commit e3a4b41

8 files changed

Lines changed: 115 additions & 93 deletions

File tree

Makefile

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ else
1717
CFLAGS += -O3
1818
endif
1919

20-
EXECUTABLES=parse jsoncheck numberparsingcheck stringparsingcheck minifiercompetition parsingcompetition minify allparserscheckfile
20+
MAINEXECUTABLES=parse minify
21+
TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck
22+
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition allparserscheckfile
2123

2224
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
2325
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
@@ -34,7 +36,7 @@ UJSON4C_INCLUDE:=dependencies/ujson4c/src/ujdecode.c
3436

3537
LIBS=$(RAPIDJSON_INCLUDE) $(SAJSON_INCLUDE) $(JSON11_INCLUDE) $(FASTJSON_INCLUDE) $(GASON_INCLUDE) $(UJSON4C_INCLUDE)
3638
OBJECTS=ujdecode.o
37-
all: $(LIBS) $(EXECUTABLES)
39+
all: $(MAINEXECUTABLES)
3840

3941
test: jsoncheck numberparsingcheck stringparsingcheck
4042
./numberparsingcheck
@@ -105,7 +107,7 @@ cppcheck:
105107

106108

107109
clean:
108-
rm -f $(OBJECTS) $(EXECUTABLES) $(EXTRA_EXECUTABLES)
110+
rm -f $(OBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
109111

110112
cleandist:
111-
rm -f $(OBJECTS) $(EXECUTABLES) $(EXTRA_EXECUTABLES)
113+
rm -f $(OBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)

benchmark/minifiercompetition.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,12 +128,19 @@ int main(int argc, char *argv[]) {
128128
BEST_TIME("sajson despaced", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(minisize, buffer)).is_valid(), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, true);
129129

130130
ParsedJson pj;
131-
pj.allocateCapacity(p.size(), 1024);
131+
bool isallocok = pj.allocateCapacity(p.size(), 1024);
132+
if(!isallocok) {
133+
printf("failed to allocate memory\n");
134+
return EXIT_FAILURE;
135+
}
132136
BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.size(), pj), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
133137

134138
ParsedJson pj2;
135-
pj2.allocateCapacity(p.size(), 1024);
136-
139+
bool isallocok2 = pj2.allocateCapacity(p.size(), 1024);
140+
if(!isallocok2) {
141+
printf("failed to allocate memory\n");
142+
return EXIT_FAILURE;
143+
}
137144

138145
BEST_TIME("json_parse despaced", json_parse((const u8*)buffer, minisize, pj2), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, true);
139146

include/simdjson/jsonparser.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,16 @@
1010
// Parse a document found in buf, need to preallocate ParsedJson.
1111
// Return false in case of a failure.
1212
// The string should be NULL terminated.
13+
WARN_UNUSED
1314
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj);
1415

16+
WARN_UNUSED
1517
static inline bool json_parse(const char * buf, size_t len, ParsedJson &pj) {
1618
return json_parse((const u8 *) buf, len, pj);
1719
}
1820

1921
// convenience function
22+
WARN_UNUSED
2023
static inline bool json_parse(const std::string_view &s, ParsedJson &pj) {
2124
return json_parse(s.data(), s.size(), pj);
2225
}

include/simdjson/parsedjson.h

Lines changed: 88 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -16,106 +16,103 @@
1616

1717
// Mask with the low 56 bits set (14 hex digits). Presumably used to strip the
// top byte from a 64-bit tape word -- confirm against the users of this macro.
// Deliberately no trailing semicolon, so the macro can appear inside larger
// expressions such as (x & JSONVALUEMASK).
#define JSONVALUEMASK 0xFFFFFFFFFFFFFF
1818

19-
2019
struct ParsedJson {
2120
public:
2221
size_t bytecapacity; // indicates how many bits are meant to be supported by
2322
// structurals
2423
size_t depthcapacity; // how deep we can go
2524
size_t tapecapacity;
26-
size_t stringcapacity;
25+
size_t stringcapacity;
2726
u32 current_loc;
2827
u8 *structurals;
2928
u32 n_structural_indexes;
3029
u32 *structural_indexes;
3130

32-
u64 *tape;
31+
u64 *tape;
3332
u32 *containing_scope_offset;
3433
void **ret_address;
3534

3635
u8 *string_buf; // should be at least bytecapacity
3736
u8 *current_string_buf_loc;
3837

39-
// create a ParsedJson container with zero capacity, call allocateCapacity to
38+
// create a ParsedJson container with zero capacity, call allocateCapacity to
4039
// allocate memory
41-
ParsedJson() : bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0),
42-
current_loc(0), structurals(NULL), n_structural_indexes(0), structural_indexes(NULL),
43-
tape(NULL), containing_scope_offset(NULL), ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL) {
44-
}
45-
46-
// if needed, allocate memory so that the object is able to process JSON documents having up to len butes and maxdepth "depth"
40+
ParsedJson()
41+
: bytecapacity(0), depthcapacity(0), tapecapacity(0), stringcapacity(0),
42+
current_loc(0), structurals(NULL), n_structural_indexes(0),
43+
structural_indexes(NULL), tape(NULL), containing_scope_offset(NULL),
44+
ret_address(NULL), string_buf(NULL), current_string_buf_loc(NULL) {}
45+
46+
// if needed, allocate memory so that the object is able to process JSON
47+
// documents having up to len bytes and maxdepth "depth"
48+
WARN_UNUSED
4749
inline bool allocateCapacity(size_t len, size_t maxdepth) {
48-
if((maxdepth == 0) || (len == 0)) {
50+
if ((maxdepth == 0) || (len == 0)) {
4951
std::cerr << "capacities must be non-zero " << std::endl;
5052
return false;
5153
}
52-
if(len > 0) {
53-
if((len <= bytecapacity) && (depthcapacity<maxdepth)) return true;
54+
if (len > 0) {
55+
if ((len <= bytecapacity) && (depthcapacity < maxdepth))
56+
return true;
5457
deallocate();
5558
}
56-
bytecapacity = 0; // will only set it to len after allocations are a success
57-
if (posix_memalign((void **)&structurals, 8, ROUNDUP_N(len, 64) / 8)) {
58-
std::cerr << "Could not allocate memory for structurals" << std::endl;
59-
return false;
60-
};
61-
n_structural_indexes = 0;
62-
u32 max_structures = ROUNDUP_N(len, 64) + 2 + 7;
63-
structural_indexes = new u32[max_structures];
64-
65-
if (structural_indexes == NULL) {
66-
std::cerr << "Could not allocate memory for structural_indexes"
67-
<< std::endl;
68-
delete[] structurals;
69-
return false;
59+
bytecapacity = 0; // will only set it to len after allocations are a success
60+
if (posix_memalign((void **)&structurals, 8, ROUNDUP_N(len, 64) / 8)) {
61+
std::cerr << "Could not allocate memory for structurals" << std::endl;
62+
return false;
63+
};
64+
n_structural_indexes = 0;
65+
u32 max_structures = ROUNDUP_N(len, 64) + 2 + 7;
66+
structural_indexes = new u32[max_structures];
67+
68+
if (structural_indexes == NULL) {
69+
std::cerr << "Could not allocate memory for structural_indexes"
70+
<< std::endl;
71+
delete[] structurals;
72+
return false;
73+
}
74+
size_t localtapecapacity = ROUNDUP_N(len, 64);
75+
size_t localstringcapacity = ROUNDUP_N(len, 64);
76+
string_buf = new u8[localstringcapacity];
77+
tape = new u64[localtapecapacity];
78+
containing_scope_offset = new u32[maxdepth];
79+
ret_address = new void *[maxdepth];
80+
81+
if ((string_buf == NULL) || (tape == NULL) ||
82+
(containing_scope_offset == NULL) || (ret_address == NULL)) {
83+
std::cerr << "Could not allocate memory" << std::endl;
84+
delete[] ret_address;
85+
delete[] containing_scope_offset;
86+
delete[] tape;
87+
delete[] string_buf;
88+
delete[] structural_indexes;
89+
delete[] structurals;
90+
return false;
91+
}
92+
93+
bytecapacity = len;
94+
depthcapacity = maxdepth;
95+
tapecapacity = localtapecapacity;
96+
stringcapacity = localstringcapacity;
97+
return true;
7098
}
71-
size_t localtapecapacity = ROUNDUP_N(len, 64);
72-
size_t localstringcapacity = ROUNDUP_N(len, 64);
73-
string_buf = new u8[localstringcapacity];
74-
tape = new u64[localtapecapacity];
75-
containing_scope_offset = new u32[maxdepth];
76-
ret_address = new void*[maxdepth];
77-
78-
if ((string_buf == NULL) || (tape == NULL)
79-
|| (containing_scope_offset == NULL) || (ret_address == NULL) ) {
80-
std::cerr << "Could not allocate memory"
81-
<< std::endl;
99+
100+
// deallocate memory and set capacity to zero, called automatically by the
101+
// destructor
102+
void deallocate() {
103+
bytecapacity = 0;
104+
depthcapacity = 0;
105+
tapecapacity = 0;
106+
stringcapacity = 0;
82107
delete[] ret_address;
83108
delete[] containing_scope_offset;
84109
delete[] tape;
85110
delete[] string_buf;
86111
delete[] structural_indexes;
87-
delete[] structurals;
88-
return false;
112+
free(structurals);
89113
}
90114

91-
bytecapacity = len;
92-
depthcapacity = maxdepth;
93-
tapecapacity = localtapecapacity;
94-
stringcapacity = localstringcapacity;
95-
return true;
96-
97-
}
98-
99-
100-
101-
// deallocate memory and set capacity to zero, called automatically by the destructor
102-
void deallocate() {
103-
bytecapacity = 0;
104-
depthcapacity = 0;
105-
tapecapacity = 0;
106-
stringcapacity = 0;
107-
delete[] ret_address;
108-
delete[] containing_scope_offset;
109-
delete[] tape;
110-
delete[] string_buf;
111-
delete[] structural_indexes;
112-
free(structurals);
113-
}
114-
115-
~ParsedJson() {
116-
deallocate() ;
117-
}
118-
115+
~ParsedJson() { deallocate(); }
119116

120117
// this should be called when parsing (right before writing the tapes)
121118
void init() {
@@ -124,7 +121,8 @@ void deallocate() {
124121
}
125122

126123
// print the json to stdout (should be valid)
127-
// return false if the tape is likely wrong (e.g., you did not parse a valid JSON).
124+
// return false if the tape is likely wrong (e.g., you did not parse a valid
125+
// JSON).
128126
bool printjson() {
129127
size_t tapeidx = 0;
130128
u64 tape_val = tape[tapeidx];
@@ -136,8 +134,9 @@ void deallocate() {
136134
printf("Error: no starting root node?");
137135
return false;
138136
}
139-
if(howmany > tapecapacity) {
140-
printf("We may be exceeding the tape capacity. Is this a valid document?\n");
137+
if (howmany > tapecapacity) {
138+
printf(
139+
"We may be exceeding the tape capacity. Is this a valid document?\n");
141140
return false;
142141
}
143142
tapeidx++;
@@ -153,7 +152,7 @@ void deallocate() {
153152
if ((inobjectidx[depth] > 0) && (type != ']'))
154153
printf(", ");
155154
inobjectidx[depth]++;
156-
} else { //if (inobject) {
155+
} else { // if (inobject) {
157156
if ((inobjectidx[depth] > 0) && ((inobjectidx[depth] & 1) == 0) &&
158157
(type != '}'))
159158
printf(", ");
@@ -168,12 +167,14 @@ void deallocate() {
168167
putchar('"');
169168
break;
170169
case 'l': // we have a long int
171-
if(tapeidx + 1 >= howmany) return false;
172-
printf("%" PRId64, (int64_t) tape[tapeidx++]);
170+
if (tapeidx + 1 >= howmany)
171+
return false;
172+
printf("%" PRId64, (int64_t)tape[tapeidx++]);
173173
break;
174174
case 'd': // we have a double
175-
if(tapeidx + 1 >= howmany) return false;
176-
printf("%f", *((double * )& tape[tapeidx++]));
175+
if (tapeidx + 1 >= howmany)
176+
return false;
177+
printf("%f", *((double *)&tape[tapeidx++]));
177178
break;
178179
case 'n': // we have a null
179180
printf("null");
@@ -208,12 +209,18 @@ void deallocate() {
208209
break;
209210
case 'r': // we start and end with the root node
210211
printf("should we be hitting the root node?\n");
212+
free(inobject);
213+
free(inobjectidx);
211214
return false;
212215
default:
213216
printf("bug %c\n", type);
217+
free(inobject);
218+
free(inobjectidx);
214219
return false;
215220
}
216221
}
222+
free(inobject);
223+
free(inobjectidx);
217224
return true;
218225
}
219226

@@ -238,14 +245,13 @@ void deallocate() {
238245

239246
really_inline void write_tape_s64(s64 i) {
240247
write_tape(0, 'l');
241-
tape[current_loc++] =*( (u64*) &i);
248+
tape[current_loc++] = *((u64 *)&i);
242249
}
243250

244251
really_inline void write_tape_double(double d) {
245252
write_tape(0, 'd');
246-
static_assert(sizeof(d) == sizeof(tape[current_loc]),
247-
"mismatch size");
248-
tape[current_loc++] =*( (u64*) &d);
253+
static_assert(sizeof(d) == sizeof(tape[current_loc]), "mismatch size");
254+
tape[current_loc++] = *((u64 *)&d);
249255
}
250256

251257
really_inline u32 get_current_loc() { return current_loc; }
@@ -278,8 +284,8 @@ void deallocate() {
278284
bool down(); // valid if we're at a [ or { call site; moves us to header of
279285
// that scope
280286
// void to_start_scope(); // move us to the start of our current
281-
// scope; always succeeds void to_end_scope(); // move us to the
282-
// start of our current scope; always succeeds
287+
// scope; always succeeds void to_end_scope(); // move us to
288+
// the start of our current scope; always succeeds
283289

284290
// these navigation elements move us across scope if need be, so allow us to
285291
// iterate over everything at a given depth

include/simdjson/stage1_find_marks.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
WARN_UNUSED
77
bool find_structural_bits(const u8 *buf, size_t len, ParsedJson &pj);
88

9+
WARN_UNUSED
910
static inline bool find_structural_bits(const char *buf, size_t len, ParsedJson &pj) {
1011
return find_structural_bits((const u8 *)buf, len, pj);
1112
}

include/simdjson/stringparsing.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ static const u8 escape_map[256] = {
3838
// dest will advance a variable amount (return via pointer)
3939
// return true if the unicode codepoint was valid
4040
// We work in little-endian then swap at write time
41+
WARN_UNUSED
4142
really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
4243
u32 code_point = hex_to_u32_nocheck(*src_ptr + 2);
4344
*src_ptr += 6;
@@ -57,6 +58,7 @@ really_inline bool handle_unicode_codepoint(const u8 **src_ptr, u8 **dst_ptr) {
5758
return offset > 0;
5859
}
5960

61+
WARN_UNUSED
6062
really_inline bool parse_string(const u8 *buf, UNUSED size_t len,
6163
ParsedJson &pj, UNUSED const u32 depth, u32 offset) {
6264
using namespace std;

src/jsonparser.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33

44
// parse a document found in buf, need to preallocate ParsedJson.
5+
WARN_UNUSED
56
bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {
67
if (pj.bytecapacity < len) {
78
std::cerr << "Your ParsedJson cannot support documents that big: " << len

0 commit comments

Comments
 (0)