Skip to content

Commit 751dce9

Browse files
committed
Getting there slowly.
1 parent f983703 commit 751dce9

File tree

6 files changed

+327
-51
lines changed

6 files changed

+327
-51
lines changed

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ endif
2424

2525
MAINEXECUTABLES=parse minify json2json
2626
TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck
27-
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition allparserscheckfile
27+
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition allparserscheckfile
2828

2929
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
3030
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
@@ -103,6 +103,10 @@ json2json: tools/json2json.cpp $(HEADERS) $(LIBFILES)
103103
ujdecode.o: $(UJSON4C_INCLUDE)
104104
$(CC) $(CFLAGS) -c dependencies/ujson4c/src/ujdecode.c
105105

106+
parseandstatcompetition: benchmark/parseandstatcompetition.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
107+
$(CXX) $(CXXFLAGS) -o parseandstatcompetition $(LIBFILES) benchmark/parseandstatcompetition.cpp $(OBJECTS) -I. $(LIBFLAGS)
108+
109+
106110
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
107111
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp $(OBJECTS) -I. $(LIBFLAGS)
108112

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,10 @@ make parsingcompetition
6969
```
7070

7171

72-
## Limitations
72+
## Scope
73+
74+
We provide a fast parser. It fully validates the input according to the various specifications.
75+
The parser builds a useful immutable (read-only) DOM (document object model) that can be accessed later.
7376

7477
To simplify the engineering, we make some assumptions.
7578

@@ -78,6 +81,9 @@ To simplify the engineering, we make some assumptions.
7881
- We only support GNU GCC and LLVM Clang at this time. There is no support for Microsoft Visual Studio, though it should not be difficult (help is invited).
7982
- In cases of failure, we just report a failure without any indication as to the nature of the problem. (This can be easily improved without affecting performance.)
8083

84+
*We do not aim to provide a general-purpose JSON library.*
85+
86+
8187
## Features
8288

8389
- The input string is unmodified. (Parsers like sajson and RapidJSON use the input string as a buffer.)
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
#include <unistd.h>
2+
#include "simdjson/jsonparser.h"
3+
4+
#include "benchmark.h"
5+
6+
// #define RAPIDJSON_SSE2 // bad for performance
7+
// #define RAPIDJSON_SSE42 // bad for performance
8+
#include "rapidjson/document.h"
9+
#include "rapidjson/reader.h"
10+
#include "rapidjson/stringbuffer.h"
11+
#include "rapidjson/writer.h"
12+
13+
#include "sajson.h"
14+
15+
using namespace rapidjson;
16+
using namespace std;
17+
18+
// Tally of the JSON value kinds found in one document.
// Every member has a default initializer so that a default-constructed
// stat_t (e.g. one returned early for an invalid document) never carries
// indeterminate values.
struct stat_s {
  size_t number_count = 0; // integers and floating-point numbers
  size_t object_count = 0; // '{' ... '}' values
  size_t array_count = 0;  // '[' ... ']' values
  size_t null_count = 0;   // null literals
  size_t true_count = 0;   // true literals
  size_t false_count = 0;  // false literals
  bool valid = false;      // set to true only after a successful parse
};

using stat_t = stat_s;
29+
30+
// Parses `p` with simdjson (build_parsed_json) and walks the resulting tape,
// counting numbers, objects, arrays and the null/true/false literals.
//
// Returns a stat_t whose `valid` flag is false when the document does not
// parse or when the tape contains an entry this function does not recognise;
// in that case the counters hold whatever had been tallied so far (all zero
// if parsing itself failed).
stat_t simdjson_computestats(const std::string_view & p) {
  // Value-initialise so that every counter is zero even on the early returns.
  stat_t answer{};
  ParsedJson pj = build_parsed_json(p);
  answer.valid = pj.isValid();
  if (!answer.valid) {
    return answer;
  }

  // Each tape word carries a one-byte type tag in its top 8 bits and a
  // payload in the bits selected by JSONVALUEMASK.
  size_t tapeidx = 0;
  u64 tape_val = pj.tape[tapeidx++];
  u8 type = (tape_val >> 56);
  if (type != 'r') {
    // The tape is expected to start with the root entry; checked at run time
    // so the guard is not compiled out under NDEBUG.
    answer.valid = false;
    return answer;
  }
  // The root payload is used as the exclusive upper bound of the walk.
  const size_t howmany = tape_val & JSONVALUEMASK;

  // tapeidx is already 1 here (post-increment above); it must not be
  // advanced again or the first real entry would be skipped.
  for (; tapeidx < howmany; tapeidx++) {
    tape_val = pj.tape[tapeidx];
    type = (tape_val >> 56);
    switch (type) {
    case 'l': // we have a long int
      answer.number_count++;
      tapeidx++; // skipping the integer stored in the next tape word
      break;
    case 'd': // we have a double
      answer.number_count++;
      tapeidx++; // skipping the double stored in the next tape word
      break;
    case 'n': // we have a null
      answer.null_count++;
      break;
    case 't': // we have a true
      answer.true_count++;
      break;
    case 'f': // we have a false
      answer.false_count++;
      break;
    case '{': // we start an object
      answer.object_count++;
      break;
    case '}': // we end an object
      break;
    case '[': // we start an array
      answer.array_count++;
      break;
    case ']': // we end an array
      break;
    case '"': // we have a string (or an object key): nothing to count
      // NOTE(review): presumably strings are tagged '"' on the tape and
      // occupy a single tape word - confirm against ParsedJson.
      break;
    default: // unknown tag: report the document as not understood
      answer.valid = false;
      return answer;
    }
  }
  return answer;
}
89+
90+
stat_t rapid_computestats(const std::string_view & p) {
91+
stat_t answer;
92+
rapidjson::Document d;
93+
d.ParseInsitu<kParseValidateEncodingFlag>(p.data());
94+
answer.valid = ! d.HasParseError();
95+
if(d.HasParseError()) {
96+
97+
}
98+
if(!answer.valid) {
99+
return answer;
100+
}
101+
answer.number_count = 0;
102+
answer.object_count = 0;
103+
answer.array_count = 0;
104+
answer.null_count = 0;
105+
answer.true_count = 0;
106+
answer.false_count = 0;
107+
}
108+
109+
110+
int main(int argc, char *argv[]) {
111+
bool verbose = false;
112+
bool all = false;
113+
int c;
114+
while ((c = getopt (argc, argv, "v")) != -1)
115+
switch (c)
116+
{
117+
case 'v':
118+
verbose = true;
119+
break;
120+
default:
121+
abort ();
122+
}
123+
if (optind >= argc) {
124+
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
125+
cerr << "Or " << argv[0] << " -v <jsonfile>\n";
126+
exit(1);
127+
}
128+
const char * filename = argv[optind];
129+
if(optind + 1 < argc) {
130+
cerr << "warning: ignoring everything after " << argv[optind + 1] << endl;
131+
}
132+
std::string_view p;
133+
try {
134+
p = get_corpus(filename);
135+
} catch (const std::exception& e) { // caught by reference to base
136+
std::cout << "Could not load the file " << filename << std::endl;
137+
return EXIT_FAILURE;
138+
}
139+
140+
if (verbose) {
141+
std::cout << "Input has ";
142+
if (p.size() > 1024 * 1024)
143+
std::cout << p.size() / (1024 * 1024) << " MB ";
144+
else if (p.size() > 1024)
145+
std::cout << p.size() / 1024 << " KB ";
146+
else
147+
std::cout << p.size() << " B ";
148+
std::cout << std::endl;
149+
}
150+
ParsedJson pj;
151+
bool allocok = pj.allocateCapacity(p.size(), 1024);
152+
153+
if (!allocok) {
154+
std::cerr << "can't allocate memory" << std::endl;
155+
return EXIT_FAILURE;
156+
}
157+
int repeat = 10;
158+
int volume = p.size();
159+
BEST_TIME("simdjson (dynamic mem) ", build_parsed_json(p).isValid(), true, , repeat, volume, true);
160+
161+
BEST_TIME("simdjson (static alloc) ", json_parse(p, pj), true, , repeat, volume, true);
162+
163+
rapidjson::Document d;
164+
165+
char *buffer = (char *)malloc(p.size() + 1);
166+
memcpy(buffer, p.data(), p.size());
167+
buffer[p.size()] = '\0';
168+
169+
BEST_TIME("RapidJSON",
170+
d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError(),
171+
false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
172+
BEST_TIME("RapidJSON (insitu)", d.ParseInsitu<kParseValidateEncodingFlag>(buffer).HasParseError(), false,
173+
memcpy(buffer, p.data(), p.size()), repeat, volume, true);
174+
175+
BEST_TIME("sajson (dynamic mem, insitu)", sajson::parse(sajson::dynamic_allocation(), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
176+
177+
size_t astbuffersize = p.size();
178+
size_t * ast_buffer = (size_t *) malloc(astbuffersize * sizeof(size_t));
179+
180+
BEST_TIME("sajson (static alloc, insitu)", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
181+
std::string json11err;
182+
if(all) BEST_TIME("dropbox (json11) ", (( json11::Json::parse(buffer,json11err).is_null() ) || ( ! json11err.empty() )), false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
183+
184+
if(all) BEST_TIME("fastjson ", fastjson_parse(buffer), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
185+
JsonValue value;
186+
JsonAllocator allocator;
187+
char *endptr;
188+
if(all) BEST_TIME("gason ", jsonParse(buffer, &endptr, &value, allocator), JSON_OK, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
189+
void *state;
190+
if(all) BEST_TIME("ultrajson ", (UJDecode(buffer, p.size(), NULL, &state) == NULL), false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
191+
BEST_TIME("memcpy ", (memcpy(buffer, p.data(), p.size()) == buffer), true, , repeat, volume, true);
192+
free((void*)p.data());
193+
free(ast_buffer);
194+
free(buffer);
195+
}
196+

benchmark/parsingcompetition.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,8 @@ int main(int argc, char *argv[]) {
9797
char *buffer = (char *)malloc(p.size() + 1);
9898
memcpy(buffer, p.data(), p.size());
9999
buffer[p.size()] = '\0';
100-
100+
//
101+
// TODO: It is possible to preallocate a block of memory with RapidJSON using a MemoryPoolAllocator.
101102
BEST_TIME("RapidJSON",
102103
d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError(),
103104
false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);

0 commit comments

Comments
 (0)