Skip to content

Commit 0769c39

Browse files
committed
Ok. Looks complete.
1 parent c127570 commit 0769c39

File tree

8 files changed

+321
-191
lines changed

8 files changed

+321
-191
lines changed

Makefile

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55

66

77
.PHONY: clean cleandist
8-
9-
DEPSINCLUDE = -Idependencies/rapidjson/include -Idependencies/sajson/include -Idependencies/json11 -Idependencies/fastjson/src -Idependencies/fastjson/include -Idependencies/gason/src -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src
10-
CXXFLAGS = -std=c++17 -march=native -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux $(DEPSINCLUDE)
8+
COREDEPSINCLUDE = -Idependencies/rapidjson/include -Idependencies/sajson/include
9+
EXTRADEPSINCLUDE = -Idependencies/json11 -Idependencies/fastjson/src -Idependencies/fastjson/include -Idependencies/gason/src -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src
10+
CXXFLAGS = -std=c++17 -march=native -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux
1111
CFLAGS = -march=native -Idependencies/ujson4c/3rdparty -Idependencies/ujson4c/src
1212
ifeq ($(SANITIZE),1)
1313
CXXFLAGS += -g3 -O0 -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined
@@ -24,7 +24,7 @@ endif
2424

2525
MAINEXECUTABLES=parse minify json2json
2626
TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck
27-
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition allparserscheckfile
27+
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile
2828

2929
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_flatten.h include/simdjson/stage34_unified.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
3030
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage34_unified.cpp
@@ -40,9 +40,11 @@ GASON_INCLUDE:=dependencies/gason/src/gason.h
4040
UJSON4C_INCLUDE:=dependencies/ujson4c/src/ujdecode.c
4141

4242
LIBS=$(RAPIDJSON_INCLUDE) $(SAJSON_INCLUDE) $(JSON11_INCLUDE) $(FASTJSON_INCLUDE) $(GASON_INCLUDE) $(UJSON4C_INCLUDE)
43-
OBJECTS=ujdecode.o
43+
EXTRAOBJECTS=ujdecode.o
4444
all: $(MAINEXECUTABLES)
4545

46+
competition: $(COMPARISONEXECUTABLES)
47+
4648
test: jsoncheck numberparsingcheck stringparsingcheck
4749
./numberparsingcheck
4850
./stringparsingcheck
@@ -91,7 +93,7 @@ stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
9193

9294

9395
minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(MINIFIERHEADERS) $(LIBFILES) $(MINIFIERLIBFILES)
94-
$(CXX) $(CXXFLAGS) -o minifiercompetition $(LIBFILES) $(MINIFIERLIBFILES) benchmark/minifiercompetition.cpp -I. $(LIBFLAGS)
96+
$(CXX) $(CXXFLAGS) -o minifiercompetition $(LIBFILES) $(MINIFIERLIBFILES) benchmark/minifiercompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
9597

9698
minify: tools/minify.cpp $(HEADERS) $(MINIFIERHEADERS) $(LIBFILES) $(MINIFIERLIBFILES)
9799
$(CXX) $(CXXFLAGS) -o minify $(MINIFIERLIBFILES) $(LIBFILES) tools/minify.cpp -I.
@@ -103,15 +105,18 @@ json2json: tools/json2json.cpp $(HEADERS) $(LIBFILES)
103105
ujdecode.o: $(UJSON4C_INCLUDE)
104106
$(CC) $(CFLAGS) -c dependencies/ujson4c/src/ujdecode.c
105107

106-
parseandstatcompetition: benchmark/parseandstatcompetition.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
107-
$(CXX) $(CXXFLAGS) -o parseandstatcompetition $(LIBFILES) benchmark/parseandstatcompetition.cpp $(OBJECTS) -I. $(LIBFLAGS)
108+
parseandstatcompetition: benchmark/parseandstatcompetition.cpp $(HEADERS) $(LIBFILES)
109+
$(CXX) $(CXXFLAGS) -o parseandstatcompetition $(LIBFILES) benchmark/parseandstatcompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
110+
111+
distinctuseridcompetition: benchmark/distinctuseridcompetition.cpp $(HEADERS) $(LIBFILES)
112+
$(CXX) $(CXXFLAGS) -o distinctuseridcompetition $(LIBFILES) benchmark/distinctuseridcompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
108113

109114

110-
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
111-
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp $(OBJECTS) -I. $(LIBFLAGS)
115+
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) $(EXTRAOBJECTS)
116+
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp $(EXTRAOBJECTS) -I. $(LIBFLAGS) $(COREDEPSINCLUDE) $(EXTRADEPSINCLUDE)
112117

113-
allparserscheckfile: tests/allparserscheckfile.cpp $(HEADERS) $(LIBFILES) $(OBJECTS)
114-
$(CXX) $(CXXFLAGS) -o allparserscheckfile $(LIBFILES) tests/allparserscheckfile.cpp $(OBJECTS) -I. $(LIBFLAGS)
118+
allparserscheckfile: tests/allparserscheckfile.cpp $(HEADERS) $(LIBFILES) $(EXTRAOBJECTS)
119+
$(CXX) $(CXXFLAGS) -o allparserscheckfile $(LIBFILES) tests/allparserscheckfile.cpp $(EXTRAOBJECTS) -I. $(LIBFLAGS) $(COREDEPSINCLUDE) $(EXTRADEPSINCLUDE)
115120

116121
parsehisto: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
117122
$(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM
@@ -121,7 +126,7 @@ cppcheck:
121126

122127

123128
clean:
124-
rm -f $(OBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
129+
rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
125130

126131
cleandist:
127-
rm -f $(OBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)
132+
rm -f $(EXTRAOBJECTS) $(MAINEXECUTABLES) $(EXTRA_EXECUTABLES) $(TESTEXECUTABLES) $(COMPARISONEXECUTABLES)

README.md

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,15 @@ To simplify the engineering, we make some assumptions.
9090
- We assume AVX2 support which is available in all recent mainstream x86 processors produced by AMD and Intel. No support for non-x86 processors is included though it can be done. We plan to support ARM processors (help is invited).
9191
- We only support GNU GCC and LLVM Clang at this time. There is no support for Microsoft Visual Studio, though it should not be difficult (help is invited).
9292
- In cases of failure, we just report a failure without any indication as to the nature of the problem. (This can be easily improved without affecting performance.)
93+
- As allowed by the specification, we allow repeated keys within an object (other parsers like sajson do the same).
9394

9495
*We do not aim to provide a general-purpose JSON library.* A library like RapidJSON offers much more than just parsing: it helps you generate JSON and offers various other convenient functions. We merely parse the document.
9596

9697

9798
## Features
9899

99100
- The input string is unmodified. (Parsers like sajson and RapidJSON use the input string as a buffer.)
100-
- We parse integers and floating-point numbers as separate types which allows us to support large 64-bit integers.
101+
- We parse integers and floating-point numbers as separate types which allows us to support large 64-bit integers in [-9223372036854775808,9223372036854775808). Among the parsers that differentiate between integers and floating-point numbers, not all support 64-bit integers. (For example, sajson stores integers larger than 2147483648 as floating-point numbers.)
101102
- We do full UTF-8 validation as part of the parsing. (Parsers like fastjson, gason and dropbox json11 do not do UTF-8 validation.)
102103
- We fully validate the numbers. (Parsers like gason and ultrajson will accept `[0e+]` as valid JSON.)
103104
- We validate string content for unescaped characters. (Parsers like fastjson and ultrajson accept unescaped line breaks and tabs in strings.)
@@ -111,6 +112,102 @@ The parser works in three stages:
111112
- Stage 3. (Structure building) Involves constructing a "tree" of sorts to navigate through the data. Strings and numbers are parsed at this stage.
112113

113114

115+
## Navigating the parsed document
116+
117+
Here is a code sample to dump back the parsed JSON to a string:
118+
119+
```c
120+
ParsedJson::iterator pjh(pj);
121+
if (!pjh.isOk()) {
122+
std::cerr << " Could not iterate parsed result. " << std::endl;
123+
return EXIT_FAILURE;
124+
}
125+
compute_dump(pjh);
126+
//
127+
// where compute_dump is :
128+
129+
void compute_dump(ParsedJson::iterator &pjh) {
130+
if (pjh.is_object()) {
131+
std::cout << "{";
132+
if (pjh.down()) {
133+
pjh.print(std::cout); // must be a string
134+
std::cout << ":";
135+
pjh.next();
136+
compute_dump(pjh); // let us recurse
137+
while (pjh.next()) {
138+
std::cout << ",";
139+
pjh.print(std::cout);
140+
std::cout << ":";
141+
pjh.next();
142+
compute_dump(pjh); // let us recurse
143+
}
144+
pjh.up();
145+
}
146+
std::cout << "}";
147+
} else if (pjh.is_array()) {
148+
std::cout << "[";
149+
if (pjh.down()) {
150+
compute_dump(pjh); // let us recurse
151+
while (pjh.next()) {
152+
std::cout << ",";
153+
compute_dump(pjh); // let us recurse
154+
}
155+
pjh.up();
156+
}
157+
std::cout << "]";
158+
} else {
159+
pjh.print(std::cout); // just print the lone value
160+
}
161+
}
162+
```
163+
164+
The following function will find all user.id integers:
165+
166+
```C
167+
void simdjson_traverse(std::vector<int64_t> &answer, ParsedJson::iterator &i) {
168+
switch (i.get_type()) {
169+
case '{':
170+
if (i.down()) {
171+
do {
172+
bool founduser = equals(i.get_string(), "user");
173+
i.next(); // move to value
174+
if (i.is_object()) {
175+
if (founduser && i.move_to_key("id")) {
176+
if (i.is_integer()) {
177+
answer.push_back(i.get_integer());
178+
}
179+
i.up();
180+
}
181+
simdjson_traverse(answer, i);
182+
} else if (i.is_array()) {
183+
simdjson_traverse(answer, i);
184+
}
185+
} while (i.next());
186+
i.up();
187+
}
188+
break;
189+
case '[':
190+
if (i.down()) {
191+
do {
192+
if (i.is_object_or_array()) {
193+
simdjson_traverse(answer, i);
194+
}
195+
} while (i.next());
196+
i.up();
197+
}
198+
break;
199+
case 'l':
200+
case 'd':
201+
case 'n':
202+
case 't':
203+
case 'f':
204+
default:
205+
break;
206+
}
207+
}
208+
```
209+
210+
114211
## Various References
115212

116213
- [Google double-conv](https://github.com/google/double-conversion/)

0 commit comments

Comments
 (0)