Skip to content

Commit d204e54

Browse files
committed
Moving tests to a separate file and directory.
1 parent 01ea799 commit d204e54

File tree

5 files changed

+194
-112
lines changed

5 files changed

+194
-112
lines changed

Makefile

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,32 @@ CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Idependencies/d
1111
LIBFLAGS = -ldouble-conversion
1212
#CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Wno-implicit-function-declaration
1313

14-
EXECUTABLES=parse
15-
14+
EXECUTABLES=parse jsoncheck
15+
HEADERS=common_defs.h jsonioutil.h linux-perf-events.h simdjson_internal.h stage1_find_marks.h stage2_flatten.h stage3_ape_machine.h stage4_shovel_machine.h
16+
LIBFILES=stage1_find_marks.cpp stage2_flatten.cpp stage3_ape_machine.cpp stage4_shovel_machine.cpp
1617
EXTRA_EXECUTABLES=parsenocheesy parsenodep8
1718

1819
LIDDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
1920

2021
LIBS=$(LIDDOUBLE)
2122

2223
all: $(LIBS) $(EXECUTABLES)
23-
-./parse
24+
25+
test: jsoncheck
26+
./jsoncheck
2427

2528
$(LIDDOUBLE) : dependencies/double-conversion/README.md
2629
cd dependencies/double-conversion/ && mkdir -p release && cd release && cmake .. && make
2730

28-
parse: main.cpp stage1_find_marks.cpp common_defs.h linux-perf-events.h
29-
$(CXX) $(CXXFLAGS) -o parse stage1_find_marks.cpp stage2_flatten.cpp stage3_ape_machine.cpp stage4_shovel_machine.cpp main.cpp $(LIBFLAGS)
31+
parse: main.cpp $(HEADERS) $(LIBFILES)
32+
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) main.cpp $(LIBFLAGS)
33+
34+
jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
35+
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
36+
3037

31-
parsehisto: main.cpp common_defs.h linux-perf-events.h
32-
$(CXX) $(CXXFLAGS) -o parsehisto main.cpp $(LIBFLAGS) -DBUILDHISTOGRAM
38+
parsehisto: main.cpp $(HEADERS) $(LIBFILES)
39+
$(CXX) $(CXXFLAGS) -o parsehisto main.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM
3340

3441
testflatten: parse parsenocheesy parsenodep8 parsenodep10 parsenodep12
3542
for filename in jsonexamples/twitter.json jsonexamples/gsoc-2018.json jsonexamples/citm_catalog.json jsonexamples/canada.json ; do \
@@ -43,18 +50,18 @@ testflatten: parse parsenocheesy parsenodep8 parsenodep10 parsenodep12
4350
set +x; \
4451
done
4552

46-
parsenocheesy: main.cpp common_defs.h linux-perf-events.h
47-
$(CXX) $(CXXFLAGS) -o parsenocheesy main.cpp -DSUPPRESS_CHEESY_FLATTEN
53+
parsenocheesy: main.cpp $(HEADERS) $(LIBFILES)
54+
$(CXX) $(CXXFLAGS) -o parsenocheesy main.cpp $(LIBFILES) -DSUPPRESS_CHEESY_FLATTEN
4855

49-
parsenodep8: main.cpp common_defs.h linux-perf-events.h
50-
$(CXX) $(CXXFLAGS) -o parsenodep8 main.cpp -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=8
56+
parsenodep8: main.cpp $(HEADERS) $(LIBFILES)
57+
$(CXX) $(CXXFLAGS) -o parsenodep8 main.cpp $(LIBFILES) -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=8
5158

52-
parsenodep10: main.cpp common_defs.h linux-perf-events.h
53-
$(CXX) $(CXXFLAGS) -o parsenodep12 main.cpp -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=10
59+
# NO_PDEP_WIDTH=10 build variant. The output file must be named after the
# target: writing it as parsenodep12 left parsenodep10 permanently out of date
# and clobbered the real parsenodep12 binary that testflatten also depends on.
parsenodep10: main.cpp $(HEADERS) $(LIBFILES)
	$(CXX) $(CXXFLAGS) -o parsenodep10 main.cpp $(LIBFILES) -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=10
5461

5562

56-
parsenodep12: main.cpp common_defs.h linux-perf-events.h
57-
$(CXX) $(CXXFLAGS) -o parsenodep12 main.cpp -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=12
63+
parsenodep12: main.cpp $(HEADERS) $(LIBFILES)
64+
$(CXX) $(CXXFLAGS) -o parsenodep12 main.cpp $(LIBFILES) -DNO_PDEP_PLEASE -DNO_PDEP_WIDTH=12
5865

5966

6067
dependencies/double-conversion/README.md:

jsonioutil.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#ifndef JSONIOUTIL_H
2+
#define JSONIOUTIL_H
3+
4+
#include <exception>
5+
#include <fstream>
6+
#include <iostream>
7+
#include <sstream>
8+
#include <string>
9+
10+
// get a corpus; pad out to cache line so we can always use SIMD
11+
// throws exceptions in case of failure
12+
std::pair<u8 *, size_t> get_corpus(std::string filename) {
13+
std::ifstream is(filename, std::ios::binary);
14+
if (is) {
15+
std::stringstream buffer;
16+
buffer << is.rdbuf();
17+
size_t length = buffer.str().size();
18+
char *aligned_buffer;
19+
if (posix_memalign((void **)&aligned_buffer, 64, ROUNDUP_N(length, 64))) {
20+
throw std::runtime_error("Could not allocate sufficient memory");
21+
};
22+
memset(aligned_buffer, 0x20, ROUNDUP_N(length, 64));
23+
memcpy(aligned_buffer, buffer.str().c_str(), length);
24+
is.close();
25+
return std::make_pair((u8 *)aligned_buffer, length);
26+
}
27+
throw std::runtime_error("could not load corpus");
28+
return std::make_pair((u8 *)0, (size_t)0);
29+
}
30+
31+
#endif

main.cpp

Lines changed: 20 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -39,29 +39,10 @@ using namespace double_conversion;
3939
#include "stage2_flatten.h"
4040
#include "stage3_ape_machine.h"
4141
#include "stage4_shovel_machine.h"
42-
42+
#include "jsonioutil.h"
4343
using namespace std;
4444

45-
// get a corpus; pad out to cache line so we can always use SIMD
46-
pair<u8 *, size_t> get_corpus(string filename) {
47-
ifstream is(filename, ios::binary);
48-
if (is) {
49-
stringstream buffer;
50-
buffer << is.rdbuf();
51-
size_t length = buffer.str().size();
52-
char * aligned_buffer;
53-
if (posix_memalign( (void **)&aligned_buffer, 64, ROUNDUP_N(length, 64))) {
54-
cerr << "Could not allocate memory\n";
55-
exit(1);
56-
};
57-
memset(aligned_buffer, 0x20, ROUNDUP_N(length, 64));
58-
memcpy(aligned_buffer, buffer.str().c_str(), length);
59-
is.close();
60-
return make_pair((u8 *)aligned_buffer, length);
61-
}
62-
throw "No corpus";
63-
return make_pair((u8 *)0, (size_t)0);
64-
}
45+
6546

6647

6748
// https://stackoverflow.com/questions/2616906/how-do-i-output-coloured-text-to-a-linux-terminal
@@ -115,88 +96,20 @@ void colorfuldisplay(ParsedJson & pj, const u8 * buf) {
11596
}
11697

11798

118-
/**
119-
* Does the file filename ends with the given extension.
120-
*/
121-
static bool hasExtension(const char *filename, const char *extension) {
122-
const char *ext = strrchr(filename, '.');
123-
return (ext && !strcmp(ext, extension));
124-
}
12599

126-
bool startsWith(const char *pre, const char *str) {
127-
size_t lenpre = strlen(pre),
128-
lenstr = strlen(str);
129-
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
130-
}
131100

132-
void validate() {
133-
init_state_machine();// to be safe
134-
const char *dirname = "jsonchecker/"; // ugly, hardcoded, brittle
135-
const char *extension = ".json";
136-
size_t dirlen = strlen(dirname);
137-
struct dirent **entry_list;
138-
int c = scandir(dirname, &entry_list, 0, alphasort);
139-
if (c < 0) {
140-
printf("error accessing %s \n", dirname);
141-
return;
142-
}
143-
if (c == 0) {
144-
printf("nothing in dir %s \n", dirname);
145-
return;
146-
}
147-
for (int i = 0; i < c; i++) {
148-
const char *name = entry_list[i]->d_name;
149-
if (hasExtension(name, extension)) {
150-
printf("validating: file %s \n",name);
151-
size_t filelen = strlen(name);
152-
char *fullpath = (char *)malloc(dirlen + filelen + 1);
153-
strcpy(fullpath, dirname);
154-
strcpy(fullpath + dirlen, name);
155-
pair<u8 *, size_t> p = get_corpus(fullpath);
156-
// terrible hack but just to get it working
157-
ParsedJson * pj_ptr = new ParsedJson;
158-
ParsedJson & pj(*pj_ptr);
159-
if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) {
160-
cerr << "Could not allocate memory\n";
161-
return;
162-
};
163-
pj.n_structural_indexes = 0;
164-
u32 max_structures = ROUNDUP_N(p.second, 64) + 2 + 7;
165-
pj.structural_indexes = new u32[max_structures];
166-
find_structural_bits(p.first, p.second, pj);
167-
flatten_indexes(p.second, pj);
168-
bool isok = ape_machine(p.first, p.second, pj);
169-
if(isok)
170-
isok = shovel_machine(p.first, p.second, pj);
171-
if(startsWith("pass",name)) {
172-
if(!isok) printf("warning: file %s should pass but it fails.\n",name);
173-
}
174-
if(startsWith("fail",name)) {
175-
if(isok) printf("warning: file %s should fail but it passes.\n",name);
176-
}
177-
free(pj.structurals);
178-
free(p.first);
179-
delete[] pj.structural_indexes;
180-
free(fullpath);
181-
}
182-
}
183-
for (int i = 0; i < c; ++i) free(entry_list[i]);
184-
free(entry_list);
185-
}
186101

187102
int main(int argc, char * argv[]) {
188103
if (argc != 2) {
189-
cerr << "Usage: " << argv[0] << " <jsonfile>\n";
190-
cout << "We are going to validate:\n" << std::endl;
191-
validate();
104+
cerr << "Usage: " << argv[0] << " <jsonfile>" << endl;
192105
exit(1);
193106
}
194107
pair<u8 *, size_t> p = get_corpus(argv[1]);
195108
ParsedJson * pj_ptr = new ParsedJson;
196109
ParsedJson & pj(*pj_ptr);
197110

198111
if (posix_memalign( (void **)&pj.structurals, 8, ROUNDUP_N(p.second, 64)/8)) {
199-
cerr << "Could not allocate memory\n";
112+
cerr << "Could not allocate memory" << endl;
200113
exit(1);
201114
};
202115

@@ -237,38 +150,44 @@ int main(int argc, char * argv[]) {
237150
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
238151
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
239152
#endif
153+
bool isok = true;
240154
for (u32 i = 0; i < iterations; i++) {
241155
auto start = std::chrono::steady_clock::now();
242156
#ifndef SQUASH_COUNTERS
243157
unified.start();
244158
#endif
245-
find_structural_bits(p.first, p.second, pj);
159+
isok = find_structural_bits(p.first, p.second, pj);
246160
#ifndef SQUASH_COUNTERS
247161
unified.end(results);
248162
cy1 += results[0]; cl1 += results[1];
163+
if(! isok ) break;
249164
unified.start();
250165
#endif
251-
flatten_indexes(p.second, pj);
166+
isok = flatten_indexes(p.second, pj);
252167
#ifndef SQUASH_COUNTERS
253168
unified.end(results);
254169
cy2 += results[0]; cl2 += results[1];
170+
if(! isok ) break;
255171
unified.start();
256172
#endif
257-
ape_machine(p.first, p.second, pj);
173+
isok = ape_machine(p.first, p.second, pj);
258174
#ifndef SQUASH_COUNTERS
259175
unified.end(results);
260176
cy3 += results[0]; cl3 += results[1];
177+
if(! isok ) break;
261178
unified.start();
262179
#endif
263-
shovel_machine(p.first, p.second, pj);
180+
isok = shovel_machine(p.first, p.second, pj);
264181
#ifndef SQUASH_COUNTERS
265182
unified.end(results);
266183
cy4 += results[0]; cl4 += results[1];
267184
#endif
185+
if(! isok ) break;
268186
auto end = std::chrono::steady_clock::now();
269187
std::chrono::duration<double> secs = end - start;
270188
res[i] = secs.count();
271189
}
190+
272191
#ifndef SQUASH_COUNTERS
273192
printf("number of bytes %ld number of structural chars %d ratio %.3f\n", p.second, pj.n_structural_indexes,
274193
(double) pj.n_structural_indexes / p.second);
@@ -302,6 +221,10 @@ int main(int argc, char * argv[]) {
302221
free(pj.structurals);
303222
free(p.first);
304223
delete[] pj.structural_indexes;
305-
delete pj_ptr;
306-
return 0;
224+
delete pj_ptr;
225+
if(! isok ) {
226+
printf(" Parsing failed. \n ");
227+
return EXIT_FAILURE;
228+
}
229+
return EXIT_SUCCESS;
307230
}

stage1_find_marks.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ really_inline u64 cmp_mask_against_input(m256 input_lo, m256 input_hi, m256 mask
1616
}
1717

1818
/*never_inline*/ bool find_structural_bits(const u8 * buf, size_t len, ParsedJson & pj) {
19+
if (len > 0xffffff) {
20+
cerr << "Currently only support JSON files < 16MB\n";
21+
return false;
22+
}
1923
// Useful constant masks
2024
const u64 even_bits = 0x5555555555555555ULL;
2125
const u64 odd_bits = ~even_bits;

0 commit comments

Comments
 (0)