@@ -27,44 +27,49 @@ struct structural_parser : structural_iterator {
2727 current_string_buf_loc{parser.doc ->string_buf .get ()} {
2828 }
2929
30- WARN_UNUSED really_inline error_code start_scope (bool parent_is_array) {
30+ WARN_UNUSED really_inline error_code start_scope (bool is_array) {
31+ depth++;
32+ if (depth >= parser.max_depth ()) { log_error (" Exceeded max depth!" ); return DEPTH_ERROR; }
3133 parser.containing_scope [depth].tape_index = next_tape_index ();
3234 parser.containing_scope [depth].count = 0 ;
3335 tape.skip (); // We don't actually *write* the start element until the end.
34- parser.is_array [depth] = parent_is_array;
35- depth++;
36- if (depth >= parser.max_depth ()) { log_error (" Exceeded max depth!" ); return DEPTH_ERROR; }
36+ parser.is_array [depth] = is_array;
3737 return SUCCESS;
3838 }
3939
4040 WARN_UNUSED really_inline error_code start_document () {
4141 log_start_value (" document" );
42- return start_scope (false );
42+ parser.containing_scope [depth].tape_index = next_tape_index ();
43+ parser.containing_scope [depth].count = 0 ;
44+ tape.skip (); // We don't actually *write* the start element until the end.
45+ parser.is_array [depth] = false ;
46+ if (depth >= parser.max_depth ()) { log_error (" Exceeded max depth!" ); return DEPTH_ERROR; }
47+ return SUCCESS;
4348 }
4449
45- WARN_UNUSED really_inline error_code start_object (bool parent_is_array ) {
50+ WARN_UNUSED really_inline error_code start_object () {
4651 log_start_value (" object" );
47- return start_scope (parent_is_array );
52+ return start_scope (false );
4853 }
4954
50- WARN_UNUSED really_inline error_code start_array (bool parent_is_array ) {
55+ WARN_UNUSED really_inline error_code start_array () {
5156 log_start_value (" array" );
52- return start_scope (parent_is_array );
57+ return start_scope (true );
5358 }
5459
5560 // this function is responsible for annotating the start of the scope
5661 really_inline void end_scope (internal::tape_type start, internal::tape_type end) noexcept {
57- depth--;
58- // write our doc->tape location to the header scope
59- // The root scope gets written *at* the previous location.
60- tape.append (parser.containing_scope [depth].tape_index , end);
62+ // SIMDJSON_ASSUME(depth > 0);
63+ // Write the ending tape element, pointing at the start location
64+ const uint32_t start_tape_index = parser.containing_scope [depth].tape_index ;
65+ tape.append (start_tape_index, end);
66+ // Write the start tape element, pointing at the end location (and including count)
6167 // count can overflow if it exceeds 24 bits... so we saturate
6268 // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff).
63- const uint32_t start_tape_index = parser.containing_scope [depth].tape_index ;
6469 const uint32_t count = parser.containing_scope [depth].count ;
6570 const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count;
66- // This is a load and an OR. It would be possible to just write once at doc->tape[d.tape_index]
6771 tape_writer::write (parser.doc ->tape [start_tape_index], next_tape_index () | (uint64_t (cntsat) << 32 ), start);
72+ depth--;
6873 }
6974
7075 really_inline uint32_t next_tape_index () {
@@ -81,15 +86,38 @@ struct structural_parser : structural_iterator {
8186 }
8287 really_inline void end_document () {
8388 log_end_value (" document" );
84- end_scope (internal::tape_type::ROOT, internal::tape_type::ROOT);
89+ constexpr uint32_t start_tape_index = 0 ;
90+ tape.append (start_tape_index, internal::tape_type::ROOT);
91+ tape_writer::write (parser.doc ->tape [start_tape_index], next_tape_index (), internal::tape_type::ROOT);
92+ }
93+
94+ really_inline void empty_container (internal::tape_type start, internal::tape_type end) {
95+ auto start_index = next_tape_index ();
96+ tape.append (start_index+2 , start);
97+ tape.append (start_index, end);
98+ }
99+ WARN_UNUSED really_inline bool empty_object () {
100+ if (peek_next_char () == ' }' ) {
101+ advance_char ();
102+ log_value (" empty object" );
103+ empty_container (internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT);
104+ return true ;
105+ }
106+ return false ;
107+ }
108+ WARN_UNUSED really_inline bool empty_array () {
109+ if (peek_next_char () == ' ]' ) {
110+ advance_char ();
111+ log_value (" empty array" );
112+ empty_container (internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY);
113+ return true ;
114+ }
115+ return false ;
85116 }
86117
87118 // increment_count increments the count of keys in an object or values in an array.
88- // Note that if you are at the level of the values or elements, the count
89- // must be increment in the preceding depth (depth-1) where the array or
90- // the object resides.
91119 really_inline void increment_count () {
92- parser.containing_scope [depth - 1 ].count ++; // we have a key value pair in the object at parser.depth - 1
120+ parser.containing_scope [depth].count ++; // we have a key value pair in the object at parser.depth - 1
93121 }
94122
95123 really_inline uint8_t *on_start_string () noexcept {
@@ -199,6 +227,16 @@ struct structural_parser : structural_iterator {
199227 return SUCCESS;
200228 }
201229
230+ WARN_UNUSED really_inline error_code start () {
231+ logger::log_start ();
232+
233+ // If there are no structurals left, return EMPTY
234+ if (at_end ()) { return EMPTY; }
235+
236+ // Push the root scope (there is always at least one scope)
237+ return start_document ();
238+ }
239+
202240 WARN_UNUSED really_inline error_code finish () {
203241 end_document ();
204242 parser.next_structural_index = uint32_t (current_structural + 1 - &parser.structural_indexes [0 ]);
@@ -211,29 +249,10 @@ struct structural_parser : structural_iterator {
211249 return SUCCESS;
212250 }
213251
214- really_inline void init () {
215- log_start ();
216- }
217-
218- WARN_UNUSED really_inline error_code start () {
219- // If there are no structurals left, return EMPTY
220- if (at_end (parser.n_structural_indexes )) {
221- return EMPTY;
222- }
223-
224- init ();
225- // Push the root scope (there is always at least one scope)
226- return start_document ();
227- }
228-
229252 really_inline void log_value (const char *type) {
230253 logger::log_line (*this , " " , type, " " );
231254 }
232255
233- static really_inline void log_start () {
234- logger::log_start ();
235- }
236-
237256 really_inline void log_start_value (const char *type) {
238257 logger::log_line (*this , " +" , type, " " );
239258 if (logger::LOG_ENABLED) { logger::log_depth++; }
@@ -261,11 +280,14 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
261280 // Read first value
262281 //
263282 switch (parser.current_char ()) {
264- case ' {' :
265- SIMDJSON_TRY ( parser.start_object (false ) );
283+ case ' {' : {
284+ if (parser.empty_object ()) { goto document_end; }
285+ SIMDJSON_TRY ( parser.start_object () );
266286 goto object_begin;
267- case ' [' :
268- SIMDJSON_TRY ( parser.start_array (false ) );
287+ }
288+ case ' [' : {
289+ if (parser.empty_array ()) { goto document_end; }
290+ SIMDJSON_TRY ( parser.start_array () );
269291 // Make sure the outer array is closed before continuing; otherwise, there are ways we could get
270292 // into memory corruption. See https://github.com/simdjson/simdjson/issues/906
271293 if (!STREAMING) {
@@ -274,14 +296,15 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
274296 }
275297 }
276298 goto array_begin;
277- case ' "' : SIMDJSON_TRY ( parser.parse_string () ); goto finish;
278- case ' t' : SIMDJSON_TRY ( parser.parse_root_true_atom () ); goto finish;
279- case ' f' : SIMDJSON_TRY ( parser.parse_root_false_atom () ); goto finish;
280- case ' n' : SIMDJSON_TRY ( parser.parse_root_null_atom () ); goto finish;
299+ }
300+ case ' "' : SIMDJSON_TRY ( parser.parse_string () ); goto document_end;
301+ case ' t' : SIMDJSON_TRY ( parser.parse_root_true_atom () ); goto document_end;
302+ case ' f' : SIMDJSON_TRY ( parser.parse_root_false_atom () ); goto document_end;
303+ case ' n' : SIMDJSON_TRY ( parser.parse_root_null_atom () ); goto document_end;
281304 case ' -' :
282305 case ' 0' : case ' 1' : case ' 2' : case ' 3' : case ' 4' :
283306 case ' 5' : case ' 6' : case ' 7' : case ' 8' : case ' 9' :
284- SIMDJSON_TRY ( parser.parse_root_number () ); goto finish ;
307+ SIMDJSON_TRY ( parser.parse_root_number () ); goto document_end ;
285308 default :
286309 parser.log_error (" Document starts with a non-value character" );
287310 return TAPE_ERROR;
@@ -291,25 +314,27 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
291314// Object parser states
292315//
293316object_begin:
294- switch (parser.advance_char ()) {
295- case ' "' : {
296- parser.increment_count ();
297- SIMDJSON_TRY ( parser.parse_string (true ) );
298- goto object_key_state;
299- }
300- case ' }' :
301- parser.end_object ();
302- goto scope_end;
303- default :
317+ if (parser.advance_char () != ' "' ) {
304318 parser.log_error (" Object does not start with a key" );
305319 return TAPE_ERROR;
306320 }
321+ parser.increment_count ();
322+ SIMDJSON_TRY ( parser.parse_string (true ) );
323+ goto object_field;
307324
308- object_key_state :
325+ object_field :
309326 if (unlikely ( parser.advance_char () != ' :' )) { parser.log_error (" Missing colon after key in object" ); return TAPE_ERROR; }
310327 switch (parser.advance_char ()) {
311- case ' {' : SIMDJSON_TRY ( parser.start_object (false ) ); goto object_begin;
312- case ' [' : SIMDJSON_TRY ( parser.start_array (false ) ); goto array_begin;
328+ case ' {' : {
329+ if (parser.empty_object ()) { break ; };
330+ SIMDJSON_TRY ( parser.start_object () );
331+ goto object_begin;
332+ }
333+ case ' [' : {
334+ if (parser.empty_array ()) { break ; };
335+ SIMDJSON_TRY ( parser.start_array () );
336+ goto array_begin;
337+ }
313338 case ' "' : SIMDJSON_TRY ( parser.parse_string () ); break ;
314339 case ' t' : SIMDJSON_TRY ( parser.parse_true_atom () ); break ;
315340 case ' f' : SIMDJSON_TRY ( parser.parse_false_atom () ); break ;
@@ -329,7 +354,7 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
329354 parser.increment_count ();
330355 if (unlikely ( parser.advance_char () != ' "' )) { parser.log_error (" Key string missing at beginning of field in object" ); return TAPE_ERROR; }
331356 SIMDJSON_TRY ( parser.parse_string (true ) );
332- goto object_key_state ;
357+ goto object_field ;
333358 case ' }' :
334359 parser.end_object ();
335360 goto scope_end;
@@ -339,25 +364,28 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
339364 }
340365
341366scope_end:
342- if (parser.depth == 1 ) { goto finish ; }
367+ if (parser.depth == 0 ) { goto document_end ; }
343368 if (parser.parser .is_array [parser.depth ]) { goto array_continue; }
344369 goto object_continue;
345370
346371//
347372// Array parser states
348373//
349374array_begin:
350- if (parser.peek_next_char () == ' ]' ) {
351- parser.advance_char ();
352- parser.end_array ();
353- goto scope_end;
354- }
355375 parser.increment_count ();
356376
357- main_array_switch :
377+ array_value :
358378 switch (parser.advance_char ()) {
359- case ' {' : SIMDJSON_TRY ( parser.start_object (true ) ); goto object_begin;
360- case ' [' : SIMDJSON_TRY ( parser.start_array (true ) ); goto array_begin;
379+ case ' {' : {
380+ if (parser.empty_object ()) { break ; };
381+ SIMDJSON_TRY ( parser.start_object () );
382+ goto object_begin;
383+ }
384+ case ' [' : {
385+ if (parser.empty_array ()) { break ; };
386+ SIMDJSON_TRY ( parser.start_array () );
387+ goto array_begin;
388+ }
361389 case ' "' : SIMDJSON_TRY ( parser.parse_string () ); break ;
362390 case ' t' : SIMDJSON_TRY ( parser.parse_true_atom () ); break ;
363391 case ' f' : SIMDJSON_TRY ( parser.parse_false_atom () ); break ;
@@ -375,7 +403,7 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
375403 switch (parser.advance_char ()) {
376404 case ' ,' :
377405 parser.increment_count ();
378- goto main_array_switch ;
406+ goto array_value ;
379407 case ' ]' :
380408 parser.end_array ();
381409 goto scope_end;
@@ -384,9 +412,10 @@ WARN_UNUSED static really_inline error_code parse_structurals(dom_parser_impleme
384412 return TAPE_ERROR;
385413 }
386414
387- finish :
415+ document_end :
388416 return parser.finish ();
389- }
417+
418+ } // parse_structurals()
390419
391420} // namespace stage2
392421} // namespace SIMDJSON_IMPLEMENTATION
0 commit comments