JavaScriptExpert
diff --git a/include/simdjson/jsonparser.h b/include/simdjson/jsonparser.h
Lines changed: 0 additions & 1 deletion
diff --git a/include/simdjson/parsedjson.h b/include/simdjson/parsedjson.h
Lines changed: 108 additions & 29 deletions
diff --git a/src/generic/numberparsing.h b/src/generic/numberparsing.h
Lines changed: 20 additions & 17 deletions
@@ -35,7 +35,6 @@ int json_parse_implementation(const uint8_t *buf, size_t len, ParsedJson &pj,
     if (reallocated) { // must free before we exit
       aligned_free((void *)buf);
     }
-    pj.error_code = stage1_is_ok;
     return pj.error_code;
   }
   int res = unified_machine<T>(buf, len, pj);
 
@@ -60,49 +60,105 @@ class ParsedJson {
   WARN_UNUSED
   bool dump_raw_tape(std::ostream &os) const;
 
-  // all nodes are stored on the tape using a 64-bit word.
-  //
-  // strings, double and ints are stored as
-  //  a 64-bit word with a pointer to the actual value
-  //
-  //
-  //
-  // for objects or arrays, store [ or {  at the beginning and } and ] at the
-  // end. For the openings ([ or {), we annotate them with a reference to the
-  // location on the tape of the end, and for then closings (} and ]), we
-  // annotate them with a reference to the location of the opening
-  //
-  //
+  really_inline ErrorValues on_error(ErrorValues new_error_code) {
+    error_code = new_error_code;
+    return new_error_code;
+  }
+  really_inline ErrorValues on_success(ErrorValues success_code) {
+    error_code = success_code;
+    valid = true;
+    return success_code;
+  }
+  really_inline bool on_start_document(uint32_t depth) {
+    containing_scope_offset[depth] = get_current_loc();
+    write_tape(0, 'r');
+    return true;
+  }
+  really_inline bool on_start_object(uint32_t depth) {
+    containing_scope_offset[depth] = get_current_loc();
+    write_tape(0, '{');
+    return true;
+  }
+  really_inline bool on_start_array(uint32_t depth) {
+    containing_scope_offset[depth] = get_current_loc();
+    write_tape(0, '[');
+    return true;
+  }
+  // TODO we're not checking this bool
+  really_inline bool on_end_document(uint32_t depth) {
+    // write our tape location to the header scope
+    // The root scope gets written *at* the previous location.
+    annotate_previous_loc(containing_scope_offset[depth], get_current_loc());
+    write_tape(containing_scope_offset[depth], 'r');
+    return true;
+  }
+  really_inline bool on_end_object(uint32_t depth) {
+    // write our tape location to the header scope
+    write_tape(containing_scope_offset[depth], '}');
+    annotate_previous_loc(containing_scope_offset[depth], get_current_loc());
+    return true;
+  }
+  really_inline bool on_end_array(uint32_t depth) {
+    // write our tape location to the header scope
+    write_tape(containing_scope_offset[depth], ']');
+    annotate_previous_loc(containing_scope_offset[depth], get_current_loc());
+    return true;
+  }
 
-  // this should be considered a private function
-  really_inline void write_tape(uint64_t val, uint8_t c) {
-    tape[current_loc++] = val | ((static_cast<uint64_t>(c)) << 56);
+  really_inline bool on_true_atom() {
+    write_tape(0, 't');
+    return true;
+  }
+  really_inline bool on_false_atom() {
+    write_tape(0, 'f');
+    return true;
+  }
+  really_inline bool on_null_atom() {
+    write_tape(0, 'n');
+    return true;
+  }
+
+  really_inline uint8_t *on_start_string() {
+    /* record on the tape where this string starts in the string buffer, then
+      * skip past the uint32_t length prefix that on_end_string fills in */
+    write_tape(current_string_buf_loc - string_buf.get(), '"');
+    return current_string_buf_loc + sizeof(uint32_t);
+  }
+
+  really_inline bool on_end_string(uint8_t *dst) {
+    uint32_t str_length = dst - (current_string_buf_loc + sizeof(uint32_t));
+    // TODO check for overflow in case someone has a crazy string (>=4GB?)
+    // But only add the overflow check when the document itself exceeds 4GB
+    // Currently unneeded because we refuse to parse docs larger or equal to 4GB.
+    memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t));
+    // NULL termination is still handy if you expect all your strings to
+    // be NULL terminated? It comes at a small cost
+    *dst = 0;
+    current_string_buf_loc = dst + 1;
+    return true;
   }
 
-  really_inline void write_tape_s64(int64_t i) {
+  really_inline bool on_number_s64(int64_t value) {
     write_tape(0, 'l');
-    std::memcpy(&tape[current_loc], &i, sizeof(i));
+    std::memcpy(&tape[current_loc], &value, sizeof(value));
     ++current_loc;
+    return true;
   }
-
-  really_inline void write_tape_u64(uint64_t i) {
+  really_inline bool on_number_u64(uint64_t value) {
     write_tape(0, 'u');
-    tape[current_loc++] = i;
+    tape[current_loc++] = value;
+    return true;
   }
-
-  really_inline void write_tape_double(double d) {
+  really_inline bool on_number_double(double value) {
     write_tape(0, 'd');
-    static_assert(sizeof(d) == sizeof(tape[current_loc]), "mismatch size");
-    memcpy(&tape[current_loc++], &d, sizeof(double));
+    static_assert(sizeof(value) == sizeof(tape[current_loc]), "mismatch size");
+    memcpy(&tape[current_loc++], &value, sizeof(double));
     // tape[current_loc++] = *((uint64_t *)&d);
+    return true;
   }
 
   really_inline uint32_t get_current_loc() const { return current_loc; }
 
-  really_inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) {
-    tape[saved_loc] |= val;
-  }
-
   struct InvalidJSON : public std::exception {
     const char *what() const noexcept { return "JSON document is invalid"; }
   };
@@ -134,6 +190,29 @@ class ParsedJson {
   bool valid{false};
   int error_code{simdjson::UNINITIALIZED};
 
+private:
+  // all nodes are stored on the tape using a 64-bit word.
+  //
+  // strings, double and ints are stored as
+  //  a 64-bit word with a pointer to the actual value
+  //
+  //
+  //
+  // for objects or arrays, store [ or {  at the beginning and } and ] at the
+  // end. For the openings ([ or {), we annotate them with a reference to the
+  // location on the tape of the end, and for the closings (} and ]), we
+  // annotate them with a reference to the location of the opening
+  //
+  //
+
+  // appends one 64-bit tape word: val OR'd with the type tag c in the top byte
+  really_inline void write_tape(uint64_t val, uint8_t c) {
+    tape[current_loc++] = val | ((static_cast<uint64_t>(c)) << 56);
+  }
+
+  really_inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) {
+    tape[saved_loc] |= val;
+  }
 };
 
 
 
@@ -1,3 +1,4 @@
+namespace numberparsing {
 
 // Allowable floating-point values range
 // std::numeric_limits<double>::lowest() to std::numeric_limits<double>::max(),
@@ -75,7 +76,7 @@ static const double power_of_ten[] = {
     1e295,  1e296,  1e297,  1e298,  1e299,  1e300,  1e301,  1e302,  1e303,
     1e304,  1e305,  1e306,  1e307,  1e308};
 
-static inline bool is_integer(char c) {
+really_inline bool is_integer(char c) {
   return (c >= '0' && c <= '9');
   // this gets compiled to (uint8_t)(c - '0') <= 9 on all decent compilers
 }
@@ -104,7 +105,7 @@ is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
 // check quickly whether the next 8 chars are made of digits
 // at a glance, it looks better than Mula's
 // http://0x80.pl/articles/swar-digits-validate.html
-static inline bool is_made_of_eight_digits_fast(const char *chars) {
+really_inline bool is_made_of_eight_digits_fast(const char *chars) {
   uint64_t val;
   // this can read up to 7 bytes beyond the buffer size, but we require
   // SIMDJSON_PADDING of padding
@@ -123,7 +124,7 @@ static inline bool is_made_of_eight_digits_fast(const char *chars) {
 //
 // This function computes base * 10 ^ (- negative_exponent ).
 // It is only even going to be used when negative_exponent is tiny.
-static double subnormal_power10(double base, int64_t negative_exponent) {
+really_inline double subnormal_power10(double base, int64_t negative_exponent) {
     // avoid integer overflows in the pow expression, those values would
     // become zero anyway.
     if(negative_exponent < -1000) {
@@ -144,8 +145,8 @@ static double subnormal_power10(double base, int64_t negative_exponent) {
 //
 // Note: a redesign could avoid this function entirely.
 //
-static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj,
-                                     const uint32_t offset, bool found_minus) {
+never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj,
+                              const uint32_t offset, bool found_minus) {
   const char *p = reinterpret_cast<const char *>(buf + offset);
   bool negative = false;
   if (found_minus) {
@@ -268,7 +269,7 @@ static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj,
         return false;
   }
   double d = negative ? -i : i;
-  pj.write_tape_double(d);
+  pj.on_number_double(d);
 #ifdef JSON_TEST_NUMBERS // for unit testing
   found_float(d, buf + offset);
 #endif
@@ -283,7 +284,7 @@ static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj,
 //
 // This function will almost never be called!!!
 //
-static never_inline bool parse_large_integer(const uint8_t *const buf,
+never_inline bool parse_large_integer(const uint8_t *const buf,
                                              ParsedJson &pj,
                                              const uint32_t offset,
                                              bool found_minus) {
@@ -333,14 +334,14 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
       // as a positive signed integer, but the negative version is 
       // possible.
       constexpr int64_t signed_answer = INT64_MIN;
-      pj.write_tape_s64(signed_answer);
+      pj.on_number_s64(signed_answer);
 #ifdef JSON_TEST_NUMBERS // for unit testing
       found_integer(signed_answer, buf + offset);
 #endif
     } else {
       // we can negate safely
       int64_t signed_answer = -static_cast<int64_t>(i);
-      pj.write_tape_s64(signed_answer);
+      pj.on_number_s64(signed_answer);
 #ifdef JSON_TEST_NUMBERS // for unit testing
       found_integer(signed_answer, buf + offset);
 #endif
@@ -353,12 +354,12 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
 #ifdef JSON_TEST_NUMBERS // for unit testing
       found_integer(i, buf + offset);
 #endif
-      pj.write_tape_s64(i);
+      pj.on_number_s64(i);
     } else {
 #ifdef JSON_TEST_NUMBERS // for unit testing
       found_unsigned_integer(i, buf + offset);
 #endif
-      pj.write_tape_u64(i);
+      pj.on_number_u64(i);
     }
   }
   return is_structural_or_whitespace(*p);
@@ -373,12 +374,13 @@ static never_inline bool parse_large_integer(const uint8_t *const buf,
 // content and append a space before calling this function.
 //
 // Our objective is accurate parsing (ULP of 0 or 1) at high speed.
-static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj,
-                                       const uint32_t offset,
-                                       bool found_minus) {
+really_inline bool parse_number(const uint8_t *const buf,
+                                const uint32_t offset,
+                                bool found_minus,
+                                ParsedJson &pj) {
 #ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes
                                   // useful to skip parsing
-  pj.write_tape_s64(0);           // always write zero
+  pj.on_number_s64(0);           // always write zero
   return true;                    // always succeeds
 #else
   const char *p = reinterpret_cast<const char *>(buf + offset);
@@ -535,7 +537,7 @@ static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj,
     double factor = power_of_ten[power_index];
     factor = negative ? -factor : factor;
     double d = i * factor;
-    pj.write_tape_double(d);
+    pj.on_number_double(d);
 #ifdef JSON_TEST_NUMBERS // for unit testing
     found_float(d, buf + offset);
 #endif
@@ -546,7 +548,7 @@ static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj,
       return parse_large_integer(buf, pj, offset, found_minus);
     }
     i = negative ? 0 - i : i;
-    pj.write_tape_s64(i);
+    pj.on_number_s64(i);
 #ifdef JSON_TEST_NUMBERS // for unit testing
     found_integer(i, buf + offset);
 #endif
@@ -555,3 +557,4 @@ static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj,
 #endif // SIMDJSON_SKIPNUMBERPARSING
 }
 
+} // namespace numberparsing
Original file line number	Diff line number	Diff line change
`@@ -35,7 +35,6 @@ int json_parse_implementation(const uint8_t *buf, size_t len, ParsedJson &pj,`
`35`	`35`	`if (reallocated) { // must free before we exit`
`36`	`36`	`aligned_free((void *)buf);`
`37`	`37`	`}`
`38`		`- pj.error_code = stage1_is_ok;`
`39`	`38`	`return pj.error_code;`
`40`	`39`	`}`
`41`	`40`	`int res = unified_machine<T>(buf, len, pj);`