JavaScriptExpert
diff --git a/‎Makefile‎
Lines changed: 3 additions & 3 deletions b/‎Makefile‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎benchmark/parsingcompetition.cpp‎
Lines changed: 1 addition & 1 deletion b/‎benchmark/parsingcompetition.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎include/simdjson/common_defs.h‎
Lines changed: 3 additions & 0 deletions b/‎include/simdjson/common_defs.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎include/simdjson/jsonformatutils.h‎
Lines changed: 173 additions & 71 deletions b/‎include/simdjson/jsonformatutils.h‎
Lines changed: 173 additions & 71 deletions
diff --git a/‎include/simdjson/jsonparser.h‎
Lines changed: 2 additions & 2 deletions b/‎include/simdjson/jsonparser.h‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎include/simdjson/parsedjson.h‎
Lines changed: 7 additions & 0 deletions b/‎include/simdjson/parsedjson.h‎
Lines changed: 7 additions & 0 deletions
@@ -138,13 +138,13 @@ jsonstats: tools/jsonstats.cpp $(HEADERS) $(LIBFILES)
 ujdecode.o: $(UJSON4C_INCLUDE)
 	$(CC) $(CFLAGS) -c dependencies/ujson4c/src/ujdecode.c
 
-parseandstatcompetition: benchmark/parseandstatcompetition.cpp $(HEADERS) $(LIBFILES)
+parseandstatcompetition: benchmark/parseandstatcompetition.cpp $(HEADERS) $(LIBFILES) $(LIBS)
 	$(CXX) $(CXXFLAGS)  -o parseandstatcompetition $(LIBFILES) benchmark/parseandstatcompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
 
-distinctuseridcompetition: benchmark/distinctuseridcompetition.cpp $(HEADERS) $(LIBFILES)
+distinctuseridcompetition: benchmark/distinctuseridcompetition.cpp $(HEADERS) $(LIBFILES) $(LIBS)
 	$(CXX) $(CXXFLAGS)  -o distinctuseridcompetition $(LIBFILES) benchmark/distinctuseridcompetition.cpp  -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
 
-parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES)
+parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) $(LIBS)
 	$(CXX) $(CXXFLAGS)  -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)
 
 allparsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES) $(EXTRAOBJECTS) $(LIBS)
 
@@ -263,7 +263,7 @@ The parser builds a useful immutable (read-only) DOM (document-object model) whi
 To simplify the engineering, we make some assumptions.
 
 - We support UTF-8 (and thus ASCII), nothing else (no Latin, no UTF-16). We do not believe that this is a genuine limitation in the sense that we do not think that there is any serious application that needs to process JSON data without an ASCII or UTF-8 encoding.
-- We store strings as NULL terminated C strings. Thus we implicitly assume that you do not include a NULL character within your string, which is allowed technically speaking if you escape it (\u0000).
+- Each string in the JSON document may contain up to 4294967295 bytes of UTF-8 (4GB). To enforce this constraint, we refuse to parse any document larger than 4294967295 bytes (4GB). This should accommodate most JSON documents.
 - We assume AVX2 support which is available in all recent mainstream x86 processors produced by AMD and Intel. No support for non-x86 processors is included though it can be done. We plan to support ARM processors (help is invited).
 - In cases of failure, we just report a failure without any indication as to the nature of the problem. (This can be easily improved without affecting performance.)
 - As allowed by the specification, we allow repeated keys within an object (other parsers like sajson do the same).
 
@@ -120,7 +120,7 @@ int main(int argc, char *argv[]) {
   if(!justdata) BEST_TIME("simdjson (dynamic mem) ", build_parsed_json(p).isValid(), true, ,
             repeat, volume, !justdata);
   // (static alloc) 
-  BEST_TIME("simdjson ", json_parse(p, pj), true, , repeat,
+  BEST_TIME("simdjson ", json_parse(p, pj), simdjson::SUCCESS, , repeat,
             volume, !justdata);
 
 
 
@@ -5,6 +5,9 @@
 
 #include <cassert>
 
+// we support documents up to 4GB
+#define SIMDJSON_MAXSIZE_BYTES 0xFFFFFFFF
+
 // the input buf should be readable up to buf + SIMDJSON_PADDING
 #define SIMDJSON_PADDING  sizeof(__m256i)
 
 
@@ -5,87 +5,183 @@
 #include <iomanip>
 #include <iostream>
 
+// Print the zero-terminated string src to stdout, escaping JSON special characters.
 static inline void print_with_escapes(const unsigned char *src) {
-  while (*src != 0u) {
+  while (*src) {
     switch (*src) {
-    case '\b':
-      putchar('\\');
-      putchar('b');
-      break;
-    case '\f':
-      putchar('\\');
-      putchar('f');
-      break;
-    case '\n':
-      putchar('\\');
-      putchar('n');
-      break;
-    case '\r':
-      putchar('\\');
-      putchar('r');
-      break;
-    case '\"':
-      putchar('\\');
-      putchar('"');
-      break;
-    case '\t':
-      putchar('\\');
-      putchar('t');
-      break;
-    case '\\':
-      putchar('\\');
-      putchar('\\');
-      break;
-    default:
-      if (*src <= 0x1F) {
-        printf("\\u%04x", *src);
-      } else {
-        putchar(*src);
+      case '\b':
+        putchar('\\');
+        putchar('b');
+        break;
+      case '\f':
+        putchar('\\');
+        putchar('f');
+        break;
+      case '\n':
+        putchar('\\');
+        putchar('n');
+        break;
+      case '\r':
+        putchar('\\');
+        putchar('r');
+        break;
+      case '\"':
+        putchar('\\');
+        putchar('"');
+        break;
+      case '\t':
+        putchar('\\');
+        putchar('t');
+        break;
+      case '\\':
+        putchar('\\');
+        putchar('\\');
+        break;
+      default:
+        if (*src <= 0x1F) {
+          printf("\\u%04x", *src);
+        } else {
+          putchar(*src);
+        }
+    }
+    src++;
+  }
 }
+
+// Write the zero-terminated string src to os, escaping quotes, backslashes and control characters as JSON.
+static inline void print_with_escapes(const unsigned char *src,
+                                      std::ostream &os) {
+  while (*src) {
+    switch (*src) {
+      case '\b':
+        os << '\\';
+        os << 'b';
+        break;
+      case '\f':
+        os << '\\';
+        os << 'f';
+        break;
+      case '\n':
+        os << '\\';
+        os << 'n';
+        break;
+      case '\r':
+        os << '\\';
+        os << 'r';
+        break;
+      case '\"':
+        os << '\\';
+        os << '"';
+        break;
+      case '\t':
+        os << '\\';
+        os << 't';
+        break;
+      case '\\':
+        os << '\\';
+        os << '\\';
+        break;
+      default:
+        if (*src <= 0x1F) {
+          // Remaining control characters become \u00XX (lowercase hex, same as
+          // the printf overload); os's flags and fill character stay untouched.
+          static const char hexdigits[] = "0123456789abcdef";
+          os << "\\u00" << hexdigits[*src >> 4] << hexdigits[*src & 0xF];
+        } else {
+          os << *src;
+        }
     }
     src++;
   }
 }
 
-static inline void print_with_escapes(const unsigned char *src, std::ostream &os) {
-  while (*src != 0u) {
+// Print exactly len bytes of src to stdout (zero bytes included), escaping JSON special characters.
+static inline void print_with_escapes(const unsigned char *src, size_t len) {
+  const unsigned char *finalsrc = src + len;
+  while (src < finalsrc) {
     switch (*src) {
-    case '\b':
-      os << '\\';
-      os << 'b';
-      break;
-    case '\f':
-      os << '\\';
-      os << 'f';
-      break;
-    case '\n':
-      os << '\\';
-      os << 'n';
-      break;
-    case '\r':
-      os << '\\';
-      os << 'r';
-      break;
-    case '\"':
-      os << '\\';
-      os << '"';
-      break;
-    case '\t':
-      os << '\\';
-      os << 't';
-      break;
-    case '\\':
-      os << '\\';
-      os << '\\';
-      break;
-    default:
-      if (*src <= 0x1F) {
-        std::ios::fmtflags f(os.flags());
-        os << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(*src);
-        os.flags(f);
-      } else {
-        os << *src;
+      case '\b':
+        putchar('\\');
+        putchar('b');
+        break;
+      case '\f':
+        putchar('\\');
+        putchar('f');
+        break;
+      case '\n':
+        putchar('\\');
+        putchar('n');
+        break;
+      case '\r':
+        putchar('\\');
+        putchar('r');
+        break;
+      case '\"':
+        putchar('\\');
+        putchar('"');
+        break;
+      case '\t':
+        putchar('\\');
+        putchar('t');
+        break;
+      case '\\':
+        putchar('\\');
+        putchar('\\');
+        break;
+      default:
+        if (*src <= 0x1F) {
+          printf("\\u%04x", *src);
+        } else {
+          putchar(*src);
+        }
+    }
+    src++;
+  }
 }
+
+// Write exactly len bytes of src to os (zero bytes included), escaping quotes, backslashes and control characters as JSON.
+static inline void print_with_escapes(const unsigned char *src,
+                                      std::ostream &os, size_t len) {
+  const unsigned char *finalsrc = src + len;
+  while (src < finalsrc) {
+    switch (*src) {
+      case '\b':
+        os << '\\';
+        os << 'b';
+        break;
+      case '\f':
+        os << '\\';
+        os << 'f';
+        break;
+      case '\n':
+        os << '\\';
+        os << 'n';
+        break;
+      case '\r':
+        os << '\\';
+        os << 'r';
+        break;
+      case '\"':
+        os << '\\';
+        os << '"';
+        break;
+      case '\t':
+        os << '\\';
+        os << 't';
+        break;
+      case '\\':
+        os << '\\';
+        os << '\\';
+        break;
+      default:
+        if (*src <= 0x1F) {
+          // Remaining control characters become \u00XX (lowercase hex, same as
+          // the printf overload); os's flags and fill character stay untouched.
+          static const char hexdigits[] = "0123456789abcdef";
+          os << "\\u00" << hexdigits[*src >> 4] << hexdigits[*src & 0xF];
+        } else {
+          os << *src;
+        }
     }
     src++;
   }
@@ -95,4 +191,10 @@ static inline void print_with_escapes(const char *src, std::ostream &os) {
   print_with_escapes(reinterpret_cast<const unsigned char *>(src), os);
 }
 
+static inline void print_with_escapes(const char *src, std::ostream &os,
+                                      size_t len) {
+  print_with_escapes(reinterpret_cast<const unsigned char *>(src), os, len);
+}
+
+#
 #endif
@@ -20,7 +20,7 @@ WARN_UNUSED
 int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj, bool reallocifneeded = true);
 
 // Parse a document found in buf, need to preallocate ParsedJson.
-// Return false in case of a failure. You can also check validity
+// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
 // by calling pj.isValid(). The same ParsedJson can be reused for other documents.
 //
 // If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
@@ -33,7 +33,7 @@ inline int json_parse(const char * buf, size_t len, ParsedJson &pj, bool realloc
 }
 
 // Parse a document found in buf, need to preallocate ParsedJson.
-// Return false in case of a failure. You can also check validity
+// Return SUCCESS (an integer = 1) in case of a success. You can also check validity
 // by calling pj.isValid(). The same ParsedJson can be reused for other documents.
 //
 // If reallocifneeded is true (default) then a temporary buffer is created when needed during processing
 
@@ -125,8 +125,12 @@ struct ParsedJson {
     // get the string value at this node (NULL ended); valid only if we're at "
     // note that tabs, and line endings are escaped in the returned value (see print_with_escapes)
     // return value is valid UTF-8
+    // It may contain NULL chars within the string: get_string_length determines the true 
+    // string length.
     const char * get_string() const;
 
+    uint32_t get_string_length() const;
+
     // get the double value at this node; valid only if
     // we're at "d"
     double get_double()  const;
@@ -149,6 +153,9 @@ struct ParsedJson {
     // if successful, we are left pointing at the value,
     // if not, we are still pointing at the object ({)
     // (in case of repeated keys, this only finds the first one)
+    // We seek the key using C's strcmp so if your JSON strings contain
+    // NULL chars, this would trigger a false positive: if you expect that
+    // to be the case, take extra precautions.
     bool move_to_key(const char * key);
 
     // throughout return true if we can do the navigation, false