More cleaning.

lemire · lemire · commit c11eefca32d7 · 2018-11-30T21:31:05.000-05:00
diff --git a/README.md b/README.md
@@ -12,11 +12,11 @@ Goal: Speed up the parsing of JSON per se.
 /...
 
 const char * filename = ... //
-simdjsonstring p = get_corpus(filename);
+std::string_view p = get_corpus(filename);
 ParsedJson pj;
 size_t maxdepth = 1024; // support documents have nesting "depth" up to 1024
 pj.allocateCapacity(p.size(), maxdepth); // allocate memory for parsing up to p.size() bytes
-bool is_ok = json_parse(p.first, p.second, pj); // do the parsing, return false on error
+bool is_ok = json_parse(p, pj); // do the parsing, return false on error
 // parsing is done!
 // js can be reused with other json_parse calls.
 ```
diff --git a/benchmark/minifiercompetition.cpp b/benchmark/minifiercompetition.cpp
@@ -61,7 +61,7 @@ int main(int argc, char *argv[]) {
     exit(1);
   }
   const char * filename = argv[optind];
-  simdjsonstring p;
+  std::string_view p;
   try {
     p = get_corpus(filename);
   } catch (const std::exception& e) { // caught by reference to base
@@ -79,20 +79,20 @@ int main(int argc, char *argv[]) {
     std::cout << std::endl;
   }
   char *buffer = allocate_aligned_buffer(p.size() + 1);
-  memcpy(buffer, p.c_str(), p.size());
+  memcpy(buffer, p.data(), p.size());
   buffer[p.size()] = '\0';
 
   int repeat = 10;
   int volume = p.size();
 
-  size_t strlength = rapidstringme((char *)p.c_str()).size();
+  size_t strlength = rapidstringme((char *)p.data()).size();
   if (verbose)
     std::cout << "input length is " << p.size() << " stringified length is "
               << strlength << std::endl;
-  BEST_TIME_NOCHECK("despacing with RapidJSON", rapidstringme((char *)p.c_str()), , repeat, volume, true);
+  BEST_TIME_NOCHECK("despacing with RapidJSON", rapidstringme((char *)p.data()), , repeat, volume, true);
   BEST_TIME_NOCHECK("despacing with RapidJSON Insitu", rapidstringmeInsitu((char *)buffer),
-                    memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
-  memcpy(buffer, p.c_str(), p.size());
+                    memcpy(buffer, p.data(), p.size()), repeat, volume, true);
+  memcpy(buffer, p.data(), p.size());
 
   size_t outlength =
       jsonminify((const uint8_t *)buffer, p.size(), (uint8_t *)buffer);
@@ -101,18 +101,18 @@ int main(int argc, char *argv[]) {
 
   uint8_t *cbuffer = (uint8_t *)buffer;
   BEST_TIME("jsonminify", jsonminify(cbuffer, p.size(), cbuffer), outlength,
-            memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
+            memcpy(buffer, p.data(), p.size()), repeat, volume, true);
   printf("minisize = %zu, original size = %zu  (minified down to %.2f percent of original) \n", outlength, p.size(), outlength * 100.0 / p.size());
 
   /***
    * Is it worth it to minify before parsing?
    ***/
   rapidjson::Document d;
   BEST_TIME("RapidJSON Insitu orig", d.ParseInsitu(buffer).HasParseError(), false,
-            memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
+            memcpy(buffer, p.data(), p.size()), repeat, volume, true);
 
   char *minibuffer = allocate_aligned_buffer(p.size() + 1);
-  size_t minisize = jsonminify((const uint8_t *)p.c_str(), p.size(), (uint8_t*) minibuffer);
+  size_t minisize = jsonminify((const uint8_t *)p.data(), p.size(), (uint8_t*) minibuffer);
   minibuffer[minisize] = '\0';
 
   BEST_TIME("RapidJSON Insitu despaced", d.ParseInsitu(buffer).HasParseError(), false,
@@ -122,14 +122,14 @@ int main(int argc, char *argv[]) {
   size_t astbuffersize = p.size() * 2;
   size_t * ast_buffer = (size_t *) malloc(astbuffersize * sizeof(size_t));
 
-  BEST_TIME("sajson orig", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
+  BEST_TIME("sajson orig", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
 
 
   BEST_TIME("sajson despaced", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(minisize, buffer)).is_valid(), true, memcpy(buffer, minibuffer, p.size()), repeat, volume, true);
 
   ParsedJson pj;
   pj.allocateCapacity(p.size(), 1024);
-  BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.size(), pj), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
+  BEST_TIME("json_parse orig", json_parse((const u8*)buffer, p.size(), pj), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
   
   ParsedJson pj2;
   pj2.allocateCapacity(p.size(), 1024);
diff --git a/benchmark/parse.cpp b/benchmark/parse.cpp
@@ -65,7 +65,7 @@ int main(int argc, char *argv[]) {
     cerr << "warning: ignoring everything after " << argv[optind  + 1] << endl;
   }
   if(verbose) cout << "[verbose] loading " << filename << endl;
-  simdjsonstring p;
+  std::string_view p;
   try {
     p = get_corpus(filename);
   } catch (const std::exception& e) { // caught by reference to base
@@ -118,7 +118,7 @@ int main(int argc, char *argv[]) {
 #ifndef SQUASH_COUNTERS
     unified.start();
 #endif
-    isok = find_structural_bits(p.c_str(), p.size(), pj);
+    isok = find_structural_bits(p.data(), p.size(), pj);
 #ifndef SQUASH_COUNTERS
     unified.end(results);
     cy1 += results[0];
@@ -147,7 +147,7 @@ int main(int argc, char *argv[]) {
     unified.start();
 #endif
 
-    isok = isok && unified_machine(p.c_str(), p.size(), pj);
+    isok = isok && unified_machine(p.data(), p.size(), pj);
 #ifndef SQUASH_COUNTERS
     unified.end(results);
     cy3 += results[0];
diff --git a/benchmark/parsingcompetition.cpp b/benchmark/parsingcompetition.cpp
@@ -61,7 +61,7 @@ int main(int argc, char *argv[]) {
   if(optind + 1 < argc) {
     cerr << "warning: ignoring everything after " << argv[optind  + 1] << endl;
   }
-  simdjsonstring p;
+  std::string_view p;
   try {
     p = get_corpus(filename);
   } catch (const std::exception& e) { // caught by reference to base
@@ -93,32 +93,32 @@ int main(int argc, char *argv[]) {
   rapidjson::Document d;
 
   char *buffer = (char *)malloc(p.size() + 1);
-  memcpy(buffer, p.c_str(), p.size());
+  memcpy(buffer, p.data(), p.size());
   buffer[p.size()] = '\0';
 
   BEST_TIME("RapidJSON", 
       d.Parse<kParseValidateEncodingFlag>((const char *)buffer).HasParseError(),
-      false, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
+      false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
   BEST_TIME("RapidJSON Insitu", d.ParseInsitu<kParseValidateEncodingFlag>(buffer).HasParseError(), false,
-            memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
+            memcpy(buffer, p.data(), p.size()), repeat, volume, true);
 
-  BEST_TIME("sajson (dynamic mem)", sajson::parse(sajson::dynamic_allocation(), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
+  BEST_TIME("sajson (dynamic mem)", sajson::parse(sajson::dynamic_allocation(), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
 
   size_t astbuffersize = p.size();
   size_t * ast_buffer = (size_t *) malloc(astbuffersize * sizeof(size_t));
 
-  BEST_TIME("sajson (static alloc)", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
+  BEST_TIME("sajson (static alloc)", sajson::parse(sajson::bounded_allocation(ast_buffer, astbuffersize), sajson::mutable_string_view(p.size(), buffer)).is_valid(), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
   std::string json11err;
-  if(all) BEST_TIME("dropbox (json11)     ",  (( json11::Json::parse(buffer,json11err).is_null() ) || ( ! json11err.empty() )), false, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
+  if(all) BEST_TIME("dropbox (json11)     ",  (( json11::Json::parse(buffer,json11err).is_null() ) || ( ! json11err.empty() )), false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
 
-  if(all) BEST_TIME("fastjson             ", fastjson_parse(buffer), true, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
+  if(all) BEST_TIME("fastjson             ", fastjson_parse(buffer), true, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
   JsonValue value;
   JsonAllocator allocator;
   char *endptr;
-  if(all) BEST_TIME("gason             ", jsonParse(buffer, &endptr, &value, allocator), JSON_OK, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
+  if(all) BEST_TIME("gason             ", jsonParse(buffer, &endptr, &value, allocator), JSON_OK, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
   void *state;
-  if(all) BEST_TIME("ultrajson         ", (UJDecode(buffer, p.size(), NULL, &state) == NULL), false, memcpy(buffer, p.c_str(), p.size()), repeat, volume, true);
-  BEST_TIME("memcpy            ", (memcpy(buffer, p.c_str(), p.size()) == buffer), true, , repeat, volume, true);
+  if(all) BEST_TIME("ultrajson         ", (UJDecode(buffer, p.size(), NULL, &state) == NULL), false, memcpy(buffer, p.data(), p.size()), repeat, volume, true);
+  BEST_TIME("memcpy            ", (memcpy(buffer, p.data(), p.size()) == buffer), true, , repeat, volume, true);
   free(ast_buffer);
   free(buffer);
 }
diff --git a/include/simdjson/jsonminifier.h b/include/simdjson/jsonminifier.h
@@ -14,6 +14,6 @@ static inline size_t jsonminify(const char *buf, size_t len, char *out) {
 }
 
 
-static inline size_t jsonminify(const simdjsonstring & p, char *out) {
-    return jsonminify(p.c_str(), p.size(), out);
+static inline size_t jsonminify(const std::string_view & p, char *out) {
+    return jsonminify(p.data(), p.size(), out);
 }
diff --git a/src/jsonioutil.cpp b/src/jsonioutil.cpp
@@ -10,10 +10,6 @@ char * allocate_aligned_buffer(size_t length) {
     if (posix_memalign((void **)&aligned_buffer, 64, totalpaddedlength)) {
       throw std::runtime_error("Could not allocate sufficient memory");
     };
-    aligned_buffer[length] = '\0';
-    for(size_t i = length + 1; i < totalpaddedlength; i++) aligned_buffer[i] = 0x20;
-    //aligned_buffer[paddedlength] = '\0';
-    //memset(aligned_buffer + length, 0x20, paddedlength - length);
     return aligned_buffer;
 }
 
@@ -29,6 +25,7 @@ std::string_view get_corpus(std::string filename) {
     }
     std::rewind(fp);
     std::fread(buf, 1, len, fp);
+    buf[len] = '\0';
     std::fclose(fp);
     return std::string_view(buf,len);
   }
diff --git a/src/jsonparser.cpp b/src/jsonparser.cpp
@@ -11,9 +11,13 @@ bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {
   bool isok = find_structural_bits(buf, len, pj);
   if (isok) {
     isok = flatten_indexes(len, pj);
+  } else {
+    return false;
   }
   if (isok) {
     isok = unified_machine(buf, len, pj);
+  } else {
+    return false;
   }
   return isok;
 }
diff --git a/src/stage1_find_marks.cpp b/src/stage1_find_marks.cpp
@@ -61,8 +61,9 @@ WARN_UNUSED
   // effectively the very first char is considered to follow "whitespace" for the
   // purposes of psuedo-structural character detection
   u64 prev_iter_ends_pseudo_pred = 1ULL;
-
-  for (size_t idx = 0; idx < len; idx += 64) {
+  size_t lenminus64 = len + 1 < 64 ? 0 : len + 1  - 64; // len + 1 because of the NULL termination
+  size_t idx = 0;
+  for (; idx < lenminus64; idx += 64) {
     __builtin_prefetch(buf + idx + 128);
 #ifdef DEBUG
     cout << "Idx is " << idx << "\n";
@@ -249,21 +250,163 @@ WARN_UNUSED
         "final structurals and pseudo structurals after close quote removal");
     *(u64 *)(pj.structurals + idx / 8) = structurals;
   }
+
+  ////////////////
+  /// We handle the final partial block with a giant copy-paste of the loop
+  /// body, which is ugly, but otherwise the input would need to be properly
+  /// padded or else we risk invalidating the UTF-8 checks.
+  ////////////
+  if (idx < len + 1) { // +1 due to NULL termination
+    u8 tmpbuf[64];
+    memset(tmpbuf,0x20,64);
+    memcpy(tmpbuf,buf+idx,len - idx + 1);// +1 due to NULL termination
+    m256 input_lo = _mm256_loadu_si256((const m256 *)(tmpbuf + 0));
+    m256 input_hi = _mm256_loadu_si256((const m256 *)(tmpbuf + 32));
+#ifdef UTF8VALIDATE
+    m256 highbit = _mm256_set1_epi8(0x80);
+    if((_mm256_testz_si256(_mm256_or_si256(input_lo, input_hi),highbit)) == 1) {
+        // it is ascii, we just check continuation
+        has_error = _mm256_or_si256(
+          _mm256_cmpgt_epi8(previous.carried_continuations,
+                          _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+                                           9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+                                           9, 9, 9, 9, 9, 9, 9, 1)),has_error);
+ 
+    } else {
+        // it is not ascii so we have to do heavy work
+        previous = avxcheckUTF8Bytes(input_lo, &previous, &has_error);
+        previous = avxcheckUTF8Bytes(input_hi, &previous, &has_error);
+    }
+#endif
+    ////////////////////////////////////////////////////////////////////////////////////////////
+    //     Step 1: detect odd sequences of backslashes
+    ////////////////////////////////////////////////////////////////////////////////////////////
+
+    u64 bs_bits =
+        cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('\\'));
+    u64 start_edges = bs_bits & ~(bs_bits << 1);
+    // flip lowest if we have an odd-length run at the end of the prior
+    // iteration
+    u64 even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
+    u64 even_starts = start_edges & even_start_mask;
+    u64 odd_starts = start_edges & ~even_start_mask;
+    u64 even_carries = bs_bits + even_starts;
+
+    u64 odd_carries;
+    // must record the carry-out of our odd-carries out of bit 63; this
+    // indicates whether the sense of any edge going to the next iteration
+    // should be flipped
+    bool iter_ends_odd_backslash =
+        __builtin_uaddll_overflow(bs_bits, odd_starts, &odd_carries);
+
+    odd_carries |=
+        prev_iter_ends_odd_backslash; // push in bit zero as a potential end
+                                      // if we had an odd-numbered run at the
+                                      // end of the previous iteration
+    prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
+    u64 even_carry_ends = even_carries & ~bs_bits;
+    u64 odd_carry_ends = odd_carries & ~bs_bits;
+    u64 even_start_odd_end = even_carry_ends & odd_bits;
+    u64 odd_start_even_end = odd_carry_ends & even_bits;
+    u64 odd_ends = even_start_odd_end | odd_start_even_end;
+
+    ////////////////////////////////////////////////////////////////////////////////////////////
+    //     Step 2: detect insides of quote pairs
+    ////////////////////////////////////////////////////////////////////////////////////////////
+
+    u64 quote_bits =
+        cmp_mask_against_input(input_lo, input_hi, _mm256_set1_epi8('"'));
+    quote_bits = quote_bits & ~odd_ends;
+    u64 quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
+        _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
+    quote_mask ^= prev_iter_inside_quote;
+    prev_iter_inside_quote = (u64)((s64)quote_mask >> 63); // right shift of a signed value expected to be well-defined and standard compliant as of C++20
+
+    // How do we build up a user traversable data structure
+    // first, do a 'shufti' to detect structural JSON characters
+    // they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
+    // these go into the first 3 buckets of the comparison (1/2/4)
+
+    // we are also interested in the four whitespace characters
+    // space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
+    // these go into the next 2 buckets of the comparison (8/16)
+    const m256 low_nibble_mask = _mm256_setr_epi8(
+        //  0                           9  a   b  c  d
+        16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0, 0,
+        0, 0, 8, 12, 1, 2, 9, 0, 0);
+    const m256 high_nibble_mask = _mm256_setr_epi8(
+        //  0     2   3     5     7
+        8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
+        1, 0, 0, 0, 3, 2, 1, 0, 0);
+
+    m256 structural_shufti_mask = _mm256_set1_epi8(0x7);
+    m256 whitespace_shufti_mask = _mm256_set1_epi8(0x18);
+
+    m256 v_lo = _mm256_and_si256(
+        _mm256_shuffle_epi8(low_nibble_mask, input_lo),
+        _mm256_shuffle_epi8(high_nibble_mask,
+                            _mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
+                                             _mm256_set1_epi8(0x7f))));
+
+    m256 v_hi = _mm256_and_si256(
+        _mm256_shuffle_epi8(low_nibble_mask, input_hi),
+        _mm256_shuffle_epi8(high_nibble_mask,
+                            _mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
+                                             _mm256_set1_epi8(0x7f))));
+    m256 tmp_lo = _mm256_cmpeq_epi8(
+        _mm256_and_si256(v_lo, structural_shufti_mask), _mm256_set1_epi8(0));
+    m256 tmp_hi = _mm256_cmpeq_epi8(
+        _mm256_and_si256(v_hi, structural_shufti_mask), _mm256_set1_epi8(0));
+
+    u64 structural_res_0 = (u32)_mm256_movemask_epi8(tmp_lo);
+    u64 structural_res_1 = _mm256_movemask_epi8(tmp_hi);
+    u64 structurals = ~(structural_res_0 | (structural_res_1 << 32));
+
+    // this additional mask and transfer is non-trivially expensive,
+    // unfortunately
+    m256 tmp_ws_lo = _mm256_cmpeq_epi8(
+        _mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
+    m256 tmp_ws_hi = _mm256_cmpeq_epi8(
+        _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));
+
+    u64 ws_res_0 = (u32)_mm256_movemask_epi8(tmp_ws_lo);
+    u64 ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
+    u64 whitespace = ~(ws_res_0 | (ws_res_1 << 32));
+
+
+    // mask off anything inside quotes
+    structurals &= ~quote_mask;
+
+    // add the real quote bits back into our bitmask as well, so we can
+    // quickly traverse the strings we've spent all this trouble gathering
+    structurals |= quote_bits;
+
+    // Now, establish "pseudo-structural characters". These are non-whitespace
+    // characters that are (a) outside quotes and (b) have a predecessor that's
+    // either whitespace or a structural character. This means that subsequent
+    // passes will get a chance to encounter the first character of every string
+    // of non-whitespace and, if we're parsing an atom like true/false/null or a
+    // number we can stop at the first whitespace or structural character
+    // following it.
+
+    // a qualified predecessor is something that can happen 1 position before a
+    // pseudo-structural character
+    u64 pseudo_pred = structurals | whitespace;
+    u64 shifted_pseudo_pred = (pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
+    prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
+    u64 pseudo_structurals =
+        shifted_pseudo_pred & (~whitespace) & (~quote_mask);
+    structurals |= pseudo_structurals;
+
+    // Now we've used our close quotes all we need to, so let's switch them
+    // off: they will be off in the quote mask and on in quote bits.
+    structurals &= ~(quote_bits & ~quote_mask);
+    *(u64 *)(pj.structurals + idx / 8) = structurals;
+  }
   if(buf[len] != '\0') {
       std::cerr << "Your string should be NULL terminated." << std::endl;
       return false;
   }
-  // we are going to zero out everything after len:
-  size_t count_last_64bits = len % 64;
-  if(count_last_64bits != 0) { // we have a "final" word where only count_last_64bits matter
-      u64 lastword = *(u64 *)(pj.structurals + len / 8);
-      printf("last word %zu \n", lastword);
-      printf("count_last_64bits%zu \n", count_last_64bits);
-      lastword &= ( UINT64_C(1) << count_last_64bits) - 1;
-      *(u64 *)(pj.structurals + len / 8) = lastword;
-  }
-
-  //pj.structural_indexes[pj.n_structural_indexes++] = len; // the final NULL is used as a pseudo-structural character
 #ifdef UTF8VALIDATE
   return _mm256_testz_si256(has_error, has_error);
 #else
diff --git a/src/stage2_flatten.cpp b/src/stage2_flatten.cpp
diff --git a/tests/allparserscheckfile.cpp b/tests/allparserscheckfile.cpp
diff --git a/tests/jsoncheck.cpp b/tests/jsoncheck.cpp
diff --git a/tools/minify.cpp b/tools/minify.cpp

Original file line number	Diff line number	Diff line change
`@@ -14,6 +14,6 @@ static inline size_t jsonminify(const char *buf, size_t len, char *out) {`
`14`	`14`	`}`
`15`	`15`
`16`	`16`
`17`		`-static inline size_t jsonminify(const simdjsonstring & p, char *out) {`
`18`		`- return jsonminify(p.c_str(), p.size(), out);`
	`17`	`+static inline size_t jsonminify(const std::string_view & p, char *out) {`
	`18`	`+ return jsonminify(p.data(), p.size(), out);`
`19`	`19`	`}`
Original file line number	Diff line number	Diff line change
`@@ -10,10 +10,6 @@ char * allocate_aligned_buffer(size_t length) {`
`10`	`10`	`if (posix_memalign((void **)&aligned_buffer, 64, totalpaddedlength)) {`
`11`	`11`	`throw std::runtime_error("Could not allocate sufficient memory");`
`12`	`12`	`};`
`13`		`- aligned_buffer[length] = '\0';`
`14`		`- for(size_t i = length + 1; i < totalpaddedlength; i++) aligned_buffer[i] = 0x20;`
`15`		`- //aligned_buffer[paddedlength] = '\0';`
`16`		`- //memset(aligned_buffer + length, 0x20, paddedlength - length);`
`17`	`13`	`return aligned_buffer;`
`18`	`14`	`}`
`19`	`15`
`@@ -29,6 +25,7 @@ std::string_view get_corpus(std::string filename) {`
`29`	`25`	`}`
`30`	`26`	`std::rewind(fp);`
`31`	`27`	`std::fread(buf, 1, len, fp);`
	`28`	`+ buf[len] = '\0';`
`32`	`29`	`std::fclose(fp);`
`33`	`30`	`return std::string_view(buf,len);`
`34`	`31`	`}`
Original file line number	Diff line number	Diff line change
`@@ -11,9 +11,13 @@ bool json_parse(const u8 *buf, size_t len, ParsedJson &pj) {`
`11`	`11`	`bool isok = find_structural_bits(buf, len, pj);`
`12`	`12`	`if (isok) {`
`13`	`13`	`isok = flatten_indexes(len, pj);`
	`14`	`+ } else {`
	`15`	`+ return false;`
`14`	`16`	`}`
`15`	`17`	`if (isok) {`
`16`	`18`	`isok = unified_machine(buf, len, pj);`
	`19`	`+ } else {`
	`20`	`+ return false;`
`17`	`21`	`}`
`18`	`22`	`return isok;`
`19`	`23`	`}`