77#include < thread>
88#include " simdjson/padded_string.h"
99#include " simdjson/simdjson.h"
10- #include " jsoncharutils.h"
1110
1211
1312namespace simdjson {
@@ -233,6 +232,22 @@ template <class string_container> JsonStream<string_container>::~JsonStream() {
233232#endif
234233}
235234
namespace internal {

// Returns true when the byte `c` is a 7-bit ASCII value (0..127).
// The cast to unsigned char makes the comparison independent of whether
// plain `char` is signed on the target platform.
static inline bool is_ascii(char c) {
  return static_cast<unsigned char>(c) <= 127;
}

// Returns the largest length `n <= len` such that the first `n` bytes of `c`
// end on an ASCII byte, i.e. every trailing non-ASCII (UTF-8 lead or
// continuation) byte is dropped. This guarantees the returned prefix never
// stops in the middle of a multi-byte UTF-8 sequence; it is conservative and
// also drops complete multi-byte characters that sit at the very end.
//
// Returns 0 when `len` is 0 or when the buffer holds no ASCII byte at all;
// `c` is never dereferenced in the `len == 0` case.
//
// Note: the loop condition uses `&&` / `!` rather than the alternative tokens
// `and` / `not`, which MSVC does not accept in its default mode without
// <ciso646>.
static inline size_t trimmed_length_safe_utf8(const char *c, size_t len) {
  while ((len > 0) && !is_ascii(c[len - 1])) {
    len--;
  }
  return len;
}

} // namespace internal
250+
236251#ifdef SIMDJSON_THREADS_ENABLED
237252
238253// threaded version of json_parse
@@ -257,7 +272,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
257272 // First time loading
258273 if (!stage_1_thread.joinable ()) {
259274 _batch_size = (std::min)(_batch_size, remaining ());
260- _batch_size = trimmed_length_safe_utf8 ((const char *)buf (), _batch_size);
275+ _batch_size = internal:: trimmed_length_safe_utf8 ((const char *)buf (), _batch_size);
261276 if (_batch_size == 0 ) {
262277 return parser.error = simdjson::UTF8_ERROR;
263278 }
@@ -291,7 +306,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
291306 parser.structural_indexes [find_last_json_buf_idx (buf (), _batch_size, parser)];
292307 _batch_size = (std::min)(_batch_size, remaining () - last_json_buffer_loc);
293308 if (_batch_size > 0 ) {
294- _batch_size = trimmed_length_safe_utf8 (
309+ _batch_size = internal:: trimmed_length_safe_utf8 (
295310 (const char *)(buf () + last_json_buffer_loc), _batch_size);
296311 if (_batch_size == 0 ) {
297312 return parser.error = simdjson::UTF8_ERROR;
@@ -343,7 +358,7 @@ int JsonStream<string_container>::json_parse(document::parser &parser) {
343358 advance (current_buffer_loc);
344359 n_bytes_parsed += current_buffer_loc;
345360 _batch_size = (std::min)(_batch_size, remaining ());
346- _batch_size = trimmed_length_safe_utf8 ((const char *)buf (), _batch_size);
361+ _batch_size = internal:: trimmed_length_safe_utf8 ((const char *)buf (), _batch_size);
347362 auto stage1_is_ok = (error_code)simdjson::active_implementation->stage1 (buf (), _batch_size, parser, true );
348363 if (stage1_is_ok != simdjson::SUCCESS) {
349364 return parser.on_error (stage1_is_ok);
0 commit comments