diff --git a/.circleci/config.yml b/.circleci/config.yml index c571aabd43..c96ee86ee0 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,7 @@ executors: environment: CXX: g++-8 CC: gcc-8 - BUILD_FLAGS: + CMAKE_BUILD_FLAGS: CTEST_FLAGS: --output-on-failure gcc9: @@ -20,7 +20,7 @@ executors: environment: CXX: g++-9 CC: gcc-9 - BUILD_FLAGS: + CMAKE_BUILD_FLAGS: CTEST_FLAGS: --output-on-failure gcc10: @@ -29,7 +29,7 @@ executors: environment: CXX: g++-10 CC: gcc-10 - BUILD_FLAGS: + CMAKE_BUILD_FLAGS: CTEST_FLAGS: --output-on-failure clang10: @@ -38,7 +38,7 @@ executors: environment: CXX: clang++-10 CC: clang-10 - BUILD_FLAGS: + CMAKE_BUILD_FLAGS: CTEST_FLAGS: --output-on-failure clang9: @@ -47,7 +47,7 @@ executors: environment: CXX: clang++-9 CC: clang-9 - BUILD_FLAGS: + CMAKE_BUILD_FLAGS: CTEST_FLAGS: --output-on-failure clang6: @@ -56,7 +56,7 @@ executors: environment: CXX: clang++-6.0 CC: clang-6.0 - BUILD_FLAGS: + CMAKE_BUILD_FLAGS: CTEST_FLAGS: --output-on-failure # Reusable test commands (and initializer for clang 6) @@ -191,7 +191,7 @@ jobs: sanitize-gcc10: description: Build and run tests on GCC 10 and AVX 2 with a cmake sanitize build executor: gcc10 - environment: { CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE=ON, BUILD_FLAGS: "", CTEST_FLAGS: --output-on-failure -LE explicitonly } + environment: { CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE=ON, CTEST_FLAGS: --output-on-failure -LE explicitonly } steps: [ cmake_test ] sanitize-clang10: description: Build and run tests on clang 10 and AVX 2 with a cmake sanitize build @@ -201,13 +201,21 @@ jobs: threadsanitize-gcc10: description: Build and run tests on GCC 10 and AVX 2 with a cmake sanitize build executor: gcc10 - environment: { CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE_THREADS=ON, BUILD_FLAGS: "", CTEST_FLAGS: --output-on-failure -LE explicitonly } + environment: { CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE_THREADS=ON, CTEST_FLAGS: 
--output-on-failure -LE explicitonly } steps: [ cmake_test ] threadsanitize-clang10: description: Build and run tests on clang 10 and AVX 2 with a cmake sanitize build executor: clang10 environment: { CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE_THREADS=ON, CTEST_FLAGS: --output-on-failure -LE explicitonly } steps: [ cmake_test ] + nocheckeof-clang10: + description: Validate that when __SIMDJSON_CHECK_EOF=0, everything still succeeds + environment: + CXXFLAGS: -D__SIMDJSON_CHECK_EOF=0 + CMAKE_BUILD_FLAGS: --target ondemand_tests + CTEST_FLAGS: --output-on-failure -R ondemand_ + executor: clang10 + steps: [ cmake_test ] # dynamic dynamic-gcc10: description: Build and run tests on GCC 10 and AVX 2 with a cmake dynamic build @@ -262,7 +270,7 @@ jobs: sanitize-haswell-gcc10: description: Build and run tests on GCC 10 and AVX 2 with a cmake sanitize build executor: gcc10 - environment: { CXXFLAGS: -march=haswell, CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE=ON, BUILD_FLAGS: "", CTEST_FLAGS: --output-on-failure -LE explicitonly } + environment: { CXXFLAGS: -march=haswell, CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE=ON, CTEST_FLAGS: --output-on-failure -LE explicitonly } steps: [ cmake_test ] sanitize-haswell-clang10: description: Build and run tests on clang 10 and AVX 2 with a cmake sanitize build diff --git a/include/simdjson/common_defs.h b/include/simdjson/common_defs.h index aa8fe84bf5..a694d8c839 100644 --- a/include/simdjson/common_defs.h +++ b/include/simdjson/common_defs.h @@ -19,6 +19,8 @@ char *to_chars(char *first, const char *last, double value); * Defined in src/from_chars */ double from_chars(const char *first) noexcept; +double from_chars(const char *first, const char* end) noexcept; + } #ifndef SIMDJSON_EXCEPTIONS @@ -251,6 +253,12 @@ namespace std { #endif #endif +// Feature flag for partially-implemented "don't require padding" feature +// TODO remove once feature complete. 
+#ifndef __SIMDJSON_CHECK_EOF +# define __SIMDJSON_CHECK_EOF 1 +#endif + #if SIMDJSON_CPLUSPLUS17 // if we have C++, then fallthrough is a default attribute # define simdjson_fallthrough [[fallthrough]] diff --git a/include/simdjson/error.h b/include/simdjson/error.h index d2583cd7b7..4ed9bcdff0 100644 --- a/include/simdjson/error.h +++ b/include/simdjson/error.h @@ -10,33 +10,34 @@ namespace simdjson { * All possible errors returned by simdjson. */ enum error_code { - SUCCESS = 0, ///< No error - CAPACITY, ///< This parser can't support a document that big - MEMALLOC, ///< Error allocating memory, most likely out of memory - TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error - DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation - STRING_ERROR, ///< Problem while parsing a string - T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' - F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' - N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' - NUMBER_ERROR, ///< Problem while parsing a number - UTF8_ERROR, ///< the input is not valid UTF-8 - UNINITIALIZED, ///< unknown error, or uninitialized document - EMPTY, ///< no structural element found - UNESCAPED_CHARS, ///< found unescaped characters in a string. - UNCLOSED_STRING, ///< missing quote at the end - UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture - INCORRECT_TYPE, ///< JSON element has a different type than user expected - NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits - INDEX_OUT_OF_BOUNDS, ///< JSON array index too large - NO_SUCH_FIELD, ///< JSON field not found in object - IO_ERROR, ///< Error reading a file - INVALID_JSON_POINTER, ///< Invalid JSON pointer reference - INVALID_URI_FRAGMENT, ///< Invalid URI fragment - UNEXPECTED_ERROR, ///< indicative of a bug in simdjson - PARSER_IN_USE, ///< parser is already in use. 
- OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order - INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. + SUCCESS = 0, ///< No error + CAPACITY, ///< This parser can't support a document that big + MEMALLOC, ///< Error allocating memory, most likely out of memory + TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error + DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation + STRING_ERROR, ///< Problem while parsing a string + T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' + F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' + N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' + NUMBER_ERROR, ///< Problem while parsing a number + UTF8_ERROR, ///< the input is not valid UTF-8 + UNINITIALIZED, ///< unknown error, or uninitialized document + EMPTY, ///< no structural element found + UNESCAPED_CHARS, ///< found unescaped characters in a string. + UNCLOSED_STRING, ///< missing quote at the end + UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture + INCORRECT_TYPE, ///< JSON element has a different type than user expected + NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits + INDEX_OUT_OF_BOUNDS, ///< JSON array index too large + NO_SUCH_FIELD, ///< JSON field not found in object + IO_ERROR, ///< Error reading a file + INVALID_JSON_POINTER, ///< Invalid JSON pointer reference + INVALID_URI_FRAGMENT, ///< Invalid URI fragment + UNEXPECTED_ERROR, ///< indicative of a bug in simdjson + PARSER_IN_USE, ///< parser is already in use. + OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order + INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. 
NUM_ERROR_CODES }; diff --git a/include/simdjson/generic/numberparsing.h b/include/simdjson/generic/numberparsing.h index 5ba1ea2037..1abe524df9 100644 --- a/include/simdjson/generic/numberparsing.h +++ b/include/simdjson/generic/numberparsing.h @@ -305,6 +305,20 @@ static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { // to handle that max may be a macro on windows). return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); } +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's @@ -709,6 +723,56 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( return i; } + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. 
+ // - The value we are looking at is less than or equal to 9,223,372,036,854,775,807 (INT64_MAX). + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // @@ -751,6 +815,50 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co return negative ? (~i+1) : i; } +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... 
+ // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign @@ -828,6 +936,90 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons } return d; } + + +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 (never reading at src_end) and see if the significant digits still overflow + const uint8_t *start_digits = src + 2; + while ((start_digits != src_end) && (*start_digits == '0')) { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + } //namespace {} #endif // SIMDJSON_SKIPNUMBERPARSING diff --git a/include/simdjson/generic/ondemand/array-inl.h b/include/simdjson/generic/ondemand/array-inl.h index 88a72e2d70..ba35724b7a 100644 --- a/include/simdjson/generic/ondemand/array-inl.h +++ b/include/simdjson/generic/ondemand/array-inl.h @@ -57,8 +57,9 @@ simdjson_really_inline simdjson_result array::start_root(value_iterator & SIMDJSON_TRY( iter.start_root_array().get(has_value) ); return array(iter); } -simdjson_really_inline array array::started(value_iterator &iter) noexcept { - simdjson_unused bool has_value = iter.started_array(); +simdjson_really_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); return array(iter); } @@ -175,6 +176,10 @@ simdjson_really_inline simdjson_result simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); diff --git a/include/simdjson/generic/ondemand/array.h b/include/simdjson/generic/ondemand/array.h index fe428c8584..131df5a399 100644 --- a/include/simdjson/generic/ondemand/array.h +++ b/include/simdjson/generic/ondemand/array.h @@ -110,7 +110,7 @@ class array { * * @param iter The iterator. Must be after the initial [. 
Will be *moved* into the resulting array. */ - static simdjson_really_inline array started(value_iterator &iter) noexcept; + static simdjson_really_inline simdjson_result started(value_iterator &iter) noexcept; /** * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. @@ -160,6 +160,7 @@ struct simdjson_result : public SIMDJS simdjson_really_inline simdjson_result begin() noexcept; simdjson_really_inline simdjson_result end() noexcept; simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_really_inline simdjson_result at(size_t index) noexcept; simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; diff --git a/include/simdjson/generic/ondemand/array_iterator-inl.h b/include/simdjson/generic/ondemand/array_iterator-inl.h index c003eb3881..1334f8b350 100644 --- a/include/simdjson/generic/ondemand/array_iterator-inl.h +++ b/include/simdjson/generic/ondemand/array_iterator-inl.h @@ -20,9 +20,9 @@ simdjson_really_inline array_iterator &array_iterator::operator++() noexcept { error_code error; // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. 
- if ((error = iter.error()) ) { return *this; } - if ((error = iter.skip_child() )) { return *this; } - if ((error = iter.has_next_element().error() )) { return *this; } + if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } return *this; } diff --git a/include/simdjson/generic/ondemand/document-inl.h b/include/simdjson/generic/ondemand/document-inl.h index 44b4de2594..045411d501 100644 --- a/include/simdjson/generic/ondemand/document-inl.h +++ b/include/simdjson/generic/ondemand/document-inl.h @@ -21,13 +21,37 @@ inline std::string document::to_debug_string() noexcept { } simdjson_really_inline value_iterator document::resume_value_iterator() noexcept { - return value_iterator(&iter, 1, iter.root_checkpoint()); + return value_iterator(&iter, 1, iter.root_position()); } simdjson_really_inline value_iterator document::get_root_value_iterator() noexcept { return resume_value_iterator(); } -simdjson_really_inline value document::resume_value() noexcept { - return resume_value_iterator(); +simdjson_really_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_really_inline simdjson_result document::get_value_unsafe() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + switch (*iter.peek()) { + case '[': { + array result; + SIMDJSON_TRY( get_array().get(result) ); + return value(result.iter); + } + case '{': { + object result; + SIMDJSON_TRY( get_object().get(result) ); + return value(result.iter); + } + default: + // TODO it is still wrong to convert this to a value! get_root_bool / etc. will not be + // called if you do this. 
+ return value(get_root_value_iterator()); + } } simdjson_really_inline simdjson_result document::get_array() & noexcept { auto value = get_root_value_iterator(); @@ -107,22 +131,22 @@ simdjson_really_inline simdjson_result document::end() & noexcep } simdjson_really_inline simdjson_result document::find_field(std::string_view key) & noexcept { - return resume_value().find_field(key); + return start_or_resume_object().find_field(key); } simdjson_really_inline simdjson_result document::find_field(const char *key) & noexcept { - return resume_value().find_field(key); + return start_or_resume_object().find_field(key); } simdjson_really_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { - return resume_value().find_field_unordered(key); + return start_or_resume_object().find_field_unordered(key); } simdjson_really_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { - return resume_value().find_field_unordered(key); + return start_or_resume_object().find_field_unordered(key); } simdjson_really_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return resume_value()[key]; + return start_or_resume_object()[key]; } simdjson_really_inline simdjson_result document::operator[](const char *key) & noexcept { - return resume_value()[key]; + return start_or_resume_object()[key]; } simdjson_really_inline error_code document::consume() noexcept { @@ -153,7 +177,7 @@ simdjson_really_inline simdjson_result document::raw_json_toke simdjson_really_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { rewind(); // Rewind the document each time at_pointer is called if (json_pointer.empty()) { - return this->resume_value(); + return this->get_value_unsafe(); } json_type t; SIMDJSON_TRY(type().get(t)); diff --git a/include/simdjson/generic/ondemand/document.h b/include/simdjson/generic/ondemand/document.h index 819bb0b571..2d65fc9cfd 100644 --- 
a/include/simdjson/generic/ondemand/document.h +++ b/include/simdjson/generic/ondemand/document.h @@ -369,7 +369,8 @@ class document { simdjson_really_inline value_iterator resume_value_iterator() noexcept; simdjson_really_inline value_iterator get_root_value_iterator() noexcept; - simdjson_really_inline value resume_value() noexcept; + simdjson_really_inline simdjson_result get_value_unsafe() noexcept; + simdjson_really_inline simdjson_result start_or_resume_object() noexcept; static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept; // diff --git a/include/simdjson/generic/ondemand/json_iterator-inl.h b/include/simdjson/generic/ondemand/json_iterator-inl.h index dd90e99492..209cd99854 100644 --- a/include/simdjson/generic/ondemand/json_iterator-inl.h +++ b/include/simdjson/generic/ondemand/json_iterator-inl.h @@ -26,7 +26,7 @@ simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&o } simdjson_really_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, _parser->implementation->structural_indexes.get()), + : token(buf, &_parser->implementation->structural_indexes[0]), parser{_parser}, _string_buf_loc{parser->string_buf.get()}, _depth{1}, @@ -35,10 +35,11 @@ simdjson_really_inline json_iterator::json_iterator(const uint8_t *buf, ondemand { logger::log_headers(); + assert_more_tokens(); } inline void json_iterator::rewind() noexcept { - token.index = _root; + token.set_position( root_position() ); logger::log_headers(); // We start again _string_buf_loc = parser->string_buf.get(); _depth = 1; @@ -83,6 +84,10 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child logger::log_end_value(*this, "skip"); _depth--; if (depth() <= parent_depth) { return SUCCESS; } +#if __SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. 
+ if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // __SIMDJSON_CHECK_EOF break; /*case '"': if(*peek() == ':') { @@ -107,8 +112,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child } // Now that we've considered the first value, we only increment/decrement for arrays/objects - auto end = &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; - while (token.index <= end) { + while (position() < end_position()) { switch (*return_current_and_advance()) { case '[': case '{': logger::log_start_value(*this, "skip"); @@ -136,27 +140,43 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child SIMDJSON_POP_DISABLE_WARNINGS simdjson_really_inline bool json_iterator::at_root() const noexcept { - return token.position() == root_checkpoint(); + return position() == root_position(); } simdjson_really_inline bool json_iterator::streaming() const noexcept { return _streaming; } -simdjson_really_inline token_position json_iterator::root_checkpoint() const noexcept { +simdjson_really_inline token_position json_iterator::root_position() const noexcept { return _root; } simdjson_really_inline void json_iterator::assert_at_root() const noexcept { SIMDJSON_ASSUME( _depth == 1 ); - // Visual Studio Clang treats unique_ptr.get() as "side effecting." #ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( token.index == _root ); + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. 
+ SIMDJSON_ASSUME( token.position() == _root ); #endif } -simdjson_really_inline bool json_iterator::at_eof() const noexcept { - return token.index == &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +simdjson_really_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_really_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_really_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_really_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; } inline std::string json_iterator::to_string() const noexcept { @@ -179,26 +199,45 @@ simdjson_really_inline void json_iterator::abandon() noexcept { } simdjson_really_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { + // The following assert_more_tokens is currently disabled because rely on end-of-file buffering. + // assert_more_tokens(); + // This is almost surely related to __SIMDJSON_CHECK_EOF but given that __SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. return token.return_current_and_advance(); } simdjson_really_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { + // The following assert_more_tokens is currently disabled because rely on end-of-file buffering. 
+ // assert_more_tokens(delta+1); + // This is almost surely related to __SIMDJSON_CHECK_EOF but given that __SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. return token.peek(delta); } simdjson_really_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { + assert_more_tokens(delta+1); return token.peek_length(delta); } simdjson_really_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to __SIMDJSON_CHECK_EOF but given that __SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. return token.peek(position); } simdjson_really_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to __SIMDJSON_CHECK_EOF but given that __SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. return token.peek_length(position); } -simdjson_really_inline token_position json_iterator::last_document_position() const noexcept { +simdjson_really_inline token_position json_iterator::last_position() const noexcept { // The following line fails under some compilers... // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); // since it has side-effects. 
@@ -207,7 +246,7 @@ simdjson_really_inline token_position json_iterator::last_document_position() co return &parser->implementation->structural_indexes[n_structural_indexes - 1]; } simdjson_really_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_document_position()); + return token.peek(last_position()); } simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { @@ -240,6 +279,7 @@ simdjson_really_inline error_code json_iterator::report_error(error_code _error, simdjson_really_inline token_position json_iterator::position() const noexcept { return token.position(); } + simdjson_really_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); SIMDJSON_ASSUME(_depth == child_depth - 1); @@ -253,9 +293,11 @@ simdjson_really_inline void json_iterator::reenter_child(token_position position } #ifdef SIMDJSON_DEVELOPMENT_CHECKS + simdjson_really_inline token_position json_iterator::start_position(depth_t depth) const noexcept { return parser->start_positions[depth]; } + simdjson_really_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { parser->start_positions[depth] = position; } @@ -271,9 +313,11 @@ simdjson_really_inline error_code json_iterator::optional_error(error_code _erro template simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept { + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } // Truncate whitespace to fit the buffer. 
if (max_len > N-1) { - if (jsoncharutils::is_not_structural_or_whitespace(json[N-1])) { return false; } + // if (jsoncharutils::is_not_structural_or_whitespace(json[N-1])) { return false; } max_len = N-1; } @@ -283,20 +327,6 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(c return true; } -template -simdjson_warn_unused simdjson_really_inline bool json_iterator::peek_to_buffer(uint8_t (&tmpbuf)[N]) noexcept { - auto max_len = token.peek_length(); - auto json = token.peek(); - return copy_to_buffer(json, max_len, tmpbuf); -} - -template -simdjson_warn_unused simdjson_really_inline bool json_iterator::advance_to_buffer(uint8_t (&tmpbuf)[N]) noexcept { - auto max_len = peek_length(); - auto json = return_current_and_advance(); - return copy_to_buffer(json, max_len, tmpbuf); -} - } // namespace ondemand } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson diff --git a/include/simdjson/generic/ondemand/json_iterator.h b/include/simdjson/generic/ondemand/json_iterator.h index 3ba6af5973..9165c2d536 100644 --- a/include/simdjson/generic/ondemand/json_iterator.h +++ b/include/simdjson/generic/ondemand/json_iterator.h @@ -87,7 +87,7 @@ class json_iterator { /** * Get the root value iterator */ - simdjson_really_inline token_position root_checkpoint() const noexcept; + simdjson_really_inline token_position root_position() const noexcept; /** * Assert if the iterator is not at the start @@ -97,7 +97,7 @@ class json_iterator { /** * Tell whether the iterator is at the EOF mark */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_really_inline bool at_end() const noexcept; /** * Tell whether the iterator is live (has not been moved). @@ -110,10 +110,22 @@ class json_iterator { simdjson_really_inline void abandon() noexcept; /** - * Advance the current token. + * Advance the current token without modifying depth. 
*/ simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_really_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_really_inline void assert_valid_position(token_position position) const noexcept; /** * Get the JSON text for a given token (relative). * @@ -207,8 +219,6 @@ class json_iterator { simdjson_really_inline error_code optional_error(error_code error, const char *message) noexcept; template simdjson_warn_unused simdjson_really_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept; - template simdjson_warn_unused simdjson_really_inline bool peek_to_buffer(uint8_t (&tmpbuf)[N]) noexcept; - template simdjson_warn_unused simdjson_really_inline bool advance_to_buffer(uint8_t (&tmpbuf)[N]) noexcept; simdjson_really_inline token_position position() const noexcept; simdjson_really_inline void reenter_child(token_position position, depth_t child_depth) noexcept; @@ -225,7 +235,12 @@ class json_iterator { inline void rewind() noexcept; protected: simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; - simdjson_really_inline token_position last_document_position() const noexcept; + /// The last token before the end + simdjson_really_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_really_inline token_position end_position() const noexcept; + /// The end of the buffer. 
+ simdjson_really_inline token_position end() const noexcept; friend class document; friend class document_stream; diff --git a/include/simdjson/generic/ondemand/logger-inl.h b/include/simdjson/generic/ondemand/logger-inl.h index 45ba831fac..0445db5788 100644 --- a/include/simdjson/generic/ondemand/logger-inl.h +++ b/include/simdjson/generic/ondemand/logger-inl.h @@ -122,7 +122,7 @@ inline void log_headers() noexcept { } inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, iter.token.index+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail); + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail); } inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept { if (LOG_ENABLED) { diff --git a/include/simdjson/generic/ondemand/object-inl.h b/include/simdjson/generic/ondemand/object-inl.h index 319b744f38..0bd49e11c3 100644 --- a/include/simdjson/generic/ondemand/object-inl.h +++ b/include/simdjson/generic/ondemand/object-inl.h @@ -34,15 +34,11 @@ simdjson_really_inline simdjson_result object::find_field(const std::stri } simdjson_really_inline simdjson_result object::start(value_iterator &iter) noexcept { - // We don't need to know if the object is empty to start iteration, but we do want to know if there - // is an error--thus `simdjson_unused`. 
- simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_object().get(has_value) ); + SIMDJSON_TRY( iter.start_object().error() ); return object(iter); } simdjson_really_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_root_object().get(has_value) ); + SIMDJSON_TRY( iter.start_root_object().error() ); return object(iter); } simdjson_really_inline error_code object::consume() noexcept { @@ -79,10 +75,11 @@ simdjson_really_inline simdjson_result object::raw_json() noex return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_really_inline object object::started(value_iterator &iter) noexcept { - simdjson_unused bool has_value = iter.started_object(); - return iter; +simdjson_really_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); } + simdjson_really_inline object object::resume(const value_iterator &iter) noexcept { return iter; } diff --git a/include/simdjson/generic/ondemand/object.h b/include/simdjson/generic/ondemand/object.h index 08e34efebc..8164646e19 100644 --- a/include/simdjson/generic/ondemand/object.h +++ b/include/simdjson/generic/ondemand/object.h @@ -124,7 +124,7 @@ class object { simdjson_really_inline error_code consume() noexcept; static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_really_inline simdjson_result start_root(value_iterator &iter) noexcept; - static simdjson_really_inline object started(value_iterator &iter) noexcept; + static simdjson_really_inline simdjson_result started(value_iterator &iter) noexcept; static simdjson_really_inline object resume(const value_iterator &iter) noexcept; simdjson_really_inline object(const value_iterator &iter) noexcept; diff --git a/include/simdjson/generic/ondemand/token_iterator-inl.h 
b/include/simdjson/generic/ondemand/token_iterator-inl.h index 7ef8235993..46886832e6 100644 --- a/include/simdjson/generic/ondemand/token_iterator-inl.h +++ b/include/simdjson/generic/ondemand/token_iterator-inl.h @@ -2,18 +2,20 @@ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline token_iterator::token_iterator(const uint8_t *_buf, token_position _index) noexcept - : buf{_buf}, index{_index} +simdjson_really_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} { } simdjson_really_inline uint32_t token_iterator::current_offset() const noexcept { - return *(index); + return *(_position); } simdjson_really_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(index++)]; + return &buf[*(_position++)]; } simdjson_really_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { @@ -27,39 +29,39 @@ simdjson_really_inline uint32_t token_iterator::peek_length(token_position posit } simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(index+delta)]; + return &buf[*(_position+delta)]; } simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(index+delta); + return *(_position+delta); } simdjson_really_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(index+delta+1) - *(index+delta); + return *(_position+delta+1) - *(_position+delta); } simdjson_really_inline token_position token_iterator::position() const noexcept { - return index; + return _position; } -simdjson_really_inline void token_iterator::set_position(token_position target_checkpoint) noexcept { - index = target_checkpoint; +simdjson_really_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; } simdjson_really_inline bool 
token_iterator::operator==(const token_iterator &other) const noexcept { - return index == other.index; + return _position == other._position; } simdjson_really_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return index != other.index; + return _position != other._position; } simdjson_really_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return index > other.index; + return _position > other._position; } simdjson_really_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return index >= other.index; + return _position >= other._position; } simdjson_really_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return index < other.index; + return _position < other._position; } simdjson_really_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return index <= other.index; + return _position <= other._position; } } // namespace ondemand diff --git a/include/simdjson/generic/ondemand/token_iterator.h b/include/simdjson/generic/ondemand/token_iterator.h index dafc1d75b0..6c848c228d 100644 --- a/include/simdjson/generic/ondemand/token_iterator.h +++ b/include/simdjson/generic/ondemand/token_iterator.h @@ -23,8 +23,6 @@ class token_iterator { /** * Advance to the next token (returning the current one). - * - * Does not check or update depth/expect_value. Caller is responsible for that. */ simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; /** @@ -60,8 +58,6 @@ class token_iterator { * * @param position The position of the token. * - * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... 
*/ simdjson_really_inline const uint8_t *peek(token_position position) const noexcept; /** @@ -74,13 +70,13 @@ class token_iterator { simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; /** - * Save the current index to be restored later. + * Return the current index. */ simdjson_really_inline token_position position() const noexcept; /** * Reset to a previously saved index. */ - simdjson_really_inline void set_position(token_position target_checkpoint) noexcept; + simdjson_really_inline void set_position(token_position target_position) noexcept; // NOTE: we don't support a full C++ iterator interface, because we expect people to make // different calls to advance the iterator based on *their own* state. @@ -93,7 +89,7 @@ class token_iterator { simdjson_really_inline bool operator<=(const token_iterator &other) const noexcept; protected: - simdjson_really_inline token_iterator(const uint8_t *buf, token_position index) noexcept; + simdjson_really_inline token_iterator(const uint8_t *buf, token_position position) noexcept; /** * Get the index of the JSON text for a given token (relative). 
@@ -115,7 +111,7 @@ class token_iterator { simdjson_really_inline uint32_t peek_index(token_position position) const noexcept; const uint8_t *buf{}; - token_position index{}; + token_position _position{}; friend class json_iterator; friend class value_iterator; diff --git a/include/simdjson/generic/ondemand/value_iterator-inl.h b/include/simdjson/generic/ondemand/value_iterator-inl.h index 9376a55474..258e75cf42 100644 --- a/include/simdjson/generic/ondemand/value_iterator-inl.h +++ b/include/simdjson/generic/ondemand/value_iterator-inl.h @@ -2,57 +2,72 @@ namespace simdjson { namespace SIMDJSON_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline value_iterator::value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept - : _json_iter{json_iter}, - _depth{depth}, - _start_position{start_index} +simdjson_really_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} { } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_object() noexcept { - const uint8_t *json; - SIMDJSON_TRY( advance_container_start("object", json) ); - if (*json != '{') { return incorrect_type_error("Not an object"); } + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_object(); } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_object() noexcept { - bool result; - SIMDJSON_TRY( start_object().get(result) ); - if( ! _json_iter->streaming() ) { - // For document streams, we do not know the "last" structural of the current document, so peek_last() is nonesense. 
- if (*_json_iter->peek_last() != '}') { return _json_iter->report_error(TAPE_ERROR, "object invalid: { at beginning of document unmatched by } at end of document"); } - } - return result; + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); } -simdjson_warn_unused simdjson_really_inline bool value_iterator::started_object() noexcept { +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_object() noexcept { assert_at_container_start(); #ifdef SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, _start_position); + _json_iter->set_start_position(_depth, start_position()); #endif if (*_json_iter->peek() == '}') { logger::log_value(*_json_iter, "empty object"); _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); + end_container(); return false; } - logger::log_start_value(*_json_iter, "object"); return true; } +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if (! 
_json_iter->streaming() && (*_json_iter->peek_last() != '}')) { + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + return started_object(); +} + +simdjson_warn_unused simdjson_really_inline error_code value_iterator::end_container() noexcept { +#if __SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // __SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_field() noexcept { assert_at_next(); + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. switch (*_json_iter->return_current_and_advance()) { case '}': logger::log_end_value(*_json_iter, "object"); - _json_iter->ascend_to(depth()-1); + SIMDJSON_TRY( end_container() ); return false; case ',': return true; default: - return _json_iter->report_error(TAPE_ERROR, "Missing comma between object fields"); + return report_error(TAPE_ERROR, "Missing comma between object fields"); } } @@ -108,7 +123,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator if ((error = skip_child() )) { abandon(); return error; } if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != _start_position) { return OUT_OF_ORDER_ITERATION; } + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } while (has_value) { @@ -197,7 +212,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // this object iterator will blithely scan that object for fields. 
if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif - has_value = reset_object(); + SIMDJSON_TRY(reset_object().get(has_value)); at_first = true; // 3. When a previous search found a field or an iterator yielded a value: // @@ -214,17 +229,14 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // ``` // } else { - // If someone queried a key but they did access the value, then we are left pointing + // If someone queried a key but they not did access the value, then we are left pointing // at the ':' and we need to move forward through the value... If the value was // processed then skip_child() does not move the iterator (but may adjust the depth). if ((error = skip_child() )) { abandon(); return error; } search_start = _json_iter->position(); - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != _start_position) { return OUT_OF_ORDER_ITERATION; } + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } @@ -241,7 +253,6 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // ^ (depth 0) // ``` // - // Next, we find a match starting from the current position. while (has_value) { SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field @@ -290,7 +301,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // beginning of the object. // (We have already run through the object before, so we've already validated its structure. We // don't check errors in this bit.) 
- has_value = reset_object(); + SIMDJSON_TRY(reset_object().get(has_value)); while (true) { SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field @@ -344,33 +355,26 @@ simdjson_warn_unused simdjson_really_inline simdjson_result val assert_at_next(); const uint8_t *key = _json_iter->return_current_and_advance(); - if (*(key++) != '"') { return _json_iter->report_error(TAPE_ERROR, "Object key is not a string"); } + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } return raw_json_string(key); } simdjson_warn_unused simdjson_really_inline error_code value_iterator::field_value() noexcept { assert_at_next(); - if (*_json_iter->return_current_and_advance() != ':') { return _json_iter->report_error(TAPE_ERROR, "Missing colon in object field"); } + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } _json_iter->descend_to(depth()+1); return SUCCESS; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_array() noexcept { - const uint8_t *json; - SIMDJSON_TRY( advance_container_start("array", json) ); - if (*json != '[') { return incorrect_type_error("Not an array"); } + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); return started_array(); } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_array() noexcept { - bool result; - SIMDJSON_TRY( start_array().get(result) ); - if( ! _json_iter->streaming() ) { - // For document streams, we do not know the "last" structural of the current document, so peek_last() is nonesense. 
- if (*_json_iter->peek_last() != ']') { return _json_iter->report_error(TAPE_ERROR, "array invalid: [ at beginning of document unmatched by ] at end of document"); } - } - return result; + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); } inline std::string value_iterator::to_string() const noexcept { @@ -380,35 +384,46 @@ inline std::string value_iterator::to_string() const noexcept { return answer; } -simdjson_warn_unused simdjson_really_inline bool value_iterator::started_array() noexcept { +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_array() noexcept { assert_at_container_start(); if (*_json_iter->peek() == ']') { logger::log_value(*_json_iter, "empty array"); _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); + SIMDJSON_TRY( end_container() ); return false; } - logger::log_start_value(*_json_iter, "array"); _json_iter->descend_to(depth()+1); #ifdef SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, _start_position); + _json_iter->set_start_position(_depth, start_position()); #endif return true; } +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! 
_json_iter->streaming() && (*_json_iter->peek_last() != ']')) { + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + return started_array(); +} + simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_element() noexcept { assert_at_next(); + logger::log_event(*this, "has_next_element"); switch (*_json_iter->return_current_and_advance()) { case ']': logger::log_end_value(*_json_iter, "array"); - _json_iter->ascend_to(depth()-1); + SIMDJSON_TRY( end_container() ); return false; case ',': _json_iter->descend_to(depth()+1); return true; default: - return _json_iter->report_error(TAPE_ERROR, "Missing comma between array elements"); + return report_error(TAPE_ERROR, "Missing comma between array elements"); } } @@ -427,24 +442,35 @@ simdjson_warn_unused simdjson_really_inline simdjson_result va return get_raw_json_string().unescape(_json_iter->string_buf_loc()); } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_raw_json_string() noexcept { - auto json = advance_start("string"); + auto json = peek_scalar("string"); if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); return raw_json_string(json+1); } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64() noexcept { - return numberparsing::parse_unsigned(advance_non_root_scalar("uint64")); + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() != INCORRECT_TYPE) { advance_non_root_scalar("uint64"); } + return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64() noexcept { - return numberparsing::parse_integer(advance_non_root_scalar("int64")); + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() != INCORRECT_TYPE) { advance_non_root_scalar("int64"); } + return result; } simdjson_warn_unused simdjson_really_inline simdjson_result 
value_iterator::get_double() noexcept { - return numberparsing::parse_double(advance_non_root_scalar("double")); + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() != INCORRECT_TYPE) { advance_non_root_scalar("double"); } + return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_bool() noexcept { - return parse_bool(advance_non_root_scalar("bool")); + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() != INCORRECT_TYPE) { advance_non_root_scalar("bool"); } + return result; } simdjson_really_inline bool value_iterator::is_null() noexcept { - return parse_null(advance_non_root_scalar("null")); + auto result = parse_null(peek_non_root_scalar("null")); + if(result) { advance_non_root_scalar("null"); } + return result; } constexpr const uint32_t MAX_INT_LENGTH = 1024; @@ -457,42 +483,63 @@ simdjson_warn_unused simdjson_really_inline simdjson_result val } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64() noexcept { auto max_len = peek_start_length(); - auto json = advance_root_scalar("uint64"); + auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, _start_position, depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } - return numberparsing::parse_unsigned(tmpbuf); + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() != INCORRECT_TYPE) { advance_root_scalar("uint64"); } + return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64() noexcept { auto max_len = peek_start_length(); - auto json = 
advance_root_scalar("int64"); + auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, _start_position, depth(), "Root number more than 20 characters"); return NUMBER_ERROR; } - return numberparsing::parse_integer(tmpbuf); + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() != INCORRECT_TYPE) { advance_root_scalar("int64"); } + return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double() noexcept { auto max_len = peek_start_length(); - auto json = advance_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest number: -0.e-308. + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. 
uint8_t tmpbuf[1074+8+1]; - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { logger::log_error(*_json_iter, _start_position, depth(), "Root number more than 1082 characters"); return NUMBER_ERROR; } - return numberparsing::parse_double(tmpbuf); + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() != INCORRECT_TYPE) { advance_root_scalar("double"); } + return result; } simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_bool() noexcept { auto max_len = peek_start_length(); - auto json = advance_root_scalar("bool"); + auto json = peek_root_scalar("bool"); uint8_t tmpbuf[5+1]; if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return incorrect_type_error("Not a boolean"); } + advance_root_scalar("bool"); return parse_bool(tmpbuf); } simdjson_really_inline bool value_iterator::is_root_null() noexcept { auto max_len = peek_start_length(); - auto json = advance_root_scalar("null"); - return max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[5])); + auto json = peek_root_scalar("null"); + auto result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(result) { advance_root_scalar("null"); } + return result; } simdjson_warn_unused simdjson_really_inline error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); return _json_iter->skip_child(depth()); @@ -513,17 +560,17 @@ simdjson_really_inline bool value_iterator::is_open() const noexcept { } SIMDJSON_POP_DISABLE_WARNINGS 
-simdjson_really_inline bool value_iterator::at_eof() const noexcept { - return _json_iter->at_eof(); +simdjson_really_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); } simdjson_really_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.index == _start_position; + return _json_iter->token.position() == start_position(); } simdjson_really_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); - return _json_iter->token.index == _start_position + 1; + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; } simdjson_really_inline void value_iterator::abandon() noexcept { @@ -547,66 +594,99 @@ simdjson_warn_unused simdjson_really_inline json_iterator &value_iterator::json_ } simdjson_really_inline const uint8_t *value_iterator::peek_start() const noexcept { - return _json_iter->peek(_start_position); + return _json_iter->peek(start_position()); } simdjson_really_inline uint32_t value_iterator::peek_start_length() const noexcept { - return _json_iter->peek_length(_start_position); + return _json_iter->peek_length(start_position()); } -simdjson_really_inline const uint8_t *value_iterator::advance_start(const char *type) const noexcept { - logger::log_value(*_json_iter, _start_position, depth(), type); +simdjson_really_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. if (!is_at_start()) { return peek_start(); } // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. 
assert_at_start(); - auto result = _json_iter->return_current_and_advance(); + return _json_iter->peek(); +} + +simdjson_really_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); - return result; } -simdjson_really_inline error_code value_iterator::advance_container_start(const char *type, const uint8_t *&json) const noexcept { - logger::log_start_value(*_json_iter, _start_position, depth(), type); +simdjson_really_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; if (!is_at_start()) { #ifdef SIMDJSON_DEVELOPMENT_CHECKS if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif json = peek_start(); - return SUCCESS; + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); } - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. 
- assert_at_start(); - json = _json_iter->return_current_and_advance(); + return SUCCESS; } -simdjson_really_inline const uint8_t *value_iterator::advance_root_scalar(const char *type) const noexcept { - logger::log_value(*_json_iter, _start_position, depth(), type); + + +simdjson_really_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return peek_start(); } assert_at_root(); - auto result = _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); - return result; + return _json_iter->peek(); } -simdjson_really_inline const uint8_t *value_iterator::advance_non_root_scalar(const char *type) const noexcept { - logger::log_value(*_json_iter, _start_position, depth(), type); +simdjson_really_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return peek_start(); } assert_at_non_root_start(); - auto result = _json_iter->return_current_and_advance(); + return _json_iter->peek(); +} + +simdjson_really_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_really_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); - return result; } simdjson_really_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { - logger::log_error(*_json_iter, _start_position, depth(), message); + 
logger::log_error(*_json_iter, start_position(), depth(), message); return INCORRECT_TYPE; } simdjson_really_inline bool value_iterator::is_at_start() const noexcept { - return _json_iter->token.index == _start_position; + return position() == start_position(); } simdjson_really_inline bool value_iterator::is_at_key() const noexcept { @@ -618,50 +698,50 @@ simdjson_really_inline bool value_iterator::is_at_key() const noexcept { simdjson_really_inline bool value_iterator::is_at_iterator_start() const noexcept { // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). - auto delta = _json_iter->token.index - _start_position; + auto delta = position() - start_position(); return delta == 1 || delta == 2; } inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index == _start_position ); + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); SIMDJSON_ASSUME( _json_iter->_depth == _depth ); SIMDJSON_ASSUME( _depth > 0 ); } inline void value_iterator::assert_at_container_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); SIMDJSON_ASSUME( _json_iter->_depth == _depth ); SIMDJSON_ASSUME( _depth > 0 ); } inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth == _depth ); SIMDJSON_ASSUME( _depth > 0 ); } simdjson_really_inline void value_iterator::move_at_start() noexcept { _json_iter->_depth = _depth; - _json_iter->token.index = _start_position; + _json_iter->token.set_position(_start_position); } simdjson_really_inline void value_iterator::move_at_container_start() noexcept { _json_iter->_depth = _depth; - _json_iter->token.index = _start_position + 1; + 
_json_iter->token.set_position(_start_position + 1); } -simdjson_really_inline bool value_iterator::reset_array() noexcept { +simdjson_really_inline simdjson_result value_iterator::reset_array() noexcept { move_at_container_start(); return started_array(); } -simdjson_really_inline bool value_iterator::reset_object() noexcept { +simdjson_really_inline simdjson_result value_iterator::reset_object() noexcept { move_at_container_start(); return started_object(); } inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token.index > _start_position ); + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); SIMDJSON_ASSUME( _depth > 0 ); } @@ -684,8 +764,7 @@ simdjson_really_inline bool value_iterator::is_valid() const noexcept { return _json_iter != nullptr; } - -simdjson_really_inline simdjson_result value_iterator::type() noexcept { +simdjson_really_inline simdjson_result value_iterator::type() const noexcept { switch (*peek_start()) { case '{': return json_type::object; @@ -706,6 +785,26 @@ simdjson_really_inline simdjson_result value_iterator::type() noexcep } } +simdjson_really_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_really_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_really_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_really_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_really_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + } // namespace ondemand } // namespace SIMDJSON_IMPLEMENTATION } // namespace simdjson diff --git 
a/include/simdjson/generic/ondemand/value_iterator.h b/include/simdjson/generic/ondemand/value_iterator.h index 558258dc28..e742198a44 100644 --- a/include/simdjson/generic/ondemand/value_iterator.h +++ b/include/simdjson/generic/ondemand/value_iterator.h @@ -46,7 +46,7 @@ class value_iterator { /** * Tell whether the iterator is at the EOF mark */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_really_inline bool at_end() const noexcept; /** * Tell whether the iterator is at the start of the value @@ -83,7 +83,7 @@ class value_iterator { * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ - simdjson_really_inline simdjson_result type() noexcept; + simdjson_really_inline simdjson_result type() const noexcept; /** * @addtogroup object Object iteration @@ -113,11 +113,23 @@ class value_iterator { /** * Start an object iteration after the user has already checked and moved past the {. * - * Does not move the iterator. + * Does not move the iterator unless the object is empty ({}). * * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline bool started_object() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_really_inline simdjson_result started_root_object() noexcept; /** * Moves to the next field in an object. 
@@ -127,6 +139,7 @@ class value_iterator { * * @return whether there is another field in the object. * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. */ simdjson_warn_unused simdjson_really_inline simdjson_result has_next_field() noexcept; @@ -223,13 +236,25 @@ class value_iterator { simdjson_warn_unused simdjson_really_inline simdjson_result start_root_array() noexcept; /** - * Start an array iteration after the user has already checked and moved past the [. + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_really_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. * - * Does not move the iterator. + * Does not move the iterator unless the array is empty ([]). * * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline bool started_array() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result started_root_array() noexcept; /** * Moves to the next element in an array. @@ -285,12 +310,12 @@ class value_iterator { * Restarts an array iteration. * @returns Whether the array has any elements (returns false for empty). */ - simdjson_really_inline bool reset_array() noexcept; + simdjson_really_inline simdjson_result reset_array() noexcept; /** * Restarts an object iteration. 
* @returns Whether the object has any fields (returns false for empty). */ - simdjson_really_inline bool reset_object() noexcept; + simdjson_really_inline simdjson_result reset_object() noexcept; /** * move_at_start(): moves us so that we are pointing at the beginning of * the container. It updates the index so that at_start() is true and it @@ -315,12 +340,58 @@ class value_iterator { simdjson_really_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; simdjson_really_inline const uint8_t *peek_start() const noexcept; simdjson_really_inline uint32_t peek_start_length() const noexcept; - simdjson_really_inline const uint8_t *advance_start(const char *type) const noexcept; - simdjson_really_inline error_code advance_container_start(const char *type, const uint8_t *&json) const noexcept; - simdjson_really_inline const uint8_t *advance_root_scalar(const char *type) const noexcept; - simdjson_really_inline const uint8_t *advance_non_root_scalar(const char *type) const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. 
So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_really_inline void advance_scalar(const char *type) noexcept; + simdjson_really_inline void advance_root_scalar(const char *type) noexcept; + simdjson_really_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_really_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_really_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_really_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_really_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_really_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. 
+ */ + simdjson_really_inline simdjson_result advance_to_value() noexcept; simdjson_really_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_really_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_really_inline bool is_at_start() const noexcept; /** @@ -347,6 +418,18 @@ class value_iterator { inline void assert_at_next() const noexcept; inline void assert_at_non_root_start() const noexcept; + /** Get the starting position of this value */ + simdjson_really_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_really_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_really_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_really_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_really_inline error_code report_error(error_code error, const char *message) noexcept; + friend class document; friend class object; friend class array; diff --git a/src/from_chars.cpp b/src/from_chars.cpp index bbefa295af..7173df8695 100644 --- a/src/from_chars.cpp +++ b/src/from_chars.cpp @@ -134,6 +134,86 @@ decimal parse_decimal(const char *&p) noexcept { return answer; } +// This should always succeed since it follows a call to parse_number. +// Will not read at or beyond the "end" pointer. 
+decimal parse_decimal(const char *&p, const char * end) noexcept { + decimal answer; + answer.num_digits = 0; + answer.decimal_point = 0; + answer.truncated = false; + if(p == end) { return answer; } // should never happen + answer.negative = (*p == '-'); + if ((*p == '-') || (*p == '+')) { + ++p; + } + + while ((p != end) && (*p == '0')) { + ++p; + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + if ((p != end) && (*p == '.')) { + ++p; + if(p == end) { return answer; } // should never happen + const char *first_after_period = p; + // if we have not yet encountered a zero, we have to skip it as well + if (answer.num_digits == 0) { + // skip zeros + while (*p == '0') { + ++p; + } + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + answer.decimal_point = int32_t(first_after_period - p); + } + if(answer.num_digits > 0) { + const char *preverse = p - 1; + int32_t trailing_zeros = 0; + while ((*preverse == '0') || (*preverse == '.')) { + if(*preverse == '0') { trailing_zeros++; }; + --preverse; + } + answer.decimal_point += int32_t(answer.num_digits); + answer.num_digits -= uint32_t(trailing_zeros); + } + if(answer.num_digits > max_digits ) { + answer.num_digits = max_digits; + answer.truncated = true; + } + if ((p != end) && (('e' == *p) || ('E' == *p))) { + ++p; + if(p == end) { return answer; } // should never happen + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + int32_t exp_number = 0; // exponential part + while ((p != end) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + answer.decimal_point += (neg_exp ? 
-exp_number : exp_number); + } + return answer; +} + namespace { // remove all final zeroes @@ -473,6 +553,12 @@ adjusted_mantissa parse_long_mantissa(const char *first) { return compute_float(d); } +template +adjusted_mantissa parse_long_mantissa(const char *first, const char *end) { + decimal d = parse_decimal(first, end); + return compute_float(d); +} + double from_chars(const char *first) noexcept { bool negative = first[0] == '-'; if (negative) { @@ -489,5 +575,22 @@ double from_chars(const char *first) noexcept { return value; } + +double from_chars(const char *first, const char *end) noexcept { + bool negative = first[0] == '-'; + if (negative) { + first++; + } + adjusted_mantissa am = parse_long_mantissa>(first, end); + uint64_t word = am.mantissa; + word |= uint64_t(am.power2) + << binary_format::mantissa_explicit_bits(); + word = negative ? word | (uint64_t(1) << binary_format::sign_index()) + : word; + double value; + std::memcpy(&value, &word, sizeof(double)); + return value; +} + } // internal } // simdjson \ No newline at end of file diff --git a/src/generic/stage1/utf8_lookup4_algorithm.h b/src/generic/stage1/utf8_lookup4_algorithm.h index 0a50fe7c6c..a0cfb1f5b8 100644 --- a/src/generic/stage1/utf8_lookup4_algorithm.h +++ b/src/generic/stage1/utf8_lookup4_algorithm.h @@ -167,7 +167,6 @@ using namespace simd; } this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } } // do not forget to call check_eof! diff --git a/src/internal/error_tables.cpp b/src/internal/error_tables.cpp index e8792603e3..e9ce9c3aad 100644 --- a/src/internal/error_tables.cpp +++ b/src/internal/error_tables.cpp @@ -30,7 +30,8 @@ namespace internal { { UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" }, { PARSER_IN_USE, "Cannot parse a new document while a document is still in use." 
}, { OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." }, - { INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length." } + { INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length." }, + { INCOMPLETE_ARRAY_OR_OBJECT, "JSON document ended early in the middle of an object or array." } }; // error_messages[] } // namespace internal diff --git a/tests/ondemand/ondemand_array_error_tests.cpp b/tests/ondemand/ondemand_array_error_tests.cpp index 12a5b64f11..0822f7de2c 100644 --- a/tests/ondemand/ondemand_array_error_tests.cpp +++ b/tests/ondemand/ondemand_array_error_tests.cpp @@ -62,18 +62,18 @@ namespace array_error_tests { TEST_START(); ONDEMAND_SUBTEST("missing comma", "[1 1]", assert_iterate(doc, { int64_t(1) }, { TAPE_ERROR })); ONDEMAND_SUBTEST("extra comma ", "[1,,1]", assert_iterate(doc, { int64_t(1) }, { INCORRECT_TYPE, TAPE_ERROR })); - ONDEMAND_SUBTEST("extra comma ", "[,]", assert_iterate(doc, { INCORRECT_TYPE })); - ONDEMAND_SUBTEST("extra comma ", "[,,]", assert_iterate(doc, { INCORRECT_TYPE, INCORRECT_TYPE, TAPE_ERROR })); + ONDEMAND_SUBTEST("extra comma ", "[,]", assert_iterate(doc, { INCORRECT_TYPE, TAPE_ERROR })); + ONDEMAND_SUBTEST("extra comma ", "[,,]", assert_iterate(doc, { INCORRECT_TYPE, TAPE_ERROR })); TEST_SUCCEED(); } bool top_level_array_iterate_unclosed_error() { TEST_START(); - ONDEMAND_SUBTEST("unclosed extra comma", "[,", assert_iterate(doc, { TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", "[1 ", assert_iterate(doc, { TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed extra comma", "[,,", assert_iterate(doc, { TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", "[1,", assert_iterate(doc, { TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", "[1", assert_iterate(doc, { TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", "[", 
assert_iterate(doc, { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed extra comma", "[,", assert_iterate(doc, { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed ", "[1 ", assert_iterate(doc, { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed extra comma", "[,,", assert_iterate(doc, { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed ", "[1,", assert_iterate(doc, { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed ", "[1", assert_iterate(doc, { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed ", "[", assert_iterate(doc, { INCOMPLETE_ARRAY_OR_OBJECT })); TEST_SUCCEED(); } @@ -81,21 +81,29 @@ namespace array_error_tests { TEST_START(); ONDEMAND_SUBTEST("missing comma", R"({ "a": [1 1] })", assert_iterate(doc["a"], { int64_t(1) }, { TAPE_ERROR })); ONDEMAND_SUBTEST("extra comma ", R"({ "a": [1,,1] })", assert_iterate(doc["a"], { int64_t(1) }, { INCORRECT_TYPE, TAPE_ERROR })); - ONDEMAND_SUBTEST("extra comma ", R"({ "a": [1,,] })", assert_iterate(doc["a"], { int64_t(1) }, { INCORRECT_TYPE })); - ONDEMAND_SUBTEST("extra comma ", R"({ "a": [,] })", assert_iterate(doc["a"], { INCORRECT_TYPE })); - ONDEMAND_SUBTEST("extra comma ", R"({ "a": [,,] })", assert_iterate(doc["a"], { INCORRECT_TYPE, INCORRECT_TYPE, TAPE_ERROR })); + ONDEMAND_SUBTEST("extra comma ", R"({ "a": [1,,] })", assert_iterate(doc["a"], { int64_t(1) }, { INCORRECT_TYPE, TAPE_ERROR })); + ONDEMAND_SUBTEST("extra comma ", R"({ "a": [,] })", assert_iterate(doc["a"], { INCORRECT_TYPE, TAPE_ERROR})); + ONDEMAND_SUBTEST("extra comma ", R"({ "a": [,,] })", assert_iterate(doc["a"], { INCORRECT_TYPE, TAPE_ERROR })); TEST_SUCCEED(); } bool array_iterate_unclosed_error() { TEST_START(); - ONDEMAND_SUBTEST("unclosed extra comma", R"({ "a": [,)", assert_iterate(doc["a"], { INCORRECT_TYPE, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed extra comma", R"({ "a": [,,)", assert_iterate(doc["a"], { INCORRECT_TYPE, INCORRECT_TYPE, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed 
", R"({ "a": [1 )", assert_iterate(doc["a"], { int64_t(1) }, { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed extra comma", R"({ "a": [,)", assert_iterate(doc["a"], { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed extra comma", R"({ "a": [,,)", assert_iterate(doc["a"], { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed ", R"({ "a": [1 )", assert_iterate(doc["a"], { INCOMPLETE_ARRAY_OR_OBJECT })); // TODO These pass the user values that may run past the end of the buffer if they aren't careful // In particular, if the padding is decorated with the wrong values, we could cause overrun! - ONDEMAND_SUBTEST("unclosed ", R"({ "a": [1,)", assert_iterate(doc["a"], { int64_t(1) }, { INCORRECT_TYPE, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", R"({ "a": [1)", assert_iterate(doc["a"], { NUMBER_ERROR, TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed ", R"({ "a": [)", assert_iterate(doc["a"], { INCORRECT_TYPE, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed ", R"({ "a": [1,)", assert_iterate(doc["a"], { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed ", R"({ "a": [1)", assert_iterate(doc["a"], { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed ", R"({ "a": [)", assert_iterate(doc["a"], { INCOMPLETE_ARRAY_OR_OBJECT })); + TEST_SUCCEED(); + } + bool array_iterate_incomplete_error() { + TEST_START(); + ONDEMAND_SUBTEST("unclosed after array", R"([ [1] )", assert_iterate(doc.get_array().at(0), { int64_t(1) }, { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed after array", R"([ [1,])", assert_iterate(doc.get_array().at(0), { int64_t(1) }, { INCORRECT_TYPE, TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed after array", R"([ [1])", assert_iterate(doc.get_array().at(0), { int64_t(1) }, { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed after array", R"([ [])", assert_iterate(doc.get_array().at(0), { INCOMPLETE_ARRAY_OR_OBJECT })); TEST_SUCCEED(); } @@ -177,6 +185,7 @@ namespace array_error_tests { 
top_level_array_iterate_unclosed_error() && array_iterate_error() && array_iterate_unclosed_error() && + array_iterate_incomplete_error() && #ifdef SIMDJSON_DEVELOPMENT_CHECKS out_of_order_array_iteration_error() && out_of_order_top_level_array_iteration_error() && diff --git a/tests/ondemand/ondemand_json_pointer_tests.cpp b/tests/ondemand/ondemand_json_pointer_tests.cpp index 5aa3d696bb..61bbcc12b4 100644 --- a/tests/ondemand/ondemand_json_pointer_tests.cpp +++ b/tests/ondemand/ondemand_json_pointer_tests.cpp @@ -153,7 +153,7 @@ namespace json_pointer_tests { ASSERT_ERROR(doc.at_pointer(json_pointer).get(val), simdjson::STRING_ERROR); std::cout << "\t- unclosed_object" << std::endl; ASSERT_SUCCESS(parser.iterate(unclosed_object).get(doc)); - ASSERT_ERROR(doc.at_pointer(json_pointer).get(val), simdjson::TAPE_ERROR); + ASSERT_ERROR(doc.at_pointer(json_pointer).get(val), simdjson::INCOMPLETE_ARRAY_OR_OBJECT); std::cout << "\t- missing_bracket_before" << std::endl; ASSERT_SUCCESS(parser.iterate(missing_bracket_before).get(doc)); ASSERT_ERROR(doc.at_pointer(json_pointer).get(val), simdjson::TAPE_ERROR); diff --git a/tests/ondemand/ondemand_misc_tests.cpp b/tests/ondemand/ondemand_misc_tests.cpp index b3b06308e9..0beab70290 100644 --- a/tests/ondemand/ondemand_misc_tests.cpp +++ b/tests/ondemand/ondemand_misc_tests.cpp @@ -5,6 +5,250 @@ using namespace simdjson; namespace misc_tests { using namespace std; + + bool issue1661a() { + TEST_START(); + ondemand::parser parser; + padded_string docdata = R"({"":],"global-groups":[[]}})"_padded; + ondemand::document doc; + ASSERT_SUCCESS(parser.iterate(docdata).get(doc)); + ondemand::value global_groups; + ASSERT_SUCCESS(doc["global-groups"].get(global_groups)); + ondemand::json_type global_type; + ASSERT_SUCCESS(global_groups.type().get(global_type)); + ASSERT_EQUAL(global_type, ondemand::json_type::array); + TEST_SUCCEED(); + } + + bool issue1660() { + TEST_START(); + ondemand::parser parser; + padded_string docdata = 
R"({"globals":{"a":{"shadowable":[}}}})"_padded; + ondemand::document doc; + ASSERT_SUCCESS(parser.iterate(docdata).get(doc)); + ondemand::object globals; + ASSERT_SUCCESS(doc["globals"].get(globals)); + for (auto global_field : globals) { + ondemand::value global; + ASSERT_SUCCESS(global_field.value().get(global)); + ondemand::json_type global_type; + ASSERT_SUCCESS(global.type().get(global_type)); + ASSERT_EQUAL(global_type, ondemand::json_type::object); + ondemand::object global_object; + ASSERT_SUCCESS(global.get(global_object)); + ondemand::value shadowable; + ASSERT_SUCCESS(global_object["shadowable"].get(shadowable)); + ASSERT_ERROR(shadowable.get_object(), INCORRECT_TYPE); + ondemand::value badvalue; + auto error = global_object["writable"].get(badvalue); + if(error == SUCCESS) { + return false; + } else { + break; + } + } + TEST_SUCCEED(); + } + + + bool issue1660_with_bool() { + TEST_START(); + ondemand::parser parser; + padded_string docdata = R"({"globals":{"a":{"shadowable":[}}}})"_padded; + ondemand::document doc; + ASSERT_SUCCESS(parser.iterate(docdata).get(doc)); + ondemand::object globals; + ASSERT_SUCCESS(doc["globals"].get(globals)); + for (auto global_field : globals) { + ondemand::value global; + ASSERT_SUCCESS(global_field.value().get(global)); + ondemand::json_type global_type; + ASSERT_SUCCESS(global.type().get(global_type)); + ASSERT_EQUAL(global_type, ondemand::json_type::object); + ondemand::object global_object; + ASSERT_SUCCESS(global.get(global_object)); + ondemand::value shadowable; + ASSERT_SUCCESS(global_object["shadowable"].get(shadowable)); + ASSERT_ERROR(shadowable.get_bool(), INCORRECT_TYPE); + ondemand::value badvalue; + auto error = global_object["writable"].get(badvalue); + if(error == SUCCESS) { + return false; + } else { + break; + } + } + TEST_SUCCEED(); + } + + + bool issue1660_with_uint64() { + TEST_START(); + ondemand::parser parser; + padded_string docdata = R"({"globals":{"a":{"shadowable":[}}}})"_padded; + 
ondemand::document doc; + ASSERT_SUCCESS(parser.iterate(docdata).get(doc)); + ondemand::object globals; + ASSERT_SUCCESS(doc["globals"].get(globals)); + for (auto global_field : globals) { + ondemand::value global; + ASSERT_SUCCESS(global_field.value().get(global)); + ondemand::json_type global_type; + ASSERT_SUCCESS(global.type().get(global_type)); + ASSERT_EQUAL(global_type, ondemand::json_type::object); + ondemand::object global_object; + ASSERT_SUCCESS(global.get(global_object)); + ondemand::value shadowable; + ASSERT_SUCCESS(global_object["shadowable"].get(shadowable)); + ASSERT_ERROR(shadowable.get_uint64(), INCORRECT_TYPE); + ondemand::value badvalue; + auto error = global_object["writable"].get(badvalue); + if(error == SUCCESS) { + return false; + } else { + break; + } + } + TEST_SUCCEED(); + } + + + bool issue1660_with_int64() { + TEST_START(); + ondemand::parser parser; + padded_string docdata = R"({"globals":{"a":{"shadowable":[}}}})"_padded; + ondemand::document doc; + ASSERT_SUCCESS(parser.iterate(docdata).get(doc)); + ondemand::object globals; + ASSERT_SUCCESS(doc["globals"].get(globals)); + for (auto global_field : globals) { + ondemand::value global; + ASSERT_SUCCESS(global_field.value().get(global)); + ondemand::json_type global_type; + ASSERT_SUCCESS(global.type().get(global_type)); + ASSERT_EQUAL(global_type, ondemand::json_type::object); + ondemand::object global_object; + ASSERT_SUCCESS(global.get(global_object)); + ondemand::value shadowable; + ASSERT_SUCCESS(global_object["shadowable"].get(shadowable)); + ASSERT_ERROR(shadowable.get_int64(), INCORRECT_TYPE); + ondemand::value badvalue; + auto error = global_object["writable"].get(badvalue); + if(error == SUCCESS) { + return false; + } else { + break; + } + } + TEST_SUCCEED(); + } + + bool issue1660_with_double() { + TEST_START(); + ondemand::parser parser; + padded_string docdata = R"({"globals":{"a":{"shadowable":[}}}})"_padded; + ondemand::document doc; + 
ASSERT_SUCCESS(parser.iterate(docdata).get(doc)); + ondemand::object globals; + ASSERT_SUCCESS(doc["globals"].get(globals)); + for (auto global_field : globals) { + ondemand::value global; + ASSERT_SUCCESS(global_field.value().get(global)); + ondemand::json_type global_type; + ASSERT_SUCCESS(global.type().get(global_type)); + ASSERT_EQUAL(global_type, ondemand::json_type::object); + ondemand::object global_object; + ASSERT_SUCCESS(global.get(global_object)); + ondemand::value shadowable; + ASSERT_SUCCESS(global_object["shadowable"].get(shadowable)); + ASSERT_ERROR(shadowable.get_double(), INCORRECT_TYPE); + ondemand::value badvalue; + auto error = global_object["writable"].get(badvalue); + if(error == SUCCESS) { + return false; + } else { + break; + } + } + TEST_SUCCEED(); + } + + + bool issue1660_with_null() { + TEST_START(); + ondemand::parser parser; + padded_string docdata = R"({"globals":{"a":{"shadowable":[}}}})"_padded; + ondemand::document doc; + ASSERT_SUCCESS(parser.iterate(docdata).get(doc)); + ondemand::object globals; + ASSERT_SUCCESS(doc["globals"].get(globals)); + for (auto global_field : globals) { + ondemand::value global; + ASSERT_SUCCESS(global_field.value().get(global)); + ondemand::json_type global_type; + ASSERT_SUCCESS(global.type().get(global_type)); + ASSERT_EQUAL(global_type, ondemand::json_type::object); + ondemand::object global_object; + ASSERT_SUCCESS(global.get(global_object)); + ondemand::value shadowable; + ASSERT_SUCCESS(global_object["shadowable"].get(shadowable)); + ASSERT_TRUE(!shadowable.is_null()); + ondemand::value badvalue; + auto error = global_object["writable"].get(badvalue); + if(error == SUCCESS) { + return false; + } else { + break; + } + } + TEST_SUCCEED(); + } + + + bool issue1660_with_string() { + TEST_START(); + ondemand::parser parser; + padded_string docdata = R"({"globals":{"a":{"shadowable":[}}}})"_padded; + ondemand::document doc; + ASSERT_SUCCESS(parser.iterate(docdata).get(doc)); + ondemand::object globals; 
+ ASSERT_SUCCESS(doc["globals"].get(globals)); + for (auto global_field : globals) { + ondemand::value global; + ASSERT_SUCCESS(global_field.value().get(global)); + ondemand::json_type global_type; + ASSERT_SUCCESS(global.type().get(global_type)); + ASSERT_EQUAL(global_type, ondemand::json_type::object); + ondemand::object global_object; + ASSERT_SUCCESS(global.get(global_object)); + ondemand::value shadowable; + ASSERT_SUCCESS(global_object["shadowable"].get(shadowable)); + ASSERT_ERROR(shadowable.get_string(), INCORRECT_TYPE); + ondemand::value badvalue; + auto error = global_object["writable"].get(badvalue); + if(error == SUCCESS) { + return false; + } else { + break; + } + } + TEST_SUCCEED(); + } + + bool issue1661() { + TEST_START(); + ondemand::parser parser; + padded_string docdata = R"({"":],"global-groups":[[]}})"_padded; + ondemand::document doc; + ASSERT_SUCCESS(parser.iterate(docdata).get(doc)); + ondemand::object global_groups; + ASSERT_ERROR(doc["global-groups"].get(global_groups), INCORRECT_TYPE); + ondemand::object globals; + auto error = doc["globals"].get(globals); + if(error == SUCCESS) { return false; } + TEST_SUCCEED(); + } + simdjson_warn_unused bool big_integer() { TEST_START(); simdjson::ondemand::parser parser; @@ -89,6 +333,15 @@ namespace misc_tests { bool run() { return + issue1660_with_uint64() && + issue1660_with_int64() && + issue1660_with_double() && + issue1660_with_null() && + issue1660_with_string() && + issue1660_with_bool() && + issue1661a() && + issue1660() && + issue1661() && big_integer_in_string() && big_integer() && raw_json_token() && diff --git a/tests/ondemand/ondemand_object_error_tests.cpp b/tests/ondemand/ondemand_object_error_tests.cpp index c97b78d0d8..18de0d719f 100644 --- a/tests/ondemand/ondemand_object_error_tests.cpp +++ b/tests/ondemand/ondemand_object_error_tests.cpp @@ -61,13 +61,20 @@ namespace object_error_tests { } bool object_iterate_unclosed_error() { TEST_START(); - ONDEMAND_SUBTEST("unclosed", R"({ 
"a": 1, )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); - // TODO These next two pass the user a value that may run past the end of the buffer if they aren't careful. - // In particular, if the padding is decorated with the wrong values, we could cause overrun! - ONDEMAND_SUBTEST("unclosed", R"({ "a": 1 )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); - ONDEMAND_SUBTEST("unclosed", R"({ )", assert_iterate_object(doc.get_object(), { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed", R"({ "a": 1, )", assert_iterate_object(doc.get_object(), { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed", R"({ "a": 1 )", assert_iterate_object(doc.get_object(), { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_iterate_object(doc.get_object(), { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_iterate_object(doc.get_object(), { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed", R"({ )", assert_iterate_object(doc.get_object(), { INCOMPLETE_ARRAY_OR_OBJECT })); + TEST_SUCCEED(); + } + bool object_iterate_incomplete_error() { + TEST_START(); + ONDEMAND_SUBTEST("unclosed", R"({ "x": { "a": 1, })", assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { TAPE_ERROR })); + ONDEMAND_SUBTEST("unclosed", R"({ "x": { "a": 1 })", assert_iterate_object(doc.get_object(), { "a" }, { int64_t(1) }, { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed", R"({ "x": { "a": })", assert_iterate_object(doc.get_object(), { INCORRECT_TYPE, INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed", R"({ "x": { "a" })", assert_iterate_object(doc.get_object(), { INCOMPLETE_ARRAY_OR_OBJECT })); + ONDEMAND_SUBTEST("unclosed", R"({ "x": { })", 
assert_iterate_object(doc.get_object(), { INCOMPLETE_ARRAY_OR_OBJECT })); TEST_SUCCEED(); } @@ -81,11 +88,13 @@ namespace object_error_tests { } bool object_lookup_unclosed_error() { TEST_START(); - // TODO This one passes the user a value that may run past the end of the buffer if they aren't careful. - // In particular, if the padding is decorated with the wrong values, we could cause overrun! + #if __SIMDJSON_CHECK_EOF + ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_error(doc["a"], INCOMPLETE_ARRAY_OR_OBJECT)); + #else ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_success(doc["a"])); - ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_error(doc["a"], TAPE_ERROR)); - ONDEMAND_SUBTEST("unclosed", R"({ )", assert_error(doc["a"], TAPE_ERROR)); + #endif + ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_error(doc["a"], INCOMPLETE_ARRAY_OR_OBJECT)); + ONDEMAND_SUBTEST("unclosed", R"({ )", assert_error(doc["a"], INCOMPLETE_ARRAY_OR_OBJECT)); TEST_SUCCEED(); } @@ -109,13 +118,13 @@ namespace object_error_tests { } bool object_lookup_miss_unclosed_error() { TEST_START(); - ONDEMAND_SUBTEST("unclosed", R"({ "a": 1, )", assert_error(doc["b"], TAPE_ERROR)); + ONDEMAND_SUBTEST("unclosed", R"({ "a": 1, )", assert_error(doc["b"], INCOMPLETE_ARRAY_OR_OBJECT)); // TODO These next two pass the user a value that may run past the end of the buffer if they aren't careful. // In particular, if the padding is decorated with the wrong values, we could cause overrun! 
- ONDEMAND_SUBTEST("unclosed", R"({ "a": 1 )", assert_error(doc["b"], TAPE_ERROR)); - ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_error(doc["b"], TAPE_ERROR)); - ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_error(doc["b"], TAPE_ERROR)); - ONDEMAND_SUBTEST("unclosed", R"({ )", assert_error(doc["b"], TAPE_ERROR)); + ONDEMAND_SUBTEST("unclosed", R"({ "a": 1 )", assert_error(doc["b"], INCOMPLETE_ARRAY_OR_OBJECT)); + ONDEMAND_SUBTEST("unclosed", R"({ "a": )", assert_error(doc["b"], INCOMPLETE_ARRAY_OR_OBJECT)); + ONDEMAND_SUBTEST("unclosed", R"({ "a" )", assert_error(doc["b"], INCOMPLETE_ARRAY_OR_OBJECT)); + ONDEMAND_SUBTEST("unclosed", R"({ )", assert_error(doc["b"], INCOMPLETE_ARRAY_OR_OBJECT)); TEST_SUCCEED(); } bool object_lookup_miss_next_error() { diff --git a/tests/ondemand/ondemand_object_find_field_tests.cpp b/tests/ondemand/ondemand_object_find_field_tests.cpp new file mode 100644 index 0000000000..724867a698 --- /dev/null +++ b/tests/ondemand/ondemand_object_find_field_tests.cpp @@ -0,0 +1,191 @@ +#include "simdjson.h" +#include "test_ondemand.h" + +using namespace simdjson; + +namespace object_tests { + using namespace std; + using simdjson::ondemand::json_type; + + bool object_find_field_unordered() { + TEST_START(); + auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; + SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::object object; + ASSERT_SUCCESS( doc_result.get(object) ); + + ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( object.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( object.find_field_unordered("d"), NO_SUCH_FIELD ); + return true; + })); + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + simdjson_result object; + object = 
doc_result.get_object(); + + ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( object.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( object.find_field_unordered("d"), NO_SUCH_FIELD ); + return true; + })); + TEST_SUCCEED(); + } + + bool document_object_find_field_unordered() { + TEST_START(); + auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; + SUBTEST("ondemand::document", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::document doc; + ASSERT_SUCCESS( std::move(doc_result).get(doc) ); + ASSERT_EQUAL( doc.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( doc.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( doc.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( doc.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( doc.find_field_unordered("d"), NO_SUCH_FIELD ); + return true; + })); + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + ASSERT_EQUAL( doc_result.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( doc_result.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( doc_result.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( doc_result.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( doc_result.find_field_unordered("d"), NO_SUCH_FIELD ); + return true; + })); + TEST_SUCCEED(); + } + + bool value_object_find_field_unordered() { + TEST_START(); + auto json = R"({ "outer": { "a": 1, "b": 2, "c/d": 3 } })"_padded; + SUBTEST("ondemand::value", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::value object; + ASSERT_SUCCESS( 
doc_result.find_field_unordered("outer").get(object) ); + ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( object.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( object.find_field_unordered("d"), NO_SUCH_FIELD ); + return true; + })); + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + simdjson_result object = doc_result.find_field_unordered("outer"); + ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( object.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( object.find_field_unordered("d"), NO_SUCH_FIELD ); + return true; + })); + TEST_SUCCEED(); + } + + bool object_find_field() { + TEST_START(); + auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; + SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::object object; + ASSERT_SUCCESS( doc_result.get(object) ); + + ASSERT_EQUAL( object.find_field("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object.find_field("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( object.find_field("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_ERROR( object.find_field("a"), NO_SUCH_FIELD ); + ASSERT_ERROR( object.find_field("d"), NO_SUCH_FIELD ); + return true; + })); + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + simdjson_result object; + object = doc_result.get_object(); + + ASSERT_EQUAL( object.find_field("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object.find_field("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( 
object.find_field("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_ERROR( object.find_field("a"), NO_SUCH_FIELD ); + ASSERT_ERROR( object.find_field("d"), NO_SUCH_FIELD ); + return true; + })); + TEST_SUCCEED(); + } + + bool document_object_find_field() { + TEST_START(); + auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; + SUBTEST("ondemand::document", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::document doc; + ASSERT_SUCCESS( std::move(doc_result).get(doc) ); + ASSERT_EQUAL( doc.find_field("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( doc.find_field("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( doc.find_field("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_ERROR( doc.find_field("a"), NO_SUCH_FIELD ); + ASSERT_ERROR( doc.find_field("d"), NO_SUCH_FIELD ); + return true; + })); + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + ASSERT_EQUAL( doc_result.find_field("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( doc_result.find_field("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( doc_result.find_field("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_ERROR( doc_result.find_field("a"), NO_SUCH_FIELD ); + ASSERT_ERROR( doc_result.find_field("d"), NO_SUCH_FIELD ); + return true; + })); + TEST_SUCCEED(); + } + + bool value_object_find_field() { + TEST_START(); + auto json = R"({ "outer": { "a": 1, "b": 2, "c/d": 3 } })"_padded; + SUBTEST("ondemand::value", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::value object; + ASSERT_SUCCESS( doc_result.find_field("outer").get(object) ); + ASSERT_EQUAL( object.find_field("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object.find_field("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( object.find_field("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_ERROR( object.find_field("a"), NO_SUCH_FIELD ); + ASSERT_ERROR( object.find_field("d"), NO_SUCH_FIELD ); + return true; + })); + SUBTEST("simdjson_result", 
test_ondemand_doc(json, [&](auto doc_result) { + simdjson_result object = doc_result.find_field("outer"); + ASSERT_EQUAL( object.find_field("a").get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object.find_field("b").get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( object.find_field("c/d").get_uint64().value_unsafe(), 3 ); + + ASSERT_ERROR( object.find_field("a"), NO_SUCH_FIELD ); + ASSERT_ERROR( object.find_field("d"), NO_SUCH_FIELD ); + return true; + })); + TEST_SUCCEED(); + } + + bool run() { + return + object_find_field_unordered() && + document_object_find_field_unordered() && + value_object_find_field_unordered() && + object_find_field() && + document_object_find_field() && + value_object_find_field() && + true; + } + +} // namespace object_tests + +int main(int argc, char *argv[]) { + return test_main(argc, argv, object_tests::run); +} diff --git a/tests/ondemand/ondemand_object_index_tests.cpp b/tests/ondemand/ondemand_object_index_tests.cpp new file mode 100644 index 0000000000..fa7ce14c6c --- /dev/null +++ b/tests/ondemand/ondemand_object_index_tests.cpp @@ -0,0 +1,432 @@ +#include "simdjson.h" +#include "test_ondemand.h" + +using namespace simdjson; + +namespace object_tests { + using namespace std; + using simdjson::ondemand::json_type; + + bool object_index() { + TEST_START(); + auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; + SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::object object; + ASSERT_SUCCESS( doc_result.get(object) ); + + ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object["b"].get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( object["c/d"].get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( object["d"], NO_SUCH_FIELD ); + return true; + })); + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + simdjson_result object; + object = doc_result.get_object(); + + ASSERT_EQUAL( 
object["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object["b"].get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( object["c/d"].get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( object["d"], NO_SUCH_FIELD ); + return true; + })); + TEST_SUCCEED(); + } + + bool document_object_index() { + TEST_START(); + auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; + SUBTEST("ondemand::document", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::document doc; + ASSERT_SUCCESS( std::move(doc_result).get(doc) ); + ASSERT_EQUAL( doc["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( doc["b"].get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( doc["c/d"].get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( doc["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( doc["d"], NO_SUCH_FIELD ); + return true; + })); + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + ASSERT_EQUAL( doc_result["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( doc_result["b"].get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( doc_result["c/d"].get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( doc_result["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( doc_result["d"], NO_SUCH_FIELD ); + return true; + })); + TEST_SUCCEED(); + } + + bool value_object_index() { + TEST_START(); + auto json = R"({ "outer": { "a": 1, "b": 2, "c/d": 3 } })"_padded; + SUBTEST("ondemand::value", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::value object; + ASSERT_SUCCESS( doc_result["outer"].get(object) ); + ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object["b"].get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( object["c/d"].get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( object["d"], NO_SUCH_FIELD ); + return true; + })); + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { 
+ simdjson_result object = doc_result["outer"]; + ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_EQUAL( object["b"].get_uint64().value_unsafe(), 2 ); + ASSERT_EQUAL( object["c/d"].get_uint64().value_unsafe(), 3 ); + + ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); + ASSERT_ERROR( object["d"], NO_SUCH_FIELD ); + return true; + })); + TEST_SUCCEED(); + } + + bool document_nested_object_index() { + TEST_START(); + auto json = R"({ "x": { "y": { "z": 2 } } }})"_padded; + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + ASSERT_EQUAL( doc_result["x"]["y"]["z"].get_uint64().value_unsafe(), 2 ); + return true; + })); + SUBTEST("ondemand::document", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::document doc; + ASSERT_SUCCESS( std::move(doc_result).get(doc) ); + ASSERT_EQUAL( doc["x"]["y"]["z"].get_uint64().value_unsafe(), 2 ); + return true; + })); + TEST_SUCCEED(); + } + + bool nested_object_index() { + TEST_START(); + auto json = R"({ "x": { "y": { "z": 2 } } }})"_padded; + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + simdjson_result object = doc_result.get_object(); + ASSERT_EQUAL( object["x"]["y"]["z"].get_uint64().value_unsafe(), 2 ); + return true; + })); + SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::object object; + ASSERT_SUCCESS( doc_result.get(object) ); + ASSERT_EQUAL( object["x"]["y"]["z"].get_uint64().value_unsafe(), 2 ); + return true; + })); + TEST_SUCCEED(); + } + + bool value_nested_object_index() { + TEST_START(); + auto json = R"({ "x": { "y": { "z": 2 } } }})"_padded; + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + simdjson_result x = doc_result["x"]; + ASSERT_EQUAL( x["y"]["z"].get_uint64().value_unsafe(), 2 ); + return true; + })); + SUBTEST("ondemand::value", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::value x; + ASSERT_SUCCESS( doc_result["x"].get(x) ); + 
ASSERT_EQUAL( x["y"]["z"].get_uint64().value_unsafe(), 2 ); + return true; + })); + TEST_SUCCEED(); + } + + bool object_index_partial_children() { + TEST_START(); + auto json = R"( + { + "scalar_ignore": 0, + "empty_array_ignore": [], + "empty_object_ignore": {}, + "object_break": { "x": 3, "y": 33 }, + "object_break_unused": { "x": 4, "y": 44 }, + "object_index": { "x": 5, "y": 55 }, + "object_index_unused": { "x": 6, "y": 66 }, + "array_break": [ 7, 77, 777 ], + "array_break_unused": [ 8, 88, 888 ], + "quadruple_nested_break": { "a": [ { "b": [ 9, 99 ], "c": 999 }, 9999 ], "d": 99999 }, + "actual_value": 10 + } + )"_padded; + SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::object object; + ASSERT_SUCCESS( doc_result.get(object) ); + + ASSERT_SUCCESS( object["scalar_ignore"] ); + std::cout << " - After ignoring empty scalar ..." << std::endl; + + ASSERT_SUCCESS( object["empty_array_ignore"] ); + std::cout << " - After ignoring empty array ..." << std::endl; + + ASSERT_SUCCESS( object["empty_object_ignore"] ); + std::cout << " - After ignoring empty object ..." << std::endl; + + // Break after using first value in child object + { + auto value = object["object_break"]; + for (auto [ child_field, error ] : value.get_object()) { + ASSERT_SUCCESS(error); + ASSERT_EQUAL(child_field.key(), "x"); + uint64_t x; + ASSERT_SUCCESS( child_field.value().get(x) ); + ASSERT_EQUAL(x, 3); + break; // Break after the first value + } + std::cout << " - After using first value in child object ..." << std::endl; + } + + // Break without using first value in child object + { + auto value = object["object_break_unused"]; + for (auto [ child_field, error ] : value.get_object()) { + ASSERT_SUCCESS(error); + ASSERT_EQUAL(child_field.key(), "x"); + break; + } + std::cout << " - After reaching (but not using) first value in child object ..." 
<< std::endl; + } + + // Only look up one field in child object + { + auto value = object["object_index"]; + + uint64_t x; + ASSERT_SUCCESS( value["x"].get(x) ); + ASSERT_EQUAL( x, 5 ); + std::cout << " - After looking up one field in child object ..." << std::endl; + } + + // Only look up one field in child object, but don't use it + { + auto value = object["object_index_unused"]; + + ASSERT_SUCCESS( value["x"] ); + std::cout << " - After looking up (but not using) one field in child object ..." << std::endl; + } + + // Break after first value in child array + { + auto value = object["array_break"]; + + for (auto child_value : value) { + uint64_t x; + ASSERT_SUCCESS( child_value.get(x) ); + ASSERT_EQUAL( x, 7 ); + break; + } + std::cout << " - After using first value in child array ..." << std::endl; + } + + // Break without using first value in child array + { + auto value = object["array_break_unused"]; + + for (auto child_value : value) { + ASSERT_SUCCESS(child_value); + break; + } + std::cout << " - After reaching (but not using) first value in child array ..." << std::endl; + } + + // Break out of multiple child loops + { + auto value = object["quadruple_nested_break"]; + for (auto child1 : value.get_object()) { + for (auto child2 : child1.value().get_array()) { + for (auto child3 : child2.get_object()) { + for (auto child4 : child3.value().get_array()) { + uint64_t x; + ASSERT_SUCCESS( child4.get(x) ); + ASSERT_EQUAL( x, 9 ); + break; + } + break; + } + break; + } + break; + } + std::cout << " - After breaking out of quadruply-nested arrays and objects ..." << std::endl; + } + + // Test the actual value + { + auto value = object["actual_value"]; + uint64_t actual_value; + ASSERT_SUCCESS( value.get(actual_value) ); + ASSERT_EQUAL( actual_value, 10 ); + } + + return true; + })); + + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + ASSERT_SUCCESS( doc_result["scalar_ignore"] ); + std::cout << " - After ignoring empty scalar ..." 
<< std::endl; + + ASSERT_SUCCESS( doc_result["empty_array_ignore"] ); + std::cout << " - After ignoring empty array ..." << std::endl; + + ASSERT_SUCCESS( doc_result["empty_object_ignore"] ); + std::cout << " - After ignoring empty doc_result ..." << std::endl; + + // Break after using first value in child object + { + auto value = doc_result["object_break"]; + for (auto [ child_field, error ] : value.get_object()) { + ASSERT_SUCCESS(error); + ASSERT_EQUAL(child_field.key(), "x"); + uint64_t x; + ASSERT_SUCCESS( child_field.value().get(x) ); + ASSERT_EQUAL(x, 3); + break; // Break after the first value + } + std::cout << " - After using first value in child object ..." << std::endl; + } + + // Break without using first value in child object + { + auto value = doc_result["object_break_unused"]; + for (auto [ child_field, error ] : value.get_object()) { + ASSERT_SUCCESS(error); + ASSERT_EQUAL(child_field.key(), "x"); + break; + } + std::cout << " - After reaching (but not using) first value in child object ..." << std::endl; + } + + // Only look up one field in child object + { + auto value = doc_result["object_index"]; + + uint64_t x; + ASSERT_SUCCESS( value["x"].get(x) ); + ASSERT_EQUAL( x, 5 ); + std::cout << " - After looking up one field in child object ..." << std::endl; + } + + // Only look up one field in child object, but don't use it + { + auto value = doc_result["object_index_unused"]; + + ASSERT_SUCCESS( value["x"] ); + std::cout << " - After looking up (but not using) one field in child object ..." << std::endl; + } + + // Break after first value in child array + { + auto value = doc_result["array_break"]; + + for (auto child_value : value) { + uint64_t x; + ASSERT_SUCCESS( child_value.get(x) ); + ASSERT_EQUAL( x, 7 ); + break; + } + std::cout << " - After using first value in child array ..." 
<< std::endl; + } + + // Break without using first value in child array + { + auto value = doc_result["array_break_unused"]; + + for (auto child_value : value) { + ASSERT_SUCCESS(child_value); + break; + } + std::cout << " - After reaching (but not using) first value in child array ..." << std::endl; + } + + // Break out of multiple child loops + { + auto value = doc_result["quadruple_nested_break"]; + for (auto child1 : value.get_object()) { + for (auto child2 : child1.value().get_array()) { + for (auto child3 : child2.get_object()) { + for (auto child4 : child3.value().get_array()) { + uint64_t x; + ASSERT_SUCCESS( child4.get(x) ); + ASSERT_EQUAL( x, 9 ); + break; + } + break; + } + break; + } + break; + } + std::cout << " - After breaking out of quadruply-nested arrays and objects ..." << std::endl; + } + + // Test the actual value + { + auto value = doc_result["actual_value"]; + uint64_t actual_value; + ASSERT_SUCCESS( value.get(actual_value) ); + ASSERT_EQUAL( actual_value, 10 ); + } + + return true; + })); + + return true; + } + +#if SIMDJSON_EXCEPTIONS + + bool object_index_exception() { + TEST_START(); + auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; + SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { + ondemand::object object = doc_result; + + ASSERT_EQUAL( uint64_t(object["a"]), 1 ); + ASSERT_EQUAL( uint64_t(object["b"]), 2 ); + ASSERT_EQUAL( uint64_t(object["c/d"]), 3 ); + + return true; + })); + TEST_SUCCEED(); + } + bool nested_object_index_exception() { + TEST_START(); + auto json = R"({ "x": { "y": { "z": 2 } } }})"_padded; + SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { + ASSERT_EQUAL( uint64_t(doc_result["x"]["y"]["z"]), 2 ); + return true; + })); + TEST_SUCCEED(); + } + +#endif // SIMDJSON_EXCEPTIONS + + bool run() { + return + object_index() && + document_object_index() && + value_object_index() && + nested_object_index() && + document_nested_object_index() && + 
value_nested_object_index() && + object_index_partial_children() && +#if SIMDJSON_EXCEPTIONS + object_index_exception() && + nested_object_index_exception() && +#endif // SIMDJSON_EXCEPTIONS + true; + } + +} // namespace object_tests + +int main(int argc, char *argv[]) { + return test_main(argc, argv, object_tests::run); +} diff --git a/tests/ondemand/ondemand_object_tests.cpp b/tests/ondemand/ondemand_object_tests.cpp index c36cbc6bd1..c8e87a0b4d 100644 --- a/tests/ondemand/ondemand_object_tests.cpp +++ b/tests/ondemand/ondemand_object_tests.cpp @@ -406,247 +406,6 @@ namespace object_tests { return true; } - bool object_index_partial_children() { - TEST_START(); - auto json = R"( - { - "scalar_ignore": 0, - "empty_array_ignore": [], - "empty_object_ignore": {}, - "object_break": { "x": 3, "y": 33 }, - "object_break_unused": { "x": 4, "y": 44 }, - "object_index": { "x": 5, "y": 55 }, - "object_index_unused": { "x": 6, "y": 66 }, - "array_break": [ 7, 77, 777 ], - "array_break_unused": [ 8, 88, 888 ], - "quadruple_nested_break": { "a": [ { "b": [ 9, 99 ], "c": 999 }, 9999 ], "d": 99999 }, - "actual_value": 10 - } - )"_padded; - SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::object object; - ASSERT_SUCCESS( doc_result.get(object) ); - - ASSERT_SUCCESS( object["scalar_ignore"] ); - std::cout << " - After ignoring empty scalar ..." << std::endl; - - ASSERT_SUCCESS( object["empty_array_ignore"] ); - std::cout << " - After ignoring empty array ..." << std::endl; - - ASSERT_SUCCESS( object["empty_object_ignore"] ); - std::cout << " - After ignoring empty object ..." 
<< std::endl; - - // Break after using first value in child object - { - auto value = object["object_break"]; - for (auto [ child_field, error ] : value.get_object()) { - ASSERT_SUCCESS(error); - ASSERT_EQUAL(child_field.key(), "x"); - uint64_t x; - ASSERT_SUCCESS( child_field.value().get(x) ); - ASSERT_EQUAL(x, 3); - break; // Break after the first value - } - std::cout << " - After using first value in child object ..." << std::endl; - } - - // Break without using first value in child object - { - auto value = object["object_break_unused"]; - for (auto [ child_field, error ] : value.get_object()) { - ASSERT_SUCCESS(error); - ASSERT_EQUAL(child_field.key(), "x"); - break; - } - std::cout << " - After reaching (but not using) first value in child object ..." << std::endl; - } - - // Only look up one field in child object - { - auto value = object["object_index"]; - - uint64_t x; - ASSERT_SUCCESS( value["x"].get(x) ); - ASSERT_EQUAL( x, 5 ); - std::cout << " - After looking up one field in child object ..." << std::endl; - } - - // Only look up one field in child object, but don't use it - { - auto value = object["object_index_unused"]; - - ASSERT_SUCCESS( value["x"] ); - std::cout << " - After looking up (but not using) one field in child object ..." << std::endl; - } - - // Break after first value in child array - { - auto value = object["array_break"]; - - for (auto child_value : value) { - uint64_t x; - ASSERT_SUCCESS( child_value.get(x) ); - ASSERT_EQUAL( x, 7 ); - break; - } - std::cout << " - After using first value in child array ..." << std::endl; - } - - // Break without using first value in child array - { - auto value = object["array_break_unused"]; - - for (auto child_value : value) { - ASSERT_SUCCESS(child_value); - break; - } - std::cout << " - After reaching (but not using) first value in child array ..." 
<< std::endl; - } - - // Break out of multiple child loops - { - auto value = object["quadruple_nested_break"]; - for (auto child1 : value.get_object()) { - for (auto child2 : child1.value().get_array()) { - for (auto child3 : child2.get_object()) { - for (auto child4 : child3.value().get_array()) { - uint64_t x; - ASSERT_SUCCESS( child4.get(x) ); - ASSERT_EQUAL( x, 9 ); - break; - } - break; - } - break; - } - break; - } - std::cout << " - After breaking out of quadruply-nested arrays and objects ..." << std::endl; - } - - // Test the actual value - { - auto value = object["actual_value"]; - uint64_t actual_value; - ASSERT_SUCCESS( value.get(actual_value) ); - ASSERT_EQUAL( actual_value, 10 ); - } - - return true; - })); - - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - ASSERT_SUCCESS( doc_result["scalar_ignore"] ); - std::cout << " - After ignoring empty scalar ..." << std::endl; - - ASSERT_SUCCESS( doc_result["empty_array_ignore"] ); - std::cout << " - After ignoring empty array ..." << std::endl; - - ASSERT_SUCCESS( doc_result["empty_object_ignore"] ); - std::cout << " - After ignoring empty doc_result ..." << std::endl; - - // Break after using first value in child object - { - auto value = doc_result["object_break"]; - for (auto [ child_field, error ] : value.get_object()) { - ASSERT_SUCCESS(error); - ASSERT_EQUAL(child_field.key(), "x"); - uint64_t x; - ASSERT_SUCCESS( child_field.value().get(x) ); - ASSERT_EQUAL(x, 3); - break; // Break after the first value - } - std::cout << " - After using first value in child object ..." << std::endl; - } - - // Break without using first value in child object - { - auto value = doc_result["object_break_unused"]; - for (auto [ child_field, error ] : value.get_object()) { - ASSERT_SUCCESS(error); - ASSERT_EQUAL(child_field.key(), "x"); - break; - } - std::cout << " - After reaching (but not using) first value in child object ..." 
<< std::endl; - } - - // Only look up one field in child object - { - auto value = doc_result["object_index"]; - - uint64_t x; - ASSERT_SUCCESS( value["x"].get(x) ); - ASSERT_EQUAL( x, 5 ); - std::cout << " - After looking up one field in child object ..." << std::endl; - } - - // Only look up one field in child object, but don't use it - { - auto value = doc_result["object_index_unused"]; - - ASSERT_SUCCESS( value["x"] ); - std::cout << " - After looking up (but not using) one field in child object ..." << std::endl; - } - - // Break after first value in child array - { - auto value = doc_result["array_break"]; - - for (auto child_value : value) { - uint64_t x; - ASSERT_SUCCESS( child_value.get(x) ); - ASSERT_EQUAL( x, 7 ); - break; - } - std::cout << " - After using first value in child array ..." << std::endl; - } - - // Break without using first value in child array - { - auto value = doc_result["array_break_unused"]; - - for (auto child_value : value) { - ASSERT_SUCCESS(child_value); - break; - } - std::cout << " - After reaching (but not using) first value in child array ..." << std::endl; - } - - // Break out of multiple child loops - { - auto value = doc_result["quadruple_nested_break"]; - for (auto child1 : value.get_object()) { - for (auto child2 : child1.value().get_array()) { - for (auto child3 : child2.get_object()) { - for (auto child4 : child3.value().get_array()) { - uint64_t x; - ASSERT_SUCCESS( child4.get(x) ); - ASSERT_EQUAL( x, 9 ); - break; - } - break; - } - break; - } - break; - } - std::cout << " - After breaking out of quadruply-nested arrays and objects ..." 
<< std::endl; - } - - // Test the actual value - { - auto value = doc_result["actual_value"]; - uint64_t actual_value; - ASSERT_SUCCESS( value.get(actual_value) ); - ASSERT_EQUAL( actual_value, 10 ); - } - - return true; - })); - - return true; - } - bool iterate_empty_object() { TEST_START(); auto json = R"({})"_padded; @@ -669,60 +428,6 @@ namespace object_tests { TEST_SUCCEED(); } - bool object_index() { - TEST_START(); - auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; - SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::object object; - ASSERT_SUCCESS( doc_result.get(object) ); - - ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object["b"].get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( object["c/d"].get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( object["d"], NO_SUCH_FIELD ); - return true; - })); - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - simdjson_result object; - object = doc_result.get_object(); - - ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object["b"].get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( object["c/d"].get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( object["d"], NO_SUCH_FIELD ); - return true; - })); - TEST_SUCCEED(); - } - bool document_object_index() { - TEST_START(); - auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; - SUBTEST("ondemand::document", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::document doc; - ASSERT_SUCCESS( std::move(doc_result).get(doc) ); - ASSERT_EQUAL( doc["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( doc["b"].get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( doc["c/d"].get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( doc["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( doc["d"], NO_SUCH_FIELD ); - return true; - })); 
- SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - ASSERT_EQUAL( doc_result["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( doc_result["b"].get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( doc_result["c/d"].get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( doc_result["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( doc_result["d"], NO_SUCH_FIELD ); - return true; - })); - TEST_SUCCEED(); - } bool value_search_unescaped_key() { TEST_START(); auto json = R"({"k\u0065y": 1})"_padded; @@ -760,32 +465,6 @@ namespace object_tests { })); TEST_SUCCEED(); } - bool value_object_index() { - TEST_START(); - auto json = R"({ "outer": { "a": 1, "b": 2, "c/d": 3 } })"_padded; - SUBTEST("ondemand::value", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::value object; - ASSERT_SUCCESS( doc_result["outer"].get(object) ); - ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object["b"].get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( object["c/d"].get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( object["d"], NO_SUCH_FIELD ); - return true; - })); - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - simdjson_result object = doc_result["outer"]; - ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object["b"].get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( object["c/d"].get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( object["a"].get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( object["d"], NO_SUCH_FIELD ); - return true; - })); - TEST_SUCCEED(); - } bool issue_1480() { TEST_START(); @@ -862,223 +541,6 @@ namespace object_tests { TEST_SUCCEED(); } - - bool object_find_field_unordered() { - TEST_START(); - auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; - SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::object object; - ASSERT_SUCCESS( 
doc_result.get(object) ); - - ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( object.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( object.find_field_unordered("d"), NO_SUCH_FIELD ); - return true; - })); - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - simdjson_result object; - object = doc_result.get_object(); - - ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( object.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( object.find_field_unordered("d"), NO_SUCH_FIELD ); - return true; - })); - TEST_SUCCEED(); - } - - bool document_object_find_field_unordered() { - TEST_START(); - auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; - SUBTEST("ondemand::document", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::document doc; - ASSERT_SUCCESS( std::move(doc_result).get(doc) ); - ASSERT_EQUAL( doc.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( doc.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( doc.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( doc.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( doc.find_field_unordered("d"), NO_SUCH_FIELD ); - return true; - })); - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - ASSERT_EQUAL( doc_result.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( doc_result.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( 
doc_result.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( doc_result.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( doc_result.find_field_unordered("d"), NO_SUCH_FIELD ); - return true; - })); - TEST_SUCCEED(); - } - - bool value_object_find_field_unordered() { - TEST_START(); - auto json = R"({ "outer": { "a": 1, "b": 2, "c/d": 3 } })"_padded; - SUBTEST("ondemand::value", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::value object; - ASSERT_SUCCESS( doc_result.find_field_unordered("outer").get(object) ); - ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( object.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( object.find_field_unordered("d"), NO_SUCH_FIELD ); - return true; - })); - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - simdjson_result object = doc_result.find_field_unordered("outer"); - ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object.find_field_unordered("b").get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( object.find_field_unordered("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_EQUAL( object.find_field_unordered("a").get_uint64().value_unsafe(), 1 ); - ASSERT_ERROR( object.find_field_unordered("d"), NO_SUCH_FIELD ); - return true; - })); - TEST_SUCCEED(); - } - - bool object_find_field() { - TEST_START(); - auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; - SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::object object; - ASSERT_SUCCESS( doc_result.get(object) ); - - ASSERT_EQUAL( object.find_field("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object.find_field("b").get_uint64().value_unsafe(), 2 ); 
- ASSERT_EQUAL( object.find_field("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_ERROR( object.find_field("a"), NO_SUCH_FIELD ); - ASSERT_ERROR( object.find_field("d"), NO_SUCH_FIELD ); - return true; - })); - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - simdjson_result object; - object = doc_result.get_object(); - - ASSERT_EQUAL( object.find_field("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object.find_field("b").get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( object.find_field("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_ERROR( object.find_field("a"), NO_SUCH_FIELD ); - ASSERT_ERROR( object.find_field("d"), NO_SUCH_FIELD ); - return true; - })); - TEST_SUCCEED(); - } - - bool document_object_find_field() { - TEST_START(); - auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; - SUBTEST("ondemand::document", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::document doc; - ASSERT_SUCCESS( std::move(doc_result).get(doc) ); - ASSERT_EQUAL( doc.find_field("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( doc.find_field("b").get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( doc.find_field("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_ERROR( doc.find_field("a"), NO_SUCH_FIELD ); - ASSERT_ERROR( doc.find_field("d"), NO_SUCH_FIELD ); - return true; - })); - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - ASSERT_EQUAL( doc_result.find_field("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( doc_result.find_field("b").get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( doc_result.find_field("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_ERROR( doc_result.find_field("a"), NO_SUCH_FIELD ); - ASSERT_ERROR( doc_result.find_field("d"), NO_SUCH_FIELD ); - return true; - })); - TEST_SUCCEED(); - } - - bool value_object_find_field() { - TEST_START(); - auto json = R"({ "outer": { "a": 1, "b": 2, "c/d": 3 } })"_padded; - SUBTEST("ondemand::value", test_ondemand_doc(json, 
[&](auto doc_result) { - ondemand::value object; - ASSERT_SUCCESS( doc_result.find_field("outer").get(object) ); - ASSERT_EQUAL( object.find_field("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object.find_field("b").get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( object.find_field("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_ERROR( object.find_field("a"), NO_SUCH_FIELD ); - ASSERT_ERROR( object.find_field("d"), NO_SUCH_FIELD ); - return true; - })); - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - simdjson_result object = doc_result.find_field("outer"); - ASSERT_EQUAL( object.find_field("a").get_uint64().value_unsafe(), 1 ); - ASSERT_EQUAL( object.find_field("b").get_uint64().value_unsafe(), 2 ); - ASSERT_EQUAL( object.find_field("c/d").get_uint64().value_unsafe(), 3 ); - - ASSERT_ERROR( object.find_field("a"), NO_SUCH_FIELD ); - ASSERT_ERROR( object.find_field("d"), NO_SUCH_FIELD ); - return true; - })); - TEST_SUCCEED(); - } - - bool document_nested_object_index() { - TEST_START(); - auto json = R"({ "x": { "y": { "z": 2 } } }})"_padded; - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - ASSERT_EQUAL( doc_result["x"]["y"]["z"].get_uint64().value_unsafe(), 2 ); - return true; - })); - SUBTEST("ondemand::document", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::document doc; - ASSERT_SUCCESS( std::move(doc_result).get(doc) ); - ASSERT_EQUAL( doc["x"]["y"]["z"].get_uint64().value_unsafe(), 2 ); - return true; - })); - TEST_SUCCEED(); - } - - bool nested_object_index() { - TEST_START(); - auto json = R"({ "x": { "y": { "z": 2 } } }})"_padded; - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - simdjson_result object = doc_result.get_object(); - ASSERT_EQUAL( object["x"]["y"]["z"].get_uint64().value_unsafe(), 2 ); - return true; - })); - SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::object object; - ASSERT_SUCCESS( 
doc_result.get(object) ); - ASSERT_EQUAL( object["x"]["y"]["z"].get_uint64().value_unsafe(), 2 ); - return true; - })); - TEST_SUCCEED(); - } - - bool value_nested_object_index() { - TEST_START(); - auto json = R"({ "x": { "y": { "z": 2 } } }})"_padded; - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - simdjson_result x = doc_result["x"]; - ASSERT_EQUAL( x["y"]["z"].get_uint64().value_unsafe(), 2 ); - return true; - })); - SUBTEST("ondemand::value", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::value x; - ASSERT_SUCCESS( doc_result["x"].get(x) ); - ASSERT_EQUAL( x["y"]["z"].get_uint64().value_unsafe(), 2 ); - return true; - })); - TEST_SUCCEED(); - } - #if SIMDJSON_EXCEPTIONS bool iterate_object_exception() { @@ -1134,30 +596,6 @@ namespace object_tests { TEST_SUCCEED(); } - bool object_index_exception() { - TEST_START(); - auto json = R"({ "a": 1, "b": 2, "c/d": 3})"_padded; - SUBTEST("ondemand::object", test_ondemand_doc(json, [&](auto doc_result) { - ondemand::object object = doc_result; - - ASSERT_EQUAL( uint64_t(object["a"]), 1 ); - ASSERT_EQUAL( uint64_t(object["b"]), 2 ); - ASSERT_EQUAL( uint64_t(object["c/d"]), 3 ); - - return true; - })); - TEST_SUCCEED(); - } - bool nested_object_index_exception() { - TEST_START(); - auto json = R"({ "x": { "y": { "z": 2 } } }})"_padded; - SUBTEST("simdjson_result", test_ondemand_doc(json, [&](auto doc_result) { - ASSERT_EQUAL( uint64_t(doc_result["x"]["y"]["z"]), 2 ); - return true; - })); - TEST_SUCCEED(); - } - #endif // SIMDJSON_EXCEPTIONS bool run() { @@ -1174,25 +612,10 @@ namespace object_tests { #endif iterate_object() && iterate_empty_object() && - object_index() && - document_object_index() && - value_object_index() && - object_find_field_unordered() && - document_object_find_field_unordered() && - value_object_find_field_unordered() && - object_find_field() && - document_object_find_field() && - value_object_find_field() && - nested_object_index() && - 
document_nested_object_index() && - value_nested_object_index() && iterate_object_partial_children() && - object_index_partial_children() && issue_1480() && #if SIMDJSON_EXCEPTIONS iterate_object_exception() && - object_index_exception() && - nested_object_index_exception() && #endif // SIMDJSON_EXCEPTIONS true; } diff --git a/tests/ondemand/ondemand_wrong_type_error_tests.cpp b/tests/ondemand/ondemand_wrong_type_error_tests.cpp index 972f3c4128..0f0d9aa2f5 100644 --- a/tests/ondemand/ondemand_wrong_type_error_tests.cpp +++ b/tests/ondemand/ondemand_wrong_type_error_tests.cpp @@ -15,7 +15,7 @@ namespace wrong_type_error_tests { return false; \ } \ { \ - padded_string a_json(std::string(R"({ "a": )") + JSON + " })"); \ + padded_string a_json(std::string(R"({ "a": )") + JSON + " }"); \ std::cout << R"(- Subtest: get_)" << (#TYPE) << "() - JSON: " << a_json << std::endl; \ if (!test_ondemand_doc(a_json, [&](auto doc_result) { \ ASSERT_ERROR( doc_result["a"].get_##TYPE(), (ERROR) ); \