Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ executors:
environment:
CXX: g++-8
CC: gcc-8
BUILD_FLAGS:
CMAKE_BUILD_FLAGS:
CTEST_FLAGS: --output-on-failure

gcc9:
Expand All @@ -20,7 +20,7 @@ executors:
environment:
CXX: g++-9
CC: gcc-9
BUILD_FLAGS:
CMAKE_BUILD_FLAGS:
CTEST_FLAGS: --output-on-failure

gcc10:
Expand All @@ -29,7 +29,7 @@ executors:
environment:
CXX: g++-10
CC: gcc-10
BUILD_FLAGS:
CMAKE_BUILD_FLAGS:
CTEST_FLAGS: --output-on-failure

clang10:
Expand All @@ -38,7 +38,7 @@ executors:
environment:
CXX: clang++-10
CC: clang-10
BUILD_FLAGS:
CMAKE_BUILD_FLAGS:
CTEST_FLAGS: --output-on-failure

clang9:
Expand All @@ -47,7 +47,7 @@ executors:
environment:
CXX: clang++-9
CC: clang-9
BUILD_FLAGS:
CMAKE_BUILD_FLAGS:
CTEST_FLAGS: --output-on-failure

clang6:
Expand All @@ -56,7 +56,7 @@ executors:
environment:
CXX: clang++-6.0
CC: clang-6.0
BUILD_FLAGS:
CMAKE_BUILD_FLAGS:
CTEST_FLAGS: --output-on-failure

# Reusable test commands (and initializer for clang 6)
Expand Down Expand Up @@ -191,7 +191,7 @@ jobs:
sanitize-gcc10:
description: Build and run tests on GCC 10 and AVX 2 with a cmake sanitize build
executor: gcc10
environment: { CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE=ON, BUILD_FLAGS: "", CTEST_FLAGS: --output-on-failure -LE explicitonly }
environment: { CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE=ON, CTEST_FLAGS: --output-on-failure -LE explicitonly }
steps: [ cmake_test ]
sanitize-clang10:
description: Build and run tests on clang 10 and AVX 2 with a cmake sanitize build
Expand All @@ -201,13 +201,21 @@ jobs:
threadsanitize-gcc10:
description: Build and run tests on GCC 10 and AVX 2 with a cmake sanitize build
executor: gcc10
environment: { CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE_THREADS=ON, BUILD_FLAGS: "", CTEST_FLAGS: --output-on-failure -LE explicitonly }
environment: { CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE_THREADS=ON, CTEST_FLAGS: --output-on-failure -LE explicitonly }
steps: [ cmake_test ]
threadsanitize-clang10:
description: Build and run tests on clang 10 and AVX 2 with a cmake sanitize build
executor: clang10
environment: { CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE_THREADS=ON, CTEST_FLAGS: --output-on-failure -LE explicitonly }
steps: [ cmake_test ]
nocheckeof-clang10:
description: Validate that when __SIMDJSON_CHECK_EOF=0, everything still succeeds
environment:
CXXFLAGS: -D__SIMDJSON_CHECK_EOF=0
CMAKE_BUILD_FLAGS: --target ondemand_tests
CTEST_FLAGS: --output-on-failure -R ondemand_
executor: clang10
steps: [ cmake_test ]
# dynamic
dynamic-gcc10:
description: Build and run tests on GCC 10 and AVX 2 with a cmake dynamic build
Expand Down Expand Up @@ -262,7 +270,7 @@ jobs:
sanitize-haswell-gcc10:
description: Build and run tests on GCC 10 and AVX 2 with a cmake sanitize build
executor: gcc10
environment: { CXXFLAGS: -march=haswell, CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE=ON, BUILD_FLAGS: "", CTEST_FLAGS: --output-on-failure -LE explicitonly }
environment: { CXXFLAGS: -march=haswell, CMAKE_FLAGS: -DBUILD_SHARED_LIBS=ON -DSIMDJSON_SANITIZE=ON, CTEST_FLAGS: --output-on-failure -LE explicitonly }
steps: [ cmake_test ]
sanitize-haswell-clang10:
description: Build and run tests on clang 10 and AVX 2 with a cmake sanitize build
Expand Down
8 changes: 8 additions & 0 deletions include/simdjson/common_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ char *to_chars(char *first, const char *last, double value);
* Defined in src/from_chars
*/
double from_chars(const char *first) noexcept;
double from_chars(const char *first, const char* end) noexcept;

}

#ifndef SIMDJSON_EXCEPTIONS
Expand Down Expand Up @@ -251,6 +253,12 @@ namespace std {
#endif
#endif

// Feature flag for partially-implemented "don't require padding" feature
// TODO remove once feature complete.
//
// The flag defaults to 1 unless the build has already defined it (for example
// by passing -D__SIMDJSON_CHECK_EOF=0 on the compiler command line).
// NOTE(review): presumably 1 enables the end-of-input checks and 0 disables
// them -- confirm against the code that consumes this macro.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++; consider a non-reserved name if this flag
// outlives its temporary status.
#ifndef __SIMDJSON_CHECK_EOF
# define __SIMDJSON_CHECK_EOF 1
#endif

#if SIMDJSON_CPLUSPLUS17
// if we have C++17, then [[fallthrough]] is a standard attribute
# define simdjson_fallthrough [[fallthrough]]
Expand Down
55 changes: 28 additions & 27 deletions include/simdjson/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,34 @@ namespace simdjson {
* All possible errors returned by simdjson.
*/
enum error_code {
SUCCESS = 0, ///< No error
CAPACITY, ///< This parser can't support a document that big
MEMALLOC, ///< Error allocating memory, most likely out of memory
TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error
DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation
STRING_ERROR, ///< Problem while parsing a string
T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't'
F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f'
N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n'
NUMBER_ERROR, ///< Problem while parsing a number
UTF8_ERROR, ///< the input is not valid UTF-8
UNINITIALIZED, ///< unknown error, or uninitialized document
EMPTY, ///< no structural element found
UNESCAPED_CHARS, ///< found unescaped characters in a string.
UNCLOSED_STRING, ///< missing quote at the end
UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture
INCORRECT_TYPE, ///< JSON element has a different type than user expected
NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits
INDEX_OUT_OF_BOUNDS, ///< JSON array index too large
NO_SUCH_FIELD, ///< JSON field not found in object
IO_ERROR, ///< Error reading a file
INVALID_JSON_POINTER, ///< Invalid JSON pointer reference
INVALID_URI_FRAGMENT, ///< Invalid URI fragment
UNEXPECTED_ERROR, ///< indicative of a bug in simdjson
PARSER_IN_USE, ///< parser is already in use.
OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order
INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it.
SUCCESS = 0, ///< No error
CAPACITY, ///< This parser can't support a document that big
MEMALLOC, ///< Error allocating memory, most likely out of memory
TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error
DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation
STRING_ERROR, ///< Problem while parsing a string
T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't'
F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f'
N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n'
NUMBER_ERROR, ///< Problem while parsing a number
UTF8_ERROR, ///< the input is not valid UTF-8
UNINITIALIZED, ///< unknown error, or uninitialized document
EMPTY, ///< no structural element found
UNESCAPED_CHARS, ///< found unescaped characters in a string.
UNCLOSED_STRING, ///< missing quote at the end
UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture
INCORRECT_TYPE, ///< JSON element has a different type than user expected
NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits
INDEX_OUT_OF_BOUNDS, ///< JSON array index too large
NO_SUCH_FIELD, ///< JSON field not found in object
IO_ERROR, ///< Error reading a file
INVALID_JSON_POINTER, ///< Invalid JSON pointer reference
INVALID_URI_FRAGMENT, ///< Invalid URI fragment
UNEXPECTED_ERROR, ///< indicative of a bug in simdjson
PARSER_IN_USE, ///< parser is already in use.
OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order
INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it.
INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early.
NUM_ERROR_CODES
};

Expand Down
192 changes: 192 additions & 0 deletions include/simdjson/generic/numberparsing.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,20 @@ static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) {
// to handle that max may be a macro on windows).
return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest());
}
// Slow-path double parser for the bounded input [ptr, end_ptr).
//
// Delegates to simdjson::internal::from_chars, stores the parsed value in
// *outDouble and reports whether that value is acceptable (i.e. not infinite).
//
// Returns true when the stored value is within
// [numeric_limits<double>::lowest(), numeric_limits<double>::max()],
// false when it lies outside that range.
static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) {
  const char *const first = reinterpret_cast<const char *>(ptr);
  const char *const last = reinterpret_cast<const char *>(end_ptr);
  *outDouble = simdjson::internal::from_chars(first, last);

  // We do not accept infinite values. The natural test would be
  //   return std::isfinite(*outDouble);
  // but std::isfinite mysteriously misbehaves under legacy/old libc++
  // libraries, see https://github.com/simdjson/simdjson/issues/1286
  //
  // So the range is compared by hand instead. The parentheses around
  // numeric_limits<double>::max keep a function-like `max` macro (windows)
  // from being expanded.
  const double parsed = *outDouble;
  const bool above_max = parsed > (std::numeric_limits<double>::max)();
  const bool below_lowest = parsed < std::numeric_limits<double>::lowest();
  return !(above_max || below_lowest);
}

// check quickly whether the next 8 chars are made of digits
// at a glance, it looks better than Mula's
Expand Down Expand Up @@ -709,6 +723,56 @@ simdjson_unused simdjson_really_inline simdjson_result<uint64_t> parse_unsigned(
return i;
}


// Parse an unsigned integer in the range 0 to 18,446,744,073,709,551,615
// from the bounded input [src, src_end).
//
// The byte at src_end (and anything after it) is never read.
//
// Returns the parsed value, or
//  - INCORRECT_TYPE when there are no digits, more than 20 digits, or the
//    value does not fit in a uint64_t,
//  - NUMBER_ERROR when a leading zero is followed by more digits,
//  - whatever integer_string_finisher reports for the byte that follows the
//    digits, when that byte is inside the input and is not SUCCESS.
simdjson_unused simdjson_really_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept {
  //
  // Accumulate the digits.
  //
  // PERF NOTE: is_made_of_eight_digits_fast is deliberately not used because
  // large integers like 123456789 are rare.
  uint64_t value = 0;
  const uint8_t *cursor = src;
  for (; cursor != src_end; cursor++) {
    if (!parse_digit(*cursor, value)) { break; }
  }

  // size_t is unsigned, so the two range tests below can be merged by the
  // compiler into a single branch. The longest uint64_t has 20 digits.
  const size_t num_digits = size_t(cursor - src);
  if (num_digits == 0) { return INCORRECT_TYPE; }
  if (num_digits > 20) { return INCORRECT_TYPE; }

  // At least one digit exists here, so *src is readable. "0" must stand alone.
  const bool has_leading_zero = ('0' == *src);
  if (has_leading_zero && (num_digits > 1)) { return NUMBER_ERROR; }

  // The character after the digits decides whether this really was an
  // integer. The table lookup is equivalent to:
  //   if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //     return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  //   }
  // When the digits run right up to src_end there is nothing to inspect.
  if (cursor != src_end) {
    if (integer_string_finisher[*cursor] != SUCCESS) {
      return error_code(integer_string_finisher[*cursor]);
    }
  }

  if (num_digits == 20) {
    // Overflow detection for 20-digit inputs:
    // - Anything starting with 2-9 exceeds 18,446,744,073,709,551,615.
    // - For inputs starting with 1, a wrapped accumulator (i*10+digit) ends up
    //   below 1,553,255,926,290,448,384, which is far smaller than the
    //   smallest genuine 20-digit number, 10,000,000,000,000,000,000.
    // - INT64_MAX sits between those two, so a value that is <= INT64_MAX
    //   can only be the product of a wrap-around.
    const bool starts_with_one = (src[0] == uint8_t('1'));
    if (!starts_with_one || value <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
  }

  return value;
}

// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
simdjson_unused simdjson_really_inline simdjson_result<int64_t> parse_integer(const uint8_t *src) noexcept {
//
Expand Down Expand Up @@ -751,6 +815,50 @@ simdjson_unused simdjson_really_inline simdjson_result<int64_t> parse_integer(co
return negative ? (~i+1) : i;
}

// Parse a signed integer in the range
// -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
// from the bounded input [src, src_end).
//
// The byte at src_end (and anything after it) is never read.
//
// Returns the parsed value, or
//  - NUMBER_ERROR when the input is empty or a leading zero is followed by
//    more digits,
//  - INCORRECT_TYPE when there are no digits, more than 19 digits, or the
//    value does not fit in an int64_t,
//  - whatever integer_string_finisher reports for the byte that follows the
//    digits, when that byte is inside the input and is not SUCCESS.
simdjson_unused simdjson_really_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept {
  // Nothing to look at: not even a sign.
  if (src == src_end) { return NUMBER_ERROR; }

  //
  // Optional minus sign.
  //
  const bool is_negative = (*src == '-');
  const uint8_t *const first_digit = src + is_negative;

  //
  // Accumulate the digits.
  //
  // PERF NOTE: is_made_of_eight_digits_fast is deliberately not used because
  // large integers like 123456789 are rare.
  uint64_t magnitude = 0;
  const uint8_t *cursor = first_digit;
  for (; cursor != src_end; cursor++) {
    if (!parse_digit(*cursor, magnitude)) { break; }
  }

  // No int64_t needs more than 19 digits. size_t is unsigned, so the compiler
  // can fold both range tests into one branch.
  const size_t max_digits = 19;
  const size_t num_digits = size_t(cursor - first_digit);
  if ((num_digits == 0) || (num_digits > max_digits)) { return INCORRECT_TYPE; }

  // At least one digit exists here, so *first_digit is readable.
  // "0" must stand alone.
  if ((num_digits > 1) && ('0' == *first_digit)) { return NUMBER_ERROR; }

  // The character after the digits decides whether this really was an
  // integer. The table lookup is equivalent to:
  //   if (!jsoncharutils::is_structural_or_whitespace(*p)) {
  //     return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
  //   }
  // When the digits run right up to src_end there is nothing to inspect.
  if (cursor != src_end) {
    if (integer_string_finisher[*cursor] != SUCCESS) {
      return error_code(integer_string_finisher[*cursor]);
    }
  }

  // Negative numbers can go down to -INT64_MAX - 1, whereas positive numbers
  // stop at INT64_MAX. The test only matters for 19-digit inputs, but it is
  // cheap enough to run unconditionally.
  const uint64_t largest_magnitude = uint64_t(INT64_MAX) + uint64_t(is_negative);
  if (magnitude > largest_magnitude) { return INCORRECT_TYPE; }

  // Two's complement negation performed on the unsigned accumulator.
  return is_negative ? (~magnitude + 1) : magnitude;
}

simdjson_unused simdjson_really_inline simdjson_result<double> parse_double(const uint8_t * src) noexcept {
//
// Check for minus sign
Expand Down Expand Up @@ -828,6 +936,90 @@ simdjson_unused simdjson_really_inline simdjson_result<double> parse_double(cons
}
return d;
}


// Parse a JSON number as a double from the bounded input [src, src_end).
//
// The byte at src_end (and anything after it) is never read.
//
// Returns the parsed value, or
//  - NUMBER_ERROR when the input is empty, consists only of a minus sign, has
//    a leading zero followed by more digits, has a '.' without decimal digits,
//    has an exponent marker without (or with more than 19) exponent digits, is
//    followed by a byte that is neither structural nor whitespace, or when the
//    slow-path parser rejects the value (e.g. it is infinite),
//  - INCORRECT_TYPE when no integer digit is present.
simdjson_unused simdjson_really_inline simdjson_result<double> parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept {
  if(src == src_end) { return NUMBER_ERROR; }
  //
  // Check for minus sign
  //
  bool negative = (*src == '-');
  src += negative;

  //
  // Parse the integer part.
  //
  uint64_t i = 0;
  const uint8_t *p = src;
  if(p == src_end) { return NUMBER_ERROR; }
  p += parse_digit(*p, i);
  bool leading_zero = (i == 0);
  while ((p != src_end) && parse_digit(*p, i)) { p++; }
  // no integer digits, or 0123 (zero must be solo)
  if ( p == src ) { return INCORRECT_TYPE; }
  if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }

  //
  // Parse the decimal part.
  //
  int64_t exponent = 0;
  bool overflow;
  if (simdjson_likely((p != src_end) && (*p == '.'))) {
    p++;
    const uint8_t *start_decimal_digits = p;
    if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
    p++;
    while ((p != src_end) && parse_digit(*p, i)) { p++; }
    exponent = -(p - start_decimal_digits);

    // Overflow check. More than 19 digits (minus the decimal) may be overflow.
    overflow = p-src-1 > 19;
    if (simdjson_unlikely(overflow && leading_zero)) {
      // The integer part is the single digit 0, so zeros right after the
      // decimal point do not contribute to the mantissa either. Skip them and
      // count only the digits that remain.
      //
      // The scan is bounded by p (the end of the decimal digits, which is at
      // most src_end) so that an all-zero fraction never makes us read at
      // src_end or beyond.
      const uint8_t *start_digits = start_decimal_digits;
      while ((start_digits != p) && (*start_digits == '0')) { start_digits++; }
      // What can overflow the 64-bit mantissa is the number of digits from the
      // first non-zero digit up to p, not the number of zeros skipped.
      overflow = p-start_digits > 19;
    }
  } else {
    overflow = p-src > 19;
  }

  //
  // Parse the exponent
  //
  if ((p != src_end) && (*p == 'e' || *p == 'E')) {
    p++;
    if(p == src_end) { return NUMBER_ERROR; }
    bool exp_neg = *p == '-';
    p += exp_neg || *p == '+';

    uint64_t exp = 0;
    const uint8_t *start_exp_digits = p;
    while ((p != src_end) && parse_digit(*p, exp)) { p++; }
    // no exp digits, or 20+ exp digits
    if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }

    exponent += exp_neg ? 0-exp : exp;
  }

  // Whatever follows the number (if anything is left inside the input) must be
  // a structural character or whitespace.
  if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; }

  overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;

  //
  // Assemble (or slow-parse) the float
  //
  double d;
  if (simdjson_likely(!overflow)) {
    if (compute_float_64(exponent, i, negative, d)) { return d; }
  }
  // Slow path: re-parse the original text, including the minus sign if any.
  if (!parse_float_fallback(src-negative, src_end, &d)) {
    return NUMBER_ERROR;
  }
  return d;
}

} //namespace {}
#endif // SIMDJSON_SKIPNUMBERPARSING

Expand Down
Loading