From 13345cab65952a182d0d6cdb90f90d120ed59d7b Mon Sep 17 00:00:00 2001 From: Pavel Novikov Date: Thu, 18 Sep 2025 21:28:20 +0300 Subject: [PATCH 01/38] added template overload for `integer_times_pow10()` --- include/fast_float/fast_float.h | 14 ++++++++ include/fast_float/parse_number.h | 57 +++++++++++++++++++++++++------ 2 files changed, 60 insertions(+), 11 deletions(-) diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index a190d7c8..eb822f58 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -63,6 +63,20 @@ integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept; FASTFLOAT_CONSTEXPR20 inline double integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept; +/** + * This function is a template overload of `integer_times_pow10()` + * that returns a floating-point value of type `T` that is one of + * supported floating-point types (e.g. `double`, `float`). + */ +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept; +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept; + /** * from_chars for integer types. */ diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index a44fef0b..d453c145 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -344,44 +344,79 @@ from_chars(UC const *first, UC const *last, T &value, int base) noexcept { return from_chars_advanced(first, last, value, options); } -FASTFLOAT_CONSTEXPR20 inline double -integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept { - double value; +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept { + T value; if (clinger_fast_path_impl(mantissa, decimal_exponent, false, value)) return value; adjusted_mantissa am = - compute_float>(decimal_exponent, mantissa); + compute_float>(decimal_exponent, mantissa); to_float(false, am, value); return value; } -FASTFLOAT_CONSTEXPR20 inline double -integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept { +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept { const bool is_negative = mantissa < 0; const uint64_t m = static_cast(is_negative ? -mantissa : mantissa); - double value; + T value; if (clinger_fast_path_impl(m, decimal_exponent, is_negative, value)) return value; - adjusted_mantissa am = - compute_float>(decimal_exponent, m); + adjusted_mantissa am = compute_float>(decimal_exponent, m); to_float(is_negative, am, value); return value; } +FASTFLOAT_CONSTEXPR20 inline double +integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(mantissa, decimal_exponent); +} + +FASTFLOAT_CONSTEXPR20 inline double +integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(mantissa, decimal_exponent); +} + // the following overloads are here to avoid surprising ambiguity for int, // unsigned, etc. +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value && + std::is_integral::value && + !std::is_signed::value, + T>::type + integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(static_cast(mantissa), + decimal_exponent); +} + +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value && + std::is_integral::value && + std::is_signed::value, + T>::type + integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(static_cast(mantissa), + decimal_exponent); +} + template -FASTFLOAT_CONSTEXPR20 inline typename std::enable_if< +FASTFLOAT_CONSTEXPR20 typename std::enable_if< std::is_integral::value && !std::is_signed::value, double>::type integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { return integer_times_pow10(static_cast(mantissa), decimal_exponent); } template -FASTFLOAT_CONSTEXPR20 inline typename std::enable_if< +FASTFLOAT_CONSTEXPR20 typename std::enable_if< std::is_integral::value && std::is_signed::value, double>::type integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { return integer_times_pow10(static_cast(mantissa), decimal_exponent); From 01e505797b8639a817e5f4d7e6b593b631f07e93 Mon Sep 17 00:00:00 2001 From: Pavel Novikov Date: Thu, 18 Sep 2025 21:28:50 +0300 Subject: [PATCH 02/38] added tests + some refactoring --- tests/basictest.cpp | 393 +++++++++++++++++++++++++++++++------------- 1 file changed, 283 insertions(+), 110 deletions(-) diff --git a/tests/basictest.cpp b/tests/basictest.cpp index dc117526..15ab344d 100644 --- a/tests/basictest.cpp +++ b/tests/basictest.cpp @@ -1507,9 +1507,20 @@ TEST_CASE("float.inf") { std::errc::result_out_of_range); verify("3.5028234666e38", std::numeric_limits::infinity(), std::errc::result_out_of_range); + // FLT_MAX + 0.00000007e38 + verify("3.40282357e38", std::numeric_limits::infinity(), + std::errc::result_out_of_range); + // FLT_MAX + 0.0000001e38 + verify("3.4028236e38", std::numeric_limits::infinity(), + std::errc::result_out_of_range); } TEST_CASE("float.general") { + // FLT_TRUE_MIN / 2 + verify("0.7006492e-45", 0.f, std::errc::result_out_of_range); + // FLT_TRUE_MIN / 2 + 0.0000001e-45 + verify("0.7006493e-45", 0x1p-149f); + // max verify("340282346638528859811704183484516925440", 0x1.fffffep+127f); // -max @@ -2086,12 +2097,11 @@ TEST_CASE("bfloat16.general") { } #endif -template -void verify_integer_multiplication_by_power_of_10(Int mantissa, - int decimal_exponent, - double expected) { - const double actual = - fast_float::integer_times_pow10(mantissa, decimal_exponent); +template +void verify_integer_times_pow10_result(Int mantissa, int decimal_exponent, + T actual, U expected) { + static_assert(std::is_same::value, + "expected and actual types must match"); INFO("m * 10^e=" << mantissa << " * 10^" << decimal_exponent << "\n" @@ -2105,45 +2115,173 @@ void verify_integer_multiplication_by_power_of_10(Int mantissa, CHECK_EQ(actual, expected); } -template -void verify_integer_multiplication_by_power_of_10(Int mantissa, - int decimal_exponent) { +template +T calculate_integer_times_pow10_expected_result(Int mantissa, + int decimal_exponent) { std::string constructed_string = std::to_string(mantissa) + "e" + std::to_string(decimal_exponent); - double expected_result; + T expected_result; const auto result = fast_float::from_chars( constructed_string.data(), constructed_string.data() + constructed_string.size(), expected_result); if (result.ec != std::errc()) INFO("Failed to parse: " << constructed_string); - verify_integer_multiplication_by_power_of_10(mantissa, decimal_exponent, - expected_result); + return expected_result; } +template +void verify_integer_times_pow10_dflt(Int mantissa, int decimal_exponent, + double expected) { + static_assert(std::is_integral::value); + + // the "default" overload + const double actual = + fast_float::integer_times_pow10(mantissa, decimal_exponent); + + verify_integer_times_pow10_result(mantissa, decimal_exponent, actual, + expected); +} + +template +void verify_integer_times_pow10_dflt(Int mantissa, int decimal_exponent) { + static_assert(std::is_integral::value); + + const auto expected_result = + calculate_integer_times_pow10_expected_result(mantissa, + decimal_exponent); + + verify_integer_times_pow10_dflt(mantissa, decimal_exponent, expected_result); +} + +template +void verify_integer_times_pow10(Int mantissa, int decimal_exponent, + T expected) { + static_assert(std::is_floating_point::value); + static_assert(std::is_integral::value); + + // explicit specialization + const auto actual = + fast_float::integer_times_pow10(mantissa, decimal_exponent); + + verify_integer_times_pow10_result(mantissa, decimal_exponent, actual, + expected); +} + +template +void verify_integer_times_pow10(Int mantissa, int decimal_exponent) { + static_assert(std::is_floating_point::value); + static_assert(std::is_integral::value); + + const auto expected_result = calculate_integer_times_pow10_expected_result( + mantissa, decimal_exponent); + + verify_integer_times_pow10(mantissa, decimal_exponent, expected_result); +} + +namespace all_supported_types { +template +void verify_integer_times_pow10(Int mantissa, int decimal_exponent) { + static_assert(std::is_integral::value); + + // verify the "default" overload + verify_integer_times_pow10_dflt(mantissa, decimal_exponent); + + // verify explicit specializations + ::verify_integer_times_pow10(mantissa, decimal_exponent); + ::verify_integer_times_pow10(mantissa, decimal_exponent); +#if defined(__STDCPP_FLOAT64_T__) + ::verify_integer_times_pow10(mantissa, decimal_exponent); +#endif +#if defined(__STDCPP_FLOAT32_T__) + ::verify_integer_times_pow10(mantissa, decimal_exponent); +#endif +#if defined(__STDCPP_FLOAT16_T__) + ::verify_integer_times_pow10(mantissa, decimal_exponent); +#endif +#if defined(__STDCPP_BFLOAT16_T__) + ::verify_integer_times_pow10(mantissa, decimal_exponent); +#endif +} +} // namespace all_supported_types + TEST_CASE("integer_times_pow10") { - // explicitly verifying API with different types of integers - verify_integer_multiplication_by_power_of_10(31, -1, 3.1); - verify_integer_multiplication_by_power_of_10(-31, -1, -3.1); - verify_integer_multiplication_by_power_of_10(31, -1, 3.1); - verify_integer_multiplication_by_power_of_10(31415, -4, 3.1415); - verify_integer_multiplication_by_power_of_10(-31415, -4, -3.1415); - verify_integer_multiplication_by_power_of_10(31415, -4, 3.1415); - verify_integer_multiplication_by_power_of_10(314159265, -8, - 3.14159265); - verify_integer_multiplication_by_power_of_10(-314159265, -8, - -3.14159265); - verify_integer_multiplication_by_power_of_10(3141592653, -9, - 3.141592653); - verify_integer_multiplication_by_power_of_10( - 3141592653589793238, -18, 3.141592653589793238); - verify_integer_multiplication_by_power_of_10( - -3141592653589793238, -18, -3.141592653589793238); - verify_integer_multiplication_by_power_of_10( - 3141592653589793238, -18, 3.141592653589793238); - verify_integer_multiplication_by_power_of_10( - -3141592653589793238, -18, -3.141592653589793238); - verify_integer_multiplication_by_power_of_10( + /* explicitly verifying API with different types of integers */ + // double (the "default" overload) + verify_integer_times_pow10_dflt(31, -1, 3.1); + verify_integer_times_pow10_dflt(-31, -1, -3.1); + verify_integer_times_pow10_dflt(31, -1, 3.1); + verify_integer_times_pow10_dflt(31415, -4, 3.1415); + verify_integer_times_pow10_dflt(-31415, -4, -3.1415); + verify_integer_times_pow10_dflt(31415, -4, 3.1415); + verify_integer_times_pow10_dflt(314159265, -8, 3.14159265); + verify_integer_times_pow10_dflt(-314159265, -8, -3.14159265); + verify_integer_times_pow10_dflt(3141592653, -9, 3.141592653); + verify_integer_times_pow10_dflt(314159265, -8, 3.14159265); + verify_integer_times_pow10_dflt(-314159265, -8, -3.14159265); + verify_integer_times_pow10_dflt(3141592653, -9, 3.141592653); + verify_integer_times_pow10_dflt(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10_dflt(-3141592653589793238, -18, + -3.141592653589793238); + verify_integer_times_pow10_dflt(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10_dflt(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10_dflt(-3141592653589793238, -18, + -3.141592653589793238); + verify_integer_times_pow10_dflt(3141592653589793238, -18, + 3.141592653589793238); + // double (explicit specialization) + verify_integer_times_pow10(31, -1, 3.1); + verify_integer_times_pow10(-31, -1, -3.1); + verify_integer_times_pow10(31, -1, 3.1); + verify_integer_times_pow10(31415, -4, 3.1415); + verify_integer_times_pow10(-31415, -4, -3.1415); + verify_integer_times_pow10(31415, -4, 3.1415); + verify_integer_times_pow10(314159265, -8, 3.14159265); + verify_integer_times_pow10(-314159265, -8, -3.14159265); + verify_integer_times_pow10(3141592653, -9, 3.141592653); + verify_integer_times_pow10(314159265, -8, 3.14159265); + verify_integer_times_pow10(-314159265, -8, -3.14159265); + verify_integer_times_pow10(3141592653, -9, + 3.141592653); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10(-3141592653589793238, -18, + -3.141592653589793238); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10(-3141592653589793238, -18, + -3.141592653589793238); + verify_integer_times_pow10( 3141592653589793238, -18, 3.141592653589793238); + // float (explicit specialization) + verify_integer_times_pow10(31, -1, 3.1f); + verify_integer_times_pow10(-31, -1, -3.1f); + verify_integer_times_pow10(31, -1, 3.1f); + verify_integer_times_pow10(31415, -4, 3.1415f); + verify_integer_times_pow10(-31415, -4, -3.1415f); + verify_integer_times_pow10(31415, -4, 3.1415f); + verify_integer_times_pow10(314159265, -8, 3.14159265f); + verify_integer_times_pow10(-314159265, -8, -3.14159265f); + verify_integer_times_pow10(3141592653, -9, 3.14159265f); + verify_integer_times_pow10(314159265, -8, 3.14159265f); + verify_integer_times_pow10(-314159265, -8, -3.14159265f); + verify_integer_times_pow10(3141592653, -9, 3.14159265f); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238f); + verify_integer_times_pow10(-3141592653589793238, -18, + -3.141592653589793238f); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238f); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238f); + verify_integer_times_pow10(-3141592653589793238, -18, + -3.141592653589793238f); + verify_integer_times_pow10( + 3141592653589793238, -18, 3.141592653589793238f); for (int mode : {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, FE_TONEAREST}) { fesetround(mode); @@ -2153,87 +2291,122 @@ TEST_CASE("integer_times_pow10") { ~Guard() { fesetround(FE_TONEAREST); } } guard; - verify_integer_multiplication_by_power_of_10(0, 0); - verify_integer_multiplication_by_power_of_10(1, 0); - verify_integer_multiplication_by_power_of_10(0, 1); - verify_integer_multiplication_by_power_of_10(1, 1); - verify_integer_multiplication_by_power_of_10(-1, 0); - verify_integer_multiplication_by_power_of_10(0, -1); - verify_integer_multiplication_by_power_of_10(-1, -1); - verify_integer_multiplication_by_power_of_10(-1, 1); - verify_integer_multiplication_by_power_of_10(1, -1); - - verify_integer_multiplication_by_power_of_10( + namespace all = all_supported_types; + + all::verify_integer_times_pow10(0, 0); + all::verify_integer_times_pow10(1, 0); + all::verify_integer_times_pow10(0, 1); + all::verify_integer_times_pow10(1, 1); + all::verify_integer_times_pow10(-1, 0); + all::verify_integer_times_pow10(0, -1); + all::verify_integer_times_pow10(-1, -1); + all::verify_integer_times_pow10(-1, 1); + all::verify_integer_times_pow10(1, -1); + + /* denormal min */ + verify_integer_times_pow10_dflt(49406564584124654, -340, + std::numeric_limits::denorm_min()); + verify_integer_times_pow10( 49406564584124654, -340, std::numeric_limits::denorm_min()); - verify_integer_multiplication_by_power_of_10( - 22250738585072014, -324, std::numeric_limits::min()); - verify_integer_multiplication_by_power_of_10( - 17976931348623158, 292, std::numeric_limits::max()); - - // DBL_TRUE_MIN / 2 underflows to 0 - verify_integer_multiplication_by_power_of_10(49406564584124654 / 2, -340, - 0.); - - // DBL_TRUE_MIN / 2 + 0.0000000000000001e-324 rounds to DBL_TRUE_MIN - verify_integer_multiplication_by_power_of_10( + verify_integer_times_pow10(14012984, -52, + std::numeric_limits::denorm_min()); + + /* normal min */ + verify_integer_times_pow10_dflt(22250738585072014, -324, + std::numeric_limits::min()); + verify_integer_times_pow10(22250738585072014, -324, + std::numeric_limits::min()); + verify_integer_times_pow10(11754944, -45, + std::numeric_limits::min()); + + /* max */ + verify_integer_times_pow10_dflt(17976931348623158, 292, + std::numeric_limits::max()); + verify_integer_times_pow10(17976931348623158, 292, + std::numeric_limits::max()); + verify_integer_times_pow10(34028235, 31, + std::numeric_limits::max()); + + /* underflow */ + // (DBL_TRUE_MIN / 2) underflows to 0 + verify_integer_times_pow10_dflt(49406564584124654 / 2, -340, 0.); + verify_integer_times_pow10(49406564584124654 / 2, -340, 0.); + // (FLT_TRUE_MIN / 2) underflows to 0 + verify_integer_times_pow10(14012984 / 2, -52, 0.f); + + /* rounding to denormal min */ + // (DBL_TRUE_MIN / 2 + 0.0000000000000001e-324) rounds to DBL_TRUE_MIN + verify_integer_times_pow10_dflt(49406564584124654 / 2 + 1, -340, + std::numeric_limits::denorm_min()); + verify_integer_times_pow10( 49406564584124654 / 2 + 1, -340, std::numeric_limits::denorm_min()); - - // DBL_MAX + 0.0000000000000001e308 overflows to infinity - verify_integer_multiplication_by_power_of_10( - 17976931348623158 + 1, 292, std::numeric_limits::infinity()); - // DBL_MAX + 0.00000000000000001e308 overflows to infinity - verify_integer_multiplication_by_power_of_10( - 179769313486231580 + 1, 291, std::numeric_limits::infinity()); + // (FLT_TRUE_MIN / 2 + 0.0000001e-45) rounds to FLT_TRUE_MIN + verify_integer_times_pow10(14012984 / 2 + 1, -52, + std::numeric_limits::denorm_min()); + + /* overflow */ + // (DBL_MAX + 0.0000000000000001e308) overflows to infinity + verify_integer_times_pow10_dflt(17976931348623158 + 1, 292, + std::numeric_limits::infinity()); + verify_integer_times_pow10(17976931348623158 + 1, 292, + std::numeric_limits::infinity()); + // (DBL_MAX + 0.00000000000000001e308) overflows to infinity + verify_integer_times_pow10_dflt(179769313486231580 + 1, 291, + std::numeric_limits::infinity()); + verify_integer_times_pow10(179769313486231580 + 1, 291, + std::numeric_limits::infinity()); + // (FLT_MAX + 0.0000001e38) overflows to infinity + verify_integer_times_pow10(34028235 + 1, 31, + std::numeric_limits::infinity()); + // (FLT_MAX + 0.00000007e38) overflows to infinity + verify_integer_times_pow10(340282350 + 7, 30, + std::numeric_limits::infinity()); // loosely verifying correct rounding of 1 to 64 bits // worth of significant digits - verify_integer_multiplication_by_power_of_10(1, 42); - verify_integer_multiplication_by_power_of_10(1, -42); - verify_integer_multiplication_by_power_of_10(12, 42); - verify_integer_multiplication_by_power_of_10(12, -42); - verify_integer_multiplication_by_power_of_10(123, 42); - verify_integer_multiplication_by_power_of_10(123, -42); - verify_integer_multiplication_by_power_of_10(1234, 42); - verify_integer_multiplication_by_power_of_10(1234, -42); - verify_integer_multiplication_by_power_of_10(12345, 42); - verify_integer_multiplication_by_power_of_10(12345, -42); - verify_integer_multiplication_by_power_of_10(123456, 42); - verify_integer_multiplication_by_power_of_10(123456, -42); - verify_integer_multiplication_by_power_of_10(1234567, 42); - verify_integer_multiplication_by_power_of_10(1234567, -42); - verify_integer_multiplication_by_power_of_10(12345678, 42); - verify_integer_multiplication_by_power_of_10(12345678, -42); - verify_integer_multiplication_by_power_of_10(123456789, 42); - verify_integer_multiplication_by_power_of_10(1234567890, 42); - verify_integer_multiplication_by_power_of_10(1234567890, -42); - verify_integer_multiplication_by_power_of_10(12345678901, 42); - verify_integer_multiplication_by_power_of_10(12345678901, -42); - verify_integer_multiplication_by_power_of_10(123456789012, 42); - verify_integer_multiplication_by_power_of_10(123456789012, -42); - verify_integer_multiplication_by_power_of_10(1234567890123, 42); - verify_integer_multiplication_by_power_of_10(1234567890123, -42); - verify_integer_multiplication_by_power_of_10(12345678901234, 42); - verify_integer_multiplication_by_power_of_10(12345678901234, -42); - verify_integer_multiplication_by_power_of_10(123456789012345, 42); - verify_integer_multiplication_by_power_of_10(123456789012345, -42); - verify_integer_multiplication_by_power_of_10(1234567890123456, 42); - verify_integer_multiplication_by_power_of_10(1234567890123456, -42); - verify_integer_multiplication_by_power_of_10(12345678901234567, 42); - verify_integer_multiplication_by_power_of_10(12345678901234567, -42); - verify_integer_multiplication_by_power_of_10(123456789012345678, 42); - verify_integer_multiplication_by_power_of_10(123456789012345678, -42); - verify_integer_multiplication_by_power_of_10(1234567890123456789, 42); - verify_integer_multiplication_by_power_of_10(1234567890123456789, -42); - verify_integer_multiplication_by_power_of_10(12345678901234567890ull, 42); - verify_integer_multiplication_by_power_of_10(12345678901234567890ull, -42); - verify_integer_multiplication_by_power_of_10( - std::numeric_limits::max(), 42); - verify_integer_multiplication_by_power_of_10( - std::numeric_limits::max(), -42); - verify_integer_multiplication_by_power_of_10( - std::numeric_limits::max(), 42); - verify_integer_multiplication_by_power_of_10( - std::numeric_limits::max(), -42); + all::verify_integer_times_pow10(1, 42); + all::verify_integer_times_pow10(1, -42); + all::verify_integer_times_pow10(12, 42); + all::verify_integer_times_pow10(12, -42); + all::verify_integer_times_pow10(123, 42); + all::verify_integer_times_pow10(123, -42); + all::verify_integer_times_pow10(1234, 42); + all::verify_integer_times_pow10(1234, -42); + all::verify_integer_times_pow10(12345, 42); + all::verify_integer_times_pow10(12345, -42); + all::verify_integer_times_pow10(123456, 42); + all::verify_integer_times_pow10(123456, -42); + all::verify_integer_times_pow10(1234567, 42); + all::verify_integer_times_pow10(1234567, -42); + all::verify_integer_times_pow10(12345678, 42); + all::verify_integer_times_pow10(12345678, -42); + all::verify_integer_times_pow10(123456789, 42); + all::verify_integer_times_pow10(1234567890, 42); + all::verify_integer_times_pow10(1234567890, -42); + all::verify_integer_times_pow10(12345678901, 42); + all::verify_integer_times_pow10(12345678901, -42); + all::verify_integer_times_pow10(123456789012, 42); + all::verify_integer_times_pow10(123456789012, -42); + all::verify_integer_times_pow10(1234567890123, 42); + all::verify_integer_times_pow10(1234567890123, -42); + all::verify_integer_times_pow10(12345678901234, 42); + all::verify_integer_times_pow10(12345678901234, -42); + all::verify_integer_times_pow10(123456789012345, 42); + all::verify_integer_times_pow10(123456789012345, -42); + all::verify_integer_times_pow10(1234567890123456, 42); + all::verify_integer_times_pow10(1234567890123456, -42); + all::verify_integer_times_pow10(12345678901234567, 42); + all::verify_integer_times_pow10(12345678901234567, -42); + all::verify_integer_times_pow10(123456789012345678, 42); + all::verify_integer_times_pow10(123456789012345678, -42); + all::verify_integer_times_pow10(1234567890123456789, 42); + all::verify_integer_times_pow10(1234567890123456789, -42); + all::verify_integer_times_pow10(12345678901234567890ull, 42); + all::verify_integer_times_pow10(12345678901234567890ull, -42); + all::verify_integer_times_pow10(std::numeric_limits::max(), 42); + all::verify_integer_times_pow10(std::numeric_limits::max(), -42); + all::verify_integer_times_pow10(std::numeric_limits::max(), 42); + all::verify_integer_times_pow10(std::numeric_limits::max(), -42); } } \ No newline at end of file From e4b949e55b7b29229966b7d19b15477abd21ae4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jeremy=20B=C3=ADcha?= Date: Wed, 24 Sep 2025 16:08:34 -0400 Subject: [PATCH 03/38] README.md: update repology badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8fdddbc5..b3dba46c 100644 --- a/README.md +++ b/README.md @@ -443,7 +443,7 @@ framework](https://github.com/microsoft/LightGBM). Packages ------ -[![Packaging status](https://repology.org/badge/vertical-allrepos/fastfloat.svg)](https://repology.org/project/fastfloat/versions) +[![Packaging status](https://repology.org/badge/vertical-allrepos/fast-float.svg)](https://repology.org/project/fast-float/versions) ## References From 7abb574ffc55080713b528b6b0e271c541f07a53 Mon Sep 17 00:00:00 2001 From: Pavel Novikov Date: Mon, 29 Sep 2025 13:00:28 +0300 Subject: [PATCH 04/38] added doc to README and examples --- README.md | 17 +++++++++++++++++ tests/example_integer_times_pow10.cpp | 26 +++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8fdddbc5..1c7e7964 100644 --- a/README.md +++ b/README.md @@ -401,6 +401,23 @@ except `fast_float::integer_times_pow10()` does not report out-of-range errors, underflows to zero or overflows to infinity when the resulting value is out of range. +You can use template overloads to get the result converted to different +supported floating-point types: `float`, `double`, etc. +For example, to get result as `float` use +`fast_float::integer_times_pow10()` specialization: +```C++ +const uint64_t W = 1234567; +const int Q = 23; +const double result = fast_float::integer_times_pow10(W, Q); +std::cout.precision(7); +std::cout << "float: " << W << " * 10^" << Q << " = " << result << " (" + << (result == 1234567e23f ? "==" : "!=") << "expected)\n"; +``` +outputs +``` +float: 1234567 * 10^23 = 1.234567e+29 (==expected) +``` + Overloads of `fast_float::integer_times_pow10()` are provided for signed and unsigned integer types: `int64_t`, `uint64_t`, etc. diff --git a/tests/example_integer_times_pow10.cpp b/tests/example_integer_times_pow10.cpp index 3e86826c..785daeca 100644 --- a/tests/example_integer_times_pow10.cpp +++ b/tests/example_integer_times_pow10.cpp @@ -2,7 +2,7 @@ #include -int main() { +void default_overload() { const uint64_t W = 12345678901234567; const int Q = 23; const double result = fast_float::integer_times_pow10(W, Q); @@ -10,3 +10,27 @@ int main() { std::cout << W << " * 10^" << Q << " = " << result << " (" << (result == 12345678901234567e23 ? "==" : "!=") << "expected)\n"; } + +void double_specialization() { + const uint64_t W = 12345678901234567; + const int Q = 23; + const double result = fast_float::integer_times_pow10(W, Q); + std::cout.precision(17); + std::cout << "double: " << W << " * 10^" << Q << " = " << result << " (" + << (result == 12345678901234567e23 ? "==" : "!=") << "expected)\n"; +} + +void float_specialization() { + const uint64_t W = 1234567; + const int Q = 23; + const double result = fast_float::integer_times_pow10(W, Q); + std::cout.precision(7); + std::cout << "float: " << W << " * 10^" << Q << " = " << result << " (" + << (result == 1234567e23f ? "==" : "!=") << "expected)\n"; +} + +int main() { + default_overload(); + double_specialization(); + float_specialization(); +} From e9438e64ba6e537c9e0051b210183231aa27a6e3 Mon Sep 17 00:00:00 2001 From: Pavel Novikov Date: Mon, 29 Sep 2025 19:54:22 +0300 Subject: [PATCH 05/38] fixed copy&paste error and minor mess --- README.md | 10 +++++----- tests/example_integer_times_pow10.cpp | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 1c7e7964..7a06fb06 100644 --- a/README.md +++ b/README.md @@ -406,16 +406,16 @@ supported floating-point types: `float`, `double`, etc. For example, to get result as `float` use `fast_float::integer_times_pow10()` specialization: ```C++ -const uint64_t W = 1234567; +const uint64_t W = 12345678; const int Q = 23; -const double result = fast_float::integer_times_pow10(W, Q); -std::cout.precision(7); +const float result = fast_float::integer_times_pow10(W, Q); +std::cout.precision(9); std::cout << "float: " << W << " * 10^" << Q << " = " << result << " (" - << (result == 1234567e23f ? "==" : "!=") << "expected)\n"; + << (result == 12345678e23f ? "==" : "!=") << "expected)\n"; ``` outputs ``` -float: 1234567 * 10^23 = 1.234567e+29 (==expected) +float: 12345678 * 10^23 = 1.23456782e+30 (==expected) ``` Overloads of `fast_float::integer_times_pow10()` are provided for diff --git a/tests/example_integer_times_pow10.cpp b/tests/example_integer_times_pow10.cpp index 785daeca..0205c275 100644 --- a/tests/example_integer_times_pow10.cpp +++ b/tests/example_integer_times_pow10.cpp @@ -21,12 +21,12 @@ void double_specialization() { } void float_specialization() { - const uint64_t W = 1234567; + const uint64_t W = 12345678; const int Q = 23; - const double result = fast_float::integer_times_pow10(W, Q); - std::cout.precision(7); + const float result = fast_float::integer_times_pow10(W, Q); + std::cout.precision(9); std::cout << "float: " << W << " * 10^" << Q << " = " << result << " (" - << (result == 1234567e23f ? "==" : "!=") << "expected)\n"; + << (result == 12345678e23f ? "==" : "!=") << "expected)\n"; } int main() { From 197c0ffca7c0252581cd73549102f00b834aa341 Mon Sep 17 00:00:00 2001 From: Pavel Novikov Date: Mon, 29 Sep 2025 21:41:56 +0300 Subject: [PATCH 06/38] clang format --- include/fast_float/float_common.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 62d199ca..46d2f1eb 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -198,12 +198,16 @@ using parse_options = parse_options_t; #ifndef FASTFLOAT_ASSERT #define FASTFLOAT_ASSERT(x) \ - { ((void)(x)); } + { \ + ((void)(x)); \ + } #endif #ifndef FASTFLOAT_DEBUG_ASSERT #define FASTFLOAT_DEBUG_ASSERT(x) \ - { ((void)(x)); } + { \ + ((void)(x)); \ + } #endif // rust style `try!()` macro, or `?` operator From fd98fd668931ba1583e3fd851997d6614e7f21a5 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 29 Sep 2025 14:14:01 -0400 Subject: [PATCH 07/38] specialize for std::float32_t and std::float64_t explicitly credit: @lemire --- include/fast_float/float_common.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 46d2f1eb..34eaa017 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -1170,6 +1170,9 @@ static_assert(std::is_same, uint64_t>::value, static_assert( std::numeric_limits::is_iec559, "std::float64_t must fulfill the requirements of IEC 559 (IEEE 754)"); + +template <> +struct binary_format : public binary_format {}; #endif // __STDCPP_FLOAT64_T__ #ifdef __STDCPP_FLOAT32_T__ @@ -1178,6 +1181,9 @@ static_assert(std::is_same, uint32_t>::value, static_assert( std::numeric_limits::is_iec559, "std::float32_t must fulfill the requirements of IEC 559 (IEEE 754)"); + +template <> +struct binary_format : public binary_format {}; #endif // __STDCPP_FLOAT32_T__ #ifdef __STDCPP_FLOAT16_T__ @@ -1249,7 +1255,6 @@ constexpr chars_format adjust_for_feature_macros(chars_format fmt) { ; } } // namespace detail - } // namespace fast_float #endif From 7262d9454ea59af171645f1c71b438a99fb66415 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 29 Sep 2025 15:08:24 -0400 Subject: [PATCH 08/38] lint --- include/fast_float/float_common.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 34eaa017..1316be33 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -198,16 +198,12 @@ using parse_options = parse_options_t; #ifndef FASTFLOAT_ASSERT #define FASTFLOAT_ASSERT(x) \ - { \ - ((void)(x)); \ - } + { ((void)(x)); } #endif #ifndef FASTFLOAT_DEBUG_ASSERT #define FASTFLOAT_DEBUG_ASSERT(x) \ - { \ - ((void)(x)); \ - } + { ((void)(x)); } #endif // rust style `try!()` macro, or `?` operator From 1ea4d2563e9d3aea40464edde7ba5cb7980723f0 Mon Sep 17 00:00:00 2001 From: Pavel Novikov Date: Tue, 30 Sep 2025 12:16:21 +0300 Subject: [PATCH 09/38] made function non-template +fixed a couple of typos --- include/fast_float/digit_comparison.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index d7ef3d9a..03e70dcc 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -38,11 +38,8 @@ constexpr static uint64_t powers_of_ten_uint64[] = {1UL, // this algorithm is not even close to optimized, but it has no practical // effect on performance: in order to have a faster algorithm, we'd need // to slow down performance for faster algorithms, and this is still fast. -template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t -scientific_exponent(parsed_number_string_t &num) noexcept { - uint64_t mantissa = num.mantissa; - int32_t exponent = int32_t(num.exponent); +scientific_exponent(uint64_t mantissa, int32_t exponent) noexcept { while (mantissa >= 10000) { mantissa /= 10000; exponent += 4; @@ -398,7 +395,7 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp))); } - // compare digits, and use it to director rounding + // compare digits, and use it to direct rounding int ord = real_digits.compare(theor_digits); adjusted_mantissa answer = am; round(answer, [ord](adjusted_mantissa &a, int32_t shift) { @@ -419,7 +416,7 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( return answer; } -// parse the significant digits as a big integer to unambiguously round the +// parse the significant digits as a big integer to unambiguously round // the significant digits. here, we are trying to determine how to round // an extended float representation close to `b+h`, halfway between `b` // (the float rounded-down) and `b+u`, the next positive float. this @@ -438,7 +435,8 @@ digit_comp(parsed_number_string_t &num, adjusted_mantissa am) noexcept { // remove the invalid exponent bias am.power2 -= invalid_am_bias; - int32_t sci_exp = scientific_exponent(num); + int32_t sci_exp = + scientific_exponent(num.mantissa, static_cast(num.exponent)); size_t max_digits = binary_format::max_digits(); size_t digits = 0; bigint bigmant; From 88f6c5e3670f728d172fa828af3d10dd88cccb03 Mon Sep 17 00:00:00 2001 From: Pavel Novikov Date: Sat, 4 Oct 2025 14:35:17 +0300 Subject: [PATCH 10/38] Added corner cases around max value/infinity --- tests/basictest.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/basictest.cpp b/tests/basictest.cpp index 15ab344d..1a5537bb 100644 --- a/tests/basictest.cpp +++ b/tests/basictest.cpp @@ -1142,6 +1142,15 @@ TEST_CASE("double.inf") { // DBL_MAX + 0.0000000000000001e308 verify("1.7976931348623159e308", std::numeric_limits::infinity(), std::errc::result_out_of_range); + + // ( (2 - 0.5*2^(−52)) * 2^1023 ) smallest number that overflows to infinity + verify("179769313486231580793728971405303415079934132710037826936173778980444" + "968292764750946649017977587207096330286416692887910946555547851940402" + "630657488671505820681908902000708383676273854845817711531764475730270" + "069855571366959622842914819860834936475292719074168444365510704342711" + "559699508093042880177904174497792", + std::numeric_limits::infinity(), + std::errc::result_out_of_range); } TEST_CASE("double.general") { @@ -1333,6 +1342,15 @@ TEST_CASE("double.general") { std::numeric_limits::infinity(), std::errc::result_out_of_range); verify("-2240084132271013504.131248280843119943687942846658579428", -0x1.f1660a65b00bfp+60); + + // ( (2 - 0.5*2^(−52)) * 2^1023 - 1 ) largest 309 decimal digit number + // that rounds to DBL_MAX + verify("179769313486231580793728971405303415079934132710037826936173778980444" + "968292764750946649017977587207096330286416692887910946555547851940402" + "630657488671505820681908902000708383676273854845817711531764475730270" + "069855571366959622842914819860834936475292719074168444365510704342711" + "559699508093042880177904174497791", + std::numeric_limits::max()); } TEST_CASE("double.decimal_point") { @@ -1513,6 +1531,11 @@ TEST_CASE("float.inf") { // FLT_MAX + 0.0000001e38 verify("3.4028236e38", std::numeric_limits::infinity(), std::errc::result_out_of_range); + + // ( (2 - 0.5*2^(-23)) * 2^127 ) smallest number that overflows to infinity + verify("340282356779733661637539395458142568448", + std::numeric_limits::infinity(), + std::errc::result_out_of_range); } TEST_CASE("float.general") { @@ -1526,6 +1549,11 @@ TEST_CASE("float.general") { // -max verify("-340282346638528859811704183484516925440", -0x1.fffffep+127f); + // ( (2 - 0.5*2^(-23)) * 2^127 - 1 ) largest 39 decimal digits number + // that rounds to FLT_MAX + verify("340282356779733661637539395458142568447", + std::numeric_limits::max()); + verify("-1e-999", -0.0f, std::errc::result_out_of_range); verify("1." "175494140627517859246175898662808184331245864732796240031385942718174" From e77e2bca7ec343bb10999b8438e69cd2185228d7 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 10 Oct 2025 09:23:01 -0400 Subject: [PATCH 11/38] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 4ad0ca82..9a44e5d7 100644 --- a/README.md +++ b/README.md @@ -615,6 +615,11 @@ long digits. The library includes code adapted from Google Wuffs (written by Nigel Tao) which was originally published under the Apache 2.0 license. +## Stars + + +[![Star History Chart](https://api.star-history.com/svg?repos=fastfloat/fast_float&type=Date)](https://www.star-history.com/#fastfloat/fast_float&Date) + ## License From 8ac72791a903ef90cf0297d989036da67ecb17b6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Oct 2025 00:14:47 +0000 Subject: [PATCH 12/38] Bump the github-actions group across 1 directory with 4 updates Bumps the github-actions group with 4 updates in the / directory: [actions/checkout](https://github.com/actions/checkout), [actions/upload-artifact](https://github.com/actions/upload-artifact), [github/codeql-action](https://github.com/github/codeql-action) and [actions/setup-node](https://github.com/actions/setup-node). Updates `actions/checkout` from 4 to 5 - [Release notes](https://github.com/actions/checkout/releases) - [Commits](https://github.com/actions/checkout/compare/v4...v5) Updates `actions/upload-artifact` from 4 to 5 - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v4...v5) Updates `github/codeql-action` from 3 to 4 - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/v3...v4) Updates `actions/setup-node` from 4.4.0 to 6.0.0 - [Release notes](https://github.com/actions/setup-node/releases) - [Commits](https://github.com/actions/setup-node/compare/49933ea5288caeca8642d1e84afbd3f7d6820020...2028fbc5c25fe9cf00d9f06a71cc4710d4507903) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions - dependency-name: actions/upload-artifact dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions - dependency-name: github/codeql-action dependency-version: '4' dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions - dependency-name: actions/setup-node dependency-version: 6.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions ... Signed-off-by: dependabot[bot] --- .github/workflows/cifuzz.yml | 4 ++-- .github/workflows/emscripten.yml | 2 +- .github/workflows/risc.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml index fac5cee3..bd9e1e6c 100644 --- a/.github/workflows/cifuzz.yml +++ b/.github/workflows/cifuzz.yml @@ -20,14 +20,14 @@ jobs: fuzz-seconds: 300 output-sarif: true - name: Upload Crash - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 if: failure() && steps.build.outcome == 'success' with: name: artifacts path: ./out/artifacts - name: Upload Sarif if: always() && steps.build.outcome == 'success' - uses: github/codeql-action/upload-sarif@v3 + uses: github/codeql-action/upload-sarif@v4 with: # Path to SARIF file relative to the root of the repository sarif_file: cifuzz-sarif/results.sarif diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml index 1b00f447..399f0c9e 100644 --- a/.github/workflows/emscripten.yml +++ b/.github/workflows/emscripten.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2 - - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0 + - uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 - uses: mymindstorm/setup-emsdk@6ab9eb1bda2574c4ddb79809fc9247783eaf9021 # v14 - name: Verify run: emcc -v diff --git a/.github/workflows/risc.yml b/.github/workflows/risc.yml index 68e26cb4..8bc85588 100644 --- a/.github/workflows/risc.yml +++ b/.github/workflows/risc.yml @@ -6,7 +6,7 @@ jobs: build: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install packages run: | sudo apt-get update -q -y From 409d6215b4641547ab9765eaccde3cf5075b3482 Mon Sep 17 00:00:00 2001 From: Raine 'Gravecat' Simmons Date: Sat, 22 Nov 2025 16:11:06 +0000 Subject: [PATCH 13/38] Fixes compilation on GCC/MinGW --- include/fast_float/float_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 1316be33..8605e902 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -407,7 +407,7 @@ full_multiplication(uint64_t a, uint64_t b) { answer.high = __umulh(a, b); answer.low = a * b; #elif defined(FASTFLOAT_32BIT) || \ - (defined(_WIN64) && !defined(__clang__) && !defined(_M_ARM64)) + (defined(_WIN64) && !defined(__clang__) && !defined(_M_ARM64) && !defined(__GNUC__)) answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64 #elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__) __uint128_t r = ((__uint128_t)a) * b; From 9d78a01ff7addc33e9fc7a3186c350e6b1165e1d Mon Sep 17 00:00:00 2001 From: Raine 'Gravecat' Simmons Date: Sat, 22 Nov 2025 21:53:37 +0000 Subject: [PATCH 14/38] Fixed formatting with clang-format --- include/fast_float/float_common.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 8605e902..18484a66 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -406,8 +406,8 @@ full_multiplication(uint64_t a, uint64_t b) { // But MinGW on ARM64 doesn't have native support for 64-bit multiplications answer.high = __umulh(a, b); answer.low = a * b; -#elif defined(FASTFLOAT_32BIT) || \ - (defined(_WIN64) && !defined(__clang__) && !defined(_M_ARM64) && !defined(__GNUC__)) +#elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__) && \ + !defined(_M_ARM64) && !defined(__GNUC__)) answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64 #elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__) __uint128_t r = ((__uint128_t)a) * b; From 6b72e26ba798da7273d8af67a372dd7c55f9c809 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 15 Dec 2025 10:28:06 -0500 Subject: [PATCH 15/38] documenting better which types we support --- README.md | 4 +- tests/fast_int.cpp | 195 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 197 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9a44e5d7..b039e634 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,9 @@ requires C++11): from_chars_result from_chars(char const *first, char const *last, float &value, ...); from_chars_result from_chars(char const *first, char const *last, double &value, ...); ``` +If they are available on your system, we also support fixed-width floating-point types such as `std::float64_t`, `std::float32_t`, `std::float16_t`, and `std::bfloat16_t`. -You can also parse integer types: - +You can also parse integer types such as `char`, `short`, `long`, `long long`, `unsigned char`, `unsigned short`, `unsigned long`, `unsigned long long`, `bool` (0/1), `int8_t`, `int16_t`, `int32_t`, `int64_t`, `uint8_t`, `uint16_t`, `uint32_t`, `uint64_t`. ```C++ from_chars_result from_chars(char const *first, char const *last, int &value, ...); from_chars_result from_chars(char const *first, char const *last, unsigned &value, ...); diff --git a/tests/fast_int.cpp b/tests/fast_int.cpp index 49044d36..94e76fdb 100644 --- a/tests/fast_int.cpp +++ b/tests/fast_int.cpp @@ -95,6 +95,201 @@ int main() { } } + // char basic test + std::vector const char_basic_test_expected{0, 10, 40, 100, 9}; + std::vector const char_basic_test{"0", "10 ", "40", + "100 with text", "9.999"}; + + for (std::size_t i = 0; i < char_basic_test.size(); ++i) { + auto const f = char_basic_test[i]; + char result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + + if (answer.ec != std::errc()) { + std::cerr << "could not convert to char for input: \"" << f + << "\" because of invalid argument" << std::endl; + return EXIT_FAILURE; + } else if (result != char_basic_test_expected[i]) { + std::cerr << "result \"" << f << "\" did not match with expected char: " + << static_cast(char_basic_test_expected[i]) << std::endl; + return EXIT_FAILURE; + } + } + + // short basic test + std::vector const short_basic_test_expected{0, 10, -40, 1001, 9}; + std::vector const short_basic_test{ + "0", "10 ", "-40", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < short_basic_test.size(); ++i) { + auto const f = short_basic_test[i]; + short result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + + if (answer.ec != std::errc()) { + std::cerr << "could not convert to short for input: \"" << f + << "\" because of invalid argument" << std::endl; + return EXIT_FAILURE; + } else if (result != short_basic_test_expected[i]) { + std::cerr << "result \"" << f << "\" did not match with expected short: " + << short_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // long basic test + std::vector const long_basic_test_expected{0, 10, -40, 1001, 9}; + std::vector const long_basic_test{ + "0", "10 ", "-40", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < long_basic_test.size(); ++i) { + auto const f = long_basic_test[i]; + long result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + + if (answer.ec != std::errc()) { + std::cerr << "could not convert to long for input: \"" << f + << "\" because of invalid argument" << std::endl; + return EXIT_FAILURE; + } else if (result != long_basic_test_expected[i]) { + std::cerr << "result \"" << f << "\" did not match with expected long: " + << long_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // long long basic test + std::vector const long_long_basic_test_expected{0, 10, -40, 1001, + 9}; + std::vector const long_long_basic_test{ + "0", "10 ", "-40", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < long_long_basic_test.size(); ++i) { + auto const f = long_long_basic_test[i]; + long long result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + + if (answer.ec != std::errc()) { + std::cerr << "could not convert to long long for input: \"" << f + << "\" because of invalid argument" << std::endl; + return EXIT_FAILURE; + } else if (result != long_long_basic_test_expected[i]) { + std::cerr << "result \"" << f + << "\" did not match with expected long long: " + << long_long_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // unsigned char basic test + std::vector const unsigned_char_basic_test_expected{0, 10, 100, + 9}; + std::vector const unsigned_char_basic_test{ + "0", "10 ", "100 with text", "9.999"}; + + for (std::size_t i = 0; i < unsigned_char_basic_test.size(); ++i) { + auto const &f = unsigned_char_basic_test[i]; + unsigned char result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + if (answer.ec != std::errc()) { + std::cerr << "could not convert to unsigned char for input: \"" << f + << "\"" << std::endl; + return EXIT_FAILURE; + } else if (result != unsigned_char_basic_test_expected[i]) { + std::cerr << "result \"" << f + << "\" did not match with expected unsigned char: " + << static_cast(unsigned_char_basic_test_expected[i]) + << std::endl; + return EXIT_FAILURE; + } + } + + // unsigned short basic test + std::vector const unsigned_short_basic_test_expected{0, 10, + 1001, 9}; + std::vector const unsigned_short_basic_test{ + "0", "10 ", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < unsigned_short_basic_test.size(); ++i) { + auto const &f = unsigned_short_basic_test[i]; + unsigned short result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + if (answer.ec != std::errc()) { + std::cerr << "could not convert to unsigned short for input: \"" << f + << "\"" << std::endl; + return EXIT_FAILURE; + } else if (result != unsigned_short_basic_test_expected[i]) { + std::cerr << "result \"" << f + << "\" did not match with expected unsigned short: " + << unsigned_short_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // unsigned long basic test + std::vector const unsigned_long_basic_test_expected{0, 10, + 1001, 9}; + std::vector const unsigned_long_basic_test{ + "0", "10 ", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < unsigned_long_basic_test.size(); ++i) { + auto const &f = unsigned_long_basic_test[i]; + unsigned long result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + if (answer.ec != std::errc()) { + std::cerr << "could not convert to unsigned long for input: \"" << f + << "\"" << std::endl; + return EXIT_FAILURE; + } else if (result != unsigned_long_basic_test_expected[i]) { + std::cerr << "result \"" << f + << "\" did not match with expected unsigned long: " + << unsigned_long_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // unsigned long long basic test + std::vector const unsigned_long_long_basic_test_expected{ + 0, 10, 1001, 9}; + std::vector const unsigned_long_long_basic_test{ + "0", "10 ", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < unsigned_long_long_basic_test.size(); ++i) { + auto const &f = unsigned_long_long_basic_test[i]; + unsigned long long result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + if (answer.ec != std::errc()) { + std::cerr << "could not convert to unsigned long long for input: \"" << f + << "\"" << std::endl; + return EXIT_FAILURE; + } else if (result != unsigned_long_long_basic_test_expected[i]) { + std::cerr << "result \"" << f + << "\" did not match with expected unsigned long long: " + << unsigned_long_long_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // bool basic test + std::vector const bool_basic_test_expected{false, true}; + std::vector const bool_basic_test{"0", "1"}; + + for (std::size_t i = 0; i < bool_basic_test.size(); ++i) { + auto const &f = bool_basic_test[i]; + bool result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + if (answer.ec != std::errc()) { + std::cerr << "could not convert to bool for input: \"" << f << "\"" + << std::endl; + return EXIT_FAILURE; + } else if (result != bool_basic_test_expected[i]) { + std::cerr << "result \"" << f << "\" did not match with expected bool: " + << (bool_basic_test_expected[i] ? "true" : "false") + << std::endl; + return EXIT_FAILURE; + } + } + // int invalid error test std::vector const int_invalid_argument_test{ "text", "text with 1002", "+50", " 50"}; From 1cc6cf5a09c54628ea21a2c212ff449169e476eb Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 16 Dec 2025 20:18:15 -0500 Subject: [PATCH 16/38] Revise reference for Mushtak and Lemire paper Updated reference to include publication details and link. --- script/mushtak_lemire.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/script/mushtak_lemire.py b/script/mushtak_lemire.py index 46c8c645..f03715cd 100644 --- a/script/mushtak_lemire.py +++ b/script/mushtak_lemire.py @@ -1,6 +1,6 @@ # # Reference : -# Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to appear) +# Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback, Software: Practice and Experience 53 (6), 2023 https://arxiv.org/abs/2212.06644 # all_tqs = [] @@ -74,8 +74,8 @@ def convergents(cf): for _, w in convergents(continued_fraction(tq, 2 ** 137)): if w >= 2 ** 64: break - if (tq * w) % 2 ** 137 > 2 ** 137 - 2 ** 64: - print(f"SOLUTION: q={j-342} T[q]={tq} w={w}") - found_solution = True + if (tq * w) % 2 ** 137 > 2 ** 137 - 2 ** 64: + print(f"SOLUTION: q={j-342} T[q]={tq} w={w}") + found_solution = True if not found_solution: print("No solutions!") From b2535ce14c375aa31a7420b058858b725ce38b8c Mon Sep 17 00:00:00 2001 From: Arthur Chan Date: Fri, 19 Dec 2025 03:26:50 +0000 Subject: [PATCH 17/38] oss-fuzz: Add unit test build Signed-off-by: Arthur Chan --- fuzz/build.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fuzz/build.sh b/fuzz/build.sh index 5cbe87aa..cce114d8 100644 --- a/fuzz/build.sh +++ b/fuzz/build.sh @@ -5,4 +5,8 @@ $CXX $CFLAGS $CXXFLAGS \ -c $SRC/fast_float/fuzz/from_chars.cc -o from_chars.o $CXX $CFLAGS $CXXFLAGS $LIB_FUZZING_ENGINE from_chars.o \ - -o $OUT/from_chars \ No newline at end of file + -o $OUT/from_chars + +# Build unit tests +cmake -DFASTFLOAT_TEST=ON -DCMAKE_EXE_LINKER_FLAGS="-lpthread" +make From 0fa058eebbba98135d2f712c02dfca2003f2328c Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 22 Dec 2025 11:52:48 -0500 Subject: [PATCH 18/38] adding bench_ip Co-authored-by: Shikhar --- benchmarks/CMakeLists.txt | 20 +- benchmarks/apple_arm_events.h | 1117 -------------------------------- benchmarks/bench_ip.cpp | 183 ++++++ benchmarks/benchmark.cpp | 4 +- benchmarks/event_counter.h | 181 ------ benchmarks/linux-perf-events.h | 104 --- 6 files changed, 204 insertions(+), 1405 deletions(-) delete mode 100644 benchmarks/apple_arm_events.h create mode 100644 benchmarks/bench_ip.cpp delete mode 100644 benchmarks/event_counter.h delete mode 100644 benchmarks/linux-perf-events.h diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index b4e03954..2f7336b9 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -1,9 +1,27 @@ +include(FetchContent) + +FetchContent_Declare( + counters + GIT_REPOSITORY https://github.com/lemire/counters.git + GIT_TAG v1.0.1 +) + +FetchContent_MakeAvailable(counters) + add_executable(realbenchmark benchmark.cpp) +target_link_libraries(realbenchmark PRIVATE Counters::counters) +add_executable(bench_ip bench_ip.cpp) +target_link_libraries(bench_ip PRIVATE Counters::counters) + set_property( TARGET realbenchmark PROPERTY CXX_STANDARD 17) - +set_property( + TARGET bench_ip + PROPERTY CXX_STANDARD 17) target_link_libraries(realbenchmark PUBLIC fast_float) +target_link_libraries(bench_ip PUBLIC fast_float) + include(ExternalProject) # Define the external project diff --git a/benchmarks/apple_arm_events.h b/benchmarks/apple_arm_events.h deleted file mode 100644 index f127d14d..00000000 --- a/benchmarks/apple_arm_events.h +++ /dev/null @@ -1,1117 +0,0 @@ -// Original design from: -// ============================================================================= -// XNU kperf/kpc -// Available for 64-bit Intel/Apple Silicon, macOS/iOS, with root privileges -// -// References: -// -// XNU source (since xnu 2422.1.72): -// https://github.com/apple/darwin-xnu/blob/main/osfmk/kern/kpc.h -// https://github.com/apple/darwin-xnu/blob/main/bsd/kern/kern_kpc.c -// -// Lightweight PET (Profile Every Thread, since xnu 3789.1.32): -// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/pet.c -// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/kperf_kpc.c -// -// System Private frameworks (since macOS 10.11, iOS 8.0): -// /System/Library/PrivateFrameworks/kperf.framework -// /System/Library/PrivateFrameworks/kperfdata.framework -// -// Xcode framework (since Xcode 7.0): -// /Applications/Xcode.app/Contents/SharedFrameworks/DVTInstrumentsFoundation.framework -// -// CPU database (plist files) -// macOS (since macOS 10.11): -// /usr/share/kpep/.plist -// iOS (copied from Xcode, since iOS 10.0, Xcode 8.0): -// /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform -// /DeviceSupport//DeveloperDiskImage.dmg/usr/share/kpep/.plist -// -// -// Created by YaoYuan on 2021. -// Released into the public domain (unlicense.org). -// ============================================================================= - -#ifndef M1CYCLES_H -#define M1CYCLES_H - -#include -#include -#include -#include -#include - -#include // for dlopen() and dlsym() -#include // for mach_absolute_time() -#include // for kdebug trace decode -#include // for sysctl() -#include // for usleep() - -struct performance_counters { - double cycles; - double branches; - double missed_branches; - double instructions; - - performance_counters(uint64_t c, uint64_t b, uint64_t m, uint64_t i) - : cycles(c), branches(b), missed_branches(m), instructions(i) {} - - performance_counters(double c, double b, double m, double i) - : cycles(c), branches(b), missed_branches(m), instructions(i) {} - - performance_counters(double init) - : cycles(init), branches(init), missed_branches(init), - instructions(init) {} - - inline performance_counters &operator-=(const performance_counters &other) { - cycles -= other.cycles; - branches -= other.branches; - missed_branches -= other.missed_branches; - instructions -= other.instructions; - return *this; - } - - inline performance_counters &min(const performance_counters &other) { - cycles = other.cycles < cycles ? other.cycles : cycles; - branches = other.branches < branches ? other.branches : branches; - missed_branches = other.missed_branches < missed_branches - ? other.missed_branches - : missed_branches; - instructions = - other.instructions < instructions ? other.instructions : instructions; - return *this; - } - - inline performance_counters &operator+=(const performance_counters &other) { - cycles += other.cycles; - branches += other.branches; - missed_branches += other.missed_branches; - instructions += other.instructions; - return *this; - } - - inline performance_counters &operator/=(double numerator) { - cycles /= numerator; - branches /= numerator; - missed_branches /= numerator; - instructions /= numerator; - return *this; - } -}; - -inline performance_counters operator-(const performance_counters &a, - const performance_counters &b) { - return performance_counters(a.cycles - b.cycles, a.branches - b.branches, - a.missed_branches - b.missed_branches, - a.instructions - b.instructions); -} - -typedef float f32; -typedef double f64; -typedef int8_t i8; -typedef uint8_t u8; -typedef int16_t i16; -typedef uint16_t u16; -typedef int32_t i32; -typedef uint32_t u32; -typedef int64_t i64; -typedef uint64_t u64; -typedef size_t usize; - -// ----------------------------------------------------------------------------- -// header (reverse engineered) -// This framework wraps some sysctl calls to communicate with the kpc in kernel. -// Most functions requires root privileges, or process is "blessed". -// ----------------------------------------------------------------------------- - -// Cross-platform class constants. -#define KPC_CLASS_FIXED (0) -#define KPC_CLASS_CONFIGURABLE (1) -#define KPC_CLASS_POWER (2) -#define KPC_CLASS_RAWPMU (3) - -// Cross-platform class mask constants. -#define KPC_CLASS_FIXED_MASK (1u << KPC_CLASS_FIXED) // 1 -#define KPC_CLASS_CONFIGURABLE_MASK (1u << KPC_CLASS_CONFIGURABLE) // 2 -#define KPC_CLASS_POWER_MASK (1u << KPC_CLASS_POWER) // 4 -#define KPC_CLASS_RAWPMU_MASK (1u << KPC_CLASS_RAWPMU) // 8 - -// PMU version constants. -#define KPC_PMU_ERROR (0) // Error -#define KPC_PMU_INTEL_V3 (1) // Intel -#define KPC_PMU_ARM_APPLE (2) // ARM64 -#define KPC_PMU_INTEL_V2 (3) // Old Intel -#define KPC_PMU_ARM_V2 (4) // Old ARM - -// The maximum number of counters we could read from every class in one go. -// ARMV7: FIXED: 1, CONFIGURABLE: 4 -// ARM32: FIXED: 2, CONFIGURABLE: 6 -// ARM64: FIXED: 2, CONFIGURABLE: CORE_NCTRS - FIXED (6 or 8) -// x86: 32 -#define KPC_MAX_COUNTERS 32 - -// Bits for defining what to do on an action. -// Defined in https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/action.h -#define KPERF_SAMPLER_TH_INFO (1U << 0) -#define KPERF_SAMPLER_TH_SNAPSHOT (1U << 1) -#define KPERF_SAMPLER_KSTACK (1U << 2) -#define KPERF_SAMPLER_USTACK (1U << 3) -#define KPERF_SAMPLER_PMC_THREAD (1U << 4) -#define KPERF_SAMPLER_PMC_CPU (1U << 5) -#define KPERF_SAMPLER_PMC_CONFIG (1U << 6) -#define KPERF_SAMPLER_MEMINFO (1U << 7) -#define KPERF_SAMPLER_TH_SCHEDULING (1U << 8) -#define KPERF_SAMPLER_TH_DISPATCH (1U << 9) -#define KPERF_SAMPLER_TK_SNAPSHOT (1U << 10) -#define KPERF_SAMPLER_SYS_MEM (1U << 11) -#define KPERF_SAMPLER_TH_INSCYC (1U << 12) -#define KPERF_SAMPLER_TK_INFO (1U << 13) - -// Maximum number of kperf action ids. -#define KPERF_ACTION_MAX (32) - -// Maximum number of kperf timer ids. -#define KPERF_TIMER_MAX (8) - -// x86/arm config registers are 64-bit -typedef u64 kpc_config_t; - -/// Print current CPU identification string to the buffer (same as snprintf), -/// such as "cpu_7_8_10b282dc_46". This string can be used to locate the PMC -/// database in /usr/share/kpep. -/// @return string's length, or negative value if error occurs. -/// @note This method does not requires root privileges. -/// @details sysctl get(hw.cputype), get(hw.cpusubtype), -/// get(hw.cpufamily), get(machdep.cpu.model) -static int (*kpc_cpu_string)(char *buf, usize buf_size); - -/// Get the version of KPC that's being run. -/// @return See `PMU version constants` above. -/// @details sysctl get(kpc.pmu_version) -static u32 (*kpc_pmu_version)(void); - -/// Get running PMC classes. -/// @return See `class mask constants` above, -/// 0 if error occurs or no class is set. -/// @details sysctl get(kpc.counting) -static u32 (*kpc_get_counting)(void); - -/// Set PMC classes to enable counting. -/// @param classes See `class mask constants` above, set 0 to shutdown counting. -/// @return 0 for success. -/// @details sysctl set(kpc.counting) -static int (*kpc_set_counting)(u32 classes); - -/// Get running PMC classes for current thread. -/// @return See `class mask constants` above, -/// 0 if error occurs or no class is set. -/// @details sysctl get(kpc.thread_counting) -static u32 (*kpc_get_thread_counting)(void); - -/// Set PMC classes to enable counting for current thread. -/// @param classes See `class mask constants` above, set 0 to shutdown counting. -/// @return 0 for success. -/// @details sysctl set(kpc.thread_counting) -static int (*kpc_set_thread_counting)(u32 classes); - -/// Get how many config registers there are for a given mask. -/// For example: Intel may returns 1 for `KPC_CLASS_FIXED_MASK`, -/// returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`. -/// @param classes See `class mask constants` above. -/// @return 0 if error occurs or no class is set. -/// @note This method does not requires root privileges. -/// @details sysctl get(kpc.config_count) -static u32 (*kpc_get_config_count)(u32 classes); - -/// Get config registers. -/// @param classes see `class mask constants` above. -/// @param config Config buffer to receive values, should not smaller than -/// kpc_get_config_count(classes) * sizeof(kpc_config_t). -/// @return 0 for success. -/// @details sysctl get(kpc.config_count), get(kpc.config) -static int (*kpc_get_config)(u32 classes, kpc_config_t *config); - -/// Set config registers. -/// @param classes see `class mask constants` above. -/// @param config Config buffer, should not smaller than -/// kpc_get_config_count(classes) * sizeof(kpc_config_t). -/// @return 0 for success. -/// @details sysctl get(kpc.config_count), set(kpc.config) -static int (*kpc_set_config)(u32 classes, kpc_config_t *config); - -/// Get how many counters there are for a given mask. -/// For example: Intel may returns 3 for `KPC_CLASS_FIXED_MASK`, -/// returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`. -/// @param classes See `class mask constants` above. -/// @note This method does not requires root privileges. -/// @details sysctl get(kpc.counter_count) -static u32 (*kpc_get_counter_count)(u32 classes); - -/// Get counter accumulations. -/// If `all_cpus` is true, the buffer count should not smaller than -/// (cpu_count * counter_count). Otherwize, the buffer count should not smaller -/// than (counter_count). -/// @see kpc_get_counter_count(), kpc_cpu_count(). -/// @param all_cpus true for all CPUs, false for current cpu. -/// @param classes See `class mask constants` above. -/// @param curcpu A pointer to receive current cpu id, can be NULL. -/// @param buf Buffer to receive counter's value. -/// @return 0 for success. -/// @details sysctl get(hw.ncpu), get(kpc.counter_count), get(kpc.counters) -static int (*kpc_get_cpu_counters)(bool all_cpus, u32 classes, int *curcpu, - u64 *buf); - -/// Get counter accumulations for current thread. -/// @param tid Thread id, should be 0. -/// @param buf_count The number of buf's elements (not bytes), -/// should not smaller than kpc_get_counter_count(). -/// @param buf Buffer to receive counter's value. -/// @return 0 for success. -/// @details sysctl get(kpc.thread_counters) -static int (*kpc_get_thread_counters)(u32 tid, u32 buf_count, u64 *buf); - -/// Acquire/release the counters used by the Power Manager. -/// @param val 1:acquire, 0:release -/// @return 0 for success. -/// @details sysctl set(kpc.force_all_ctrs) -static int (*kpc_force_all_ctrs_set)(int val); - -/// Get the state of all_ctrs. -/// @return 0 for success. -/// @details sysctl get(kpc.force_all_ctrs) -static int (*kpc_force_all_ctrs_get)(int *val_out); - -/// Set number of actions, should be `KPERF_ACTION_MAX`. -/// @details sysctl set(kperf.action.count) -static int (*kperf_action_count_set)(u32 count); - -/// Get number of actions. -/// @details sysctl get(kperf.action.count) -static int (*kperf_action_count_get)(u32 *count); - -/// Set what to sample when a trigger fires an action, e.g. -/// `KPERF_SAMPLER_PMC_CPU`. -/// @details sysctl set(kperf.action.samplers) -static int (*kperf_action_samplers_set)(u32 actionid, u32 sample); - -/// Get what to sample when a trigger fires an action. -/// @details sysctl get(kperf.action.samplers) -static int (*kperf_action_samplers_get)(u32 actionid, u32 *sample); - -/// Apply a task filter to the action, -1 to disable filter. -/// @details sysctl set(kperf.action.filter_by_task) -static int (*kperf_action_filter_set_by_task)(u32 actionid, i32 port); - -/// Apply a pid filter to the action, -1 to disable filter. -/// @details sysctl set(kperf.action.filter_by_pid) -static int (*kperf_action_filter_set_by_pid)(u32 actionid, i32 pid); - -/// Set number of time triggers, should be `KPERF_TIMER_MAX`. -/// @details sysctl set(kperf.timer.count) -static int (*kperf_timer_count_set)(u32 count); - -/// Get number of time triggers. -/// @details sysctl get(kperf.timer.count) -static int (*kperf_timer_count_get)(u32 *count); - -/// Set timer number and period. -/// @details sysctl set(kperf.timer.period) -static int (*kperf_timer_period_set)(u32 actionid, u64 tick); - -/// Get timer number and period. -/// @details sysctl get(kperf.timer.period) -static int (*kperf_timer_period_get)(u32 actionid, u64 *tick); - -/// Set timer number and actionid. -/// @details sysctl set(kperf.timer.action) -static int (*kperf_timer_action_set)(u32 actionid, u32 timerid); - -/// Get timer number and actionid. -/// @details sysctl get(kperf.timer.action) -static int (*kperf_timer_action_get)(u32 actionid, u32 *timerid); - -/// Set which timer ID does PET (Profile Every Thread). -/// @details sysctl set(kperf.timer.pet_timer) -static int (*kperf_timer_pet_set)(u32 timerid); - -/// Get which timer ID does PET (Profile Every Thread). -/// @details sysctl get(kperf.timer.pet_timer) -static int (*kperf_timer_pet_get)(u32 *timerid); - -/// Enable or disable sampling. -/// @details sysctl set(kperf.sampling) -static int (*kperf_sample_set)(u32 enabled); - -/// Get is currently sampling. -/// @details sysctl get(kperf.sampling) -static int (*kperf_sample_get)(u32 *enabled); - -/// Reset kperf: stop sampling, kdebug, timers and actions. -/// @return 0 for success. -static int (*kperf_reset)(void); - -/// Nanoseconds to CPU ticks. -static u64 (*kperf_ns_to_ticks)(u64 ns); - -/// CPU ticks to nanoseconds. -static u64 (*kperf_ticks_to_ns)(u64 ticks); - -/// CPU ticks frequency (mach_absolute_time). -static u64 (*kperf_tick_frequency)(void); - -/// Get lightweight PET mode (not in kperf.framework). -static int kperf_lightweight_pet_get(u32 *enabled) { - if (!enabled) - return -1; - usize size = 4; - return sysctlbyname("kperf.lightweight_pet", enabled, &size, NULL, 0); -} - -/// Set lightweight PET mode (not in kperf.framework). -static int kperf_lightweight_pet_set(u32 enabled) { - return sysctlbyname("kperf.lightweight_pet", NULL, NULL, &enabled, 4); -} - -// ----------------------------------------------------------------------------- -// header (reverse engineered) -// This framework provides some functions to access the local CPU database. -// These functions do not require root privileges. -// ----------------------------------------------------------------------------- - -// KPEP CPU archtecture constants. -#define KPEP_ARCH_I386 0 -#define KPEP_ARCH_X86_64 1 -#define KPEP_ARCH_ARM 2 -#define KPEP_ARCH_ARM64 3 - -/// KPEP event (size: 48/28 bytes on 64/32 bit OS) -typedef struct kpep_event { - const char *name; ///< Unique name of a event, such as "INST_RETIRED.ANY". - const char *description; ///< Description for this event. - const char *errata; ///< Errata, currently NULL. - const char *alias; ///< Alias name, such as "Instructions", "Cycles". - const char *fallback; ///< Fallback event name for fixed counter. - u32 mask; - u8 number; - u8 umask; - u8 reserved; - u8 is_fixed; -} kpep_event; - -/// KPEP database (size: 144/80 bytes on 64/32 bit OS) -typedef struct kpep_db { - const char *name; ///< Database name, such as "haswell". - const char *cpu_id; ///< Plist name, such as "cpu_7_8_10b282dc". - const char *marketing_name; ///< Marketing name, such as "Intel Haswell". - void *plist_data; ///< Plist data (CFDataRef), currently NULL. - void *event_map; ///< All events (CFDict). - kpep_event - *event_arr; ///< Event struct buffer (sizeof(kpep_event) * events_count). - kpep_event **fixed_event_arr; ///< Fixed counter events (sizeof(kpep_event *) - ///< * fixed_counter_count) - void *alias_map; ///< All aliases (CFDict). - usize reserved_1; - usize reserved_2; - usize reserved_3; - usize event_count; ///< All events count. - usize alias_count; - usize fixed_counter_count; - usize config_counter_count; - usize power_counter_count; - u32 archtecture; ///< see `KPEP CPU archtecture constants` above. - u32 fixed_counter_bits; - u32 config_counter_bits; - u32 power_counter_bits; -} kpep_db; - -/// KPEP config (size: 80/44 bytes on 64/32 bit OS) -typedef struct kpep_config { - kpep_db *db; - kpep_event **ev_arr; ///< (sizeof(kpep_event *) * counter_count), init NULL - usize *ev_map; ///< (sizeof(usize *) * counter_count), init 0 - usize *ev_idx; ///< (sizeof(usize *) * counter_count), init -1 - u32 *flags; ///< (sizeof(u32 *) * counter_count), init 0 - u64 *kpc_periods; ///< (sizeof(u64 *) * counter_count), init 0 - usize event_count; /// kpep_config_events_count() - usize counter_count; - u32 classes; ///< See `class mask constants` above. - u32 config_counter; - u32 power_counter; - u32 reserved; -} kpep_config; - -/// Error code for kpep_config_xxx() and kpep_db_xxx() functions. -typedef enum { - KPEP_CONFIG_ERROR_NONE = 0, - KPEP_CONFIG_ERROR_INVALID_ARGUMENT = 1, - KPEP_CONFIG_ERROR_OUT_OF_MEMORY = 2, - KPEP_CONFIG_ERROR_IO = 3, - KPEP_CONFIG_ERROR_BUFFER_TOO_SMALL = 4, - KPEP_CONFIG_ERROR_CUR_SYSTEM_UNKNOWN = 5, - KPEP_CONFIG_ERROR_DB_PATH_INVALID = 6, - KPEP_CONFIG_ERROR_DB_NOT_FOUND = 7, - KPEP_CONFIG_ERROR_DB_ARCH_UNSUPPORTED = 8, - KPEP_CONFIG_ERROR_DB_VERSION_UNSUPPORTED = 9, - KPEP_CONFIG_ERROR_DB_CORRUPT = 10, - KPEP_CONFIG_ERROR_EVENT_NOT_FOUND = 11, - KPEP_CONFIG_ERROR_CONFLICTING_EVENTS = 12, - KPEP_CONFIG_ERROR_COUNTERS_NOT_FORCED = 13, - KPEP_CONFIG_ERROR_EVENT_UNAVAILABLE = 14, - KPEP_CONFIG_ERROR_ERRNO = 15, - KPEP_CONFIG_ERROR_MAX -} kpep_config_error_code; - -/// Error description for kpep_config_error_code. -static const char *kpep_config_error_names[KPEP_CONFIG_ERROR_MAX] = { - "none", - "invalid argument", - "out of memory", - "I/O", - "buffer too small", - "current system unknown", - "database path invalid", - "database not found", - "database architecture unsupported", - "database version unsupported", - "database corrupt", - "event not found", - "conflicting events", - "all counters must be forced", - "event unavailable", - "check errno"}; - -/// Error description. -static const char *kpep_config_error_desc(int code) { - if (0 <= code && code < KPEP_CONFIG_ERROR_MAX) { - return kpep_config_error_names[code]; - } - return "unknown error"; -} - -/// Create a config. -/// @param db A kpep db, see kpep_db_create() -/// @param cfg_ptr A pointer to receive the new config. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_create)(kpep_db *db, kpep_config **cfg_ptr); - -/// Free the config. -static void (*kpep_config_free)(kpep_config *cfg); - -/// Add an event to config. -/// @param cfg The config. -/// @param ev_ptr A event pointer. -/// @param flag 0: all, 1: user space only -/// @param err Error bitmap pointer, can be NULL. -/// If return value is `CONFLICTING_EVENTS`, this bitmap contains -/// the conflicted event indices, e.g. "1 << 2" means index 2. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_add_event)(kpep_config *cfg, kpep_event **ev_ptr, - u32 flag, u32 *err); - -/// Remove event at index. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_remove_event)(kpep_config *cfg, usize idx); - -/// Force all counters. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_force_counters)(kpep_config *cfg); - -/// Get events count. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_events_count)(kpep_config *cfg, usize *count_ptr); - -/// Get all event pointers. -/// @param buf A buffer to receive event pointers. -/// @param buf_size The buffer's size in bytes, should not smaller than -/// kpep_config_events_count() * sizeof(void *). -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_events)(kpep_config *cfg, kpep_event **buf, - usize buf_size); - -/// Get kpc register configs. -/// @param buf A buffer to receive kpc register configs. -/// @param buf_size The buffer's size in bytes, should not smaller than -/// kpep_config_kpc_count() * sizeof(kpc_config_t). -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_kpc)(kpep_config *cfg, kpc_config_t *buf, - usize buf_size); - -/// Get kpc register config count. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_kpc_count)(kpep_config *cfg, usize *count_ptr); - -/// Get kpc classes. -/// @param classes See `class mask constants` above. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_kpc_classes)(kpep_config *cfg, u32 *classes_ptr); - -/// Get the index mapping from event to counter. -/// @param buf A buffer to receive indexes. -/// @param buf_size The buffer's size in bytes, should not smaller than -/// kpep_config_events_count() * sizeof(kpc_config_t). -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_kpc_map)(kpep_config *cfg, usize *buf, usize buf_size); - -/// Open a kpep database file in "/usr/share/kpep/" or "/usr/local/share/kpep/". -/// @param name File name, for example "haswell", "cpu_100000c_1_92fb37c8". -/// Pass NULL for current CPU. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_create)(const char *name, kpep_db **db_ptr); - -/// Free the kpep database. -static void (*kpep_db_free)(kpep_db *db); - -/// Get the database's name. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_name)(kpep_db *db, const char **name); - -/// Get the event alias count. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_aliases_count)(kpep_db *db, usize *count); - -/// Get all alias. -/// @param buf A buffer to receive all alias strings. -/// @param buf_size The buffer's size in bytes, -/// should not smaller than kpep_db_aliases_count() * sizeof(void *). -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_aliases)(kpep_db *db, const char **buf, usize buf_size); - -/// Get counters count for given classes. -/// @param classes 1: Fixed, 2: Configurable. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_counters_count)(kpep_db *db, u8 classes, usize *count); - -/// Get all event count. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_events_count)(kpep_db *db, usize *count); - -/// Get all events. -/// @param buf A buffer to receive all event pointers. -/// @param buf_size The buffer's size in bytes, -/// should not smaller than kpep_db_events_count() * sizeof(void *). -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_events)(kpep_db *db, kpep_event **buf, usize buf_size); - -/// Get one event by name. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_event)(kpep_db *db, const char *name, kpep_event **ev_ptr); - -/// Get event's name. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_event_name)(kpep_event *ev, const char **name_ptr); - -/// Get event's alias. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_event_alias)(kpep_event *ev, const char **alias_ptr); - -/// Get event's description. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_event_description)(kpep_event *ev, const char **str_ptr); - -// ----------------------------------------------------------------------------- -// load kperf/kperfdata dynamic library -// ----------------------------------------------------------------------------- - -typedef struct { - const char *name; - void **impl; -} lib_symbol; - -#define lib_nelems(x) (sizeof(x) / sizeof((x)[0])) -#define lib_symbol_def(name) \ - { #name, (void **)&name } - -static const lib_symbol lib_symbols_kperf[] = { - lib_symbol_def(kpc_pmu_version), - lib_symbol_def(kpc_cpu_string), - lib_symbol_def(kpc_set_counting), - lib_symbol_def(kpc_get_counting), - lib_symbol_def(kpc_set_thread_counting), - lib_symbol_def(kpc_get_thread_counting), - lib_symbol_def(kpc_get_config_count), - lib_symbol_def(kpc_get_counter_count), - lib_symbol_def(kpc_set_config), - lib_symbol_def(kpc_get_config), - lib_symbol_def(kpc_get_cpu_counters), - lib_symbol_def(kpc_get_thread_counters), - lib_symbol_def(kpc_force_all_ctrs_set), - lib_symbol_def(kpc_force_all_ctrs_get), - lib_symbol_def(kperf_action_count_set), - lib_symbol_def(kperf_action_count_get), - lib_symbol_def(kperf_action_samplers_set), - lib_symbol_def(kperf_action_samplers_get), - lib_symbol_def(kperf_action_filter_set_by_task), - lib_symbol_def(kperf_action_filter_set_by_pid), - lib_symbol_def(kperf_timer_count_set), - lib_symbol_def(kperf_timer_count_get), - lib_symbol_def(kperf_timer_period_set), - lib_symbol_def(kperf_timer_period_get), - lib_symbol_def(kperf_timer_action_set), - lib_symbol_def(kperf_timer_action_get), - lib_symbol_def(kperf_sample_set), - lib_symbol_def(kperf_sample_get), - lib_symbol_def(kperf_reset), - lib_symbol_def(kperf_timer_pet_set), - lib_symbol_def(kperf_timer_pet_get), - lib_symbol_def(kperf_ns_to_ticks), - lib_symbol_def(kperf_ticks_to_ns), - lib_symbol_def(kperf_tick_frequency), -}; - -static const lib_symbol lib_symbols_kperfdata[] = { - lib_symbol_def(kpep_config_create), - lib_symbol_def(kpep_config_free), - lib_symbol_def(kpep_config_add_event), - lib_symbol_def(kpep_config_remove_event), - lib_symbol_def(kpep_config_force_counters), - lib_symbol_def(kpep_config_events_count), - lib_symbol_def(kpep_config_events), - lib_symbol_def(kpep_config_kpc), - lib_symbol_def(kpep_config_kpc_count), - lib_symbol_def(kpep_config_kpc_classes), - lib_symbol_def(kpep_config_kpc_map), - lib_symbol_def(kpep_db_create), - lib_symbol_def(kpep_db_free), - lib_symbol_def(kpep_db_name), - lib_symbol_def(kpep_db_aliases_count), - lib_symbol_def(kpep_db_aliases), - lib_symbol_def(kpep_db_counters_count), - lib_symbol_def(kpep_db_events_count), - lib_symbol_def(kpep_db_events), - lib_symbol_def(kpep_db_event), - lib_symbol_def(kpep_event_name), - lib_symbol_def(kpep_event_alias), - lib_symbol_def(kpep_event_description), -}; - -#define lib_path_kperf "/System/Library/PrivateFrameworks/kperf.framework/kperf" -#define lib_path_kperfdata \ - "/System/Library/PrivateFrameworks/kperfdata.framework/kperfdata" - -static bool lib_inited = false; -static bool lib_has_err = false; -static char lib_err_msg[256]; - -static void *lib_handle_kperf = NULL; -static void *lib_handle_kperfdata = NULL; - -static void lib_deinit(void) { - lib_inited = false; - lib_has_err = false; - if (lib_handle_kperf) - dlclose(lib_handle_kperf); - if (lib_handle_kperfdata) - dlclose(lib_handle_kperfdata); - lib_handle_kperf = NULL; - lib_handle_kperfdata = NULL; - for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) { - const lib_symbol *symbol = &lib_symbols_kperf[i]; - *symbol->impl = NULL; - } - for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) { - const lib_symbol *symbol = &lib_symbols_kperfdata[i]; - *symbol->impl = NULL; - } -} - -static bool lib_init(void) { -#define return_err() \ - do { \ - lib_deinit(); \ - lib_inited = true; \ - lib_has_err = true; \ - return false; \ - } while (false) - - if (lib_inited) - return !lib_has_err; - - // load dynamic library - lib_handle_kperf = dlopen(lib_path_kperf, RTLD_LAZY); - if (!lib_handle_kperf) { - snprintf(lib_err_msg, sizeof(lib_err_msg), - "Failed to load kperf.framework, message: %s.", dlerror()); - return_err(); - } - lib_handle_kperfdata = dlopen(lib_path_kperfdata, RTLD_LAZY); - if (!lib_handle_kperfdata) { - snprintf(lib_err_msg, sizeof(lib_err_msg), - "Failed to load kperfdata.framework, message: %s.", dlerror()); - return_err(); - } - - // load symbol address from dynamic library - for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) { - const lib_symbol *symbol = &lib_symbols_kperf[i]; - *symbol->impl = dlsym(lib_handle_kperf, symbol->name); - if (!*symbol->impl) { - snprintf(lib_err_msg, sizeof(lib_err_msg), - "Failed to load kperf function: %s.", symbol->name); - return_err(); - } - } - for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) { - const lib_symbol *symbol = &lib_symbols_kperfdata[i]; - *symbol->impl = dlsym(lib_handle_kperfdata, symbol->name); - if (!*symbol->impl) { - snprintf(lib_err_msg, sizeof(lib_err_msg), - "Failed to load kperfdata function: %s.", symbol->name); - return_err(); - } - } - - lib_inited = true; - lib_has_err = false; - return true; - -#undef return_err -} - -// ----------------------------------------------------------------------------- -// kdebug private structs -// https://github.com/apple/darwin-xnu/blob/main/bsd/sys_private/kdebug_private.h -// ----------------------------------------------------------------------------- - -/* - * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf - * structure. - */ -#if defined(__arm64__) -typedef uint64_t kd_buf_argtype; -#else -typedef uintptr_t kd_buf_argtype; -#endif - -typedef struct { - uint64_t timestamp; - kd_buf_argtype arg1; - kd_buf_argtype arg2; - kd_buf_argtype arg3; - kd_buf_argtype arg4; - kd_buf_argtype arg5; /* the thread ID */ - uint32_t debugid; /* see */ - -/* - * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf - * structure. - */ -#if defined(__LP64__) || defined(__arm64__) - uint32_t cpuid; /* cpu index, from 0 */ - kd_buf_argtype unused; -#endif -} kd_buf; - -/* bits for the type field of kd_regtype */ -#define KDBG_CLASSTYPE 0x10000 -#define KDBG_SUBCLSTYPE 0x20000 -#define KDBG_RANGETYPE 0x40000 -#define KDBG_TYPENONE 0x80000 -#define KDBG_CKTYPES 0xF0000 - -/* only trace at most 4 types of events, at the code granularity */ -#define KDBG_VALCHECK 0x00200000U - -typedef struct { - unsigned int type; - unsigned int value1; - unsigned int value2; - unsigned int value3; - unsigned int value4; -} kd_regtype; - -typedef struct { - /* number of events that can fit in the buffers */ - int nkdbufs; - /* set if trace is disabled */ - int nolog; - /* kd_ctrl_page.flags */ - unsigned int flags; - /* number of threads in thread map */ - int nkdthreads; - /* the owning pid */ - int bufid; -} kbufinfo_t; - -// ----------------------------------------------------------------------------- -// kdebug utils -// ----------------------------------------------------------------------------- - -/// Clean up trace buffers and reset ktrace/kdebug/kperf. -/// @return 0 on success. -static int kdebug_reset(void) { - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREMOVE}; - return sysctl(mib, 3, NULL, NULL, NULL, 0); -} - -/// Disable and reinitialize the trace buffers. -/// @return 0 on success. -static int kdebug_reinit(void) { - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETUP}; - return sysctl(mib, 3, NULL, NULL, NULL, 0); -} - -/// Set debug filter. -static int kdebug_setreg(kd_regtype *kdr) { - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETREG}; - usize size = sizeof(kd_regtype); - return sysctl(mib, 3, kdr, &size, NULL, 0); -} - -/// Set maximum number of trace entries (kd_buf). -/// Only allow allocation up to half the available memory (sane_size). -/// @return 0 on success. -static int kdebug_trace_setbuf(int nbufs) { - int mib[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETBUF, nbufs}; - return sysctl(mib, 4, NULL, NULL, NULL, 0); -} - -/// Enable or disable kdebug trace. -/// Trace buffer must already be initialized. -/// @return 0 on success. -static int kdebug_trace_enable(bool enable) { - int mib[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDENABLE, enable}; - return sysctl(mib, 4, NULL, 0, NULL, 0); -} - -/// Retrieve trace buffer information from kernel. -/// @return 0 on success. -static int kdebug_get_bufinfo(kbufinfo_t *info) { - if (!info) - return -1; - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDGETBUF}; - size_t needed = sizeof(kbufinfo_t); - return sysctl(mib, 3, info, &needed, NULL, 0); -} - -/// Retrieve trace buffers from kernel. -/// @param buf Memory to receive buffer data, array of `kd_buf`. -/// @param len Length of `buf` in bytes. -/// @param count Number of trace entries (kd_buf) obtained. -/// @return 0 on success. -static int kdebug_trace_read(void *buf, usize len, usize *count) { - if (count) - *count = 0; - if (!buf || !len) - return -1; - - // Note: the input and output units are not the same. - // input: bytes - // output: number of kd_buf - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREADTR}; - int ret = sysctl(mib, 3, buf, &len, NULL, 0); - if (ret != 0) - return ret; - *count = len; - return 0; -} - -/// Block until there are new buffers filled or `timeout_ms` have passed. -/// @param timeout_ms timeout milliseconds, 0 means wait forever. -/// @param suc set true if new buffers filled. -/// @return 0 on success. -static int kdebug_wait(usize timeout_ms, bool *suc) { - if (timeout_ms == 0) - return -1; - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDBUFWAIT}; - usize val = timeout_ms; - int ret = sysctl(mib, 3, NULL, &val, NULL, 0); - if (suc) - *suc = !!val; - return ret; -} - -// ----------------------------------------------------------------------------- -// Demo -// ----------------------------------------------------------------------------- - -#define EVENT_NAME_MAX 8 - -typedef struct { - const char *alias; /// name for print - const char *names[EVENT_NAME_MAX]; /// name from pmc db -} event_alias; - -/// Event names from /usr/share/kpep/.plist -static const event_alias profile_events[] = { - {"cycles", - { - "FIXED_CYCLES", // Apple A7-A15//CORE_ACTIVE_CYCLE - "CPU_CLK_UNHALTED.THREAD", // Intel Core 1th-10th - "CPU_CLK_UNHALTED.CORE", // Intel Yonah, Merom - }}, - {"instructions", - { - "FIXED_INSTRUCTIONS", // Apple A7-A15 - "INST_RETIRED.ANY" // Intel Yonah, Merom, Core 1th-10th - }}, - {"branches", - { - "INST_BRANCH", // Apple A7-A15 - "BR_INST_RETIRED.ALL_BRANCHES", // Intel Core 1th-10th - "INST_RETIRED.ANY", // Intel Yonah, Merom - }}, - {"branch-misses", - { - "BRANCH_MISPRED_NONSPEC", // Apple A7-A15, since iOS 15, macOS 12 - "BRANCH_MISPREDICT", // Apple A7-A14 - "BR_MISP_RETIRED.ALL_BRANCHES", // Intel Core 2th-10th - "BR_INST_RETIRED.MISPRED", // Intel Yonah, Merom - }}, -}; - -static kpep_event *get_event(kpep_db *db, const event_alias *alias) { - for (usize j = 0; j < EVENT_NAME_MAX; j++) { - const char *name = alias->names[j]; - if (!name) - break; - kpep_event *ev = NULL; - if (kpep_db_event(db, name, &ev) == 0) { - return ev; - } - } - return NULL; -} - -kpc_config_t regs[KPC_MAX_COUNTERS] = {0}; -usize counter_map[KPC_MAX_COUNTERS] = {0}; -u64 counters_0[KPC_MAX_COUNTERS] = {0}; -u64 counters_1[KPC_MAX_COUNTERS] = {0}; -const usize ev_count = sizeof(profile_events) / sizeof(profile_events[0]); - -bool setup_performance_counters() { - static bool init = false; - static bool worked = false; - - if (init) { - return worked; - } - init = true; - - // load dylib - if (!lib_init()) { - printf("Error: %s\n", lib_err_msg); - return (worked = false); - } - - // check permission - int force_ctrs = 0; - if (kpc_force_all_ctrs_get(&force_ctrs)) { - // printf("Permission denied, xnu/kpc requires root privileges.\n"); - return (worked = false); - } - int ret; - // load pmc db - kpep_db *db = NULL; - if ((ret = kpep_db_create(NULL, &db))) { - printf("Error: cannot load pmc database: %d.\n", ret); - return (worked = false); - } - printf("loaded db: %s (%s)\n", db->name, db->marketing_name); - - // create a config - kpep_config *cfg = NULL; - if ((ret = kpep_config_create(db, &cfg))) { - printf("Failed to create kpep config: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - if ((ret = kpep_config_force_counters(cfg))) { - printf("Failed to force counters: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - - // get events - kpep_event *ev_arr[ev_count] = {0}; - for (usize i = 0; i < ev_count; i++) { - const event_alias *alias = profile_events + i; - ev_arr[i] = get_event(db, alias); - if (!ev_arr[i]) { - printf("Cannot find event: %s.\n", alias->alias); - return (worked = false); - } - } - - // add event to config - for (usize i = 0; i < ev_count; i++) { - kpep_event *ev = ev_arr[i]; - if ((ret = kpep_config_add_event(cfg, &ev, 0, NULL))) { - printf("Failed to add event: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - } - - // prepare buffer and config - u32 classes = 0; - usize reg_count = 0; - if ((ret = kpep_config_kpc_classes(cfg, &classes))) { - printf("Failed get kpc classes: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - if ((ret = kpep_config_kpc_count(cfg, ®_count))) { - printf("Failed get kpc count: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - if ((ret = kpep_config_kpc_map(cfg, counter_map, sizeof(counter_map)))) { - printf("Failed get kpc map: %d (%s).\n", ret, kpep_config_error_desc(ret)); - return (worked = false); - } - if ((ret = kpep_config_kpc(cfg, regs, sizeof(regs)))) { - printf("Failed get kpc registers: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - - // set config to kernel - if ((ret = kpc_force_all_ctrs_set(1))) { - printf("Failed force all ctrs: %d.\n", ret); - return (worked = false); - } - if ((classes & KPC_CLASS_CONFIGURABLE_MASK) && reg_count) { - if ((ret = kpc_set_config(classes, regs))) { - printf("Failed set kpc config: %d.\n", ret); - return (worked = false); - } - } - - // start counting - if ((ret = kpc_set_counting(classes))) { - printf("Failed set counting: %d.\n", ret); - return (worked = false); - } - if ((ret = kpc_set_thread_counting(classes))) { - printf("Failed set thread counting: %d.\n", ret); - return (worked = false); - } - - return (worked = true); -} - -inline performance_counters get_counters() { - static bool warned = false; - int ret; - // get counters before - if ((ret = kpc_get_thread_counters(0, KPC_MAX_COUNTERS, counters_0))) { - if (!warned) { - - printf("Failed get thread counters before: %d.\n", ret); - warned = true; - } - return 1; - } - /*printf("counters value:\n"); - for (usize i = 0; i < ev_count; i++) { - const event_alias *alias = profile_events + i; - usize idx = counter_map[i]; - u64 val = counters_1[idx] - counters_0[idx]; - printf("%14s: %llu\n", alias->alias, val); - }*/ - return performance_counters{ - counters_0[counter_map[0]], counters_0[counter_map[2]], - counters_0[counter_map[3]], counters_0[counter_map[1]]}; -} - -#endif diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp new file mode 100644 index 00000000..2d12316a --- /dev/null +++ b/benchmarks/bench_ip.cpp @@ -0,0 +1,183 @@ +#include "counters/event_counter.h" +#include "fast_float/fast_float.h" +#include +#include +#include +#include +#include +#include +#include +event_collector collector; + +template +event_aggregate bench(const function_type& function, size_t min_repeat = 10, size_t min_time_ns = 1000000000, size_t max_repeat = 1000000) { + event_aggregate aggregate{}; + size_t N = min_repeat; + if(N == 0) { N = 1; } + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + function(); + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + if((i+1 == N) && (aggregate.total_elapsed_ns() < min_time_ns) && (N +static inline int parse_ip_line(const char *&p, const char *pend, uint32_t &sum, + Parser parse_uint8) { + uint8_t o = 0; + for (int i = 0; i < 4; ++i) { + if (!parse_uint8(p, pend, &o)) + return 0; + sum += o; + if (i != 3) { + if (p == pend || *p != '.') + return 0; + ++p; + } + } + // consume optional '\r' + if (p != pend && *p == '\r') + ++p; + // expect '\n' or end + if (p != pend && *p == '\n') + ++p; + return 1; +} + +static std::string make_ip_line(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { + std::string s; + s.reserve(16); + s += std::to_string(a); + s += '.'; + s += std::to_string(b); + s += '.'; + s += std::to_string(c); + s += '.'; + s += std::to_string(d); + s += '\n'; + return s; +} + +int main() { + constexpr size_t N = 500000; + std::mt19937 rng(1234); + std::uniform_int_distribution dist(0, 255); + + std::string buf; + buf.reserve(N * 16); + + for (size_t i = 0; i < N; ++i) { + uint8_t a = (uint8_t)dist(rng); + uint8_t b = (uint8_t)dist(rng); + uint8_t c = (uint8_t)dist(rng); + uint8_t d = (uint8_t)dist(rng); + buf += make_ip_line(a, b, c, d); + } + + // sentinel to allow 4-byte loads at end + buf.append(4, '\0'); + + const size_t bytes = buf.size() - 4; // exclude sentinel from throughput + const size_t volume = N; + + // validate correctness + { + const char *start = buf.data(); + const char *end = buf.data() + bytes; + const char *p = start; + const char *pend = end; + uint32_t sum = 0; + for (size_t i = 0; i < N; ++i) { + int ok = parse_ip_line(p, pend, sum, parse_u8_fromchars); + if (!ok) { + std::fprintf(stderr, "fromchars parse failed at line %zu\n", i); + std::abort(); + } + p = start; + pend = end; + ok = parse_ip_line(p, pend, sum, parse_u8_fastswar); + if (!ok) { + std::fprintf(stderr, "fastswar parse failed at line %zu\n", i); + std::abort(); + } + } + } + + uint32_t sink = 0; + + pretty_print(volume, bytes, "parse_ip_fromchars", bench([&]() { + const char *p = buf.data(); + const char *pend = buf.data() + bytes; + uint32_t sum = 0; + int ok = 0; + for (size_t i = 0; i < N; ++i) { + ok = parse_ip_line(p, pend, sum, parse_u8_fromchars); + if (!ok) + std::abort(); + } + sink += sum; + })); + + pretty_print(volume, bytes, "parse_ip_fastswar", bench([&]() { + const char *p = buf.data(); + const char *pend = buf.data() + bytes; + uint32_t sum = 0; + int ok = 0; + for (size_t i = 0; i < N; ++i) { + ok = parse_ip_line(p, pend, sum, parse_u8_fastswar); + if (!ok) + std::abort(); + } + sink += sum; + })); + + std::printf("sink=%u\n", sink); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index 05f12330..2cfe15da 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -1,7 +1,7 @@ #if defined(__linux__) || (__APPLE__ && __aarch64__) #define USING_COUNTERS #endif -#include "event_counter.h" +#include "counters/event_counter.h" #include #include "fast_float/fast_float.h" #include @@ -102,7 +102,7 @@ void pretty_print(double volume, size_t number_of_floats, std::string name, branches_avg += branches; branches_min = branches_min < branches ? branches_min : branches; - double branch_misses = e.missed_branches(); + double branch_misses = e.branch_misses(); branch_misses_avg += branch_misses; branch_misses_min = branch_misses_min < branch_misses ? branch_misses_min : branch_misses; diff --git a/benchmarks/event_counter.h b/benchmarks/event_counter.h deleted file mode 100644 index cd594787..00000000 --- a/benchmarks/event_counter.h +++ /dev/null @@ -1,181 +0,0 @@ -#ifndef __EVENT_COUNTER_H -#define __EVENT_COUNTER_H - -#include -#ifndef _MSC_VER -#include -#endif -#include - -#include - -#include -#include - -#include "linux-perf-events.h" -#ifdef __linux__ -#include -#endif - -#if (defined(__APPLE__) && __APPLE__) && (defined(__aarch64__) && __aarch64__) -#include "apple_arm_events.h" -#endif - -struct event_count { - std::chrono::duration elapsed; - std::vector event_counts; - - event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {} - - event_count(const std::chrono::duration _elapsed, - const std::vector _event_counts) - : elapsed(_elapsed), event_counts(_event_counts) {} - - event_count(const event_count &other) - : elapsed(other.elapsed), event_counts(other.event_counts) {} - - // The types of counters (so we can read the getter more easily) - enum event_counter_types { - CPU_CYCLES = 0, - INSTRUCTIONS = 1, - BRANCHES = 2, - MISSED_BRANCHES = 3 - }; - - double elapsed_sec() const { - return std::chrono::duration(elapsed).count(); - } - - double elapsed_ns() const { - return std::chrono::duration(elapsed).count(); - } - - double cycles() const { - return static_cast(event_counts[CPU_CYCLES]); - } - - double instructions() const { - return static_cast(event_counts[INSTRUCTIONS]); - } - - double branches() const { - return static_cast(event_counts[BRANCHES]); - } - - double missed_branches() const { - return static_cast(event_counts[MISSED_BRANCHES]); - } - - event_count &operator=(const event_count &other) { - this->elapsed = other.elapsed; - this->event_counts = other.event_counts; - return *this; - } - - event_count operator+(const event_count &other) const { - return event_count(elapsed + other.elapsed, - { - event_counts[0] + other.event_counts[0], - event_counts[1] + other.event_counts[1], - event_counts[2] + other.event_counts[2], - event_counts[3] + other.event_counts[3], - event_counts[4] + other.event_counts[4], - }); - } - - void operator+=(const event_count &other) { *this = *this + other; } -}; - -struct event_aggregate { - bool has_events = false; - int iterations = 0; - event_count total{}; - event_count best{}; - event_count worst{}; - - event_aggregate() = default; - - void operator<<(const event_count &other) { - if (iterations == 0 || other.elapsed < best.elapsed) { - best = other; - } - if (iterations == 0 || other.elapsed > worst.elapsed) { - worst = other; - } - iterations++; - total += other; - } - - double elapsed_sec() const { return total.elapsed_sec() / iterations; } - - double elapsed_ns() const { return total.elapsed_ns() / iterations; } - - double cycles() const { return total.cycles() / iterations; } - - double instructions() const { return total.instructions() / iterations; } - - double branches() const { return total.branches() / iterations; } - - double missed_branches() const { - return total.missed_branches() / iterations; - } -}; - -struct event_collector { - event_count count{}; - std::chrono::time_point start_clock{}; - -#if defined(__linux__) - LinuxEvents linux_events; - - event_collector() - : linux_events(std::vector{ - PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS, - PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions - PERF_COUNT_HW_BRANCH_MISSES}) {} - - bool has_events() { return linux_events.is_working(); } -#elif __APPLE__ && __aarch64__ - performance_counters diff; - - event_collector() : diff(0) { setup_performance_counters(); } - - bool has_events() { return setup_performance_counters(); } -#else - event_collector() {} - - bool has_events() { return false; } -#endif - - inline void start() { -#if defined(__linux) - linux_events.start(); -#elif __APPLE__ && __aarch64__ - if (has_events()) { - diff = get_counters(); - } -#endif - start_clock = std::chrono::steady_clock::now(); - } - - inline event_count &end() { - const auto end_clock = std::chrono::steady_clock::now(); -#if defined(__linux) - linux_events.end(count.event_counts); -#elif __APPLE__ && __aarch64__ - if (has_events()) { - performance_counters end = get_counters(); - diff = end - diff; - } - count.event_counts[0] = diff.cycles; - count.event_counts[1] = diff.instructions; - count.event_counts[2] = diff.branches; - count.event_counts[3] = diff.missed_branches; - count.event_counts[4] = 0; -#endif - count.elapsed = end_clock - start_clock; - return count; - } -}; - -#endif diff --git a/benchmarks/linux-perf-events.h b/benchmarks/linux-perf-events.h deleted file mode 100644 index 0a9e5538..00000000 --- a/benchmarks/linux-perf-events.h +++ /dev/null @@ -1,104 +0,0 @@ -#pragma once -#ifdef __linux__ - -#include // for __NR_perf_event_open -#include // for perf event constants -#include // for ioctl -#include // for syscall - -#include // for errno -#include // for memset -#include - -#include -#include - -template class LinuxEvents { - int fd; - bool working; - perf_event_attr attribs{}; - size_t num_events{}; - std::vector temp_result_vec{}; - std::vector ids{}; - -public: - explicit LinuxEvents(std::vector config_vec) : fd(0), working(true) { - memset(&attribs, 0, sizeof(attribs)); - attribs.type = TYPE; - attribs.size = sizeof(attribs); - attribs.disabled = 1; - attribs.exclude_kernel = 1; - attribs.exclude_hv = 1; - - attribs.sample_period = 0; - attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; - const int pid = 0; // the current process - const int cpu = -1; // all CPUs - const unsigned long flags = 0; - - int group = -1; // no group - num_events = config_vec.size(); - ids.resize(config_vec.size()); - uint32_t i = 0; - for (auto config : config_vec) { - attribs.config = config; - int _fd = static_cast( - syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags)); - if (_fd == -1) { - report_error("perf_event_open"); - } - ioctl(_fd, PERF_EVENT_IOC_ID, &ids[i++]); - if (group == -1) { - group = _fd; - fd = _fd; - } - } - - temp_result_vec.resize(num_events * 2 + 1); - } - - ~LinuxEvents() { - if (fd != -1) { - close(fd); - } - } - - inline void start() { - if (fd != -1) { - if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) { - report_error("ioctl(PERF_EVENT_IOC_RESET)"); - } - - if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) { - report_error("ioctl(PERF_EVENT_IOC_ENABLE)"); - } - } - } - - inline void end(std::vector &results) { - if (fd != -1) { - if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) { - report_error("ioctl(PERF_EVENT_IOC_DISABLE)"); - } - - if (read(fd, temp_result_vec.data(), temp_result_vec.size() * 8) == -1) { - report_error("read"); - } - } - // our actual results are in slots 1,3,5, ... of this structure - for (uint32_t i = 1; i < temp_result_vec.size(); i += 2) { - results[i / 2] = temp_result_vec[i]; - } - for (uint32_t i = 2; i < temp_result_vec.size(); i += 2) { - if (ids[i / 2 - 1] != temp_result_vec[i]) { - report_error("event mismatch"); - } - } - } - - bool is_working() { return working; } - -private: - void report_error(const std::string &) { working = false; } -}; -#endif \ No newline at end of file From 6952ffeec06075d26219a251f259d330e07d83fb Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 22 Dec 2025 11:58:36 -0500 Subject: [PATCH 19/38] lint --- benchmarks/bench_ip.cpp | 49 +++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 2d12316a..246c1ff3 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -6,26 +6,31 @@ #include #include #include -#include +#include event_collector collector; -template -event_aggregate bench(const function_type& function, size_t min_repeat = 10, size_t min_time_ns = 1000000000, size_t max_repeat = 1000000) { - event_aggregate aggregate{}; - size_t N = min_repeat; - if(N == 0) { N = 1; } - for (size_t i = 0; i < N; i++) { - std::atomic_thread_fence(std::memory_order_acquire); - collector.start(); - function(); - std::atomic_thread_fence(std::memory_order_release); - event_count allocate_count = collector.end(); - aggregate << allocate_count; - if((i+1 == N) && (aggregate.total_elapsed_ns() < min_time_ns) && (N +event_aggregate bench(const function_type &function, size_t min_repeat = 10, + size_t min_time_ns = 1000000000, + size_t max_repeat = 1000000) { + event_aggregate aggregate{}; + size_t N = min_repeat; + if (N == 0) { + N = 1; + } + for (size_t i = 0; i < N; i++) { + std::atomic_thread_fence(std::memory_order_acquire); + collector.start(); + function(); + std::atomic_thread_fence(std::memory_order_release); + event_count allocate_count = collector.end(); + aggregate << allocate_count; + if ((i + 1 == N) && (aggregate.total_elapsed_ns() < min_time_ns) && + (N < max_repeat)) { + N *= 10; } - return aggregate; + } + return aggregate; } void pretty_print(size_t volume, size_t bytes, std::string name, @@ -45,7 +50,7 @@ void pretty_print(size_t volume, size_t bytes, std::string name, printf("\n"); } -int parse_u8_fastswar(const char *&p, const char *pend, uint8_t *out) { +int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { if (p == pend) return 0; auto r = fast_float::from_chars(p, pend, *out); @@ -142,7 +147,7 @@ int main() { } p = start; pend = end; - ok = parse_ip_line(p, pend, sum, parse_u8_fastswar); + ok = parse_ip_line(p, pend, sum, parse_u8_fastfloat); if (!ok) { std::fprintf(stderr, "fastswar parse failed at line %zu\n", i); std::abort(); @@ -152,7 +157,7 @@ int main() { uint32_t sink = 0; - pretty_print(volume, bytes, "parse_ip_fromchars", bench([&]() { + pretty_print(volume, bytes, "parse_ip_std_fromchars", bench([&]() { const char *p = buf.data(); const char *pend = buf.data() + bytes; uint32_t sum = 0; @@ -165,13 +170,13 @@ int main() { sink += sum; })); - pretty_print(volume, bytes, "parse_ip_fastswar", bench([&]() { + pretty_print(volume, bytes, "parse_ip_fastfloat", bench([&]() { const char *p = buf.data(); const char *pend = buf.data() + bytes; uint32_t sum = 0; int ok = 0; for (size_t i = 0; i < N; ++i) { - ok = parse_ip_line(p, pend, sum, parse_u8_fastswar); + ok = parse_ip_line(p, pend, sum, parse_u8_fastfloat); if (!ok) std::abort(); } From a6685b2a98c21bf3a88cbb1c685e470cc919b272 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 22 Dec 2025 12:02:23 -0500 Subject: [PATCH 20/38] missing header --- benchmarks/bench_ip.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 246c1ff3..33b2c646 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -7,6 +7,7 @@ #include #include #include +#include event_collector collector; template From 5304b3d6116c3ae8f40d38af5e70ba1b2f6589e2 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 22 Dec 2025 14:06:43 -0500 Subject: [PATCH 21/38] saving... --- benchmarks/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 2f7336b9..4e22c9eb 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -3,15 +3,15 @@ include(FetchContent) FetchContent_Declare( counters GIT_REPOSITORY https://github.com/lemire/counters.git - GIT_TAG v1.0.1 + GIT_TAG v1.0.4 ) FetchContent_MakeAvailable(counters) add_executable(realbenchmark benchmark.cpp) -target_link_libraries(realbenchmark PRIVATE Counters::counters) +target_link_libraries(realbenchmark PRIVATE counters::counters) add_executable(bench_ip bench_ip.cpp) -target_link_libraries(bench_ip PRIVATE Counters::counters) +target_link_libraries(bench_ip PRIVATE counters::counters) set_property( TARGET realbenchmark From c54e4a7ababec68b188eee160974048f099492bc Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 22 Dec 2025 16:05:51 -0500 Subject: [PATCH 22/38] updating the deps --- benchmarks/CMakeLists.txt | 2 +- benchmarks/bench_ip.cpp | 35 +++++------------------------------ benchmarks/benchmark.cpp | 10 +++++----- 3 files changed, 11 insertions(+), 36 deletions(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 4e22c9eb..cfa48b82 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -3,7 +3,7 @@ include(FetchContent) FetchContent_Declare( counters GIT_REPOSITORY https://github.com/lemire/counters.git - GIT_TAG v1.0.4 + GIT_TAG v2.0.0 ) FetchContent_MakeAvailable(counters) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 33b2c646..782358e2 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -1,4 +1,4 @@ -#include "counters/event_counter.h" +#include "counters/bench.h" #include "fast_float/fast_float.h" #include #include @@ -8,39 +8,14 @@ #include #include #include -event_collector collector; - -template -event_aggregate bench(const function_type &function, size_t min_repeat = 10, - size_t min_time_ns = 1000000000, - size_t max_repeat = 1000000) { - event_aggregate aggregate{}; - size_t N = min_repeat; - if (N == 0) { - N = 1; - } - for (size_t i = 0; i < N; i++) { - std::atomic_thread_fence(std::memory_order_acquire); - collector.start(); - function(); - std::atomic_thread_fence(std::memory_order_release); - event_count allocate_count = collector.end(); - aggregate << allocate_count; - if ((i + 1 == N) && (aggregate.total_elapsed_ns() < min_time_ns) && - (N < max_repeat)) { - N *= 10; - } - } - return aggregate; -} void pretty_print(size_t volume, size_t bytes, std::string name, - event_aggregate agg) { + counters::event_aggregate agg) { printf("%-40s : ", name.c_str()); printf(" %5.2f GB/s ", bytes / agg.fastest_elapsed_ns()); printf(" %5.1f Ma/s ", volume * 1000.0 / agg.fastest_elapsed_ns()); printf(" %5.2f ns/d ", agg.fastest_elapsed_ns() / volume); - if (collector.has_events()) { + if (counters::event_collector().has_events()) { printf(" %5.2f GHz ", agg.fastest_cycles() / agg.fastest_elapsed_ns()); printf(" %5.2f c/d ", agg.fastest_cycles() / volume); printf(" %5.2f i/d ", agg.fastest_instructions() / volume); @@ -158,7 +133,7 @@ int main() { uint32_t sink = 0; - pretty_print(volume, bytes, "parse_ip_std_fromchars", bench([&]() { + pretty_print(volume, bytes, "parse_ip_std_fromchars", counters::bench([&]() { const char *p = buf.data(); const char *pend = buf.data() + bytes; uint32_t sum = 0; @@ -171,7 +146,7 @@ int main() { sink += sum; })); - pretty_print(volume, bytes, "parse_ip_fastfloat", bench([&]() { + pretty_print(volume, bytes, "parse_ip_fastfloat", counters::bench([&]() { const char *p = buf.data(); const char *pend = buf.data() + bytes; uint32_t sum = 0; diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index 2cfe15da..d90038ed 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -50,14 +50,14 @@ double findmax_fastfloat32(std::vector> &s) { return answer; } -event_collector collector{}; +counters::event_collector collector{}; #ifdef USING_COUNTERS template -std::vector +std::vector time_it_ns(std::vector> &lines, T const &function, size_t repeat) { - std::vector aggregate; + std::vector aggregate; bool printed_bug = false; for (size_t i = 0; i < repeat; i++) { collector.start(); @@ -72,7 +72,7 @@ time_it_ns(std::vector> &lines, T const &function, } void pretty_print(double volume, size_t number_of_floats, std::string name, - std::vector events) { + std::vector events) { double volumeMB = volume / (1024. * 1024.); double average_ns{0}; double min_ns{DBL_MAX}; @@ -84,7 +84,7 @@ void pretty_print(double volume, size_t number_of_floats, std::string name, double branches_avg{0}; double branch_misses_min{0}; double branch_misses_avg{0}; - for (event_count e : events) { + for (counters::event_count e : events) { double ns = e.elapsed_ns(); average_ns += ns; min_ns = min_ns < ns ? min_ns : ns; From 62ed60e95f338dfa625490f4b9575a480e08f154 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 22 Dec 2025 21:08:59 -0500 Subject: [PATCH 23/38] simplify the benchmark --- benchmarks/CMakeLists.txt | 2 +- benchmarks/bench_ip.cpp | 135 ++++++++++++++++++++++++-------------- 2 files changed, 85 insertions(+), 52 deletions(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index cfa48b82..791cf612 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -3,7 +3,7 @@ include(FetchContent) FetchContent_Declare( counters GIT_REPOSITORY https://github.com/lemire/counters.git - GIT_TAG v2.0.0 + GIT_TAG v2.1.0 ) FetchContent_MakeAvailable(counters) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 782358e2..771ebb8a 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -26,6 +26,28 @@ void pretty_print(size_t volume, size_t bytes, std::string name, printf("\n"); } +const char *seek_ip_end(const char *p, const char *pend) { + const char *current = p; + size_t count = 0; + for (; current != pend; ++current) { + if (*current == '.') { + count++; + if (count == 3) { + ++current; + break; + } + } + } + while (current != pend) { + if (*current <= '9' && *current >= '0') { + ++current; + } else { + break; + } + } + return current; +} + int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { if (p == pend) return 0; @@ -39,8 +61,9 @@ int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { static inline int parse_u8_fromchars(const char *&p, const char *pend, uint8_t *out) { - if (p == pend) + if (p == pend) { return 0; + } auto r = std::from_chars(p, pend, *out); if (r.ec == std::errc()) { p = r.ptr; @@ -50,26 +73,35 @@ static inline int parse_u8_fromchars(const char *&p, const char *pend, } template -static inline int parse_ip_line(const char *&p, const char *pend, uint32_t &sum, - Parser parse_uint8) { - uint8_t o = 0; - for (int i = 0; i < 4; ++i) { - if (!parse_uint8(p, pend, &o)) - return 0; - sum += o; - if (i != 3) { - if (p == pend || *p != '.') - return 0; - ++p; - } +std::pair simple_parse_ip_line(const char *p, const char *pend, + Parser parse_uint8) { + uint8_t v1; + if (!parse_uint8(p, pend, &v1)) { + return {false, 0}; + } + if (p == pend || *p++ != '.') { + return {false, 0}; + } + uint8_t v2; + if (!parse_uint8(p, pend, &v2)) { + return {false, 0}; + } + if (p == pend || *p++ != '.') { + return {false, 0}; + } + uint8_t v3; + if (!parse_uint8(p, pend, &v3)) { + return {false, 0}; } - // consume optional '\r' - if (p != pend && *p == '\r') - ++p; - // expect '\n' or end - if (p != pend && *p == '\n') - ++p; - return 1; + if (p == pend || *p++ != '.') { + return {false, 0}; + } + uint8_t v4; + if (!parse_uint8(p, pend, &v4)) { + return {false, 0}; + } + return {true, (uint32_t(v1) << 24) | (uint32_t(v2) << 16) | + (uint32_t(v3) << 8) | uint32_t(v4)}; } static std::string make_ip_line(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { @@ -87,19 +119,22 @@ static std::string make_ip_line(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { } int main() { - constexpr size_t N = 500000; + constexpr size_t N = 15000; std::mt19937 rng(1234); std::uniform_int_distribution dist(0, 255); std::string buf; - buf.reserve(N * 16); + constexpr size_t ip_size = 16; + buf.reserve(N * ip_size); for (size_t i = 0; i < N; ++i) { uint8_t a = (uint8_t)dist(rng); uint8_t b = (uint8_t)dist(rng); uint8_t c = (uint8_t)dist(rng); uint8_t d = (uint8_t)dist(rng); - buf += make_ip_line(a, b, c, d); + std::string ip_line = make_ip_line(a, b, c, d); + ip_line.resize(ip_size, ' '); // pad to fixed size + buf.append(ip_line); } // sentinel to allow 4-byte loads at end @@ -108,30 +143,21 @@ int main() { const size_t bytes = buf.size() - 4; // exclude sentinel from throughput const size_t volume = N; - // validate correctness - { - const char *start = buf.data(); - const char *end = buf.data() + bytes; - const char *p = start; - const char *pend = end; - uint32_t sum = 0; - for (size_t i = 0; i < N; ++i) { - int ok = parse_ip_line(p, pend, sum, parse_u8_fromchars); - if (!ok) { - std::fprintf(stderr, "fromchars parse failed at line %zu\n", i); - std::abort(); - } - p = start; - pend = end; - ok = parse_ip_line(p, pend, sum, parse_u8_fastfloat); - if (!ok) { - std::fprintf(stderr, "fastswar parse failed at line %zu\n", i); - std::abort(); - } - } - } + volatile uint32_t sink = 0; - uint32_t sink = 0; + pretty_print(volume, bytes, "just_seek_ip_end (no parse)", + counters::bench([&]() { + const char *p = buf.data(); + const char *pend = buf.data() + bytes; + uint32_t sum = 0; + int ok = 0; + for (size_t i = 0; i < N; ++i) { + const char *q = seek_ip_end(p, pend); + sum += (uint32_t)(q - p); + p += ip_size; + } + sink += sum; + })); pretty_print(volume, bytes, "parse_ip_std_fromchars", counters::bench([&]() { const char *p = buf.data(); @@ -139,9 +165,13 @@ int main() { uint32_t sum = 0; int ok = 0; for (size_t i = 0; i < N; ++i) { - ok = parse_ip_line(p, pend, sum, parse_u8_fromchars); - if (!ok) + auto [ok, ip] = + simple_parse_ip_line(p, pend, parse_u8_fromchars); + sum += ip; + if (!ok) { std::abort(); + } + p += ip_size; } sink += sum; })); @@ -152,13 +182,16 @@ int main() { uint32_t sum = 0; int ok = 0; for (size_t i = 0; i < N; ++i) { - ok = parse_ip_line(p, pend, sum, parse_u8_fastfloat); - if (!ok) + auto [ok, ip] = + simple_parse_ip_line(p, pend, parse_u8_fastfloat); + sum += ip; + if (!ok) { std::abort(); + } + p += ip_size; } sink += sum; })); - std::printf("sink=%u\n", sink); return EXIT_SUCCESS; } \ No newline at end of file From 55723db171209640190f27051a5d34b7fc4473e8 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 22 Dec 2025 22:01:50 -0500 Subject: [PATCH 24/38] add a memcpy baseline --- benchmarks/bench_ip.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 771ebb8a..36993b83 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -13,12 +13,12 @@ void pretty_print(size_t volume, size_t bytes, std::string name, counters::event_aggregate agg) { printf("%-40s : ", name.c_str()); printf(" %5.2f GB/s ", bytes / agg.fastest_elapsed_ns()); - printf(" %5.1f Ma/s ", volume * 1000.0 / agg.fastest_elapsed_ns()); - printf(" %5.2f ns/d ", agg.fastest_elapsed_ns() / volume); + printf(" %5.1f Mip/s ", volume * 1000.0 / agg.fastest_elapsed_ns()); + printf(" %5.2f ns/ip ", agg.fastest_elapsed_ns() / volume); if (counters::event_collector().has_events()) { printf(" %5.2f GHz ", agg.fastest_cycles() / agg.fastest_elapsed_ns()); - printf(" %5.2f c/d ", agg.fastest_cycles() / volume); - printf(" %5.2f i/d ", agg.fastest_instructions() / volume); + printf(" %5.2f c/ip ", agg.fastest_cycles() / volume); + printf(" %5.2f i/ip ", agg.fastest_instructions() / volume); printf(" %5.2f c/b ", agg.fastest_cycles() / bytes); printf(" %5.2f i/b ", agg.fastest_instructions() / bytes); printf(" %5.2f i/c ", agg.fastest_instructions() / agg.fastest_cycles()); @@ -144,6 +144,13 @@ int main() { const size_t volume = N; volatile uint32_t sink = 0; + std::string buffer(ip_size * N, ' '); + + pretty_print(volume, bytes, "memcpy baseline", + counters::bench([&]() { + std::memcpy((char *)buffer.data(), buf.data(), bytes); + })); + pretty_print(volume, bytes, "just_seek_ip_end (no parse)", counters::bench([&]() { From b5ae54cb21a7b446fe35c3b41f4a738ed55fcaa2 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 22 Dec 2025 22:18:39 -0500 Subject: [PATCH 25/38] adding a memcpy benchmark and ensure inlining. --- benchmarks/bench_ip.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 36993b83..dbfe7ccd 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -26,7 +26,7 @@ void pretty_print(size_t volume, size_t bytes, std::string name, printf("\n"); } -const char *seek_ip_end(const char *p, const char *pend) { +fastfloat_really_inline const char *seek_ip_end(const char *p, const char *pend) { const char *current = p; size_t count = 0; for (; current != pend; ++current) { @@ -48,7 +48,7 @@ const char *seek_ip_end(const char *p, const char *pend) { return current; } -int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { +fastfloat_really_inline int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { if (p == pend) return 0; auto r = fast_float::from_chars(p, pend, *out); @@ -59,7 +59,7 @@ int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { return 0; } -static inline int parse_u8_fromchars(const char *&p, const char *pend, +fastfloat_really_inline int parse_u8_fromchars(const char *&p, const char *pend, uint8_t *out) { if (p == pend) { return 0; @@ -73,7 +73,7 @@ static inline int parse_u8_fromchars(const char *&p, const char *pend, } template -std::pair simple_parse_ip_line(const char *p, const char *pend, +fastfloat_really_inline std::pair simple_parse_ip_line(const char *p, const char *pend, Parser parse_uint8) { uint8_t v1; if (!parse_uint8(p, pend, &v1)) { From bfa7bccea197ec523f3a9966f8e5d5615dbcf490 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 23 Dec 2025 11:46:17 -0500 Subject: [PATCH 26/38] lint --- benchmarks/bench_ip.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index dbfe7ccd..90b07fca 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -26,7 +26,8 @@ void pretty_print(size_t volume, size_t bytes, std::string name, printf("\n"); } -fastfloat_really_inline const char *seek_ip_end(const char *p, const char *pend) { +fastfloat_really_inline const char *seek_ip_end(const char *p, + const char *pend) { const char *current = p; size_t count = 0; for (; current != pend; ++current) { @@ -48,7 +49,8 @@ fastfloat_really_inline const char *seek_ip_end(const char *p, const char *pend) return current; } -fastfloat_really_inline int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { +fastfloat_really_inline int parse_u8_fastfloat(const char *&p, const char *pend, + uint8_t *out) { if (p == pend) return 0; auto r = fast_float::from_chars(p, pend, *out); @@ -60,7 +62,7 @@ fastfloat_really_inline int parse_u8_fastfloat(const char *&p, const char *pend, } fastfloat_really_inline int parse_u8_fromchars(const char *&p, const char *pend, - uint8_t *out) { + uint8_t *out) { if (p == pend) { return 0; } @@ -73,8 +75,8 @@ fastfloat_really_inline int parse_u8_fromchars(const char *&p, const char *pend, } template -fastfloat_really_inline std::pair simple_parse_ip_line(const char *p, const char *pend, - Parser parse_uint8) { +fastfloat_really_inline std::pair +simple_parse_ip_line(const char *p, const char *pend, Parser parse_uint8) { uint8_t v1; if (!parse_uint8(p, pend, &v1)) { return {false, 0}; @@ -146,12 +148,10 @@ int main() { volatile uint32_t sink = 0; std::string buffer(ip_size * N, ' '); - pretty_print(volume, bytes, "memcpy baseline", - counters::bench([&]() { - std::memcpy((char *)buffer.data(), buf.data(), bytes); + pretty_print(volume, bytes, "memcpy baseline", counters::bench([&]() { + std::memcpy((char *)buffer.data(), buf.data(), bytes); })); - pretty_print(volume, bytes, "just_seek_ip_end (no parse)", counters::bench([&]() { const char *p = buf.data(); From 75d01f02e2f520b3f1d9037085023306d857771c Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 23 Dec 2025 12:07:15 -0500 Subject: [PATCH 27/38] display the inner count (check) --- benchmarks/bench_ip.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 90b07fca..761ebc11 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -11,6 +11,9 @@ void pretty_print(size_t volume, size_t bytes, std::string name, counters::event_aggregate agg) { + if (agg.inner_count > 1) { + printf("# (inner count: %d)\n", agg.inner_count); + } printf("%-40s : ", name.c_str()); printf(" %5.2f GB/s ", bytes / agg.fastest_elapsed_ns()); printf(" %5.1f Mip/s ", volume * 1000.0 / agg.fastest_elapsed_ns()); From 0b11d0a70f583e6e82febd3e27e308506b4c1dce Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 23 Dec 2025 20:18:26 -0500 Subject: [PATCH 28/38] even simpler bench_ip function --- benchmarks/bench_ip.cpp | 82 +++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 53 deletions(-) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 761ebc11..900c6c87 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -52,61 +52,37 @@ fastfloat_really_inline const char *seek_ip_end(const char *p, return current; } -fastfloat_really_inline int parse_u8_fastfloat(const char *&p, const char *pend, - uint8_t *out) { - if (p == pend) - return 0; - auto r = fast_float::from_chars(p, pend, *out); - if (r.ec == std::errc()) { - p = r.ptr; - return 1; - } - return 0; -} +enum class parse_method { standard, fast_float }; -fastfloat_really_inline int parse_u8_fromchars(const char *&p, const char *pend, - uint8_t *out) { - if (p == pend) { - return 0; - } - auto r = std::from_chars(p, pend, *out); - if (r.ec == std::errc()) { - p = r.ptr; - return 1; - } - return 0; -} -template +template fastfloat_really_inline std::pair -simple_parse_ip_line(const char *p, const char *pend, Parser parse_uint8) { - uint8_t v1; - if (!parse_uint8(p, pend, &v1)) { - return {false, 0}; - } - if (p == pend || *p++ != '.') { - return {false, 0}; - } - uint8_t v2; - if (!parse_uint8(p, pend, &v2)) { - return {false, 0}; - } - if (p == pend || *p++ != '.') { - return {false, 0}; - } - uint8_t v3; - if (!parse_uint8(p, pend, &v3)) { - return {false, 0}; - } - if (p == pend || *p++ != '.') { - return {false, 0}; - } - uint8_t v4; - if (!parse_uint8(p, pend, &v4)) { - return {false, 0}; +simple_parse_ip_line(const char *p, const char *pend) { + const char *current = p; + uint32_t ip = 0; + for (int i = 0; i < 4; ++i) { + uint8_t value; + if constexpr (use_standard == parse_method::standard) { + auto r = std::from_chars(current, pend, value); + if (r.ec != std::errc()) { + return {false, 0}; + } + current = r.ptr; + } else if constexpr (use_standard == parse_method::fast_float) { + auto r = fast_float::from_chars(current, pend, value); + if (r.ec != std::errc()) { + return {false, 0}; + } + current = r.ptr; + } + ip = (ip << 8) | value; + if (i < 3) { + if (current == pend || *current++ != '.') { + return {false, 0}; + } + } } - return {true, (uint32_t(v1) << 24) | (uint32_t(v2) << 16) | - (uint32_t(v3) << 8) | uint32_t(v4)}; + return {true, ip}; } static std::string make_ip_line(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { @@ -176,7 +152,7 @@ int main() { int ok = 0; for (size_t i = 0; i < N; ++i) { auto [ok, ip] = - simple_parse_ip_line(p, pend, parse_u8_fromchars); + simple_parse_ip_line(p, pend); sum += ip; if (!ok) { std::abort(); @@ -193,7 +169,7 @@ int main() { int ok = 0; for (size_t i = 0; i < N; ++i) { auto [ok, ip] = - simple_parse_ip_line(p, pend, parse_u8_fastfloat); + simple_parse_ip_line(p, pend); sum += ip; if (!ok) { std::abort(); From f9ddc75c69d3d935ec700ffa9116d11215df2cf7 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 23 Dec 2025 20:20:00 -0500 Subject: [PATCH 29/38] removing space --- benchmarks/bench_ip.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 900c6c87..825a6b0a 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -54,7 +54,6 @@ fastfloat_really_inline const char *seek_ip_end(const char *p, enum class parse_method { standard, fast_float }; - template fastfloat_really_inline std::pair simple_parse_ip_line(const char *p, const char *pend) { From a6a229a325452ae927e9b2e16bf01b3d69e03703 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 24 Dec 2025 13:59:12 -0500 Subject: [PATCH 30/38] bumping tag --- benchmarks/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 791cf612..4ee57895 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -3,7 +3,7 @@ include(FetchContent) FetchContent_Declare( counters GIT_REPOSITORY https://github.com/lemire/counters.git - GIT_TAG v2.1.0 + GIT_TAG v2.2.0 ) FetchContent_MakeAvailable(counters) From fce0ab61df6752c26cd67184df429b997ccbf11b Mon Sep 17 00:00:00 2001 From: Shikhar Date: Tue, 23 Dec 2025 05:59:26 +0530 Subject: [PATCH 31/38] uint8_t parsing Signed-off-by: Shikhar --- include/fast_float/ascii_number.h | 86 +++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 5683cd47..030fd077 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -509,6 +509,92 @@ parse_int_string(UC const *p, UC const *pend, T &value, UC const *const start_digits = p; + if constexpr (std::is_same_v) { + const size_t len = (size_t)(pend - p); + if (len == 0) { + if (has_leading_zeros) { + value = 0; + answer.ec = std::errc(); + answer.ptr = p; + } else { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + } + return answer; + } + + union { + uint8_t as_str[4]; + uint32_t as_int; + } digits; + + if (cpp20_and_in_constexpr()) { + digits.as_int = 0; + for (size_t j = 0; j < 4 && j < len; ++j) { + digits.as_str[j] = static_cast(p[j]); + } + } else if (len >= 4) { + memcpy(&digits.as_int, p, 4); + } else { + uint32_t b0 = static_cast(p[0]); + uint32_t b1 = (len > 1) ? static_cast(p[1]) : 0xFFu; + uint32_t b2 = (len > 2) ? static_cast(p[2]) : 0xFFu; + uint32_t b3 = 0xFFu; +#if FASTFLOAT_IS_BIG_ENDIAN + digits.as_int = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3; +#else + digits.as_int = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); +#endif + } + + uint32_t magic = + ((digits.as_int + 0x46464646u) | (digits.as_int - 0x30303030u)) & + 0x80808080u; + uint32_t tz = (uint32_t)std::__countr_zero(magic); // 7, 15, 23, 31, or 32 + uint32_t nd = (tz == 32) ? 4 : (tz >> 3); + nd = (uint32_t)std::min((size_t)nd, len); + if (nd == 0) { + if (has_leading_zeros) { + value = 0; + answer.ec = std::errc(); + answer.ptr = p; + return answer; + } + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + if (nd > 3) { + const UC *q = p + nd; + size_t rem = len - nd; + while (rem) { + if (*q < UC('0') || *q > UC('9')) + break; + ++q; + --rem; + } + answer.ec = std::errc::result_out_of_range; + answer.ptr = q; + return answer; + } + + digits.as_int ^= 0x30303030u; + digits.as_int <<= ((4 - nd) * 8); + + uint32_t check = ((digits.as_int >> 24) & 0xff) | + ((digits.as_int >> 8) & 0xff00) | + ((digits.as_int << 8) & 0xff0000); + if (check > 0x00020505) { + answer.ec = std::errc::result_out_of_range; + answer.ptr = p + nd; + return answer; + } + value = (uint8_t)((0x640a01 * digits.as_int) >> 24); + answer.ec = std::errc(); + answer.ptr = p + nd; + return answer; + } + uint64_t i = 0; if (base == 10) { loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible From fdb0eddf9912d687ec196bb488c5f439be6411fd Mon Sep 17 00:00:00 2001 From: Shikhar Date: Tue, 23 Dec 2025 06:13:17 +0530 Subject: [PATCH 32/38] c++14 constexpr Signed-off-by: Shikhar --- include/fast_float/ascii_number.h | 4 +-- include/fast_float/float_common.h | 46 +++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 030fd077..8cd7980a 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -509,7 +509,7 @@ parse_int_string(UC const *p, UC const *pend, T &value, UC const *const start_digits = p; - if constexpr (std::is_same_v) { + FASTFLOAT_IF_CONSTEXPR17(std::is_same::value) { const size_t len = (size_t)(pend - p); if (len == 0) { if (has_leading_zeros) { @@ -550,7 +550,7 @@ parse_int_string(UC const *p, UC const *pend, T &value, uint32_t magic = ((digits.as_int + 0x46464646u) | (digits.as_int - 0x30303030u)) & 0x80808080u; - uint32_t tz = (uint32_t)std::__countr_zero(magic); // 7, 15, 23, 31, or 32 + uint32_t tz = (uint32_t)countr_zero_32(magic); // 7, 15, 23, 31, or 32 uint32_t nd = (tz == 32) ? 4 : (tz >> 3); nd = (uint32_t)std::min((size_t)nd, len); if (nd == 0) { diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 18484a66..ab95e1d5 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -362,6 +362,52 @@ leading_zeroes(uint64_t input_num) { #endif } +/* Helper C++14 constexpr generic implementation of countr_zero for 32-bit */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int +countr_zero_generic_32(uint32_t input_num) { + if (input_num == 0) { + return 32; + } + int last_bit = 0; + if (!(input_num & 0x0000FFFF)) { + input_num >>= 16; + last_bit |= 16; + } + if (!(input_num & 0x00FF)) { + input_num >>= 8; + last_bit |= 8; + } + if (!(input_num & 0x0F)) { + input_num >>= 4; + last_bit |= 4; + } + if (!(input_num & 0x3)) { + input_num >>= 2; + last_bit |= 2; + } + if (!(input_num & 0x1)) { + last_bit |= 1; + } + return last_bit; +} + +/* count trailing zeroes for 32-bit integers */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int +countr_zero_32(uint32_t input_num) { + if (cpp20_and_in_constexpr()) { + return countr_zero_generic_32(input_num); + } +#ifdef FASTFLOAT_VISUAL_STUDIO + unsigned long trailing_zero = 0; + if (_BitScanForward(&trailing_zero, input_num)) { + return (int)trailing_zero; + } + return 32; +#else + return input_num == 0 ? 32 : __builtin_ctz(input_num); +#endif +} + // slow emulation routine for 32-bit fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; From 780c34135951c059e652808c190b607a6a43a5a3 Mon Sep 17 00:00:00 2001 From: Shikhar Date: Tue, 23 Dec 2025 06:17:11 +0530 Subject: [PATCH 33/38] fix macro Signed-off-by: Shikhar --- include/fast_float/ascii_number.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 8cd7980a..6208ef21 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -509,7 +509,7 @@ parse_int_string(UC const *p, UC const *pend, T &value, UC const *const start_digits = p; - FASTFLOAT_IF_CONSTEXPR17(std::is_same::value) { + FASTFLOAT_IF_CONSTEXPR17((std::is_same::value)) { const size_t len = (size_t)(pend - p); if (len == 0) { if (has_leading_zeros) { From 120bdfd713e34db69f2a235ff5d2a22e74952a93 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 24 Dec 2025 15:43:43 -0500 Subject: [PATCH 34/38] adding some ipv4 test --- tests/CMakeLists.txt | 1 + tests/ipv4_test.cpp | 92 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 tests/ipv4_test.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d8ed6f4d..a053581c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -94,6 +94,7 @@ endif() option(FASTFLOAT_EXHAUSTIVE "Exhaustive tests" OFF) if (FASTFLOAT_EXHAUSTIVE) + fast_float_add_cpp_test(ipv4_test) fast_float_add_cpp_test(short_random_string) fast_float_add_cpp_test(exhaustive32_midpoint) fast_float_add_cpp_test(random_string) diff --git a/tests/ipv4_test.cpp b/tests/ipv4_test.cpp new file mode 100644 index 00000000..82ddf9c5 --- /dev/null +++ b/tests/ipv4_test.cpp @@ -0,0 +1,92 @@ + +#include +#include +#include +#include +#include "fast_float/fast_float.h" + +char* uint8_to_chars_manual(char* ptr, uint8_t value) { + if (value == 0) { + *ptr++ = '0'; + return ptr; + } + char* start = ptr; + while (value > 0) { + *ptr++ = '0' + (value % 10); + value /= 10; + } + // Reverse the digits written so far + std::reverse(start, ptr); + return ptr; +} + +void uint32_to_ipv4_string(uint32_t ip, char* buffer) { + uint8_t octets[4] = { + static_cast(ip >> 24), + static_cast(ip >> 16), + static_cast(ip >> 8), + static_cast(ip) + }; + + char* ptr = buffer; + + for (int i = 0; i < 4; ++i) { + ptr = uint8_to_chars_manual(ptr, octets[i]); + + if (i < 3) { + *ptr++ = '.'; + } + } + *ptr = '\0'; +} + +fastfloat_really_inline uint32_t ipv4_string_to_uint32(const char* str, const char* end) { + uint32_t ip = 0; + const char* current = str; + + for (int i = 0; i < 4; ++i) { + uint8_t value; + auto r = fast_float::from_chars(current, end, value); + if (r.ec != std::errc()) { + throw std::invalid_argument("Invalid IP address format"); + } + current = r.ptr; + ip = (ip << 8) | value; + + if (i < 3) { + if (current == end || *current++ != '.') { + throw std::invalid_argument("Invalid IP address format"); + } + } + } + return ip; +} + +bool test_all_ipv4_conversions() { + std::cout << "Testing all IPv4 conversions... 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, ..." << std::endl; + char buffer[16]; + for (uint64_t ip = 0; ip <= 0xFFFFFFFF; ip+=1000) { + if(ip % 10000000 == 0) { + std::cout << "." << std::flush; + } + uint32_to_ipv4_string(static_cast(ip), buffer); + const char* end = buffer + strlen(buffer); + uint32_t parsed_ip = ipv4_string_to_uint32(buffer, end); + if (parsed_ip != ip) { + std::cerr << "Mismatch: original " << ip << ", parsed " << parsed_ip << std::endl; + return false; + } + } + std::cout << std::endl; + return true; +} + +int main() { + if (test_all_ipv4_conversions()) { + std::cout << "All IPv4 conversions passed!" << std::endl; + return EXIT_SUCCESS; + } else { + std::cerr << "IPv4 conversion test failed!" << std::endl; + return EXIT_FAILURE; + } +} \ No newline at end of file From 97cb3ec28dd29ef28d6b675b47fb977ade9b2c49 Mon Sep 17 00:00:00 2001 From: Shikhar Date: Thu, 25 Dec 2025 03:06:22 +0530 Subject: [PATCH 35/38] lint Signed-off-by: Shikhar --- tests/ipv4_test.cpp | 101 ++++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 50 deletions(-) diff --git a/tests/ipv4_test.cpp b/tests/ipv4_test.cpp index 82ddf9c5..f3055dcb 100644 --- a/tests/ipv4_test.cpp +++ b/tests/ipv4_test.cpp @@ -5,44 +5,42 @@ #include #include "fast_float/fast_float.h" -char* uint8_to_chars_manual(char* ptr, uint8_t value) { - if (value == 0) { - *ptr++ = '0'; - return ptr; - } - char* start = ptr; - while (value > 0) { - *ptr++ = '0' + (value % 10); - value /= 10; - } - // Reverse the digits written so far - std::reverse(start, ptr); +char *uint8_to_chars_manual(char *ptr, uint8_t value) { + if (value == 0) { + *ptr++ = '0'; return ptr; + } + char *start = ptr; + while (value > 0) { + *ptr++ = '0' + (value % 10); + value /= 10; + } + // Reverse the digits written so far + std::reverse(start, ptr); + return ptr; } -void uint32_to_ipv4_string(uint32_t ip, char* buffer) { - uint8_t octets[4] = { - static_cast(ip >> 24), - static_cast(ip >> 16), - static_cast(ip >> 8), - static_cast(ip) - }; +void uint32_to_ipv4_string(uint32_t ip, char *buffer) { + uint8_t octets[4] = {static_cast(ip >> 24), + static_cast(ip >> 16), + static_cast(ip >> 8), static_cast(ip)}; - char* ptr = buffer; + char *ptr = buffer; - for (int i = 0; i < 4; ++i) { - ptr = uint8_to_chars_manual(ptr, octets[i]); + for (int i = 0; i < 4; ++i) { + ptr = uint8_to_chars_manual(ptr, octets[i]); - if (i < 3) { - *ptr++ = '.'; - } + if (i < 3) { + *ptr++ = '.'; } - *ptr = '\0'; + } + *ptr = '\0'; } -fastfloat_really_inline uint32_t ipv4_string_to_uint32(const char* str, const char* end) { +fastfloat_really_inline uint32_t ipv4_string_to_uint32(const char *str, + const char *end) { uint32_t ip = 0; - const char* current = str; + const char *current = str; for (int i = 0; i < 4; ++i) { uint8_t value; @@ -63,30 +61,33 @@ fastfloat_really_inline uint32_t ipv4_string_to_uint32(const char* str, const ch } bool test_all_ipv4_conversions() { - std::cout << "Testing all IPv4 conversions... 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, ..." << std::endl; - char buffer[16]; - for (uint64_t ip = 0; ip <= 0xFFFFFFFF; ip+=1000) { - if(ip % 10000000 == 0) { - std::cout << "." << std::flush; - } - uint32_to_ipv4_string(static_cast(ip), buffer); - const char* end = buffer + strlen(buffer); - uint32_t parsed_ip = ipv4_string_to_uint32(buffer, end); - if (parsed_ip != ip) { - std::cerr << "Mismatch: original " << ip << ", parsed " << parsed_ip << std::endl; - return false; - } + std::cout << "Testing all IPv4 conversions... 0, 1000, 2000, 3000, 4000, " + "5000, 6000, 7000, 8000, 9000, ..." + << std::endl; + char buffer[16]; + for (uint64_t ip = 0; ip <= 0xFFFFFFFF; ip += 1000) { + if (ip % 10000000 == 0) { + std::cout << "." << std::flush; + } + uint32_to_ipv4_string(static_cast(ip), buffer); + const char *end = buffer + strlen(buffer); + uint32_t parsed_ip = ipv4_string_to_uint32(buffer, end); + if (parsed_ip != ip) { + std::cerr << "Mismatch: original " << ip << ", parsed " << parsed_ip + << std::endl; + return false; } - std::cout << std::endl; - return true; + } + std::cout << std::endl; + return true; } int main() { - if (test_all_ipv4_conversions()) { - std::cout << "All IPv4 conversions passed!" << std::endl; - return EXIT_SUCCESS; - } else { - std::cerr << "IPv4 conversion test failed!" << std::endl; - return EXIT_FAILURE; - } + if (test_all_ipv4_conversions()) { + std::cout << "All IPv4 conversions passed!" << std::endl; + return EXIT_SUCCESS; + } else { + std::cerr << "IPv4 conversion test failed!" << std::endl; + return EXIT_FAILURE; + } } \ No newline at end of file From cb813a7765162577bd67af6dec0140a86ea18ad9 Mon Sep 17 00:00:00 2001 From: Pavel Novikov Date: Sat, 27 Dec 2025 00:04:28 +0300 Subject: [PATCH 36/38] fixed UB --- include/fast_float/ascii_number.h | 40 ++++++++++++++++--------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 6208ef21..4e1ce3a1 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -523,33 +523,36 @@ parse_int_string(UC const *p, UC const *pend, T &value, return answer; } - union { - uint8_t as_str[4]; - uint32_t as_int; - } digits; + uint32_t digits; - if (cpp20_and_in_constexpr()) { - digits.as_int = 0; +#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST + if (std::is_constant_evaluated()) { + uint8_t str[4]{}; for (size_t j = 0; j < 4 && j < len; ++j) { - digits.as_str[j] = static_cast(p[j]); + str[j] = static_cast(p[j]); } - } else if (len >= 4) { - memcpy(&digits.as_int, p, 4); + digits = std::bit_cast(str); + } +#else + if (false) { + } +#endif + else if (len >= 4) { + ::memcpy(&digits, p, 4); } else { uint32_t b0 = static_cast(p[0]); uint32_t b1 = (len > 1) ? static_cast(p[1]) : 0xFFu; uint32_t b2 = (len > 2) ? static_cast(p[2]) : 0xFFu; uint32_t b3 = 0xFFu; #if FASTFLOAT_IS_BIG_ENDIAN - digits.as_int = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3; + digits = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3; #else - digits.as_int = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); + digits = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); #endif } uint32_t magic = - ((digits.as_int + 0x46464646u) | (digits.as_int - 0x30303030u)) & - 0x80808080u; + ((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u; uint32_t tz = (uint32_t)countr_zero_32(magic); // 7, 15, 23, 31, or 32 uint32_t nd = (tz == 32) ? 4 : (tz >> 3); nd = (uint32_t)std::min((size_t)nd, len); @@ -578,18 +581,17 @@ parse_int_string(UC const *p, UC const *pend, T &value, return answer; } - digits.as_int ^= 0x30303030u; - digits.as_int <<= ((4 - nd) * 8); + digits ^= 0x30303030u; + digits <<= ((4 - nd) * 8); - uint32_t check = ((digits.as_int >> 24) & 0xff) | - ((digits.as_int >> 8) & 0xff00) | - ((digits.as_int << 8) & 0xff0000); + uint32_t check = ((digits >> 24) & 0xff) | ((digits >> 8) & 0xff00) | + ((digits << 8) & 0xff0000); if (check > 0x00020505) { answer.ec = std::errc::result_out_of_range; answer.ptr = p + nd; return answer; } - value = (uint8_t)((0x640a01 * digits.as_int) >> 24); + value = (uint8_t)((0x640a01 * digits) >> 24); answer.ec = std::errc(); answer.ptr = p + nd; return answer; From b4d26ec866fe688af148d128c5ef8b8e98823a41 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Sat, 27 Dec 2025 12:06:36 -0500 Subject: [PATCH 37/38] v8.1.1 --- CMakeLists.txt | 2 +- README.md | 6 +++--- include/fast_float/float_common.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 645fce7a..99c322b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.14) -project(fast_float VERSION 8.1.0 LANGUAGES CXX) +project(fast_float VERSION 8.1.1 LANGUAGES CXX) set(FASTFLOAT_CXX_STANDARD 11 CACHE STRING "the C++ standard to use for fastfloat") set(CMAKE_CXX_STANDARD ${FASTFLOAT_CXX_STANDARD}) option(FASTFLOAT_TEST "Enable tests" OFF) diff --git a/README.md b/README.md index b039e634..f9a52647 100644 --- a/README.md +++ b/README.md @@ -533,7 +533,7 @@ sufficiently recent version of CMake (3.11 or better at least): FetchContent_Declare( fast_float GIT_REPOSITORY https://github.com/fastfloat/fast_float.git - GIT_TAG tags/v8.1.0 + GIT_TAG tags/v8.1.1 GIT_SHALLOW TRUE) FetchContent_MakeAvailable(fast_float) @@ -549,7 +549,7 @@ You may also use [CPM](https://github.com/cpm-cmake/CPM.cmake), like so: CPMAddPackage( NAME fast_float GITHUB_REPOSITORY "fastfloat/fast_float" - GIT_TAG v8.1.0) + GIT_TAG v8.1.1) ``` ## Using as single header @@ -561,7 +561,7 @@ if desired as described in the command line help. You may directly download automatically generated single-header files: - + ## Benchmarking diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index ab95e1d5..f1c54694 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -17,7 +17,7 @@ #define FASTFLOAT_VERSION_MAJOR 8 #define FASTFLOAT_VERSION_MINOR 1 -#define FASTFLOAT_VERSION_PATCH 0 +#define FASTFLOAT_VERSION_PATCH 1 #define FASTFLOAT_STRINGIZE_IMPL(x) #x #define FASTFLOAT_STRINGIZE(x) FASTFLOAT_STRINGIZE_IMPL(x) From dd77fb5e4c3339725775fd61e51034a5e5a55cbb Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Sat, 27 Dec 2025 12:08:58 -0500 Subject: [PATCH 38/38] v8.2.0 --- CMakeLists.txt | 2 +- README.md | 6 +++--- include/fast_float/float_common.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 99c322b2..f1f7a4a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.14) -project(fast_float VERSION 8.1.1 LANGUAGES CXX) +project(fast_float VERSION 8.2.0 LANGUAGES CXX) set(FASTFLOAT_CXX_STANDARD 11 CACHE STRING "the C++ standard to use for fastfloat") set(CMAKE_CXX_STANDARD ${FASTFLOAT_CXX_STANDARD}) option(FASTFLOAT_TEST "Enable tests" OFF) diff --git a/README.md b/README.md index f9a52647..be21066e 100644 --- a/README.md +++ b/README.md @@ -533,7 +533,7 @@ sufficiently recent version of CMake (3.11 or better at least): FetchContent_Declare( fast_float GIT_REPOSITORY https://github.com/fastfloat/fast_float.git - GIT_TAG tags/v8.1.1 + GIT_TAG tags/v8.2.0 GIT_SHALLOW TRUE) FetchContent_MakeAvailable(fast_float) @@ -549,7 +549,7 @@ You may also use [CPM](https://github.com/cpm-cmake/CPM.cmake), like so: CPMAddPackage( NAME fast_float GITHUB_REPOSITORY "fastfloat/fast_float" - GIT_TAG v8.1.1) + GIT_TAG v8.2.0) ``` ## Using as single header @@ -561,7 +561,7 @@ if desired as described in the command line help. You may directly download automatically generated single-header files: - + ## Benchmarking diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index f1c54694..62fe2bf0 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -16,8 +16,8 @@ #include "constexpr_feature_detect.h" #define FASTFLOAT_VERSION_MAJOR 8 -#define FASTFLOAT_VERSION_MINOR 1 -#define FASTFLOAT_VERSION_PATCH 1 +#define FASTFLOAT_VERSION_MINOR 2 +#define FASTFLOAT_VERSION_PATCH 0 #define FASTFLOAT_STRINGIZE_IMPL(x) #x #define FASTFLOAT_STRINGIZE(x) FASTFLOAT_STRINGIZE_IMPL(x)