diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml index fac5cee3..bd9e1e6c 100644 --- a/.github/workflows/cifuzz.yml +++ b/.github/workflows/cifuzz.yml @@ -20,14 +20,14 @@ jobs: fuzz-seconds: 300 output-sarif: true - name: Upload Crash - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 if: failure() && steps.build.outcome == 'success' with: name: artifacts path: ./out/artifacts - name: Upload Sarif if: always() && steps.build.outcome == 'success' - uses: github/codeql-action/upload-sarif@v3 + uses: github/codeql-action/upload-sarif@v4 with: # Path to SARIF file relative to the root of the repository sarif_file: cifuzz-sarif/results.sarif diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml index 1b00f447..399f0c9e 100644 --- a/.github/workflows/emscripten.yml +++ b/.github/workflows/emscripten.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2 - - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0 + - uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0 - uses: mymindstorm/setup-emsdk@6ab9eb1bda2574c4ddb79809fc9247783eaf9021 # v14 - name: Verify run: emcc -v diff --git a/.github/workflows/risc.yml b/.github/workflows/risc.yml index 68e26cb4..8bc85588 100644 --- a/.github/workflows/risc.yml +++ b/.github/workflows/risc.yml @@ -6,7 +6,7 @@ jobs: build: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install packages run: | sudo apt-get update -q -y diff --git a/CMakeLists.txt b/CMakeLists.txt index 645fce7a..f1f7a4a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.14) -project(fast_float VERSION 8.1.0 LANGUAGES CXX) +project(fast_float VERSION 8.2.0 LANGUAGES CXX) set(FASTFLOAT_CXX_STANDARD 11 CACHE STRING "the C++ standard to use for fastfloat") set(CMAKE_CXX_STANDARD ${FASTFLOAT_CXX_STANDARD}) option(FASTFLOAT_TEST "Enable tests" OFF) diff --git a/README.md b/README.md index 8fdddbc5..be21066e 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,9 @@ requires C++11): from_chars_result from_chars(char const *first, char const *last, float &value, ...); from_chars_result from_chars(char const *first, char const *last, double &value, ...); ``` +If they are available on your system, we also support fixed-width floating-point types such as `std::float64_t`, `std::float32_t`, `std::float16_t`, and `std::bfloat16_t`. -You can also parse integer types: - +You can also parse integer types such as `char`, `short`, `long`, `long long`, `unsigned char`, `unsigned short`, `unsigned long`, `unsigned long long`, `bool` (0/1), `int8_t`, `int16_t`, `int32_t`, `int64_t`, `uint8_t`, `uint16_t`, `uint32_t`, `uint64_t`. ```C++ from_chars_result from_chars(char const *first, char const *last, int &value, ...); from_chars_result from_chars(char const *first, char const *last, unsigned &value, ...); @@ -401,6 +401,23 @@ except `fast_float::integer_times_pow10()` does not report out-of-range errors, underflows to zero or overflows to infinity when the resulting value is out of range. +You can use template overloads to get the result converted to different +supported floating-point types: `float`, `double`, etc. +For example, to get result as `float` use +`fast_float::integer_times_pow10()` specialization: +```C++ +const uint64_t W = 12345678; +const int Q = 23; +const float result = fast_float::integer_times_pow10(W, Q); +std::cout.precision(9); +std::cout << "float: " << W << " * 10^" << Q << " = " << result << " (" + << (result == 12345678e23f ? "==" : "!=") << "expected)\n"; +``` +outputs +``` +float: 12345678 * 10^23 = 1.23456782e+30 (==expected) +``` + Overloads of `fast_float::integer_times_pow10()` are provided for signed and unsigned integer types: `int64_t`, `uint64_t`, etc. @@ -443,7 +460,7 @@ framework](https://github.com/microsoft/LightGBM). Packages ------ -[![Packaging status](https://repology.org/badge/vertical-allrepos/fastfloat.svg)](https://repology.org/project/fastfloat/versions) +[![Packaging status](https://repology.org/badge/vertical-allrepos/fast-float.svg)](https://repology.org/project/fast-float/versions) ## References @@ -516,7 +533,7 @@ sufficiently recent version of CMake (3.11 or better at least): FetchContent_Declare( fast_float GIT_REPOSITORY https://github.com/fastfloat/fast_float.git - GIT_TAG tags/v8.1.0 + GIT_TAG tags/v8.2.0 GIT_SHALLOW TRUE) FetchContent_MakeAvailable(fast_float) @@ -532,7 +549,7 @@ You may also use [CPM](https://github.com/cpm-cmake/CPM.cmake), like so: CPMAddPackage( NAME fast_float GITHUB_REPOSITORY "fastfloat/fast_float" - GIT_TAG v8.1.0) + GIT_TAG v8.2.0) ``` ## Using as single header @@ -544,7 +561,7 @@ if desired as described in the command line help. You may directly download automatically generated single-header files: - + ## Benchmarking @@ -598,6 +615,11 @@ long digits. The library includes code adapted from Google Wuffs (written by Nigel Tao) which was originally published under the Apache 2.0 license. +## Stars + + +[![Star History Chart](https://api.star-history.com/svg?repos=fastfloat/fast_float&type=Date)](https://www.star-history.com/#fastfloat/fast_float&Date) + ## License diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index b4e03954..4ee57895 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -1,9 +1,27 @@ +include(FetchContent) + +FetchContent_Declare( + counters + GIT_REPOSITORY https://github.com/lemire/counters.git + GIT_TAG v2.2.0 +) + +FetchContent_MakeAvailable(counters) + add_executable(realbenchmark benchmark.cpp) +target_link_libraries(realbenchmark PRIVATE counters::counters) +add_executable(bench_ip bench_ip.cpp) +target_link_libraries(bench_ip PRIVATE counters::counters) + set_property( TARGET realbenchmark PROPERTY CXX_STANDARD 17) - +set_property( + TARGET bench_ip + PROPERTY CXX_STANDARD 17) target_link_libraries(realbenchmark PUBLIC fast_float) +target_link_libraries(bench_ip PUBLIC fast_float) + include(ExternalProject) # Define the external project diff --git a/benchmarks/apple_arm_events.h b/benchmarks/apple_arm_events.h deleted file mode 100644 index f127d14d..00000000 --- a/benchmarks/apple_arm_events.h +++ /dev/null @@ -1,1117 +0,0 @@ -// Original design from: -// ============================================================================= -// XNU kperf/kpc -// Available for 64-bit Intel/Apple Silicon, macOS/iOS, with root privileges -// -// References: -// -// XNU source (since xnu 2422.1.72): -// https://github.com/apple/darwin-xnu/blob/main/osfmk/kern/kpc.h -// https://github.com/apple/darwin-xnu/blob/main/bsd/kern/kern_kpc.c -// -// Lightweight PET (Profile Every Thread, since xnu 3789.1.32): -// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/pet.c -// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/kperf_kpc.c -// -// System Private frameworks (since macOS 10.11, iOS 8.0): -// /System/Library/PrivateFrameworks/kperf.framework -// /System/Library/PrivateFrameworks/kperfdata.framework -// -// Xcode framework (since Xcode 7.0): -// /Applications/Xcode.app/Contents/SharedFrameworks/DVTInstrumentsFoundation.framework -// -// CPU database (plist files) -// macOS (since macOS 10.11): -// /usr/share/kpep/.plist -// iOS (copied from Xcode, since iOS 10.0, Xcode 8.0): -// /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform -// /DeviceSupport//DeveloperDiskImage.dmg/usr/share/kpep/.plist -// -// -// Created by YaoYuan on 2021. -// Released into the public domain (unlicense.org). -// ============================================================================= - -#ifndef M1CYCLES_H -#define M1CYCLES_H - -#include -#include -#include -#include -#include - -#include // for dlopen() and dlsym() -#include // for mach_absolute_time() -#include // for kdebug trace decode -#include // for sysctl() -#include // for usleep() - -struct performance_counters { - double cycles; - double branches; - double missed_branches; - double instructions; - - performance_counters(uint64_t c, uint64_t b, uint64_t m, uint64_t i) - : cycles(c), branches(b), missed_branches(m), instructions(i) {} - - performance_counters(double c, double b, double m, double i) - : cycles(c), branches(b), missed_branches(m), instructions(i) {} - - performance_counters(double init) - : cycles(init), branches(init), missed_branches(init), - instructions(init) {} - - inline performance_counters &operator-=(const performance_counters &other) { - cycles -= other.cycles; - branches -= other.branches; - missed_branches -= other.missed_branches; - instructions -= other.instructions; - return *this; - } - - inline performance_counters &min(const performance_counters &other) { - cycles = other.cycles < cycles ? other.cycles : cycles; - branches = other.branches < branches ? other.branches : branches; - missed_branches = other.missed_branches < missed_branches - ? other.missed_branches - : missed_branches; - instructions = - other.instructions < instructions ? other.instructions : instructions; - return *this; - } - - inline performance_counters &operator+=(const performance_counters &other) { - cycles += other.cycles; - branches += other.branches; - missed_branches += other.missed_branches; - instructions += other.instructions; - return *this; - } - - inline performance_counters &operator/=(double numerator) { - cycles /= numerator; - branches /= numerator; - missed_branches /= numerator; - instructions /= numerator; - return *this; - } -}; - -inline performance_counters operator-(const performance_counters &a, - const performance_counters &b) { - return performance_counters(a.cycles - b.cycles, a.branches - b.branches, - a.missed_branches - b.missed_branches, - a.instructions - b.instructions); -} - -typedef float f32; -typedef double f64; -typedef int8_t i8; -typedef uint8_t u8; -typedef int16_t i16; -typedef uint16_t u16; -typedef int32_t i32; -typedef uint32_t u32; -typedef int64_t i64; -typedef uint64_t u64; -typedef size_t usize; - -// ----------------------------------------------------------------------------- -// header (reverse engineered) -// This framework wraps some sysctl calls to communicate with the kpc in kernel. -// Most functions requires root privileges, or process is "blessed". -// ----------------------------------------------------------------------------- - -// Cross-platform class constants. -#define KPC_CLASS_FIXED (0) -#define KPC_CLASS_CONFIGURABLE (1) -#define KPC_CLASS_POWER (2) -#define KPC_CLASS_RAWPMU (3) - -// Cross-platform class mask constants. -#define KPC_CLASS_FIXED_MASK (1u << KPC_CLASS_FIXED) // 1 -#define KPC_CLASS_CONFIGURABLE_MASK (1u << KPC_CLASS_CONFIGURABLE) // 2 -#define KPC_CLASS_POWER_MASK (1u << KPC_CLASS_POWER) // 4 -#define KPC_CLASS_RAWPMU_MASK (1u << KPC_CLASS_RAWPMU) // 8 - -// PMU version constants. -#define KPC_PMU_ERROR (0) // Error -#define KPC_PMU_INTEL_V3 (1) // Intel -#define KPC_PMU_ARM_APPLE (2) // ARM64 -#define KPC_PMU_INTEL_V2 (3) // Old Intel -#define KPC_PMU_ARM_V2 (4) // Old ARM - -// The maximum number of counters we could read from every class in one go. -// ARMV7: FIXED: 1, CONFIGURABLE: 4 -// ARM32: FIXED: 2, CONFIGURABLE: 6 -// ARM64: FIXED: 2, CONFIGURABLE: CORE_NCTRS - FIXED (6 or 8) -// x86: 32 -#define KPC_MAX_COUNTERS 32 - -// Bits for defining what to do on an action. -// Defined in https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/action.h -#define KPERF_SAMPLER_TH_INFO (1U << 0) -#define KPERF_SAMPLER_TH_SNAPSHOT (1U << 1) -#define KPERF_SAMPLER_KSTACK (1U << 2) -#define KPERF_SAMPLER_USTACK (1U << 3) -#define KPERF_SAMPLER_PMC_THREAD (1U << 4) -#define KPERF_SAMPLER_PMC_CPU (1U << 5) -#define KPERF_SAMPLER_PMC_CONFIG (1U << 6) -#define KPERF_SAMPLER_MEMINFO (1U << 7) -#define KPERF_SAMPLER_TH_SCHEDULING (1U << 8) -#define KPERF_SAMPLER_TH_DISPATCH (1U << 9) -#define KPERF_SAMPLER_TK_SNAPSHOT (1U << 10) -#define KPERF_SAMPLER_SYS_MEM (1U << 11) -#define KPERF_SAMPLER_TH_INSCYC (1U << 12) -#define KPERF_SAMPLER_TK_INFO (1U << 13) - -// Maximum number of kperf action ids. -#define KPERF_ACTION_MAX (32) - -// Maximum number of kperf timer ids. -#define KPERF_TIMER_MAX (8) - -// x86/arm config registers are 64-bit -typedef u64 kpc_config_t; - -/// Print current CPU identification string to the buffer (same as snprintf), -/// such as "cpu_7_8_10b282dc_46". This string can be used to locate the PMC -/// database in /usr/share/kpep. -/// @return string's length, or negative value if error occurs. -/// @note This method does not requires root privileges. -/// @details sysctl get(hw.cputype), get(hw.cpusubtype), -/// get(hw.cpufamily), get(machdep.cpu.model) -static int (*kpc_cpu_string)(char *buf, usize buf_size); - -/// Get the version of KPC that's being run. -/// @return See `PMU version constants` above. -/// @details sysctl get(kpc.pmu_version) -static u32 (*kpc_pmu_version)(void); - -/// Get running PMC classes. -/// @return See `class mask constants` above, -/// 0 if error occurs or no class is set. -/// @details sysctl get(kpc.counting) -static u32 (*kpc_get_counting)(void); - -/// Set PMC classes to enable counting. -/// @param classes See `class mask constants` above, set 0 to shutdown counting. -/// @return 0 for success. -/// @details sysctl set(kpc.counting) -static int (*kpc_set_counting)(u32 classes); - -/// Get running PMC classes for current thread. -/// @return See `class mask constants` above, -/// 0 if error occurs or no class is set. -/// @details sysctl get(kpc.thread_counting) -static u32 (*kpc_get_thread_counting)(void); - -/// Set PMC classes to enable counting for current thread. -/// @param classes See `class mask constants` above, set 0 to shutdown counting. -/// @return 0 for success. -/// @details sysctl set(kpc.thread_counting) -static int (*kpc_set_thread_counting)(u32 classes); - -/// Get how many config registers there are for a given mask. -/// For example: Intel may returns 1 for `KPC_CLASS_FIXED_MASK`, -/// returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`. -/// @param classes See `class mask constants` above. -/// @return 0 if error occurs or no class is set. -/// @note This method does not requires root privileges. -/// @details sysctl get(kpc.config_count) -static u32 (*kpc_get_config_count)(u32 classes); - -/// Get config registers. -/// @param classes see `class mask constants` above. -/// @param config Config buffer to receive values, should not smaller than -/// kpc_get_config_count(classes) * sizeof(kpc_config_t). -/// @return 0 for success. -/// @details sysctl get(kpc.config_count), get(kpc.config) -static int (*kpc_get_config)(u32 classes, kpc_config_t *config); - -/// Set config registers. -/// @param classes see `class mask constants` above. -/// @param config Config buffer, should not smaller than -/// kpc_get_config_count(classes) * sizeof(kpc_config_t). -/// @return 0 for success. -/// @details sysctl get(kpc.config_count), set(kpc.config) -static int (*kpc_set_config)(u32 classes, kpc_config_t *config); - -/// Get how many counters there are for a given mask. -/// For example: Intel may returns 3 for `KPC_CLASS_FIXED_MASK`, -/// returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`. -/// @param classes See `class mask constants` above. -/// @note This method does not requires root privileges. -/// @details sysctl get(kpc.counter_count) -static u32 (*kpc_get_counter_count)(u32 classes); - -/// Get counter accumulations. -/// If `all_cpus` is true, the buffer count should not smaller than -/// (cpu_count * counter_count). Otherwize, the buffer count should not smaller -/// than (counter_count). -/// @see kpc_get_counter_count(), kpc_cpu_count(). -/// @param all_cpus true for all CPUs, false for current cpu. -/// @param classes See `class mask constants` above. -/// @param curcpu A pointer to receive current cpu id, can be NULL. -/// @param buf Buffer to receive counter's value. -/// @return 0 for success. -/// @details sysctl get(hw.ncpu), get(kpc.counter_count), get(kpc.counters) -static int (*kpc_get_cpu_counters)(bool all_cpus, u32 classes, int *curcpu, - u64 *buf); - -/// Get counter accumulations for current thread. -/// @param tid Thread id, should be 0. -/// @param buf_count The number of buf's elements (not bytes), -/// should not smaller than kpc_get_counter_count(). -/// @param buf Buffer to receive counter's value. -/// @return 0 for success. -/// @details sysctl get(kpc.thread_counters) -static int (*kpc_get_thread_counters)(u32 tid, u32 buf_count, u64 *buf); - -/// Acquire/release the counters used by the Power Manager. -/// @param val 1:acquire, 0:release -/// @return 0 for success. -/// @details sysctl set(kpc.force_all_ctrs) -static int (*kpc_force_all_ctrs_set)(int val); - -/// Get the state of all_ctrs. -/// @return 0 for success. -/// @details sysctl get(kpc.force_all_ctrs) -static int (*kpc_force_all_ctrs_get)(int *val_out); - -/// Set number of actions, should be `KPERF_ACTION_MAX`. -/// @details sysctl set(kperf.action.count) -static int (*kperf_action_count_set)(u32 count); - -/// Get number of actions. -/// @details sysctl get(kperf.action.count) -static int (*kperf_action_count_get)(u32 *count); - -/// Set what to sample when a trigger fires an action, e.g. -/// `KPERF_SAMPLER_PMC_CPU`. -/// @details sysctl set(kperf.action.samplers) -static int (*kperf_action_samplers_set)(u32 actionid, u32 sample); - -/// Get what to sample when a trigger fires an action. -/// @details sysctl get(kperf.action.samplers) -static int (*kperf_action_samplers_get)(u32 actionid, u32 *sample); - -/// Apply a task filter to the action, -1 to disable filter. -/// @details sysctl set(kperf.action.filter_by_task) -static int (*kperf_action_filter_set_by_task)(u32 actionid, i32 port); - -/// Apply a pid filter to the action, -1 to disable filter. -/// @details sysctl set(kperf.action.filter_by_pid) -static int (*kperf_action_filter_set_by_pid)(u32 actionid, i32 pid); - -/// Set number of time triggers, should be `KPERF_TIMER_MAX`. -/// @details sysctl set(kperf.timer.count) -static int (*kperf_timer_count_set)(u32 count); - -/// Get number of time triggers. -/// @details sysctl get(kperf.timer.count) -static int (*kperf_timer_count_get)(u32 *count); - -/// Set timer number and period. -/// @details sysctl set(kperf.timer.period) -static int (*kperf_timer_period_set)(u32 actionid, u64 tick); - -/// Get timer number and period. -/// @details sysctl get(kperf.timer.period) -static int (*kperf_timer_period_get)(u32 actionid, u64 *tick); - -/// Set timer number and actionid. -/// @details sysctl set(kperf.timer.action) -static int (*kperf_timer_action_set)(u32 actionid, u32 timerid); - -/// Get timer number and actionid. -/// @details sysctl get(kperf.timer.action) -static int (*kperf_timer_action_get)(u32 actionid, u32 *timerid); - -/// Set which timer ID does PET (Profile Every Thread). -/// @details sysctl set(kperf.timer.pet_timer) -static int (*kperf_timer_pet_set)(u32 timerid); - -/// Get which timer ID does PET (Profile Every Thread). -/// @details sysctl get(kperf.timer.pet_timer) -static int (*kperf_timer_pet_get)(u32 *timerid); - -/// Enable or disable sampling. -/// @details sysctl set(kperf.sampling) -static int (*kperf_sample_set)(u32 enabled); - -/// Get is currently sampling. -/// @details sysctl get(kperf.sampling) -static int (*kperf_sample_get)(u32 *enabled); - -/// Reset kperf: stop sampling, kdebug, timers and actions. -/// @return 0 for success. -static int (*kperf_reset)(void); - -/// Nanoseconds to CPU ticks. -static u64 (*kperf_ns_to_ticks)(u64 ns); - -/// CPU ticks to nanoseconds. -static u64 (*kperf_ticks_to_ns)(u64 ticks); - -/// CPU ticks frequency (mach_absolute_time). -static u64 (*kperf_tick_frequency)(void); - -/// Get lightweight PET mode (not in kperf.framework). -static int kperf_lightweight_pet_get(u32 *enabled) { - if (!enabled) - return -1; - usize size = 4; - return sysctlbyname("kperf.lightweight_pet", enabled, &size, NULL, 0); -} - -/// Set lightweight PET mode (not in kperf.framework). -static int kperf_lightweight_pet_set(u32 enabled) { - return sysctlbyname("kperf.lightweight_pet", NULL, NULL, &enabled, 4); -} - -// ----------------------------------------------------------------------------- -// header (reverse engineered) -// This framework provides some functions to access the local CPU database. -// These functions do not require root privileges. -// ----------------------------------------------------------------------------- - -// KPEP CPU archtecture constants. -#define KPEP_ARCH_I386 0 -#define KPEP_ARCH_X86_64 1 -#define KPEP_ARCH_ARM 2 -#define KPEP_ARCH_ARM64 3 - -/// KPEP event (size: 48/28 bytes on 64/32 bit OS) -typedef struct kpep_event { - const char *name; ///< Unique name of a event, such as "INST_RETIRED.ANY". - const char *description; ///< Description for this event. - const char *errata; ///< Errata, currently NULL. - const char *alias; ///< Alias name, such as "Instructions", "Cycles". - const char *fallback; ///< Fallback event name for fixed counter. - u32 mask; - u8 number; - u8 umask; - u8 reserved; - u8 is_fixed; -} kpep_event; - -/// KPEP database (size: 144/80 bytes on 64/32 bit OS) -typedef struct kpep_db { - const char *name; ///< Database name, such as "haswell". - const char *cpu_id; ///< Plist name, such as "cpu_7_8_10b282dc". - const char *marketing_name; ///< Marketing name, such as "Intel Haswell". - void *plist_data; ///< Plist data (CFDataRef), currently NULL. - void *event_map; ///< All events (CFDict). - kpep_event - *event_arr; ///< Event struct buffer (sizeof(kpep_event) * events_count). - kpep_event **fixed_event_arr; ///< Fixed counter events (sizeof(kpep_event *) - ///< * fixed_counter_count) - void *alias_map; ///< All aliases (CFDict). - usize reserved_1; - usize reserved_2; - usize reserved_3; - usize event_count; ///< All events count. - usize alias_count; - usize fixed_counter_count; - usize config_counter_count; - usize power_counter_count; - u32 archtecture; ///< see `KPEP CPU archtecture constants` above. - u32 fixed_counter_bits; - u32 config_counter_bits; - u32 power_counter_bits; -} kpep_db; - -/// KPEP config (size: 80/44 bytes on 64/32 bit OS) -typedef struct kpep_config { - kpep_db *db; - kpep_event **ev_arr; ///< (sizeof(kpep_event *) * counter_count), init NULL - usize *ev_map; ///< (sizeof(usize *) * counter_count), init 0 - usize *ev_idx; ///< (sizeof(usize *) * counter_count), init -1 - u32 *flags; ///< (sizeof(u32 *) * counter_count), init 0 - u64 *kpc_periods; ///< (sizeof(u64 *) * counter_count), init 0 - usize event_count; /// kpep_config_events_count() - usize counter_count; - u32 classes; ///< See `class mask constants` above. - u32 config_counter; - u32 power_counter; - u32 reserved; -} kpep_config; - -/// Error code for kpep_config_xxx() and kpep_db_xxx() functions. -typedef enum { - KPEP_CONFIG_ERROR_NONE = 0, - KPEP_CONFIG_ERROR_INVALID_ARGUMENT = 1, - KPEP_CONFIG_ERROR_OUT_OF_MEMORY = 2, - KPEP_CONFIG_ERROR_IO = 3, - KPEP_CONFIG_ERROR_BUFFER_TOO_SMALL = 4, - KPEP_CONFIG_ERROR_CUR_SYSTEM_UNKNOWN = 5, - KPEP_CONFIG_ERROR_DB_PATH_INVALID = 6, - KPEP_CONFIG_ERROR_DB_NOT_FOUND = 7, - KPEP_CONFIG_ERROR_DB_ARCH_UNSUPPORTED = 8, - KPEP_CONFIG_ERROR_DB_VERSION_UNSUPPORTED = 9, - KPEP_CONFIG_ERROR_DB_CORRUPT = 10, - KPEP_CONFIG_ERROR_EVENT_NOT_FOUND = 11, - KPEP_CONFIG_ERROR_CONFLICTING_EVENTS = 12, - KPEP_CONFIG_ERROR_COUNTERS_NOT_FORCED = 13, - KPEP_CONFIG_ERROR_EVENT_UNAVAILABLE = 14, - KPEP_CONFIG_ERROR_ERRNO = 15, - KPEP_CONFIG_ERROR_MAX -} kpep_config_error_code; - -/// Error description for kpep_config_error_code. -static const char *kpep_config_error_names[KPEP_CONFIG_ERROR_MAX] = { - "none", - "invalid argument", - "out of memory", - "I/O", - "buffer too small", - "current system unknown", - "database path invalid", - "database not found", - "database architecture unsupported", - "database version unsupported", - "database corrupt", - "event not found", - "conflicting events", - "all counters must be forced", - "event unavailable", - "check errno"}; - -/// Error description. -static const char *kpep_config_error_desc(int code) { - if (0 <= code && code < KPEP_CONFIG_ERROR_MAX) { - return kpep_config_error_names[code]; - } - return "unknown error"; -} - -/// Create a config. -/// @param db A kpep db, see kpep_db_create() -/// @param cfg_ptr A pointer to receive the new config. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_create)(kpep_db *db, kpep_config **cfg_ptr); - -/// Free the config. -static void (*kpep_config_free)(kpep_config *cfg); - -/// Add an event to config. -/// @param cfg The config. -/// @param ev_ptr A event pointer. -/// @param flag 0: all, 1: user space only -/// @param err Error bitmap pointer, can be NULL. -/// If return value is `CONFLICTING_EVENTS`, this bitmap contains -/// the conflicted event indices, e.g. "1 << 2" means index 2. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_add_event)(kpep_config *cfg, kpep_event **ev_ptr, - u32 flag, u32 *err); - -/// Remove event at index. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_remove_event)(kpep_config *cfg, usize idx); - -/// Force all counters. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_force_counters)(kpep_config *cfg); - -/// Get events count. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_events_count)(kpep_config *cfg, usize *count_ptr); - -/// Get all event pointers. -/// @param buf A buffer to receive event pointers. -/// @param buf_size The buffer's size in bytes, should not smaller than -/// kpep_config_events_count() * sizeof(void *). -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_events)(kpep_config *cfg, kpep_event **buf, - usize buf_size); - -/// Get kpc register configs. -/// @param buf A buffer to receive kpc register configs. -/// @param buf_size The buffer's size in bytes, should not smaller than -/// kpep_config_kpc_count() * sizeof(kpc_config_t). -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_kpc)(kpep_config *cfg, kpc_config_t *buf, - usize buf_size); - -/// Get kpc register config count. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_kpc_count)(kpep_config *cfg, usize *count_ptr); - -/// Get kpc classes. -/// @param classes See `class mask constants` above. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_kpc_classes)(kpep_config *cfg, u32 *classes_ptr); - -/// Get the index mapping from event to counter. -/// @param buf A buffer to receive indexes. -/// @param buf_size The buffer's size in bytes, should not smaller than -/// kpep_config_events_count() * sizeof(kpc_config_t). -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_config_kpc_map)(kpep_config *cfg, usize *buf, usize buf_size); - -/// Open a kpep database file in "/usr/share/kpep/" or "/usr/local/share/kpep/". -/// @param name File name, for example "haswell", "cpu_100000c_1_92fb37c8". -/// Pass NULL for current CPU. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_create)(const char *name, kpep_db **db_ptr); - -/// Free the kpep database. -static void (*kpep_db_free)(kpep_db *db); - -/// Get the database's name. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_name)(kpep_db *db, const char **name); - -/// Get the event alias count. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_aliases_count)(kpep_db *db, usize *count); - -/// Get all alias. -/// @param buf A buffer to receive all alias strings. -/// @param buf_size The buffer's size in bytes, -/// should not smaller than kpep_db_aliases_count() * sizeof(void *). -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_aliases)(kpep_db *db, const char **buf, usize buf_size); - -/// Get counters count for given classes. -/// @param classes 1: Fixed, 2: Configurable. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_counters_count)(kpep_db *db, u8 classes, usize *count); - -/// Get all event count. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_events_count)(kpep_db *db, usize *count); - -/// Get all events. -/// @param buf A buffer to receive all event pointers. -/// @param buf_size The buffer's size in bytes, -/// should not smaller than kpep_db_events_count() * sizeof(void *). -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_events)(kpep_db *db, kpep_event **buf, usize buf_size); - -/// Get one event by name. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_db_event)(kpep_db *db, const char *name, kpep_event **ev_ptr); - -/// Get event's name. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_event_name)(kpep_event *ev, const char **name_ptr); - -/// Get event's alias. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_event_alias)(kpep_event *ev, const char **alias_ptr); - -/// Get event's description. -/// @return kpep_config_error_code, 0 for success. -static int (*kpep_event_description)(kpep_event *ev, const char **str_ptr); - -// ----------------------------------------------------------------------------- -// load kperf/kperfdata dynamic library -// ----------------------------------------------------------------------------- - -typedef struct { - const char *name; - void **impl; -} lib_symbol; - -#define lib_nelems(x) (sizeof(x) / sizeof((x)[0])) -#define lib_symbol_def(name) \ - { #name, (void **)&name } - -static const lib_symbol lib_symbols_kperf[] = { - lib_symbol_def(kpc_pmu_version), - lib_symbol_def(kpc_cpu_string), - lib_symbol_def(kpc_set_counting), - lib_symbol_def(kpc_get_counting), - lib_symbol_def(kpc_set_thread_counting), - lib_symbol_def(kpc_get_thread_counting), - lib_symbol_def(kpc_get_config_count), - lib_symbol_def(kpc_get_counter_count), - lib_symbol_def(kpc_set_config), - lib_symbol_def(kpc_get_config), - lib_symbol_def(kpc_get_cpu_counters), - lib_symbol_def(kpc_get_thread_counters), - lib_symbol_def(kpc_force_all_ctrs_set), - lib_symbol_def(kpc_force_all_ctrs_get), - lib_symbol_def(kperf_action_count_set), - lib_symbol_def(kperf_action_count_get), - lib_symbol_def(kperf_action_samplers_set), - lib_symbol_def(kperf_action_samplers_get), - lib_symbol_def(kperf_action_filter_set_by_task), - lib_symbol_def(kperf_action_filter_set_by_pid), - lib_symbol_def(kperf_timer_count_set), - lib_symbol_def(kperf_timer_count_get), - lib_symbol_def(kperf_timer_period_set), - lib_symbol_def(kperf_timer_period_get), - lib_symbol_def(kperf_timer_action_set), - lib_symbol_def(kperf_timer_action_get), - lib_symbol_def(kperf_sample_set), - lib_symbol_def(kperf_sample_get), - lib_symbol_def(kperf_reset), - lib_symbol_def(kperf_timer_pet_set), - lib_symbol_def(kperf_timer_pet_get), - lib_symbol_def(kperf_ns_to_ticks), - lib_symbol_def(kperf_ticks_to_ns), - lib_symbol_def(kperf_tick_frequency), -}; - -static const lib_symbol lib_symbols_kperfdata[] = { - lib_symbol_def(kpep_config_create), - lib_symbol_def(kpep_config_free), - lib_symbol_def(kpep_config_add_event), - lib_symbol_def(kpep_config_remove_event), - lib_symbol_def(kpep_config_force_counters), - lib_symbol_def(kpep_config_events_count), - lib_symbol_def(kpep_config_events), - lib_symbol_def(kpep_config_kpc), - lib_symbol_def(kpep_config_kpc_count), - lib_symbol_def(kpep_config_kpc_classes), - lib_symbol_def(kpep_config_kpc_map), - lib_symbol_def(kpep_db_create), - lib_symbol_def(kpep_db_free), - lib_symbol_def(kpep_db_name), - lib_symbol_def(kpep_db_aliases_count), - lib_symbol_def(kpep_db_aliases), - lib_symbol_def(kpep_db_counters_count), - lib_symbol_def(kpep_db_events_count), - lib_symbol_def(kpep_db_events), - lib_symbol_def(kpep_db_event), - lib_symbol_def(kpep_event_name), - lib_symbol_def(kpep_event_alias), - lib_symbol_def(kpep_event_description), -}; - -#define lib_path_kperf "/System/Library/PrivateFrameworks/kperf.framework/kperf" -#define lib_path_kperfdata \ - "/System/Library/PrivateFrameworks/kperfdata.framework/kperfdata" - -static bool lib_inited = false; -static bool lib_has_err = false; -static char lib_err_msg[256]; - -static void *lib_handle_kperf = NULL; -static void *lib_handle_kperfdata = NULL; - -static void lib_deinit(void) { - lib_inited = false; - lib_has_err = false; - if (lib_handle_kperf) - dlclose(lib_handle_kperf); - if (lib_handle_kperfdata) - dlclose(lib_handle_kperfdata); - lib_handle_kperf = NULL; - lib_handle_kperfdata = NULL; - for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) { - const lib_symbol *symbol = &lib_symbols_kperf[i]; - *symbol->impl = NULL; - } - for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) { - const lib_symbol *symbol = &lib_symbols_kperfdata[i]; - *symbol->impl = NULL; - } -} - -static bool lib_init(void) { -#define return_err() \ - do { \ - lib_deinit(); \ - lib_inited = true; \ - lib_has_err = true; \ - return false; \ - } while (false) - - if (lib_inited) - return !lib_has_err; - - // load dynamic library - lib_handle_kperf = dlopen(lib_path_kperf, RTLD_LAZY); - if (!lib_handle_kperf) { - snprintf(lib_err_msg, sizeof(lib_err_msg), - "Failed to load kperf.framework, message: %s.", dlerror()); - return_err(); - } - lib_handle_kperfdata = dlopen(lib_path_kperfdata, RTLD_LAZY); - if (!lib_handle_kperfdata) { - snprintf(lib_err_msg, sizeof(lib_err_msg), - "Failed to load kperfdata.framework, message: %s.", dlerror()); - return_err(); - } - - // load symbol address from dynamic library - for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) { - const lib_symbol *symbol = &lib_symbols_kperf[i]; - *symbol->impl = dlsym(lib_handle_kperf, symbol->name); - if (!*symbol->impl) { - snprintf(lib_err_msg, sizeof(lib_err_msg), - "Failed to load kperf function: %s.", symbol->name); - return_err(); - } - } - for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) { - const lib_symbol *symbol = &lib_symbols_kperfdata[i]; - *symbol->impl = dlsym(lib_handle_kperfdata, symbol->name); - if (!*symbol->impl) { - snprintf(lib_err_msg, sizeof(lib_err_msg), - "Failed to load kperfdata function: %s.", symbol->name); - return_err(); - } - } - - lib_inited = true; - lib_has_err = false; - return true; - -#undef return_err -} - -// ----------------------------------------------------------------------------- -// kdebug private structs -// https://github.com/apple/darwin-xnu/blob/main/bsd/sys_private/kdebug_private.h -// ----------------------------------------------------------------------------- - -/* - * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf - * structure. - */ -#if defined(__arm64__) -typedef uint64_t kd_buf_argtype; -#else -typedef uintptr_t kd_buf_argtype; -#endif - -typedef struct { - uint64_t timestamp; - kd_buf_argtype arg1; - kd_buf_argtype arg2; - kd_buf_argtype arg3; - kd_buf_argtype arg4; - kd_buf_argtype arg5; /* the thread ID */ - uint32_t debugid; /* see */ - -/* - * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf - * structure. - */ -#if defined(__LP64__) || defined(__arm64__) - uint32_t cpuid; /* cpu index, from 0 */ - kd_buf_argtype unused; -#endif -} kd_buf; - -/* bits for the type field of kd_regtype */ -#define KDBG_CLASSTYPE 0x10000 -#define KDBG_SUBCLSTYPE 0x20000 -#define KDBG_RANGETYPE 0x40000 -#define KDBG_TYPENONE 0x80000 -#define KDBG_CKTYPES 0xF0000 - -/* only trace at most 4 types of events, at the code granularity */ -#define KDBG_VALCHECK 0x00200000U - -typedef struct { - unsigned int type; - unsigned int value1; - unsigned int value2; - unsigned int value3; - unsigned int value4; -} kd_regtype; - -typedef struct { - /* number of events that can fit in the buffers */ - int nkdbufs; - /* set if trace is disabled */ - int nolog; - /* kd_ctrl_page.flags */ - unsigned int flags; - /* number of threads in thread map */ - int nkdthreads; - /* the owning pid */ - int bufid; -} kbufinfo_t; - -// ----------------------------------------------------------------------------- -// kdebug utils -// ----------------------------------------------------------------------------- - -/// Clean up trace buffers and reset ktrace/kdebug/kperf. -/// @return 0 on success. -static int kdebug_reset(void) { - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREMOVE}; - return sysctl(mib, 3, NULL, NULL, NULL, 0); -} - -/// Disable and reinitialize the trace buffers. -/// @return 0 on success. -static int kdebug_reinit(void) { - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETUP}; - return sysctl(mib, 3, NULL, NULL, NULL, 0); -} - -/// Set debug filter. -static int kdebug_setreg(kd_regtype *kdr) { - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETREG}; - usize size = sizeof(kd_regtype); - return sysctl(mib, 3, kdr, &size, NULL, 0); -} - -/// Set maximum number of trace entries (kd_buf). -/// Only allow allocation up to half the available memory (sane_size). -/// @return 0 on success. -static int kdebug_trace_setbuf(int nbufs) { - int mib[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETBUF, nbufs}; - return sysctl(mib, 4, NULL, NULL, NULL, 0); -} - -/// Enable or disable kdebug trace. -/// Trace buffer must already be initialized. -/// @return 0 on success. -static int kdebug_trace_enable(bool enable) { - int mib[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDENABLE, enable}; - return sysctl(mib, 4, NULL, 0, NULL, 0); -} - -/// Retrieve trace buffer information from kernel. -/// @return 0 on success. -static int kdebug_get_bufinfo(kbufinfo_t *info) { - if (!info) - return -1; - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDGETBUF}; - size_t needed = sizeof(kbufinfo_t); - return sysctl(mib, 3, info, &needed, NULL, 0); -} - -/// Retrieve trace buffers from kernel. -/// @param buf Memory to receive buffer data, array of `kd_buf`. -/// @param len Length of `buf` in bytes. -/// @param count Number of trace entries (kd_buf) obtained. -/// @return 0 on success. -static int kdebug_trace_read(void *buf, usize len, usize *count) { - if (count) - *count = 0; - if (!buf || !len) - return -1; - - // Note: the input and output units are not the same. - // input: bytes - // output: number of kd_buf - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREADTR}; - int ret = sysctl(mib, 3, buf, &len, NULL, 0); - if (ret != 0) - return ret; - *count = len; - return 0; -} - -/// Block until there are new buffers filled or `timeout_ms` have passed. -/// @param timeout_ms timeout milliseconds, 0 means wait forever. -/// @param suc set true if new buffers filled. -/// @return 0 on success. -static int kdebug_wait(usize timeout_ms, bool *suc) { - if (timeout_ms == 0) - return -1; - int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDBUFWAIT}; - usize val = timeout_ms; - int ret = sysctl(mib, 3, NULL, &val, NULL, 0); - if (suc) - *suc = !!val; - return ret; -} - -// ----------------------------------------------------------------------------- -// Demo -// ----------------------------------------------------------------------------- - -#define EVENT_NAME_MAX 8 - -typedef struct { - const char *alias; /// name for print - const char *names[EVENT_NAME_MAX]; /// name from pmc db -} event_alias; - -/// Event names from /usr/share/kpep/.plist -static const event_alias profile_events[] = { - {"cycles", - { - "FIXED_CYCLES", // Apple A7-A15//CORE_ACTIVE_CYCLE - "CPU_CLK_UNHALTED.THREAD", // Intel Core 1th-10th - "CPU_CLK_UNHALTED.CORE", // Intel Yonah, Merom - }}, - {"instructions", - { - "FIXED_INSTRUCTIONS", // Apple A7-A15 - "INST_RETIRED.ANY" // Intel Yonah, Merom, Core 1th-10th - }}, - {"branches", - { - "INST_BRANCH", // Apple A7-A15 - "BR_INST_RETIRED.ALL_BRANCHES", // Intel Core 1th-10th - "INST_RETIRED.ANY", // Intel Yonah, Merom - }}, - {"branch-misses", - { - "BRANCH_MISPRED_NONSPEC", // Apple A7-A15, since iOS 15, macOS 12 - "BRANCH_MISPREDICT", // Apple A7-A14 - "BR_MISP_RETIRED.ALL_BRANCHES", // Intel Core 2th-10th - "BR_INST_RETIRED.MISPRED", // Intel Yonah, Merom - }}, -}; - -static kpep_event *get_event(kpep_db *db, const event_alias *alias) { - for (usize j = 0; j < EVENT_NAME_MAX; j++) { - const char *name = alias->names[j]; - if (!name) - break; - kpep_event *ev = NULL; - if (kpep_db_event(db, name, &ev) == 0) { - return ev; - } - } - return NULL; -} - -kpc_config_t regs[KPC_MAX_COUNTERS] = {0}; -usize counter_map[KPC_MAX_COUNTERS] = {0}; -u64 counters_0[KPC_MAX_COUNTERS] = {0}; -u64 counters_1[KPC_MAX_COUNTERS] = {0}; -const usize ev_count = sizeof(profile_events) / sizeof(profile_events[0]); - -bool setup_performance_counters() { - static bool init = false; - static bool worked = false; - - if (init) { - return worked; - } - init = true; - - // load dylib - if (!lib_init()) { - printf("Error: %s\n", lib_err_msg); - return (worked = false); - } - - // check permission - int force_ctrs = 0; - if (kpc_force_all_ctrs_get(&force_ctrs)) { - // printf("Permission denied, xnu/kpc requires root privileges.\n"); - return (worked = false); - } - int ret; - // load pmc db - kpep_db *db = NULL; - if ((ret = kpep_db_create(NULL, &db))) { - printf("Error: cannot load pmc database: %d.\n", ret); - return (worked = false); - } - printf("loaded db: %s (%s)\n", db->name, db->marketing_name); - - // create a config - kpep_config *cfg = NULL; - if ((ret = kpep_config_create(db, &cfg))) { - printf("Failed to create kpep config: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - if ((ret = kpep_config_force_counters(cfg))) { - printf("Failed to force counters: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - - // get events - kpep_event *ev_arr[ev_count] = {0}; - for (usize i = 0; i < ev_count; i++) { - const event_alias *alias = profile_events + i; - ev_arr[i] = get_event(db, alias); - if (!ev_arr[i]) { - printf("Cannot find event: %s.\n", alias->alias); - return (worked = false); - } - } - - // add event to config - for (usize i = 0; i < ev_count; i++) { - kpep_event *ev = ev_arr[i]; - if ((ret = kpep_config_add_event(cfg, &ev, 0, NULL))) { - printf("Failed to add event: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - } - - // prepare buffer and config - u32 classes = 0; - usize reg_count = 0; - if ((ret = kpep_config_kpc_classes(cfg, &classes))) { - printf("Failed get kpc classes: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - if ((ret = kpep_config_kpc_count(cfg, ®_count))) { - printf("Failed get kpc count: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - if ((ret = kpep_config_kpc_map(cfg, counter_map, sizeof(counter_map)))) { - printf("Failed get kpc map: %d (%s).\n", ret, kpep_config_error_desc(ret)); - return (worked = false); - } - if ((ret = kpep_config_kpc(cfg, regs, sizeof(regs)))) { - printf("Failed get kpc registers: %d (%s).\n", ret, - kpep_config_error_desc(ret)); - return (worked = false); - } - - // set config to kernel - if ((ret = kpc_force_all_ctrs_set(1))) { - printf("Failed force all ctrs: %d.\n", ret); - return (worked = false); - } - if ((classes & KPC_CLASS_CONFIGURABLE_MASK) && reg_count) { - if ((ret = kpc_set_config(classes, regs))) { - printf("Failed set kpc config: %d.\n", ret); - return (worked = false); - } - } - - // start counting - if ((ret = kpc_set_counting(classes))) { - printf("Failed set counting: %d.\n", ret); - return (worked = false); - } - if ((ret = kpc_set_thread_counting(classes))) { - printf("Failed set thread counting: %d.\n", ret); - return (worked = false); - } - - return (worked = true); -} - -inline performance_counters get_counters() { - static bool warned = false; - int ret; - // get counters before - if ((ret = kpc_get_thread_counters(0, KPC_MAX_COUNTERS, counters_0))) { - if (!warned) { - - printf("Failed get thread counters before: %d.\n", ret); - warned = true; - } - return 1; - } - /*printf("counters value:\n"); - for (usize i = 0; i < ev_count; i++) { - const event_alias *alias = profile_events + i; - usize idx = counter_map[i]; - u64 val = counters_1[idx] - counters_0[idx]; - printf("%14s: %llu\n", alias->alias, val); - }*/ - return performance_counters{ - counters_0[counter_map[0]], counters_0[counter_map[2]], - counters_0[counter_map[3]], counters_0[counter_map[1]]}; -} - -#endif diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp new file mode 100644 index 00000000..825a6b0a --- /dev/null +++ b/benchmarks/bench_ip.cpp @@ -0,0 +1,182 @@ +#include "counters/bench.h" +#include "fast_float/fast_float.h" +#include +#include +#include +#include +#include +#include +#include +#include + +void pretty_print(size_t volume, size_t bytes, std::string name, + counters::event_aggregate agg) { + if (agg.inner_count > 1) { + printf("# (inner count: %d)\n", agg.inner_count); + } + printf("%-40s : ", name.c_str()); + printf(" %5.2f GB/s ", bytes / agg.fastest_elapsed_ns()); + printf(" %5.1f Mip/s ", volume * 1000.0 / agg.fastest_elapsed_ns()); + printf(" %5.2f ns/ip ", agg.fastest_elapsed_ns() / volume); + if (counters::event_collector().has_events()) { + printf(" %5.2f GHz ", agg.fastest_cycles() / agg.fastest_elapsed_ns()); + printf(" %5.2f c/ip ", agg.fastest_cycles() / volume); + printf(" %5.2f i/ip ", agg.fastest_instructions() / volume); + printf(" %5.2f c/b ", agg.fastest_cycles() / bytes); + printf(" %5.2f i/b ", agg.fastest_instructions() / bytes); + printf(" %5.2f i/c ", agg.fastest_instructions() / agg.fastest_cycles()); + } + printf("\n"); +} + +fastfloat_really_inline const char *seek_ip_end(const char *p, + const char *pend) { + const char *current = p; + size_t count = 0; + for (; current != pend; ++current) { + if (*current == '.') { + count++; + if (count == 3) { + ++current; + break; + } + } + } + while (current != pend) { + if (*current <= '9' && *current >= '0') { + ++current; + } else { + break; + } + } + return current; +} + +enum class parse_method { standard, fast_float }; + +template +fastfloat_really_inline std::pair +simple_parse_ip_line(const char *p, const char *pend) { + const char *current = p; + uint32_t ip = 0; + for (int i = 0; i < 4; ++i) { + uint8_t value; + if constexpr (use_standard == parse_method::standard) { + auto r = std::from_chars(current, pend, value); + if (r.ec != std::errc()) { + return {false, 0}; + } + current = r.ptr; + } else if constexpr (use_standard == parse_method::fast_float) { + auto r = fast_float::from_chars(current, pend, value); + if (r.ec != std::errc()) { + return {false, 0}; + } + current = r.ptr; + } + ip = (ip << 8) | value; + if (i < 3) { + if (current == pend || *current++ != '.') { + return {false, 0}; + } + } + } + return {true, ip}; +} + +static std::string make_ip_line(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { + std::string s; + s.reserve(16); + s += std::to_string(a); + s += '.'; + s += std::to_string(b); + s += '.'; + s += std::to_string(c); + s += '.'; + s += std::to_string(d); + s += '\n'; + return s; +} + +int main() { + constexpr size_t N = 15000; + std::mt19937 rng(1234); + std::uniform_int_distribution dist(0, 255); + + std::string buf; + constexpr size_t ip_size = 16; + buf.reserve(N * ip_size); + + for (size_t i = 0; i < N; ++i) { + uint8_t a = (uint8_t)dist(rng); + uint8_t b = (uint8_t)dist(rng); + uint8_t c = (uint8_t)dist(rng); + uint8_t d = (uint8_t)dist(rng); + std::string ip_line = make_ip_line(a, b, c, d); + ip_line.resize(ip_size, ' '); // pad to fixed size + buf.append(ip_line); + } + + // sentinel to allow 4-byte loads at end + buf.append(4, '\0'); + + const size_t bytes = buf.size() - 4; // exclude sentinel from throughput + const size_t volume = N; + + volatile uint32_t sink = 0; + std::string buffer(ip_size * N, ' '); + + pretty_print(volume, bytes, "memcpy baseline", counters::bench([&]() { + std::memcpy((char *)buffer.data(), buf.data(), bytes); + })); + + pretty_print(volume, bytes, "just_seek_ip_end (no parse)", + counters::bench([&]() { + const char *p = buf.data(); + const char *pend = buf.data() + bytes; + uint32_t sum = 0; + int ok = 0; + for (size_t i = 0; i < N; ++i) { + const char *q = seek_ip_end(p, pend); + sum += (uint32_t)(q - p); + p += ip_size; + } + sink += sum; + })); + + pretty_print(volume, bytes, "parse_ip_std_fromchars", counters::bench([&]() { + const char *p = buf.data(); + const char *pend = buf.data() + bytes; + uint32_t sum = 0; + int ok = 0; + for (size_t i = 0; i < N; ++i) { + auto [ok, ip] = + simple_parse_ip_line(p, pend); + sum += ip; + if (!ok) { + std::abort(); + } + p += ip_size; + } + sink += sum; + })); + + pretty_print(volume, bytes, "parse_ip_fastfloat", counters::bench([&]() { + const char *p = buf.data(); + const char *pend = buf.data() + bytes; + uint32_t sum = 0; + int ok = 0; + for (size_t i = 0; i < N; ++i) { + auto [ok, ip] = + simple_parse_ip_line(p, pend); + sum += ip; + if (!ok) { + std::abort(); + } + p += ip_size; + } + sink += sum; + })); + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index 05f12330..d90038ed 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -1,7 +1,7 @@ #if defined(__linux__) || (__APPLE__ && __aarch64__) #define USING_COUNTERS #endif -#include "event_counter.h" +#include "counters/event_counter.h" #include #include "fast_float/fast_float.h" #include @@ -50,14 +50,14 @@ double findmax_fastfloat32(std::vector> &s) { return answer; } -event_collector collector{}; +counters::event_collector collector{}; #ifdef USING_COUNTERS template -std::vector +std::vector time_it_ns(std::vector> &lines, T const &function, size_t repeat) { - std::vector aggregate; + std::vector aggregate; bool printed_bug = false; for (size_t i = 0; i < repeat; i++) { collector.start(); @@ -72,7 +72,7 @@ time_it_ns(std::vector> &lines, T const &function, } void pretty_print(double volume, size_t number_of_floats, std::string name, - std::vector events) { + std::vector events) { double volumeMB = volume / (1024. * 1024.); double average_ns{0}; double min_ns{DBL_MAX}; @@ -84,7 +84,7 @@ void pretty_print(double volume, size_t number_of_floats, std::string name, double branches_avg{0}; double branch_misses_min{0}; double branch_misses_avg{0}; - for (event_count e : events) { + for (counters::event_count e : events) { double ns = e.elapsed_ns(); average_ns += ns; min_ns = min_ns < ns ? min_ns : ns; @@ -102,7 +102,7 @@ void pretty_print(double volume, size_t number_of_floats, std::string name, branches_avg += branches; branches_min = branches_min < branches ? branches_min : branches; - double branch_misses = e.missed_branches(); + double branch_misses = e.branch_misses(); branch_misses_avg += branch_misses; branch_misses_min = branch_misses_min < branch_misses ? branch_misses_min : branch_misses; diff --git a/benchmarks/event_counter.h b/benchmarks/event_counter.h deleted file mode 100644 index cd594787..00000000 --- a/benchmarks/event_counter.h +++ /dev/null @@ -1,181 +0,0 @@ -#ifndef __EVENT_COUNTER_H -#define __EVENT_COUNTER_H - -#include -#ifndef _MSC_VER -#include -#endif -#include - -#include - -#include -#include - -#include "linux-perf-events.h" -#ifdef __linux__ -#include -#endif - -#if (defined(__APPLE__) && __APPLE__) && (defined(__aarch64__) && __aarch64__) -#include "apple_arm_events.h" -#endif - -struct event_count { - std::chrono::duration elapsed; - std::vector event_counts; - - event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {} - - event_count(const std::chrono::duration _elapsed, - const std::vector _event_counts) - : elapsed(_elapsed), event_counts(_event_counts) {} - - event_count(const event_count &other) - : elapsed(other.elapsed), event_counts(other.event_counts) {} - - // The types of counters (so we can read the getter more easily) - enum event_counter_types { - CPU_CYCLES = 0, - INSTRUCTIONS = 1, - BRANCHES = 2, - MISSED_BRANCHES = 3 - }; - - double elapsed_sec() const { - return std::chrono::duration(elapsed).count(); - } - - double elapsed_ns() const { - return std::chrono::duration(elapsed).count(); - } - - double cycles() const { - return static_cast(event_counts[CPU_CYCLES]); - } - - double instructions() const { - return static_cast(event_counts[INSTRUCTIONS]); - } - - double branches() const { - return static_cast(event_counts[BRANCHES]); - } - - double missed_branches() const { - return static_cast(event_counts[MISSED_BRANCHES]); - } - - event_count &operator=(const event_count &other) { - this->elapsed = other.elapsed; - this->event_counts = other.event_counts; - return *this; - } - - event_count operator+(const event_count &other) const { - return event_count(elapsed + other.elapsed, - { - event_counts[0] + other.event_counts[0], - event_counts[1] + other.event_counts[1], - event_counts[2] + other.event_counts[2], - event_counts[3] + other.event_counts[3], - event_counts[4] + other.event_counts[4], - }); - } - - void operator+=(const event_count &other) { *this = *this + other; } -}; - -struct event_aggregate { - bool has_events = false; - int iterations = 0; - event_count total{}; - event_count best{}; - event_count worst{}; - - event_aggregate() = default; - - void operator<<(const event_count &other) { - if (iterations == 0 || other.elapsed < best.elapsed) { - best = other; - } - if (iterations == 0 || other.elapsed > worst.elapsed) { - worst = other; - } - iterations++; - total += other; - } - - double elapsed_sec() const { return total.elapsed_sec() / iterations; } - - double elapsed_ns() const { return total.elapsed_ns() / iterations; } - - double cycles() const { return total.cycles() / iterations; } - - double instructions() const { return total.instructions() / iterations; } - - double branches() const { return total.branches() / iterations; } - - double missed_branches() const { - return total.missed_branches() / iterations; - } -}; - -struct event_collector { - event_count count{}; - std::chrono::time_point start_clock{}; - -#if defined(__linux__) - LinuxEvents linux_events; - - event_collector() - : linux_events(std::vector{ - PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS, - PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions - PERF_COUNT_HW_BRANCH_MISSES}) {} - - bool has_events() { return linux_events.is_working(); } -#elif __APPLE__ && __aarch64__ - performance_counters diff; - - event_collector() : diff(0) { setup_performance_counters(); } - - bool has_events() { return setup_performance_counters(); } -#else - event_collector() {} - - bool has_events() { return false; } -#endif - - inline void start() { -#if defined(__linux) - linux_events.start(); -#elif __APPLE__ && __aarch64__ - if (has_events()) { - diff = get_counters(); - } -#endif - start_clock = std::chrono::steady_clock::now(); - } - - inline event_count &end() { - const auto end_clock = std::chrono::steady_clock::now(); -#if defined(__linux) - linux_events.end(count.event_counts); -#elif __APPLE__ && __aarch64__ - if (has_events()) { - performance_counters end = get_counters(); - diff = end - diff; - } - count.event_counts[0] = diff.cycles; - count.event_counts[1] = diff.instructions; - count.event_counts[2] = diff.branches; - count.event_counts[3] = diff.missed_branches; - count.event_counts[4] = 0; -#endif - count.elapsed = end_clock - start_clock; - return count; - } -}; - -#endif diff --git a/benchmarks/linux-perf-events.h b/benchmarks/linux-perf-events.h deleted file mode 100644 index 0a9e5538..00000000 --- a/benchmarks/linux-perf-events.h +++ /dev/null @@ -1,104 +0,0 @@ -#pragma once -#ifdef __linux__ - -#include // for __NR_perf_event_open -#include // for perf event constants -#include // for ioctl -#include // for syscall - -#include // for errno -#include // for memset -#include - -#include -#include - -template class LinuxEvents { - int fd; - bool working; - perf_event_attr attribs{}; - size_t num_events{}; - std::vector temp_result_vec{}; - std::vector ids{}; - -public: - explicit LinuxEvents(std::vector config_vec) : fd(0), working(true) { - memset(&attribs, 0, sizeof(attribs)); - attribs.type = TYPE; - attribs.size = sizeof(attribs); - attribs.disabled = 1; - attribs.exclude_kernel = 1; - attribs.exclude_hv = 1; - - attribs.sample_period = 0; - attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; - const int pid = 0; // the current process - const int cpu = -1; // all CPUs - const unsigned long flags = 0; - - int group = -1; // no group - num_events = config_vec.size(); - ids.resize(config_vec.size()); - uint32_t i = 0; - for (auto config : config_vec) { - attribs.config = config; - int _fd = static_cast( - syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags)); - if (_fd == -1) { - report_error("perf_event_open"); - } - ioctl(_fd, PERF_EVENT_IOC_ID, &ids[i++]); - if (group == -1) { - group = _fd; - fd = _fd; - } - } - - temp_result_vec.resize(num_events * 2 + 1); - } - - ~LinuxEvents() { - if (fd != -1) { - close(fd); - } - } - - inline void start() { - if (fd != -1) { - if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) { - report_error("ioctl(PERF_EVENT_IOC_RESET)"); - } - - if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) { - report_error("ioctl(PERF_EVENT_IOC_ENABLE)"); - } - } - } - - inline void end(std::vector &results) { - if (fd != -1) { - if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) { - report_error("ioctl(PERF_EVENT_IOC_DISABLE)"); - } - - if (read(fd, temp_result_vec.data(), temp_result_vec.size() * 8) == -1) { - report_error("read"); - } - } - // our actual results are in slots 1,3,5, ... of this structure - for (uint32_t i = 1; i < temp_result_vec.size(); i += 2) { - results[i / 2] = temp_result_vec[i]; - } - for (uint32_t i = 2; i < temp_result_vec.size(); i += 2) { - if (ids[i / 2 - 1] != temp_result_vec[i]) { - report_error("event mismatch"); - } - } - } - - bool is_working() { return working; } - -private: - void report_error(const std::string &) { working = false; } -}; -#endif \ No newline at end of file diff --git a/fuzz/build.sh b/fuzz/build.sh index 5cbe87aa..cce114d8 100644 --- a/fuzz/build.sh +++ b/fuzz/build.sh @@ -5,4 +5,8 @@ $CXX $CFLAGS $CXXFLAGS \ -c $SRC/fast_float/fuzz/from_chars.cc -o from_chars.o $CXX $CFLAGS $CXXFLAGS $LIB_FUZZING_ENGINE from_chars.o \ - -o $OUT/from_chars \ No newline at end of file + -o $OUT/from_chars + +# Build unit tests +cmake -DFASTFLOAT_TEST=ON -DCMAKE_EXE_LINKER_FLAGS="-lpthread" +make diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 5683cd47..4e1ce3a1 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -509,6 +509,94 @@ parse_int_string(UC const *p, UC const *pend, T &value, UC const *const start_digits = p; + FASTFLOAT_IF_CONSTEXPR17((std::is_same::value)) { + const size_t len = (size_t)(pend - p); + if (len == 0) { + if (has_leading_zeros) { + value = 0; + answer.ec = std::errc(); + answer.ptr = p; + } else { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + } + return answer; + } + + uint32_t digits; + +#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST + if (std::is_constant_evaluated()) { + uint8_t str[4]{}; + for (size_t j = 0; j < 4 && j < len; ++j) { + str[j] = static_cast(p[j]); + } + digits = std::bit_cast(str); + } +#else + if (false) { + } +#endif + else if (len >= 4) { + ::memcpy(&digits, p, 4); + } else { + uint32_t b0 = static_cast(p[0]); + uint32_t b1 = (len > 1) ? static_cast(p[1]) : 0xFFu; + uint32_t b2 = (len > 2) ? static_cast(p[2]) : 0xFFu; + uint32_t b3 = 0xFFu; +#if FASTFLOAT_IS_BIG_ENDIAN + digits = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3; +#else + digits = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); +#endif + } + + uint32_t magic = + ((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u; + uint32_t tz = (uint32_t)countr_zero_32(magic); // 7, 15, 23, 31, or 32 + uint32_t nd = (tz == 32) ? 4 : (tz >> 3); + nd = (uint32_t)std::min((size_t)nd, len); + if (nd == 0) { + if (has_leading_zeros) { + value = 0; + answer.ec = std::errc(); + answer.ptr = p; + return answer; + } + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + if (nd > 3) { + const UC *q = p + nd; + size_t rem = len - nd; + while (rem) { + if (*q < UC('0') || *q > UC('9')) + break; + ++q; + --rem; + } + answer.ec = std::errc::result_out_of_range; + answer.ptr = q; + return answer; + } + + digits ^= 0x30303030u; + digits <<= ((4 - nd) * 8); + + uint32_t check = ((digits >> 24) & 0xff) | ((digits >> 8) & 0xff00) | + ((digits << 8) & 0xff0000); + if (check > 0x00020505) { + answer.ec = std::errc::result_out_of_range; + answer.ptr = p + nd; + return answer; + } + value = (uint8_t)((0x640a01 * digits) >> 24); + answer.ec = std::errc(); + answer.ptr = p + nd; + return answer; + } + uint64_t i = 0; if (base == 10) { loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h index d7ef3d9a..03e70dcc 100644 --- a/include/fast_float/digit_comparison.h +++ b/include/fast_float/digit_comparison.h @@ -38,11 +38,8 @@ constexpr static uint64_t powers_of_ten_uint64[] = {1UL, // this algorithm is not even close to optimized, but it has no practical // effect on performance: in order to have a faster algorithm, we'd need // to slow down performance for faster algorithms, and this is still fast. -template fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t -scientific_exponent(parsed_number_string_t &num) noexcept { - uint64_t mantissa = num.mantissa; - int32_t exponent = int32_t(num.exponent); +scientific_exponent(uint64_t mantissa, int32_t exponent) noexcept { while (mantissa >= 10000) { mantissa /= 10000; exponent += 4; @@ -398,7 +395,7 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp))); } - // compare digits, and use it to director rounding + // compare digits, and use it to direct rounding int ord = real_digits.compare(theor_digits); adjusted_mantissa answer = am; round(answer, [ord](adjusted_mantissa &a, int32_t shift) { @@ -419,7 +416,7 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( return answer; } -// parse the significant digits as a big integer to unambiguously round the +// parse the significant digits as a big integer to unambiguously round // the significant digits. here, we are trying to determine how to round // an extended float representation close to `b+h`, halfway between `b` // (the float rounded-down) and `b+u`, the next positive float. this @@ -438,7 +435,8 @@ digit_comp(parsed_number_string_t &num, adjusted_mantissa am) noexcept { // remove the invalid exponent bias am.power2 -= invalid_am_bias; - int32_t sci_exp = scientific_exponent(num); + int32_t sci_exp = + scientific_exponent(num.mantissa, static_cast(num.exponent)); size_t max_digits = binary_format::max_digits(); size_t digits = 0; bigint bigmant; diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h index a190d7c8..eb822f58 100644 --- a/include/fast_float/fast_float.h +++ b/include/fast_float/fast_float.h @@ -63,6 +63,20 @@ integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept; FASTFLOAT_CONSTEXPR20 inline double integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept; +/** + * This function is a template overload of `integer_times_pow10()` + * that returns a floating-point value of type `T` that is one of + * supported floating-point types (e.g. `double`, `float`). + */ +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept; +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept; + /** * from_chars for integer types. */ diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index 62d199ca..62fe2bf0 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -16,7 +16,7 @@ #include "constexpr_feature_detect.h" #define FASTFLOAT_VERSION_MAJOR 8 -#define FASTFLOAT_VERSION_MINOR 1 +#define FASTFLOAT_VERSION_MINOR 2 #define FASTFLOAT_VERSION_PATCH 0 #define FASTFLOAT_STRINGIZE_IMPL(x) #x @@ -362,6 +362,52 @@ leading_zeroes(uint64_t input_num) { #endif } +/* Helper C++14 constexpr generic implementation of countr_zero for 32-bit */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int +countr_zero_generic_32(uint32_t input_num) { + if (input_num == 0) { + return 32; + } + int last_bit = 0; + if (!(input_num & 0x0000FFFF)) { + input_num >>= 16; + last_bit |= 16; + } + if (!(input_num & 0x00FF)) { + input_num >>= 8; + last_bit |= 8; + } + if (!(input_num & 0x0F)) { + input_num >>= 4; + last_bit |= 4; + } + if (!(input_num & 0x3)) { + input_num >>= 2; + last_bit |= 2; + } + if (!(input_num & 0x1)) { + last_bit |= 1; + } + return last_bit; +} + +/* count trailing zeroes for 32-bit integers */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int +countr_zero_32(uint32_t input_num) { + if (cpp20_and_in_constexpr()) { + return countr_zero_generic_32(input_num); + } +#ifdef FASTFLOAT_VISUAL_STUDIO + unsigned long trailing_zero = 0; + if (_BitScanForward(&trailing_zero, input_num)) { + return (int)trailing_zero; + } + return 32; +#else + return input_num == 0 ? 32 : __builtin_ctz(input_num); +#endif +} + // slow emulation routine for 32-bit fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; @@ -406,8 +452,8 @@ full_multiplication(uint64_t a, uint64_t b) { // But MinGW on ARM64 doesn't have native support for 64-bit multiplications answer.high = __umulh(a, b); answer.low = a * b; -#elif defined(FASTFLOAT_32BIT) || \ - (defined(_WIN64) && !defined(__clang__) && !defined(_M_ARM64)) +#elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__) && \ + !defined(_M_ARM64) && !defined(__GNUC__)) answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64 #elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__) __uint128_t r = ((__uint128_t)a) * b; @@ -1166,6 +1212,9 @@ static_assert(std::is_same, uint64_t>::value, static_assert( std::numeric_limits::is_iec559, "std::float64_t must fulfill the requirements of IEC 559 (IEEE 754)"); + +template <> +struct binary_format : public binary_format {}; #endif // __STDCPP_FLOAT64_T__ #ifdef __STDCPP_FLOAT32_T__ @@ -1174,6 +1223,9 @@ static_assert(std::is_same, uint32_t>::value, static_assert( std::numeric_limits::is_iec559, "std::float32_t must fulfill the requirements of IEC 559 (IEEE 754)"); + +template <> +struct binary_format : public binary_format {}; #endif // __STDCPP_FLOAT32_T__ #ifdef __STDCPP_FLOAT16_T__ @@ -1245,7 +1297,6 @@ constexpr chars_format adjust_for_feature_macros(chars_format fmt) { ; } } // namespace detail - } // namespace fast_float #endif diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index a44fef0b..d453c145 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -344,44 +344,79 @@ from_chars(UC const *first, UC const *last, T &value, int base) noexcept { return from_chars_advanced(first, last, value, options); } -FASTFLOAT_CONSTEXPR20 inline double -integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept { - double value; +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept { + T value; if (clinger_fast_path_impl(mantissa, decimal_exponent, false, value)) return value; adjusted_mantissa am = - compute_float>(decimal_exponent, mantissa); + compute_float>(decimal_exponent, mantissa); to_float(false, am, value); return value; } -FASTFLOAT_CONSTEXPR20 inline double -integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept { +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value, T>::type + integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept { const bool is_negative = mantissa < 0; const uint64_t m = static_cast(is_negative ? -mantissa : mantissa); - double value; + T value; if (clinger_fast_path_impl(m, decimal_exponent, is_negative, value)) return value; - adjusted_mantissa am = - compute_float>(decimal_exponent, m); + adjusted_mantissa am = compute_float>(decimal_exponent, m); to_float(is_negative, am, value); return value; } +FASTFLOAT_CONSTEXPR20 inline double +integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(mantissa, decimal_exponent); +} + +FASTFLOAT_CONSTEXPR20 inline double +integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(mantissa, decimal_exponent); +} + // the following overloads are here to avoid surprising ambiguity for int, // unsigned, etc. +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value && + std::is_integral::value && + !std::is_signed::value, + T>::type + integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(static_cast(mantissa), + decimal_exponent); +} + +template +FASTFLOAT_CONSTEXPR20 + typename std::enable_if::value && + std::is_integral::value && + std::is_signed::value, + T>::type + integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { + return integer_times_pow10(static_cast(mantissa), + decimal_exponent); +} + template -FASTFLOAT_CONSTEXPR20 inline typename std::enable_if< +FASTFLOAT_CONSTEXPR20 typename std::enable_if< std::is_integral::value && !std::is_signed::value, double>::type integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { return integer_times_pow10(static_cast(mantissa), decimal_exponent); } template -FASTFLOAT_CONSTEXPR20 inline typename std::enable_if< +FASTFLOAT_CONSTEXPR20 typename std::enable_if< std::is_integral::value && std::is_signed::value, double>::type integer_times_pow10(Int mantissa, int decimal_exponent) noexcept { return integer_times_pow10(static_cast(mantissa), decimal_exponent); diff --git a/script/mushtak_lemire.py b/script/mushtak_lemire.py index 46c8c645..f03715cd 100644 --- a/script/mushtak_lemire.py +++ b/script/mushtak_lemire.py @@ -1,6 +1,6 @@ # # Reference : -# Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to appear) +# Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback, Software: Practice and Experience 53 (6), 2023 https://arxiv.org/abs/2212.06644 # all_tqs = [] @@ -74,8 +74,8 @@ def convergents(cf): for _, w in convergents(continued_fraction(tq, 2 ** 137)): if w >= 2 ** 64: break - if (tq * w) % 2 ** 137 > 2 ** 137 - 2 ** 64: - print(f"SOLUTION: q={j-342} T[q]={tq} w={w}") - found_solution = True + if (tq * w) % 2 ** 137 > 2 ** 137 - 2 ** 64: + print(f"SOLUTION: q={j-342} T[q]={tq} w={w}") + found_solution = True if not found_solution: print("No solutions!") diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d8ed6f4d..a053581c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -94,6 +94,7 @@ endif() option(FASTFLOAT_EXHAUSTIVE "Exhaustive tests" OFF) if (FASTFLOAT_EXHAUSTIVE) + fast_float_add_cpp_test(ipv4_test) fast_float_add_cpp_test(short_random_string) fast_float_add_cpp_test(exhaustive32_midpoint) fast_float_add_cpp_test(random_string) diff --git a/tests/basictest.cpp b/tests/basictest.cpp index dc117526..1a5537bb 100644 --- a/tests/basictest.cpp +++ b/tests/basictest.cpp @@ -1142,6 +1142,15 @@ TEST_CASE("double.inf") { // DBL_MAX + 0.0000000000000001e308 verify("1.7976931348623159e308", std::numeric_limits::infinity(), std::errc::result_out_of_range); + + // ( (2 - 0.5*2^(−52)) * 2^1023 ) smallest number that overflows to infinity + verify("179769313486231580793728971405303415079934132710037826936173778980444" + "968292764750946649017977587207096330286416692887910946555547851940402" + "630657488671505820681908902000708383676273854845817711531764475730270" + "069855571366959622842914819860834936475292719074168444365510704342711" + "559699508093042880177904174497792", + std::numeric_limits::infinity(), + std::errc::result_out_of_range); } TEST_CASE("double.general") { @@ -1333,6 +1342,15 @@ TEST_CASE("double.general") { std::numeric_limits::infinity(), std::errc::result_out_of_range); verify("-2240084132271013504.131248280843119943687942846658579428", -0x1.f1660a65b00bfp+60); + + // ( (2 - 0.5*2^(−52)) * 2^1023 - 1 ) largest 309 decimal digit number + // that rounds to DBL_MAX + verify("179769313486231580793728971405303415079934132710037826936173778980444" + "968292764750946649017977587207096330286416692887910946555547851940402" + "630657488671505820681908902000708383676273854845817711531764475730270" + "069855571366959622842914819860834936475292719074168444365510704342711" + "559699508093042880177904174497791", + std::numeric_limits::max()); } TEST_CASE("double.decimal_point") { @@ -1507,14 +1525,35 @@ TEST_CASE("float.inf") { std::errc::result_out_of_range); verify("3.5028234666e38", std::numeric_limits::infinity(), std::errc::result_out_of_range); + // FLT_MAX + 0.00000007e38 + verify("3.40282357e38", std::numeric_limits::infinity(), + std::errc::result_out_of_range); + // FLT_MAX + 0.0000001e38 + verify("3.4028236e38", std::numeric_limits::infinity(), + std::errc::result_out_of_range); + + // ( (2 - 0.5*2^(-23)) * 2^127 ) smallest number that overflows to infinity + verify("340282356779733661637539395458142568448", + std::numeric_limits::infinity(), + std::errc::result_out_of_range); } TEST_CASE("float.general") { + // FLT_TRUE_MIN / 2 + verify("0.7006492e-45", 0.f, std::errc::result_out_of_range); + // FLT_TRUE_MIN / 2 + 0.0000001e-45 + verify("0.7006493e-45", 0x1p-149f); + // max verify("340282346638528859811704183484516925440", 0x1.fffffep+127f); // -max verify("-340282346638528859811704183484516925440", -0x1.fffffep+127f); + // ( (2 - 0.5*2^(-23)) * 2^127 - 1 ) largest 39 decimal digits number + // that rounds to FLT_MAX + verify("340282356779733661637539395458142568447", + std::numeric_limits::max()); + verify("-1e-999", -0.0f, std::errc::result_out_of_range); verify("1." "175494140627517859246175898662808184331245864732796240031385942718174" @@ -2086,12 +2125,11 @@ TEST_CASE("bfloat16.general") { } #endif -template -void verify_integer_multiplication_by_power_of_10(Int mantissa, - int decimal_exponent, - double expected) { - const double actual = - fast_float::integer_times_pow10(mantissa, decimal_exponent); +template +void verify_integer_times_pow10_result(Int mantissa, int decimal_exponent, + T actual, U expected) { + static_assert(std::is_same::value, + "expected and actual types must match"); INFO("m * 10^e=" << mantissa << " * 10^" << decimal_exponent << "\n" @@ -2105,45 +2143,173 @@ void verify_integer_multiplication_by_power_of_10(Int mantissa, CHECK_EQ(actual, expected); } -template -void verify_integer_multiplication_by_power_of_10(Int mantissa, - int decimal_exponent) { +template +T calculate_integer_times_pow10_expected_result(Int mantissa, + int decimal_exponent) { std::string constructed_string = std::to_string(mantissa) + "e" + std::to_string(decimal_exponent); - double expected_result; + T expected_result; const auto result = fast_float::from_chars( constructed_string.data(), constructed_string.data() + constructed_string.size(), expected_result); if (result.ec != std::errc()) INFO("Failed to parse: " << constructed_string); - verify_integer_multiplication_by_power_of_10(mantissa, decimal_exponent, - expected_result); + return expected_result; +} + +template +void verify_integer_times_pow10_dflt(Int mantissa, int decimal_exponent, + double expected) { + static_assert(std::is_integral::value); + + // the "default" overload + const double actual = + fast_float::integer_times_pow10(mantissa, decimal_exponent); + + verify_integer_times_pow10_result(mantissa, decimal_exponent, actual, + expected); } +template +void verify_integer_times_pow10_dflt(Int mantissa, int decimal_exponent) { + static_assert(std::is_integral::value); + + const auto expected_result = + calculate_integer_times_pow10_expected_result(mantissa, + decimal_exponent); + + verify_integer_times_pow10_dflt(mantissa, decimal_exponent, expected_result); +} + +template +void verify_integer_times_pow10(Int mantissa, int decimal_exponent, + T expected) { + static_assert(std::is_floating_point::value); + static_assert(std::is_integral::value); + + // explicit specialization + const auto actual = + fast_float::integer_times_pow10(mantissa, decimal_exponent); + + verify_integer_times_pow10_result(mantissa, decimal_exponent, actual, + expected); +} + +template +void verify_integer_times_pow10(Int mantissa, int decimal_exponent) { + static_assert(std::is_floating_point::value); + static_assert(std::is_integral::value); + + const auto expected_result = calculate_integer_times_pow10_expected_result( + mantissa, decimal_exponent); + + verify_integer_times_pow10(mantissa, decimal_exponent, expected_result); +} + +namespace all_supported_types { +template +void verify_integer_times_pow10(Int mantissa, int decimal_exponent) { + static_assert(std::is_integral::value); + + // verify the "default" overload + verify_integer_times_pow10_dflt(mantissa, decimal_exponent); + + // verify explicit specializations + ::verify_integer_times_pow10(mantissa, decimal_exponent); + ::verify_integer_times_pow10(mantissa, decimal_exponent); +#if defined(__STDCPP_FLOAT64_T__) + ::verify_integer_times_pow10(mantissa, decimal_exponent); +#endif +#if defined(__STDCPP_FLOAT32_T__) + ::verify_integer_times_pow10(mantissa, decimal_exponent); +#endif +#if defined(__STDCPP_FLOAT16_T__) + ::verify_integer_times_pow10(mantissa, decimal_exponent); +#endif +#if defined(__STDCPP_BFLOAT16_T__) + ::verify_integer_times_pow10(mantissa, decimal_exponent); +#endif +} +} // namespace all_supported_types + TEST_CASE("integer_times_pow10") { - // explicitly verifying API with different types of integers - verify_integer_multiplication_by_power_of_10(31, -1, 3.1); - verify_integer_multiplication_by_power_of_10(-31, -1, -3.1); - verify_integer_multiplication_by_power_of_10(31, -1, 3.1); - verify_integer_multiplication_by_power_of_10(31415, -4, 3.1415); - verify_integer_multiplication_by_power_of_10(-31415, -4, -3.1415); - verify_integer_multiplication_by_power_of_10(31415, -4, 3.1415); - verify_integer_multiplication_by_power_of_10(314159265, -8, - 3.14159265); - verify_integer_multiplication_by_power_of_10(-314159265, -8, - -3.14159265); - verify_integer_multiplication_by_power_of_10(3141592653, -9, - 3.141592653); - verify_integer_multiplication_by_power_of_10( - 3141592653589793238, -18, 3.141592653589793238); - verify_integer_multiplication_by_power_of_10( - -3141592653589793238, -18, -3.141592653589793238); - verify_integer_multiplication_by_power_of_10( - 3141592653589793238, -18, 3.141592653589793238); - verify_integer_multiplication_by_power_of_10( - -3141592653589793238, -18, -3.141592653589793238); - verify_integer_multiplication_by_power_of_10( + /* explicitly verifying API with different types of integers */ + // double (the "default" overload) + verify_integer_times_pow10_dflt(31, -1, 3.1); + verify_integer_times_pow10_dflt(-31, -1, -3.1); + verify_integer_times_pow10_dflt(31, -1, 3.1); + verify_integer_times_pow10_dflt(31415, -4, 3.1415); + verify_integer_times_pow10_dflt(-31415, -4, -3.1415); + verify_integer_times_pow10_dflt(31415, -4, 3.1415); + verify_integer_times_pow10_dflt(314159265, -8, 3.14159265); + verify_integer_times_pow10_dflt(-314159265, -8, -3.14159265); + verify_integer_times_pow10_dflt(3141592653, -9, 3.141592653); + verify_integer_times_pow10_dflt(314159265, -8, 3.14159265); + verify_integer_times_pow10_dflt(-314159265, -8, -3.14159265); + verify_integer_times_pow10_dflt(3141592653, -9, 3.141592653); + verify_integer_times_pow10_dflt(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10_dflt(-3141592653589793238, -18, + -3.141592653589793238); + verify_integer_times_pow10_dflt(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10_dflt(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10_dflt(-3141592653589793238, -18, + -3.141592653589793238); + verify_integer_times_pow10_dflt(3141592653589793238, -18, + 3.141592653589793238); + // double (explicit specialization) + verify_integer_times_pow10(31, -1, 3.1); + verify_integer_times_pow10(-31, -1, -3.1); + verify_integer_times_pow10(31, -1, 3.1); + verify_integer_times_pow10(31415, -4, 3.1415); + verify_integer_times_pow10(-31415, -4, -3.1415); + verify_integer_times_pow10(31415, -4, 3.1415); + verify_integer_times_pow10(314159265, -8, 3.14159265); + verify_integer_times_pow10(-314159265, -8, -3.14159265); + verify_integer_times_pow10(3141592653, -9, 3.141592653); + verify_integer_times_pow10(314159265, -8, 3.14159265); + verify_integer_times_pow10(-314159265, -8, -3.14159265); + verify_integer_times_pow10(3141592653, -9, + 3.141592653); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10(-3141592653589793238, -18, + -3.141592653589793238); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238); + verify_integer_times_pow10(-3141592653589793238, -18, + -3.141592653589793238); + verify_integer_times_pow10( 3141592653589793238, -18, 3.141592653589793238); + // float (explicit specialization) + verify_integer_times_pow10(31, -1, 3.1f); + verify_integer_times_pow10(-31, -1, -3.1f); + verify_integer_times_pow10(31, -1, 3.1f); + verify_integer_times_pow10(31415, -4, 3.1415f); + verify_integer_times_pow10(-31415, -4, -3.1415f); + verify_integer_times_pow10(31415, -4, 3.1415f); + verify_integer_times_pow10(314159265, -8, 3.14159265f); + verify_integer_times_pow10(-314159265, -8, -3.14159265f); + verify_integer_times_pow10(3141592653, -9, 3.14159265f); + verify_integer_times_pow10(314159265, -8, 3.14159265f); + verify_integer_times_pow10(-314159265, -8, -3.14159265f); + verify_integer_times_pow10(3141592653, -9, 3.14159265f); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238f); + verify_integer_times_pow10(-3141592653589793238, -18, + -3.141592653589793238f); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238f); + verify_integer_times_pow10(3141592653589793238, -18, + 3.141592653589793238f); + verify_integer_times_pow10(-3141592653589793238, -18, + -3.141592653589793238f); + verify_integer_times_pow10( + 3141592653589793238, -18, 3.141592653589793238f); for (int mode : {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, FE_TONEAREST}) { fesetround(mode); @@ -2153,87 +2319,122 @@ TEST_CASE("integer_times_pow10") { ~Guard() { fesetround(FE_TONEAREST); } } guard; - verify_integer_multiplication_by_power_of_10(0, 0); - verify_integer_multiplication_by_power_of_10(1, 0); - verify_integer_multiplication_by_power_of_10(0, 1); - verify_integer_multiplication_by_power_of_10(1, 1); - verify_integer_multiplication_by_power_of_10(-1, 0); - verify_integer_multiplication_by_power_of_10(0, -1); - verify_integer_multiplication_by_power_of_10(-1, -1); - verify_integer_multiplication_by_power_of_10(-1, 1); - verify_integer_multiplication_by_power_of_10(1, -1); - - verify_integer_multiplication_by_power_of_10( + namespace all = all_supported_types; + + all::verify_integer_times_pow10(0, 0); + all::verify_integer_times_pow10(1, 0); + all::verify_integer_times_pow10(0, 1); + all::verify_integer_times_pow10(1, 1); + all::verify_integer_times_pow10(-1, 0); + all::verify_integer_times_pow10(0, -1); + all::verify_integer_times_pow10(-1, -1); + all::verify_integer_times_pow10(-1, 1); + all::verify_integer_times_pow10(1, -1); + + /* denormal min */ + verify_integer_times_pow10_dflt(49406564584124654, -340, + std::numeric_limits::denorm_min()); + verify_integer_times_pow10( 49406564584124654, -340, std::numeric_limits::denorm_min()); - verify_integer_multiplication_by_power_of_10( - 22250738585072014, -324, std::numeric_limits::min()); - verify_integer_multiplication_by_power_of_10( - 17976931348623158, 292, std::numeric_limits::max()); - - // DBL_TRUE_MIN / 2 underflows to 0 - verify_integer_multiplication_by_power_of_10(49406564584124654 / 2, -340, - 0.); - - // DBL_TRUE_MIN / 2 + 0.0000000000000001e-324 rounds to DBL_TRUE_MIN - verify_integer_multiplication_by_power_of_10( + verify_integer_times_pow10(14012984, -52, + std::numeric_limits::denorm_min()); + + /* normal min */ + verify_integer_times_pow10_dflt(22250738585072014, -324, + std::numeric_limits::min()); + verify_integer_times_pow10(22250738585072014, -324, + std::numeric_limits::min()); + verify_integer_times_pow10(11754944, -45, + std::numeric_limits::min()); + + /* max */ + verify_integer_times_pow10_dflt(17976931348623158, 292, + std::numeric_limits::max()); + verify_integer_times_pow10(17976931348623158, 292, + std::numeric_limits::max()); + verify_integer_times_pow10(34028235, 31, + std::numeric_limits::max()); + + /* underflow */ + // (DBL_TRUE_MIN / 2) underflows to 0 + verify_integer_times_pow10_dflt(49406564584124654 / 2, -340, 0.); + verify_integer_times_pow10(49406564584124654 / 2, -340, 0.); + // (FLT_TRUE_MIN / 2) underflows to 0 + verify_integer_times_pow10(14012984 / 2, -52, 0.f); + + /* rounding to denormal min */ + // (DBL_TRUE_MIN / 2 + 0.0000000000000001e-324) rounds to DBL_TRUE_MIN + verify_integer_times_pow10_dflt(49406564584124654 / 2 + 1, -340, + std::numeric_limits::denorm_min()); + verify_integer_times_pow10( 49406564584124654 / 2 + 1, -340, std::numeric_limits::denorm_min()); - - // DBL_MAX + 0.0000000000000001e308 overflows to infinity - verify_integer_multiplication_by_power_of_10( - 17976931348623158 + 1, 292, std::numeric_limits::infinity()); - // DBL_MAX + 0.00000000000000001e308 overflows to infinity - verify_integer_multiplication_by_power_of_10( - 179769313486231580 + 1, 291, std::numeric_limits::infinity()); + // (FLT_TRUE_MIN / 2 + 0.0000001e-45) rounds to FLT_TRUE_MIN + verify_integer_times_pow10(14012984 / 2 + 1, -52, + std::numeric_limits::denorm_min()); + + /* overflow */ + // (DBL_MAX + 0.0000000000000001e308) overflows to infinity + verify_integer_times_pow10_dflt(17976931348623158 + 1, 292, + std::numeric_limits::infinity()); + verify_integer_times_pow10(17976931348623158 + 1, 292, + std::numeric_limits::infinity()); + // (DBL_MAX + 0.00000000000000001e308) overflows to infinity + verify_integer_times_pow10_dflt(179769313486231580 + 1, 291, + std::numeric_limits::infinity()); + verify_integer_times_pow10(179769313486231580 + 1, 291, + std::numeric_limits::infinity()); + // (FLT_MAX + 0.0000001e38) overflows to infinity + verify_integer_times_pow10(34028235 + 1, 31, + std::numeric_limits::infinity()); + // (FLT_MAX + 0.00000007e38) overflows to infinity + verify_integer_times_pow10(340282350 + 7, 30, + std::numeric_limits::infinity()); // loosely verifying correct rounding of 1 to 64 bits // worth of significant digits - verify_integer_multiplication_by_power_of_10(1, 42); - verify_integer_multiplication_by_power_of_10(1, -42); - verify_integer_multiplication_by_power_of_10(12, 42); - verify_integer_multiplication_by_power_of_10(12, -42); - verify_integer_multiplication_by_power_of_10(123, 42); - verify_integer_multiplication_by_power_of_10(123, -42); - verify_integer_multiplication_by_power_of_10(1234, 42); - verify_integer_multiplication_by_power_of_10(1234, -42); - verify_integer_multiplication_by_power_of_10(12345, 42); - verify_integer_multiplication_by_power_of_10(12345, -42); - verify_integer_multiplication_by_power_of_10(123456, 42); - verify_integer_multiplication_by_power_of_10(123456, -42); - verify_integer_multiplication_by_power_of_10(1234567, 42); - verify_integer_multiplication_by_power_of_10(1234567, -42); - verify_integer_multiplication_by_power_of_10(12345678, 42); - verify_integer_multiplication_by_power_of_10(12345678, -42); - verify_integer_multiplication_by_power_of_10(123456789, 42); - verify_integer_multiplication_by_power_of_10(1234567890, 42); - verify_integer_multiplication_by_power_of_10(1234567890, -42); - verify_integer_multiplication_by_power_of_10(12345678901, 42); - verify_integer_multiplication_by_power_of_10(12345678901, -42); - verify_integer_multiplication_by_power_of_10(123456789012, 42); - verify_integer_multiplication_by_power_of_10(123456789012, -42); - verify_integer_multiplication_by_power_of_10(1234567890123, 42); - verify_integer_multiplication_by_power_of_10(1234567890123, -42); - verify_integer_multiplication_by_power_of_10(12345678901234, 42); - verify_integer_multiplication_by_power_of_10(12345678901234, -42); - verify_integer_multiplication_by_power_of_10(123456789012345, 42); - verify_integer_multiplication_by_power_of_10(123456789012345, -42); - verify_integer_multiplication_by_power_of_10(1234567890123456, 42); - verify_integer_multiplication_by_power_of_10(1234567890123456, -42); - verify_integer_multiplication_by_power_of_10(12345678901234567, 42); - verify_integer_multiplication_by_power_of_10(12345678901234567, -42); - verify_integer_multiplication_by_power_of_10(123456789012345678, 42); - verify_integer_multiplication_by_power_of_10(123456789012345678, -42); - verify_integer_multiplication_by_power_of_10(1234567890123456789, 42); - verify_integer_multiplication_by_power_of_10(1234567890123456789, -42); - verify_integer_multiplication_by_power_of_10(12345678901234567890ull, 42); - verify_integer_multiplication_by_power_of_10(12345678901234567890ull, -42); - verify_integer_multiplication_by_power_of_10( - std::numeric_limits::max(), 42); - verify_integer_multiplication_by_power_of_10( - std::numeric_limits::max(), -42); - verify_integer_multiplication_by_power_of_10( - std::numeric_limits::max(), 42); - verify_integer_multiplication_by_power_of_10( - std::numeric_limits::max(), -42); + all::verify_integer_times_pow10(1, 42); + all::verify_integer_times_pow10(1, -42); + all::verify_integer_times_pow10(12, 42); + all::verify_integer_times_pow10(12, -42); + all::verify_integer_times_pow10(123, 42); + all::verify_integer_times_pow10(123, -42); + all::verify_integer_times_pow10(1234, 42); + all::verify_integer_times_pow10(1234, -42); + all::verify_integer_times_pow10(12345, 42); + all::verify_integer_times_pow10(12345, -42); + all::verify_integer_times_pow10(123456, 42); + all::verify_integer_times_pow10(123456, -42); + all::verify_integer_times_pow10(1234567, 42); + all::verify_integer_times_pow10(1234567, -42); + all::verify_integer_times_pow10(12345678, 42); + all::verify_integer_times_pow10(12345678, -42); + all::verify_integer_times_pow10(123456789, 42); + all::verify_integer_times_pow10(1234567890, 42); + all::verify_integer_times_pow10(1234567890, -42); + all::verify_integer_times_pow10(12345678901, 42); + all::verify_integer_times_pow10(12345678901, -42); + all::verify_integer_times_pow10(123456789012, 42); + all::verify_integer_times_pow10(123456789012, -42); + all::verify_integer_times_pow10(1234567890123, 42); + all::verify_integer_times_pow10(1234567890123, -42); + all::verify_integer_times_pow10(12345678901234, 42); + all::verify_integer_times_pow10(12345678901234, -42); + all::verify_integer_times_pow10(123456789012345, 42); + all::verify_integer_times_pow10(123456789012345, -42); + all::verify_integer_times_pow10(1234567890123456, 42); + all::verify_integer_times_pow10(1234567890123456, -42); + all::verify_integer_times_pow10(12345678901234567, 42); + all::verify_integer_times_pow10(12345678901234567, -42); + all::verify_integer_times_pow10(123456789012345678, 42); + all::verify_integer_times_pow10(123456789012345678, -42); + all::verify_integer_times_pow10(1234567890123456789, 42); + all::verify_integer_times_pow10(1234567890123456789, -42); + all::verify_integer_times_pow10(12345678901234567890ull, 42); + all::verify_integer_times_pow10(12345678901234567890ull, -42); + all::verify_integer_times_pow10(std::numeric_limits::max(), 42); + all::verify_integer_times_pow10(std::numeric_limits::max(), -42); + all::verify_integer_times_pow10(std::numeric_limits::max(), 42); + all::verify_integer_times_pow10(std::numeric_limits::max(), -42); } } \ No newline at end of file diff --git a/tests/example_integer_times_pow10.cpp b/tests/example_integer_times_pow10.cpp index 3e86826c..0205c275 100644 --- a/tests/example_integer_times_pow10.cpp +++ b/tests/example_integer_times_pow10.cpp @@ -2,7 +2,7 @@ #include -int main() { +void default_overload() { const uint64_t W = 12345678901234567; const int Q = 23; const double result = fast_float::integer_times_pow10(W, Q); @@ -10,3 +10,27 @@ int main() { std::cout << W << " * 10^" << Q << " = " << result << " (" << (result == 12345678901234567e23 ? "==" : "!=") << "expected)\n"; } + +void double_specialization() { + const uint64_t W = 12345678901234567; + const int Q = 23; + const double result = fast_float::integer_times_pow10(W, Q); + std::cout.precision(17); + std::cout << "double: " << W << " * 10^" << Q << " = " << result << " (" + << (result == 12345678901234567e23 ? "==" : "!=") << "expected)\n"; +} + +void float_specialization() { + const uint64_t W = 12345678; + const int Q = 23; + const float result = fast_float::integer_times_pow10(W, Q); + std::cout.precision(9); + std::cout << "float: " << W << " * 10^" << Q << " = " << result << " (" + << (result == 12345678e23f ? "==" : "!=") << "expected)\n"; +} + +int main() { + default_overload(); + double_specialization(); + float_specialization(); +} diff --git a/tests/fast_int.cpp b/tests/fast_int.cpp index 49044d36..94e76fdb 100644 --- a/tests/fast_int.cpp +++ b/tests/fast_int.cpp @@ -95,6 +95,201 @@ int main() { } } + // char basic test + std::vector const char_basic_test_expected{0, 10, 40, 100, 9}; + std::vector const char_basic_test{"0", "10 ", "40", + "100 with text", "9.999"}; + + for (std::size_t i = 0; i < char_basic_test.size(); ++i) { + auto const f = char_basic_test[i]; + char result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + + if (answer.ec != std::errc()) { + std::cerr << "could not convert to char for input: \"" << f + << "\" because of invalid argument" << std::endl; + return EXIT_FAILURE; + } else if (result != char_basic_test_expected[i]) { + std::cerr << "result \"" << f << "\" did not match with expected char: " + << static_cast(char_basic_test_expected[i]) << std::endl; + return EXIT_FAILURE; + } + } + + // short basic test + std::vector const short_basic_test_expected{0, 10, -40, 1001, 9}; + std::vector const short_basic_test{ + "0", "10 ", "-40", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < short_basic_test.size(); ++i) { + auto const f = short_basic_test[i]; + short result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + + if (answer.ec != std::errc()) { + std::cerr << "could not convert to short for input: \"" << f + << "\" because of invalid argument" << std::endl; + return EXIT_FAILURE; + } else if (result != short_basic_test_expected[i]) { + std::cerr << "result \"" << f << "\" did not match with expected short: " + << short_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // long basic test + std::vector const long_basic_test_expected{0, 10, -40, 1001, 9}; + std::vector const long_basic_test{ + "0", "10 ", "-40", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < long_basic_test.size(); ++i) { + auto const f = long_basic_test[i]; + long result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + + if (answer.ec != std::errc()) { + std::cerr << "could not convert to long for input: \"" << f + << "\" because of invalid argument" << std::endl; + return EXIT_FAILURE; + } else if (result != long_basic_test_expected[i]) { + std::cerr << "result \"" << f << "\" did not match with expected long: " + << long_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // long long basic test + std::vector const long_long_basic_test_expected{0, 10, -40, 1001, + 9}; + std::vector const long_long_basic_test{ + "0", "10 ", "-40", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < long_long_basic_test.size(); ++i) { + auto const f = long_long_basic_test[i]; + long long result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + + if (answer.ec != std::errc()) { + std::cerr << "could not convert to long long for input: \"" << f + << "\" because of invalid argument" << std::endl; + return EXIT_FAILURE; + } else if (result != long_long_basic_test_expected[i]) { + std::cerr << "result \"" << f + << "\" did not match with expected long long: " + << long_long_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // unsigned char basic test + std::vector const unsigned_char_basic_test_expected{0, 10, 100, + 9}; + std::vector const unsigned_char_basic_test{ + "0", "10 ", "100 with text", "9.999"}; + + for (std::size_t i = 0; i < unsigned_char_basic_test.size(); ++i) { + auto const &f = unsigned_char_basic_test[i]; + unsigned char result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + if (answer.ec != std::errc()) { + std::cerr << "could not convert to unsigned char for input: \"" << f + << "\"" << std::endl; + return EXIT_FAILURE; + } else if (result != unsigned_char_basic_test_expected[i]) { + std::cerr << "result \"" << f + << "\" did not match with expected unsigned char: " + << static_cast(unsigned_char_basic_test_expected[i]) + << std::endl; + return EXIT_FAILURE; + } + } + + // unsigned short basic test + std::vector const unsigned_short_basic_test_expected{0, 10, + 1001, 9}; + std::vector const unsigned_short_basic_test{ + "0", "10 ", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < unsigned_short_basic_test.size(); ++i) { + auto const &f = unsigned_short_basic_test[i]; + unsigned short result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + if (answer.ec != std::errc()) { + std::cerr << "could not convert to unsigned short for input: \"" << f + << "\"" << std::endl; + return EXIT_FAILURE; + } else if (result != unsigned_short_basic_test_expected[i]) { + std::cerr << "result \"" << f + << "\" did not match with expected unsigned short: " + << unsigned_short_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // unsigned long basic test + std::vector const unsigned_long_basic_test_expected{0, 10, + 1001, 9}; + std::vector const unsigned_long_basic_test{ + "0", "10 ", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < unsigned_long_basic_test.size(); ++i) { + auto const &f = unsigned_long_basic_test[i]; + unsigned long result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + if (answer.ec != std::errc()) { + std::cerr << "could not convert to unsigned long for input: \"" << f + << "\"" << std::endl; + return EXIT_FAILURE; + } else if (result != unsigned_long_basic_test_expected[i]) { + std::cerr << "result \"" << f + << "\" did not match with expected unsigned long: " + << unsigned_long_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // unsigned long long basic test + std::vector const unsigned_long_long_basic_test_expected{ + 0, 10, 1001, 9}; + std::vector const unsigned_long_long_basic_test{ + "0", "10 ", "1001 with text", "9.999"}; + + for (std::size_t i = 0; i < unsigned_long_long_basic_test.size(); ++i) { + auto const &f = unsigned_long_long_basic_test[i]; + unsigned long long result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + if (answer.ec != std::errc()) { + std::cerr << "could not convert to unsigned long long for input: \"" << f + << "\"" << std::endl; + return EXIT_FAILURE; + } else if (result != unsigned_long_long_basic_test_expected[i]) { + std::cerr << "result \"" << f + << "\" did not match with expected unsigned long long: " + << unsigned_long_long_basic_test_expected[i] << std::endl; + return EXIT_FAILURE; + } + } + + // bool basic test + std::vector const bool_basic_test_expected{false, true}; + std::vector const bool_basic_test{"0", "1"}; + + for (std::size_t i = 0; i < bool_basic_test.size(); ++i) { + auto const &f = bool_basic_test[i]; + bool result; + auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result); + if (answer.ec != std::errc()) { + std::cerr << "could not convert to bool for input: \"" << f << "\"" + << std::endl; + return EXIT_FAILURE; + } else if (result != bool_basic_test_expected[i]) { + std::cerr << "result \"" << f << "\" did not match with expected bool: " + << (bool_basic_test_expected[i] ? "true" : "false") + << std::endl; + return EXIT_FAILURE; + } + } + // int invalid error test std::vector const int_invalid_argument_test{ "text", "text with 1002", "+50", " 50"}; diff --git a/tests/ipv4_test.cpp b/tests/ipv4_test.cpp new file mode 100644 index 00000000..f3055dcb --- /dev/null +++ b/tests/ipv4_test.cpp @@ -0,0 +1,93 @@ + +#include +#include +#include +#include +#include "fast_float/fast_float.h" + +char *uint8_to_chars_manual(char *ptr, uint8_t value) { + if (value == 0) { + *ptr++ = '0'; + return ptr; + } + char *start = ptr; + while (value > 0) { + *ptr++ = '0' + (value % 10); + value /= 10; + } + // Reverse the digits written so far + std::reverse(start, ptr); + return ptr; +} + +void uint32_to_ipv4_string(uint32_t ip, char *buffer) { + uint8_t octets[4] = {static_cast(ip >> 24), + static_cast(ip >> 16), + static_cast(ip >> 8), static_cast(ip)}; + + char *ptr = buffer; + + for (int i = 0; i < 4; ++i) { + ptr = uint8_to_chars_manual(ptr, octets[i]); + + if (i < 3) { + *ptr++ = '.'; + } + } + *ptr = '\0'; +} + +fastfloat_really_inline uint32_t ipv4_string_to_uint32(const char *str, + const char *end) { + uint32_t ip = 0; + const char *current = str; + + for (int i = 0; i < 4; ++i) { + uint8_t value; + auto r = fast_float::from_chars(current, end, value); + if (r.ec != std::errc()) { + throw std::invalid_argument("Invalid IP address format"); + } + current = r.ptr; + ip = (ip << 8) | value; + + if (i < 3) { + if (current == end || *current++ != '.') { + throw std::invalid_argument("Invalid IP address format"); + } + } + } + return ip; +} + +bool test_all_ipv4_conversions() { + std::cout << "Testing all IPv4 conversions... 0, 1000, 2000, 3000, 4000, " + "5000, 6000, 7000, 8000, 9000, ..." + << std::endl; + char buffer[16]; + for (uint64_t ip = 0; ip <= 0xFFFFFFFF; ip += 1000) { + if (ip % 10000000 == 0) { + std::cout << "." << std::flush; + } + uint32_to_ipv4_string(static_cast(ip), buffer); + const char *end = buffer + strlen(buffer); + uint32_t parsed_ip = ipv4_string_to_uint32(buffer, end); + if (parsed_ip != ip) { + std::cerr << "Mismatch: original " << ip << ", parsed " << parsed_ip + << std::endl; + return false; + } + } + std::cout << std::endl; + return true; +} + +int main() { + if (test_all_ipv4_conversions()) { + std::cout << "All IPv4 conversions passed!" << std::endl; + return EXIT_SUCCESS; + } else { + std::cerr << "IPv4 conversion test failed!" << std::endl; + return EXIT_FAILURE; + } +} \ No newline at end of file