diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml
index fac5cee3..bd9e1e6c 100644
--- a/.github/workflows/cifuzz.yml
+++ b/.github/workflows/cifuzz.yml
@@ -20,14 +20,14 @@ jobs:
        fuzz-seconds: 300
        output-sarif: true
    - name: Upload Crash
-     uses: actions/upload-artifact@v4
+     uses: actions/upload-artifact@v5
      if: failure() && steps.build.outcome == 'success'
      with:
        name: artifacts
        path: ./out/artifacts
    - name: Upload Sarif
      if: always() && steps.build.outcome == 'success'
-     uses: github/codeql-action/upload-sarif@v3
+     uses: github/codeql-action/upload-sarif@v4
      with:
       # Path to SARIF file relative to the root of the repository
       sarif_file: cifuzz-sarif/results.sarif
diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml
index 1b00f447..399f0c9e 100644
--- a/.github/workflows/emscripten.yml
+++ b/.github/workflows/emscripten.yml
@@ -5,7 +5,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 # v4.2.2
-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
+      - uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
       - uses: mymindstorm/setup-emsdk@6ab9eb1bda2574c4ddb79809fc9247783eaf9021 # v14
       - name: Verify
         run: emcc -v
diff --git a/.github/workflows/risc.yml b/.github/workflows/risc.yml
index 68e26cb4..8bc85588 100644
--- a/.github/workflows/risc.yml
+++ b/.github/workflows/risc.yml
@@ -6,7 +6,7 @@ jobs:
   build:
     runs-on: ubuntu-24.04
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v5
     - name: Install packages
       run: |
         sudo apt-get update -q -y
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 645fce7a..f1f7a4a2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.14)
 
 
-project(fast_float VERSION 8.1.0 LANGUAGES CXX)
+project(fast_float VERSION 8.2.0 LANGUAGES CXX)
 set(FASTFLOAT_CXX_STANDARD 11 CACHE STRING "the C++ standard to use for fastfloat")
 set(CMAKE_CXX_STANDARD ${FASTFLOAT_CXX_STANDARD})
 option(FASTFLOAT_TEST "Enable tests" OFF)
diff --git a/README.md b/README.md
index 8fdddbc5..be21066e 100644
--- a/README.md
+++ b/README.md
@@ -18,9 +18,9 @@ requires C++11):
 from_chars_result from_chars(char const *first, char const *last, float &value, ...);
 from_chars_result from_chars(char const *first, char const *last, double &value, ...);
 ```
+If they are available on your system, we also support fixed-width floating-point types such as `std::float64_t`, `std::float32_t`, `std::float16_t`, and `std::bfloat16_t`.
 
-You can also parse integer types:
-
+You can also parse integer types such as `char`, `short`, `long`, `long long`, `unsigned char`, `unsigned short`, `unsigned long`, `unsigned long long`, `bool` (0/1), `int8_t`, `int16_t`, `int32_t`, `int64_t`, `uint8_t`, `uint16_t`, `uint32_t`, `uint64_t`.
 ```C++
 from_chars_result from_chars(char const *first, char const *last, int &value, ...);
 from_chars_result from_chars(char const *first, char const *last, unsigned &value, ...);
@@ -401,6 +401,23 @@ except `fast_float::integer_times_pow10()` does not report out-of-range errors,
 underflows to zero or overflows to infinity when the resulting value is
 out of range.
 
+You can use template overloads to get the result converted to different
+supported floating-point types: `float`, `double`, etc.
+For example, to get the result as a `float`, use the
+`fast_float::integer_times_pow10<float>()` specialization:
+```C++
+const uint64_t W = 12345678;
+const int Q = 23;
+const float result = fast_float::integer_times_pow10<float>(W, Q);
+std::cout.precision(9);
+std::cout << "float: " << W << " * 10^" << Q << " = " << result << " ("
+          << (result == 12345678e23f ? "==" : "!=") << "expected)\n";
+```
+outputs
+```
+float: 12345678 * 10^23 = 1.23456782e+30 (==expected)
+```
+
 Overloads of `fast_float::integer_times_pow10()` are provided for
 signed and unsigned integer types: `int64_t`, `uint64_t`, etc.
 
@@ -443,7 +460,7 @@ framework](https://github.com/microsoft/LightGBM).
 Packages
 ------
 
-[![Packaging status](https://repology.org/badge/vertical-allrepos/fastfloat.svg)](https://repology.org/project/fastfloat/versions)
+[![Packaging status](https://repology.org/badge/vertical-allrepos/fast-float.svg)](https://repology.org/project/fast-float/versions)
 
 
 ## References
@@ -516,7 +533,7 @@ sufficiently recent version of CMake (3.11 or better at least):
 FetchContent_Declare(
   fast_float
   GIT_REPOSITORY https://github.com/fastfloat/fast_float.git
-  GIT_TAG tags/v8.1.0
+  GIT_TAG tags/v8.2.0
   GIT_SHALLOW TRUE)
 
 FetchContent_MakeAvailable(fast_float)
@@ -532,7 +549,7 @@ You may also use [CPM](https://github.com/cpm-cmake/CPM.cmake), like so:
 CPMAddPackage(
   NAME fast_float
   GITHUB_REPOSITORY "fastfloat/fast_float"
-  GIT_TAG v8.1.0)
+  GIT_TAG v8.2.0)
 ```
 
 ## Using as single header
@@ -544,7 +561,7 @@ if desired as described in the command line help.
 
 You may directly download automatically generated single-header files:
 
-<https://github.com/fastfloat/fast_float/releases/download/v8.1.0/fast_float.h>
+<https://github.com/fastfloat/fast_float/releases/download/v8.2.0/fast_float.h>
 
 ## Benchmarking
 
@@ -598,6 +615,11 @@ long digits.
 The library includes code adapted from Google Wuffs (written by Nigel Tao) which
 was originally published under the Apache 2.0 license.
 
+## Stars
+
+
+[![Star History Chart](https://api.star-history.com/svg?repos=fastfloat/fast_float&type=Date)](https://www.star-history.com/#fastfloat/fast_float&Date)
+
 ## License
 
 <sup>
diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index b4e03954..4ee57895 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -1,9 +1,27 @@
+include(FetchContent)
+
+FetchContent_Declare(
+  counters
+  GIT_REPOSITORY https://github.com/lemire/counters.git
+  GIT_TAG v2.2.0
+)
+
+FetchContent_MakeAvailable(counters)
+
 add_executable(realbenchmark benchmark.cpp)
+target_link_libraries(realbenchmark PRIVATE counters::counters)
+add_executable(bench_ip bench_ip.cpp)
+target_link_libraries(bench_ip PRIVATE counters::counters)
+
 set_property(
     TARGET realbenchmark
     PROPERTY CXX_STANDARD 17)
-
+set_property(
+    TARGET bench_ip
+    PROPERTY CXX_STANDARD 17)
 target_link_libraries(realbenchmark PUBLIC fast_float)
+target_link_libraries(bench_ip PUBLIC fast_float)
+
 include(ExternalProject)
 
 # Define the external project
diff --git a/benchmarks/apple_arm_events.h b/benchmarks/apple_arm_events.h
deleted file mode 100644
index f127d14d..00000000
--- a/benchmarks/apple_arm_events.h
+++ /dev/null
@@ -1,1117 +0,0 @@
-// Original design from:
-// =============================================================================
-// XNU kperf/kpc
-// Available for 64-bit Intel/Apple Silicon, macOS/iOS, with root privileges
-//
-// References:
-//
-// XNU source (since xnu 2422.1.72):
-// https://github.com/apple/darwin-xnu/blob/main/osfmk/kern/kpc.h
-// https://github.com/apple/darwin-xnu/blob/main/bsd/kern/kern_kpc.c
-//
-// Lightweight PET (Profile Every Thread, since xnu 3789.1.32):
-// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/pet.c
-// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/kperf_kpc.c
-//
-// System Private frameworks (since macOS 10.11, iOS 8.0):
-// /System/Library/PrivateFrameworks/kperf.framework
-// /System/Library/PrivateFrameworks/kperfdata.framework
-//
-// Xcode framework (since Xcode 7.0):
-// /Applications/Xcode.app/Contents/SharedFrameworks/DVTInstrumentsFoundation.framework
-//
-// CPU database (plist files)
-// macOS (since macOS 10.11):
-//     /usr/share/kpep/<name>.plist
-// iOS (copied from Xcode, since iOS 10.0, Xcode 8.0):
-//     /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
-//     /DeviceSupport/<version>/DeveloperDiskImage.dmg/usr/share/kpep/<name>.plist
-//
-//
-// Created by YaoYuan <ibireme@gmail.com> on 2021.
-// Released into the public domain (unlicense.org).
-// =============================================================================
-
-#ifndef M1CYCLES_H
-#define M1CYCLES_H
-
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <dlfcn.h>          // for dlopen() and dlsym()
-#include <mach/mach_time.h> // for mach_absolute_time()
-#include <sys/kdebug.h>     // for kdebug trace decode
-#include <sys/sysctl.h>     // for sysctl()
-#include <unistd.h>         // for usleep()
-
-struct performance_counters {
-  double cycles;
-  double branches;
-  double missed_branches;
-  double instructions;
-
-  performance_counters(uint64_t c, uint64_t b, uint64_t m, uint64_t i)
-      : cycles(c), branches(b), missed_branches(m), instructions(i) {}
-
-  performance_counters(double c, double b, double m, double i)
-      : cycles(c), branches(b), missed_branches(m), instructions(i) {}
-
-  performance_counters(double init)
-      : cycles(init), branches(init), missed_branches(init),
-        instructions(init) {}
-
-  inline performance_counters &operator-=(const performance_counters &other) {
-    cycles -= other.cycles;
-    branches -= other.branches;
-    missed_branches -= other.missed_branches;
-    instructions -= other.instructions;
-    return *this;
-  }
-
-  inline performance_counters &min(const performance_counters &other) {
-    cycles = other.cycles < cycles ? other.cycles : cycles;
-    branches = other.branches < branches ? other.branches : branches;
-    missed_branches = other.missed_branches < missed_branches
-                          ? other.missed_branches
-                          : missed_branches;
-    instructions =
-        other.instructions < instructions ? other.instructions : instructions;
-    return *this;
-  }
-
-  inline performance_counters &operator+=(const performance_counters &other) {
-    cycles += other.cycles;
-    branches += other.branches;
-    missed_branches += other.missed_branches;
-    instructions += other.instructions;
-    return *this;
-  }
-
-  inline performance_counters &operator/=(double numerator) {
-    cycles /= numerator;
-    branches /= numerator;
-    missed_branches /= numerator;
-    instructions /= numerator;
-    return *this;
-  }
-};
-
-inline performance_counters operator-(const performance_counters &a,
-                                      const performance_counters &b) {
-  return performance_counters(a.cycles - b.cycles, a.branches - b.branches,
-                              a.missed_branches - b.missed_branches,
-                              a.instructions - b.instructions);
-}
-
-typedef float f32;
-typedef double f64;
-typedef int8_t i8;
-typedef uint8_t u8;
-typedef int16_t i16;
-typedef uint16_t u16;
-typedef int32_t i32;
-typedef uint32_t u32;
-typedef int64_t i64;
-typedef uint64_t u64;
-typedef size_t usize;
-
-// -----------------------------------------------------------------------------
-// <kperf.framework> header (reverse engineered)
-// This framework wraps some sysctl calls to communicate with the kpc in kernel.
-// Most functions requires root privileges, or process is "blessed".
-// -----------------------------------------------------------------------------
-
-// Cross-platform class constants.
-#define KPC_CLASS_FIXED (0)
-#define KPC_CLASS_CONFIGURABLE (1)
-#define KPC_CLASS_POWER (2)
-#define KPC_CLASS_RAWPMU (3)
-
-// Cross-platform class mask constants.
-#define KPC_CLASS_FIXED_MASK (1u << KPC_CLASS_FIXED)               // 1
-#define KPC_CLASS_CONFIGURABLE_MASK (1u << KPC_CLASS_CONFIGURABLE) // 2
-#define KPC_CLASS_POWER_MASK (1u << KPC_CLASS_POWER)               // 4
-#define KPC_CLASS_RAWPMU_MASK (1u << KPC_CLASS_RAWPMU)             // 8
-
-// PMU version constants.
-#define KPC_PMU_ERROR (0)     // Error
-#define KPC_PMU_INTEL_V3 (1)  // Intel
-#define KPC_PMU_ARM_APPLE (2) // ARM64
-#define KPC_PMU_INTEL_V2 (3)  // Old Intel
-#define KPC_PMU_ARM_V2 (4)    // Old ARM
-
-// The maximum number of counters we could read from every class in one go.
-// ARMV7: FIXED: 1, CONFIGURABLE: 4
-// ARM32: FIXED: 2, CONFIGURABLE: 6
-// ARM64: FIXED: 2, CONFIGURABLE: CORE_NCTRS - FIXED (6 or 8)
-// x86: 32
-#define KPC_MAX_COUNTERS 32
-
-// Bits for defining what to do on an action.
-// Defined in https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/action.h
-#define KPERF_SAMPLER_TH_INFO (1U << 0)
-#define KPERF_SAMPLER_TH_SNAPSHOT (1U << 1)
-#define KPERF_SAMPLER_KSTACK (1U << 2)
-#define KPERF_SAMPLER_USTACK (1U << 3)
-#define KPERF_SAMPLER_PMC_THREAD (1U << 4)
-#define KPERF_SAMPLER_PMC_CPU (1U << 5)
-#define KPERF_SAMPLER_PMC_CONFIG (1U << 6)
-#define KPERF_SAMPLER_MEMINFO (1U << 7)
-#define KPERF_SAMPLER_TH_SCHEDULING (1U << 8)
-#define KPERF_SAMPLER_TH_DISPATCH (1U << 9)
-#define KPERF_SAMPLER_TK_SNAPSHOT (1U << 10)
-#define KPERF_SAMPLER_SYS_MEM (1U << 11)
-#define KPERF_SAMPLER_TH_INSCYC (1U << 12)
-#define KPERF_SAMPLER_TK_INFO (1U << 13)
-
-// Maximum number of kperf action ids.
-#define KPERF_ACTION_MAX (32)
-
-// Maximum number of kperf timer ids.
-#define KPERF_TIMER_MAX (8)
-
-// x86/arm config registers are 64-bit
-typedef u64 kpc_config_t;
-
-/// Print current CPU identification string to the buffer (same as snprintf),
-/// such as "cpu_7_8_10b282dc_46". This string can be used to locate the PMC
-/// database in /usr/share/kpep.
-/// @return string's length, or negative value if error occurs.
-/// @note This method does not requires root privileges.
-/// @details sysctl get(hw.cputype), get(hw.cpusubtype),
-///                 get(hw.cpufamily), get(machdep.cpu.model)
-static int (*kpc_cpu_string)(char *buf, usize buf_size);
-
-/// Get the version of KPC that's being run.
-/// @return See `PMU version constants` above.
-/// @details sysctl get(kpc.pmu_version)
-static u32 (*kpc_pmu_version)(void);
-
-/// Get running PMC classes.
-/// @return See `class mask constants` above,
-///         0 if error occurs or no class is set.
-/// @details sysctl get(kpc.counting)
-static u32 (*kpc_get_counting)(void);
-
-/// Set PMC classes to enable counting.
-/// @param classes See `class mask constants` above, set 0 to shutdown counting.
-/// @return 0 for success.
-/// @details sysctl set(kpc.counting)
-static int (*kpc_set_counting)(u32 classes);
-
-/// Get running PMC classes for current thread.
-/// @return See `class mask constants` above,
-///         0 if error occurs or no class is set.
-/// @details sysctl get(kpc.thread_counting)
-static u32 (*kpc_get_thread_counting)(void);
-
-/// Set PMC classes to enable counting for current thread.
-/// @param classes See `class mask constants` above, set 0 to shutdown counting.
-/// @return 0 for success.
-/// @details sysctl set(kpc.thread_counting)
-static int (*kpc_set_thread_counting)(u32 classes);
-
-/// Get how many config registers there are for a given mask.
-/// For example: Intel may returns 1 for `KPC_CLASS_FIXED_MASK`,
-///                        returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`.
-/// @param classes See `class mask constants` above.
-/// @return 0 if error occurs or no class is set.
-/// @note This method does not requires root privileges.
-/// @details sysctl get(kpc.config_count)
-static u32 (*kpc_get_config_count)(u32 classes);
-
-/// Get config registers.
-/// @param classes see `class mask constants` above.
-/// @param config Config buffer to receive values, should not smaller than
-///               kpc_get_config_count(classes) * sizeof(kpc_config_t).
-/// @return 0 for success.
-/// @details sysctl get(kpc.config_count), get(kpc.config)
-static int (*kpc_get_config)(u32 classes, kpc_config_t *config);
-
-/// Set config registers.
-/// @param classes see `class mask constants` above.
-/// @param config Config buffer, should not smaller than
-///               kpc_get_config_count(classes) * sizeof(kpc_config_t).
-/// @return 0 for success.
-/// @details sysctl get(kpc.config_count), set(kpc.config)
-static int (*kpc_set_config)(u32 classes, kpc_config_t *config);
-
-/// Get how many counters there are for a given mask.
-/// For example: Intel may returns 3 for `KPC_CLASS_FIXED_MASK`,
-///                        returns 4 for `KPC_CLASS_CONFIGURABLE_MASK`.
-/// @param classes See `class mask constants` above.
-/// @note This method does not requires root privileges.
-/// @details sysctl get(kpc.counter_count)
-static u32 (*kpc_get_counter_count)(u32 classes);
-
-/// Get counter accumulations.
-/// If `all_cpus` is true, the buffer count should not smaller than
-/// (cpu_count * counter_count). Otherwize, the buffer count should not smaller
-/// than (counter_count).
-/// @see kpc_get_counter_count(), kpc_cpu_count().
-/// @param all_cpus true for all CPUs, false for current cpu.
-/// @param classes See `class mask constants` above.
-/// @param curcpu A pointer to receive current cpu id, can be NULL.
-/// @param buf Buffer to receive counter's value.
-/// @return 0 for success.
-/// @details sysctl get(hw.ncpu), get(kpc.counter_count), get(kpc.counters)
-static int (*kpc_get_cpu_counters)(bool all_cpus, u32 classes, int *curcpu,
-                                   u64 *buf);
-
-/// Get counter accumulations for current thread.
-/// @param tid Thread id, should be 0.
-/// @param buf_count The number of buf's elements (not bytes),
-///                  should not smaller than kpc_get_counter_count().
-/// @param buf Buffer to receive counter's value.
-/// @return 0 for success.
-/// @details sysctl get(kpc.thread_counters)
-static int (*kpc_get_thread_counters)(u32 tid, u32 buf_count, u64 *buf);
-
-/// Acquire/release the counters used by the Power Manager.
-/// @param val 1:acquire, 0:release
-/// @return 0 for success.
-/// @details sysctl set(kpc.force_all_ctrs)
-static int (*kpc_force_all_ctrs_set)(int val);
-
-/// Get the state of all_ctrs.
-/// @return 0 for success.
-/// @details sysctl get(kpc.force_all_ctrs)
-static int (*kpc_force_all_ctrs_get)(int *val_out);
-
-/// Set number of actions, should be `KPERF_ACTION_MAX`.
-/// @details sysctl set(kperf.action.count)
-static int (*kperf_action_count_set)(u32 count);
-
-/// Get number of actions.
-/// @details sysctl get(kperf.action.count)
-static int (*kperf_action_count_get)(u32 *count);
-
-/// Set what to sample when a trigger fires an action, e.g.
-/// `KPERF_SAMPLER_PMC_CPU`.
-/// @details sysctl set(kperf.action.samplers)
-static int (*kperf_action_samplers_set)(u32 actionid, u32 sample);
-
-/// Get what to sample when a trigger fires an action.
-/// @details sysctl get(kperf.action.samplers)
-static int (*kperf_action_samplers_get)(u32 actionid, u32 *sample);
-
-/// Apply a task filter to the action, -1 to disable filter.
-/// @details sysctl set(kperf.action.filter_by_task)
-static int (*kperf_action_filter_set_by_task)(u32 actionid, i32 port);
-
-/// Apply a pid filter to the action, -1 to disable filter.
-/// @details sysctl set(kperf.action.filter_by_pid)
-static int (*kperf_action_filter_set_by_pid)(u32 actionid, i32 pid);
-
-/// Set number of time triggers, should be `KPERF_TIMER_MAX`.
-/// @details sysctl set(kperf.timer.count)
-static int (*kperf_timer_count_set)(u32 count);
-
-/// Get number of time triggers.
-/// @details sysctl get(kperf.timer.count)
-static int (*kperf_timer_count_get)(u32 *count);
-
-/// Set timer number and period.
-/// @details sysctl set(kperf.timer.period)
-static int (*kperf_timer_period_set)(u32 actionid, u64 tick);
-
-/// Get timer number and period.
-/// @details sysctl get(kperf.timer.period)
-static int (*kperf_timer_period_get)(u32 actionid, u64 *tick);
-
-/// Set timer number and actionid.
-/// @details sysctl set(kperf.timer.action)
-static int (*kperf_timer_action_set)(u32 actionid, u32 timerid);
-
-/// Get timer number and actionid.
-/// @details sysctl get(kperf.timer.action)
-static int (*kperf_timer_action_get)(u32 actionid, u32 *timerid);
-
-/// Set which timer ID does PET (Profile Every Thread).
-/// @details sysctl set(kperf.timer.pet_timer)
-static int (*kperf_timer_pet_set)(u32 timerid);
-
-/// Get which timer ID does PET (Profile Every Thread).
-/// @details sysctl get(kperf.timer.pet_timer)
-static int (*kperf_timer_pet_get)(u32 *timerid);
-
-/// Enable or disable sampling.
-/// @details sysctl set(kperf.sampling)
-static int (*kperf_sample_set)(u32 enabled);
-
-/// Get is currently sampling.
-/// @details sysctl get(kperf.sampling)
-static int (*kperf_sample_get)(u32 *enabled);
-
-/// Reset kperf: stop sampling, kdebug, timers and actions.
-/// @return 0 for success.
-static int (*kperf_reset)(void);
-
-/// Nanoseconds to CPU ticks.
-static u64 (*kperf_ns_to_ticks)(u64 ns);
-
-/// CPU ticks to nanoseconds.
-static u64 (*kperf_ticks_to_ns)(u64 ticks);
-
-/// CPU ticks frequency (mach_absolute_time).
-static u64 (*kperf_tick_frequency)(void);
-
-/// Get lightweight PET mode (not in kperf.framework).
-static int kperf_lightweight_pet_get(u32 *enabled) {
-  if (!enabled)
-    return -1;
-  usize size = 4;
-  return sysctlbyname("kperf.lightweight_pet", enabled, &size, NULL, 0);
-}
-
-/// Set lightweight PET mode (not in kperf.framework).
-static int kperf_lightweight_pet_set(u32 enabled) {
-  return sysctlbyname("kperf.lightweight_pet", NULL, NULL, &enabled, 4);
-}
-
-// -----------------------------------------------------------------------------
-// <kperfdata.framework> header (reverse engineered)
-// This framework provides some functions to access the local CPU database.
-// These functions do not require root privileges.
-// -----------------------------------------------------------------------------
-
-// KPEP CPU archtecture constants.
-#define KPEP_ARCH_I386 0
-#define KPEP_ARCH_X86_64 1
-#define KPEP_ARCH_ARM 2
-#define KPEP_ARCH_ARM64 3
-
-/// KPEP event (size: 48/28 bytes on 64/32 bit OS)
-typedef struct kpep_event {
-  const char *name; ///< Unique name of a event, such as "INST_RETIRED.ANY".
-  const char *description; ///< Description for this event.
-  const char *errata;      ///< Errata, currently NULL.
-  const char *alias;       ///< Alias name, such as "Instructions", "Cycles".
-  const char *fallback;    ///< Fallback event name for fixed counter.
-  u32 mask;
-  u8 number;
-  u8 umask;
-  u8 reserved;
-  u8 is_fixed;
-} kpep_event;
-
-/// KPEP database (size: 144/80 bytes on 64/32 bit OS)
-typedef struct kpep_db {
-  const char *name;           ///< Database name, such as "haswell".
-  const char *cpu_id;         ///< Plist name, such as "cpu_7_8_10b282dc".
-  const char *marketing_name; ///< Marketing name, such as "Intel Haswell".
-  void *plist_data;           ///< Plist data (CFDataRef), currently NULL.
-  void *event_map; ///< All events (CFDict<CFSTR(event_name), kpep_event *>).
-  kpep_event
-      *event_arr; ///< Event struct buffer (sizeof(kpep_event) * events_count).
-  kpep_event **fixed_event_arr; ///< Fixed counter events (sizeof(kpep_event *)
-                                ///< * fixed_counter_count)
-  void *alias_map; ///< All aliases (CFDict<CFSTR(event_name), kpep_event *>).
-  usize reserved_1;
-  usize reserved_2;
-  usize reserved_3;
-  usize event_count; ///< All events count.
-  usize alias_count;
-  usize fixed_counter_count;
-  usize config_counter_count;
-  usize power_counter_count;
-  u32 archtecture; ///< see `KPEP CPU archtecture constants` above.
-  u32 fixed_counter_bits;
-  u32 config_counter_bits;
-  u32 power_counter_bits;
-} kpep_db;
-
-/// KPEP config (size: 80/44 bytes on 64/32 bit OS)
-typedef struct kpep_config {
-  kpep_db *db;
-  kpep_event **ev_arr; ///< (sizeof(kpep_event *) * counter_count), init NULL
-  usize *ev_map;       ///< (sizeof(usize *) * counter_count), init 0
-  usize *ev_idx;       ///< (sizeof(usize *) * counter_count), init -1
-  u32 *flags;          ///< (sizeof(u32 *) * counter_count), init 0
-  u64 *kpc_periods;    ///< (sizeof(u64 *) * counter_count), init 0
-  usize event_count;   /// kpep_config_events_count()
-  usize counter_count;
-  u32 classes; ///< See `class mask constants` above.
-  u32 config_counter;
-  u32 power_counter;
-  u32 reserved;
-} kpep_config;
-
-/// Error code for kpep_config_xxx() and kpep_db_xxx() functions.
-typedef enum {
-  KPEP_CONFIG_ERROR_NONE = 0,
-  KPEP_CONFIG_ERROR_INVALID_ARGUMENT = 1,
-  KPEP_CONFIG_ERROR_OUT_OF_MEMORY = 2,
-  KPEP_CONFIG_ERROR_IO = 3,
-  KPEP_CONFIG_ERROR_BUFFER_TOO_SMALL = 4,
-  KPEP_CONFIG_ERROR_CUR_SYSTEM_UNKNOWN = 5,
-  KPEP_CONFIG_ERROR_DB_PATH_INVALID = 6,
-  KPEP_CONFIG_ERROR_DB_NOT_FOUND = 7,
-  KPEP_CONFIG_ERROR_DB_ARCH_UNSUPPORTED = 8,
-  KPEP_CONFIG_ERROR_DB_VERSION_UNSUPPORTED = 9,
-  KPEP_CONFIG_ERROR_DB_CORRUPT = 10,
-  KPEP_CONFIG_ERROR_EVENT_NOT_FOUND = 11,
-  KPEP_CONFIG_ERROR_CONFLICTING_EVENTS = 12,
-  KPEP_CONFIG_ERROR_COUNTERS_NOT_FORCED = 13,
-  KPEP_CONFIG_ERROR_EVENT_UNAVAILABLE = 14,
-  KPEP_CONFIG_ERROR_ERRNO = 15,
-  KPEP_CONFIG_ERROR_MAX
-} kpep_config_error_code;
-
-/// Error description for kpep_config_error_code.
-static const char *kpep_config_error_names[KPEP_CONFIG_ERROR_MAX] = {
-    "none",
-    "invalid argument",
-    "out of memory",
-    "I/O",
-    "buffer too small",
-    "current system unknown",
-    "database path invalid",
-    "database not found",
-    "database architecture unsupported",
-    "database version unsupported",
-    "database corrupt",
-    "event not found",
-    "conflicting events",
-    "all counters must be forced",
-    "event unavailable",
-    "check errno"};
-
-/// Error description.
-static const char *kpep_config_error_desc(int code) {
-  if (0 <= code && code < KPEP_CONFIG_ERROR_MAX) {
-    return kpep_config_error_names[code];
-  }
-  return "unknown error";
-}
-
-/// Create a config.
-/// @param db A kpep db, see kpep_db_create()
-/// @param cfg_ptr A pointer to receive the new config.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_config_create)(kpep_db *db, kpep_config **cfg_ptr);
-
-/// Free the config.
-static void (*kpep_config_free)(kpep_config *cfg);
-
-/// Add an event to config.
-/// @param cfg The config.
-/// @param ev_ptr A event pointer.
-/// @param flag 0: all, 1: user space only
-/// @param err Error bitmap pointer, can be NULL.
-///            If return value is `CONFLICTING_EVENTS`, this bitmap contains
-///            the conflicted event indices, e.g. "1 << 2" means index 2.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_config_add_event)(kpep_config *cfg, kpep_event **ev_ptr,
-                                    u32 flag, u32 *err);
-
-/// Remove event at index.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_config_remove_event)(kpep_config *cfg, usize idx);
-
-/// Force all counters.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_config_force_counters)(kpep_config *cfg);
-
-/// Get events count.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_config_events_count)(kpep_config *cfg, usize *count_ptr);
-
-/// Get all event pointers.
-/// @param buf A buffer to receive event pointers.
-/// @param buf_size The buffer's size in bytes, should not smaller than
-///                 kpep_config_events_count() * sizeof(void *).
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_config_events)(kpep_config *cfg, kpep_event **buf,
-                                 usize buf_size);
-
-/// Get kpc register configs.
-/// @param buf A buffer to receive kpc register configs.
-/// @param buf_size The buffer's size in bytes, should not smaller than
-///                 kpep_config_kpc_count() * sizeof(kpc_config_t).
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_config_kpc)(kpep_config *cfg, kpc_config_t *buf,
-                              usize buf_size);
-
-/// Get kpc register config count.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_config_kpc_count)(kpep_config *cfg, usize *count_ptr);
-
-/// Get kpc classes.
-/// @param classes See `class mask constants` above.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_config_kpc_classes)(kpep_config *cfg, u32 *classes_ptr);
-
-/// Get the index mapping from event to counter.
-/// @param buf A buffer to receive indexes.
-/// @param buf_size The buffer's size in bytes, should not smaller than
-///                 kpep_config_events_count() * sizeof(kpc_config_t).
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_config_kpc_map)(kpep_config *cfg, usize *buf, usize buf_size);
-
-/// Open a kpep database file in "/usr/share/kpep/" or "/usr/local/share/kpep/".
-/// @param name File name, for example "haswell", "cpu_100000c_1_92fb37c8".
-///             Pass NULL for current CPU.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_db_create)(const char *name, kpep_db **db_ptr);
-
-/// Free the kpep database.
-static void (*kpep_db_free)(kpep_db *db);
-
-/// Get the database's name.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_db_name)(kpep_db *db, const char **name);
-
-/// Get the event alias count.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_db_aliases_count)(kpep_db *db, usize *count);
-
-/// Get all alias.
-/// @param buf A buffer to receive all alias strings.
-/// @param buf_size The buffer's size in bytes,
-///        should not smaller than kpep_db_aliases_count() * sizeof(void *).
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_db_aliases)(kpep_db *db, const char **buf, usize buf_size);
-
-/// Get counters count for given classes.
-/// @param classes 1: Fixed, 2: Configurable.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_db_counters_count)(kpep_db *db, u8 classes, usize *count);
-
-/// Get all event count.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_db_events_count)(kpep_db *db, usize *count);
-
-/// Get all events.
-/// @param buf A buffer to receive all event pointers.
-/// @param buf_size The buffer's size in bytes,
-///        should not smaller than kpep_db_events_count() * sizeof(void *).
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_db_events)(kpep_db *db, kpep_event **buf, usize buf_size);
-
-/// Get one event by name.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_db_event)(kpep_db *db, const char *name, kpep_event **ev_ptr);
-
-/// Get event's name.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_event_name)(kpep_event *ev, const char **name_ptr);
-
-/// Get event's alias.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_event_alias)(kpep_event *ev, const char **alias_ptr);
-
-/// Get event's description.
-/// @return kpep_config_error_code, 0 for success.
-static int (*kpep_event_description)(kpep_event *ev, const char **str_ptr);
-
-// -----------------------------------------------------------------------------
-// load kperf/kperfdata dynamic library
-// -----------------------------------------------------------------------------
-
-typedef struct {
-  const char *name;
-  void **impl;
-} lib_symbol;
-
-#define lib_nelems(x) (sizeof(x) / sizeof((x)[0]))
-#define lib_symbol_def(name)                                                   \
-  { #name, (void **)&name }
-
-static const lib_symbol lib_symbols_kperf[] = {
-    lib_symbol_def(kpc_pmu_version),
-    lib_symbol_def(kpc_cpu_string),
-    lib_symbol_def(kpc_set_counting),
-    lib_symbol_def(kpc_get_counting),
-    lib_symbol_def(kpc_set_thread_counting),
-    lib_symbol_def(kpc_get_thread_counting),
-    lib_symbol_def(kpc_get_config_count),
-    lib_symbol_def(kpc_get_counter_count),
-    lib_symbol_def(kpc_set_config),
-    lib_symbol_def(kpc_get_config),
-    lib_symbol_def(kpc_get_cpu_counters),
-    lib_symbol_def(kpc_get_thread_counters),
-    lib_symbol_def(kpc_force_all_ctrs_set),
-    lib_symbol_def(kpc_force_all_ctrs_get),
-    lib_symbol_def(kperf_action_count_set),
-    lib_symbol_def(kperf_action_count_get),
-    lib_symbol_def(kperf_action_samplers_set),
-    lib_symbol_def(kperf_action_samplers_get),
-    lib_symbol_def(kperf_action_filter_set_by_task),
-    lib_symbol_def(kperf_action_filter_set_by_pid),
-    lib_symbol_def(kperf_timer_count_set),
-    lib_symbol_def(kperf_timer_count_get),
-    lib_symbol_def(kperf_timer_period_set),
-    lib_symbol_def(kperf_timer_period_get),
-    lib_symbol_def(kperf_timer_action_set),
-    lib_symbol_def(kperf_timer_action_get),
-    lib_symbol_def(kperf_sample_set),
-    lib_symbol_def(kperf_sample_get),
-    lib_symbol_def(kperf_reset),
-    lib_symbol_def(kperf_timer_pet_set),
-    lib_symbol_def(kperf_timer_pet_get),
-    lib_symbol_def(kperf_ns_to_ticks),
-    lib_symbol_def(kperf_ticks_to_ns),
-    lib_symbol_def(kperf_tick_frequency),
-};
-
-static const lib_symbol lib_symbols_kperfdata[] = {
-    lib_symbol_def(kpep_config_create),
-    lib_symbol_def(kpep_config_free),
-    lib_symbol_def(kpep_config_add_event),
-    lib_symbol_def(kpep_config_remove_event),
-    lib_symbol_def(kpep_config_force_counters),
-    lib_symbol_def(kpep_config_events_count),
-    lib_symbol_def(kpep_config_events),
-    lib_symbol_def(kpep_config_kpc),
-    lib_symbol_def(kpep_config_kpc_count),
-    lib_symbol_def(kpep_config_kpc_classes),
-    lib_symbol_def(kpep_config_kpc_map),
-    lib_symbol_def(kpep_db_create),
-    lib_symbol_def(kpep_db_free),
-    lib_symbol_def(kpep_db_name),
-    lib_symbol_def(kpep_db_aliases_count),
-    lib_symbol_def(kpep_db_aliases),
-    lib_symbol_def(kpep_db_counters_count),
-    lib_symbol_def(kpep_db_events_count),
-    lib_symbol_def(kpep_db_events),
-    lib_symbol_def(kpep_db_event),
-    lib_symbol_def(kpep_event_name),
-    lib_symbol_def(kpep_event_alias),
-    lib_symbol_def(kpep_event_description),
-};
-
-#define lib_path_kperf "/System/Library/PrivateFrameworks/kperf.framework/kperf"
-#define lib_path_kperfdata                                                     \
-  "/System/Library/PrivateFrameworks/kperfdata.framework/kperfdata"
-
-static bool lib_inited = false;
-static bool lib_has_err = false;
-static char lib_err_msg[256];
-
-static void *lib_handle_kperf = NULL;
-static void *lib_handle_kperfdata = NULL;
-
-static void lib_deinit(void) {
-  lib_inited = false;
-  lib_has_err = false;
-  if (lib_handle_kperf)
-    dlclose(lib_handle_kperf);
-  if (lib_handle_kperfdata)
-    dlclose(lib_handle_kperfdata);
-  lib_handle_kperf = NULL;
-  lib_handle_kperfdata = NULL;
-  for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) {
-    const lib_symbol *symbol = &lib_symbols_kperf[i];
-    *symbol->impl = NULL;
-  }
-  for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) {
-    const lib_symbol *symbol = &lib_symbols_kperfdata[i];
-    *symbol->impl = NULL;
-  }
-}
-
-static bool lib_init(void) {
-#define return_err()                                                           \
-  do {                                                                         \
-    lib_deinit();                                                              \
-    lib_inited = true;                                                         \
-    lib_has_err = true;                                                        \
-    return false;                                                              \
-  } while (false)
-
-  if (lib_inited)
-    return !lib_has_err;
-
-  // load dynamic library
-  lib_handle_kperf = dlopen(lib_path_kperf, RTLD_LAZY);
-  if (!lib_handle_kperf) {
-    snprintf(lib_err_msg, sizeof(lib_err_msg),
-             "Failed to load kperf.framework, message: %s.", dlerror());
-    return_err();
-  }
-  lib_handle_kperfdata = dlopen(lib_path_kperfdata, RTLD_LAZY);
-  if (!lib_handle_kperfdata) {
-    snprintf(lib_err_msg, sizeof(lib_err_msg),
-             "Failed to load kperfdata.framework, message: %s.", dlerror());
-    return_err();
-  }
-
-  // load symbol address from dynamic library
-  for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) {
-    const lib_symbol *symbol = &lib_symbols_kperf[i];
-    *symbol->impl = dlsym(lib_handle_kperf, symbol->name);
-    if (!*symbol->impl) {
-      snprintf(lib_err_msg, sizeof(lib_err_msg),
-               "Failed to load kperf function: %s.", symbol->name);
-      return_err();
-    }
-  }
-  for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) {
-    const lib_symbol *symbol = &lib_symbols_kperfdata[i];
-    *symbol->impl = dlsym(lib_handle_kperfdata, symbol->name);
-    if (!*symbol->impl) {
-      snprintf(lib_err_msg, sizeof(lib_err_msg),
-               "Failed to load kperfdata function: %s.", symbol->name);
-      return_err();
-    }
-  }
-
-  lib_inited = true;
-  lib_has_err = false;
-  return true;
-
-#undef return_err
-}
-
-// -----------------------------------------------------------------------------
-// kdebug private structs
-// https://github.com/apple/darwin-xnu/blob/main/bsd/sys_private/kdebug_private.h
-// -----------------------------------------------------------------------------
-
-/*
- * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf
- * structure.
- */
-#if defined(__arm64__)
-typedef uint64_t kd_buf_argtype;
-#else
-typedef uintptr_t kd_buf_argtype;
-#endif
-
-typedef struct {
-  uint64_t timestamp;
-  kd_buf_argtype arg1;
-  kd_buf_argtype arg2;
-  kd_buf_argtype arg3;
-  kd_buf_argtype arg4;
-  kd_buf_argtype arg5; /* the thread ID */
-  uint32_t debugid;    /* see <sys/kdebug.h> */
-
-/*
- * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf
- * structure.
- */
-#if defined(__LP64__) || defined(__arm64__)
-  uint32_t cpuid; /* cpu index, from 0 */
-  kd_buf_argtype unused;
-#endif
-} kd_buf;
-
-/* bits for the type field of kd_regtype */
-#define KDBG_CLASSTYPE 0x10000
-#define KDBG_SUBCLSTYPE 0x20000
-#define KDBG_RANGETYPE 0x40000
-#define KDBG_TYPENONE 0x80000
-#define KDBG_CKTYPES 0xF0000
-
-/* only trace at most 4 types of events, at the code granularity */
-#define KDBG_VALCHECK 0x00200000U
-
-typedef struct {
-  unsigned int type;
-  unsigned int value1;
-  unsigned int value2;
-  unsigned int value3;
-  unsigned int value4;
-} kd_regtype;
-
-typedef struct {
-  /* number of events that can fit in the buffers */
-  int nkdbufs;
-  /* set if trace is disabled */
-  int nolog;
-  /* kd_ctrl_page.flags */
-  unsigned int flags;
-  /* number of threads in thread map */
-  int nkdthreads;
-  /* the owning pid */
-  int bufid;
-} kbufinfo_t;
-
-// -----------------------------------------------------------------------------
-// kdebug utils
-// -----------------------------------------------------------------------------
-
-/// Clean up trace buffers and reset ktrace/kdebug/kperf.
-/// @return 0 on success.
-static int kdebug_reset(void) {
-  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREMOVE};
-  return sysctl(mib, 3, NULL, NULL, NULL, 0);
-}
-
-/// Disable and reinitialize the trace buffers.
-/// @return 0 on success.
-static int kdebug_reinit(void) {
-  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETUP};
-  return sysctl(mib, 3, NULL, NULL, NULL, 0);
-}
-
-/// Set debug filter.
-static int kdebug_setreg(kd_regtype *kdr) {
-  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETREG};
-  usize size = sizeof(kd_regtype);
-  return sysctl(mib, 3, kdr, &size, NULL, 0);
-}
-
-/// Set maximum number of trace entries (kd_buf).
-/// Only allow allocation up to half the available memory (sane_size).
-/// @return 0 on success.
-static int kdebug_trace_setbuf(int nbufs) {
-  int mib[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETBUF, nbufs};
-  return sysctl(mib, 4, NULL, NULL, NULL, 0);
-}
-
-/// Enable or disable kdebug trace.
-/// Trace buffer must already be initialized.
-/// @return 0 on success.
-static int kdebug_trace_enable(bool enable) {
-  int mib[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDENABLE, enable};
-  return sysctl(mib, 4, NULL, 0, NULL, 0);
-}
-
-/// Retrieve trace buffer information from kernel.
-/// @return 0 on success.
-static int kdebug_get_bufinfo(kbufinfo_t *info) {
-  if (!info)
-    return -1;
-  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDGETBUF};
-  size_t needed = sizeof(kbufinfo_t);
-  return sysctl(mib, 3, info, &needed, NULL, 0);
-}
-
-/// Retrieve trace buffers from kernel.
-/// @param buf Memory to receive buffer data, array of `kd_buf`.
-/// @param len Length of `buf` in bytes.
-/// @param count Number of trace entries (kd_buf) obtained.
-/// @return 0 on success.
-static int kdebug_trace_read(void *buf, usize len, usize *count) {
-  if (count)
-    *count = 0;
-  if (!buf || !len)
-    return -1;
-
-  // Note: the input and output units are not the same.
-  // input: bytes
-  // output: number of kd_buf
-  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREADTR};
-  int ret = sysctl(mib, 3, buf, &len, NULL, 0);
-  if (ret != 0)
-    return ret;
-  *count = len;
-  return 0;
-}
-
-/// Block until there are new buffers filled or `timeout_ms` have passed.
-/// @param timeout_ms timeout milliseconds, 0 means wait forever.
-/// @param suc set true if new buffers filled.
-/// @return 0 on success.
-static int kdebug_wait(usize timeout_ms, bool *suc) {
-  if (timeout_ms == 0)
-    return -1;
-  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDBUFWAIT};
-  usize val = timeout_ms;
-  int ret = sysctl(mib, 3, NULL, &val, NULL, 0);
-  if (suc)
-    *suc = !!val;
-  return ret;
-}
-
-// -----------------------------------------------------------------------------
-// Demo
-// -----------------------------------------------------------------------------
-
-#define EVENT_NAME_MAX 8
-
-typedef struct {
-  const char *alias;                 /// name for print
-  const char *names[EVENT_NAME_MAX]; /// name from pmc db
-} event_alias;
-
-/// Event names from /usr/share/kpep/<name>.plist
-static const event_alias profile_events[] = {
-    {"cycles",
-     {
-         "FIXED_CYCLES",            // Apple A7-A15//CORE_ACTIVE_CYCLE
-         "CPU_CLK_UNHALTED.THREAD", // Intel Core 1th-10th
-         "CPU_CLK_UNHALTED.CORE",   // Intel Yonah, Merom
-     }},
-    {"instructions",
-     {
-         "FIXED_INSTRUCTIONS", // Apple A7-A15
-         "INST_RETIRED.ANY"    // Intel Yonah, Merom, Core 1th-10th
-     }},
-    {"branches",
-     {
-         "INST_BRANCH",                  // Apple A7-A15
-         "BR_INST_RETIRED.ALL_BRANCHES", // Intel Core 1th-10th
-         "INST_RETIRED.ANY",             // Intel Yonah, Merom
-     }},
-    {"branch-misses",
-     {
-         "BRANCH_MISPRED_NONSPEC",       // Apple A7-A15, since iOS 15, macOS 12
-         "BRANCH_MISPREDICT",            // Apple A7-A14
-         "BR_MISP_RETIRED.ALL_BRANCHES", // Intel Core 2th-10th
-         "BR_INST_RETIRED.MISPRED",      // Intel Yonah, Merom
-     }},
-};
-
-static kpep_event *get_event(kpep_db *db, const event_alias *alias) {
-  for (usize j = 0; j < EVENT_NAME_MAX; j++) {
-    const char *name = alias->names[j];
-    if (!name)
-      break;
-    kpep_event *ev = NULL;
-    if (kpep_db_event(db, name, &ev) == 0) {
-      return ev;
-    }
-  }
-  return NULL;
-}
-
-kpc_config_t regs[KPC_MAX_COUNTERS] = {0};
-usize counter_map[KPC_MAX_COUNTERS] = {0};
-u64 counters_0[KPC_MAX_COUNTERS] = {0};
-u64 counters_1[KPC_MAX_COUNTERS] = {0};
-const usize ev_count = sizeof(profile_events) / sizeof(profile_events[0]);
-
-bool setup_performance_counters() {
-  static bool init = false;
-  static bool worked = false;
-
-  if (init) {
-    return worked;
-  }
-  init = true;
-
-  // load dylib
-  if (!lib_init()) {
-    printf("Error: %s\n", lib_err_msg);
-    return (worked = false);
-  }
-
-  // check permission
-  int force_ctrs = 0;
-  if (kpc_force_all_ctrs_get(&force_ctrs)) {
-    // printf("Permission denied, xnu/kpc requires root privileges.\n");
-    return (worked = false);
-  }
-  int ret;
-  // load pmc db
-  kpep_db *db = NULL;
-  if ((ret = kpep_db_create(NULL, &db))) {
-    printf("Error: cannot load pmc database: %d.\n", ret);
-    return (worked = false);
-  }
-  printf("loaded db: %s (%s)\n", db->name, db->marketing_name);
-
-  // create a config
-  kpep_config *cfg = NULL;
-  if ((ret = kpep_config_create(db, &cfg))) {
-    printf("Failed to create kpep config: %d (%s).\n", ret,
-           kpep_config_error_desc(ret));
-    return (worked = false);
-  }
-  if ((ret = kpep_config_force_counters(cfg))) {
-    printf("Failed to force counters: %d (%s).\n", ret,
-           kpep_config_error_desc(ret));
-    return (worked = false);
-  }
-
-  // get events
-  kpep_event *ev_arr[ev_count] = {0};
-  for (usize i = 0; i < ev_count; i++) {
-    const event_alias *alias = profile_events + i;
-    ev_arr[i] = get_event(db, alias);
-    if (!ev_arr[i]) {
-      printf("Cannot find event: %s.\n", alias->alias);
-      return (worked = false);
-    }
-  }
-
-  // add event to config
-  for (usize i = 0; i < ev_count; i++) {
-    kpep_event *ev = ev_arr[i];
-    if ((ret = kpep_config_add_event(cfg, &ev, 0, NULL))) {
-      printf("Failed to add event: %d (%s).\n", ret,
-             kpep_config_error_desc(ret));
-      return (worked = false);
-    }
-  }
-
-  // prepare buffer and config
-  u32 classes = 0;
-  usize reg_count = 0;
-  if ((ret = kpep_config_kpc_classes(cfg, &classes))) {
-    printf("Failed get kpc classes: %d (%s).\n", ret,
-           kpep_config_error_desc(ret));
-    return (worked = false);
-  }
-  if ((ret = kpep_config_kpc_count(cfg, &reg_count))) {
-    printf("Failed get kpc count: %d (%s).\n", ret,
-           kpep_config_error_desc(ret));
-    return (worked = false);
-  }
-  if ((ret = kpep_config_kpc_map(cfg, counter_map, sizeof(counter_map)))) {
-    printf("Failed get kpc map: %d (%s).\n", ret, kpep_config_error_desc(ret));
-    return (worked = false);
-  }
-  if ((ret = kpep_config_kpc(cfg, regs, sizeof(regs)))) {
-    printf("Failed get kpc registers: %d (%s).\n", ret,
-           kpep_config_error_desc(ret));
-    return (worked = false);
-  }
-
-  // set config to kernel
-  if ((ret = kpc_force_all_ctrs_set(1))) {
-    printf("Failed force all ctrs: %d.\n", ret);
-    return (worked = false);
-  }
-  if ((classes & KPC_CLASS_CONFIGURABLE_MASK) && reg_count) {
-    if ((ret = kpc_set_config(classes, regs))) {
-      printf("Failed set kpc config: %d.\n", ret);
-      return (worked = false);
-    }
-  }
-
-  // start counting
-  if ((ret = kpc_set_counting(classes))) {
-    printf("Failed set counting: %d.\n", ret);
-    return (worked = false);
-  }
-  if ((ret = kpc_set_thread_counting(classes))) {
-    printf("Failed set thread counting: %d.\n", ret);
-    return (worked = false);
-  }
-
-  return (worked = true);
-}
-
-inline performance_counters get_counters() {
-  static bool warned = false;
-  int ret;
-  // get counters before
-  if ((ret = kpc_get_thread_counters(0, KPC_MAX_COUNTERS, counters_0))) {
-    if (!warned) {
-
-      printf("Failed get thread counters before: %d.\n", ret);
-      warned = true;
-    }
-    return 1;
-  }
-  /*printf("counters value:\n");
-     for (usize i = 0; i < ev_count; i++) {
-         const event_alias *alias = profile_events + i;
-         usize idx = counter_map[i];
-         u64 val = counters_1[idx] - counters_0[idx];
-         printf("%14s: %llu\n", alias->alias, val);
-     }*/
-  return performance_counters{
-      counters_0[counter_map[0]], counters_0[counter_map[2]],
-      counters_0[counter_map[3]], counters_0[counter_map[1]]};
-}
-
-#endif
diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp
new file mode 100644
index 00000000..825a6b0a
--- /dev/null
+++ b/benchmarks/bench_ip.cpp
@@ -0,0 +1,201 @@
+#include "counters/bench.h"
+#include "fast_float/fast_float.h"
+#include <charconv>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <random>
+#include <atomic>
+#include <string>
+#include <utility>
+
+/// Prints one benchmark result row to stdout.
+/// @param volume number of IP addresses processed per run.
+/// @param bytes number of input bytes processed per run.
+/// @param name label printed in the first column.
+/// @param agg measurements returned by counters::bench; the fastest_*
+///        accessors are what gets printed.
+void pretty_print(size_t volume, size_t bytes, std::string name,
+                  counters::event_aggregate agg) {
+  if (agg.inner_count > 1) {
+    printf("# (inner count: %d)\n", agg.inner_count);
+  }
+  printf("%-40s : ", name.c_str());
+  // bytes per nanosecond is numerically GB/s
+  printf(" %5.2f GB/s ", bytes / agg.fastest_elapsed_ns());
+  printf(" %5.1f Mip/s ", volume * 1000.0 / agg.fastest_elapsed_ns());
+  printf(" %5.2f ns/ip ", agg.fastest_elapsed_ns() / volume);
+  // hardware-counter columns are only printed when has_events() reports true
+  if (counters::event_collector().has_events()) {
+    printf(" %5.2f GHz ", agg.fastest_cycles() / agg.fastest_elapsed_ns());
+    printf(" %5.2f c/ip ", agg.fastest_cycles() / volume);
+    printf(" %5.2f i/ip ", agg.fastest_instructions() / volume);
+    printf(" %5.2f c/b ", agg.fastest_cycles() / bytes);
+    printf(" %5.2f i/b ", agg.fastest_instructions() / bytes);
+    printf(" %5.2f i/c ", agg.fastest_instructions() / agg.fastest_cycles());
+  }
+  printf("\n");
+}
+
+/// Scans forward from p past the third '.' and then past the run of decimal
+/// digits that follows it.
+/// @return pointer to the first character after that digit run, or pend if
+///         the input ends first.
+fastfloat_really_inline const char *seek_ip_end(const char *p,
+                                                const char *pend) {
+  const char *current = p;
+  size_t count = 0;
+  for (; current != pend; ++current) {
+    if (*current == '.') {
+      count++;
+      if (count == 3) {
+        ++current;
+        break;
+      }
+    }
+  }
+  while (current != pend) {
+    if (*current <= '9' && *current >= '0') {
+      ++current;
+    } else {
+      break;
+    }
+  }
+  return current;
+}
+
+/// Selects which integer parser simple_parse_ip_line uses.
+enum class parse_method { standard, fast_float };
+
+/// Parses a dotted-quad IPv4 address starting at p.
+/// @tparam method parser used for each octet (std::from_chars or
+///         fast_float::from_chars).
+/// @return {true, address} with the first octet in the most significant byte,
+///         or {false, 0} if an octet fails to parse or a '.' is missing.
+template <parse_method method>
+fastfloat_really_inline std::pair<bool, uint32_t>
+simple_parse_ip_line(const char *p, const char *pend) {
+  const char *current = p;
+  uint32_t ip = 0;
+  for (int i = 0; i < 4; ++i) {
+    uint8_t value;
+    if constexpr (method == parse_method::standard) {
+      auto r = std::from_chars(current, pend, value);
+      if (r.ec != std::errc()) {
+        return {false, 0};
+      }
+      current = r.ptr;
+    } else if constexpr (method == parse_method::fast_float) {
+      auto r = fast_float::from_chars(current, pend, value);
+      if (r.ec != std::errc()) {
+        return {false, 0};
+      }
+      current = r.ptr;
+    }
+    ip = (ip << 8) | value;
+    if (i < 3) {
+      if (current == pend || *current++ != '.') {
+        return {false, 0};
+      }
+    }
+  }
+  return {true, ip};
+}
+
+/// Formats four octets as "a.b.c.d\n".
+static std::string make_ip_line(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
+  std::string s;
+  s.reserve(16);
+  s += std::to_string(a);
+  s += '.';
+  s += std::to_string(b);
+  s += '.';
+  s += std::to_string(c);
+  s += '.';
+  s += std::to_string(d);
+  s += '\n';
+  return s;
+}
+
+/// Builds N fixed-width IPv4 lines and times memcpy, seeking and both parsers.
+int main() {
+  constexpr size_t N = 15000;
+  std::mt19937 rng(1234);
+  std::uniform_int_distribution<int> dist(0, 255);
+
+  std::string buf;
+  constexpr size_t ip_size = 16;
+  buf.reserve(N * ip_size);
+
+  for (size_t i = 0; i < N; ++i) {
+    uint8_t a = static_cast<uint8_t>(dist(rng));
+    uint8_t b = static_cast<uint8_t>(dist(rng));
+    uint8_t c = static_cast<uint8_t>(dist(rng));
+    uint8_t d = static_cast<uint8_t>(dist(rng));
+    std::string ip_line = make_ip_line(a, b, c, d);
+    ip_line.resize(ip_size, ' '); // pad to fixed size
+    buf.append(ip_line);
+  }
+
+  // sentinel to allow 4-byte loads at end
+  buf.append(4, '\0');
+
+  const size_t bytes = buf.size() - 4; // exclude sentinel from throughput
+  const size_t volume = N;
+
+  // each timed loop folds its result into this volatile accumulator
+  volatile uint32_t sink = 0;
+  std::string buffer(ip_size * N, ' ');
+
+  pretty_print(volume, bytes, "memcpy baseline", counters::bench([&]() {
+                 std::memcpy(buffer.data(), buf.data(), bytes);
+               }));
+
+  pretty_print(volume, bytes, "just_seek_ip_end (no parse)",
+               counters::bench([&]() {
+                 const char *p = buf.data();
+                 const char *pend = buf.data() + bytes;
+                 uint32_t sum = 0;
+                 for (size_t i = 0; i < N; ++i) {
+                   const char *q = seek_ip_end(p, pend);
+                   sum += static_cast<uint32_t>(q - p);
+                   p += ip_size;
+                 }
+                 sink += sum;
+               }));
+
+  pretty_print(volume, bytes, "parse_ip_std_fromchars", counters::bench([&]() {
+                 const char *p = buf.data();
+                 const char *pend = buf.data() + bytes;
+                 uint32_t sum = 0;
+                 for (size_t i = 0; i < N; ++i) {
+                   auto [ok, ip] =
+                       simple_parse_ip_line<parse_method::standard>(p, pend);
+                   sum += ip;
+                   if (!ok) {
+                     std::abort();
+                   }
+                   p += ip_size;
+                 }
+                 sink += sum;
+               }));
+
+  pretty_print(volume, bytes, "parse_ip_fastfloat", counters::bench([&]() {
+                 const char *p = buf.data();
+                 const char *pend = buf.data() + bytes;
+                 uint32_t sum = 0;
+                 for (size_t i = 0; i < N; ++i) {
+                   auto [ok, ip] =
+                       simple_parse_ip_line<parse_method::fast_float>(p, pend);
+                   sum += ip;
+                   if (!ok) {
+                     std::abort();
+                   }
+                   p += ip_size;
+                 }
+                 sink += sum;
+               }));
+
+  return EXIT_SUCCESS;
+}
diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp
index 05f12330..d90038ed 100644
--- a/benchmarks/benchmark.cpp
+++ b/benchmarks/benchmark.cpp
@@ -1,7 +1,7 @@
 #if defined(__linux__) || (__APPLE__ && __aarch64__)
 #define USING_COUNTERS
 #endif
-#include "event_counter.h"
+#include "counters/event_counter.h"
 #include <algorithm>
 #include "fast_float/fast_float.h"
 #include <chrono>
@@ -50,14 +50,14 @@ double findmax_fastfloat32(std::vector<std::basic_string<CharT>> &s) {
   return answer;
 }
 
-event_collector collector{};
+counters::event_collector collector{};
 
 #ifdef USING_COUNTERS
 template <class T, class CharT>
-std::vector<event_count>
+std::vector<counters::event_count>
 time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
            size_t repeat) {
-  std::vector<event_count> aggregate;
+  std::vector<counters::event_count> aggregate;
   bool printed_bug = false;
   for (size_t i = 0; i < repeat; i++) {
     collector.start();
@@ -72,7 +72,7 @@ time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
 }
 
 void pretty_print(double volume, size_t number_of_floats, std::string name,
-                  std::vector<event_count> events) {
+                  std::vector<counters::event_count> events) {
   double volumeMB = volume / (1024. * 1024.);
   double average_ns{0};
   double min_ns{DBL_MAX};
@@ -84,7 +84,7 @@ void pretty_print(double volume, size_t number_of_floats, std::string name,
   double branches_avg{0};
   double branch_misses_min{0};
   double branch_misses_avg{0};
-  for (event_count e : events) {
+  for (counters::event_count e : events) {
     double ns = e.elapsed_ns();
     average_ns += ns;
     min_ns = min_ns < ns ? min_ns : ns;
@@ -102,7 +102,7 @@ void pretty_print(double volume, size_t number_of_floats, std::string name,
     branches_avg += branches;
     branches_min = branches_min < branches ? branches_min : branches;
 
-    double branch_misses = e.missed_branches();
+    double branch_misses = e.branch_misses();
     branch_misses_avg += branch_misses;
     branch_misses_min =
         branch_misses_min < branch_misses ? branch_misses_min : branch_misses;
diff --git a/benchmarks/event_counter.h b/benchmarks/event_counter.h
deleted file mode 100644
index cd594787..00000000
--- a/benchmarks/event_counter.h
+++ /dev/null
@@ -1,181 +0,0 @@
-#ifndef __EVENT_COUNTER_H
-#define __EVENT_COUNTER_H
-
-#include <cctype>
-#ifndef _MSC_VER
-#include <dirent.h>
-#endif
-#include <cinttypes>
-
-#include <cstring>
-
-#include <chrono>
-#include <vector>
-
-#include "linux-perf-events.h"
-#ifdef __linux__
-#include <libgen.h>
-#endif
-
-#if (defined(__APPLE__) && __APPLE__) && (defined(__aarch64__) && __aarch64__)
-#include "apple_arm_events.h"
-#endif
-
-struct event_count {
-  std::chrono::duration<double> elapsed;
-  std::vector<unsigned long long> event_counts;
-
-  event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {}
-
-  event_count(const std::chrono::duration<double> _elapsed,
-              const std::vector<unsigned long long> _event_counts)
-      : elapsed(_elapsed), event_counts(_event_counts) {}
-
-  event_count(const event_count &other)
-      : elapsed(other.elapsed), event_counts(other.event_counts) {}
-
-  // The types of counters (so we can read the getter more easily)
-  enum event_counter_types {
-    CPU_CYCLES = 0,
-    INSTRUCTIONS = 1,
-    BRANCHES = 2,
-    MISSED_BRANCHES = 3
-  };
-
-  double elapsed_sec() const {
-    return std::chrono::duration<double>(elapsed).count();
-  }
-
-  double elapsed_ns() const {
-    return std::chrono::duration<double, std::nano>(elapsed).count();
-  }
-
-  double cycles() const {
-    return static_cast<double>(event_counts[CPU_CYCLES]);
-  }
-
-  double instructions() const {
-    return static_cast<double>(event_counts[INSTRUCTIONS]);
-  }
-
-  double branches() const {
-    return static_cast<double>(event_counts[BRANCHES]);
-  }
-
-  double missed_branches() const {
-    return static_cast<double>(event_counts[MISSED_BRANCHES]);
-  }
-
-  event_count &operator=(const event_count &other) {
-    this->elapsed = other.elapsed;
-    this->event_counts = other.event_counts;
-    return *this;
-  }
-
-  event_count operator+(const event_count &other) const {
-    return event_count(elapsed + other.elapsed,
-                       {
-                           event_counts[0] + other.event_counts[0],
-                           event_counts[1] + other.event_counts[1],
-                           event_counts[2] + other.event_counts[2],
-                           event_counts[3] + other.event_counts[3],
-                           event_counts[4] + other.event_counts[4],
-                       });
-  }
-
-  void operator+=(const event_count &other) { *this = *this + other; }
-};
-
-struct event_aggregate {
-  bool has_events = false;
-  int iterations = 0;
-  event_count total{};
-  event_count best{};
-  event_count worst{};
-
-  event_aggregate() = default;
-
-  void operator<<(const event_count &other) {
-    if (iterations == 0 || other.elapsed < best.elapsed) {
-      best = other;
-    }
-    if (iterations == 0 || other.elapsed > worst.elapsed) {
-      worst = other;
-    }
-    iterations++;
-    total += other;
-  }
-
-  double elapsed_sec() const { return total.elapsed_sec() / iterations; }
-
-  double elapsed_ns() const { return total.elapsed_ns() / iterations; }
-
-  double cycles() const { return total.cycles() / iterations; }
-
-  double instructions() const { return total.instructions() / iterations; }
-
-  double branches() const { return total.branches() / iterations; }
-
-  double missed_branches() const {
-    return total.missed_branches() / iterations;
-  }
-};
-
-struct event_collector {
-  event_count count{};
-  std::chrono::time_point<std::chrono::steady_clock> start_clock{};
-
-#if defined(__linux__)
-  LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
-
-  event_collector()
-      : linux_events(std::vector<int>{
-            PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS,
-            PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions
-            PERF_COUNT_HW_BRANCH_MISSES}) {}
-
-  bool has_events() { return linux_events.is_working(); }
-#elif __APPLE__ && __aarch64__
-  performance_counters diff;
-
-  event_collector() : diff(0) { setup_performance_counters(); }
-
-  bool has_events() { return setup_performance_counters(); }
-#else
-  event_collector() {}
-
-  bool has_events() { return false; }
-#endif
-
-  inline void start() {
-#if defined(__linux)
-    linux_events.start();
-#elif __APPLE__ && __aarch64__
-    if (has_events()) {
-      diff = get_counters();
-    }
-#endif
-    start_clock = std::chrono::steady_clock::now();
-  }
-
-  inline event_count &end() {
-    const auto end_clock = std::chrono::steady_clock::now();
-#if defined(__linux)
-    linux_events.end(count.event_counts);
-#elif __APPLE__ && __aarch64__
-    if (has_events()) {
-      performance_counters end = get_counters();
-      diff = end - diff;
-    }
-    count.event_counts[0] = diff.cycles;
-    count.event_counts[1] = diff.instructions;
-    count.event_counts[2] = diff.branches;
-    count.event_counts[3] = diff.missed_branches;
-    count.event_counts[4] = 0;
-#endif
-    count.elapsed = end_clock - start_clock;
-    return count;
-  }
-};
-
-#endif
diff --git a/benchmarks/linux-perf-events.h b/benchmarks/linux-perf-events.h
deleted file mode 100644
index 0a9e5538..00000000
--- a/benchmarks/linux-perf-events.h
+++ /dev/null
@@ -1,104 +0,0 @@
-#pragma once
-#ifdef __linux__
-
-#include <asm/unistd.h>       // for __NR_perf_event_open
-#include <linux/perf_event.h> // for perf event constants
-#include <sys/ioctl.h>        // for ioctl
-#include <unistd.h>           // for syscall
-
-#include <cerrno>  // for errno
-#include <cstring> // for memset
-#include <stdexcept>
-
-#include <iostream>
-#include <vector>
-
-template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
-  int fd;
-  bool working;
-  perf_event_attr attribs{};
-  size_t num_events{};
-  std::vector<uint64_t> temp_result_vec{};
-  std::vector<uint64_t> ids{};
-
-public:
-  explicit LinuxEvents(std::vector<int> config_vec) : fd(0), working(true) {
-    memset(&attribs, 0, sizeof(attribs));
-    attribs.type = TYPE;
-    attribs.size = sizeof(attribs);
-    attribs.disabled = 1;
-    attribs.exclude_kernel = 1;
-    attribs.exclude_hv = 1;
-
-    attribs.sample_period = 0;
-    attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
-    const int pid = 0;  // the current process
-    const int cpu = -1; // all CPUs
-    const unsigned long flags = 0;
-
-    int group = -1; // no group
-    num_events = config_vec.size();
-    ids.resize(config_vec.size());
-    uint32_t i = 0;
-    for (auto config : config_vec) {
-      attribs.config = config;
-      int _fd = static_cast<int>(
-          syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags));
-      if (_fd == -1) {
-        report_error("perf_event_open");
-      }
-      ioctl(_fd, PERF_EVENT_IOC_ID, &ids[i++]);
-      if (group == -1) {
-        group = _fd;
-        fd = _fd;
-      }
-    }
-
-    temp_result_vec.resize(num_events * 2 + 1);
-  }
-
-  ~LinuxEvents() {
-    if (fd != -1) {
-      close(fd);
-    }
-  }
-
-  inline void start() {
-    if (fd != -1) {
-      if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
-        report_error("ioctl(PERF_EVENT_IOC_RESET)");
-      }
-
-      if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
-        report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
-      }
-    }
-  }
-
-  inline void end(std::vector<unsigned long long> &results) {
-    if (fd != -1) {
-      if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
-        report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
-      }
-
-      if (read(fd, temp_result_vec.data(), temp_result_vec.size() * 8) == -1) {
-        report_error("read");
-      }
-    }
-    // our actual results are in slots 1,3,5, ... of this structure
-    for (uint32_t i = 1; i < temp_result_vec.size(); i += 2) {
-      results[i / 2] = temp_result_vec[i];
-    }
-    for (uint32_t i = 2; i < temp_result_vec.size(); i += 2) {
-      if (ids[i / 2 - 1] != temp_result_vec[i]) {
-        report_error("event mismatch");
-      }
-    }
-  }
-
-  bool is_working() { return working; }
-
-private:
-  void report_error(const std::string &) { working = false; }
-};
-#endif
\ No newline at end of file
diff --git a/fuzz/build.sh b/fuzz/build.sh
index 5cbe87aa..cce114d8 100644
--- a/fuzz/build.sh
+++ b/fuzz/build.sh
@@ -5,4 +5,8 @@ $CXX $CFLAGS $CXXFLAGS \
      -c $SRC/fast_float/fuzz/from_chars.cc -o from_chars.o
 
 $CXX $CFLAGS $CXXFLAGS $LIB_FUZZING_ENGINE from_chars.o \
-     -o $OUT/from_chars
\ No newline at end of file
+     -o $OUT/from_chars
+
+# Build unit tests in-tree ("." names the current directory as the source dir)
+cmake . -DFASTFLOAT_TEST=ON -DCMAKE_EXE_LINKER_FLAGS="-lpthread"
+make
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
index 5683cd47..4e1ce3a1 100644
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@@ -509,6 +509,108 @@ parse_int_string(UC const *p, UC const *pend, T &value,
 
   UC const *const start_digits = p;
 
+  // Fast path for base-10 std::uint8_t values (at most three digits): up to
+  // four characters are packed into a 32-bit word, first character in the
+  // lowest byte, then the digits are located and combined with word-wide
+  // arithmetic.
+  FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint8_t>::value)) {
+    // the digit detection and place values below are decimal-only; every
+    // other base falls through to the generic code after this block
+    if (base == 10) {
+      const size_t len = (size_t)(pend - p);
+      if (len == 0) {
+        // empty input is a valid 0 only when has_leading_zeros is set
+        if (has_leading_zeros) {
+          value = 0;
+          answer.ec = std::errc();
+          answer.ptr = p;
+        } else {
+          answer.ec = std::errc::invalid_argument;
+          answer.ptr = first;
+        }
+        return answer;
+      }
+
+      uint32_t digits;
+      if (sizeof(UC) == 1 && len >= 4 && !cpp20_and_in_constexpr()) {
+        // one-byte characters at run time: a single 4-byte load
+        ::memcpy(&digits, p, 4);
+#if FASTFLOAT_IS_BIG_ENDIAN
+        // byte-swap so that p[0] lands in the lowest byte as on little endian
+        digits = (digits >> 24) | ((digits >> 8) & 0x0000ff00u) |
+                 ((digits << 8) & 0x00ff0000u) | (digits << 24);
+#endif
+      } else {
+        // portable path (short input, wide characters, constant evaluation):
+        // absent positions and code units above 0xFF become the non-digit
+        // byte 0xFF
+        digits = 0;
+        for (size_t j = 0; j < 4; ++j) {
+          uint32_t b = 0xFFu;
+          if (j < len && static_cast<uint32_t>(p[j]) <= 0xFFu) {
+            b = static_cast<uint32_t>(p[j]);
+          }
+          digits |= b << (8 * j);
+        }
+      }
+
+      // a byte of magic has its high bit set where the character is not in
+      // '0'..'9'; only the lowest such byte is used, so carries and borrows
+      // into higher bytes are harmless
+      uint32_t magic =
+          ((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u;
+      uint32_t tz = (uint32_t)countr_zero_32(magic); // 7, 15, 23, 31, or 32
+      uint32_t nd = (tz == 32) ? 4 : (tz >> 3); // leading digit count (max 4)
+      nd = (uint32_t)std::min((size_t)nd, len);
+      if (nd == 0) {
+        if (has_leading_zeros) {
+          value = 0;
+          answer.ec = std::errc();
+          answer.ptr = p;
+          return answer;
+        }
+        answer.ec = std::errc::invalid_argument;
+        answer.ptr = first;
+        return answer;
+      }
+      if (nd > 3) {
+        // a run of four or more digits is reported as out of range; skip the
+        // rest of the digit run so that ptr points just past it
+        const UC *q = p + nd;
+        size_t rem = len - nd;
+        while (rem) {
+          if (*q < UC('0') || *q > UC('9'))
+            break;
+          ++q;
+          --rem;
+        }
+        answer.ec = std::errc::result_out_of_range;
+        answer.ptr = q;
+        return answer;
+      }
+
+      // turn ASCII into digit values and shift the nd digits up so that the
+      // last digit sits in the top byte (the non-digit bytes are shifted out)
+      digits ^= 0x30303030u;
+      digits <<= ((4 - nd) * 8);
+
+      // same digits with the most significant one first (0x00HHTTUU)
+      uint32_t check = ((digits >> 24) & 0xff) | ((digits >> 8) & 0xff00) |
+                       ((digits << 8) & 0xff0000);
+      if (check > 0x00020505) { // larger than 255
+        answer.ec = std::errc::result_out_of_range;
+        answer.ptr = p + nd;
+        return answer;
+      }
+      // 0x640a01 == (100 << 16) | (10 << 8) | 1: the top byte of the product
+      // is 100 * hundreds + 10 * tens + units
+      value = (uint8_t)((0x640a01 * digits) >> 24);
+      answer.ec = std::errc();
+      answer.ptr = p + nd;
+      return answer;
+    }
+  }
+
   uint64_t i = 0;
   if (base == 10) {
     loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h
index d7ef3d9a..03e70dcc 100644
--- a/include/fast_float/digit_comparison.h
+++ b/include/fast_float/digit_comparison.h
@@ -38,11 +38,8 @@ constexpr static uint64_t powers_of_ten_uint64[] = {1UL,
 // this algorithm is not even close to optimized, but it has no practical
 // effect on performance: in order to have a faster algorithm, we'd need
 // to slow down performance for faster algorithms, and this is still fast.
-template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t
-scientific_exponent(parsed_number_string_t<UC> &num) noexcept {
-  uint64_t mantissa = num.mantissa;
-  int32_t exponent = int32_t(num.exponent);
+scientific_exponent(uint64_t mantissa, int32_t exponent) noexcept {
   while (mantissa >= 10000) {
     mantissa /= 10000;
     exponent += 4;
@@ -398,7 +395,7 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
     FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
   }
 
-  // compare digits, and use it to director rounding
+  // compare digits, and use it to direct rounding
   int ord = real_digits.compare(theor_digits);
   adjusted_mantissa answer = am;
   round<T>(answer, [ord](adjusted_mantissa &a, int32_t shift) {
@@ -419,7 +416,7 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
   return answer;
 }
 
-// parse the significant digits as a big integer to unambiguously round the
+// parse the significant digits as a big integer to unambiguously round
 // the significant digits. here, we are trying to determine how to round
 // an extended float representation close to `b+h`, halfway between `b`
 // (the float rounded-down) and `b+u`, the next positive float. this
@@ -438,7 +435,8 @@ digit_comp(parsed_number_string_t<UC> &num, adjusted_mantissa am) noexcept {
   // remove the invalid exponent bias
   am.power2 -= invalid_am_bias;
 
-  int32_t sci_exp = scientific_exponent(num);
+  int32_t sci_exp =
+      scientific_exponent(num.mantissa, static_cast<int32_t>(num.exponent));
   size_t max_digits = binary_format<T>::max_digits();
   size_t digits = 0;
   bigint bigmant;
diff --git a/include/fast_float/fast_float.h b/include/fast_float/fast_float.h
index a190d7c8..eb822f58 100644
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@@ -63,6 +63,20 @@ integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept;
 FASTFLOAT_CONSTEXPR20 inline double
 integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept;
 
+/**
+ * This function template is an overload of `integer_times_pow10()` that
+ * returns a value of type `T`, where `T` is one of the supported
+ * floating-point types (e.g. `double`, `float`).
+ */
+template <typename T>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value, T>::type
+    integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept;
+template <typename T>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value, T>::type
+    integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept;
+
 /**
  * from_chars for integer types.
  */
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
index 62d199ca..62fe2bf0 100644
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
@@ -16,7 +16,7 @@
 #include "constexpr_feature_detect.h"
 
 #define FASTFLOAT_VERSION_MAJOR 8
-#define FASTFLOAT_VERSION_MINOR 1
+#define FASTFLOAT_VERSION_MINOR 2
 #define FASTFLOAT_VERSION_PATCH 0
 
 #define FASTFLOAT_STRINGIZE_IMPL(x) #x
@@ -362,6 +362,52 @@ leading_zeroes(uint64_t input_num) {
 #endif
 }
 
+/* Portable C++14-constexpr count of trailing zero bits of a 32-bit value */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int
+countr_zero_generic_32(uint32_t input_num) {
+  if (input_num == 0) {
+    return 32; // no bit set: all 32 bits count as trailing zeros
+  }
+  int last_bit = 0; // accumulated index of the lowest set bit
+  if (!(input_num & 0x0000FFFF)) { // low 16 bits are all zero
+    input_num >>= 16;
+    last_bit |= 16;
+  }
+  if (!(input_num & 0x00FF)) { // next 8 bits are all zero
+    input_num >>= 8;
+    last_bit |= 8;
+  }
+  if (!(input_num & 0x0F)) { // next 4 bits are all zero
+    input_num >>= 4;
+    last_bit |= 4;
+  }
+  if (!(input_num & 0x3)) { // next 2 bits are all zero
+    input_num >>= 2;
+    last_bit |= 2;
+  }
+  if (!(input_num & 0x1)) { // final bit: no further shift is needed
+    last_bit |= 1;
+  }
+  return last_bit;
+}
+
+/* count trailing zeroes for 32-bit integers (32 for a zero input) */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int
+countr_zero_32(uint32_t input_num) {
+  if (cpp20_and_in_constexpr()) {
+    return countr_zero_generic_32(input_num);
+  }
+#ifdef FASTFLOAT_VISUAL_STUDIO
+  unsigned long trailing_zero = 0;
+  if (_BitScanForward(&trailing_zero, input_num)) {
+    return (int)trailing_zero;
+  }
+  return 32; // the scan found no set bit
+#else
+  return input_num == 0 ? 32 : __builtin_ctz(input_num); // ctz(0) is undefined
+#endif
+}
+
 // slow emulation routine for 32-bit
 fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) {
   return x * (uint64_t)y;
@@ -406,8 +452,8 @@ full_multiplication(uint64_t a, uint64_t b) {
   // But MinGW on ARM64 doesn't have native support for 64-bit multiplications
   answer.high = __umulh(a, b);
   answer.low = a * b;
-#elif defined(FASTFLOAT_32BIT) ||                                              \
-    (defined(_WIN64) && !defined(__clang__) && !defined(_M_ARM64))
+#elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__) &&   \
+                                   !defined(_M_ARM64) && !defined(__GNUC__))
   answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64
 #elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__)
   __uint128_t r = ((__uint128_t)a) * b;
@@ -1166,6 +1212,9 @@ static_assert(std::is_same<equiv_uint_t<std::float64_t>, uint64_t>::value,
 static_assert(
     std::numeric_limits<std::float64_t>::is_iec559,
     "std::float64_t must fulfill the requirements of IEC 559 (IEEE 754)");
+
+template <>
+struct binary_format<std::float64_t> : public binary_format<double> {};
 #endif // __STDCPP_FLOAT64_T__
 
 #ifdef __STDCPP_FLOAT32_T__
@@ -1174,6 +1223,9 @@ static_assert(std::is_same<equiv_uint_t<std::float32_t>, uint32_t>::value,
 static_assert(
     std::numeric_limits<std::float32_t>::is_iec559,
     "std::float32_t must fulfill the requirements of IEC 559 (IEEE 754)");
+
+template <>
+struct binary_format<std::float32_t> : public binary_format<float> {};
 #endif // __STDCPP_FLOAT32_T__
 
 #ifdef __STDCPP_FLOAT16_T__
@@ -1245,7 +1297,6 @@ constexpr chars_format adjust_for_feature_macros(chars_format fmt) {
       ;
 }
 } // namespace detail
-
 } // namespace fast_float
 
 #endif
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
index a44fef0b..d453c145 100644
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@@ -344,44 +344,80 @@ from_chars(UC const *first, UC const *last, T &value, int base) noexcept {
   return from_chars_advanced(first, last, value, options);
 }
 
-FASTFLOAT_CONSTEXPR20 inline double
-integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept {
-  double value;
+template <typename T>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value, T>::type
+    integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept {
+  T value;
   if (clinger_fast_path_impl(mantissa, decimal_exponent, false, value))
     return value;
 
   adjusted_mantissa am =
-      compute_float<binary_format<double>>(decimal_exponent, mantissa);
+      compute_float<binary_format<T>>(decimal_exponent, mantissa);
   to_float(false, am, value);
   return value;
 }
 
-FASTFLOAT_CONSTEXPR20 inline double
-integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept {
+template <typename T>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value, T>::type
+    integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept {
   const bool is_negative = mantissa < 0;
-  const uint64_t m = static_cast<uint64_t>(is_negative ? -mantissa : mantissa);
+  // negate as unsigned: `-mantissa` is signed overflow (UB) for INT64_MIN
+  const uint64_t m = is_negative ? 0 - uint64_t(mantissa) : uint64_t(mantissa);
 
-  double value;
+  T value;
   if (clinger_fast_path_impl(m, decimal_exponent, is_negative, value))
     return value;
 
-  adjusted_mantissa am =
-      compute_float<binary_format<double>>(decimal_exponent, m);
+  adjusted_mantissa am = compute_float<binary_format<T>>(decimal_exponent, m);
   to_float(is_negative, am, value);
   return value;
 }
 
+FASTFLOAT_CONSTEXPR20 inline double
+integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept {
+  return integer_times_pow10<double>(mantissa, decimal_exponent);
+}
+
+FASTFLOAT_CONSTEXPR20 inline double
+integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept {
+  return integer_times_pow10<double>(mantissa, decimal_exponent);
+}
+
 // the following overloads are here to avoid surprising ambiguity for int,
 // unsigned, etc.
+template <typename T, typename Int>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value &&
+                                std::is_integral<Int>::value &&
+                                !std::is_signed<Int>::value,
+                            T>::type
+    integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
+  return integer_times_pow10<T>(static_cast<uint64_t>(mantissa),
+                                decimal_exponent);
+}
+
+template <typename T, typename Int>
+FASTFLOAT_CONSTEXPR20
+    typename std::enable_if<is_supported_float_type<T>::value &&
+                                std::is_integral<Int>::value &&
+                                std::is_signed<Int>::value,
+                            T>::type
+    integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
+  return integer_times_pow10<T>(static_cast<int64_t>(mantissa),
+                                decimal_exponent);
+}
+
 template <typename Int>
-FASTFLOAT_CONSTEXPR20 inline typename std::enable_if<
+FASTFLOAT_CONSTEXPR20 typename std::enable_if<
     std::is_integral<Int>::value && !std::is_signed<Int>::value, double>::type
 integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
   return integer_times_pow10(static_cast<uint64_t>(mantissa), decimal_exponent);
 }
 
 template <typename Int>
-FASTFLOAT_CONSTEXPR20 inline typename std::enable_if<
+FASTFLOAT_CONSTEXPR20 typename std::enable_if<
     std::is_integral<Int>::value && std::is_signed<Int>::value, double>::type
 integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
   return integer_times_pow10(static_cast<int64_t>(mantissa), decimal_exponent);
diff --git a/script/mushtak_lemire.py b/script/mushtak_lemire.py
index 46c8c645..f03715cd 100644
--- a/script/mushtak_lemire.py
+++ b/script/mushtak_lemire.py
@@ -1,6 +1,6 @@
 #
 # Reference :
-# Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to appear)
+# Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback, Software: Practice and Experience 53 (6), 2023 https://arxiv.org/abs/2212.06644
 #
 
 all_tqs = []
@@ -74,8 +74,8 @@ def convergents(cf):
     for _, w in convergents(continued_fraction(tq, 2 ** 137)):
         if w >= 2 ** 64:
             break
-            if (tq * w) % 2 ** 137 > 2 ** 137 - 2 ** 64:
-                print(f"SOLUTION: q={j-342} T[q]={tq} w={w}")
-                found_solution = True
+        if (tq * w) % 2 ** 137 > 2 ** 137 - 2 ** 64:
+            print(f"SOLUTION: q={j-342} T[q]={tq} w={w}")
+            found_solution = True
 if not found_solution:
     print("No solutions!")
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index d8ed6f4d..a053581c 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -94,6 +94,7 @@ endif()
 option(FASTFLOAT_EXHAUSTIVE "Exhaustive tests" OFF)
 
 if (FASTFLOAT_EXHAUSTIVE)
+  fast_float_add_cpp_test(ipv4_test)
   fast_float_add_cpp_test(short_random_string)
   fast_float_add_cpp_test(exhaustive32_midpoint)
   fast_float_add_cpp_test(random_string)
diff --git a/tests/basictest.cpp b/tests/basictest.cpp
index dc117526..1a5537bb 100644
--- a/tests/basictest.cpp
+++ b/tests/basictest.cpp
@@ -1142,6 +1142,15 @@ TEST_CASE("double.inf") {
   // DBL_MAX + 0.0000000000000001e308
   verify("1.7976931348623159e308", std::numeric_limits<double>::infinity(),
          std::errc::result_out_of_range);
+
+  // ( (2 - 0.5*2^(−52)) * 2^1023 ) smallest number that overflows to infinity
+  verify("179769313486231580793728971405303415079934132710037826936173778980444"
+         "968292764750946649017977587207096330286416692887910946555547851940402"
+         "630657488671505820681908902000708383676273854845817711531764475730270"
+         "069855571366959622842914819860834936475292719074168444365510704342711"
+         "559699508093042880177904174497792",
+         std::numeric_limits<double>::infinity(),
+         std::errc::result_out_of_range);
 }
 
 TEST_CASE("double.general") {
@@ -1333,6 +1342,15 @@ TEST_CASE("double.general") {
       std::numeric_limits<double>::infinity(), std::errc::result_out_of_range);
   verify("-2240084132271013504.131248280843119943687942846658579428",
          -0x1.f1660a65b00bfp+60);
+
+  // ( (2 - 0.5*2^(−52)) * 2^1023 - 1 ) largest 309 decimal digit number
+  // that rounds to DBL_MAX
+  verify("179769313486231580793728971405303415079934132710037826936173778980444"
+         "968292764750946649017977587207096330286416692887910946555547851940402"
+         "630657488671505820681908902000708383676273854845817711531764475730270"
+         "069855571366959622842914819860834936475292719074168444365510704342711"
+         "559699508093042880177904174497791",
+         std::numeric_limits<double>::max());
 }
 
 TEST_CASE("double.decimal_point") {
@@ -1507,14 +1525,35 @@ TEST_CASE("float.inf") {
          std::errc::result_out_of_range);
   verify("3.5028234666e38", std::numeric_limits<float>::infinity(),
          std::errc::result_out_of_range);
+  // FLT_MAX + 0.00000007e38
+  verify("3.40282357e38", std::numeric_limits<float>::infinity(),
+         std::errc::result_out_of_range);
+  // FLT_MAX + 0.0000001e38
+  verify("3.4028236e38", std::numeric_limits<float>::infinity(),
+         std::errc::result_out_of_range);
+
+  // ( (2 - 0.5*2^(-23)) * 2^127 ) smallest number that overflows to infinity
+  verify("340282356779733661637539395458142568448",
+         std::numeric_limits<float>::infinity(),
+         std::errc::result_out_of_range);
 }
 
 TEST_CASE("float.general") {
+  // FLT_TRUE_MIN / 2
+  verify("0.7006492e-45", 0.f, std::errc::result_out_of_range);
+  // FLT_TRUE_MIN / 2 + 0.0000001e-45
+  verify("0.7006493e-45", 0x1p-149f);
+
   // max
   verify("340282346638528859811704183484516925440", 0x1.fffffep+127f);
   // -max
   verify("-340282346638528859811704183484516925440", -0x1.fffffep+127f);
 
+  // ( (2 - 0.5*2^(-23)) * 2^127 - 1 ) largest 39 decimal digits number
+  // that rounds to FLT_MAX
+  verify("340282356779733661637539395458142568447",
+         std::numeric_limits<float>::max());
+
   verify("-1e-999", -0.0f, std::errc::result_out_of_range);
   verify("1."
          "175494140627517859246175898662808184331245864732796240031385942718174"
@@ -2086,12 +2125,11 @@ TEST_CASE("bfloat16.general") {
 }
 #endif
 
-template <typename Int>
-void verify_integer_multiplication_by_power_of_10(Int mantissa,
-                                                  int decimal_exponent,
-                                                  double expected) {
-  const double actual =
-      fast_float::integer_times_pow10(mantissa, decimal_exponent);
+template <typename Int, typename T, typename U>
+void verify_integer_times_pow10_result(Int mantissa, int decimal_exponent,
+                                       T actual, U expected) {
+  static_assert(std::is_same<T, U>::value,
+                "expected and actual types must match");
 
   INFO("m * 10^e=" << mantissa << " * 10^" << decimal_exponent
                    << "\n"
@@ -2105,45 +2143,173 @@ void verify_integer_multiplication_by_power_of_10(Int mantissa,
   CHECK_EQ(actual, expected);
 }
 
-template <typename Int>
-void verify_integer_multiplication_by_power_of_10(Int mantissa,
-                                                  int decimal_exponent) {
+template <typename T, typename Int>
+T calculate_integer_times_pow10_expected_result(Int mantissa,
+                                                int decimal_exponent) {
   std::string constructed_string =
       std::to_string(mantissa) + "e" + std::to_string(decimal_exponent);
-  double expected_result;
+  T expected_result;
   const auto result = fast_float::from_chars(
       constructed_string.data(),
       constructed_string.data() + constructed_string.size(), expected_result);
   if (result.ec != std::errc())
     INFO("Failed to parse: " << constructed_string);
-  verify_integer_multiplication_by_power_of_10(mantissa, decimal_exponent,
-                                               expected_result);
+  return expected_result;
+}
+
+template <typename Int>
+void verify_integer_times_pow10_dflt(Int mantissa, int decimal_exponent,
+                                     double expected) {
+  static_assert(std::is_integral<Int>::value, "Int must be an integer type");
+
+  // the "default" overload (no explicit result type), which returns double
+  const double actual =
+      fast_float::integer_times_pow10(mantissa, decimal_exponent);
+
+  verify_integer_times_pow10_result(mantissa, decimal_exponent, actual,
+                                    expected);
 }
 
+template <typename Int>
+void verify_integer_times_pow10_dflt(Int mantissa, int decimal_exponent) {
+  static_assert(std::is_integral<Int>::value, "Int must be an integer type");
+  // reference value: from_chars applied to "<mantissa>e<decimal_exponent>"
+  const auto expected_result =
+      calculate_integer_times_pow10_expected_result<double>(mantissa,
+                                                            decimal_exponent);
+
+  verify_integer_times_pow10_dflt(mantissa, decimal_exponent, expected_result);
+}
+
+template <typename T, typename Int>
+void verify_integer_times_pow10(Int mantissa, int decimal_exponent,
+                                T expected) {
+  static_assert(std::is_floating_point<T>::value, "T must be floating-point");
+  static_assert(std::is_integral<Int>::value, "Int must be an integer type");
+
+  // overload with the result type T given explicitly
+  const auto actual =
+      fast_float::integer_times_pow10<T>(mantissa, decimal_exponent);
+
+  verify_integer_times_pow10_result(mantissa, decimal_exponent, actual,
+                                    expected);
+}
+
+template <typename T, typename Int>
+void verify_integer_times_pow10(Int mantissa, int decimal_exponent) {
+  static_assert(std::is_floating_point<T>::value, "T must be floating-point");
+  static_assert(std::is_integral<Int>::value, "Int must be an integer type");
+  // reference value: from_chars applied to "<mantissa>e<decimal_exponent>"
+  const auto expected_result = calculate_integer_times_pow10_expected_result<T>(
+      mantissa, decimal_exponent);
+
+  verify_integer_times_pow10(mantissa, decimal_exponent, expected_result);
+}
+
+namespace all_supported_types {
+template <typename Int>
+void verify_integer_times_pow10(Int mantissa, int decimal_exponent) {
+  static_assert(std::is_integral<Int>::value, "Int must be an integer type");
+
+  // verify the "default" overload
+  verify_integer_times_pow10_dflt(mantissa, decimal_exponent);
+
+  // verify explicit result types (the global-scope helper, hence the `::`)
+  ::verify_integer_times_pow10<double>(mantissa, decimal_exponent);
+  ::verify_integer_times_pow10<float>(mantissa, decimal_exponent);
+#if defined(__STDCPP_FLOAT64_T__)
+  ::verify_integer_times_pow10<std::float64_t>(mantissa, decimal_exponent);
+#endif
+#if defined(__STDCPP_FLOAT32_T__)
+  ::verify_integer_times_pow10<std::float32_t>(mantissa, decimal_exponent);
+#endif
+#if defined(__STDCPP_FLOAT16_T__)
+  ::verify_integer_times_pow10<std::float16_t>(mantissa, decimal_exponent);
+#endif
+#if defined(__STDCPP_BFLOAT16_T__)
+  ::verify_integer_times_pow10<std::bfloat16_t>(mantissa, decimal_exponent);
+#endif
+}
+} // namespace all_supported_types
+
 TEST_CASE("integer_times_pow10") {
-  // explicitly verifying API with different types of integers
-  verify_integer_multiplication_by_power_of_10<int8_t>(31, -1, 3.1);
-  verify_integer_multiplication_by_power_of_10<int8_t>(-31, -1, -3.1);
-  verify_integer_multiplication_by_power_of_10<uint8_t>(31, -1, 3.1);
-  verify_integer_multiplication_by_power_of_10<int16_t>(31415, -4, 3.1415);
-  verify_integer_multiplication_by_power_of_10<int16_t>(-31415, -4, -3.1415);
-  verify_integer_multiplication_by_power_of_10<uint16_t>(31415, -4, 3.1415);
-  verify_integer_multiplication_by_power_of_10<int32_t>(314159265, -8,
-                                                        3.14159265);
-  verify_integer_multiplication_by_power_of_10<int32_t>(-314159265, -8,
-                                                        -3.14159265);
-  verify_integer_multiplication_by_power_of_10<uint32_t>(3141592653, -9,
-                                                         3.141592653);
-  verify_integer_multiplication_by_power_of_10<int64_t>(
-      3141592653589793238, -18, 3.141592653589793238);
-  verify_integer_multiplication_by_power_of_10<int64_t>(
-      -3141592653589793238, -18, -3.141592653589793238);
-  verify_integer_multiplication_by_power_of_10<uint64_t>(
-      3141592653589793238, -18, 3.141592653589793238);
-  verify_integer_multiplication_by_power_of_10<long long>(
-      -3141592653589793238, -18, -3.141592653589793238);
-  verify_integer_multiplication_by_power_of_10<unsigned long long>(
+  /* explicitly verifying API with different types of integers */
+  // double (the "default" overload)
+  verify_integer_times_pow10_dflt<int8_t>(31, -1, 3.1);
+  verify_integer_times_pow10_dflt<int8_t>(-31, -1, -3.1);
+  verify_integer_times_pow10_dflt<uint8_t>(31, -1, 3.1);
+  verify_integer_times_pow10_dflt<int16_t>(31415, -4, 3.1415);
+  verify_integer_times_pow10_dflt<int16_t>(-31415, -4, -3.1415);
+  verify_integer_times_pow10_dflt<uint16_t>(31415, -4, 3.1415);
+  verify_integer_times_pow10_dflt<int32_t>(314159265, -8, 3.14159265);
+  verify_integer_times_pow10_dflt<int32_t>(-314159265, -8, -3.14159265);
+  verify_integer_times_pow10_dflt<uint32_t>(3141592653, -9, 3.141592653);
+  verify_integer_times_pow10_dflt<long>(314159265, -8, 3.14159265);
+  verify_integer_times_pow10_dflt<long>(-314159265, -8, -3.14159265);
+  verify_integer_times_pow10_dflt<unsigned long>(3141592653, -9, 3.141592653);
+  verify_integer_times_pow10_dflt<int64_t>(3141592653589793238, -18,
+                                           3.141592653589793238);
+  verify_integer_times_pow10_dflt<int64_t>(-3141592653589793238, -18,
+                                           -3.141592653589793238);
+  verify_integer_times_pow10_dflt<uint64_t>(3141592653589793238, -18,
+                                            3.141592653589793238);
+  verify_integer_times_pow10_dflt<long long>(3141592653589793238, -18,
+                                             3.141592653589793238);
+  verify_integer_times_pow10_dflt<long long>(-3141592653589793238, -18,
+                                             -3.141592653589793238);
+  verify_integer_times_pow10_dflt<unsigned long long>(3141592653589793238, -18,
+                                                      3.141592653589793238);
+  // double (explicit specialization)
+  verify_integer_times_pow10<double, int8_t>(31, -1, 3.1);
+  verify_integer_times_pow10<double, int8_t>(-31, -1, -3.1);
+  verify_integer_times_pow10<double, uint8_t>(31, -1, 3.1);
+  verify_integer_times_pow10<double, int16_t>(31415, -4, 3.1415);
+  verify_integer_times_pow10<double, int16_t>(-31415, -4, -3.1415);
+  verify_integer_times_pow10<double, uint16_t>(31415, -4, 3.1415);
+  verify_integer_times_pow10<double, int32_t>(314159265, -8, 3.14159265);
+  verify_integer_times_pow10<double, int32_t>(-314159265, -8, -3.14159265);
+  verify_integer_times_pow10<double, uint32_t>(3141592653, -9, 3.141592653);
+  verify_integer_times_pow10<double, long>(314159265, -8, 3.14159265);
+  verify_integer_times_pow10<double, long>(-314159265, -8, -3.14159265);
+  verify_integer_times_pow10<double, unsigned long>(3141592653, -9,
+                                                    3.141592653);
+  verify_integer_times_pow10<double, int64_t>(3141592653589793238, -18,
+                                              3.141592653589793238);
+  verify_integer_times_pow10<double, int64_t>(-3141592653589793238, -18,
+                                              -3.141592653589793238);
+  verify_integer_times_pow10<double, uint64_t>(3141592653589793238, -18,
+                                               3.141592653589793238);
+  verify_integer_times_pow10<double, long long>(3141592653589793238, -18,
+                                                3.141592653589793238);
+  verify_integer_times_pow10<double, long long>(-3141592653589793238, -18,
+                                                -3.141592653589793238);
+  verify_integer_times_pow10<double, unsigned long long>(
       3141592653589793238, -18, 3.141592653589793238);
+  // float (explicit specialization)
+  verify_integer_times_pow10<float, int8_t>(31, -1, 3.1f);
+  verify_integer_times_pow10<float, int8_t>(-31, -1, -3.1f);
+  verify_integer_times_pow10<float, uint8_t>(31, -1, 3.1f);
+  verify_integer_times_pow10<float, int16_t>(31415, -4, 3.1415f);
+  verify_integer_times_pow10<float, int16_t>(-31415, -4, -3.1415f);
+  verify_integer_times_pow10<float, uint16_t>(31415, -4, 3.1415f);
+  verify_integer_times_pow10<float, int32_t>(314159265, -8, 3.14159265f);
+  verify_integer_times_pow10<float, int32_t>(-314159265, -8, -3.14159265f);
+  verify_integer_times_pow10<float, uint32_t>(3141592653, -9, 3.14159265f);
+  verify_integer_times_pow10<float, long>(314159265, -8, 3.14159265f);
+  verify_integer_times_pow10<float, long>(-314159265, -8, -3.14159265f);
+  verify_integer_times_pow10<float, unsigned long>(3141592653, -9, 3.14159265f);
+  verify_integer_times_pow10<float, int64_t>(3141592653589793238, -18,
+                                             3.141592653589793238f);
+  verify_integer_times_pow10<float, int64_t>(-3141592653589793238, -18,
+                                             -3.141592653589793238f);
+  verify_integer_times_pow10<float, uint64_t>(3141592653589793238, -18,
+                                              3.141592653589793238f);
+  verify_integer_times_pow10<float, long long>(3141592653589793238, -18,
+                                               3.141592653589793238f);
+  verify_integer_times_pow10<float, long long>(-3141592653589793238, -18,
+                                               -3.141592653589793238f);
+  verify_integer_times_pow10<float, unsigned long long>(
+      3141592653589793238, -18, 3.141592653589793238f);
 
   for (int mode : {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, FE_TONEAREST}) {
     fesetround(mode);
@@ -2153,87 +2319,122 @@ TEST_CASE("integer_times_pow10") {
       ~Guard() { fesetround(FE_TONEAREST); }
     } guard;
 
-    verify_integer_multiplication_by_power_of_10(0, 0);
-    verify_integer_multiplication_by_power_of_10(1, 0);
-    verify_integer_multiplication_by_power_of_10(0, 1);
-    verify_integer_multiplication_by_power_of_10(1, 1);
-    verify_integer_multiplication_by_power_of_10(-1, 0);
-    verify_integer_multiplication_by_power_of_10(0, -1);
-    verify_integer_multiplication_by_power_of_10(-1, -1);
-    verify_integer_multiplication_by_power_of_10(-1, 1);
-    verify_integer_multiplication_by_power_of_10(1, -1);
-
-    verify_integer_multiplication_by_power_of_10(
+    namespace all = all_supported_types;
+
+    all::verify_integer_times_pow10(0, 0);
+    all::verify_integer_times_pow10(1, 0);
+    all::verify_integer_times_pow10(0, 1);
+    all::verify_integer_times_pow10(1, 1);
+    all::verify_integer_times_pow10(-1, 0);
+    all::verify_integer_times_pow10(0, -1);
+    all::verify_integer_times_pow10(-1, -1);
+    all::verify_integer_times_pow10(-1, 1);
+    all::verify_integer_times_pow10(1, -1);
+
+    /* denormal min */
+    verify_integer_times_pow10_dflt(49406564584124654, -340,
+                                    std::numeric_limits<double>::denorm_min());
+    verify_integer_times_pow10<double>(
         49406564584124654, -340, std::numeric_limits<double>::denorm_min());
-    verify_integer_multiplication_by_power_of_10(
-        22250738585072014, -324, std::numeric_limits<double>::min());
-    verify_integer_multiplication_by_power_of_10(
-        17976931348623158, 292, std::numeric_limits<double>::max());
-
-    // DBL_TRUE_MIN / 2 underflows to 0
-    verify_integer_multiplication_by_power_of_10(49406564584124654 / 2, -340,
-                                                 0.);
-
-    // DBL_TRUE_MIN / 2 + 0.0000000000000001e-324 rounds to DBL_TRUE_MIN
-    verify_integer_multiplication_by_power_of_10(
+    verify_integer_times_pow10<float>(14012984, -52,
+                                      std::numeric_limits<float>::denorm_min());
+
+    /* normal min */
+    verify_integer_times_pow10_dflt(22250738585072014, -324,
+                                    std::numeric_limits<double>::min());
+    verify_integer_times_pow10<double>(22250738585072014, -324,
+                                       std::numeric_limits<double>::min());
+    verify_integer_times_pow10<float>(11754944, -45,
+                                      std::numeric_limits<float>::min());
+
+    /* max */
+    verify_integer_times_pow10_dflt(17976931348623158, 292,
+                                    std::numeric_limits<double>::max());
+    verify_integer_times_pow10<double>(17976931348623158, 292,
+                                       std::numeric_limits<double>::max());
+    verify_integer_times_pow10<float>(34028235, 31,
+                                      std::numeric_limits<float>::max());
+
+    /* underflow */
+    // (DBL_TRUE_MIN / 2) underflows to 0
+    verify_integer_times_pow10_dflt(49406564584124654 / 2, -340, 0.);
+    verify_integer_times_pow10<double>(49406564584124654 / 2, -340, 0.);
+    // (FLT_TRUE_MIN / 2) underflows to 0
+    verify_integer_times_pow10<float>(14012984 / 2, -52, 0.f);
+
+    /* rounding to denormal min */
+    // (DBL_TRUE_MIN / 2 + 0.0000000000000001e-324) rounds to DBL_TRUE_MIN
+    verify_integer_times_pow10_dflt(49406564584124654 / 2 + 1, -340,
+                                    std::numeric_limits<double>::denorm_min());
+    verify_integer_times_pow10<double>(
         49406564584124654 / 2 + 1, -340,
         std::numeric_limits<double>::denorm_min());
-
-    // DBL_MAX + 0.0000000000000001e308 overflows to infinity
-    verify_integer_multiplication_by_power_of_10(
-        17976931348623158 + 1, 292, std::numeric_limits<double>::infinity());
-    // DBL_MAX + 0.00000000000000001e308 overflows to infinity
-    verify_integer_multiplication_by_power_of_10(
-        179769313486231580 + 1, 291, std::numeric_limits<double>::infinity());
+    // (FLT_TRUE_MIN / 2 + 0.0000001e-45) rounds to FLT_TRUE_MIN
+    verify_integer_times_pow10<float>(14012984 / 2 + 1, -52,
+                                      std::numeric_limits<float>::denorm_min());
+
+    /* overflow */
+    // (DBL_MAX + 0.0000000000000001e308) overflows to infinity
+    verify_integer_times_pow10_dflt(17976931348623158 + 1, 292,
+                                    std::numeric_limits<double>::infinity());
+    verify_integer_times_pow10<double>(17976931348623158 + 1, 292,
+                                       std::numeric_limits<double>::infinity());
+    // (DBL_MAX + 0.00000000000000001e308) overflows to infinity
+    verify_integer_times_pow10_dflt(179769313486231580 + 1, 291,
+                                    std::numeric_limits<double>::infinity());
+    verify_integer_times_pow10<double>(179769313486231580 + 1, 291,
+                                       std::numeric_limits<double>::infinity());
+    // (FLT_MAX + 0.0000001e38) overflows to infinity
+    verify_integer_times_pow10<float>(34028235 + 1, 31,
+                                      std::numeric_limits<float>::infinity());
+    // (FLT_MAX + 0.00000007e38) overflows to infinity
+    verify_integer_times_pow10<float>(340282350 + 7, 30,
+                                      std::numeric_limits<float>::infinity());
 
     // loosely verifying correct rounding of 1 to 64 bits
     // worth of significant digits
-    verify_integer_multiplication_by_power_of_10(1, 42);
-    verify_integer_multiplication_by_power_of_10(1, -42);
-    verify_integer_multiplication_by_power_of_10(12, 42);
-    verify_integer_multiplication_by_power_of_10(12, -42);
-    verify_integer_multiplication_by_power_of_10(123, 42);
-    verify_integer_multiplication_by_power_of_10(123, -42);
-    verify_integer_multiplication_by_power_of_10(1234, 42);
-    verify_integer_multiplication_by_power_of_10(1234, -42);
-    verify_integer_multiplication_by_power_of_10(12345, 42);
-    verify_integer_multiplication_by_power_of_10(12345, -42);
-    verify_integer_multiplication_by_power_of_10(123456, 42);
-    verify_integer_multiplication_by_power_of_10(123456, -42);
-    verify_integer_multiplication_by_power_of_10(1234567, 42);
-    verify_integer_multiplication_by_power_of_10(1234567, -42);
-    verify_integer_multiplication_by_power_of_10(12345678, 42);
-    verify_integer_multiplication_by_power_of_10(12345678, -42);
-    verify_integer_multiplication_by_power_of_10(123456789, 42);
-    verify_integer_multiplication_by_power_of_10(1234567890, 42);
-    verify_integer_multiplication_by_power_of_10(1234567890, -42);
-    verify_integer_multiplication_by_power_of_10(12345678901, 42);
-    verify_integer_multiplication_by_power_of_10(12345678901, -42);
-    verify_integer_multiplication_by_power_of_10(123456789012, 42);
-    verify_integer_multiplication_by_power_of_10(123456789012, -42);
-    verify_integer_multiplication_by_power_of_10(1234567890123, 42);
-    verify_integer_multiplication_by_power_of_10(1234567890123, -42);
-    verify_integer_multiplication_by_power_of_10(12345678901234, 42);
-    verify_integer_multiplication_by_power_of_10(12345678901234, -42);
-    verify_integer_multiplication_by_power_of_10(123456789012345, 42);
-    verify_integer_multiplication_by_power_of_10(123456789012345, -42);
-    verify_integer_multiplication_by_power_of_10(1234567890123456, 42);
-    verify_integer_multiplication_by_power_of_10(1234567890123456, -42);
-    verify_integer_multiplication_by_power_of_10(12345678901234567, 42);
-    verify_integer_multiplication_by_power_of_10(12345678901234567, -42);
-    verify_integer_multiplication_by_power_of_10(123456789012345678, 42);
-    verify_integer_multiplication_by_power_of_10(123456789012345678, -42);
-    verify_integer_multiplication_by_power_of_10(1234567890123456789, 42);
-    verify_integer_multiplication_by_power_of_10(1234567890123456789, -42);
-    verify_integer_multiplication_by_power_of_10(12345678901234567890ull, 42);
-    verify_integer_multiplication_by_power_of_10(12345678901234567890ull, -42);
-    verify_integer_multiplication_by_power_of_10(
-        std::numeric_limits<int64_t>::max(), 42);
-    verify_integer_multiplication_by_power_of_10(
-        std::numeric_limits<int64_t>::max(), -42);
-    verify_integer_multiplication_by_power_of_10(
-        std::numeric_limits<uint64_t>::max(), 42);
-    verify_integer_multiplication_by_power_of_10(
-        std::numeric_limits<uint64_t>::max(), -42);
+    all::verify_integer_times_pow10(1, 42);
+    all::verify_integer_times_pow10(1, -42);
+    all::verify_integer_times_pow10(12, 42);
+    all::verify_integer_times_pow10(12, -42);
+    all::verify_integer_times_pow10(123, 42);
+    all::verify_integer_times_pow10(123, -42);
+    all::verify_integer_times_pow10(1234, 42);
+    all::verify_integer_times_pow10(1234, -42);
+    all::verify_integer_times_pow10(12345, 42);
+    all::verify_integer_times_pow10(12345, -42);
+    all::verify_integer_times_pow10(123456, 42);
+    all::verify_integer_times_pow10(123456, -42);
+    all::verify_integer_times_pow10(1234567, 42);
+    all::verify_integer_times_pow10(1234567, -42);
+    all::verify_integer_times_pow10(12345678, 42);
+    all::verify_integer_times_pow10(12345678, -42);
+    all::verify_integer_times_pow10(123456789, 42);
+    all::verify_integer_times_pow10(1234567890, 42);
+    all::verify_integer_times_pow10(1234567890, -42);
+    all::verify_integer_times_pow10(12345678901, 42);
+    all::verify_integer_times_pow10(12345678901, -42);
+    all::verify_integer_times_pow10(123456789012, 42);
+    all::verify_integer_times_pow10(123456789012, -42);
+    all::verify_integer_times_pow10(1234567890123, 42);
+    all::verify_integer_times_pow10(1234567890123, -42);
+    all::verify_integer_times_pow10(12345678901234, 42);
+    all::verify_integer_times_pow10(12345678901234, -42);
+    all::verify_integer_times_pow10(123456789012345, 42);
+    all::verify_integer_times_pow10(123456789012345, -42);
+    all::verify_integer_times_pow10(1234567890123456, 42);
+    all::verify_integer_times_pow10(1234567890123456, -42);
+    all::verify_integer_times_pow10(12345678901234567, 42);
+    all::verify_integer_times_pow10(12345678901234567, -42);
+    all::verify_integer_times_pow10(123456789012345678, 42);
+    all::verify_integer_times_pow10(123456789012345678, -42);
+    all::verify_integer_times_pow10(1234567890123456789, 42);
+    all::verify_integer_times_pow10(1234567890123456789, -42);
+    all::verify_integer_times_pow10(12345678901234567890ull, 42);
+    all::verify_integer_times_pow10(12345678901234567890ull, -42);
+    all::verify_integer_times_pow10(std::numeric_limits<int64_t>::max(), 42);
+    all::verify_integer_times_pow10(std::numeric_limits<int64_t>::max(), -42);
+    all::verify_integer_times_pow10(std::numeric_limits<uint64_t>::max(), 42);
+    all::verify_integer_times_pow10(std::numeric_limits<uint64_t>::max(), -42);
   }
 }
\ No newline at end of file
diff --git a/tests/example_integer_times_pow10.cpp b/tests/example_integer_times_pow10.cpp
index 3e86826c..0205c275 100644
--- a/tests/example_integer_times_pow10.cpp
+++ b/tests/example_integer_times_pow10.cpp
@@ -2,7 +2,7 @@
 
 #include <iostream>
 
-int main() {
+void default_overload() {
   const uint64_t W = 12345678901234567;
   const int Q = 23;
   const double result = fast_float::integer_times_pow10(W, Q);
@@ -10,3 +10,27 @@ int main() {
   std::cout << W << " * 10^" << Q << " = " << result << " ("
             << (result == 12345678901234567e23 ? "==" : "!=") << "expected)\n";
 }
+
+void double_specialization() { // demo: explicit <double> template argument
+  const uint64_t W = 12345678901234567; // decimal significand
+  const int Q = 23; // power-of-ten exponent
+  const double result = fast_float::integer_times_pow10<double>(W, Q);
+  std::cout.precision(17); // 17 significant digits round-trip an IEEE-754 double
+  std::cout << "double: " << W << " * 10^" << Q << " = " << result << " ("
+            << (result == 12345678901234567e23 ? "==" : "!=") << "expected)\n"; // checked against the equivalent literal
+}
+
+void float_specialization() { // demo: explicit <float> template argument
+  const uint64_t W = 12345678; // decimal significand
+  const int Q = 23; // power-of-ten exponent
+  const float result = fast_float::integer_times_pow10<float>(W, Q);
+  std::cout.precision(9); // 9 significant digits round-trip an IEEE-754 float
+  std::cout << "float: " << W << " * 10^" << Q << " = " << result << " ("
+            << (result == 12345678e23f ? "==" : "!=") << "expected)\n"; // checked against the equivalent float literal
+}
+
+int main() { // runs the three demos in order
+  default_overload();
+  double_specialization();
+  float_specialization();
+}
diff --git a/tests/fast_int.cpp b/tests/fast_int.cpp
index 49044d36..94e76fdb 100644
--- a/tests/fast_int.cpp
+++ b/tests/fast_int.cpp
@@ -95,6 +95,201 @@ int main() {
     }
   }
 
+  // char basic test: leading integer is parsed; trailing text is not an error
+  std::vector<char> const char_basic_test_expected{0, 10, 40, 100, 9};
+  std::vector<std::string_view> const char_basic_test{"0", "10 ", "40",
+                                                      "100 with text", "9.999"};
+
+  for (std::size_t i = 0; i < char_basic_test.size(); ++i) {
+    auto const f = char_basic_test[i];
+    char result;
+    auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result);
+
+    if (answer.ec != std::errc()) {
+      std::cerr << "could not convert to char for input: \"" << f
+                << "\" because of invalid argument" << std::endl;
+      return EXIT_FAILURE;
+    } else if (result != char_basic_test_expected[i]) {
+      std::cerr << "result \"" << f << "\" did not match with expected char: "
+                << static_cast<int>(char_basic_test_expected[i]) << std::endl; // cast: print a number, not a character
+      return EXIT_FAILURE;
+    }
+  }
+
+  // short basic test: leading integer is parsed; trailing text is not an error
+  std::vector<short> const short_basic_test_expected{0, 10, -40, 1001, 9};
+  std::vector<std::string_view> const short_basic_test{
+      "0", "10 ", "-40", "1001 with text", "9.999"};
+
+  for (std::size_t i = 0; i < short_basic_test.size(); ++i) {
+    auto const f = short_basic_test[i];
+    short result;
+    auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result);
+
+    if (answer.ec != std::errc()) {
+      std::cerr << "could not convert to short for input: \"" << f
+                << "\" because of invalid argument" << std::endl;
+      return EXIT_FAILURE;
+    } else if (result != short_basic_test_expected[i]) {
+      std::cerr << "result \"" << f << "\" did not match with expected short: "
+                << short_basic_test_expected[i] << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+
+  // long basic test: leading integer is parsed; trailing text is not an error
+  std::vector<long> const long_basic_test_expected{0, 10, -40, 1001, 9};
+  std::vector<std::string_view> const long_basic_test{
+      "0", "10 ", "-40", "1001 with text", "9.999"};
+
+  for (std::size_t i = 0; i < long_basic_test.size(); ++i) {
+    auto const f = long_basic_test[i];
+    long result;
+    auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result);
+
+    if (answer.ec != std::errc()) {
+      std::cerr << "could not convert to long for input: \"" << f
+                << "\" because of invalid argument" << std::endl;
+      return EXIT_FAILURE;
+    } else if (result != long_basic_test_expected[i]) {
+      std::cerr << "result \"" << f << "\" did not match with expected long: "
+                << long_basic_test_expected[i] << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+
+  // long long basic test: leading integer is parsed; trailing text is no error
+  std::vector<long long> const long_long_basic_test_expected{0, 10, -40, 1001,
+                                                             9};
+  std::vector<std::string_view> const long_long_basic_test{
+      "0", "10 ", "-40", "1001 with text", "9.999"};
+
+  for (std::size_t i = 0; i < long_long_basic_test.size(); ++i) {
+    auto const f = long_long_basic_test[i];
+    long long result;
+    auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result);
+
+    if (answer.ec != std::errc()) {
+      std::cerr << "could not convert to long long for input: \"" << f
+                << "\" because of invalid argument" << std::endl;
+      return EXIT_FAILURE;
+    } else if (result != long_long_basic_test_expected[i]) {
+      std::cerr << "result \"" << f
+                << "\" did not match with expected long long: "
+                << long_long_basic_test_expected[i] << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+
+  // unsigned char basic test: leading integer parsed; trailing text is no error
+  std::vector<unsigned char> const unsigned_char_basic_test_expected{0, 10, 100,
+                                                                     9};
+  std::vector<std::string_view> const unsigned_char_basic_test{
+      "0", "10 ", "100 with text", "9.999"};
+
+  for (std::size_t i = 0; i < unsigned_char_basic_test.size(); ++i) {
+    auto const &f = unsigned_char_basic_test[i];
+    unsigned char result;
+    auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result);
+    if (answer.ec != std::errc()) {
+      std::cerr << "could not convert to unsigned char for input: \"" << f
+                << "\"" << std::endl;
+      return EXIT_FAILURE;
+    } else if (result != unsigned_char_basic_test_expected[i]) {
+      std::cerr << "result \"" << f
+                << "\" did not match with expected unsigned char: "
+                << static_cast<int>(unsigned_char_basic_test_expected[i]) // cast: print a number, not a character
+                << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+
+  // unsigned short basic test: leading integer parsed; trailing text is no error
+  std::vector<unsigned short> const unsigned_short_basic_test_expected{0, 10,
+                                                                       1001, 9};
+  std::vector<std::string_view> const unsigned_short_basic_test{
+      "0", "10 ", "1001 with text", "9.999"};
+
+  for (std::size_t i = 0; i < unsigned_short_basic_test.size(); ++i) {
+    auto const &f = unsigned_short_basic_test[i];
+    unsigned short result;
+    auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result);
+    if (answer.ec != std::errc()) {
+      std::cerr << "could not convert to unsigned short for input: \"" << f
+                << "\"" << std::endl;
+      return EXIT_FAILURE;
+    } else if (result != unsigned_short_basic_test_expected[i]) {
+      std::cerr << "result \"" << f
+                << "\" did not match with expected unsigned short: "
+                << unsigned_short_basic_test_expected[i] << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+
+  // unsigned long basic test: leading integer parsed; trailing text is no error
+  std::vector<unsigned long> const unsigned_long_basic_test_expected{0, 10,
+                                                                     1001, 9};
+  std::vector<std::string_view> const unsigned_long_basic_test{
+      "0", "10 ", "1001 with text", "9.999"};
+
+  for (std::size_t i = 0; i < unsigned_long_basic_test.size(); ++i) {
+    auto const &f = unsigned_long_basic_test[i];
+    unsigned long result;
+    auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result);
+    if (answer.ec != std::errc()) {
+      std::cerr << "could not convert to unsigned long for input: \"" << f
+                << "\"" << std::endl;
+      return EXIT_FAILURE;
+    } else if (result != unsigned_long_basic_test_expected[i]) {
+      std::cerr << "result \"" << f
+                << "\" did not match with expected unsigned long: "
+                << unsigned_long_basic_test_expected[i] << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+
+  // unsigned long long basic test: leading integer parsed; trailing text is OK
+  std::vector<unsigned long long> const unsigned_long_long_basic_test_expected{
+      0, 10, 1001, 9};
+  std::vector<std::string_view> const unsigned_long_long_basic_test{
+      "0", "10 ", "1001 with text", "9.999"};
+
+  for (std::size_t i = 0; i < unsigned_long_long_basic_test.size(); ++i) {
+    auto const &f = unsigned_long_long_basic_test[i];
+    unsigned long long result;
+    auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result);
+    if (answer.ec != std::errc()) {
+      std::cerr << "could not convert to unsigned long long for input: \"" << f
+                << "\"" << std::endl;
+      return EXIT_FAILURE;
+    } else if (result != unsigned_long_long_basic_test_expected[i]) {
+      std::cerr << "result \"" << f
+                << "\" did not match with expected unsigned long long: "
+                << unsigned_long_long_basic_test_expected[i] << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+
+  // bool basic test: "0" must parse to false and "1" must parse to true
+  std::vector<bool> const bool_basic_test_expected{false, true};
+  std::vector<std::string_view> const bool_basic_test{"0", "1"};
+
+  for (std::size_t i = 0; i < bool_basic_test.size(); ++i) {
+    auto const &f = bool_basic_test[i];
+    bool result;
+    auto answer = fast_float::from_chars(f.data(), f.data() + f.size(), result);
+    if (answer.ec != std::errc()) {
+      std::cerr << "could not convert to bool for input: \"" << f << "\""
+                << std::endl;
+      return EXIT_FAILURE;
+    } else if (result != bool_basic_test_expected[i]) {
+      std::cerr << "result \"" << f << "\" did not match with expected bool: "
+                << (bool_basic_test_expected[i] ? "true" : "false")
+                << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+
   // int invalid error test
   std::vector<std::string_view> const int_invalid_argument_test{
       "text", "text with 1002", "+50", " 50"};
diff --git a/tests/ipv4_test.cpp b/tests/ipv4_test.cpp
new file mode 100644
index 00000000..f3055dcb
--- /dev/null
+++ b/tests/ipv4_test.cpp
@@ -0,0 +1,122 @@
+
+#include <algorithm>
+#include <charconv>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <stdexcept>
+#include "fast_float/fast_float.h"
+
+// Writes the decimal digits of `value` at `ptr` (no terminator is added) and
+// returns a pointer one past the last digit written.
+char *uint8_to_chars_manual(char *ptr, uint8_t value) {
+  if (value == 0) {
+    *ptr++ = '0';
+    return ptr;
+  }
+  char *start = ptr;
+  while (value > 0) {
+    *ptr++ = '0' + (value % 10);
+    value /= 10;
+  }
+  // Digits were emitted least-significant first; flip them into reading order.
+  std::reverse(start, ptr);
+  return ptr;
+}
+
+// Formats `ip` as dotted-quad text, most significant byte first, and
+// NUL-terminates it. `buffer` needs room for 16 chars: "255.255.255.255" plus
+// the terminator.
+void uint32_to_ipv4_string(uint32_t ip, char *buffer) {
+  uint8_t octets[4] = {static_cast<uint8_t>(ip >> 24),
+                       static_cast<uint8_t>(ip >> 16),
+                       static_cast<uint8_t>(ip >> 8), static_cast<uint8_t>(ip)};
+
+  char *ptr = buffer;
+
+  for (int i = 0; i < 4; ++i) {
+    ptr = uint8_to_chars_manual(ptr, octets[i]);
+
+    if (i < 3) {
+      *ptr++ = '.';
+    }
+  }
+  *ptr = '\0';
+}
+
+// Parses the dotted-quad text in [str, end) into a 32-bit address with the
+// first octet in the most significant byte. Throws std::invalid_argument when
+// an octet does not parse as uint8_t, a '.' separator is missing, or input
+// remains after the fourth octet.
+fastfloat_really_inline uint32_t ipv4_string_to_uint32(const char *str,
+                                                       const char *end) {
+  uint32_t ip = 0;
+  const char *current = str;
+
+  for (int i = 0; i < 4; ++i) {
+    uint8_t value;
+    auto r = fast_float::from_chars(current, end, value);
+    if (r.ec != std::errc()) {
+      throw std::invalid_argument("Invalid IP address format");
+    }
+    current = r.ptr;
+    ip = (ip << 8) | value;
+
+    if (i < 3) {
+      if (current == end || *current++ != '.') {
+        throw std::invalid_argument("Invalid IP address format");
+      }
+    }
+  }
+  // Reject trailing characters after the last octet, e.g. "1.2.3.4x".
+  if (current != end) {
+    throw std::invalid_argument("Invalid IP address format");
+  }
+  return ip;
+}
+
+// Round-trips every 1000th address in [0, 0xFFFFFFFF] through
+// uint32_to_ipv4_string and ipv4_string_to_uint32. Returns false, after
+// reporting on std::cerr, at the first parse failure or mismatch.
+bool test_all_ipv4_conversions() {
+  std::cout << "Testing all IPv4 conversions... 0, 1000, 2000, 3000, 4000, "
+               "5000, 6000, 7000, 8000, 9000, ..."
+            << std::endl;
+  char buffer[16];
+  // 64-bit counter: a uint32_t would wrap around and never exceed 0xFFFFFFFF.
+  for (uint64_t ip = 0; ip <= 0xFFFFFFFF; ip += 1000) {
+    if (ip % 10000000 == 0) {
+      std::cout << "." << std::flush;
+    }
+    uint32_to_ipv4_string(static_cast<uint32_t>(ip), buffer);
+    const char *end = buffer + std::strlen(buffer);
+    uint32_t parsed_ip = 0;
+    try {
+      parsed_ip = ipv4_string_to_uint32(buffer, end);
+    } catch (const std::invalid_argument &e) {
+      // Fail the test cleanly instead of terminating on an uncaught throw.
+      std::cerr << "Parse failure for \"" << buffer << "\": " << e.what()
+                << std::endl;
+      return false;
+    }
+    if (parsed_ip != ip) {
+      std::cerr << "Mismatch: original " << ip << ", parsed " << parsed_ip
+                << std::endl;
+      return false;
+    }
+  }
+  std::cout << std::endl;
+  return true;
+}
+
+// Exit status reflects the outcome of the round-trip test.
+int main() {
+  if (test_all_ipv4_conversions()) {
+    std::cout << "All IPv4 conversions passed!" << std::endl;
+    return EXIT_SUCCESS;
+  } else {
+    std::cerr << "IPv4 conversion test failed!" << std::endl;
+    return EXIT_FAILURE;
+  }
+}
\ No newline at end of file