diff --git a/.bintray.in b/.bintray.in
deleted file mode 100644
index 4336d65c..00000000
--- a/.bintray.in
+++ /dev/null
@@ -1,37 +0,0 @@
-{
-    /* Bintray package information.
-       In case the package already exists on Bintray, only the name, repo and subject
-       fields are mandatory. */
-
-    "package": {
-        "name": "releases", // Bintray package name
-        "repo": "tinyobjloader", // Bintray repository name
-        "subject": "syoyo" // Bintray subject (user or organization)
-    },
-
-    /* Package version information.
-       In case the version already exists on Bintray, only the name fields is mandatory. */
-
-    "version": {
-        "name": "@VERSION@",
-        "desc": "@VERSION@",
-        "released": "@DATE@",
-        "vcs_tag": "@VERSION@",
-        "gpgSign": false
-    },
-
-    /* Configure the files you would like to upload to Bintray and their upload path.
-    You can define one or more groups of patterns.
-    Each group contains three patterns:
-
-    includePattern: Pattern in the form of Ruby regular expression, indicating the path of files to be uploaded to Bintray.
-    excludePattern: Optional. Pattern in the form of Ruby regular expression, indicating the path of files to be removed from the list of files specified by the includePattern.
-    uploadPattern: Upload path on Bintray. The path can contain symbols in the form of $1, $2,... that are replaced with capturing groups defined in the include pattern.
-
-    Note: Regular expressions defined as part of the includePattern property must be wrapped with brackets. */
-
-    "files":
-        [ {"includePattern": "dist/(.*)", "uploadPattern": "$1"} ],
-    "publish": true
-}
-
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 00000000..0fd988d9
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,15 @@
+# These are supported funding model platforms
+
+github: syoyo # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+patreon: # Replace with a single Patreon username
+open_collective: # Replace with a single Open Collective username
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
+polar: # Replace with a single Polar username
+buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
+thanks_dev: # Replace with a single thanks.dev username
+custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 00000000..3ba13e0c
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1 @@
+blank_issues_enabled: false
diff --git a/.github/ISSUE_TEMPLATE/issue-report.md b/.github/ISSUE_TEMPLATE/issue-report.md
new file mode 100644
index 00000000..1361a329
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/issue-report.md
@@ -0,0 +1,28 @@
+---
+name: Issue report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the issue**
+A clear and concise description of what the issue is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Compile TinyObjLoader with '...'
+2. Load .obj file '...'
+3. See error
+
+Please attach minimal, reproducible files (source code, .obj/.mtl files, etc.)
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Environment**
+ - TinyObjLoader version
+ - OS: [e.g. Linux]
+ - Compiler: [e.g. gcc 7.3]
+ - Other environment [e.g. Python version if you use python binding]
diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
new file mode 100644
index 00000000..77d39d3c
--- /dev/null
+++ b/.github/workflows/cron.yml
@@ -0,0 +1,22 @@
+name: Close inactive issues
+on:
+  schedule:
+    - cron: "30 1 * * *"
+
+jobs:
+  close-issues:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+    steps:
+      - uses: actions/stale@v9
+        with:
+          days-before-issue-stale: 14
+          days-before-issue-close: 14
+          stale-issue-label: "stale"
+          stale-issue-message: "This issue is stale because it has been open for 14 days with no activity."
+          close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
+          days-before-pr-stale: -1
+          days-before-pr-close: -1
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
new file mode 100644
index 00000000..b40cf7a0
--- /dev/null
+++ b/.github/workflows/python.yml
@@ -0,0 +1,196 @@
+name: Python
+
+on: [push, pull_request]
+
+jobs:
+  check_format:
+    name: Check Python code format
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
+      - name: Set up Python and install dependencies
+        working-directory: tests/python
+        run: uv sync --project . --python 3.13
+
+      - name: Check code format
+        working-directory: tests/python
+        run: uv run --project . black --check ../..
+
+  build_wheels_quick:
+    name: Build wheels for quick testing
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          fetch-tags: true  # Optional, use if you use setuptools_scm
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v3.3.1
+        env:
+          # These are the only wheels we need for the `test_wheels` job. For
+          # faster iteration times, we limit the job to only build these wheels.
+          # Restrict to CPython to avoid building unused PyPy wheels.
+          CIBW_BUILD: "cp*-manylinux_x86_64"
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-wheels-quick
+          path: ./wheelhouse/*.whl
+
+  test_wheels:
+    name: Test wheels with Python ${{ matrix.python-version }} and NumPy ${{ matrix.numpy-version }}
+    needs: [build_wheels_quick]
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - python-version: "3.9"
+            numpy-version: "1.25.2"
+          - python-version: "3.10"
+            numpy-version: "1.26.4"
+          - python-version: "3.11"
+            numpy-version: "1.26.4"
+          - python-version: "3.12"
+            numpy-version: "1.26.4"
+          - python-version: "3.11"
+            numpy-version: "2.4.2"
+          - python-version: "3.12"
+            numpy-version: "2.4.2"
+    
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
+      - name: Download wheel artifacts
+        uses: actions/download-artifact@v4
+        with:
+          pattern: cibw-wheels-quick
+          path: dist
+          merge-multiple: true
+      
+      - name: Set up Python ${{ matrix.python-version }} and install dependencies
+        working-directory: tests/python
+        run: uv sync --project . --python ${{ matrix.python-version }}
+
+      - name: Install NumPy ${{ matrix.numpy-version }}
+        working-directory: tests/python
+        run: |
+          uv pip install --project . --only-binary :all: numpy==${{ matrix.numpy-version }}
+
+      - name: Install manylinux wheel built for Python ${{ matrix.python-version }}
+        working-directory: tests/python
+        run: uv pip install --project . ../../dist/*cp$(echo ${{ matrix.python-version }} | tr -d .)*.whl
+      
+      - name: Run tests
+        working-directory: tests/python
+        run: uv run --project . pytest
+
+  build_wheels_main:
+    name: Build remaining wheels on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          fetch-tags: true  # Optional, use if you use setuptools_scm
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v3.3.1
+        env:
+            CIBW_ARCHS_MACOS: "x86_64 universal2 arm64"
+            CIBW_ARCHS_WINDOWS: "AMD64"
+            # The quick build has already taken care of manylinux.
+            CIBW_SKIP: "*-manylinux_x86_64"
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-wheels-main-${{ matrix.os }}-${{ strategy.job-index }}
+          path: ./wheelhouse/*.whl
+
+  # cibuildwheel does not appear to clean the CMake build folder, which makes the Windows ARM64 build fail (it tries to reuse .obj files from the x86 build).
+  # So use a separate build job for the Windows ARM64 build.
+  # TODO: clean build folder using CIBW_BEFORE_ALL?
+  build_wheels_win_arm64:
+    name: Build ARM64 wheels on Windows
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          fetch-tags: true  # Optional, use if you use setuptools_scm
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v3.3.1
+        env:
+            CIBW_ARCHS_WINDOWS: "ARM64"
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: cibw-wheels-win-arm64
+          path: ./wheelhouse/*.whl
+
+  make_sdist:
+    name: Make SDist
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0  # Optional, use if you use setuptools_scm
+        fetch-tags: true  # Optional, use if you use setuptools_scm
+
+    - name: Build SDist
+      run: pipx run build --sdist
+
+    - uses: actions/upload-artifact@v4
+      with:
+        name: cibw-sdist
+        path: dist/*.tar.gz
+
+  upload_all:
+    needs: [build_wheels_quick, build_wheels_main, build_wheels_win_arm64, make_sdist]
+    runs-on: ubuntu-latest
+    environment: release
+    permissions:
+      # IMPORTANT: this permission is mandatory for trusted publishing
+      id-token: write
+    # upload to PyPI on every tag starting with 'v'
+    # NOTE: Without the github.event_name & github.ref check, the `upload_all` job is still triggered on 'main' branch pushes.
+    # (then get 'Branch "main" is not allowed to deploy to release due to environment protection rules.' error)
+    # So still do event_name and github.ref check.
+    # TODO: Make it work only using Github `environment` feature.
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
+    # alternatively, to publish when a GitHub Release is created, use the following rule:
+    # if: github.event_name == 'release' && github.event.action == 'published'
+    steps:
+    - uses: actions/download-artifact@v4
+      with:
+        pattern: cibw-*
+        path: dist
+        merge-multiple: true
+
+    - uses: pypa/gh-action-pypi-publish@release/v1
+      with:
+        # Use Trusted Publisher feature:
+        # https://docs.pypi.org/trusted-publishers/
+        #  so no use of PYPI_API_TOKEN
+        #password: ${{ secrets.PYPI_API_TOKEN }}
+        #
+        # Avoid race condition when using multiple CIs
+        skip-existing: true
+        verbose: true
diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml
new file mode 100644
index 00000000..3a7ffc82
--- /dev/null
+++ b/.github/workflows/unit.yml
@@ -0,0 +1,22 @@
+name: Unit Tests
+
+on:
+  push:
+    branches:
+      - '**'
+    tags:
+      - 'v*'
+  pull_request:
+    branches:
+      - '**'
+
+jobs:
+  unit_linux:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Run unit tests
+        run: |
+          cd tests
+          make check
diff --git a/.gitignore b/.gitignore
index cd219d8e..4bd2eb9d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,12 @@ build/
 /python/*.egg-info
 /python/.eggs
 /python/tiny_obj_loader.h
+/tests/tester
+/tests/tester.dSYM
+/_codeql_build_dir/
+/_codeql_detected_source_root
+/python/_version.py
+/tinyobjloader.egg-info/
+**/__pycache__/
+
+/Testing/Temporary/
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 06b2d758..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,81 +0,0 @@
-language: cpp
-sudo: required
-matrix:
-  include:
-  - addons: &1
-      apt:
-        sources:
-        - george-edison55-precise-backports
-        - ubuntu-toolchain-r-test
-        - llvm-toolchain-precise-3.7
-        packages:
-        - cmake
-        - cmake-data
-        - ninja-build
-        - g++-4.9
-        - clang-3.7
-    compiler: clang
-    env: DEPLOY_BUILD=1 COMPILER_VERSION=3.7 BUILD_TYPE=Debug
-  - addons: *1
-    compiler: clang
-    env: COMPILER_VERSION=3.7 BUILD_TYPE=Release
-  - addons: &2
-      apt:
-        sources:
-        - george-edison55-precise-backports
-        - ubuntu-toolchain-r-test
-        packages:
-        - cmake
-        - cmake-data
-        - ninja-build
-        - g++-4.9
-    compiler: gcc
-    env: COMPILER_VERSION=4.9 BUILD_TYPE=Debug
-  - addons: *2
-    compiler: gcc
-    env: COMPILER_VERSION=4.9 BUILD_TYPE=Release
-  - addons: *1
-    compiler: clang
-    env: COMPILER_VERSION=3.7 BUILD_TYPE=Debug
-before_install:
-- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew upgrade; fi
-- if [ -n "$REPORT_COVERAGE" ]; then sudo apt-get update python; fi
-- if [ -n "$REPORT_COVERAGE" ]; then sudo apt-get install python-dev libffi-dev libssl-dev; fi
-- if [ -n "$REPORT_COVERAGE" ]; then sudo pip install --upgrade pip; fi
-- if [ -n "$REPORT_COVERAGE" ]; then CXX=g++ pip install --user requests[security]; fi
-- if [ -n "$REPORT_COVERAGE" ]; then CXX=g++ pip install --user cpp-coveralls; fi
-script:
-- cd tests
-- make check
-- if [ -n "$REPORT_COVERAGE" ]; then coveralls -b . -r .. -e examples -e tools -e
-  jni -e python -e images -E ".*CompilerId.*" -E ".*feature_tests.*" ; fi
-- cd ..
-- rm -rf dist
-- mkdir dist
-- cp tiny_obj_loader.h dist/
-
-before_deploy:
-  - echo "Creating description file for bintray."
-  - ./tools/travis_postbuild.sh
-
-deploy:
-  - provider: bintray
-    file: ".bintray.json"
-    user: "syoyo"
-    key:
-      secure: W4F1VZcDcVOMe8Ymvo0bHery/JSmVhadl1NgAnGus6o7zVw7ChElKA1ho/NtqUbtoW8o1qUKMJdLQeh786jolocZJEJlns9JZ5FCet6H2b3kITfUa4GR5T11V/ZYwL3SajW8vZ1xu5UrpP5HHgFMYtxb1MFrNLDI60sh0RnyV/qFFBnCJGZPagF/M1mzbJeDml5xK5lShH0r8QpH+7MeQ1J8ungEyJ7UCyr1ao8gY9eq1/05IpHR9vri/d48EXQWHbqtI8EwCc7064oCYQGyYcLsD4yPEokwrdelkCvDquSpJLmbJENfZCc4vZGXsykjnQ8+gltJomBAivQFB9vc06ETEJssMzitbrfEZUrqFwZj/HZM7CYGXfGQWltL828SppCjsuWrgQ/VYXM5UgRpmhlxbqnuyxnYvKZ9EDW4+EnMkOmIl7WSDovp8E/4CZ0ghs+YyFS4SrgeqFCXS8bvxrkDUUPSipHuGBOt02fRnccKzU+3zU6Q5fghyLczz4ZtnOdk+Niz/njyF0SZfPYTUgb3GzAJ8Su6kvWJCAGdedON3n1F/TtybCE2dIdATxaO2uFQbwYjSOCiq209oCJ7MrsQZibRsa5a9YXyjlLkPxwOeVwo8wJyJclqWswIkhdSO8xvTnkwESv4yLzLutEOlBVlQbJzpyuS6vx0yHOYkwc=
-    all_branches: true
-    on:
-      repo: syoyo/tinyobjloader
-      condition: -n "$DEPLOY_BUILD"
-      tags: true
-    skip_cleanup: true
-  - provider: releases
-    api_key:
-      secure: AsXameK4GJn6h6wMmDrKTr7q/o9EI7hX7zWg1W6VaFBQKfkBvOmjJolWimjl6HMoRZ1NpMmK5GDm3zBlTUeABtgVBIyNWgE9vWS39ff6D5iQKcgScFsJkyILt0GikBqbN2pLGQ2t/M1Qh6n1sEIfzqekiCcF5Qvy5yYlYvHtaRGV02QeYAej/xx15/9SMuKTncHhjf63ClYPu8ODid7QUegJUvlQUeXoPsBDbaXMH2uDWoBWF7etX7G2Iob4NE8GX+ZP6dj+Ogi7p4HXThK650mzLL/pUl584EjjY/vePqx0cFhtpiRwvrW8SNPI1aJ1Phwa1enLRUgfS3bnkwQAMw/SCXSK2lnCvkUAXyTgpG03HWrZURj4vhEPXc7qHooO+dsfmi+JanYLaSDyrGpgQznLGjCMnVATimry0KxSufUY8Wt72Wh+nf7N0IgTUCjl32sWnQd/MRZPkxFuaf1h7r9RoH9KZY0yIOV09gABEFCGrOIZA2FcyhC2G26Bc4zyNrfMFpZ2DI76qdcWNdJGkRkpxtH9sGU8JgZu6Em2f1e6+SLgkBsPxbhRk5PwdhA9AXE2p9PmQqhO3jJKusGBZSoHAF7TlwagRY2J01yJxF7ge6zG9U8QuBqs1bB1zdnE34fHWOgs4st3inC+oBDOhvnEg1Nm/qeYVWMBzpwclSg=
-    file: tiny_obj_loader.h
-    all_branches: true
-    on:
-      repo: syoyo/tinyobjloader
-      tags: true
-    skip_cleanup: true
diff --git a/BUILD.bazel b/BUILD.bazel
new file mode 100644
index 00000000..dda60c76
--- /dev/null
+++ b/BUILD.bazel
@@ -0,0 +1,9 @@
+cc_library(
+    name = "tinyobjloader",
+    hdrs = ["tiny_obj_loader.h"],
+    copts = select({
+        "@platforms//os:windows": [],
+        "//conditions:default": ["-Wno-maybe-uninitialized"],
+    }),
+    visibility = ["//visibility:public"],
+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bbd7633b..9aea91fa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,13 +1,23 @@
 #Tiny Object Loader Cmake configuration file.
 #This configures the Cmake system with multiple properties, depending
 #on the platform and configuration it is set to build in.
-project(tinyobjloader)
-cmake_minimum_required(VERSION 2.8.11)
-set(TINYOBJLOADER_SOVERSION 1)
-set(TINYOBJLOADER_VERSION 1.0.4)
+cmake_minimum_required(VERSION 3.16)
+project(tinyobjloader CXX)
+set(TINYOBJLOADER_SOVERSION 2)
+set(TINYOBJLOADER_VERSION 2.0.0-rc.13)
+set(PY_TARGET "pytinyobjloader")
 
 #optional double precision support
 option(TINYOBJLOADER_USE_DOUBLE "Build library with double precision instead of single (float)" OFF)
+option(TINYOBJLOADER_WITH_PYTHON "Build Python module(for developer). Use pyproject.toml/setup.py to build Python module for end-users" OFF)
+option(TINYOBJLOADER_PREFER_LOCAL_PYTHON_INSTALLATION
+      "Prefer locally-installed Python interpreter than system or conda/brew installed Python. Please specify your Python interpreter   with `Python3_EXECUTABLE` cmake option if you enable this option."
+      OFF)
+
+list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/sanitizers)
+find_package(Sanitizers) # Address sanitizer (-DSANITIZE_ADDRESS=ON)
+
 
 if(TINYOBJLOADER_USE_DOUBLE)
   set(LIBRARY_NAME ${PROJECT_NAME}_double)
@@ -15,7 +25,6 @@ else()
   set(LIBRARY_NAME ${PROJECT_NAME})
 endif()
 
-
 #Folder Shortcuts
 set(TINYOBJLOADEREXAMPLES_DIR ${CMAKE_CURRENT_SOURCE_DIR}/examples)
 
@@ -46,7 +55,36 @@ set(TINYOBJLOADER_RUNTIME_DIR ${CMAKE_INSTALL_BINDIR})
 
 option(TINYOBJLOADER_BUILD_TEST_LOADER "Build Example Loader Application" OFF)
 
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Build standalone .so for Python binding(for developer)
+if (TINYOBJLOADER_WITH_PYTHON)
+
+  if(TINYOBJLOADER_PREFER_LOCAL_PYTHON_INSTALLATION)
+    #message(STATUS "Local Python")
+    set(Python3_FIND_FRAMEWORK NEVER) # Do not search framework python
+    set(Python3_FIND_STRATEGY LOCATION)
+    set(Python3_FIND_REGISTRY NEVER) # Windows only
+  else()
+    set(Python3_FIND_FRAMEWORK LAST
+    )# Prefer Brew/Conda to Apple framework python
+  endif()
+
+  find_package(
+    Python3
+    COMPONENTS Interpreter Development
+    REQUIRED)
+
+  find_package(pybind11 CONFIG REQUIRED)
+
+endif()
+
+
+
 add_library(${LIBRARY_NAME} ${tinyobjloader-Source})
+add_sanitizers(${LIBRARY_NAME})
 
 if(BUILD_SHARED_LIBS)
   set_target_properties(${LIBRARY_NAME} PROPERTIES
@@ -85,6 +123,24 @@ if(TINYOBJLOADER_BUILD_OBJ_STICHER)
     )
 endif()
 
+if (TINYOBJLOADER_WITH_PYTHON)
+  # pybind11 method:
+  pybind11_add_module(${PY_TARGET} ${CMAKE_SOURCE_DIR}/python/bindings.cc ${CMAKE_SOURCE_DIR}/python/tiny_obj_loader.cc)
+
+  add_sanitizers(${PY_TARGET})
+  set_target_properties(${PY_TARGET} PROPERTIES OUTPUT_NAME "tinyobjloader")
+
+  # copy .so to python/
+  add_custom_command(
+    TARGET ${PY_TARGET}
+    POST_BUILD
+    COMMAND "${CMAKE_COMMAND}" -E copy "$<TARGET_FILE:${PY_TARGET}>"
+            "${CMAKE_SOURCE_DIR}/python/$<TARGET_FILE_NAME:${PY_TARGET}>"
+    COMMENT "copying tinyobjloader native python module file to python/"
+    VERBATIM)
+
+endif()
+
 #Write CMake package config files
 include(CMakePackageConfigHelpers)
 
@@ -109,6 +165,11 @@ write_basic_package_version_file(${PROJECT_NAME}-config-version.cmake
 #pkg-config file
 configure_file(${PROJECT_NAME}.pc.in ${LIBRARY_NAME}.pc @ONLY)
 
+if(DEFINED ENV{LIB_FUZZING_ENGINE})
+  add_executable(fuzz_ParseFromString fuzzer/fuzz_ParseFromString.cc)
+  target_link_libraries(fuzz_ParseFromString ${LIBRARY_NAME} $ENV{LIB_FUZZING_ENGINE})
+endif()
+
 #Installation
 install(TARGETS
   ${LIBRARY_NAME}
diff --git a/LICENSE b/LICENSE
index 3af18aba..e9fbe447 100644
--- a/LICENSE
+++ b/LICENSE
@@ -19,3 +19,24 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
+
+----------------------------------
+
+mapbox/earcut.hpp
+
+ISC License
+
+Copyright (c) 2015, Mapbox
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
+
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..d2632da5
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,8 @@
+include pyproject.toml
+include setup.py
+include README.md
+include LICENSE
+include python/sample.py
+include python/bindings.cc
+include python/tiny_obj_loader.cc
+include tiny_obj_loader.h
diff --git a/MODULE.bazel b/MODULE.bazel
new file mode 100644
index 00000000..f8859286
--- /dev/null
+++ b/MODULE.bazel
@@ -0,0 +1,9 @@
+module(
+    name = "tinyobjloader",
+    compatibility_level = 1,
+)
+
+bazel_dep(
+    name = "platforms",
+    version = "0.0.8",
+)
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..87715260
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+# Use this for strict compilation check(will work on clang 3.8+)
+#EXTRA_CXXFLAGS := -fsanitize=address,undefined -Wall -Werror -Weverything -DTINYOBJLOADER_ENABLE_THREADED=1 -Wno-c++98-compat
+#EXTRA_CXXFLAGS := -Weverything -Wall -DTINYOBJLOADER_ENABLE_THREADED=1 -Wno-c++98-compat
+# Note: fast_float is now bundled by default (no separate include path needed).
+# Define TINYOBJLOADER_DISABLE_FAST_FLOAT to opt out of the bundled parser.
+EXTRA_CXXFLAGS :=
+
+all:
+	g++  $(EXTRA_CXXFLAGS) -DTINYOBJLOADER_USE_DOUBLE=1 -std=c++11 -g -O0 -o loader_example loader_example.cc
+	#clang++  $(EXTRA_CXXFLAGS) -DTINYOBJLOADER_USE_DOUBLE=1 -std=c++11 -g -O2 -o loader_example loader_example.cc
+
+lint:
+	./cpplint.py tiny_gltf_loader.h
diff --git a/README.md b/README.md
index 6160babd..0150156a 100644
--- a/README.md
+++ b/README.md
@@ -1,38 +1,32 @@
 # tinyobjloader
 
-[![Join the chat at https://gitter.im/syoyo/tinyobjloader](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/syoyo/tinyobjloader?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+[![PyPI version](https://badge.fury.io/py/tinyobjloader.svg)](https://badge.fury.io/py/tinyobjloader)
 
-[![Build Status](https://travis-ci.org/syoyo/tinyobjloader.svg)](https://travis-ci.org/syoyo/tinyobjloader)
-
-[![AZ Build Status](https://dev.azure.com/syoyo/lte%20oss/_apis/build/status/syoyo.tinyobjloader?branchName=master)](https://dev.azure.com/syoyo/lte%20oss/_build/latest?definitionId=2&branchName=master)
-
-[![AppVeyor Build status](https://ci.appveyor.com/api/projects/status/tlb421q3t2oyobcn/branch/master?svg=true)](https://ci.appveyor.com/project/syoyo/tinyobjloader/branch/master)
-
-[![Coverage Status](https://coveralls.io/repos/github/syoyo/tinyobjloader/badge.svg?branch=master)](https://coveralls.io/github/syoyo/tinyobjloader?branch=master)
-
-[![Download](https://api.bintray.com/packages/conan/conan-center/tinyobjloader%3A_/images/download.svg)](https://bintray.com/conan/conan-center/tinyobjloader%3A_/_latestVersion)
-
-Tiny but powerful single file wavefront obj loader written in C++03. No dependency except for C++ STL. It can parse over 10M polygons with moderate memory and time.
+Tiny but powerful single file wavefront obj loader written in C++11. No dependency except for C++ STL. It can parse over 10M polygons with moderate memory and time.
 
 `tinyobjloader` is good for embedding .obj loader to your (global illumination) renderer ;-)
 
-If you are looking for C89 version, please see https://github.com/syoyo/tinyobjloader-c .
+If you are looking for C99 version, please see https://github.com/syoyo/tinyobjloader-c .
 
-Notice!
--------
+Version notice
+--------------
+
+We recommend using the `release` (main) branch. It contains the v2.0 release candidate. Most features are now nearly robust and stable. (The remaining tasks for the v2.0 release are polishing the C++ and Python APIs and fixing the built-in triangulation code.)
 
 We have released new version v1.0.0 on 20 Aug, 2016.
 Old version is available as `v0.9.x` branch https://github.com/syoyo/tinyobjloader/tree/v0.9.x
 
 ## What's new
 
+* 29 Jul, 2021 : Added Mapbox's earcut for robust triangulation. Also fixes a triangulation bug (there are still some issues in the built-in triangulation algorithm: https://github.com/tinyobjloader/tinyobjloader/issues/319).
+* 19 Feb, 2020 : The repository has been moved to https://github.com/tinyobjloader/tinyobjloader !
 * 18 May, 2019 : Python binding!(See `python` folder. Also see https://pypi.org/project/tinyobjloader/)
 * 14 Apr, 2019 : Bump version v2.0.0 rc0. New C++ API and python bindings!(1.x API still exists for backward compatibility)
 * 20 Aug, 2016 : Bump version v1.0.0. New data structure and API!
 
 ## Requirements
 
-* C++03 compiler
+* C++11 compiler
 
 ### Old version
 
@@ -68,7 +62,14 @@ TinyObjLoader is successfully used in ...
 * Lighthouse2: https://github.com/jbikker/lighthouse2
 * rayrender(an open source R package for raytracing scenes in created in R): https://github.com/tylermorganwall/rayrender
 * liblava - A modern C++ and easy-to-use framework for the Vulkan API. [MIT]: https://github.com/liblava/liblava
-* Your project here! (Letting us know via github issue is welcome!)
+* rtxON - Simple Vulkan raytracing tutorials  https://github.com/iOrange/rtxON
+* metal-ray-tracer - Writing ray-tracer using Metal Performance Shaders https://github.com/sergeyreznik/metal-ray-tracer https://sergeyreznik.github.io/metal-ray-tracer/index.html
+* Supernova Engine - 2D and 3D projects with Lua or C++ in data oriented design: https://github.com/supernovaengine/supernova
+* AGE (Arc Game Engine) - An open-source engine for building 2D & 3D real-time rendering and interactive contents: https://github.com/MohitSethi99/ArcGameEngine
+* [Wicked Engine<img src="https://github.com/turanszkij/WickedEngine/blob/master/Content/logo_small.png" width="28px" align="center"/>](https://github.com/turanszkij/WickedEngine) - 3D engine with modern graphics
+* [Lumina Game Engine](https://github.com/MrDrElliot/LuminaEngine) - A modern, high-performance game engine built with Vulkan
+* lacecore: Python polygonal mesh library optimized for cloud computation https://github.com/lace/lacecore
+* Your project here! (Please send a PR)
 
 ### Old version(v0.9.x)
 
@@ -102,10 +103,7 @@ TinyObjLoader is successfully used in ...
   * Vertex color(as an extension: https://blender.stackexchange.com/questions/31997/how-can-i-get-vertex-painted-obj-files-to-import-into-blender)
 * Texcoord
 * Normal
-* Material
-  * Unknown material attributes are returned as key-value(value is string) map.
 * Crease tag('t'). This is OpenSubdiv specific(not in wavefront .obj specification)
-* PBR material extension for .MTL. Its proposed here: http://exocortex.com/blog/extending_wavefront_mtl_to_support_pbr
 * Callback API for custom loading.
 * Double precision support(for HPC application).
 * Smoothing group
@@ -122,12 +120,16 @@ TinyObjLoader is successfully used in ...
 * [ ] surface.
 * [ ] Free form curve/surfaces
 
+### Material
+
+* PBR material extension for .MTL. Please see [pbr-mtl.md](pbr-mtl.md) for details.
+* Texture options
+* Unknown material attributes are returned as key-value(value is string) map.
 
 ## TODO
 
 * [ ] Fix obj_sticker example.
 * [ ] More unit test codes.
-* [x] Texture options
 
 ## License
 
@@ -136,6 +138,7 @@ TinyObjLoader is licensed under MIT license.
 ### Third party licenses.
 
 * pybind11 : BSD-style license.
+* mapbox earcut.hpp: ISC License.
 
 ## Usage
 
@@ -143,7 +146,17 @@ TinyObjLoader is licensed under MIT license.
 
 One option is to simply copy the header file into your project and to make sure that `TINYOBJLOADER_IMPLEMENTATION` is defined exactly once.
 
-Tinyobjlaoder is also available as a [conan package](https://bintray.com/conan/conan-center/tinyobjloader%3A_/_latestVersion). Conan integrates with many build systems and lets you avoid manual dependency installation. Their [documentation](https://docs.conan.io/en/latest/getting_started.html) is a great starting point.
+### Building tinyobjloader - Using vcpkg(not recommended though)
+
+Although it is not a recommended way, you can download and install tinyobjloader using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager:
+
+    git clone https://github.com/Microsoft/vcpkg.git
+    cd vcpkg
+    ./bootstrap-vcpkg.sh
+    ./vcpkg integrate install
+    ./vcpkg install tinyobjloader
+
+The tinyobjloader port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
 
 ### Data format
 
@@ -226,10 +239,39 @@ TinyObjLoader now use `real_t` for floating point data type.
 Default is `float(32bit)`.
 You can enable `double(64bit)` precision by using `TINYOBJLOADER_USE_DOUBLE` define.
 
-#### Example code
+### High-performance float parsing (fast_float)
+
+By default, TinyObjLoader embeds [fast_float v8.0.2](https://github.com/fastfloat/fast_float)
+for ~3× faster, bit-exact ASCII-to-float conversion (equivalent to `strtod` but without locale overhead).
+
+To opt out and use the built-in hand-written parser instead, define:
+
+```c++
+#define TINYOBJLOADER_DISABLE_FAST_FLOAT
+#define TINYOBJLOADER_IMPLEMENTATION
+#include "tiny_obj_loader.h"
+```
+
+**Note:** If your project already includes `fast_float` under the `fast_float` namespace,
+defining `TINYOBJLOADER_DISABLE_FAST_FLOAT` avoids a redefinition conflict.
+
+### Robust triangulation
+
+When `triangulation` is enabled (the default),
+TinyObjLoader triangulates polygons (faces with 4 or more vertices).
+
+The built-in triangulation code may not work well for some polygon shapes.
+
+You can define `TINYOBJLOADER_USE_MAPBOX_EARCUT` for robust triangulation using `mapbox/earcut.hpp`.
+This requires a C++11 compiler, and you need to copy `mapbox/earcut.hpp` to your project.
+If you already have your own `mapbox/earcut.hpp` included in your project, you can define `TINYOBJLOADER_DONOT_INCLUDE_MAPBOX_EARCUT` so that `mapbox/earcut.hpp` is not included inside `tiny_obj_loader.h`.
+
+#### Example code (Deprecated API)
 
 ```c++
 #define TINYOBJLOADER_IMPLEMENTATION // define this in only *one* .cc
+// Optional: defining TINYOBJLOADER_USE_MAPBOX_EARCUT enables robust triangulation (requires C++11).
+//#define TINYOBJLOADER_USE_MAPBOX_EARCUT
 #include "tiny_obj_loader.h"
 
 std::string inputfile = "cornell_box.obj";
@@ -259,24 +301,33 @@ for (size_t s = 0; s < shapes.size(); s++) {
   // Loop over faces(polygon)
   size_t index_offset = 0;
   for (size_t f = 0; f < shapes[s].mesh.num_face_vertices.size(); f++) {
-    int fv = shapes[s].mesh.num_face_vertices[f];
+    size_t fv = size_t(shapes[s].mesh.num_face_vertices[f]);
 
     // Loop over vertices in the face.
     for (size_t v = 0; v < fv; v++) {
       // access to vertex
       tinyobj::index_t idx = shapes[s].mesh.indices[index_offset + v];
-      tinyobj::real_t vx = attrib.vertices[3*idx.vertex_index+0];
-      tinyobj::real_t vy = attrib.vertices[3*idx.vertex_index+1];
-      tinyobj::real_t vz = attrib.vertices[3*idx.vertex_index+2];
-      tinyobj::real_t nx = attrib.normals[3*idx.normal_index+0];
-      tinyobj::real_t ny = attrib.normals[3*idx.normal_index+1];
-      tinyobj::real_t nz = attrib.normals[3*idx.normal_index+2];
-      tinyobj::real_t tx = attrib.texcoords[2*idx.texcoord_index+0];
-      tinyobj::real_t ty = attrib.texcoords[2*idx.texcoord_index+1];
+
+      tinyobj::real_t vx = attrib.vertices[3*size_t(idx.vertex_index)+0];
+      tinyobj::real_t vy = attrib.vertices[3*size_t(idx.vertex_index)+1];
+      tinyobj::real_t vz = attrib.vertices[3*size_t(idx.vertex_index)+2];
+
+      // Check if `normal_index` is zero or positive. negative = no normal data
+      if (idx.normal_index >= 0) {
+        tinyobj::real_t nx = attrib.normals[3*size_t(idx.normal_index)+0];
+        tinyobj::real_t ny = attrib.normals[3*size_t(idx.normal_index)+1];
+        tinyobj::real_t nz = attrib.normals[3*size_t(idx.normal_index)+2];
+      }
+
+      // Check if `texcoord_index` is zero or positive. negative = no texcoord data
+      if (idx.texcoord_index >= 0) {
+        tinyobj::real_t tx = attrib.texcoords[2*size_t(idx.texcoord_index)+0];
+        tinyobj::real_t ty = attrib.texcoords[2*size_t(idx.texcoord_index)+1];
+      }
       // Optional: vertex colors
-      // tinyobj::real_t red = attrib.colors[3*idx.vertex_index+0];
-      // tinyobj::real_t green = attrib.colors[3*idx.vertex_index+1];
-      // tinyobj::real_t blue = attrib.colors[3*idx.vertex_index+2];
+      // tinyobj::real_t red   = attrib.colors[3*size_t(idx.vertex_index)+0];
+      // tinyobj::real_t green = attrib.colors[3*size_t(idx.vertex_index)+1];
+      // tinyobj::real_t blue  = attrib.colors[3*size_t(idx.vertex_index)+2];
     }
     index_offset += fv;
 
@@ -287,6 +338,80 @@ for (size_t s = 0; s < shapes.size(); s++) {
 
 ```
 
+#### Example code (New Object Oriented API)
+
+```c++
+#define TINYOBJLOADER_IMPLEMENTATION // define this in only *one* .cc
+// Optional. define TINYOBJLOADER_USE_MAPBOX_EARCUT gives robust triangulation. Requires C++11
+//#define TINYOBJLOADER_USE_MAPBOX_EARCUT
+#include "tiny_obj_loader.h"
+
+
+std::string inputfile = "cornell_box.obj";
+tinyobj::ObjReaderConfig reader_config;
+reader_config.mtl_search_path = "./"; // Path to material files
+
+tinyobj::ObjReader reader;
+
+if (!reader.ParseFromFile(inputfile, reader_config)) {
+  if (!reader.Error().empty()) {
+      std::cerr << "TinyObjReader: " << reader.Error();
+  }
+  exit(1);
+}
+
+if (!reader.Warning().empty()) {
+  std::cout << "TinyObjReader: " << reader.Warning();
+}
+
+auto& attrib = reader.GetAttrib();
+auto& shapes = reader.GetShapes();
+auto& materials = reader.GetMaterials();
+
+// Loop over shapes
+for (size_t s = 0; s < shapes.size(); s++) {
+  // Loop over faces(polygon)
+  size_t index_offset = 0;
+  for (size_t f = 0; f < shapes[s].mesh.num_face_vertices.size(); f++) {
+    size_t fv = size_t(shapes[s].mesh.num_face_vertices[f]);
+
+    // Loop over vertices in the face.
+    for (size_t v = 0; v < fv; v++) {
+      // access to vertex
+      tinyobj::index_t idx = shapes[s].mesh.indices[index_offset + v];
+      tinyobj::real_t vx = attrib.vertices[3*size_t(idx.vertex_index)+0];
+      tinyobj::real_t vy = attrib.vertices[3*size_t(idx.vertex_index)+1];
+      tinyobj::real_t vz = attrib.vertices[3*size_t(idx.vertex_index)+2];
+
+      // Check if `normal_index` is zero or positive. negative = no normal data
+      if (idx.normal_index >= 0) {
+        tinyobj::real_t nx = attrib.normals[3*size_t(idx.normal_index)+0];
+        tinyobj::real_t ny = attrib.normals[3*size_t(idx.normal_index)+1];
+        tinyobj::real_t nz = attrib.normals[3*size_t(idx.normal_index)+2];
+      }
+
+      // Check if `texcoord_index` is zero or positive. negative = no texcoord data
+      if (idx.texcoord_index >= 0) {
+        tinyobj::real_t tx = attrib.texcoords[2*size_t(idx.texcoord_index)+0];
+        tinyobj::real_t ty = attrib.texcoords[2*size_t(idx.texcoord_index)+1];
+      }
+
+      // Optional: vertex colors
+      // tinyobj::real_t red   = attrib.colors[3*size_t(idx.vertex_index)+0];
+      // tinyobj::real_t green = attrib.colors[3*size_t(idx.vertex_index)+1];
+      // tinyobj::real_t blue  = attrib.colors[3*size_t(idx.vertex_index)+2];
+    }
+    index_offset += fv;
+
+    // per-face material
+    shapes[s].mesh.material_ids[f];
+  }
+}
+
+```
+
+
+
 ## Optimized loader
 
 Optimized multi-threaded .obj loader is available at `experimental/` directory.
@@ -300,6 +425,26 @@ Here is some benchmark result. Time are measured on MacBook 12(Early 2016, Core
   * baseline(v1.0.x): 6800 msecs(2.3x faster than old version)
   * optimised: 1500 msecs(10x faster than old version, 4.5x faster than baseline)
 
+## Python binding
+
+```
+$ python -m pip install tinyobjloader
+```
+
+See [python/sample.py](python/sample.py) for example use of Python binding of tinyobjloader.
+
+### CI + PyPI upload
+
+For each git tag event, the cibuildwheel build and the twine upload are handled by GitHub Actions.
+
+#### How to bump version(For developer)
+
+* Bump version in CMakeLists.txt
+* Commit and push `release`. Confirm C.I. build is OK.
+* Create tag starting with `v`(e.g. `v2.1.0`)
+* `git push --tags`
+  * The version is set automatically in the Python binding through setuptools_scm.
+  * The cibuildwheel build and the PyPI upload (through twine) are triggered automatically in GitHub Actions.
 
 ## Tests
 
diff --git a/appveyor.yml b/appveyor.yml
deleted file mode 100644
index 89fd1007..00000000
--- a/appveyor.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-version: 1.0.{build}
-
-platform: x64
-
-install:
-  #######################################################################################
-  # All external dependencies are installed in C:\projects\deps
-  #######################################################################################
-  - mkdir C:\projects\deps
-
-  #######################################################################################
-  # Install Ninja
-  #######################################################################################
-  - set NINJA_URL="https://github.com/ninja-build/ninja/releases/download/v1.6.0/ninja-win.zip"
-  - appveyor DownloadFile %NINJA_URL% -FileName ninja.zip
-  - 7z x ninja.zip -oC:\projects\deps\ninja > nul
-  - set PATH=C:\projects\deps\ninja;%PATH%
-  - ninja --version
-
-build_script:
-  - cd tests
-  - vcbuild.bat
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
deleted file mode 100644
index 213a90ff..00000000
--- a/azure-pipelines.yml
+++ /dev/null
@@ -1,101 +0,0 @@
-variables:
-  CIBW_BEFORE_BUILD: pip install pybind11==2.4.3
-  CIBW_SKIP: cp27-win*
-
-jobs:
-  - job: unit_linux
-    pool: { vmImage: "ubuntu-latest" }
-    steps:
-      - script: |
-          cd tests
-          make && ./tester
-        displayName: Run unit tests
-
-  - job: python_linux
-    pool: { vmImage: "Ubuntu-16.04" }
-    steps:
-      - task: UsePythonVersion@0
-      - bash: |
-          # Make the header files available to the build.
-          cp *.h python
-          python -m pip install --upgrade pip
-          pip install cibuildwheel==0.12.0
-          cd python
-          cibuildwheel --output-dir wheelhouse .
-      - task: PublishBuildArtifacts@1
-        inputs: { pathtoPublish: "python/wheelhouse" }
-      - script: |
-          pip install black==19.10b0
-          black --check python/
-        displayName: Check Python code format
-
-  - job: python_macos
-    pool: { vmImage: "macOS-10.13" }
-    variables:
-      # Support C++11: https://github.com/joerick/cibuildwheel/pull/156
-      MACOSX_DEPLOYMENT_TARGET: 10.9
-    steps:
-      - task: UsePythonVersion@0
-      - bash: |
-          # Make the header files available to the build.
-          cp *.h python
-          python -m pip install --upgrade pip
-          pip install cibuildwheel==0.12.0
-          cd python
-          cibuildwheel --output-dir wheelhouse .
-      - task: PublishBuildArtifacts@1
-        inputs: { pathtoPublish: "python/wheelhouse" }
-
-  - job: python_windows
-    pool: { vmImage: "vs2017-win2016" }
-    steps:
-      - {
-          task: UsePythonVersion@0,
-          inputs: { versionSpec: "2.7", architecture: x86 },
-        }
-      - {
-          task: UsePythonVersion@0,
-          inputs: { versionSpec: "2.7", architecture: x64 },
-        }
-      - {
-          task: UsePythonVersion@0,
-          inputs: { versionSpec: "3.5", architecture: x86 },
-        }
-      - {
-          task: UsePythonVersion@0,
-          inputs: { versionSpec: "3.5", architecture: x64 },
-        }
-      - {
-          task: UsePythonVersion@0,
-          inputs: { versionSpec: "3.6", architecture: x86 },
-        }
-      - {
-          task: UsePythonVersion@0,
-          inputs: { versionSpec: "3.6", architecture: x64 },
-        }
-      - {
-          task: UsePythonVersion@0,
-          inputs: { versionSpec: "3.7", architecture: x86 },
-        }
-      - {
-          task: UsePythonVersion@0,
-          inputs: { versionSpec: "3.7", architecture: x64 },
-        }
-      - script: choco install vcpython27 -f -y
-        displayName: Install Visual C++ for Python 2.7
-      - bash: |
-          cp *.h python
-          python -m pip install --upgrade pip
-          pip install cibuildwheel==0.12.0
-          cd python
-          cibuildwheel --output-dir wheelhouse .
-      - task: PublishBuildArtifacts@1
-        inputs: { pathtoPublish: "python/wheelhouse" }
-
-trigger:
-  - master
-
-pr:
-  branches:
-    include:
-      - "*"
diff --git a/bootstrap-cmake-linux-with-pyhthon.sh b/bootstrap-cmake-linux-with-pyhthon.sh
new file mode 100755
index 00000000..96cf4bf6
--- /dev/null
+++ b/bootstrap-cmake-linux-with-pyhthon.sh
@@ -0,0 +1,20 @@
+curdir=`pwd`
+
+builddir=${curdir}/build_python_module
+
+rm -rf ${builddir}
+mkdir ${builddir}
+
+# set path to pybind11
+# If you install pybind11 through pip, its usually installed to <site-package path>/pybind11.
+pybind11_path=`python -c "import site; print (site.getsitepackages()[0])"`
+echo ${pybind11_path}
+
+CC=clang CXX=clang++ \
+  pybind11_DIR=${pybind11_path}/pybind11 \
+  cmake \
+  -B${builddir} \
+  -DCMAKE_VERBOSE_MAKEFILE=1 \
+  -DTINYOBJLOADER_WITH_PYTHON=1 
+
+cd ${curdir}
diff --git a/cmake/ClangClCMakeCompileRules.cmake b/cmake/ClangClCMakeCompileRules.cmake
new file mode 100644
index 00000000..a3bcf1c2
--- /dev/null
+++ b/cmake/ClangClCMakeCompileRules.cmake
@@ -0,0 +1,9 @@
+# macOS paths usually start with /Users/*. Unfortunately, clang-cl interprets
+# paths starting with /U as macro undefines, so we need to put a -- before the
+# input file path to force it to be treated as a path. CMake's compilation rules
+# should be tweaked accordingly, but until that's done, and to support older
+# CMake versions, overriding compilation rules works well enough. This file will
+# be included by cmake after the default compilation rules have already been set
+# up, so we can just modify them instead of duplicating them entirely.
+string(REPLACE "-c <SOURCE>" "-c -- <SOURCE>" CMAKE_C_COMPILE_OBJECT "${CMAKE_C_COMPILE_OBJECT}")
+string(REPLACE "-c <SOURCE>" "-c -- <SOURCE>" CMAKE_CXX_COMPILE_OBJECT "${CMAKE_CXX_COMPILE_OBJECT}")
diff --git a/cmake/aarch64-linux-gnu.toolchain b/cmake/aarch64-linux-gnu.toolchain
new file mode 100644
index 00000000..cdcdaf25
--- /dev/null
+++ b/cmake/aarch64-linux-gnu.toolchain
@@ -0,0 +1,14 @@
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR aarch64)
+set(CMAKE_C_COMPILER_TARGET aarch64-linux-gnu)
+
+set(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu/)
+
+# Sync with GitHub Actions config
+set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)
+set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)
+
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
diff --git a/cmake/clang-cl-msvc-windows.cmake b/cmake/clang-cl-msvc-windows.cmake
new file mode 100644
index 00000000..e2eac142
--- /dev/null
+++ b/cmake/clang-cl-msvc-windows.cmake
@@ -0,0 +1,327 @@
+# From llvm/cmake/platforms/WinMsvc.cmake
+# Modified to use clang-cl on native Windows.
+
+# Cross toolchain configuration for using clang-cl on non-Windows hosts to
+# target MSVC.
+#
+# Usage:
+# cmake -G Ninja
+#    -DCMAKE_TOOLCHAIN_FILE=/path/to/this/file
+#    -DHOST_ARCH=[aarch64|arm64|armv7|arm|i686|x86|x86_64|x64]
+#    -DLLVM_NATIVE_TOOLCHAIN=/path/to/llvm/installation
+#    -DMSVC_BASE=/path/to/MSVC/system/libraries/and/includes
+#    -DWINSDK_BASE=/path/to/windows-sdk
+#    -DWINSDK_VER=windows sdk version folder name
+#
+# HOST_ARCH:
+#    The architecture to build for.
+#
+# LLVM_NATIVE_TOOLCHAIN:
+#   *Absolute path* to a folder containing the toolchain which will be used to
+#   build.  At a minimum, this folder should have a bin directory with a
+#   copy of clang-cl, clang, clang++, and lld-link, as well as a lib directory
+#   containing clang's system resource directory.
+#
+# MSVC_BASE:
+#   *Absolute path* to the folder containing MSVC headers and system libraries.
+#   The layout of the folder matches that which is installed by MSVC 2017 on
+#   Windows, and should look like this:
+#
+# ${MSVC_BASE}
+#   include
+#     vector
+#     stdint.h
+#     etc...
+#   lib
+#     x64
+#       libcmt.lib
+#       msvcrt.lib
+#       etc...
+#     x86
+#       libcmt.lib
+#       msvcrt.lib
+#       etc...
+#
+# For versions of MSVC < 2017, or where you have a hermetic toolchain in a
+# custom format, you must use symlinks or restructure it to look like the above.
+#
+# WINSDK_BASE:
+#   Together with WINSDK_VER, determines the location of Windows SDK headers
+#   and libraries.
+#
+# WINSDK_VER:
+#   Together with WINSDK_BASE, determines the locations of Windows SDK headers
+#   and libraries.
+#
+# WINSDK_BASE and WINSDK_VER work together to define a folder layout that matches
+# that of the Windows SDK installation on a standard Windows machine.  It should
+# match the layout described below.
+#
+# Note that if you install Windows SDK to a windows machine and simply copy the
+# files, it will already be in the correct layout.
+#
+# ${WINSDK_BASE}
+#   Include
+#     ${WINSDK_VER}
+#       shared
+#       ucrt
+#       um
+#         windows.h
+#         etc...
+#   Lib
+#     ${WINSDK_VER}
+#       ucrt
+#         x64
+#         x86
+#           ucrt.lib
+#           etc...
+#       um
+#         x64
+#         x86
+#           kernel32.lib
+#           etc
+#
+# IMPORTANT: In order for this to work, you will need a valid copy of the Windows
+# SDK and C++ STL headers and libraries on your host.  Additionally, since the
+# Windows libraries and headers are not case-correct, this toolchain file sets
+# up a VFS overlay for the SDK headers and case-correcting symlinks for the
+# libraries when running on a case-sensitive filesystem.
+
+
+# When configuring CMake with a toolchain file against a top-level CMakeLists.txt,
+# it will actually run CMake many times, once for each small test program used to
+# determine what features a compiler supports.  Unfortunately, none of these
+# invocations share a CMakeCache.txt with the top-level invocation, meaning they
+# won't see the value of any arguments the user passed via -D.  Since these are
+# necessary to properly configure MSVC in both the top-level configuration as well as
+# all feature-test invocations, we set environment variables with the values so that
+# these environments get inherited by child invocations. We can switch to
+# CMAKE_TRY_COMPILE_PLATFORM_VARIABLES once our minimum supported CMake version
+# is 3.6 or greater.
+function(init_user_prop prop)  # Mirror the variable named by `prop` to/from the environment variable `_<prop>`.
+  if(${prop})  # The variable is truthy in this invocation: export its value.
+    set(ENV{_${prop}} "${${prop}}")
+  else()  # Otherwise take whatever `_<prop>` holds in the environment (empty if unset).
+    set(${prop} "$ENV{_${prop}}" PARENT_SCOPE)  # PARENT_SCOPE: set the caller's variable, not a function-local one.
+  endif()
+endfunction()
+
+function(generate_winsdk_vfs_overlay winsdk_include_dir output_path)  # Write a YAML overlay to output_path listing the *.h files in each directory under winsdk_include_dir.
+  set(include_dirs)
+  file(GLOB_RECURSE entries LIST_DIRECTORIES true "${winsdk_include_dir}/*")  # Recursive listing that includes directories.
+  foreach(entry ${entries})
+    if(IS_DIRECTORY "${entry}")  # Keep only the directories; winsdk_include_dir itself is not in the list.
+      list(APPEND include_dirs "${entry}")
+    endif()
+  endforeach()
+
+  file(WRITE "${output_path}"  "version: 0\n")  # WRITE replaces any existing file; everything after this is appended.
+  file(APPEND "${output_path}" "case-sensitive: false\n")  # Marks the overlay as case-insensitive.
+  file(APPEND "${output_path}" "roots:\n")
+
+  foreach(dir ${include_dirs})
+    file(GLOB headers RELATIVE "${dir}" "${dir}/*.h")  # Header names directly inside dir (non-recursive), relative to dir.
+    if(NOT headers)  # Directories without headers get no entry.
+      continue()
+    endif()
+
+    file(APPEND "${output_path}" "  - name: \"${dir}\"\n")  # One directory root per header-bearing directory.
+    file(APPEND "${output_path}" "    type: directory\n")
+    file(APPEND "${output_path}" "    contents:\n")
+
+    foreach(header ${headers})
+      file(APPEND "${output_path}" "      - name: \"${header}\"\n")
+      file(APPEND "${output_path}" "        type: file\n")
+      file(APPEND "${output_path}" "        external-contents: \"${dir}/${header}\"\n")  # Maps the entry to the header's real on-disk path.
+    endforeach()
+  endforeach()
+endfunction()
+
+function(generate_winsdk_lib_symlinks winsdk_um_lib_dir output_dir)  # Populate output_dir with lowercase-named symlinks to the entries of winsdk_um_lib_dir.
+  execute_process(COMMAND "${CMAKE_COMMAND}" -E make_directory "${output_dir}")
+  file(GLOB libraries RELATIVE "${winsdk_um_lib_dir}" "${winsdk_um_lib_dir}/*")  # Entry names relative to the lib dir (non-recursive).
+  foreach(library ${libraries})
+    string(TOLOWER "${library}" all_lowercase_symlink_name)
+    if(NOT library STREQUAL all_lowercase_symlink_name)  # Variant 1: fully lowercased name, only when it differs from the original.
+      execute_process(COMMAND "${CMAKE_COMMAND}"
+                              -E create_symlink
+                              "${winsdk_um_lib_dir}/${library}"
+                              "${output_dir}/${all_lowercase_symlink_name}")
+    endif()
+
+    get_filename_component(name_we "${library}" NAME_WE)
+    get_filename_component(ext "${library}" EXT)
+    string(TOLOWER "${ext}" lowercase_ext)
+    set(lowercase_ext_symlink_name "${name_we}${lowercase_ext}")  # Variant 2: original-case base name with a lowercased extension.
+    if(NOT library STREQUAL lowercase_ext_symlink_name AND
+       NOT all_lowercase_symlink_name STREQUAL lowercase_ext_symlink_name)  # Skip when it equals the original or duplicates variant 1.
+      execute_process(COMMAND "${CMAKE_COMMAND}"
+                              -E create_symlink
+                              "${winsdk_um_lib_dir}/${library}"
+                              "${output_dir}/${lowercase_ext_symlink_name}")
+    endif()
+  endforeach()
+endfunction()
+
+set(CMAKE_SYSTEM_NAME Windows)
+set(CMAKE_SYSTEM_VERSION 10.0)
+set(CMAKE_SYSTEM_PROCESSOR AMD64)
+
+init_user_prop(HOST_ARCH)
+init_user_prop(LLVM_NATIVE_TOOLCHAIN)
+init_user_prop(MSVC_BASE)
+init_user_prop(WINSDK_BASE)
+init_user_prop(WINSDK_VER)
+
+if(NOT HOST_ARCH)
+  set(HOST_ARCH x86_64)
+endif()
+if(HOST_ARCH STREQUAL "aarch64" OR HOST_ARCH STREQUAL "arm64")
+  set(TRIPLE_ARCH "aarch64")
+  set(WINSDK_ARCH "arm64")
+elseif(HOST_ARCH STREQUAL "armv7" OR HOST_ARCH STREQUAL "arm")
+  set(TRIPLE_ARCH "armv7")
+  set(WINSDK_ARCH "arm")
+elseif(HOST_ARCH STREQUAL "i686" OR HOST_ARCH STREQUAL "x86")
+  set(TRIPLE_ARCH "i686")
+  set(WINSDK_ARCH "x86")
+elseif(HOST_ARCH STREQUAL "x86_64" OR HOST_ARCH STREQUAL "x64")
+  set(TRIPLE_ARCH "x86_64")
+  set(WINSDK_ARCH "x64")
+else()
+  message(SEND_ERROR "Unknown host architecture ${HOST_ARCH}. Must be aarch64 (or arm64), armv7 (or arm), i686 (or x86), or x86_64 (or x64).")
+endif()
+
+set(MSVC_INCLUDE "${MSVC_BASE}/include")
+set(ATLMFC_INCLUDE "${MSVC_BASE}/atlmfc/include")
+set(MSVC_LIB "${MSVC_BASE}/lib")
+set(ATLMFC_LIB "${MSVC_BASE}/atlmfc/lib")
+set(WINSDK_INCLUDE "${WINSDK_BASE}/Include/${WINSDK_VER}")
+set(WINSDK_LIB "${WINSDK_BASE}/Lib/${WINSDK_VER}")
+
+# Do some sanity checking to make sure we can find a native toolchain and
+# that the Windows SDK / MSVC STL directories look kosher.
+if(NOT EXISTS "${LLVM_NATIVE_TOOLCHAIN}/bin/clang-cl.exe" OR
+   NOT EXISTS "${LLVM_NATIVE_TOOLCHAIN}/bin/lld-link.exe")
+  message(SEND_ERROR
+          "LLVM_NATIVE_TOOLCHAIN folder '${LLVM_NATIVE_TOOLCHAIN}' does not "
+          "point to a valid directory containing bin/clang-cl.exe and bin/lld-link.exe "
+          "binaries")
+endif()
+
+if(NOT EXISTS "${MSVC_BASE}" OR
+   NOT EXISTS "${MSVC_INCLUDE}" OR
+   NOT EXISTS "${MSVC_LIB}")
+  message(SEND_ERROR
+          "CMake variable MSVC_BASE must point to a folder containing MSVC "
+          "system headers and libraries")
+endif()
+
+if(NOT EXISTS "${WINSDK_BASE}" OR
+   NOT EXISTS "${WINSDK_INCLUDE}" OR
+   NOT EXISTS "${WINSDK_LIB}")
+  message(SEND_ERROR
+          "CMake variable WINSDK_BASE and WINSDK_VER must resolve to a valid "
+          "Windows SDK installation")
+endif()
+
+if(NOT EXISTS "${WINSDK_INCLUDE}/um/Windows.h")
+  message(SEND_ERROR "Cannot find Windows.h")
+endif()
+if(NOT EXISTS "${WINSDK_INCLUDE}/um/WINDOWS.H")
+  set(case_sensitive_filesystem TRUE)
+endif()
+
+set(CMAKE_C_COMPILER "${LLVM_NATIVE_TOOLCHAIN}/bin/clang-cl.exe" CACHE FILEPATH "")
+set(CMAKE_CXX_COMPILER "${LLVM_NATIVE_TOOLCHAIN}/bin/clang-cl.exe" CACHE FILEPATH "")
+set(CMAKE_LINKER "${LLVM_NATIVE_TOOLCHAIN}/bin/lld-link.exe" CACHE FILEPATH "")
+
+# Even though we're cross-compiling, we need some native tools (e.g. llvm-tblgen), and those
+# native tools have to be built before we can start doing the cross-build.  LLVM supports
+# a CROSS_TOOLCHAIN_FLAGS_NATIVE argument which consists of a list of flags to pass to CMake
+# when configuring the NATIVE portion of the cross-build.  By default we construct this so
+# that it points to the tools in the same location as the native clang-cl that we're using.
+list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_ASM_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang")
+list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_C_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang")
+list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_CXX_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang++")
+
+set(CROSS_TOOLCHAIN_FLAGS_NATIVE "${_CTF_NATIVE_DEFAULT}" CACHE STRING "")
+
+set(COMPILE_FLAGS
+    -D_CRT_SECURE_NO_WARNINGS
+    --target=${TRIPLE_ARCH}-windows-msvc
+    -fms-compatibility-version=19.11
+    -imsvc "\"${ATLMFC_INCLUDE}\""
+    -imsvc "\"${MSVC_INCLUDE}\""
+    -imsvc "\"${WINSDK_INCLUDE}/ucrt\""
+    -imsvc "\"${WINSDK_INCLUDE}/shared\""
+    -imsvc "\"${WINSDK_INCLUDE}/um\""
+    -imsvc "\"${WINSDK_INCLUDE}/winrt\"")
+
+if(case_sensitive_filesystem)
+  # Ensure all sub-configures use the top-level VFS overlay instead of generating their own.
+  init_user_prop(winsdk_vfs_overlay_path)
+  if(NOT winsdk_vfs_overlay_path)
+    set(winsdk_vfs_overlay_path "${CMAKE_BINARY_DIR}/winsdk_vfs_overlay.yaml")
+    generate_winsdk_vfs_overlay("${WINSDK_BASE}/Include/${WINSDK_VER}" "${winsdk_vfs_overlay_path}")
+    init_user_prop(winsdk_vfs_overlay_path)
+  endif()
+  list(APPEND COMPILE_FLAGS
+       -Xclang -ivfsoverlay -Xclang "${winsdk_vfs_overlay_path}")
+endif()
+
+string(REPLACE ";" " " COMPILE_FLAGS "${COMPILE_FLAGS}")
+
+# We need to preserve any flags that were passed in by the user. However, we
+# can't append to CMAKE_C_FLAGS and friends directly, because toolchain files
+# will be re-invoked on each reconfigure and therefore need to be idempotent.
+# The assignments to the _INITIAL cache variables don't use FORCE, so they'll
+# only be populated on the initial configure, and their values won't change
+# afterward.
+set(_CMAKE_C_FLAGS_INITIAL "${CMAKE_C_FLAGS}" CACHE STRING "")
+set(CMAKE_C_FLAGS "${_CMAKE_C_FLAGS_INITIAL} ${COMPILE_FLAGS}" CACHE STRING "" FORCE)
+
+set(_CMAKE_CXX_FLAGS_INITIAL "${CMAKE_CXX_FLAGS}" CACHE STRING "")
+set(CMAKE_CXX_FLAGS "${_CMAKE_CXX_FLAGS_INITIAL} ${COMPILE_FLAGS}" CACHE STRING "" FORCE)
+
+set(LINK_FLAGS
+    # Prevent CMake from attempting to invoke mt.exe. It only recognizes the slashed form and not the dashed form.
+    /manifest:no
+
+    -libpath:"${ATLMFC_LIB}/${WINSDK_ARCH}"
+    -libpath:"${MSVC_LIB}/${WINSDK_ARCH}"
+    -libpath:"${WINSDK_LIB}/ucrt/${WINSDK_ARCH}"
+    -libpath:"${WINSDK_LIB}/um/${WINSDK_ARCH}")
+
+if(case_sensitive_filesystem)
+  # Ensure all sub-configures use the top-level symlinks dir instead of generating their own.
+  init_user_prop(winsdk_lib_symlinks_dir)
+  if(NOT winsdk_lib_symlinks_dir)
+    set(winsdk_lib_symlinks_dir "${CMAKE_BINARY_DIR}/winsdk_lib_symlinks")
+    generate_winsdk_lib_symlinks("${WINSDK_BASE}/Lib/${WINSDK_VER}/um/${WINSDK_ARCH}" "${winsdk_lib_symlinks_dir}")
+    init_user_prop(winsdk_lib_symlinks_dir)
+  endif()
+  list(APPEND LINK_FLAGS
+       -libpath:"${winsdk_lib_symlinks_dir}")
+endif()
+
+string(REPLACE ";" " " LINK_FLAGS "${LINK_FLAGS}")
+
+# See explanation for compiler flags above for the _INITIAL variables.
+set(_CMAKE_EXE_LINKER_FLAGS_INITIAL "${CMAKE_EXE_LINKER_FLAGS}" CACHE STRING "")
+set(CMAKE_EXE_LINKER_FLAGS "${_CMAKE_EXE_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE)
+
+set(_CMAKE_MODULE_LINKER_FLAGS_INITIAL "${CMAKE_MODULE_LINKER_FLAGS}" CACHE STRING "")
+set(CMAKE_MODULE_LINKER_FLAGS "${_CMAKE_MODULE_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE)
+
+set(_CMAKE_SHARED_LINKER_FLAGS_INITIAL "${CMAKE_SHARED_LINKER_FLAGS}" CACHE STRING "")
+set(CMAKE_SHARED_LINKER_FLAGS "${_CMAKE_SHARED_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE)
+
+# CMake populates these with a bunch of unnecessary libraries, which requires
+# extra case-correcting symlinks and what not. Instead, let projects explicitly
+# control which libraries they require.
+set(CMAKE_C_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
+set(CMAKE_CXX_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
+
+# Allow clang-cl to work with macOS paths.
+set(CMAKE_USER_MAKE_RULES_OVERRIDE "${CMAKE_CURRENT_LIST_DIR}/ClangClCMakeCompileRules.cmake")
diff --git a/cmake/clang-cl-msvc-wsl.cmake b/cmake/clang-cl-msvc-wsl.cmake
new file mode 100644
index 00000000..ffe21314
--- /dev/null
+++ b/cmake/clang-cl-msvc-wsl.cmake
@@ -0,0 +1,327 @@
+# From llvm/cmake/platforms/WinMsvc.cmake
+# Modified to use clang-cl on native Windows.
+
+# Cross toolchain configuration for using clang-cl on non-Windows hosts to
+# target MSVC.
+#
+# Usage:
+# cmake -G Ninja
+#    -DCMAKE_TOOLCHAIN_FILE=/path/to/this/file
+#    -DHOST_ARCH=[aarch64|arm64|armv7|arm|i686|x86|x86_64|x64]
+#    -DLLVM_NATIVE_TOOLCHAIN=/path/to/llvm/installation
+#    -DMSVC_BASE=/path/to/MSVC/system/libraries/and/includes
+#    -DWINSDK_BASE=/path/to/windows-sdk
+#    -DWINSDK_VER=windows sdk version folder name
+#
+# HOST_ARCH:
+#    The architecture to build for.
+#
+# LLVM_NATIVE_TOOLCHAIN:
+#   *Absolute path* to a folder containing the toolchain which will be used to
+#   build.  At a minimum, this folder should have a bin directory with a
+#   copy of clang-cl, clang, clang++, and lld-link, as well as a lib directory
+#   containing clang's system resource directory.
+#
+# MSVC_BASE:
+#   *Absolute path* to the folder containing MSVC headers and system libraries.
+#   The layout of the folder matches that which is installed by MSVC 2017 on
+#   Windows, and should look like this:
+#
+# ${MSVC_BASE}
+#   include
+#     vector
+#     stdint.h
+#     etc...
+#   lib
+#     x64
+#       libcmt.lib
+#       msvcrt.lib
+#       etc...
+#     x86
+#       libcmt.lib
+#       msvcrt.lib
+#       etc...
+#
+# For versions of MSVC < 2017, or where you have a hermetic toolchain in a
+# custom format, you must use symlinks or restructure it to look like the above.
+#
+# WINSDK_BASE:
+#   Together with WINSDK_VER, determines the location of Windows SDK headers
+#   and libraries.
+#
+# WINSDK_VER:
+#   Together with WINSDK_BASE, determines the locations of Windows SDK headers
+#   and libraries.
+#
+# WINSDK_BASE and WINSDK_VER work together to define a folder layout that matches
+# that of the Windows SDK installation on a standard Windows machine.  It should
+# match the layout described below.
+#
+# Note that if you install Windows SDK to a windows machine and simply copy the
+# files, it will already be in the correct layout.
+#
+# ${WINSDK_BASE}
+#   Include
+#     ${WINSDK_VER}
+#       shared
+#       ucrt
+#       um
+#         windows.h
+#         etc...
+#   Lib
+#     ${WINSDK_VER}
+#       ucrt
+#         x64
+#         x86
+#           ucrt.lib
+#           etc...
+#       um
+#         x64
+#         x86
+#           kernel32.lib
+#           etc
+#
+# IMPORTANT: In order for this to work, you will need a valid copy of the Windows
+# SDK and C++ STL headers and libraries on your host.  Additionally, since the
+# Windows libraries and headers are not case-correct, this toolchain file sets
+# up a VFS overlay for the SDK headers and case-correcting symlinks for the
+# libraries when running on a case-sensitive filesystem.
+
+
+# When configuring CMake with a toolchain file against a top-level CMakeLists.txt,
+# it will actually run CMake many times, once for each small test program used to
+# determine what features a compiler supports.  Unfortunately, none of these
+# invocations share a CMakeCache.txt with the top-level invocation, meaning they
+# won't see the value of any arguments the user passed via -D.  Since these are
+# necessary to properly configure MSVC in both the top-level configuration as well as
+# all feature-test invocations, we set environment variables with the values so that
+# these environments get inherited by child invocations. We can switch to
+# CMAKE_TRY_COMPILE_PLATFORM_VARIABLES once our minimum supported CMake version
+# is 3.6 or greater.
+function(init_user_prop prop)
+  if(${prop})
+    set(ENV{_${prop}} "${${prop}}")  # value is set: publish it as env var _<prop>
+  else()
+    set(${prop} "$ENV{_${prop}}" PARENT_SCOPE)  # otherwise: read it back from env var _<prop>
+  endif()
+endfunction()
+
+# Write a VFS overlay (YAML) to output_path listing every *.h file in each directory below winsdk_include_dir.
+function(generate_winsdk_vfs_overlay winsdk_include_dir output_path)
+  set(include_dirs)
+  file(GLOB_RECURSE entries LIST_DIRECTORIES true "${winsdk_include_dir}/*")
+  foreach(entry ${entries})
+    if(IS_DIRECTORY "${entry}")
+      list(APPEND include_dirs "${entry}")
+    endif()
+  endforeach()
+  # Overlay preamble: format version, case-insensitive lookup, start of the roots list.
+  file(WRITE "${output_path}"  "version: 0\n")
+  file(APPEND "${output_path}" "case-sensitive: false\n")
+  file(APPEND "${output_path}" "roots:\n")
+  # One "directory" root per directory that directly contains *.h files.
+  foreach(dir ${include_dirs})
+    file(GLOB headers RELATIVE "${dir}" "${dir}/*.h")
+    if(NOT headers)
+      continue()  # no headers directly in this directory: emit nothing for it
+    endif()
+    file(APPEND "${output_path}" "  - name: \"${dir}\"\n")
+    file(APPEND "${output_path}" "    type: directory\n")
+    file(APPEND "${output_path}" "    contents:\n")
+
+    foreach(header ${headers})
+      file(APPEND "${output_path}" "      - name: \"${header}\"\n")
+      file(APPEND "${output_path}" "        type: file\n")
+      file(APPEND "${output_path}" "        external-contents: \"${dir}/${header}\"\n")
+    endforeach()
+  endforeach()
+endfunction()
+
+# Create output_dir and add lower-cased-name symlinks pointing at the entries of winsdk_um_lib_dir.
+function(generate_winsdk_lib_symlinks winsdk_um_lib_dir output_dir)
+  execute_process(COMMAND "${CMAKE_COMMAND}" -E make_directory "${output_dir}")
+  file(GLOB libraries RELATIVE "${winsdk_um_lib_dir}" "${winsdk_um_lib_dir}/*")
+  foreach(library ${libraries})
+    string(TOLOWER "${library}" all_lowercase_symlink_name)  # variant 1: whole file name lower-cased
+    if(NOT library STREQUAL all_lowercase_symlink_name)
+      execute_process(COMMAND "${CMAKE_COMMAND}"
+                              -E create_symlink
+                              "${winsdk_um_lib_dir}/${library}"
+                              "${output_dir}/${all_lowercase_symlink_name}")
+    endif()
+    get_filename_component(name_we "${library}" NAME_WE)  # variant 2: only the extension lower-cased
+    get_filename_component(ext "${library}" EXT)
+    string(TOLOWER "${ext}" lowercase_ext)
+    set(lowercase_ext_symlink_name "${name_we}${lowercase_ext}")
+    if(NOT library STREQUAL lowercase_ext_symlink_name AND
+       NOT all_lowercase_symlink_name STREQUAL lowercase_ext_symlink_name)  # skip if same as original or variant 1
+      execute_process(COMMAND "${CMAKE_COMMAND}"
+                              -E create_symlink
+                              "${winsdk_um_lib_dir}/${library}"
+                              "${output_dir}/${lowercase_ext_symlink_name}")
+    endif()
+  endforeach()
+endfunction()
+
+set(CMAKE_SYSTEM_NAME Windows)
+set(CMAKE_SYSTEM_VERSION 10.0)
+set(CMAKE_SYSTEM_PROCESSOR AMD64)
+
+init_user_prop(HOST_ARCH)
+init_user_prop(LLVM_NATIVE_TOOLCHAIN)
+init_user_prop(MSVC_BASE)
+init_user_prop(WINSDK_BASE)
+init_user_prop(WINSDK_VER)
+
+if(NOT HOST_ARCH)
+  set(HOST_ARCH x86_64)
+endif()
+if(HOST_ARCH STREQUAL "aarch64" OR HOST_ARCH STREQUAL "arm64")
+  set(TRIPLE_ARCH "aarch64")
+  set(WINSDK_ARCH "arm64")
+elseif(HOST_ARCH STREQUAL "armv7" OR HOST_ARCH STREQUAL "arm")
+  set(TRIPLE_ARCH "armv7")
+  set(WINSDK_ARCH "arm")
+elseif(HOST_ARCH STREQUAL "i686" OR HOST_ARCH STREQUAL "x86")
+  set(TRIPLE_ARCH "i686")
+  set(WINSDK_ARCH "x86")
+elseif(HOST_ARCH STREQUAL "x86_64" OR HOST_ARCH STREQUAL "x64")
+  set(TRIPLE_ARCH "x86_64")
+  set(WINSDK_ARCH "x64")
+else()
+  message(SEND_ERROR "Unknown host architecture ${HOST_ARCH}. Must be aarch64 (or arm64), armv7 (or arm), i686 (or x86), or x86_64 (or x64).")
+endif()
+
+set(MSVC_INCLUDE "${MSVC_BASE}/include")
+set(ATLMFC_INCLUDE "${MSVC_BASE}/atlmfc/include")
+set(MSVC_LIB "${MSVC_BASE}/lib")
+set(ATLMFC_LIB "${MSVC_BASE}/atlmfc/lib")
+set(WINSDK_INCLUDE "${WINSDK_BASE}/Include/${WINSDK_VER}")
+set(WINSDK_LIB "${WINSDK_BASE}/Lib/${WINSDK_VER}")
+
+# Do some sanity checking to make sure we can find a native toolchain and
+# that the Windows SDK / MSVC STL directories look kosher.
+if(NOT EXISTS "${LLVM_NATIVE_TOOLCHAIN}/bin/clang-cl" OR
+   NOT EXISTS "${LLVM_NATIVE_TOOLCHAIN}/bin/lld-link")
+  message(SEND_ERROR
+          "LLVM_NATIVE_TOOLCHAIN folder '${LLVM_NATIVE_TOOLCHAIN}' does not "
+          "point to a valid directory containing bin/clang-cl and bin/lld-link "
+          "binaries")
+endif()
+
+if(NOT EXISTS "${MSVC_BASE}" OR
+   NOT EXISTS "${MSVC_INCLUDE}" OR
+   NOT EXISTS "${MSVC_LIB}")
+  message(SEND_ERROR
+          "CMake variable MSVC_BASE must point to a folder containing MSVC "
+          "system headers and libraries")
+endif()
+
+if(NOT EXISTS "${WINSDK_BASE}" OR
+   NOT EXISTS "${WINSDK_INCLUDE}" OR
+   NOT EXISTS "${WINSDK_LIB}")
+  message(SEND_ERROR
+          "CMake variable WINSDK_BASE and WINSDK_VER must resolve to a valid "
+          "Windows SDK installation")
+endif()
+
+if(NOT EXISTS "${WINSDK_INCLUDE}/um/Windows.h")
+  message(SEND_ERROR "Cannot find Windows.h")
+endif()
+if(NOT EXISTS "${WINSDK_INCLUDE}/um/WINDOWS.H")
+  set(case_sensitive_filesystem TRUE)
+endif()
+
+set(CMAKE_C_COMPILER "${LLVM_NATIVE_TOOLCHAIN}/bin/clang-cl" CACHE FILEPATH "")
+set(CMAKE_CXX_COMPILER "${LLVM_NATIVE_TOOLCHAIN}/bin/clang-cl" CACHE FILEPATH "")
+set(CMAKE_LINKER "${LLVM_NATIVE_TOOLCHAIN}/bin/lld-link" CACHE FILEPATH "")
+
+# Even though we're cross-compiling, we need some native tools (e.g. llvm-tblgen), and those
+# native tools have to be built before we can start doing the cross-build.  LLVM supports
+# a CROSS_TOOLCHAIN_FLAGS_NATIVE argument which consists of a list of flags to pass to CMake
+# when configuring the NATIVE portion of the cross-build.  By default we construct this so
+# that it points to the tools in the same location as the native clang-cl that we're using.
+list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_ASM_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang")
+list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_C_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang")
+list(APPEND _CTF_NATIVE_DEFAULT "-DCMAKE_CXX_COMPILER=${LLVM_NATIVE_TOOLCHAIN}/bin/clang++")
+
+set(CROSS_TOOLCHAIN_FLAGS_NATIVE "${_CTF_NATIVE_DEFAULT}" CACHE STRING "")
+
+set(COMPILE_FLAGS
+    -D_CRT_SECURE_NO_WARNINGS
+    --target=${TRIPLE_ARCH}-windows-msvc
+    -fms-compatibility-version=19.11
+    -imsvc "\"${ATLMFC_INCLUDE}\""
+    -imsvc "\"${MSVC_INCLUDE}\""
+    -imsvc "\"${WINSDK_INCLUDE}/ucrt\""
+    -imsvc "\"${WINSDK_INCLUDE}/shared\""
+    -imsvc "\"${WINSDK_INCLUDE}/um\""
+    -imsvc "\"${WINSDK_INCLUDE}/winrt\"")
+
+if(case_sensitive_filesystem)
+  # Ensure all sub-configures use the top-level VFS overlay instead of generating their own.
+  init_user_prop(winsdk_vfs_overlay_path)
+  if(NOT winsdk_vfs_overlay_path)
+    set(winsdk_vfs_overlay_path "${CMAKE_BINARY_DIR}/winsdk_vfs_overlay.yaml")
+    generate_winsdk_vfs_overlay("${WINSDK_BASE}/Include/${WINSDK_VER}" "${winsdk_vfs_overlay_path}")
+    init_user_prop(winsdk_vfs_overlay_path)
+  endif()
+  list(APPEND COMPILE_FLAGS
+       -Xclang -ivfsoverlay -Xclang "${winsdk_vfs_overlay_path}")
+endif()
+
+string(REPLACE ";" " " COMPILE_FLAGS "${COMPILE_FLAGS}")
+
+# We need to preserve any flags that were passed in by the user. However, we
+# can't append to CMAKE_C_FLAGS and friends directly, because toolchain files
+# will be re-invoked on each reconfigure and therefore need to be idempotent.
+# The assignments to the _INITIAL cache variables don't use FORCE, so they'll
+# only be populated on the initial configure, and their values won't change
+# afterward.
+set(_CMAKE_C_FLAGS_INITIAL "${CMAKE_C_FLAGS}" CACHE STRING "")
+set(CMAKE_C_FLAGS "${_CMAKE_C_FLAGS_INITIAL} ${COMPILE_FLAGS}" CACHE STRING "" FORCE)
+
+set(_CMAKE_CXX_FLAGS_INITIAL "${CMAKE_CXX_FLAGS}" CACHE STRING "")
+set(CMAKE_CXX_FLAGS "${_CMAKE_CXX_FLAGS_INITIAL} ${COMPILE_FLAGS}" CACHE STRING "" FORCE)
+
+set(LINK_FLAGS
+    # Prevent CMake from attempting to invoke mt.exe. It only recognizes the slashed form and not the dashed form.
+    /manifest:no
+
+    -libpath:"${ATLMFC_LIB}/${WINSDK_ARCH}"
+    -libpath:"${MSVC_LIB}/${WINSDK_ARCH}"
+    -libpath:"${WINSDK_LIB}/ucrt/${WINSDK_ARCH}"
+    -libpath:"${WINSDK_LIB}/um/${WINSDK_ARCH}")
+
+if(case_sensitive_filesystem)
+  # Ensure all sub-configures use the top-level symlinks dir instead of generating their own.
+  init_user_prop(winsdk_lib_symlinks_dir)
+  if(NOT winsdk_lib_symlinks_dir)
+    set(winsdk_lib_symlinks_dir "${CMAKE_BINARY_DIR}/winsdk_lib_symlinks")
+    generate_winsdk_lib_symlinks("${WINSDK_BASE}/Lib/${WINSDK_VER}/um/${WINSDK_ARCH}" "${winsdk_lib_symlinks_dir}")
+    init_user_prop(winsdk_lib_symlinks_dir)
+  endif()
+  list(APPEND LINK_FLAGS
+       -libpath:"${winsdk_lib_symlinks_dir}")
+endif()
+
+string(REPLACE ";" " " LINK_FLAGS "${LINK_FLAGS}")
+
+# See explanation for compiler flags above for the _INITIAL variables.
+set(_CMAKE_EXE_LINKER_FLAGS_INITIAL "${CMAKE_EXE_LINKER_FLAGS}" CACHE STRING "")
+set(CMAKE_EXE_LINKER_FLAGS "${_CMAKE_EXE_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE)
+
+set(_CMAKE_MODULE_LINKER_FLAGS_INITIAL "${CMAKE_MODULE_LINKER_FLAGS}" CACHE STRING "")
+set(CMAKE_MODULE_LINKER_FLAGS "${_CMAKE_MODULE_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE)
+
+set(_CMAKE_SHARED_LINKER_FLAGS_INITIAL "${CMAKE_SHARED_LINKER_FLAGS}" CACHE STRING "")
+set(CMAKE_SHARED_LINKER_FLAGS "${_CMAKE_SHARED_LINKER_FLAGS_INITIAL} ${LINK_FLAGS}" CACHE STRING "" FORCE)
+
+# CMake populates these with a bunch of unnecessary libraries, which requires
+# extra case-correcting symlinks and what not. Instead, let projects explicitly
+# control which libraries they require.
+set(CMAKE_C_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
+set(CMAKE_CXX_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
+
+# Allow clang-cl to work with macOS paths.
+set(CMAKE_USER_MAKE_RULES_OVERRIDE "${CMAKE_CURRENT_LIST_DIR}/ClangClCMakeCompileRules.cmake")
diff --git a/cmake/linux_i386.toolchain.cmake b/cmake/linux_i386.toolchain.cmake
new file mode 100644
index 00000000..9c4a5094
--- /dev/null
+++ b/cmake/linux_i386.toolchain.cmake
@@ -0,0 +1,17 @@
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR "i386")
+set(CMAKE_C_COMPILER_TARGET i386-linux-gnu)
+
+# Assume debian/ubuntu
+#set(CMAKE_FIND_ROOT_PATH /usr/lib/i386-linux-gnu/)
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
+
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+# https://stackoverflow.com/questions/41557927/using-usr-lib-i386-linux-gnu-instead-of-usr-lib-x86-64-linux-gnu-to-find-libra
+set(FIND_LIBRARY_USE_LIB64_PATHS OFF)
diff --git a/cmake/llvm-mingw-cross.cmake b/cmake/llvm-mingw-cross.cmake
new file mode 100644
index 00000000..f7e1759c
--- /dev/null
+++ b/cmake/llvm-mingw-cross.cmake
@@ -0,0 +1,24 @@
+SET(CMAKE_SYSTEM_NAME Windows)
+
+IF (DEFINED ENV{LLVM_MINGW_DIR})
+  SET(LLVM_MINGW_ROOT "$ENV{LLVM_MINGW_DIR}")
+ELSE ()
+  SET(LLVM_MINGW_ROOT "/mnt/data/local/llvm-mingw-20200325-ubuntu-18.04")
+ENDIF()
+
+
+SET(CMAKE_C_COMPILER ${LLVM_MINGW_ROOT}/bin/x86_64-w64-mingw32-clang)
+SET(CMAKE_CXX_COMPILER ${LLVM_MINGW_ROOT}/bin/x86_64-w64-mingw32-clang++)
+SET(CMAKE_RC_COMPILER ${LLVM_MINGW_ROOT}/bin/x86_64-w64-mingw32-windres)
+
+#SET(CMAKE_C_LINK_EXECUTABLE x86_64-w64-mingw32-gcc)
+#SET(CMAKE_CXX_LINK_EXECUTABLE x86_64-w64-mingw32-g++)
+
+SET(CMAKE_FIND_ROOT_PATH ${LLVM_MINGW_ROOT}/x86_64-w64-mingw32)
+
+# We may need some advanced thread APIs to compile, so enable 0x601(Win7) if required.
+# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_WIN32_WINNT=0x601")
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/cmake/llvm-mingw-win64.cmake b/cmake/llvm-mingw-win64.cmake
new file mode 100644
index 00000000..439b5240
--- /dev/null
+++ b/cmake/llvm-mingw-win64.cmake
@@ -0,0 +1,20 @@
+SET(CMAKE_SYSTEM_NAME Windows)
+
+IF (DEFINED ENV{LLVM_MINGW_DIR})
+  SET(LLVM_MINGW_ROOT "$ENV{LLVM_MINGW_DIR}")
+ELSE ()
+  SET(LLVM_MINGW_ROOT "C:/ProgramData/llvm-mingw")
+ENDIF()
+
+SET(CMAKE_C_COMPILER ${LLVM_MINGW_ROOT}/bin/x86_64-w64-mingw32-clang.exe)
+SET(CMAKE_CXX_COMPILER ${LLVM_MINGW_ROOT}/bin/x86_64-w64-mingw32-clang++.exe)
+SET(CMAKE_RC_COMPILER ${LLVM_MINGW_ROOT}/bin/x86_64-w64-mingw32-windres.exe)
+
+SET(CMAKE_FIND_ROOT_PATH ${LLVM_MINGW_ROOT}/x86_64-w64-mingw32)
+
+# We may need some advanced thread APIs to compile, so enable 0x601(Win7) if required.
+# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_WIN32_WINNT=0x601")
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/cmake/mingw64-cross.cmake b/cmake/mingw64-cross.cmake
new file mode 100644
index 00000000..479cdd4d
--- /dev/null
+++ b/cmake/mingw64-cross.cmake
@@ -0,0 +1,20 @@
+SET(CMAKE_SYSTEM_NAME Windows)
+
+IF (DEFINED ENV{MINGW_GCC_DIR})
+  SET(MINGW_GCC_ROOT "$ENV{MINGW_GCC_DIR}")
+ELSE ()
+  # Assume mingw cross compiler is installed in your system
+  SET(MINGW_GCC_ROOT "/usr")
+ENDIF()
+
+# win32 may fail to compile with C++11 threads.
+
+SET(CMAKE_C_COMPILER ${MINGW_GCC_ROOT}/bin/x86_64-w64-mingw32-gcc-posix)
+SET(CMAKE_CXX_COMPILER ${MINGW_GCC_ROOT}/bin/x86_64-w64-mingw32-g++-posix)
+SET(CMAKE_RC_COMPILER ${MINGW_GCC_ROOT}/bin/x86_64-w64-mingw32-windres)
+
+SET(CMAKE_FIND_ROOT_PATH ${MINGW_GCC_ROOT}/x86_64-w64-mingw32)
+
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
diff --git a/cmake/sanitizers/FindASan.cmake b/cmake/sanitizers/FindASan.cmake
new file mode 100644
index 00000000..98ea7cb3
--- /dev/null
+++ b/cmake/sanitizers/FindASan.cmake
@@ -0,0 +1,59 @@
+# The MIT License (MIT)
+#
+# Copyright (c)
+#   2013 Matthew Arsenault
+#   2015-2016 RWTH Aachen University, Federal Republic of Germany
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+option(SANITIZE_ADDRESS "Enable AddressSanitizer for sanitized targets." Off)
+
+set(FLAG_CANDIDATES
+    # Clang 3.2+ use this version. The no-omit-frame-pointer option is optional.
+    "-g -fsanitize=address -fno-omit-frame-pointer"
+    "-g -fsanitize=address"
+
+    # Older deprecated flag for ASan
+    "-g -faddress-sanitizer"
+)
+
+
+if (SANITIZE_ADDRESS AND (SANITIZE_THREAD OR SANITIZE_MEMORY))
+    message(FATAL_ERROR "AddressSanitizer is not compatible with "
+        "ThreadSanitizer or MemorySanitizer.")
+endif ()
+
+
+include(sanitize-helpers)
+
+if (SANITIZE_ADDRESS)
+    sanitizer_check_compiler_flags("${FLAG_CANDIDATES}" "AddressSanitizer"
+        "ASan")
+
+    find_program(ASan_WRAPPER "asan-wrapper" PATHS ${CMAKE_MODULE_PATH})
+	mark_as_advanced(ASan_WRAPPER)
+endif ()
+
+# Pass TARGET to sanitizer_add_flags for AddressSanitizer ("ASan").
+# Does nothing unless SANITIZE_ADDRESS is enabled.
+function (add_sanitize_address TARGET)
+    if (SANITIZE_ADDRESS)
+        sanitizer_add_flags(${TARGET} "AddressSanitizer" "ASan")
+    endif ()
+endfunction ()
diff --git a/cmake/sanitizers/FindMSan.cmake b/cmake/sanitizers/FindMSan.cmake
new file mode 100644
index 00000000..22d0050e
--- /dev/null
+++ b/cmake/sanitizers/FindMSan.cmake
@@ -0,0 +1,57 @@
+# The MIT License (MIT)
+#
+# Copyright (c)
+#   2013 Matthew Arsenault
+#   2015-2016 RWTH Aachen University, Federal Republic of Germany
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+option(SANITIZE_MEMORY "Enable MemorySanitizer for sanitized targets." Off)
+
+set(FLAG_CANDIDATES
+    "-g -fsanitize=memory"
+)
+
+
+include(sanitize-helpers)
+
+if (SANITIZE_MEMORY)  # Check platform support first; only then probe the compiler flags.
+    if (NOT CMAKE_SYSTEM_NAME STREQUAL "Linux")
+        message(WARNING "MemorySanitizer disabled because "
+            "MemorySanitizer is supported for Linux systems only.")
+        set(SANITIZE_MEMORY Off CACHE BOOL
+            "Enable MemorySanitizer for sanitized targets." FORCE)
+    elseif (NOT CMAKE_SIZEOF_VOID_P EQUAL 8)  # pointer size in bytes; 8 means 64-bit
+        message(WARNING "MemorySanitizer disabled because "
+            "MemorySanitizer is supported for 64bit systems only.")
+        set(SANITIZE_MEMORY Off CACHE BOOL
+            "Enable MemorySanitizer for sanitized targets." FORCE)
+    else ()
+        sanitizer_check_compiler_flags("${FLAG_CANDIDATES}" "MemorySanitizer"
+            "MSan")
+    endif ()
+endif ()
+
+# Pass TARGET to sanitizer_add_flags for MemorySanitizer ("MSan").
+# Does nothing unless SANITIZE_MEMORY is enabled.
+function (add_sanitize_memory TARGET)
+    if (SANITIZE_MEMORY)
+        sanitizer_add_flags(${TARGET} "MemorySanitizer" "MSan")
+    endif ()
+endfunction ()
diff --git a/cmake/sanitizers/FindSanitizers.cmake b/cmake/sanitizers/FindSanitizers.cmake
new file mode 100755
index 00000000..101bab84
--- /dev/null
+++ b/cmake/sanitizers/FindSanitizers.cmake
@@ -0,0 +1,94 @@
+# The MIT License (MIT)
+#
+# Copyright (c)
+#   2013 Matthew Arsenault
+#   2015-2016 RWTH Aachen University, Federal Republic of Germany
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# If any of the compilers in use is a GNU compiler, add a second option to link
+# statically against the sanitizers.
+option(SANITIZE_LINK_STATIC "Try to link static against sanitizers." Off)
+
+
+
+
+set(FIND_QUIETLY_FLAG "")
+if (DEFINED Sanitizers_FIND_QUIETLY)
+    set(FIND_QUIETLY_FLAG "QUIET")
+endif ()
+
+find_package(ASan ${FIND_QUIETLY_FLAG})
+find_package(TSan ${FIND_QUIETLY_FLAG})
+find_package(MSan ${FIND_QUIETLY_FLAG})
+find_package(UBSan ${FIND_QUIETLY_FLAG})
+
+
+
+
+# Resolve FILE (relative paths are taken from CMAKE_CURRENT_SOURCE_DIR) and check the -fsanitize-blacklist flag for it.
+function(sanitizer_add_blacklist_file FILE)
+    if(NOT IS_ABSOLUTE "${FILE}")
+        set(FILE "${CMAKE_CURRENT_SOURCE_DIR}/${FILE}")
+    endif()
+    get_filename_component(FILE "${FILE}" REALPATH)
+    sanitizer_check_compiler_flags("-fsanitize-blacklist=${FILE}"
+        "SanitizerBlacklist" "SanBlist")
+endfunction()
+
+function(add_sanitizers ...)
+    # Apply every enabled sanitizer to each target in ARGV; no-op if none is enabled.
+    if (NOT (SANITIZE_ADDRESS OR SANITIZE_MEMORY OR SANITIZE_THREAD OR
+        SANITIZE_UNDEFINED))
+        return()
+    endif ()
+
+    foreach (TARGET ${ARGV})
+        # Interface libraries are not compiled: skip this target, keep going.
+        get_target_property(TARGET_TYPE ${TARGET} TYPE)
+        if (TARGET_TYPE STREQUAL "INTERFACE_LIBRARY")
+            message(WARNING "Can't use any sanitizers for target ${TARGET}, "
+                    "because it is an interface library and cannot be "
+                    "compiled directly.")
+            continue()
+        endif ()
+        # A target built by more than one compiler is skipped as well.
+        sanitizer_target_compilers(${TARGET} TARGET_COMPILER)
+        list(LENGTH TARGET_COMPILER NUM_COMPILERS)
+        if (NUM_COMPILERS GREATER 1)
+            message(WARNING "Can't use any sanitizers for target ${TARGET}, "
+                    "because it will be compiled by incompatible compilers. "
+                    "Target will be compiled without sanitizers.")
+            continue()
+
+        # If the target is compiled by no or no known compiler, give a warning.
+        elseif (NUM_COMPILERS EQUAL 0)
+            message(WARNING "Sanitizers for target ${TARGET} may not be"
+                    " usable, because it uses no or an unknown compiler. "
+                    "This is a false warning for targets using only "
+                    "object lib(s) as input.")
+        endif ()
+
+        # Add sanitizers for target.
+        add_sanitize_address(${TARGET})
+        add_sanitize_thread(${TARGET})
+        add_sanitize_memory(${TARGET})
+        add_sanitize_undefined(${TARGET})
+    endforeach ()
+endfunction(add_sanitizers)
diff --git a/cmake/sanitizers/FindTSan.cmake b/cmake/sanitizers/FindTSan.cmake
new file mode 100644
index 00000000..3cba3c03
--- /dev/null
+++ b/cmake/sanitizers/FindTSan.cmake
@@ -0,0 +1,65 @@
+# The MIT License (MIT)
+#
+# Copyright (c)
+#   2013 Matthew Arsenault
+#   2015-2016 RWTH Aachen University, Federal Republic of Germany
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+option(SANITIZE_THREAD "Enable ThreadSanitizer for sanitized targets." Off)
+
+set(FLAG_CANDIDATES
+    "-g -fsanitize=thread"
+)
+
+
+# ThreadSanitizer is not compatible with MemorySanitizer.
+if (SANITIZE_THREAD AND SANITIZE_MEMORY)
+    message(FATAL_ERROR "ThreadSanitizer is not compatible with "
+        "MemorySanitizer.")
+endif ()
+
+
+include(sanitize-helpers)
+
+if (SANITIZE_THREAD)  # Check platform support first; only then probe the compiler flags.
+    if (NOT CMAKE_SYSTEM_NAME STREQUAL "Linux" AND
+        NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+        message(WARNING "ThreadSanitizer disabled because "
+            "ThreadSanitizer is supported for Linux systems and macOS only.")
+        set(SANITIZE_THREAD Off CACHE BOOL
+            "Enable ThreadSanitizer for sanitized targets." FORCE)
+    elseif (NOT CMAKE_SIZEOF_VOID_P EQUAL 8)  # pointer size in bytes; 8 means 64-bit
+        message(WARNING "ThreadSanitizer disabled because "
+            "ThreadSanitizer is supported for 64bit systems only.")
+        set(SANITIZE_THREAD Off CACHE BOOL
+            "Enable ThreadSanitizer for sanitized targets." FORCE)
+    else ()
+        sanitizer_check_compiler_flags("${FLAG_CANDIDATES}" "ThreadSanitizer"
+            "TSan")
+    endif ()
+endif ()
+
+# Pass TARGET to sanitizer_add_flags for ThreadSanitizer ("TSan").
+# Does nothing unless SANITIZE_THREAD is enabled.
+function (add_sanitize_thread TARGET)
+    if (SANITIZE_THREAD)
+        sanitizer_add_flags(${TARGET} "ThreadSanitizer" "TSan")
+    endif ()
+endfunction ()
diff --git a/cmake/sanitizers/FindUBSan.cmake b/cmake/sanitizers/FindUBSan.cmake
new file mode 100644
index 00000000..ae103f71
--- /dev/null
+++ b/cmake/sanitizers/FindUBSan.cmake
@@ -0,0 +1,46 @@
+# The MIT License (MIT)
+#
+# Copyright (c)
+#   2013 Matthew Arsenault
+#   2015-2016 RWTH Aachen University, Federal Republic of Germany
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+option(SANITIZE_UNDEFINED
+    "Enable UndefinedBehaviorSanitizer for sanitized targets." Off)
+
+set(FLAG_CANDIDATES
+    "-g -fsanitize=undefined"
+)
+
+
+include(sanitize-helpers)
+
+if (SANITIZE_UNDEFINED)
+    sanitizer_check_compiler_flags("${FLAG_CANDIDATES}"
+        "UndefinedBehaviorSanitizer" "UBSan")
+endif ()
+
+# Pass TARGET to sanitizer_add_flags for UndefinedBehaviorSanitizer ("UBSan").
+# Does nothing unless SANITIZE_UNDEFINED is enabled.
+function (add_sanitize_undefined TARGET)
+    if (SANITIZE_UNDEFINED)
+        sanitizer_add_flags(${TARGET} "UndefinedBehaviorSanitizer" "UBSan")
+    endif ()
+endfunction ()
diff --git a/cmake/sanitizers/asan-wrapper b/cmake/sanitizers/asan-wrapper
new file mode 100755
index 00000000..5d541033
--- /dev/null
+++ b/cmake/sanitizers/asan-wrapper
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+# The MIT License (MIT)
+#
+# Copyright (c)
+#   2013 Matthew Arsenault
+#   2015-2016 RWTH Aachen University, Federal Republic of Germany
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# This script is a wrapper for AddressSanitizer. In some special cases you need
+# to preload AddressSanitizer to avoid error messages - e.g. if you're
+# preloading another library to your application. At the moment this script will
+# only do something, if we're running on a Linux platform. OSX might not be
+# affected.
+
+
+# Exit immediately, if platform is not Linux.
+if [ "$(uname)" != "Linux" ]
+then
+    exec "$@"
+fi
+
+
+# Get the used libasan of the application ($1). If a libasan was found, it will
+# be prepended to LD_PRELOAD. "$1" is quoted so paths with spaces survive.
+libasan=$(ldd "$1" | grep libasan | sed "s/^[[:space:]]*//" | cut -d' ' -f1)
+if [ -n "$libasan" ]
+then
+    if [ -n "$LD_PRELOAD" ]
+    then
+        export LD_PRELOAD="$libasan:$LD_PRELOAD"
+    else
+        export LD_PRELOAD="$libasan"
+    fi
+fi
+
+# Execute the application; "$@" keeps every argument as a separate word.
+exec "$@"
diff --git a/cmake/sanitizers/sanitize-helpers.cmake b/cmake/sanitizers/sanitize-helpers.cmake
new file mode 100755
index 00000000..3649b074
--- /dev/null
+++ b/cmake/sanitizers/sanitize-helpers.cmake
@@ -0,0 +1,177 @@
+# The MIT License (MIT)
+#
+# Copyright (c)
+#   2013 Matthew Arsenault
+#   2015-2016 RWTH Aachen University, Federal Republic of Germany
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Resolve the CMake language (e.g. C, CXX) that compiles a given source file.
+function (sanitizer_lang_of_source FILE RETURN_VAR)
+    # Default result: no known language. Overridden below on the first match.
+    set(${RETURN_VAR} "" PARENT_SCOPE)
+    # Extensionless files (e.g. some headers) have no language to look up.
+    get_filename_component(FULL_EXT "${FILE}" EXT)
+    if ("${FULL_EXT}" STREQUAL "")
+        return()
+    endif ()
+
+    # Keep only the last extension (names may contain dots), lower-cased, no dot.
+    string(REGEX REPLACE "^.*(\\.[^.]+)$" "\\1" LAST_EXT ${FULL_EXT})
+    string(TOLOWER "${LAST_EXT}" LAST_EXT)
+    string(SUBSTRING "${LAST_EXT}" 1 -1 LAST_EXT)
+
+    get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
+    foreach (CANDIDATE ${LANGUAGES})
+        list(FIND CMAKE_${CANDIDATE}_SOURCE_FILE_EXTENSIONS "${LAST_EXT}" POS)
+        if (NOT ${POS} EQUAL -1)
+            set(${RETURN_VAR} "${CANDIDATE}" PARENT_SCOPE)
+            return()
+        endif ()
+    endforeach ()
+endfunction ()
+
+
+# Collect the IDs of all compilers used to build TARGET's sources.
+function (sanitizer_target_compilers TARGET RETURN_VAR)
+    # The result is a de-duplicated list. More than one entry means the target
+    # mixes compilers (e.g. clang for C and gfortran for Fortran), which can
+    # trigger huge problems, because different compilers may use different
+    # implementations for sanitizers.
+    set(COMPILER_IDS "")
+    get_target_property(TARGET_SOURCES ${TARGET} SOURCES)
+    foreach (SRC ${TARGET_SOURCES})
+        # A non-empty match means SRC is a TARGET_OBJECTS generator-expression
+        # for an object library. Object libraries are ignored.
+        string(REGEX MATCH "TARGET_OBJECTS:([^ >]+)" OBJLIB_EXPR ${SRC})
+        if ("${OBJLIB_EXPR}" STREQUAL "")
+            sanitizer_lang_of_source(${SRC} SOURCE_LANG)
+            if (SOURCE_LANG)
+                list(APPEND COMPILER_IDS ${CMAKE_${SOURCE_LANG}_COMPILER_ID})
+            endif ()
+        endif ()
+    endforeach ()
+
+    list(REMOVE_DUPLICATES COMPILER_IDS)
+    set(${RETURN_VAR} "${COMPILER_IDS}" PARENT_SCOPE)
+endfunction ()
+
+
+# Check FLAG with the compiler of LANG (C, CXX or Fortran); result in VARIABLE.
+function (sanitizer_check_compiler_flag FLAG LANG VARIABLE)
+    if (${LANG} STREQUAL "C")
+        include(CheckCCompilerFlag)
+        check_c_compiler_flag("${FLAG}" ${VARIABLE})
+
+    elseif (${LANG} STREQUAL "CXX")
+        include(CheckCXXCompilerFlag)
+        check_cxx_compiler_flag("${FLAG}" ${VARIABLE})
+
+    elseif (${LANG} STREQUAL "Fortran")
+        # CheckFortranCompilerFlag was introduced in CMake 3.x. To be compatible
+        # with older CMake versions, we will check if this module is present
+        # before we use it. Otherwise VARIABLE is left untouched and the check
+        # is reported as failed (unless CMAKE_REQUIRED_QUIET is set).
+        include(CheckFortranCompilerFlag OPTIONAL RESULT_VARIABLE INCLUDED)
+        if (INCLUDED)
+            check_fortran_compiler_flag("${FLAG}" ${VARIABLE})
+        elseif (NOT CMAKE_REQUIRED_QUIET)
+            message(STATUS "Performing Test ${VARIABLE}")
+            message(STATUS "Performing Test ${VARIABLE}"
+                " - Failed (Check not supported)")
+        endif ()
+    endif()
+endfunction ()
+
+
+# Find the first working entry of FLAG_CANDIDATES for each enabled compiler.
+function (sanitizer_check_compiler_flags FLAG_CANDIDATES NAME PREFIX)
+    set(CMAKE_REQUIRED_QUIET ${${PREFIX}_FIND_QUIETLY})
+
+    get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
+    foreach (LANG ${ENABLED_LANGUAGES})
+        # Sanitizer flags are not dependent on language, but the used compiler.
+        # So instead of searching flags foreach language, search flags foreach
+        # compiler used and cache them in ${PREFIX}_${COMPILER}_FLAGS.
+        set(COMPILER ${CMAKE_${LANG}_COMPILER_ID})
+        if (NOT DEFINED ${PREFIX}_${COMPILER}_FLAGS)
+            foreach (FLAG ${FLAG_CANDIDATES})
+                if(NOT CMAKE_REQUIRED_QUIET)
+                    message(STATUS "Try ${COMPILER} ${NAME} flag = [${FLAG}]")
+                endif()
+
+                set(CMAKE_REQUIRED_FLAGS "${FLAG}")
+                unset(${PREFIX}_FLAG_DETECTED CACHE)
+                sanitizer_check_compiler_flag("${FLAG}" ${LANG}
+                    ${PREFIX}_FLAG_DETECTED)
+
+                if (${PREFIX}_FLAG_DETECTED)
+                    # If compiler is a GNU compiler, search for static flag, if
+                    # SANITIZE_LINK_STATIC is enabled.
+                    if (SANITIZE_LINK_STATIC AND (${COMPILER} STREQUAL "GNU"))
+                        string(TOLOWER ${PREFIX} PREFIX_lower)
+                        sanitizer_check_compiler_flag(
+                            "-static-lib${PREFIX_lower}" ${LANG}
+                            ${PREFIX}_STATIC_FLAG_DETECTED)
+
+                        if (${PREFIX}_STATIC_FLAG_DETECTED)
+                            set(FLAG "-static-lib${PREFIX_lower} ${FLAG}")
+                        endif ()
+                    endif ()
+
+                    set(${PREFIX}_${COMPILER}_FLAGS "${FLAG}" CACHE STRING
+                        "${NAME} flags for ${COMPILER} compiler.")
+                    mark_as_advanced(${PREFIX}_${COMPILER}_FLAGS)
+                    break()
+                endif ()
+            endforeach ()
+
+            if (NOT ${PREFIX}_FLAG_DETECTED)
+                set(${PREFIX}_${COMPILER}_FLAGS "" CACHE STRING
+                    "${NAME} flags for ${COMPILER} compiler.")
+                mark_as_advanced(${PREFIX}_${COMPILER}_FLAGS)
+
+                message(WARNING "${NAME} is not available for ${COMPILER} "
+                        "compiler. Targets using this compiler will be "
+                        "compiled without ${NAME}.")
+            endif ()
+        endif ()
+    endforeach ()
+endfunction ()
+
+
+# Helper to assign sanitizer flags for TARGET (NAME is currently unused).
+function (sanitizer_add_flags TARGET NAME PREFIX)
+    # Get list of compilers used by target and check, if sanitizer is available
+    # for this target. Other compiler checks like check for conflicting
+    # compilers will be done in add_sanitizers function.
+    sanitizer_target_compilers(${TARGET} TARGET_COMPILER)
+    # No flags cached for this compiler: leave the target untouched.
+    if ("${${PREFIX}_${TARGET_COMPILER}_FLAGS}" STREQUAL "")
+        return()
+    endif()
+
+    # Set compile- and link-flags for target.
+    set_property(TARGET ${TARGET} APPEND_STRING
+        PROPERTY COMPILE_FLAGS " ${${PREFIX}_${TARGET_COMPILER}_FLAGS}")
+    set_property(TARGET ${TARGET} APPEND_STRING
+        PROPERTY COMPILE_FLAGS " ${SanBlist_${TARGET_COMPILER}_FLAGS}")
+    set_property(TARGET ${TARGET} APPEND_STRING
+        PROPERTY LINK_FLAGS " ${${PREFIX}_${TARGET_COMPILER}_FLAGS}")
+endfunction ()
diff --git a/examples/obj_sticher/obj_writer.cc b/examples/obj_sticher/obj_writer.cc
index 9ea8d7c4..31a2c895 100644
--- a/examples/obj_sticher/obj_writer.cc
+++ b/examples/obj_sticher/obj_writer.cc
@@ -26,7 +26,7 @@ bool WriteMat(const std::string& filename, const std::vector<tinyobj::material_t
     fprintf(fp, "Ka %f %f %f\n", mat.ambient[0], mat.ambient[1], mat.ambient[2]);
     fprintf(fp, "Kd %f %f %f\n", mat.diffuse[0], mat.diffuse[1], mat.diffuse[2]);
     fprintf(fp, "Ks %f %f %f\n", mat.specular[0], mat.specular[1], mat.specular[2]);
-    fprintf(fp, "Kt %f %f %f\n", mat.transmittance[0], mat.specular[1], mat.specular[2]);
+    fprintf(fp, "Kt %f %f %f\n", mat.transmittance[0], mat.transmittance[1], mat.transmittance[2]);
     fprintf(fp, "Ke %f %f %f\n", mat.emission[0], mat.emission[1], mat.emission[2]);
     fprintf(fp, "Ns %f\n", mat.shininess);
     fprintf(fp, "Ni %f\n", mat.ior);
diff --git a/examples/skin_weight/Makefile b/examples/skin_weight/Makefile
new file mode 100644
index 00000000..59e4e3c3
--- /dev/null
+++ b/examples/skin_weight/Makefile
@@ -0,0 +1,2 @@
+all:
+	clang++ -std=c++11 -o skin_weight -I../../ -g main.cc
diff --git a/examples/skin_weight/README.md b/examples/skin_weight/README.md
new file mode 100644
index 00000000..800d8472
--- /dev/null
+++ b/examples/skin_weight/README.md
@@ -0,0 +1,7 @@
+This example prints the skin weights of each vertex (`vw`), a TinyObjLoader extension.
+
+## Run example
+
+```
+$ ./skin_weight ../../models/skin-weight.obj
+```
diff --git a/examples/skin_weight/main.cc b/examples/skin_weight/main.cc
new file mode 100644
index 00000000..a3770afe
--- /dev/null
+++ b/examples/skin_weight/main.cc
@@ -0,0 +1,103 @@
+//
+// g++ -g -std=c++11 main.cc
+//
+#define TINYOBJLOADER_IMPLEMENTATION
+#include "tiny_obj_loader.h"
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+#include <unordered_map> // C++11
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#if __has_warning("-Wzero-as-null-pointer-constant")
+#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#endif
+
+// Builds a per-vertex table: entry i of `*vertex_skin_weights` receives the
+// skin weight record whose vertex_id is i (`vertices` is read as 3 values/vertex).
+static void ConstructVertexWeight(
+  const std::vector<tinyobj::real_t> &vertices,
+  const std::vector<tinyobj::skin_weight_t> &skin_weights,
+  std::vector<tinyobj::skin_weight_t> *vertex_skin_weights)
+{
+  const size_t num_vertices = vertices.size() / 3;
+  vertex_skin_weights->resize(num_vertices);
+
+  for (const tinyobj::skin_weight_t &skin : skin_weights) {
+    // Validate at runtime: assert() vanishes under NDEBUG, and an id outside
+    // [0, num_vertices) would otherwise write out of bounds.
+    if (skin.vertex_id < 0 ||
+        static_cast<size_t>(skin.vertex_id) >= num_vertices) {
+      std::cerr << "WARN: skipping invalid vertex id " << skin.vertex_id << "\n";
+      continue;
+    }
+    (*vertex_skin_weights)[static_cast<size_t>(skin.vertex_id)] = skin;
+  }
+  // Now the i'th vertex skin weight can be looked up as (*vertex_skin_weights)[i].
+}
+
+// Loads `filename` with tinyobjloader, reports warnings/errors, then prints
+// the joint/weight pairs attached to every vertex. Returns false if loading
+// or parsing failed.
+static bool TestLoadObj(const char* filename, const char* basepath = nullptr,
+                        bool triangulate = true) {
+  std::cout << "Loading " << filename << std::endl;
+
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn;
+  std::string err;
+
+  const bool loaded = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn,
+                                       &err, filename, basepath, triangulate);
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+  if (!loaded) {
+    printf("Failed to load/parse .obj.\n");
+    return false;
+  }
+
+  // Re-index the parsed skin weights so they can be addressed by vertex id.
+  std::vector<tinyobj::skin_weight_t> per_vertex;
+  ConstructVertexWeight(attrib.vertices, attrib.skin_weights, &per_vertex);
+
+  for (size_t v = 0; v < per_vertex.size(); v++) {
+    const auto &weights = per_vertex[v].weightValues;
+    std::cout << "vertex[" << v << "] num_weights = " << weights.size() << "\n";
+    for (size_t w = 0; w < weights.size(); w++) {
+      std::cout << "  w[" << w << "] joint = " << weights[w].joint_id
+                << ", weight = " << weights[w].weight << "\n";
+    }
+  }
+
+  return true;
+}
+
+
+// Entry point: argv[1] is the .obj file; an optional argv[2] is passed on as
+// the base path. Exits with EXIT_FAILURE when loading fails.
+int main(int argc, char** argv) {
+  if (argc < 2) {
+    std::cerr << "Need input.obj\n";
+    return EXIT_FAILURE;
+  }
+
+  const char* basepath = (argc > 2) ? argv[2] : nullptr;
+  // Do not wrap the call in assert(): with NDEBUG the whole expression,
+  // including the load itself, would be compiled out.
+  const bool ok = TestLoadObj(argv[1], basepath);
+  return ok ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/examples/viewer/.gitignore b/examples/viewer/.gitignore
new file mode 100644
index 00000000..378eac25
--- /dev/null
+++ b/examples/viewer/.gitignore
@@ -0,0 +1 @@
+build
diff --git a/examples/viewer/CMakeLists.txt b/examples/viewer/CMakeLists.txt
new file mode 100644
index 00000000..df3af268
--- /dev/null
+++ b/examples/viewer/CMakeLists.txt
@@ -0,0 +1,29 @@
+# cmake -S . -B build && cmake --build build && build/app
+# Or choose a generator explicitly:
+# cmake -S . -B build -G "Ninja" && cmake --build build && build/app
+# cmake -S . -B build -G "CodeBlocks - Ninja" && cmake --build build && build/app
+# cmake -S . -B build -G "Visual Studio 17 2022" && cmake --build build && build/app
+
+cmake_minimum_required(VERSION 3.18)
+project( app VERSION 0.1 )
+
+file(GLOB SOURCE_FILES "*.c*" )
+add_executable(app ${SOURCE_FILES})
+
+find_package( OpenGL REQUIRED )
+find_package( glfw3 REQUIRED )
+
+set(ADDITIONAL_LIBRARIES "")
+if(WIN32)
+set(ADDITIONAL_LIBRARIES winmm)
+endif()
+
+# GLEW: bare library name on UNIX, imported target from find_package elsewhere.
+if(UNIX)
+set(GLEW_LIBRARY GLEW)
+else()
+find_package( GLEW REQUIRED )
+set(GLEW_LIBRARY GLEW::glew)
+endif()
+
+target_link_libraries(${PROJECT_NAME} OpenGL::GL OpenGL::GLU glfw ${ADDITIONAL_LIBRARIES} ${GLEW_LIBRARY} )
diff --git a/examples/viewer/README.md b/examples/viewer/README.md
index 9cb032c7..76207bfa 100644
--- a/examples/viewer/README.md
+++ b/examples/viewer/README.md
@@ -5,6 +5,8 @@
 * premake5
 * glfw3
 * glew
+* xcursor
+* xinerama
 
 ## Build on MaCOSX
 
@@ -37,6 +39,7 @@ Then,
 
 ## TODO
 
+* [ ] Alpha texturing.
 * [ ] Support per-face material.
 * [ ] Use shader-based GL rendering.
 * [ ] PBR shader support.
diff --git a/examples/viewer/stb_image.h b/examples/viewer/stb_image.h
index a3c11299..d60371b9 100644
--- a/examples/viewer/stb_image.h
+++ b/examples/viewer/stb_image.h
@@ -1,5 +1,5 @@
-/* stb_image - v2.12 - public domain image loader - http://nothings.org/stb_image.h
-                                     no warranty implied; use at your own risk
+/* stb_image - v2.27 - public domain image loader - http://nothings.org/stb
+                                  no warranty implied; use at your own risk
 
    Do this:
       #define STB_IMAGE_IMPLEMENTATION
@@ -21,7 +21,7 @@
           avoid problematic images and only need the trivial interface
 
       JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
-      PNG 1/2/4/8-bit-per-channel (16 bpc not supported)
+      PNG 1/2/4/8/16-bit-per-channel
 
       TGA (not sure what subset, if a subset)
       BMP non-1bpp, non-RLE
@@ -42,136 +42,34 @@
    Full documentation under "DOCUMENTATION" below.
 
 
-   Revision 2.00 release notes:
-
-      - Progressive JPEG is now supported.
-
-      - PPM and PGM binary formats are now supported, thanks to Ken Miller.
-
-      - x86 platforms now make use of SSE2 SIMD instructions for
-        JPEG decoding, and ARM platforms can use NEON SIMD if requested.
-        This work was done by Fabian "ryg" Giesen. SSE2 is used by
-        default, but NEON must be enabled explicitly; see docs.
-
-        With other JPEG optimizations included in this version, we see
-        2x speedup on a JPEG on an x86 machine, and a 1.5x speedup
-        on a JPEG on an ARM machine, relative to previous versions of this
-        library. The same results will not obtain for all JPGs and for all
-        x86/ARM machines. (Note that progressive JPEGs are significantly
-        slower to decode than regular JPEGs.) This doesn't mean that this
-        is the fastest JPEG decoder in the land; rather, it brings it
-        closer to parity with standard libraries. If you want the fastest
-        decode, look elsewhere. (See "Philosophy" section of docs below.)
-
-        See final bullet items below for more info on SIMD.
-
-      - Added STBI_MALLOC, STBI_REALLOC, and STBI_FREE macros for replacing
-        the memory allocator. Unlike other STBI libraries, these macros don't
-        support a context parameter, so if you need to pass a context in to
-        the allocator, you'll have to store it in a global or a thread-local
-        variable.
-
-      - Split existing STBI_NO_HDR flag into two flags, STBI_NO_HDR and
-        STBI_NO_LINEAR.
-            STBI_NO_HDR:     suppress implementation of .hdr reader format
-            STBI_NO_LINEAR:  suppress high-dynamic-range light-linear float API
-
-      - You can suppress implementation of any of the decoders to reduce
-        your code footprint by #defining one or more of the following
-        symbols before creating the implementation.
-
-            STBI_NO_JPEG
-            STBI_NO_PNG
-            STBI_NO_BMP
-            STBI_NO_PSD
-            STBI_NO_TGA
-            STBI_NO_GIF
-            STBI_NO_HDR
-            STBI_NO_PIC
-            STBI_NO_PNM   (.ppm and .pgm)
-
-      - You can request *only* certain decoders and suppress all other ones
-        (this will be more forward-compatible, as addition of new decoders
-        doesn't require you to disable them explicitly):
-
-            STBI_ONLY_JPEG
-            STBI_ONLY_PNG
-            STBI_ONLY_BMP
-            STBI_ONLY_PSD
-            STBI_ONLY_TGA
-            STBI_ONLY_GIF
-            STBI_ONLY_HDR
-            STBI_ONLY_PIC
-            STBI_ONLY_PNM   (.ppm and .pgm)
-
-         Note that you can define multiples of these, and you will get all
-         of them ("only x" and "only y" is interpreted to mean "only x&y").
-
-       - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
-         want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
-
-      - Compilation of all SIMD code can be suppressed with
-            #define STBI_NO_SIMD
-        It should not be necessary to disable SIMD unless you have issues
-        compiling (e.g. using an x86 compiler which doesn't support SSE
-        intrinsics or that doesn't support the method used to detect
-        SSE2 support at run-time), and even those can be reported as
-        bugs so I can refine the built-in compile-time checking to be
-        smarter.
-
-      - The old STBI_SIMD system which allowed installing a user-defined
-        IDCT etc. has been removed. If you need this, don't upgrade. My
-        assumption is that almost nobody was doing this, and those who
-        were will find the built-in SIMD more satisfactory anyway.
-
-      - RGB values computed for JPEG images are slightly different from
-        previous versions of stb_image. (This is due to using less
-        integer precision in SIMD.) The C code has been adjusted so
-        that the same RGB values will be computed regardless of whether
-        SIMD support is available, so your app should always produce
-        consistent results. But these results are slightly different from
-        previous versions. (Specifically, about 3% of available YCbCr values
-        will compute different RGB results from pre-1.49 versions by +-1;
-        most of the deviating values are one smaller in the G channel.)
-
-      - If you must produce consistent results with previous versions of
-        stb_image, #define STBI_JPEG_OLD and you will get the same results
-        you used to; however, you will not get the SIMD speedups for
-        the YCbCr-to-RGB conversion step (although you should still see
-        significant JPEG speedup from the other changes).
-
-        Please note that STBI_JPEG_OLD is a temporary feature; it will be
-        removed in future versions of the library. It is only intended for
-        near-term back-compatibility use.
-
-
-   Latest revision history:
+LICENSE
+
+  See end of file for license information.
+
+RECENT REVISION HISTORY:
+
+      2.27  (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
+      2.26  (2020-07-13) many minor fixes
+      2.25  (2020-02-02) fix warnings
+      2.24  (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
+      2.23  (2019-08-11) fix clang static analysis warning
+      2.22  (2019-03-04) gif fixes, fix warnings
+      2.21  (2019-02-25) fix typo in comment
+      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
+      2.19  (2018-02-11) fix warning
+      2.18  (2018-01-30) fix warnings
+      2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
+      2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
+      2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
+      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+      2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
                          RGB-format JPEG; remove white matting in PSD;
-                         allocate large structures on the stack; 
+                         allocate large structures on the stack;
                          correct channel count for PNG & BMP
       2.10  (2016-01-22) avoid warning introduced in 2.09
       2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
-      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
-      2.07  (2015-09-13) partial animated GIF support
-                         limited 16-bit PSD support
-                         minor bugs, code cleanup, and compiler warnings
-      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
-      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
-      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
-      2.03  (2015-04-12) additional corruption checking
-                         stbi_set_flip_vertically_on_load
-                         fix NEON support; fix mingw support
-      2.02  (2015-01-19) fix incorrect assert, fix warning
-      2.01  (2015-01-17) fix various warnings
-      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
-      2.00  (2014-12-25) optimize JPEG, including x86 SSE2 & ARM NEON SIMD
-                         progressive JPEG
-                         PGM/PPM support
-                         STBI_MALLOC,STBI_REALLOC,STBI_FREE
-                         STBI_NO_*, STBI_ONLY_*
-                         GIF bugfix
 
    See end of file for full revision history.
 
@@ -186,34 +84,43 @@
     Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
     Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
     Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
-    urraka@github (animated gif)           Junggon Kim (PNM comments)
-                                           Daniel Gibson (16-bit TGA)
-
- Optimizations & bugfixes
-    Fabian "ryg" Giesen
-    Arseny Kapoulkine
+    github:urraka (animated gif)           Junggon Kim (PNM comments)
+    Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
+                                           socks-the-fox (16-bit PNG)
+                                           Jeremy Sawicki (handle all ImageNet JPGs)
+ Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
+    Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
+    Arseny Kapoulkine                      Simon Breuss (16-bit PNM)
+    John-Mark Allen
+    Carmelo J Fdez-Aguera
 
  Bug & warning fixes
-    Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
-    Christpher Lloyd        Martin Golini      Jerry Jansson      Joseph Thomson
-    Dave Moore              Roy Eltham         Hayaki Saito       Phil Jordan
-    Won Chun                Luke Graham        Johan Duparc       Nathan Reed
-    the Horde3D community   Thomas Ruf         Ronny Chevalier    Nick Verigakis
-    Janez Zemva             John Bartholomew   Michal Cichon      svdijk@github
-    Jonathan Blow           Ken Hamada         Tero Hanninen      Baldur Karlsson
-    Laurent Gomila          Cort Stratton      Sergio Gonzalez    romigrou@github
-    Aruelien Pocheville     Thibault Reuille   Cass Everitt       Matthew Gregan
-    Ryamond Barbiero        Paul Du Bois       Engin Manap        snagar@github
-    Michaelangel007@github  Oriol Ferrer Mesia socks-the-fox
-    Blazej Dariusz Roszkowski
-
-
-LICENSE
-
-This software is dual-licensed to the public domain and under the following
-license: you are granted a perpetual, irrevocable license to copy, modify,
-publish, and distribute this file as you see fit.
-
+    Marc LeBlanc            David Woo          Guillaume George     Martins Mozeiko
+    Christpher Lloyd        Jerry Jansson      Joseph Thomson       Blazej Dariusz Roszkowski
+    Phil Jordan                                Dave Moore           Roy Eltham
+    Hayaki Saito            Nathan Reed        Won Chun
+    Luke Graham             Johan Duparc       Nick Verigakis       the Horde3D community
+    Thomas Ruf              Ronny Chevalier                         github:rlyeh
+    Janez Zemva             John Bartholomew   Michal Cichon        github:romigrou
+    Jonathan Blow           Ken Hamada         Tero Hanninen        github:svdijk
+    Eugene Golushkov        Laurent Gomila     Cort Stratton        github:snagar
+    Aruelien Pocheville     Sergio Gonzalez    Thibault Reuille     github:Zelex
+    Cass Everitt            Ryamond Barbiero                        github:grim210
+    Paul Du Bois            Engin Manap        Aldo Culquicondor    github:sammyhw
+    Philipp Wiesemann       Dale Weiler        Oriol Ferrer Mesia   github:phprus
+    Josh Tobin                                 Matthew Gregan       github:poppolopoppo
+    Julian Raschke          Gregory Mullen     Christian Floisand   github:darealshinji
+    Baldur Karlsson         Kevin Schmidt      JR Smith             github:Michaelangel007
+                            Brad Weinberger    Matvey Cherevko      github:mosra
+    Luca Sas                Alexander Veselov  Zack Middleton       [reserved]
+    Ryan C. Gordon          [reserved]                              [reserved]
+                     DO NOT ADD YOUR NAME HERE
+
+                     Jacko Dirks
+
+  To add your name to the credits, pick a random blank space in the middle and fill it.
+  80% of merge conflicts on stb PRs are due to people adding their name at the end
+  of the credits.
 */
 
 #ifndef STBI_INCLUDE_STB_IMAGE_H
@@ -222,10 +129,8 @@ publish, and distribute this file as you see fit.
 // DOCUMENTATION
 //
 // Limitations:
-//    - no 16-bit-per-channel PNG
 //    - no 12-bit-per-channel JPEG
 //    - no JPEGs with arithmetic coding
-//    - no 1-bit BMP
 //    - GIF always returns *comp=4
 //
 // Basic usage (see HDR discussion below for HDR usage):
@@ -238,10 +143,10 @@ publish, and distribute this file as you see fit.
 //    stbi_image_free(data)
 //
 // Standard parameters:
-//    int *x       -- outputs image width in pixels
-//    int *y       -- outputs image height in pixels
-//    int *comp    -- outputs # of image components in image file
-//    int req_comp -- if non-zero, # of image components requested in result
+//    int *x                 -- outputs image width in pixels
+//    int *y                 -- outputs image height in pixels
+//    int *channels_in_file  -- outputs # of image components in image file
+//    int desired_channels   -- if non-zero, # of image components requested in result
 //
 // The return value from an image loader is an 'unsigned char *' which points
 // to the pixel data, or NULL on an allocation failure or if the image is
@@ -249,11 +154,12 @@ publish, and distribute this file as you see fit.
 // with each pixel consisting of N interleaved 8-bit components; the first
 // pixel pointed to is top-left-most in the image. There is no padding between
 // image scanlines or between pixels, regardless of format. The number of
-// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
-// If req_comp is non-zero, *comp has the number of components that _would_
-// have been output otherwise. E.g. if you set req_comp to 4, you will always
-// get RGBA output, but you can check *comp to see if it's trivially opaque
-// because e.g. there were only 3 channels in the source image.
+// components N is 'desired_channels' if desired_channels is non-zero, or
+// *channels_in_file otherwise. If desired_channels is non-zero,
+// *channels_in_file has the number of components that _would_ have been
+// output otherwise. E.g. if you set desired_channels to 4, you will always
+// get RGBA output, but you can check *channels_in_file to see if it's trivially
+// opaque because e.g. there were only 3 channels in the source image.
 //
 // An output image with N components has the following components interleaved
 // in this order in each pixel:
@@ -265,14 +171,50 @@ publish, and distribute this file as you see fit.
 //       4           red, green, blue, alpha
 //
 // If image loading fails for any reason, the return value will be NULL,
-// and *x, *y, *comp will be unchanged. The function stbi_failure_reason()
-// can be queried for an extremely brief, end-user unfriendly explanation
-// of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
-// compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
+// and *x, *y, *channels_in_file will be unchanged. The function
+// stbi_failure_reason() can be queried for an extremely brief, end-user
+// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
+// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
 // more user-friendly ones.
 //
 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
 //
+// To query the width, height and component count of an image without having to
+// decode the full file, you can use the stbi_info family of functions:
+//
+//   int x,y,n,ok;
+//   ok = stbi_info(filename, &x, &y, &n);
+//   // returns ok=1 and sets x, y, n if image is a supported format,
+//   // 0 otherwise.
+//
+// Note that stb_image pervasively uses ints in its public API for sizes,
+// including sizes of memory buffers. This is now part of the API and thus
+// hard to change without causing breakage. As a result, the various image
+// loaders all have certain limits on image size; these differ somewhat
+// by format but generally boil down to either just under 2GB or just under
+// 1GB. When the decoded image would be larger than this, stb_image decoding
+// will fail.
+//
+// Additionally, stb_image will reject image files that have any of their
+// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS,
+// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit,
+// the only way to have an image with such dimensions load correctly
+// is for it to have a rather extreme aspect ratio. Either way, the
+// assumption here is that such larger images are likely to be malformed
+// or malicious. If you do need to load an image with individual dimensions
+// larger than that, and it still fits in the overall size limit, you can
+// #define STBI_MAX_DIMENSIONS on your own to be something larger.
+//
+// ===========================================================================
+//
+// UNICODE:
+//
+//   If compiling for Windows and you wish to use Unicode filenames, compile
+//   with
+//       #define STBI_WINDOWS_UTF8
+//   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
+//   Windows wchar_t filenames to utf8.
+//
 // ===========================================================================
 //
 // Philosophy
@@ -285,15 +227,15 @@ publish, and distribute this file as you see fit.
 //
 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
 // and for best performance I may provide less-easy-to-use APIs that give higher
-// performance, in addition to the easy to use ones. Nevertheless, it's important
+// performance, in addition to the easy-to-use ones. Nevertheless, it's important
 // to keep in mind that from the standpoint of you, a client of this library,
-// all you care about is #1 and #3, and stb libraries do not emphasize #3 above all.
+// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
 //
 // Some secondary priorities arise directly from the first two, some of which
-// make more explicit reasons why performance can't be emphasized.
+// provide more explicit reasons why performance can't be emphasized.
 //
 //    - Portable ("ease of use")
-//    - Small footprint ("easy to maintain")
+//    - Small source code footprint ("easy to maintain")
 //    - No dependencies ("ease of use")
 //
 // ===========================================================================
@@ -325,13 +267,6 @@ publish, and distribute this file as you see fit.
 // (at least this is true for iOS and Android). Therefore, the NEON support is
 // toggled by a build flag: define STBI_NEON to get NEON loops.
 //
-// The output of the JPEG decoder is slightly different from versions where
-// SIMD support was introduced (that is, for versions before 1.49). The
-// difference is only +-1 in the 8-bit RGB channels, and only on a small
-// fraction of pixels. You can force the pre-1.49 behavior by defining
-// STBI_JPEG_OLD, but this will disable some of the SIMD decoding path
-// and hence cost some performance.
-//
 // If for some reason you do not want to use any of SIMD code, or if
 // you have issues compiling it, you can disable it entirely by
 // defining STBI_NO_SIMD.
@@ -340,11 +275,10 @@ publish, and distribute this file as you see fit.
 //
 // HDR image support   (disable by defining STBI_NO_HDR)
 //
-// stb_image now supports loading HDR images in general, and currently
-// the Radiance .HDR file format, although the support is provided
-// generically. You can still load any file through the existing interface;
-// if you attempt to load an HDR file, it will be automatically remapped to
-// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
+// stb_image supports loading HDR images in general, and currently the Radiance
+// .HDR file format specifically. You can still load any file through the existing
+// interface; if you attempt to load an HDR file, it will be automatically remapped
+// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
 // both of these constants can be reconfigured through this interface:
 //
 //     stbi_hdr_to_ldr_gamma(2.2f);
@@ -376,18 +310,59 @@ publish, and distribute this file as you see fit.
 //
 // iPhone PNG support:
 //
-// By default we convert iphone-formatted PNGs back to RGB, even though
-// they are internally encoded differently. You can disable this conversion
-// by by calling stbi_convert_iphone_png_to_rgb(0), in which case
-// you will always just get the native iphone "format" through (which
-// is BGR stored in RGB).
+// We optionally support converting iPhone-formatted PNGs (which store
+// premultiplied BGRA) back to RGB, even though they're internally encoded
+// differently. To enable this conversion, call
+// stbi_convert_iphone_png_to_rgb(1).
 //
 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
 // pixel to remove any premultiplied alpha *only* if the image file explicitly
 // says there's premultiplied data (currently only happens in iPhone images,
 // and only if iPhone convert-to-rgb processing is on).
 //
-
+// ===========================================================================
+//
+// ADDITIONAL CONFIGURATION
+//
+//  - You can suppress implementation of any of the decoders to reduce
+//    your code footprint by #defining one or more of the following
+//    symbols before creating the implementation.
+//
+//        STBI_NO_JPEG
+//        STBI_NO_PNG
+//        STBI_NO_BMP
+//        STBI_NO_PSD
+//        STBI_NO_TGA
+//        STBI_NO_GIF
+//        STBI_NO_HDR
+//        STBI_NO_PIC
+//        STBI_NO_PNM   (.ppm and .pgm)
+//
+//  - You can request *only* certain decoders and suppress all other ones
+//    (this will be more forward-compatible, as addition of new decoders
+//    doesn't require you to disable them explicitly):
+//
+//        STBI_ONLY_JPEG
+//        STBI_ONLY_PNG
+//        STBI_ONLY_BMP
+//        STBI_ONLY_PSD
+//        STBI_ONLY_TGA
+//        STBI_ONLY_GIF
+//        STBI_ONLY_HDR
+//        STBI_ONLY_PIC
+//        STBI_ONLY_PNM   (.ppm and .pgm)
+//
+//  - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
+//    want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
+//
+//  - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater
+//    than that size (in either width or height) without further processing.
+//    This is to let programs in the wild set an upper bound to prevent
+//    denial-of-service attacks on untrusted data, as one could generate a
+//    valid image of gigantic dimensions and force stb_image to allocate a
+//    huge block of memory and spend disproportionate time decoding it. By
+//    default this is set to (1 << 24), which is 16777216, but that's still
+//    very big.
 
 #ifndef STBI_NO_STDIO
 #include <stdio.h>
@@ -397,7 +372,7 @@ publish, and distribute this file as you see fit.
 
 enum
 {
-   STBI_default = 0, // only used for req_comp
+   STBI_default = 0, // only used for desired_channels
 
    STBI_grey       = 1,
    STBI_grey_alpha = 2,
@@ -405,17 +380,21 @@ enum
    STBI_rgb_alpha  = 4
 };
 
+#include <stdlib.h>
 typedef unsigned char stbi_uc;
+typedef unsigned short stbi_us;
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#ifndef STBIDEF
 #ifdef STB_IMAGE_STATIC
 #define STBIDEF static
 #else
 #define STBIDEF extern
 #endif
+#endif
 
 //////////////////////////////////////////////////////////////////////////////
 //
@@ -433,22 +412,52 @@ typedef struct
    int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
 } stbi_io_callbacks;
 
-STBIDEF stbi_uc *stbi_load               (char              const *filename,           int *x, int *y, int *comp, int req_comp);
-STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *comp, int req_comp);
-STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *comp, int req_comp);
+////////////////////////////////////
+//
+// 8-bits-per-channel interface
+//
+
+STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
 
 #ifndef STBI_NO_STDIO
-STBIDEF stbi_uc *stbi_load_from_file  (FILE *f,                  int *x, int *y, int *comp, int req_comp);
+STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
 // for stbi_load_from_file, file pointer is left pointing immediately after image
 #endif
 
+#ifndef STBI_NO_GIF
+STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
+#endif
+
+#ifdef STBI_WINDOWS_UTF8
+STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
+#endif
+
+////////////////////////////////////
+//
+// 16-bits-per-channel interface
+//
+
+STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
+
+#ifndef STBI_NO_STDIO
+STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+#endif
+
+////////////////////////////////////
+//
+// float-per-channel interface
+//
 #ifndef STBI_NO_LINEAR
-   STBIDEF float *stbi_loadf                 (char const *filename,           int *x, int *y, int *comp, int req_comp);
-   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
-   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
+   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
+   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
 
    #ifndef STBI_NO_STDIO
-   STBIDEF float *stbi_loadf_from_file  (FILE *f,                int *x, int *y, int *comp, int req_comp);
+   STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+   STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
    #endif
 #endif
 
@@ -472,7 +481,7 @@ STBIDEF int      stbi_is_hdr_from_file(FILE *f);
 
 
 // get a VERY brief reason for failure
-// NOT THREADSAFE
+// on most compilers (and ALL modern mainstream compilers) this is threadsafe
 STBIDEF const char *stbi_failure_reason  (void);
 
 // free the loaded image -- this is just free()
@@ -481,11 +490,14 @@ STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
 // get image dimensions & components without fully decoding
 STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
 STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
+STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
+STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
 
 #ifndef STBI_NO_STDIO
-STBIDEF int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
-STBIDEF int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
-
+STBIDEF int      stbi_info               (char const *filename,     int *x, int *y, int *comp);
+STBIDEF int      stbi_info_from_file     (FILE *f,                  int *x, int *y, int *comp);
+STBIDEF int      stbi_is_16_bit          (char const *filename);
+STBIDEF int      stbi_is_16_bit_from_file(FILE *f);
 #endif
 
 
@@ -502,6 +514,13 @@ STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
 // flip the image vertically, so the first pixel in the output array is the bottom left
 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
 
+// as above, but each setting only applies to images loaded on the calling thread.
+// these functions are only available if your compiler supports thread-local variables;
+// calling them will fail to link if your compiler doesn't
+STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
+STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
+STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
+
 // ZLIB client - used by PNG, available for other purposes
 
 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
@@ -566,9 +585,10 @@ STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch
 #include <stddef.h> // ptrdiff_t on osx
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
 
 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
-#include <math.h>  // ldexp
+#include <math.h>  // ldexp, pow
 #endif
 
 #ifndef STBI_NO_STDIO
@@ -580,6 +600,12 @@ STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch
 #define STBI_ASSERT(x) assert(x)
 #endif
 
+#ifdef __cplusplus
+#define STBI_EXTERN extern "C"
+#else
+#define STBI_EXTERN extern
+#endif
+
 
 #ifndef _MSC_VER
    #ifdef __cplusplus
@@ -591,6 +617,23 @@ STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch
    #define stbi_inline __forceinline
 #endif
 
+#ifndef STBI_NO_THREAD_LOCALS
+   #if defined(__cplusplus) &&  __cplusplus >= 201103L
+      #define STBI_THREAD_LOCAL       thread_local
+   #elif defined(__GNUC__) && __GNUC__ < 5
+      #define STBI_THREAD_LOCAL       __thread
+   #elif defined(_MSC_VER)
+      #define STBI_THREAD_LOCAL       __declspec(thread)
+   #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
+      #define STBI_THREAD_LOCAL       _Thread_local
+   #endif
+
+   #ifndef STBI_THREAD_LOCAL
+      #if defined(__GNUC__)
+        #define STBI_THREAD_LOCAL       __thread
+      #endif
+   #endif
+#endif
 
 #ifdef _MSC_VER
 typedef unsigned short stbi__uint16;
@@ -621,7 +664,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
 #ifdef STBI_HAS_LROTL
    #define stbi_lrot(x,y)  _lrotl(x,y)
 #else
-   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
+   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31)))
 #endif
 
 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
@@ -649,12 +692,14 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
 #define STBI__X86_TARGET
 #endif
 
-#if defined(__GNUC__) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
-// NOTE: not clear do we actually need this for the 64-bit path?
+#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
-// (but compiling with -msse2 allows the compiler to use SSE2 everywhere;
-// this is just broken and gcc are jerks for not fixing it properly
-// http://www.virtualdub.org/blog/pivot/entry.php?id=363 )
+// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
+// but previous attempts to provide the SSE2 functions with runtime
+// detection caused numerous issues. The way architecture extensions are
+// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
+// New behavior: if compiled with -msse2, we use SSE2 without any
+// detection; if not, we don't use it at all.
 #define STBI_NO_SIMD
 #endif
 
@@ -702,25 +747,27 @@ static int stbi__cpuid3(void)
 
 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
 
-static int stbi__sse2_available()
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void)
 {
    int info3 = stbi__cpuid3();
    return ((info3 >> 26) & 1) != 0;
 }
+#endif
+
 #else // assume GCC-style if not VC++
 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
 
-static int stbi__sse2_available()
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void)
 {
-#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 // GCC 4.8 or later
-   // GCC 4.8+ has a nice way to do this
-   return __builtin_cpu_supports("sse2");
-#else
-   // portable way to do this, preferably without using GCC inline ASM?
-   // just bail for now.
-   return 0;
-#endif
+   // If we're even attempting to compile this on GCC/Clang, that means
+   // -msse2 is on, which means the compiler is allowed to use SSE2
+   // instructions at will, and so are we.
+   return 1;
 }
+#endif
+
 #endif
 #endif
 
@@ -731,14 +778,21 @@ static int stbi__sse2_available()
 
 #ifdef STBI_NEON
 #include <arm_neon.h>
-// assume GCC or Clang on ARM targets
+#ifdef _MSC_VER
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+#else
 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
 #endif
+#endif
 
 #ifndef STBI_SIMD_ALIGN
 #define STBI_SIMD_ALIGN(type, name) type name
 #endif
 
+#ifndef STBI_MAX_DIMENSIONS
+#define STBI_MAX_DIMENSIONS (1 << 24)
+#endif
+
 ///////////////////////////////////////////////
 //
 //  stbi__context struct and start_xxx functions
@@ -756,6 +810,7 @@ typedef struct
    int read_from_callbacks;
    int buflen;
    stbi_uc buffer_start[128];
+   int callback_already_read;
 
    stbi_uc *img_buffer, *img_buffer_end;
    stbi_uc *img_buffer_original, *img_buffer_original_end;
@@ -769,6 +824,7 @@ static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
 {
    s->io.read = NULL;
    s->read_from_callbacks = 0;
+   s->callback_already_read = 0;
    s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
    s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
 }
@@ -780,7 +836,8 @@ static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *
    s->io_user_data = user;
    s->buflen = sizeof(s->buffer_start);
    s->read_from_callbacks = 1;
-   s->img_buffer_original = s->buffer_start;
+   s->callback_already_read = 0;
+   s->img_buffer = s->img_buffer_original = s->buffer_start;
    stbi__refill_buffer(s);
    s->img_buffer_original_end = s->img_buffer_end;
 }
@@ -794,12 +851,17 @@ static int stbi__stdio_read(void *user, char *data, int size)
 
 static void stbi__stdio_skip(void *user, int n)
 {
+   int ch;
    fseek((FILE*) user, n, SEEK_CUR);
+   ch = fgetc((FILE*) user);  /* have to read a byte to reset feof()'s flag */
+   if (ch != EOF) {
+      ungetc(ch, (FILE *) user);  /* push byte back onto stream if valid. */
+   }
 }
 
 static int stbi__stdio_eof(void *user)
 {
-   return feof((FILE*) user);
+   return feof((FILE*) user) || ferror((FILE *) user);
 }
 
 static stbi_io_callbacks stbi__stdio_callbacks =
@@ -827,79 +889,180 @@ static void stbi__rewind(stbi__context *s)
    s->img_buffer_end = s->img_buffer_original_end;
 }
 
+enum
+{
+   STBI_ORDER_RGB,
+   STBI_ORDER_BGR
+};
+
+typedef struct
+{
+   int bits_per_channel;
+   int num_channels;
+   int channel_order;
+} stbi__result_info;
+
 #ifndef STBI_NO_JPEG
 static int      stbi__jpeg_test(stbi__context *s);
-static stbi_uc *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PNG
 static int      stbi__png_test(stbi__context *s);
-static stbi_uc *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
+static int      stbi__png_is16(stbi__context *s);
 #endif
 
 #ifndef STBI_NO_BMP
 static int      stbi__bmp_test(stbi__context *s);
-static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_TGA
 static int      stbi__tga_test(stbi__context *s);
-static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PSD
 static int      stbi__psd_test(stbi__context *s);
-static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
 static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
+static int      stbi__psd_is16(stbi__context *s);
 #endif
 
 #ifndef STBI_NO_HDR
 static int      stbi__hdr_test(stbi__context *s);
-static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PIC
 static int      stbi__pic_test(stbi__context *s);
-static stbi_uc *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_GIF
 static int      stbi__gif_test(stbi__context *s);
-static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
 static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
 #endif
 
 #ifndef STBI_NO_PNM
 static int      stbi__pnm_test(stbi__context *s);
-static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
+static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
+static int      stbi__pnm_is16(stbi__context *s);
 #endif
 
-// this is not threadsafe
-static const char *stbi__g_failure_reason;
+static
+#ifdef STBI_THREAD_LOCAL
+STBI_THREAD_LOCAL
+#endif
+const char *stbi__g_failure_reason;
 
 STBIDEF const char *stbi_failure_reason(void)
 {
    return stbi__g_failure_reason;
 }
 
+#ifndef STBI_NO_FAILURE_STRINGS
 static int stbi__err(const char *str)
 {
    stbi__g_failure_reason = str;
    return 0;
 }
+#endif
 
 static void *stbi__malloc(size_t size)
 {
     return STBI_MALLOC(size);
 }
 
+// stb_image uses ints pervasively, including for offset calculations.
+// therefore the largest decoded image size we can support with the
+// current code, even on 64-bit targets, is INT_MAX. this is not a
+// significant limitation for the intended use case.
+//
+// we do, however, need to make sure our size calculations don't
+// overflow. hence a few helper functions for size calculations that
+// multiply integers together, making sure that they're non-negative
+// and no overflow occurs.
+
+// return 1 if the sum is valid, 0 on overflow.
+// negative terms are considered invalid.
+static int stbi__addsizes_valid(int a, int b)
+{
+   if (b < 0) return 0;
+   // now 0 <= b <= INT_MAX, hence also
+   // 0 <= INT_MAX - b <= INT_MAX.
+   // And "a + b <= INT_MAX" (which might overflow) is the
+   // same as a <= INT_MAX - b (no overflow)
+   return a <= INT_MAX - b;
+}
+
+// returns 1 if the product is valid, 0 on overflow.
+// negative factors are considered invalid.
+static int stbi__mul2sizes_valid(int a, int b)
+{
+   if (a < 0 || b < 0) return 0;
+   if (b == 0) return 1; // mul-by-0 is always safe
+   // portable way to check for no overflows in a*b
+   return a <= INT_MAX/b;
+}
+
+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
+// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad2sizes_valid(int a, int b, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add);
+}
+#endif
+
+// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad3sizes_valid(int a, int b, int c, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
+      stbi__addsizes_valid(a*b*c, add);
+}
+
+// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
+static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
+      stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add);
+}
+#endif
+
+#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
+// mallocs with size overflow checking
+static void *stbi__malloc_mad2(int a, int b, int add)
+{
+   if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
+   return stbi__malloc(a*b + add);
+}
+#endif
+
+static void *stbi__malloc_mad3(int a, int b, int c, int add)
+{
+   if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
+   return stbi__malloc(a*b*c + add);
+}
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
+static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
+{
+   if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
+   return stbi__malloc(a*b*c*d + add);
+}
+#endif
+
 // stbi__err - error
 // stbi__errpf - error returning pointer to float
 // stbi__errpuc - error returning pointer to unsigned char
@@ -928,40 +1091,69 @@ static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
 #endif
 
-static int stbi__vertically_flip_on_load = 0;
+static int stbi__vertically_flip_on_load_global = 0;
 
 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
 {
-    stbi__vertically_flip_on_load = flag_true_if_should_flip;
+   stbi__vertically_flip_on_load_global = flag_true_if_should_flip;
 }
 
-static unsigned char *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+#ifndef STBI_THREAD_LOCAL
+#define stbi__vertically_flip_on_load  stbi__vertically_flip_on_load_global
+#else
+static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set;
+
+STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip)
 {
-   #ifndef STBI_NO_JPEG
-   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp);
-   #endif
+   stbi__vertically_flip_on_load_local = flag_true_if_should_flip;
+   stbi__vertically_flip_on_load_set = 1;
+}
+
+#define stbi__vertically_flip_on_load  (stbi__vertically_flip_on_load_set       \
+                                         ? stbi__vertically_flip_on_load_local  \
+                                         : stbi__vertically_flip_on_load_global)
+#endif // STBI_THREAD_LOCAL
+
+static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
+{
+   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
+   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
+   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
+   ri->num_channels = 0;
+
+   // test the formats with a very explicit header first (at least a FOURCC
+   // or distinctive magic number)
    #ifndef STBI_NO_PNG
-   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp);
+   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_BMP
-   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp);
+   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_GIF
-   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp);
+   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_PSD
-   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp);
+   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
+   #else
+   STBI_NOTUSED(bpc);
    #endif
    #ifndef STBI_NO_PIC
-   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp);
+   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
+   #endif
+
+   // then the formats that can end up attempting to load with just 1 or 2
+   // bytes matching expectations; these are prone to false positives, so
+   // try them later
+   #ifndef STBI_NO_JPEG
+   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
    #endif
    #ifndef STBI_NO_PNM
-   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp);
+   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
    #endif
 
    #ifndef STBI_NO_HDR
    if (stbi__hdr_test(s)) {
-      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp);
+      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
       return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
    }
    #endif
@@ -969,66 +1161,179 @@ static unsigned char *stbi__load_main(stbi__context *s, int *x, int *y, int *com
    #ifndef STBI_NO_TGA
    // test tga last because it's a crappy test!
    if (stbi__tga_test(s))
-      return stbi__tga_load(s,x,y,comp,req_comp);
+      return stbi__tga_load(s,x,y,comp,req_comp, ri);
    #endif
 
    return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
 }
 
-static unsigned char *stbi__load_flip(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) // narrows w*h*channels 16-bit samples into a new 8-bit buffer; frees orig on success
 {
-   unsigned char *result = stbi__load_main(s, x, y, comp, req_comp);
+   int i;
+   int img_len = w * h * channels; // total sample count; NOTE(review): plain int multiply, no overflow check here
+   stbi_uc *reduced;
 
-   if (stbi__vertically_flip_on_load && result != NULL) {
-      int w = *x, h = *y;
-      int depth = req_comp ? req_comp : *comp;
-      int row,col,z;
-      stbi_uc temp;
-
-      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
-      for (row = 0; row < (h>>1); row++) {
-         for (col = 0; col < w; col++) {
-            for (z = 0; z < depth; z++) {
-               temp = result[(row * w + col) * depth + z];
-               result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
-               result[((h - row - 1) * w + col) * depth + z] = temp;
-            }
-         }
+   reduced = (stbi_uc *) stbi__malloc(img_len); // one byte per output sample
+   if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); // NOTE(review): orig is not freed on this path
+
+   for (i = 0; i < img_len; ++i)
+      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // keep the high byte of each 16-bit sample; sufficient approx of 16->8 bit scaling
+
+   STBI_FREE(orig); // input buffer is released once the narrowed copy is complete
+   return reduced;
+}
+
+static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) // widens w*h*channels 8-bit samples into a new 16-bit buffer; frees orig on success
+{
+   int i;
+   int img_len = w * h * channels; // total sample count; NOTE(review): plain int multiply, no overflow check here
+   stbi__uint16 *enlarged;
+
+   enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); // two bytes per output sample
+   if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); // NOTE(review): orig is not freed on this path
+
+   for (i = 0; i < img_len; ++i)
+      enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
+
+   STBI_FREE(orig); // input buffer is released once the widened copy is complete
+   return enlarged;
+}
+
+static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) // reverses the row order of a w x h image in place
+{
+   int row;
+   size_t bytes_per_row = (size_t)w * bytes_per_pixel; // row stride computed in size_t
+   stbi_uc temp[2048]; // fixed-size bounce buffer; rows are exchanged in chunks of at most sizeof(temp) bytes
+   stbi_uc *bytes = (stbi_uc *)image;
+
+   for (row = 0; row < (h>>1); row++) { // only the first h/2 rows are visited; with odd h the middle row is left untouched
+      stbi_uc *row0 = bytes + row*bytes_per_row; // row counted from the top
+      stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; // its mirror counted from the bottom
+      // swap row0 with row1, one temp-sized chunk at a time
+      size_t bytes_left = bytes_per_row;
+      while (bytes_left) {
+         size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); // min(bytes_left, sizeof(temp))
+         memcpy(temp, row0, bytes_copy);
+         memcpy(row0, row1, bytes_copy);
+         memcpy(row1, temp, bytes_copy);
+         row0 += bytes_copy;
+         row1 += bytes_copy;
+         bytes_left -= bytes_copy;
       }
    }
+}
 
-   return result;
+#ifndef STBI_NO_GIF
+static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) // flips each of z consecutive w x h slices in place
+{
+   int slice;
+   int slice_size = w * h * bytes_per_pixel; // bytes occupied by one slice; slices are assumed to be packed back to back
+
+   stbi_uc *bytes = (stbi_uc *)image;
+   for (slice = 0; slice < z; ++slice) {
+      stbi__vertical_flip(bytes, w, h, bytes_per_pixel); // flip this slice on its own
+      bytes += slice_size; // advance to the start of the next slice
+   }
 }
+#endif
 
-#ifndef STBI_NO_HDR
+static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) // decodes via stbi__load_main and returns 8-bit samples, flipped if requested
+{
+   stbi__result_info ri;
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); // trailing 8 is the bpc argument; the visible dispatch forwards it only to the PSD loader
+
+   if (result == NULL)
+      return NULL;
+
+   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
+   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
+
+   if (ri.bits_per_channel != 8) { // loader produced 16-bit data: narrow it (the helper frees the 16-bit buffer on success)
+      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
+      ri.bits_per_channel = 8; // NOTE(review): result is not re-checked here; a failed conversion would still reach the flip below
+   }
+
+   // @TODO: move stbi__convert_format to here
+
+   if (stbi__vertically_flip_on_load) {
+      int channels = req_comp ? req_comp : *comp; // channel count of the returned buffer: requested count if nonzero, else the file's
+      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); // pixel size in bytes = channels * 1
+   }
+
+   return (unsigned char *) result;
+}
+
+static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) // decodes via stbi__load_main and returns 16-bit samples, flipped if requested
+{
+   stbi__result_info ri;
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); // trailing 16 is the bpc argument; the visible dispatch forwards it only to the PSD loader
+
+   if (result == NULL)
+      return NULL;
+
+   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
+   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
+
+   if (ri.bits_per_channel != 16) { // loader produced 8-bit data: widen it (the helper frees the 8-bit buffer on success)
+      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
+      ri.bits_per_channel = 16; // NOTE(review): result is not re-checked here; a failed conversion would still reach the flip below
+   }
+
+   // @TODO: move stbi__convert_format16 to here
+   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
+
+   if (stbi__vertically_flip_on_load) {
+      int channels = req_comp ? req_comp : *comp; // channel count of the returned buffer: requested count if nonzero, else the file's
+      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); // pixel size in bytes = channels * sizeof(stbi__uint16)
+   }
+
+   return (stbi__uint16 *) result;
+}
+
+#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR) // compiled only when both HDR and linear (float) support are enabled
 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
 {
    if (stbi__vertically_flip_on_load && result != NULL) {
-      int w = *x, h = *y;
-      int depth = req_comp ? req_comp : *comp;
-      int row,col,z;
-      float temp;
-
-      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
-      for (row = 0; row < (h>>1); row++) {
-         for (col = 0; col < w; col++) {
-            for (z = 0; z < depth; z++) {
-               temp = result[(row * w + col) * depth + z];
-               result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
-               result[((h - row - 1) * w + col) * depth + z] = temp;
-            }
-         }
-      }
+      int channels = req_comp ? req_comp : *comp; // channel count of the float buffer: requested count if nonzero, else the file's
+      stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); // shared row-swap helper; pixel size in bytes = channels * sizeof(float)
    }
 }
 #endif
 
 #ifndef STBI_NO_STDIO
 
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) // hand-written prototypes for the two Win32 conversion routines used in this section
+STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
+STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
+#endif
+
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) // public helper, available only in Windows builds that opt in to UTF-8 filenames
+STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) // writes input as UTF-8 into buffer (capacity bufferlen) and returns WideCharToMultiByte's result
+{
+	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); // cchwide = -1: per the Win32 docs the input is treated as NUL-terminated; bufferlen is narrowed to int
+}
+#endif
+
 static FILE *stbi__fopen(char const *filename, char const *mode)
 {
    FILE *f;
+#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
+   wchar_t wMode[64];
+   wchar_t wFilename[1024];
+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
+      return 0;
+
+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
+      return 0;
+
 #if defined(_MSC_VER) && _MSC_VER >= 1400
+	if (0 != _wfopen_s(&f, wFilename, wMode))
+		f = 0;
+#else
+   f = _wfopen(wFilename, wMode);
+#endif
+
+#elif defined(_MSC_VER) && _MSC_VER >= 1400
    if (0 != fopen_s(&f, filename, mode))
       f=0;
 #else
@@ -1053,42 +1358,98 @@ STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req
    unsigned char *result;
    stbi__context s;
    stbi__start_file(&s,f);
-   result = stbi__load_flip(&s,x,y,comp,req_comp);
+   result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
+   if (result) {
+      // need to 'unget' all the characters in the IO buffer
+      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
+   }
+   return result;
+}
+
+STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) // 16-bit counterpart of stbi_load_from_file: decodes from an already-open FILE
+{
+   stbi__uint16 *result;
+   stbi__context s;
+   stbi__start_file(&s,f); // bind the decoding context to the caller's FILE
+   result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); // decode, widen to 16 bits if needed, flip if requested
    if (result) {
       // need to 'unget' all the characters in the IO buffer
       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
    }
    return result;
 }
+
+STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) // opens filename, decodes it to 16-bit samples, and closes the file again
+{
+   FILE *f = stbi__fopen(filename, "rb"); // binary read mode
+   stbi__uint16 *result;
+   if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); // open failed: report through the shared error helper
+   result = stbi_load_from_file_16(f,x,y,comp,req_comp);
+   fclose(f); // the file is closed whether or not decoding succeeded
+   return result;
+}
+
+
 #endif //!STBI_NO_STDIO
 
+STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) // 16-bit load from an in-memory buffer of len bytes
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len); // bind the decoding context to the caller's buffer
+   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); // channels_in_file / desired_channels are passed as comp / req_comp
+}
+
+STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) // 16-bit load through user-supplied I/O callbacks
+{
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); // cast drops const to match stbi__start_callbacks' parameter; user is handed through alongside clbk
+   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); // channels_in_file / desired_channels are passed as comp / req_comp
+}
+
 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
 {
    stbi__context s;
    stbi__start_mem(&s,buffer,len);
-   return stbi__load_flip(&s,x,y,comp,req_comp);
+   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); // replaces stbi__load_flip: decode, narrow to 8 bits if needed, flip if requested
 }
 
 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
 {
    stbi__context s;
    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
-   return stbi__load_flip(&s,x,y,comp,req_comp);
+   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); // replaces stbi__load_flip: decode, narrow to 8 bits if needed, flip if requested
 }
 
+#ifndef STBI_NO_GIF
+STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) // loads a GIF from memory via stbi__load_gif_main; *z is used below as the slice (frame) count
+{
+   unsigned char *result;
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len); // bind the decoding context to the caller's buffer
+
+   result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
+   if (stbi__vertically_flip_on_load) { // NOTE(review): result is not checked for NULL before the flip
+      stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); // NOTE(review): *comp, not req_comp, is used as bytes per pixel - confirm against stbi__load_gif_main's output layout
+   }
+
+   return result;
+}
+#endif
+
 #ifndef STBI_NO_LINEAR
 static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
 {
    unsigned char *data;
    #ifndef STBI_NO_HDR
    if (stbi__hdr_test(s)) {
-      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp);
+      stbi__result_info ri;
+      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
       if (hdr_data)
          stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
       return hdr_data;
    }
    #endif
-   data = stbi__load_flip(s, x, y, comp, req_comp);
+   data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
    if (data)
       return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
    return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
@@ -1158,12 +1519,16 @@ STBIDEF int      stbi_is_hdr          (char const *filename)
    return result;
 }
 
-STBIDEF int      stbi_is_hdr_from_file(FILE *f)
+STBIDEF int stbi_is_hdr_from_file(FILE *f)
 {
    #ifndef STBI_NO_HDR
+   long pos = ftell(f);
+   int res;
    stbi__context s;
    stbi__start_file(&s,f);
-   return stbi__hdr_test(&s);
+   res = stbi__hdr_test(&s);
+   fseek(f, pos, SEEK_SET);
+   return res;
    #else
    STBI_NOTUSED(f);
    return 0;
@@ -1212,6 +1577,7 @@ enum
 static void stbi__refill_buffer(stbi__context *s)
 {
    int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
+   s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original);
    if (n == 0) {
       // at end of file, treat same as if from memory, but need to handle case
       // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
@@ -1236,6 +1602,9 @@ stbi_inline static stbi_uc stbi__get8(stbi__context *s)
    return 0;
 }
 
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+// nothing
+#else
 stbi_inline static int stbi__at_eof(stbi__context *s)
 {
    if (s->io.read) {
@@ -1247,9 +1616,14 @@ stbi_inline static int stbi__at_eof(stbi__context *s)
 
    return s->img_buffer >= s->img_buffer_end;
 }
+#endif
 
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC)
+// nothing
+#else
 static void stbi__skip(stbi__context *s, int n)
 {
+   if (n == 0) return;  // already there!
    if (n < 0) {
       s->img_buffer = s->img_buffer_end;
       return;
@@ -1264,7 +1638,11 @@ static void stbi__skip(stbi__context *s, int n)
    }
    s->img_buffer += n;
 }
+#endif
 
+#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)
+// nothing
+#else
 static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
 {
    if (s->io.read) {
@@ -1288,18 +1666,27 @@ static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
    } else
       return 0;
 }
+#endif
 
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
+// nothing: stbi__get16be is compiled out when JPEG, PNG, PSD and PIC are all disabled
+#else
 static int stbi__get16be(stbi__context *s)
 {
    int z = stbi__get8(s);
    return (z << 8) + stbi__get8(s);
 }
+#endif // stbi__get16be guard
 
+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
+// nothing: stbi__get32be is compiled out when PNG, PSD and PIC are all disabled
+#else
 static stbi__uint32 stbi__get32be(stbi__context *s)
 {
    stbi__uint32 z = stbi__get16be(s);
    return (z << 16) + stbi__get16be(s);
 }
+#endif // stbi__get32be guard
 
 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
 // nothing
@@ -1315,13 +1702,16 @@ static int stbi__get16le(stbi__context *s)
 static stbi__uint32 stbi__get32le(stbi__context *s)
 {
    stbi__uint32 z = stbi__get16le(s);
-   return z + (stbi__get16le(s) << 16);
+   z += (stbi__uint32)stbi__get16le(s) << 16; // widen to stbi__uint32 before shifting so the high half is not shifted as a signed int
+   return z; // low 16 bits were read first, high 16 bits second (little-endian)
 }
 #endif
 
 #define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings
 
-
+#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+// nothing
+#else
 //////////////////////////////////////////////////////////////////////////////
 //
 //  generic converter from built-in img_n to req_comp
@@ -1337,7 +1727,11 @@ static stbi_uc stbi__compute_y(int r, int g, int b)
 {
    return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
 }
+#endif
 
+#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
+// nothing
+#else
 static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
 {
    int i,j;
@@ -1346,7 +1740,7 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r
    if (req_comp == img_n) return data;
    STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
 
-   good = (unsigned char *) stbi__malloc(req_comp * x * y);
+   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
    if (good == NULL) {
       STBI_FREE(data);
       return stbi__errpuc("outofmem", "Out of memory");
@@ -1356,37 +1750,97 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r
       unsigned char *src  = data + j * x * img_n   ;
       unsigned char *dest = good + j * x * req_comp;
 
-      #define COMBO(a,b)  ((a)*8+(b))
-      #define CASE(a,b)   case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+      #define STBI__COMBO(a,b)  ((a)*8+(b))
+      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
       // convert source image with img_n components to one with req_comp components;
       // avoid switch per pixel, so use switch per scanline and massive macros
-      switch (COMBO(img_n, req_comp)) {
-         CASE(1,2) dest[0]=src[0], dest[1]=255; break;
-         CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
-         CASE(2,1) dest[0]=src[0]; break;
-         CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
-         CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
-         CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
-         CASE(3,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
-         CASE(3,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
-         CASE(4,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
-         CASE(4,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
-         CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
-         default: STBI_ASSERT(0);
+      switch (STBI__COMBO(img_n, req_comp)) {
+         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break;
+         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
+         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break;
+         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
+         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
+         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break;
+         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break;
+         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break;
+         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
+         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break;
+         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion");
       }
-      #undef CASE
+      #undef STBI__CASE
    }
 
    STBI_FREE(data);
    return good;
 }
+#endif
+
+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
+// nothing: stbi__compute_y_16 is compiled out when PNG and PSD are both disabled
+#else
+static stbi__uint16 stbi__compute_y_16(int r, int g, int b) // 16-bit variant of stbi__compute_y: weighted sum of r, g, b reduced to one luma sample
+{
+   return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8); // weights 77+150+29 sum to 256, so >>8 renormalizes the weighted sum
+}
+#endif
+
+#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
+// nothing: stbi__convert_format16 is compiled out when PNG and PSD are both disabled
+#else
+static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) // 16-bit twin of stbi__convert_format: repacks img_n-channel pixels as req_comp-channel pixels
+{
+   int i,j;
+   stbi__uint16 *good;
+
+   if (req_comp == img_n) return data; // already in the requested layout: hand back the input buffer untouched
+   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
+   good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); // NOTE(review): the 8-bit stbi__convert_format uses stbi__malloc_mad3 for its size - confirm whether this multiply needs the same treatment
+   if (good == NULL) {
+      STBI_FREE(data); // the input buffer is released on failure as well
+      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
+   }
+
+   for (j=0; j < (int) y; ++j) { // one scanline per iteration
+      stbi__uint16 *src  = data + j * x * img_n   ;
+      stbi__uint16 *dest = good + j * x * req_comp;
+
+      #define STBI__COMBO(a,b)  ((a)*8+(b))
+      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+      // convert source image with img_n components to one with req_comp components;
+      // avoid switch per pixel, so use switch per scanline and massive macros
+      switch (STBI__COMBO(img_n, req_comp)) { // added alpha is filled with 0xffff; dropped colour is reduced via stbi__compute_y_16
+         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff;                                     } break;
+         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
+         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff;                     } break;
+         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
+         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
+         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                     } break;
+         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff;        } break;
+         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
+         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
+         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                       } break;
+         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion");
+      }
+      #undef STBI__CASE
+   }
+
+   STBI_FREE(data); // the converted copy replaces the input buffer
+   return good;
+}
+#endif
 
 #ifndef STBI_NO_LINEAR
 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
 {
    int i,k,n;
-   float *output = (float *) stbi__malloc(x * y * comp * sizeof(float));
+   float *output;
+   if (!data) return NULL;
+   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
    if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
    // compute number of non-alpha components
    if (comp & 1) n = comp; else n = comp-1;
@@ -1394,7 +1848,11 @@ static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
       for (k=0; k < n; ++k) {
          output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
       }
-      if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
+   }
+   if (n < comp) {
+      for (i=0; i < x*y; ++i) {
+         output[i*comp + n] = data[i*comp + n]/255.0f;
+      }
    }
    STBI_FREE(data);
    return output;
@@ -1406,7 +1864,9 @@ static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
 {
    int i,k,n;
-   stbi_uc *output = (stbi_uc *) stbi__malloc(x * y * comp);
+   stbi_uc *output;
+   if (!data) return NULL;
+   output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
    // compute number of non-alpha components
    if (comp & 1) n = comp; else n = comp-1;
@@ -1471,7 +1931,7 @@ typedef struct
    stbi__context *s;
    stbi__huffman huff_dc[4];
    stbi__huffman huff_ac[4];
-   stbi_uc dequant[4][64];
+   stbi__uint16 dequant[4][64];
    stbi__int16 fast_ac[4][1 << FAST_BITS];
 
 // sizes for components, interleaved MCUs
@@ -1507,6 +1967,8 @@ typedef struct
    int            succ_high;
    int            succ_low;
    int            eob_run;
+   int            jfif;
+   int            app14_color_transform; // Adobe APP14 tag
    int            rgb;
 
    int scan_n, order[4];
@@ -1520,7 +1982,8 @@ typedef struct
 
 static int stbi__build_huffman(stbi__huffman *h, int *count)
 {
-   int i,j,k=0,code;
+   int i,j,k=0;
+   unsigned int code;
    // build size list for each symbol (from JPEG spec)
    for (i=0; i < 16; ++i)
       for (j=0; j < count[i]; ++j)
@@ -1536,7 +1999,7 @@ static int stbi__build_huffman(stbi__huffman *h, int *count)
       if (h->size[k] == j) {
          while (h->size[k] == j)
             h->code[k++] = (stbi__uint16) (code++);
-         if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
+         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
       }
       // compute largest code + 1 for this size, preshifted as needed later
       h->maxcode[j] = code << (16-j);
@@ -1577,10 +2040,10 @@ static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
             // magnitude code followed by receive_extend code
             int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
             int m = 1 << (magbits - 1);
-            if (k < m) k += (-1 << magbits) + 1;
+            if (k < m) k += (~0U << magbits) + 1;
             // if the result is small enough, we can fit it in fast_ac table
             if (k >= -128 && k <= 127)
-               fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
+               fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
          }
       }
    }
@@ -1589,9 +2052,10 @@ static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
 static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
 {
    do {
-      int b = j->nomore ? 0 : stbi__get8(j->s);
+      unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
       if (b == 0xff) {
          int c = stbi__get8(j->s);
+         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
          if (c != 0) {
             j->marker = (unsigned char) c;
             j->nomore = 1;
@@ -1604,7 +2068,7 @@ static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
 }
 
 // (1 << n) - 1
-static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
+static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
 
 // decode a jpeg huffman value from the bitstream
 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
@@ -1657,7 +2121,7 @@ stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
 }
 
 // bias[n] = (-1<<n) + 1
-static int const stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
+static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
 
 // combined JPEG 'receive' and JPEG 'extend', since baseline
 // always extends everything it receives.
@@ -1667,13 +2131,12 @@ stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
    int sgn;
    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
 
-   sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
+   sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
    k = stbi_lrot(j->code_buffer, n);
-   STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
    j->code_buffer = k & ~stbi__bmask[n];
    k &= stbi__bmask[n];
    j->code_bits -= n;
-   return k + (stbi__jbias[n] & ~sgn);
+   return k + (stbi__jbias[n] & (sgn - 1));
 }
 
 // get some unsigned bits
@@ -1700,7 +2163,7 @@ stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
 
 // given a value that's at position X in the zigzag stream,
 // where does it appear in the 8x8 matrix coded as row-major?
-static stbi_uc stbi__jpeg_dezigzag[64+15] =
+static const stbi_uc stbi__jpeg_dezigzag[64+15] =
 {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
@@ -1716,14 +2179,14 @@ static stbi_uc stbi__jpeg_dezigzag[64+15] =
 };
 
 // decode one 64-entry block--
-static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi_uc *dequant)
+static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
 {
    int diff,dc,k;
    int t;
 
    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
    t = stbi__jpeg_huff_decode(j, hdc);
-   if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+   if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
 
    // 0 all the ac values now so we can do it 32-bits at a time
    memset(data,0,64*sizeof(data[0]));
@@ -1780,11 +2243,12 @@ static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__
       // first scan for DC coefficient, must be first
       memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
       t = stbi__jpeg_huff_decode(j, hdc);
+      if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
       diff = t ? stbi__extend_receive(j, t) : 0;
 
       dc = j->img_comp[b].dc_pred + diff;
       j->img_comp[b].dc_pred = dc;
-      data[0] = (short) (dc << j->succ_low);
+      data[0] = (short) (dc * (1 << j->succ_low));
    } else {
       // refinement scan for DC coefficient
       if (stbi__jpeg_get_bit(j))
@@ -1821,7 +2285,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
             j->code_buffer <<= s;
             j->code_bits -= s;
             zig = stbi__jpeg_dezigzag[k++];
-            data[zig] = (short) ((r >> 8) << shift);
+            data[zig] = (short) ((r >> 8) * (1 << shift));
          } else {
             int rs = stbi__jpeg_huff_decode(j, hac);
             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
@@ -1839,7 +2303,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__
             } else {
                k += r;
                zig = stbi__jpeg_dezigzag[k++];
-               data[zig] = (short) (stbi__extend_receive(j,s) << shift);
+               data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
             }
          }
       } while (k <= j->spec_end);
@@ -1926,7 +2390,7 @@ stbi_inline static stbi_uc stbi__clamp(int x)
 }
 
 #define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
-#define stbi__fsh(x)  ((x) << 12)
+#define stbi__fsh(x)  ((x) * 4096)
 
 // derived from jidctint -- DCT_ISLOW
 #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
@@ -1981,7 +2445,7 @@ static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
          //    (1|2|3|4|5|6|7)==0          0     seconds
          //    all separate               -0.047 seconds
          //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
-         int dcterm = d[0] << 2;
+         int dcterm = d[0]*4;
          v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
       } else {
          STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
@@ -2425,7 +2889,7 @@ static stbi_uc stbi__get_marker(stbi__jpeg *j)
    x = stbi__get8(j->s);
    if (x != 0xff) return STBI__MARKER_none;
    while (x == 0xff)
-      x = stbi__get8(j->s);
+      x = stbi__get8(j->s); // consume repeated 0xff fill bytes
    return x;
 }
 
@@ -2440,7 +2904,7 @@ static void stbi__jpeg_reset(stbi__jpeg *j)
    j->code_bits = 0;
    j->code_buffer = 0;
    j->nomore = 0;
-   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
+   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
    j->marker = STBI__MARKER_none;
    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
    j->eob_run = 0;
@@ -2572,7 +3036,7 @@ static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
    }
 }
 
-static void stbi__jpeg_dequantize(short *data, stbi_uc *dequant)
+static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
 {
    int i;
    for (i=0; i < 64; ++i)
@@ -2614,13 +3078,14 @@ static int stbi__process_marker(stbi__jpeg *z, int m)
          L = stbi__get16be(z->s)-2;
          while (L > 0) {
             int q = stbi__get8(z->s);
-            int p = q >> 4;
+            int p = q >> 4, sixteen = (p != 0);
             int t = q & 15,i;
-            if (p != 0) return stbi__err("bad DQT type","Corrupt JPEG");
+            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
             if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
+
             for (i=0; i < 64; ++i)
-               z->dequant[t][stbi__jpeg_dezigzag[i]] = stbi__get8(z->s);
-            L -= 65;
+               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
+            L -= (sixteen ? 129 : 65);
          }
          return L==0;
 
@@ -2653,12 +3118,50 @@ static int stbi__process_marker(stbi__jpeg *z, int m)
          }
          return L==0;
    }
+
    // check for comment block or APP blocks
    if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
-      stbi__skip(z->s, stbi__get16be(z->s)-2);
+      L = stbi__get16be(z->s);
+      if (L < 2) {
+         if (m == 0xFE)
+            return stbi__err("bad COM len","Corrupt JPEG");
+         else
+            return stbi__err("bad APP len","Corrupt JPEG");
+      }
+      L -= 2;
+
+      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
+         static const unsigned char tag[5] = {'J','F','I','F','\0'};
+         int ok = 1;
+         int i;
+         for (i=0; i < 5; ++i)
+            if (stbi__get8(z->s) != tag[i])
+               ok = 0;
+         L -= 5;
+         if (ok)
+            z->jfif = 1;
+      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
+         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
+         int ok = 1;
+         int i;
+         for (i=0; i < 6; ++i)
+            if (stbi__get8(z->s) != tag[i])
+               ok = 0;
+         L -= 6;
+         if (ok) {
+            stbi__get8(z->s); // version
+            stbi__get16be(z->s); // flags0
+            stbi__get16be(z->s); // flags1
+            z->app14_color_transform = stbi__get8(z->s); // color transform
+            L -= 6;
+         }
+      }
+
+      stbi__skip(z->s, L);
       return 1;
    }
-   return 0;
+
+   return stbi__err("unknown marker","Corrupt JPEG");
 }
 
 // after we see SOS
@@ -2701,6 +3204,28 @@ static int stbi__process_scan_header(stbi__jpeg *z)
    return 1;
 }
 
+static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
+{
+   int i;
+   for (i=0; i < ncomp; ++i) {
+      if (z->img_comp[i].raw_data) {
+         STBI_FREE(z->img_comp[i].raw_data);
+         z->img_comp[i].raw_data = NULL;
+         z->img_comp[i].data = NULL;
+      }
+      if (z->img_comp[i].raw_coeff) {
+         STBI_FREE(z->img_comp[i].raw_coeff);
+         z->img_comp[i].raw_coeff = 0;
+         z->img_comp[i].coeff = 0;
+      }
+      if (z->img_comp[i].linebuf) {
+         STBI_FREE(z->img_comp[i].linebuf);
+         z->img_comp[i].linebuf = NULL;
+      }
+   }
+   return why;
+}
+
 static int stbi__process_frame_header(stbi__jpeg *z, int scan)
 {
    stbi__context *s = z->s;
@@ -2709,8 +3234,10 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
    p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
    s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
    s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
+   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
    c = stbi__get8(s);
-   if (c != 3 && c != 1) return stbi__err("bad component count","Corrupt JPEG");    // JFIF requires
+   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
    s->img_n = c;
    for (i=0; i < c; ++i) {
       z->img_comp[i].data = NULL;
@@ -2721,15 +3248,10 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
 
    z->rgb = 0;
    for (i=0; i < s->img_n; ++i) {
-      static unsigned char rgb[3] = { 'R', 'G', 'B' };
+      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
       z->img_comp[i].id = stbi__get8(s);
-      if (z->img_comp[i].id != i+1)   // JFIF requires
-         if (z->img_comp[i].id != i) {  // some version of jpegtran outputs non-JFIF-compliant files!
-            // somethings output this (see http://fileformats.archiveteam.org/wiki/JPEG#Color_format)
-            if (z->img_comp[i].id != rgb[i])
-               return stbi__err("bad component ID","Corrupt JPEG");
-            ++z->rgb;
-         }
+      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
+         ++z->rgb;
       q = stbi__get8(s);
       z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
       z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
@@ -2738,18 +3260,26 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
 
    if (scan != STBI__SCAN_load) return 1;
 
-   if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
+   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
 
    for (i=0; i < s->img_n; ++i) {
       if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
       if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
    }
 
+   // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
+   // and I've never seen a non-corrupted JPEG file actually use them
+   for (i=0; i < s->img_n; ++i) {
+      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
+      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
+   }
+
    // compute interleaved mcu info
    z->img_h_max = h_max;
    z->img_v_max = v_max;
    z->img_mcu_w = h_max * 8;
    z->img_mcu_h = v_max * 8;
+   // these sizes can't be more than 17 bits
    z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
    z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
 
@@ -2761,28 +3291,27 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
       // the bogus oversized data from using interleaved MCUs and their
       // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
       // discard the extra data until colorspace conversion
+      //
+      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
+      // so these muls can't overflow with 32-bit ints (which we require)
       z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
       z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
-      z->img_comp[i].raw_data = stbi__malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
-
-      if (z->img_comp[i].raw_data == NULL) {
-         for(--i; i >= 0; --i) {
-            STBI_FREE(z->img_comp[i].raw_data);
-            z->img_comp[i].raw_data = NULL;
-         }
-         return stbi__err("outofmem", "Out of memory");
-      }
+      z->img_comp[i].coeff = 0;
+      z->img_comp[i].raw_coeff = 0;
+      z->img_comp[i].linebuf = NULL;
+      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
+      if (z->img_comp[i].raw_data == NULL)
+         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
       // align blocks for idct using mmx/sse
       z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
-      z->img_comp[i].linebuf = NULL;
       if (z->progressive) {
-         z->img_comp[i].coeff_w = (z->img_comp[i].w2 + 7) >> 3;
-         z->img_comp[i].coeff_h = (z->img_comp[i].h2 + 7) >> 3;
-         z->img_comp[i].raw_coeff = STBI_MALLOC(z->img_comp[i].coeff_w * z->img_comp[i].coeff_h * 64 * sizeof(short) + 15);
+         // w2, h2 are multiples of 8 (see above)
+         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
+         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
+         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
+         if (z->img_comp[i].raw_coeff == NULL)
+            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
          z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
-      } else {
-         z->img_comp[i].coeff = 0;
-         z->img_comp[i].raw_coeff = 0;
       }
    }
 
@@ -2801,6 +3330,8 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan)
 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
 {
    int m;
+   z->jfif = 0;
+   z->app14_color_transform = -1; // valid values are 0,1,2
    z->marker = STBI__MARKER_none; // initialize cached marker to empty
    m = stbi__get_marker(z);
    if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
@@ -2842,12 +3373,15 @@ static int stbi__decode_jpeg_image(stbi__jpeg *j)
                if (x == 255) {
                   j->marker = stbi__get8(j->s);
                   break;
-               } else if (x != 0) {
-                  return stbi__err("junk before marker", "Corrupt JPEG");
                }
             }
             // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
          }
+      } else if (stbi__DNL(m)) {
+         int Ld = stbi__get16be(j->s);
+         stbi__uint32 NL = stbi__get16be(j->s);
+         if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
+         if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
       } else {
          if (!stbi__process_marker(j, m)) return 0;
       }
@@ -3066,38 +3600,9 @@ static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_
    return out;
 }
 
-#ifdef STBI_JPEG_OLD
-// this is the same YCbCr-to-RGB calculation that stb_image has used
-// historically before the algorithm changes in 1.49
-#define float2fixed(x)  ((int) ((x) * 65536 + 0.5))
-static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
-{
-   int i;
-   for (i=0; i < count; ++i) {
-      int y_fixed = (y[i] << 16) + 32768; // rounding
-      int r,g,b;
-      int cr = pcr[i] - 128;
-      int cb = pcb[i] - 128;
-      r = y_fixed + cr*float2fixed(1.40200f);
-      g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
-      b = y_fixed                            + cb*float2fixed(1.77200f);
-      r >>= 16;
-      g >>= 16;
-      b >>= 16;
-      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
-      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
-      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
-      out[0] = (stbi_uc)r;
-      out[1] = (stbi_uc)g;
-      out[2] = (stbi_uc)b;
-      out[3] = 255;
-      out += step;
-   }
-}
-#else
 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
 // to make sure the code produces the same results in both SIMD and scalar
-#define float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
+#define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
 {
    int i;
@@ -3106,9 +3611,9 @@ static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc
       int r,g,b;
       int cr = pcr[i] - 128;
       int cb = pcb[i] - 128;
-      r = y_fixed +  cr* float2fixed(1.40200f);
-      g = y_fixed + (cr*-float2fixed(0.71414f)) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
-      b = y_fixed                               +   cb* float2fixed(1.77200f);
+      r = y_fixed +  cr* stbi__float2fixed(1.40200f);
+      g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
+      b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
       r >>= 20;
       g >>= 20;
       b >>= 20;
@@ -3122,7 +3627,6 @@ static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc
       out += step;
    }
 }
-#endif
 
 #if defined(STBI_SSE2) || defined(STBI_NEON)
 static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
@@ -3241,9 +3745,9 @@ static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc cons
       int r,g,b;
       int cr = pcr[i] - 128;
       int cb = pcb[i] - 128;
-      r = y_fixed + cr* float2fixed(1.40200f);
-      g = y_fixed + cr*-float2fixed(0.71414f) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
-      b = y_fixed                             +   cb* float2fixed(1.77200f);
+      r = y_fixed + cr* stbi__float2fixed(1.40200f);
+      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
+      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
       r >>= 20;
       g >>= 20;
       b >>= 20;
@@ -3269,18 +3773,14 @@ static void stbi__setup_jpeg(stbi__jpeg *j)
 #ifdef STBI_SSE2
    if (stbi__sse2_available()) {
       j->idct_block_kernel = stbi__idct_simd;
-      #ifndef STBI_JPEG_OLD
       j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
-      #endif
       j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
    }
 #endif
 
 #ifdef STBI_NEON
    j->idct_block_kernel = stbi__idct_simd;
-   #ifndef STBI_JPEG_OLD
    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
-   #endif
    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
 #endif
 }
@@ -3288,23 +3788,7 @@ static void stbi__setup_jpeg(stbi__jpeg *j)
 // clean up the temporary component buffers
 static void stbi__cleanup_jpeg(stbi__jpeg *j)
 {
-   int i;
-   for (i=0; i < j->s->img_n; ++i) {
-      if (j->img_comp[i].raw_data) {
-         STBI_FREE(j->img_comp[i].raw_data);
-         j->img_comp[i].raw_data = NULL;
-         j->img_comp[i].data = NULL;
-      }
-      if (j->img_comp[i].raw_coeff) {
-         STBI_FREE(j->img_comp[i].raw_coeff);
-         j->img_comp[i].raw_coeff = 0;
-         j->img_comp[i].coeff = 0;
-      }
-      if (j->img_comp[i].linebuf) {
-         STBI_FREE(j->img_comp[i].linebuf);
-         j->img_comp[i].linebuf = NULL;
-      }
-   }
+   stbi__free_jpeg_components(j, j->s->img_n, 0);
 }
 
 typedef struct
@@ -3317,9 +3801,16 @@ typedef struct
    int ypos;    // which pre-expansion row we're on
 } stbi__resample;
 
+// fast 0..255 * 0..255 => 0..255 rounded multiplication
+static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
+{
+   unsigned int t = x*y + 128;
+   return (stbi_uc) ((t + (t >>8)) >> 8);
+}
+
 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
 {
-   int n, decode_n;
+   int n, decode_n, is_rgb;
    z->s->img_n = 0; // make stbi__cleanup_jpeg safe
 
    // validate req_comp
@@ -3329,19 +3820,25 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
    if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
 
    // determine actual number of components to generate
-   n = req_comp ? req_comp : z->s->img_n;
+   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
 
-   if (z->s->img_n == 3 && n < 3)
+   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
+
+   if (z->s->img_n == 3 && n < 3 && !is_rgb)
       decode_n = 1;
    else
       decode_n = z->s->img_n;
 
+   // nothing to do if no components requested; check this now to avoid
+   // accessing uninitialized coutput[0] later
+   if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
+
    // resample and color-convert
    {
       int k;
       unsigned int i,j;
       stbi_uc *output;
-      stbi_uc *coutput[4];
+      stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };
 
       stbi__resample res_comp[4];
 
@@ -3368,7 +3865,7 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
       }
 
       // can't error after this so, this is safe
-      output = (stbi_uc *) stbi__malloc(n * z->s->img_x * z->s->img_y + 1);
+      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
       if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
 
       // now go ahead and resample
@@ -3391,7 +3888,7 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
          if (n >= 3) {
             stbi_uc *y = coutput[0];
             if (z->s->img_n == 3) {
-               if (z->rgb == 3) {
+               if (is_rgb) {
                   for (i=0; i < z->s->img_x; ++i) {
                      out[0] = y[i];
                      out[1] = coutput[1][i];
@@ -3402,6 +3899,28 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
                } else {
                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
                }
+            } else if (z->s->img_n == 4) {
+               if (z->app14_color_transform == 0) { // CMYK
+                  for (i=0; i < z->s->img_x; ++i) {
+                     stbi_uc m = coutput[3][i];
+                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
+                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
+                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
+                     out[3] = 255;
+                     out += n;
+                  }
+               } else if (z->app14_color_transform == 2) { // YCCK
+                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+                  for (i=0; i < z->s->img_x; ++i) {
+                     stbi_uc m = coutput[3][i];
+                     out[0] = stbi__blinn_8x8(255 - out[0], m);
+                     out[1] = stbi__blinn_8x8(255 - out[1], m);
+                     out[2] = stbi__blinn_8x8(255 - out[2], m);
+                     out += n;
+                  }
+               } else { // YCbCr + alpha?  Ignore the fourth channel for now
+                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+               }
             } else
                for (i=0; i < z->s->img_x; ++i) {
                   out[0] = out[1] = out[2] = y[i];
@@ -3409,25 +3928,55 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp
                   out += n;
                }
          } else {
-            stbi_uc *y = coutput[0];
-            if (n == 1)
-               for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
-            else
-               for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
+            if (is_rgb) {
+               if (n == 1)
+                  for (i=0; i < z->s->img_x; ++i)
+                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+               else {
+                  for (i=0; i < z->s->img_x; ++i, out += 2) {
+                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+                     out[1] = 255;
+                  }
+               }
+            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
+               for (i=0; i < z->s->img_x; ++i) {
+                  stbi_uc m = coutput[3][i];
+                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
+                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
+                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
+                  out[0] = stbi__compute_y(r, g, b);
+                  out[1] = 255;
+                  out += n;
+               }
+            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
+               for (i=0; i < z->s->img_x; ++i) {
+                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
+                  out[1] = 255;
+                  out += n;
+               }
+            } else {
+               stbi_uc *y = coutput[0];
+               if (n == 1)
+                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
+               else
+                  for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
+            }
          }
       }
       stbi__cleanup_jpeg(z);
       *out_x = z->s->img_x;
       *out_y = z->s->img_y;
-      if (comp) *comp  = z->s->img_n; // report original components, not output
+      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
       return output;
    }
 }
 
-static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    unsigned char* result;
    stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
+   if (!j) return stbi__errpuc("outofmem", "Out of memory");
+   STBI_NOTUSED(ri);
    j->s = s;
    stbi__setup_jpeg(j);
    result = load_jpeg_image(j, x,y,comp,req_comp);
@@ -3438,11 +3987,13 @@ static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *com
 static int stbi__jpeg_test(stbi__context *s)
 {
    int r;
-   stbi__jpeg j;
-   j.s = s;
-   stbi__setup_jpeg(&j);
-   r = stbi__decode_jpeg_header(&j, STBI__SCAN_type);
+   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
+   if (!j) return stbi__err("outofmem", "Out of memory");
+   j->s = s;
+   stbi__setup_jpeg(j);
+   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
    stbi__rewind(s);
+   STBI_FREE(j);
    return r;
 }
 
@@ -3454,7 +4005,7 @@ static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
    }
    if (x) *x = j->s->img_x;
    if (y) *y = j->s->img_y;
-   if (comp) *comp = j->s->img_n;
+   if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
    return 1;
 }
 
@@ -3462,6 +4013,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
 {
    int result;
    stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
+   if (!j) return stbi__err("outofmem", "Out of memory");
    j->s = s;
    result = stbi__jpeg_info_raw(j, x, y, comp);
    STBI_FREE(j);
@@ -3481,6 +4033,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
 // fast-way is faster to check than jpeg huffman, but slow way is slower
 #define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
 #define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
+#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
 
 // zlib-style huffman encoding
 // (jpegs packs from left, zlib from right, so can't share code)
@@ -3490,8 +4043,8 @@ typedef struct
    stbi__uint16 firstcode[16];
    int maxcode[17];
    stbi__uint16 firstsymbol[16];
-   stbi_uc  size[288];
-   stbi__uint16 value[288];
+   stbi_uc  size[STBI__ZNSYMS];
+   stbi__uint16 value[STBI__ZNSYMS];
 } stbi__zhuffman;
 
 stbi_inline static int stbi__bitreverse16(int n)
@@ -3511,7 +4064,7 @@ stbi_inline static int stbi__bit_reverse(int v, int bits)
    return stbi__bitreverse16(v) >> (16-bits);
 }
 
-static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num)
+static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
 {
    int i,k=0;
    int code, next_code[16], sizes[17];
@@ -3578,16 +4131,23 @@ typedef struct
    stbi__zhuffman z_length, z_distance;
 } stbi__zbuf;
 
+stbi_inline static int stbi__zeof(stbi__zbuf *z)
+{
+   return (z->zbuffer >= z->zbuffer_end);
+}
+
 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
 {
-   if (z->zbuffer >= z->zbuffer_end) return 0;
-   return *z->zbuffer++;
+   return stbi__zeof(z) ? 0 : *z->zbuffer++;
 }
 
 static void stbi__fill_bits(stbi__zbuf *z)
 {
    do {
-      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
+      if (z->code_buffer >= (1U << z->num_bits)) {
+        z->zbuffer = z->zbuffer_end;  /* treat this as EOF so we fail. */
+        return;
+      }
       z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
       z->num_bits += 8;
    } while (z->num_bits <= 24);
@@ -3612,10 +4172,11 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
    for (s=STBI__ZFAST_BITS+1; ; ++s)
       if (k < z->maxcode[s])
          break;
-   if (s == 16) return -1; // invalid code!
+   if (s >= 16) return -1; // invalid code!
    // code size is s, so:
    b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
-   STBI_ASSERT(z->size[b] == s);
+   if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
+   if (z->size[b] != s) return -1;  // was originally an assert, but report failure instead.
    a->code_buffer >>= s;
    a->num_bits -= s;
    return z->value[b];
@@ -3624,7 +4185,12 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
 {
    int b,s;
-   if (a->num_bits < 16) stbi__fill_bits(a);
+   if (a->num_bits < 16) {
+      if (stbi__zeof(a)) {
+         return -1;   /* report error for unexpected end of data. */
+      }
+      stbi__fill_bits(a);
+   }
    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
    if (b) {
       s = b >> 9;
@@ -3638,13 +4204,16 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
 static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
 {
    char *q;
-   int cur, limit, old_limit;
+   unsigned int cur, limit, old_limit;
    z->zout = zout;
    if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
-   cur   = (int) (z->zout     - z->zout_start);
-   limit = old_limit = (int) (z->zout_end - z->zout_start);
-   while (cur + n > limit)
+   cur   = (unsigned int) (z->zout - z->zout_start);
+   limit = old_limit = (unsigned) (z->zout_end - z->zout_start);
+   if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory");
+   while (cur + n > limit) {
+      if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory");
       limit *= 2;
+   }
    q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
    STBI_NOTUSED(old_limit);
    if (q == NULL) return stbi__err("outofmem", "Out of memory");
@@ -3654,18 +4223,18 @@ static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room
    return 1;
 }
 
-static int stbi__zlength_base[31] = {
+static const int stbi__zlength_base[31] = {
    3,4,5,6,7,8,9,10,11,13,
    15,17,19,23,27,31,35,43,51,59,
    67,83,99,115,131,163,195,227,258,0,0 };
 
-static int stbi__zlength_extra[31]=
+static const int stbi__zlength_extra[31]=
 { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
 
-static int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
+static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
 
-static int stbi__zdist_extra[32] =
+static const int stbi__zdist_extra[32] =
 { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
 
 static int stbi__parse_huffman_block(stbi__zbuf *a)
@@ -3712,7 +4281,7 @@ static int stbi__parse_huffman_block(stbi__zbuf *a)
 
 static int stbi__compute_huffman_codes(stbi__zbuf *a)
 {
-   static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
+   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
    stbi__zhuffman z_codelength;
    stbi_uc lencodes[286+32+137];//padding for maximum single op
    stbi_uc codelength_sizes[19];
@@ -3721,6 +4290,7 @@ static int stbi__compute_huffman_codes(stbi__zbuf *a)
    int hlit  = stbi__zreceive(a,5) + 257;
    int hdist = stbi__zreceive(a,5) + 1;
    int hclen = stbi__zreceive(a,4) + 4;
+   int ntot  = hlit + hdist;
 
    memset(codelength_sizes, 0, sizeof(codelength_sizes));
    for (i=0; i < hclen; ++i) {
@@ -3730,27 +4300,30 @@ static int stbi__compute_huffman_codes(stbi__zbuf *a)
    if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
 
    n = 0;
-   while (n < hlit + hdist) {
+   while (n < ntot) {
       int c = stbi__zhuffman_decode(a, &z_codelength);
       if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
       if (c < 16)
          lencodes[n++] = (stbi_uc) c;
-      else if (c == 16) {
-         c = stbi__zreceive(a,2)+3;
-         memset(lencodes+n, lencodes[n-1], c);
-         n += c;
-      } else if (c == 17) {
-         c = stbi__zreceive(a,3)+3;
-         memset(lencodes+n, 0, c);
-         n += c;
-      } else {
-         STBI_ASSERT(c == 18);
-         c = stbi__zreceive(a,7)+11;
-         memset(lencodes+n, 0, c);
+      else {
+         stbi_uc fill = 0;
+         if (c == 16) {
+            c = stbi__zreceive(a,2)+3;
+            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
+            fill = lencodes[n-1];
+         } else if (c == 17) {
+            c = stbi__zreceive(a,3)+3;
+         } else if (c == 18) {
+            c = stbi__zreceive(a,7)+11;
+         } else {
+            return stbi__err("bad codelengths", "Corrupt PNG");
+         }
+         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
+         memset(lencodes+n, fill, c);
          n += c;
       }
    }
-   if (n != hlit+hdist) return stbi__err("bad codelengths","Corrupt PNG");
+   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
    if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
    if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
    return 1;
@@ -3769,7 +4342,7 @@ static int stbi__parse_uncompressed_block(stbi__zbuf *a)
       a->code_buffer >>= 8;
       a->num_bits -= 8;
    }
-   STBI_ASSERT(a->num_bits == 0);
+   if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG");
    // now fill header the normal way
    while (k < 4)
       header[k++] = stbi__zget8(a);
@@ -3791,6 +4364,7 @@ static int stbi__parse_zlib_header(stbi__zbuf *a)
    int cm    = cmf & 15;
    /* int cinfo = cmf >> 4; */
    int flg   = stbi__zget8(a);
+   if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
    if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
    if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
    if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
@@ -3798,9 +4372,24 @@ static int stbi__parse_zlib_header(stbi__zbuf *a)
    return 1;
 }
 
-// @TODO: should statically initialize these for optimal thread safety
-static stbi_uc stbi__zdefault_length[288], stbi__zdefault_distance[32];
-static void stbi__init_zdefaults(void)
+static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] =
+{
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
+};
+static const stbi_uc stbi__zdefault_distance[32] =
+{
+   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
+};
+/*
+Init algorithm:
 {
    int i;   // use <= to match clearly with spec
    for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
@@ -3810,6 +4399,7 @@ static void stbi__init_zdefaults(void)
 
    for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
 }
+*/
 
 static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
 {
@@ -3828,8 +4418,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
       } else {
          if (type == 1) {
             // use fixed code lengths
-            if (!stbi__zdefault_distance[31]) stbi__init_zdefaults();
-            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
+            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , STBI__ZNSYMS)) return 0;
             if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
          } else {
             if (!stbi__compute_huffman_codes(a)) return 0;
@@ -3953,7 +4542,7 @@ static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
 
 static int stbi__check_png_header(stbi__context *s)
 {
-   static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
+   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
    int i;
    for (i=0; i < 8; ++i)
       if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
@@ -3999,7 +4588,7 @@ static int stbi__paeth(int a, int b, int c)
    return c;
 }
 
-static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
 
 // create the png data from post-deflated data
 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
@@ -4016,31 +4605,33 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
    int width = x;
 
    STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
-   a->out = (stbi_uc *) stbi__malloc(x * y * output_bytes); // extra bytes to write off the end into
+   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
    if (!a->out) return stbi__err("outofmem", "Out of memory");
 
+   if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
    img_width_bytes = (((img_n * x * depth) + 7) >> 3);
    img_len = (img_width_bytes + 1) * y;
-   if (s->img_x == x && s->img_y == y) {
-      if (raw_len != img_len) return stbi__err("not enough pixels","Corrupt PNG");
-   } else { // interlaced:
-      if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
-   }
+
+   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
+   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
+   // so just check for raw_len < img_len always.
+   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
 
    for (j=0; j < y; ++j) {
       stbi_uc *cur = a->out + stride*j;
-      stbi_uc *prior = cur - stride;
+      stbi_uc *prior;
       int filter = *raw++;
 
       if (filter > 4)
          return stbi__err("invalid filter","Corrupt PNG");
 
       if (depth < 8) {
-         STBI_ASSERT(img_width_bytes <= x);
+         if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG");
          cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
          filter_bytes = 1;
          width = img_width_bytes;
       }
+      prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
 
       // if first row, use special filter that doesn't sample previous row
       if (j == 0) filter = first_row_filter[filter];
@@ -4081,37 +4672,37 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
       // this is a little gross, so that we don't switch per-pixel or per-component
       if (depth < 8 || img_n == out_n) {
          int nk = (width - 1)*filter_bytes;
-         #define CASE(f) \
+         #define STBI__CASE(f) \
              case f:     \
                 for (k=0; k < nk; ++k)
          switch (filter) {
             // "none" filter turns into a memcpy here; make that explicit.
             case STBI__F_none:         memcpy(cur, raw, nk); break;
-            CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); break;
-            CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); break;
-            CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); break;
-            CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); break;
-            CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); break;
+            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
+            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
+            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
+            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
+            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
          }
-         #undef CASE
+         #undef STBI__CASE
          raw += nk;
       } else {
          STBI_ASSERT(img_n+1 == out_n);
-         #define CASE(f) \
+         #define STBI__CASE(f) \
              case f:     \
                 for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
                    for (k=0; k < filter_bytes; ++k)
          switch (filter) {
-            CASE(STBI__F_none)         cur[k] = raw[k]; break;
-            CASE(STBI__F_sub)          cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); break;
-            CASE(STBI__F_up)           cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
-            CASE(STBI__F_avg)          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); break;
-            CASE(STBI__F_paeth)        cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); break;
-            CASE(STBI__F_avg_first)    cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); break;
-            CASE(STBI__F_paeth_first)  cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); break;
+            STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
+            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
+            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
+            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
+            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
+            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
          }
-         #undef CASE
+         #undef STBI__CASE
 
          // the loop above sets the high byte of the pixels' alpha, but for
          // 16 bit png files we also need the low byte set. we'll do that here.
@@ -4214,13 +4805,16 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r
 
 static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
 {
+   int bytes = (depth == 16 ? 2 : 1);
+   int out_bytes = out_n * bytes;
    stbi_uc *final;
    int p;
    if (!interlaced)
       return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
 
    // de-interlacing
-   final = (stbi_uc *) stbi__malloc(a->s->img_x * a->s->img_y * out_n);
+   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
+   if (!final) return stbi__err("outofmem", "Out of memory");
    for (p=0; p < 7; ++p) {
       int xorig[] = { 0,4,0,2,0,1,0 };
       int yorig[] = { 0,0,4,0,2,0,1 };
@@ -4240,8 +4834,8 @@ static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint3
             for (i=0; i < x; ++i) {
                int out_y = j*yspc[p]+yorig[p];
                int out_x = i*xspc[p]+xorig[p];
-               memcpy(final + out_y*a->s->img_x*out_n + out_x*out_n,
-                      a->out + (j*x+i)*out_n, out_n);
+               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
+                      a->out + (j*x+i)*out_bytes, out_bytes);
             }
          }
          STBI_FREE(a->out);
@@ -4309,7 +4903,7 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int
    stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
    stbi_uc *p, *temp_out, *orig = a->out;
 
-   p = (stbi_uc *) stbi__malloc(pixel_count * pal_img_n);
+   p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
    if (p == NULL) return stbi__err("outofmem", "Out of memory");
 
    // between here and free(out) below, exitting would leak
@@ -4341,39 +4935,46 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int
    return 1;
 }
 
-static int stbi__reduce_png(stbi__png *p)
-{
-   int i;
-   int img_len = p->s->img_x * p->s->img_y * p->s->img_out_n;
-   stbi_uc *reduced;
-   stbi__uint16 *orig = (stbi__uint16*)p->out;
-
-   if (p->depth != 16) return 1; // don't need to do anything if not 16-bit data
-
-   reduced = (stbi_uc *)stbi__malloc(img_len);
-   if (p == NULL) return stbi__err("outofmem", "Out of memory");
-
-   for (i = 0; i < img_len; ++i) reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is a decent approx of 16->8 bit scaling
+static int stbi__unpremultiply_on_load_global = 0;
+static int stbi__de_iphone_flag_global = 0;
 
-   p->out = reduced;
-   STBI_FREE(orig);
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
+{
+   stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
+}
 
-   return 1;
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
+{
+   stbi__de_iphone_flag_global = flag_true_if_should_convert;
 }
 
-static int stbi__unpremultiply_on_load = 0;
-static int stbi__de_iphone_flag = 0;
+#ifndef STBI_THREAD_LOCAL
+#define stbi__unpremultiply_on_load  stbi__unpremultiply_on_load_global
+#define stbi__de_iphone_flag  stbi__de_iphone_flag_global
+#else
+static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
+static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;
 
-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
+STBIDEF void stbi__unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
 {
-   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
+   stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
+   stbi__unpremultiply_on_load_set = 1;
 }
 
-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
+STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
 {
-   stbi__de_iphone_flag = flag_true_if_should_convert;
+   stbi__de_iphone_flag_local = flag_true_if_should_convert;
+   stbi__de_iphone_flag_set = 1;
 }
 
+#define stbi__unpremultiply_on_load  (stbi__unpremultiply_on_load_set           \
+                                       ? stbi__unpremultiply_on_load_local      \
+                                       : stbi__unpremultiply_on_load_global)
+#define stbi__de_iphone_flag  (stbi__de_iphone_flag_set                         \
+                                ? stbi__de_iphone_flag_local                    \
+                                : stbi__de_iphone_flag_global)
+#endif // STBI_THREAD_LOCAL
+
 static void stbi__de_iphone(stbi__png *z)
 {
    stbi__context *s = z->s;
@@ -4395,9 +4996,10 @@ static void stbi__de_iphone(stbi__png *z)
             stbi_uc a = p[3];
             stbi_uc t = p[0];
             if (a) {
-               p[0] = p[2] * 255 / a;
-               p[1] = p[1] * 255 / a;
-               p[2] =  t   * 255 / a;
+               stbi_uc half = a / 2;
+               p[0] = (p[2] * 255 + half) / a;
+               p[1] = (p[1] * 255 + half) / a;
+               p[2] = ( t   * 255 + half) / a;
             } else {
                p[0] = p[2];
                p[2] = t;
@@ -4416,12 +5018,12 @@ static void stbi__de_iphone(stbi__png *z)
    }
 }
 
-#define STBI__PNG_TYPE(a,b,c,d)  (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
+#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
 
 static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
 {
    stbi_uc palette[1024], pal_img_n=0;
-   stbi_uc has_trans=0, tc[3];
+   stbi_uc has_trans=0, tc[3]={0};
    stbi__uint16 tc16[3];
    stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
    int first=1,k,interlace=0, color=0, is_iphone=0;
@@ -4447,11 +5049,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
             if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
             first = 0;
             if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
-            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
-            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
+            s->img_x = stbi__get32be(s);
+            s->img_y = stbi__get32be(s);
+            if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+            if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
             z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
             color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
-			if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
+            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
             if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
             comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
             filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
@@ -4500,7 +5104,7 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
                if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
                has_trans = 1;
                if (z->depth == 16) {
-                  for (k = 0; k < s->img_n; ++k) tc16[k] = stbi__get16be(s); // copy the values as-is
+                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
                } else {
                   for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
                }
@@ -4560,8 +5164,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
                if (req_comp >= 3) s->img_out_n = req_comp;
                if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
                   return 0;
+            } else if (has_trans) {
+               // non-paletted image with tRNS -> source image has (constant) alpha
+               ++s->img_n;
             }
             STBI_FREE(z->expanded); z->expanded = NULL;
+            // end of PNG chunk, read and skip CRC
+            stbi__get32be(s);
             return 1;
          }
 
@@ -4587,20 +5196,24 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
    }
 }
 
-static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp)
+static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
 {
-   unsigned char *result=NULL;
+   void *result=NULL;
    if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
    if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
-      if (p->depth == 16) {
-         if (!stbi__reduce_png(p)) {
-            return result;
-         }
-      }
+      if (p->depth <= 8)
+         ri->bits_per_channel = 8;
+      else if (p->depth == 16)
+         ri->bits_per_channel = 16;
+      else
+         return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth");
       result = p->out;
       p->out = NULL;
       if (req_comp && req_comp != p->s->img_out_n) {
-         result = stbi__convert_format(result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+         if (ri->bits_per_channel == 8)
+            result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
+         else
+            result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
          p->s->img_out_n = req_comp;
          if (result == NULL) return result;
       }
@@ -4615,11 +5228,11 @@ static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req
    return result;
 }
 
-static unsigned char *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi__png p;
    p.s = s;
-   return stbi__do_png(&p, x,y,comp,req_comp);
+   return stbi__do_png(&p, x,y,comp,req_comp, ri);
 }
 
 static int stbi__png_test(stbi__context *s)
@@ -4648,6 +5261,19 @@ static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
    p.s = s;
    return stbi__png_info_raw(&p, x, y, comp);
 }
+
+static int stbi__png_is16(stbi__context *s)
+{
+   stbi__png p;
+   p.s = s;
+   if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
+	   return 0;
+   if (p.depth != 16) {
+      stbi__rewind(p.s);
+      return 0;
+   }
+   return 1;
+}
 #endif
 
 // Microsoft/Windows BMP image
@@ -4681,11 +5307,11 @@ static int stbi__high_bit(unsigned int z)
 {
    int n=0;
    if (z == 0) return -1;
-   if (z >= 0x10000) n += 16, z >>= 16;
-   if (z >= 0x00100) n +=  8, z >>=  8;
-   if (z >= 0x00010) n +=  4, z >>=  4;
-   if (z >= 0x00004) n +=  2, z >>=  2;
-   if (z >= 0x00002) n +=  1, z >>=  1;
+   if (z >= 0x10000) { n += 16; z >>= 16; }
+   if (z >= 0x00100) { n +=  8; z >>=  8; }
+   if (z >= 0x00010) { n +=  4; z >>=  4; }
+   if (z >= 0x00004) { n +=  2; z >>=  2; }
+   if (z >= 0x00002) { n +=  1;/* >>=  1;*/ }
    return n;
 }
 
@@ -4699,29 +5325,62 @@ static int stbi__bitcount(unsigned int a)
    return a & 0xff;
 }
 
-static int stbi__shiftsigned(int v, int shift, int bits)
-{
-   int result;
-   int z=0;
-
-   if (shift < 0) v <<= -shift;
-   else v >>= shift;
-   result = v;
-
-   z = bits;
-   while (z < 8) {
-      result += v >> z;
-      z += bits;
-   }
-   return result;
+// extract an arbitrarily-aligned N-bit value (N=bits)
+// from v, and then make it 8-bits long and fractionally
+// extend it to the full range.
+static int stbi__shiftsigned(unsigned int v, int shift, int bits)
+{
+   static unsigned int mul_table[9] = {
+      0,
+      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
+      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
+   };
+   static unsigned int shift_table[9] = {
+      0, 0,0,1,0,2,4,6,0,
+   };
+   if (shift < 0)
+      v <<= -shift;
+   else
+      v >>= shift;
+   STBI_ASSERT(v < 256);
+   v >>= (8-bits);
+   STBI_ASSERT(bits >= 0 && bits <= 8);
+   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits];
 }
 
 typedef struct
 {
    int bpp, offset, hsz;
    unsigned int mr,mg,mb,ma, all_a;
+   int extra_read;
 } stbi__bmp_data;
 
+static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
+{
+   // BI_BITFIELDS specifies masks explicitly, don't override
+   if (compress == 3)
+      return 1;
+
+   if (compress == 0) {
+      if (info->bpp == 16) {
+         info->mr = 31u << 10;
+         info->mg = 31u <<  5;
+         info->mb = 31u <<  0;
+      } else if (info->bpp == 32) {
+         info->mr = 0xffu << 16;
+         info->mg = 0xffu <<  8;
+         info->mb = 0xffu <<  0;
+         info->ma = 0xffu << 24;
+         info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
+      } else {
+         // otherwise, use defaults, which is all-0
+         info->mr = info->mg = info->mb = info->ma = 0;
+      }
+      return 1;
+   }
+   return 0; // error
+}
+
 static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
 {
    int hsz;
@@ -4732,7 +5391,10 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
    info->offset = stbi__get32le(s);
    info->hsz = hsz = stbi__get32le(s);
    info->mr = info->mg = info->mb = info->ma = 0;
-   
+   info->extra_read = 14;
+
+   if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP");
+
    if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
    if (hsz == 12) {
       s->img_x = stbi__get16le(s);
@@ -4743,10 +5405,11 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
    }
    if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
    info->bpp = stbi__get16le(s);
-   if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
    if (hsz != 12) {
       int compress = stbi__get32le(s);
       if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
+      if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
+      if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
       stbi__get32le(s); // discard sizeof
       stbi__get32le(s); // discard hres
       stbi__get32le(s); // discard vres
@@ -4761,21 +5424,12 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
          }
          if (info->bpp == 16 || info->bpp == 32) {
             if (compress == 0) {
-               if (info->bpp == 32) {
-                  info->mr = 0xffu << 16;
-                  info->mg = 0xffu <<  8;
-                  info->mb = 0xffu <<  0;
-                  info->ma = 0xffu << 24;
-                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
-               } else {
-                  info->mr = 31u << 10;
-                  info->mg = 31u <<  5;
-                  info->mb = 31u <<  0;
-               }
+               stbi__bmp_set_mask_defaults(info, compress);
             } else if (compress == 3) {
                info->mr = stbi__get32le(s);
                info->mg = stbi__get32le(s);
                info->mb = stbi__get32le(s);
+               info->extra_read += 12;
                // not documented, but generated by photoshop and handled by mspaint
                if (info->mr == info->mg && info->mg == info->mb) {
                   // ?!?!?
@@ -4785,6 +5439,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
                return stbi__errpuc("bad BMP", "bad BMP");
          }
       } else {
+         // V4/V5 header
          int i;
          if (hsz != 108 && hsz != 124)
             return stbi__errpuc("bad BMP", "bad BMP");
@@ -4792,6 +5447,8 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
          info->mg = stbi__get32le(s);
          info->mb = stbi__get32le(s);
          info->ma = stbi__get32le(s);
+         if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
+            stbi__bmp_set_mask_defaults(info, compress);
          stbi__get32le(s); // discard color space
          for (i=0; i < 12; ++i)
             stbi__get32le(s); // discard color space parameters
@@ -4807,7 +5464,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
 }
 
 
-static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi_uc *out;
    unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
@@ -4815,14 +5472,18 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
    int psize=0,i,j,width;
    int flip_vertically, pad, target;
    stbi__bmp_data info;
+   STBI_NOTUSED(ri);
 
-   info.all_a = 255;   
+   info.all_a = 255;
    if (stbi__bmp_parse_header(s, &info) == NULL)
       return NULL; // error code already set
 
    flip_vertically = ((int) s->img_y) > 0;
    s->img_y = abs((int) s->img_y);
 
+   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
    mr = info.mr;
    mg = info.mg;
    mb = info.mb;
@@ -4831,19 +5492,31 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
 
    if (info.hsz == 12) {
       if (info.bpp < 24)
-         psize = (info.offset - 14 - 24) / 3;
+         psize = (info.offset - info.extra_read - 24) / 3;
    } else {
       if (info.bpp < 16)
-         psize = (info.offset - 14 - info.hsz) >> 2;
+         psize = (info.offset - info.extra_read - info.hsz) >> 2;
+   }
+   if (psize == 0) {
+      if (info.offset != s->callback_already_read + (s->img_buffer - s->img_buffer_original)) {
+        return stbi__errpuc("bad offset", "Corrupt BMP");
+      }
    }
 
-   s->img_n = ma ? 4 : 3;
+   if (info.bpp == 24 && ma == 0xff000000)
+      s->img_n = 3;
+   else
+      s->img_n = ma ? 4 : 3;
    if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
       target = req_comp;
    else
       target = s->img_n; // if they want monochrome, we'll post-convert
 
-   out = (stbi_uc *) stbi__malloc(target * s->img_x * s->img_y);
+   // sanity-check size
+   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
+      return stbi__errpuc("too large", "Corrupt BMP");
+
+   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
    if (info.bpp < 16) {
       int z=0;
@@ -4855,36 +5528,56 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
          if (info.hsz != 12) stbi__get8(s);
          pal[i][3] = 255;
       }
-      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
-      if (info.bpp == 4) width = (s->img_x + 1) >> 1;
+      stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
+      if (info.bpp == 1) width = (s->img_x + 7) >> 3;
+      else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
       else if (info.bpp == 8) width = s->img_x;
       else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
       pad = (-width)&3;
-      for (j=0; j < (int) s->img_y; ++j) {
-         for (i=0; i < (int) s->img_x; i += 2) {
-            int v=stbi__get8(s),v2=0;
-            if (info.bpp == 4) {
-               v2 = v & 15;
-               v >>= 4;
+      if (info.bpp == 1) {
+         for (j=0; j < (int) s->img_y; ++j) {
+            int bit_offset = 7, v = stbi__get8(s);
+            for (i=0; i < (int) s->img_x; ++i) {
+               int color = (v>>bit_offset)&0x1;
+               out[z++] = pal[color][0];
+               out[z++] = pal[color][1];
+               out[z++] = pal[color][2];
+               if (target == 4) out[z++] = 255;
+               if (i+1 == (int) s->img_x) break;
+               if((--bit_offset) < 0) {
+                  bit_offset = 7;
+                  v = stbi__get8(s);
+               }
             }
-            out[z++] = pal[v][0];
-            out[z++] = pal[v][1];
-            out[z++] = pal[v][2];
-            if (target == 4) out[z++] = 255;
-            if (i+1 == (int) s->img_x) break;
-            v = (info.bpp == 8) ? stbi__get8(s) : v2;
-            out[z++] = pal[v][0];
-            out[z++] = pal[v][1];
-            out[z++] = pal[v][2];
-            if (target == 4) out[z++] = 255;
+            stbi__skip(s, pad);
+         }
+      } else {
+         for (j=0; j < (int) s->img_y; ++j) {
+            for (i=0; i < (int) s->img_x; i += 2) {
+               int v=stbi__get8(s),v2=0;
+               if (info.bpp == 4) {
+                  v2 = v & 15;
+                  v >>= 4;
+               }
+               out[z++] = pal[v][0];
+               out[z++] = pal[v][1];
+               out[z++] = pal[v][2];
+               if (target == 4) out[z++] = 255;
+               if (i+1 == (int) s->img_x) break;
+               v = (info.bpp == 8) ? stbi__get8(s) : v2;
+               out[z++] = pal[v][0];
+               out[z++] = pal[v][1];
+               out[z++] = pal[v][2];
+               if (target == 4) out[z++] = 255;
+            }
+            stbi__skip(s, pad);
          }
-         stbi__skip(s, pad);
       }
    } else {
       int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
       int z = 0;
       int easy=0;
-      stbi__skip(s, info.offset - 14 - info.hsz);
+      stbi__skip(s, info.offset - info.extra_read - info.hsz);
       if (info.bpp == 24) width = 3 * s->img_x;
       else if (info.bpp == 16) width = 2*s->img_x;
       else /* bpp = 32 and pad = 0 */ width=0;
@@ -4902,6 +5595,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
          gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
          bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
          ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
+         if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
       }
       for (j=0; j < (int) s->img_y; ++j) {
          if (easy) {
@@ -4919,7 +5613,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
             int bpp = info.bpp;
             for (i=0; i < (int) s->img_x; ++i) {
                stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
-               int a;
+               unsigned int a;
                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
@@ -4931,7 +5625,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
          stbi__skip(s, pad);
       }
    }
-   
+
    // if alpha channel is all 0s, replace with all 255s
    if (target == 4 && all_a == 0)
       for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
@@ -4943,7 +5637,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
          stbi_uc *p1 = out +      j     *s->img_x*target;
          stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
          for (i=0; i < (int) s->img_x*target; ++i) {
-            t = p1[i], p1[i] = p2[i], p2[i] = t;
+            t = p1[i]; p1[i] = p2[i]; p2[i] = t;
          }
       }
    }
@@ -4967,14 +5661,14 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int
 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
 {
    // only RGB or RGBA (incl. 16bit) or grey allowed
-   if(is_rgb16) *is_rgb16 = 0;
+   if (is_rgb16) *is_rgb16 = 0;
    switch(bits_per_pixel) {
       case 8:  return STBI_grey;
       case 16: if(is_grey) return STBI_grey_alpha;
-            // else: fall-through
+               // fallthrough
       case 15: if(is_rgb16) *is_rgb16 = 1;
-            return STBI_rgb;
-      case 24: // fall-through
+               return STBI_rgb;
+      case 24: // fallthrough
       case 32: return bits_per_pixel/8;
       default: return 0;
    }
@@ -5077,18 +5771,18 @@ static int stbi__tga_test(stbi__context *s)
 }
 
 // read 16bit value and convert to 24bit RGB
-void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
 {
-   stbi__uint16 px = stbi__get16le(s);
+   stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
    stbi__uint16 fiveBitMask = 31;
    // we have 3 channels with 5bits each
    int r = (px >> 10) & fiveBitMask;
    int g = (px >> 5) & fiveBitMask;
    int b = px & fiveBitMask;
    // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
-   out[0] = (r * 255)/31;
-   out[1] = (g * 255)/31;
-   out[2] = (b * 255)/31;
+   out[0] = (stbi_uc)((r * 255)/31);
+   out[1] = (stbi_uc)((g * 255)/31);
+   out[2] = (stbi_uc)((b * 255)/31);
 
    // some people claim that the most significant bit might be used for alpha
    // (possibly if an alpha-bit is set in the "image descriptor byte")
@@ -5096,7 +5790,7 @@ void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
    // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
 }
 
-static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    //   read in the TGA header stuff
    int tga_offset = stbi__get8(s);
@@ -5118,10 +5812,16 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
    unsigned char *tga_data;
    unsigned char *tga_palette = NULL;
    int i, j;
-   unsigned char raw_data[4];
+   unsigned char raw_data[4] = {0};
    int RLE_count = 0;
    int RLE_repeating = 0;
    int read_next_pixel = 1;
+   STBI_NOTUSED(ri);
+   STBI_NOTUSED(tga_x_origin); // @TODO
+   STBI_NOTUSED(tga_y_origin); // @TODO
+
+   if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
 
    //   do a tiny bit of precessing
    if ( tga_image_type >= 8 )
@@ -5143,7 +5843,10 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
    *y = tga_height;
    if (comp) *comp = tga_comp;
 
-   tga_data = (unsigned char*)stbi__malloc( (size_t)tga_width * tga_height * tga_comp );
+   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
+      return stbi__errpuc("too large", "Corrupt TGA");
+
+   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
    if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
 
    // skip to the data's starting position (offset usually = 0)
@@ -5159,10 +5862,15 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
       //   do I need to load a palette?
       if ( tga_indexed)
       {
+         if (tga_palette_len == 0) {  /* you have to have at least one entry! */
+            STBI_FREE(tga_data);
+            return stbi__errpuc("bad palette", "Corrupt TGA");
+         }
+
          //   any data to skip? (offset usually = 0)
          stbi__skip(s, tga_palette_start );
          //   load the palette
-         tga_palette = (unsigned char*)stbi__malloc( tga_palette_len * tga_comp );
+         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
          if (!tga_palette) {
             STBI_FREE(tga_data);
             return stbi__errpuc("outofmem", "Out of memory");
@@ -5282,6 +5990,7 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int
    //   Microsoft's C compilers happy... [8^(
    tga_palette_start = tga_palette_len = tga_palette_bits =
          tga_x_origin = tga_y_origin = 0;
+   STBI_NOTUSED(tga_palette_start);
    //   OK, done
    return tga_data;
 }
@@ -5298,14 +6007,53 @@ static int stbi__psd_test(stbi__context *s)
    return r;
 }
 
-static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
+{
+   // Expands the RLE stream read from s into pixelCount bytes stored at p[0], p[4], p[8], ...; returns 1, or 0 if a run overshoots.
+   int count, nleft, len;   // bytes produced so far / bytes still wanted / run header byte, then run length
+   count = 0;
+   while ((nleft = pixelCount - count) > 0) {
+      len = stbi__get8(s);   // header byte selecting the kind of run
+      if (len == 128) {
+         // No-op.
+      } else if (len < 128) {
+         // Copy next len+1 bytes literally.
+         len++;
+         if (len > nleft) return 0; // corrupt data
+         count += len;
+         while (len) {
+            *p = stbi__get8(s);
+            p += 4;   // destination stride is 4 bytes
+            len--;
+         }
+      } else if (len > 128) {
+         stbi_uc   val;
+         // Next -len+1 bytes in the dest are replicated from next source byte.
+         // (Interpret len as a negative 8-bit int.)
+         len = 257 - len;   // header 129..255 maps to a run length of 128..2
+         if (len > nleft) return 0; // corrupt data
+         val = stbi__get8(s);
+         count += len;
+         while (len) {
+            *p = val;
+            p += 4;   // same 4-byte stride as the literal case
+            len--;
+         }
+      }
+   }
+   // the loop only ends once exactly pixelCount bytes were produced
+   return 1;
+}
+
+static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
 {
-   int   pixelCount;
+   int pixelCount;
    int channelCount, compression;
-   int channel, i, count, len;
+   int channel, i;
    int bitdepth;
    int w,h;
    stbi_uc *out;
+   STBI_NOTUSED(ri);
 
    // Check identifier
    if (stbi__get32be(s) != 0x38425053)   // "8BPS"
@@ -5327,6 +6075,9 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
    h = stbi__get32be(s);
    w = stbi__get32be(s);
 
+   if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
    // Make sure the depth is 8 bits.
    bitdepth = stbi__get16be(s);
    if (bitdepth != 8 && bitdepth != 16)
@@ -5362,8 +6113,18 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
    if (compression > 1)
       return stbi__errpuc("bad compression", "PSD has an unknown compression format");
 
+   // Check size
+   if (!stbi__mad3sizes_valid(4, w, h, 0))
+      return stbi__errpuc("too large", "Corrupt PSD");
+
    // Create the destination image.
-   out = (stbi_uc *) stbi__malloc(4 * w*h);
+
+   if (!compression && bitdepth == 16 && bpc == 16) {
+      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
+      ri->bits_per_channel = 16;
+   } else
+      out = (stbi_uc *) stbi__malloc(4 * w*h);
+
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
    pixelCount = w*h;
 
@@ -5380,7 +6141,7 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
       //     Else if n is 128, noop.
       // Endloop
 
-      // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
+      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
       // which we're going to just skip.
       stbi__skip(s, h * channelCount * 2 );
 
@@ -5395,82 +6156,86 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int
                *p = (channel == 3 ? 255 : 0);
          } else {
             // Read the RLE data.
-            count = 0;
-            while (count < pixelCount) {
-               len = stbi__get8(s);
-               if (len == 128) {
-                  // No-op.
-               } else if (len < 128) {
-                  // Copy next len+1 bytes literally.
-                  len++;
-                  count += len;
-                  while (len) {
-                     *p = stbi__get8(s);
-                     p += 4;
-                     len--;
-                  }
-               } else if (len > 128) {
-                  stbi_uc   val;
-                  // Next -len+1 bytes in the dest are replicated from next source byte.
-                  // (Interpret len as a negative 8-bit int.)
-                  len ^= 0x0FF;
-                  len += 2;
-                  val = stbi__get8(s);
-                  count += len;
-                  while (len) {
-                     *p = val;
-                     p += 4;
-                     len--;
-                  }
-               }
+            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
+               STBI_FREE(out);
+               return stbi__errpuc("corrupt", "bad RLE data");
             }
          }
       }
 
    } else {
       // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
-      // where each channel consists of an 8-bit value for each pixel in the image.
+      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
 
       // Read the data by channel.
       for (channel = 0; channel < 4; channel++) {
-         stbi_uc *p;
-
-         p = out + channel;
          if (channel >= channelCount) {
             // Fill this channel with default data.
-            stbi_uc val = channel == 3 ? 255 : 0;
-            for (i = 0; i < pixelCount; i++, p += 4)
-               *p = val;
-         } else {
-            // Read the data.
-            if (bitdepth == 16) {
-               for (i = 0; i < pixelCount; i++, p += 4)
-                  *p = (stbi_uc) (stbi__get16be(s) >> 8);
+            if (bitdepth == 16 && bpc == 16) {
+               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
+               stbi__uint16 val = channel == 3 ? 65535 : 0;
+               for (i = 0; i < pixelCount; i++, q += 4)
+                  *q = val;
             } else {
+               stbi_uc *p = out+channel;
+               stbi_uc val = channel == 3 ? 255 : 0;
                for (i = 0; i < pixelCount; i++, p += 4)
-                  *p = stbi__get8(s);
+                  *p = val;
+            }
+         } else {
+            if (ri->bits_per_channel == 16) {    // output bpc
+               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
+               for (i = 0; i < pixelCount; i++, q += 4)
+                  *q = (stbi__uint16) stbi__get16be(s);
+            } else {
+               stbi_uc *p = out+channel;
+               if (bitdepth == 16) {  // input bpc
+                  for (i = 0; i < pixelCount; i++, p += 4)
+                     *p = (stbi_uc) (stbi__get16be(s) >> 8);
+               } else {
+                  for (i = 0; i < pixelCount; i++, p += 4)
+                     *p = stbi__get8(s);
+               }
             }
          }
       }
    }
 
+   // remove weird white matte from PSD
    if (channelCount >= 4) {
-      for (i=0; i < w*h; ++i) {
-         unsigned char *pixel = out + 4*i;
-         if (pixel[3] != 0 && pixel[3] != 255) {
-            // remove weird white matte from PSD
-            float a = pixel[3] / 255.0f;
-            float ra = 1.0f / a;
-            float inv_a = 255.0f * (1 - ra);
-            pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
-            pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
-            pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
+      if (ri->bits_per_channel == 16) {
+         for (i=0; i < w*h; ++i) {
+            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
+            if (pixel[3] != 0 && pixel[3] != 65535) {
+               float a = pixel[3] / 65535.0f;
+               float ra = 1.0f / a;
+               float inv_a = 65535.0f * (1 - ra);
+               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
+               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
+               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
+            }
+         }
+      } else {
+         for (i=0; i < w*h; ++i) {
+            unsigned char *pixel = out + 4*i;
+            if (pixel[3] != 0 && pixel[3] != 255) {
+               float a = pixel[3] / 255.0f;
+               float ra = 1.0f / a;
+               float inv_a = 255.0f * (1 - ra);
+               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
+               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
+               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
+            }
          }
       }
    }
 
+   // convert to desired output format
    if (req_comp && req_comp != 4) {
-      out = stbi__convert_format(out, 4, req_comp, w, h);
+      if (ri->bits_per_channel == 16)
+         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
+      else
+         out = stbi__convert_format(out, 4, req_comp, w, h);
       if (out == NULL) return out; // stbi__convert_format frees input on failure
    }
 
@@ -5654,25 +6419,33 @@ static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *c
    return result;
 }
 
-static stbi_uc *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp)
+static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
 {
    stbi_uc *result;
-   int i, x,y;
+   int i, x,y, internal_comp;
+   STBI_NOTUSED(ri);
+
+   if (!comp) comp = &internal_comp;
 
    for (i=0; i<92; ++i)
       stbi__get8(s);
 
    x = stbi__get16be(s);
    y = stbi__get16be(s);
+
+   if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
    if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
-   if ((1 << 28) / x < y) return stbi__errpuc("too large", "Image too large to decode");
+   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
 
    stbi__get32be(s); //skip `ratio'
    stbi__get16be(s); //skip `fields'
    stbi__get16be(s); //skip `pad'
 
    // intermediate buffer is RGBA
-   result = (stbi_uc *) stbi__malloc(x*y*4);
+   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
+   if (!result) return stbi__errpuc("outofmem", "Out of memory");
    memset(result, 0xff, x*y*4);
 
    if (!stbi__pic_load_core(s,x,y,comp, result)) {
@@ -5709,11 +6482,13 @@ typedef struct
 typedef struct
 {
    int w,h;
-   stbi_uc *out, *old_out;             // output buffer (always 4 components)
-   int flags, bgindex, ratio, transparent, eflags, delay;
+   stbi_uc *out;                 // output buffer (always 4 components)
+   stbi_uc *background;          // The current "background" as far as a gif is concerned
+   stbi_uc *history;
+   int flags, bgindex, ratio, transparent, eflags;
    stbi_uc  pal[256][4];
    stbi_uc lpal[256][4];
-   stbi__gif_lzw codes[4096];
+   stbi__gif_lzw codes[8192];
    stbi_uc *color_table;
    int parse, step;
    int lflags;
@@ -5721,6 +6496,7 @@ typedef struct
    int max_x, max_y;
    int cur_x, cur_y;
    int line_size;
+   int delay;
 } stbi__gif;
 
 static int stbi__gif_test_raw(stbi__context *s)
@@ -5769,6 +6545,9 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in
    g->ratio = stbi__get8(s);
    g->transparent = -1;
 
+   if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+   if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
+
    if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments
 
    if (is_info) return 1;
@@ -5782,6 +6561,7 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in
 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
 {
    stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
+   if (!g) return stbi__err("outofmem", "Out of memory");
    if (!stbi__gif_header(s, g, comp, 1)) {
       STBI_FREE(g);
       stbi__rewind( s );
@@ -5796,6 +6576,7 @@ static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
 static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
 {
    stbi_uc *p, *c;
+   int idx;
 
    // recurse to decode the prefixes, since the linked-list is backwards,
    // and working backwards through an interleaved image would be nasty
@@ -5804,10 +6585,12 @@ static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
 
    if (g->cur_y >= g->max_y) return;
 
-   p = &g->out[g->cur_x + g->cur_y];
-   c = &g->color_table[g->codes[code].suffix * 4];
+   idx = g->cur_x + g->cur_y;
+   p = &g->out[idx];
+   g->history[idx / 4] = 1;
 
-   if (c[3] >= 128) {
+   c = &g->color_table[g->codes[code].suffix * 4];
+   if (c[3] > 128) { // don't render transparent pixels;
       p[0] = c[2];
       p[1] = c[1];
       p[2] = c[0];
@@ -5881,11 +6664,16 @@ static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
                stbi__skip(s,len);
             return g->out;
          } else if (code <= avail) {
-            if (first) return stbi__errpuc("no clear code", "Corrupt GIF");
+            if (first) {
+               return stbi__errpuc("no clear code", "Corrupt GIF");
+            }
 
             if (oldcode >= 0) {
                p = &g->codes[avail++];
-               if (avail > 4096)        return stbi__errpuc("too many codes", "Corrupt GIF");
+               if (avail > 8192) {
+                  return stbi__errpuc("too many codes", "Corrupt GIF");
+               }
+
                p->prefix = (stbi__int16) oldcode;
                p->first = g->codes[oldcode].first;
                p->suffix = (code == avail) ? p->first : g->codes[code].first;
@@ -5907,59 +6695,77 @@ static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
    }
 }
 
-static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1)
-{
-   int x, y;
-   stbi_uc *c = g->pal[g->bgindex];
-   for (y = y0; y < y1; y += 4 * g->w) {
-      for (x = x0; x < x1; x += 4) {
-         stbi_uc *p  = &g->out[y + x];
-         p[0] = c[2];
-         p[1] = c[1];
-         p[2] = c[0];
-         p[3] = 0;
-      }
-   }
-}
-
 // this function is designed to support animated gifs, although stb_image doesn't support it
-static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
+// two back is the image from two frames ago, used for a very specific disposal format
+static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
 {
-   int i;
-   stbi_uc *prev_out = 0;
+   int dispose;
+   int first_frame;
+   int pi;
+   int pcount;
+   STBI_NOTUSED(req_comp);
 
-   if (g->out == 0 && !stbi__gif_header(s, g, comp,0))
-      return 0; // stbi__g_failure_reason set by stbi__gif_header
+   // on first frame, any non-written pixels get the background colour (non-transparent)
+   first_frame = 0;
+   if (g->out == 0) {
+      if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
+      if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
+         return stbi__errpuc("too large", "GIF image is too large");
+      pcount = g->w * g->h;
+      g->out = (stbi_uc *) stbi__malloc(4 * pcount);
+      g->background = (stbi_uc *) stbi__malloc(4 * pcount);
+      g->history = (stbi_uc *) stbi__malloc(pcount);
+      if (!g->out || !g->background || !g->history)
+         return stbi__errpuc("outofmem", "Out of memory");
+
+      // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
+      // background colour is only used for pixels that are not rendered first frame, after that "background"
+      // color refers to the color that was there the previous frame.
+      memset(g->out, 0x00, 4 * pcount);
+      memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
+      memset(g->history, 0x00, pcount);        // pixels that were affected previous frame
+      first_frame = 1;
+   } else {
+      // second frame - how do we dispose of the previous one?
+      dispose = (g->eflags & 0x1C) >> 2;
+      pcount = g->w * g->h;
 
-   prev_out = g->out;
-   g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
-   if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
+      if ((dispose == 3) && (two_back == 0)) {
+         dispose = 2; // if I don't have an image to revert back to, default to the old background
+      }
 
-   switch ((g->eflags & 0x1C) >> 2) {
-      case 0: // unspecified (also always used on 1st frame)
-         stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
-         break;
-      case 1: // do not dispose
-         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
-         g->old_out = prev_out;
-         break;
-      case 2: // dispose to background
-         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
-         stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y);
-         break;
-      case 3: // dispose to previous
-         if (g->old_out) {
-            for (i = g->start_y; i < g->max_y; i += 4 * g->w)
-               memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x);
+      if (dispose == 3) { // use previous graphic
+         for (pi = 0; pi < pcount; ++pi) {
+            if (g->history[pi]) {
+               memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
+            }
          }
-         break;
+      } else if (dispose == 2) {
+         // restore what was changed last frame to background before that frame;
+         for (pi = 0; pi < pcount; ++pi) {
+            if (g->history[pi]) {
+               memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
+            }
+         }
+      } else {
+         // This is a non-disposal case either way, so just
+         // leave the pixels as is, and they will become the new background
+         // 1: do not dispose
+         // 0:  not specified.
+      }
+
+      // background is what out is after the undoing of the previous frame;
+      memcpy( g->background, g->out, 4 * g->w * g->h );
    }
 
+   // clear my history;
+   memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame
+
    for (;;) {
-      switch (stbi__get8(s)) {
+      int tag = stbi__get8(s);
+      switch (tag) {
          case 0x2C: /* Image Descriptor */
          {
-            int prev_trans = -1;
             stbi__int32 x, y, w, h;
             stbi_uc *o;
 
@@ -5978,6 +6784,13 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
             g->cur_x   = g->start_x;
             g->cur_y   = g->start_y;
 
+            // if the width of the specified rectangle is 0, that means
+            // we may not see *any* pixels or the image is malformed;
+            // to make sure this is caught, move the current y down to
+            // max_y (which is what out_gif_code checks).
+            if (w == 0)
+               g->cur_y = g->max_y;
+
             g->lflags = stbi__get8(s);
 
             if (g->lflags & 0x40) {
@@ -5992,19 +6805,24 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
                stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
                g->color_table = (stbi_uc *) g->lpal;
             } else if (g->flags & 0x80) {
-               if (g->transparent >= 0 && (g->eflags & 0x01)) {
-                  prev_trans = g->pal[g->transparent][3];
-                  g->pal[g->transparent][3] = 0;
-               }
                g->color_table = (stbi_uc *) g->pal;
             } else
                return stbi__errpuc("missing color table", "Corrupt GIF");
 
             o = stbi__process_gif_raster(s, g);
-            if (o == NULL) return NULL;
-
-            if (prev_trans != -1)
-               g->pal[g->transparent][3] = (stbi_uc) prev_trans;
+            if (!o) return NULL;
+
+            // if this was the first frame,
+            pcount = g->w * g->h;
+            if (first_frame && (g->bgindex > 0)) {
+               // if first frame, any pixel not drawn to gets the background color
+               for (pi = 0; pi < pcount; ++pi) {
+                  if (g->history[pi] == 0) {
+                     g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
+                     memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
+                  }
+               }
+            }
 
             return o;
          }
@@ -6012,19 +6830,35 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
          case 0x21: // Comment Extension.
          {
             int len;
-            if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
+            int ext = stbi__get8(s);
+            if (ext == 0xF9) { // Graphic Control Extension.
                len = stbi__get8(s);
                if (len == 4) {
                   g->eflags = stbi__get8(s);
-                  g->delay = stbi__get16le(s);
-                  g->transparent = stbi__get8(s);
+                  g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
+
+                  // unset old transparent
+                  if (g->transparent >= 0) {
+                     g->pal[g->transparent][3] = 255;
+                  }
+                  if (g->eflags & 0x01) {
+                     g->transparent = stbi__get8(s);
+                     if (g->transparent >= 0) {
+                        g->pal[g->transparent][3] = 0;
+                     }
+                  } else {
+                     // don't need transparent
+                     stbi__skip(s, 1);
+                     g->transparent = -1;
+                  }
                } else {
                   stbi__skip(s, len);
                   break;
                }
             }
-            while ((len = stbi__get8(s)) != 0)
+            while ((len = stbi__get8(s)) != 0) {
                stbi__skip(s, len);
+            }
             break;
          }
 
@@ -6035,27 +6869,130 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i
             return stbi__errpuc("unknown code", "Corrupt GIF");
       }
    }
+}
 
-   STBI_NOTUSED(req_comp);
+static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
+{
+   // Out-of-memory exit: frees the decoder buffers in g plus the partial results, then returns the "outofmem" error.
+   STBI_FREE(g->out);
+   STBI_FREE(g->history);
+   STBI_FREE(g->background);
+   if (out) STBI_FREE(out);
+   if (delays && *delays) { STBI_FREE(*delays); *delays = 0; }  // reset so the caller is not left holding a freed pointer
+   return stbi__errpuc("outofmem", "Out of memory");
+}
+
+static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
+{
+   // Decodes every frame of the GIF in s into one buffer holding *z frames back to back (g.w*g.h*4 bytes each
+   // before the final conversion); when delays is non-NULL, *delays receives one g.delay value per frame.
+   if (stbi__gif_test(s)) {
+      int layers = 0;         // frames stored in out so far
+      stbi_uc *u = 0;         // result of the latest stbi__gif_load_next call
+      stbi_uc *out = 0;       // all decoded frames, back to back
+      stbi_uc *two_back = 0;  // frame preceding the last stored one (0 while fewer than two frames exist)
+      stbi__gif g;
+      int stride;             // bytes per decoded 4-component frame
+      int out_size = 0;       // byte sizes of out / *delays, handed to STBI_REALLOC_SIZED
+      int delays_size = 0;
+
+      STBI_NOTUSED(out_size);
+      STBI_NOTUSED(delays_size);
+
+      memset(&g, 0, sizeof(g));
+      if (delays) *delays = 0;
+
+      do {
+         u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
+         if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
+
+         if (u) {
+            *x = g.w;
+            *y = g.h;
+            ++layers;
+            stride = g.w * g.h * 4;
+
+            if (out) {
+               void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
+               if (!tmp)
+                  return stbi__load_gif_main_outofmem(&g, out, delays);
+               out = (stbi_uc*) tmp;
+               out_size = layers * stride;
+
+               if (delays) {
+                  int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
+                  if (!new_delays)
+                     return stbi__load_gif_main_outofmem(&g, out, delays);
+                  *delays = new_delays;
+                  delays_size = layers * sizeof(int);
+               }
+            } else {
+               out = (stbi_uc*)stbi__malloc( layers * stride );
+               if (!out)
+                  return stbi__load_gif_main_outofmem(&g, out, delays);
+               out_size = layers * stride;
+               if (delays) {
+                  *delays = (int*) stbi__malloc( layers * sizeof(int) );
+                  if (!*delays)
+                     return stbi__load_gif_main_outofmem(&g, out, delays);
+                  delays_size = layers * sizeof(int);
+               }
+            }
+            memcpy( out + ((layers - 1) * stride), u, stride );
+            if (layers >= 2) {
+               // frame stored just before this one; derived from the current base because the realloc may move out
+               two_back = out + (layers - 2) * stride;
+            }
+
+            if (delays) {
+               (*delays)[layers - 1U] = g.delay;
+            }
+         }
+      } while (u != 0);
+
+      // free temp buffer;
+      STBI_FREE(g.out);
+      STBI_FREE(g.history);
+      STBI_FREE(g.background);
+
+      // do the final conversion after loading everything;
+      // (skipped when no frame was decoded: out is 0 and there is nothing to convert)
+      if (out && req_comp && req_comp != 4)
+         out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
+
+      *z = layers;
+      return out;
+   } else {
+      return stbi__errpuc("not GIF", "Image was not as a gif type.");
+   }
 }
 
-static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi_uc *u = 0;
-   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
-   memset(g, 0, sizeof(*g));
+   stbi__gif g;                // decoder state is a local here, so the struct itself is never malloc'd or freed
+   memset(&g, 0, sizeof(g));
+   STBI_NOTUSED(ri);
 
-   u = stbi__gif_load_next(s, g, comp, req_comp);
+   u = stbi__gif_load_next(s, &g, comp, req_comp, 0);  // single-image load: no two_back frame is supplied
    if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
    if (u) {
-      *x = g->w;
-      *y = g->h;
+      *x = g.w;
+      *y = g.h;
+
+      // moved conversion to after successful load so that the same
+      // can be done for multiple frames.
       if (req_comp && req_comp != 4)
-         u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
+         u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
+   } else if (g.out) {
+      // if there was an error and we allocated an image buffer, free it!
+      STBI_FREE(g.out);
    }
-   else if (g->out)
-      STBI_FREE(g->out);
-   STBI_FREE(g);
+
+   // free buffers needed for multiple frame loading; released on both the success and the failure path
+   STBI_FREE(g.history);
+   STBI_FREE(g.background);
+
    return u;
 }
 
@@ -6069,20 +7006,24 @@ static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
 // Radiance RGBE HDR loader
 // originally by Nicolas Schulz
 #ifndef STBI_NO_HDR
-static int stbi__hdr_test_core(stbi__context *s)
+static int stbi__hdr_test_core(stbi__context *s, const char *signature)  // 1 if the next bytes read from s equal signature, else 0
 {
-   const char *signature = "#?RADIANCE\n";
    int i;
    for (i=0; signature[i]; ++i)
       if (stbi__get8(s) != signature[i])
-         return 0;
+          return 0;  // mismatch: returns without rewinding, that is left to the caller
+   stbi__rewind(s);  // full match: rewind before reporting success
    return 1;
 }
 
 static int stbi__hdr_test(stbi__context* s)
 {
-   int r = stbi__hdr_test_core(s);
+   int r = stbi__hdr_test_core(s, "#?RADIANCE\n");  // first candidate signature
    stbi__rewind(s);
+   if(!r) {  // first signature did not match: try the second accepted one
+       r = stbi__hdr_test_core(s, "#?RGBE\n");
+       stbi__rewind(s);  // rewind again whether or not this attempt matched
+   }
    return r;
 }
 
@@ -6136,7 +7077,7 @@ static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
    }
 }
 
-static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    char buffer[STBI__HDR_BUFLEN];
    char *token;
@@ -6147,10 +7088,12 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
    int len;
    unsigned char count, value;
    int i, j, k, c1,c2, z;
-
+   const char *headerToken;
+   STBI_NOTUSED(ri);
 
    // Check identifier
-   if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
+   headerToken = stbi__hdr_gettoken(s,buffer);
+   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
       return stbi__errpf("not HDR", "Corrupt HDR image");
 
    // Parse header
@@ -6173,14 +7116,22 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
    token += 3;
    width = (int) strtol(token, NULL, 10);
 
+   if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
+   if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
+
    *x = width;
    *y = height;
 
    if (comp) *comp = 3;
    if (req_comp == 0) req_comp = 3;
 
+   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
+      return stbi__errpf("too large", "HDR image is too large");
+
    // Read data
-   hdr_data = (float *) stbi__malloc(height * width * req_comp * sizeof(float));
+   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
+   if (!hdr_data)
+      return stbi__errpf("outofmem", "Out of memory");
 
    // Load image data
    // image data is stored as some number of sca
@@ -6219,20 +7170,29 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
          len <<= 8;
          len |= stbi__get8(s);
          if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
-         if (scanline == NULL) scanline = (stbi_uc *) stbi__malloc(width * 4);
+         if (scanline == NULL) {
+            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
+            if (!scanline) {
+               STBI_FREE(hdr_data);
+               return stbi__errpf("outofmem", "Out of memory");
+            }
+         }
 
          for (k = 0; k < 4; ++k) {
+            int nleft;
             i = 0;
-            while (i < width) {
+            while ((nleft = width - i) > 0) {
                count = stbi__get8(s);
                if (count > 128) {
                   // Run
                   value = stbi__get8(s);
                   count -= 128;
+                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                   for (z = 0; z < count; ++z)
                      scanline[i++ * 4 + k] = value;
                } else {
                   // Dump
+                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                   for (z = 0; z < count; ++z)
                      scanline[i++ * 4 + k] = stbi__get8(s);
                }
@@ -6241,7 +7201,8 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re
          for (i=0; i < width; ++i)
             stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
       }
-      STBI_FREE(scanline);
+      if (scanline)
+         STBI_FREE(scanline);
    }
 
    return hdr_data;
@@ -6252,6 +7213,11 @@ static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
    char buffer[STBI__HDR_BUFLEN];
    char *token;
    int valid = 0;
+   int dummy;
+
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
 
    if (stbi__hdr_test(s) == 0) {
        stbi__rewind( s );
@@ -6293,14 +7259,20 @@ static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
    void *p;
    stbi__bmp_data info;
 
-   info.all_a = 255;   
+   info.all_a = 255;
    p = stbi__bmp_parse_header(s, &info);
-   stbi__rewind( s );
-   if (p == NULL)
+   if (p == NULL) {
+      stbi__rewind( s );
       return 0;
-   *x = s->img_x;
-   *y = s->img_y;
-   *comp = info.ma ? 4 : 3;
+   }
+   if (x) *x = s->img_x;
+   if (y) *y = s->img_y;
+   if (comp) {
+      if (info.bpp == 24 && info.ma == 0xff000000)
+         *comp = 3;
+      else
+         *comp = info.ma ? 4 : 3;
+   }
    return 1;
 }
 #endif
@@ -6308,7 +7280,10 @@ static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
 #ifndef STBI_NO_PSD
 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
 {
-   int channelCount;
+   int channelCount, dummy, depth;
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
    if (stbi__get32be(s) != 0x38425053) {
        stbi__rewind( s );
        return 0;
@@ -6325,7 +7300,8 @@ static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
    }
    *y = stbi__get32be(s);
    *x = stbi__get32be(s);
-   if (stbi__get16be(s) != 8) {
+   depth = stbi__get16be(s);
+   if (depth != 8 && depth != 16) {
        stbi__rewind( s );
        return 0;
    }
@@ -6336,14 +7312,45 @@ static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
    *comp = 4;
    return 1;
 }
+
+static int stbi__psd_is16(stbi__context *s)
+{
+   // Returns 1 when s starts with the PSD signature, the 16-bit field after it is 1,
+   // the channel count is 0..16 and the depth field is 16; otherwise rewinds s and returns 0.
+   int channelCount, depth;
+   // 0x38425053 is the bytes '8','B','P','S'.
+   if (stbi__get32be(s) != 0x38425053 || stbi__get16be(s) != 1) {
+       stbi__rewind( s );
+       return 0;
+   }
+   stbi__skip(s, 6);
+   channelCount = stbi__get16be(s);
+   if (channelCount < 0 || channelCount > 16) {
+       stbi__rewind( s );
+       return 0;
+   }
+   // Step over the two 32-bit size fields with stbi__skip: wrapping the reads
+   // in STBI_NOTUSED only consumes them if that macro evaluates its argument.
+   stbi__skip(s, 8);
+   depth = stbi__get16be(s);
+   if (depth != 16) {
+       stbi__rewind( s );
+       return 0;
+   }
+   return 1;
+}
 #endif
 
 #ifndef STBI_NO_PIC
 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
 {
-   int act_comp=0,num_packets=0,chained;
+   int act_comp=0,num_packets=0,chained,dummy;
    stbi__pic_packet packets[10];
 
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+
    if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
       stbi__rewind(s);
       return 0;
@@ -6403,7 +7410,6 @@ static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
 // Known limitations:
 //    Does not support comments in the header section
 //    Does not support ASCII image data (formats P2 and P3)
-//    Does not support 16-bit-per-channel
 
 #ifndef STBI_NO_PNM
 
@@ -6419,18 +7425,28 @@ static int      stbi__pnm_test(stbi__context *s)
    return 1;
 }
 
-static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
+static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
 {
    stbi_uc *out;
-   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
+   STBI_NOTUSED(ri);
+
+   ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
+   if (ri->bits_per_channel == 0)
       return 0;
+
+   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
+
    *x = s->img_x;
    *y = s->img_y;
-   *comp = s->img_n;
+   if (comp) *comp = s->img_n;
+
+   if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
+      return stbi__errpuc("too large", "PNM too large");
 
-   out = (stbi_uc *) stbi__malloc(s->img_n * s->img_x * s->img_y);
+   out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
    if (!out) return stbi__errpuc("outofmem", "Out of memory");
-   stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
+   stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8));
 
    if (req_comp && req_comp != s->img_n) {
       out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
@@ -6477,16 +7493,20 @@ static int      stbi__pnm_getinteger(stbi__context *s, char *c)
 
 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
 {
-   int maxv;
+   int maxv, dummy;
    char c, p, t;
 
-   stbi__rewind( s );
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+
+   stbi__rewind(s);
 
    // Get identifier
    p = (char) stbi__get8(s);
    t = (char) stbi__get8(s);
    if (p != 'P' || (t != '5' && t != '6')) {
-       stbi__rewind( s );
+       stbi__rewind(s);
        return 0;
    }
 
@@ -6502,11 +7522,19 @@ static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
    stbi__pnm_skip_whitespace(s, &c);
 
    maxv = stbi__pnm_getinteger(s, &c);  // read max value
-
-   if (maxv > 255)
-      return stbi__err("max value > 255", "PPM image not 8-bit");
+   if (maxv > 65535)
+      return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
+   else if (maxv > 255)
+      return 16;
    else
-      return 1;
+      return 8;
+}
+
+static int stbi__pnm_is16(stbi__context *s)
+{
+   // stbi__pnm_info reports 16 for a 16-bit file; map that to 1 and anything else to 0.
+   int bits = stbi__pnm_info(s, NULL, NULL, NULL);
+   return (bits == 16) ? 1 : 0;
+}
 #endif
 
@@ -6552,6 +7580,22 @@ static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
    return stbi__err("unknown image type", "Image not of any known type, or corrupt");
 }
 
+static int stbi__is_16_main(stbi__context *s)
+{  // returns 1 as soon as one compiled-in probe (PNG, then PSD, then PNM) reports 16-bit data, else 0
+   #ifndef STBI_NO_PNG
+   if (stbi__png_is16(s))  return 1;
+   #endif
+
+   #ifndef STBI_NO_PSD
+   if (stbi__psd_is16(s))  return 1;
+   #endif
+
+   #ifndef STBI_NO_PNM
+   if (stbi__pnm_is16(s))  return 1;
+   #endif
+   return 0;  // no enabled probe reported a 16-bit image
+}
+
 #ifndef STBI_NO_STDIO
 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
 {
@@ -6573,6 +7617,27 @@ STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
    fseek(f,pos,SEEK_SET);
    return r;
 }
+
+STBIDEF int stbi_is_16_bit(char const *filename)
+{
+    // Open filename in binary mode and probe it with stbi_is_16_bit_from_file;
+    // an open failure is reported through stbi__err and that value is returned.
+    FILE *fp = stbi__fopen(filename, "rb");
+    int is16 = fp ? stbi_is_16_bit_from_file(fp) : stbi__err("can't fopen", "Unable to open file");
+    if (fp) fclose(fp);
+    return is16;
+}
+
+STBIDEF int stbi_is_16_bit_from_file(FILE *f)
+{
+   stbi__context ctx;
+   int is16;
+   long start = ftell(f);       // remember the caller's position so it can be restored
+   stbi__start_file(&ctx, f);
+   is16 = stbi__is_16_main(&ctx);
+   fseek(f, start, SEEK_SET);   // seek back to the position recorded above
+   return is16;
+}
 #endif // !STBI_NO_STDIO
 
 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
@@ -6589,10 +7654,44 @@ STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int
    return stbi__info_main(&s,x,y,comp);
 }
 
+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
+{
+   stbi__context ctx;                 // context initialised from the caller's buffer and len
+   stbi__start_mem(&ctx, buffer, len);
+   return stbi__is_16_main(&ctx);     // result of the shared 16-bit probe
+}
+
+STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
+{
+   stbi__context ctx;                 // context initialised from the caller's callbacks and user pointer
+   stbi__start_callbacks(&ctx, (stbi_io_callbacks *) c, user);
+   return stbi__is_16_main(&ctx);     // result of the shared 16-bit probe
+}
+
 #endif // STB_IMAGE_IMPLEMENTATION
 
 /*
    revision history:
+      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
+      2.19  (2018-02-11) fix warning
+      2.18  (2018-01-30) fix warnings
+      2.17  (2018-01-29) change stbi__shiftsigned to avoid clang -O2 bug
+                         1-bit BMP
+                         *_is_16_bit api
+                         avoid warnings
+      2.16  (2017-07-23) all functions have 16-bit variants;
+                         STBI_NO_STDIO works again;
+                         compilation fixes;
+                         fix rounding in unpremultiply;
+                         optimize vertical flip;
+                         disable raw_len validation;
+                         documentation fixes
+      2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
+                         warning fixes; disable run-time SSE detection on gcc;
+                         uniform handling of optional "return" values;
+                         thread-safe initialization of zlib tables
+      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+      2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
       2.11  (2016-04-02) allocate large structures on the stack
                          remove white matting for transparent PSD
@@ -6753,3 +7852,46 @@ STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int
       0.50  (2006-11-19)
               first released version
 */
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/examples/viewer/viewer.cc b/examples/viewer/viewer.cc
index 9ab1142c..2e6bd952 100644
--- a/examples/viewer/viewer.cc
+++ b/examples/viewer/viewer.cc
@@ -11,6 +11,7 @@
 #include <map>
 #include <string>
 #include <vector>
+#include <unordered_map>
 
 #include <GL/glew.h>
 
@@ -23,13 +24,24 @@
 #include <GLFW/glfw3.h>
 
 #define TINYOBJLOADER_IMPLEMENTATION
+// TINYOBJLOADER_USE_MAPBOX_EARCUT: Enable better triangulation. Requires C++11
+//#define TINYOBJLOADER_USE_MAPBOX_EARCUT
 #include "../../tiny_obj_loader.h"
 
 #include "trackball.h"
 
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Weverything"
+#endif
+
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
 
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
 #ifdef _WIN32
 #ifdef __cplusplus
 extern "C" {
@@ -140,6 +152,10 @@ bool mouseRightPressed;
 float curr_quat[4];
 float prev_quat[4];
 float eye[3], lookat[3], up[3];
+float g_angleX = 0.0f; // in degree
+float g_angleY = 0.0f; // in degree
+bool g_show_wire = true;
+bool g_cull_face = false;
 
 GLFWwindow* window;
 
@@ -181,9 +197,9 @@ static void CalcNormal(float N[3], float v0[3], float v1[3], float v2[3]) {
   v20[1] = v2[1] - v0[1];
   v20[2] = v2[2] - v0[2];
 
-  N[0] = v20[1] * v10[2] - v20[2] * v10[1];
-  N[1] = v20[2] * v10[0] - v20[0] * v10[2];
-  N[2] = v20[0] * v10[1] - v20[1] * v10[0];
+  N[0] = v10[1] * v20[2] - v10[2] * v20[1];
+  N[1] = v10[2] * v20[0] - v10[0] * v20[2];
+  N[2] = v10[0] * v20[1] - v10[1] * v20[0];
 
   float len2 = N[0] * N[0] + N[1] * N[1] + N[2] * N[2];
   if (len2 > 0.0f) {
@@ -206,6 +222,68 @@ struct vec3 {
   }
 };
 
+// 3x3 float matrix stored as m[i][j].
+struct mat3 {
+  float m[3][3];
+
+  // Default construction yields the identity matrix:
+  // ones where both indices match, zeros everywhere else.
+  mat3() {
+    for (int r = 0; r < 3; r++) {
+      for (int c = 0; c < 3; c++) {
+        m[r][c] = (r == c) ? 1.0f : 0.0f;
+      }
+    }
+  }
+};
+
+// 4x4 float matrix stored as m[i][j].
+// A default-constructed mat4 is the identity matrix: ones on the main
+// diagonal and zeros in the remaining twelve entries.
+struct mat4 {
+  float m[4][4];
+
+  mat4() {
+    // Clear all sixteen entries first ...
+    for (int r = 0; r < 4; r++) {
+      for (int c = 0; c < 4; c++) {
+        m[r][c] = 0.0f;
+      }
+    }
+
+    // ... then put ones on the main diagonal.
+    m[0][0] = 1.0f;
+    m[1][1] = 1.0f;
+    m[2][2] = 1.0f;
+    m[3][3] = 1.0f;
+  }
+};
+
+
+void matmul3x3(const mat3 &a, const mat3 &b, mat3 &dst) {
+  // dst = a * b. The product is built in a temporary so dst may alias a or b.
+  mat3 out;
+  for (size_t i = 0; i < 3; i++) {
+    for (size_t j = 0; j < 3; j++) {
+      out.m[i][j] = 0.0f;
+      for (size_t k = 0; k < 3; k++) out.m[i][j] += a.m[i][k] * b.m[k][j];
+    }
+  }
+  dst = out;
+}
+
+void matmul4x4(const mat4 &a, const mat4 &b, mat4 &dst) {
+  // dst = a * b. The product is built in a temporary so dst may alias a or b.
+  mat4 out;
+  for (size_t i = 0; i < 4; i++) {
+    for (size_t j = 0; j < 4; j++) {
+      out.m[i][j] = 0.0f;
+      for (size_t k = 0; k < 4; k++) out.m[i][j] += a.m[i][k] * b.m[k][j];
+    }
+  }
+  dst = out;
+}
+
 void normalizeVector(vec3 &v) {
   float len2 = v.v[0] * v.v[0] + v.v[1] * v.v[1] + v.v[2] * v.v[2];
   if (len2 > 0.0f) {
@@ -217,6 +295,77 @@ void normalizeVector(vec3 &v) {
   }
 }
 
+// Maya-like turntable
+// Reference:
+// https://gamedev.stackexchange.com/questions/204367/implementing-a-maya-like-orbit-camera-in-vulkan-opengl
+//
+// angleX, angleY = angles in degrees.
+// TODO: scale
+static void turntable(float angleX, float angleY, float center[3], float dst[4][4]) {
+  float pivot[3];
+  pivot[0] = center[0];
+  pivot[1] = center[1];
+  pivot[2] = center[2];
+
+  // rotate Y
+  const float kPI = 3.141592f;
+  float cosY = std::cos(kPI * angleY / 180.0f);  // degrees -> radians before cos/sin
+  float sinY = std::sin(kPI * angleY / 180.0f);
+
+  mat3 rotY;
+  rotY.m[0][0] = cosY;
+  rotY.m[0][1] = 0.0f;
+  rotY.m[0][2] = -sinY;
+  rotY.m[1][0] = 0.0f;
+  rotY.m[1][1] = 1.0f;
+  rotY.m[1][2] = 0.0f;
+  rotY.m[2][0] = sinY;
+  rotY.m[2][1] = 0.0f;
+  rotY.m[2][2] = cosY;
+  // rotate X
+  float cosX = std::cos(kPI * angleX / 180.0f);
+  float sinX = std::sin(kPI * angleX / 180.0f);
+
+  mat3 rotX;
+  rotX.m[0][0] = 1.0f;
+  rotX.m[0][1] = 0.0f;
+  rotX.m[0][2] = 0.0f;
+  rotX.m[1][0] = 0.0f;
+  rotX.m[1][1] = cosX;
+  rotX.m[1][2] = sinX;
+  rotX.m[2][0] = 0.0f;
+  rotX.m[2][1] = -sinX;
+  rotX.m[2][2] = cosX;
+  // NOTE: unfinished. pivot, rotY and rotX are computed but never combined,
+  // and dst is never written, so this function currently has no observable
+  // effect. TODO: compose the rotation about pivot and store it in dst.
+}
+
+/*
+  There are 2 approaches here to automatically generating vertex normals. The
+  old approach (computeSmoothingNormals) doesn't handle multiple smoothing
+  groups properly, as it effectively merges all smoothing groups present in the
+  OBJ file into a single group. However, it can be useful when the OBJ file
+  contains vertex normals which you want to use, but is missing some, as it
+  will attempt to fill in the missing normals without generating new shapes.
+
+  The new approach (computeSmoothingShapes, computeAllSmoothingNormals) handles
+  multiple smoothing groups but is a bit more complicated, as handling this
+  correctly requires potentially generating new vertices (and hence shapes).
+  In general, the new approach is most useful if your OBJ file is missing
+  vertex normals entirely, and instead relies on smoothing groups to correctly
+  generate them as a pre-process. That said, it can be used to reliably
+  generate vertex normals in the general case. If you want to always generate
+  normals in this way, simply force set regen_all_normals to true below. By
+  default, it's only true when there are no vertex normals present. One other
+  thing to keep in mind is that the statistics printed apply to the model
+  *prior* to shape regeneration, so you'd need to print them again if you want
+  to see the new statistics.
+
+  TODO(syoyo): import computeSmoothingShapes and computeAllSmoothingNormals to
+  tinyobjloader as utility functions.
+*/
+
 // Check if `mesh_t` contains smoothing group id.
 bool hasSmoothingGroup(const tinyobj::shape_t& shape)
 {
@@ -284,6 +433,138 @@ void computeSmoothingNormals(const tinyobj::attrib_t& attrib, const tinyobj::sha
   }
 
 }  // computeSmoothingNormals
+
+static void computeAllSmoothingNormals(tinyobj::attrib_t& attrib,
+                                       std::vector<tinyobj::shape_t>& shapes) {
+  // Pass 1: add every triangle's unnormalized face normal onto the normal
+  // slot referenced by each of its three corners.
+  vec3 corner[3];
+  for (size_t si = 0; si < shapes.size(); ++si) {
+    const tinyobj::shape_t& shape = shapes[si];
+    const size_t numfaces = shape.mesh.num_face_vertices.size();
+    assert(shape.mesh.smoothing_group_ids.size());
+    for (size_t face = 0; face < numfaces; ++face) {
+      // Fetch the three corner positions (indices are read three per face).
+      for (unsigned int c = 0; c < 3; ++c) {
+        const tinyobj::index_t idx = shape.mesh.indices[3*face + c];
+        assert(idx.vertex_index != -1);
+        for (unsigned int axis = 0; axis < 3; ++axis)
+          corner[c].v[axis] = attrib.vertices[3*idx.vertex_index + axis];
+      }
+      // Edge vectors leaving corner 0: a = corner[1] - corner[0], b = corner[2] - corner[0].
+      const float ax = corner[1].v[0] - corner[0].v[0];
+      const float ay = corner[1].v[1] - corner[0].v[1];
+      const float az = corner[1].v[2] - corner[0].v[2];
+      const float bx = corner[2].v[0] - corner[0].v[0];
+      const float by = corner[2].v[1] - corner[0].v[1];
+      const float bz = corner[2].v[2] - corner[0].v[2];
+      // cross(a, b); deliberately left unnormalized here.
+      const float nx = ay * bz - az * by;
+      const float ny = az * bx - ax * bz;
+      const float nz = ax * by - ay * bx;
+      for (unsigned int c = 0; c < 3; ++c) {
+        const int ni = shape.mesh.indices[3*face + c].normal_index;
+        attrib.normals[3*ni  ] += nx;
+        attrib.normals[3*ni+1] += ny;
+        attrib.normals[3*ni+2] += nz;
+      }
+    }
+  }
+
+  // Pass 2: scale every accumulated normal to unit length; zero vectors stay zero.
+  assert(attrib.normals.size() % 3 == 0);
+  const size_t ncount = attrib.normals.size() / 3;
+  for (size_t i = 0; i < ncount; ++i) {
+    tinyobj::real_t* nrm = &attrib.normals[3*i];
+    const tinyobj::real_t len = sqrtf(nrm[0]*nrm[0] + nrm[1]*nrm[1] + nrm[2]*nrm[2]);
+    const tinyobj::real_t scale = len == 0 ? 0 : 1 / len;
+    nrm[0] *= scale; nrm[1] *= scale; nrm[2] *= scale;
+  }
+}
+
+// Builds one output shape from the faces listed in sortedids[idbegin, idend) and appends the
+// vertices they use, zero-initialised normals and (when present) texcoords to outattrib.
+static void computeSmoothingShape(tinyobj::attrib_t& inattrib, tinyobj::shape_t& inshape,
+                                  std::vector<std::pair<unsigned int, unsigned int>>& sortedids,
+                                  unsigned int idbegin, unsigned int idend,
+                                  std::vector<tinyobj::shape_t>& outshapes, tinyobj::attrib_t& outattrib) {
+  unsigned int sgroupid = sortedids[idbegin].first;
+  bool hasmaterials = inshape.mesh.material_ids.size();
+  // Make a new shape from the set of faces in the range [idbegin, idend).
+  outshapes.emplace_back();
+  tinyobj::shape_t& outshape = outshapes.back();
+  outshape.name = inshape.name;
+  // Skip lines and points.
+  std::unordered_map<unsigned int, unsigned int> remap;
+  for (unsigned int id = idbegin; id < idend; ++id) {
+    unsigned int face = sortedids[id].second;
+    outshape.mesh.num_face_vertices.push_back(3); // always triangles
+    if (hasmaterials)
+      outshape.mesh.material_ids.push_back(inshape.mesh.material_ids[face]);
+    outshape.mesh.smoothing_group_ids.push_back(sgroupid);
+    // Skip tags.
+    for (unsigned int v = 0; v < 3; ++v) {
+      tinyobj::index_t inidx = inshape.mesh.indices[3*face + v], outidx;
+      assert(inidx.vertex_index != -1);
+      auto iter = remap.find(inidx.vertex_index);
+      // Smooth group 0 disables smoothing so no shared vertices in that case.
+      if (sgroupid && iter != remap.end()) {
+        outidx.vertex_index = (*iter).second;
+        outidx.normal_index = outidx.vertex_index;
+        outidx.texcoord_index = (inidx.texcoord_index == -1) ? -1 : outidx.vertex_index;
+      } else {
+        assert(outattrib.vertices.size() % 3 == 0);
+        unsigned int offset = static_cast<unsigned int>(outattrib.vertices.size() / 3);
+        outidx.vertex_index = outidx.normal_index = offset;
+        outidx.texcoord_index = (inidx.texcoord_index == -1) ? -1 : offset;
+        outattrib.vertices.push_back(inattrib.vertices[3*inidx.vertex_index  ]);
+        outattrib.vertices.push_back(inattrib.vertices[3*inidx.vertex_index+1]);
+        outattrib.vertices.push_back(inattrib.vertices[3*inidx.vertex_index+2]);
+        outattrib.normals.push_back(0.0f);
+        outattrib.normals.push_back(0.0f);
+        outattrib.normals.push_back(0.0f);
+        // texcoord_index above equals the vertex offset, so texcoords must stay parallel to
+        // vertices: when the input has any texcoords, a corner without one gets a (0,0) pad.
+        const bool hasuv = inidx.texcoord_index != -1;
+        if (hasuv || !inattrib.texcoords.empty()) {
+          outattrib.texcoords.push_back(hasuv ? inattrib.texcoords[2*inidx.texcoord_index  ] : 0);
+          outattrib.texcoords.push_back(hasuv ? inattrib.texcoords[2*inidx.texcoord_index+1] : 0);
+        }
+        remap[inidx.vertex_index] = offset;
+      }
+      outshape.mesh.indices.push_back(outidx);
+    }
+  }
+}
+
+// Splits each input shape into one output shape per smoothing group id, filling
+// outshapes/outattrib through computeSmoothingShape. Shapes without faces are skipped.
+static void computeSmoothingShapes(tinyobj::attrib_t &inattrib,
+                                   std::vector<tinyobj::shape_t>& inshapes,
+                                   std::vector<tinyobj::shape_t>& outshapes,
+                                   tinyobj::attrib_t& outattrib) {
+  for (size_t s = 0, slen = inshapes.size() ; s < slen; ++s) {
+    tinyobj::shape_t& inshape = inshapes[s];
+    unsigned int numfaces = static_cast<unsigned int>(inshape.mesh.smoothing_group_ids.size());
+    if (numfaces == 0) continue;  // no smoothing-group entries: sortedids[0] below would be out of range
+    std::vector<std::pair<unsigned int,unsigned int>> sortedids(numfaces);
+    for (unsigned int i = 0; i < numfaces; ++i)
+      sortedids[i] = std::make_pair(inshape.mesh.smoothing_group_ids[i], i);
+    std::sort(sortedids.begin(), sortedids.end());
+    unsigned int activeid = sortedids[0].first;
+    unsigned int id = activeid, idbegin = 0, idend = 0;
+    // Faces are now bundled by smoothing group id, create shapes from these.
+    while (idbegin < numfaces) {
+      while (activeid == id && ++idend < numfaces)
+        id = sortedids[idend].first;
+      computeSmoothingShape(inattrib, inshape, sortedids, idbegin, idend,
+                            outshapes, outattrib);
+      activeid = id;
+      idbegin = idend;
+    }
+  }
+}
+
 }  // namespace
 
 static bool LoadObjAndConvert(float bmin[3], float bmax[3],
@@ -291,8 +572,8 @@ static bool LoadObjAndConvert(float bmin[3], float bmax[3],
                               std::vector<tinyobj::material_t>& materials,
                               std::map<std::string, GLuint>& textures,
                               const char* filename) {
-  tinyobj::attrib_t attrib;
-  std::vector<tinyobj::shape_t> shapes;
+  tinyobj::attrib_t inattrib;
+  std::vector<tinyobj::shape_t> inshapes;
 
   timerutil tm;
 
@@ -310,7 +591,7 @@ static bool LoadObjAndConvert(float bmin[3], float bmax[3],
 
   std::string warn;
   std::string err;
-  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err, filename,
+  bool ret = tinyobj::LoadObj(&inattrib, &inshapes, &materials, &warn, &err, filename,
                               base_dir.c_str());
   if (!warn.empty()) {
     std::cout << "WARN: " << warn << std::endl;
@@ -328,11 +609,11 @@ static bool LoadObjAndConvert(float bmin[3], float bmax[3],
 
   printf("Parsing time: %d [ms]\n", (int)tm.msec());
 
-  printf("# of vertices  = %d\n", (int)(attrib.vertices.size()) / 3);
-  printf("# of normals   = %d\n", (int)(attrib.normals.size()) / 3);
-  printf("# of texcoords = %d\n", (int)(attrib.texcoords.size()) / 2);
+  printf("# of vertices  = %d\n", (int)(inattrib.vertices.size()) / 3);
+  printf("# of normals   = %d\n", (int)(inattrib.normals.size()) / 3);
+  printf("# of texcoords = %d\n", (int)(inattrib.texcoords.size()) / 2);
   printf("# of materials = %d\n", (int)materials.size());
-  printf("# of shapes    = %d\n", (int)shapes.size());
+  printf("# of shapes    = %d\n", (int)inshapes.size());
 
   // Append `default` material
   materials.push_back(tinyobj::material_t());
@@ -399,6 +680,17 @@ static bool LoadObjAndConvert(float bmin[3], float bmax[3],
   bmin[0] = bmin[1] = bmin[2] = std::numeric_limits<float>::max();
   bmax[0] = bmax[1] = bmax[2] = -std::numeric_limits<float>::max();
 
+  bool regen_all_normals = inattrib.normals.size() == 0;
+  tinyobj::attrib_t outattrib;
+  std::vector<tinyobj::shape_t> outshapes;
+  if (regen_all_normals) {
+    computeSmoothingShapes(inattrib, inshapes, outshapes, outattrib);
+    computeAllSmoothingNormals(outattrib, outshapes);
+  }
+
+  std::vector<tinyobj::shape_t>& shapes = regen_all_normals ? outshapes : inshapes;
+  tinyobj::attrib_t& attrib = regen_all_normals ? outattrib : inattrib;
+
   {
     for (size_t s = 0; s < shapes.size(); s++) {
       DrawObject o;
@@ -406,7 +698,7 @@ static bool LoadObjAndConvert(float bmin[3], float bmax[3],
 
       // Check for smoothing group and compute smoothing normals
       std::map<int, vec3> smoothVertexNormals;
-      if (hasSmoothingGroup(shapes[s]) > 0) {
+      if (!regen_all_normals && (hasSmoothingGroup(shapes[s]) > 0)) {
         std::cout << "Compute smoothingNormal for shape [" << s << "]" << std::endl;
         computeSmoothingNormals(attrib, shapes[s], smoothVertexNormals);
       }
@@ -653,8 +945,19 @@ static void keyboardFunc(GLFWwindow* window, int key, int scancode, int action,
       mv_z += -1;
     // camera.move(mv_x * 0.05, mv_y * 0.05, mv_z * 0.05);
     // Close window
-    if (key == GLFW_KEY_Q || key == GLFW_KEY_ESCAPE)
+    if (key == GLFW_KEY_Q || key == GLFW_KEY_ESCAPE) {
       glfwSetWindowShouldClose(window, GL_TRUE);
+    }
+
+    if (key == GLFW_KEY_W) {
+      // toggle wireframe
+      g_show_wire = !g_show_wire;
+    }
+
+    if (key == GLFW_KEY_C) {
+      // cull option
+      g_cull_face = !g_cull_face;
+    }
 
     // init_frame = true;
   }
@@ -718,7 +1021,11 @@ static void Draw(const std::vector<DrawObject>& drawObjects,
                  std::vector<tinyobj::material_t>& materials,
                  std::map<std::string, GLuint>& textures) {
   glPolygonMode(GL_FRONT, GL_FILL);
-  glPolygonMode(GL_BACK, GL_FILL);
+  if (g_cull_face) {
+    glPolygonMode(GL_BACK, GL_LINE);
+  } else {
+    glPolygonMode(GL_BACK, GL_FILL);
+  }
 
   glEnable(GL_POLYGON_OFFSET_FILL);
   glPolygonOffset(1.0, 1.0);
@@ -753,29 +1060,31 @@ static void Draw(const std::vector<DrawObject>& drawObjects,
   }
 
   // draw wireframe
-  glDisable(GL_POLYGON_OFFSET_FILL);
-  glPolygonMode(GL_FRONT, GL_LINE);
-  glPolygonMode(GL_BACK, GL_LINE);
+  if (g_show_wire) {
+    glDisable(GL_POLYGON_OFFSET_FILL);
+    glPolygonMode(GL_FRONT, GL_LINE);
+    glPolygonMode(GL_BACK, GL_LINE);
+
+    glColor3f(0.0f, 0.0f, 0.4f);
+    for (size_t i = 0; i < drawObjects.size(); i++) {
+      DrawObject o = drawObjects[i];
+      if (o.vb_id < 1) {
+        continue;
+      }
 
-  glColor3f(0.0f, 0.0f, 0.4f);
-  for (size_t i = 0; i < drawObjects.size(); i++) {
-    DrawObject o = drawObjects[i];
-    if (o.vb_id < 1) {
-      continue;
+      glBindBuffer(GL_ARRAY_BUFFER, o.vb_id);
+      glEnableClientState(GL_VERTEX_ARRAY);
+      glEnableClientState(GL_NORMAL_ARRAY);
+      glDisableClientState(GL_COLOR_ARRAY);
+      glDisableClientState(GL_TEXTURE_COORD_ARRAY);
+      glVertexPointer(3, GL_FLOAT, stride, (const void*)0);
+      glNormalPointer(GL_FLOAT, stride, (const void*)(sizeof(float) * 3));
+      glColorPointer(3, GL_FLOAT, stride, (const void*)(sizeof(float) * 6));
+      glTexCoordPointer(2, GL_FLOAT, stride, (const void*)(sizeof(float) * 9));
+
+      glDrawArrays(GL_TRIANGLES, 0, 3 * o.numTriangles);
+      CheckErrors("drawarrays");
     }
-
-    glBindBuffer(GL_ARRAY_BUFFER, o.vb_id);
-    glEnableClientState(GL_VERTEX_ARRAY);
-    glEnableClientState(GL_NORMAL_ARRAY);
-    glDisableClientState(GL_COLOR_ARRAY);
-    glDisableClientState(GL_TEXTURE_COORD_ARRAY);
-    glVertexPointer(3, GL_FLOAT, stride, (const void*)0);
-    glNormalPointer(GL_FLOAT, stride, (const void*)(sizeof(float) * 3));
-    glColorPointer(3, GL_FLOAT, stride, (const void*)(sizeof(float) * 6));
-    glTexCoordPointer(2, GL_FLOAT, stride, (const void*)(sizeof(float) * 9));
-
-    glDrawArrays(GL_TRIANGLES, 0, 3 * o.numTriangles);
-    CheckErrors("drawarrays");
   }
 }
 
@@ -815,6 +1124,11 @@ int main(int argc, char** argv) {
     return 1;
   }
 
+  std::cout << "W : Toggle wireframe\n";
+  std::cout << "C : Toggle face culling\n";
+  //std::cout << "K, J, H, L, P, N : Move camera\n";
+  std::cout << "Q, Esc : quit\n";
+
   glfwMakeContextCurrent(window);
   glfwSwapInterval(1);
 
@@ -862,15 +1176,25 @@ int main(int argc, char** argv) {
     GLfloat mat[4][4];
     gluLookAt(eye[0], eye[1], eye[2], lookat[0], lookat[1], lookat[2], up[0],
               up[1], up[2]);
+
+    float center[3];
+    center[0] = 0.5 * (bmax[0] + bmin[0]);
+    center[1] = 0.5 * (bmax[1] + bmin[1]);
+    center[2] = 0.5 * (bmax[2] + bmin[2]);
+    float rotm[4][4];
+    turntable(g_angleX, g_angleY, center, rotm);
+
     build_rotmatrix(mat, curr_quat);
     glMultMatrixf(&mat[0][0]);
 
     // Fit to -1, 1
     glScalef(1.0f / maxExtent, 1.0f / maxExtent, 1.0f / maxExtent);
 
+#if 0
     // Centerize object.
     glTranslatef(-0.5 * (bmax[0] + bmin[0]), -0.5 * (bmax[1] + bmin[1]),
                  -0.5 * (bmax[2] + bmin[2]));
+#endif
 
     Draw(gDrawObjects, materials, textures);
 
diff --git a/experimental/viewer.cc b/experimental/viewer.cc
index ba77d27a..4886b784 100644
--- a/experimental/viewer.cc
+++ b/experimental/viewer.cc
@@ -677,7 +677,7 @@ int main(int argc, char **argv)
     return -1;
   }
 
-  std::cout << "GLFW OK." << std::endl;
+  std::cout << "GLFW Init OK." << std::endl;
 
 
   window = glfwCreateWindow(width, height, "Obj viewer", NULL, NULL);
diff --git a/fuzzer/README.md b/fuzzer/README.md
new file mode 100644
index 00000000..1cd63a29
--- /dev/null
+++ b/fuzzer/README.md
@@ -0,0 +1,51 @@
+# Fuzzing test
+
+Fuzzing tests for tinyobjloader.
+
+## Supported API
+
+* [x] ParseFromString
+
+## Requirements
+
+* clang with fuzzer support (`-fsanitize=fuzzer`; clang 8.0 or newer should work)
+
+## Setup
+
+### Ubuntu 18.04
+
+```
+$ sudo apt install clang++-8
+$ sudo apt install libfuzzer-8-dev
+```
+
+Optionally, if you have not configured `update-alternatives`, you can make `clang++` point to `clang++-8`:
+
+```
+$ sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-8 10
+$ sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-8 10
+```
+
+## How to compile
+
+The fuzz target is compiled together with the rest of the project when the environment variable `LIB_FUZZING_ENGINE` is defined at the time cmake is run.
+With clang, you can compile with:
+```
+$ export LIB_FUZZING_ENGINE=-fsanitize=fuzzer
+$ mkdir build && cd build
+$ cmake .. -DBUILD_SHARED_LIBS=OFF
+$ make -j $(nproc)
+```
+
+## How to run
+
+Increase the memory limit if needed, e.g. `-rss_limit_mb=2000`.
+See libfuzzer.info for all options.
+
+```
+$ ./fuzz_ParseFromString -rss_limit_mb=2000
+```
+
+## Regression tests
+
+See `regression_runner/`
diff --git a/fuzzer/fuzz_ParseFromString.cc b/fuzzer/fuzz_ParseFromString.cc
new file mode 100644
index 00000000..aa45f89a
--- /dev/null
+++ b/fuzzer/fuzz_ParseFromString.cc
@@ -0,0 +1,26 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <string.h>
+
+#define TINYOBJLOADER_IMPLEMENTATION // define this in only *one* .cc
+#include "tiny_obj_loader.h"
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+    // libFuzzer entry point. Input layout: <obj text> NUL <mtl text>, split at the first NUL byte.
+    tinyobj::ObjReaderConfig reader_config;
+    tinyobj::ObjReader reader;
+
+    // The final byte is never searched, so a separator always leaves at least one byte of mtl text.
+    const void *separator = (Size < 2) ? NULL : memchr(Data, 0, Size - 1);
+    if (separator == NULL) {
+        return 0;  // too short, or no separator: nothing is parsed
+    }
+    const size_t split = static_cast<size_t>(static_cast<const uint8_t *>(separator) - Data);
+    std::string obj_text(reinterpret_cast<const char *>(Data), split);
+    std::string mtl_text(reinterpret_cast<const char *>(Data + split + 1), Size - split - 1);
+    reader.ParseFromString(obj_text, mtl_text, reader_config);
+    return 0;
+}
+
diff --git a/fuzzer/regression_runner/Makefile b/fuzzer/regression_runner/Makefile
new file mode 100644
index 00000000..f2c38a0d
--- /dev/null
+++ b/fuzzer/regression_runner/Makefile
@@ -0,0 +1,2 @@
+all:
+	clang++ -fsanitize=address,undefined ../../loader_example.cc
diff --git a/fuzzer/regression_runner/README.md b/fuzzer/regression_runner/README.md
new file mode 100644
index 00000000..f59b9f6a
--- /dev/null
+++ b/fuzzer/regression_runner/README.md
@@ -0,0 +1,11 @@
+# Run fuzzer regression tests
+
+Currently we only support Linux + clang.
+
+## How to run
+
+```
+$ make
+$ ./a.out ../regressions/<regression_file>
+```
+
diff --git a/fuzzer/regressions/clusterfuzz-testcase-minimized-fuzz_ParseFromString-4877060179886080 b/fuzzer/regressions/clusterfuzz-testcase-minimized-fuzz_ParseFromString-4877060179886080
new file mode 100644
index 00000000..e5094497
Binary files /dev/null and b/fuzzer/regressions/clusterfuzz-testcase-minimized-fuzz_ParseFromString-4877060179886080 differ
diff --git a/fuzzer/runner.py b/fuzzer/runner.py
index 0c06d4ba..a647d3ce 100644
--- a/fuzzer/runner.py
+++ b/fuzzer/runner.py
@@ -2,10 +2,11 @@
 import glob
 import subprocess
 
+
 def main():
     for g in glob.glob("../tests/afl/id*"):
         print(g)
-    
+
         cmd = ["../a.out", g]
 
         proc = subprocess.Popen(cmd)
diff --git a/loader_example.cc b/loader_example.cc
index 69e55976..21feb684 100644
--- a/loader_example.cc
+++ b/loader_example.cc
@@ -257,7 +257,7 @@ static void PrintInfo(const tinyobj::attrib_t& attrib,
     printf("  material.Pm     = %f\n", static_cast<const double>(materials[i].metallic));
     printf("  material.Ps     = %f\n", static_cast<const double>(materials[i].sheen));
     printf("  material.Pc     = %f\n", static_cast<const double>(materials[i].clearcoat_thickness));
-    printf("  material.Pcr    = %f\n", static_cast<const double>(materials[i].clearcoat_thickness));
+    printf("  material.Pcr    = %f\n", static_cast<const double>(materials[i].clearcoat_roughness));
     printf("  material.aniso  = %f\n", static_cast<const double>(materials[i].anisotropy));
     printf("  material.anisor = %f\n", static_cast<const double>(materials[i].anisotropy_rotation));
     printf("  material.map_Ke = %s\n", materials[i].emissive_texname.c_str());
@@ -379,12 +379,12 @@ static bool TestStreamLoadObj() {
    public:
     MaterialStringStreamReader(const std::string& matSStream)
         : m_matSStream(matSStream) {}
-    virtual ~MaterialStringStreamReader() {}
+    virtual ~MaterialStringStreamReader() TINYOBJ_OVERRIDE {}
     virtual bool operator()(const std::string& matId,
                             std::vector<material_t>* materials,
                             std::map<std::string, int>* matMap,
                             std::string* warn,
-                            std::string* err) {
+                            std::string* err) TINYOBJ_OVERRIDE {
       (void)err;
       (void)matId;
       LoadMtl(matMap, materials, &m_matSStream, warn, err);
diff --git a/mapbox/LICENSE b/mapbox/LICENSE
new file mode 100644
index 00000000..8bafb577
--- /dev/null
+++ b/mapbox/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2015, Mapbox
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
diff --git a/mapbox/earcut.hpp b/mapbox/earcut.hpp
new file mode 100644
index 00000000..01bd7e96
--- /dev/null
+++ b/mapbox/earcut.hpp
@@ -0,0 +1,825 @@
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+namespace mapbox {
+
+namespace util {
+
+template <std::size_t I, typename T> struct nth {  // accessor for element I of a tuple-like point type T
+    inline static typename std::tuple_element<I, T>::type
+    get(const T& t) { return std::get<I>(t); };  // returns element I of t by value, read via std::get
+};
+
+}
+
+namespace detail {
+
+template <typename N = uint32_t>  // N: integer type of the vertex indices written to `indices`
+class Earcut {
+public:
+    std::vector<N> indices;  // output: three vertex indices are appended per emitted triangle
+    std::size_t vertices = 0;  // number of input points linked so far (offset for the next ring's indices)
+
+    template <typename Polygon>
+    void operator()(const Polygon& points);  // triangulates `points`; ring 0 is the outer ring, later rings are holes
+
+private:
+    struct Node {  // one polygon vertex; non-copyable and non-movable, linked through raw pointers
+        Node(N index, double x_, double y_) : i(index), x(x_), y(y_) {}
+        Node(const Node&) = delete;
+        Node& operator=(const Node&) = delete;
+        Node(Node&&) = delete;
+        Node& operator=(Node&&) = delete;
+
+        const N i;  // vertex index reported in `indices`
+        const double x;  // vertex coordinates
+        const double y;
+
+        // previous and next vertex nodes in a polygon ring
+        Node* prev = nullptr;
+        Node* next = nullptr;
+
+        // z-order curve value
+        int32_t z = 0;
+
+        // previous and next nodes in z-order
+        Node* prevZ = nullptr;
+        Node* nextZ = nullptr;
+
+        // indicates whether this is a steiner point
+        bool steiner = false;
+    };
+
+    template <typename Ring> Node* linkedList(const Ring& points, const bool clockwise);
+    Node* filterPoints(Node* start, Node* end = nullptr);
+    void earcutLinked(Node* ear, int pass = 0);
+    bool isEar(Node* ear);
+    bool isEarHashed(Node* ear);
+    Node* cureLocalIntersections(Node* start);
+    void splitEarcut(Node* start);
+    template <typename Polygon> Node* eliminateHoles(const Polygon& points, Node* outerNode);
+    Node* eliminateHole(Node* hole, Node* outerNode);
+    Node* findHoleBridge(Node* hole, Node* outerNode);
+    bool sectorContainsSector(const Node* m, const Node* p);
+    void indexCurve(Node* start);
+    Node* sortLinked(Node* list);
+    int32_t zOrder(const double x_, const double y_);
+    Node* getLeftmost(Node* start);
+    bool pointInTriangle(double ax, double ay, double bx, double by, double cx, double cy, double px, double py) const;
+    bool isValidDiagonal(Node* a, Node* b);
+    double area(const Node* p, const Node* q, const Node* r) const;
+    bool equals(const Node* p1, const Node* p2);
+    bool intersects(const Node* p1, const Node* q1, const Node* p2, const Node* q2);
+    bool onSegment(const Node* p, const Node* q, const Node* r);
+    int sign(double val);
+    bool intersectsPolygon(const Node* a, const Node* b);
+    bool locallyInside(const Node* a, const Node* b);
+    bool middleInside(const Node* a, const Node* b);
+    Node* splitPolygon(Node* a, Node* b);
+    template <typename Point> Node* insertNode(std::size_t i, const Point& p, Node* last);
+    void removeNode(Node* p);
+
+    bool hashing;  // assigned in operator() before earcutLinked runs; selects isEarHashed over isEar
+    double minX, maxX;  // bounding box of the polygon; only assigned when hashing is enabled
+    double minY, maxY;
+    double inv_size = 0;  // 1 / (largest bbox extent), or 0 when that extent is zero
+
+    template <typename T, typename Alloc = std::allocator<T>>
+    class ObjectPool {  // hands out T objects carved from blocks of `blockSize` elements
+    public:
+        ObjectPool() { }
+        ObjectPool(std::size_t blockSize_) {
+            reset(blockSize_);
+        }
+        ~ObjectPool() {
+            clear();
+        }
+        template <typename... Args>
+        T* construct(Args&&... args) {
+            if (currentIndex >= blockSize) {  // current block exhausted (or none yet): allocate a new one
+                currentBlock = alloc_traits::allocate(alloc, blockSize);
+                allocations.emplace_back(currentBlock);
+                currentIndex = 0;
+            }
+            T* object = &currentBlock[currentIndex++];
+            alloc_traits::construct(alloc, object, std::forward<Args>(args)...);
+            return object;
+        }
+        void reset(std::size_t newBlockSize) {  // deallocates every block; no destructors are invoked here
+            for (auto allocation : allocations) {
+                alloc_traits::deallocate(alloc, allocation, blockSize);
+            }
+            allocations.clear();
+            blockSize = std::max<std::size_t>(1, newBlockSize);  // a block always holds at least one object
+            currentBlock = nullptr;
+            currentIndex = blockSize;  // makes the next construct() allocate a fresh block
+        }
+        void clear() { reset(blockSize); }
+    private:
+        T* currentBlock = nullptr;
+        std::size_t currentIndex = 1;
+        std::size_t blockSize = 1;
+        std::vector<T*> allocations;  // every block allocated since the last reset
+        Alloc alloc;
+        typedef typename std::allocator_traits<Alloc> alloc_traits;
+    };
+    ObjectPool<Node> nodes;  // backing storage for all Node objects
+};
+
+template <typename N> template <typename Polygon>
+void Earcut<N>::operator()(const Polygon& points) {  // triangulates `points` and stores the result in `indices`
+    // reset the results of any previous run
+    indices.clear();
+    vertices = 0;
+
+    if (points.empty()) return;
+
+    double x;
+    double y;
+    int threshold = 80;  // point budget; z-order hashing is enabled once it drops below zero
+    std::size_t len = 0;
+
+    for (size_t i = 0; threshold >= 0 && i < points.size(); i++) {
+        threshold -= static_cast<int>(points[i].size());
+        len += points[i].size();
+    }
+
+    // estimate size of nodes and indices (len stops growing once threshold has dropped below zero)
+    nodes.reset(len * 3 / 2);
+    indices.reserve(len + points[0].size());
+
+    Node* outerNode = linkedList(points[0], true);  // ring 0 is linked with clockwise == true
+    if (!outerNode || outerNode->prev == outerNode->next) return;  // ring has at most two nodes: nothing to cut
+
+    if (points.size() > 1) outerNode = eliminateHoles(points, outerNode);
+
+    // if the shape is not too simple, we'll use z-order curve hash later; calculate polygon bbox
+    hashing = threshold < 0;
+    if (hashing) {
+        Node* p = outerNode->next;
+        minX = maxX = outerNode->x;
+        minY = maxY = outerNode->y;
+        do {
+            x = p->x;
+            y = p->y;
+            minX = std::min<double>(minX, x);
+            minY = std::min<double>(minY, y);
+            maxX = std::max<double>(maxX, x);
+            maxY = std::max<double>(maxY, y);
+            p = p->next;
+        } while (p != outerNode);
+
+        // minX, minY and inv_size are later used to transform coords into integers for z-order calculation
+        inv_size = std::max<double>(maxX - minX, maxY - minY);
+        inv_size = inv_size != .0 ? (1. / inv_size) : .0;  // zero extent: keep 0 instead of dividing by zero
+    }
+
+    earcutLinked(outerNode);
+
+    nodes.clear();  // releases the node storage; `indices` keeps the result
+}
+
+// create a circular doubly linked list from polygon points in the specified winding order
+template <typename N> template <typename Ring>
+typename Earcut<N>::Node*
+Earcut<N>::linkedList(const Ring& points, const bool clockwise) {  // returns a node of the ring, nullptr if `points` is empty
+    using Point = typename Ring::value_type;
+    double sum = 0;
+    const std::size_t len = points.size();
+    std::size_t i, j;
+    Node* last = nullptr;
+
+    // calculate original winding order of a polygon ring (only the sign of `sum` is used below)
+    for (i = 0, j = len > 0 ? len - 1 : 0; i < len; j = i++) {  // j trails i by one, starting at the last point
+        const auto& p1 = points[i];
+        const auto& p2 = points[j];
+        const double p20 = util::nth<0, Point>::get(p2);
+        const double p10 = util::nth<0, Point>::get(p1);
+        const double p11 = util::nth<1, Point>::get(p1);
+        const double p21 = util::nth<1, Point>::get(p2);
+        sum += (p20 - p10) * (p11 + p21);
+    }
+
+    // link points into circular doubly-linked list in the specified winding order
+    if (clockwise == (sum > 0)) {
+        for (i = 0; i < len; i++) last = insertNode(vertices + i, points[i], last);
+    } else {
+        for (i = len; i-- > 0;) last = insertNode(vertices + i, points[i], last);  // same indices, reverse link order
+    }
+
+    if (last && equals(last, last->next)) {  // last node has the same coordinates as its successor: unlink it
+        removeNode(last);
+        last = last->next;
+    }
+
+    vertices += len;  // the next ring's vertex indices continue after this ring's
+
+    return last;
+}
+
+// eliminate collinear or duplicate points; returns the node at which the scan ended
+template <typename N>
+typename Earcut<N>::Node*
+Earcut<N>::filterPoints(Node* start, Node* end) {
+    if (!end) end = start;  // no end given: scan the whole ring
+
+    Node* p = start;
+    bool again;
+    do {
+        again = false;
+
+        if (!p->steiner && (equals(p, p->next) || area(p->prev, p, p->next) == 0)) {  // steiner points are never removed
+            removeNode(p);
+            p = end = p->prev;  // step back so the predecessor is re-examined with its new neighbour
+
+            if (p == p->next) break;  // the ring is down to a single node
+            again = true;
+
+        } else {
+            p = p->next;
+        }
+    } while (again || p != end);
+
+    return end;
+}
+
+// main ear slicing loop which triangulates a polygon (given as a linked list)
+template <typename N>
+void Earcut<N>::earcutLinked(Node* ear, int pass) {  // pass: 0 = first attempt, 1 = after filtering, 2 = after curing
+    if (!ear) return;
+
+    // interlink polygon nodes in z-order
+    if (!pass && hashing) indexCurve(ear);
+
+    // `stop` is where a complete lap that clipped no ear ends
+    Node* stop = ear;
+    Node* prev;
+    Node* next;
+
+    // iterate through ears, slicing them one by one;
+    // the loop ends once fewer than three nodes remain in the ring
+    while (ear->prev != ear->next) {
+        // remember both neighbours before `ear` is possibly unlinked
+        prev = ear->prev;
+        next = ear->next;
+
+        if (hashing ? isEarHashed(ear) : isEar(ear)) {
+            // cut off the triangle
+            indices.emplace_back(prev->i);
+            indices.emplace_back(ear->i);
+            indices.emplace_back(next->i);
+
+            removeNode(ear);
+
+            // skipping the next vertex leads to fewer sliver triangles
+            ear = next->next;
+            stop = next->next;
+
+            continue;
+        }
+
+        ear = next;
+
+        // if we looped through the whole remaining polygon and can't find any more ears
+        if (ear == stop) {
+            // try filtering points and slicing again
+            if (!pass) earcutLinked(filterPoints(ear), 1);
+
+            // if this didn't work, try curing all small self-intersections locally
+            else if (pass == 1) {
+                ear = cureLocalIntersections(filterPoints(ear));
+                earcutLinked(ear, 2);
+
+            // as a last resort, try splitting the remaining polygon into two
+            } else if (pass == 2) splitEarcut(ear);
+
+            break;
+        }
+    }
+}
+
+// check whether a polygon node forms a valid ear with adjacent nodes
+template <typename N>
+bool Earcut<N>::isEar(Node* ear) {
+    const Node* before = ear->prev;
+    const Node* tip = ear;
+    const Node* after = ear->next;
+
+    // a non-negative signed area marks a reflex (or degenerate) corner, which cannot be an ear
+    if (area(before, tip, after) >= 0) return false;
+
+    // visit every other ring node; one lying in the candidate triangle whose own corner
+    // also has non-negative area rules the ear out
+    for (const Node* cur = after->next; cur != before; cur = cur->next) {
+        const bool inside = pointInTriangle(before->x, before->y, tip->x, tip->y,
+                                            after->x, after->y, cur->x, cur->y);
+        if (inside && area(cur->prev, cur, cur->next) >= 0) return false;
+    }
+
+    return true;
+}
+
+template <typename N>
+bool Earcut<N>::isEarHashed(Node* ear) {  // ear test that walks the z-order links instead of the whole ring
+    const Node* a = ear->prev;
+    const Node* b = ear;
+    const Node* c = ear->next;
+
+    if (area(a, b, c) >= 0) return false; // reflex, can't be an ear
+
+    // triangle bbox; min & max are calculated like this for speed
+    const double minTX = std::min<double>(a->x, std::min<double>(b->x, c->x));
+    const double minTY = std::min<double>(a->y, std::min<double>(b->y, c->y));
+    const double maxTX = std::max<double>(a->x, std::max<double>(b->x, c->x));
+    const double maxTY = std::max<double>(a->y, std::max<double>(b->y, c->y));
+
+    // z-order range for the current triangle bbox;
+    const int32_t minZ = zOrder(minTX, minTY);
+    const int32_t maxZ = zOrder(maxTX, maxTY);
+
+    // first look for points inside the triangle in increasing z-order
+    Node* p = ear->nextZ;
+
+    while (p && p->z <= maxZ) {  // stop at the end of the z-list or once past the bbox's largest z value
+        if (p != ear->prev && p != ear->next &&
+            pointInTriangle(a->x, a->y, b->x, b->y, c->x, c->y, p->x, p->y) &&
+            area(p->prev, p, p->next) >= 0) return false;
+        p = p->nextZ;
+    }
+
+    // then look for points in decreasing z-order
+    p = ear->prevZ;
+
+    while (p && p->z >= minZ) {  // stop at the start of the z-list or once below the bbox's smallest z value
+        if (p != ear->prev && p != ear->next &&
+            pointInTriangle(a->x, a->y, b->x, b->y, c->x, c->y, p->x, p->y) &&
+            area(p->prev, p, p->next) >= 0) return false;
+        p = p->prevZ;
+    }
+
+    return true;
+}
+
+// go through all polygon nodes and cure small local self-intersections
+template <typename N>
+typename Earcut<N>::Node*
+Earcut<N>::cureLocalIntersections(Node* start) {
+    Node* p = start;
+    do {
+        Node* a = p->prev;
+        Node* b = p->next->next;
+
+        // a self-intersection where edge (v[i-1],v[i]) intersects (v[i+1],v[i+2])
+        if (!equals(a, b) && intersects(a, p, p->next, b) && locallyInside(a, b) && locallyInside(b, a)) {
+            indices.emplace_back(a->i);  // emit the triangle (a, p, b)
+            indices.emplace_back(p->i);
+            indices.emplace_back(b->i);
+
+            // remove two nodes involved
+            removeNode(p);
+            removeNode(p->next);  // NOTE(review): presumably relies on removeNode leaving p's own links intact - confirm
+
+            p = start = b;  // restart the lap at b
+        }
+        p = p->next;
+    } while (p != start);
+
+    return filterPoints(p);
+}
+
+// try splitting polygon into two and triangulate them independently
+template <typename N>
+void Earcut<N>::splitEarcut(Node* start) {
+    // look for a valid diagonal that divides the polygon into two
+    Node* a = start;
+    do {
+        Node* b = a->next->next;  // candidates start two steps ahead; a's direct neighbours are skipped
+        while (b != a->prev) {
+            if (a->i != b->i && isValidDiagonal(a, b)) {
+                // split the polygon in two by the diagonal
+                Node* c = splitPolygon(a, b);
+
+                // filter collinear points around the cuts
+                a = filterPoints(a, a->next);
+                c = filterPoints(c, c->next);
+
+                // run earcut on each half
+                earcutLinked(a);
+                earcutLinked(c);
+                return;  // only the first valid diagonal found is used
+            }
+            b = b->next;
+        }
+        a = a->next;
+    } while (a != start);
+}
+
+// link every hole into the outer loop, producing a single-ring polygon without holes
+template <typename N> template <typename Polygon>
+typename Earcut<N>::Node*
+Earcut<N>::eliminateHoles(const Polygon& points, Node* outerNode) {
+    const size_t ringCount = points.size();
+
+    std::vector<Node*> holes;
+    for (size_t ring = 1; ring < ringCount; ++ring) {
+        Node* const head = linkedList(points[ring], false);
+        if (!head) continue;
+        // a hole ring that consists of a single node is flagged as a steiner point
+        if (head == head->next) head->steiner = true;
+        holes.push_back(getLeftmost(head));
+    }
+    std::sort(holes.begin(), holes.end(), [](const Node* lhs, const Node* rhs) {
+        return lhs->x < rhs->x;
+    });
+
+    // bridge the holes in ascending order of their leftmost x
+    for (Node* hole : holes) {
+        outerNode = eliminateHole(hole, outerNode);
+        outerNode = filterPoints(outerNode, outerNode->next);
+    }
+
+    return outerNode;
+}
+
+// find a bridge between vertices that connects hole with an outer ring and link it
+template <typename N>
+typename Earcut<N>::Node*
+Earcut<N>::eliminateHole(Node* hole, Node* outerNode) {
+    Node* bridge = findHoleBridge(hole, outerNode);
+    if (!bridge) {
+        return outerNode;  // no bridge found: the outer ring is returned unchanged
+    }
+
+    Node* bridgeReverse = splitPolygon(bridge, hole);
+
+    // filter collinear points around the cuts
+    Node* filteredBridge = filterPoints(bridge, bridge->next);
+    filterPoints(bridgeReverse, bridgeReverse->next);
+
+    // Check if input node was removed by the filtering
+    return outerNode == bridge ? filteredBridge : outerNode;
+}
+
+// David Eberly's algorithm for finding a bridge between hole and outer polygon; returns nullptr if none is found
+template <typename N>
+typename Earcut<N>::Node*
+Earcut<N>::findHoleBridge(Node* hole, Node* outerNode) {
+    Node* p = outerNode;
+    double hx = hole->x;
+    double hy = hole->y;
+    double qx = -std::numeric_limits<double>::infinity();  // x of the closest intersection found so far
+    Node* m = nullptr;  // current bridge candidate on the outer ring
+
+    // find a segment intersected by a ray from the hole's leftmost Vertex to the left;
+    // segment's endpoint with lesser x will be potential connection Vertex
+    do {
+        if (hy <= p->y && hy >= p->next->y && p->next->y != p->y) {
+            double x = p->x + (hy - p->y) * (p->next->x - p->x) / (p->next->y - p->y);
+            if (x <= hx && x > qx) {
+                qx = x;
+                if (x == hx) {
+                    if (hy == p->y) return p;
+                    if (hy == p->next->y) return p->next;
+                }
+                m = p->x < p->next->x ? p : p->next;
+            }
+        }
+        p = p->next;
+    } while (p != outerNode);
+
+    if (!m) return nullptr;  // the ray hit no segment
+
+    if (hx == qx) return m; // hole touches outer segment; pick leftmost endpoint
+
+    // look for points inside the triangle of hole Vertex, segment intersection and endpoint;
+    // if there are no points found, we have a valid connection;
+    // otherwise choose the Vertex of the minimum angle with the ray as connection Vertex
+
+    const Node* stop = m;
+    double tanMin = std::numeric_limits<double>::infinity();
+    double tanCur = 0;
+
+    p = m;
+    double mx = m->x;
+    double my = m->y;
+
+    do {
+        if (hx >= p->x && p->x >= mx && hx != p->x &&
+            pointInTriangle(hy < my ? hx : qx, hy, mx, my, hy < my ? qx : hx, hy, p->x, p->y)) {
+
+            tanCur = std::abs(hy - p->y) / (hx - p->x); // tangential
+
+            if (locallyInside(p, hole) &&
+                (tanCur < tanMin || (tanCur == tanMin && (p->x > m->x || sectorContainsSector(m, p))))) {
+                m = p;
+                tanMin = tanCur;
+            }
+        }
+
+        p = p->next;
+    } while (p != stop);
+
+    return m;
+}
+
+// whether sector in vertex m contains sector in vertex p in the same coordinates
+template <typename N>
+bool Earcut<N>::sectorContainsSector(const Node* m, const Node* p) {
+    return area(m->prev, m, p->prev) < 0 && area(p->next, m, m->next) < 0;  // true only if both signed areas are negative
+}
+
+// interlink polygon nodes in z-order
+template <typename N>
+void Earcut<N>::indexCurve(Node* start) {
+    assert(start);
+    Node* p = start;
+
+    do {
+        p->z = p->z ? p->z : zOrder(p->x, p->y);  // a non-zero z is kept as is
+        p->prevZ = p->prev;  // seed the z-list with the ring order
+        p->nextZ = p->next;
+        p = p->next;
+    } while (p != start);
+
+    p->prevZ->nextZ = nullptr;  // cut the circular z-list open so it is null-terminated at both ends
+    p->prevZ = nullptr;
+
+    sortLinked(p);
+}
+
+// Simon Tatham's linked list merge sort algorithm
+// http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html
+template <typename N>
+typename Earcut<N>::Node*
+Earcut<N>::sortLinked(Node* list) {  // sorts the nextZ/prevZ chain by ascending z and returns its new head
+    assert(list);
+    Node* p;
+    Node* q;
+    Node* e;
+    Node* tail;
+    int i, numMerges, pSize, qSize;
+    int inSize = 1;  // length of the runs merged in the current pass; doubled after every pass
+
+    for (;;) {
+        p = list;
+        list = nullptr;
+        tail = nullptr;
+        numMerges = 0;
+
+        while (p) {
+            numMerges++;
+            q = p;
+            pSize = 0;
+            for (i = 0; i < inSize; i++) {  // advance q up to inSize nodes past p
+                pSize++;
+                q = q->nextZ;
+                if (!q) break;
+            }
+
+            qSize = inSize;
+
+            while (pSize > 0 || (qSize > 0 && q)) {  // merge the run at p with the run at q
+
+                if (pSize == 0) {
+                    e = q;
+                    q = q->nextZ;
+                    qSize--;
+                } else if (qSize == 0 || !q) {
+                    e = p;
+                    p = p->nextZ;
+                    pSize--;
+                } else if (p->z <= q->z) {  // on equal z the node from the p run is taken first
+                    e = p;
+                    p = p->nextZ;
+                    pSize--;
+                } else {
+                    e = q;
+                    q = q->nextZ;
+                    qSize--;
+                }
+
+                if (tail) tail->nextZ = e;
+                else list = e;
+
+                e->prevZ = tail;
+                tail = e;
+            }
+
+            p = q;
+        }
+
+        tail->nextZ = nullptr;
+
+        if (numMerges <= 1) return list;  // at most one merge was needed in this pass: the list is sorted
+
+        inSize *= 2;
+    }
+}
+
+// z-order of a Vertex given coords and size of the data bounding box
+template <typename N>
+int32_t Earcut<N>::zOrder(const double x_, const double y_) {
+    // coords are transformed into non-negative 15-bit integer range
+    int32_t x = static_cast<int32_t>(32767.0 * (x_ - minX) * inv_size);
+    int32_t y = static_cast<int32_t>(32767.0 * (y_ - minY) * inv_size);
+
+    x = (x | (x << 8)) & 0x00FF00FF;  // spread the bits of x apart ...
+    x = (x | (x << 4)) & 0x0F0F0F0F;
+    x = (x | (x << 2)) & 0x33333333;
+    x = (x | (x << 1)) & 0x55555555;  // ... until only even bit positions are occupied
+
+    y = (y | (y << 8)) & 0x00FF00FF;  // same bit spreading for y
+    y = (y | (y << 4)) & 0x0F0F0F0F;
+    y = (y | (y << 2)) & 0x33333333;
+    y = (y | (y << 1)) & 0x55555555;
+
+    return x | (y << 1);  // interleave: x in the even bits, y in the odd bits
+}
+
+// find the leftmost node of a polygon ring
+template <typename N>
+typename Earcut<N>::Node*
+Earcut<N>::getLeftmost(Node* start) {
+    Node* best = start;
+    // walk the rest of the ring once; ties on x are broken by the smaller y
+    for (Node* cur = start->next; cur != start; cur = cur->next) {
+        const bool further_left = cur->x < best->x;
+        const bool same_x_lower = cur->x == best->x && cur->y < best->y;
+        if (further_left || same_x_lower) best = cur;
+    }
+
+    return best;
+}
+
+// check if a point (px, py) lies within a convex triangle a, b, c; a zero edge test still counts as within
+template <typename N>
+bool Earcut<N>::pointInTriangle(double ax, double ay, double bx, double by, double cx, double cy, double px, double py) const {
+    return (cx - px) * (ay - py) - (ax - px) * (cy - py) >= 0 &&
+           (ax - px) * (by - py) - (bx - px) * (ay - py) >= 0 &&
+           (bx - px) * (cy - py) - (cx - px) * (by - py) >= 0;
+}
+
+// check if a diagonal between two polygon nodes is valid (lies in polygon interior)
+template <typename N>
+bool Earcut<N>::isValidDiagonal(Node* a, Node* b) {
+    return a->next->i != b->i && a->prev->i != b->i && !intersectsPolygon(a, b) && // doesn't intersect other edges
+           ((locallyInside(a, b) && locallyInside(b, a) && middleInside(a, b) && // locally visible
+            (area(a->prev, a, b->prev) != 0.0 || area(a, b->prev, b) != 0.0)) || // does not create opposite-facing sectors
+            (equals(a, b) && area(a->prev, a, a->next) > 0 && area(b->prev, b, b->next) > 0)); // special zero-length case
+}
+
+// signed area of a triangle (cross-product form without a 1/2 factor; callers in view only test its sign or zero-ness)
+template <typename N>
+double Earcut<N>::area(const Node* p, const Node* q, const Node* r) const {
+    return (q->y - p->y) * (r->x - q->x) - (q->x - p->x) * (r->y - q->y);
+}
+
+// check if two points are equal (exact comparison of both coordinates, no tolerance)
+template <typename N>
+bool Earcut<N>::equals(const Node* p1, const Node* p2) {
+    return p1->x == p2->x && p1->y == p2->y;
+}
+
+// check if segments p1q1 and p2q2 intersect; shared endpoints and collinear overlaps count as intersections
+template <typename N>
+bool Earcut<N>::intersects(const Node* p1, const Node* q1, const Node* p2, const Node* q2) {
+    int o1 = sign(area(p1, q1, p2)); // orientation of p2 relative to line p1q1
+    int o2 = sign(area(p1, q1, q2)); // orientation of q2 relative to line p1q1
+    int o3 = sign(area(p2, q2, p1)); // orientation of p1 relative to line p2q2
+    int o4 = sign(area(p2, q2, q1)); // orientation of q1 relative to line p2q2
+
+    if (o1 != o2 && o3 != o4) return true; // general case: orientations differ for both segments
+
+    if (o1 == 0 && onSegment(p1, p2, q1)) return true; // p1, q1 and p2 are collinear and p2 lies on p1q1
+    if (o2 == 0 && onSegment(p1, q2, q1)) return true; // p1, q1 and q2 are collinear and q2 lies on p1q1
+    if (o3 == 0 && onSegment(p2, p1, q2)) return true; // p2, q2 and p1 are collinear and p1 lies on p2q2
+    if (o4 == 0 && onSegment(p2, q1, q2)) return true; // p2, q2 and q1 are collinear and q1 lies on p2q2
+
+    return false;
+}
+
+// for collinear points p, q, r: true when q lies within the (inclusive) bounding box of segment pr
+template <typename N>
+bool Earcut<N>::onSegment(const Node* p, const Node* q, const Node* r) {
+    if (!(q->x <= std::max<double>(p->x, r->x))) return false;
+    if (!(q->x >= std::min<double>(p->x, r->x))) return false;
+    if (!(q->y <= std::max<double>(p->y, r->y))) return false;
+    return q->y >= std::min<double>(p->y, r->y);
+}
+
+template <typename N>
+int Earcut<N>::sign(double val) { // three-way sign of val: -1, 0 or +1
+    return (val > 0.0) - (val < 0.0);
+}
+
+// walk the ring from a and report whether diagonal ab crosses any edge that does not touch a or b (by index)
+template <typename N>
+bool Earcut<N>::intersectsPolygon(const Node* a, const Node* b) {
+    const Node* edge = a;
+    do {
+        const Node* const next = edge->next;
+        const bool touches = edge->i == a->i || next->i == a->i || edge->i == b->i || next->i == b->i;
+        if (!touches && intersects(edge, next, a, b)) return true;
+        edge = next;
+    } while (edge != a);
+    return false;
+}
+
+// check if a polygon diagonal is locally inside the polygon, i.e. whether b falls within the corner formed at a
+template <typename N>
+bool Earcut<N>::locallyInside(const Node* a, const Node* b) {
+    return area(a->prev, a, a->next) < 0 ? // choose the test by the turn direction of the ring at a
+        area(a, b, a->next) >= 0 && area(a, a->prev, b) >= 0 : // both conditions are required
+        area(a, b, a->prev) < 0 || area(a, a->next, b) < 0; // either condition suffices
+}
+
+// check if the midpoint of a polygon diagonal is inside the polygon (parity of ring edges crossed at greater x)
+template <typename N>
+bool Earcut<N>::middleInside(const Node* a, const Node* b) {
+    const Node* p = a;
+    bool inside = false;
+    double px = (a->x + b->x) / 2; // midpoint of diagonal ab
+    double py = (a->y + b->y) / 2;
+    do {
+        if (((p->y > py) != (p->next->y > py)) && p->next->y != p->y && // edge straddles y = py and is not horizontal
+                (px < (p->next->x - p->x) * (py - p->y) / (p->next->y - p->y) + p->x)) // crossing lies at an x greater than px
+            inside = !inside; // every such crossing flips the parity
+        p = p->next;
+    } while (p != a);
+
+    return inside;
+}
+
+// bridge two polygon vertices: duplicates a and b, connects a -> b directly and routes the
+// remainder through the copies; for vertices of one ring this yields two rings, for an outer
+// ring vertex and a hole vertex it yields a single merged ring
+template <typename N>
+typename Earcut<N>::Node*
+Earcut<N>::splitPolygon(Node* a, Node* b) {
+    Node* aCopy = nodes.construct(a->i, a->x, a->y);
+    Node* bCopy = nodes.construct(b->i, b->x, b->y);
+    Node* aNext = a->next;
+    Node* bPrev = b->prev;
+    // a now leads straight to b
+    a->next = b;
+    b->prev = a;
+    // the copy of a takes over a's old successor
+    aCopy->next = aNext;
+    aNext->prev = aCopy;
+    // the copy of b leads into the copy of a
+    bCopy->next = aCopy;
+    aCopy->prev = bCopy;
+    // b's old predecessor leads into the copy of b
+    bPrev->next = bCopy;
+    bCopy->prev = bPrev;
+
+    return bCopy;
+}
+
+// create a node and optionally link it after `last` (in a circular doubly linked list)
+template <typename N> template <typename Point>
+typename Earcut<N>::Node*
+Earcut<N>::insertNode(std::size_t i, const Point& pt, Node* last) {
+    Node* node = nodes.construct(static_cast<N>(i), util::nth<0, Point>::get(pt), util::nth<1, Point>::get(pt));
+
+    if (last) { // splice the new node between `last` and its current successor
+        Node* const after = last->next;
+        node->next = after;
+        node->prev = last;
+        after->prev = node;
+        last->next = node;
+    } else {
+        // no predecessor: the node forms a one-element ring
+        node->prev = node;
+        node->next = node;
+    }
+    return node;
+}
+
+template <typename N>
+void Earcut<N>::removeNode(Node* p) { // unlink p from the ring and from the prevZ/nextZ list
+    Node* const before = p->prev;
+    p->next->prev = before;
+    before->next = p->next;
+    if (p->prevZ) p->prevZ->nextZ = p->nextZ;
+    if (p->nextZ) p->nextZ->prevZ = p->prevZ;
+}
+}
+
+template <typename N = uint32_t, typename Polygon>
+std::vector<N> earcut(const Polygon& poly) { // run Earcut on poly and hand back its index buffer
+    mapbox::detail::Earcut<N> triangulator;
+    triangulator(poly);
+    return std::move(triangulator.indices); // moving a member of a local: NRVO does not apply here
+}
+}
diff --git a/models/cube-vertex-w-component.obj b/models/cube-vertex-w-component.obj
new file mode 100644
index 00000000..b909f26d
--- /dev/null
+++ b/models/cube-vertex-w-component.obj
@@ -0,0 +1,31 @@
+mtllib cube.mtl
+
+v 0.000000 2.000000 2.000000 0.1 
+v 0.000000 0.000000 2.000000 0.2 
+v 2.000000 0.000000 2.000000 0.3 
+v 2.000000 2.000000 2.000000 0.4 
+v 0.000000 2.000000 0.000000 0.5 
+v 0.000000 0.000000 0.000000 0.6 
+v 2.000000 0.000000 0.000000 0.7 
+v 2.000000 2.000000 0.000000 0.8 
+# 8 vertices
+
+g front cube
+usemtl white
+f 1 2 3 4
+g back cube
+# expects white material
+f 8 7 6 5
+g right cube
+usemtl red
+f 4 3 7 8
+g top cube
+usemtl white
+f 5 1 4 8
+g left cube
+usemtl green
+f 5 6 2 1
+g bottom cube
+usemtl white
+f 2 6 7 3
+# 6 elements
diff --git a/models/cube_w_BOM.mtl b/models/cube_w_BOM.mtl
new file mode 100644
index 00000000..96255b54
--- /dev/null
+++ b/models/cube_w_BOM.mtl
@@ -0,0 +1,24 @@
+﻿newmtl white
+Ka 0 0 0
+Kd 1 1 1
+Ks 0 0 0
+
+newmtl red
+Ka 0 0 0
+Kd 1 0 0
+Ks 0 0 0
+
+newmtl green
+Ka 0 0 0
+Kd 0 1 0
+Ks 0 0 0
+
+newmtl blue
+Ka 0 0 0
+Kd 0 0 1
+Ks 0 0 0
+
+newmtl light
+Ka 20 20 20
+Kd 1 1 1
+Ks 0 0 0
diff --git a/models/cube_w_BOM.obj b/models/cube_w_BOM.obj
new file mode 100644
index 00000000..3c395f04
--- /dev/null
+++ b/models/cube_w_BOM.obj
@@ -0,0 +1,32 @@
+﻿mtllib cube_w_BOM.mtl
+
+v 0.000000 2.000000 2.000000
+v 0.000000 0.000000 2.000000
+v 2.000000 0.000000 2.000000
+v 2.000000 2.000000 2.000000
+v 0.000000 2.000000 0.000000
+v 0.000000 0.000000 0.000000
+v 2.000000 0.000000 0.000000
+v 2.000000 2.000000 0.000000
+# 8 vertices
+
+g front cube
+usemtl white
+f 1 2 3 4
+# two white spaces between 'back' and 'cube'
+g back  cube
+# expects white material
+f 8 7 6 5
+g right cube
+usemtl red
+f 4 3 7 8
+g top cube
+usemtl white
+f 5 1 4 8
+g left cube
+usemtl green
+f 5 6 2 1
+g bottom cube
+usemtl white
+f 2 6 7 3
+# 6 elements
diff --git a/models/invalid-relative-texture-index.obj b/models/invalid-relative-texture-index.obj
new file mode 100644
index 00000000..ed3a571a
--- /dev/null
+++ b/models/invalid-relative-texture-index.obj
@@ -0,0 +1,2 @@
+vt 0 0
+f 1/-1 1/-1 1/-2
\ No newline at end of file
diff --git a/models/invalid-relative-vertex-index.obj b/models/invalid-relative-vertex-index.obj
new file mode 100644
index 00000000..bddc54a9
--- /dev/null
+++ b/models/invalid-relative-vertex-index.obj
@@ -0,0 +1 @@
+f -4 -3 -2
\ No newline at end of file
diff --git a/models/issue-295-trianguation-failure.obj b/models/issue-295-trianguation-failure.obj
new file mode 100644
index 00000000..f3b2649c
--- /dev/null
+++ b/models/issue-295-trianguation-failure.obj
@@ -0,0 +1,38 @@
+#mtllib invalid.mtl
+v 14678.0 0.0 9605.0
+v 14678.0 1.0 9605.0
+v 14678.0 0.0 9606.0
+v 14678.0 1.0 9606.0
+v 14678.0 0.0 9607.0
+v 14678.0 1.0 9607.0
+v 14678.0 0.0 9608.0
+v 14678.0 1.0 9608.0
+v 14679.0 0.0 9605.0
+v 14679.0 1.0 9605.0
+v 14679.0 0.0 9606.0
+v 14679.0 1.0 9606.0
+v 14679.0 0.0 9607.0
+v 14679.0 1.0 9607.0
+v 14679.0 0.0 9608.0
+v 14679.0 1.0 9608.0
+# UV
+vt 0.0 0.0
+vt 1.0 0.0
+vt 1.0 1.0
+vt 0.0 1.0
+#usemtl invalid
+o invalid
+f 9/4 11/1 3/2 1/3
+f 4/1 12/2 10/3 2/4
+f 2/3 10/4 9/1 1/2
+f 3/2 4/3 2/4 1/1
+f 10/3 12/4 11/1 9/2
+f 11/4 13/1 5/2 3/3
+f 6/1 14/2 12/3 4/4
+f 5/2 6/3 4/4 3/1
+f 12/3 14/4 13/1 11/2
+f 13/4 15/1 7/2 5/3
+f 8/1 16/2 14/3 6/4
+f 15/2 16/3 8/4 7/1
+f 7/2 8/3 6/4 5/1
+f 14/3 16/4 15/1 13/2
diff --git a/models/issue-319-002.obj b/models/issue-319-002.obj
new file mode 100644
index 00000000..8e056fa0
--- /dev/null
+++ b/models/issue-319-002.obj
@@ -0,0 +1,39 @@
+###
+#
+# OBJ File Generated by Meshlab
+#
+####
+# Object ZH2_001.obj
+#
+# Vertices: 19
+# Faces: 3
+#
+####
+v 8219.830078 6406.934082 9.603000
+v 8219.632812 6406.582031 9.603000
+v 8219.632812 6406.582031 9.139000
+v 8219.973633 6405.420898 9.139000
+v 8211.128906 6404.090820 9.139000
+v 8211.128906 6404.090820 9.603000
+v 8211.469727 6402.930176 9.139000
+v 8211.469727 6402.930176 9.603000
+v 8211.133789 6402.831055 9.603000
+v 8210.793945 6403.992188 9.603000
+v 8210.713867 6404.264160 9.603000
+v 8211.840820 6403.038086 9.139000
+v 8219.899414 6404.861816 9.139000
+v 8219.755859 6405.352051 9.139000
+v 8211.985352 6402.544922 9.139000
+v 8232.911133 6378.534180 55.848999
+v 8226.281250 6376.591797 55.848999
+v 8226.341797 6376.384766 55.848999
+v 8233.450195 6378.466797 55.852001
+v 8233.450195 6378.466797 55.852001
+# 19 vertices, 0 vertices normals
+
+f 2 1 11 10 9 8 6
+f 5 7 12 15 13 14 4 3
+f 18 19 20 16 17
+# 3 faces, 0 coords texture
+
+# End of File
diff --git a/models/issue-319-003.obj b/models/issue-319-003.obj
new file mode 100644
index 00000000..882a25c1
--- /dev/null
+++ b/models/issue-319-003.obj
@@ -0,0 +1,27 @@
+####
+#
+# OBJ File Generated by Meshlab
+#
+####
+# Object new 1.obj
+#
+# Vertices: 10
+# Faces: 1
+#
+####
+v 8434.808594 6083.654785 2.387000
+v 8434.808594 6083.654785 71.633003
+v 8432.309570 6092.206055 71.633003
+v 8432.309570 6092.206055 63.955002
+v 8432.309570 6092.206055 2.387000
+v 8433.083984 6089.560059 71.633003
+v 8433.161133 6089.293945 71.633003
+v 8432.309570 6092.206055 64.323997
+v 8432.309570 6092.206055 67.152000
+v 8432.309570 6092.206055 68.078003
+# 10 vertices, 0 vertices normals
+
+f 6 7 2 1 5 4 8 9 10 3
+# 1 faces, 0 coords texture
+
+# End of File
diff --git a/models/issue-330.obj b/models/issue-330.obj
new file mode 100644
index 00000000..aa46631c
--- /dev/null
+++ b/models/issue-330.obj
@@ -0,0 +1,12 @@
+v -105.342712 40.184242 -16.056709
+v -105.463989 40.202003 -16.003181
+v -105.564941 40.207558 -15.934708
+v -105.722252 40.151146 -16.112091
+v -105.610237 40.191372 -16.176643
+v -105.667282 40.189800 -15.864197
+v -105.751717 40.125790 -15.794304
+# 7 vertices, 0 vertices normals
+
+f 2 5 4 3
+f 4 6 3
+# 2 faces, 0 coords texture
diff --git a/models/issue-356-leading-spaces-newmtl.mtl b/models/issue-356-leading-spaces-newmtl.mtl
new file mode 100644
index 00000000..f5a388e9
--- /dev/null
+++ b/models/issue-356-leading-spaces-newmtl.mtl
@@ -0,0 +1,2 @@
+newmtl  aaa
+Ka 1.000000 1.000000 1.000000
diff --git a/models/issue-356-leading-spaces-newmtl.obj b/models/issue-356-leading-spaces-newmtl.obj
new file mode 100644
index 00000000..b41984a8
--- /dev/null
+++ b/models/issue-356-leading-spaces-newmtl.obj
@@ -0,0 +1,2 @@
+mtllib  issue-356-leading-spaces-newmtl.mtl
+usemtl  aaa
diff --git a/models/issue-389-comment.obj b/models/issue-389-comment.obj
new file mode 100644
index 00000000..cf16d926
--- /dev/null
+++ b/models/issue-389-comment.obj
@@ -0,0 +1,44 @@
+g Part 1
+v 0.0576127 0.0488792 0.0423 
+v 0.0576127 0.0488792 0 
+v -0.0483158 0.0488792 0 
+v -0.0483158 0.0488792 0.0423 
+v -0.0483158 -0.0139454 0 
+v -0.0483158 -0.0139454 0.0423 
+v 0.0576127 -0.0139454 0 
+v 0.0576127 -0.0139454 0.0423 
+vn 0 1 0 
+vn -1 0 0 
+vn 0 -1 0 
+vn 1 0 0 
+vn 0 0 1 
+vn 0 0 -1 
+o mesh0
+f 1//1 2//1 3//1 
+f 3//1 4//1 1//1 
+o mesh1
+f 4//2 3//2 5//2 
+f 5//2 6//2 4//2 
+o mesh2
+f 6//3 5//3 7//3 
+f 7//3 8//3 6//3 
+o mesh3
+f 8//4 7//4 2//4 
+f 2//4 1//4 8//4 
+o mesh4
+f 8//5 1//5 4//5 
+f 4//5 6//5 8//5 
+o mesh5
+f 5//6 3//6 2//6 
+f 2//6 7//6 5//6 
+
+# Zusätzliche Linien (aus der Oberseite)
+o lines
+v 0.0576127 0.0488792 0.0423 # Startpunkt Linie 1 (Ecke 1 Oberseite)
+v 0.0576127 0.0488792 0.2423 # Endpunkt Linie 1 (2m Höhe)
+v -0.0483158 -0.0139454 0.0423 # Startpunkt Linie 2 (Ecke 6 Oberseite)
+v -0.0483158 -0.0139454 0.2423 # Endpunkt Linie 2 (2m Höhe)
+
+# Linien
+l 1 9 # Linie 1
+l 6 10 # Linie 2
diff --git a/models/issue-391.mtl b/models/issue-391.mtl
new file mode 100644
index 00000000..c23ced4b
--- /dev/null
+++ b/models/issue-391.mtl
@@ -0,0 +1,4 @@
+newmtl has_kd
+Kd 1 0 0
+newmtl has_map
+map_Kd test.png
\ No newline at end of file
diff --git a/models/issue-391.obj b/models/issue-391.obj
new file mode 100644
index 00000000..06d8774b
--- /dev/null
+++ b/models/issue-391.obj
@@ -0,0 +1,9 @@
+mtllib issue-391.mtl
+v 0 0 0
+v 1 0 0
+v 0 1 0
+vn 0 0 1
+usemtl has_map
+f 1//1 2//1 3//1
+usemtl has_kd
+f 1//1 2//1 3//1
\ No newline at end of file
diff --git a/models/issue-400-num-face-vertices.obj b/models/issue-400-num-face-vertices.obj
new file mode 100644
index 00000000..77e25b48
--- /dev/null
+++ b/models/issue-400-num-face-vertices.obj
@@ -0,0 +1,15 @@
+# Regression test model for issue #400 - numpy_num_face_vertices()
+# Mixed quad and triangle faces to verify correct uint type handling.
+# With the bug (unsigned char instead of unsigned int in the numpy binding),
+# numpy_num_face_vertices() returned all zeros for quad (4-vertex) faces.
+v 0.0 0.0 0.0
+v 1.0 0.0 0.0
+v 1.0 1.0 0.0
+v 0.0 1.0 0.0
+v 0.5 0.5 1.0
+# quad face (num_face_vertices = 4)
+f 1 2 3 4
+# triangle face (num_face_vertices = 3)
+f 1 2 5
+# triangle face (num_face_vertices = 3)
+f 2 3 5
diff --git a/models/mtl filename with whitespace issue46.mtl b/models/mtl filename with whitespace issue46.mtl
new file mode 100644
index 00000000..b79d99b0
--- /dev/null
+++ b/models/mtl filename with whitespace issue46.mtl	
@@ -0,0 +1,4 @@
+newmtl green
+Ka 0 0 0
+Kd 0 1 0
+Ks 0 0 0
diff --git a/models/mtl filename with whitespace issue46.obj b/models/mtl filename with whitespace issue46.obj
new file mode 100644
index 00000000..72d1dc9e
--- /dev/null
+++ b/models/mtl filename with whitespace issue46.obj	
@@ -0,0 +1,31 @@
+mtllib invalid-file-without-spaces.mtl invalid\ file\ with\ spaces.mtl  mtl\ filename\ with\ whitespace\ issue46.mtl
+
+v 0.000000 2.000000 2.000000
+v 0.000000 0.000000 2.000000
+v 2.000000 0.000000 2.000000
+v 2.000000 2.000000 2.000000
+v 0.000000 2.000000 0.000000
+v 0.000000 0.000000 0.000000
+v 2.000000 0.000000 0.000000
+v 2.000000 2.000000 0.000000
+# 8 vertices
+
+g front cube
+usemtl green
+f 1 2 3 4
+g back cube
+usemtl green
+f 8 7 6 5
+g right cube
+usemtl green
+f 4 3 7 8
+g left cube
+usemtl green
+f 5 6 2 1
+g top cube
+usemtl green
+f 5 1 4 8
+g bottom cube
+usemtl green
+f 2 6 7 3
+# 6 elements
diff --git a/models/numeric-edge-cases.obj b/models/numeric-edge-cases.obj
new file mode 100644
index 00000000..71d8c156
--- /dev/null
+++ b/models/numeric-edge-cases.obj
@@ -0,0 +1,64 @@
+# Numeric edge cases for fast_float migration testing
+# Each vertex exercises a different parsing path.
+
+# v0: basic integers and zero
+v 0 0 0
+
+# v1: simple decimals
+v 1.5 -2.25 3.125
+
+# v2: leading decimal dot (no integer part)
+v .5 -.75 .001
+
+# v3: trailing dot (no fractional part)
+v 1. -2. 100.
+
+# v4: scientific notation (lowercase e)
+v 1.5e2 -3.0e-4 7e10
+
+# v5: scientific notation (uppercase E)
+v 2.5E3 -1.0E-2 4E+5
+
+# v6: leading + sign
+v +1.0 +0.5 +100
+
+# v7: leading zeros
+v 007.5 -003.14 000.001
+
+# v8: very small subnormal-range value
+v 1e-300 -1e-300 5e-310
+
+# v9: very large value near overflow
+v 1.7976931348623157e+308 -1e+308 1e+307
+
+# v10: negative zero
+v -0 -0.0 -0.0e0
+
+# v11: exponent with leading zeros
+v 1.5e002 -3.0e+007 7e-003
+
+# v12: single digit values
+v 0 1 9
+
+# v13: mixed sign exponents
+v 1e+0 1e-0 -1e+0
+
+# v14: max precision decimal (many digits)
+v 3.141592653589793 2.718281828459045 1.4142135623730951
+
+# v15: one as exponent boundary
+v 1e1 1e-1 -1e1
+
+# Normals to test normal parsing path too
+vn 0.0 1.0 0.0
+vn -0.707107 0.0 0.707107
+vn 1e-5 -1e-5 0.99999
+
+# Texture coords with edge values
+vt 0.0 0.0
+vt 1.0 1.0
+vt 0.5 .5
+vt +0.25 +0.75
+
+f 1//1 2//1 3//1
+f 4//2 5//2 6//2
diff --git a/models/skin-weight.obj b/models/skin-weight.obj
new file mode 100644
index 00000000..41f182f5
--- /dev/null
+++ b/models/skin-weight.obj
@@ -0,0 +1,43 @@
+mtllib cube.mtl
+
+v 0.000000 2.000000 2.000000
+v 0.000000 0.000000 2.000000
+v 2.000000 0.000000 2.000000
+v 2.000000 2.000000 2.000000
+v 0.000000 2.000000 0.000000
+v 0.000000 0.000000 0.000000
+v 2.000000 0.000000 0.000000
+v 2.000000 2.000000 0.000000
+# 8 vertices
+
+vw 0 0 1.0
+vw 1 0 0.5 1 0.5
+vw 2 1 1.0
+vw 3 2 1.0
+vw 4 3 1.0
+vw 5 0 0.25 1 0.25 2 0.25 3 0.25
+# No weight for 6th vertex
+# vw 6 0 1.0
+vw 7 0 1.0
+# max 4 joints
+
+g front cube
+usemtl white
+f 1 2 3 4
+# two white spaces between 'back' and 'cube'
+g back  cube
+# expects white material
+f 8 7 6 5
+g right cube
+usemtl red
+f 4 3 7 8
+g top cube
+usemtl white
+f 5 1 4 8
+g left cube
+usemtl green
+f 5 6 2 1
+g bottom cube
+usemtl white
+f 2 6 7 3
+# 6 elements
diff --git a/models/texcoord-w-mixed.obj b/models/texcoord-w-mixed.obj
new file mode 100644
index 00000000..16421ae5
--- /dev/null
+++ b/models/texcoord-w-mixed.obj
@@ -0,0 +1,15 @@
+# OBJ file with mixed 3-component and 2-component texture coordinates
+# Tests that texcoord_ws correctly stores 0.0 for omitted w values
+v 0 0 0
+v 1 0 0
+v 1 1 0
+v 0 1 0
+
+# vt lines alternating: w present, w omitted, w present, w omitted
+vt 0.0 0.0 0.5
+vt 1.0 0.0
+vt 1.0 1.0 0.75
+vt 0.0 1.0
+
+f 1/1 2/2 3/3
+f 1/1 3/3 4/4
diff --git a/models/texcoord-w.obj b/models/texcoord-w.obj
new file mode 100644
index 00000000..019ea7ae
--- /dev/null
+++ b/models/texcoord-w.obj
@@ -0,0 +1,14 @@
+# OBJ file with 3-component texture coordinates to test texcoord_ws parsing
+v 0 0 0
+v 1 0 0
+v 1 1 0
+v 0 1 0
+
+# texture coords with optional w component
+vt 0.0 0.0 0.5
+vt 1.0 0.0 0.25
+vt 1.0 1.0 0.75
+vt 0.0 1.0 0.0
+
+f 1/1 2/2 3/3
+f 1/1 3/3 4/4
diff --git a/models/utf8-path-test.mtl b/models/utf8-path-test.mtl
new file mode 100644
index 00000000..b89434a9
--- /dev/null
+++ b/models/utf8-path-test.mtl
@@ -0,0 +1,4 @@
+newmtl Material
+Ka 0 0 0
+Kd 0.8 0.8 0.8
+Ks 0 0 0
diff --git a/models/utf8-path-test.obj b/models/utf8-path-test.obj
new file mode 100644
index 00000000..a70ea094
--- /dev/null
+++ b/models/utf8-path-test.obj
@@ -0,0 +1,6 @@
+mtllib utf8-path-test.mtl
+v 0.0 0.0 0.0
+v 1.0 0.0 0.0
+v 0.0 1.0 0.0
+usemtl Material
+f 1 2 3
diff --git a/pbr-mtl.md b/pbr-mtl.md
new file mode 100644
index 00000000..b5856216
--- /dev/null
+++ b/pbr-mtl.md
@@ -0,0 +1,29 @@
+## PBR material extension.
+
+The spec can be found in either
+
+https://benhouston3d.com/blog/extended-wavefront-obj-mtl-for-pbr/
+
+or Internet Archive: https://web.archive.org/web/20230210121526/http://exocortex.com/blog/extending_wavefront_mtl_to_support_pbr
+
+* Kd/map_Kd (base/diffuse) // reuse
+* Ks/map_Ks (specular) // reuse
+* d or Tr (opacity) // reuse
+* map_d/map_Tr (opacitymap) // reuse
+* Tf (translucency) // reuse
+* bump/-bm (bump map) // reuse
+* disp (displacement map) // reuse
+
+PBR material parameters as defined by the Disney PBR.
+
+* Pr/map_Pr (roughness) // new
+* Pm/map_Pm (metallic) // new
+* Ps/map_Ps (sheen) // new
+* Pc (clearcoat thickness) // new
+* Pcr (clearcoat roughness) // new
+* Ke/map_Ke (emissive) // new
+* aniso (anisotropy) // new
+* anisor (anisotropy rotation) // new
+* norm (normal map) // new
+
+EoL.
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..bcf10b1c
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,55 @@
+[build-system]
+
+requires = [
+    # NOTE: setuptools_scm>=8 is not supported in py3.6 cibuildwheel env.
+    # so use older setuptools_scm for a while
+    #"setuptools>=64",
+    #"setuptools_scm>=8",
+    "setuptools>=45",
+    "setuptools_scm[toml]<8",
+    "wheel",
+    "pybind11>=2.10.0",
+]
+build-backend = "setuptools.build_meta"
+
+[tool.black]
+line-length = 140
+force-exclude = '''
+(
+    /deps/.*$
+    | /kuroga.py$
+    | /config-msvc.py$
+    | /config-posix.py$
+    | ^python/build/.*$
+    | ^python/dist/.*$
+    | ^python/tinyobjloader.egg-info/.*$
+)
+'''
+
+[project]
+name = "tinyobjloader"
+license = { text = "MIT AND ISC" }
+
+# version: Use setuptools_scm
+dynamic = ["version", "classifiers", "authors", "description"]
+
+
+readme = {file = "README.md", content-type = "text/markdown"}
+
+# Project URLs in pyproject.toml is not mature.
+# so write it to setup.py
+# https://github.com/pypa/packaging-problems/issues/606
+#
+# [project.urls]
+
+
+[tool.setuptools_scm]
+# setuptools_scm>=8
+#version_file = "python/_version.py"
+
+# setuptools_scm<8
+write_to = "python/_version.py"
+
+[tool.cibuildwheel]
+# Disable aarch64 build since it's too slow to build(docker + qemu).
+skip = ["cp38-*", "cp314t-*", "*_aarch64*"]
diff --git a/python/MANIFEST.in b/python/MANIFEST.in
deleted file mode 100644
index 90ef93d6..00000000
--- a/python/MANIFEST.in
+++ /dev/null
@@ -1,6 +0,0 @@
-# Copy the header file into the python/ folder.
-include ../tiny_obj_loader.h
-# Include it in the source distribution.
-include tiny_obj_loader.h
-
-include pyproject.toml
diff --git a/python/Makefile b/python/Makefile
index 06352eda..ede9c2d2 100644
--- a/python/Makefile
+++ b/python/Makefile
@@ -1,5 +1,5 @@
 all:
-	python setup.py build
+	cd .. && python -m pip install .
 
 t:
 	python sample.py
diff --git a/python/README.md b/python/README.md
index f7b3d232..8f9aa5d2 100644
--- a/python/README.md
+++ b/python/README.md
@@ -3,6 +3,18 @@
 `tinyobjloader` is a python wrapper for C++ wavefront .obj loader.
 `tinyobjloader` is rather fast and feature rich than other pure python version of .obj loader.
 
+## Requirements
+
+* python 3.6+
+
+## Install
+
+You can install `tinyobjloader` with pip.
+
+```
+$ pip install tinyobjloader
+```
+
 ## Quick tutorial
 
 ```py
@@ -54,24 +66,22 @@ https://github.com/syoyo/tinyobjloader/blob/master/python/sample.py
 
 ## How to build
 
-Using `cibuildwheel` is an recommended way to build a python module.
+Using `cibuildwheel` is a recommended way to build a python module.
 See $tinyobjloader/azure-pipelines.yml for details.
 
 ### Developer build
 
-Edit `setup.py` and uncomment `Developer option` lines
-
 Assume pip is installed.
 
 ```
-$ pip install pybind11
-$ python setup.py build
+$ git clone https://github.com/tinyobjloader/tinyobjloader
+$ cd tinyobjloader
+$ python -m pip install .
 ```
 
 ## License
 
-MIT license.
+MIT(tinyobjloader) and ISC(mapbox earcut) license.
 
 ## TODO
  * [ ] Writer saver
-
diff --git a/python/bindings.cc b/python/bindings.cc
index 58aaab82..a303e01f 100644
--- a/python/bindings.cc
+++ b/python/bindings.cc
@@ -8,7 +8,7 @@
 
 // define some helper functions for pybind11
 #define TINY_OBJ_LOADER_PYTHON_BINDING
-#include "tiny_obj_loader.h"
+#include "../tiny_obj_loader.h"
 
 namespace py = pybind11;
 
@@ -38,15 +38,41 @@ PYBIND11_MODULE(tinyobjloader, tobj_module)
   py::class_<attrib_t>(tobj_module, "attrib_t")
     .def(py::init<>())
     .def_readonly("vertices", &attrib_t::vertices)
+    .def_readonly("vertex_weights", &attrib_t::vertex_weights)
+    .def_readonly("skin_weights", &attrib_t::skin_weights)
+    .def_readonly("normals", &attrib_t::normals)
+    .def_readonly("texcoords", &attrib_t::texcoords)
+    .def_readonly("colors", &attrib_t::colors)
     .def("numpy_vertices", [] (attrib_t &instance) {
         auto ret = py::array_t<real_t>(instance.vertices.size());
         py::buffer_info buf = ret.request();
         memcpy(buf.ptr, instance.vertices.data(), instance.vertices.size() * sizeof(real_t));
         return ret;
     })
-    .def_readonly("normals", &attrib_t::normals)
-    .def_readonly("texcoords", &attrib_t::texcoords)
-    .def_readonly("colors", &attrib_t::colors)
+    .def("numpy_vertex_weights", [] (attrib_t &instance) {
+        auto ret = py::array_t<real_t>(instance.vertex_weights.size());
+        py::buffer_info buf = ret.request();
+        memcpy(buf.ptr, instance.vertex_weights.data(), instance.vertex_weights.size() * sizeof(real_t));
+        return ret;
+    })
+    .def("numpy_normals", [] (attrib_t &instance) {
+        auto ret = py::array_t<real_t>(instance.normals.size());
+        py::buffer_info buf = ret.request();
+        memcpy(buf.ptr, instance.normals.data(), instance.normals.size() * sizeof(real_t));
+        return ret;
+    })
+    .def("numpy_texcoords", [] (attrib_t &instance) {
+        auto ret = py::array_t<real_t>(instance.texcoords.size());
+        py::buffer_info buf = ret.request();
+        memcpy(buf.ptr, instance.texcoords.data(), instance.texcoords.size() * sizeof(real_t));
+        return ret;
+    })
+    .def("numpy_colors", [] (attrib_t &instance) {
+        auto ret = py::array_t<real_t>(instance.colors.size());
+        py::buffer_info buf = ret.request();
+        memcpy(buf.ptr, instance.colors.data(), instance.colors.size() * sizeof(real_t));
+        return ret;
+    })
     ;
 
   py::class_<shape_t>(tobj_module, "shape_t")
@@ -119,20 +145,56 @@ PYBIND11_MODULE(tinyobjloader, tobj_module)
     .def("GetCustomParameter", &material_t::GetCustomParameter)
     ;
 
-  py::class_<mesh_t>(tobj_module, "mesh_t")
+  py::class_<mesh_t>(tobj_module, "mesh_t", py::buffer_protocol())
     .def(py::init<>())
     .def_readonly("num_face_vertices", &mesh_t::num_face_vertices)
     .def("numpy_num_face_vertices", [] (mesh_t &instance) {
-        auto ret = py::array_t<unsigned char>(instance.num_face_vertices.size());
+        using T = typename std::remove_reference<decltype(instance.num_face_vertices)>::type::value_type;
+        auto ret = py::array_t<T>(instance.num_face_vertices.size());
         py::buffer_info buf = ret.request();
-        memcpy(buf.ptr, instance.num_face_vertices.data(), instance.num_face_vertices.size() * sizeof(unsigned char));
+        memcpy(buf.ptr, instance.num_face_vertices.data(), instance.num_face_vertices.size() * sizeof(T));
         return ret;
     })
+    .def("vertex_indices", [](mesh_t &self) {
+      // NOTE: we cannot use py::buffer_info and py::buffer as a return type.
+      // py::memoryview is not suited for the vertex indices use case, since the indices data may be
+      // used in the Python world after the C++ mesh_t object has been deleted.
+      //
+      // So create a dedicated Python object (std::vector<int>) instead.
+      
+      std::vector<int> indices;
+      indices.resize(self.indices.size());
+      for (size_t i = 0; i < self.indices.size(); i++) {
+        indices[i] = self.indices[i].vertex_index;
+      }
+
+      return indices;
+    })
+    .def("normal_indices", [](mesh_t &self) {
+      
+      std::vector<int> indices;
+      indices.resize(self.indices.size());
+      for (size_t i = 0; i < self.indices.size(); i++) {
+        indices[i] = self.indices[i].normal_index;
+      }
+
+      return indices;
+    })
+    .def("texcoord_indices", [](mesh_t &self) {
+      
+      std::vector<int> indices;
+      indices.resize(self.indices.size());
+      for (size_t i = 0; i < self.indices.size(); i++) {
+        indices[i] = self.indices[i].texcoord_index;
+      }
+
+      return indices;
+    })
     .def_readonly("indices", &mesh_t::indices)
     .def("numpy_indices", [] (mesh_t &instance) {
         // Flatten indexes. index_t is composed of 3 ints(vertex_index, normal_index, texcoord_index).
         // numpy_indices = [0, -1, -1, 1, -1, -1, ...]
-        // C++11 or later should pack POD struct tightly and does not reorder variables, 
+        // C++11 or later should pack POD struct tightly and does not reorder variables,
         // so we can memcpy to copy data.
         // Still, we check the size of struct and byte offsets of each variable just for sure.
         static_assert(sizeof(index_t) == 12, "sizeof(index_t) must be 12");
@@ -144,13 +206,44 @@ PYBIND11_MODULE(tinyobjloader, tobj_module)
         py::buffer_info buf = ret.request();
         memcpy(buf.ptr, instance.indices.data(), instance.indices.size() * 3 * sizeof(int));
         return ret;
+    })
+    .def_readonly("material_ids", &mesh_t::material_ids)
+    .def("numpy_material_ids", [] (mesh_t &instance) {
+        auto ret = py::array_t<int>(instance.material_ids.size());
+        py::buffer_info buf = ret.request();
+        memcpy(buf.ptr, instance.material_ids.data(), instance.material_ids.size() * sizeof(int));
+        return ret;
     });
 
   py::class_<lines_t>(tobj_module, "lines_t")
-    .def(py::init<>());
+    .def(py::init<>())
+    .def_readonly("indices", &lines_t::indices)
+    .def_readonly("num_line_vertices", &lines_t::num_line_vertices)
+    ;
 
   py::class_<points_t>(tobj_module, "points_t")
-    .def(py::init<>());
+    .def(py::init<>())
+    .def_readonly("indices", &points_t::indices)
+    ;
 
+  py::class_<joint_and_weight_t>(tobj_module, "joint_and_weight_t")
+    .def(py::init<>())
+    .def_readonly("joint_id", &joint_and_weight_t::joint_id, "Joint index(NOTE: Joint info is provided externally, not from .obj")
+    .def_readonly("weight", &joint_and_weight_t::weight, "Weight value(NOTE: weight is not normalized)")
+    ;
+
+  py::class_<skin_weight_t>(tobj_module, "skin_weight_t")
+    .def(py::init<>())
+    .def_readonly("vertex_id", &skin_weight_t::vertex_id)
+    .def_readonly("weightValues", &skin_weight_t::weightValues)
+    ;
+
+  py::class_<tag_t>(tobj_module, "tag_t")
+    .def(py::init<>())
+    .def_readonly("name", &tag_t::name)
+    .def_readonly("intValues", &tag_t::intValues)  
+    .def_readonly("floatValues", &tag_t::floatValues)  
+    .def_readonly("stringValues", &tag_t::stringValues)  
+    ;
 }
 
diff --git a/python/pyproject.toml b/python/pyproject.toml
deleted file mode 100644
index 51e27c42..00000000
--- a/python/pyproject.toml
+++ /dev/null
@@ -1,2 +0,0 @@
-[build-system]
-requires = ["setuptools", "wheel", "pybind11>=2.3"]
diff --git a/python/sample.py b/python/sample.py
index 5a74972d..45c97612 100644
--- a/python/sample.py
+++ b/python/sample.py
@@ -1,8 +1,21 @@
 import sys
 import tinyobjloader
 
+is_numpy_available = False
+try:
+    import numpy
+
+    is_numpy_available = True
+except:
+    print(
+        "NumPy not installed. Do not use numpy_*** API. If you encounter slow performance, see a performance tips for non-numpy API https://github.com/tinyobjloader/tinyobjloader/issues/275"
+    )
+
 filename = "../models/cornell_box.obj"
 
+if len(sys.argv) > 1:
+    filename = sys.argv[1]
+
 
 reader = tinyobjloader.ObjReader()
 
@@ -24,9 +37,12 @@
     print("Warn:", reader.Warning())
 
 attrib = reader.GetAttrib()
-print("attrib.vertices = ", len(attrib.vertices))
-print("attrib.normals = ", len(attrib.normals))
-print("attrib.texcoords = ", len(attrib.texcoords))
+print("len(attrib.vertices) = ", len(attrib.vertices))
+print("len(attrib.vertex_weights) = ", len(attrib.vertex_weights))
+print("len(attrib.normals) = ", len(attrib.normals))
+print("len(attrib.texcoords) = ", len(attrib.texcoords))
+print("len(attrib.colors) = ", len(attrib.colors))
+print("len(attrib.skin_weights) = ", len(attrib.skin_weights))
 
 # vertex data must be `xyzxyzxyz...`
 assert len(attrib.vertices) % 3 == 0
@@ -37,16 +53,53 @@
 # texcoords data must be `uvuvuv...`
 assert len(attrib.texcoords) % 2 == 0
 
-for (i, v) in enumerate(attrib.vertices):
+# colors data must be `rgbrgbrgb...`
+assert len(attrib.texcoords) % 3 == 0
+
+# Performance note
+# (direct?) array access through member variable is quite slow.
+# https://github.com/tinyobjloader/tinyobjloader/issues/275#issuecomment-753465833
+#
+# We encourage first copying(?) the variable to the Python world:
+#
+# vertices = attrib.vertices
+#
+# for i in range(...)
+#   v = vertices[i]
+#
+# Or please consider using numpy_*** interface(e.g. numpy_vertices())
+
+for i, v in enumerate(attrib.vertices):
     print("v[{}] = {}".format(i, v))
 
-for (i, v) in enumerate(attrib.normals):
+# vw is filled with 1.0 if [w] component is not present in `v` line in .obj
+for i, w in enumerate(attrib.vertex_weights):
+    print("vweight[{}] = {}".format(i, w))
+
+for i, v in enumerate(attrib.normals):
     print("vn[{}] = {}".format(i, v))
 
-for (i, v) in enumerate(attrib.texcoords):
-    print("vt[{}] = {}".format(i, t))
+for i, v in enumerate(attrib.texcoords):
+    print("vt[{}] = {}".format(i, v))
+
+for i, v in enumerate(attrib.colors):
+    print("vcol[{}] = {}".format(i, v))
 
-print("numpy_vertices = {}".format(attrib.numpy_vertices()))
+if len(attrib.skin_weights):
+    print("num skin weights", len(attrib.skin_weights))
+
+    for i, skin in enumerate(attrib.skin_weights):
+        print("skin_weight[{}]".format(i))
+        print("  vertex_id = ", skin.vertex_id)
+        print("  len(weights) = ", len(skin.weightValues))
+        for k, w in enumerate(skin.weightValues):
+            print("    [{}] joint_id: {}, weight: {}".format(k, w.joint_id, w.weight))
+
+if is_numpy_available:
+    print("numpy_v = {}".format(attrib.numpy_vertices()))
+    print("numpy_vn = {}".format(attrib.numpy_normals()))
+    print("numpy_vt = {}".format(attrib.numpy_texcoords()))
+    print("numpy_vcol = {}".format(attrib.numpy_colors()))
 
 materials = reader.GetMaterials()
 print("Num materials: ", len(materials))
@@ -69,9 +122,17 @@
 for shape in shapes:
     print(shape.name)
     print("len(num_indices) = {}".format(len(shape.mesh.indices)))
-    for (i, idx) in enumerate(shape.mesh.indices):
+    for i, idx in enumerate(shape.mesh.indices):
         print("[{}] v_idx {}".format(i, idx.vertex_index))
         print("[{}] vn_idx {}".format(i, idx.normal_index))
         print("[{}] vt_idx {}".format(i, idx.texcoord_index))
-    print("numpy_indices = {}".format(shape.mesh.numpy_indices()))
-    print("numpy_num_face_vertices = {}".format(shape.mesh.numpy_num_face_vertices()))
+    print("material_ids = {}".format(shape.mesh.material_ids))
+
+    # faster access to indices: fetch the list once and reuse it
+    a = shape.mesh.vertex_indices()
+    print("vertex_indices", a)
+
+    if is_numpy_available:
+        print("numpy_indices = {}".format(shape.mesh.numpy_indices()))
+        print("numpy_num_face_vertices = {}".format(shape.mesh.numpy_num_face_vertices()))
+        print("numpy_material_ids = {}".format(shape.mesh.numpy_material_ids()))
diff --git a/python/setup.py b/python/setup.py
deleted file mode 100644
index bf1b8410..00000000
--- a/python/setup.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import setuptools
-import platform
-
-with open("README.md", "r") as fh:
-    long_description = fh.read()
-
-# Adapted from https://github.com/pybind/python_example/blob/master/setup.py
-class get_pybind_include(object):
-    """Helper class to determine the pybind11 include path
-    The purpose of this class is to postpone importing pybind11
-    until it is actually installed, so that the ``get_include()``
-    method can be invoked. """
-
-    def __init__(self, user=False, pep517=False):
-        self.user = user
-        self.pep517 = pep517
-
-    def __str__(self):
-        import os
-        import pybind11
-
-        interpreter_include_path = pybind11.get_include(self.user)
-
-        if self.pep517:
-            # When pybind11 is installed permanently in site packages, the headers
-            # will be in the interpreter include path above. PEP 517 provides an
-            # experimental feature for build system dependencies. When installing
-            # a package from a source distribvution, first its build dependencies
-            # are installed in a temporary location. pybind11 does not return the
-            # correct path for this condition, so we glom together a second path,
-            # and ultimately specify them _both_ in the include search path.
-            # https://github.com/pybind/pybind11/issues/1067
-            return os.path.abspath(
-                os.path.join(
-                    os.path.dirname(pybind11.__file__),
-                    "..",
-                    "..",
-                    "..",
-                    "..",
-                    "include",
-                    os.path.basename(interpreter_include_path),
-                )
-            )
-        else:
-            return interpreter_include_path
-
-
-ext_compile_args = ["-std=c++11"]
-ext_link_args = []
-
-# Developer option
-#
-# if platform.system() == "Darwin":
-#    # XCode10 or later does not support libstdc++, so we need to use libc++.
-#    # macosx-version 10.6 does not support libc++, so we require min macosx version 10.9.
-#    ext_compile_args.append("-stdlib=libc++")
-#    ext_compile_args.append("-mmacosx-version-min=10.9")
-#    ext_link_args.append("-stdlib=libc++")
-#    ext_link_args.append("-mmacosx-version-min=10.9")
-
-# `tiny_obj_loader.cc` contains implementation of tiny_obj_loader.
-m = setuptools.Extension(
-    "tinyobjloader",
-    extra_compile_args=ext_compile_args,
-    extra_link_args=ext_link_args,
-    sources=["bindings.cc", "tiny_obj_loader.cc"],
-    include_dirs=[
-        # Support `build_ext` finding tinyobjloader (without first running
-        # `sdist`).
-        "..",
-        # Support `build_ext` finding pybind 11 (provided it's permanently
-        # installed).
-        get_pybind_include(),
-        get_pybind_include(user=True),
-        # Support building from a source distribution finding pybind11 from
-        # a PEP 517 temporary install.
-        get_pybind_include(pep517=True),
-    ],
-    language="c++",
-)
-
-
-setuptools.setup(
-    name="tinyobjloader",
-    version="0.1",
-    description="Python module for tinyobjloader",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    author="Syoyo Fujita",
-    author_email="syoyo@lighttransport.com",
-    url="https://github.com/syoyo/tinyobjloader",
-    classifiers=["License :: OSI Approved :: MIT License"],
-    packages=setuptools.find_packages(),
-    ext_modules=[m],
-)
diff --git a/python/tiny_obj_loader.cc b/python/tiny_obj_loader.cc
index 11d49865..821542e7 100644
--- a/python/tiny_obj_loader.cc
+++ b/python/tiny_obj_loader.cc
@@ -2,5 +2,8 @@
 // Need also define this in `binding.cc`(and all compilation units)
 #define TINYOBJLOADER_USE_DOUBLE
 
+// Use robust triangulation by using Mapbox earcut.
+#define TINYOBJLOADER_USE_MAPBOX_EARCUT
+
 #define TINYOBJLOADER_IMPLEMENTATION
-#include "tiny_obj_loader.h"
+#include "../tiny_obj_loader.h"
diff --git a/sandbox/parse_fp/CMakeLists.txt b/sandbox/parse_fp/CMakeLists.txt
new file mode 100644
index 00000000..c0c9df9e
--- /dev/null
+++ b/sandbox/parse_fp/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 3.10)
+project(parse_fp_test CXX)
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+add_executable(test_parse_fp test_parse_fp.cc)
+
+# Optional: pass -O2 (/O2 with MSVC) when CMAKE_BUILD_TYPE is Release (benchmark mode)
+if(CMAKE_BUILD_TYPE STREQUAL "Release")
+  if(MSVC)
+    target_compile_options(test_parse_fp PRIVATE /O2)
+  else()
+    target_compile_options(test_parse_fp PRIVATE -O2)
+  endif()
+endif()
diff --git a/sandbox/parse_fp/LICENSE-APACHE b/sandbox/parse_fp/LICENSE-APACHE
new file mode 100644
index 00000000..26f4398f
--- /dev/null
+++ b/sandbox/parse_fp/LICENSE-APACHE
@@ -0,0 +1,190 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   Copyright 2021 The fast_float authors
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/sandbox/parse_fp/LICENSE-BOOST b/sandbox/parse_fp/LICENSE-BOOST
new file mode 100644
index 00000000..127a5bc3
--- /dev/null
+++ b/sandbox/parse_fp/LICENSE-BOOST
@@ -0,0 +1,23 @@
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/sandbox/parse_fp/LICENSE-MIT b/sandbox/parse_fp/LICENSE-MIT
new file mode 100644
index 00000000..2fb2a37a
--- /dev/null
+++ b/sandbox/parse_fp/LICENSE-MIT
@@ -0,0 +1,27 @@
+MIT License
+
+Copyright (c) 2021 The fast_float authors
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/sandbox/parse_fp/fast_float.h b/sandbox/parse_fp/fast_float.h
new file mode 100644
index 00000000..cb044c28
--- /dev/null
+++ b/sandbox/parse_fp/fast_float.h
@@ -0,0 +1,4443 @@
+// fast_float by Daniel Lemire
+// fast_float by João Paulo Magalhaes
+//
+//
+// with contributions from Eugene Golushkov
+// with contributions from Maksim Kita
+// with contributions from Marcin Wojdyr
+// with contributions from Neal Richardson
+// with contributions from Tim Paine
+// with contributions from Fabio Pellacini
+// with contributions from Lénárd Szolnoki
+// with contributions from Jan Pharago
+// with contributions from Maya Warrier
+// with contributions from Taha Khokhar
+// with contributions from Anders Dalvander
+//
+//
+// Licensed under the Apache License, Version 2.0, or the
+// MIT License or the Boost License. This file may not be copied,
+// modified, or distributed except according to those terms.
+//
+// MIT License Notice
+//
+//    MIT License
+//
+//    Copyright (c) 2021 The fast_float authors
+//
+//    Permission is hereby granted, free of charge, to any
+//    person obtaining a copy of this software and associated
+//    documentation files (the "Software"), to deal in the
+//    Software without restriction, including without
+//    limitation the rights to use, copy, modify, merge,
+//    publish, distribute, sublicense, and/or sell copies of
+//    the Software, and to permit persons to whom the Software
+//    is furnished to do so, subject to the following
+//    conditions:
+//
+//    The above copyright notice and this permission notice
+//    shall be included in all copies or substantial portions
+//    of the Software.
+//
+//    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+//    ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+//    TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+//    PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+//    SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+//    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+//    OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+//    IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+//    DEALINGS IN THE SOFTWARE.
+//
+// Apache License (Version 2.0) Notice
+//
+//    Copyright 2021 The fast_float authors
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and limitations under the License.
+//
+// BOOST License Notice
+//
+//    Boost Software License - Version 1.0 - August 17th, 2003
+//
+//    Permission is hereby granted, free of charge, to any person or organization
+//    obtaining a copy of the software and accompanying documentation covered by
+//    this license (the "Software") to use, reproduce, display, distribute,
+//    execute, and transmit the Software, and to prepare derivative works of the
+//    Software, and to permit third-parties to whom the Software is furnished to
+//    do so, all subject to the following:
+//
+//    The copyright notices in the Software and this entire statement, including
+//    the above license grant, this restriction and the following disclaimer,
+//    must be included in all copies of the Software, in whole or in part, and
+//    all derivative works of the Software, unless such copies or derivative
+//    works are solely in the form of machine-executable object code generated by
+//    a source language processor.
+//
+//    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//    FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+//    SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+//    FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+//    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+//    DEALINGS IN THE SOFTWARE.
+//
+
+#ifndef FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+#define FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+
+#ifdef __has_include
+#if __has_include(<version>)
+#include <version>
+#endif
+#endif
+
+// Testing for https://wg21.link/N3652, adopted in C++14
+#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304
+#define FASTFLOAT_CONSTEXPR14 constexpr
+#else
+#define FASTFLOAT_CONSTEXPR14
+#endif
+
+#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L
+#define FASTFLOAT_HAS_BIT_CAST 1
+#else
+#define FASTFLOAT_HAS_BIT_CAST 0
+#endif
+
+#if defined(__cpp_lib_is_constant_evaluated) &&                                \
+    __cpp_lib_is_constant_evaluated >= 201811L
+#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 1
+#else
+#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 0
+#endif
+
+#if defined(__cpp_if_constexpr) && __cpp_if_constexpr >= 201606L
+#define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x)
+#else
+#define FASTFLOAT_IF_CONSTEXPR17(x) if (x)
+#endif
+
+// Testing for relevant C++20 constexpr library features
+#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST &&           \
+    defined(__cpp_lib_constexpr_algorithms) &&                                 \
+    __cpp_lib_constexpr_algorithms >= 201806L /*For std::copy and std::fill*/
+#define FASTFLOAT_CONSTEXPR20 constexpr
+#define FASTFLOAT_IS_CONSTEXPR 1
+#else
+#define FASTFLOAT_CONSTEXPR20
+#define FASTFLOAT_IS_CONSTEXPR 0
+#endif
+
+#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+#define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 0
+#else
+#define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 1
+#endif
+
+#endif // FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+
+#ifndef FASTFLOAT_FLOAT_COMMON_H
+#define FASTFLOAT_FLOAT_COMMON_H
+
+#include <cfloat>
+#include <cstdint>
+#include <cassert>
+#include <cstring>
+#include <limits>
+#include <type_traits>
+#include <system_error>
+#ifdef __has_include
+#if __has_include(<stdfloat>) && (__cplusplus > 202002L || (defined(_MSVC_LANG) && (_MSVC_LANG > 202002L)))
+#include <stdfloat>
+#endif
+#endif
+
+#define FASTFLOAT_VERSION_MAJOR 8
+#define FASTFLOAT_VERSION_MINOR 0
+#define FASTFLOAT_VERSION_PATCH 2
+
+#define FASTFLOAT_STRINGIZE_IMPL(x) #x
+#define FASTFLOAT_STRINGIZE(x) FASTFLOAT_STRINGIZE_IMPL(x)
+
+#define FASTFLOAT_VERSION_STR                                                  \
+  FASTFLOAT_STRINGIZE(FASTFLOAT_VERSION_MAJOR)                                 \
+  "." FASTFLOAT_STRINGIZE(FASTFLOAT_VERSION_MINOR) "." FASTFLOAT_STRINGIZE(    \
+      FASTFLOAT_VERSION_PATCH)
+
+#define FASTFLOAT_VERSION                                                      \
+  (FASTFLOAT_VERSION_MAJOR * 10000 + FASTFLOAT_VERSION_MINOR * 100 +           \
+   FASTFLOAT_VERSION_PATCH)
+
+namespace fast_float {
+
+enum class chars_format : uint64_t;
+
+namespace detail {
+constexpr chars_format basic_json_fmt = chars_format(1 << 5);
+constexpr chars_format basic_fortran_fmt = chars_format(1 << 6);
+} // namespace detail
+
+enum class chars_format : uint64_t {
+  scientific = 1 << 0,
+  fixed = 1 << 2,
+  hex = 1 << 3,
+  no_infnan = 1 << 4,
+  // RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6
+  json = uint64_t(detail::basic_json_fmt) | fixed | scientific | no_infnan,
+  // Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed.
+  json_or_infnan = uint64_t(detail::basic_json_fmt) | fixed | scientific,
+  fortran = uint64_t(detail::basic_fortran_fmt) | fixed | scientific,
+  general = fixed | scientific,
+  allow_leading_plus = 1 << 7,
+  skip_white_space = 1 << 8,
+};
+
+template <typename UC> struct from_chars_result_t {
+  UC const *ptr;
+  std::errc ec;
+};
+
+using from_chars_result = from_chars_result_t<char>;
+
+template <typename UC> struct parse_options_t {
+  constexpr explicit parse_options_t(chars_format fmt = chars_format::general,
+                                     UC dot = UC('.'), int b = 10)
+      : format(fmt), decimal_point(dot), base(b) {}
+
+  /** Which number formats are accepted */
+  chars_format format;
+  /** The character used as decimal point */
+  UC decimal_point;
+  /** The base used for integers */
+  int base;
+};
+
+using parse_options = parse_options_t<char>;
+
+} // namespace fast_float
+
+#if FASTFLOAT_HAS_BIT_CAST
+#include <bit>
+#endif
+
+#if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) ||            \
+     defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) ||          \
+     defined(__MINGW64__) || defined(__s390x__) ||                             \
+     (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) ||      \
+      defined(__PPC64LE__)) ||                                                 \
+     defined(__loongarch64))
+#define FASTFLOAT_64BIT 1
+#elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) ||             \
+       defined(__arm__) || defined(_M_ARM) || defined(__ppc__) ||              \
+       defined(__MINGW32__) || defined(__EMSCRIPTEN__))
+#define FASTFLOAT_32BIT 1
+#else
+  // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow.
+// We can never tell the register width, but the SIZE_MAX is a good
+// approximation. UINTPTR_MAX and INTPTR_MAX are optional, so avoid them for max
+// portability.
+#if SIZE_MAX == 0xffff
+#error Unknown platform (16-bit, unsupported)
+#elif SIZE_MAX == 0xffffffff
+#define FASTFLOAT_32BIT 1
+#elif SIZE_MAX == 0xffffffffffffffff
+#define FASTFLOAT_64BIT 1
+#else
+#error Unknown platform (not 32-bit, not 64-bit?)
+#endif
+#endif
+
+#if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__)) ||           \
+    (defined(_M_ARM64) && !defined(__MINGW32__))
+#include <intrin.h>
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define FASTFLOAT_VISUAL_STUDIO 1
+#endif
+
+#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
+#define FASTFLOAT_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined _WIN32
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#else
+#if defined(__APPLE__) || defined(__FreeBSD__)
+#include <machine/endian.h>
+#elif defined(sun) || defined(__sun)
+#include <sys/byteorder.h>
+#elif defined(__MVS__)
+#include <sys/endian.h>
+#else
+#ifdef __has_include
+#if __has_include(<endian.h>)
+#include <endian.h>
+#endif //__has_include(<endian.h>)
+#endif //__has_include
+#endif
+#
+#ifndef __BYTE_ORDER__
+// safe choice
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#endif
+#
+#ifndef __ORDER_LITTLE_ENDIAN__
+// safe choice
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#endif
+#
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#else
+#define FASTFLOAT_IS_BIG_ENDIAN 1
+#endif
+#endif
+
+#if defined(__SSE2__) || (defined(FASTFLOAT_VISUAL_STUDIO) &&                  \
+                          (defined(_M_AMD64) || defined(_M_X64) ||             \
+                           (defined(_M_IX86_FP) && _M_IX86_FP == 2)))
+#define FASTFLOAT_SSE2 1
+#endif
+
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define FASTFLOAT_NEON 1
+#endif
+
+#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON)
+#define FASTFLOAT_HAS_SIMD 1
+#endif
+
+#if defined(__GNUC__)
+// disable -Wcast-align=strict (GCC only)
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS                                        \
+  _Pragma("GCC diagnostic push")                                               \
+      _Pragma("GCC diagnostic ignored \"-Wcast-align\"")
+#else
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS
+#endif
+
+#if defined(__GNUC__)
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS _Pragma("GCC diagnostic pop")
+#else
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS
+#endif
+
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#define fastfloat_really_inline __forceinline
+#else
+#define fastfloat_really_inline inline __attribute__((always_inline))
+#endif
+
+#ifndef FASTFLOAT_ASSERT
+#define FASTFLOAT_ASSERT(x)                                                    \
+  { ((void)(x)); }
+#endif // default FASTFLOAT_ASSERT: evaluates x and discards the result
+// Same default for debug assertions; define either macro first to override.
+#ifndef FASTFLOAT_DEBUG_ASSERT
+#define FASTFLOAT_DEBUG_ASSERT(x)                                              \
+  { ((void)(x)); }
+#endif
+
+// Rust-style `try!()` / `?`: makes the enclosing function return false if !x
+#define FASTFLOAT_TRY(x)                                                       \
+  {                                                                            \
+    if (!(x))                                                                  \
+      return false;                                                            \
+  }
+// SFINAE helper: names the type `int` only when the condition is true.
+#define FASTFLOAT_ENABLE_IF(...)                                               \
+  typename std::enable_if<(__VA_ARGS__), int>::type
+
+namespace fast_float {
+
+fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
+#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED
+  return std::is_constant_evaluated(); // true only during constant evaluation
+#else
+  return false; // no std::is_constant_evaluated: always report "runtime"
+#endif
+}
+
+template <typename T> // float, double, plus any defined std::floatN_t types
+struct is_supported_float_type
+    : std::integral_constant<
+          bool, std::is_same<T, double>::value || std::is_same<T, float>::value
+#ifdef __STDCPP_FLOAT64_T__
+                    || std::is_same<T, std::float64_t>::value
+#endif
+#ifdef __STDCPP_FLOAT32_T__
+                    || std::is_same<T, std::float32_t>::value
+#endif
+#ifdef __STDCPP_FLOAT16_T__
+                    || std::is_same<T, std::float16_t>::value
+#endif
+#ifdef __STDCPP_BFLOAT16_T__
+                    || std::is_same<T, std::bfloat16_t>::value
+#endif
+          > {
+};
+
+template <typename T> // same-size unsigned type: 1/2/4 bytes, else uint64_t
+using equiv_uint_t = typename std::conditional<
+    sizeof(T) == 1, uint8_t,
+    typename std::conditional<
+        sizeof(T) == 2, uint16_t,
+        typename std::conditional<sizeof(T) == 4, uint32_t,
+                                  uint64_t>::type>::type>::type;
+// Every integral type is accepted; the trait simply forwards std::is_integral.
+template <typename T> struct is_supported_integer_type : std::is_integral<T> {};
+
+template <typename UC> // char, wchar_t, char16_t, char32_t (+ char8_t if any)
+struct is_supported_char_type
+    : std::integral_constant<bool, std::is_same<UC, char>::value ||
+                                       std::is_same<UC, wchar_t>::value ||
+                                       std::is_same<UC, char16_t>::value ||
+                                       std::is_same<UC, char32_t>::value
+#ifdef __cpp_char8_t
+                                       || std::is_same<UC, char8_t>::value
+#endif
+                             > {
+};
+
+// True when `length` units match expected_lowercase after ASCII case folding.
+template <typename UC>
+inline FASTFLOAT_CONSTEXPR14 bool
+fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase,
+                      size_t length) {
+  size_t pos = 0;
+  while (pos < length) {
+    UC const ch = actual_mixedcase[pos];
+    if ((ch < 256 ? ch | 32 : ch) != expected_lowercase[pos]) return false;
+    ++pos;
+  }
+  return true;
+}
+
+#ifndef FLT_EVAL_METHOD
+#error "FLT_EVAL_METHOD should be defined, please include cfloat."
+#endif // FLT_EVAL_METHOD (tested by the min_exponent_fast_path specialisations)
+
+// Non-owning view over `length` consecutive read-only elements starting at ptr.
+template <typename T> struct span {
+  T const *ptr;  // first element (nullptr for a default-constructed view)
+  size_t length; // number of elements reachable through ptr
+  // Views the `count` elements starting at `first`.
+  constexpr span(T const *first, size_t count) : ptr(first), length(count) {}
+  // Empty view.
+  constexpr span() : span(nullptr, 0) {}
+  // Number of elements in the view.
+  constexpr size_t len() const noexcept { return length; }
+  // Element access; the bound is only checked through FASTFLOAT_DEBUG_ASSERT.
+  FASTFLOAT_CONSTEXPR14 const T &operator[](size_t index) const noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);
+    return *(ptr + index);
+  }
+};
+
+// Unsigned 128-bit quantity stored as two 64-bit halves.
+struct value128 {
+  uint64_t low;  // least significant 64 bits
+  uint64_t high; // most significant 64 bits
+  constexpr value128(uint64_t lo, uint64_t hi) : low(lo), high(hi) {}
+  // Zero value.
+  constexpr value128() : value128(0, 0) {}
+};
+
+/* Helper C++14 constexpr generic implementation of leading_zeroes.
+ *
+ * Binary search for the highest set bit: the window is halved through the
+ * widths 32, 16, 8, 4, 2 and 1. Whenever the upper half of the current
+ * window contains a set bit, the value is shifted down by that width and the
+ * width is OR-ed into the accumulated bit index.
+ *
+ * input_num: value to inspect.
+ * last_bit:  initial value of the accumulated bit index (defaults to 0).
+ *
+ * Returns 63 minus the accumulated index; with the default last_bit that is
+ * the number of leading zero bits of input_num (63 when input_num is 0 or 1).
+ */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int
+leading_zeroes_generic(uint64_t input_num, int last_bit = 0) {
+  int highest_bit = last_bit;
+
+  for (int width = 32; width != 0; width >>= 1) {
+    // Mask selecting the upper `width` bits of the low 2 * width bits.
+    uint64_t const upper_half = ((uint64_t(1) << width) - 1) << width;
+    if (input_num & upper_half) {
+      input_num >>= width;
+      highest_bit |= width;
+    }
+  }
+
+  return 63 - highest_bit;
+}
+
+/* Leading zero bits of input_num; result might be undefined when it is zero */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int
+leading_zeroes(uint64_t input_num) {
+  assert(input_num > 0);
+  if (cpp20_and_in_constexpr()) {
+    return leading_zeroes_generic(input_num); // portable path for constexpr
+  }
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#if defined(_M_X64) || defined(_M_ARM64)
+  unsigned long leading_zero = 0;
+  // Search the mask data from most significant bit (MSB)
+  // to least significant bit (LSB) for a set bit (1).
+  _BitScanReverse64(&leading_zero, input_num);
+  return (int)(63 - leading_zero); // bit index -> count of zeros above it
+#else
+  return leading_zeroes_generic(input_num); // other MSVC targets
+#endif
+#else
+  return __builtin_clzll(input_num); // non-MSVC compilers
+#endif
+}
+
+// Full 64-bit product of two 32-bit operands (both widened before multiplying).
+fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) {
+  return static_cast<uint64_t>(x) * static_cast<uint64_t>(y);
+}
+
+// Portable 64x64 -> 128-bit multiply from 32-bit limbs (ab = a:b, cd = c:d);
+// returns the low 64 bits and stores the high 64 bits in *hi.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t
+umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) {
+  uint32_t const a = uint32_t(ab >> 32), b = uint32_t(ab);
+  uint32_t const c = uint32_t(cd >> 32), d = uint32_t(cd);
+  uint64_t const ad = emulu(a, d), bd = emulu(b, d), cross = ad + emulu(b, c);
+  uint64_t const lo = bd + (cross << 32), wrapped = cross < ad ? 1 : 0;
+  *hi = emulu(a, c) + (cross >> 32) + (wrapped << 32) + (lo < bd ? 1 : 0);
+  return lo;
+}
+
+#ifdef FASTFLOAT_32BIT
+
+// slow emulation routine for 32-bit: supplies _umul128 via umul128_generic
+#if !defined(__MINGW64__)
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t _umul128(uint64_t ab,
+                                                                uint64_t cd,
+                                                                uint64_t *hi) {
+  return umul128_generic(ab, cd, hi); // low half returned, high half in *hi
+}
+#endif // !__MINGW64__
+
+#endif // FASTFLOAT_32BIT
+
+// compute the full 128-bit product of the 64-bit values a and b
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128
+full_multiplication(uint64_t a, uint64_t b) {
+  if (cpp20_and_in_constexpr()) {
+    value128 answer;
+    answer.low = umul128_generic(a, b, &answer.high); // portable constexpr path
+    return answer;
+  }
+  value128 answer;
+#if defined(_M_ARM64) && !defined(__MINGW32__)
+  // ARM64 has native support for 64-bit multiplications, no need to emulate
+  // But MinGW on ARM64 doesn't have native support for 64-bit multiplications
+  answer.high = __umulh(a, b);
+  answer.low = a * b;
+#elif defined(FASTFLOAT_32BIT) ||                                              \
+    (defined(_WIN64) && !defined(__clang__) && !defined(_M_ARM64))
+  answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64
+#elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__)
+  __uint128_t r = ((__uint128_t)a) * b;
+  answer.low = uint64_t(r);
+  answer.high = uint64_t(r >> 64);
+#else
+  answer.low = umul128_generic(a, b, &answer.high); // generic fallback
+#endif
+  return answer;
+}
+
+// Significand bits plus a power-of-two field; both start at zero and the
+// comparison operators compare them member-wise.
+struct adjusted_mantissa {
+  uint64_t mantissa{0};
+  int32_t power2{0}; // a negative value indicates an invalid result
+  adjusted_mantissa() = default;
+  constexpr bool operator==(adjusted_mantissa const &rhs) const {
+    return power2 == rhs.power2 && mantissa == rhs.mantissa;
+  }
+  constexpr bool operator!=(adjusted_mantissa const &rhs) const {
+    return !(*this == rhs);
+  }
+};
+
+// Bias so we can get the real exponent with an invalid adjusted_mantissa.
+constexpr static int32_t invalid_am_bias = -0x8000;
+
+// 5**5 (3125); used for binary_format_lookup_tables<T>::max_mantissa divisors
+constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5;
+
+template <typename T, typename U = void> struct binary_format_lookup_tables;
+// Per-type format constants; inherits the lookup tables declared above.
+template <typename T> struct binary_format : binary_format_lookup_tables<T> {
+  using equiv_uint = equiv_uint_t<T>;
+  // Declarations only: explicit specialisations supply the value for each T.
+  static constexpr int mantissa_explicit_bits();
+  static constexpr int minimum_exponent();
+  static constexpr int infinite_power();
+  static constexpr int sign_index();
+  static constexpr int
+  min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST
+  static constexpr int max_exponent_fast_path();
+  static constexpr int max_exponent_round_to_even();
+  static constexpr int min_exponent_round_to_even();
+  static constexpr uint64_t max_mantissa_fast_path(int64_t power);
+  static constexpr uint64_t
+  max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST
+  static constexpr int largest_power_of_ten();
+  static constexpr int smallest_power_of_ten();
+  static constexpr T exact_power_of_ten(int64_t power);
+  static constexpr size_t max_digits();
+  static constexpr equiv_uint exponent_mask();
+  static constexpr equiv_uint mantissa_mask();
+  static constexpr equiv_uint hidden_bit_mask();
+};
+
+template <typename U> struct binary_format_lookup_tables<double, U> {
+  static constexpr double powers_of_ten[] = { // powers_of_ten[i] == 10**i
+      1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
+      1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
+
+  // max_mantissa[index]: largest integer v so that (5**index * v) <= 1<<53.
+  // 0x20000000000000 == 1 << 53
+  static constexpr uint64_t max_mantissa[] = {
+      0x20000000000000,
+      0x20000000000000 / 5,
+      0x20000000000000 / (5 * 5),
+      0x20000000000000 / (5 * 5 * 5),
+      0x20000000000000 / (5 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555),
+      0x20000000000000 / (constant_55555 * 5),
+      0x20000000000000 / (constant_55555 * 5 * 5),
+      0x20000000000000 / (constant_55555 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555 * 5 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555),
+      0x20000000000000 / (constant_55555 * constant_55555 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * 5 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * constant_55555),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 *
+                          constant_55555 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 *
+                          constant_55555 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 *
+                          constant_55555 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 *
+                          constant_55555 * 5 * 5 * 5 * 5)};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Out-of-class definitions of the two tables (presumably for pre-C++17 builds).
+template <typename U>
+constexpr double binary_format_lookup_tables<double, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t binary_format_lookup_tables<double, U>::max_mantissa[];
+
+#endif
+
+template <typename U> struct binary_format_lookup_tables<float, U> {
+  static constexpr float powers_of_ten[] = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f,
+                                            1e6f, 1e7f, 1e8f, 1e9f, 1e10f};
+
+  // max_mantissa[index]: largest integer v so that (5**index * v) <= 1<<24.
+  // 0x1000000 == 1<<24
+  static constexpr uint64_t max_mantissa[] = {
+      0x1000000,
+      0x1000000 / 5,
+      0x1000000 / (5 * 5),
+      0x1000000 / (5 * 5 * 5),
+      0x1000000 / (5 * 5 * 5 * 5),
+      0x1000000 / (constant_55555),
+      0x1000000 / (constant_55555 * 5),
+      0x1000000 / (constant_55555 * 5 * 5),
+      0x1000000 / (constant_55555 * 5 * 5 * 5),
+      0x1000000 / (constant_55555 * 5 * 5 * 5 * 5),
+      0x1000000 / (constant_55555 * constant_55555),
+      0x1000000 / (constant_55555 * constant_55555 * 5)};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Out-of-class definitions of the two tables (presumably for pre-C++17 builds).
+template <typename U>
+constexpr float binary_format_lookup_tables<float, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t binary_format_lookup_tables<float, U>::max_mantissa[];
+
+#endif
+
+template <>
+inline constexpr int binary_format<double>::min_exponent_fast_path() {
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return 0; // FLT_EVAL_METHOD other than 0/1: no negative powers of ten
+#else
+  return -22; // mirror of max_exponent_fast_path() for double
+#endif
+}
+
+template <>
+inline constexpr int binary_format<float>::min_exponent_fast_path() {
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return 0; // FLT_EVAL_METHOD other than 0/1: no negative powers of ten
+#else
+  return -10; // mirror of max_exponent_fast_path() for float
+#endif
+}
+
+template <>
+inline constexpr int binary_format<double>::mantissa_explicit_bits() {
+  return 52; // width of the stored mantissa field (see mantissa_mask())
+}
+
+template <>
+inline constexpr int binary_format<float>::mantissa_explicit_bits() {
+  return 23; // width of the stored mantissa field (see mantissa_mask())
+}
+
+template <>
+inline constexpr int binary_format<double>::max_exponent_round_to_even() {
+  return 23; // upper bound of the round-to-even exponent range for double
+}
+
+template <>
+inline constexpr int binary_format<float>::max_exponent_round_to_even() {
+  return 10; // upper bound of the round-to-even exponent range for float
+}
+
+template <>
+inline constexpr int binary_format<double>::min_exponent_round_to_even() {
+  return -4; // lower bound of the round-to-even exponent range for double
+}
+
+template <>
+inline constexpr int binary_format<float>::min_exponent_round_to_even() {
+  return -17; // lower bound of the round-to-even exponent range for float
+}
+
+template <> inline constexpr int binary_format<double>::minimum_exponent() {
+  return -1023; // presumably the negated IEEE 754 binary64 exponent bias
+}
+
+template <> inline constexpr int binary_format<float>::minimum_exponent() {
+  return -127; // presumably the negated IEEE 754 binary32 exponent bias
+}
+
+template <> inline constexpr int binary_format<double>::infinite_power() {
+  return 0x7FF; // all 11 exponent bits set (exponent_mask() >> 52)
+}
+
+template <> inline constexpr int binary_format<float>::infinite_power() {
+  return 0xFF; // all 8 exponent bits set (exponent_mask() >> 23)
+}
+
+template <> inline constexpr int binary_format<double>::sign_index() {
+  return 63; // bit position of the sign in the 64-bit pattern
+}
+
+template <> inline constexpr int binary_format<float>::sign_index() {
+  return 31; // bit position of the sign in the 32-bit pattern
+}
+
+template <>
+inline constexpr int binary_format<double>::max_exponent_fast_path() {
+  return 22; // last index of the double powers_of_ten table (1e22)
+}
+
+template <>
+inline constexpr int binary_format<float>::max_exponent_fast_path() {
+  return 10; // last index of the float powers_of_ten table (1e10f)
+}
+
+template <>
+inline constexpr uint64_t binary_format<double>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits(); // 2 << 52 == 1 << 53
+}
+
+template <>
+inline constexpr uint64_t binary_format<float>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits(); // 2 << 23 == 1 << 24
+}
+
+// credit: Jakub Jelínek
+#ifdef __STDCPP_FLOAT16_T__
+template <typename U> struct binary_format_lookup_tables<std::float16_t, U> {
+  static constexpr std::float16_t powers_of_ten[] = {1e0f16, 1e1f16, 1e2f16,
+                                                     1e3f16, 1e4f16};
+
+  // max_mantissa[index]: largest integer v so that (5**index * v) <= 1<<11.
+  // 0x800 == 1<<11
+  static constexpr uint64_t max_mantissa[] = {0x800,
+                                              0x800 / 5,
+                                              0x800 / (5 * 5),
+                                              0x800 / (5 * 5 * 5),
+                                              0x800 / (5 * 5 * 5 * 5),
+                                              0x800 / (constant_55555)};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Out-of-class definitions of the two tables (presumably for pre-C++17 builds).
+template <typename U>
+constexpr std::float16_t
+    binary_format_lookup_tables<std::float16_t, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t
+    binary_format_lookup_tables<std::float16_t, U>::max_mantissa[];
+
+#endif
+
+template <>
+inline constexpr std::float16_t
+binary_format<std::float16_t>::exact_power_of_ten(int64_t power) {
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
+}
+
+template <>
+inline constexpr binary_format<std::float16_t>::equiv_uint
+binary_format<std::float16_t>::exponent_mask() {
+  return 0x7C00; // bits 10-14
+}
+
+template <>
+inline constexpr binary_format<std::float16_t>::equiv_uint
+binary_format<std::float16_t>::mantissa_mask() {
+  return 0x03FF; // low 10 bits
+}
+
+template <>
+inline constexpr binary_format<std::float16_t>::equiv_uint
+binary_format<std::float16_t>::hidden_bit_mask() {
+  return 0x0400; // bit 10, just above the stored mantissa
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::max_exponent_fast_path() {
+  return 4; // last index of the float16 powers_of_ten table (1e4)
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::mantissa_explicit_bits() {
+  return 10; // width of the stored mantissa field (see mantissa_mask())
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<std::float16_t>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits(); // 2 << 10 == 1 << 11
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<std::float16_t>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 4
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::min_exponent_fast_path() {
+  return 0; // no negative powers of ten on the float16 fast path
+}
+
+template <>
+inline constexpr int
+binary_format<std::float16_t>::max_exponent_round_to_even() {
+  return 5; // upper bound of the round-to-even exponent range
+}
+
+template <>
+inline constexpr int
+binary_format<std::float16_t>::min_exponent_round_to_even() {
+  return -22; // lower bound of the round-to-even exponent range
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::minimum_exponent() {
+  return -15; // presumably the negated binary16 exponent bias
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::infinite_power() {
+  return 0x1F; // all 5 exponent bits set (exponent_mask() >> 10)
+}
+
+template <> inline constexpr int binary_format<std::float16_t>::sign_index() {
+  return 15; // bit position of the sign in the 16-bit pattern
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::largest_power_of_ten() {
+  return 4; // largest decimal exponent considered for float16
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::smallest_power_of_ten() {
+  return -27; // smallest decimal exponent considered for float16
+}
+
+template <>
+inline constexpr size_t binary_format<std::float16_t>::max_digits() {
+  return 22; // digit-count limit used for float16
+}
+#endif // __STDCPP_FLOAT16_T__
+
+// credit: Jakub Jelínek
+#ifdef __STDCPP_BFLOAT16_T__
+template <typename U> struct binary_format_lookup_tables<std::bfloat16_t, U> {
+  static constexpr std::bfloat16_t powers_of_ten[] = {1e0bf16, 1e1bf16, 1e2bf16,
+                                                      1e3bf16};
+
+  // max_mantissa[index]: largest integer v so that (5**index * v) <= 1<<8.
+  // 0x100 == 1<<8
+  static constexpr uint64_t max_mantissa[] = {0x100, 0x100 / 5, 0x100 / (5 * 5),
+                                              0x100 / (5 * 5 * 5),
+                                              0x100 / (5 * 5 * 5 * 5)};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Out-of-class definitions of the two tables (presumably for pre-C++17 builds).
+template <typename U>
+constexpr std::bfloat16_t
+    binary_format_lookup_tables<std::bfloat16_t, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t
+    binary_format_lookup_tables<std::bfloat16_t, U>::max_mantissa[];
+
+#endif
+
+template <>
+inline constexpr std::bfloat16_t
+binary_format<std::bfloat16_t>::exact_power_of_ten(int64_t power) {
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::max_exponent_fast_path() {
+  return 3; // last index of the bfloat16 powers_of_ten table (1e3)
+}
+
+template <>
+inline constexpr binary_format<std::bfloat16_t>::equiv_uint
+binary_format<std::bfloat16_t>::exponent_mask() {
+  return 0x7F80; // bits 7-14
+}
+
+template <>
+inline constexpr binary_format<std::bfloat16_t>::equiv_uint
+binary_format<std::bfloat16_t>::mantissa_mask() {
+  return 0x007F; // low 7 bits
+}
+
+template <>
+inline constexpr binary_format<std::bfloat16_t>::equiv_uint
+binary_format<std::bfloat16_t>::hidden_bit_mask() {
+  return 0x0080; // bit 7, just above the stored mantissa
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::mantissa_explicit_bits() {
+  return 7; // width of the stored mantissa field (see mantissa_mask())
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<std::bfloat16_t>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits(); // 2 << 7 == 1 << 8
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<std::bfloat16_t>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 3
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::min_exponent_fast_path() {
+  return 0; // no negative powers of ten on the bfloat16 fast path
+}
+
+template <>
+inline constexpr int
+binary_format<std::bfloat16_t>::max_exponent_round_to_even() {
+  return 3; // upper bound of the round-to-even exponent range
+}
+
+template <>
+inline constexpr int
+binary_format<std::bfloat16_t>::min_exponent_round_to_even() {
+  return -24; // lower bound of the round-to-even exponent range
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::minimum_exponent() {
+  return -127; // same value as binary_format<float>::minimum_exponent()
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::infinite_power() {
+  return 0xFF; // all 8 exponent bits set (exponent_mask() >> 7)
+}
+
+template <> inline constexpr int binary_format<std::bfloat16_t>::sign_index() {
+  return 15; // bit position of the sign in the 16-bit pattern
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::largest_power_of_ten() {
+  return 38; // same value as binary_format<float>::largest_power_of_ten()
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::smallest_power_of_ten() {
+  return -60; // smallest decimal exponent considered for bfloat16
+}
+
+template <>
+inline constexpr size_t binary_format<std::bfloat16_t>::max_digits() {
+  return 98; // digit-count limit used for bfloat16
+}
+#endif // __STDCPP_BFLOAT16_T__
+
+template <>
+inline constexpr uint64_t
+binary_format<double>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 22
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<float>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 10
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <>
+inline constexpr double
+binary_format<double>::exact_power_of_ten(int64_t power) {
+  // Unchecked table lookup. Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
+}
+
+template <>
+inline constexpr float binary_format<float>::exact_power_of_ten(int64_t power) {
+  // Unchecked table lookup. Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
+}
+
+template <> inline constexpr int binary_format<double>::largest_power_of_ten() {
+  return 308; // largest decimal exponent considered for double
+}
+
+template <> inline constexpr int binary_format<float>::largest_power_of_ten() {
+  return 38; // largest decimal exponent considered for float
+}
+
+template <>
+inline constexpr int binary_format<double>::smallest_power_of_ten() {
+  return -342; // smallest decimal exponent considered for double
+}
+
+template <> inline constexpr int binary_format<float>::smallest_power_of_ten() {
+  return -64; // smallest decimal exponent considered for float
+}
+
+template <> inline constexpr size_t binary_format<double>::max_digits() {
+  return 769; // digit-count limit used for double
+}
+
+template <> inline constexpr size_t binary_format<float>::max_digits() {
+  return 114; // digit-count limit used for float
+}
+
+template <>
+inline constexpr binary_format<float>::equiv_uint
+binary_format<float>::exponent_mask() {
+  return 0x7F800000; // bits 23-30
+}
+
+template <>
+inline constexpr binary_format<double>::equiv_uint
+binary_format<double>::exponent_mask() {
+  return 0x7FF0000000000000; // bits 52-62
+}
+
+template <>
+inline constexpr binary_format<float>::equiv_uint
+binary_format<float>::mantissa_mask() {
+  return 0x007FFFFF; // low 23 bits
+}
+
+template <>
+inline constexpr binary_format<double>::equiv_uint
+binary_format<double>::mantissa_mask() {
+  return 0x000FFFFFFFFFFFFF; // low 52 bits
+}
+
+template <>
+inline constexpr binary_format<float>::equiv_uint
+binary_format<float>::hidden_bit_mask() {
+  return 0x00800000; // bit 23, just above the stored mantissa
+}
+
+template <>
+inline constexpr binary_format<double>::equiv_uint
+binary_format<double>::hidden_bit_mask() {
+  return 0x0010000000000000; // bit 52, just above the stored mantissa
+}
+
+// Assembles T's bit pattern from the sign, am.power2 and am.mantissa fields.
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+to_float(bool negative, adjusted_mantissa am, T &value) {
+  using bits = equiv_uint_t<T>;
+  bits const sign = bits(bits(negative) << binary_format<T>::sign_index());
+  bits const exponent =
+      bits(bits(am.power2) << binary_format<T>::mantissa_explicit_bits());
+  bits const word = bits(bits(am.mantissa) | exponent | sign);
+#if FASTFLOAT_HAS_BIT_CAST
+  value = std::bit_cast<T>(word);
+#else
+  ::memcpy(&value, &word, sizeof(T));
+#endif
+}
+
+// Lookup table over byte values: true for 9-13 (\t \n \v \f \r) and 32 (' ').
+template <typename = void> struct space_lut {
+  static constexpr bool value[] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+template <typename T> constexpr bool space_lut<T>::value[];
+#endif
+
+// Whitespace test; values outside 0..255 (e.g. negative wchar_t) are rejected
+template <typename UC> constexpr bool is_space(UC c) {
+  return static_cast<uint64_t>(c) < 256 && space_lut<>::value[uint8_t(c)];
+}
+
+// Pattern of '0' characters filling one 64-bit word of UC code units.
+template <typename UC> static constexpr uint64_t int_cmp_zeros() {
+  static_assert((sizeof(UC) == 1) || (sizeof(UC) == 2) || (sizeof(UC) == 4),
+                "Unsupported character size");
+  return (sizeof(UC) == 4) ? (uint64_t(UC('0')) << 32 | UC('0'))
+         : (sizeof(UC) == 1) ? 0x3030303030303030
+         : (uint64_t(UC('0')) << 48 | uint64_t(UC('0')) << 32 |
+            uint64_t(UC('0')) << 16 | UC('0'));
+}
+// Number of UC code units that fit in one 64-bit word.
+template <typename UC> static constexpr int int_cmp_len() {
+  return static_cast<int>(sizeof(uint64_t) / sizeof(UC));
+}
+
+template <typename UC> constexpr UC const *str_const_nan();
+// One specialisation per character type, each returning the literal "nan".
+template <> constexpr char const *str_const_nan<char>() { return "nan"; }
+
+template <> constexpr wchar_t const *str_const_nan<wchar_t>() { return L"nan"; }
+
+template <> constexpr char16_t const *str_const_nan<char16_t>() {
+  return u"nan";
+}
+
+template <> constexpr char32_t const *str_const_nan<char32_t>() {
+  return U"nan";
+}
+
+#ifdef __cpp_char8_t
+template <> constexpr char8_t const *str_const_nan<char8_t>() {
+  return u8"nan";
+}
+#endif
+
+template <typename UC> constexpr UC const *str_const_inf();
+// One specialisation per character type, each returning "infinity".
+template <> constexpr char const *str_const_inf<char>() { return "infinity"; }
+
+template <> constexpr wchar_t const *str_const_inf<wchar_t>() {
+  return L"infinity";
+}
+
+template <> constexpr char16_t const *str_const_inf<char16_t>() {
+  return u"infinity";
+}
+
+template <> constexpr char32_t const *str_const_inf<char32_t>() {
+  return U"infinity";
+}
+
+#ifdef __cpp_char8_t
+template <> constexpr char8_t const *str_const_inf<char8_t>() {
+  return u8"infinity";
+}
+#endif
+
+template <typename = void> struct int_luts {
+  static constexpr uint8_t chdigit[] = { // byte -> digit value, 255 = no digit
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   255, 255,
+      255, 255, 255, 255, 255, 10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
+      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,
+      35,  255, 255, 255, 255, 255, 255, 10,  11,  12,  13,  14,  15,  16,  17,
+      18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,
+      33,  34,  35,  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255};
+  // Indexed by base - 2 (35 entries, bases 2..36); read via max_digits_u64().
+  static constexpr size_t maxdigits_u64[] = {
+      64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16,
+      15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13};
+  // Indexed by base - 2 (35 entries, bases 2..36); read via min_safe_u64().
+  static constexpr uint64_t min_safe_u64[] = {
+      9223372036854775808ull,  12157665459056928801ull, 4611686018427387904,
+      7450580596923828125,     4738381338321616896,     3909821048582988049,
+      9223372036854775808ull,  12157665459056928801ull, 10000000000000000000ull,
+      5559917313492231481,     2218611106740436992,     8650415919381337933,
+      2177953337809371136,     6568408355712890625,     1152921504606846976,
+      2862423051509815793,     6746640616477458432,     15181127029874798299ull,
+      1638400000000000000,     3243919932521508681,     6221821273427820544,
+      11592836324538749809ull, 876488338465357824,      1490116119384765625,
+      2481152873203736576,     4052555153018976267,     6502111422497947648,
+      10260628712958602189ull, 15943230000000000000ull, 787662783788549761,
+      1152921504606846976,     1667889514952984961,     2386420683693101056,
+      3379220508056640625,     4738381338321616896};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Out-of-class definitions of the tables (presumably for pre-C++17 builds).
+template <typename T> constexpr uint8_t int_luts<T>::chdigit[];
+
+template <typename T> constexpr size_t int_luts<T>::maxdigits_u64[];
+
+template <typename T> constexpr uint64_t int_luts<T>::min_safe_u64[];
+
+#endif
+
+template <typename UC> // digit value of c; 255 when c is no digit or > 0xFF
+fastfloat_really_inline constexpr uint8_t ch_to_digit(UC c) {
+  return int_luts<>::chdigit[static_cast<uint64_t>(c) > 0xFF ? 255 : int(c)];
+}
+
+fastfloat_really_inline constexpr size_t max_digits_u64(int base) {
+  return int_luts<>::maxdigits_u64[base - 2]; // unchecked: base must be 2..36
+}
+
+// If a u64 is exactly max_digits_u64() in length, this is
+// the value below which it has definitely overflowed.
+fastfloat_really_inline constexpr uint64_t min_safe_u64(int base) {
+  return int_luts<>::min_safe_u64[base - 2]; // unchecked: base must be 2..36
+}
+
+static_assert(std::is_same<equiv_uint_t<double>, uint64_t>::value,
+              "equiv_uint should be uint64_t for double");
+static_assert(std::numeric_limits<double>::is_iec559,
+              "double must fulfill the requirements of IEC 559 (IEEE 754)");
+// Same pair of checks (bit-pattern type, IEEE 754 conformance) for float.
+static_assert(std::is_same<equiv_uint_t<float>, uint32_t>::value,
+              "equiv_uint should be uint32_t for float");
+static_assert(std::numeric_limits<float>::is_iec559,
+              "float must fulfill the requirements of IEC 559 (IEEE 754)");
+// The extended floating-point types get the same checks when they are defined.
+#ifdef __STDCPP_FLOAT64_T__
+static_assert(std::is_same<equiv_uint_t<std::float64_t>, uint64_t>::value,
+              "equiv_uint should be uint64_t for std::float64_t");
+static_assert(
+    std::numeric_limits<std::float64_t>::is_iec559,
+    "std::float64_t must fulfill the requirements of IEC 559 (IEEE 754)");
+#endif // __STDCPP_FLOAT64_T__
+
+#ifdef __STDCPP_FLOAT32_T__
+static_assert(std::is_same<equiv_uint_t<std::float32_t>, uint32_t>::value,
+              "equiv_uint should be uint32_t for std::float32_t");
+static_assert(
+    std::numeric_limits<std::float32_t>::is_iec559,
+    "std::float32_t must fulfill the requirements of IEC 559 (IEEE 754)");
+#endif // __STDCPP_FLOAT32_T__
+
+#ifdef __STDCPP_FLOAT16_T__
+static_assert(
+    std::is_same<binary_format<std::float16_t>::equiv_uint, uint16_t>::value,
+    "equiv_uint should be uint16_t for std::float16_t");
+static_assert(
+    std::numeric_limits<std::float16_t>::is_iec559,
+    "std::float16_t must fulfill the requirements of IEC 559 (IEEE 754)");
+#endif // __STDCPP_FLOAT16_T__
+
+#ifdef __STDCPP_BFLOAT16_T__
+static_assert(
+    std::is_same<binary_format<std::bfloat16_t>::equiv_uint, uint16_t>::value,
+    "equiv_uint should be uint16_t for std::bfloat16_t");
+static_assert(
+    std::numeric_limits<std::bfloat16_t>::is_iec559,
+    "std::bfloat16_t must fulfill the requirements of IEC 559 (IEEE 754)");
+#endif // __STDCPP_BFLOAT16_T__
+
+constexpr chars_format operator~(chars_format rhs) noexcept {
+  using U = std::underlying_type<chars_format>::type; // integer behind the enum
+  return static_cast<chars_format>(~static_cast<U>(rhs));
+}
+
+constexpr chars_format operator&(chars_format lhs, chars_format rhs) noexcept {
+  // Bitwise AND of two flag sets, computed on the enum's underlying integer.
+  using U = std::underlying_type<chars_format>::type;
+  return static_cast<chars_format>(static_cast<U>(lhs) & static_cast<U>(rhs));
+}
+
+constexpr chars_format operator|(chars_format lhs, chars_format rhs) noexcept {
+  // Bitwise OR of two flag sets, computed on the enum's underlying integer.
+  using U = std::underlying_type<chars_format>::type;
+  return static_cast<chars_format>(static_cast<U>(lhs) | static_cast<U>(rhs));
+}
+
+constexpr chars_format operator^(chars_format lhs, chars_format rhs) noexcept {
+  // Bitwise XOR of two flag sets, computed on the enum's underlying integer.
+  using U = std::underlying_type<chars_format>::type;
+  return static_cast<chars_format>(static_cast<U>(lhs) ^ static_cast<U>(rhs));
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format &
+operator&=(chars_format &lhs, chars_format rhs) noexcept {
+  return lhs = (lhs & rhs); // AND rhs into lhs in place; returns the updated lhs
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format &
+operator|=(chars_format &lhs, chars_format rhs) noexcept {
+  return lhs = (lhs | rhs); // OR rhs into lhs in place; returns the updated lhs
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format &
+operator^=(chars_format &lhs, chars_format rhs) noexcept {
+  return lhs = (lhs ^ rhs); // XOR rhs into lhs in place; returns the updated lhs
+}
+
+namespace detail {
+// Returns fmt with the flags implied by the deprecated feature macros OR-ed in.
+constexpr chars_format adjust_for_feature_macros(chars_format fmt) {
+  return fmt
+#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS
+         | chars_format::allow_leading_plus
+#endif
+#ifdef FASTFLOAT_SKIP_WHITE_SPACE
+         | chars_format::skip_white_space
+#endif
+      ; // ends the return expression assembled by the preprocessor above
+}
+} // namespace detail
+
+} // namespace fast_float
+
+#endif
+
+
+#ifndef FASTFLOAT_FAST_FLOAT_H
+#define FASTFLOAT_FAST_FLOAT_H
+
+
+namespace fast_float {
+/**
+ * This function parses the character sequence [first,last) for a number. It
+ * parses floating-point numbers expecting a locale-independent format equivalent
+ * to what is used by std::strtod in the default ("C") locale. The resulting
+ * floating-point value is the closest floating-point value (using either float
+ * or double), using the "round to even" convention for values that would
+ * otherwise fall right in-between two values. That is, we provide exact parsing
+ * according to the IEEE standard.
+ *
+ * Given a successful parse, the pointer (`ptr`) in the returned value is set to
+ * point right after the parsed number, and the `value` referenced is set to the
+ * parsed value. In case of error, the returned `ec` contains a representative
+ * error, otherwise the default (`std::errc()`) value is stored.
+ *
+ * The implementation does not throw and does not allocate memory (e.g., with
+ * `new` or `malloc`).
+ *
+ * Like the C++17 standard, the `fast_float::from_chars` functions take an
+ * optional last argument of the type `fast_float::chars_format`. It is a bitset
+ * value: we check whether `fmt & fast_float::chars_format::fixed` and `fmt &
+ * fast_float::chars_format::scientific` are set to determine whether we allow
+ * the fixed point and scientific notation respectively. The default is
+ * `fast_float::chars_format::general` which allows both `fixed` and
+ * `scientific`.
+ */
+template <typename T, typename UC = char,
+          typename = FASTFLOAT_ENABLE_IF(is_supported_float_type<T>::value)>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value,
+           chars_format fmt = chars_format::general) noexcept;
+
+/**
+ * Like from_chars, but accepts an `options` argument to govern number parsing.
+ * It works for both floating-point types and integer types.
+ */
+template <typename T, typename UC = char>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_advanced(UC const *first, UC const *last, T &value,
+                    parse_options_t<UC> options) noexcept;
+
+/**
+ * from_chars for integer types; `base` selects the radix and defaults to 10.
+ */
+template <typename T, typename UC = char,
+          typename = FASTFLOAT_ENABLE_IF(is_supported_integer_type<T>::value)>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value, int base = 10) noexcept;
+
+} // namespace fast_float
+
+#endif // FASTFLOAT_FAST_FLOAT_H
+
+#ifndef FASTFLOAT_ASCII_NUMBER_H
+#define FASTFLOAT_ASCII_NUMBER_H
+
+#include <cctype>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <type_traits>
+
+
+#ifdef FASTFLOAT_SSE2
+#include <emmintrin.h>
+#endif
+
+#ifdef FASTFLOAT_NEON
+#include <arm_neon.h>
+#endif
+
+namespace fast_float {
+
+template <typename UC> fastfloat_really_inline constexpr bool has_simd_opt() {
+#ifdef FASTFLOAT_HAS_SIMD
+  return std::is_same<UC, char16_t>::value; // the SIMD helpers in this header take char16_t only
+#else
+  return false; // FASTFLOAT_HAS_SIMD is not defined
+#endif
+}
+
+// True when c is one of the decimal digits '0'..'9'. A plain range test is
+// enough here: compilers already optimize it well.
+template <typename UC>
+fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
+  return (UC('0') <= c) && (c <= UC('9'));
+}
+
+fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { // reverses the byte order
+  return (val >> 56) | ((val >> 40) & 0x000000000000FF00) |
+         ((val >> 24) & 0x0000000000FF0000) | ((val >> 8) & 0x00000000FF000000) |
+         ((val << 8) & 0x000000FF00000000) | ((val << 24) & 0x0000FF0000000000) |
+         ((val << 40) & 0x00FF000000000000) | (val << 56);
+}
+
+// Reads 8 code units into a u64 (first unit in the lowest byte); units wider than a byte are truncated.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+read8_to_u64(UC const *chars) {
+  if (!cpp20_and_in_constexpr() && std::is_same<UC, char>::value) {
+    // Run-time path for plain char: copy the 8 bytes in one go.
+    uint64_t packed;
+    ::memcpy(&packed, chars, sizeof(uint64_t));
+#if FASTFLOAT_IS_BIG_ENDIAN == 1
+    packed = byteswap(packed); // read as if the bytes were little-endian
+#endif
+    return packed;
+  }
+  // Constant evaluation or non-char UC: assemble the value unit by unit.
+  uint64_t packed = 0;
+  for (int shift = 0; shift < 64; shift += 8, ++chars) {
+    packed |= uint64_t(uint8_t(*chars)) << shift;
+  }
+  return packed;
+}
+
+#ifdef FASTFLOAT_SSE2
+
+fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const data) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  __m128i const packed = _mm_packus_epi16(data, data); // narrow the eight 16-bit lanes to bytes
+#ifdef FASTFLOAT_64BIT
+  return uint64_t(_mm_cvtsi128_si64(packed)); // low 64 bits hold the 8 narrowed bytes
+#else
+  uint64_t value;
+  // Visual Studio + older versions of GCC don't support _mm_storeu_si64
+  _mm_storel_epi64(reinterpret_cast<__m128i *>(&value), packed);
+  return value;
+#endif
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return simd_read8_to_u64(
+      _mm_loadu_si128(reinterpret_cast<__m128i const *>(chars))); // unaligned load of 8 char16_t
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+#elif defined(FASTFLOAT_NEON)
+
+fastfloat_really_inline uint64_t simd_read8_to_u64(uint16x8_t const data) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  uint8x8_t utf8_packed = vmovn_u16(data); // keep the low byte of each 16-bit lane
+  return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0); // the 8 bytes viewed as one u64
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return simd_read8_to_u64(
+      vld1q_u16(reinterpret_cast<uint16_t const *>(chars))); // load 8 char16_t into one vector
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+#endif // FASTFLOAT_SSE2
+
+// MSVC SFINAE is broken pre-VS2017
+#if defined(_MSC_VER) && _MSC_VER <= 1900
+template <typename UC>
+#else
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>()) = 0>
+#endif
+// Placeholder so generic callers compile for UC without a SIMD path; callers guard on has_simd_opt<UC>().
+uint64_t simd_read8_to_u64(UC const *) {
+  return 0;
+}
+
+// credit @aqrit. Converts eight ASCII digits packed in val (first digit in the lowest byte) to their number.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
+parse_eight_digits_unrolled(uint64_t val) {
+  uint64_t const mask = 0x000000FF000000FF;
+  uint64_t const mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
+  uint64_t const mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
+  val -= 0x3030303030303030; // subtract '0' from every byte
+  val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
+  val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; // weight the partial values; result is in the high half
+  return uint32_t(val);
+}
+
+// Precondition: chars points at eight characters that are all decimal digits.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t
+parse_eight_digits_unrolled(UC const *chars) noexcept {
+  if (!cpp20_and_in_constexpr() && has_simd_opt<UC>()) {
+    return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
+  }
+  return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
+}
+
+// credit @aqrit. True when all eight bytes of val are ASCII digits: a high bit survives the mask otherwise.
+fastfloat_really_inline constexpr bool
+is_made_of_eight_digits_fast(uint64_t val) noexcept {
+  return (((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
+          0x8080808080808080) == 0;
+}
+
+#ifdef FASTFLOAT_HAS_SIMD
+
+// Call this if chars might not be 8 digits.
+// Using this style (instead of is_made_of_eight_digits_fast() then
+// parse_eight_digits_unrolled()) ensures we don't load SIMD registers twice.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+simd_parse_if_eight_digits_unrolled(char16_t const *chars,
+                                    uint64_t &i) noexcept {
+  if (cpp20_and_in_constexpr()) {
+    return false; // no SIMD during constant evaluation; the caller falls back to its scalar loop
+  }
+#ifdef FASTFLOAT_SSE2
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  __m128i const data =
+      _mm_loadu_si128(reinterpret_cast<__m128i const *>(chars));
+
+  // (x - '0') <= 9
+  // http://0x80.pl/articles/simd-parsing-int-sequences.html
+  __m128i const t0 = _mm_add_epi16(data, _mm_set1_epi16(32720));
+  __m128i const t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759)); // lane is set when it is NOT a digit
+
+  if (_mm_movemask_epi8(t1) == 0) { // no lane flagged: all eight are digits
+    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
+    return true;
+  } else
+    return false;
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+#elif defined(FASTFLOAT_NEON)
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  uint16x8_t const data = vld1q_u16(reinterpret_cast<uint16_t const *>(chars));
+
+  // (x - '0') <= 9
+  // http://0x80.pl/articles/simd-parsing-int-sequences.html
+  uint16x8_t const t0 = vsubq_u16(data, vmovq_n_u16('0'));
+  uint16x8_t const mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1)); // lane is all-ones when it IS a digit
+
+  if (vminvq_u16(mask) == 0xFFFF) { // minimum lane is all-ones: all eight are digits
+    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
+    return true;
+  } else
+    return false;
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+#else
+  (void)chars;
+  (void)i;
+  return false;
+#endif // FASTFLOAT_SSE2
+}
+
+#endif // FASTFLOAT_HAS_SIMD
+
+// MSVC SFINAE is broken pre-VS2017
+#if defined(_MSC_VER) && _MSC_VER <= 1900
+template <typename UC>
+#else
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>()) = 0>
+#endif
+// Placeholder so generic callers compile for UC without a SIMD path: never consumes any digits.
+bool simd_parse_if_eight_digits_unrolled(UC const *, uint64_t &) {
+  return false;
+}
+
+template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value) = 0>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+loop_parse_if_eight_digits(UC const *&p, UC const *const pend, uint64_t &i) {
+  // Only code-unit types with a SIMD helper do any work here.
+  if (has_simd_opt<UC>()) {
+    // Consume whole groups of eight digits; in rare cases i overflows here,
+    // but that's ok.
+    while ((pend - p >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) {
+      p += 8;
+    }
+  }
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+loop_parse_if_eight_digits(char const *&p, char const *const pend,
+                           uint64_t &i) {
+  // optimizes better than parse_if_eight_digits_unrolled() for UC = char.
+  for (; pend - p >= 8; p += 8) {
+    uint64_t const chunk = read8_to_u64(p);
+    if (!is_made_of_eight_digits_fast(chunk)) {
+      break;
+    }
+    i = i * 100000000 + parse_eight_digits_unrolled(chunk); // in rare cases, this will overflow, but that's ok
+  }
+}
+
+enum class parse_error {
+  no_error, // default value of parsed_number_string_t::error
+  // [JSON-only] The minus sign must be followed by an integer.
+  missing_integer_after_sign,
+  // A sign must be followed by an integer or dot.
+  missing_integer_or_dot_after_sign,
+  // [JSON-only] The integer part must not have leading zeros.
+  leading_zeros_in_integer_part,
+  // [JSON-only] The integer part must have at least one digit.
+  no_digits_in_integer_part,
+  // [JSON-only] If there is a decimal point, there must be digits in the
+  // fractional part.
+  no_digits_in_fractional_part,
+  // The mantissa must have at least one digit.
+  no_digits_in_mantissa,
+  // Scientific notation requires an exponential part.
+  missing_exponential_part,
+};
+
+template <typename UC> struct parsed_number_string_t {
+  int64_t exponent{0}; // decimal exponent to apply to mantissa
+  uint64_t mantissa{0}; // decimal digits accumulated as an integer
+  UC const *lastmatch{nullptr}; // where parsing stopped (end of the number, or the error position)
+  bool negative{false}; // the input started with '-'
+  bool valid{false}; // set only once a number has been tokenized successfully
+  bool too_many_digits{false}; // more than 19 significant digits: mantissa holds a truncated value
+  // contains the range of the significant digits
+  span<UC const> integer{};  // non-nullable
+  span<UC const> fraction{}; // nullable
+  parse_error error{parse_error::no_error}; // reason for failure when valid is false
+};
+
+using byte_span = span<char const>; // read-only view over char data
+using parsed_number_string = parsed_number_string_t<char>; // parse result for UC = char
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
+report_parse_error(UC const *p, parse_error error) {
+  parsed_number_string_t<UC> failure; // failed result: records where and why parsing stopped
+  failure.error = error;
+  failure.lastmatch = p;
+  failure.valid = false;
+  return failure;
+}
+
+// Tokenizes the decimal number at [p, pend): sign, integer digits, optional
+// fraction and exponent. Up to 19 digits fit the uint64_t mantissa directly.
+template <bool basic_json_fmt, typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
+parse_number_string(UC const *p, UC const *pend,
+                    parse_options_t<UC> options) noexcept {
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  UC const decimal_point = options.decimal_point;
+
+  parsed_number_string_t<UC> answer;
+  answer.valid = false;
+  answer.too_many_digits = false;
+  // assume p < pend, so dereference without checks;
+  answer.negative = (*p == UC('-'));
+  // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
+  if ((*p == UC('-')) || (uint64_t(fmt & chars_format::allow_leading_plus) &&
+                          !basic_json_fmt && *p == UC('+'))) {
+    ++p;
+    if (p == pend) {
+      return report_parse_error<UC>(
+          p, parse_error::missing_integer_or_dot_after_sign);
+    }
+    FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
+      if (!is_integer(*p)) { // a sign must be followed by an integer
+        return report_parse_error<UC>(p,
+                                      parse_error::missing_integer_after_sign);
+      }
+    }
+    else {
+      if (!is_integer(*p) &&
+          (*p !=
+           decimal_point)) { // a sign must be followed by an integer or the dot
+        return report_parse_error<UC>(
+            p, parse_error::missing_integer_or_dot_after_sign);
+      }
+    }
+  }
+  UC const *const start_digits = p;
+
+  uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
+
+  while ((p != pend) && is_integer(*p)) {
+    // a multiplication by 10 is cheaper than an arbitrary integer
+    // multiplication
+    i = 10 * i +
+        uint64_t(*p -
+                 UC('0')); // might overflow, we will handle the overflow later
+    ++p;
+  }
+  UC const *const end_of_integer_part = p;
+  int64_t digit_count = int64_t(end_of_integer_part - start_digits);
+  answer.integer = span<UC const>(start_digits, size_t(digit_count));
+  FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
+    // at least 1 digit in integer part, without leading zeros
+    if (digit_count == 0) {
+      return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
+    }
+    if ((start_digits[0] == UC('0') && digit_count > 1)) {
+      return report_parse_error<UC>(start_digits,
+                                    parse_error::leading_zeros_in_integer_part);
+    }
+  }
+
+  int64_t exponent = 0;
+  bool const has_decimal_point = (p != pend) && (*p == decimal_point);
+  if (has_decimal_point) {
+    ++p;
+    UC const *before = p;
+    // can occur at most twice without overflowing, but let it occur more, since
+    // for integers with many digits, digit parsing is the primary bottleneck.
+    loop_parse_if_eight_digits(p, pend, i);
+
+    while ((p != pend) && is_integer(*p)) {
+      uint8_t digit = uint8_t(*p - UC('0'));
+      ++p;
+      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
+    }
+    exponent = before - p; // minus the number of fractional digits
+    answer.fraction = span<UC const>(before, size_t(p - before));
+    digit_count -= exponent; // exponent is <= 0 here, so this adds the fractional digits
+  }
+  FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
+    // at least 1 digit in fractional part
+    if (has_decimal_point && exponent == 0) {
+      return report_parse_error<UC>(p,
+                                    parse_error::no_digits_in_fractional_part);
+    }
+  }
+  else if (digit_count == 0) { // we must have encountered at least one digit!
+    return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
+  }
+  int64_t exp_number = 0; // explicit exponential part
+  if ((uint64_t(fmt & chars_format::scientific) && (p != pend) &&
+       ((UC('e') == *p) || (UC('E') == *p))) ||
+      (uint64_t(fmt & detail::basic_fortran_fmt) && (p != pend) &&
+       ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) ||
+        (UC('D') == *p)))) {
+    UC const *location_of_e = p;
+    if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) ||
+        (UC('D') == *p)) {
+      ++p;
+    }
+    bool neg_exp = false;
+    if ((p != pend) && (UC('-') == *p)) {
+      neg_exp = true;
+      ++p;
+    } else if ((p != pend) &&
+               (UC('+') ==
+                *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
+      ++p;
+    }
+    if ((p == pend) || !is_integer(*p)) {
+      if (!uint64_t(fmt & chars_format::fixed)) {
+        // The exponential part is invalid for scientific notation, so it must
+        // be a trailing token for fixed notation. However, fixed notation is
+        // disabled, so report a scientific notation error.
+        return report_parse_error<UC>(p, parse_error::missing_exponential_part);
+      }
+      // Otherwise, we will be ignoring the 'e'.
+      p = location_of_e;
+    } else {
+      while ((p != pend) && is_integer(*p)) {
+        uint8_t digit = uint8_t(*p - UC('0'));
+        if (exp_number < 0x10000000) { // stop accumulating once large, so exp_number cannot overflow
+          exp_number = 10 * exp_number + digit;
+        }
+        ++p;
+      }
+      if (neg_exp) {
+        exp_number = -exp_number;
+      }
+      exponent += exp_number;
+    }
+  } else {
+    // If it is scientific and not fixed, we have to bail out.
+    if (uint64_t(fmt & chars_format::scientific) &&
+        !uint64_t(fmt & chars_format::fixed)) {
+      return report_parse_error<UC>(p, parse_error::missing_exponential_part);
+    }
+  }
+  answer.lastmatch = p;
+  answer.valid = true;
+
+  // If we frequently had to deal with long strings of digits,
+  // we could extend our code by using a 128-bit integer instead
+  // of a 64-bit integer. However, this is uncommon.
+  //
+  // We can deal with up to 19 digits.
+  if (digit_count > 19) { // this is uncommon
+    // It is possible that the integer had an overflow.
+    // We have to handle the case where we have 0.0000somenumber.
+    // We need to be mindful of the case where we only have zeroes...
+    // E.g., 0.000000000...000.
+    UC const *start = start_digits;
+    while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
+      if (*start == UC('0')) {
+        digit_count--;
+      }
+      start++;
+    }
+
+    if (digit_count > 19) {
+      answer.too_many_digits = true;
+      // Let us start again, this time, avoiding overflows.
+      // We don't need to check if is_integer, since we use the
+      // pre-tokenized spans from above.
+      i = 0;
+      p = answer.integer.ptr;
+      UC const *int_end = p + answer.integer.len();
+      uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
+      while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
+        i = i * 10 + uint64_t(*p - UC('0'));
+        ++p;
+      }
+      if (i >= minimal_nineteen_digit_integer) { // We have a big integer
+        exponent = end_of_integer_part - p + exp_number;
+      } else { // We have a value with a fractional component.
+        p = answer.fraction.ptr;
+        UC const *frac_end = p + answer.fraction.len();
+        while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+          i = i * 10 + uint64_t(*p - UC('0'));
+          ++p;
+        }
+        exponent = answer.fraction.ptr - p + exp_number;
+      }
+      // We have now corrected both exponent and i, to a truncated value
+    }
+  }
+  answer.exponent = exponent;
+  answer.mantissa = i;
+  return answer;
+}
+
+template <typename T, typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+parse_int_string(UC const *p, UC const *pend, T &value,
+                 parse_options_t<UC> options) {
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  int const base = options.base;
+  // Parses an integer written in `base` from [p, pend) into value; the result carries the end pointer and error code.
+  from_chars_result_t<UC> answer;
+
+  UC const *const first = p;
+
+  bool const negative = (*p == UC('-')); // p is dereferenced unchecked: the range is assumed non-empty
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(push)
+#pragma warning(disable : 4127)
+#endif
+  if (!std::is_signed<T>::value && negative) { // a '-' is never valid for an unsigned target
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(pop)
+#endif
+    answer.ec = std::errc::invalid_argument;
+    answer.ptr = first;
+    return answer;
+  }
+  if ((*p == UC('-')) ||
+      (uint64_t(fmt & chars_format::allow_leading_plus) && (*p == UC('+')))) {
+    ++p;
+  }
+
+  UC const *const start_num = p;
+
+  while (p != pend && *p == UC('0')) { // skip leading zeros so they do not count as digits
+    ++p;
+  }
+
+  bool const has_leading_zeros = p > start_num;
+
+  UC const *const start_digits = p;
+
+  uint64_t i = 0;
+  if (base == 10) {
+    loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
+  }
+  while (p != pend) {
+    uint8_t digit = ch_to_digit(*p);
+    if (digit >= base) { // not a digit in this base: the number ends here
+      break;
+    }
+    i = uint64_t(base) * i + digit; // might overflow, check this later
+    p++;
+  }
+
+  size_t digit_count = size_t(p - start_digits);
+
+  if (digit_count == 0) {
+    if (has_leading_zeros) { // the input consisted of zeros only
+      value = 0;
+      answer.ec = std::errc();
+      answer.ptr = p;
+    } else {
+      answer.ec = std::errc::invalid_argument;
+      answer.ptr = first;
+    }
+    return answer;
+  }
+
+  answer.ptr = p;
+
+  // check u64 overflow
+  size_t max_digits = max_digits_u64(base);
+  if (digit_count > max_digits) {
+    answer.ec = std::errc::result_out_of_range;
+    return answer;
+  }
+  // this check can be eliminated for all other types, but they will all require
+  // a max_digits(base) equivalent
+  if (digit_count == max_digits && i < min_safe_u64(base)) { // accumulator wrapped around
+    answer.ec = std::errc::result_out_of_range;
+    return answer;
+  }
+
+  // check other types overflow
+  if (!std::is_same<T, uint64_t>::value) {
+    if (i > uint64_t(std::numeric_limits<T>::max()) + uint64_t(negative)) { // negatives may reach max() + 1 in magnitude
+      answer.ec = std::errc::result_out_of_range;
+      return answer;
+    }
+  }
+
+  if (negative) {
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+    // this weird workaround is required because:
+    // - converting unsigned to signed when its value is greater than signed max
+    // is UB pre-C++23.
+    // - reinterpret_casting (~i + 1) would work, but it is not constexpr
+    // this is always optimized into a neg instruction (note: T is an integer
+    // type)
+    value = T(-std::numeric_limits<T>::max() -
+              T(i - uint64_t(std::numeric_limits<T>::max())));
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(pop)
+#endif
+  } else {
+    value = T(i);
+  }
+
+  answer.ec = std::errc();
+  return answer;
+}
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_FAST_TABLE_H
+#define FASTFLOAT_FAST_TABLE_H
+
+#include <cstdint>
+
+namespace fast_float {
+
+/**
+ * When mapping numbers from decimal to binary,
+ * we go from w * 10^q to m * 2^p but we have
+ * 10^q = 5^q * 2^q, so effectively
+ * we are trying to match
+ * w * 2^q * 5^q to m * 2^p. Thus the powers of two
+ * are not a concern since they can be represented
+ * exactly using the binary notation, only the powers of five
+ * affect the binary significand.
+ */
+
+/**
+ * The smallest non-zero float (binary64) is 2^-1074.
+ * We take as input numbers of the form w x 10^q where w < 2^64.
+ * We have that w * 10^-343  <  2^(64-344) 5^-343 < 2^-1076.
+ * However, we have that
+ * (2^64-1) * 10^-342 =  (2^64-1) * 2^-342 * 5^-342 > 2^-1074.
+ * Thus it is possible for a number of the form w * 10^-342 where
+ * w is a 64-bit value to be a non-zero floating-point number.
+ *********
+ * Any number of the form w * 10^309 where w >= 1 is going to be
+ * infinite in binary64, so we never need to worry about powers
+ * of 5 greater than 308.
+ */
+template <class unused = void> struct powers_template {
+
+  constexpr static int smallest_power_of_five =
+      binary_format<double>::smallest_power_of_ten();
+  constexpr static int largest_power_of_five =
+      binary_format<double>::largest_power_of_ten();
+  constexpr static int number_of_entries =
+      2 * (largest_power_of_five - smallest_power_of_five + 1);
+  // Powers of five from 5^-342 all the way to 5^308 rounded toward one.
+  constexpr static uint64_t power_of_five_128[number_of_entries] = {
+      0xeef453d6923bd65a, 0x113faa2906a13b3f,
+      0x9558b4661b6565f8, 0x4ac7ca59a424c507,
+      0xbaaee17fa23ebf76, 0x5d79bcf00d2df649,
+      0xe95a99df8ace6f53, 0xf4d82c2c107973dc,
+      0x91d8a02bb6c10594, 0x79071b9b8a4be869,
+      0xb64ec836a47146f9, 0x9748e2826cdee284,
+      0xe3e27a444d8d98b7, 0xfd1b1b2308169b25,
+      0x8e6d8c6ab0787f72, 0xfe30f0f5e50e20f7,
+      0xb208ef855c969f4f, 0xbdbd2d335e51a935,
+      0xde8b2b66b3bc4723, 0xad2c788035e61382,
+      0x8b16fb203055ac76, 0x4c3bcb5021afcc31,
+      0xaddcb9e83c6b1793, 0xdf4abe242a1bbf3d,
+      0xd953e8624b85dd78, 0xd71d6dad34a2af0d,
+      0x87d4713d6f33aa6b, 0x8672648c40e5ad68,
+      0xa9c98d8ccb009506, 0x680efdaf511f18c2,
+      0xd43bf0effdc0ba48, 0x212bd1b2566def2,
+      0x84a57695fe98746d, 0x14bb630f7604b57,
+      0xa5ced43b7e3e9188, 0x419ea3bd35385e2d,
+      0xcf42894a5dce35ea, 0x52064cac828675b9,
+      0x818995ce7aa0e1b2, 0x7343efebd1940993,
+      0xa1ebfb4219491a1f, 0x1014ebe6c5f90bf8,
+      0xca66fa129f9b60a6, 0xd41a26e077774ef6,
+      0xfd00b897478238d0, 0x8920b098955522b4,
+      0x9e20735e8cb16382, 0x55b46e5f5d5535b0,
+      0xc5a890362fddbc62, 0xeb2189f734aa831d,
+      0xf712b443bbd52b7b, 0xa5e9ec7501d523e4,
+      0x9a6bb0aa55653b2d, 0x47b233c92125366e,
+      0xc1069cd4eabe89f8, 0x999ec0bb696e840a,
+      0xf148440a256e2c76, 0xc00670ea43ca250d,
+      0x96cd2a865764dbca, 0x380406926a5e5728,
+      0xbc807527ed3e12bc, 0xc605083704f5ecf2,
+      0xeba09271e88d976b, 0xf7864a44c633682e,
+      0x93445b8731587ea3, 0x7ab3ee6afbe0211d,
+      0xb8157268fdae9e4c, 0x5960ea05bad82964,
+      0xe61acf033d1a45df, 0x6fb92487298e33bd,
+      0x8fd0c16206306bab, 0xa5d3b6d479f8e056,
+      0xb3c4f1ba87bc8696, 0x8f48a4899877186c,
+      0xe0b62e2929aba83c, 0x331acdabfe94de87,
+      0x8c71dcd9ba0b4925, 0x9ff0c08b7f1d0b14,
+      0xaf8e5410288e1b6f, 0x7ecf0ae5ee44dd9,
+      0xdb71e91432b1a24a, 0xc9e82cd9f69d6150,
+      0x892731ac9faf056e, 0xbe311c083a225cd2,
+      0xab70fe17c79ac6ca, 0x6dbd630a48aaf406,
+      0xd64d3d9db981787d, 0x92cbbccdad5b108,
+      0x85f0468293f0eb4e, 0x25bbf56008c58ea5,
+      0xa76c582338ed2621, 0xaf2af2b80af6f24e,
+      0xd1476e2c07286faa, 0x1af5af660db4aee1,
+      0x82cca4db847945ca, 0x50d98d9fc890ed4d,
+      0xa37fce126597973c, 0xe50ff107bab528a0,
+      0xcc5fc196fefd7d0c, 0x1e53ed49a96272c8,
+      0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7a,
+      0x9faacf3df73609b1, 0x77b191618c54e9ac,
+      0xc795830d75038c1d, 0xd59df5b9ef6a2417,
+      0xf97ae3d0d2446f25, 0x4b0573286b44ad1d,
+      0x9becce62836ac577, 0x4ee367f9430aec32,
+      0xc2e801fb244576d5, 0x229c41f793cda73f,
+      0xf3a20279ed56d48a, 0x6b43527578c1110f,
+      0x9845418c345644d6, 0x830a13896b78aaa9,
+      0xbe5691ef416bd60c, 0x23cc986bc656d553,
+      0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa8,
+      0x94b3a202eb1c3f39, 0x7bf7d71432f3d6a9,
+      0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc53,
+      0xe858ad248f5c22c9, 0xd1b3400f8f9cff68,
+      0x91376c36d99995be, 0x23100809b9c21fa1,
+      0xb58547448ffffb2d, 0xabd40a0c2832a78a,
+      0xe2e69915b3fff9f9, 0x16c90c8f323f516c,
+      0x8dd01fad907ffc3b, 0xae3da7d97f6792e3,
+      0xb1442798f49ffb4a, 0x99cd11cfdf41779c,
+      0xdd95317f31c7fa1d, 0x40405643d711d583,
+      0x8a7d3eef7f1cfc52, 0x482835ea666b2572,
+      0xad1c8eab5ee43b66, 0xda3243650005eecf,
+      0xd863b256369d4a40, 0x90bed43e40076a82,
+      0x873e4f75e2224e68, 0x5a7744a6e804a291,
+      0xa90de3535aaae202, 0x711515d0a205cb36,
+      0xd3515c2831559a83, 0xd5a5b44ca873e03,
+      0x8412d9991ed58091, 0xe858790afe9486c2,
+      0xa5178fff668ae0b6, 0x626e974dbe39a872,
+      0xce5d73ff402d98e3, 0xfb0a3d212dc8128f,
+      0x80fa687f881c7f8e, 0x7ce66634bc9d0b99,
+      0xa139029f6a239f72, 0x1c1fffc1ebc44e80,
+      0xc987434744ac874e, 0xa327ffb266b56220,
+      0xfbe9141915d7a922, 0x4bf1ff9f0062baa8,
+      0x9d71ac8fada6c9b5, 0x6f773fc3603db4a9,
+      0xc4ce17b399107c22, 0xcb550fb4384d21d3,
+      0xf6019da07f549b2b, 0x7e2a53a146606a48,
+      0x99c102844f94e0fb, 0x2eda7444cbfc426d,
+      0xc0314325637a1939, 0xfa911155fefb5308,
+      0xf03d93eebc589f88, 0x793555ab7eba27ca,
+      0x96267c7535b763b5, 0x4bc1558b2f3458de,
+      0xbbb01b9283253ca2, 0x9eb1aaedfb016f16,
+      0xea9c227723ee8bcb, 0x465e15a979c1cadc,
+      0x92a1958a7675175f, 0xbfacd89ec191ec9,
+      0xb749faed14125d36, 0xcef980ec671f667b,
+      0xe51c79a85916f484, 0x82b7e12780e7401a,
+      0x8f31cc0937ae58d2, 0xd1b2ecb8b0908810,
+      0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa15,
+      0xdfbdcece67006ac9, 0x67a791e093e1d49a,
+      0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e0,
+      0xaecc49914078536d, 0x58fae9f773886e18,
+      0xda7f5bf590966848, 0xaf39a475506a899e,
+      0x888f99797a5e012d, 0x6d8406c952429603,
+      0xaab37fd7d8f58178, 0xc8e5087ba6d33b83,
+      0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a64,
+      0x855c3be0a17fcd26, 0x5cf2eea09a55067f,
+      0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481e,
+      0xd0601d8efc57b08b, 0xf13b94daf124da26,
+      0x823c12795db6ce57, 0x76c53d08d6b70858,
+      0xa2cb1717b52481ed, 0x54768c4b0c64ca6e,
+      0xcb7ddcdda26da268, 0xa9942f5dcf7dfd09,
+      0xfe5d54150b090b02, 0xd3f93b35435d7c4c,
+      0x9efa548d26e5a6e1, 0xc47bc5014a1a6daf,
+      0xc6b8e9b0709f109a, 0x359ab6419ca1091b,
+      0xf867241c8cc6d4c0, 0xc30163d203c94b62,
+      0x9b407691d7fc44f8, 0x79e0de63425dcf1d,
+      0xc21094364dfb5636, 0x985915fc12f542e4,
+      0xf294b943e17a2bc4, 0x3e6f5b7b17b2939d,
+      0x979cf3ca6cec5b5a, 0xa705992ceecf9c42,
+      0xbd8430bd08277231, 0x50c6ff782a838353,
+      0xece53cec4a314ebd, 0xa4f8bf5635246428,
+      0x940f4613ae5ed136, 0x871b7795e136be99,
+      0xb913179899f68584, 0x28e2557b59846e3f,
+      0xe757dd7ec07426e5, 0x331aeada2fe589cf,
+      0x9096ea6f3848984f, 0x3ff0d2c85def7621,
+      0xb4bca50b065abe63, 0xfed077a756b53a9,
+      0xe1ebce4dc7f16dfb, 0xd3e8495912c62894,
+      0x8d3360f09cf6e4bd, 0x64712dd7abbbd95c,
+      0xb080392cc4349dec, 0xbd8d794d96aacfb3,
+      0xdca04777f541c567, 0xecf0d7a0fc5583a0,
+      0x89e42caaf9491b60, 0xf41686c49db57244,
+      0xac5d37d5b79b6239, 0x311c2875c522ced5,
+      0xd77485cb25823ac7, 0x7d633293366b828b,
+      0x86a8d39ef77164bc, 0xae5dff9c02033197,
+      0xa8530886b54dbdeb, 0xd9f57f830283fdfc,
+      0xd267caa862a12d66, 0xd072df63c324fd7b,
+      0x8380dea93da4bc60, 0x4247cb9e59f71e6d,
+      0xa46116538d0deb78, 0x52d9be85f074e608,
+      0xcd795be870516656, 0x67902e276c921f8b,
+      0x806bd9714632dff6, 0xba1cd8a3db53b6,
+      0xa086cfcd97bf97f3, 0x80e8a40eccd228a4,
+      0xc8a883c0fdaf7df0, 0x6122cd128006b2cd,
+      0xfad2a4b13d1b5d6c, 0x796b805720085f81,
+      0x9cc3a6eec6311a63, 0xcbe3303674053bb0,
+      0xc3f490aa77bd60fc, 0xbedbfc4411068a9c,
+      0xf4f1b4d515acb93b, 0xee92fb5515482d44,
+      0x991711052d8bf3c5, 0x751bdd152d4d1c4a,
+      0xbf5cd54678eef0b6, 0xd262d45a78a0635d,
+      0xef340a98172aace4, 0x86fb897116c87c34,
+      0x9580869f0e7aac0e, 0xd45d35e6ae3d4da0,
+      0xbae0a846d2195712, 0x8974836059cca109,
+      0xe998d258869facd7, 0x2bd1a438703fc94b,
+      0x91ff83775423cc06, 0x7b6306a34627ddcf,
+      0xb67f6455292cbf08, 0x1a3bc84c17b1d542,
+      0xe41f3d6a7377eeca, 0x20caba5f1d9e4a93,
+      0x8e938662882af53e, 0x547eb47b7282ee9c,
+      0xb23867fb2a35b28d, 0xe99e619a4f23aa43,
+      0xdec681f9f4c31f31, 0x6405fa00e2ec94d4,
+      0x8b3c113c38f9f37e, 0xde83bc408dd3dd04,
+      0xae0b158b4738705e, 0x9624ab50b148d445,
+      0xd98ddaee19068c76, 0x3badd624dd9b0957,
+      0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d6,
+      0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4c,
+      0xd47487cc8470652b, 0x7647c3200069671f,
+      0x84c8d4dfd2c63f3b, 0x29ecd9f40041e073,
+      0xa5fb0a17c777cf09, 0xf468107100525890,
+      0xcf79cc9db955c2cc, 0x7182148d4066eeb4,
+      0x81ac1fe293d599bf, 0xc6f14cd848405530,
+      0xa21727db38cb002f, 0xb8ada00e5a506a7c,
+      0xca9cf1d206fdc03b, 0xa6d90811f0e4851c,
+      0xfd442e4688bd304a, 0x908f4a166d1da663,
+      0x9e4a9cec15763e2e, 0x9a598e4e043287fe,
+      0xc5dd44271ad3cdba, 0x40eff1e1853f29fd,
+      0xf7549530e188c128, 0xd12bee59e68ef47c,
+      0x9a94dd3e8cf578b9, 0x82bb74f8301958ce,
+      0xc13a148e3032d6e7, 0xe36a52363c1faf01,
+      0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac1,
+      0x96f5600f15a7b7e5, 0x29ab103a5ef8c0b9,
+      0xbcb2b812db11a5de, 0x7415d448f6b6f0e7,
+      0xebdf661791d60f56, 0x111b495b3464ad21,
+      0x936b9fcebb25c995, 0xcab10dd900beec34,
+      0xb84687c269ef3bfb, 0x3d5d514f40eea742,
+      0xe65829b3046b0afa, 0xcb4a5a3112a5112,
+      0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ab,
+      0xb3f4e093db73a093, 0x59ed216765690f56,
+      0xe0f218b8d25088b8, 0x306869c13ec3532c,
+      0x8c974f7383725573, 0x1e414218c73a13fb,
+      0xafbd2350644eeacf, 0xe5d1929ef90898fa,
+      0xdbac6c247d62a583, 0xdf45f746b74abf39,
+      0x894bc396ce5da772, 0x6b8bba8c328eb783,
+      0xab9eb47c81f5114f, 0x66ea92f3f326564,
+      0xd686619ba27255a2, 0xc80a537b0efefebd,
+      0x8613fd0145877585, 0xbd06742ce95f5f36,
+      0xa798fc4196e952e7, 0x2c48113823b73704,
+      0xd17f3b51fca3a7a0, 0xf75a15862ca504c5,
+      0x82ef85133de648c4, 0x9a984d73dbe722fb,
+      0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebba,
+      0xcc963fee10b7d1b3, 0x318df905079926a8,
+      0xffbbcfe994e5c61f, 0xfdf17746497f7052,
+      0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa633,
+      0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc0,
+      0xf9bd690a1b68637b, 0x3dfdce7aa3c673b0,
+      0x9c1661a651213e2d, 0x6bea10ca65c084e,
+      0xc31bfa0fe5698db8, 0x486e494fcff30a62,
+      0xf3e2f893dec3f126, 0x5a89dba3c3efccfa,
+      0x986ddb5c6b3a76b7, 0xf89629465a75e01c,
+      0xbe89523386091465, 0xf6bbb397f1135823,
+      0xee2ba6c0678b597f, 0x746aa07ded582e2c,
+      0x94db483840b717ef, 0xa8c2a44eb4571cdc,
+      0xba121a4650e4ddeb, 0x92f34d62616ce413,
+      0xe896a0d7e51e1566, 0x77b020baf9c81d17,
+      0x915e2486ef32cd60, 0xace1474dc1d122e,
+      0xb5b5ada8aaff80b8, 0xd819992132456ba,
+      0xe3231912d5bf60e6, 0x10e1fff697ed6c69,
+      0x8df5efabc5979c8f, 0xca8d3ffa1ef463c1,
+      0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb2,
+      0xddd0467c64bce4a0, 0xac7cb3f6d05ddbde,
+      0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96b,
+      0xad4ab7112eb3929d, 0x86c16c98d2c953c6,
+      0xd89d64d57a607744, 0xe871c7bf077ba8b7,
+      0x87625f056c7c4a8b, 0x11471cd764ad4972,
+      0xa93af6c6c79b5d2d, 0xd598e40d3dd89bcf,
+      0xd389b47879823479, 0x4aff1d108d4ec2c3,
+      0x843610cb4bf160cb, 0xcedf722a585139ba,
+      0xa54394fe1eedb8fe, 0xc2974eb4ee658828,
+      0xce947a3da6a9273e, 0x733d226229feea32,
+      0x811ccc668829b887, 0x806357d5a3f525f,
+      0xa163ff802a3426a8, 0xca07c2dcb0cf26f7,
+      0xc9bcff6034c13052, 0xfc89b393dd02f0b5,
+      0xfc2c3f3841f17c67, 0xbbac2078d443ace2,
+      0x9d9ba7832936edc0, 0xd54b944b84aa4c0d,
+      0xc5029163f384a931, 0xa9e795e65d4df11,
+      0xf64335bcf065d37d, 0x4d4617b5ff4a16d5,
+      0x99ea0196163fa42e, 0x504bced1bf8e4e45,
+      0xc06481fb9bcf8d39, 0xe45ec2862f71e1d6,
+      0xf07da27a82c37088, 0x5d767327bb4e5a4c,
+      0x964e858c91ba2655, 0x3a6a07f8d510f86f,
+      0xbbe226efb628afea, 0x890489f70a55368b,
+      0xeadab0aba3b2dbe5, 0x2b45ac74ccea842e,
+      0x92c8ae6b464fc96f, 0x3b0b8bc90012929d,
+      0xb77ada0617e3bbcb, 0x9ce6ebb40173744,
+      0xe55990879ddcaabd, 0xcc420a6a101d0515,
+      0x8f57fa54c2a9eab6, 0x9fa946824a12232d,
+      0xb32df8e9f3546564, 0x47939822dc96abf9,
+      0xdff9772470297ebd, 0x59787e2b93bc56f7,
+      0x8bfbea76c619ef36, 0x57eb4edb3c55b65a,
+      0xaefae51477a06b03, 0xede622920b6b23f1,
+      0xdab99e59958885c4, 0xe95fab368e45eced,
+      0x88b402f7fd75539b, 0x11dbcb0218ebb414,
+      0xaae103b5fcd2a881, 0xd652bdc29f26a119,
+      0xd59944a37c0752a2, 0x4be76d3346f0495f,
+      0x857fcae62d8493a5, 0x6f70a4400c562ddb,
+      0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb952,
+      0xd097ad07a71f26b2, 0x7e2000a41346a7a7,
+      0x825ecc24c873782f, 0x8ed400668c0c28c8,
+      0xa2f67f2dfa90563b, 0x728900802f0f32fa,
+      0xcbb41ef979346bca, 0x4f2b40a03ad2ffb9,
+      0xfea126b7d78186bc, 0xe2f610c84987bfa8,
+      0x9f24b832e6b0f436, 0xdd9ca7d2df4d7c9,
+      0xc6ede63fa05d3143, 0x91503d1c79720dbb,
+      0xf8a95fcf88747d94, 0x75a44c6397ce912a,
+      0x9b69dbe1b548ce7c, 0xc986afbe3ee11aba,
+      0xc24452da229b021b, 0xfbe85badce996168,
+      0xf2d56790ab41c2a2, 0xfae27299423fb9c3,
+      0x97c560ba6b0919a5, 0xdccd879fc967d41a,
+      0xbdb6b8e905cb600f, 0x5400e987bbc1c920,
+      0xed246723473e3813, 0x290123e9aab23b68,
+      0x9436c0760c86e30b, 0xf9a0b6720aaf6521,
+      0xb94470938fa89bce, 0xf808e40e8d5b3e69,
+      0xe7958cb87392c2c2, 0xb60b1d1230b20e04,
+      0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c2,
+      0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af3,
+      0xe2280b6c20dd5232, 0x25c6da63c38de1b0,
+      0x8d590723948a535f, 0x579c487e5a38ad0e,
+      0xb0af48ec79ace837, 0x2d835a9df0c6d851,
+      0xdcdb1b2798182244, 0xf8e431456cf88e65,
+      0x8a08f0f8bf0f156b, 0x1b8e9ecb641b58ff,
+      0xac8b2d36eed2dac5, 0xe272467e3d222f3f,
+      0xd7adf884aa879177, 0x5b0ed81dcc6abb0f,
+      0x86ccbb52ea94baea, 0x98e947129fc2b4e9,
+      0xa87fea27a539e9a5, 0x3f2398d747b36224,
+      0xd29fe4b18e88640e, 0x8eec7f0d19a03aad,
+      0x83a3eeeef9153e89, 0x1953cf68300424ac,
+      0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd7,
+      0xcdb02555653131b6, 0x3792f412cb06794d,
+      0x808e17555f3ebf11, 0xe2bbd88bbee40bd0,
+      0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec4,
+      0xc8de047564d20a8b, 0xf245825a5a445275,
+      0xfb158592be068d2e, 0xeed6e2f0f0d56712,
+      0x9ced737bb6c4183d, 0x55464dd69685606b,
+      0xc428d05aa4751e4c, 0xaa97e14c3c26b886,
+      0xf53304714d9265df, 0xd53dd99f4b3066a8,
+      0x993fe2c6d07b7fab, 0xe546a8038efe4029,
+      0xbf8fdb78849a5f96, 0xde98520472bdd033,
+      0xef73d256a5c0f77c, 0x963e66858f6d4440,
+      0x95a8637627989aad, 0xdde7001379a44aa8,
+      0xbb127c53b17ec159, 0x5560c018580d5d52,
+      0xe9d71b689dde71af, 0xaab8f01e6e10b4a6,
+      0x9226712162ab070d, 0xcab3961304ca70e8,
+      0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d22,
+      0xe45c10c42a2b3b05, 0x8cb89a7db77c506a,
+      0x8eb98a7a9a5b04e3, 0x77f3608e92adb242,
+      0xb267ed1940f1c61c, 0x55f038b237591ed3,
+      0xdf01e85f912e37a3, 0x6b6c46dec52f6688,
+      0x8b61313bbabce2c6, 0x2323ac4b3b3da015,
+      0xae397d8aa96c1b77, 0xabec975e0a0d081a,
+      0xd9c7dced53c72255, 0x96e7bd358c904a21,
+      0x881cea14545c7575, 0x7e50d64177da2e54,
+      0xaa242499697392d2, 0xdde50bd1d5d0b9e9,
+      0xd4ad2dbfc3d07787, 0x955e4ec64b44e864,
+      0x84ec3c97da624ab4, 0xbd5af13bef0b113e,
+      0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58e,
+      0xcfb11ead453994ba, 0x67de18eda5814af2,
+      0x81ceb32c4b43fcf4, 0x80eacf948770ced7,
+      0xa2425ff75e14fc31, 0xa1258379a94d028d,
+      0xcad2f7f5359a3b3e, 0x96ee45813a04330,
+      0xfd87b5f28300ca0d, 0x8bca9d6e188853fc,
+      0x9e74d1b791e07e48, 0x775ea264cf55347e,
+      0xc612062576589dda, 0x95364afe032a819e,
+      0xf79687aed3eec551, 0x3a83ddbd83f52205,
+      0x9abe14cd44753b52, 0xc4926a9672793543,
+      0xc16d9a0095928a27, 0x75b7053c0f178294,
+      0xf1c90080baf72cb1, 0x5324c68b12dd6339,
+      0x971da05074da7bee, 0xd3f6fc16ebca5e04,
+      0xbce5086492111aea, 0x88f4bb1ca6bcf585,
+      0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6,
+      0x9392ee8e921d5d07, 0x3aff322e62439fd0,
+      0xb877aa3236a4b449, 0x9befeb9fad487c3,
+      0xe69594bec44de15b, 0x4c2ebe687989a9b4,
+      0x901d7cf73ab0acd9, 0xf9d37014bf60a11,
+      0xb424dc35095cd80f, 0x538484c19ef38c95,
+      0xe12e13424bb40e13, 0x2865a5f206b06fba,
+      0x8cbccc096f5088cb, 0xf93f87b7442e45d4,
+      0xafebff0bcb24aafe, 0xf78f69a51539d749,
+      0xdbe6fecebdedd5be, 0xb573440e5a884d1c,
+      0x89705f4136b4a597, 0x31680a88f8953031,
+      0xabcc77118461cefc, 0xfdc20d2b36ba7c3e,
+      0xd6bf94d5e57a42bc, 0x3d32907604691b4d,
+      0x8637bd05af6c69b5, 0xa63f9a49c2c1b110,
+      0xa7c5ac471b478423, 0xfcf80dc33721d54,
+      0xd1b71758e219652b, 0xd3c36113404ea4a9,
+      0x83126e978d4fdf3b, 0x645a1cac083126ea,
+      0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4,
+      0xcccccccccccccccc, 0xcccccccccccccccd,
+      0x8000000000000000, 0x0,
+      0xa000000000000000, 0x0,
+      0xc800000000000000, 0x0,
+      0xfa00000000000000, 0x0,
+      0x9c40000000000000, 0x0,
+      0xc350000000000000, 0x0,
+      0xf424000000000000, 0x0,
+      0x9896800000000000, 0x0,
+      0xbebc200000000000, 0x0,
+      0xee6b280000000000, 0x0,
+      0x9502f90000000000, 0x0,
+      0xba43b74000000000, 0x0,
+      0xe8d4a51000000000, 0x0,
+      0x9184e72a00000000, 0x0,
+      0xb5e620f480000000, 0x0,
+      0xe35fa931a0000000, 0x0,
+      0x8e1bc9bf04000000, 0x0,
+      0xb1a2bc2ec5000000, 0x0,
+      0xde0b6b3a76400000, 0x0,
+      0x8ac7230489e80000, 0x0,
+      0xad78ebc5ac620000, 0x0,
+      0xd8d726b7177a8000, 0x0,
+      0x878678326eac9000, 0x0,
+      0xa968163f0a57b400, 0x0,
+      0xd3c21bcecceda100, 0x0,
+      0x84595161401484a0, 0x0,
+      0xa56fa5b99019a5c8, 0x0,
+      0xcecb8f27f4200f3a, 0x0,
+      0x813f3978f8940984, 0x4000000000000000,
+      0xa18f07d736b90be5, 0x5000000000000000,
+      0xc9f2c9cd04674ede, 0xa400000000000000,
+      0xfc6f7c4045812296, 0x4d00000000000000,
+      0x9dc5ada82b70b59d, 0xf020000000000000,
+      0xc5371912364ce305, 0x6c28000000000000,
+      0xf684df56c3e01bc6, 0xc732000000000000,
+      0x9a130b963a6c115c, 0x3c7f400000000000,
+      0xc097ce7bc90715b3, 0x4b9f100000000000,
+      0xf0bdc21abb48db20, 0x1e86d40000000000,
+      0x96769950b50d88f4, 0x1314448000000000,
+      0xbc143fa4e250eb31, 0x17d955a000000000,
+      0xeb194f8e1ae525fd, 0x5dcfab0800000000,
+      0x92efd1b8d0cf37be, 0x5aa1cae500000000,
+      0xb7abc627050305ad, 0xf14a3d9e40000000,
+      0xe596b7b0c643c719, 0x6d9ccd05d0000000,
+      0x8f7e32ce7bea5c6f, 0xe4820023a2000000,
+      0xb35dbf821ae4f38b, 0xdda2802c8a800000,
+      0xe0352f62a19e306e, 0xd50b2037ad200000,
+      0x8c213d9da502de45, 0x4526f422cc340000,
+      0xaf298d050e4395d6, 0x9670b12b7f410000,
+      0xdaf3f04651d47b4c, 0x3c0cdd765f114000,
+      0x88d8762bf324cd0f, 0xa5880a69fb6ac800,
+      0xab0e93b6efee0053, 0x8eea0d047a457a00,
+      0xd5d238a4abe98068, 0x72a4904598d6d880,
+      0x85a36366eb71f041, 0x47a6da2b7f864750,
+      0xa70c3c40a64e6c51, 0x999090b65f67d924,
+      0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d,
+      0x82818f1281ed449f, 0xbff8f10e7a8921a4,
+      0xa321f2d7226895c7, 0xaff72d52192b6a0d,
+      0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490,
+      0xfee50b7025c36a08, 0x2f236d04753d5b4,
+      0x9f4f2726179a2245, 0x1d762422c946590,
+      0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5,
+      0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2,
+      0x9b934c3b330c8577, 0x63cc55f49f88eb2f,
+      0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb,
+      0xf316271c7fc3908a, 0x8bef464e3945ef7a,
+      0x97edd871cfda3a56, 0x97758bf0e3cbb5ac,
+      0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317,
+      0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd,
+      0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a,
+      0xb975d6b6ee39e436, 0xb3e2fd538e122b44,
+      0xe7d34c64a9c85d44, 0x60dbbca87196b616,
+      0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd,
+      0xb51d13aea4a488dd, 0x6babab6398bdbe41,
+      0xe264589a4dcdab14, 0xc696963c7eed2dd1,
+      0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2,
+      0xb0de65388cc8ada8, 0x3b25a55f43294bcb,
+      0xdd15fe86affad912, 0x49ef0eb713f39ebe,
+      0x8a2dbf142dfcc7ab, 0x6e3569326c784337,
+      0xacb92ed9397bf996, 0x49c2c37f07965404,
+      0xd7e77a8f87daf7fb, 0xdc33745ec97be906,
+      0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3,
+      0xa8acd7c0222311bc, 0xc40832ea0d68ce0c,
+      0xd2d80db02aabd62b, 0xf50a3fa490c30190,
+      0x83c7088e1aab65db, 0x792667c6da79e0fa,
+      0xa4b8cab1a1563f52, 0x577001b891185938,
+      0xcde6fd5e09abcf26, 0xed4c0226b55e6f86,
+      0x80b05e5ac60b6178, 0x544f8158315b05b4,
+      0xa0dc75f1778e39d6, 0x696361ae3db1c721,
+      0xc913936dd571c84c, 0x3bc3a19cd1e38e9,
+      0xfb5878494ace3a5f, 0x4ab48a04065c723,
+      0x9d174b2dcec0e47b, 0x62eb0d64283f9c76,
+      0xc45d1df942711d9a, 0x3ba5d0bd324f8394,
+      0xf5746577930d6500, 0xca8f44ec7ee36479,
+      0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb,
+      0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e,
+      0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e,
+      0x95d04aee3b80ece5, 0xbba1f1d158724a12,
+      0xbb445da9ca61281f, 0x2a8a6e45ae8edc97,
+      0xea1575143cf97226, 0xf52d09d71a3293bd,
+      0x924d692ca61be758, 0x593c2626705f9c56,
+      0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c,
+      0xe498f455c38b997a, 0xb6dfb9c0f956447,
+      0x8edf98b59a373fec, 0x4724bd4189bd5eac,
+      0xb2977ee300c50fe7, 0x58edec91ec2cb657,
+      0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed,
+      0x8b865b215899f46c, 0xbd79e0d20082ee74,
+      0xae67f1e9aec07187, 0xecd8590680a3aa11,
+      0xda01ee641a708de9, 0xe80e6f4820cc9495,
+      0x884134fe908658b2, 0x3109058d147fdcdd,
+      0xaa51823e34a7eede, 0xbd4b46f0599fd415,
+      0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a,
+      0x850fadc09923329e, 0x3e2cf6bc604ddb0,
+      0xa6539930bf6bff45, 0x84db8346b786151c,
+      0xcfe87f7cef46ff16, 0xe612641865679a63,
+      0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e,
+      0xa26da3999aef7749, 0xe3be5e330f38f09d,
+      0xcb090c8001ab551c, 0x5cadf5bfd3072cc5,
+      0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6,
+      0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa,
+      0xc646d63501a1511d, 0xb281e1fd541501b8,
+      0xf7d88bc24209a565, 0x1f225a7ca91a4226,
+      0x9ae757596946075f, 0x3375788de9b06958,
+      0xc1a12d2fc3978937, 0x52d6b1641c83ae,
+      0xf209787bb47d6b84, 0xc0678c5dbd23a49a,
+      0x9745eb4d50ce6332, 0xf840b7ba963646e0,
+      0xbd176620a501fbff, 0xb650e5a93bc3d898,
+      0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe,
+      0x93ba47c980e98cdf, 0xc66f336c36b10137,
+      0xb8a8d9bbe123f017, 0xb80b0047445d4184,
+      0xe6d3102ad96cec1d, 0xa60dc059157491e5,
+      0x9043ea1ac7e41392, 0x87c89837ad68db2f,
+      0xb454e4a179dd1877, 0x29babe4598c311fb,
+      0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a,
+      0x8ce2529e2734bb1d, 0x1899e4a65f58660c,
+      0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f,
+      0xdc21a1171d42645d, 0x76707543f4fa1f73,
+      0x899504ae72497eba, 0x6a06494a791c53a8,
+      0xabfa45da0edbde69, 0x487db9d17636892,
+      0xd6f8d7509292d603, 0x45a9d2845d3c42b6,
+      0x865b86925b9bc5c2, 0xb8a2392ba45a9b2,
+      0xa7f26836f282b732, 0x8e6cac7768d7141e,
+      0xd1ef0244af2364ff, 0x3207d795430cd926,
+      0x8335616aed761f1f, 0x7f44e6bd49e807b8,
+      0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6,
+      0xcd036837130890a1, 0x36dba887c37a8c0f,
+      0x802221226be55a64, 0xc2494954da2c9789,
+      0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c,
+      0xc83553c5c8965d3d, 0x6f92829494e5acc7,
+      0xfa42a8b73abbf48c, 0xcb772339ba1f17f9,
+      0x9c69a97284b578d7, 0xff2a760414536efb,
+      0xc38413cf25e2d70d, 0xfef5138519684aba,
+      0xf46518c2ef5b8cd1, 0x7eb258665fc25d69,
+      0x98bf2f79d5993802, 0xef2f773ffbd97a61,
+      0xbeeefb584aff8603, 0xaafb550ffacfd8fa,
+      0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38,
+      0x952ab45cfa97a0b2, 0xdd945a747bf26183,
+      0xba756174393d88df, 0x94f971119aeef9e4,
+      0xe912b9d1478ceb17, 0x7a37cd5601aab85d,
+      0x91abb422ccb812ee, 0xac62e055c10ab33a,
+      0xb616a12b7fe617aa, 0x577b986b314d6009,
+      0xe39c49765fdf9d94, 0xed5a7e85fda0b80b,
+      0x8e41ade9fbebc27d, 0x14588f13be847307,
+      0xb1d219647ae6b31c, 0x596eb2d8ae258fc8,
+      0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb,
+      0x8aec23d680043bee, 0x25de7bb9480d5854,
+      0xada72ccc20054ae9, 0xaf561aa79a10ae6a,
+      0xd910f7ff28069da4, 0x1b2ba1518094da04,
+      0x87aa9aff79042286, 0x90fb44d2f05d0842,
+      0xa99541bf57452b28, 0x353a1607ac744a53,
+      0xd3fa922f2d1675f2, 0x42889b8997915ce8,
+      0x847c9b5d7c2e09b7, 0x69956135febada11,
+      0xa59bc234db398c25, 0x43fab9837e699095,
+      0xcf02b2c21207ef2e, 0x94f967e45e03f4bb,
+      0x8161afb94b44f57d, 0x1d1be0eebac278f5,
+      0xa1ba1ba79e1632dc, 0x6462d92a69731732,
+      0xca28a291859bbf93, 0x7d7b8f7503cfdcfe,
+      0xfcb2cb35e702af78, 0x5cda735244c3d43e,
+      0x9defbf01b061adab, 0x3a0888136afa64a7,
+      0xc56baec21c7a1916, 0x88aaa1845b8fdd0,
+      0xf6c69a72a3989f5b, 0x8aad549e57273d45,
+      0x9a3c2087a63f6399, 0x36ac54e2f678864b,
+      0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd,
+      0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5,
+      0x969eb7c47859e743, 0x9f644ae5a4b1b325,
+      0xbc4665b596706114, 0x873d5d9f0dde1fee,
+      0xeb57ff22fc0c7959, 0xa90cb506d155a7ea,
+      0x9316ff75dd87cbd8, 0x9a7f12442d588f2,
+      0xb7dcbf5354e9bece, 0xc11ed6d538aeb2f,
+      0xe5d3ef282a242e81, 0x8f1668c8a86da5fa,
+      0x8fa475791a569d10, 0xf96e017d694487bc,
+      0xb38d92d760ec4455, 0x37c981dcc395a9ac,
+      0xe070f78d3927556a, 0x85bbe253f47b1417,
+      0x8c469ab843b89562, 0x93956d7478ccec8e,
+      0xaf58416654a6babb, 0x387ac8d1970027b2,
+      0xdb2e51bfe9d0696a, 0x6997b05fcc0319e,
+      0x88fcf317f22241e2, 0x441fece3bdf81f03,
+      0xab3c2fddeeaad25a, 0xd527e81cad7626c3,
+      0xd60b3bd56a5586f1, 0x8a71e223d8d3b074,
+      0x85c7056562757456, 0xf6872d5667844e49,
+      0xa738c6bebb12d16c, 0xb428f8ac016561db,
+      0xd106f86e69d785c7, 0xe13336d701beba52,
+      0x82a45b450226b39c, 0xecc0024661173473,
+      0xa34d721642b06084, 0x27f002d7f95d0190,
+      0xcc20ce9bd35c78a5, 0x31ec038df7b441f4,
+      0xff290242c83396ce, 0x7e67047175a15271,
+      0x9f79a169bd203e41, 0xf0062c6e984d386,
+      0xc75809c42c684dd1, 0x52c07b78a3e60868,
+      0xf92e0c3537826145, 0xa7709a56ccdf8a82,
+      0x9bbcc7a142b17ccb, 0x88a66076400bb691,
+      0xc2abf989935ddbfe, 0x6acff893d00ea435,
+      0xf356f7ebf83552fe, 0x583f6b8c4124d43,
+      0x98165af37b2153de, 0xc3727a337a8b704a,
+      0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c,
+      0xeda2ee1c7064130c, 0x1162def06f79df73,
+      0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8,
+      0xb9a74a0637ce2ee1, 0x6d953e2bd7173692,
+      0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437,
+      0x910ab1d4db9914a0, 0x1d9c9892400a22a2,
+      0xb54d5e4a127f59c8, 0x2503beb6d00cab4b,
+      0xe2a0b5dc971f303a, 0x2e44ae64840fd61d,
+      0x8da471a9de737e24, 0x5ceaecfed289e5d2,
+      0xb10d8e1456105dad, 0x7425a83e872c5f47,
+      0xdd50f1996b947518, 0xd12f124e28f77719,
+      0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f,
+      0xace73cbfdc0bfb7b, 0x636cc64d1001550b,
+      0xd8210befd30efa5a, 0x3c47f7e05401aa4e,
+      0x8714a775e3e95c78, 0x65acfaec34810a71,
+      0xa8d9d1535ce3b396, 0x7f1839a741a14d0d,
+      0xd31045a8341ca07c, 0x1ede48111209a050,
+      0x83ea2b892091e44d, 0x934aed0aab460432,
+      0xa4e4b66b68b65d60, 0xf81da84d5617853f,
+      0xce1de40642e3f4b9, 0x36251260ab9d668e,
+      0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019,
+      0xa1075a24e4421730, 0xb24cf65b8612f81f,
+      0xc94930ae1d529cfc, 0xdee033f26797b627,
+      0xfb9b7cd9a4a7443c, 0x169840ef017da3b1,
+      0x9d412e0806e88aa5, 0x8e1f289560ee864e,
+      0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2,
+      0xf5b5d7ec8acb58a2, 0xae10af696774b1db,
+      0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29,
+      0xbff610b0cc6edd3f, 0x17fd090a58d32af3,
+      0xeff394dcff8a948e, 0xddfc4b4cef07f5b0,
+      0x95f83d0a1fb69cd9, 0x4abdaf101564f98e,
+      0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1,
+      0xea53df5fd18d5513, 0x84c86189216dc5ed,
+      0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4,
+      0xb7118682dbb66a77, 0x3fbc8c33221dc2a1,
+      0xe4d5e82392a40515, 0xfabaf3feaa5334a,
+      0x8f05b1163ba6832d, 0x29cb4d87f2a7400e,
+      0xb2c71d5bca9023f8, 0x743e20e9ef511012,
+      0xdf78e4b2bd342cf6, 0x914da9246b255416,
+      0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e,
+      0xae9672aba3d0c320, 0xa184ac2473b529b1,
+      0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e,
+      0x8865899617fb1871, 0x7e2fa67c7a658892,
+      0xaa7eebfb9df9de8d, 0xddbb901b98feeab7,
+      0xd51ea6fa85785631, 0x552a74227f3ea565,
+      0x8533285c936b35de, 0xd53a88958f87275f,
+      0xa67ff273b8460356, 0x8a892abaf368f137,
+      0xd01fef10a657842c, 0x2d2b7569b0432d85,
+      0x8213f56a67f6b29b, 0x9c3b29620e29fc73,
+      0xa298f2c501f45f42, 0x8349f3ba91b47b8f,
+      0xcb3f2f7642717713, 0x241c70a936219a73,
+      0xfe0efb53d30dd4d7, 0xed238cd383aa0110,
+      0x9ec95d1463e8a506, 0xf4363804324a40aa,
+      0xc67bb4597ce2ce48, 0xb143c6053edcd0d5,
+      0xf81aa16fdc1b81da, 0xdd94b7868e94050a,
+      0x9b10a4e5e9913128, 0xca7cf2b4191c8326,
+      0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0,
+      0xf24a01a73cf2dccf, 0xbc633b39673c8cec,
+      0x976e41088617ca01, 0xd5be0503e085d813,
+      0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18,
+      0xec9c459d51852ba2, 0xddf8e7d60ed1219e,
+      0x93e1ab8252f33b45, 0xcabb90e5c942b503,
+      0xb8da1662e7b00a17, 0x3d6a751f3b936243,
+      0xe7109bfba19c0c9d, 0xcc512670a783ad4,
+      0x906a617d450187e2, 0x27fb2b80668b24c5,
+      0xb484f9dc9641e9da, 0xb1f9f660802dedf6,
+      0xe1a63853bbd26451, 0x5e7873f8a0396973,
+      0x8d07e33455637eb2, 0xdb0b487b6423e1e8,
+      0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62,
+      0xdc5c5301c56b75f7, 0x7641a140cc7810fb,
+      0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d,
+      0xac2820d9623bf429, 0x546345fa9fbdcd44,
+      0xd732290fbacaf133, 0xa97c177947ad4095,
+      0x867f59a9d4bed6c0, 0x49ed8eabcccc485d,
+      0xa81f301449ee8c70, 0x5c68f256bfff5a74,
+      0xd226fc195c6a2f8c, 0x73832eec6fff3111,
+      0x83585d8fd9c25db7, 0xc831fd53c5ff7eab,
+      0xa42e74f3d032f525, 0xba3e7ca8b77f5e55,
+      0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb,
+      0x80444b5e7aa7cf85, 0x7980d163cf5b81b3,
+      0xa0555e361951c366, 0xd7e105bcc332621f,
+      0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7,
+      0xfa856334878fc150, 0xb14f98f6f0feb951,
+      0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3,
+      0xc3b8358109e84f07, 0xa862f80ec4700c8,
+      0xf4a642e14c6262c8, 0xcd27bb612758c0fa,
+      0x98e7e9cccfbd7dbd, 0x8038d51cb897789c,
+      0xbf21e44003acdd2c, 0xe0470a63e6bd56c3,
+      0xeeea5d5004981478, 0x1858ccfce06cac74,
+      0x95527a5202df0ccb, 0xf37801e0c43ebc8,
+      0xbaa718e68396cffd, 0xd30560258f54e6ba,
+      0xe950df20247c83fd, 0x47c6b82ef32a2069,
+      0x91d28b7416cdd27e, 0x4cdc331d57fa5441,
+      0xb6472e511c81471d, 0xe0133fe4adf8e952,
+      0xe3d8f9e563a198e5, 0x58180fddd97723a6,
+      0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648,
+  };
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Namespace-scope definition of the static power_of_five_128 table member.
+template <class unused>
+constexpr uint64_t
+    powers_template<unused>::power_of_five_128[number_of_entries];
+
+#endif // FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Alias for powers_template instantiated with its default template argument.
+using powers = powers_template<>;
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_DECIMAL_TO_BINARY_H
+#define FASTFLOAT_DECIMAL_TO_BINARY_H
+
+#include <cfloat>
+#include <cinttypes>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+
+namespace fast_float {
+
+// Approximates w * 5**q and returns it as a pair of 64-bit words: `high` holds
+// the most significant bits and `low` the least significant bits.
+// `bit_precision` is the number of leading bits of `high` that must be right;
+// the second table word is only used when all bits of `high` below them are
+// ones. q is taken relative to powers::smallest_power_of_five.
+template <int bit_precision>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128
+compute_product_approximation(int64_t q, uint64_t w) {
+  int const index = 2 * int(q - powers::smallest_power_of_five);
+  // The table is read as two consecutive 64-bit words per power (index and
+  // index + 1). For small values of q, e.g., q in [0,27], the product with
+  // the first word, power_of_five_128[index], already gives the exact answer.
+  value128 firstproduct =
+      full_multiplication(w, powers::power_of_five_128[index]);
+  static_assert((bit_precision >= 0) && (bit_precision <= 64),
+                " precision should  be in (0,64]"); // NOTE(review): 0 passes
+  constexpr uint64_t precision_mask =
+      (bit_precision < 64) ? (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision)
+                           : uint64_t(0xFFFFFFFFFFFFFFFF);
+  if ((firstproduct.high & precision_mask) ==
+      precision_mask) { // could further guard with  (lower + w < lower)
+    // Regarding the second product, we only need secondproduct.high, but our
+    // expectation is that the compiler will optimize this extra work away if
+    // needed.
+    value128 secondproduct =
+        full_multiplication(w, powers::power_of_five_128[index + 1]);
+    firstproduct.low += secondproduct.high; // unsigned add, may wrap around
+    if (secondproduct.high > firstproduct.low) { // wrapped: carry into high
+      firstproduct.high++;
+    }
+  }
+  return firstproduct;
+}
+
+namespace detail {
+/**
+ * Returns f + 63, where f = (((152170 + 65536) * q) >> 16).
+ *
+ * For q in (0,350), f is equal to
+ *   floor(p) + q
+ * where
+ *   p = log(5**q)/log(2) = q * log(5)/log(2)
+ *
+ * For negative values of q in (-400,0), f is equal to
+ *   -ceil(p) + q
+ * where
+ *   p = log(5**-q)/log(2) = -q * log(5)/log(2)
+ * NOTE(review): negative q relies on >> of a negative int being arithmetic.
+ */
+constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept {
+  return (((152170 + 65536) * q) >> 16) + 63;
+}
+} // namespace detail
+
+// Creates an adjusted mantissa whose power2 is offset by invalid_am_bias, for
+// significant digits `w` already multiplied by 10 ** q; `lz` is subtracted.
+template <typename binary>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 adjusted_mantissa
+compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept {
+  int hilz = int(w >> 63) ^ 1; // 1 when the top bit of w is clear, else 0
+  adjusted_mantissa answer;
+  answer.mantissa = w << hilz; // shift left by one when the top bit is clear
+  int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent();
+  answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 +
+                          invalid_am_bias);
+  return answer;
+}
+
+// Computes w * 10 ** q, without rounding the representation up.
+// The power2 of the returned value is offset by invalid_am_bias.
+template <typename binary>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+compute_error(int64_t q, uint64_t w) noexcept {
+  int lz = leading_zeroes(w);
+  w <<= lz; // shift w left by its leading-zero count before multiplying
+  value128 product =
+      compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
+  return compute_error_scaled<binary>(q, product.high, lz);
+}
+
+// Computes w * 10 ** q.
+// The returned value should be a valid number that simply needs to be packed.
+// The original note here says that in very rare cases the computation fails
+// and a negative power of 2 is returned so that the caller can recompute.
+// NOTE(review): no branch below assigns a negative power2 -- please confirm.
+template <typename binary>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+compute_float(int64_t q, uint64_t w) noexcept {
+  adjusted_mantissa answer;
+  if ((w == 0) || (q < binary::smallest_power_of_ten())) {
+    answer.power2 = 0;
+    answer.mantissa = 0;
+    // result should be zero
+    return answer;
+  }
+  if (q > binary::largest_power_of_ten()) {
+    // we want to get infinity:
+    answer.power2 = binary::infinite_power();
+    answer.mantissa = 0;
+    return answer;
+  }
+  // At this point in time q is in [powers::smallest_power_of_five,
+  // powers::largest_power_of_five].
+
+  // We want the most significant bit of w to be 1. Shift if needed.
+  int lz = leading_zeroes(w);
+  w <<= lz;
+
+  // The required precision is binary::mantissa_explicit_bits() + 3 because
+  // 1. We need the implicit bit
+  // 2. We need an extra bit for rounding purposes
+  // 3. We might lose a bit due to the "upperbit" routine (result too small,
+  // requiring a shift)
+
+  value128 product =
+      compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
+  // The computed 'product' is always sufficient.
+  // Mathematical proof:
+  // Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to
+  // appear). See script/mushtak_lemire.py
+
+  // The "compute_product_approximation" function can be slightly slower than a
+  // branchless approach: value128 product = compute_product(q, w); but in
+  // practice, we can win big with the compute_product_approximation if its
+  // additional branch is easily predicted. Which is best is data specific.
+  int upperbit = int(product.high >> 63); // top bit of product.high (0 or 1)
+  int shift = upperbit + 64 - binary::mantissa_explicit_bits() - 3;
+
+  answer.mantissa = product.high >> shift;
+
+  answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz -
+                          binary::minimum_exponent());
+  if (answer.power2 <= 0) { // we have a subnormal?
+    // Here we have that answer.power2 <= 0 so -answer.power2 >= 0
+    if (-answer.power2 + 1 >=
+        64) { // if we have more than 64 bits below the minimum exponent, you
+              // have a zero for sure.
+      answer.power2 = 0;
+      answer.mantissa = 0;
+      // result should be zero
+      return answer;
+    }
+    // next line is safe because -answer.power2 + 1 < 64
+    answer.mantissa >>= -answer.power2 + 1;
+    // Thankfully, we can't have both "round-to-even" and subnormals because
+    // "round-to-even" only occurs for powers close to 0 in the 32-bit
+    // and 64-bit case (with no more than 19 digits).
+    answer.mantissa += (answer.mantissa & 1); // round up
+    answer.mantissa >>= 1;
+    // There is a weird scenario where we only just avoid being subnormal.
+    // Suppose we start with 2.2250738585072013e-308, we end up
+    // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
+    // whereas 0x40000000000000 x 2^-1023-53  is normal. Now, we need to round
+    // up 0x3fffffffffffff x 2^-1023-53  and once we do, we are no longer
+    // subnormal, but we can only know this after rounding.
+    // So we only declare a subnormal if we are smaller than the threshold.
+    answer.power2 =
+        (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits()))
+            ? 0
+            : 1;
+    return answer;
+  }
+
+  // Usually, we round *up*, but if we fall right in between and we have an
+  // even basis, we need to round down.
+  // We are only concerned with the cases where 5**q fits in single 64-bit word.
+  if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) &&
+      (q <= binary::max_exponent_round_to_even()) &&
+      ((answer.mantissa & 3) == 1)) { // we may fall between two floats!
+    // To be in-between two floats we need that in doing
+    //   answer.mantissa = product.high >> (upperbit + 64 -
+    //   binary::mantissa_explicit_bits() - 3);
+    // ... we dropped out only zeroes. But if this happened, then we can go
+    // back!!!
+    if ((answer.mantissa << shift) == product.high) {
+      answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up
+    }
+  }
+
+  answer.mantissa += (answer.mantissa & 1); // round up
+  answer.mantissa >>= 1;
+  if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) {
+    answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits());
+    answer.power2++; // undo previous addition
+  }
+
+  answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits());
+  if (answer.power2 >= binary::infinite_power()) { // infinity
+    answer.power2 = binary::infinite_power();
+    answer.mantissa = 0;
+  }
+  return answer;
+}
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_BIGINT_H
+#define FASTFLOAT_BIGINT_H
+
+#include <algorithm>
+#include <cstdint>
+#include <climits>
+#include <cstring>
+
+
+namespace fast_float {
+
+// the limb width: we want efficient multiplication of double the bits in
+// a limb, or for 64-bit limbs, at least 64-bit multiplication where we can
+// extract the high and low parts efficiently. this is every 64-bit
+// architecture except for sparc, which emulates 128-bit multiplication.
+// `limb_bits` is spelled out as a constant because `CHAR_BIT` might not be
+// 8 on some platforms, which would make `8 * sizeof(limb)` wrong.
+#if defined(FASTFLOAT_64BIT) && !defined(__sparc)
+#define FASTFLOAT_64BIT_LIMB 1
+typedef uint64_t limb;
+constexpr size_t limb_bits = 64;
+#else
+#define FASTFLOAT_32BIT_LIMB
+typedef uint32_t limb;
+constexpr size_t limb_bits = 32;
+#endif
+
+typedef span<limb> limb_span; // run of limbs, built from (pointer, length)
+
+// number of bits in a bigint. this needs to be at least the number
+// of bits required to store the largest bigint, which is
+// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or
+// ~3600 bits, so we round to 4000.
+constexpr size_t bigint_bits = 4000;
+constexpr size_t bigint_limbs = bigint_bits / limb_bits; // rounds down
+
+// vector-like container with a fixed capacity of `size` limbs. the entire
+// buffer lives inside the object, and only the length changes.
+template <uint16_t size> struct stackvec {
+  limb data[size];
+  // limbs currently in use. we never need more than 150 limbs
+  uint16_t length{0};
+
+  stackvec() = default;
+  stackvec(stackvec const &) = delete;
+  stackvec &operator=(stackvec const &) = delete;
+  stackvec(stackvec &&) = delete;
+  stackvec &operator=(stackvec &&other) = delete;
+
+  // build a stack vector holding a copy of the limbs in `s`.
+  FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) {
+    FASTFLOAT_ASSERT(try_extend(s));
+  }
+
+  FASTFLOAT_CONSTEXPR14 limb &operator[](size_t index) noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);
+    return data[index];
+  }
+
+  FASTFLOAT_CONSTEXPR14 const limb &operator[](size_t index) const noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);
+    return data[index];
+  }
+
+  // index from the end of the container: rindex(0) is the last limb in use
+  FASTFLOAT_CONSTEXPR14 const limb &rindex(size_t index) const noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);
+    // count back from the last limb in use
+    return data[length - index - 1];
+  }
+
+  // overwrite the length with `len`; no bounds check is made.
+  FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept {
+    length = uint16_t(len);
+  }
+
+  constexpr size_t len() const noexcept { return length; }
+
+  constexpr bool is_empty() const noexcept { return length == 0; }
+
+  constexpr size_t capacity() const noexcept { return size; }
+
+  // store `value` just past the last limb in use and grow by one. there is
+  // no bounds check: the caller must already know a free slot exists.
+  FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept {
+    data[length++] = value;
+  }
+
+  // append item to vector; returns false (adding nothing) when it is full
+  FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept {
+    // guard: refuse when every slot is already in use
+    if (len() >= capacity()) {
+      return false;
+    }
+    push_unchecked(value);
+    return true;
+  }
+
+  // copy every limb of `s` onto the end of the vector. there is no bounds
+  // check: the caller must already know the limbs fit.
+  FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept {
+    std::copy_n(s.ptr, s.len(), data + length);
+    set_len(len() + s.len());
+  }
+
+  // try to add items to the vector, returning if items were added
+  FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept {
+    // guard: all-or-nothing, a span that does not fit is rejected whole
+    if (len() + s.len() > capacity()) {
+      return false;
+    }
+    extend_unchecked(s);
+    return true;
+  }
+
+  // resize the vector, without bounds checking.
+  // when growing, every appended limb is set to `value`; otherwise only
+  // the length changes.
+  FASTFLOAT_CONSTEXPR20
+  void resize_unchecked(size_t new_len, limb value) noexcept {
+    size_t const old_len = len();
+    if (new_len > old_len) {
+      // growing: fill only the newly exposed tail [old_len, new_len)
+      limb *first = data + old_len;
+      limb *last = data + new_len;
+      ::std::fill(first, last, value);
+    }
+    // in every case the length ends up as `new_len`
+    set_len(new_len);
+  }
+
+  // try to resize the vector, returning if the vector was resized.
+  FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept {
+    // guard: a length beyond the fixed capacity cannot be honored
+    if (new_len > capacity()) {
+      return false;
+    }
+    resize_unchecked(new_len, value);
+    return true;
+  }
+
+  // check if any limb from position `index` onward is non-zero, where
+  // `index` counts from the most-significant limb (the same convention as
+  // `rindex`), so the scan walks toward the least-significant limb.
+  FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept {
+    for (size_t pos = index; pos < len(); pos++) {
+      if (rindex(pos) != 0) {
+        return true;
+      }
+    }
+    // reached the least-significant end without meeting a non-zero limb
+    return false;
+  }
+
+  // normalize the big integer, so most-significant zero limbs are removed.
+  FASTFLOAT_CONSTEXPR14 void normalize() noexcept {
+    while (!is_empty() && rindex(0) == 0) {
+      length--;
+    }
+  }
+};
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t
+empty_hi64(bool &truncated) noexcept { // hi64 result used when there are no limbs
+  truncated = false; // nothing to drop
+  return 0;
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint64_hi64(uint64_t r0, bool &truncated) noexcept {
+  // one word in, one word out: no bits are reported as truncated
+  truncated = false;
+  return r0 << leading_zeroes(r0);
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint64_hi64(uint64_t r0, uint64_t r1, bool &truncated) noexcept {
+  int const lead = leading_zeroes(r0);
+  if (lead == 0) {
+    // r0 is returned untouched, so every bit of r1 is left out
+    truncated = r1 != 0;
+    return r0;
+  }
+  // top up r0 with the high `lead` bits of r1; the rest of r1 is left out
+  truncated = (r1 << lead) != 0;
+  return (r0 << lead) | (r1 >> (64 - lead));
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint32_hi64(uint32_t r0, bool &truncated) noexcept { // single 32-bit limb
+  return uint64_hi64(r0, truncated); // r0 is widened to uint64_t by the call
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint32_hi64(uint32_t r0, uint32_t r1, bool &truncated) noexcept {
+  // glue the two 32-bit limbs into one 64-bit word, with r0 on top
+  uint64_t const joined = (uint64_t(r0) << 32) | uint64_t(r1);
+  return uint64_hi64(joined, truncated);
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool &truncated) noexcept {
+  // r0 alone is the high word; r1 (on top) and r2 form the low word
+  uint64_t const high = r0;
+  uint64_t const low = (uint64_t(r1) << 32) | uint64_t(r2);
+  return uint64_hi64(high, low, truncated);
+}
+
+// add two limbs, reporting through `overflow` whether the sum wrapped.
+// we want an efficient operation: the overflow builtin is used when the
+// compiler advertises it, and the generic fallback (e.g. for msvc, where
+// we don't have built-in intrinsics) is still pretty fast.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb
+scalar_add(limb x, limb y, bool &overflow) noexcept {
+  limb z;
+// gcc and clang
+#if defined(__has_builtin)
+#if __has_builtin(__builtin_add_overflow)
+  if (!cpp20_and_in_constexpr()) { // builtin path is skipped when this is true
+    overflow = __builtin_add_overflow(x, y, &z);
+    return z;
+  }
+#endif
+#endif
+
+  // generic, this still optimizes correctly on MSVC.
+  z = x + y;
+  overflow = z < x; // an unsigned sum below one of its operands has wrapped
+  return z;
+}
+
+// compute x * y + carry: returns the low limb, writes the high limb to `carry`.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb
+scalar_mul(limb x, limb y, limb &carry) noexcept {
+#ifdef FASTFLOAT_64BIT_LIMB
+#if defined(__SIZEOF_INT128__)
+  // GCC and clang both define it as an extension.
+  __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry);
+  carry = limb(z >> limb_bits); // high limb goes back out through `carry`
+  return limb(z);
+#else
+  // fallback, no native 128-bit integer multiplication with carry.
+  // on msvc, this optimizes identically, somehow.
+  value128 z = full_multiplication(x, y);
+  bool overflow;
+  z.low = scalar_add(z.low, carry, overflow);
+  z.high += uint64_t(overflow); // cannot overflow
+  carry = z.high;
+  return z.low;
+#endif
+#else
+  uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry); // 32-bit limbs
+  carry = limb(z >> limb_bits); // high limb goes back out through `carry`
+  return limb(z);
+#endif
+}
+
+// add scalar value to bigint, with the scalar lined up against limb `start`.
+// used in grade school multiplication
+template <uint16_t size>
+inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec<size> &vec, limb y,
+                                                 size_t start) noexcept {
+  limb pending = y;
+  // ripple the pending value upward until it is absorbed
+  for (size_t pos = start; pending != 0 && pos < vec.len(); pos++) {
+    bool wrapped;
+    vec[pos] = scalar_add(vec[pos], pending, wrapped);
+    pending = limb(wrapped);
+  }
+  if (pending != 0) {
+    // ran out of limbs with a value still pending: push it as a new limb
+    FASTFLOAT_TRY(vec.try_push(pending));
+  }
+  return true;
+}
+
+// add scalar value to bigint, starting at the least-significant limb.
+template <uint16_t size>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+small_add(stackvec<size> &vec, limb y) noexcept {
+  return small_add_from(vec, y, 0); // start == 0: limb 0 receives `y`
+}
+
+// multiply bigint by scalar value, in place.
+template <uint16_t size>
+inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec,
+                                            limb y) noexcept {
+  limb high = 0; // high half handed from one limb product to the next
+  for (size_t pos = 0; pos != vec.len(); ++pos) {
+    vec[pos] = scalar_mul(vec[pos], y, high);
+  }
+  if (high != 0) {
+    FASTFLOAT_TRY(vec.try_push(high));
+  }
+  return true;
+}
+
+// add bigint `y` into `x`, with `y[0]` lined up against `x[start]`.
+// used in grade school multiplication
+template <uint16_t size>
+FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
+                                          size_t start) noexcept {
+  // the effective x buffer is from `start..x.len()`: when it is too short
+  // to hold `y`, zero-extend x first (try_resize refuses to pass capacity).
+  if (x.len() < start || y.len() > x.len() - start) {
+    FASTFLOAT_TRY(x.try_resize(y.len() + start, 0));
+  }
+
+  bool carry = false;
+  for (size_t index = 0; index < y.len(); index++) {
+    limb xi = x[index + start];
+    limb yi = y[index];
+    bool c1 = false; // wrap from adding yi
+    bool c2 = false; // wrap from adding the incoming carry
+    xi = scalar_add(xi, yi, c1);
+    if (carry) {
+      xi = scalar_add(xi, 1, c2);
+    }
+    x[index + start] = xi;
+    carry = c1 | c2; // carry into the next limb if either addition wrapped
+  }
+
+  // handle overflow: push the final carry into the limbs above `y`
+  if (carry) {
+    FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start));
+  }
+  return true;
+}
+
+// add bigint to bigint, with both aligned at their least-significant limb.
+template <uint16_t size>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+large_add_from(stackvec<size> &x, limb_span y) noexcept {
+  return large_add_from(x, y, 0); // start == 0
+}
+
+// grade-school multiplication algorithm: x *= y, one limb of y at a time.
+template <uint16_t size>
+FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
+  limb_span xs = limb_span(x.data, x.len());
+  stackvec<size> z(xs); // copy of the original x; x itself is overwritten below
+  limb_span zs = limb_span(z.data, z.len());
+
+  if (y.len() != 0) { // NOTE(review): an empty `y` leaves x as it was
+    limb y0 = y[0];
+    FASTFLOAT_TRY(small_mul(x, y0)); // x = z * y[0]
+    for (size_t index = 1; index < y.len(); index++) {
+      limb yi = y[index];
+      stackvec<size> zi;
+      if (yi != 0) {
+        // start from an empty buffer, then load a fresh copy of z
+        zi.set_len(0);
+        FASTFLOAT_TRY(zi.try_extend(zs));
+        FASTFLOAT_TRY(small_mul(zi, yi)); // zi = z * y[index]
+        limb_span zis = limb_span(zi.data, zi.len());
+        FASTFLOAT_TRY(large_add_from(x, zis, index)); // added `index` limbs up
+      }
+    }
+  }
+
+  x.normalize();
+  return true;
+}
+
+// multiply x by y: a single-limb y takes the scalar path, else long_mul.
+template <uint16_t size>
+FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec<size> &x, limb_span y) noexcept {
+  if (y.len() == 1) {
+    FASTFLOAT_TRY(small_mul(x, y[0]));
+  } else {
+    FASTFLOAT_TRY(long_mul(x, y)); // also taken when y has no limbs at all
+  }
+  return true;
+}
+
+template <typename = void> struct pow5_tables { // tables used by bigint::pow5
+  static constexpr uint32_t large_step = 135; // exponent step per large_power_of_5 multiply
+  static constexpr uint64_t small_power_of_5[] = { // entry i holds 5^i, i = 0..27
+      1UL,
+      5UL,
+      25UL,
+      125UL,
+      625UL,
+      3125UL,
+      15625UL,
+      78125UL,
+      390625UL,
+      1953125UL,
+      9765625UL,
+      48828125UL,
+      244140625UL,
+      1220703125UL,
+      6103515625UL,
+      30517578125UL,
+      152587890625UL,
+      762939453125UL,
+      3814697265625UL,
+      19073486328125UL,
+      95367431640625UL,
+      476837158203125UL,
+      2384185791015625UL,
+      11920928955078125UL,
+      59604644775390625UL,
+      298023223876953125UL,
+      1490116119384765625UL,
+      7450580596923828125UL,
+  };
+#ifdef FASTFLOAT_64BIT_LIMB
+  constexpr static limb large_power_of_5[] = { // 64-bit limbs of the large factor
+      1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL,
+      10482974169319127550UL, 198276706040285095UL};
+#else
+  constexpr static limb large_power_of_5[] = { // 32-bit limbs of the large factor
+      4279965485U, 329373468U,  4020270615U, 2137533757U, 4287402176U,
+      1057042919U, 1071430142U, 2440757623U, 381945767U,  46164893U};
+#endif
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// out-of-class definitions for the static constexpr members of pow5_tables.
+template <typename T> constexpr uint32_t pow5_tables<T>::large_step;
+
+template <typename T> constexpr uint64_t pow5_tables<T>::small_power_of_5[];
+
+template <typename T> constexpr limb pow5_tables<T>::large_power_of_5[];
+
+#endif
+
+// big integer type. implements a small subset of big integer
+// arithmetic, using simple algorithms since asymptotically
+// faster algorithms are slower for a small number of limbs.
+// all operations assume the big-integer is normalized.
+struct bigint : pow5_tables<> {
+  // storage of the limbs, in little-endian order.
+  stackvec<bigint_limbs> vec;
+
+  FASTFLOAT_CONSTEXPR20 bigint() : vec() {} // starts with no limbs
+
+  bigint(bigint const &) = delete;
+  bigint &operator=(bigint const &) = delete;
+  bigint(bigint &&) = delete;
+  bigint &operator=(bigint &&other) = delete;
+
+  FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) : vec() { // bigint equal to `value`
+#ifdef FASTFLOAT_64BIT_LIMB
+    vec.push_unchecked(value);
+#else
+    vec.push_unchecked(uint32_t(value)); // low half first: little-endian
+    vec.push_unchecked(uint32_t(value >> 32));
+#endif
+    vec.normalize(); // drop zero limbs at the most-significant end
+  }
+
+  // get the high 64 bits from the vector, and if bits were truncated.
+  // this is to get the significant digits for the float.
+  FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool &truncated) const noexcept {
+#ifdef FASTFLOAT_64BIT_LIMB
+    if (vec.len() == 0) {
+      return empty_hi64(truncated);
+    } else if (vec.len() == 1) {
+      return uint64_hi64(vec.rindex(0), truncated);
+    } else {
+      uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated);
+      truncated |= vec.nonzero(2); // any non-zero limb below the top two
+      return result;
+    }
+#else
+    if (vec.len() == 0) {
+      return empty_hi64(truncated);
+    } else if (vec.len() == 1) {
+      return uint32_hi64(vec.rindex(0), truncated);
+    } else if (vec.len() == 2) {
+      return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated);
+    } else {
+      uint64_t result =
+          uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated);
+      truncated |= vec.nonzero(3); // any non-zero limb below the top three
+      return result;
+    }
+#endif
+  }
+
+  // three-way comparison of two big integers.
+  // assumes both are normalized. if the return value is
+  // negative, other is larger, if the return value is
+  // positive, this is larger, otherwise they are equal.
+  // the limbs are stored in little-endian order, so we
+  // must compare the limbs in reverse order.
+  FASTFLOAT_CONSTEXPR20 int compare(bigint const &other) const noexcept {
+    if (vec.len() > other.vec.len()) {
+      return 1;
+    } else if (vec.len() < other.vec.len()) {
+      return -1;
+    } else {
+      for (size_t index = vec.len(); index > 0; index--) {
+        limb xi = vec[index - 1];
+        limb yi = other.vec[index - 1];
+        if (xi > yi) {
+          return 1;
+        } else if (xi < yi) {
+          return -1;
+        }
+      }
+      return 0;
+    }
+  }
+
+  // shift left each limb n bits, carrying over to the new limb
+  // returns true if we were able to shift all the digits.
+  FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept {
+    // Internally, for each item, we shift left by n, and add the previous
+    // right shifted limb-bits.
+    // For example, we transform (for u8) shifted left 2, to:
+    //      b10100100 b01000010
+    //      b10 b10010001 b00001000
+    FASTFLOAT_DEBUG_ASSERT(n != 0);
+    FASTFLOAT_DEBUG_ASSERT(n < limb_bits); // limb_bits, not sizeof * 8: CHAR_BIT
+
+    size_t shl = n;
+    size_t shr = limb_bits - shl;
+    limb prev = 0;
+    for (size_t index = 0; index < vec.len(); index++) {
+      limb xi = vec[index];
+      vec[index] = (xi << shl) | (prev >> shr);
+      prev = xi;
+    }
+
+    limb carry = prev >> shr; // bits shifted out of the top limb
+    if (carry != 0) {
+      return vec.try_push(carry);
+    }
+    return true;
+  }
+
+  // move the limbs left by `n` limbs.
+  FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept {
+    FASTFLOAT_DEBUG_ASSERT(n != 0);
+    if (n + vec.len() > vec.capacity()) {
+      return false;
+    } else if (!vec.is_empty()) {
+      // move limbs
+      limb *dst = vec.data + n;
+      limb const *src = vec.data;
+      std::copy_backward(src, src + vec.len(), dst + vec.len());
+      // fill in empty limbs
+      limb *first = vec.data;
+      limb *last = first + n;
+      ::std::fill(first, last, 0);
+      vec.set_len(n + vec.len());
+      return true;
+    } else {
+      return true; // no limbs to move: an empty bigint stays empty
+    }
+  }
+
+  // move the limbs left by `n` bits.
+  FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept {
+    size_t rem = n % limb_bits; // bit shift within a limb
+    size_t div = n / limb_bits; // whole limbs to shift by
+    if (rem != 0) {
+      FASTFLOAT_TRY(shl_bits(rem));
+    }
+    if (div != 0) {
+      FASTFLOAT_TRY(shl_limbs(div));
+    }
+    return true;
+  }
+
+  // get the number of leading zeros in the bigint.
+  FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept {
+    if (vec.is_empty()) {
+      return 0; // with no limbs, zero is reported
+    } else {
+#ifdef FASTFLOAT_64BIT_LIMB
+      return leading_zeroes(vec.rindex(0));
+#else
+      // no use defining a specialized leading_zeroes for a 32-bit type.
+      uint64_t r0 = vec.rindex(0);
+      return leading_zeroes(r0 << 32); // limb moved to the top half first
+#endif
+    }
+  }
+
+  // get the number of bits in the bigint.
+  FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept {
+    int lz = ctlz();
+    return int(limb_bits * vec.len()) - lz;
+  }
+
+  FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); }
+
+  FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); }
+
+  // multiply as if by 2 raised to a power.
+  FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); }
+
+  // multiply as if by 5 raised to a power.
+  FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept {
+    // first take steps of `large_step` with the multi-limb table entry
+    size_t large_length = sizeof(large_power_of_5) / sizeof(limb);
+    limb_span large = limb_span(large_power_of_5, large_length);
+    while (exp >= large_step) {
+      FASTFLOAT_TRY(large_mul(vec, large));
+      exp -= large_step;
+    }
+#ifdef FASTFLOAT_64BIT_LIMB
+    uint32_t small_step = 27;
+    limb max_native = 7450580596923828125UL; // small_power_of_5[27]
+#else
+    uint32_t small_step = 13;
+    limb max_native = 1220703125U; // small_power_of_5[13]
+#endif
+    while (exp >= small_step) {
+      FASTFLOAT_TRY(small_mul(vec, max_native));
+      exp -= small_step;
+    }
+    if (exp != 0) {
+      // Work around clang bug https://godbolt.org/z/zedh7rrhc
+      // This is similar to https://github.com/llvm/llvm-project/issues/47746,
+      // except the workaround described there does not work here
+      FASTFLOAT_TRY(small_mul(
+          vec, limb(((void)small_power_of_5[0], small_power_of_5[exp]))));
+    }
+
+    return true;
+  }
+
+  // multiply as if by 10 raised to a power.
+  FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept {
+    FASTFLOAT_TRY(pow5(exp)); // 10^exp == 5^exp * 2^exp
+    return pow2(exp);
+  }
+};
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_DIGIT_COMPARISON_H
+#define FASTFLOAT_DIGIT_COMPARISON_H
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+
+
+namespace fast_float {
+
+// powers of ten from 1e0 to 1e19: entry i holds 10^i.
+constexpr static uint64_t powers_of_ten_uint64[] = {1UL,
+                                                    10UL,
+                                                    100UL,
+                                                    1000UL,
+                                                    10000UL,
+                                                    100000UL,
+                                                    1000000UL,
+                                                    10000000UL,
+                                                    100000000UL,
+                                                    1000000000UL,
+                                                    10000000000UL,
+                                                    100000000000UL,
+                                                    1000000000000UL,
+                                                    10000000000000UL,
+                                                    100000000000000UL,
+                                                    1000000000000000UL,
+                                                    10000000000000000UL,
+                                                    100000000000000000UL,
+                                                    1000000000000000000UL,
+                                                    10000000000000000000UL};
+
+// calculate the exponent, in scientific notation, of the number.
+// this algorithm is not even close to optimized, but that has no practical
+// effect on performance: a cleverer digit count would cost more elsewhere
+// than it saves here, and this simple version is still fast.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t
+scientific_exponent(parsed_number_string_t<UC> &num) noexcept {
+  uint64_t rest = num.mantissa;
+  int32_t exp10 = int32_t(num.exponent);
+  // each pass strips trailing digits from `rest` and credits them to the
+  // exponent: four at a time first, then two, then one.
+  for (; rest >= 10000; rest /= 10000) {
+    exp10 += 4;
+  }
+  for (; rest >= 100; rest /= 100) {
+    exp10 += 2;
+  }
+  for (; rest >= 10; rest /= 10) {
+    exp10 += 1;
+  }
+  // `rest` is now below 10
+  return exp10;
+}
+
+// this converts a native floating-point number to an extended-precision float.
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+to_extended(T value) noexcept {
+  using equiv_uint = equiv_uint_t<T>;
+  constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
+  constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
+  constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
+
+  adjusted_mantissa am;
+  int32_t bias = binary_format<T>::mantissa_explicit_bits() -
+                 binary_format<T>::minimum_exponent();
+  equiv_uint bits; // raw bit pattern of `value`
+#if FASTFLOAT_HAS_BIT_CAST
+  bits = std::bit_cast<equiv_uint>(value);
+#else
+  ::memcpy(&bits, &value, sizeof(T));
+#endif
+  if ((bits & exponent_mask) == 0) {
+    // denormal: the exponent field is all zeros and no hidden bit is added
+    am.power2 = 1 - bias;
+    am.mantissa = bits & mantissa_mask;
+  } else {
+    // normal: read the exponent field and put the hidden bit back
+    am.power2 = int32_t((bits & exponent_mask) >>
+                        binary_format<T>::mantissa_explicit_bits());
+    am.power2 -= bias;
+    am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
+  }
+
+  return am; // only the exponent and mantissa masks were applied to `bits`
+}
+
+// get the extended precision value of the halfway point between b and b+u.
+// we are given a native float that represents b, so we need to adjust it
+// halfway between b and b+u.
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+to_extended_halfway(T value) noexcept {
+  adjusted_mantissa half = to_extended(value);
+  // 2m + 1 at one exponent lower is m + 1/2 at the original exponent
+  half.mantissa = (half.mantissa << 1) + 1;
+  half.power2 -= 1;
+  return half;
+}
+
+// round an extended-precision float to a machine float; `cb(am, shift)` shifts.
+template <typename T, typename callback>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,
+                                                         callback cb) noexcept {
+  int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
+  if (-am.power2 >= mantissa_shift) {
+    // have a denormal float
+    int32_t shift = -am.power2 + 1;
+    cb(am, std::min<int32_t>(shift, 64)); // shift is capped at 64 bits
+    // check for round-up: if rounding-nearest carried us to the hidden bit.
+    am.power2 = (am.mantissa <
+                 (uint64_t(1) << binary_format<T>::mantissa_explicit_bits()))
+                    ? 0
+                    : 1;
+    return;
+  }
+
+  // have a normal float, use the default shift.
+  cb(am, mantissa_shift);
+
+  // check for carry: the callback may have pushed the mantissa one bit wider
+  if (am.mantissa >=
+      (uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
+    am.mantissa = (uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
+    am.power2++;
+  }
+
+  // clear the hidden bit, then check for infinite: we could have carried
+  am.mantissa &= ~(uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
+  if (am.power2 >= binary_format<T>::infinite_power()) {
+    am.power2 = binary_format<T>::infinite_power();
+    am.mantissa = 0;
+  }
+}
+
+template <typename callback>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
+round_nearest_tie_even(adjusted_mantissa &am, int32_t shift,
+                       callback cb) noexcept { // cb decides whether to add one
+  uint64_t const mask = (shift == 64) ? UINT64_MAX : (uint64_t(1) << shift) - 1;
+  uint64_t const halfway = (shift == 0) ? 0 : uint64_t(1) << (shift - 1);
+  uint64_t truncated_bits = am.mantissa & mask; // the low `shift` bits
+  bool is_above = truncated_bits > halfway;
+  bool is_halfway = truncated_bits == halfway;
+
+  // shift digits into position; a shift of exactly 64 is handled explicitly
+  if (shift == 64) {
+    am.mantissa = 0;
+  } else {
+    am.mantissa >>= shift;
+  }
+  am.power2 += shift;
+
+  bool is_odd = (am.mantissa & 1) == 1; // parity of the shifted mantissa
+  am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above));
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
+round_down(adjusted_mantissa &am, int32_t shift) noexcept {
+  // truncate: drop the low `shift` bits of the mantissa with no rounding,
+  // and move the exponent up by the same amount.
+  // a shift of exactly 64 is handled explicitly rather than with `>> 64`.
+  bool const drop_all = (shift == 64);
+  am.mantissa = drop_all ? 0 : (am.mantissa >> shift);
+  am.power2 += shift;
+}
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+skip_zeros(UC const *&first, UC const *last) noexcept { // advance `first` past '0's
+  uint64_t val;
+  while (!cpp20_and_in_constexpr() &&
+         std::distance(first, last) >= int_cmp_len<UC>()) { // chunked pass
+    ::memcpy(&val, first, sizeof(uint64_t));
+    if (val != int_cmp_zeros<UC>()) {
+      break; // chunk differs from the zero pattern: finish one by one
+    }
+    first += int_cmp_len<UC>();
+  }
+  while (first != last) { // character-by-character pass
+    if (*first != UC('0')) {
+      break;
+    }
+    first++;
+  }
+}
+
+// determine if any non-zero digits were truncated.
+// all characters must be valid digits.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+is_truncated(UC const *first, UC const *last) noexcept {
+  // chunked pass: compare 64 bits of characters at once with literal 0s.
+  uint64_t val;
+  while (!cpp20_and_in_constexpr() &&
+         std::distance(first, last) >= int_cmp_len<UC>()) {
+    ::memcpy(&val, first, sizeof(uint64_t));
+    if (val != int_cmp_zeros<UC>()) {
+      return true; // chunk differs from the zero pattern
+    }
+    first += int_cmp_len<UC>();
+  }
+  while (first != last) { // character-by-character pass over what is left
+    if (*first != UC('0')) {
+      return true;
+    }
+    ++first;
+  }
+  return false; // every character in [first, last) was '0'
+}
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+is_truncated(span<UC const> s) noexcept { // span overload
+  return is_truncated(s.ptr, s.ptr + s.len()); // forwards the span's range
+}
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+parse_eight_digits(UC const *&p, limb &value, size_t &counter,
+                   size_t &count) noexcept { // fold 8 digits at `p` into `value`
+  value = value * 100000000 + parse_eight_digits_unrolled(p);
+  p += 8;
+  counter += 8; // both running digit counters advance together
+  count += 8;
+}
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
+parse_one_digit(UC const *&p, limb &value, size_t &counter,
+                size_t &count) noexcept {
+  // append the digit at `p` to the accumulator, then step past it
+  value = value * 10 + limb(*p++ - UC('0'));
+  ++counter;
+  ++count;
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+add_native(bigint &big, limb power, limb value) noexcept { // big = big * power + value
+  big.mul(power); // NOTE(review): the bool results of mul/add are discarded
+  big.add(value);
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+round_up_bigint(bigint &big, size_t &count) noexcept {
+  // need to round-up the digits, but need to avoid rounding
+  // ....9999 to ...10000, which could cause a false halfway point.
+  add_native(big, 10, 1); // so a trailing decimal digit 1 is appended instead
+  count++; // the appended digit is counted as well
+}
+
+// parse the significant digits into a big integer
+template <typename UC>
+inline FASTFLOAT_CONSTEXPR20 void
+parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
+               size_t max_digits, size_t &digits) noexcept {
+  // try to minimize the number of big integer and scalar multiplication.
+  // therefore, try to parse 8 digits at a time, and multiply by the largest
+  // scalar value (9 or 19 digits) for each step.
+  size_t counter = 0; // digits gathered in `value` since the last flush
+  digits = 0; // total digits consumed so far
+  limb value = 0;
+#ifdef FASTFLOAT_64BIT_LIMB
+  size_t step = 19;
+#else
+  size_t step = 9;
+#endif
+
+  // process all integer digits.
+  UC const *p = num.integer.ptr;
+  UC const *pend = p + num.integer.len();
+  skip_zeros(p, pend);
+  // process all digits, in increments of step per loop
+  while (p != pend) {
+    while ((std::distance(p, pend) >= 8) && (step - counter >= 8) &&
+           (max_digits - digits >= 8)) {
+      parse_eight_digits(p, value, counter, digits);
+    }
+    while (counter < step && p != pend && digits < max_digits) {
+      parse_one_digit(p, value, counter, digits);
+    }
+    if (digits == max_digits) {
+      // add the temporary value, then check if we've truncated any digits
+      add_native(result, limb(powers_of_ten_uint64[counter]), value);
+      bool truncated = is_truncated(p, pend);
+      if (num.fraction.ptr != nullptr) {
+        truncated |= is_truncated(num.fraction); // unread fraction counts too
+      }
+      if (truncated) {
+        round_up_bigint(result, digits);
+      }
+      return;
+    } else {
+      add_native(result, limb(powers_of_ten_uint64[counter]), value);
+      counter = 0; // flushed: start gathering the next group of digits
+      value = 0;
+    }
+  }
+
+  // add our fraction digits, if they're available.
+  if (num.fraction.ptr != nullptr) {
+    p = num.fraction.ptr;
+    pend = p + num.fraction.len();
+    if (digits == 0) { // no digit consumed yet: leading zeros can be skipped
+      skip_zeros(p, pend);
+    }
+    // process all digits, in increments of step per loop
+    while (p != pend) {
+      while ((std::distance(p, pend) >= 8) && (step - counter >= 8) &&
+             (max_digits - digits >= 8)) {
+        parse_eight_digits(p, value, counter, digits);
+      }
+      while (counter < step && p != pend && digits < max_digits) {
+        parse_one_digit(p, value, counter, digits);
+      }
+      if (digits == max_digits) {
+        // add the temporary value, then check if we've truncated any digits
+        add_native(result, limb(powers_of_ten_uint64[counter]), value);
+        bool truncated = is_truncated(p, pend);
+        if (truncated) {
+          round_up_bigint(result, digits);
+        }
+        return;
+      } else {
+        add_native(result, limb(powers_of_ten_uint64[counter]), value);
+        counter = 0; // flushed: start gathering the next group of digits
+        value = 0;
+      }
+    }
+  }
+
+  if (counter != 0) { // flush digits still pending in `value`, if any
+    add_native(result, limb(powers_of_ten_uint64[counter]), value);
+  }
+}
+
+template <typename T>
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
+  FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent))); // bigmant *= 10^exponent
+  adjusted_mantissa answer;
+  bool truncated; // set by hi64: were non-zero bits left below the top 64?
+  answer.mantissa = bigmant.hi64(truncated);
+  int bias = binary_format<T>::mantissa_explicit_bits() -
+             binary_format<T>::minimum_exponent();
+  answer.power2 = bigmant.bit_length() - 64 + bias;
+
+  round<T>(answer, [truncated](adjusted_mantissa &a, int32_t shift) {
+    round_nearest_tie_even(
+        a, shift,
+        [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
+          return is_above || (is_halfway && truncated) || // tie + dropped bits
+                 (is_odd && is_halfway); // exact tie: round up only when odd
+        });
+  });
+
+  return answer;
+}
+
+// the scaling here is quite simple: we have, for the real digits `m * 10^e`,
+// and for the theoretical digits `n * 2^f`. Since `e` is always negative,
+// we multiply the theoretical digits by `5^-e`, which leaves `n * 5^-e * 2^f`
+// to compare against `m * 2^e`. The remaining power of two goes to one side:
+// `2^(f - e)` onto the theoretical digits, or `2^(e - f)` onto the real ones.
+template <typename T>
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
+    bigint &bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
+  bigint &real_digits = bigmant; // alias only: pow2() below is applied to the caller's bigint
+  int32_t real_exp = exponent;
+
+  // get the value of `b`, rounded down, and get a bigint representation of b+h
+  adjusted_mantissa am_b = am;
+  // gcc7 bug: use a lambda to remove the noexcept qualifier bug with
+  // -Wnoexcept-type.
+  round<T>(am_b,
+           [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); });
+  T b;
+  to_float(false, am_b, b);
+  adjusted_mantissa theor = to_extended_halfway(b);
+  bigint theor_digits(theor.mantissa);
+  int32_t theor_exp = theor.power2;
+
+  // scale real digits and theor digits to be same power.
+  int32_t pow2_exp = theor_exp - real_exp;
+  uint32_t pow5_exp = uint32_t(-real_exp); // callers pass a negative exponent (see digit_comp)
+  if (pow5_exp != 0) {
+    FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp));
+  }
+  if (pow2_exp > 0) {
+    FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp)));
+  } else if (pow2_exp < 0) {
+    FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
+  }
+
+  // compare digits, and use it to direct rounding
+  int ord = real_digits.compare(theor_digits);
+  adjusted_mantissa answer = am;
+  round<T>(answer, [ord](adjusted_mantissa &a, int32_t shift) {
+    round_nearest_tie_even(
+        a, shift, [ord](bool is_odd, bool _, bool __) -> bool {
+          (void)_;  // not needed, since we've done our comparison
+          (void)__; // not needed, since we've done our comparison
+          if (ord > 0) { // real digits compare greater than the b+h digits
+            return true;
+          } else if (ord < 0) { // real digits compare smaller than the b+h digits
+            return false;
+          } else { // equal to b+h: the odd/even state decides
+            return is_odd;
+          }
+        });
+  });
+
+  return answer;
+}
+
+// parse the significant digits as a big integer to unambiguously round
+// the significant digits. here, we are trying to determine how to round
+// an extended float representation close to `b+h`, halfway between `b`
+// (the float rounded-down) and `b+u`, the next positive float. this
+// algorithm is always correct, and uses one of two approaches. when
+// the exponent is positive relative to the significant digits (such as
+// 1234), we create a big-integer representation, get the high 64-bits,
+// determine if any lower bits are truncated, and use that to direct
+// rounding. in case of a negative exponent relative to the significant
+// digits (such as 1.2345), we create a theoretical representation of
+// `b` as a big-integer type, scaled to the same binary exponent as
+// the actual digits. we then compare the big integer representations
+// of both, and use that to direct rounding.
+template <typename T, typename UC>
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+digit_comp(parsed_number_string_t<UC> &num, adjusted_mantissa am) noexcept {
+  // remove the invalid exponent bias
+  am.power2 -= invalid_am_bias;
+
+  int32_t sci_exp = scientific_exponent(num);
+  size_t max_digits = binary_format<T>::max_digits();
+  size_t digits = 0; // passed to parse_mantissa by reference; read back below
+  bigint bigmant;
+  parse_mantissa(bigmant, num, max_digits, digits);
+  // can't underflow, since digits is at most max_digits.
+  int32_t exponent = sci_exp + 1 - int32_t(digits);
+  if (exponent >= 0) {
+    return positive_digit_comp<T>(bigmant, exponent); // `am` is not used on this path
+  } else {
+    return negative_digit_comp<T>(bigmant, am, exponent);
+  }
+}
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_PARSE_NUMBER_H
+#define FASTFLOAT_PARSE_NUMBER_H
+
+
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <system_error>
+
+namespace fast_float {
+
+namespace detail {
+/**
+ * Special case +inf, -inf, nan, infinity, -infinity.
+ * The case comparisons could be made much faster given that we know that the
+ * strings are null-free and fixed.
+ **/
+template <typename T, typename UC>
+from_chars_result_t<UC>
+    FASTFLOAT_CONSTEXPR14 parse_infnan(UC const *first, UC const *last,
+                                       T &value, chars_format fmt) noexcept {
+  from_chars_result_t<UC> answer{};
+  answer.ptr = first; // on failure the original start (sign included) is reported
+  answer.ec = std::errc(); // be optimistic
+  // assume first < last, so dereference without checks;
+  bool const minusSign = (*first == UC('-'));
+  // C++17 20.19.3.(7.1) forbids '+' here; it is skipped only with allow_leading_plus
+  if ((*first == UC('-')) ||
+      (uint64_t(fmt & chars_format::allow_leading_plus) &&
+       (*first == UC('+')))) {
+    ++first;
+  }
+  if (last - first >= 3) {
+    if (fastfloat_strncasecmp(first, str_const_nan<UC>(), 3)) {
+      answer.ptr = (first += 3);
+      value = minusSign ? -std::numeric_limits<T>::quiet_NaN()
+                        : std::numeric_limits<T>::quiet_NaN();
+      // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7,
+      // C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan).
+      if (first != last && *first == UC('(')) {
+        for (UC const *ptr = first + 1; ptr != last; ++ptr) {
+          if (*ptr == UC(')')) {
+            answer.ptr = ptr + 1; // valid nan(n-char-seq-opt)
+            break;
+          } else if (!((UC('a') <= *ptr && *ptr <= UC('z')) ||
+                       (UC('A') <= *ptr && *ptr <= UC('Z')) ||
+                       (UC('0') <= *ptr && *ptr <= UC('9')) || *ptr == UC('_')))
+            break; // forbidden char, not nan(n-char-seq-opt)
+        }
+      } // without a closing ')', answer.ptr stays just past "nan"
+      return answer;
+    }
+    if (fastfloat_strncasecmp(first, str_const_inf<UC>(), 3)) {
+      if ((last - first >= 8) &&
+          fastfloat_strncasecmp(first + 3, str_const_inf<UC>() + 3, 5)) {
+        answer.ptr = first + 8; // the 8-character spelling matched in full
+      } else {
+        answer.ptr = first + 3; // only the 3-character spelling matched
+      }
+      value = minusSign ? -std::numeric_limits<T>::infinity()
+                        : std::numeric_limits<T>::infinity();
+      return answer;
+    }
+  }
+  answer.ec = std::errc::invalid_argument; // neither spelling matched; `value` is not written
+  return answer;
+}
+
+/**
+ * Returns true if the floating-point rounding mode is to 'nearest'.
+ * It is the default on most systems. This function is meant to be inexpensive.
+ * Credit : @mwalcott3
+ */
+fastfloat_really_inline bool rounds_to_nearest() noexcept {
+  // https://lemire.me/blog/2020/06/26/gcc-not-nearest/
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return false; // any other evaluation method: report 'not nearest' without probing
+#endif
+  // See
+  // A fast function to check your floating-point rounding mode
+  // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/
+  //
+  // This function is meant to be equivalent to :
+  // prior: #include <cfenv>
+  //  return fegetround() == FE_TONEAREST;
+  // However, it is expected to be much faster than the fegetround()
+  // function call.
+  //
+  // The volatile keyword prevents the compiler from computing the function
+  // at compile-time.
+  // There might be other ways to prevent compile-time optimizations (e.g.,
+  // asm). The value does not need to be std::numeric_limits<float>::min(), any
+  // small value so that 1 + x should round to 1 would do (after accounting for
+  // excess precision, as in 387 instructions).
+  static float volatile fmin = std::numeric_limits<float>::min();
+  float fmini = fmin; // we copy it so that it gets loaded at most once.
+//
+// Explanation:
+// Only when fegetround() == FE_TONEAREST do we have that
+// fmin + 1.0f == 1.0f - fmin.
+//
+// FE_UPWARD:
+//  fmin + 1.0f > 1
+//  1.0f - fmin == 1
+//
+// FE_DOWNWARD or  FE_TOWARDZERO:
+//  fmin + 1.0f == 1
+//  1.0f - fmin < 1
+//
+// Note: This may fail to be accurate if fast-math has been
+// enabled, as rounding conventions may not apply.
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(push)
+//  todo: is there a VS warning?
+//  see
+//  https://stackoverflow.com/questions/46079446/is-there-a-warning-for-floating-point-equality-checking-in-visual-studio-2013
+#elif defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wfloat-equal"
+#elif defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#endif
+  return (fmini + 1.0f == 1.0f - fmini); // exact comparison is intended here
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(pop)
+#elif defined(__clang__)
+#pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+}
+
+} // namespace detail
+
+template <typename T> struct from_chars_caller { // primary template: no type translation
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, T &value,
+       parse_options_t<UC> options) noexcept {
+    return from_chars_advanced(first, last, value, options); // forwards every argument unchanged
+  }
+};
+
+#ifdef __STDCPP_FLOAT32_T__
+template <> struct from_chars_caller<std::float32_t> {
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, std::float32_t &value,
+       parse_options_t<UC> options) noexcept {
+    // std::float32_t is parsed through a plain float temporary. The temporary
+    // is zero-initialized and only copied out when something was parsed, so a
+    // failed parse neither reads an indeterminate float nor clobbers `value`.
+    float val = 0.0f;
+    auto ret = from_chars_advanced(first, last, val, options);
+    if (ret.ec != std::errc::invalid_argument) { value = val; }
+    return ret;
+  }
+};
+#endif
+
+#ifdef __STDCPP_FLOAT64_T__
+template <> struct from_chars_caller<std::float64_t> {
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, std::float64_t &value,
+       parse_options_t<UC> options) noexcept {
+    // std::float64_t is parsed through a plain double temporary. The temporary
+    // is zero-initialized and only copied out when something was parsed, so a
+    // failed parse neither reads an indeterminate double nor clobbers `value`.
+    double val = 0.0;
+    auto ret = from_chars_advanced(first, last, val, options);
+    if (ret.ec != std::errc::invalid_argument) { value = val; }
+    return ret;
+  }
+};
+#endif
+
+template <typename T, typename UC, typename> // format-only entry point
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value,
+           chars_format fmt /*= chars_format::general*/) noexcept {
+  return from_chars_caller<T>::call(first, last, value, // per-type dispatch via from_chars_caller<T>
+                                    parse_options_t<UC>(fmt)); // options built from the format alone
+}
+
+/**
+ * This overload takes a parsed_number_string_t structure that was created and
+ * populated either by the from_chars_advanced overload taking a character
+ * range and parsing options, or by a custom parsing function of the user.
+ */
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
+
+  static_assert(is_supported_float_type<T>::value,
+                "only some floating-point types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  from_chars_result_t<UC> answer;
+
+  answer.ec = std::errc(); // be optimistic
+  answer.ptr = pns.lastmatch;
+  // The implementation of the Clinger's fast path is convoluted because
+  // we want round-to-nearest in all cases, irrespective of the rounding mode
+  // selected on the thread.
+  // We proceed optimistically, assuming that detail::rounds_to_nearest()
+  // returns true.
+  if (binary_format<T>::min_exponent_fast_path() <= pns.exponent &&
+      pns.exponent <= binary_format<T>::max_exponent_fast_path() &&
+      !pns.too_many_digits) {
+    // Unfortunately, the conventional Clinger's fast path is only possible
+    // when the system rounds to the nearest float.
+    //
+    // We expect the next branch to almost always be selected.
+    // We could check it first (before the previous branch), but
+    // there might be performance advantages at having the check
+    // be last.
+    if (!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) {
+      // We have that fegetround() == FE_TONEAREST.
+      // Next is Clinger's fast path.
+      if (pns.mantissa <= binary_format<T>::max_mantissa_fast_path()) {
+        value = T(pns.mantissa);
+        if (pns.exponent < 0) {
+          value = value / binary_format<T>::exact_power_of_ten(-pns.exponent);
+        } else {
+          value = value * binary_format<T>::exact_power_of_ten(pns.exponent);
+        }
+        if (pns.negative) {
+          value = -value;
+        }
+        return answer;
+      }
+    } else {
+      // We do not have that fegetround() == FE_TONEAREST.
+      // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's
+      // proposal
+      if (pns.exponent >= 0 &&
+          pns.mantissa <=
+              binary_format<T>::max_mantissa_fast_path(pns.exponent)) {
+#if defined(__clang__) || defined(FASTFLOAT_32BIT)
+        // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD
+        if (pns.mantissa == 0) {
+          value = pns.negative ? T(-0.) : T(0.);
+          return answer;
+        }
+#endif
+        value = T(pns.mantissa) *
+                binary_format<T>::exact_power_of_ten(pns.exponent);
+        if (pns.negative) {
+          value = -value;
+        }
+        return answer;
+      }
+    }
+  }
+  adjusted_mantissa am = // general path: taken whenever no fast path returned above
+      compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
+  if (pns.too_many_digits && am.power2 >= 0) { // NOTE(review): too_many_digits presumably means the mantissa was cut short
+    if (am != compute_float<binary_format<T>>(pns.exponent, pns.mantissa + 1)) { // mantissa and mantissa + 1 disagree
+      am = compute_error<binary_format<T>>(pns.exponent, pns.mantissa);
+    }
+  }
+  // If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa)
+  // and we have an invalid power (am.power2 < 0), then we need to go the long
+  // way around again. This is very uncommon.
+  if (am.power2 < 0) {
+    am = digit_comp<T>(pns, am);
+  }
+  to_float(pns.negative, am, value);
+  // Test for over/underflow.
+  if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || // nonzero digits became a zero result
+      am.power2 == binary_format<T>::infinite_power()) { // result carries the infinite exponent
+    answer.ec = std::errc::result_out_of_range; // `value` has still been written by to_float above
+  }
+  return answer;
+}
+
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_float_advanced(UC const *first, UC const *last, T &value,
+                          parse_options_t<UC> options) noexcept {
+
+  static_assert(is_supported_float_type<T>::value,
+                "only some floating-point types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  bool const skip_ws = uint64_t(fmt & chars_format::skip_white_space) != 0;
+  bool const json_mode = uint64_t(fmt & detail::basic_json_fmt) != 0;
+  bool const reject_infnan = uint64_t(fmt & chars_format::no_infnan) != 0;
+
+  // Step over leading whitespace when the format asks for it.
+  for (; skip_ws && first != last && fast_float::is_space(*first); ++first) {
+  }
+
+  // Result handed back whenever no number can be read at `first`.
+  from_chars_result_t<UC> no_match;
+  no_match.ec = std::errc::invalid_argument;
+  no_match.ptr = first;
+  if (first == last) {
+    return no_match;
+  }
+
+  parsed_number_string_t<UC> pns =
+      json_mode ? parse_number_string<true, UC>(first, last, options)
+                : parse_number_string<false, UC>(first, last, options);
+  if (pns.valid) {
+    // Hand the pre-parsed digits to the overload that does the conversion.
+    return from_chars_advanced(pns, value);
+  }
+  // Not a number: either fail outright or try the inf/nan spellings.
+  if (reject_infnan) {
+    return no_match;
+  }
+  return detail::parse_infnan(first, last, value, fmt);
+}
+
+template <typename T, typename UC, typename> // integer entry point taking a numeric base
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value, int base) noexcept {
+
+  static_assert(is_supported_integer_type<T>::value,
+                "only integer types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  parse_options_t<UC> options; // default-constructed; only the base is overridden
+  options.base = base; // not validated here; from_chars_int_advanced rejects bases outside 2..36
+  return from_chars_advanced(first, last, value, options);
+}
+
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_int_advanced(UC const *first, UC const *last, T &value,
+                        parse_options_t<UC> options) noexcept {
+
+  static_assert(is_supported_integer_type<T>::value,
+                "only integer types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  bool const skip_ws = uint64_t(fmt & chars_format::skip_white_space) != 0;
+
+  // Step over leading whitespace when the format asks for it.
+  for (; skip_ws && first != last && fast_float::is_space(*first); ++first) {
+  }
+
+  // Reject empty input and bases outside [2, 36] before parsing any digit.
+  int const base = options.base;
+  if (first == last || !(2 <= base && base <= 36)) {
+    from_chars_result_t<UC> failure;
+    failure.ec = std::errc::invalid_argument;
+    failure.ptr = first;
+    return failure;
+  }
+  return parse_int_string(first, last, value, options);
+}
+
+template <size_t TypeIx> struct from_chars_advanced_caller { // primary: any index without a specialization
+  static_assert(TypeIx > 0, "unsupported type"); // index 0 is what an unsupported T produces
+};
+
+template <> struct from_chars_advanced_caller<1> { // index 1: supported floating-point type
+  template <typename T, typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, T &value,
+       parse_options_t<UC> options) noexcept {
+    return from_chars_float_advanced(first, last, value, options);
+  }
+};
+
+template <> struct from_chars_advanced_caller<2> { // index 2: supported integer type
+  template <typename T, typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, T &value,
+       parse_options_t<UC> options) noexcept {
+    return from_chars_int_advanced(first, last, value, options);
+  }
+};
+
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_advanced(UC const *first, UC const *last, T &value,
+                    parse_options_t<UC> options) noexcept {
+  // Caller index: +1 for a supported float type, +2 for a supported integer.
+  constexpr size_t type_ix = size_t(is_supported_float_type<T>::value) +
+                             2 * size_t(is_supported_integer_type<T>::value);
+  return from_chars_advanced_caller<type_ix>::call(first, last, value, options);
+}
+
+} // namespace fast_float
+
+#endif
+
diff --git a/sandbox/parse_fp/obj_parse_float.h b/sandbox/parse_fp/obj_parse_float.h
new file mode 100644
index 00000000..454faa78
--- /dev/null
+++ b/sandbox/parse_fp/obj_parse_float.h
@@ -0,0 +1,227 @@
+// obj_parse_float.h - OBJ-specific float parser built on fast_float.
+//
+// Handles OBJ quirks:
+//   - Leading '+' sign  (e.g. "+1.0", "+0.5e+2")
+//   - nan / NaN / NAN   (case-insensitive)
+//   - inf / Inf / INF / infinity (case-insensitive, with optional sign)
+//
+// Non-finite replacement (NumPy-style defaults):
+//   nan  ->  0.0
+//   inf  ->  std::numeric_limits<T>::max()
+//  -inf  ->  std::numeric_limits<T>::lowest()
+//
+// Usage:
+//   double val;
+//   const char *end;
+//   bool ok = obj::parseFloat("  +1.5e3 rest", &val, &end);
+//   // val == 1500.0, end points to ' ' before "rest"
+//
+//   obj::ParseOptions opts;
+//   opts.nan_value = -1.0;  // replace nan with -1
+//   ok = obj::parseFloat("nan", &val, &end, opts);
+//   // val == -1.0
+//
+// SPDX-License-Identifier: MIT OR Apache-2.0 OR BSL-1.0
+
+#ifndef OBJ_PARSE_FLOAT_H_
+#define OBJ_PARSE_FLOAT_H_
+
+#include "fast_float.h"
+
+#include <cstring>
+#include <limits>
+#include <system_error>
+
+namespace obj {
+
+// Values substituted for non-finite input (nan, +inf, -inf).
+template <typename T>
+struct ParseOptions {
+  T nan_value = static_cast<T>(0.0);
+  T inf_value = (std::numeric_limits<T>::max)();
+  T neg_inf_value = std::numeric_limits<T>::lowest();
+
+  // User-provided on purpose: the struct stays a non-aggregate, and default
+  // construction yields exactly the member initializers above.
+  ParseOptions() {}
+};
+
+namespace detail {
+
+// Case-insensitive (ASCII only) prefix match of `prefix` against [p, end).
+// Returns the position just past the matched characters, or a null pointer
+// when the input is too short or some character differs.
+inline const char *match_iprefix(const char *p, const char *end,
+                                 const char *prefix) {
+  for (; *prefix != '\0'; ++p, ++prefix) {
+    if (p == end) return nullptr;
+    // Lower both sides by hand so the result never depends on the locale.
+    const char have = (*p >= 'A' && *p <= 'Z') ? char(*p + 32) : *p;
+    const char want =
+        (*prefix >= 'A' && *prefix <= 'Z') ? char(*prefix + 32) : *prefix;
+    if (have != want) return nullptr;
+  }
+  // Every prefix character matched; `p` now sits just past the match.
+  return p;
+}
+
+inline bool is_obj_delim(char c) {  // true for ' ', '\t', '\r', '\n' and '\0'
+  switch (c) { case ' ': case '\t': case '\r': case '\n': case '\0': return true; }
+  return false;
+}
+
+// Recognizes an optionally signed "nan", "inf" or "infinity" (any ASCII case)
+// at the start of [first, last). On a match the replacement from `opts` is
+// stored in *result, *end_ptr is set just past the matched letters, and true
+// is returned; otherwise false is returned and both outputs are untouched.
+template <typename T>
+inline bool tryParseNanInf(const char *first, const char *last, T *result,
+                           const char **end_ptr,
+                           const ParseOptions<T> &opts) {
+  if (first >= last) return false;
+
+  // At most one sign character is consumed; only '-' changes the outcome.
+  const bool negative = (*first == '-');
+  const char *body = first;
+  if (negative || *first == '+') {
+    ++body;
+  }
+
+  // A lone sign carries no keyword.
+  if (body >= last) return false;
+
+  const char *stop = match_iprefix(body, last, "nan");
+  if (stop != nullptr) {
+    // The sign of a nan is ignored: one replacement covers both.
+    *result = opts.nan_value;
+    *end_ptr = stop;
+    return true;
+  }
+
+  // Not a nan: the only other accepted spellings start with "inf".
+  stop = match_iprefix(body, last, "inf");
+  if (stop == nullptr) {
+    return false;
+  }
+
+  // Prefer the long spelling: extend the match when "inity" follows "inf".
+  const char *longer = match_iprefix(stop, last, "inity");
+  if (longer != nullptr) {
+    stop = longer;
+  }
+
+  // The sign selects which infinity replacement is stored.
+  *result = negative ? opts.neg_inf_value : opts.inf_value;
+  *end_ptr = stop;
+  return true;
+}
+
+}  // namespace detail
+
+// Parse a float/double from a null-terminated OBJ token string.
+//
+// - Leading spaces and tabs are skipped.
+// - A leading '+' is accepted (fast_float's allow_leading_plus flag).
+// - nan/inf spellings are replaced by the values configured in `opts`.
+// - On success returns true and *end_ptr is the character after the parsed text.
+// - On failure returns false; *end_ptr is the terminating NUL when the input
+//   is blank, and `s` otherwise.
+template <typename T>
+inline bool parseFloat(const char *s, T *result, const char **end_ptr,
+                       const ParseOptions<T> &opts = ParseOptions<T>()) {
+  // Step over leading blanks to reach the first character of the token.
+  const char *token = s;
+  while (*token == ' ' || *token == '\t') {
+    ++token;
+  }
+
+  // Nothing but blanks: report failure at the terminator.
+  if (*token == '\0') {
+    *end_ptr = token;
+    return false;
+  }
+
+  // The token extends to the next OBJ delimiter or the terminating NUL.
+  // Bounding the parse this way keeps fast_float from reading past the
+  // current token/buffer.
+  const char *token_end = token;
+  while (*token_end != '\0' && !detail::is_obj_delim(*token_end)) {
+    ++token_end;
+  }
+
+  // nan/inf spellings start with [nNiI], optionally after one sign character.
+  const char *lead = token;
+  if (*lead == '+' || *lead == '-') {
+    ++lead;
+  }
+  const char lead_lower =
+      (*lead >= 'A' && *lead <= 'Z') ? char(*lead + 32) : *lead;
+  const bool maybe_keyword = (lead_lower == 'n' || lead_lower == 'i');
+
+  // A failed keyword match falls through to the numeric parse below.
+  if (maybe_keyword && token != token_end &&
+      detail::tryParseNanInf(token, token_end, result, end_ptr, opts)) {
+    return true;
+  }
+
+  // Common case: an ordinary number.
+  const auto parsed = fast_float::from_chars(
+      token, token_end, *result,
+      fast_float::chars_format::general |
+          fast_float::chars_format::allow_leading_plus);
+  if (parsed.ec != std::errc()) {
+    *end_ptr = s;
+    return false;
+  }
+  *end_ptr = parsed.ptr;
+  return true;
+}
+
+// Convenience: parse from null-terminated string, no end_ptr needed.
+template <typename T>
+inline bool parseFloat(const char *s, T *result,
+                       const ParseOptions<T> &opts = ParseOptions<T>()) {
+  const char *end;  // written by the four-argument overload, then discarded
+  return parseFloat(s, result, &end, opts);  // same return value as that overload
+}
+
+// Parse a number from the explicit range [first, last): no whitespace is
+// skipped and no terminating NUL is required. On failure *end_ptr == first.
+template <typename T>
+inline bool parseFloatRange(const char *first, const char *last, T *result,
+                            const char **end_ptr,
+                            const ParseOptions<T> &opts = ParseOptions<T>()) {
+  if (first >= last) {
+    *end_ptr = first;
+    return false;
+  }
+
+  // nan/inf spellings start with [nNiI] after at most one sign character.
+  const bool has_sign = (*first == '+' || *first == '-');
+  const char *lead = has_sign ? first + 1 : first;
+  if (lead < last) {
+    const char lower =
+        (*lead >= 'A' && *lead <= 'Z') ? char(*lead + 32) : *lead;
+    const bool maybe_keyword = (lower == 'n' || lower == 'i');
+    if (maybe_keyword &&
+        detail::tryParseNanInf(first, last, result, end_ptr, opts)) {
+      return true;
+    }
+  }
+
+  // Ordinary number; allow_leading_plus makes fast_float accept '+' itself.
+  const auto parsed = fast_float::from_chars(
+      first, last, *result,
+      fast_float::chars_format::general |
+          fast_float::chars_format::allow_leading_plus);
+  if (parsed.ec != std::errc()) {
+    *end_ptr = first;
+    return false;
+  }
+  *end_ptr = parsed.ptr;
+  return true;
+}
+
+}  // namespace obj
+
+#endif  // OBJ_PARSE_FLOAT_H_
diff --git a/sandbox/parse_fp/test_parse_fp.cc b/sandbox/parse_fp/test_parse_fp.cc
new file mode 100644
index 00000000..76288dc8
--- /dev/null
+++ b/sandbox/parse_fp/test_parse_fp.cc
@@ -0,0 +1,679 @@
+// Standalone test and benchmark for fast_float + OBJ-specific float parser.
+// Build: cmake -B build -DCMAKE_BUILD_TYPE=Release . && cmake --build build
+// Run:   ./build/test_parse_fp
+
+#include "obj_parse_float.h"
+
+#include <cassert>
+#include <chrono>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+
+// ---------------------------------------------------------------------------
+// Reference: tinyobjloader's existing tryParseDouble (for comparison)
+// ---------------------------------------------------------------------------
+#define IS_DIGIT(x) (static_cast<unsigned int>((x) - '0') < 10)
+// Parses [s, s_end) as: optional sign, digits, optional fraction, optional exponent.
+static bool tryParseDouble_legacy(const char *s, const char *s_end,
+                                  double *result) {  // true with *result set; false leaves it untouched
+  if (s >= s_end) return false;
+
+  double mantissa = 0.0;
+  int exponent = 0;
+  char sign = '+';
+  char exp_sign = '+';
+  const char *curr = s;
+  int read = 0;  // per-section counter: digits seen, or fraction position
+  bool end_not_reached = false;  // refreshed as (curr != s_end) after each advance
+  bool leading_decimal_dots = false;
+
+  if (*curr == '+' || *curr == '-') {
+    sign = *curr;
+    curr++;
+    if ((curr != s_end) && (*curr == '.')) {
+      leading_decimal_dots = true;
+    }
+  } else if (IS_DIGIT(*curr)) {
+  } else if (*curr == '.') {
+    leading_decimal_dots = true;
+  } else {
+    goto fail;
+  }
+
+  end_not_reached = (curr != s_end);
+  if (!leading_decimal_dots) {  // integer part; skipped when the number starts with '.'
+    while (end_not_reached && IS_DIGIT(*curr)) {
+      mantissa *= 10;
+      mantissa += static_cast<int>(*curr - 0x30);
+      curr++;
+      read++;
+      end_not_reached = (curr != s_end);
+    }
+    if (read == 0) goto fail;  // a sign with no digit after it
+  }
+
+  if (!end_not_reached) goto assemble;
+
+  if (*curr == '.') {
+    curr++;
+    read = 1;  // position of the next fractional digit (1 = tenths)
+    end_not_reached = (curr != s_end);
+    while (end_not_reached && IS_DIGIT(*curr)) {
+      static const double pow_lut[] = {
+          1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001,
+      };
+      const int lut_entries = sizeof pow_lut / sizeof pow_lut[0];
+      mantissa += static_cast<int>(*curr - 0x30) *
+                  (read < lut_entries ? pow_lut[read] : std::pow(10.0, -read));
+      read++;
+      curr++;
+      end_not_reached = (curr != s_end);
+    }
+  } else if (*curr == 'e' || *curr == 'E') {
+  } else {
+    goto assemble;  // trailing text that is not part of the number
+  }
+
+  if (!end_not_reached) goto assemble;
+
+  if (*curr == 'e' || *curr == 'E') {
+    curr++;
+    end_not_reached = (curr != s_end);
+    if (end_not_reached && (*curr == '+' || *curr == '-')) {
+      exp_sign = *curr;
+      curr++;
+    } else if (end_not_reached && IS_DIGIT(*curr)) {  // guard: never dereference s_end
+    } else {
+      goto fail;
+    }
+
+    read = 0;
+    end_not_reached = (curr != s_end);
+    while (end_not_reached && IS_DIGIT(*curr)) {
+      if (exponent > (2147483647 / 10)) goto fail;  // the next multiply would overflow int
+      exponent *= 10;
+      exponent += static_cast<int>(*curr - 0x30);
+      curr++;
+      read++;
+      end_not_reached = (curr != s_end);
+    }
+    exponent *= (exp_sign == '+' ? 1 : -1);
+    if (read == 0) goto fail;  // 'e' with no exponent digits
+  }
+
+assemble:
+  *result =
+      (sign == '+' ? 1 : -1) *
+      (exponent ? std::ldexp(mantissa * std::pow(5.0, exponent), exponent)
+                : mantissa);
+  return true;
+fail:
+  return false;
+}
+
+#undef IS_DIGIT
+
+// ---------------------------------------------------------------------------
+// Test helpers
+// ---------------------------------------------------------------------------
+static int g_failures = 0;  // number of CHECKs that have failed so far
+// CHECK(cond, fmt, ...): when cond is false, print "FAIL file:line: ", the formatted message, and count it.
+#define CHECK(cond, ...)                                                       \
+  do {                                                                         \
+    if (!(cond)) {                                                             \
+      printf("FAIL %s:%d: ", __FILE__, __LINE__);                              \
+      printf(__VA_ARGS__);                                                     \
+      printf("\n");                                                            \
+      g_failures++;                                                            \
+    }                                                                          \
+  } while (0)
+
+// ---------------------------------------------------------------------------
+// 1. Basic fast_float correctness
+// ---------------------------------------------------------------------------
+static int test_fast_float_basic() {
+  // One row per input: text, the double it must produce, expected outcome.
+  struct Row {
+    const char *text;
+    double want;
+    bool parses;
+  };
+  const Row rows[] = {
+      {"0", 0.0, true},
+      {"1", 1.0, true},
+      {"-1", -1.0, true},
+      {"123", 123.0, true},
+      {"-456", -456.0, true},
+      {"3.14", 3.14, true},
+      {"-2.718", -2.718, true},
+      {"0.5", 0.5, true},
+      {".5", 0.5, true},
+      {"-.5", -0.5, true},
+      {"1e10", 1e10, true},
+      {"1E10", 1E10, true},
+      {"1.5e3", 1.5e3, true},
+      {"-2.5e-4", -2.5e-4, true},
+      {"1e0", 1.0, true},
+      {"1e+0", 1.0, true},
+      {"1e-0", 1.0, true},
+      {"1e308", 1e308, true},
+      {"5e-324", 5e-324, true},
+      {"1.7976931348623157e308", 1.7976931348623157e308, true},
+      {"0.123456", 0.123456, true},
+      {"-0.987654", -0.987654, true},
+      {"1.000000", 1.0, true},
+      {"0.000000", 0.0, true},
+  };
+
+  const int failures_at_entry = g_failures;
+  const int row_count = static_cast<int>(sizeof(rows) / sizeof(rows[0]));
+
+  for (int i = 0; i < row_count; ++i) {
+    const char *const text = rows[i].text;
+    const double want = rows[i].want;
+    double got = 0.0;
+    const auto res =
+        fast_float::from_chars(text, text + std::strlen(text), got);
+    // The parse outcome must match the table before the value is compared.
+    const bool parsed = (res.ec == std::errc());
+    CHECK(parsed == rows[i].parses, "[%d] \"%s\": expected %s, got %s", i,
+          text, rows[i].parses ? "success" : "failure",
+          parsed ? "success" : "failure");
+    if (!parsed) continue;
+    if (want != 0.0) {
+      const double rel_err = std::fabs((got - want) / want);
+      CHECK(rel_err <= 1e-15, "[%d] \"%s\": expected %.17g, got %.17g", i,
+            text, want, got);
+    } else {
+      CHECK(got == 0.0, "[%d] \"%s\": expected 0.0, got %.17g", i, text,
+            got);
+    }
+  }
+
+  return g_failures - failures_at_entry;
+}
+
+// ---------------------------------------------------------------------------
+// 2. OBJ wrapper: leading '+' sign
+// ---------------------------------------------------------------------------
+static int test_leading_plus() {
+  // Every input carries an explicit '+'; `expected` is the value it must yield.
+  struct TestCase {
+    const char *input;
+    double expected;
+  };
+  const TestCase cases[] = {
+      {"+1.0", 1.0},     {"+0", 0.0},       {"+0.5", 0.5},
+      {"+123", 123.0},   {"+1e5", 1e5},      {"+.5", 0.5},
+      {"+1.5e-3", 1.5e-3},
+  };
+  const int before = g_failures;
+  const int n = sizeof(cases) / sizeof(cases[0]);
+
+  for (int i = 0; i != n; ++i) {
+    const char *const text = cases[i].input;
+    double val = -999.0;  // Sentinel start value.
+    const char *end = NULL;
+    const bool ok = obj::parseFloat(text, &val, &end);
+    CHECK(ok, "leading+ [%d] \"%s\": parse failed", i, text);
+    if (ok && cases[i].expected == 0.0) {
+      CHECK(val == 0.0, "leading+ [%d] \"%s\": expected 0.0, got %.17g", i,
+            text, val);
+    } else if (ok) {
+      double rel_err = std::fabs((val - cases[i].expected) / cases[i].expected);
+      CHECK(rel_err <= 1e-15, "leading+ [%d] \"%s\": expected %.17g, got %.17g",
+            i, text, cases[i].expected, val);
+    }
+  }
+
+  return g_failures - before;
+}
+
+// ---------------------------------------------------------------------------
+// 3. nan/inf parsing with default replacements
+// ---------------------------------------------------------------------------
+static int test_nan_inf_defaults() {
+  // Expectation table for default options: every nan spelling yields 0.0,
+  // unsigned or '+' inf spellings the largest finite double, '-' ones the lowest.
+  typedef std::numeric_limits<double> Limits;
+  const double dmax = (Limits::max)();
+  const double dmin = Limits::lowest();
+  struct TestCase {
+    const char *input;
+    double expected;
+  };
+  const TestCase cases[] = {
+      // nan variants
+      {"nan", 0.0},
+      {"NaN", 0.0},
+      {"NAN", 0.0},
+      {"+nan", 0.0},
+      {"-nan", 0.0},
+
+      // inf variants
+      {"inf", dmax},
+      {"Inf", dmax},
+      {"INF", dmax},
+      {"infinity", dmax},
+      {"Infinity", dmax},
+      {"INFINITY", dmax},
+      {"+inf", dmax},
+      {"+Inf", dmax},
+      {"+infinity", dmax},
+
+      // -inf variants
+      {"-inf", dmin},
+      {"-Inf", dmin},
+      {"-INF", dmin},
+      {"-infinity", dmin},
+      {"-Infinity", dmin},
+      {"-INFINITY", dmin},
+  };
+  const int before = g_failures;
+  const int n = sizeof(cases) / sizeof(cases[0]);
+  for (int i = 0; i != n; ++i) {
+    const char *const text = cases[i].input;
+    double val = -12345.0;
+    const char *end = NULL;
+    const bool ok = obj::parseFloat(text, &val, &end);
+    CHECK(ok, "nan_inf [%d] \"%s\": parse failed", i, text);
+    // The value is checked unconditionally, even if the parse reported failure.
+    CHECK(val == cases[i].expected,
+          "nan_inf [%d] \"%s\": expected %.17g, got %.17g", i, text,
+          cases[i].expected, val);
+  }
+  return g_failures - before;
+}
+
+// ---------------------------------------------------------------------------
+// 4. nan/inf with custom replacement values
+// ---------------------------------------------------------------------------
+static int test_nan_inf_custom() {
+  // Replacement values supplied through obj::ParseOptions must be returned for
+  // nan / inf / -inf input instead of the defaults, for both the double and the
+  // float overloads of obj::parseFloat.
+  int before = g_failures;
+
+  // double version
+  obj::ParseOptions<double> opts;
+  opts.nan_value = -1.0;
+  opts.inf_value = 9999.0;
+  opts.neg_inf_value = -9999.0;
+
+  double val = 0.0;
+  bool ok = obj::parseFloat("nan", &val, opts);
+  CHECK(ok && val == -1.0, "custom nan: expected -1.0, got %.17g", val);
+
+  val = 0.0;
+  ok = obj::parseFloat("NaN", &val, opts);
+  CHECK(ok && val == -1.0, "custom NaN: expected -1.0, got %.17g", val);
+
+  val = 0.0;
+  ok = obj::parseFloat("inf", &val, opts);
+  CHECK(ok && val == 9999.0, "custom inf: expected 9999.0, got %.17g", val);
+
+  val = 0.0;
+  ok = obj::parseFloat("-inf", &val, opts);
+  CHECK(ok && val == -9999.0, "custom -inf: expected -9999.0, got %.17g", val);
+
+  val = 0.0;
+  ok = obj::parseFloat("infinity", &val, opts);
+  CHECK(ok && val == 9999.0, "custom infinity: expected 9999.0, got %.17g",
+        val);
+
+  val = 0.0;
+  ok = obj::parseFloat("-infinity", &val, opts);
+  CHECK(ok && val == -9999.0, "custom -infinity: expected -9999.0, got %.17g",
+        val);
+
+  // float version
+  obj::ParseOptions<float> fopts;
+  fopts.nan_value = -2.0f;
+  fopts.inf_value = 1e30f;
+  fopts.neg_inf_value = -1e30f;
+
+  float fval = 0.0f;
+  ok = obj::parseFloat("nan", &fval, fopts);
+  CHECK(ok && fval == -2.0f, "custom float nan: expected -2.0, got %f", fval);
+
+  fval = 0.0f;
+  ok = obj::parseFloat("inf", &fval, fopts);
+  CHECK(ok && fval == 1e30f, "custom float inf: expected 1e30, got %g", fval);
+
+  // fopts.neg_inf_value is configured above, so exercise it as well.
+  fval = 0.0f;
+  ok = obj::parseFloat("-inf", &fval, fopts);
+  CHECK(ok && fval == -1e30f, "custom float -inf: expected -1e30, got %g",
+        fval);
+
+  return g_failures - before;
+}
+
+// ---------------------------------------------------------------------------
+// 5. Pointer advancement / whitespace handling
+// ---------------------------------------------------------------------------
+static int test_pointer_advance() {
+  // Checks that leading blanks/tabs are skipped and that the reported end
+  // pointer stops right after the consumed token, for both obj::parseFloat and
+  // the explicit [first, last) form obj::parseFloatRange.
+  int before = g_failures;
+  double val = 0.0;
+  const char *end = NULL;
+  const char *s = NULL;
+  bool ok = false;
+
+  // parseFloat skips leading whitespace
+  ok = obj::parseFloat("  1.5 rest", &val, &end);
+  CHECK(ok, "ws: parse failed");
+  CHECK(val == 1.5, "ws: expected 1.5, got %.17g", val);
+  CHECK(end && *end == ' ', "ws: expected end at space before 'rest'");
+
+  // Tab whitespace
+  val = 0.0;
+  end = NULL;
+  ok = obj::parseFloat("\t3.14\tnext", &val, &end);
+  CHECK(ok, "tab ws: parse failed");
+  CHECK(std::fabs(val - 3.14) < 1e-15, "tab ws: expected 3.14, got %.17g",
+        val);
+  CHECK(end && *end == '\t', "tab ws: expected end at tab");
+
+  // nan pointer advancement
+  val = -1.0;
+  end = NULL;
+  ok = obj::parseFloat("nan rest", &val, &end);
+  CHECK(ok, "nan ptr: parse failed");
+  CHECK(val == 0.0, "nan ptr: expected 0.0, got %.17g", val);
+  CHECK(end && *end == ' ', "nan ptr: expected end at space");
+
+  // inf pointer advancement
+  val = 0.0;
+  end = NULL;
+  ok = obj::parseFloat("infinity rest", &val, &end);
+  CHECK(ok, "inf ptr: parse failed");
+  // Trailing text must not change the default replacement for infinity.
+  CHECK(val == (std::numeric_limits<double>::max)(),
+        "inf ptr: expected max, got %.17g", val);
+  CHECK(end && *end == ' ', "inf ptr: expected end at space");
+
+  // parseFloatRange
+  val = 0.0;
+  end = NULL;
+  s = "+1.5e2xxx";
+  ok = obj::parseFloatRange(s, s + 6, &val, &end);
+  CHECK(ok, "range: parse failed");
+  CHECK(val == 150.0, "range: expected 150.0, got %.17g", val);
+  // The range ends after "+1.5e2", so that is where parsing must stop.
+  CHECK(end == s + 6, "range: expected end at s+6");
+
+  // parseFloatRange with nan
+  val = -1.0;
+  end = NULL;
+  s = "nan";
+  ok = obj::parseFloatRange(s, s + 3, &val, &end);
+  CHECK(ok, "range nan: parse failed");
+  CHECK(val == 0.0, "range nan: expected 0.0, got %.17g", val);
+  // All three characters belong to the keyword, so end must equal the limit.
+  CHECK(end == s + 3, "range nan: expected end at s+3");
+
+  return g_failures - before;
+}
+
+// ---------------------------------------------------------------------------
+// 6. Edge cases / error handling
+// ---------------------------------------------------------------------------
+static int test_edge_cases() {
+  // Edge cases:
+  //  - inputs without any digits ("", blanks, a lone '+') must be rejected;
+  //  - an ordinary negative literal must still parse;
+  //  - "nan"/"inf" followed by extra letters must match only the keyword, with
+  //    the end pointer reported right after it.
+  int before = g_failures;
+  double val = 999.0;
+  const char *end = NULL;
+  const char *s = NULL;
+  bool ok = false;
+
+  // Empty string
+  ok = obj::parseFloat("", &val, &end);
+  CHECK(!ok, "empty: should fail");
+
+  // Whitespace only
+  val = 999.0;
+  end = NULL;
+  ok = obj::parseFloat("   ", &val, &end);
+  CHECK(!ok, "ws-only: should fail");
+
+  // Just '+'
+  val = 999.0;
+  end = NULL;
+  ok = obj::parseFloat("+", &val, &end);
+  CHECK(!ok, "bare-plus: should fail");
+
+  // Normal negative still works
+  val = 0.0;
+  ok = obj::parseFloat("-3.14", &val);
+  CHECK(ok && std::fabs(val - (-3.14)) < 1e-15,
+        "negative: expected -3.14, got %.17g", val);
+
+  // "nana" should parse "nan" and stop
+  // Use range-based to see where it stops
+  val = -1.0;
+  end = NULL;
+  s = "nana";
+  ok = obj::parseFloatRange(s, s + 4, &val, &end);
+  CHECK(ok, "nana: parse should succeed (match 'nan')");
+  CHECK(val == 0.0, "nana: expected 0.0, got %.17g", val);
+  // end is still NULL if the parser never wrote it; report -1 in that case
+  // rather than subtracting from a null pointer.
+  CHECK(end == s + 3, "nana: expected end at s+3, got s+%d",
+        end ? (int)(end - s) : -1);
+
+  // "info" should parse "inf" and stop at 'o'
+  val = 0.0;
+  end = NULL;
+  s = "info";
+  ok = obj::parseFloatRange(s, s + 4, &val, &end);
+  CHECK(ok, "info: parse should succeed (match 'inf')");
+  const double dmax = (std::numeric_limits<double>::max)();
+  CHECK(val == dmax, "info: expected max, got %.17g", val);
+  // Same NULL guard as in the "nana" case.
+  CHECK(end == s + 3, "info: expected end at s+3, got s+%d",
+        end ? (int)(end - s) : -1);
+
+  return g_failures - before;
+}
+
+// ---------------------------------------------------------------------------
+// 7. Accuracy comparison: fast_float vs legacy tryParseDouble vs strtod
+// ---------------------------------------------------------------------------
+static int test_accuracy() {
+  // Prints strtod, fast_float and legacy results side by side for a fixed list
+  // of literals. std::strtod is the reference: fast_float must match it
+  // exactly, while a legacy mismatch is only reported.
+  const char *hard_cases[] = {
+      "2.2250738585072014e-308",
+      "2.2250738585072011e-308",
+      "1.00000000000000011102230246251565404236316680908203125",
+      "0.3",
+      "0.1",
+      "0.2",
+      "7205759403792794e-1",
+      "922337203685477.5807",
+      "1e23",
+  };
+  const int n = sizeof(hard_cases) / sizeof(hard_cases[0]);
+  const int before = g_failures;
+
+  printf("--- Accuracy comparison ---\n");
+  printf("%-55s %22s %22s %22s\n", "Input", "strtod", "fast_float", "legacy");
+
+  for (int i = 0; i != n; ++i) {
+    const char *s = hard_cases[i];
+    const char *const s_end = s + std::strlen(s);
+    const double ref = std::strtod(s, NULL);
+    double ff_val = 0.0;
+    double lg_val = 0.0;
+
+    fast_float::from_chars(s, s_end, ff_val);
+    tryParseDouble_legacy(s, s_end, &lg_val);
+
+    printf("%-55s %22.17g %22.17g %22.17g\n", s, ref, ff_val, lg_val);
+    CHECK(ref == ff_val, "accuracy [%d] \"%s\": fast_float mismatch vs strtod",
+          i, s);
+    // A legacy mismatch is informational only; it is not counted as a failure.
+    if (lg_val != ref) {
+      printf("  ^^ legacy MISMATCH vs strtod (expected)\n");
+    }
+  }
+
+  return g_failures - before;
+}
+
+// ---------------------------------------------------------------------------
+// Benchmark
+// ---------------------------------------------------------------------------
+static void benchmark() {
+  // Times obj::parseFloat, the legacy parser and std::strtod over the same 1M
+  // strings, then obj::parseFloat alone over a 100K mix containing nan/inf.
+  typedef std::chrono::high_resolution_clock Clock;
+  // Milliseconds between two clock readings, at microsecond resolution.
+  const auto elapsed_ms = [](Clock::time_point from, Clock::time_point to) {
+    return std::chrono::duration_cast<std::chrono::microseconds>(to - from)
+               .count() /
+           1000.0;
+  };
+
+  const char *patterns[] = {
+      "0.123456",   "-0.987654",  "1.000000",   "0.000000",  "123.456",
+      "-789.012",   "0.5",        "-0.5",        "1e-5",      "3.14159265",
+      "2.71828182", "-0.0001234", "999.999999",  "0.333333",  "-1.414213",
+      "+0.5",       "+123.456",   "+1e-5",  // OBJ '+' prefix
+  };
+  const int np = sizeof(patterns) / sizeof(patterns[0]);
+  const int N = 1000000;
+  std::vector<std::string> data;
+  data.reserve(N);
+  for (int i = 0; i != N; ++i) {
+    data.push_back(patterns[i % np]);
+  }
+
+  // Benchmark obj::parseFloat (fast_float + OBJ wrapper)
+  double sum = 0.0;
+  Clock::time_point t0 = Clock::now();
+  for (int i = 0; i != N; ++i) {
+    double val = 0.0;
+    obj::parseFloat(data[i].c_str(), &val);
+    sum += val;
+  }
+  Clock::time_point t1 = Clock::now();
+  const double obj_ms = elapsed_ms(t0, t1);
+  printf("obj::parseFloat: %8.2f ms  (sum=%.6f, %d values)\n", obj_ms, sum, N);
+
+  // Benchmark legacy tryParseDouble
+  sum = 0.0;
+  t0 = Clock::now();
+  for (int i = 0; i != N; ++i) {
+    const char *s = data[i].c_str();
+    size_t len = data[i].size();
+    // Skip whitespace for fair comparison
+    while (*s == ' ' || *s == '\t') { ++s; --len; }
+    const char *e = s + len;
+    while (e > s && (e[-1] == ' ' || e[-1] == '\t')) {
+      --e;
+    }
+    double val = 0.0;
+    tryParseDouble_legacy(s, e, &val);
+    sum += val;
+  }
+  t1 = Clock::now();
+  const double lg_ms = elapsed_ms(t0, t1);
+  printf("legacy:          %8.2f ms  (sum=%.6f, %d values)\n", lg_ms, sum, N);
+
+  // Benchmark strtod
+  sum = 0.0;
+  t0 = Clock::now();
+  for (int i = 0; i != N; ++i) {
+    sum += std::strtod(data[i].c_str(), NULL);
+  }
+  t1 = Clock::now();
+  const double sd_ms = elapsed_ms(t0, t1);
+  printf("strtod:          %8.2f ms  (sum=%.6f, %d values)\n", sd_ms, sum, N);
+
+  printf("\nSpeedup obj::parseFloat vs legacy:  %.2fx\n", lg_ms / obj_ms);
+  printf("Speedup obj::parseFloat vs strtod:  %.2fx\n", sd_ms / obj_ms);
+
+  // Benchmark nan/inf mix
+  printf("\n--- nan/inf mixed benchmark (100K) ---\n");
+  const char *mixed_patterns[] = {
+      "1.5",  "-2.3",  "nan",  "inf",  "-inf",
+      "0.5",  "NaN",   "Inf",  "+1.0", "infinity",
+  };
+  const int mp = sizeof(mixed_patterns) / sizeof(mixed_patterns[0]);
+  const int M = 100000;
+  std::vector<std::string> mixed;
+  mixed.reserve(M);
+  for (int i = 0; i != M; ++i) {
+    mixed.push_back(mixed_patterns[i % mp]);
+  }
+
+  sum = 0.0;
+  t0 = Clock::now();
+  for (int i = 0; i != M; ++i) {
+    double val = 0.0;
+    obj::parseFloat(mixed[i].c_str(), &val);
+    sum += val;
+  }
+  t1 = Clock::now();
+  const double mix_ms = elapsed_ms(t0, t1);
+  printf("obj::parseFloat (mixed): %8.2f ms  (sum=%.6f, %d values)\n", mix_ms,
+         sum, M);
+}
+
+// ---------------------------------------------------------------------------
+int main() {
+  // Runs the seven correctness sections in order, then the benchmark, and
+  // finally prints the accumulated failure count. The exit status is 0 when no
+  // check failed and 1 otherwise.
+  typedef int (*SectionFn)();
+  struct Section {
+    const char *banner;  // Heading printed before the section runs.
+    SectionFn run;       // Returns the number of failures it recorded.
+  };
+
+  const Section sections[] = {
+      {"=== 1. fast_float basic correctness ===\n", test_fast_float_basic},
+      {"=== 2. Leading '+' sign ===\n", test_leading_plus},
+      {"=== 3. nan/inf with default replacements ===\n", test_nan_inf_defaults},
+      {"=== 4. nan/inf with custom replacements ===\n", test_nan_inf_custom},
+      {"=== 5. Pointer advancement / whitespace ===\n", test_pointer_advance},
+      {"=== 6. Edge cases ===\n", test_edge_cases},
+      {"=== 7. Accuracy comparison ===\n", test_accuracy},
+  };
+  const int section_count = sizeof(sections) / sizeof(sections[0]);
+
+  // A section passes only when its function reports zero failures.
+  for (int i = 0; i != section_count; ++i) {
+    printf("%s", sections[i].banner);
+    const int section_fails = sections[i].run();
+    printf("  %s\n\n", section_fails == 0 ? "PASSED" : "FAILED");
+  }
+
+  // The benchmark only prints timings; it contains no checks of its own.
+  printf("=== Benchmark (1M parses) ===\n");
+  benchmark();
+
+  printf("\n");
+  // g_failures is the global counter the sections measure themselves against;
+  // it decides both the summary line and the process exit status.
+  if (g_failures != 0) {
+    printf("%d TEST(S) FAILED.\n", g_failures);
+    return 1;
+  }
+
+  printf("ALL TESTS PASSED.\n");
+  return 0;
+}
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..53ab3312
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,64 @@
+# Adapted from https://github.com/pybind/python_example/blob/master/setup.py
+import sys
+
+# from pybind11 import get_cmake_dir
+# Available at setup time due to pyproject.toml
+from pybind11.setup_helpers import Pybind11Extension  # , build_ext
+from setuptools import setup
+
+try:
+    # Use setuptools_scm's generated _version.py when importable, else fall back.
+    from .python._version import version as __version__
+except Exception:
+    __version__ = "2.0.0rc10"
+
+# The main interface is through Pybind11Extension.
+# * You can add cxx_std=11/14/17, and then build_ext can be removed.
+# * You can set include_pybind11=false to add the include directory yourself,
+#   say from a submodule.
+#
+# Note:
+#   Sort input source files if you glob sources to ensure bit-for-bit
+#   reproducible builds (https://github.com/pybind/python_example/pull/53)
+
+ext_modules = [
+    Pybind11Extension(
+        "tinyobjloader",
+        sorted(["python/bindings.cc", "python/tiny_obj_loader.cc"]),
+        # Example: passing in the version to the compiled code
+        define_macros=[("VERSION_INFO", __version__)],
+        cxx_std=11,
+    ),
+]
+
+setup(
+    name="tinyobjloader",
+    packages=["python"],
+    # version=__version__,
+    author="Syoyo Fujita",
+    author_email="syoyo@lighttransport.com",
+    url="https://github.com/tinyobjloader/tinyobjloader",
+    # project_urls={
+    #    "Issue Tracker": "https://github.com/tinyobjloader/tinyobjloader/issues",
+    # },
+    description="Tiny but powerful Wavefront OBJ loader",
+    long_description_content_type="text/markdown",
+    classifiers=[
+        "Development Status :: 5 - Production/Stable",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Science/Research",
+        "Intended Audience :: Manufacturing",
+        "Topic :: Artistic Software",
+        "Topic :: Multimedia :: Graphics :: 3D Modeling",
+        "Topic :: Scientific/Engineering :: Visualization",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3",
+    ],
+    ext_modules=ext_modules,
+    # extras_require={"test": "pytest"},
+    # Currently, build_ext only provides an optional "highest supported C++
+    # level" feature, but in the future it may provide more features.
+    # cmdclass={"build_ext": build_ext},
+    # zip_safe=False,
+    # python_requires=">=3.6",
+)
diff --git a/tests/Makefile b/tests/Makefile
index c6708232..4a557a4e 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,10 +1,11 @@
 .PHONY: clean
 
-CXX = clang++
+CXX ?= clang++
 CXXFLAGS ?= -g -O1
+EXTRA_CXXFLAGS ?= -std=c++11 -fsanitize=address
 
 tester: tester.cc ../tiny_obj_loader.h
-	$(CXX) $(CXXFLAGS) -fsanitize=address -o tester tester.cc
+	$(CXX) $(CXXFLAGS) $(EXTRA_CXXFLAGS) -o tester tester.cc
 
 all: tester
 
diff --git a/tests/python/README.md b/tests/python/README.md
new file mode 100644
index 00000000..543a238d
--- /dev/null
+++ b/tests/python/README.md
@@ -0,0 +1,12 @@
+# tinyobjloader Python tests
+
+This folder hosts a project for running the Python binding tests.
+
+## Development
+
+The tests require NumPy. To keep CI install times short, `uv.lock` deliberately
+excludes NumPy: for some Python versions, a pinned NumPy would be built from
+source only to be discarded afterwards. To run the tests locally, run `uv sync`
+and then install into the venv one of the NumPy versions listed in the build
+matrix in `.github/workflows/python.yml`.
+
diff --git a/tests/python/pyproject.toml b/tests/python/pyproject.toml
new file mode 100644
index 00000000..8157856b
--- /dev/null
+++ b/tests/python/pyproject.toml
@@ -0,0 +1,12 @@
+[project]
+name = "tinyobjloader-tests"
+version = "0.0.1"
+description = "Tests for tinyobjloader Python bindings"
+readme = "README.md"
+requires-python = ">=3.9"
+
+dependencies = ["pytest>=8.0", "black==22.10.0"]
+
+[build-system]
+requires = ["hatchling>=1.24"]
+build-backend = "hatchling.build"
diff --git a/tests/python/tinyobjloader_tests/__init__.py b/tests/python/tinyobjloader_tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/python/tinyobjloader_tests/loader.py b/tests/python/tinyobjloader_tests/loader.py
new file mode 100644
index 00000000..9d7b3dad
--- /dev/null
+++ b/tests/python/tinyobjloader_tests/loader.py
@@ -0,0 +1,33 @@
+from tinyobjloader import ObjReader, ObjReaderConfig
+
+
+class LoadException(Exception):
+    """Raised by Loader when the reader reports that parsing failed."""
+
+
+class Loader:
+    """Light ObjReader wrapper that gives the tests a convenient interface."""
+
+    def __init__(self, triangulate):
+        self.reader = ObjReader()
+        # Only `triangulate` is overridden; every other option keeps its default.
+        self.config = ObjReaderConfig()
+        self.config.triangulate = triangulate
+
+    def load(self, mesh_path):
+        parsed = self.reader.ParseFromFile(mesh_path, self.config)
+        if not parsed:
+            raise LoadException(self.reader.Error() or self.reader.Warning())
+
+    def loads(self, mesh_string):
+        parsed = self.reader.ParseFromString(mesh_string, "", self.config)
+        if not parsed:
+            raise LoadException(self.reader.Error() or self.reader.Warning())
+
+    @property
+    def shapes(self):
+        return self.reader.GetShapes()
+
+    @property
+    def attrib(self):
+        return self.reader.GetAttrib()
diff --git a/tests/python/tinyobjloader_tests/test_loader.py b/tests/python/tinyobjloader_tests/test_loader.py
new file mode 100644
index 00000000..c9159ea1
--- /dev/null
+++ b/tests/python/tinyobjloader_tests/test_loader.py
@@ -0,0 +1,235 @@
+from pathlib import Path
+
+import numpy as np
+
+from .loader import Loader, LoadException
+
+MODELS_DIR = Path(__file__).parent.parent.parent.parent / "models"
+
+TWO_QUADS = """
+v 0 0 0
+v 0 0 0
+v 0 0 0
+v 0 0 0
+f 1 2 3 4
+v 46.367584 82.676086 8.867414
+v 46.524185 82.81955 8.825487
+v 46.59864 83.086678 8.88121
+v 46.461926 82.834091 8.953863
+f 5 6 7 8
+"""
+
+MIXED_ARITY = """
+v 0 1 1
+v 0 2 2
+v 0 3 3
+v 0 4 4
+v 0 5 5
+f 1 2 3 4
+f 1 4 5
+"""
+
+
+def test_numpy_face_vertices_two_quads():
+    """
+    Test for https://github.com/tinyobjloader/tinyobjloader/issues/400 (two quads, no triangulation).
+    """
+
+    # Set up: parse the two-quad mesh with triangulation disabled.
+    loader = Loader(triangulate=False)
+    loader.loads(TWO_QUADS)
+
+    shapes = loader.shapes
+    assert len(shapes) == 1
+
+    # Confidence check: the list accessor reports one vertex count per face.
+    (shape,) = shapes
+    expected_num_face_vertices = [4, 4]
+    assert shape.mesh.num_face_vertices == expected_num_face_vertices
+
+    # Test: the NumPy accessor must return the same counts.
+    np.testing.assert_array_equal(shape.mesh.numpy_num_face_vertices(), expected_num_face_vertices)
+
+
+def test_numpy_face_vertices_two_quads_with_triangulate():
+    """
+    Test for https://github.com/tinyobjloader/tinyobjloader/issues/400 (two quads, triangulated).
+    """
+
+    # Set up: parse the two-quad mesh with triangulation enabled.
+    loader = Loader(triangulate=True)
+    loader.loads(TWO_QUADS)
+
+    shapes = loader.shapes
+    assert len(shapes) == 1
+
+    # Confidence check: the two quads are expected to come back as four triangles.
+    (shape,) = shapes
+    expected_num_face_vertices = [3, 3, 3, 3]
+    assert shape.mesh.num_face_vertices == expected_num_face_vertices
+
+    # Test: the NumPy accessor must return the same counts.
+    np.testing.assert_array_equal(shape.mesh.numpy_num_face_vertices(), expected_num_face_vertices)
+
+
+def test_numpy_face_vertices_mixed_arity():
+    """
+    Test (one quad plus one triangle, no triangulation) for:
+      - https://github.com/tinyobjloader/tinyobjloader/issues/400
+      - https://github.com/tinyobjloader/tinyobjloader/issues/402
+    """
+
+    # Set up: parse the quad-plus-triangle mesh with triangulation disabled.
+    loader = Loader(triangulate=False)
+    loader.loads(MIXED_ARITY)
+
+    shapes = loader.shapes
+    assert len(shapes) == 1
+
+    # Confidence check: the list accessor keeps the per-face arity (4, then 3).
+    (shape,) = shapes
+    expected_num_face_vertices = [4, 3]
+    assert shape.mesh.num_face_vertices == expected_num_face_vertices
+
+    # Test: the NumPy accessor must return the same counts.
+    np.testing.assert_array_equal(shape.mesh.numpy_num_face_vertices(), expected_num_face_vertices)
+
+
+def test_numpy_face_vertices_mixed_arity_with_triangulate():
+    """
+    Test for https://github.com/tinyobjloader/tinyobjloader/issues/400 (quad plus triangle, triangulated).
+    """
+
+    # Set up: parse the quad-plus-triangle mesh with triangulation enabled.
+    loader = Loader(triangulate=True)
+    loader.loads(MIXED_ARITY)
+
+    shapes = loader.shapes
+    assert len(shapes) == 1
+
+    # Confidence check: the quad and the triangle are expected to yield three triangles.
+    (shape,) = shapes
+    expected_num_face_vertices = [3, 3, 3]
+    assert shape.mesh.num_face_vertices == expected_num_face_vertices
+
+    # Test: the NumPy accessor must return the same counts.
+    np.testing.assert_array_equal(shape.mesh.numpy_num_face_vertices(), expected_num_face_vertices)
+
+
+def test_numpy_index_array_two_quads():
+    """
+    Test for https://github.com/tinyobjloader/tinyobjloader/issues/401 (numpy_indices() on two quads).
+    """
+
+    # Set up: parse the two-quad mesh with triangulation disabled.
+    loader = Loader(triangulate=False)
+    loader.loads(TWO_QUADS)
+
+    shapes = loader.shapes
+    assert len(shapes) == 1
+
+    # Confidence check: the eight face corners reference vertices 0..7 in order.
+    (shape,) = shapes
+    expected_vertex_index = [0, 1, 2, 3, 4, 5, 6, 7]
+    assert [x.vertex_index for x in shape.mesh.indices] == expected_vertex_index
+
+    # Test: three integers per corner - the vertex index, then two -1 entries (presumably the absent normal/texcoord indices; confirm against the binding).
+    expected_numpy_indices = [
+        0,
+        -1,
+        -1,
+        1,
+        -1,
+        -1,
+        2,
+        -1,
+        -1,
+        3,
+        -1,
+        -1,
+        4,
+        -1,
+        -1,
+        5,
+        -1,
+        -1,
+        6,
+        -1,
+        -1,
+        7,
+        -1,
+        -1,
+    ]
+    np.testing.assert_array_equal(shape.mesh.numpy_indices(), expected_numpy_indices)
+
+
+def test_numpy_vertex_array_two_quads():
+    """
+    Test for https://github.com/tinyobjloader/tinyobjloader/issues/401 (numpy_vertices() on two quads).
+    """
+
+    # Set up: parse the two-quad mesh with triangulation disabled.
+    loader = Loader(triangulate=False)
+    loader.loads(TWO_QUADS)
+
+    # Confidence check: the list accessor returns the flattened x, y, z values of all eight vertices.
+    expected_vertices = [
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        0.0,
+        46.367584,
+        82.676086,
+        8.867414,
+        46.524185,
+        82.81955,
+        8.825487,
+        46.59864,
+        83.086678,
+        8.88121,
+        46.461926,
+        82.834091,
+        8.953863,
+    ]
+    np.testing.assert_array_almost_equal(loader.attrib.vertices, expected_vertices, decimal=6)
+
+    # Test: the NumPy accessor must return the same flattened coordinates (to 6 decimals).
+    np.testing.assert_array_almost_equal(loader.attrib.numpy_vertices(), expected_vertices, decimal=6)
+
+
+def test_numpy_num_face_vertices_from_file():
+    """
+    Regression test for https://github.com/tinyobjloader/tinyobjloader/issues/400
+
+    Loads a mixed quad/triangle mesh from a .obj file and checks that
+    numpy_num_face_vertices() returns correct unsigned int values.
+    With the bug (unsigned char element type), values were read as zeros.
+    """
+
+    # Set up: load the regression mesh from the models directory without triangulation.
+    obj_path = str(MODELS_DIR / "issue-400-num-face-vertices.obj")
+    loader = Loader(triangulate=False)
+    loader.load(obj_path)
+
+    shapes = loader.shapes
+    assert len(shapes) == 1
+
+    (shape,) = shapes
+    # The file has one quad face (4 vertices) and two triangle faces (3 vertices each).
+    expected_num_face_vertices = [4, 3, 3]
+
+    # Confidence check using the non-numpy accessor.
+    assert shape.mesh.num_face_vertices == expected_num_face_vertices
+
+    # Test: numpy_num_face_vertices() must return the same values, and the array dtype must be uint32.
+    result = shape.mesh.numpy_num_face_vertices()
+    np.testing.assert_array_equal(result, expected_num_face_vertices)
+    assert result.dtype == np.dtype("uint32")
diff --git a/tests/python/uv.lock b/tests/python/uv.lock
new file mode 100644
index 00000000..98022816
--- /dev/null
+++ b/tests/python/uv.lock
@@ -0,0 +1,303 @@
+version = 1
+revision = 3
+requires-python = ">=3.9"
+resolution-markers = [
+    "python_full_version >= '3.10'",
+    "python_full_version < '3.10'",
+]
+
+[[package]]
+name = "black"
+version = "22.10.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "click", version = "8.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
+    { name = "mypy-extensions" },
+    { name = "pathspec" },
+    { name = "platformdirs", version = "4.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "platformdirs", version = "4.9.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
+    { name = "tomli", marker = "python_full_version < '3.11'" },
+    { name = "typing-extensions", marker = "python_full_version < '3.10'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a3/89/629fca2eea0899c06befaa58dc0f49d56807d454202bb2e54bd0d98c77f3/black-22.10.0.tar.gz", hash = "sha256:f513588da599943e0cde4e32cc9879e825d58720d6557062d1098c5ad80080e1", size = 547735, upload-time = "2022-10-06T22:44:48.253Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ae/49/ea03c318a25be359b8e5178a359d47e2da8f7524e1522c74b8f74c66b6f8/black-22.10.0-1fixedarch-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:5cc42ca67989e9c3cf859e84c2bf014f6633db63d1cbdf8fdb666dcd9e77e3fa", size = 1413786, upload-time = "2022-10-07T18:06:56.738Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/84/5c3f3ffc4143fa7e208d745d2239d915e74d3709fdbc64c3e98d3fd27e56/black-22.10.0-1fixedarch-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:5d8f74030e67087b219b032aa33a919fae8806d49c867846bfacde57f43972ef", size = 1395367, upload-time = "2022-10-07T18:07:10.109Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/23/f4278377cabf882298b4766e977fd04377f288d1ccef706953076a1e0598/black-22.10.0-1fixedarch-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:e41a86c6c650bcecc6633ee3180d80a025db041a8e2398dcc059b3afa8382cd4", size = 1412948, upload-time = "2022-10-07T18:06:45.929Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/11/f2737cd3b458d91401801e83a014e87c63e8904dc063200f77826c352f54/black-22.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2039230db3c6c639bd84efe3292ec7b06e9214a2992cd9beb293d639c6402edb", size = 1248864, upload-time = "2022-10-07T18:34:56.303Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/5f/9cfc6dd95965f8df30194472543e6f0515a10d78ea5378426ef1546735c7/black-22.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14ff67aec0a47c424bc99b71005202045dc09270da44a27848d534600ac64fc7", size = 1542985, upload-time = "2022-10-06T22:54:23.32Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/ce/22281871536b3d79474fd44d48dad48f7cbc5c3982bddf6a7495e7079d00/black-22.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:819dc789f4498ecc91438a7de64427c73b45035e2e3680c92e18795a839ebb66", size = 1198188, upload-time = "2022-10-06T22:58:56.509Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/2f/a8406a9e337a213802aa90a3e9fbf90c86f3edce92f527255fd381309b77/black-22.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5b9b29da4f564ba8787c119f37d174f2b69cdfdf9015b7d8c5c16121ddc054ae", size = 1233231, upload-time = "2022-10-07T18:35:05.895Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/9e/fa912c5ae4b8eb6d36982fc8ac2d779cf944dbd7c3c1fe7a28acf462c1ed/black-22.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8b49776299fece66bffaafe357d929ca9451450f5466e997a7285ab0fe28e3b", size = 1527386, upload-time = "2022-10-06T22:54:25.636Z" },
+    { url = "https://files.pythonhosted.org/packages/56/df/913d71817c7034edba25d596c54f782c2f809b6af30367d2f00309e8890a/black-22.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:21199526696b8f09c3997e2b4db8d0b108d801a348414264d2eb8eb2532e540d", size = 1201344, upload-time = "2022-10-06T22:58:58.134Z" },
+    { url = "https://files.pythonhosted.org/packages/69/84/903cdf41514088d5a716538cb189c471ab34e56ae9a1c2da6b8bfe8e4dbf/black-22.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:974308c58d057a651d182208a484ce80a26dac0caef2895836a92dd6ebd725e0", size = 1248291, upload-time = "2022-10-07T18:34:48.48Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/51/403b0b0eb9fb412ca02b79dc38472469f2f88c9aacc6bb5262143e4ff0bc/black-22.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72ef3925f30e12a184889aac03d77d031056860ccae8a1e519f6cbb742736383", size = 1542631, upload-time = "2022-10-06T22:54:31.965Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/15/61119d166a44699827c112d7c4726421f14323c2cb7aa9f4c26628f237f9/black-22.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:432247333090c8c5366e69627ccb363bc58514ae3e63f7fc75c54b1ea80fa7de", size = 1197402, upload-time = "2022-10-06T22:59:03.766Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/6f/74492b8852ee4f2ad2178178f6b65bc8fc80ad539abe56c1c23eab6732e2/black-22.10.0-py3-none-any.whl", hash = "sha256:c957b2b4ea88587b46cf49d1dc17681c1e672864fd7af32fc1e9664d572b3458", size = 165761, upload-time = "2022-10-06T22:44:46.108Z" },
+]
+
+[[package]]
+name = "click"
+version = "8.1.8"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+dependencies = [
+    { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload-time = "2024-12-21T18:38:44.339Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload-time = "2024-12-21T18:38:41.666Z" },
+]
+
+[[package]]
+name = "click"
+version = "8.3.1"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.10'",
+]
+dependencies = [
+    { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
+]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
+]
+
+[[package]]
+name = "exceptiongroup"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
+]
+
+[[package]]
+name = "iniconfig"
+version = "2.1.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" },
+]
+
+[[package]]
+name = "iniconfig"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.10'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
+]
+
+[[package]]
+name = "mypy-extensions"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
+]
+
+[[package]]
+name = "packaging"
+version = "26.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" },
+]
+
+[[package]]
+name = "pathspec"
+version = "1.0.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fa/36/e27608899f9b8d4dff0617b2d9ab17ca5608956ca44461ac14ac48b44015/pathspec-1.0.4.tar.gz", hash = "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", size = 131200, upload-time = "2026-01-27T03:59:46.938Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" },
+]
+
+[[package]]
+name = "platformdirs"
+version = "4.4.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/23/e8/21db9c9987b0e728855bd57bff6984f67952bea55d6f75e055c46b5383e8/platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf", size = 21634, upload-time = "2025-08-26T14:32:04.268Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/40/4b/2028861e724d3bd36227adfa20d3fd24c3fc6d52032f4a93c133be5d17ce/platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85", size = 18654, upload-time = "2025-08-26T14:32:02.735Z" },
+]
+
+[[package]]
+name = "platformdirs"
+version = "4.9.2"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.10'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1b/04/fea538adf7dbbd6d186f551d595961e564a3b6715bdf276b477460858672/platformdirs-4.9.2.tar.gz", hash = "sha256:9a33809944b9db043ad67ca0db94b14bf452cc6aeaac46a88ea55b26e2e9d291", size = 28394, upload-time = "2026-02-16T03:56:10.574Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/48/31/05e764397056194206169869b50cf2fee4dbbbc71b344705b9c0d878d4d8/platformdirs-4.9.2-py3-none-any.whl", hash = "sha256:9170634f126f8efdae22fb58ae8a0eaa86f38365bc57897a6c4f781d1f5875bd", size = 21168, upload-time = "2026-02-16T03:56:08.891Z" },
+]
+
+[[package]]
+name = "pluggy"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
+]
+
+[[package]]
+name = "pygments"
+version = "2.19.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
+]
+
+[[package]]
+name = "pytest"
+version = "8.4.2"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+dependencies = [
+    { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" },
+    { name = "exceptiongroup", marker = "python_full_version < '3.10'" },
+    { name = "iniconfig", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "packaging", marker = "python_full_version < '3.10'" },
+    { name = "pluggy", marker = "python_full_version < '3.10'" },
+    { name = "pygments", marker = "python_full_version < '3.10'" },
+    { name = "tomli", marker = "python_full_version < '3.10'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
+]
+
+[[package]]
+name = "pytest"
+version = "9.0.2"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.10'",
+]
+dependencies = [
+    { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" },
+    { name = "exceptiongroup", marker = "python_full_version == '3.10.*'" },
+    { name = "iniconfig", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
+    { name = "packaging", marker = "python_full_version >= '3.10'" },
+    { name = "pluggy", marker = "python_full_version >= '3.10'" },
+    { name = "pygments", marker = "python_full_version >= '3.10'" },
+    { name = "tomli", marker = "python_full_version == '3.10.*'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
+]
+
+[[package]]
+name = "tinyobjloader-tests"
+version = "0.0.1"
+source = { editable = "." }
+dependencies = [
+    { name = "black" },
+    { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "pytest", version = "9.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "black", specifier = "==22.10.0" },
+    { name = "pytest", specifier = ">=8.0" },
+]
+
+[[package]]
+name = "tomli"
+version = "2.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/82/30/31573e9457673ab10aa432461bee537ce6cef177667deca369efb79df071/tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c", size = 17477, upload-time = "2026-01-11T11:22:38.165Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3c/d9/3dc2289e1f3b32eb19b9785b6a006b28ee99acb37d1d47f78d4c10e28bf8/tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867", size = 153663, upload-time = "2026-01-11T11:21:45.27Z" },
+    { url = "https://files.pythonhosted.org/packages/51/32/ef9f6845e6b9ca392cd3f64f9ec185cc6f09f0a2df3db08cbe8809d1d435/tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9", size = 148469, upload-time = "2026-01-11T11:21:46.873Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/c2/506e44cce89a8b1b1e047d64bd495c22c9f71f21e05f380f1a950dd9c217/tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95", size = 236039, upload-time = "2026-01-11T11:21:48.503Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/40/e1b65986dbc861b7e986e8ec394598187fa8aee85b1650b01dd925ca0be8/tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76", size = 243007, upload-time = "2026-01-11T11:21:49.456Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/6f/6e39ce66b58a5b7ae572a0f4352ff40c71e8573633deda43f6a379d56b3e/tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d", size = 240875, upload-time = "2026-01-11T11:21:50.755Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/ad/cb089cb190487caa80204d503c7fd0f4d443f90b95cf4ef5cf5aa0f439b0/tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576", size = 246271, upload-time = "2026-01-11T11:21:51.81Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/63/69125220e47fd7a3a27fd0de0c6398c89432fec41bc739823bcc66506af6/tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a", size = 96770, upload-time = "2026-01-11T11:21:52.647Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/0d/a22bb6c83f83386b0008425a6cd1fa1c14b5f3dd4bad05e98cf3dbbf4a64/tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa", size = 107626, upload-time = "2026-01-11T11:21:53.459Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/6d/77be674a3485e75cacbf2ddba2b146911477bd887dda9d8c9dfb2f15e871/tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614", size = 94842, upload-time = "2026-01-11T11:21:54.831Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/43/7389a1869f2f26dba52404e1ef13b4784b6b37dac93bac53457e3ff24ca3/tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1", size = 154894, upload-time = "2026-01-11T11:21:56.07Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/05/2f9bf110b5294132b2edf13fe6ca6ae456204f3d749f623307cbb7a946f2/tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8", size = 149053, upload-time = "2026-01-11T11:21:57.467Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/41/1eda3ca1abc6f6154a8db4d714a4d35c4ad90adc0bcf700657291593fbf3/tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a", size = 243481, upload-time = "2026-01-11T11:21:58.661Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/6d/02ff5ab6c8868b41e7d4b987ce2b5f6a51d3335a70aa144edd999e055a01/tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1", size = 251720, upload-time = "2026-01-11T11:22:00.178Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/57/0405c59a909c45d5b6f146107c6d997825aa87568b042042f7a9c0afed34/tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b", size = 247014, upload-time = "2026-01-11T11:22:01.238Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/0e/2e37568edd944b4165735687cbaf2fe3648129e440c26d02223672ee0630/tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51", size = 251820, upload-time = "2026-01-11T11:22:02.727Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/1c/ee3b707fdac82aeeb92d1a113f803cf6d0f37bdca0849cb489553e1f417a/tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729", size = 97712, upload-time = "2026-01-11T11:22:03.777Z" },
+    { url = "https://files.pythonhosted.org/packages/69/13/c07a9177d0b3bab7913299b9278845fc6eaaca14a02667c6be0b0a2270c8/tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da", size = 108296, upload-time = "2026-01-11T11:22:04.86Z" },
+    { url = "https://files.pythonhosted.org/packages/18/27/e267a60bbeeee343bcc279bb9e8fbed0cbe224bc7b2a3dc2975f22809a09/tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3", size = 94553, upload-time = "2026-01-11T11:22:05.854Z" },
+    { url = "https://files.pythonhosted.org/packages/34/91/7f65f9809f2936e1f4ce6268ae1903074563603b2a2bd969ebbda802744f/tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0", size = 154915, upload-time = "2026-01-11T11:22:06.703Z" },
+    { url = "https://files.pythonhosted.org/packages/20/aa/64dd73a5a849c2e8f216b755599c511badde80e91e9bc2271baa7b2cdbb1/tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e", size = 149038, upload-time = "2026-01-11T11:22:07.56Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/8a/6d38870bd3d52c8d1505ce054469a73f73a0fe62c0eaf5dddf61447e32fa/tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4", size = 242245, upload-time = "2026-01-11T11:22:08.344Z" },
+    { url = "https://files.pythonhosted.org/packages/59/bb/8002fadefb64ab2669e5b977df3f5e444febea60e717e755b38bb7c41029/tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e", size = 250335, upload-time = "2026-01-11T11:22:09.951Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/3d/4cdb6f791682b2ea916af2de96121b3cb1284d7c203d97d92d6003e91c8d/tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c", size = 245962, upload-time = "2026-01-11T11:22:11.27Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/4a/5f25789f9a460bd858ba9756ff52d0830d825b458e13f754952dd15fb7bb/tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f", size = 250396, upload-time = "2026-01-11T11:22:12.325Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/2f/b73a36fea58dfa08e8b3a268750e6853a6aac2a349241a905ebd86f3047a/tomli-2.4.0-cp313-cp313-win32.whl", hash = "sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86", size = 97530, upload-time = "2026-01-11T11:22:13.865Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/af/ca18c134b5d75de7e8dc551c5234eaba2e8e951f6b30139599b53de9c187/tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87", size = 108227, upload-time = "2026-01-11T11:22:15.224Z" },
+    { url = "https://files.pythonhosted.org/packages/22/c3/b386b832f209fee8073c8138ec50f27b4460db2fdae9ffe022df89a57f9b/tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132", size = 94748, upload-time = "2026-01-11T11:22:16.009Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/c4/84047a97eb1004418bc10bdbcfebda209fca6338002eba2dc27cc6d13563/tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6", size = 154725, upload-time = "2026-01-11T11:22:17.269Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/5d/d39038e646060b9d76274078cddf146ced86dc2b9e8bbf737ad5983609a0/tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc", size = 148901, upload-time = "2026-01-11T11:22:18.287Z" },
+    { url = "https://files.pythonhosted.org/packages/73/e5/383be1724cb30f4ce44983d249645684a48c435e1cd4f8b5cded8a816d3c/tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66", size = 243375, upload-time = "2026-01-11T11:22:19.154Z" },
+    { url = "https://files.pythonhosted.org/packages/31/f0/bea80c17971c8d16d3cc109dc3585b0f2ce1036b5f4a8a183789023574f2/tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d", size = 250639, upload-time = "2026-01-11T11:22:20.168Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/8f/2853c36abbb7608e3f945d8a74e32ed3a74ee3a1f468f1ffc7d1cb3abba6/tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702", size = 246897, upload-time = "2026-01-11T11:22:21.544Z" },
+    { url = "https://files.pythonhosted.org/packages/49/f0/6c05e3196ed5337b9fe7ea003e95fd3819a840b7a0f2bf5a408ef1dad8ed/tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8", size = 254697, upload-time = "2026-01-11T11:22:23.058Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/f5/2922ef29c9f2951883525def7429967fc4d8208494e5ab524234f06b688b/tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776", size = 98567, upload-time = "2026-01-11T11:22:24.033Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/31/22b52e2e06dd2a5fdbc3ee73226d763b184ff21fc24e20316a44ccc4d96b/tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475", size = 108556, upload-time = "2026-01-11T11:22:25.378Z" },
+    { url = "https://files.pythonhosted.org/packages/48/3d/5058dff3255a3d01b705413f64f4306a141a8fd7a251e5a495e3f192a998/tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2", size = 96014, upload-time = "2026-01-11T11:22:26.138Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/4e/75dab8586e268424202d3a1997ef6014919c941b50642a1682df43204c22/tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9", size = 163339, upload-time = "2026-01-11T11:22:27.143Z" },
+    { url = "https://files.pythonhosted.org/packages/06/e3/b904d9ab1016829a776d97f163f183a48be6a4deb87304d1e0116a349519/tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0", size = 159490, upload-time = "2026-01-11T11:22:28.399Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/5a/fc3622c8b1ad823e8ea98a35e3c632ee316d48f66f80f9708ceb4f2a0322/tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df", size = 269398, upload-time = "2026-01-11T11:22:29.345Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/33/62bd6152c8bdd4c305ad9faca48f51d3acb2df1f8791b1477d46ff86e7f8/tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d", size = 276515, upload-time = "2026-01-11T11:22:30.327Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/ff/ae53619499f5235ee4211e62a8d7982ba9e439a0fb4f2f351a93d67c1dd2/tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f", size = 273806, upload-time = "2026-01-11T11:22:32.56Z" },
+    { url = "https://files.pythonhosted.org/packages/47/71/cbca7787fa68d4d0a9f7072821980b39fbb1b6faeb5f5cf02f4a5559fa28/tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b", size = 281340, upload-time = "2026-01-11T11:22:33.505Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/00/d595c120963ad42474cf6ee7771ad0d0e8a49d0f01e29576ee9195d9ecdf/tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087", size = 108106, upload-time = "2026-01-11T11:22:34.451Z" },
+    { url = "https://files.pythonhosted.org/packages/de/69/9aa0c6a505c2f80e519b43764f8b4ba93b5a0bbd2d9a9de6e2b24271b9a5/tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd", size = 120504, upload-time = "2026-01-11T11:22:35.764Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/9f/f1668c281c58cfae01482f7114a4b88d345e4c140386241a1a24dcc9e7bc/tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4", size = 99561, upload-time = "2026-01-11T11:22:36.624Z" },
+    { url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" },
+]
+
+[[package]]
+name = "typing-extensions"
+version = "4.15.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
+]
diff --git a/tests/tester.cc b/tests/tester.cc
index 7538c740..9709d1c5 100644
--- a/tests/tester.cc
+++ b/tests/tester.cc
@@ -1,4 +1,5 @@
 #define TINYOBJLOADER_IMPLEMENTATION
+#define TINYOBJLOADER_STREAM_READER_MAX_BYTES (size_t(8) * size_t(1024) * size_t(1024))
 #include "../tiny_obj_loader.h"
 
 #if defined(__clang__)
@@ -29,6 +30,29 @@
 #include <fstream>
 #include <iostream>
 #include <sstream>
+#include <string>
+
+#ifdef _WIN32
+#include <direct.h>    // _mkdir
+#include <windows.h>   // GetTempPathW, CreateDirectoryW, RegOpenKeyExA
+#include <winreg.h>    // registry constants
+#pragma comment(lib, "Advapi32.lib")  // RegOpenKeyExA, RegQueryValueExA, RegCloseKey
+
+// Converts a UTF-16 wide string to UTF-8. Empty input or a failed length query yields "".
+static std::string WcharToUTF8(const std::wstring &wstr) {
+  if (wstr.empty()) return std::string();
+  int len = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), -1,
+                                NULL, 0, NULL, NULL);  // no output buffer: length query
+  if (len <= 0) return std::string();
+  std::string str(static_cast<size_t>(len), '\0');  // len includes the terminating '\0'
+  WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), -1, &str[0], len, NULL, NULL);
+  str.resize(static_cast<size_t>(len - 1));  // trim terminating '\0'
+  return str;
+}
+#else
+#include <cerrno>
+#include <sys/stat.h>  // mkdir
+#endif
 
 template <typename T>
 static bool FloatEquals(const T& a, const T& b) {
@@ -399,6 +423,190 @@ static bool TestStreamLoadObj() {
 
 const char* gMtlBasePath = "../models/";
 
+// ---------------------------------------------------------------------------
+// Helpers for path-related tests
+// ---------------------------------------------------------------------------
+
+// Creates a single directory level. Returns true on success or if it already exists.
+static bool MakeDir(const std::string& path) {
+#ifdef _WIN32
+  // Wide API for non-ASCII paths. NOTE(review): no LongPathW() here, unlike CopyTestFile - confirm.
+  std::wstring wpath = UTF8ToWchar(path);
+  if (wpath.empty()) return false;  // nothing usable came back from the conversion
+  if (CreateDirectoryW(wpath.c_str(), NULL) != 0) return true;  // nonzero: created
+  return GetLastError() == ERROR_ALREADY_EXISTS;  // a pre-existing entry counts as success
+#else
+  return mkdir(path.c_str(), 0755) == 0 || errno == EEXIST;  // EEXIST counts as success
+#endif
+}
+
+// Removes a directory and all its contents (best effort; failures are ignored).
+// `path` is UTF-8. On Windows the command is converted to UTF-16 and run with
+// _wsystem(), since system() would interpret the bytes in the ANSI code page.
+// All callers pass paths built inside this test file, not user-controlled input.
+static void RemoveTestDir(const std::string& path) {
+#ifdef _WIN32
+  const int rc = _wsystem(UTF8ToWchar("rd /s /q \"" + path + "\"").c_str());
+#else
+  const int rc = system(("rm -rf '" + path + "'").c_str());
+#endif
+  if (rc != 0) { /* cleanup failure is non-fatal */ }
+}
+
+// Copies src to dst in binary mode; returns false if either file cannot be
+// opened or the copy fails. dst is taken as UTF-8: on Windows it goes through
+// LongPathW(UTF8ToWchar()) so that long paths (> MAX_PATH) are handled.
+static bool CopyTestFile(const std::string& src, const std::string& dst) {
+  std::ifstream in(src.c_str(), std::ios::binary);
+  if (!in) return false;  // source could not be opened
+#ifdef _WIN32
+  // Long-path prefix (the same conversion tinyobjloader itself uses) for paths > MAX_PATH.
+  std::ofstream out(LongPathW(UTF8ToWchar(dst)).c_str(), std::ios::binary);
+#else
+  std::ofstream out(dst.c_str(), std::ios::binary);
+#endif
+  if (!out) return false;  // destination could not be created
+  out << in.rdbuf();  // stream the whole source into the destination
+  return !out.fail();  // NOTE(review): evaluated before `out` is flushed/closed
+}
+
+#ifdef _WIN32
+// Returns true if Windows has the system-wide long path support enabled
+// (HKLM\SYSTEM\CurrentControlSet\Control\FileSystem\LongPathsEnabled = 1).
+static bool IsWindowsLongPathEnabled() {
+  HKEY hKey;
+  DWORD value = 0;  // stays 0 ("disabled") unless the registry query overwrites it
+  DWORD size = sizeof(DWORD);
+  if (RegOpenKeyExA(HKEY_LOCAL_MACHINE,
+                    "SYSTEM\\CurrentControlSet\\Control\\FileSystem", 0,
+                    KEY_READ, &hKey) == ERROR_SUCCESS) {
+    RegQueryValueExA(hKey, "LongPathsEnabled", NULL, NULL,
+                     reinterpret_cast<LPBYTE>(&value), &size);  // result code is ignored
+    RegCloseKey(hKey);
+  }
+  return value != 0;  // any nonzero value is treated as enabled
+}
+#endif  // _WIN32
+
+// ---------------------------------------------------------------------------
+// Path-related tests
+// ---------------------------------------------------------------------------
+
+// Test: load .obj/.mtl from a directory path containing UTF-8 non-ASCII
+// characters. On Windows our code converts the UTF-8 path to UTF-16 before
+// calling the file API. On Linux, UTF-8 paths are handled natively.
+void test_load_obj_from_utf8_path() {
+  // Build a temp directory name that contains the UTF-8 encoded character é
+  // (U+00E9, encoded as \xC3\xA9 in UTF-8).
+#ifdef _WIN32
+  wchar_t wtmpbuf[MAX_PATH + 1] = {0};  // stays "" if GetTempPathW() fails
+  GetTempPathW(MAX_PATH + 1, wtmpbuf);
+  std::string test_dir =
+      WcharToUTF8(wtmpbuf) + "tinyobj_utf8_\xc3\xa9_test\\";
+#else
+  std::string test_dir = "/tmp/tinyobj_utf8_\xc3\xa9_test/";
+#endif
+  // An environment that cannot create the directory skips the test instead of failing it.
+  if (!MakeDir(test_dir)) {
+    std::cout << "SKIPPED: Cannot create Unicode temp directory: " << test_dir
+              << "\n";
+    return;
+  }
+
+  const std::string obj_dst = test_dir + "utf8-path-test.obj";
+  const std::string mtl_dst = test_dir + "utf8-path-test.mtl";
+  // Stage the model and its material file inside the Unicode directory.
+  if (!CopyTestFile("../models/utf8-path-test.obj", obj_dst) ||
+      !CopyTestFile("../models/utf8-path-test.mtl", mtl_dst)) {
+    RemoveTestDir(test_dir);
+    TEST_CHECK_(false, "Failed to copy test files to Unicode temp directory");
+    return;
+  }
+
+  tinyobj::ObjReader reader;
+  bool ret = reader.ParseFromFile(obj_dst);
+  // Clean up before asserting so the directory is removed even when a check fails.
+  RemoveTestDir(test_dir);
+
+  if (!reader.Warning().empty())
+    std::cout << "WARN: " << reader.Warning() << "\n";
+  if (!reader.Error().empty())
+    std::cerr << "ERR: " << reader.Error() << "\n";
+
+  TEST_CHECK(ret == true);
+  TEST_CHECK(reader.GetShapes().size() == 1);
+  TEST_CHECK(reader.GetMaterials().size() == 1);
+}
+
+// Test: load .obj/.mtl from a path whose total length exceeds MAX_PATH (260).
+// On Windows, tinyobjloader prepends the \\?\ extended-length path prefix so
+// that such a file can be opened. The test only runs when the OS-wide long
+// path support (LongPathsEnabled) is active and is skipped otherwise.
+// On Linux, long paths work natively; this test verifies no regression.
+void test_load_obj_from_long_path() {
+#ifdef _WIN32
+  if (!IsWindowsLongPathEnabled()) {
+    std::cout
+        << "SKIPPED: Windows long path support (LongPathsEnabled) is not "
+           "enabled\n";
+    return;
+  }
+  wchar_t wtmpbuf[MAX_PATH + 1] = {0};  // stays "" if GetTempPathW() fails
+  GetTempPathW(MAX_PATH + 1, wtmpbuf);
+  std::string base = WcharToUTF8(wtmpbuf);  // e.g. "C:\Users\...\Temp\"
+  const char path_sep = '\\';
+#else
+  std::string base = "/tmp/";
+  const char path_sep = '/';
+#endif
+
+  // Create a two-level directory where the deepest directory name is 250
+  // characters long.  Combined with the base path and the filename
+  // "utf8-path-test.obj" (18 chars) the total file path comfortably exceeds
+  // MAX_PATH (260) on all supported platforms.
+  std::string test_root = base + "tinyobj_lp_test" + path_sep;
+  std::string long_subdir = test_root + std::string(250, 'a') + path_sep;
+  std::string obj_path = long_subdir + "utf8-path-test.obj";
+
+  // obj_path must exceed MAX_PATH for the test to be meaningful.
+  // (On a typical Windows installation it is ~320 chars; on Linux ~287 chars.)
+  if (obj_path.size() <= 260) {
+    std::cout << "SKIPPED: generated path (" << obj_path.size()
+              << " chars) does not exceed MAX_PATH=260\n";
+    return;
+  }
+  // An environment that cannot create the directories skips the test instead of failing it.
+  if (!MakeDir(test_root) || !MakeDir(long_subdir)) {
+    RemoveTestDir(test_root);
+    std::cout << "SKIPPED: Cannot create long-path temp directory: "
+              << long_subdir << "\n";
+    return;
+  }
+  // Stage the model and its material file inside the long directory.
+  if (!CopyTestFile("../models/utf8-path-test.obj",
+                    long_subdir + "utf8-path-test.obj") ||
+      !CopyTestFile("../models/utf8-path-test.mtl",
+                    long_subdir + "utf8-path-test.mtl")) {
+    RemoveTestDir(test_root);
+    TEST_CHECK_(false, "Failed to copy test files to long-path directory");
+    return;
+  }
+
+  tinyobj::ObjReader reader;
+  bool ret = reader.ParseFromFile(obj_path);
+  // Clean up before asserting so the directories are removed even when a check fails.
+  RemoveTestDir(test_root);
+
+  if (!reader.Warning().empty())
+    std::cout << "WARN: " << reader.Warning() << "\n";
+  if (!reader.Error().empty())
+    std::cerr << "ERR: " << reader.Error() << "\n";
+
+  TEST_CHECK(ret == true);
+  TEST_CHECK(reader.GetShapes().size() == 1);
+  TEST_CHECK(reader.GetMaterials().size() == 1);
+}
+
 void test_cornell_box() {
   TEST_CHECK(true == TestLoadObj("../models/cornell_box.obj", gMtlBasePath));
 }
@@ -426,6 +634,13 @@ void test_catmark_torus_creases0() {
 
   TEST_CHECK(1 == shapes.size());
   TEST_CHECK(8 == shapes[0].mesh.tags.size());
+  TEST_CHECK(std::string("crease") == shapes[0].mesh.tags[0].name);
+  TEST_CHECK(2 == shapes[0].mesh.tags[0].intValues.size());
+  TEST_CHECK(1 == shapes[0].mesh.tags[0].floatValues.size());
+  TEST_CHECK(0 == shapes[0].mesh.tags[0].stringValues.size());
+  TEST_CHECK(1 == shapes[0].mesh.tags[0].intValues[0]);
+  TEST_CHECK(5 == shapes[0].mesh.tags[0].intValues[1]);
+  TEST_CHECK(FloatEquals(4.7f, shapes[0].mesh.tags[0].floatValues[0]));
 }
 
 void test_pbr() {
@@ -858,6 +1073,50 @@ void test_zero_face_idx_value_issue140() {
   TEST_CHECK(!err.empty());
 }
 
+void test_invalid_relative_vertex_index() {
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  // Negative test: LoadObj must reject this model and report an error message.
+  std::string warn;
+  std::string err;
+  bool ret =
+      tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                       "../models/invalid-relative-vertex-index.obj", gMtlBasePath);
+  // Echo the loader diagnostics so a failing run is easy to interpret.
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+  TEST_CHECK(false == ret);  // the load itself has to fail
+  TEST_CHECK(!err.empty());  // and the failure has to be explained in `err`
+}
+
+void test_invalid_texture_vertex_index() {
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  // Negative test: LoadObj must reject this model and report an error message.
+  std::string warn;
+  std::string err;
+  bool ret =
+      tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                       "../models/invalid-relative-texture-index.obj", gMtlBasePath);
+  // Echo the loader diagnostics so a failing run is easy to interpret.
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+  TEST_CHECK(false == ret);  // the load itself has to fail
+  TEST_CHECK(!err.empty());  // and the failure has to be explained in `err`
+}
+
 void test_texture_name_whitespace_issue145() {
   tinyobj::attrib_t attrib;
   std::vector<tinyobj::shape_t> shapes;
@@ -1308,6 +1567,1736 @@ void test_texres_texopt_issue248() {
   TEST_CHECK("input.jpg" == materials[0].diffuse_texname);
 }
 
+void test_mtl_filename_with_whitespace_issue46() {
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  // Regression test for issue 46: the model must load and yield the single material "green".
+  std::string warn;
+  std::string err;
+  bool ret =
+      tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                       "../models/mtl filename with whitespace issue46.obj",
+                       gMtlBasePath);
+  // Echo the loader diagnostics so a failing run is easy to interpret.
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+  TEST_CHECK(true == ret);
+  if (!TEST_CHECK(1 == materials.size())) return;  // materials[0] is read below
+  TEST_CHECK("green" == materials[0].name);
+}
+
+void test_face_missing_issue295() {
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  // Regression test for issue 295: load with triangulation on, then verify the face counts.
+  std::string warn;
+  std::string err;
+  bool ret = tinyobj::LoadObj(
+      &attrib, &shapes, &materials, &warn, &err,
+      "../models/issue-295-trianguation-failure.obj",
+      gMtlBasePath, /* triangulate */true);
+
+  TEST_CHECK(warn.empty());
+
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+
+  TEST_CHECK(true == ret);
+  // shapes[0] is read below, so stop here unless exactly one shape was loaded.
+  if (!TEST_CHECK(1 == shapes.size())) return;
+  // 14 quad faces are triangulated into 28 triangles.
+  TEST_CHECK(28 == shapes[0].mesh.num_face_vertices.size());
+  TEST_CHECK(28 == shapes[0].mesh.smoothing_group_ids.size());
+  TEST_CHECK(28 == shapes[0].mesh.material_ids.size());
+  TEST_CHECK((3 * 28) == shapes[0].mesh.indices.size()); // 28 triangle faces x 3
+}
+
+void test_comment_issue389() {
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  // Regression test for issue 389: the model must load successfully and without any warning.
+  std::string warn;
+  std::string err;
+  bool ret = tinyobj::LoadObj(
+      &attrib, &shapes, &materials, &warn, &err,
+      "../models/issue-389-comment.obj",
+      gMtlBasePath, /* triangulate */false);
+
+  TEST_CHECK(warn.empty());
+  // Still print the diagnostics so an unexpected warning/error is visible in the log.
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+
+  TEST_CHECK(true == ret);
+}
+
+void test_default_kd_for_multiple_materials_issue391() {
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  // Regression test for issue 391: checks the diffuse colour of two named materials.
+  std::string warn;
+  std::string err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              "../models/issue-391.obj", gMtlBasePath);
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+  // Expected diffuse colours: "has_map" -> kGrey, "has_kd" -> kRed.
+  const tinyobj::real_t kGrey[] = {0.6, 0.6, 0.6};
+  const tinyobj::real_t kRed[] = {1.0, 0.0, 0.0};
+
+  TEST_CHECK(true == ret);
+  TEST_CHECK(2 == materials.size());
+  for (size_t i = 0; i < materials.size(); ++i) {
+    const tinyobj::material_t& material = materials[i];
+    if (material.name == "has_map") {
+      for (int c = 0; c < 3; ++c) TEST_CHECK(material.diffuse[c] == kGrey[c]);
+    } else if (material.name == "has_kd") {
+      for (int c = 0; c < 3; ++c) TEST_CHECK(material.diffuse[c] == kRed[c]);
+    } else {
+      std::cerr << "Unexpected material found!" << std::endl;
+      TEST_CHECK(false);
+    }
+  }
+}
+
+void test_removeUtf8Bom() {
+  // A leading UTF-8 BOM (EF BB BF) is removed; the rest of the text is kept.
+  std::string withBOM = "\xEF\xBB\xBFhello world";
+  TEST_CHECK(tinyobj::removeUtf8Bom(withBOM) == "hello world");
+
+  // Input without a BOM comes back unchanged.
+  std::string noBOM = "hello world";
+  TEST_CHECK(tinyobj::removeUtf8Bom(noBOM) == "hello world");
+
+  // Only the first BOM byte (too short to be a BOM): left unchanged.
+  std::string shortStr = "\xEF";
+  TEST_CHECK(tinyobj::removeUtf8Bom(shortStr) == shortStr);
+  // Only the first two BOM bytes: still left unchanged.
+  std::string shortStr2 = "\xEF\xBB";
+  TEST_CHECK(tinyobj::removeUtf8Bom(shortStr2) == shortStr2);
+
+  // A string consisting of just the BOM becomes the empty string.
+  std::string justBom = "\xEF\xBB\xBF";
+  TEST_CHECK(tinyobj::removeUtf8Bom(justBom) == "");
+
+  // The empty string stays empty.
+  std::string emptyStr = "";
+  TEST_CHECK(tinyobj::removeUtf8Bom(emptyStr) == "");
+}
+
+void test_loadObj_with_BOM() {
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  // cube_w_BOM.obj must load like a regular cube: six shapes with the expected names.
+  std::string warn;
+  std::string err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              "../models/cube_w_BOM.obj", gMtlBasePath);
+
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+
+  TEST_CHECK(true == ret);
+  if (!TEST_CHECK(6 == shapes.size())) return;  // shapes[0] and shapes[1] are read below
+  TEST_CHECK(0 == shapes[0].name.compare("front cube"));
+  TEST_CHECK(0 == shapes[1].name.compare("back cube"));  // multiple whitespaces
+                                                         // are aggregated as
+                                                         // single white space.
+}
+
+
+void test_texcoord_w_component() {
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  // The third (w) texture coordinate of every vt line must end up in attrib.texcoord_ws.
+  std::string warn;
+  std::string err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              "../models/texcoord-w.obj", gMtlBasePath,
+                              /*triangulate*/ false);
+
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+
+  TEST_CHECK(true == ret);
+  TEST_CHECK(4 == attrib.texcoords.size() / 2);    // 4 uv pairs
+  if (!TEST_CHECK(4 == attrib.texcoord_ws.size())) return;  // 4 w values, indexed below
+  TEST_CHECK(FloatEquals(0.50f, attrib.texcoord_ws[0]));
+  TEST_CHECK(FloatEquals(0.25f, attrib.texcoord_ws[1]));
+  TEST_CHECK(FloatEquals(0.75f, attrib.texcoord_ws[2]));
+  TEST_CHECK(FloatEquals(0.00f, attrib.texcoord_ws[3]));
+}
+
+
+
+void test_texcoord_w_mixed_component() {
+  // Test a mix of vt lines with the optional w present and omitted.
+  // Lines without w should produce 0.0 in texcoord_ws.
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+
+  std::string warn;
+  std::string err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              "../models/texcoord-w-mixed.obj", gMtlBasePath,
+                              /*triangulate*/ false);
+
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+
+  TEST_CHECK(true == ret);
+  TEST_CHECK(4 == attrib.texcoords.size() / 2);    // 4 uv pairs
+  if (!TEST_CHECK(4 == attrib.texcoord_ws.size())) return;  // 4 w values, indexed below
+  TEST_CHECK(FloatEquals(0.50f, attrib.texcoord_ws[0]));  // w present
+  TEST_CHECK(FloatEquals(0.00f, attrib.texcoord_ws[1]));  // w omitted -> 0.0
+  TEST_CHECK(FloatEquals(0.75f, attrib.texcoord_ws[2]));  // w present
+  TEST_CHECK(FloatEquals(0.00f, attrib.texcoord_ws[3]));  // w omitted -> 0.0
+}
+
+void test_loadObjWithCallback_with_BOM() {
+  // Verify that LoadObjWithCallback correctly strips a UTF-8 BOM from the
+  // first line, just as LoadObj and LoadMtl do.
+  // We reuse cube_w_BOM.obj which starts with 0xEF 0xBB 0xBF followed by
+  // "mtllib cube_w_BOM.mtl".  Without BOM stripping the mtllib line would
+  // not be recognised and no materials would be loaded; with BOM stripping
+  // all 8 vertices and 6 groups are parsed.
+
+  struct CallbackData {
+    int vertex_count;
+    int group_count;
+    int material_count;
+    CallbackData() : vertex_count(0), group_count(0), material_count(0) {}
+  };
+
+  CallbackData data;
+  // Each callback only bumps a counter in `data`; unused arguments are left unnamed.
+  tinyobj::callback_t cb;
+  cb.vertex_cb = [](void *user_data, tinyobj::real_t, tinyobj::real_t,
+                    tinyobj::real_t, tinyobj::real_t) {
+    reinterpret_cast<CallbackData *>(user_data)->vertex_count++;
+  };
+  cb.group_cb = [](void *user_data, const char **, int num_names) {
+    if (num_names > 0)
+      reinterpret_cast<CallbackData *>(user_data)->group_count++;
+  };
+  cb.mtllib_cb = [](void *user_data, const tinyobj::material_t *,
+                    int num_materials) {
+    reinterpret_cast<CallbackData *>(user_data)->material_count +=
+        num_materials;
+  };
+
+  std::ifstream ifs("../models/cube_w_BOM.obj");
+  TEST_CHECK(ifs.is_open());
+
+  tinyobj::MaterialFileReader matReader(gMtlBasePath);
+  std::string warn, err;
+  bool ret = tinyobj::LoadObjWithCallback(ifs, cb, &data, &matReader, &warn, &err);
+
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+
+  TEST_CHECK(true == ret);
+  TEST_CHECK(8 == data.vertex_count);   // 8 vertices in cube_w_BOM.obj
+  TEST_CHECK(6 == data.group_count);    // 6 groups: front/back/right/top/left/bottom
+  TEST_CHECK(data.material_count > 0);  // materials loaded => mtllib line was parsed
+}
+
+void test_loadObjWithCallback_mtllib_failure_does_not_crash() {
+  // mtllib load failure should not crash callback path, and should report an
+  // error/warning while continuing OBJ parsing.
+  std::string obj_text = "mtllib test.mtl\nv 1 2 3\n";
+  std::istringstream obj_stream(obj_text);
+  // Material stream one byte over TINYOBJLOADER_STREAM_READER_MAX_BYTES, to make the MTL load fail.
+  std::string oversized_mtl(TINYOBJLOADER_STREAM_READER_MAX_BYTES + size_t(1), ' ');
+  std::istringstream mtl_stream(oversized_mtl);
+  tinyobj::MaterialStreamReader mtl_reader(mtl_stream);
+  // Counters bumped by the callbacks below.
+  struct CallbackData {
+    int vertex_count;
+    int mtllib_count;
+    CallbackData() : vertex_count(0), mtllib_count(0) {}
+  } data;
+
+  tinyobj::callback_t cb;
+  cb.vertex_cb = [](void *user_data, tinyobj::real_t, tinyobj::real_t,
+                    tinyobj::real_t, tinyobj::real_t) {
+    reinterpret_cast<CallbackData *>(user_data)->vertex_count++;
+  };
+  cb.mtllib_cb = [](void *user_data, const tinyobj::material_t *, int) {
+    reinterpret_cast<CallbackData *>(user_data)->mtllib_count++;
+  };
+
+  std::string warn, err;
+  bool ret = tinyobj::LoadObjWithCallback(obj_stream, cb, &data, &mtl_reader,
+                                          &warn, &err);
+  // Parsing succeeds and sees the vertex; the material callback is never invoked.
+  TEST_CHECK(ret == true);
+  TEST_CHECK(data.vertex_count == 1);
+  TEST_CHECK(data.mtllib_count == 0);
+  TEST_CHECK(warn.find("Failed to load material file(s)") != std::string::npos);
+  TEST_CHECK(err.find("input stream too large") != std::string::npos);
+}
+
+void test_mtllib_empty_filename_is_ignored_loadobj() {
+  std::string obj_text = "mtllib    \nv 1 2 3\n";  // mtllib followed only by spaces
+  std::istringstream obj_stream(obj_text);
+  // A valid material stream is supplied; the test expects it NOT to be loaded.
+  std::string mtl_text = "newmtl should_not_load\nKd 1 1 1\n";
+  std::istringstream mtl_stream(mtl_text);
+  tinyobj::MaterialStreamReader mtl_reader(mtl_stream);
+
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &obj_stream, &mtl_reader);
+  // Expected: success, no materials, a warning about the empty filename, and no error.
+  TEST_CHECK(ret == true);
+  TEST_CHECK(materials.empty());
+  TEST_CHECK(warn.find("Looks like empty filename for mtllib") != std::string::npos);
+  TEST_CHECK(err.empty());
+}
+
+void test_mtllib_empty_filename_is_ignored_callback() {
+  std::string obj_text = "mtllib    \nv 1 2 3\n";  // mtllib followed only by spaces
+  std::istringstream obj_stream(obj_text);
+  // A valid material stream is supplied; the test expects the mtllib callback NOT to fire.
+  std::string mtl_text = "newmtl should_not_load\nKd 1 1 1\n";
+  std::istringstream mtl_stream(mtl_text);
+  tinyobj::MaterialStreamReader mtl_reader(mtl_stream);
+  // Counters bumped by the callbacks below.
+  struct CallbackData {
+    int vertex_count;
+    int mtllib_count;
+    CallbackData() : vertex_count(0), mtllib_count(0) {}
+  } data;
+
+  tinyobj::callback_t cb;
+  cb.vertex_cb = [](void *user_data, tinyobj::real_t, tinyobj::real_t,
+                    tinyobj::real_t, tinyobj::real_t) {
+    reinterpret_cast<CallbackData *>(user_data)->vertex_count++;
+  };
+  cb.mtllib_cb = [](void *user_data, const tinyobj::material_t *, int) {
+    reinterpret_cast<CallbackData *>(user_data)->mtllib_count++;
+  };
+
+  std::string warn, err;
+  bool ret = tinyobj::LoadObjWithCallback(obj_stream, cb, &data, &mtl_reader,
+                                          &warn, &err);
+  // Expected: success, one vertex, no mtllib callback, the empty-filename warning, no error.
+  TEST_CHECK(ret == true);
+  TEST_CHECK(data.vertex_count == 1);
+  TEST_CHECK(data.mtllib_count == 0);
+  TEST_CHECK(warn.find("Looks like empty filename for mtllib") != std::string::npos);
+  TEST_CHECK(err.empty());
+}
+
+void test_usemtl_callback_trims_trailing_comment() {
+  std::string obj_text =
+      "mtllib test.mtl\n"
+      "usemtl mat   # trailing comment\n"
+      "v 0 0 0\n";
+  std::istringstream obj_stream(obj_text);
+  // Material stream defining the single material "mat".
+  std::string mtl_text = "newmtl mat\nKd 1 1 1\n";
+  std::istringstream mtl_stream(mtl_text);
+  tinyobj::MaterialStreamReader mtl_reader(mtl_stream);
+  // Records how often usemtl_cb fired and the arguments of its last invocation.
+  struct CallbackData {
+    int usemtl_count;
+    int last_material_id;
+    std::string last_name;
+    CallbackData() : usemtl_count(0), last_material_id(-1), last_name() {}
+  } data;
+
+  tinyobj::callback_t cb;
+  cb.usemtl_cb = [](void *user_data, const char *name, int material_id) {
+    CallbackData *d = reinterpret_cast<CallbackData *>(user_data);
+    d->usemtl_count++;
+    d->last_name = name ? name : "";  // tolerate a null name
+    d->last_material_id = material_id;
+  };
+
+  std::string warn, err;
+  bool ret = tinyobj::LoadObjWithCallback(obj_stream, cb, &data, &mtl_reader,
+                                          &warn, &err);
+  // The callback must receive the bare name "mat" (no spaces/comment) and material id 0.
+  TEST_CHECK(ret == true);
+  TEST_CHECK(data.usemtl_count == 1);
+  TEST_CHECK(data.last_name == "mat");
+  TEST_CHECK(data.last_material_id == 0);
+  TEST_CHECK(err.empty());
+}
+
+void test_tag_triple_huge_count_is_safely_rejected() {
+  std::string obj_text =
+      "v 0 0 0\n"
+      "v 1 0 0\n"
+      "v 0 1 0\n"
+      "f 1 2 3\n"
+      "t crease 999999999999999999999999999999999999999999999999999999999999999999/0/0\n";
+  std::istringstream obj_stream(obj_text);
+  std::string mtl_text;
+  std::istringstream mtl_stream(mtl_text);
+  tinyobj::MaterialStreamReader mtl_reader(mtl_stream);
+  // The tag line above carries an absurdly large count; the load must still succeed.
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &obj_stream, &mtl_reader);
+  // Expected: one shape with one tag that holds no values. Guards protect the indexing below.
+  TEST_CHECK(ret == true);
+  if (!TEST_CHECK(shapes.size() == size_t(1))) return;
+  if (!TEST_CHECK(shapes[0].mesh.tags.size() == size_t(1))) return;
+  TEST_CHECK(shapes[0].mesh.tags[0].intValues.size() == size_t(0));
+  TEST_CHECK(shapes[0].mesh.tags[0].floatValues.size() == size_t(0));
+  TEST_CHECK(shapes[0].mesh.tags[0].stringValues.size() == size_t(0));
+}
+
+
+
+// Verify that mmap-based loading (TINYOBJLOADER_USE_MMAP) produces the same
+// vertex/shape/material data as the standard ifstream-based path.
+void test_file_and_stream_load_agree() {
+  const char *obj_file = "../models/cornell_box.obj";
+
+  // Load using the file path API (uses mmap when TINYOBJLOADER_USE_MMAP is defined).
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              obj_file, gMtlBasePath);
+  if (!warn.empty()) std::cout << "WARN: " << warn << "\n";
+  if (!err.empty()) std::cerr << "ERR: " << err << "\n";
+  TEST_CHECK(ret == true);
+
+  // Also load via the stream API (always uses ifstream-equivalent path).
+  tinyobj::attrib_t attrib2;
+  std::vector<tinyobj::shape_t> shapes2;
+  std::vector<tinyobj::material_t> materials2;
+  std::string warn2, err2;
+  std::ifstream ifs(obj_file);
+  TEST_CHECK(ifs.good());
+  tinyobj::MaterialFileReader matReader(gMtlBasePath);
+  bool ret2 = tinyobj::LoadObj(&attrib2, &shapes2, &materials2, &warn2, &err2,
+                               &ifs, &matReader);
+  TEST_CHECK(ret2 == true);
+
+  // Compare results. The loop is bounded by both vectors so a size mismatch fails cleanly.
+  TEST_CHECK(attrib.vertices.size() == attrib2.vertices.size());
+  TEST_CHECK(attrib.normals.size() == attrib2.normals.size());
+  TEST_CHECK(shapes.size() == shapes2.size());
+  TEST_CHECK(materials.size() == materials2.size());
+  for (size_t i = 0; i < shapes.size() && i < shapes2.size(); i++) {
+    TEST_CHECK(shapes[i].mesh.indices.size() == shapes2[i].mesh.indices.size());
+  }
+}
+
+// Verify robustness: loading from an in-memory copy of the file (parsed via
+// std::istringstream) is consistent with standard file loading.
+void test_load_from_memory_buffer() {
+  const char *obj_file = "../models/cube.obj";
+
+  // Read file into memory manually.
+  std::ifstream file(obj_file, std::ios::binary | std::ios::ate);
+  if (!TEST_CHECK(file.good())) return;  // a failed open would make tellg() return -1
+  std::streamsize sz = file.tellg();
+  file.seekg(0, std::ios::beg);
+  std::vector<char> buf(static_cast<size_t>(sz));
+  TEST_CHECK(file.read(buf.data(), sz).good());
+  file.close();
+
+  // Parse from the memory buffer via the stream API.
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  // Copy the memory buffer into a std::string and parse via std::istringstream.
+  std::string obj_text(buf.begin(), buf.end());
+  std::istringstream obj_ss(obj_text);
+  tinyobj::MaterialFileReader matReader(gMtlBasePath);
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &obj_ss, &matReader);
+  if (!warn.empty()) std::cout << "WARN: " << warn << "\n";
+  if (!err.empty()) std::cerr << "ERR: " << err << "\n";
+  TEST_CHECK(ret == true);
+
+  // Compare with direct file load to check consistency.
+  tinyobj::attrib_t attrib2;
+  std::vector<tinyobj::shape_t> shapes2;
+  std::vector<tinyobj::material_t> materials2;
+  std::string warn2, err2;
+  bool ret2 = tinyobj::LoadObj(&attrib2, &shapes2, &materials2, &warn2, &err2,
+                               obj_file, gMtlBasePath);
+  TEST_CHECK(ret2 == true);
+  TEST_CHECK(attrib.vertices.size() == attrib2.vertices.size());
+  TEST_CHECK(shapes.size() == shapes2.size());
+}
+
+
+// --- Error reporting tests ---
+
+void test_streamreader_column_tracking() {
+  const char *input = "hello world\nfoo\n";
+  tinyobj::StreamReader sr(input, strlen(input));
+  // A fresh reader is expected to report line 1, column 1 (both are 1-based).
+  TEST_CHECK(sr.col_num() == 1);
+  TEST_CHECK(sr.line_num() == 1);
+
+  // Advance 5 chars: "hello"
+  sr.advance(5);
+  TEST_CHECK(sr.col_num() == 6);  // col is 1-based, after 5 chars -> col 6
+  TEST_CHECK(sr.line_num() == 1);
+
+  // skip_space: " "
+  sr.skip_space();
+  TEST_CHECK(sr.col_num() == 7);
+
+  // read_token: "world"
+  std::string tok = sr.read_token();
+  TEST_CHECK(tok == "world");
+  TEST_CHECK(sr.col_num() == 12);  // 7 + 5 characters of "world"
+
+  // skip_line: "\n"
+  sr.skip_line();
+  TEST_CHECK(sr.line_num() == 2);
+  TEST_CHECK(sr.col_num() == 1);  // the column restarts on the new line
+
+  // get each char of "foo"
+  sr.get();  // 'f'
+  TEST_CHECK(sr.col_num() == 2);
+  sr.get();  // 'o'
+  sr.get();  // 'o'
+  TEST_CHECK(sr.col_num() == 4);
+}
+
+void test_stream_load_from_current_offset() {
+  std::string prefix = "v 0 0 0\n";
+  std::string payload = "v 1 2 3\n";
+  std::string text = prefix + payload;
+  std::istringstream obj_ss(text);
+  obj_ss.seekg(static_cast<std::streamoff>(prefix.size()), std::ios::beg);
+  // The stream now points at `payload`; only that vertex is expected in the result.
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &obj_ss, NULL);
+  if (!warn.empty()) std::cout << "WARN: " << warn << "\n";
+  if (!err.empty()) std::cerr << "ERR: " << err << "\n";
+  TEST_CHECK(ret == true);
+  if (!TEST_CHECK(attrib.vertices.size() == 3)) return;  // elements 0..2 are read below
+  TEST_CHECK(attrib.vertices[0] == 1.0f);
+  TEST_CHECK(attrib.vertices[1] == 2.0f);
+  TEST_CHECK(attrib.vertices[2] == 3.0f);
+}
+
+void test_stream_load_rejects_oversized_input() {
+  std::string oversized(TINYOBJLOADER_STREAM_READER_MAX_BYTES + size_t(1), ' ');
+  std::istringstream obj_ss(oversized);
+  // The input is one byte over TINYOBJLOADER_STREAM_READER_MAX_BYTES; the load is expected to fail.
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &obj_ss, NULL);
+  TEST_CHECK(ret == false);  // the load is refused
+  TEST_CHECK(err.find("input stream too large") != std::string::npos);  // with this reason
+}
+
+void test_error_format_clang_style() {
+  const char *input = "v 1.0 abc 3.0\n";
+  tinyobj::StreamReader sr(input, strlen(input));
+  // format_error() is checked for three parts: location header, source line, caret.
+  // Position to the 'a' in 'abc' (column 7)
+  sr.advance(6);  // past "v 1.0 "
+  TEST_CHECK(sr.col_num() == 7);
+
+  std::string err = sr.format_error("test.obj", "expected number");
+  // Should contain file:line:col
+  TEST_CHECK(err.find("test.obj:1:7: error: expected number") != std::string::npos);
+  // Should contain the source line
+  TEST_CHECK(err.find("v 1.0 abc 3.0") != std::string::npos);
+  // Should contain a caret
+  TEST_CHECK(err.find("^") != std::string::npos);
+}
+
+void test_error_stack() {
+  const char *input = "test\n";
+  tinyobj::StreamReader sr(input, strlen(input));
+  // A fresh reader carries no errors.
+  TEST_CHECK(!sr.has_errors());
+  TEST_CHECK(sr.error_stack().empty());
+  // Two pushed errors must both be recorded.
+  sr.push_error("error 1\n");
+  sr.push_error("error 2\n");
+  TEST_CHECK(sr.has_errors());
+  TEST_CHECK(sr.error_stack().size() == 2);
+  // get_errors() must return text containing every pushed message.
+  std::string all = sr.get_errors();
+  TEST_CHECK(all.find("error 1") != std::string::npos);
+  TEST_CHECK(all.find("error 2") != std::string::npos);
+  // clear_errors() must bring the reader back to the error-free state.
+  sr.clear_errors();
+  TEST_CHECK(!sr.has_errors());
+  TEST_CHECK(sr.error_stack().empty());
+}
+
+// A vertex line whose second coordinate is not numeric ("abc") must make
+// LoadObj fail, and the error text must name both the problem and the token.
+void test_malformed_vertex_error() {
+  std::istringstream bad_obj("v 1.0 abc 3.0\n");
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &bad_obj, NULL);
+
+  // A malformed vertex coordinate is unrecoverable, so the load must fail.
+  TEST_CHECK(!loaded);
+  TEST_CHECK(errors.find("expected number") != std::string::npos);
+  TEST_CHECK(errors.find("abc") != std::string::npos);
+}
+
+// A material whose Ns value is not numeric ("abc") must be reported through
+// the error string; the call's result is not used, so the text is the only
+// thing this test can inspect.
+void test_malformed_mtl_error() {
+  std::istringstream bad_mtl("newmtl test\nNs abc\n");
+
+  std::map<std::string, int> material_map;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  tinyobj::LoadMtl(&material_map, &material_list, &bad_mtl, &warnings,
+                   &errors);
+
+  TEST_CHECK(errors.find("expected number") != std::string::npos);
+  TEST_CHECK(errors.find("abc") != std::string::npos);
+}
+
+// Well-formed input must still load cleanly: success, empty error text and
+// both vertices (six floats) present. The original author notes that the
+// older non-error sr_parseReal path remains in use by the callback API.
+void test_parse_error_backward_compat() {
+  std::istringstream valid_obj(
+      "v 1.0 2.0 3.0\n"
+      "v 4.0 5.0 6.0\n"
+      "f 1 2 1\n");
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &valid_obj, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(errors.empty());
+  TEST_CHECK(geometry.vertices.size() == 6);
+}
+
+// SplitString with ' ' as delimiter and '\\' as escape character:
+//  - a backslash that is not followed by the delimiter stays in the token;
+//  - a backslash followed by the delimiter yields a literal space.
+void test_split_string_preserves_non_escape_backslash() {
+  const char delimiter = ' ';
+  const char escape = '\\';
+
+  // Backslash used as a path separator: kept verbatim.
+  {
+    std::vector<std::string> parts;
+    tinyobj::SplitString("subdir\\file.mtl", delimiter, escape, parts);
+    TEST_CHECK(parts.size() == 1);
+    TEST_CHECK(parts[0] == "subdir\\file.mtl");
+  }
+
+  // Backslash escaping a space: the space joins the token.
+  {
+    std::vector<std::string> parts;
+    tinyobj::SplitString("a\\ b.mtl", delimiter, escape, parts);
+    TEST_CHECK(parts.size() == 1);
+    TEST_CHECK(parts[0] == "a b.mtl");
+  }
+}
+
+// Loads ../models/numeric-edge-cases.obj and compares the parsed vertex,
+// normal and texcoord components against their expected float values.
+// Vertices v8 (tiny values) and v9 (huge values) are deliberately left out
+// of the value table: for those the test only requires that parsing
+// succeeded.
+void test_numeric_edge_cases() {
+  // One (index into the flat array, expected value) pair per checked float.
+  struct ExpectedComponent {
+    size_t index;
+    float value;
+  };
+
+  const ExpectedComponent vertex_expectations[] = {
+      // v0: 0 0 0
+      {0, 0.0f}, {1, 0.0f}, {2, 0.0f},
+      // v1: 1.5 -2.25 3.125
+      {3, 1.5f}, {4, -2.25f}, {5, 3.125f},
+      // v2: .5 -.75 .001 (leading decimal dot)
+      {6, 0.5f}, {7, -0.75f}, {8, 0.001f},
+      // v3: 1. -2. 100. (trailing dot)
+      {9, 1.0f}, {10, -2.0f}, {11, 100.0f},
+      // v4: 1.5e2 -3.0e-4 7e10 (scientific notation, lowercase)
+      {12, 150.0f}, {13, -3.0e-4f}, {14, 7e10f},
+      // v5: 2.5E3 -1.0E-2 4E+5 (scientific notation, uppercase)
+      {15, 2500.0f}, {16, -0.01f}, {17, 400000.0f},
+      // v6: +1.0 +0.5 +100 (leading plus)
+      {18, 1.0f}, {19, 0.5f}, {20, 100.0f},
+      // v7: 007.5 -003.14 000.001 (leading zeros)
+      {21, 7.5f}, {22, -3.14f}, {23, 0.001f},
+      // v8: 1e-300 -1e-300 5e-310 -- below float range, value not checked
+      // v9: 1.7976931348623157e+308 -1e+308 1e+307 -- above float range,
+      //     value not checked
+      // v10: -0 -0.0 -0.0e0 (negative zero)
+      {30, 0.0f}, {31, 0.0f}, {32, 0.0f},
+      // v11: 1.5e002 -3.0e+007 7e-003 (exponent with leading zeros)
+      {33, 150.0f}, {34, -3.0e7f}, {35, 7e-3f},
+      // v12: 0 1 9 (single digit values)
+      {36, 0.0f}, {37, 1.0f}, {38, 9.0f},
+      // v13: 1e+0 1e-0 -1e+0 (exponent zero)
+      {39, 1.0f}, {40, 1.0f}, {41, -1.0f},
+      // v14: pi, e, sqrt(2) (high precision)
+      {42, 3.141592653589793f}, {43, 2.718281828459045f},
+      {44, 1.4142135623730951f},
+      // v15: 1e1 1e-1 -1e1 (simple exponent)
+      {45, 10.0f}, {46, 0.1f}, {47, -10.0f}};
+  const size_t vertex_expectation_count =
+      sizeof(vertex_expectations) / sizeof(vertex_expectations[0]);
+
+  // Three normals, listed component by component.
+  const float normal_expectations[] = {0.0f,      1.0f,  0.0f,
+                                       -0.707107f, 0.0f, 0.707107f,
+                                       1e-5f,     -1e-5f, 0.99999f};
+  const size_t normal_expectation_count =
+      sizeof(normal_expectations) / sizeof(normal_expectations[0]);
+
+  // Four texcoords, listed component by component.
+  const float texcoord_expectations[] = {0.0f, 0.0f, 1.0f,  1.0f,
+                                         0.5f, 0.5f, 0.25f, 0.75f};
+  const size_t texcoord_expectation_count =
+      sizeof(texcoord_expectations) / sizeof(texcoord_expectations[0]);
+
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn;
+  std::string err;
+  const bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                                    "../models/numeric-edge-cases.obj");
+
+  if (!warn.empty()) {
+    std::cout << "WARN: " << warn << std::endl;
+  }
+  if (!err.empty()) {
+    std::cerr << "ERR: " << err << std::endl;
+  }
+
+  TEST_CHECK(ret);
+
+  // 16 vertices * 3 components = 48
+  TEST_CHECK(attrib.vertices.size() == 48);
+  for (size_t i = 0; i < vertex_expectation_count; ++i) {
+    TEST_CHECK(FloatEquals(vertex_expectations[i].value,
+                           attrib.vertices[vertex_expectations[i].index]));
+  }
+
+  // 3 normals * 3 components = 9
+  TEST_CHECK(attrib.normals.size() == 9);
+  for (size_t i = 0; i < normal_expectation_count; ++i) {
+    TEST_CHECK(FloatEquals(normal_expectations[i], attrib.normals[i]));
+  }
+
+  // 4 texcoords * 2 components = 8
+  TEST_CHECK(attrib.texcoords.size() == 8);
+  for (size_t i = 0; i < texcoord_expectation_count; ++i) {
+    TEST_CHECK(FloatEquals(texcoord_expectations[i], attrib.texcoords[i]));
+  }
+}
+
+// Parses an in-memory OBJ whose first coordinates are nan/inf spellings in
+// several cases and signs. The values produced for nan/inf themselves are
+// not asserted; the test requires that loading succeeds and that the
+// ordinary coordinates next to them are parsed correctly.
+void test_numeric_nan_inf() {
+  const std::string nan_inf_obj =
+      "v nan 0 0\n"         // v0
+      "v NaN 1 1\n"         // v1
+      "v NAN 2 2\n"         // v2
+      "v inf 0 0\n"         // v3
+      "v -inf 1 1\n"        // v4
+      "v Inf 2 2\n"         // v5
+      "v -Inf 3 3\n"        // v6
+      "v INF 4 4\n"         // v7
+      "v infinity 0 0\n"    // v8
+      "v -infinity 1 1\n"   // v9
+      "v +nan 0 0\n"        // v10
+      "v +inf 0 0\n"        // v11
+      "f 1 2 3\n";
+  std::istringstream nan_inf_stream(nan_inf_obj);
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &nan_inf_stream, NULL);
+
+  if (!warnings.empty()) {
+    std::cout << "WARN: " << warnings << std::endl;
+  }
+  if (!errors.empty()) {
+    std::cerr << "ERR: " << errors << std::endl;
+  }
+
+  TEST_CHECK(loaded);
+  // 12 vertices * 3 components = 36
+  TEST_CHECK(geometry.vertices.size() == 36);
+
+  // What nan/inf map to is implementation-defined (per the original note:
+  // nan -> 0.0, inf -> max, -inf -> lowest), so only the neighbouring
+  // finite coordinates are compared.
+
+  // v0 "nan 0 0": y and z are 0.
+  TEST_CHECK(FloatEquals(0.0f, geometry.vertices[1]));
+  TEST_CHECK(FloatEquals(0.0f, geometry.vertices[2]));
+
+  // v3 "inf 0 0": y and z are 0.
+  TEST_CHECK(FloatEquals(0.0f, geometry.vertices[10]));
+  TEST_CHECK(FloatEquals(0.0f, geometry.vertices[11]));
+
+  // v4 "-inf 1 1": y and z are 1.
+  TEST_CHECK(FloatEquals(1.0f, geometry.vertices[13]));
+  TEST_CHECK(FloatEquals(1.0f, geometry.vertices[14]));
+}
+
+// Confirms that loading from a stream parses a mix of numeric spellings
+// (exponents, leading plus, leading/trailing dot) to the expected floats.
+void test_numeric_from_stream() {
+  const std::string numeric_obj =
+      "v 1.5e2 -3.0e-4 +7.5\n"
+      "v .001 -.999 1.\n"
+      "v 0 0 0\n"
+      "f 1 2 3\n";
+  std::istringstream numeric_stream(numeric_obj);
+
+  // Expected components in file order, three per vertex.
+  const float expected[] = {150.0f, -3.0e-4f, 7.5f,   // v0
+                            0.001f, -0.999f,  1.0f,   // v1
+                            0.0f,   0.0f,     0.0f};  // v2
+  const size_t expected_count = sizeof(expected) / sizeof(expected[0]);
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &numeric_stream, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(geometry.vertices.size() == 9);
+
+  for (size_t i = 0; i < expected_count; ++i) {
+    TEST_CHECK(FloatEquals(expected[i], geometry.vertices[i]));
+  }
+}
+
+// Regression test for a coordinate that overflows double ("1e9999").
+// Per the original notes, tryParseDouble parses into a temporary and only
+// writes *result on success, and the StreamReader-based parser reports the
+// overflow as a parse error. The test therefore expects LoadObj to return
+// false with a non-empty error string (and, implicitly, not to crash).
+void test_numeric_overflow_preserves_default() {
+  const std::string overflow_obj =
+      "v 1e9999 2.0 3.0\n"  // x overflows
+      "f 1\n";
+  std::istringstream overflow_stream(overflow_obj);
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &overflow_stream, NULL);
+
+  TEST_CHECK(!loaded);
+  TEST_CHECK(!errors.empty());
+}
+
+// Regression test: padded values, CRLF endings, tab separators, blank lines
+// and whitespace-only lines must all be tolerated, and the three vertices
+// must come out as 1..9 in order.
+void test_numeric_empty_and_whitespace() {
+  std::string spaced_obj;
+  spaced_obj += "v   1.0   2.0   3.0  \n";  // extra whitespace around values
+  spaced_obj += "v 4.0 5.0 6.0\r\n";        // Windows line ending
+  spaced_obj += "v\t7.0\t8.0\t9.0\n";       // tab-separated
+  spaced_obj += "\n";                       // blank line
+  spaced_obj += "   \n";                    // whitespace-only line
+  spaced_obj += "f 1 2 3\n";
+  std::istringstream spaced_stream(spaced_obj);
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &spaced_stream, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(geometry.vertices.size() == 9);
+
+  // Component i is expected to hold the value i + 1 (1.0 through 9.0).
+  for (size_t i = 0; i < 9; ++i) {
+    const float wanted = static_cast<float>(i + 1);
+    TEST_CHECK(FloatEquals(wanted, geometry.vertices[i]));
+  }
+}
+
+// Regression test: purely alphabetic "coordinates" must be rejected rather
+// than crash. LoadObj is expected to return false with an error that
+// mentions "expected number".
+void test_numeric_garbage_input() {
+  const std::string garbage_obj =
+      "v abc def ghi\n"  // nothing numeric on this line
+      "f 1\n";
+  std::istringstream garbage_stream(garbage_obj);
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &garbage_stream, NULL);
+
+  TEST_CHECK(!loaded);
+  TEST_CHECK(!errors.empty());
+  TEST_CHECK(errors.find("expected number") != std::string::npos);
+}
+
+// Regression test: numbers written with far more digits than a float can
+// hold must parse without failure. Only the first two components are
+// compared, against the floats they are expected to round to.
+void test_numeric_extreme_precision() {
+  const std::string long_digits_obj =
+      "v 1.00000000000000000000000000000000000001 "
+      "2.99999999999999999999999999999999999999 "
+      "0.00000000000000000000000000000000000001\n"
+      "v 123456789012345678.0 -123456789012345678.0 0.0\n"
+      "f 1 2\n";
+  std::istringstream long_digits_stream(long_digits_obj);
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &long_digits_stream, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(geometry.vertices.size() == 6);
+
+  // 1.000...01 and 2.999...99 should land on 1.0f and 3.0f respectively.
+  TEST_CHECK(FloatEquals(1.0f, geometry.vertices[0]));
+  TEST_CHECK(FloatEquals(3.0f, geometry.vertices[1]));
+}
+
+// ---------------------------------------------------------------------------
+// Additional coverage tests
+// ---------------------------------------------------------------------------
+
+// StreamReader: direct unit tests for public API methods
+// Exercises the StreamReader query methods at the buffer boundaries: on an
+// empty buffer, on a one-character buffer, and after that character has
+// been consumed.
+void test_streamreader_eof_and_remaining() {
+  // Case 1: zero-length input.
+  {
+    tinyobj::StreamReader empty_reader("", 0);
+    TEST_CHECK(true == empty_reader.eof());
+    TEST_CHECK(0 == empty_reader.remaining());
+    TEST_CHECK('\0' == empty_reader.peek());
+    TEST_CHECK('\0' == empty_reader.peek_at(0));
+    TEST_CHECK('\0' == empty_reader.peek_at(100));
+    TEST_CHECK('\0' == empty_reader.get());
+    TEST_CHECK(false == empty_reader.char_at(0, 'a'));
+    TEST_CHECK(false == empty_reader.match("abc", 3));
+    TEST_CHECK(1 == empty_reader.line_num());
+    TEST_CHECK(1 == empty_reader.col_num());
+  }
+
+  // Case 2: exactly one character.
+  {
+    tinyobj::StreamReader one_char_reader("x", 1);
+    TEST_CHECK(false == one_char_reader.eof());
+    TEST_CHECK(1 == one_char_reader.remaining());
+    TEST_CHECK('x' == one_char_reader.peek());
+    TEST_CHECK(true == one_char_reader.char_at(0, 'x'));
+    TEST_CHECK(false == one_char_reader.char_at(0, 'y'));
+    // Offset 1 lies past the end of the buffer.
+    TEST_CHECK(false == one_char_reader.char_at(1, 'x'));
+    TEST_CHECK(true == one_char_reader.match("x", 1));
+    TEST_CHECK(false == one_char_reader.match("xy", 2));
+
+    // Consume the only character; the reader is now at EOF.
+    const char consumed = one_char_reader.get();
+    TEST_CHECK('x' == consumed);
+    TEST_CHECK(true == one_char_reader.eof());
+    TEST_CHECK(0 == one_char_reader.remaining());
+
+    // Queries at EOF must stay well-defined.
+    TEST_CHECK('\0' == one_char_reader.peek());
+    TEST_CHECK('\0' == one_char_reader.peek_at(0));
+    TEST_CHECK(false == one_char_reader.match("a", 1));
+    TEST_CHECK(false == one_char_reader.char_at(0, 'a'));
+  }
+}
+
+// Drives skip_space(), read_token(), at_line_end(), skip_line() and
+// read_line() over "  hello \t world\r\nline2\n" and checks the cursor
+// position reported after each step.
+void test_streamreader_skip_and_read() {
+  const char *source = "  hello \t world\r\nline2\n";
+  tinyobj::StreamReader reader(source, strlen(source));
+
+  // Two leading spaces are skipped, leaving the cursor on 'h' in column 3.
+  reader.skip_space();
+  TEST_CHECK('h' == reader.peek());
+  TEST_CHECK(3 == reader.col_num());
+
+  // First token.
+  std::string word = reader.read_token();
+  TEST_CHECK(word == "hello");
+
+  // The " \t " run between the words is skipped.
+  reader.skip_space();
+  TEST_CHECK('w' == reader.peek());
+
+  // Second token.
+  word = reader.read_token();
+  TEST_CHECK(word == "world");
+
+  // What follows is "\r\n", so the reader reports end of line.
+  TEST_CHECK(true == reader.at_line_end());
+
+  // Stepping over the CRLF lands on line 2, column 1.
+  reader.skip_line();
+  TEST_CHECK(2 == reader.line_num());
+  TEST_CHECK(1 == reader.col_num());
+
+  // read_line() yields the text of the second line; the line number is
+  // still expected to be 2 afterwards.
+  const std::string second_line = reader.read_line();
+  TEST_CHECK(second_line == "line2");
+  TEST_CHECK(2 == reader.line_num());
+}
+
+// Checks match() against exact, mismatching and over-long patterns, then
+// checks that advance() moves the cursor and stops at EOF when asked to
+// go past the end.
+void test_streamreader_match_and_advance() {
+  const char *source = "mtllib foo.mtl\n";
+  tinyobj::StreamReader reader(source, strlen(source));
+
+  // Prefix match, single-character mismatch, whole-buffer match.
+  TEST_CHECK(true == reader.match("mtllib", 6));
+  TEST_CHECK(false == reader.match("mtlliX", 6));
+  TEST_CHECK(true == reader.match("mtllib foo.mtl\n", 15));
+  // A pattern one byte longer than the remaining input cannot match.
+  TEST_CHECK(false == reader.match("mtllib foo.mtl\nX", 16));
+
+  // Step over "mtllib " (7 characters): cursor on 'f', column 8.
+  reader.advance(7);
+  TEST_CHECK('f' == reader.peek());
+  TEST_CHECK(8 == reader.col_num());
+
+  // Asking for far more than is left must end at EOF.
+  reader.advance(1000);
+  TEST_CHECK(true == reader.eof());
+}
+
+// current_line_text() must return the full text of the line the cursor is
+// on, whether the cursor sits at the start of that line or inside it.
+void test_streamreader_current_line_text() {
+  const char *source = "first line\nsecond line\nthird\n";
+  tinyobj::StreamReader reader(source, strlen(source));
+
+  // Cursor at the very beginning: first line.
+  std::string line_text = reader.current_line_text();
+  TEST_CHECK(line_text == "first line");
+
+  // After skipping one line the cursor is at the start of the second.
+  reader.skip_line();
+  line_text = reader.current_line_text();
+  TEST_CHECK(line_text == "second line");
+
+  // Move three characters into "second line" (past "sec"); the reported
+  // line text must still be the whole line.
+  reader.advance(3);
+  line_text = reader.current_line_text();
+  TEST_CHECK(line_text == "second line");
+}
+
+// Pushes errors one at a time and checks has_errors(), error_stack().size()
+// and get_errors() after each step, then verifies clear_errors() empties
+// everything again.
+void test_streamreader_error_stack() {
+  const char *source = "hello\n";
+  tinyobj::StreamReader reader(source, strlen(source));
+
+  // Initially clean.
+  TEST_CHECK(false == reader.has_errors());
+  TEST_CHECK(reader.get_errors().empty());
+
+  // One error.
+  reader.push_error("error 1");
+  TEST_CHECK(true == reader.has_errors());
+  TEST_CHECK(1 == reader.error_stack().size());
+
+  // Two errors; the combined string must contain both messages.
+  reader.push_error("error 2");
+  TEST_CHECK(2 == reader.error_stack().size());
+  TEST_CHECK(reader.get_errors().find("error 1") != std::string::npos);
+  TEST_CHECK(reader.get_errors().find("error 2") != std::string::npos);
+
+  // Back to clean.
+  reader.clear_errors();
+  TEST_CHECK(false == reader.has_errors());
+  TEST_CHECK(reader.get_errors().empty());
+}
+
+// A zero-byte OBJ stream is valid input: the load succeeds and produces no
+// vertices and no shapes.
+void test_empty_obj_file() {
+  std::istringstream empty_stream("");
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &empty_stream, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(geometry.vertices.empty());
+  TEST_CHECK(shape_list.empty());
+}
+
+// A stream that holds nothing but the three-byte UTF-8 byte order mark
+// (EF BB BF) must load successfully and yield no vertices.
+void test_bom_only_obj() {
+  const std::string bom_bytes = "\xEF\xBB\xBF";
+  std::istringstream bom_stream(bom_bytes);
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &bom_stream, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(geometry.vertices.empty());
+}
+
+// The last line of the input has no terminating newline; it must still be
+// parsed, so the second vertex (4, 5, 6) has to be present.
+void test_no_trailing_newline() {
+  std::istringstream unterminated(
+      "v 1.0 2.0 3.0\n"
+      "v 4.0 5.0 6.0");  // deliberately no '\n' here
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &unterminated, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(geometry.vertices.size() == 6);
+
+  // Components 3..5 belong to the unterminated final line.
+  const float last_vertex[] = {4.0f, 5.0f, 6.0f};
+  for (size_t i = 0; i < 3; ++i) {
+    TEST_CHECK(FloatEquals(last_vertex[i], geometry.vertices[3 + i]));
+  }
+}
+
+// One input that mixes LF, CRLF and bare-CR line endings and finishes
+// without any terminator. All three vertices and the face must be read.
+void test_mixed_line_endings() {
+  std::string mixed_obj;
+  mixed_obj += "v 1.0 2.0 3.0\n";    // LF
+  mixed_obj += "v 4.0 5.0 6.0\r\n";  // CRLF
+  mixed_obj += "v 7.0 8.0 9.0\r";    // CR only
+  mixed_obj += "f 1 2 3";            // no line ending at all
+  std::istringstream mixed_stream(mixed_obj);
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &mixed_stream, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(geometry.vertices.size() == 9);
+
+  // The vertex that ended with a bare CR occupies components 6..8.
+  const float cr_vertex[] = {7.0f, 8.0f, 9.0f};
+  for (size_t i = 0; i < 3; ++i) {
+    TEST_CHECK(FloatEquals(cr_vertex[i], geometry.vertices[6 + i]));
+  }
+
+  TEST_CHECK(shape_list.size() == 1);
+}
+
+// Six-component "v" lines (x y z r g b) read from a stream: positions go to
+// attrib.vertices, the trailing three values go to attrib.colors.
+void test_vertex_colors_from_stream() {
+  std::istringstream colored_obj(
+      "v 1.0 2.0 3.0 0.5 0.6 0.7\n"
+      "v 4.0 5.0 6.0 0.1 0.2 0.3\n"
+      "f 1 2 1\n");
+
+  // Colour components in file order, three per vertex.
+  const float expected_colors[] = {0.5f, 0.6f, 0.7f,   // first vertex
+                                   0.1f, 0.2f, 0.3f};  // second vertex
+  const size_t expected_count =
+      sizeof(expected_colors) / sizeof(expected_colors[0]);
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &colored_obj, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(geometry.vertices.size() == 6);
+  TEST_CHECK(geometry.colors.size() == 6);
+
+  for (size_t i = 0; i < expected_count; ++i) {
+    TEST_CHECK(FloatEquals(expected_colors[i], geometry.colors[i]));
+  }
+}
+
+// First vertex carries a colour, second does not. The colour array is still
+// expected to hold an entry for every vertex, with 1.0 in each channel for
+// the vertex that had none.
+void test_vertex_colors_mixed() {
+  std::istringstream partly_colored_obj(
+      "v 1.0 2.0 3.0 0.5 0.6 0.7\n"
+      "v 4.0 5.0 6.0\n"
+      "f 1 2 1\n");
+
+  const float expected_colors[] = {0.5f, 0.6f, 0.7f,   // explicit colour
+                                   1.0f, 1.0f, 1.0f};  // default colour
+  const size_t expected_count =
+      sizeof(expected_colors) / sizeof(expected_colors[0]);
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &partly_colored_obj, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(geometry.vertices.size() == 6);
+  TEST_CHECK(geometry.colors.size() == 6);
+
+  for (size_t i = 0; i < expected_count; ++i) {
+    TEST_CHECK(FloatEquals(expected_colors[i], geometry.colors[i]));
+  }
+}
+
+// Loads an OBJ that uses every element keyword handled here (v, vn, vt, f,
+// l, p) and checks that each one ends up in the matching container:
+// attribute arrays for v/vn/vt, and mesh/lines/points indices of the first
+// shape for f/l/p.
+void test_all_element_types() {
+  const char *obj_text =
+      "v 1.0 0.0 0.0\n"
+      "v 0.0 1.0 0.0\n"
+      "v 0.0 0.0 1.0\n"
+      "vn 0.0 0.0 1.0\n"
+      "vt 0.5 0.5\n"
+      "f 1/1/1 2/1/1 3/1/1\n"
+      "l 1 2\n"
+      "p 3\n";
+  std::istringstream iss(obj_text);
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &iss, NULL);
+  TEST_CHECK(ret == true);
+  // 3 vertices * 3, 1 normal * 3, 1 texcoord * 2
+  TEST_CHECK(attrib.vertices.size() == 9);
+  TEST_CHECK(attrib.normals.size() == 3);
+  TEST_CHECK(attrib.texcoords.size() == 2);
+  TEST_CHECK(shapes.size() >= 1);
+  // Guard the indexing below: if the size check above failed without
+  // aborting the test, shapes[0] would be an out-of-bounds access.
+  if (shapes.empty()) return;
+  // Face indices
+  TEST_CHECK(shapes[0].mesh.indices.size() == 3);
+  // Line indices
+  TEST_CHECK(shapes[0].lines.indices.size() == 2);
+  // Point indices
+  TEST_CHECK(shapes[0].points.indices.size() == 1);
+}
+
+// Two "o" statements must produce two shapes, named after the objects and
+// each holding the single triangle that follows its "o" line.
+void test_multiple_objects() {
+  const char *obj_text =
+      "v 0.0 0.0 0.0\n"
+      "v 1.0 0.0 0.0\n"
+      "v 0.0 1.0 0.0\n"
+      "v 0.0 0.0 1.0\n"
+      "o obj1\n"
+      "f 1 2 3\n"
+      "o obj2\n"
+      "f 2 3 4\n";
+  std::istringstream iss(obj_text);
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &iss, NULL);
+  TEST_CHECK(ret == true);
+  TEST_CHECK(shapes.size() == 2);
+  // Guard the indexing below: if the size check above failed without
+  // aborting the test, shapes[0] / shapes[1] could be out of bounds.
+  if (shapes.size() != 2) return;
+  TEST_CHECK(shapes[0].name == "obj1");
+  TEST_CHECK(shapes[1].name == "obj2");
+  TEST_CHECK(shapes[0].mesh.indices.size() == 3);
+  TEST_CHECK(shapes[1].mesh.indices.size() == 3);
+}
+
+// A material that specifies both "d" and "Tr": exactly one material must be
+// produced and its dissolve must come from "d" (0.5), not from "Tr" (0.8).
+// NOTE(review): despite the test name, the contents of `warn` are not
+// asserted here -- confirm whether a warning check should be added.
+void test_mtl_d_and_tr_warning() {
+  const char *mtl_text = "newmtl test\nd 0.5\nTr 0.8\n";
+  std::istringstream mtl_iss(mtl_text);
+  std::map<std::string, int> matMap;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  tinyobj::LoadMtl(&matMap, &materials, &mtl_iss, &warn, &err);
+  TEST_CHECK(materials.size() == 1);
+  // Guard the indexing below: if the size check above failed without
+  // aborting the test, materials[0] could be out of bounds.
+  if (materials.empty()) return;
+  // d=0.5 should win over Tr=0.8
+  TEST_CHECK(FloatEquals(0.5f, materials[0].dissolve));
+}
+
+// Two malformed vertex lines in a row. The load must fail, and because the
+// first bad line already stops parsing, only its token ("bad1") is required
+// to appear in the error text.
+void test_multiple_malformed_vertices() {
+  std::istringstream bad_obj(
+      "v 1.0 bad1 3.0\n"
+      "v 4.0 bad2 6.0\n");
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &bad_obj, NULL);
+
+  TEST_CHECK(!loaded);
+  TEST_CHECK(errors.find("bad1") != std::string::npos);
+}
+
+// A "vn" line with a non-numeric component ("xyz") must make the load fail
+// with an "expected number" error.
+void test_malformed_normal_error() {
+  std::istringstream bad_obj("vn 1.0 xyz 0.0\n");
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &bad_obj, NULL);
+
+  TEST_CHECK(!loaded);
+  TEST_CHECK(errors.find("expected number") != std::string::npos);
+}
+
+// A "vt" line whose first component is non-numeric ("abc") must make the
+// load fail with an "expected number" error.
+void test_malformed_texcoord_error() {
+  std::istringstream bad_obj("vt abc 0.5\n");
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &bad_obj, NULL);
+
+  TEST_CHECK(!loaded);
+  TEST_CHECK(errors.find("expected number") != std::string::npos);
+}
+
+// Negative (relative) face indices count backwards from the most recently
+// defined vertex: with three vertices defined, -3/-2/-1 must resolve to the
+// zero-based indices 0/1/2.
+void test_negative_vertex_indices() {
+  const char *obj_text =
+      "v 1.0 0.0 0.0\n"
+      "v 0.0 1.0 0.0\n"
+      "v 0.0 0.0 1.0\n"
+      "f -3 -2 -1\n";
+  std::istringstream iss(obj_text);
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &iss, NULL);
+  TEST_CHECK(ret == true);
+  TEST_CHECK(shapes.size() == 1);
+  // Guard the indexing below: if a size check failed without aborting the
+  // test, the element accesses that follow could be out of bounds.
+  if (shapes.size() != 1) return;
+  TEST_CHECK(shapes[0].mesh.indices.size() == 3);
+  if (shapes[0].mesh.indices.size() != 3) return;
+  // -3 should resolve to index 0, -2 to 1, -1 to 2
+  TEST_CHECK(shapes[0].mesh.indices[0].vertex_index == 0);
+  TEST_CHECK(shapes[0].mesh.indices[1].vertex_index == 1);
+  TEST_CHECK(shapes[0].mesh.indices[2].vertex_index == 2);
+}
+
+// '#' comments on their own line, indented, and trailing after data must
+// all be ignored: the two vertices and the single face are still read.
+void test_comments_everywhere() {
+  std::istringstream commented_obj(
+      "# full line comment\n"
+      "v 1.0 2.0 3.0 # inline comment\n"
+      "  # indented comment\n"
+      "v 4.0 5.0 6.0\n"
+      "# another comment\n"
+      "f 1 2 1 # face comment\n");
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &commented_obj, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(geometry.vertices.size() == 6);
+  TEST_CHECK(shape_list.size() == 1);
+}
+
+// Runs of spaces and tabs between tokens must behave like a single
+// separator. The first four parsed components are compared with 1..4.
+void test_excessive_whitespace() {
+  std::istringstream padded_obj(
+      "v   1.0  \t  2.0  \t\t  3.0\n"
+      "v\t4.0\t5.0\t6.0\n"
+      "f  1  2  1\n");
+
+  tinyobj::attrib_t geometry;
+  std::vector<tinyobj::shape_t> shape_list;
+  std::vector<tinyobj::material_t> material_list;
+  std::string warnings;
+  std::string errors;
+  const bool loaded =
+      tinyobj::LoadObj(&geometry, &shape_list, &material_list, &warnings,
+                       &errors, &padded_obj, NULL);
+
+  TEST_CHECK(loaded);
+  TEST_CHECK(geometry.vertices.size() == 6);
+
+  // x, y, z of the first vertex and x of the second.
+  const float leading_components[] = {1.0f, 2.0f, 3.0f, 4.0f};
+  for (size_t i = 0; i < 4; ++i) {
+    TEST_CHECK(FloatEquals(leading_components[i], geometry.vertices[i]));
+  }
+}
+
+// Happy path through the ObjReader (v2) API: parse a one-triangle OBJ from
+// a string with an empty MTL text and read the results back through the
+// accessors. Warnings are optional and intentionally not asserted.
+void test_objreader_api_stream() {
+  const char *triangle_obj =
+      "v 1.0 2.0 3.0\n"
+      "v 4.0 5.0 6.0\n"
+      "v 7.0 8.0 9.0\n"
+      "f 1 2 3\n";
+
+  tinyobj::ObjReader obj_reader;
+  const bool parsed = obj_reader.ParseFromString(triangle_obj, "");
+
+  TEST_CHECK(parsed);
+  TEST_CHECK(obj_reader.Valid());
+  TEST_CHECK(obj_reader.GetAttrib().vertices.size() == 9);
+  TEST_CHECK(obj_reader.GetShapes().size() == 1);
+}
+
+// Failure path through the ObjReader API: a vertex with a non-numeric
+// coordinate must make ParseFromString return false and leave a non-empty
+// message in Error().
+void test_objreader_api_error() {
+  tinyobj::ObjReader obj_reader;
+  const bool parsed = obj_reader.ParseFromString("v 1.0 badval 3.0\n", "");
+
+  TEST_CHECK(!parsed);
+  TEST_CHECK(!obj_reader.Error().empty());
+}
+
+// Boundary behaviour of SplitString with ' ' as delimiter and '\\' as the
+// escape character. Each case runs against its own empty output vector.
+void test_split_string_edge_cases() {
+  const char delimiter = ' ';
+  const char escape = '\\';
+
+  // Empty input: one token is still produced, and it is empty.
+  {
+    std::vector<std::string> parts;
+    tinyobj::SplitString("", delimiter, escape, parts);
+    TEST_CHECK(parts.size() == 1);
+    TEST_CHECK(parts[0].empty());
+  }
+
+  // Delimiters only: again a single empty token.
+  {
+    std::vector<std::string> parts;
+    tinyobj::SplitString("   ", delimiter, escape, parts);
+    TEST_CHECK(parts.size() == 1);
+    TEST_CHECK(parts[0].empty());
+  }
+
+  // Repeated delimiters between words do not create empty tokens.
+  {
+    std::vector<std::string> parts;
+    tinyobj::SplitString("a  b  c", delimiter, escape, parts);
+    TEST_CHECK(parts.size() == 3);
+    TEST_CHECK(parts[0] == "a");
+    TEST_CHECK(parts[1] == "b");
+    TEST_CHECK(parts[2] == "c");
+  }
+
+  // An escaped space stays inside its token; the next plain space splits.
+  {
+    std::vector<std::string> parts;
+    tinyobj::SplitString("path\\ name.mtl other.mtl", delimiter, escape,
+                         parts);
+    TEST_CHECK(parts.size() == 2);
+    TEST_CHECK(parts[0] == "path name.mtl");
+    TEST_CHECK(parts[1] == "other.mtl");
+  }
+
+  // A backslash at the very end escapes nothing and is kept as-is.
+  {
+    std::vector<std::string> parts;
+    tinyobj::SplitString("dir\\", delimiter, escape, parts);
+    TEST_CHECK(parts.size() == 1);
+    TEST_CHECK(parts[0] == "dir\\");
+  }
+}
+
+// A four-vertex face parsed with triangulation disabled must stay a single
+// face made of four indices.
+void test_quad_face() {
+  const char *obj_text =
+      "v 0.0 0.0 0.0\n"
+      "v 1.0 0.0 0.0\n"
+      "v 1.0 1.0 0.0\n"
+      "v 0.0 1.0 0.0\n"
+      "f 1 2 3 4\n";
+  tinyobj::ObjReaderConfig config;
+  config.triangulate = false;
+  tinyobj::ObjReader reader;
+  bool ret = reader.ParseFromString(obj_text, "", config);
+  TEST_CHECK(ret == true);
+  TEST_CHECK(reader.GetShapes().size() == 1);
+  // Guard the indexing below: if a size check failed without aborting the
+  // test, the element accesses that follow could be out of bounds.
+  if (reader.GetShapes().size() != 1) return;
+  // Without triangulation: 4 indices, 1 face
+  TEST_CHECK(reader.GetShapes()[0].mesh.indices.size() == 4);
+  TEST_CHECK(reader.GetShapes()[0].mesh.num_face_vertices.size() == 1);
+  if (reader.GetShapes()[0].mesh.num_face_vertices.size() != 1) return;
+  TEST_CHECK(reader.GetShapes()[0].mesh.num_face_vertices[0] == 4);
+}
+
+// Quad face with triangulation enabled: the quad is split into two triangles.
+void test_quad_face_triangulated() {
+  const char *obj_text =
+      "v 0.0 0.0 0.0\n"
+      "v 1.0 0.0 0.0\n"
+      "v 1.0 1.0 0.0\n"
+      "v 0.0 1.0 0.0\n"
+      "f 1 2 3 4\n";
+  tinyobj::ObjReaderConfig config;
+  config.triangulate = true;
+  tinyobj::ObjReader reader;
+  bool ret = reader.ParseFromString(obj_text, "", config);
+  TEST_CHECK(ret == true);
+  TEST_CHECK(reader.GetShapes().size() == 1);
+  if (reader.GetShapes().empty()) return;  // failure recorded; don't index [0]
+  // With triangulation: quad -> 2 triangles = 6 indices
+  TEST_CHECK(reader.GetShapes()[0].mesh.indices.size() == 6);
+  TEST_CHECK(reader.GetShapes()[0].mesh.num_face_vertices.size() == 2);
+}
+
+// Face with v/vt/vn format
+void test_face_full_index_format() {
+  const char *obj_text =
+      "v 0.0 0.0 0.0\n"
+      "v 1.0 0.0 0.0\n"
+      "v 0.0 1.0 0.0\n"
+      "vt 0.0 0.0\n"
+      "vt 1.0 0.0\n"
+      "vt 0.0 1.0\n"
+      "vn 0.0 0.0 1.0\n"
+      "f 1/1/1 2/2/1 3/3/1\n";
+  std::istringstream iss(obj_text);
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &iss, NULL);
+  TEST_CHECK(ret == true);
+  TEST_CHECK(shapes[0].mesh.indices[0].vertex_index == 0);
+  TEST_CHECK(shapes[0].mesh.indices[0].texcoord_index == 0);
+  TEST_CHECK(shapes[0].mesh.indices[0].normal_index == 0);
+  TEST_CHECK(shapes[0].mesh.indices[1].vertex_index == 1);
+  TEST_CHECK(shapes[0].mesh.indices[1].texcoord_index == 1);
+  TEST_CHECK(shapes[0].mesh.indices[2].vertex_index == 2);
+  TEST_CHECK(shapes[0].mesh.indices[2].texcoord_index == 2);
+}
+
+// Face with v//vn format (no texcoord)
+void test_face_vertex_normal_only() {
+  const char *obj_text =
+      "v 0.0 0.0 0.0\n"
+      "v 1.0 0.0 0.0\n"
+      "v 0.0 1.0 0.0\n"
+      "vn 0.0 0.0 1.0\n"
+      "f 1//1 2//1 3//1\n";
+  std::istringstream iss(obj_text);
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &iss, NULL);
+  TEST_CHECK(ret == true);
+  TEST_CHECK(shapes[0].mesh.indices[0].vertex_index == 0);
+  TEST_CHECK(shapes[0].mesh.indices[0].texcoord_index == -1);
+  TEST_CHECK(shapes[0].mesh.indices[0].normal_index == 0);
+}
+
+// MTL with multiple materials and various properties
+void test_mtl_multiple_properties() {
+  const char *mtl_text =
+      "newmtl mat1\n"
+      "Ka 0.1 0.2 0.3\n"
+      "Kd 0.4 0.5 0.6\n"
+      "Ks 0.7 0.8 0.9\n"
+      "Ns 100.0\n"
+      "d 0.5\n"
+      "illum 2\n"
+      "\n"
+      "newmtl mat2\n"
+      "Ka 0.0 0.0 0.0\n"
+      "Kd 1.0 1.0 1.0\n"
+      "Ns 50.0\n";
+  std::istringstream mtl_iss(mtl_text);
+  std::map<std::string, int> matMap;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  tinyobj::LoadMtl(&matMap, &materials, &mtl_iss, &warn, &err);
+  TEST_CHECK(materials.size() == 2);
+  TEST_CHECK(materials[0].name == "mat1");
+  TEST_CHECK(FloatEquals(0.1f, materials[0].ambient[0]));
+  TEST_CHECK(FloatEquals(0.4f, materials[0].diffuse[0]));
+  TEST_CHECK(FloatEquals(0.7f, materials[0].specular[0]));
+  TEST_CHECK(FloatEquals(100.0f, materials[0].shininess));
+  TEST_CHECK(FloatEquals(0.5f, materials[0].dissolve));
+  TEST_CHECK(materials[0].illum == 2);
+  TEST_CHECK(materials[1].name == "mat2");
+  TEST_CHECK(FloatEquals(1.0f, materials[1].diffuse[0]));
+  TEST_CHECK(FloatEquals(50.0f, materials[1].shininess));
+}
+
+// Callback API: every v / vn / vt / f record must fire its callback once.
+void test_callback_all_elements() {
+  const char *obj_text =
+      "v 1.0 2.0 3.0\n"
+      "v 4.0 5.0 6.0\n"
+      "v 7.0 8.0 9.0\n"
+      "vn 0.0 0.0 1.0\n"
+      "vt 0.5 0.5\n"
+      "f 1 2 3\n";
+  std::istringstream iss(obj_text);
+
+  struct Counts {
+    int vertices;
+    int normals;
+    int texcoords;
+    int faces;
+  };
+  Counts counts = {0, 0, 0, 0};
+  typedef tinyobj::real_t real_t;  // callback_t is declared in terms of real_t
+  tinyobj::callback_t cb;
+  cb.vertex_cb = [](void *user, real_t, real_t, real_t, real_t) {
+    static_cast<Counts *>(user)->vertices++;
+  };
+  cb.normal_cb = [](void *user, real_t, real_t, real_t) {
+    static_cast<Counts *>(user)->normals++;
+  };
+  cb.texcoord_cb = [](void *user, real_t, real_t, real_t) {
+    static_cast<Counts *>(user)->texcoords++;
+  };
+  cb.index_cb = [](void *user, tinyobj::index_t *, int) {
+    static_cast<Counts *>(user)->faces++;
+  };
+
+  std::string warn, err;
+  bool ret = tinyobj::LoadObjWithCallback(iss, cb, &counts, NULL, &warn, &err);
+  TEST_CHECK(ret == true);
+  TEST_CHECK(counts.vertices == 3);
+  TEST_CHECK(counts.normals == 1);
+  TEST_CHECK(counts.texcoords == 1);
+  TEST_CHECK(counts.faces == 1);
+}
+
+// Callback API with NULL callbacks (should not crash)
+void test_callback_null_callbacks() {
+  const char *obj_text =
+      "v 1.0 2.0 3.0\n"
+      "vn 0.0 0.0 1.0\n"
+      "vt 0.5 0.5\n"
+      "f 1/1/1 1/1/1 1/1/1\n";
+  std::istringstream iss(obj_text);
+
+  tinyobj::callback_t cb;
+  // All callbacks are NULL by default
+  std::string warn, err;
+  bool ret = tinyobj::LoadObjWithCallback(iss, cb, NULL, NULL, &warn, &err);
+  TEST_CHECK(ret == true);
+}
+
+// Subnormal float values should parse without error
+void test_numeric_subnormal_values() {
+  // 5e-310 is subnormal for double, 1e-45 is subnormal for float
+  const char *obj_text = "v 5e-310 1e-45 0.0\n";
+  std::istringstream iss(obj_text);
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &iss, NULL);
+  TEST_CHECK(ret == true);
+  TEST_CHECK(attrib.vertices.size() == 3);
+  // Values should be >= 0 (either the subnormal or flushed to zero)
+  TEST_CHECK(attrib.vertices[0] >= 0.0f);
+  TEST_CHECK(attrib.vertices[1] >= 0.0f);
+  TEST_CHECK(FloatEquals(0.0f, attrib.vertices[2]));
+}
+
+// Empty MTL should not produce a phantom material
+void test_empty_mtl_no_phantom_material() {
+  const char *mtl_text = "# just a comment\n";
+  std::istringstream mtl_iss(mtl_text);
+  std::map<std::string, int> matMap;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  tinyobj::LoadMtl(&matMap, &materials, &mtl_iss, &warn, &err);
+  TEST_CHECK(materials.empty());
+  TEST_CHECK(matMap.empty());
+}
+
+// StreamReader should not be copyable (deleted copy constructor)
+void test_streamreader_not_copyable() {
+  // Copyability is a compile-time property (a copy of a reader built from an
+  // istream would leave a dangling buf_ pointer). Nothing below attempts a
+  // copy, so this only smoke-tests construction and basic reads.
+  const char *input = "hello";
+  tinyobj::StreamReader sr(input, 5);
+  TEST_CHECK(sr.remaining() == 5);
+  TEST_CHECK(sr.peek() == 'h');
+}
+
+// Out-of-range face indices should not crash
+void test_out_of_range_face_index() {
+  const char *obj_text =
+      "v 1.0 0.0 0.0\n"
+      "v 0.0 1.0 0.0\n"
+      "v 0.0 0.0 1.0\n"
+      "f 1 2 999\n";  // index 999 doesn't exist
+  std::istringstream iss(obj_text);
+  tinyobj::attrib_t attrib;
+  std::vector<tinyobj::shape_t> shapes;
+  std::vector<tinyobj::material_t> materials;
+  std::string warn, err;
+  bool ret = tinyobj::LoadObj(&attrib, &shapes, &materials, &warn, &err,
+                              &iss, NULL);
+  // Should parse without crashing. The face will have an out-of-range index
+  // which may produce a warning during triangulation.
+  TEST_CHECK(ret == true);
+  TEST_CHECK(attrib.vertices.size() == 9);
+}
+
 // Fuzzer test.
 // Just check if it does not crash.
 // Disable by default since Windows filesystem can't create filename of afl
@@ -1403,8 +3392,94 @@ TEST_LIST = {
      test_usemtl_then_o_issue235},
     {"mtl_searchpaths_issue244",
      test_mtl_searchpaths_issue244},
-    {"usemtl_whitespece_issue246",
+    {"usemtl_whitespace_issue246",
      test_usemtl_whitespace_issue246},
     {"texres_texopt_issue248",
      test_texres_texopt_issue248},
+    {"test_mtl_filename_with_whitespace_issue46",
+     test_mtl_filename_with_whitespace_issue46},
+    {"test_face_missing_issue295",
+     test_face_missing_issue295},
+    {"test_comment_issue389",
+     test_comment_issue389},
+    {"test_invalid_relative_vertex_index",
+     test_invalid_relative_vertex_index},
+    {"test_invalid_texture_vertex_index",
+     test_invalid_texture_vertex_index},
+    {"default_kd_for_multiple_materials_issue391",
+     test_default_kd_for_multiple_materials_issue391},
+    {"test_removeUtf8Bom", test_removeUtf8Bom},
+    {"test_loadObj_with_BOM", test_loadObj_with_BOM},
+    {"test_load_obj_from_utf8_path", test_load_obj_from_utf8_path},
+    {"test_load_obj_from_long_path", test_load_obj_from_long_path},
+    {"test_loadObjWithCallback_with_BOM", test_loadObjWithCallback_with_BOM},
+    {"test_loadObjWithCallback_mtllib_failure_does_not_crash",
+     test_loadObjWithCallback_mtllib_failure_does_not_crash},
+    {"test_mtllib_empty_filename_is_ignored_loadobj",
+     test_mtllib_empty_filename_is_ignored_loadobj},
+    {"test_mtllib_empty_filename_is_ignored_callback",
+     test_mtllib_empty_filename_is_ignored_callback},
+    {"test_usemtl_callback_trims_trailing_comment",
+     test_usemtl_callback_trims_trailing_comment},
+    {"test_tag_triple_huge_count_is_safely_rejected",
+     test_tag_triple_huge_count_is_safely_rejected},
+    {"test_texcoord_w_component", test_texcoord_w_component},
+    {"test_texcoord_w_mixed_component", test_texcoord_w_mixed_component},
+    {"test_numeric_edge_cases", test_numeric_edge_cases},
+    {"test_numeric_nan_inf", test_numeric_nan_inf},
+    {"test_numeric_from_stream", test_numeric_from_stream},
+    {"test_numeric_overflow_preserves_default", test_numeric_overflow_preserves_default},
+    {"test_numeric_empty_and_whitespace", test_numeric_empty_and_whitespace},
+    {"test_numeric_garbage_input", test_numeric_garbage_input},
+    {"test_numeric_extreme_precision", test_numeric_extreme_precision},
+    {"test_file_and_stream_load_agree", test_file_and_stream_load_agree},
+    {"test_load_from_memory_buffer", test_load_from_memory_buffer},
+    {"test_streamreader_column_tracking", test_streamreader_column_tracking},
+    {"test_stream_load_from_current_offset", test_stream_load_from_current_offset},
+    {"test_stream_load_rejects_oversized_input", test_stream_load_rejects_oversized_input},
+    {"test_error_format_clang_style", test_error_format_clang_style},
+    {"test_error_stack", test_error_stack},
+    {"test_malformed_vertex_error", test_malformed_vertex_error},
+    {"test_malformed_mtl_error", test_malformed_mtl_error},
+    {"test_parse_error_backward_compat", test_parse_error_backward_compat},
+    {"test_split_string_preserves_non_escape_backslash",
+     test_split_string_preserves_non_escape_backslash},
+    {"test_streamreader_eof_and_remaining",
+     test_streamreader_eof_and_remaining},
+    {"test_streamreader_skip_and_read", test_streamreader_skip_and_read},
+    {"test_streamreader_match_and_advance",
+     test_streamreader_match_and_advance},
+    {"test_streamreader_current_line_text",
+     test_streamreader_current_line_text},
+    {"test_streamreader_error_stack", test_streamreader_error_stack},
+    {"test_empty_obj_file", test_empty_obj_file},
+    {"test_bom_only_obj", test_bom_only_obj},
+    {"test_no_trailing_newline", test_no_trailing_newline},
+    {"test_mixed_line_endings", test_mixed_line_endings},
+    {"test_vertex_colors_from_stream", test_vertex_colors_from_stream},
+    {"test_vertex_colors_mixed", test_vertex_colors_mixed},
+    {"test_all_element_types", test_all_element_types},
+    {"test_multiple_objects", test_multiple_objects},
+    {"test_mtl_d_and_tr_warning", test_mtl_d_and_tr_warning},
+    {"test_multiple_malformed_vertices", test_multiple_malformed_vertices},
+    {"test_malformed_normal_error", test_malformed_normal_error},
+    {"test_malformed_texcoord_error", test_malformed_texcoord_error},
+    {"test_negative_vertex_indices", test_negative_vertex_indices},
+    {"test_comments_everywhere", test_comments_everywhere},
+    {"test_excessive_whitespace", test_excessive_whitespace},
+    {"test_objreader_api_stream", test_objreader_api_stream},
+    {"test_objreader_api_error", test_objreader_api_error},
+    {"test_split_string_edge_cases", test_split_string_edge_cases},
+    {"test_quad_face", test_quad_face},
+    {"test_quad_face_triangulated", test_quad_face_triangulated},
+    {"test_face_full_index_format", test_face_full_index_format},
+    {"test_face_vertex_normal_only", test_face_vertex_normal_only},
+    {"test_mtl_multiple_properties", test_mtl_multiple_properties},
+    {"test_callback_all_elements", test_callback_all_elements},
+    {"test_callback_null_callbacks", test_callback_null_callbacks},
+    {"test_numeric_subnormal_values", test_numeric_subnormal_values},
+    {"test_empty_mtl_no_phantom_material",
+     test_empty_mtl_no_phantom_material},
+    {"test_streamreader_not_copyable", test_streamreader_not_copyable},
+    {"test_out_of_range_face_index", test_out_of_range_face_index},
     {NULL, NULL}};
diff --git a/tiny_obj_loader.h b/tiny_obj_loader.h
index f9e3c649..af98ac2d 100644
--- a/tiny_obj_loader.h
+++ b/tiny_obj_loader.h
@@ -1,7 +1,7 @@
 /*
 The MIT License (MIT)
 
-Copyright (c) 2012-2018 Syoyo Fujita and many contributors.
+Copyright (c) 2012-Present, Syoyo Fujita and many contributors.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -24,9 +24,16 @@ THE SOFTWARE.
 
 //
 // version 2.0.0 : Add new object oriented API. 1.x API is still provided.
+//                 * Add python binding.
 //                 * Support line primitive.
 //                 * Support points primitive.
 //                 * Support multiple search path for .mtl(v1 API).
+//                 * Support vertex skinning weight `vw` (as a tinyobj
+//                 extension). Note that this differs from vertex weight ([w]
+//                 component in `v` line).
+//                 * Support escaped whitespace in mtllib
+//                 * Add robust triangulation using Mapbox
+//                 earcut(TINYOBJLOADER_USE_MAPBOX_EARCUT).
 // version 1.4.0 : Modifed ParseTextureNameAndOption API
 // version 1.3.1 : Make ParseTextureNameAndOption API public
 // version 1.3.0 : Separate warning and error message(breaking API of LoadObj)
@@ -63,6 +70,12 @@ THE SOFTWARE.
 
 namespace tinyobj {
 
+// C++11 is now the minimum required standard.
+#if __cplusplus < 201103L && (!defined(_MSVC_LANG) || _MSVC_LANG < 201103L)
+#error "tinyobjloader requires C++11 or later. Compile with -std=c++11 or higher."
+#endif
+#define TINYOBJ_OVERRIDE override
+
 #ifdef __clang__
 #pragma clang diagnostic push
 #if __has_warning("-Wzero-as-null-pointer-constant")
@@ -147,7 +160,7 @@ typedef enum {
   TEXTURE_TYPE_CUBE_RIGHT
 } texture_type_t;
 
-typedef struct {
+struct texture_option_t {
   texture_type_t type;      // -type (default TEXTURE_TYPE_NONE)
   real_t sharpness;         // -boost (default 1.0?)
   real_t brightness;        // base_value in -mm option (default 0)
@@ -155,8 +168,9 @@ typedef struct {
   real_t origin_offset[3];  // -o u [v [w]] (default 0 0 0)
   real_t scale[3];          // -s u [v [w]] (default 1 1 1)
   real_t turbulence[3];     // -t u [v [w]] (default 0 0 0)
-  int   texture_resolution; // -texres resolution (No default value in the spec. We'll use -1)
-  bool clamp;    // -clamp (default false)
+  int texture_resolution;   // -texres resolution (No default value in the spec.
+                            // We'll use -1)
+  bool clamp;               // -clamp (default false)
   char imfchan;  // -imfchan (the default for bump is 'l' and for decal is 'm')
   bool blendu;   // -blendu (default on)
   bool blendv;   // -blendv (default on)
@@ -165,9 +179,9 @@ typedef struct {
   // extension
   std::string colorspace;  // Explicitly specify color space of stored texel
                            // value. Usually `sRGB` or `linear` (default empty).
-} texture_option_t;
+};
 
-typedef struct _material_t {
+struct material_t {
   std::string name;
 
   real_t ambient[3];
@@ -183,9 +197,9 @@ typedef struct _material_t {
 
   int dummy;  // Suppress padding warning.
 
-  std::string ambient_texname;             // map_Ka
-  std::string diffuse_texname;             // map_Kd
-  std::string specular_texname;            // map_Ks
+  std::string ambient_texname;   // map_Ka. For ambient or ambient occlusion.
+  std::string diffuse_texname;   // map_Kd
+  std::string specular_texname;  // map_Ks
   std::string specular_highlight_texname;  // map_Ns
   std::string bump_texname;                // map_bump, map_Bump, bump
   std::string displacement_texname;        // disp
@@ -309,58 +323,68 @@ typedef struct _material_t {
   }
 
 #endif
+};
 
-} material_t;
-
-typedef struct {
+struct tag_t {
   std::string name;
 
   std::vector<int> intValues;
   std::vector<real_t> floatValues;
   std::vector<std::string> stringValues;
-} tag_t;
+};
+
+struct joint_and_weight_t {
+  int joint_id;
+  real_t weight;
+};
+
+struct skin_weight_t {
+  int vertex_id;  // Corresponding vertex index in `attrib_t::vertices`.
+                  // Compared to `index_t`, this index must be non-negative and
+                  // start at 0 (relative indexing is not allowed).
+  std::vector<joint_and_weight_t> weightValues;
+};
 
 // Index struct to support different indices for vtx/normal/texcoord.
 // -1 means not used.
-typedef struct {
+struct index_t {
   int vertex_index;
   int normal_index;
   int texcoord_index;
-} index_t;
+};
 
-typedef struct {
+struct mesh_t {
   std::vector<index_t> indices;
-  std::vector<unsigned char>
+  std::vector<unsigned int>
       num_face_vertices;          // The number of vertices per
-                                  // face. 3 = triangle, 4 = quad,
-                                  // ... Up to 255 vertices per face.
+                                  // face. 3 = triangle, 4 = quad, ...
   std::vector<int> material_ids;  // per-face material ID
   std::vector<unsigned int> smoothing_group_ids;  // per-face smoothing group
                                                   // ID(0 = off. positive value
                                                   // = group id)
   std::vector<tag_t> tags;                        // SubD tag
-} mesh_t;
+};
 
-// typedef struct {
+// struct path_t {
 //  std::vector<int> indices;  // pairs of indices for lines
-//} path_t;
+//};
 
-typedef struct {
+struct lines_t {
   // Linear flattened indices.
   std::vector<index_t> indices;        // indices for vertices(poly lines)
   std::vector<int> num_line_vertices;  // The number of vertices per line.
-} lines_t;
+};
 
-typedef struct {
+struct points_t {
   std::vector<index_t> indices;  // indices for points
-} points_t;
+};
 
-typedef struct {
+struct shape_t {
   std::string name;
   mesh_t mesh;
   lines_t lines;
   points_t points;
-} shape_t;
+};
 
 // Vertex attributes
 struct attrib_t {
@@ -376,6 +400,16 @@ struct attrib_t {
   std::vector<real_t> texcoord_ws;  // 'vt'(w)
   std::vector<real_t> colors;       // extension: vertex colors
 
+  //
+  // TinyObj extension.
+  //
+
+  // NOTE(syoyo): the array index follows the order of appearance.
+  // To find the skin weight for a specific vertex id `vid`, build a lookup
+  // table keyed on `skin_weight_t::vertex_id` (e.g. using std::map or
+  // std::unordered_map).
+  std::vector<skin_weight_t> skin_weights;
+
   attrib_t() {}
 
   //
@@ -386,9 +420,11 @@ struct attrib_t {
   const std::vector<real_t> &GetVertexWeights() const { return vertex_weights; }
 };
 
-typedef struct callback_t_ {
+struct callback_t {
   // W is optional and set to 1 if there is no `w` item in `v` line
   void (*vertex_cb)(void *user_data, real_t x, real_t y, real_t z, real_t w);
+  void (*vertex_color_cb)(void *user_data, real_t x, real_t y, real_t z,
+                          real_t r, real_t g, real_t b, bool has_color);
   void (*normal_cb)(void *user_data, real_t x, real_t y, real_t z);
 
   // y and z are optional and set to 0 if there is no `y` and/or `z` item(s) in
@@ -410,8 +446,9 @@ typedef struct callback_t_ {
   void (*group_cb)(void *user_data, const char **names, int num_names);
   void (*object_cb)(void *user_data, const char *name);
 
-  callback_t_()
+  callback_t()
       : vertex_cb(NULL),
+        vertex_color_cb(NULL),
         normal_cb(NULL),
         texcoord_cb(NULL),
         index_cb(NULL),
@@ -419,7 +456,7 @@ typedef struct callback_t_ {
         mtllib_cb(NULL),
         group_cb(NULL),
         object_cb(NULL) {}
-} callback_t;
+};
 
 class MaterialReader {
  public:
@@ -440,11 +477,11 @@ class MaterialFileReader : public MaterialReader {
   // Path could contain separator(';' in Windows, ':' in Posix)
   explicit MaterialFileReader(const std::string &mtl_basedir)
       : m_mtlBaseDir(mtl_basedir) {}
-  virtual ~MaterialFileReader() {}
+  virtual ~MaterialFileReader() TINYOBJ_OVERRIDE {}
   virtual bool operator()(const std::string &matId,
                           std::vector<material_t> *materials,
                           std::map<std::string, int> *matMap, std::string *warn,
-                          std::string *err);
+                          std::string *err) TINYOBJ_OVERRIDE;
 
  private:
   std::string m_mtlBaseDir;
@@ -457,11 +494,11 @@ class MaterialStreamReader : public MaterialReader {
  public:
   explicit MaterialStreamReader(std::istream &inStream)
       : m_inStream(inStream) {}
-  virtual ~MaterialStreamReader() {}
+  virtual ~MaterialStreamReader() TINYOBJ_OVERRIDE {}
   virtual bool operator()(const std::string &matId,
                           std::vector<material_t> *materials,
                           std::map<std::string, int> *matMap, std::string *warn,
-                          std::string *err);
+                          std::string *err) TINYOBJ_OVERRIDE;
 
  private:
   std::istream &m_inStream;
@@ -471,6 +508,11 @@ class MaterialStreamReader : public MaterialReader {
 struct ObjReaderConfig {
   bool triangulate;  // triangulate polygon?
 
+  // Currently not used.
+  // "simple" or empty: Create triangle fan
+  // "earcut": Use the algorithm based on Ear clipping
+  std::string triangulation_method;
+
   /// Parse vertex color.
   /// If vertex color is not present, its filled with default value.
   /// false = no vertex color
@@ -484,7 +526,8 @@ struct ObjReaderConfig {
   ///
   std::string mtl_search_path;
 
-  ObjReaderConfig() : triangulate(true), vertex_color(true) {}
+  ObjReaderConfig()
+      : triangulate(true), triangulation_method("simple"), vertex_color(true) {}
 };
 
 ///
@@ -493,7 +536,6 @@ struct ObjReaderConfig {
 class ObjReader {
  public:
   ObjReader() : valid_(false) {}
-  ~ObjReader() {}
 
   ///
   /// Load .obj and .mtl from a file.
@@ -614,267 +656,5469 @@ bool ParseTextureNameAndOption(std::string *texname, texture_option_t *texopt,
 #ifdef TINYOBJLOADER_IMPLEMENTATION
 #include <cassert>
 #include <cctype>
+#include <climits>
 #include <cmath>
 #include <cstddef>
+#include <cstdint>
+#include <cerrno>
 #include <cstdlib>
 #include <cstring>
+#include <fstream>
 #include <limits>
-#include <utility>
 
-#include <fstream>
-#include <sstream>
+#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#endif
 
-namespace tinyobj {
+#ifdef TINYOBJLOADER_USE_MMAP
+#if !defined(_WIN32)
+// POSIX headers for mmap
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
+#endif  // TINYOBJLOADER_USE_MMAP
+#include <set>
+#include <sstream>
+#include <utility>
 
-MaterialReader::~MaterialReader() {}
+#ifdef TINYOBJLOADER_USE_MAPBOX_EARCUT
 
-struct vertex_index_t {
-  int v_idx, vt_idx, vn_idx;
-  vertex_index_t() : v_idx(-1), vt_idx(-1), vn_idx(-1) {}
-  explicit vertex_index_t(int idx) : v_idx(idx), vt_idx(idx), vn_idx(idx) {}
-  vertex_index_t(int vidx, int vtidx, int vnidx)
-      : v_idx(vidx), vt_idx(vtidx), vn_idx(vnidx) {}
-};
+#ifdef TINYOBJLOADER_DONOT_INCLUDE_MAPBOX_EARCUT
+// Assume earcut.hpp is included outside of tiny_obj_loader.h
+#else
 
-// Internal data structure for face representation
-// index + smoothing group.
-struct face_t {
-  unsigned int
-      smoothing_group_id;  // smoothing group id. 0 = smoothing groupd is off.
-  int pad_;
-  std::vector<vertex_index_t> vertex_indices;  // face vertex indices.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Weverything"
+#endif
 
-  face_t() : smoothing_group_id(0), pad_(0) {}
-};
+#include <array>
 
-// Internal data structure for line representation
-struct __line_t {
-  // l v1/vt1 v2/vt2 ...
-  // In the specification, line primitrive does not have normal index, but
-  // TinyObjLoader allow it
-  std::vector<vertex_index_t> vertex_indices;
-};
+#include "mapbox/earcut.hpp"
 
-// Internal data structure for points representation
-struct __points_t {
-  // p v1 v2 ...
-  // In the specification, point primitrive does not have normal index and
-  // texture coord index, but TinyObjLoader allow it.
-  std::vector<vertex_index_t> vertex_indices;
-};
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
 
-struct tag_sizes {
-  tag_sizes() : num_ints(0), num_reals(0), num_strings(0) {}
-  int num_ints;
-  int num_reals;
-  int num_strings;
-};
+#endif
 
-struct obj_shape {
-  std::vector<real_t> v;
-  std::vector<real_t> vn;
-  std::vector<real_t> vt;
-};
+#endif  // TINYOBJLOADER_USE_MAPBOX_EARCUT
 
-//
-// Manages group of primitives(face, line, points, ...)
-struct PrimGroup {
-  std::vector<face_t> faceGroup;
-  std::vector<__line_t> lineGroup;
-  std::vector<__points_t> pointsGroup;
+#ifdef _WIN32
+// Converts a UTF-8 encoded string to a UTF-16 wide string for use with
+// Windows file APIs that support Unicode paths (including paths longer than
+// MAX_PATH when combined with the extended-length path prefix).
+static std::wstring UTF8ToWchar(const std::string &str) {
+  if (str.empty()) return std::wstring();
+  int size_needed =
+      MultiByteToWideChar(CP_UTF8, 0, str.c_str(),
+                          static_cast<int>(str.size()), NULL, 0);
+  if (size_needed == 0) return std::wstring();
+  std::wstring wstr(static_cast<size_t>(size_needed), L'\0');
+  int result =
+      MultiByteToWideChar(CP_UTF8, 0, str.c_str(),
+                          static_cast<int>(str.size()), &wstr[0], size_needed);
+  if (result == 0) return std::wstring();
+  return wstr;
+}
 
-  void clear() {
-    faceGroup.clear();
-    lineGroup.clear();
-    pointsGroup.clear();
+// Prepends the Windows extended-length path prefix ("\\?\") to an absolute
+// path when the path length meets or exceeds MAX_PATH (260 characters).
+// This allows Windows APIs to handle paths up to 32767 characters long.
+// UNC paths (starting with "\\") are converted to "\\?\UNC\" form.
+static std::wstring LongPathW(const std::wstring &wpath) {
+  const std::wstring kLongPathPrefix = L"\\\\?\\";
+  const std::wstring kUNCPrefix = L"\\\\";
+  const std::wstring kLongUNCPathPrefix = L"\\\\?\\UNC\\";
+
+  // Already has the extended-length prefix; return as-is.
+  if (wpath.size() >= kLongPathPrefix.size() &&
+      wpath.substr(0, kLongPathPrefix.size()) == kLongPathPrefix) {
+    return wpath;
   }
 
-  bool IsEmpty() const {
-    return faceGroup.empty() && lineGroup.empty() && pointsGroup.empty();
+  // Only add the prefix when the path is long enough to require it.
+  if (wpath.size() < MAX_PATH) {
+    return wpath;
   }
 
-  // TODO(syoyo): bspline, surface, ...
-};
-
-// See
-// http://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf
-static std::istream &safeGetline(std::istream &is, std::string &t) {
-  t.clear();
-
-  // The characters in the stream are read one-by-one using a std::streambuf.
-  // That is faster than reading them one-by-one using the std::istream.
-  // Code that uses streambuf this way must be guarded by a sentry object.
-  // The sentry object performs various tasks,
-  // such as thread synchronization and updating the stream state.
+  // Normalize forward slashes to backslashes: the extended-length "\\?\"
+  // prefix requires backslash separators only.
+  std::wstring normalized = wpath;
+  for (std::wstring::size_type i = 0; i < normalized.size(); ++i) {
+    if (normalized[i] == L'/') normalized[i] = L'\\';
+  }
 
-  std::istream::sentry se(is, true);
-  std::streambuf *sb = is.rdbuf();
+  // UNC path: "\\server\share\..." -> "\\?\UNC\server\share\..."
+  if (normalized.size() >= kUNCPrefix.size() &&
+      normalized.substr(0, kUNCPrefix.size()) == kUNCPrefix) {
+    return kLongUNCPathPrefix + normalized.substr(kUNCPrefix.size());
+  }
 
-  if (se) {
-    for (;;) {
-      int c = sb->sbumpc();
-      switch (c) {
-        case '\n':
-          return is;
-        case '\r':
-          if (sb->sgetc() == '\n') sb->sbumpc();
-          return is;
-        case EOF:
-          // Also handle the case when the last line has no line ending
-          if (t.empty()) is.setstate(std::ios::eofbit);
-          return is;
-        default:
-          t += static_cast<char>(c);
-      }
-    }
+  // Absolute path with drive letter: "C:\..." -> "\\?\C:\..."
+  if (normalized.size() >= 2 && normalized[1] == L':') {
+    return kLongPathPrefix + normalized;
   }
 
-  return is;
+  return normalized;
 }
+#endif  // _WIN32
+
+// --------------------------------------------------------------------------
+// Embedded fast_float v8.0.2 for high-performance, bit-exact float parsing.
+// Disable by defining TINYOBJLOADER_DISABLE_FAST_FLOAT before including
+// this file with TINYOBJLOADER_IMPLEMENTATION.
+// --------------------------------------------------------------------------
+#ifndef TINYOBJLOADER_DISABLE_FAST_FLOAT
+
+// Standard headers needed by the embedded fast_float.
+#include <cfloat>
+#include <cstdint>
+
+namespace tinyobj_ff {
+
+// --- integral_constant, true_type, false_type ---
+template <typename T, T V>
+struct integral_constant {
+  static const T value = V;
+  typedef T value_type;
+  typedef integral_constant type;
+  operator value_type() const { return value; }
+};
+typedef integral_constant<bool, true>  true_type;
+typedef integral_constant<bool, false> false_type;
+
+// --- is_same ---
+template <typename T, typename U> struct is_same       : false_type {};
+template <typename T>             struct is_same<T, T> : true_type  {};
+
+// --- enable_if ---
+template <bool B, typename T = void> struct enable_if {};
+template <typename T>                struct enable_if<true, T> { typedef T type; };
+
+// --- conditional ---
+template <bool B, typename T, typename F> struct conditional              { typedef T type; };
+template <typename T, typename F>         struct conditional<false, T, F> { typedef F type; };
+
+// --- is_integral: true only for the exact (cv-unqualified) types specialized below ---
+template <typename T> struct is_integral : false_type {};  // default: not integral
+template <> struct is_integral<bool>               : true_type {};
+template <> struct is_integral<char>               : true_type {};
+template <> struct is_integral<signed char>        : true_type {};
+template <> struct is_integral<unsigned char>      : true_type {};
+template <> struct is_integral<short>              : true_type {};
+template <> struct is_integral<unsigned short>     : true_type {};
+template <> struct is_integral<int>                : true_type {};
+template <> struct is_integral<unsigned int>       : true_type {};
+template <> struct is_integral<long>               : true_type {};
+template <> struct is_integral<unsigned long>      : true_type {};
+template <> struct is_integral<long long>          : true_type {};
+template <> struct is_integral<unsigned long long> : true_type {};
+template <> struct is_integral<wchar_t>            : true_type {};
+template <> struct is_integral<char16_t>           : true_type {};
+template <> struct is_integral<char32_t>           : true_type {};  // NOTE(review): no char8_t specialization is visible here - confirm that is intended
+
+// --- is_signed: true when T(-1) < T(0), so T must be constructible from -1 and 0 ---
+template <typename T> struct is_signed : integral_constant<bool, T(-1) < T(0)> {};
+
+// --- underlying_type (uses compiler builtin): `type` is whatever __underlying_type(T) yields ---
+template <typename T> struct underlying_type {
+  typedef __underlying_type(T) type;  // not standard C++; NOTE(review): assumes the compiler provides this intrinsic
+};
 
-#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t'))
-#define IS_DIGIT(x) \
-  (static_cast<unsigned int>((x) - '0') < static_cast<unsigned int>(10))
-#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0'))
-
-// Make index zero-base, and also support relative index.
-static inline bool fixIndex(int idx, int n, int *ret) {
-  if (!ret) {
-    return false;
-  }
-
-  if (idx > 0) {
-    (*ret) = idx - 1;
-    return true;
-  }
+// --- ff_errc (replaces std::errc, our own enum - no system_error needed) ---
+enum class ff_errc { ok = 0, invalid_argument = 22, result_out_of_range = 34 };  // NOTE(review): 22/34 presumably mirror EINVAL/ERANGE - confirm
 
-  if (idx == 0) {
-    // zero is not allowed according to the spec.
-    return false;
-  }
+// --- min_val (replaces std::min, avoids Windows min/max macro conflicts) ---
+template <typename T>
+inline T min_val(T a, T b) { return (b < a) ? b : a; }  // yields a unless b is strictly smaller; arguments and result are copies
 
-  if (idx < 0) {
-    (*ret) = n + idx;  // negative value = relative
-    return true;
-  }
+// --- copy_n: copies `count` elements starting at `first` into the range starting at `result` ---
+template <typename InputIt, typename Size, typename OutputIt>
+inline OutputIt copy_n(InputIt first, Size count, OutputIt result) {
+  for (Size i = 0; i < count; ++i) *result++ = *first++;  // loop body never runs when count <= 0
+  return result;  // one past the last element written
+}
 
-  return false;  // never reach here.
+// --- copy_backward: copies [first, last) so that the copy ends at d_last, working back to front ---
+template <typename BidirIt1, typename BidirIt2>
+inline BidirIt2 copy_backward(BidirIt1 first, BidirIt1 last, BidirIt2 d_last) {
+  while (first != last) *(--d_last) = *(--last);  // the last source element is copied first
+  return d_last;  // start of the written range (unchanged when the input range is empty)
 }
 
-static inline std::string parseString(const char **token) {
-  std::string s;
-  (*token) += strspn((*token), " \t");
-  size_t e = strcspn((*token), " \t\r");
-  s = std::string((*token), &(*token)[e]);
-  (*token) += e;
-  return s;
+// --- fill: assigns `value` to every element of [first, last) ---
+template <typename ForwardIt, typename T>
+inline void fill(ForwardIt first, ForwardIt last, const T &value) {
+  for (; first != last; ++first) *first = value;  // nothing is written when first == last
 }
 
-static inline int parseInt(const char **token) {
-  (*token) += strspn((*token), " \t");
-  int i = atoi((*token));
-  (*token) += strcspn((*token), " \t\r");
-  return i;
+// --- distance: element count between two iterators, computed as last - first ---
+template <typename It>
+inline typename conditional<true, long long, It>::type  // always long long: the condition is the literal `true`
+distance(It first, It last) {
+  return last - first;  // requires It to support subtraction
 }
 
-// Tries to parse a floating point number located at s.
+}  // namespace tinyobj_ff
+
+// --- Begin embedded fast_float v8.0.2 (MIT / Apache-2.0 / BSL-1.0) ---
+// https://github.com/fastfloat/fast_float
+// fast_float by Daniel Lemire
+// fast_float by João Paulo Magalhaes
 //
-// s_end should be a location in the string where reading should absolutely
-// stop. For example at the end of the string, to prevent buffer overflows.
 //
-// Parses the following EBNF grammar:
-//   sign    = "+" | "-" ;
-//   END     = ? anything not in digit ?
-//   digit   = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
-//   integer = [sign] , digit , {digit} ;
-//   decimal = integer , ["." , integer] ;
-//   float   = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ;
+// with contributions from Eugene Golushkov
+// with contributions from Maksim Kita
+// with contributions from Marcin Wojdyr
+// with contributions from Neal Richardson
+// with contributions from Tim Paine
+// with contributions from Fabio Pellacini
+// with contributions from Lénárd Szolnoki
+// with contributions from Jan Pharago
+// with contributions from Maya Warrier
+// with contributions from Taha Khokhar
+// with contributions from Anders Dalvander
 //
-//  Valid strings are for example:
-//   -0  +3.1417e+2  -0.0E-3  1.0324  -1.41   11e2
 //
-// If the parsing is a success, result is set to the parsed value and true
-// is returned.
+// Licensed under the Apache License, Version 2.0, or the
+// MIT License or the Boost License. This file may not be copied,
+// modified, or distributed except according to those terms.
 //
-// The function is greedy and will parse until any of the following happens:
-//  - a non-conforming character is encountered.
-//  - s_end is reached.
+// MIT License Notice
 //
-// The following situations triggers a failure:
-//  - s >= s_end.
-//  - parse failure.
+//    MIT License
+//
+//    Copyright (c) 2021 The fast_float authors
+//
+//    Permission is hereby granted, free of charge, to any
+//    person obtaining a copy of this software and associated
+//    documentation files (the "Software"), to deal in the
+//    Software without restriction, including without
+//    limitation the rights to use, copy, modify, merge,
+//    publish, distribute, sublicense, and/or sell copies of
+//    the Software, and to permit persons to whom the Software
+//    is furnished to do so, subject to the following
+//    conditions:
+//
+//    The above copyright notice and this permission notice
+//    shall be included in all copies or substantial portions
+//    of the Software.
+//
+//    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+//    ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+//    TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+//    PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+//    SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+//    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+//    OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+//    IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+//    DEALINGS IN THE SOFTWARE.
+//
+// Apache License (Version 2.0) Notice
+//
+//    Copyright 2021 The fast_float authors
+//    Licensed under the Apache License, Version 2.0 (the "License");
+//    you may not use this file except in compliance with the License.
+//    You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+//    Unless required by applicable law or agreed to in writing, software
+//    distributed under the License is distributed on an "AS IS" BASIS,
+//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//    See the License for the specific language governing permissions and limitations under the License.
+//
+// BOOST License Notice
+//
+//    Boost Software License - Version 1.0 - August 17th, 2003
+//
+//    Permission is hereby granted, free of charge, to any person or organization
+//    obtaining a copy of the software and accompanying documentation covered by
+//    this license (the "Software") to use, reproduce, display, distribute,
+//    execute, and transmit the Software, and to prepare derivative works of the
+//    Software, and to permit third-parties to whom the Software is furnished to
+//    do so, all subject to the following:
+//
+//    The copyright notices in the Software and this entire statement, including
+//    the above license grant, this restriction and the following disclaimer,
+//    must be included in all copies of the Software, in whole or in part, and
+//    all derivative works of the Software, unless such copies or derivative
+//    works are solely in the form of machine-executable object code generated by
+//    a source language processor.
+//
+//    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//    FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+//    SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+//    FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+//    ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+//    DEALINGS IN THE SOFTWARE.
 //
-static bool tryParseDouble(const char *s, const char *s_end, double *result) {
-  if (s >= s_end) {
-    return false;
-  }
 
-  double mantissa = 0.0;
-  // This exponent is base 2 rather than 10.
-  // However the exponent we parse is supposed to be one of ten,
-  // thus we must take care to convert the exponent/and or the
-  // mantissa to a * 2^E, where a is the mantissa and E is the
-  // exponent.
-  // To get the final double we will use ldexp, it requires the
-  // exponent to be in base 2.
-  int exponent = 0;
+#ifndef FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+#define FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
 
-  // NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED
-  // TO JUMP OVER DEFINITIONS.
-  char sign = '+';
-  char exp_sign = '+';
-  char const *curr = s;
+#ifdef __has_include
+#if __has_include(<version>)
+#include <version>
+#endif
+#endif
 
-  // How many characters were read in a loop.
-  int read = 0;
-  // Tells whether a loop terminated due to reaching s_end.
-  bool end_not_reached = false;
-  bool leading_decimal_dots = false;
+// Testing for https://wg21.link/N3652, adopted in C++14
+#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304
+#define FASTFLOAT_CONSTEXPR14 constexpr
+#else
+#define FASTFLOAT_CONSTEXPR14
+#endif
 
-  /*
-          BEGIN PARSING.
-  */
+#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L
+#define FASTFLOAT_HAS_BIT_CAST 1
+#else
+#define FASTFLOAT_HAS_BIT_CAST 0
+#endif
 
-  // Find out what sign we've got.
-  if (*curr == '+' || *curr == '-') {
-    sign = *curr;
-    curr++;
-    if ((curr != s_end) && (*curr == '.')) {
-      // accept. Somethig like `.7e+2`, `-.5234`
-      leading_decimal_dots = true;
-    }
-  } else if (IS_DIGIT(*curr)) { /* Pass through. */
-  } else if (*curr == '.') {
-    // accept. Somethig like `.7e+2`, `-.5234`
-    leading_decimal_dots = true;
-  } else {
-    goto fail;
-  }
+#if defined(__cpp_lib_is_constant_evaluated) &&                                \
+    __cpp_lib_is_constant_evaluated >= 201811L
+#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 1
+#else
+#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 0
+#endif
 
-  // Read the integer part.
-  end_not_reached = (curr != s_end);
-  if (!leading_decimal_dots) {
-    while (end_not_reached && IS_DIGIT(*curr)) {
-      mantissa *= 10;
-      mantissa += static_cast<int>(*curr - 0x30);
-      curr++;
-      read++;
-      end_not_reached = (curr != s_end);
-    }
+#if defined(__cpp_if_constexpr) && __cpp_if_constexpr >= 201606L
+#define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x)
+#else
+#define FASTFLOAT_IF_CONSTEXPR17(x) if (x)
+#endif
 
-    // We must make sure we actually got something.
-    if (read == 0) goto fail;
-  }
+// Testing for relevant C++20 constexpr library features
+#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST &&           \
+    defined(__cpp_lib_constexpr_algorithms) &&                                 \
+    __cpp_lib_constexpr_algorithms >= 201806L /*For std::copy and std::fill*/
+#define FASTFLOAT_CONSTEXPR20 constexpr
+#define FASTFLOAT_IS_CONSTEXPR 1
+#else
+#define FASTFLOAT_CONSTEXPR20
+#define FASTFLOAT_IS_CONSTEXPR 0
+#endif
 
-  // We allow numbers of form "#", "###" etc.
-  if (!end_not_reached) goto assemble;
+#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+#define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 0
+#else
+#define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 1
+#endif
 
-  // Read the decimal part.
-  if (*curr == '.') {
-    curr++;
-    read = 1;
-    end_not_reached = (curr != s_end);
-    while (end_not_reached && IS_DIGIT(*curr)) {
+#endif // FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+
+#ifndef FASTFLOAT_FLOAT_COMMON_H
+#define FASTFLOAT_FLOAT_COMMON_H
+
+#include <cassert>
+#include <cstring>
+#include <limits>
+#ifdef __has_include
+#if __has_include(<stdfloat>) && (__cplusplus > 202002L || (defined(_MSVC_LANG) && (_MSVC_LANG > 202002L)))
+#include <stdfloat>
+#endif
+#endif
+
+#define FASTFLOAT_VERSION_MAJOR 8
+#define FASTFLOAT_VERSION_MINOR 0
+#define FASTFLOAT_VERSION_PATCH 2
+
+#define FASTFLOAT_STRINGIZE_IMPL(x) #x
+#define FASTFLOAT_STRINGIZE(x) FASTFLOAT_STRINGIZE_IMPL(x)
+
+#define FASTFLOAT_VERSION_STR                                                  \
+  FASTFLOAT_STRINGIZE(FASTFLOAT_VERSION_MAJOR)                                 \
+  "." FASTFLOAT_STRINGIZE(FASTFLOAT_VERSION_MINOR) "." FASTFLOAT_STRINGIZE(    \
+      FASTFLOAT_VERSION_PATCH)
+
+#define FASTFLOAT_VERSION                                                      \
+  (FASTFLOAT_VERSION_MAJOR * 10000 + FASTFLOAT_VERSION_MINOR * 100 +           \
+   FASTFLOAT_VERSION_PATCH)
+
+namespace fast_float {
+
+enum class chars_format : uint64_t;  // forward declaration so detail:: can name the type before its enumerators exist
+
+namespace detail {
+constexpr chars_format basic_json_fmt = chars_format(1 << 5);  // bit 5: marker bit folded into json / json_or_infnan
+constexpr chars_format basic_fortran_fmt = chars_format(1 << 6);  // bit 6: marker bit folded into fortran
+} // namespace detail
+
+enum class chars_format : uint64_t {  // bit flags; the named formats below are unions of these bits
+  scientific = 1 << 0,
+  fixed = 1 << 2,  // note: bit 1 is not assigned to any enumerator here
+  hex = 1 << 3,
+  no_infnan = 1 << 4,
+  // RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6
+  json = uint64_t(detail::basic_json_fmt) | fixed | scientific | no_infnan,
+  // Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed.
+  json_or_infnan = uint64_t(detail::basic_json_fmt) | fixed | scientific,
+  fortran = uint64_t(detail::basic_fortran_fmt) | fixed | scientific,
+  general = fixed | scientific,  // the default format of parse_options_t
+  allow_leading_plus = 1 << 7,
+  skip_white_space = 1 << 8,
+};
+
+template <typename UC> struct from_chars_result_t {  // pointer + error-code pair; UC is the character type
+  UC const *ptr;  // NOTE(review): presumably where parsing stopped, as with std::from_chars - confirm against the parser
+  tinyobj_ff::ff_errc ec;  // ff_errc::ok (0) or one of the error enumerators
+};
+
+using from_chars_result = from_chars_result_t<char>;  // alias for plain char input
+
+template <typename UC> struct parse_options_t {  // bundle of parsing settings; UC is the character type
+  constexpr explicit parse_options_t(chars_format fmt = chars_format::general,
+                                     UC dot = UC('.'), int b = 10)
+      : format(fmt), decimal_point(dot), base(b) {}  // defaults: general format, '.' separator, base 10
+
+  /** Which number formats are accepted */
+  chars_format format;
+  /** The character used as decimal point */
+  UC decimal_point;
+  /** The base used for integers */
+  int base;
+};
+
+using parse_options = parse_options_t<char>;  // alias for plain char input
+
+} // namespace fast_float
+
+#if FASTFLOAT_HAS_BIT_CAST
+#include <bit>
+#endif
+
+#if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) ||            \
+     defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) ||          \
+     defined(__MINGW64__) || defined(__s390x__) ||                             \
+     (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) ||      \
+      defined(__PPC64LE__)) ||                                                 \
+     defined(__loongarch64))
+#define FASTFLOAT_64BIT 1
+#elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) ||             \
+       defined(__arm__) || defined(_M_ARM) || defined(__ppc__) ||              \
+       defined(__MINGW32__) || defined(__EMSCRIPTEN__))
+#define FASTFLOAT_32BIT 1
+#else
+  // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow.
+// We can never tell the register width, but the SIZE_MAX is a good
+// approximation. UINTPTR_MAX and INTPTR_MAX are optional, so avoid them for max
+// portability.
+#if SIZE_MAX == 0xffff
+#error Unknown platform (16-bit, unsupported)
+#elif SIZE_MAX == 0xffffffff
+#define FASTFLOAT_32BIT 1
+#elif SIZE_MAX == 0xffffffffffffffff
+#define FASTFLOAT_64BIT 1
+#else
+#error Unknown platform (not 32-bit, not 64-bit?)
+#endif
+#endif
+
+#if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__)) ||           \
+    (defined(_M_ARM64) && !defined(__MINGW32__))
+#include <intrin.h>
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define FASTFLOAT_VISUAL_STUDIO 1
+#endif
+
+#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
+#define FASTFLOAT_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined _WIN32
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#else
+#if defined(__APPLE__) || defined(__FreeBSD__)
+#include <machine/endian.h>
+#elif defined(sun) || defined(__sun)
+#include <sys/byteorder.h>
+#elif defined(__MVS__)
+#include <sys/endian.h>
+#else
+#ifdef __has_include
+#if __has_include(<endian.h>)
+#include <endian.h>
+#endif //__has_include(<endian.h>)
+#endif //__has_include
+#endif
+#
+#ifndef __BYTE_ORDER__
+// safe choice
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#endif
+#
+#ifndef __ORDER_LITTLE_ENDIAN__
+// safe choice
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#endif
+#
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define FASTFLOAT_IS_BIG_ENDIAN 0
+#else
+#define FASTFLOAT_IS_BIG_ENDIAN 1
+#endif
+#endif
+
+#if defined(__SSE2__) || (defined(FASTFLOAT_VISUAL_STUDIO) &&                  \
+                          (defined(_M_AMD64) || defined(_M_X64) ||             \
+                           (defined(_M_IX86_FP) && _M_IX86_FP == 2)))
+#define FASTFLOAT_SSE2 1
+#endif
+
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define FASTFLOAT_NEON 1
+#endif
+
+#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON)
+#define FASTFLOAT_HAS_SIMD 1
+#endif
+
+#if defined(__GNUC__)
+// disable -Wcast-align=strict (GCC only)
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS                                        \
+  _Pragma("GCC diagnostic push")                                               \
+      _Pragma("GCC diagnostic ignored \"-Wcast-align\"")
+#else
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS
+#endif
+
+#if defined(__GNUC__)
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS _Pragma("GCC diagnostic pop")
+#else
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS
+#endif
+
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#define fastfloat_really_inline __forceinline
+#else
+#define fastfloat_really_inline inline __attribute__((always_inline))
+#endif
+
+#ifndef FASTFLOAT_ASSERT  // overridable; this default only evaluates x and discards the result (it checks nothing)
+#define FASTFLOAT_ASSERT(x)                                                    \
+  { ((void)(x)); }
+#endif
+
+#ifndef FASTFLOAT_DEBUG_ASSERT  // overridable; this default only evaluates x and discards the result (it checks nothing)
+#define FASTFLOAT_DEBUG_ASSERT(x)                                              \
+  { ((void)(x)); }
+#endif
+
+// rust style `try!()` macro, or `?` operator: returns false from the enclosing function when x is false
+#define FASTFLOAT_TRY(x)                                                       \
+  {                                                                            \
+    if (!(x))                                                                  \
+      return false;                                                            \
+  }
+
+#define FASTFLOAT_ENABLE_IF(...)                                               \
+  typename tinyobj_ff::enable_if<(__VA_ARGS__), int>::type
+
+namespace fast_float {
+
+fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {  // true only during constant evaluation, and only when that is detectable
+#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED
+  return std::is_constant_evaluated();  // NOTE(review): declared in <type_traits>, which this section does not include directly - confirm it is reachable
+#else
+  return false;  // no detection facility: always report a runtime context
+#endif
+}
+
+template <typename T>  // true for double and float, plus each std::floatN_t / std::bfloat16_t whose feature macro is defined
+struct is_supported_float_type
+    : tinyobj_ff::integral_constant<
+          bool, tinyobj_ff::is_same<T, double>::value || tinyobj_ff::is_same<T, float>::value
+#ifdef __STDCPP_FLOAT64_T__
+                    || tinyobj_ff::is_same<T, std::float64_t>::value
+#endif
+#ifdef __STDCPP_FLOAT32_T__
+                    || tinyobj_ff::is_same<T, std::float32_t>::value
+#endif
+#ifdef __STDCPP_FLOAT16_T__
+                    || tinyobj_ff::is_same<T, std::float16_t>::value
+#endif
+#ifdef __STDCPP_BFLOAT16_T__
+                    || tinyobj_ff::is_same<T, std::bfloat16_t>::value
+#endif
+          > {
+};
+
+template <typename T>  // unsigned type picked by sizeof(T): 1 -> uint8_t, 2 -> uint16_t, 4 -> uint32_t, anything else -> uint64_t
+using equiv_uint_t = typename tinyobj_ff::conditional<
+    sizeof(T) == 1, uint8_t,
+    typename tinyobj_ff::conditional<
+        sizeof(T) == 2, uint16_t,
+        typename tinyobj_ff::conditional<sizeof(T) == 4, uint32_t,
+                                  uint64_t>::type>::type>::type;
+
+template <typename T> struct is_supported_integer_type : tinyobj_ff::is_integral<T> {};  // same answer as tinyobj_ff::is_integral<T>
+
+template <typename UC>  // true for char, wchar_t, char16_t, char32_t, and char8_t when __cpp_char8_t is defined
+struct is_supported_char_type
+    : tinyobj_ff::integral_constant<bool, tinyobj_ff::is_same<UC, char>::value ||
+                                       tinyobj_ff::is_same<UC, wchar_t>::value ||
+                                       tinyobj_ff::is_same<UC, char16_t>::value ||
+                                       tinyobj_ff::is_same<UC, char32_t>::value
+#ifdef __cpp_char8_t
+                                       || tinyobj_ff::is_same<UC, char8_t>::value
+#endif
+                             > {
+};
+
+// Compares two ASCII strings in a case insensitive manner: returns true when the first `length` characters match.
+template <typename UC>
+inline FASTFLOAT_CONSTEXPR14 bool
+fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase,
+                      size_t length) {
+  for (size_t i = 0; i < length; ++i) {
+    UC const actual = actual_mixedcase[i];
+    if ((actual < 256 ? actual | 32 : actual) != expected_lowercase[i]) {  // `| 32` maps ASCII 'A'-'Z' onto 'a'-'z'; values >= 256 are compared as-is
+      return false;
+    }
+  }
+  return true;  // also reached when length == 0
+}
+
+#ifndef FLT_EVAL_METHOD
+#error "FLT_EVAL_METHOD should be defined, please include cfloat."
+#endif
+
+// a pointer and a length to a contiguous block of memory (nothing in this struct allocates or frees)
+template <typename T> struct span {
+  T const *ptr;
+  size_t length;  // counted in elements of T: operator[] indexes ptr directly
+
+  constexpr span(T const *_ptr, size_t _length) : ptr(_ptr), length(_length) {}
+
+  constexpr span() : ptr(nullptr), length(0) {}  // empty view
+
+  constexpr size_t len() const noexcept { return length; }
+
+  FASTFLOAT_CONSTEXPR14 const T &operator[](size_t index) const noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);  // with the default macro this only evaluates the condition; no bounds check is enforced
+    return ptr[index];
+  }
+};
+
+struct value128 {  // a 128-bit unsigned value held as two 64-bit halves
+  uint64_t low;
+  uint64_t high;
+
+  constexpr value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {}  // note the argument order: low first
+
+  constexpr value128() : low(0), high(0) {}  // zero
+};
+
+/* Helper C++14 constexpr generic implementation of leading_zeroes (binary search for the highest set bit) */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int
+leading_zeroes_generic(uint64_t input_num, int last_bit = 0) {  // last_bit accumulates the index of the highest set bit
+  if (input_num & uint64_t(0xffffffff00000000)) {  // anything set in the upper 32 bits?
+    input_num >>= 32;
+    last_bit |= 32;
+  }
+  if (input_num & uint64_t(0xffff0000)) {  // upper 16 of the remaining 32
+    input_num >>= 16;
+    last_bit |= 16;
+  }
+  if (input_num & uint64_t(0xff00)) {  // upper 8 of the remaining 16
+    input_num >>= 8;
+    last_bit |= 8;
+  }
+  if (input_num & uint64_t(0xf0)) {  // upper 4 of the remaining 8
+    input_num >>= 4;
+    last_bit |= 4;
+  }
+  if (input_num & uint64_t(0xc)) {  // upper 2 of the remaining 4
+    input_num >>= 2;
+    last_bit |= 2;
+  }
+  if (input_num & uint64_t(0x2)) { /* input_num >>=  1; */
+    last_bit |= 1;
+  }
+  return 63 - last_bit;  // for input_num == 0 no branch fires, so this yields 63 (same as for 1)
+}
+
+/* result might be undefined when input_num is zero */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int
+leading_zeroes(uint64_t input_num) {  // count of zero bits above the highest set bit of input_num
+  assert(input_num > 0);  // standard assert: compiled out when NDEBUG is defined
+  if (cpp20_and_in_constexpr()) {
+    return leading_zeroes_generic(input_num);  // portable path, used instead of the intrinsics during constant evaluation
+  }
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#if defined(_M_X64) || defined(_M_ARM64)
+  unsigned long leading_zero = 0;
+  // Search the mask data from most significant bit (MSB)
+  // to least significant bit (LSB) for a set bit (1).
+  _BitScanReverse64(&leading_zero, input_num);
+  return (int)(63 - leading_zero);  // convert the found bit index into a leading-zero count
+#else
+  return leading_zeroes_generic(input_num);  // other MSVC targets: portable path
+#endif
+#else
+  return __builtin_clzll(input_num);  // non-MSVC compilers: builtin
+#endif
+}
+
+// slow emulation routine for 32-bit: 32x32 -> 64-bit unsigned multiply
+fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) {
+  return x * (uint64_t)y;  // widening y makes the whole product 64-bit, so nothing is truncated
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t
+umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) {  // 64x64 -> 128 multiply built from 32-bit halves: returns the low word, stores the high word in *hi
+  uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd);  // high(ab) * low(cd)
+  uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd);  // low(ab) * low(cd)
+  uint64_t adbc = ad + emulu((uint32_t)ab, (uint32_t)(cd >> 32));  // sum of both cross terms; may wrap around
+  uint64_t adbc_carry = (uint64_t)(adbc < ad);  // 1 when the cross-term sum wrapped
+  uint64_t lo = bd + (adbc << 32);
+  *hi = emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
+        (adbc_carry << 32) + (uint64_t)(lo < bd);  // (lo < bd) is the carry out of the low word
+  return lo;
+}
+
+#ifdef FASTFLOAT_32BIT
+
+// slow emulation routine for 32-bit: supplies _umul128 (forwarding to umul128_generic) for full_multiplication's FASTFLOAT_32BIT branch
+#if !defined(__MINGW64__)
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t _umul128(uint64_t ab,
+                                                                uint64_t cd,
+                                                                uint64_t *hi) {
+  return umul128_generic(ab, cd, hi);
+}
+#endif // !__MINGW64__
+
+#endif // FASTFLOAT_32BIT
+
+// compute 64-bit a*b: the full 128-bit product of two 64-bit operands, returned as low/high halves
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128
+full_multiplication(uint64_t a, uint64_t b) {
+  if (cpp20_and_in_constexpr()) {  // constant evaluation: use the portable routine rather than intrinsics
+    value128 answer;
+    answer.low = umul128_generic(a, b, &answer.high);
+    return answer;
+  }
+  value128 answer;
+#if defined(_M_ARM64) && !defined(__MINGW32__)
+  // ARM64 has native support for 64-bit multiplications, no need to emulate
+  // But MinGW on ARM64 doesn't have native support for 64-bit multiplications
+  answer.high = __umulh(a, b);
+  answer.low = a * b;  // wraps modulo 2^64, which is exactly the low word
+#elif defined(FASTFLOAT_32BIT) ||                                              \
+    (defined(_WIN64) && !defined(__clang__) && !defined(_M_ARM64))
+  answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64
+#elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__)
+  __uint128_t r = ((__uint128_t)a) * b;  // compiler-provided 128-bit integer
+  answer.low = uint64_t(r);
+  answer.high = uint64_t(r >> 64);
+#else
+  answer.low = umul128_generic(a, b, &answer.high);  // fallback: portable routine
+#endif
+  return answer;
+}
+
+struct adjusted_mantissa {  // mantissa / binary-exponent pair; both members default to 0
+  uint64_t mantissa{0};
+  int32_t power2{0}; // a negative value indicates an invalid result
+  adjusted_mantissa() = default;
+
+  constexpr bool operator==(adjusted_mantissa const &o) const {  // equal only when both members match
+    return mantissa == o.mantissa && power2 == o.power2;
+  }
+
+  constexpr bool operator!=(adjusted_mantissa const &o) const {  // exact negation of operator==
+    return mantissa != o.mantissa || power2 != o.power2;
+  }
+};
+
+// Bias so we can get the real exponent with an invalid adjusted_mantissa (the value is -32768).
+constexpr static int32_t invalid_am_bias = -0x8000;
+
+// used for binary_format_lookup_tables<T>::max_mantissa; equals 5^5 = 3125, typed uint64_t so the table products are 64-bit
+constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5;
+
+template <typename T, typename U = void> struct binary_format_lookup_tables;  // primary template is only declared; per-type partial specializations follow
+
+template <typename T> struct binary_format : binary_format_lookup_tables<T> {  // per-type constants; members are declared here and defined by specializations elsewhere
+  using equiv_uint = equiv_uint_t<T>;  // unsigned integer type with the same size as T
+
+  static constexpr int mantissa_explicit_bits();
+  static constexpr int minimum_exponent();
+  static constexpr int infinite_power();
+  static constexpr int sign_index();
+  static constexpr int
+  min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST
+  static constexpr int max_exponent_fast_path();
+  static constexpr int max_exponent_round_to_even();
+  static constexpr int min_exponent_round_to_even();
+  static constexpr uint64_t max_mantissa_fast_path(int64_t power);
+  static constexpr uint64_t
+  max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST
+  static constexpr int largest_power_of_ten();
+  static constexpr int smallest_power_of_ten();
+  static constexpr T exact_power_of_ten(int64_t power);
+  static constexpr size_t max_digits();
+  static constexpr equiv_uint exponent_mask();
+  static constexpr equiv_uint mantissa_mask();
+  static constexpr equiv_uint hidden_bit_mask();
+};
+
+template <typename U> struct binary_format_lookup_tables<double, U> {  // tables for double; U is an unused extra parameter that keeps this a partial specialization
+  static constexpr double powers_of_ten[] = {
+      1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
+      1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
+
+  // Largest integer value v so that (5**index * v) <= 1<<53.
+  // 0x20000000000000 == 1 << 53
+  static constexpr uint64_t max_mantissa[] = {
+      0x20000000000000,
+      0x20000000000000 / 5,
+      0x20000000000000 / (5 * 5),
+      0x20000000000000 / (5 * 5 * 5),
+      0x20000000000000 / (5 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555),
+      0x20000000000000 / (constant_55555 * 5),
+      0x20000000000000 / (constant_55555 * 5 * 5),
+      0x20000000000000 / (constant_55555 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555 * 5 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555),
+      0x20000000000000 / (constant_55555 * constant_55555 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5 * 5),  // index 13: divisor 5^13
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555),  // index 14: divisor 5^15. NOTE(review): 5^14 is skipped, so from here on entries use 5^(index+1) and are smaller than the comment above describes - confirm against upstream
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * 5 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5),
+      0x20000000000000 /
+          (constant_55555 * constant_55555 * constant_55555 * constant_55555),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 *
+                          constant_55555 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 *
+                          constant_55555 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 *
+                          constant_55555 * 5 * 5 * 5),
+      0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 *
+                          constant_55555 * 5 * 5 * 5 * 5)};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE  // 1 only when neither __cplusplus nor _MSVC_LANG reports C++17 or later
+
+template <typename U>
+constexpr double binary_format_lookup_tables<double, U>::powers_of_ten[];  // out-of-class definition of the static data member
+
+template <typename U>
+constexpr uint64_t binary_format_lookup_tables<double, U>::max_mantissa[];  // out-of-class definition of the static data member
+
+#endif
+
+template <typename U> struct binary_format_lookup_tables<float, U> {  // tables for float; U is an unused extra parameter that keeps this a partial specialization
+  static constexpr float powers_of_ten[] = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f,
+                                            1e6f, 1e7f, 1e8f, 1e9f, 1e10f};
+
+  // Largest integer value v so that (5**index * v) <= 1<<24.
+  // 0x1000000 == 1<<24
+  static constexpr uint64_t max_mantissa[] = {
+      0x1000000,
+      0x1000000 / 5,
+      0x1000000 / (5 * 5),
+      0x1000000 / (5 * 5 * 5),
+      0x1000000 / (5 * 5 * 5 * 5),
+      0x1000000 / (constant_55555),
+      0x1000000 / (constant_55555 * 5),
+      0x1000000 / (constant_55555 * 5 * 5),
+      0x1000000 / (constant_55555 * 5 * 5 * 5),
+      0x1000000 / (constant_55555 * 5 * 5 * 5 * 5),
+      0x1000000 / (constant_55555 * constant_55555),
+      0x1000000 / (constant_55555 * constant_55555 * 5)};  // index 11: divisor 5^11 (every index i here divides by 5^i)
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE  // 1 only when neither __cplusplus nor _MSVC_LANG reports C++17 or later
+
+template <typename U>
+constexpr float binary_format_lookup_tables<float, U>::powers_of_ten[];  // out-of-class definition of the static data member
+
+template <typename U>
+constexpr uint64_t binary_format_lookup_tables<float, U>::max_mantissa[];  // out-of-class definition of the static data member
+
+#endif
+
+template <>
+inline constexpr int binary_format<double>::min_exponent_fast_path() {  // double: -22, or 0 under the evaluation methods excluded below
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return 0;  // FLT_EVAL_METHOD is neither 0 nor 1: no negative exponents
+#else
+  return -22;  // magnitude equals the last index of the double powers_of_ten table
+#endif
+}
+
+template <>
+inline constexpr int binary_format<float>::min_exponent_fast_path() {  // float: -10, or 0 under the evaluation methods excluded below
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return 0;  // FLT_EVAL_METHOD is neither 0 nor 1: no negative exponents
+#else
+  return -10;  // magnitude equals the last index of the float powers_of_ten table
+#endif
+}
+
+template <>
+inline constexpr int binary_format<double>::mantissa_explicit_bits() {  // explicitly stored mantissa bits for double
+  return 52;
+}
+
+template <>
+inline constexpr int binary_format<float>::mantissa_explicit_bits() {  // explicitly stored mantissa bits for float
+  return 23;
+}
+
+template <>
+inline constexpr int binary_format<double>::max_exponent_round_to_even() {  // upper bound of the round-to-even exponent window for double (its use is outside this chunk)
+  return 23;
+}
+
+template <>
+inline constexpr int binary_format<float>::max_exponent_round_to_even() {  // upper bound of the round-to-even exponent window for float (its use is outside this chunk)
+  return 10;
+}
+
+template <>
+inline constexpr int binary_format<double>::min_exponent_round_to_even() {  // lower bound of the round-to-even exponent window for double (its use is outside this chunk)
+  return -4;
+}
+
+template <>
+inline constexpr int binary_format<float>::min_exponent_round_to_even() {  // lower bound of the round-to-even exponent window for float (its use is outside this chunk)
+  return -17;
+}
+
+template <> inline constexpr int binary_format<double>::minimum_exponent() {  // minimum exponent constant for double
+  return -1023;
+}
+
+template <> inline constexpr int binary_format<float>::minimum_exponent() {  // minimum exponent constant for float
+  return -127;
+}
+
+template <> inline constexpr int binary_format<double>::infinite_power() {
+  return 0x7FF;
+}
+
+template <> inline constexpr int binary_format<float>::infinite_power() {
+  return 0xFF;
+}
+
+template <> inline constexpr int binary_format<double>::sign_index() {
+  return 63;
+}
+
+template <> inline constexpr int binary_format<float>::sign_index() {
+  return 31;
+}
+
+template <>
+inline constexpr int binary_format<double>::max_exponent_fast_path() {
+  return 22;
+}
+
+template <>
+inline constexpr int binary_format<float>::max_exponent_fast_path() {
+  return 10;
+}
+
+template <>
+inline constexpr uint64_t binary_format<double>::max_mantissa_fast_path() {
+  return uint64_t(1) << (mantissa_explicit_bits() + 1); // 2^53
+}
+
+template <>
+inline constexpr uint64_t binary_format<float>::max_mantissa_fast_path() {
+  return uint64_t(1) << (mantissa_explicit_bits() + 1); // 2^24
+}
+
+// credit: Jakub Jelínek
+#ifdef __STDCPP_FLOAT16_T__
+template <typename U> struct binary_format_lookup_tables<std::float16_t, U> {
+  static constexpr std::float16_t powers_of_ten[] = {1e0f16, 1e1f16, 1e2f16,
+                                                     1e3f16, 1e4f16};
+
+  // Largest integer value v so that (5**index * v) <= 1<<11.
+  // 0x800 == 1<<11
+  static constexpr uint64_t max_mantissa[] = {0x800,
+                                              0x800 / 5,
+                                              0x800 / (5 * 5),
+                                              0x800 / (5 * 5 * 5),
+                                              0x800 / (5 * 5 * 5 * 5),
+                                              0x800 / (constant_55555)};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+
+template <typename U>
+constexpr std::float16_t
+    binary_format_lookup_tables<std::float16_t, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t
+    binary_format_lookup_tables<std::float16_t, U>::max_mantissa[];
+
+#endif
+
+template <>
+inline constexpr std::float16_t
+binary_format<std::float16_t>::exact_power_of_ten(int64_t power) {
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
+}
+
+template <>
+inline constexpr binary_format<std::float16_t>::equiv_uint
+binary_format<std::float16_t>::exponent_mask() {
+  return 0x7C00; // bits 10-14
+}
+
+template <>
+inline constexpr binary_format<std::float16_t>::equiv_uint
+binary_format<std::float16_t>::mantissa_mask() {
+  return 0x03FF; // bits 0-9
+}
+
+template <>
+inline constexpr binary_format<std::float16_t>::equiv_uint
+binary_format<std::float16_t>::hidden_bit_mask() {
+  return 0x0400; // bit 10, just above the stored mantissa
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::max_exponent_fast_path() {
+  return 4; // last index of powers_of_ten[] above (1e4)
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::mantissa_explicit_bits() {
+  return 10; // stored mantissa bits; the hidden bit is not counted
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<std::float16_t>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits(); // 2^11
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<std::float16_t>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 4
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::min_exponent_fast_path() {
+  return 0; // no negative powers of ten on the fast path for this type
+}
+
+template <>
+inline constexpr int
+binary_format<std::float16_t>::max_exponent_round_to_even() {
+  return 5;
+}
+
+template <>
+inline constexpr int
+binary_format<std::float16_t>::min_exponent_round_to_even() {
+  return -22;
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::minimum_exponent() {
+  return -15; // negated binary16 exponent bias
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::infinite_power() {
+  return 0x1F; // exponent field with all 5 bits set
+}
+
+template <> inline constexpr int binary_format<std::float16_t>::sign_index() {
+  return 15; // bit position of the sign bit
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::largest_power_of_ten() {
+  return 4;
+}
+
+template <>
+inline constexpr int binary_format<std::float16_t>::smallest_power_of_ten() {
+  return -27;
+}
+
+template <>
+inline constexpr size_t binary_format<std::float16_t>::max_digits() {
+  return 22;
+}
+#endif // __STDCPP_FLOAT16_T__
+
+// credit: Jakub Jelínek
+#ifdef __STDCPP_BFLOAT16_T__
+template <typename U> struct binary_format_lookup_tables<std::bfloat16_t, U> {
+  static constexpr std::bfloat16_t powers_of_ten[] = {1e0bf16, 1e1bf16, 1e2bf16,
+                                                      1e3bf16};
+
+  // Largest integer value v so that (5**index * v) <= 1<<8.
+  // 0x100 == 1<<8
+  static constexpr uint64_t max_mantissa[] = {0x100, 0x100 / 5, 0x100 / (5 * 5),
+                                              0x100 / (5 * 5 * 5),
+                                              0x100 / (5 * 5 * 5 * 5)};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+
+template <typename U>
+constexpr std::bfloat16_t
+    binary_format_lookup_tables<std::bfloat16_t, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t
+    binary_format_lookup_tables<std::bfloat16_t, U>::max_mantissa[];
+
+#endif
+
+template <>
+inline constexpr std::bfloat16_t
+binary_format<std::bfloat16_t>::exact_power_of_ten(int64_t power) {
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::max_exponent_fast_path() {
+  return 3; // last index of powers_of_ten[] above (1e3)
+}
+
+template <>
+inline constexpr binary_format<std::bfloat16_t>::equiv_uint
+binary_format<std::bfloat16_t>::exponent_mask() {
+  return 0x7F80; // bits 7-14
+}
+
+template <>
+inline constexpr binary_format<std::bfloat16_t>::equiv_uint
+binary_format<std::bfloat16_t>::mantissa_mask() {
+  return 0x007F; // bits 0-6
+}
+
+template <>
+inline constexpr binary_format<std::bfloat16_t>::equiv_uint
+binary_format<std::bfloat16_t>::hidden_bit_mask() {
+  return 0x0080; // bit 7, just above the stored mantissa
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::mantissa_explicit_bits() {
+  return 7; // stored mantissa bits; the hidden bit is not counted
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<std::bfloat16_t>::max_mantissa_fast_path() {
+  return uint64_t(2) << mantissa_explicit_bits(); // 2^8
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<std::bfloat16_t>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 3
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::min_exponent_fast_path() {
+  return 0; // no negative powers of ten on the fast path for this type
+}
+
+template <>
+inline constexpr int
+binary_format<std::bfloat16_t>::max_exponent_round_to_even() {
+  return 3;
+}
+
+template <>
+inline constexpr int
+binary_format<std::bfloat16_t>::min_exponent_round_to_even() {
+  return -24;
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::minimum_exponent() {
+  return -127; // same value as the float specialization
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::infinite_power() {
+  return 0xFF; // exponent field with all 8 bits set
+}
+
+template <> inline constexpr int binary_format<std::bfloat16_t>::sign_index() {
+  return 15; // bit position of the sign bit
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::largest_power_of_ten() {
+  return 38;
+}
+
+template <>
+inline constexpr int binary_format<std::bfloat16_t>::smallest_power_of_ten() {
+  return -60;
+}
+
+template <>
+inline constexpr size_t binary_format<std::bfloat16_t>::max_digits() {
+  return 98;
+}
+#endif // __STDCPP_BFLOAT16_T__
+
+template <>
+inline constexpr uint64_t
+binary_format<double>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 22
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <>
+inline constexpr uint64_t
+binary_format<float>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 10
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
+
+template <>
+inline constexpr double
+binary_format<double>::exact_power_of_ten(int64_t power) {
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power]; // index is unchecked
+}
+
+template <>
+inline constexpr float binary_format<float>::exact_power_of_ten(int64_t power) {
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power]; // index is unchecked
+}
+
+template <> inline constexpr int binary_format<double>::largest_power_of_ten() {
+  return 308; // DBL_MAX is about 1.8e308
+}
+
+template <> inline constexpr int binary_format<float>::largest_power_of_ten() {
+  return 38; // FLT_MAX is about 3.4e38
+}
+
+template <>
+inline constexpr int binary_format<double>::smallest_power_of_ten() {
+  return -342;
+}
+
+template <> inline constexpr int binary_format<float>::smallest_power_of_ten() {
+  return -64;
+}
+
+template <> inline constexpr size_t binary_format<double>::max_digits() {
+  return 769;
+}
+
+template <> inline constexpr size_t binary_format<float>::max_digits() {
+  return 114;
+}
+
+template <>
+inline constexpr binary_format<float>::equiv_uint
+binary_format<float>::exponent_mask() {
+  return 0x7F800000; // bits 23-30
+}
+
+template <>
+inline constexpr binary_format<double>::equiv_uint
+binary_format<double>::exponent_mask() {
+  return 0x7FF0000000000000; // bits 52-62
+}
+
+template <>
+inline constexpr binary_format<float>::equiv_uint
+binary_format<float>::mantissa_mask() {
+  return 0x007FFFFF; // bits 0-22
+}
+
+template <>
+inline constexpr binary_format<double>::equiv_uint
+binary_format<double>::mantissa_mask() {
+  return 0x000FFFFFFFFFFFFF; // bits 0-51
+}
+
+template <>
+inline constexpr binary_format<float>::equiv_uint
+binary_format<float>::hidden_bit_mask() {
+  return 0x00800000; // bit 23, just above the stored mantissa
+}
+
+template <>
+inline constexpr binary_format<double>::equiv_uint
+binary_format<double>::hidden_bit_mask() {
+  return 0x0010000000000000; // bit 52, just above the stored mantissa
+}
+
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+to_float(bool negative, adjusted_mantissa am, T &value) {
+  using equiv_uint = equiv_uint_t<T>; // sign | exponent | mantissa go in here
+  equiv_uint const expo = equiv_uint(
+      equiv_uint(am.power2) << binary_format<T>::mantissa_explicit_bits());
+  equiv_uint const sign =
+      equiv_uint(equiv_uint(negative) << binary_format<T>::sign_index());
+  equiv_uint const bits = equiv_uint(equiv_uint(am.mantissa) | expo | sign);
+#if FASTFLOAT_HAS_BIT_CAST
+  value = std::bit_cast<T>(bits);
+#else
+  ::memcpy(&value, &bits, sizeof(T));
+#endif
+}
+
+template <typename = void> struct space_lut {
+  // value[c] is true only for code units 9-13 ('\t'..'\r') and 32 (' ').
+  static constexpr bool value[] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+template <typename T> constexpr bool space_lut<T>::value[];
+#endif
+
+// ASCII whitespace test; code units that do not fit in one byte never match.
+template <typename UC> constexpr bool is_space(UC c) {
+  return UC(uint8_t(c)) == c && space_lut<>::value[uint8_t(c)];
+}
+
+template <typename UC> static constexpr uint64_t int_cmp_zeros() {
+  static_assert((sizeof(UC) == 1) || (sizeof(UC) == 2) || (sizeof(UC) == 4),
+                "Unsupported character size");
+  // A 64-bit word with '0' in every UC-sized lane: two lanes for 4-byte UC,
+  // four lanes for 2-byte UC, and eight 0x30 bytes for 1-byte UC.
+  return (sizeof(UC) == 4)   ? uint64_t(UC('0')) * 0x0000000100000001ull
+         : (sizeof(UC) == 2) ? uint64_t(UC('0')) * 0x0001000100010001ull
+                             : uint64_t(0x3030303030303030);
+}
+
+template <typename UC> static constexpr int int_cmp_len() {
+  return static_cast<int>(sizeof(uint64_t) / sizeof(UC)); // UCs per u64
+}
+
+template <typename UC> constexpr UC const *str_const_nan();
+// One specialization per character type; each returns the literal "nan".
+template <> constexpr char const *str_const_nan<char>() { return "nan"; }
+
+template <> constexpr wchar_t const *str_const_nan<wchar_t>() { return L"nan"; }
+
+template <> constexpr char16_t const *str_const_nan<char16_t>() {
+  return u"nan";
+}
+
+template <> constexpr char32_t const *str_const_nan<char32_t>() {
+  return U"nan";
+}
+
+#ifdef __cpp_char8_t
+template <> constexpr char8_t const *str_const_nan<char8_t>() {
+  return u8"nan";
+}
+#endif
+
+template <typename UC> constexpr UC const *str_const_inf();
+// One specialization per character type; each returns the literal "infinity".
+template <> constexpr char const *str_const_inf<char>() { return "infinity"; }
+
+template <> constexpr wchar_t const *str_const_inf<wchar_t>() {
+  return L"infinity";
+}
+
+template <> constexpr char16_t const *str_const_inf<char16_t>() {
+  return u"infinity";
+}
+
+template <> constexpr char32_t const *str_const_inf<char32_t>() {
+  return U"infinity";
+}
+
+#ifdef __cpp_char8_t
+template <> constexpr char8_t const *str_const_inf<char8_t>() {
+  return u8"infinity";
+}
+#endif
+
+template <typename = void> struct int_luts {
+  // chdigit[c]: digit value (0-35) of '0'-'9', 'A'-'Z', 'a'-'z'; 255 otherwise.
+  static constexpr uint8_t chdigit[] = {
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   255, 255,
+      255, 255, 255, 255, 255, 10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
+      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,
+      35,  255, 255, 255, 255, 255, 255, 10,  11,  12,  13,  14,  15,  16,  17,
+      18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,
+      33,  34,  35,  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+      255};
+
+  // maxdigits_u64[base - 2]: most digits a uint64_t can have in bases 2..36.
+  static constexpr size_t maxdigits_u64[] = {
+      64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16,
+      15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13};
+
+  // min_safe_u64[base - 2]: base^(maxdigits - 1), smallest value that long.
+  static constexpr uint64_t min_safe_u64[] = {
+      9223372036854775808ull,  12157665459056928801ull, 4611686018427387904,
+      7450580596923828125,     4738381338321616896,     3909821048582988049,
+      9223372036854775808ull,  12157665459056928801ull, 10000000000000000000ull,
+      5559917313492231481,     2218611106740436992,     8650415919381337933,
+      2177953337809371136,     6568408355712890625,     1152921504606846976,
+      2862423051509815793,     6746640616477458432,     15181127029874798299ull,
+      1638400000000000000,     3243919932521508681,     6221821273427820544,
+      11592836324538749809ull, 876488338465357824,      1490116119384765625,
+      2481152873203736576,     4052555153018976267,     6502111422497947648,
+      10260628712958602189ull, 15943230000000000000ull, 787662783788549761,
+      1152921504606846976,     1667889514952984961,     2386420683693101056,
+      3379220508056640625,     4738381338321616896};
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// Out-of-class definitions of the three tables declared in int_luts.
+template <typename T> constexpr uint8_t int_luts<T>::chdigit[];
+template <typename T> constexpr size_t int_luts<T>::maxdigits_u64[];
+template <typename T> constexpr uint64_t int_luts<T>::min_safe_u64[];
+#endif
+// Digit value 0-35 of c; 255 for non-alphanumerics and non-byte code units.
+template <typename UC>
+fastfloat_really_inline constexpr uint8_t ch_to_digit(UC c) {
+  return UC(uint8_t(c)) == c ? int_luts<>::chdigit[uint8_t(c)] : uint8_t(255);
+}
+
+fastfloat_really_inline constexpr size_t max_digits_u64(int base) {
+  return int_luts<>::maxdigits_u64[base - 2]; // unchecked; 35 entries (2..36)
+}
+
+// If a u64 is exactly max_digits_u64() in length, this is
+// the value below which it has definitely overflowed.
+fastfloat_really_inline constexpr uint64_t min_safe_u64(int base) {
+  return int_luts<>::min_safe_u64[base - 2]; // unchecked; 35 entries (2..36)
+}
+
+static_assert(tinyobj_ff::is_same<equiv_uint_t<double>, uint64_t>::value,
+              "equiv_uint should be uint64_t for double");
+static_assert(std::numeric_limits<double>::is_iec559,
+              "double must fulfill the requirements of IEC 559 (IEEE 754)");
+// The same two requirements are checked for float.
+static_assert(tinyobj_ff::is_same<equiv_uint_t<float>, uint32_t>::value,
+              "equiv_uint should be uint32_t for float");
+static_assert(std::numeric_limits<float>::is_iec559,
+              "float must fulfill the requirements of IEC 559 (IEEE 754)");
+// Extended floating-point types are checked only when their macro is defined.
+#ifdef __STDCPP_FLOAT64_T__
+static_assert(tinyobj_ff::is_same<equiv_uint_t<std::float64_t>, uint64_t>::value,
+              "equiv_uint should be uint64_t for std::float64_t");
+static_assert(
+    std::numeric_limits<std::float64_t>::is_iec559,
+    "std::float64_t must fulfill the requirements of IEC 559 (IEEE 754)");
+#endif // __STDCPP_FLOAT64_T__
+
+#ifdef __STDCPP_FLOAT32_T__
+static_assert(tinyobj_ff::is_same<equiv_uint_t<std::float32_t>, uint32_t>::value,
+              "equiv_uint should be uint32_t for std::float32_t");
+static_assert(
+    std::numeric_limits<std::float32_t>::is_iec559,
+    "std::float32_t must fulfill the requirements of IEC 559 (IEEE 754)");
+#endif // __STDCPP_FLOAT32_T__
+
+#ifdef __STDCPP_FLOAT16_T__
+static_assert(
+    tinyobj_ff::is_same<binary_format<std::float16_t>::equiv_uint, uint16_t>::value,
+    "equiv_uint should be uint16_t for std::float16_t");
+static_assert(
+    std::numeric_limits<std::float16_t>::is_iec559,
+    "std::float16_t must fulfill the requirements of IEC 559 (IEEE 754)");
+#endif // __STDCPP_FLOAT16_T__
+
+#ifdef __STDCPP_BFLOAT16_T__
+static_assert(
+    tinyobj_ff::is_same<binary_format<std::bfloat16_t>::equiv_uint, uint16_t>::value,
+    "equiv_uint should be uint16_t for std::bfloat16_t");
+static_assert(
+    std::numeric_limits<std::bfloat16_t>::is_iec559,
+    "std::bfloat16_t must fulfill the requirements of IEC 559 (IEEE 754)");
+#endif // __STDCPP_BFLOAT16_T__
+
+constexpr chars_format operator~(chars_format rhs) noexcept {
+  using underlying = tinyobj_ff::underlying_type<chars_format>::type;
+  return chars_format(~underlying(rhs));
+}
+
+constexpr chars_format operator&(chars_format lhs, chars_format rhs) noexcept {
+  // Bitwise AND of the two flag sets, computed on the underlying integers.
+  using underlying = tinyobj_ff::underlying_type<chars_format>::type;
+  return chars_format(underlying(lhs) & underlying(rhs));
+}
+
+constexpr chars_format operator|(chars_format lhs, chars_format rhs) noexcept {
+  // Bitwise OR of the two flag sets, computed on the underlying integers.
+  using underlying = tinyobj_ff::underlying_type<chars_format>::type;
+  return chars_format(underlying(lhs) | underlying(rhs));
+}
+
+constexpr chars_format operator^(chars_format lhs, chars_format rhs) noexcept {
+  // Bitwise XOR of the two flag sets, computed on the underlying integers.
+  using underlying = tinyobj_ff::underlying_type<chars_format>::type;
+  return chars_format(underlying(lhs) ^ underlying(rhs));
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format &
+operator&=(chars_format &lhs, chars_format rhs) noexcept {
+  return (lhs = lhs & rhs); // update in place, hand back the same object
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format &
+operator|=(chars_format &lhs, chars_format rhs) noexcept {
+  return (lhs = lhs | rhs); // update in place, hand back the same object
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 chars_format &
+operator^=(chars_format &lhs, chars_format rhs) noexcept {
+  return (lhs = lhs ^ rhs); // update in place, hand back the same object
+}
+
+namespace detail {
+// Returns fmt with the flags implied by the deprecated feature macros added.
+constexpr chars_format adjust_for_feature_macros(chars_format fmt) {
+  return fmt
+#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS
+         | chars_format::allow_leading_plus // macro defined: flag always set
+#endif
+#ifdef FASTFLOAT_SKIP_WHITE_SPACE
+         | chars_format::skip_white_space // macro defined: flag always set
+#endif
+      ;
+}
+} // namespace detail
+
+} // namespace fast_float
+
+#endif
+
+
+#ifndef FASTFLOAT_FAST_FLOAT_H
+#define FASTFLOAT_FAST_FLOAT_H
+
+
+namespace fast_float {
+/**
+ * This function parses the character sequence [first,last) for a number. It
+ * parses floating-point numbers expecting a locale-independent format
+ * equivalent to what is used by std::strtod in the default ("C") locale. The
+ * resulting floating-point value is the closest floating-point value (using
+ * either float or double), using the "round to even" convention for values
+ * that would otherwise fall right in-between two values. That is, we provide
+ * exact parsing according to the IEEE standard.
+ *
+ * Given a successful parse, the pointer (`ptr`) in the returned value is set to
+ * point right after the parsed number, and the `value` referenced is set to the
+ * parsed value. In case of error, the returned `ec` contains a representative
+ * error, otherwise the default (`tinyobj_ff::ff_errc()`) value is stored.
+ *
+ * The implementation does not throw and does not allocate memory (e.g., with
+ * `new` or `malloc`).
+ *
+ * Like the C++17 standard, the `fast_float::from_chars` functions take an
+ * optional last argument of the type `fast_float::chars_format`. It is a bitset
+ * value: we check whether `fmt & fast_float::chars_format::fixed` and `fmt &
+ * fast_float::chars_format::scientific` are set to determine whether we allow
+ * the fixed point and scientific notation respectively. The default is
+ * `fast_float::chars_format::general` which allows both `fixed` and
+ * `scientific`.
+ */
+template <typename T, typename UC = char,
+          typename = FASTFLOAT_ENABLE_IF(is_supported_float_type<T>::value)>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value,
+           chars_format fmt = chars_format::general) noexcept;
+
+/**
+ * Like from_chars, but accepts an `options` argument to govern number parsing.
+ * Works for both floating-point and integer types.
+ */
+template <typename T, typename UC = char>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_advanced(UC const *first, UC const *last, T &value,
+                    parse_options_t<UC> options) noexcept;
+
+/**
+ * from_chars for integer types.
+ */
+template <typename T, typename UC = char,
+          typename = FASTFLOAT_ENABLE_IF(is_supported_integer_type<T>::value)>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value, int base = 10) noexcept;
+
+} // namespace fast_float
+
+#endif // FASTFLOAT_FAST_FLOAT_H
+
+#ifndef FASTFLOAT_ASCII_NUMBER_H
+#define FASTFLOAT_ASCII_NUMBER_H
+
+#include <cctype>
+#include <cstring>
+#include <limits>
+
+
+#ifdef FASTFLOAT_SSE2
+#include <emmintrin.h>
+#endif
+
+#ifdef FASTFLOAT_NEON
+#include <arm_neon.h>
+#endif
+
+namespace fast_float {
+
+template <typename UC> fastfloat_really_inline constexpr bool has_simd_opt() {
+#ifndef FASTFLOAT_HAS_SIMD
+  return false;
+#else
+  return tinyobj_ff::is_same<UC, char16_t>::value; // SIMD only for char16_t
+#endif
+}
+
+// True when c is one of the decimal digits '0'..'9'. A plain range test is
+// used on purpose: compilers already optimize it well.
+template <typename UC>
+fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
+  return c >= UC('0') && c <= UC('9');
+}
+
+fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
+  return (val >> 56) | ((val >> 40) & 0xFF00) | ((val >> 24) & 0xFF0000) |
+         ((val >> 8) & 0xFF000000) | ((val << 8) & 0xFF00000000) |
+         ((val << 24) & 0xFF0000000000) | ((val << 40) & 0xFF000000000000) |
+         (val << 56); // all eight bytes in reverse order
+}
+
+// Packs 8 UC into a u64, first unit in the lowest byte. Non-char UC is
+template <typename UC> // truncated to its low byte.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+read8_to_u64(UC const *chars) {
+  if (cpp20_and_in_constexpr() || !tinyobj_ff::is_same<UC, char>::value) {
+    // Portable path: OR in one byte per unit at increasing bit offsets.
+    uint64_t packed = 0;
+    for (int shift = 0; shift < 64; shift += 8, ++chars) {
+      packed |= uint64_t(uint8_t(*chars)) << shift;
+    }
+    return packed;
+  }
+  uint64_t raw;
+  ::memcpy(&raw, chars, sizeof(raw));
+#if FASTFLOAT_IS_BIG_ENDIAN == 1
+  // Big-endian hosts swap so the first character is still the lowest byte.
+  raw = byteswap(raw);
+#endif
+  return raw;
+}
+
+#ifdef FASTFLOAT_SSE2
+// SSE2: narrow eight 16-bit lanes to eight bytes and return them as one u64.
+fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const data) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  __m128i const packed = _mm_packus_epi16(data, data); // saturating narrow
+#ifdef FASTFLOAT_64BIT
+  return uint64_t(_mm_cvtsi128_si64(packed));
+#else
+  uint64_t value;
+  // Visual Studio + older versions of GCC don't support _mm_storeu_si64
+  _mm_storel_epi64(reinterpret_cast<__m128i *>(&value), packed);
+  return value;
+#endif
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return simd_read8_to_u64( // unaligned 16-byte load of eight char16_t
+      _mm_loadu_si128(reinterpret_cast<__m128i const *>(chars)));
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+#elif defined(FASTFLOAT_NEON)
+// NEON: narrow eight 16-bit lanes to eight bytes and return them as one u64.
+fastfloat_really_inline uint64_t simd_read8_to_u64(uint16x8_t const data) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  uint8x8_t utf8_packed = vmovn_u16(data); // keeps the low byte of each lane
+  return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0);
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return simd_read8_to_u64( // loads eight char16_t as 16-bit lanes
+      vld1q_u16(reinterpret_cast<uint16_t const *>(chars)));
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+#endif // FASTFLOAT_SSE2
+
+// MSVC SFINAE is broken pre-VS2017
+#if defined(_MSC_VER) && _MSC_VER <= 1900
+template <typename UC>
+#else
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>()) = 0>
+#endif
+// Placeholder overload for character types without a SIMD path.
+uint64_t simd_read8_to_u64(UC const *) {
+  return 0; // exists only so callers compile for every UC
+}
+
+// Branch-free conversion of eight packed ASCII digits. credit  @aqrit
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
+parse_eight_digits_unrolled(uint64_t val) {
+  uint64_t const mask = 0x000000FF000000FF; // keeps bytes 0 and 4
+  uint64_t const mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
+  uint64_t const mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
+  val -= 0x3030303030303030; // ASCII '0'..'9' -> 0..9 in every byte
+  val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
+  val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
+  return uint32_t(val); // the high half held the combined 8-digit value
+}
+
+// Converts the eight characters at chars; they must already be known digits.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t
+parse_eight_digits_unrolled(UC const *chars) noexcept {
+  if (!cpp20_and_in_constexpr() && has_simd_opt<UC>()) {
+    return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
+  }
+  return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
+}
+
+// credit @aqrit. A byte's top bit ends up set if it is above '9' or below '0'.
+fastfloat_really_inline constexpr bool
+is_made_of_eight_digits_fast(uint64_t val) noexcept {
+  return (((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
+          0x8080808080808080) == 0;
+}
+
+#ifdef FASTFLOAT_HAS_SIMD
+
+// Call this if chars might not be 8 digits.
+// Using this style (instead of is_made_of_eight_digits_fast() then
+// parse_eight_digits_unrolled()) ensures we don't load SIMD registers twice.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+simd_parse_if_eight_digits_unrolled(char16_t const *chars,
+                                    uint64_t &i) noexcept {
+  if (cpp20_and_in_constexpr()) {
+    return false; // the intrinsic paths below are skipped in this case
+  }
+#ifdef FASTFLOAT_SSE2
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  __m128i const data =
+      _mm_loadu_si128(reinterpret_cast<__m128i const *>(chars));
+
+  // (x - '0') <= 9
+  // http://0x80.pl/articles/simd-parsing-int-sequences.html
+  __m128i const t0 = _mm_add_epi16(data, _mm_set1_epi16(32720));
+  __m128i const t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));
+
+  if (_mm_movemask_epi8(t1) == 0) { // no lane fell outside '0'..'9'
+    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
+    return true;
+  } else
+    return false;
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+#elif defined(FASTFLOAT_NEON)
+  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  uint16x8_t const data = vld1q_u16(reinterpret_cast<uint16_t const *>(chars));
+
+  // (x - '0') <= 9
+  // http://0x80.pl/articles/simd-parsing-int-sequences.html
+  uint16x8_t const t0 = vsubq_u16(data, vmovq_n_u16('0'));
+  uint16x8_t const mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1));
+
+  if (vminvq_u16(mask) == 0xFFFF) { // every lane passed the digit test
+    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
+    return true;
+  } else
+    return false;
+  FASTFLOAT_SIMD_RESTORE_WARNINGS
+#else
+  (void)chars; // neither SSE2 nor NEON: nothing is parsed
+  (void)i;
+  return false;
+#endif // FASTFLOAT_SSE2
+}
+
+#endif // FASTFLOAT_HAS_SIMD
+
+// Pre-VS2017 MSVC cannot handle the SFINAE constraint, so it is left out there
+#if defined(_MSC_VER) && _MSC_VER <= 1900
+template <typename UC>
+#else
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>()) = 0>
+#endif
+// Placeholder so callers compile for character types without a SIMD path.
+bool simd_parse_if_eight_digits_unrolled(UC const *, uint64_t &) {
+  return false;
+}
+
+template <typename UC, FASTFLOAT_ENABLE_IF(!tinyobj_ff::is_same<UC, char>::value) = 0>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+loop_parse_if_eight_digits(UC const *&p, UC const *const pend, uint64_t &i) {
+  // Only character types with a SIMD path are consumed in 8-digit chunks.
+  if (has_simd_opt<UC>()) {
+    // in rare cases, this will overflow, but that's ok
+    while ((tinyobj_ff::distance(p, pend) >= 8) &&
+           simd_parse_if_eight_digits_unrolled(p, i)) {
+      p += 8;
+    }
+  }
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+loop_parse_if_eight_digits(char const *&p, char const *const pend,
+                           uint64_t &i) {
+  // optimizes better than parse_if_eight_digits_unrolled() for UC = char.
+  for (; tinyobj_ff::distance(p, pend) >= 8; p += 8) {
+    uint64_t const chunk = read8_to_u64(p);
+    if (!is_made_of_eight_digits_fast(chunk)) {
+      return; // stop at the first chunk that is not all digits
+    }
+    i = i * 100000000 + parse_eight_digits_unrolled(chunk); // may overflow
+  }
+}
+
+enum class parse_error { // reason recorded when a number string is rejected
+  no_error,
+  // [JSON-only] The minus sign must be followed by an integer.
+  missing_integer_after_sign,
+  // A sign must be followed by an integer or dot.
+  missing_integer_or_dot_after_sign,
+  // [JSON-only] The integer part must not have leading zeros.
+  leading_zeros_in_integer_part,
+  // [JSON-only] The integer part must have at least one digit.
+  no_digits_in_integer_part,
+  // [JSON-only] If there is a decimal point, there must be digits in the
+  // fractional part.
+  no_digits_in_fractional_part,
+  // The mantissa must have at least one digit.
+  no_digits_in_mantissa,
+  // Scientific notation requires an exponential part.
+  missing_exponential_part,
+};
+
+template <typename UC> struct parsed_number_string_t {
+  int64_t exponent{0};
+  uint64_t mantissa{0};
+  UC const *lastmatch{nullptr}; // on error: the position handed to the report
+  bool negative{false};         // set when the input starts with '-'
+  bool valid{false};            // stays false when an error is reported
+  bool too_many_digits{false};
+  // contains the range of the significant digits
+  span<UC const> integer{};  // non-nullable
+  span<UC const> fraction{}; // nullable
+  parse_error error{parse_error::no_error}; // reason when valid is false
+};
+
+using byte_span = span<char const>;
+using parsed_number_string = parsed_number_string_t<char>; // char shorthand
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
+report_parse_error(UC const *p, parse_error error) {
+  parsed_number_string_t<UC> failure; // every other member keeps its default
+  failure.error = error;
+  failure.lastmatch = p; // where parsing gave up
+  failure.valid = false;
+  return failure;
+}
+
+// Tokenizes the decimal number in [p, pend) (requires p < pend) into a sign,
+// digit spans, a 64-bit mantissa (truncated past 19 digits) and an exponent.
+template <bool basic_json_fmt, typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
+parse_number_string(UC const *p, UC const *pend,
+                    parse_options_t<UC> options) noexcept {
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  UC const decimal_point = options.decimal_point;
+
+  parsed_number_string_t<UC> answer;
+  answer.valid = false;
+  answer.too_many_digits = false;
+  // assume p < pend, so dereference without checks;
+  answer.negative = (*p == UC('-'));
+  // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
+  if ((*p == UC('-')) || (uint64_t(fmt & chars_format::allow_leading_plus) &&
+                          !basic_json_fmt && *p == UC('+'))) {
+    ++p;
+    if (p == pend) {
+      return report_parse_error<UC>(
+          p, parse_error::missing_integer_or_dot_after_sign);
+    }
+    FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
+      if (!is_integer(*p)) { // a sign must be followed by an integer
+        return report_parse_error<UC>(p,
+                                      parse_error::missing_integer_after_sign);
+      }
+    }
+    else {
+      if (!is_integer(*p) &&
+          (*p !=
+           decimal_point)) { // a sign must be followed by an integer or the dot
+        return report_parse_error<UC>(
+            p, parse_error::missing_integer_or_dot_after_sign);
+      }
+    }
+  }
+  UC const *const start_digits = p; // first character after any sign
+
+  uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
+
+  while ((p != pend) && is_integer(*p)) {
+    // a multiplication by 10 is cheaper than an arbitrary integer
+    // multiplication
+    i = 10 * i +
+        uint64_t(*p -
+                 UC('0')); // might overflow, we will handle the overflow later
+    ++p;
+  }
+  UC const *const end_of_integer_part = p;
+  int64_t digit_count = int64_t(end_of_integer_part - start_digits);
+  answer.integer = span<UC const>(start_digits, size_t(digit_count));
+  FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
+    // at least 1 digit in integer part, without leading zeros
+    if (digit_count == 0) {
+      return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
+    }
+    if ((start_digits[0] == UC('0') && digit_count > 1)) {
+      return report_parse_error<UC>(start_digits,
+                                    parse_error::leading_zeros_in_integer_part);
+    }
+  }
+
+  int64_t exponent = 0; // power of ten to apply to i
+  bool const has_decimal_point = (p != pend) && (*p == decimal_point);
+  if (has_decimal_point) {
+    ++p;
+    UC const *before = p; // first character after the decimal point
+    // can occur at most twice without overflowing, but let it occur more, since
+    // for integers with many digits, digit parsing is the primary bottleneck.
+    loop_parse_if_eight_digits(p, pend, i);
+
+    while ((p != pend) && is_integer(*p)) {
+      uint8_t digit = uint8_t(*p - UC('0'));
+      ++p;
+      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
+    }
+    exponent = before - p; // minus the number of fractional digits read
+    answer.fraction = span<UC const>(before, size_t(p - before));
+    digit_count -= exponent; // exponent <= 0 here, so this adds those digits
+  }
+  FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
+    // at least 1 digit in fractional part
+    if (has_decimal_point && exponent == 0) {
+      return report_parse_error<UC>(p,
+                                    parse_error::no_digits_in_fractional_part);
+    }
+  }
+  else if (digit_count == 0) { // we must have encountered at least one integer!
+    return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
+  }
+  int64_t exp_number = 0; // explicit exponential part
+  if ((uint64_t(fmt & chars_format::scientific) && (p != pend) &&
+       ((UC('e') == *p) || (UC('E') == *p))) ||
+      (uint64_t(fmt & detail::basic_fortran_fmt) && (p != pend) &&
+       ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) ||
+        (UC('D') == *p)))) {
+    UC const *location_of_e = p; // kept so a bad exponent can be backed out
+    if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) ||
+        (UC('D') == *p)) {
+      ++p;
+    }
+    bool neg_exp = false;
+    if ((p != pend) && (UC('-') == *p)) {
+      neg_exp = true;
+      ++p;
+    } else if ((p != pend) &&
+               (UC('+') ==
+                *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
+      ++p;
+    }
+    if ((p == pend) || !is_integer(*p)) {
+      if (!uint64_t(fmt & chars_format::fixed)) {
+        // The exponential part is invalid for scientific notation, so it must
+        // be a trailing token for fixed notation. However, fixed notation is
+        // disabled, so report a scientific notation error.
+        return report_parse_error<UC>(p, parse_error::missing_exponential_part);
+      }
+      // Otherwise, we will be ignoring the 'e'.
+      p = location_of_e; // rewind: the exponent marker is left unconsumed
+    } else {
+      while ((p != pend) && is_integer(*p)) {
+        uint8_t digit = uint8_t(*p - UC('0'));
+        if (exp_number < 0x10000000) { // stop growing once large: no overflow
+          exp_number = 10 * exp_number + digit;
+        }
+        ++p;
+      }
+      if (neg_exp) {
+        exp_number = -exp_number;
+      }
+      exponent += exp_number;
+    }
+  } else {
+    // If it is scientific and not fixed, we have to bail out.
+    if (uint64_t(fmt & chars_format::scientific) &&
+        !uint64_t(fmt & chars_format::fixed)) {
+      return report_parse_error<UC>(p, parse_error::missing_exponential_part);
+    }
+  }
+  answer.lastmatch = p;
+  answer.valid = true;
+
+  // If we frequently had to deal with long strings of digits,
+  // we could extend our code by using a 128-bit integer instead
+  // of a 64-bit integer. However, this is uncommon.
+  //
+  // We can deal with up to 19 digits.
+  if (digit_count > 19) { // this is uncommon
+    // It is possible that the integer had an overflow.
+    // We have to handle the case where we have 0.0000somenumber.
+    // We need to be mindful of the case where we only have zeroes...
+    // E.g., 0.000000000...000.
+    UC const *start = start_digits;
+    while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
+      if (*start == UC('0')) {
+        digit_count--; // leading zeros are not significant digits
+      }
+      start++;
+    }
+
+    if (digit_count > 19) {
+      answer.too_many_digits = true;
+      // Let us start again, this time, avoiding overflows.
+      // We don't need to check if is_integer, since we use the
+      // pre-tokenized spans from above.
+      i = 0;
+      p = answer.integer.ptr;
+      UC const *int_end = p + answer.integer.len();
+      uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
+      while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
+        i = i * 10 + uint64_t(*p - UC('0'));
+        ++p;
+      }
+      if (i >= minimal_nineteen_digit_integer) { // We have a big integer
+        exponent = end_of_integer_part - p + exp_number; // unread digits count
+      } else { // We have a value with a fractional component.
+        p = answer.fraction.ptr;
+        UC const *frac_end = p + answer.fraction.len();
+        while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+          i = i * 10 + uint64_t(*p - UC('0'));
+          ++p;
+        }
+        exponent = answer.fraction.ptr - p + exp_number; // minus digits kept
+      }
+      // We have now corrected both exponent and i, to a truncated value
+    }
+  }
+  answer.exponent = exponent;
+  answer.mantissa = i;
+  return answer;
+}
+
+template <typename T, typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+parse_int_string(UC const *p, UC const *pend, T &value,
+                 parse_options_t<UC> options) { // value is set only on success
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  int const base = options.base;
+
+  from_chars_result_t<UC> answer;
+
+  UC const *const first = p; // reported back in answer.ptr on invalid input
+
+  bool const negative = (*p == UC('-')); // NOTE(review): assumes p < pend
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(push)
+#pragma warning(disable : 4127)
+#endif
+  if (!tinyobj_ff::is_signed<T>::value && negative) { // '-' with unsigned T
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(pop)
+#endif
+    answer.ec = tinyobj_ff::ff_errc::invalid_argument;
+    answer.ptr = first;
+    return answer;
+  }
+  if ((*p == UC('-')) ||
+      (uint64_t(fmt & chars_format::allow_leading_plus) && (*p == UC('+')))) {
+    ++p;
+  }
+
+  UC const *const start_num = p; // first character after any sign
+
+  while (p != pend && *p == UC('0')) { // skip leading zeros
+    ++p;
+  }
+
+  bool const has_leading_zeros = p > start_num;
+
+  UC const *const start_digits = p; // first character after the leading zeros
+
+  uint64_t i = 0;
+  if (base == 10) {
+    loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
+  }
+  while (p != pend) {
+    uint8_t digit = ch_to_digit(*p);
+    if (digit >= base) { // value is outside this base: stop scanning
+      break;
+    }
+    i = uint64_t(base) * i + digit; // might overflow, check this later
+    p++;
+  }
+
+  size_t digit_count = size_t(p - start_digits); // excludes leading zeros
+
+  if (digit_count == 0) {
+    if (has_leading_zeros) { // the input was only zeros, which is a valid 0
+      value = 0;
+      answer.ec = tinyobj_ff::ff_errc();
+      answer.ptr = p;
+    } else { // no digits at all
+      answer.ec = tinyobj_ff::ff_errc::invalid_argument;
+      answer.ptr = first;
+    }
+    return answer;
+  }
+
+  answer.ptr = p; // end of the digits, also reported for range errors below
+
+  // check u64 overflow
+  size_t max_digits = max_digits_u64(base);
+  if (digit_count > max_digits) {
+    answer.ec = tinyobj_ff::ff_errc::result_out_of_range;
+    return answer;
+  }
+  // this check can be eliminated for all other types, but they will all require
+  // a max_digits(base) equivalent
+  if (digit_count == max_digits && i < min_safe_u64(base)) {
+    answer.ec = tinyobj_ff::ff_errc::result_out_of_range;
+    return answer;
+  }
+
+  // check other types overflow (a negative value gets one extra unit of range)
+  if (!tinyobj_ff::is_same<T, uint64_t>::value) {
+    if (i > uint64_t(std::numeric_limits<T>::max()) + uint64_t(negative)) {
+      answer.ec = tinyobj_ff::ff_errc::result_out_of_range;
+      return answer;
+    }
+  }
+
+  if (negative) {
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(push)
+#pragma warning(disable : 4146)
+#endif
+    // this weird workaround is required because:
+    // - converting unsigned to signed when its value is greater than signed max
+    // is UB pre-C++23.
+    // - reinterpret_casting (~i + 1) would work, but it is not constexpr
+    // this is always optimized into a neg instruction (note: T is an integer
+    // type)
+    value = T(-std::numeric_limits<T>::max() -
+              T(i - uint64_t(std::numeric_limits<T>::max())));
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(pop)
+#endif
+  } else {
+    value = T(i);
+  }
+
+  answer.ec = tinyobj_ff::ff_errc();
+  return answer;
+}
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_FAST_TABLE_H
+#define FASTFLOAT_FAST_TABLE_H
+
+namespace fast_float {
+
+/**
+ * When mapping numbers from decimal to binary,
+ * we go from w * 10^q to m * 2^p but we have
+ * 10^q = 5^q * 2^q, so effectively
+ * we are trying to match
+ * w * 2^q * 5^q to m * 2^p. Thus the powers of two
+ * are not a concern since they can be represented
+ * exactly using the binary notation, only the powers of five
+ * affect the binary significand.
+ */
+
+/**
+ * The smallest non-zero float (binary64) is 2^-1074.
+ * We take as input numbers of the form w x 10^q where w < 2^64.
+ * We have that w * 10^-343  <  2^(64-344) 5^-343 < 2^-1076.
+ * However, we have that
+ * (2^64-1) * 10^-342 =  (2^64-1) * 2^-342 * 5^-342 > 2^-1074.
+ * Thus it is possible for a number of the form w * 10^-342 where
+ * w is a 64-bit value to be a non-zero floating-point number.
+ *********
+ * Any number of the form w * 10^309 where w >= 1 is going to be
+ * infinite in binary64, so we never need to worry about powers
+ * of 5 greater than 308.
+ */
+template <class unused = void> struct powers_template {
+
+  constexpr static int smallest_power_of_five =
+      binary_format<double>::smallest_power_of_ten();
+  constexpr static int largest_power_of_five =
+      binary_format<double>::largest_power_of_ten();
+  constexpr static int number_of_entries =
+      2 * (largest_power_of_five - smallest_power_of_five + 1);
+  // Powers of five from 5^-342 all the way to 5^308 rounded toward one.
+  constexpr static uint64_t power_of_five_128[number_of_entries] = {
+      0xeef453d6923bd65a, 0x113faa2906a13b3f,
+      0x9558b4661b6565f8, 0x4ac7ca59a424c507,
+      0xbaaee17fa23ebf76, 0x5d79bcf00d2df649,
+      0xe95a99df8ace6f53, 0xf4d82c2c107973dc,
+      0x91d8a02bb6c10594, 0x79071b9b8a4be869,
+      0xb64ec836a47146f9, 0x9748e2826cdee284,
+      0xe3e27a444d8d98b7, 0xfd1b1b2308169b25,
+      0x8e6d8c6ab0787f72, 0xfe30f0f5e50e20f7,
+      0xb208ef855c969f4f, 0xbdbd2d335e51a935,
+      0xde8b2b66b3bc4723, 0xad2c788035e61382,
+      0x8b16fb203055ac76, 0x4c3bcb5021afcc31,
+      0xaddcb9e83c6b1793, 0xdf4abe242a1bbf3d,
+      0xd953e8624b85dd78, 0xd71d6dad34a2af0d,
+      0x87d4713d6f33aa6b, 0x8672648c40e5ad68,
+      0xa9c98d8ccb009506, 0x680efdaf511f18c2,
+      0xd43bf0effdc0ba48, 0x212bd1b2566def2,
+      0x84a57695fe98746d, 0x14bb630f7604b57,
+      0xa5ced43b7e3e9188, 0x419ea3bd35385e2d,
+      0xcf42894a5dce35ea, 0x52064cac828675b9,
+      0x818995ce7aa0e1b2, 0x7343efebd1940993,
+      0xa1ebfb4219491a1f, 0x1014ebe6c5f90bf8,
+      0xca66fa129f9b60a6, 0xd41a26e077774ef6,
+      0xfd00b897478238d0, 0x8920b098955522b4,
+      0x9e20735e8cb16382, 0x55b46e5f5d5535b0,
+      0xc5a890362fddbc62, 0xeb2189f734aa831d,
+      0xf712b443bbd52b7b, 0xa5e9ec7501d523e4,
+      0x9a6bb0aa55653b2d, 0x47b233c92125366e,
+      0xc1069cd4eabe89f8, 0x999ec0bb696e840a,
+      0xf148440a256e2c76, 0xc00670ea43ca250d,
+      0x96cd2a865764dbca, 0x380406926a5e5728,
+      0xbc807527ed3e12bc, 0xc605083704f5ecf2,
+      0xeba09271e88d976b, 0xf7864a44c633682e,
+      0x93445b8731587ea3, 0x7ab3ee6afbe0211d,
+      0xb8157268fdae9e4c, 0x5960ea05bad82964,
+      0xe61acf033d1a45df, 0x6fb92487298e33bd,
+      0x8fd0c16206306bab, 0xa5d3b6d479f8e056,
+      0xb3c4f1ba87bc8696, 0x8f48a4899877186c,
+      0xe0b62e2929aba83c, 0x331acdabfe94de87,
+      0x8c71dcd9ba0b4925, 0x9ff0c08b7f1d0b14,
+      0xaf8e5410288e1b6f, 0x7ecf0ae5ee44dd9,
+      0xdb71e91432b1a24a, 0xc9e82cd9f69d6150,
+      0x892731ac9faf056e, 0xbe311c083a225cd2,
+      0xab70fe17c79ac6ca, 0x6dbd630a48aaf406,
+      0xd64d3d9db981787d, 0x92cbbccdad5b108,
+      0x85f0468293f0eb4e, 0x25bbf56008c58ea5,
+      0xa76c582338ed2621, 0xaf2af2b80af6f24e,
+      0xd1476e2c07286faa, 0x1af5af660db4aee1,
+      0x82cca4db847945ca, 0x50d98d9fc890ed4d,
+      0xa37fce126597973c, 0xe50ff107bab528a0,
+      0xcc5fc196fefd7d0c, 0x1e53ed49a96272c8,
+      0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7a,
+      0x9faacf3df73609b1, 0x77b191618c54e9ac,
+      0xc795830d75038c1d, 0xd59df5b9ef6a2417,
+      0xf97ae3d0d2446f25, 0x4b0573286b44ad1d,
+      0x9becce62836ac577, 0x4ee367f9430aec32,
+      0xc2e801fb244576d5, 0x229c41f793cda73f,
+      0xf3a20279ed56d48a, 0x6b43527578c1110f,
+      0x9845418c345644d6, 0x830a13896b78aaa9,
+      0xbe5691ef416bd60c, 0x23cc986bc656d553,
+      0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa8,
+      0x94b3a202eb1c3f39, 0x7bf7d71432f3d6a9,
+      0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc53,
+      0xe858ad248f5c22c9, 0xd1b3400f8f9cff68,
+      0x91376c36d99995be, 0x23100809b9c21fa1,
+      0xb58547448ffffb2d, 0xabd40a0c2832a78a,
+      0xe2e69915b3fff9f9, 0x16c90c8f323f516c,
+      0x8dd01fad907ffc3b, 0xae3da7d97f6792e3,
+      0xb1442798f49ffb4a, 0x99cd11cfdf41779c,
+      0xdd95317f31c7fa1d, 0x40405643d711d583,
+      0x8a7d3eef7f1cfc52, 0x482835ea666b2572,
+      0xad1c8eab5ee43b66, 0xda3243650005eecf,
+      0xd863b256369d4a40, 0x90bed43e40076a82,
+      0x873e4f75e2224e68, 0x5a7744a6e804a291,
+      0xa90de3535aaae202, 0x711515d0a205cb36,
+      0xd3515c2831559a83, 0xd5a5b44ca873e03,
+      0x8412d9991ed58091, 0xe858790afe9486c2,
+      0xa5178fff668ae0b6, 0x626e974dbe39a872,
+      0xce5d73ff402d98e3, 0xfb0a3d212dc8128f,
+      0x80fa687f881c7f8e, 0x7ce66634bc9d0b99,
+      0xa139029f6a239f72, 0x1c1fffc1ebc44e80,
+      0xc987434744ac874e, 0xa327ffb266b56220,
+      0xfbe9141915d7a922, 0x4bf1ff9f0062baa8,
+      0x9d71ac8fada6c9b5, 0x6f773fc3603db4a9,
+      0xc4ce17b399107c22, 0xcb550fb4384d21d3,
+      0xf6019da07f549b2b, 0x7e2a53a146606a48,
+      0x99c102844f94e0fb, 0x2eda7444cbfc426d,
+      0xc0314325637a1939, 0xfa911155fefb5308,
+      0xf03d93eebc589f88, 0x793555ab7eba27ca,
+      0x96267c7535b763b5, 0x4bc1558b2f3458de,
+      0xbbb01b9283253ca2, 0x9eb1aaedfb016f16,
+      0xea9c227723ee8bcb, 0x465e15a979c1cadc,
+      0x92a1958a7675175f, 0xbfacd89ec191ec9,
+      0xb749faed14125d36, 0xcef980ec671f667b,
+      0xe51c79a85916f484, 0x82b7e12780e7401a,
+      0x8f31cc0937ae58d2, 0xd1b2ecb8b0908810,
+      0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa15,
+      0xdfbdcece67006ac9, 0x67a791e093e1d49a,
+      0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e0,
+      0xaecc49914078536d, 0x58fae9f773886e18,
+      0xda7f5bf590966848, 0xaf39a475506a899e,
+      0x888f99797a5e012d, 0x6d8406c952429603,
+      0xaab37fd7d8f58178, 0xc8e5087ba6d33b83,
+      0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a64,
+      0x855c3be0a17fcd26, 0x5cf2eea09a55067f,
+      0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481e,
+      0xd0601d8efc57b08b, 0xf13b94daf124da26,
+      0x823c12795db6ce57, 0x76c53d08d6b70858,
+      0xa2cb1717b52481ed, 0x54768c4b0c64ca6e,
+      0xcb7ddcdda26da268, 0xa9942f5dcf7dfd09,
+      0xfe5d54150b090b02, 0xd3f93b35435d7c4c,
+      0x9efa548d26e5a6e1, 0xc47bc5014a1a6daf,
+      0xc6b8e9b0709f109a, 0x359ab6419ca1091b,
+      0xf867241c8cc6d4c0, 0xc30163d203c94b62,
+      0x9b407691d7fc44f8, 0x79e0de63425dcf1d,
+      0xc21094364dfb5636, 0x985915fc12f542e4,
+      0xf294b943e17a2bc4, 0x3e6f5b7b17b2939d,
+      0x979cf3ca6cec5b5a, 0xa705992ceecf9c42,
+      0xbd8430bd08277231, 0x50c6ff782a838353,
+      0xece53cec4a314ebd, 0xa4f8bf5635246428,
+      0x940f4613ae5ed136, 0x871b7795e136be99,
+      0xb913179899f68584, 0x28e2557b59846e3f,
+      0xe757dd7ec07426e5, 0x331aeada2fe589cf,
+      0x9096ea6f3848984f, 0x3ff0d2c85def7621,
+      0xb4bca50b065abe63, 0xfed077a756b53a9,
+      0xe1ebce4dc7f16dfb, 0xd3e8495912c62894,
+      0x8d3360f09cf6e4bd, 0x64712dd7abbbd95c,
+      0xb080392cc4349dec, 0xbd8d794d96aacfb3,
+      0xdca04777f541c567, 0xecf0d7a0fc5583a0,
+      0x89e42caaf9491b60, 0xf41686c49db57244,
+      0xac5d37d5b79b6239, 0x311c2875c522ced5,
+      0xd77485cb25823ac7, 0x7d633293366b828b,
+      0x86a8d39ef77164bc, 0xae5dff9c02033197,
+      0xa8530886b54dbdeb, 0xd9f57f830283fdfc,
+      0xd267caa862a12d66, 0xd072df63c324fd7b,
+      0x8380dea93da4bc60, 0x4247cb9e59f71e6d,
+      0xa46116538d0deb78, 0x52d9be85f074e608,
+      0xcd795be870516656, 0x67902e276c921f8b,
+      0x806bd9714632dff6, 0xba1cd8a3db53b6,
+      0xa086cfcd97bf97f3, 0x80e8a40eccd228a4,
+      0xc8a883c0fdaf7df0, 0x6122cd128006b2cd,
+      0xfad2a4b13d1b5d6c, 0x796b805720085f81,
+      0x9cc3a6eec6311a63, 0xcbe3303674053bb0,
+      0xc3f490aa77bd60fc, 0xbedbfc4411068a9c,
+      0xf4f1b4d515acb93b, 0xee92fb5515482d44,
+      0x991711052d8bf3c5, 0x751bdd152d4d1c4a,
+      0xbf5cd54678eef0b6, 0xd262d45a78a0635d,
+      0xef340a98172aace4, 0x86fb897116c87c34,
+      0x9580869f0e7aac0e, 0xd45d35e6ae3d4da0,
+      0xbae0a846d2195712, 0x8974836059cca109,
+      0xe998d258869facd7, 0x2bd1a438703fc94b,
+      0x91ff83775423cc06, 0x7b6306a34627ddcf,
+      0xb67f6455292cbf08, 0x1a3bc84c17b1d542,
+      0xe41f3d6a7377eeca, 0x20caba5f1d9e4a93,
+      0x8e938662882af53e, 0x547eb47b7282ee9c,
+      0xb23867fb2a35b28d, 0xe99e619a4f23aa43,
+      0xdec681f9f4c31f31, 0x6405fa00e2ec94d4,
+      0x8b3c113c38f9f37e, 0xde83bc408dd3dd04,
+      0xae0b158b4738705e, 0x9624ab50b148d445,
+      0xd98ddaee19068c76, 0x3badd624dd9b0957,
+      0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d6,
+      0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4c,
+      0xd47487cc8470652b, 0x7647c3200069671f,
+      0x84c8d4dfd2c63f3b, 0x29ecd9f40041e073,
+      0xa5fb0a17c777cf09, 0xf468107100525890,
+      0xcf79cc9db955c2cc, 0x7182148d4066eeb4,
+      0x81ac1fe293d599bf, 0xc6f14cd848405530,
+      0xa21727db38cb002f, 0xb8ada00e5a506a7c,
+      0xca9cf1d206fdc03b, 0xa6d90811f0e4851c,
+      0xfd442e4688bd304a, 0x908f4a166d1da663,
+      0x9e4a9cec15763e2e, 0x9a598e4e043287fe,
+      0xc5dd44271ad3cdba, 0x40eff1e1853f29fd,
+      0xf7549530e188c128, 0xd12bee59e68ef47c,
+      0x9a94dd3e8cf578b9, 0x82bb74f8301958ce,
+      0xc13a148e3032d6e7, 0xe36a52363c1faf01,
+      0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac1,
+      0x96f5600f15a7b7e5, 0x29ab103a5ef8c0b9,
+      0xbcb2b812db11a5de, 0x7415d448f6b6f0e7,
+      0xebdf661791d60f56, 0x111b495b3464ad21,
+      0x936b9fcebb25c995, 0xcab10dd900beec34,
+      0xb84687c269ef3bfb, 0x3d5d514f40eea742,
+      0xe65829b3046b0afa, 0xcb4a5a3112a5112,
+      0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ab,
+      0xb3f4e093db73a093, 0x59ed216765690f56,
+      0xe0f218b8d25088b8, 0x306869c13ec3532c,
+      0x8c974f7383725573, 0x1e414218c73a13fb,
+      0xafbd2350644eeacf, 0xe5d1929ef90898fa,
+      0xdbac6c247d62a583, 0xdf45f746b74abf39,
+      0x894bc396ce5da772, 0x6b8bba8c328eb783,
+      0xab9eb47c81f5114f, 0x66ea92f3f326564,
+      0xd686619ba27255a2, 0xc80a537b0efefebd,
+      0x8613fd0145877585, 0xbd06742ce95f5f36,
+      0xa798fc4196e952e7, 0x2c48113823b73704,
+      0xd17f3b51fca3a7a0, 0xf75a15862ca504c5,
+      0x82ef85133de648c4, 0x9a984d73dbe722fb,
+      0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebba,
+      0xcc963fee10b7d1b3, 0x318df905079926a8,
+      0xffbbcfe994e5c61f, 0xfdf17746497f7052,
+      0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa633,
+      0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc0,
+      0xf9bd690a1b68637b, 0x3dfdce7aa3c673b0,
+      0x9c1661a651213e2d, 0x6bea10ca65c084e,
+      0xc31bfa0fe5698db8, 0x486e494fcff30a62,
+      0xf3e2f893dec3f126, 0x5a89dba3c3efccfa,
+      0x986ddb5c6b3a76b7, 0xf89629465a75e01c,
+      0xbe89523386091465, 0xf6bbb397f1135823,
+      0xee2ba6c0678b597f, 0x746aa07ded582e2c,
+      0x94db483840b717ef, 0xa8c2a44eb4571cdc,
+      0xba121a4650e4ddeb, 0x92f34d62616ce413,
+      0xe896a0d7e51e1566, 0x77b020baf9c81d17,
+      0x915e2486ef32cd60, 0xace1474dc1d122e,
+      0xb5b5ada8aaff80b8, 0xd819992132456ba,
+      0xe3231912d5bf60e6, 0x10e1fff697ed6c69,
+      0x8df5efabc5979c8f, 0xca8d3ffa1ef463c1,
+      0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb2,
+      0xddd0467c64bce4a0, 0xac7cb3f6d05ddbde,
+      0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96b,
+      0xad4ab7112eb3929d, 0x86c16c98d2c953c6,
+      0xd89d64d57a607744, 0xe871c7bf077ba8b7,
+      0x87625f056c7c4a8b, 0x11471cd764ad4972,
+      0xa93af6c6c79b5d2d, 0xd598e40d3dd89bcf,
+      0xd389b47879823479, 0x4aff1d108d4ec2c3,
+      0x843610cb4bf160cb, 0xcedf722a585139ba,
+      0xa54394fe1eedb8fe, 0xc2974eb4ee658828,
+      0xce947a3da6a9273e, 0x733d226229feea32,
+      0x811ccc668829b887, 0x806357d5a3f525f,
+      0xa163ff802a3426a8, 0xca07c2dcb0cf26f7,
+      0xc9bcff6034c13052, 0xfc89b393dd02f0b5,
+      0xfc2c3f3841f17c67, 0xbbac2078d443ace2,
+      0x9d9ba7832936edc0, 0xd54b944b84aa4c0d,
+      0xc5029163f384a931, 0xa9e795e65d4df11,
+      0xf64335bcf065d37d, 0x4d4617b5ff4a16d5,
+      0x99ea0196163fa42e, 0x504bced1bf8e4e45,
+      0xc06481fb9bcf8d39, 0xe45ec2862f71e1d6,
+      0xf07da27a82c37088, 0x5d767327bb4e5a4c,
+      0x964e858c91ba2655, 0x3a6a07f8d510f86f,
+      0xbbe226efb628afea, 0x890489f70a55368b,
+      0xeadab0aba3b2dbe5, 0x2b45ac74ccea842e,
+      0x92c8ae6b464fc96f, 0x3b0b8bc90012929d,
+      0xb77ada0617e3bbcb, 0x9ce6ebb40173744,
+      0xe55990879ddcaabd, 0xcc420a6a101d0515,
+      0x8f57fa54c2a9eab6, 0x9fa946824a12232d,
+      0xb32df8e9f3546564, 0x47939822dc96abf9,
+      0xdff9772470297ebd, 0x59787e2b93bc56f7,
+      0x8bfbea76c619ef36, 0x57eb4edb3c55b65a,
+      0xaefae51477a06b03, 0xede622920b6b23f1,
+      0xdab99e59958885c4, 0xe95fab368e45eced,
+      0x88b402f7fd75539b, 0x11dbcb0218ebb414,
+      0xaae103b5fcd2a881, 0xd652bdc29f26a119,
+      0xd59944a37c0752a2, 0x4be76d3346f0495f,
+      0x857fcae62d8493a5, 0x6f70a4400c562ddb,
+      0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb952,
+      0xd097ad07a71f26b2, 0x7e2000a41346a7a7,
+      0x825ecc24c873782f, 0x8ed400668c0c28c8,
+      0xa2f67f2dfa90563b, 0x728900802f0f32fa,
+      0xcbb41ef979346bca, 0x4f2b40a03ad2ffb9,
+      0xfea126b7d78186bc, 0xe2f610c84987bfa8,
+      0x9f24b832e6b0f436, 0xdd9ca7d2df4d7c9,
+      0xc6ede63fa05d3143, 0x91503d1c79720dbb,
+      0xf8a95fcf88747d94, 0x75a44c6397ce912a,
+      0x9b69dbe1b548ce7c, 0xc986afbe3ee11aba,
+      0xc24452da229b021b, 0xfbe85badce996168,
+      0xf2d56790ab41c2a2, 0xfae27299423fb9c3,
+      0x97c560ba6b0919a5, 0xdccd879fc967d41a,
+      0xbdb6b8e905cb600f, 0x5400e987bbc1c920,
+      0xed246723473e3813, 0x290123e9aab23b68,
+      0x9436c0760c86e30b, 0xf9a0b6720aaf6521,
+      0xb94470938fa89bce, 0xf808e40e8d5b3e69,
+      0xe7958cb87392c2c2, 0xb60b1d1230b20e04,
+      0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c2,
+      0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af3,
+      0xe2280b6c20dd5232, 0x25c6da63c38de1b0,
+      0x8d590723948a535f, 0x579c487e5a38ad0e,
+      0xb0af48ec79ace837, 0x2d835a9df0c6d851,
+      0xdcdb1b2798182244, 0xf8e431456cf88e65,
+      0x8a08f0f8bf0f156b, 0x1b8e9ecb641b58ff,
+      0xac8b2d36eed2dac5, 0xe272467e3d222f3f,
+      0xd7adf884aa879177, 0x5b0ed81dcc6abb0f,
+      0x86ccbb52ea94baea, 0x98e947129fc2b4e9,
+      0xa87fea27a539e9a5, 0x3f2398d747b36224,
+      0xd29fe4b18e88640e, 0x8eec7f0d19a03aad,
+      0x83a3eeeef9153e89, 0x1953cf68300424ac,
+      0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd7,
+      0xcdb02555653131b6, 0x3792f412cb06794d,
+      0x808e17555f3ebf11, 0xe2bbd88bbee40bd0,
+      0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec4,
+      0xc8de047564d20a8b, 0xf245825a5a445275,
+      0xfb158592be068d2e, 0xeed6e2f0f0d56712,
+      0x9ced737bb6c4183d, 0x55464dd69685606b,
+      0xc428d05aa4751e4c, 0xaa97e14c3c26b886,
+      0xf53304714d9265df, 0xd53dd99f4b3066a8,
+      0x993fe2c6d07b7fab, 0xe546a8038efe4029,
+      0xbf8fdb78849a5f96, 0xde98520472bdd033,
+      0xef73d256a5c0f77c, 0x963e66858f6d4440,
+      0x95a8637627989aad, 0xdde7001379a44aa8,
+      0xbb127c53b17ec159, 0x5560c018580d5d52,
+      0xe9d71b689dde71af, 0xaab8f01e6e10b4a6,
+      0x9226712162ab070d, 0xcab3961304ca70e8,
+      0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d22,
+      0xe45c10c42a2b3b05, 0x8cb89a7db77c506a,
+      0x8eb98a7a9a5b04e3, 0x77f3608e92adb242,
+      0xb267ed1940f1c61c, 0x55f038b237591ed3,
+      0xdf01e85f912e37a3, 0x6b6c46dec52f6688,
+      0x8b61313bbabce2c6, 0x2323ac4b3b3da015,
+      0xae397d8aa96c1b77, 0xabec975e0a0d081a,
+      0xd9c7dced53c72255, 0x96e7bd358c904a21,
+      0x881cea14545c7575, 0x7e50d64177da2e54,
+      0xaa242499697392d2, 0xdde50bd1d5d0b9e9,
+      0xd4ad2dbfc3d07787, 0x955e4ec64b44e864,
+      0x84ec3c97da624ab4, 0xbd5af13bef0b113e,
+      0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58e,
+      0xcfb11ead453994ba, 0x67de18eda5814af2,
+      0x81ceb32c4b43fcf4, 0x80eacf948770ced7,
+      0xa2425ff75e14fc31, 0xa1258379a94d028d,
+      0xcad2f7f5359a3b3e, 0x96ee45813a04330,
+      0xfd87b5f28300ca0d, 0x8bca9d6e188853fc,
+      0x9e74d1b791e07e48, 0x775ea264cf55347e,
+      0xc612062576589dda, 0x95364afe032a819e,
+      0xf79687aed3eec551, 0x3a83ddbd83f52205,
+      0x9abe14cd44753b52, 0xc4926a9672793543,
+      0xc16d9a0095928a27, 0x75b7053c0f178294,
+      0xf1c90080baf72cb1, 0x5324c68b12dd6339,
+      0x971da05074da7bee, 0xd3f6fc16ebca5e04,
+      0xbce5086492111aea, 0x88f4bb1ca6bcf585,
+      0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6,
+      0x9392ee8e921d5d07, 0x3aff322e62439fd0,
+      0xb877aa3236a4b449, 0x9befeb9fad487c3,
+      0xe69594bec44de15b, 0x4c2ebe687989a9b4,
+      0x901d7cf73ab0acd9, 0xf9d37014bf60a11,
+      0xb424dc35095cd80f, 0x538484c19ef38c95,
+      0xe12e13424bb40e13, 0x2865a5f206b06fba,
+      0x8cbccc096f5088cb, 0xf93f87b7442e45d4,
+      0xafebff0bcb24aafe, 0xf78f69a51539d749,
+      0xdbe6fecebdedd5be, 0xb573440e5a884d1c,
+      0x89705f4136b4a597, 0x31680a88f8953031,
+      0xabcc77118461cefc, 0xfdc20d2b36ba7c3e,
+      0xd6bf94d5e57a42bc, 0x3d32907604691b4d,
+      0x8637bd05af6c69b5, 0xa63f9a49c2c1b110,
+      0xa7c5ac471b478423, 0xfcf80dc33721d54,
+      0xd1b71758e219652b, 0xd3c36113404ea4a9,
+      0x83126e978d4fdf3b, 0x645a1cac083126ea,
+      0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4,
+      0xcccccccccccccccc, 0xcccccccccccccccd,
+      0x8000000000000000, 0x0,
+      0xa000000000000000, 0x0,
+      0xc800000000000000, 0x0,
+      0xfa00000000000000, 0x0,
+      0x9c40000000000000, 0x0,
+      0xc350000000000000, 0x0,
+      0xf424000000000000, 0x0,
+      0x9896800000000000, 0x0,
+      0xbebc200000000000, 0x0,
+      0xee6b280000000000, 0x0,
+      0x9502f90000000000, 0x0,
+      0xba43b74000000000, 0x0,
+      0xe8d4a51000000000, 0x0,
+      0x9184e72a00000000, 0x0,
+      0xb5e620f480000000, 0x0,
+      0xe35fa931a0000000, 0x0,
+      0x8e1bc9bf04000000, 0x0,
+      0xb1a2bc2ec5000000, 0x0,
+      0xde0b6b3a76400000, 0x0,
+      0x8ac7230489e80000, 0x0,
+      0xad78ebc5ac620000, 0x0,
+      0xd8d726b7177a8000, 0x0,
+      0x878678326eac9000, 0x0,
+      0xa968163f0a57b400, 0x0,
+      0xd3c21bcecceda100, 0x0,
+      0x84595161401484a0, 0x0,
+      0xa56fa5b99019a5c8, 0x0,
+      0xcecb8f27f4200f3a, 0x0,
+      0x813f3978f8940984, 0x4000000000000000,
+      0xa18f07d736b90be5, 0x5000000000000000,
+      0xc9f2c9cd04674ede, 0xa400000000000000,
+      0xfc6f7c4045812296, 0x4d00000000000000,
+      0x9dc5ada82b70b59d, 0xf020000000000000,
+      0xc5371912364ce305, 0x6c28000000000000,
+      0xf684df56c3e01bc6, 0xc732000000000000,
+      0x9a130b963a6c115c, 0x3c7f400000000000,
+      0xc097ce7bc90715b3, 0x4b9f100000000000,
+      0xf0bdc21abb48db20, 0x1e86d40000000000,
+      0x96769950b50d88f4, 0x1314448000000000,
+      0xbc143fa4e250eb31, 0x17d955a000000000,
+      0xeb194f8e1ae525fd, 0x5dcfab0800000000,
+      0x92efd1b8d0cf37be, 0x5aa1cae500000000,
+      0xb7abc627050305ad, 0xf14a3d9e40000000,
+      0xe596b7b0c643c719, 0x6d9ccd05d0000000,
+      0x8f7e32ce7bea5c6f, 0xe4820023a2000000,
+      0xb35dbf821ae4f38b, 0xdda2802c8a800000,
+      0xe0352f62a19e306e, 0xd50b2037ad200000,
+      0x8c213d9da502de45, 0x4526f422cc340000,
+      0xaf298d050e4395d6, 0x9670b12b7f410000,
+      0xdaf3f04651d47b4c, 0x3c0cdd765f114000,
+      0x88d8762bf324cd0f, 0xa5880a69fb6ac800,
+      0xab0e93b6efee0053, 0x8eea0d047a457a00,
+      0xd5d238a4abe98068, 0x72a4904598d6d880,
+      0x85a36366eb71f041, 0x47a6da2b7f864750,
+      0xa70c3c40a64e6c51, 0x999090b65f67d924,
+      0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d,
+      0x82818f1281ed449f, 0xbff8f10e7a8921a4,
+      0xa321f2d7226895c7, 0xaff72d52192b6a0d,
+      0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490,
+      0xfee50b7025c36a08, 0x2f236d04753d5b4,
+      0x9f4f2726179a2245, 0x1d762422c946590,
+      0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5,
+      0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2,
+      0x9b934c3b330c8577, 0x63cc55f49f88eb2f,
+      0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb,
+      0xf316271c7fc3908a, 0x8bef464e3945ef7a,
+      0x97edd871cfda3a56, 0x97758bf0e3cbb5ac,
+      0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317,
+      0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd,
+      0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a,
+      0xb975d6b6ee39e436, 0xb3e2fd538e122b44,
+      0xe7d34c64a9c85d44, 0x60dbbca87196b616,
+      0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd,
+      0xb51d13aea4a488dd, 0x6babab6398bdbe41,
+      0xe264589a4dcdab14, 0xc696963c7eed2dd1,
+      0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2,
+      0xb0de65388cc8ada8, 0x3b25a55f43294bcb,
+      0xdd15fe86affad912, 0x49ef0eb713f39ebe,
+      0x8a2dbf142dfcc7ab, 0x6e3569326c784337,
+      0xacb92ed9397bf996, 0x49c2c37f07965404,
+      0xd7e77a8f87daf7fb, 0xdc33745ec97be906,
+      0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3,
+      0xa8acd7c0222311bc, 0xc40832ea0d68ce0c,
+      0xd2d80db02aabd62b, 0xf50a3fa490c30190,
+      0x83c7088e1aab65db, 0x792667c6da79e0fa,
+      0xa4b8cab1a1563f52, 0x577001b891185938,
+      0xcde6fd5e09abcf26, 0xed4c0226b55e6f86,
+      0x80b05e5ac60b6178, 0x544f8158315b05b4,
+      0xa0dc75f1778e39d6, 0x696361ae3db1c721,
+      0xc913936dd571c84c, 0x3bc3a19cd1e38e9,
+      0xfb5878494ace3a5f, 0x4ab48a04065c723,
+      0x9d174b2dcec0e47b, 0x62eb0d64283f9c76,
+      0xc45d1df942711d9a, 0x3ba5d0bd324f8394,
+      0xf5746577930d6500, 0xca8f44ec7ee36479,
+      0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb,
+      0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e,
+      0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e,
+      0x95d04aee3b80ece5, 0xbba1f1d158724a12,
+      0xbb445da9ca61281f, 0x2a8a6e45ae8edc97,
+      0xea1575143cf97226, 0xf52d09d71a3293bd,
+      0x924d692ca61be758, 0x593c2626705f9c56,
+      0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c,
+      0xe498f455c38b997a, 0xb6dfb9c0f956447,
+      0x8edf98b59a373fec, 0x4724bd4189bd5eac,
+      0xb2977ee300c50fe7, 0x58edec91ec2cb657,
+      0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed,
+      0x8b865b215899f46c, 0xbd79e0d20082ee74,
+      0xae67f1e9aec07187, 0xecd8590680a3aa11,
+      0xda01ee641a708de9, 0xe80e6f4820cc9495,
+      0x884134fe908658b2, 0x3109058d147fdcdd,
+      0xaa51823e34a7eede, 0xbd4b46f0599fd415,
+      0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a,
+      0x850fadc09923329e, 0x3e2cf6bc604ddb0,
+      0xa6539930bf6bff45, 0x84db8346b786151c,
+      0xcfe87f7cef46ff16, 0xe612641865679a63,
+      0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e,
+      0xa26da3999aef7749, 0xe3be5e330f38f09d,
+      0xcb090c8001ab551c, 0x5cadf5bfd3072cc5,
+      0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6,
+      0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa,
+      0xc646d63501a1511d, 0xb281e1fd541501b8,
+      0xf7d88bc24209a565, 0x1f225a7ca91a4226,
+      0x9ae757596946075f, 0x3375788de9b06958,
+      0xc1a12d2fc3978937, 0x52d6b1641c83ae,
+      0xf209787bb47d6b84, 0xc0678c5dbd23a49a,
+      0x9745eb4d50ce6332, 0xf840b7ba963646e0,
+      0xbd176620a501fbff, 0xb650e5a93bc3d898,
+      0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe,
+      0x93ba47c980e98cdf, 0xc66f336c36b10137,
+      0xb8a8d9bbe123f017, 0xb80b0047445d4184,
+      0xe6d3102ad96cec1d, 0xa60dc059157491e5,
+      0x9043ea1ac7e41392, 0x87c89837ad68db2f,
+      0xb454e4a179dd1877, 0x29babe4598c311fb,
+      0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a,
+      0x8ce2529e2734bb1d, 0x1899e4a65f58660c,
+      0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f,
+      0xdc21a1171d42645d, 0x76707543f4fa1f73,
+      0x899504ae72497eba, 0x6a06494a791c53a8,
+      0xabfa45da0edbde69, 0x487db9d17636892,
+      0xd6f8d7509292d603, 0x45a9d2845d3c42b6,
+      0x865b86925b9bc5c2, 0xb8a2392ba45a9b2,
+      0xa7f26836f282b732, 0x8e6cac7768d7141e,
+      0xd1ef0244af2364ff, 0x3207d795430cd926,
+      0x8335616aed761f1f, 0x7f44e6bd49e807b8,
+      0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6,
+      0xcd036837130890a1, 0x36dba887c37a8c0f,
+      0x802221226be55a64, 0xc2494954da2c9789,
+      0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c,
+      0xc83553c5c8965d3d, 0x6f92829494e5acc7,
+      0xfa42a8b73abbf48c, 0xcb772339ba1f17f9,
+      0x9c69a97284b578d7, 0xff2a760414536efb,
+      0xc38413cf25e2d70d, 0xfef5138519684aba,
+      0xf46518c2ef5b8cd1, 0x7eb258665fc25d69,
+      0x98bf2f79d5993802, 0xef2f773ffbd97a61,
+      0xbeeefb584aff8603, 0xaafb550ffacfd8fa,
+      0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38,
+      0x952ab45cfa97a0b2, 0xdd945a747bf26183,
+      0xba756174393d88df, 0x94f971119aeef9e4,
+      0xe912b9d1478ceb17, 0x7a37cd5601aab85d,
+      0x91abb422ccb812ee, 0xac62e055c10ab33a,
+      0xb616a12b7fe617aa, 0x577b986b314d6009,
+      0xe39c49765fdf9d94, 0xed5a7e85fda0b80b,
+      0x8e41ade9fbebc27d, 0x14588f13be847307,
+      0xb1d219647ae6b31c, 0x596eb2d8ae258fc8,
+      0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb,
+      0x8aec23d680043bee, 0x25de7bb9480d5854,
+      0xada72ccc20054ae9, 0xaf561aa79a10ae6a,
+      0xd910f7ff28069da4, 0x1b2ba1518094da04,
+      0x87aa9aff79042286, 0x90fb44d2f05d0842,
+      0xa99541bf57452b28, 0x353a1607ac744a53,
+      0xd3fa922f2d1675f2, 0x42889b8997915ce8,
+      0x847c9b5d7c2e09b7, 0x69956135febada11,
+      0xa59bc234db398c25, 0x43fab9837e699095,
+      0xcf02b2c21207ef2e, 0x94f967e45e03f4bb,
+      0x8161afb94b44f57d, 0x1d1be0eebac278f5,
+      0xa1ba1ba79e1632dc, 0x6462d92a69731732,
+      0xca28a291859bbf93, 0x7d7b8f7503cfdcfe,
+      0xfcb2cb35e702af78, 0x5cda735244c3d43e,
+      0x9defbf01b061adab, 0x3a0888136afa64a7,
+      0xc56baec21c7a1916, 0x88aaa1845b8fdd0,
+      0xf6c69a72a3989f5b, 0x8aad549e57273d45,
+      0x9a3c2087a63f6399, 0x36ac54e2f678864b,
+      0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd,
+      0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5,
+      0x969eb7c47859e743, 0x9f644ae5a4b1b325,
+      0xbc4665b596706114, 0x873d5d9f0dde1fee,
+      0xeb57ff22fc0c7959, 0xa90cb506d155a7ea,
+      0x9316ff75dd87cbd8, 0x9a7f12442d588f2,
+      0xb7dcbf5354e9bece, 0xc11ed6d538aeb2f,
+      0xe5d3ef282a242e81, 0x8f1668c8a86da5fa,
+      0x8fa475791a569d10, 0xf96e017d694487bc,
+      0xb38d92d760ec4455, 0x37c981dcc395a9ac,
+      0xe070f78d3927556a, 0x85bbe253f47b1417,
+      0x8c469ab843b89562, 0x93956d7478ccec8e,
+      0xaf58416654a6babb, 0x387ac8d1970027b2,
+      0xdb2e51bfe9d0696a, 0x6997b05fcc0319e,
+      0x88fcf317f22241e2, 0x441fece3bdf81f03,
+      0xab3c2fddeeaad25a, 0xd527e81cad7626c3,
+      0xd60b3bd56a5586f1, 0x8a71e223d8d3b074,
+      0x85c7056562757456, 0xf6872d5667844e49,
+      0xa738c6bebb12d16c, 0xb428f8ac016561db,
+      0xd106f86e69d785c7, 0xe13336d701beba52,
+      0x82a45b450226b39c, 0xecc0024661173473,
+      0xa34d721642b06084, 0x27f002d7f95d0190,
+      0xcc20ce9bd35c78a5, 0x31ec038df7b441f4,
+      0xff290242c83396ce, 0x7e67047175a15271,
+      0x9f79a169bd203e41, 0xf0062c6e984d386,
+      0xc75809c42c684dd1, 0x52c07b78a3e60868,
+      0xf92e0c3537826145, 0xa7709a56ccdf8a82,
+      0x9bbcc7a142b17ccb, 0x88a66076400bb691,
+      0xc2abf989935ddbfe, 0x6acff893d00ea435,
+      0xf356f7ebf83552fe, 0x583f6b8c4124d43,
+      0x98165af37b2153de, 0xc3727a337a8b704a,
+      0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c,
+      0xeda2ee1c7064130c, 0x1162def06f79df73,
+      0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8,
+      0xb9a74a0637ce2ee1, 0x6d953e2bd7173692,
+      0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437,
+      0x910ab1d4db9914a0, 0x1d9c9892400a22a2,
+      0xb54d5e4a127f59c8, 0x2503beb6d00cab4b,
+      0xe2a0b5dc971f303a, 0x2e44ae64840fd61d,
+      0x8da471a9de737e24, 0x5ceaecfed289e5d2,
+      0xb10d8e1456105dad, 0x7425a83e872c5f47,
+      0xdd50f1996b947518, 0xd12f124e28f77719,
+      0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f,
+      0xace73cbfdc0bfb7b, 0x636cc64d1001550b,
+      0xd8210befd30efa5a, 0x3c47f7e05401aa4e,
+      0x8714a775e3e95c78, 0x65acfaec34810a71,
+      0xa8d9d1535ce3b396, 0x7f1839a741a14d0d,
+      0xd31045a8341ca07c, 0x1ede48111209a050,
+      0x83ea2b892091e44d, 0x934aed0aab460432,
+      0xa4e4b66b68b65d60, 0xf81da84d5617853f,
+      0xce1de40642e3f4b9, 0x36251260ab9d668e,
+      0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019,
+      0xa1075a24e4421730, 0xb24cf65b8612f81f,
+      0xc94930ae1d529cfc, 0xdee033f26797b627,
+      0xfb9b7cd9a4a7443c, 0x169840ef017da3b1,
+      0x9d412e0806e88aa5, 0x8e1f289560ee864e,
+      0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2,
+      0xf5b5d7ec8acb58a2, 0xae10af696774b1db,
+      0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29,
+      0xbff610b0cc6edd3f, 0x17fd090a58d32af3,
+      0xeff394dcff8a948e, 0xddfc4b4cef07f5b0,
+      0x95f83d0a1fb69cd9, 0x4abdaf101564f98e,
+      0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1,
+      0xea53df5fd18d5513, 0x84c86189216dc5ed,
+      0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4,
+      0xb7118682dbb66a77, 0x3fbc8c33221dc2a1,
+      0xe4d5e82392a40515, 0xfabaf3feaa5334a,
+      0x8f05b1163ba6832d, 0x29cb4d87f2a7400e,
+      0xb2c71d5bca9023f8, 0x743e20e9ef511012,
+      0xdf78e4b2bd342cf6, 0x914da9246b255416,
+      0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e,
+      0xae9672aba3d0c320, 0xa184ac2473b529b1,
+      0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e,
+      0x8865899617fb1871, 0x7e2fa67c7a658892,
+      0xaa7eebfb9df9de8d, 0xddbb901b98feeab7,
+      0xd51ea6fa85785631, 0x552a74227f3ea565,
+      0x8533285c936b35de, 0xd53a88958f87275f,
+      0xa67ff273b8460356, 0x8a892abaf368f137,
+      0xd01fef10a657842c, 0x2d2b7569b0432d85,
+      0x8213f56a67f6b29b, 0x9c3b29620e29fc73,
+      0xa298f2c501f45f42, 0x8349f3ba91b47b8f,
+      0xcb3f2f7642717713, 0x241c70a936219a73,
+      0xfe0efb53d30dd4d7, 0xed238cd383aa0110,
+      0x9ec95d1463e8a506, 0xf4363804324a40aa,
+      0xc67bb4597ce2ce48, 0xb143c6053edcd0d5,
+      0xf81aa16fdc1b81da, 0xdd94b7868e94050a,
+      0x9b10a4e5e9913128, 0xca7cf2b4191c8326,
+      0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0,
+      0xf24a01a73cf2dccf, 0xbc633b39673c8cec,
+      0x976e41088617ca01, 0xd5be0503e085d813,
+      0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18,
+      0xec9c459d51852ba2, 0xddf8e7d60ed1219e,
+      0x93e1ab8252f33b45, 0xcabb90e5c942b503,
+      0xb8da1662e7b00a17, 0x3d6a751f3b936243,
+      0xe7109bfba19c0c9d, 0xcc512670a783ad4,
+      0x906a617d450187e2, 0x27fb2b80668b24c5,
+      0xb484f9dc9641e9da, 0xb1f9f660802dedf6,
+      0xe1a63853bbd26451, 0x5e7873f8a0396973,
+      0x8d07e33455637eb2, 0xdb0b487b6423e1e8,
+      0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62,
+      0xdc5c5301c56b75f7, 0x7641a140cc7810fb,
+      0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d,
+      0xac2820d9623bf429, 0x546345fa9fbdcd44,
+      0xd732290fbacaf133, 0xa97c177947ad4095,
+      0x867f59a9d4bed6c0, 0x49ed8eabcccc485d,
+      0xa81f301449ee8c70, 0x5c68f256bfff5a74,
+      0xd226fc195c6a2f8c, 0x73832eec6fff3111,
+      0x83585d8fd9c25db7, 0xc831fd53c5ff7eab,
+      0xa42e74f3d032f525, 0xba3e7ca8b77f5e55,
+      0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb,
+      0x80444b5e7aa7cf85, 0x7980d163cf5b81b3,
+      0xa0555e361951c366, 0xd7e105bcc332621f,
+      0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7,
+      0xfa856334878fc150, 0xb14f98f6f0feb951,
+      0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3,
+      0xc3b8358109e84f07, 0xa862f80ec4700c8,
+      0xf4a642e14c6262c8, 0xcd27bb612758c0fa,
+      0x98e7e9cccfbd7dbd, 0x8038d51cb897789c,
+      0xbf21e44003acdd2c, 0xe0470a63e6bd56c3,
+      0xeeea5d5004981478, 0x1858ccfce06cac74,
+      0x95527a5202df0ccb, 0xf37801e0c43ebc8,
+      0xbaa718e68396cffd, 0xd30560258f54e6ba,
+      0xe950df20247c83fd, 0x47c6b82ef32a2069,
+      0x91d28b7416cdd27e, 0x4cdc331d57fa5441,
+      0xb6472e511c81471d, 0xe0133fe4adf8e952,
+      0xe3d8f9e563a198e5, 0x58180fddd97723a6,
+      0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648,
+  };
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// out-of-class definition of the static table, emitted only when the macro asks.
+template <class unused>
+constexpr uint64_t
+    powers_template<unused>::power_of_five_128[number_of_entries];
+
+#endif // FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+
+using powers = powers_template<>; // default instantiation, used as powers::
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_DECIMAL_TO_BINARY_H
+#define FASTFLOAT_DECIMAL_TO_BINARY_H
+
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+
+namespace fast_float {
+
+// Approximates w * 5**q and returns a pair of 64-bit words: the "high" part
+// holds the most significant bits and the "low" part the least significant
+// bits. The table stores two 64-bit words per power, hence the index 2 * (q -
+// smallest_power_of_five); q is not range-checked here, so the caller must
+// keep it within the table.
+template <int bit_precision>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128
+compute_product_approximation(int64_t q, uint64_t w) {
+  int const index = 2 * int(q - powers::smallest_power_of_five);
+  // For small values of q, e.g., q in [0,27], the answer is always exact
+  // because the first product, full_multiplication(w,
+  // power_of_five_128[index]), already gives the exact answer.
+  value128 firstproduct =
+      full_multiplication(w, powers::power_of_five_128[index]);
+  static_assert((bit_precision >= 0) && (bit_precision <= 64),
+                " precision should  be in (0,64]");
+  constexpr uint64_t precision_mask =
+      (bit_precision < 64) ? (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision)
+                           : uint64_t(0xFFFFFFFFFFFFFFFF);
+  if ((firstproduct.high & precision_mask) ==
+      precision_mask) { // could further guard with  (lower + w < lower)
+    // regarding the second product, we only need secondproduct.high, but our
+    // expectation is that the compiler will optimize this extra work away if
+    // needed.
+    value128 secondproduct =
+        full_multiplication(w, powers::power_of_five_128[index + 1]);
+    firstproduct.low += secondproduct.high; // may wrap around
+    if (secondproduct.high > firstproduct.low) { // wrapped: carry into high
+      firstproduct.high++;
+    }
+  }
+  return firstproduct;
+}
+
+namespace detail {
+/**
+ * With 152170 / 65536 ~= log(5)/log(2): for q in (0,350), we have that
+ *  f = (((152170 + 65536) * q ) >> 16);
+ * is equal to
+ *   floor(p) + q
+ * where
+ *   p = log(5**q)/log(2) = q * log(5)/log(2)
+ *
+ * For negative values of q in (-400,0), we have that
+ *  f = (((152170 + 65536) * q ) >> 16);
+ * is equal to
+ *   -ceil(p) + q
+ * where
+ *   p = log(5**-q)/log(2) = -q * log(5)/log(2)
+ */
+constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept {
+  return (((152170 + 65536) * q) >> 16) + 63; // f (see above) + 63
+}
+} // namespace detail
+
+// Creates an adjusted mantissa whose power2 is offset by invalid_am_bias,
+// for significant digits (w) that were already multiplied by 10 ** q.
+template <typename binary>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 adjusted_mantissa
+compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept {
+  int hilz = int(w >> 63) ^ 1; // 1 when the top bit of w is clear, else 0
+  adjusted_mantissa answer;
+  answer.mantissa = w << hilz; // shifts by at most one bit
+  int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent();
+  answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 +
+                          invalid_am_bias);
+  return answer;
+}
+
+// Computes w * 10 ** q without rounding the representation up.
+// The power2 of the result is offset by invalid_am_bias.
+template <typename binary>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+compute_error(int64_t q, uint64_t w) noexcept {
+  int lz = leading_zeroes(w);
+  w <<= lz; // shift out the leading zero bits of w
+  value128 product =
+      compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
+  return compute_error_scaled<binary>(q, product.high, lz); // low word unused
+}
+
+// Computes w * 10 ** q.
+// The returned value should be a valid number that simply needs to be
+// packed. NOTE(review): earlier wording said a failed computation returns a
+// negative power of 2 for the caller to recompute; no return statement in this
+// function produces one on its own, so that contract looks stale - confirm.
+template <typename binary>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+compute_float(int64_t q, uint64_t w) noexcept {
+  adjusted_mantissa answer;
+  if ((w == 0) || (q < binary::smallest_power_of_ten())) {
+    answer.power2 = 0;
+    answer.mantissa = 0;
+    // result should be zero
+    return answer;
+  }
+  if (q > binary::largest_power_of_ten()) {
+    // we want to get infinity:
+    answer.power2 = binary::infinite_power();
+    answer.mantissa = 0;
+    return answer;
+  }
+  // At this point in time q is in [powers::smallest_power_of_five,
+  // powers::largest_power_of_five].
+
+  // We want the most significant bit of w to be 1. Shift if needed.
+  int lz = leading_zeroes(w);
+  w <<= lz;
+
+  // The required precision is binary::mantissa_explicit_bits() + 3 because
+  // 1. We need the implicit bit
+  // 2. We need an extra bit for rounding purposes
+  // 3. We might lose a bit due to the "upperbit" routine (result too small,
+  // requiring a shift)
+
+  value128 product =
+      compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
+  // The computed 'product' is always sufficient.
+  // Mathematical proof:
+  // Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to
+  // appear) See script/mushtak_lemire.py
+
+  // The "compute_product_approximation" function can be slightly slower than a
+  // branchless approach: value128 product = compute_product(q, w); but in
+  // practice, we can win big with the compute_product_approximation if its
+  // additional branch is easily predicted. Which is best is data specific.
+  int upperbit = int(product.high >> 63);
+  int shift = upperbit + 64 - binary::mantissa_explicit_bits() - 3;
+
+  answer.mantissa = product.high >> shift;
+
+  answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz -
+                          binary::minimum_exponent());
+  if (answer.power2 <= 0) { // we have a subnormal?
+    // Here we have that answer.power2 <= 0 so -answer.power2 >= 0
+    if (-answer.power2 + 1 >=
+        64) { // if we have more than 64 bits below the minimum exponent, you
+              // have a zero for sure.
+      answer.power2 = 0;
+      answer.mantissa = 0;
+      // result should be zero
+      return answer;
+    }
+    // next line is safe because -answer.power2 + 1 < 64
+    answer.mantissa >>= -answer.power2 + 1;
+    // Thankfully, we can't have both "round-to-even" and subnormals because
+    // "round-to-even" only occurs for powers close to 0 in the 32-bit and
+    // 64-bit cases (with no more than 19 digits).
+    answer.mantissa += (answer.mantissa & 1); // round up
+    answer.mantissa >>= 1;
+    // There is a weird scenario where we only just avoid being subnormal.
+    // Suppose we start with 2.2250738585072013e-308, we end up
+    // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
+    // whereas 0x40000000000000 x 2^-1023-53  is normal. Now, we need to round
+    // up 0x3fffffffffffff x 2^-1023-53  and once we do, we are no longer
+    // subnormal, but we can only know this after rounding.
+    // So we only declare a subnormal if we are smaller than the threshold.
+    answer.power2 =
+        (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits()))
+            ? 0
+            : 1;
+    return answer;
+  }
+
+  // usually, we round *up*, but if we fall right in between and we have an
+  // even basis, we need to round down
+  // We are only concerned with the cases where 5**q fits in a single 64-bit word.
+  if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) &&
+      (q <= binary::max_exponent_round_to_even()) &&
+      ((answer.mantissa & 3) == 1)) { // we may fall between two floats!
+    // To be in-between two floats we need that in doing
+    //   answer.mantissa = product.high >> (upperbit + 64 -
+    //   binary::mantissa_explicit_bits() - 3);
+    // ... we dropped out only zeroes. But if this happened, then we can go
+    // back!!!
+    if ((answer.mantissa << shift) == product.high) {
+      answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up
+    }
+  }
+
+  answer.mantissa += (answer.mantissa & 1); // round up
+  answer.mantissa >>= 1;
+  if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) {
+    answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits());
+    answer.power2++; // undo previous addition
+  }
+
+  answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits());
+  if (answer.power2 >= binary::infinite_power()) { // infinity
+    answer.power2 = binary::infinite_power();
+    answer.mantissa = 0;
+  }
+  return answer;
+}
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_BIGINT_H
+#define FASTFLOAT_BIGINT_H
+
+#include <cstring>
+
+
+namespace fast_float {
+
+// the limb width: we want efficient multiplication of double the bits in
+// limb, or for 64-bit limbs, at least 64-bit multiplication where we can
+// extract the high and low parts efficiently. this is every 64-bit
+// architecture except for sparc, which emulates 128-bit multiplication.
+// we might have platforms where `CHAR_BIT` is not 8, so let's avoid
+// doing `8 * sizeof(limb)`.
+#if defined(FASTFLOAT_64BIT) && !defined(__sparc)
+#define FASTFLOAT_64BIT_LIMB 1
+typedef uint64_t limb;
+constexpr size_t limb_bits = 64;
+#else // FASTFLOAT_64BIT not defined, or sparc
+#define FASTFLOAT_32BIT_LIMB
+typedef uint32_t limb;
+constexpr size_t limb_bits = 32;
+#endif // defined(FASTFLOAT_64BIT) && !defined(__sparc)
+
+typedef span<limb> limb_span; // view over a run of limbs
+
+// number of bits in a bigint. this needs to be at least the number
+// of bits required to store the largest bigint, which is
+// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or
+// ~3600 bits, so we round to 4000.
+constexpr size_t bigint_bits = 4000;
+constexpr size_t bigint_limbs = bigint_bits / limb_bits; // 62 or 125 limbs
+
+// vector-like type that is allocated on the stack. the entire
+// buffer is pre-allocated, and only the length changes.
+template <uint16_t size> struct stackvec {
+  limb data[size]; // not zero-initialized; only [0, length) is meaningful
+  // we never need more than 150 limbs, so uint16_t is wide enough
+  uint16_t length{0};
+
+  stackvec() = default;
+  stackvec(stackvec const &) = delete;
+  stackvec &operator=(stackvec const &) = delete;
+  stackvec(stackvec &&) = delete;
+  stackvec &operator=(stackvec &&other) = delete;
+
+  // create stack vector from existing limb span; the span must fit.
+  FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) {
+    FASTFLOAT_ASSERT(try_extend(s));
+  }
+
+  FASTFLOAT_CONSTEXPR14 limb &operator[](size_t index) noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);
+    return data[index];
+  }
+
+  FASTFLOAT_CONSTEXPR14 const limb &operator[](size_t index) const noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);
+    return data[index];
+  }
+
+  // index from the end of the container: rindex(0) is the last stored limb.
+  FASTFLOAT_CONSTEXPR14 const limb &rindex(size_t index) const noexcept {
+    FASTFLOAT_DEBUG_ASSERT(index < length);
+    size_t rindex = length - index - 1;
+    return data[rindex];
+  }
+
+  // set the length, without bounds checking.
+  FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept {
+    length = uint16_t(len); // narrowing cast: len must fit in 16 bits
+  }
+
+  constexpr size_t len() const noexcept { return length; }
+
+  constexpr bool is_empty() const noexcept { return length == 0; }
+
+  constexpr size_t capacity() const noexcept { return size; }
+
+  // append item to vector, without bounds checking
+  FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept {
+    data[length] = value;
+    length++;
+  }
+
+  // append item to vector, returning whether the item was added
+  FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept {
+    if (len() < capacity()) {
+      push_unchecked(value);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // add items to the vector, from a span, without bounds checking
+  FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept {
+    limb *ptr = data + length;
+    tinyobj_ff::copy_n(s.ptr, s.len(), ptr);
+    set_len(len() + s.len());
+  }
+
+  // try to add items to the vector, returning whether they were added
+  FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept {
+    if (len() + s.len() <= capacity()) {
+      extend_unchecked(s);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // resize the vector, without bounds checking
+  // if the new size is longer than the vector, assign value to each
+  // appended item.
+  FASTFLOAT_CONSTEXPR20
+  void resize_unchecked(size_t new_len, limb value) noexcept {
+    if (new_len > len()) {
+      size_t count = new_len - len();
+      limb *first = data + len();
+      limb *last = first + count;
+      tinyobj_ff::fill(first, last, value);
+      set_len(new_len);
+    } else {
+      set_len(new_len);
+    }
+  }
+
+  // try to resize the vector, returning whether the vector was resized.
+  FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept {
+    if (new_len > capacity()) {
+      return false;
+    } else {
+      resize_unchecked(new_len, value);
+      return true;
+    }
+  }
+
+  // check if any limbs are non-zero at or after the given index.
+  // this needs to be done in reverse order, since the index
+  // is relative to the most significant limbs.
+  FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept {
+    while (index < len()) {
+      if (rindex(index) != 0) {
+        return true;
+      }
+      index++;
+    }
+    return false;
+  }
+
+  // normalize the big integer, so most-significant zero limbs are removed.
+  FASTFLOAT_CONSTEXPR14 void normalize() noexcept {
+    while (len() > 0 && rindex(0) == 0) {
+      length--;
+    }
+  }
+};
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t
+empty_hi64(bool &truncated) noexcept {
+  truncated = false; // no limbs, so nothing was cut off
+  return 0;
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint64_hi64(uint64_t r0, bool &truncated) noexcept {
+  truncated = false; // a single word is never cut off
+  int shl = leading_zeroes(r0);
+  return r0 << shl; // left-align the value
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint64_hi64(uint64_t r0, uint64_t r1, bool &truncated) noexcept {
+  int shl = leading_zeroes(r0);
+  if (shl == 0) { // r0 already fills all 64 bits
+    truncated = r1 != 0;
+    return r0;
+  } else {
+    int shr = 64 - shl;
+    truncated = (r1 << shl) != 0; // low bits of r1 that get dropped
+    return (r0 << shl) | (r1 >> shr); // top shl bits of r1 fill the gap
+  }
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint32_hi64(uint32_t r0, bool &truncated) noexcept {
+  return uint64_hi64(r0, truncated); // r0 is widened to 64 bits
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint32_hi64(uint32_t r0, uint32_t r1, bool &truncated) noexcept {
+  uint64_t x0 = r0;
+  uint64_t x1 = r1;
+  return uint64_hi64((x0 << 32) | x1, truncated); // r0 is the high half
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
+uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool &truncated) noexcept {
+  uint64_t x0 = r0;
+  uint64_t x1 = r1;
+  uint64_t x2 = r2;
+  return uint64_hi64(x0, (x1 << 32) | x2, truncated); // r0 most significant
+}
+
+// add two limbs: returns the wrapped sum and sets overflow on carry-out.
+// we want an efficient operation. for msvc, where
+// we don't have built-in intrinsics, this is still
+// pretty fast.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb
+scalar_add(limb x, limb y, bool &overflow) noexcept {
+  limb z;
+// gcc and clang
+#if defined(__has_builtin)
+#if __has_builtin(__builtin_add_overflow)
+  if (!cpp20_and_in_constexpr()) {
+    overflow = __builtin_add_overflow(x, y, &z);
+    return z;
+  }
+#endif // __has_builtin(__builtin_add_overflow)
+#endif // defined(__has_builtin)
+
+  // generic, this still optimizes correctly on MSVC.
+  z = x + y;
+  overflow = z < x; // an unsigned sum below x means it wrapped
+  return z;
+}
+
+// compute x * y + carry: returns the low limb, stores the high limb in carry.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb
+scalar_mul(limb x, limb y, limb &carry) noexcept {
+#ifdef FASTFLOAT_64BIT_LIMB
+#if defined(__SIZEOF_INT128__)
+  // GCC and clang both define it as an extension.
+  __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry);
+  carry = limb(z >> limb_bits);
+  return limb(z);
+#else
+  // fallback, no native 128-bit integer multiplication with carry.
+  // on msvc, this optimizes identically, somehow.
+  value128 z = full_multiplication(x, y);
+  bool overflow;
+  z.low = scalar_add(z.low, carry, overflow);
+  z.high += uint64_t(overflow); // cannot overflow
+  carry = z.high;
+  return z.low;
+#endif // defined(__SIZEOF_INT128__)
+#else // !FASTFLOAT_64BIT_LIMB: the product of two 32-bit limbs fits in 64 bits
+  uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry);
+  carry = limb(z >> limb_bits);
+  return limb(z);
+#endif // FASTFLOAT_64BIT_LIMB
+}
+
+// add scalar value to bigint starting from limb index `start`.
+// used in grade school multiplication
+template <uint16_t size>
+inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec<size> &vec, limb y,
+                                                 size_t start) noexcept {
+  size_t index = start;
+  limb carry = y;
+  bool overflow;
+  while (carry != 0 && index < vec.len()) {
+    vec[index] = scalar_add(vec[index], carry, overflow);
+    carry = limb(overflow); // at most 1 moves on to the next limb
+    index += 1;
+  }
+  if (carry != 0) {
+    FASTFLOAT_TRY(vec.try_push(carry)); // carry left over: append a new limb
+  }
+  return true;
+}
+
+// add scalar value to bigint, starting at limb index 0.
+template <uint16_t size>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+small_add(stackvec<size> &vec, limb y) noexcept {
+  return small_add_from(vec, y, 0);
+}
+
+// multiply bigint by scalar value, in place.
+template <uint16_t size>
+inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec,
+                                            limb y) noexcept {
+  limb carry = 0;
+  for (size_t index = 0; index < vec.len(); index++) {
+    vec[index] = scalar_mul(vec[index], y, carry); // carry chains across limbs
+  }
+  if (carry != 0) {
+    FASTFLOAT_TRY(vec.try_push(carry)); // carry left over: append a new limb
+  }
+  return true;
+}
+
+// add bigint to bigint starting from index.
+// used in grade school multiplication
+template <uint16_t size>
+FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
+                                          size_t start) noexcept {
+  // the effective x buffer is from `start..x.len()`; if y does not fit in
+  // it, zero-extend x first (FASTFLOAT_TRY presumably bails out on failure).
+  if (x.len() < start || y.len() > x.len() - start) {
+    FASTFLOAT_TRY(x.try_resize(y.len() + start, 0));
+  }
+
+  bool carry = false;
+  for (size_t index = 0; index < y.len(); index++) {
+    limb xi = x[index + start];
+    limb yi = y[index];
+    bool c1 = false;
+    bool c2 = false;
+    xi = scalar_add(xi, yi, c1);
+    if (carry) {
+      xi = scalar_add(xi, 1, c2); // add the carry from the previous limb
+    }
+    x[index + start] = xi;
+    carry = c1 | c2;
+  }
+
+  // handle overflow
+  if (carry) {
+    FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start));
+  }
+  return true;
+}
+
+// add bigint to bigint, starting at limb index 0.
+template <uint16_t size>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+large_add_from(stackvec<size> &x, limb_span y) noexcept {
+  return large_add_from(x, y, 0);
+}
+
+// grade-school multiplication algorithm: x *= y, one limb of y at a time.
+template <uint16_t size>
+FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
+  limb_span xs = limb_span(x.data, x.len());
+  stackvec<size> z(xs); // copy of the original x, taken before x is modified
+  limb_span zs = limb_span(z.data, z.len());
+
+  if (y.len() != 0) {
+    limb y0 = y[0];
+    FASTFLOAT_TRY(small_mul(x, y0));
+    for (size_t index = 1; index < y.len(); index++) {
+      limb yi = y[index];
+      stackvec<size> zi;
+      if (yi != 0) {
+        // re-use the same buffer throughout
+        zi.set_len(0);
+        FASTFLOAT_TRY(zi.try_extend(zs));
+        FASTFLOAT_TRY(small_mul(zi, yi));
+        limb_span zis = limb_span(zi.data, zi.len());
+        FASTFLOAT_TRY(large_add_from(x, zis, index)); // add at limb offset index
+      }
+    }
+  }
+
+  x.normalize();
+  return true;
+}
+
+// multiply bigint by bigint; a single-limb y takes the small_mul shortcut.
+template <uint16_t size>
+FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec<size> &x, limb_span y) noexcept {
+  if (y.len() == 1) {
+    FASTFLOAT_TRY(small_mul(x, y[0]));
+  } else {
+    FASTFLOAT_TRY(long_mul(x, y));
+  }
+  return true;
+}
+
+template <typename = void> struct pow5_tables {
+  static constexpr uint32_t large_step = 135; // presumably large_power_of_5 is 5**135 - confirm
+  static constexpr uint64_t small_power_of_5[] = {
+      1UL, // 5**0
+      5UL,
+      25UL,
+      125UL,
+      625UL,
+      3125UL,
+      15625UL,
+      78125UL,
+      390625UL,
+      1953125UL,
+      9765625UL,
+      48828125UL,
+      244140625UL,
+      1220703125UL,
+      6103515625UL,
+      30517578125UL,
+      152587890625UL,
+      762939453125UL,
+      3814697265625UL,
+      19073486328125UL,
+      95367431640625UL,
+      476837158203125UL,
+      2384185791015625UL,
+      11920928955078125UL,
+      59604644775390625UL,
+      298023223876953125UL,
+      1490116119384765625UL,
+      7450580596923828125UL, // 5**27
+  };
+#ifdef FASTFLOAT_64BIT_LIMB
+  constexpr static limb large_power_of_5[] = {
+      1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL,
+      10482974169319127550UL, 198276706040285095UL};
+#else // 32-bit limbs
+  constexpr static limb large_power_of_5[] = {
+      4279965485U, 329373468U,  4020270615U, 2137533757U, 4287402176U,
+      1057042919U, 1071430142U, 2440757623U, 381945767U,  46164893U};
+#endif // FASTFLOAT_64BIT_LIMB
+};
+
+#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+// out-of-class definitions for the static constexpr members of pow5_tables.
+template <typename T> constexpr uint32_t pow5_tables<T>::large_step;
+
+template <typename T> constexpr uint64_t pow5_tables<T>::small_power_of_5[];
+
+template <typename T> constexpr limb pow5_tables<T>::large_power_of_5[];
+
+#endif // FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+
+// big integer type. implements a small subset of big integer
+// arithmetic, using simple algorithms since asymptotically
+// faster algorithms are slower for a small number of limbs.
+// all operations assume the big-integer is normalized.
+struct bigint : pow5_tables<> {
+  // storage of the limbs, in little-endian order.
+  stackvec<bigint_limbs> vec;
+
+  FASTFLOAT_CONSTEXPR20 bigint() : vec() {}
+
+  bigint(bigint const &) = delete; // neither copyable nor movable
+  bigint &operator=(bigint const &) = delete;
+  bigint(bigint &&) = delete;
+  bigint &operator=(bigint &&other) = delete;
+
+  FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) : vec() {
+#ifdef FASTFLOAT_64BIT_LIMB
+    vec.push_unchecked(value);
+#else
+    vec.push_unchecked(uint32_t(value));       // low half first
+    vec.push_unchecked(uint32_t(value >> 32)); // then the high half
+#endif
+    vec.normalize();
+  }
+
+  // get the high 64 bits from the vector; `truncated` reports whether any
+  // lower bits were dropped. these are the significant digits for the float.
+  FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool &truncated) const noexcept {
+#ifdef FASTFLOAT_64BIT_LIMB
+    if (vec.len() == 0) {
+      return empty_hi64(truncated);
+    } else if (vec.len() == 1) {
+      return uint64_hi64(vec.rindex(0), truncated);
+    } else {
+      uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated);
+      truncated |= vec.nonzero(2);
+      return result;
+    }
+#else
+    if (vec.len() == 0) {
+      return empty_hi64(truncated);
+    } else if (vec.len() == 1) {
+      return uint32_hi64(vec.rindex(0), truncated);
+    } else if (vec.len() == 2) {
+      return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated);
+    } else {
+      uint64_t result =
+          uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated);
+      truncated |= vec.nonzero(3);
+      return result;
+    }
+#endif
+  }
+
+  // compare two big integers, reporting which one is larger.
+  // assumes both are normalized. if the return value is
+  // negative, other is larger, if the return value is
+  // positive, this is larger, otherwise they are equal.
+  // the limbs are stored in little-endian order, so we
+  // must compare the limbs in reverse order.
+  FASTFLOAT_CONSTEXPR20 int compare(bigint const &other) const noexcept {
+    if (vec.len() > other.vec.len()) {
+      return 1;
+    } else if (vec.len() < other.vec.len()) {
+      return -1;
+    } else {
+      for (size_t index = vec.len(); index > 0; index--) {
+        limb xi = vec[index - 1];
+        limb yi = other.vec[index - 1];
+        if (xi > yi) {
+          return 1;
+        } else if (xi < yi) {
+          return -1;
+        }
+      }
+      return 0;
+    }
+  }
+
+  // shift left each limb n bits, carrying over to the new limb
+  // returns true if we were able to shift all the digits.
+  FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept {
+    // Internally, for each item, we shift left by n, and add the previous
+    // right shifted limb-bits.
+    // For example, we transform (for u8) shifted left 2, to:
+    //      b10100100 b01000010
+    //      b10 b10010001 b00001000
+    FASTFLOAT_DEBUG_ASSERT(n != 0);
+    FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8);
+
+    size_t shl = n;
+    size_t shr = limb_bits - shl;
+    limb prev = 0;
+    for (size_t index = 0; index < vec.len(); index++) {
+      limb xi = vec[index];
+      vec[index] = (xi << shl) | (prev >> shr);
+      prev = xi;
+    }
+
+    limb carry = prev >> shr; // bits shifted out of the top limb
+    if (carry != 0) {
+      return vec.try_push(carry);
+    }
+    return true;
+  }
+
+  // move the limbs left by `n` limbs; false if that would exceed the capacity.
+  FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept {
+    FASTFLOAT_DEBUG_ASSERT(n != 0);
+    if (n + vec.len() > vec.capacity()) {
+      return false;
+    } else if (!vec.is_empty()) {
+      // move limbs
+      limb *dst = vec.data + n;
+      limb const *src = vec.data;
+      tinyobj_ff::copy_backward(src, src + vec.len(), dst + vec.len());
+      // fill in empty limbs
+      limb *first = vec.data;
+      limb *last = first + n;
+      tinyobj_ff::fill(first, last, 0);
+      vec.set_len(n + vec.len());
+      return true;
+    } else {
+      return true; // an empty (zero) value has nothing to move
+    }
+  }
+
+  // shift left by `n` bits: the sub-limb part first, then whole limbs.
+  FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept {
+    size_t rem = n % limb_bits;
+    size_t div = n / limb_bits;
+    if (rem != 0) {
+      FASTFLOAT_TRY(shl_bits(rem));
+    }
+    if (div != 0) {
+      FASTFLOAT_TRY(shl_limbs(div));
+    }
+    return true;
+  }
+
+  // get the number of leading zeros in the top limb (0 for an empty bigint).
+  FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept {
+    if (vec.is_empty()) {
+      return 0;
+    } else {
+#ifdef FASTFLOAT_64BIT_LIMB
+      return leading_zeroes(vec.rindex(0));
+#else
+      // no use defining a specialized leading_zeroes for a 32-bit type.
+      uint64_t r0 = vec.rindex(0);
+      return leading_zeroes(r0 << 32);
+#endif
+    }
+  }
+
+  // get the number of bits in the bigint.
+  FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept {
+    int lz = ctlz();
+    return int(limb_bits * vec.len()) - lz;
+  }
+
+  FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); }
+
+  FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); }
+
+  // multiply as if by 2 raised to a power.
+  FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); }
+
+  // multiply as if by 5 raised to a power.
+  FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept {
+    // consume the exponent in large_step chunks, then small_step, then the rest
+    size_t large_length = sizeof(large_power_of_5) / sizeof(limb);
+    limb_span large = limb_span(large_power_of_5, large_length);
+    while (exp >= large_step) {
+      FASTFLOAT_TRY(large_mul(vec, large));
+      exp -= large_step;
+    }
+#ifdef FASTFLOAT_64BIT_LIMB
+    uint32_t small_step = 27;
+    limb max_native = 7450580596923828125UL;
+#else
+    uint32_t small_step = 13;
+    limb max_native = 1220703125U;
+#endif
+    while (exp >= small_step) {
+      FASTFLOAT_TRY(small_mul(vec, max_native));
+      exp -= small_step;
+    }
+    if (exp != 0) {
+      // Work around clang bug https://godbolt.org/z/zedh7rrhc
+      // This is similar to https://github.com/llvm/llvm-project/issues/47746,
+      // except the workaround described there doesn't work here
+      FASTFLOAT_TRY(small_mul(
+          vec, limb(((void)small_power_of_5[0], small_power_of_5[exp]))));
+    }
+
+    return true;
+  }
+
+  // multiply as if by 10 raised to a power.
+  FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept {
+    FASTFLOAT_TRY(pow5(exp));
+    return pow2(exp);
+  }
+};
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_DIGIT_COMPARISON_H
+#define FASTFLOAT_DIGIT_COMPARISON_H
+
+#include <cstring>
+
+
+namespace fast_float {
+
+// 10^0 through 10^19, indexed by exponent (10^19 is the largest in 64 bits).
+constexpr static uint64_t powers_of_ten_uint64[] = {1UL,
+                                                    10UL,
+                                                    100UL,
+                                                    1000UL,
+                                                    10000UL,
+                                                    100000UL,
+                                                    1000000UL,
+                                                    10000000UL,
+                                                    100000000UL,
+                                                    1000000000UL,
+                                                    10000000000UL,
+                                                    100000000000UL,
+                                                    1000000000000UL,
+                                                    10000000000000UL,
+                                                    100000000000000UL,
+                                                    1000000000000000UL,
+                                                    10000000000000000UL,
+                                                    100000000000000000UL,
+                                                    1000000000000000000UL,
+                                                    10000000000000000000UL};
+
+// calculate the exponent, in scientific notation, of the number: start from
+// num.exponent and add one for each decimal digit of num.mantissa past the
+// first. the digits are peeled off four, two and then one at a time; this is
+// not heavily optimized, but it has no practical effect on performance.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t
+scientific_exponent(parsed_number_string_t<UC> &num) noexcept {
+  uint64_t digits = num.mantissa;
+  int32_t sci_exp = int32_t(num.exponent);
+  // coarse pass: four digits per iteration.
+  for (; digits >= 10000; digits /= 10000) {
+    sci_exp += 4;
+  }
+  // medium pass: two digits per iteration.
+  for (; digits >= 100; digits /= 100) {
+    sci_exp += 2;
+  }
+  // final pass: one digit per iteration.
+  for (; digits >= 10; digits /= 10) {
+    sci_exp += 1;
+  }
+  return sci_exp;
+}
+
+// converts a native floating-point number into its extended-precision form.
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+to_extended(T value) noexcept {
+  using equiv_uint = equiv_uint_t<T>;
+  constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
+  constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
+  constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
+  int32_t const bias = binary_format<T>::mantissa_explicit_bits() -
+                       binary_format<T>::minimum_exponent();
+  // reinterpret the float's object representation as an unsigned integer.
+  equiv_uint bits;
+#if FASTFLOAT_HAS_BIT_CAST
+  bits = std::bit_cast<equiv_uint>(value);
+#else
+  ::memcpy(&bits, &value, sizeof(T));
+#endif
+  equiv_uint const exp_field = bits & exponent_mask;
+  equiv_uint const frac_field = bits & mantissa_mask;
+  adjusted_mantissa am;
+  if (exp_field != 0) {
+    // normal: unbias the stored exponent and restore the hidden bit.
+    am.power2 =
+        int32_t(exp_field >> binary_format<T>::mantissa_explicit_bits()) - bias;
+    am.mantissa = frac_field | hidden_bit_mask;
+  } else {
+    // denormal: fixed exponent, no hidden bit.
+    am.power2 = 1 - bias;
+    am.mantissa = frac_field;
+  }
+  return am;
+}
+
+// get the extended precision value of the halfway point between b and b+u.
+// we are given a native float that represents b, so we widen its mantissa by
+// one bit and set that new low bit, which lands exactly halfway to b+u.
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+to_extended_halfway(T value) noexcept {
+  adjusted_mantissa halfway = to_extended(value);
+  // doubling the mantissa is paid for by lowering the binary exponent by one.
+  halfway.power2 -= 1;
+  halfway.mantissa = (halfway.mantissa << 1) + 1;
+  return halfway;
+}
+
+// round an extended-precision float to the nearest machine float. `cb` does
+// the actual shift-and-round of the mantissa by the shift amount it is given.
+template <typename T, typename callback>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,
+                                                         callback cb) noexcept {
+  uint64_t const hidden_bit = uint64_t(1)
+                              << binary_format<T>::mantissa_explicit_bits();
+  int32_t const normal_shift =
+      64 - binary_format<T>::mantissa_explicit_bits() - 1;
+  if (-am.power2 >= normal_shift) {
+    // denormal float: shift further right, capped at the full 64 bits.
+    int32_t const denormal_shift = -am.power2 + 1;
+    cb(am, tinyobj_ff::min_val<int32_t>(denormal_shift, 64));
+    // if rounding carried into the hidden bit, report exponent 1 instead of 0.
+    am.power2 = (am.mantissa >= hidden_bit) ? 1 : 0;
+    return;
+  }
+
+  // normal float: use the default shift.
+  cb(am, normal_shift);
+
+  // a carry out of the mantissa renormalizes to the next binary exponent.
+  if (am.mantissa >= (hidden_bit << 1)) {
+    am.mantissa = hidden_bit;
+    am.power2++;
+  }
+
+  // drop the hidden bit, then saturate to infinity if the exponent overflowed.
+  am.mantissa &= ~hidden_bit;
+  if (am.power2 >= binary_format<T>::infinite_power()) {
+    am.power2 = binary_format<T>::infinite_power();
+    am.mantissa = 0;
+  }
+}
+
+// shift `am` right by `shift` bits, then add the result of
+// `cb(is_odd, is_halfway, is_above)` to the shifted mantissa.
+template <typename callback>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
+round_nearest_tie_even(adjusted_mantissa &am, int32_t shift,
+                       callback cb) noexcept {
+  // classify the bits about to be shifted out against the halfway point.
+  uint64_t const low_mask =
+      (shift != 64) ? (uint64_t(1) << shift) - 1 : UINT64_MAX;
+  uint64_t const half = (shift != 0) ? uint64_t(1) << (shift - 1) : 0;
+  uint64_t const dropped = am.mantissa & low_mask;
+  bool const above = dropped > half;
+  bool const exactly_half = dropped == half;
+
+  // shift digits into position (a 64-bit shift is spelled as zeroing).
+  am.mantissa = (shift != 64) ? (am.mantissa >> shift) : 0;
+  am.power2 += shift;
+
+  bool const odd = (am.mantissa & 1) == 1;
+  am.mantissa += uint64_t(cb(odd, exactly_half, above));
+}
+
+// truncating rounding step: shift the mantissa right and discard the low bits.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
+round_down(adjusted_mantissa &am, int32_t shift) noexcept {
+  // a shift by the full 64-bit width cannot be written as `>>`, so it is
+  // handled as "every bit shifted out".
+  bool const drops_everything = (shift == 64);
+  am.mantissa = drops_everything ? 0 : (am.mantissa >> shift);
+  am.power2 += shift;
+}
+
+// advance `first` past leading '0' characters, stopping at `last`.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+skip_zeros(UC const *&first, UC const *last) noexcept {
+  // bulk pass (not in constant evaluation): 8 bytes vs. int_cmp_zeros<UC>().
+  while (!cpp20_and_in_constexpr() &&
+         tinyobj_ff::distance(first, last) >= int_cmp_len<UC>()) {
+    uint64_t chunk;
+    ::memcpy(&chunk, first, sizeof(uint64_t));
+    if (chunk != int_cmp_zeros<UC>()) {
+      break;
+    }
+    first += int_cmp_len<UC>();
+  }
+  // tail pass: one character at a time.
+  while (first != last && *first == UC('0')) {
+    ++first;
+  }
+}
+
+// determine if any non-zero digits were truncated, i.e. whether [first, last)
+// holds anything other than '0'. all characters must be valid digits.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+is_truncated(UC const *first, UC const *last) noexcept {
+  // bulk pass (not in constant evaluation): 8 bytes at a time.
+  while (!cpp20_and_in_constexpr() &&
+         tinyobj_ff::distance(first, last) >= int_cmp_len<UC>()) {
+    uint64_t chunk;
+    ::memcpy(&chunk, first, sizeof(uint64_t));
+    if (chunk != int_cmp_zeros<UC>()) {
+      return true;
+    }
+    first += int_cmp_len<UC>();
+  }
+  // tail pass: one character at a time.
+  for (; first != last; ++first) {
+    if (*first != UC('0')) {
+      return true;
+    }
+  }
+  return false;
+}
+
+template <typename UC> // span overload: forwards to the pointer-range version
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
+is_truncated(span<UC const> s) noexcept {
+  return is_truncated(s.ptr, s.ptr + s.len());
+}
+
+template <typename UC> // appends the eight decimal digits at `p` to `value`
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+parse_eight_digits(UC const *&p, limb &value, size_t &counter,
+                   size_t &count) noexcept {
+  value = value * 100000000 + parse_eight_digits_unrolled(p);
+  p += 8;
+  counter += 8; // both counters advance by the eight digits consumed
+  count += 8;
+}
+
+template <typename UC> // appends the single decimal digit at `p` to `value`
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
+parse_one_digit(UC const *&p, limb &value, size_t &counter,
+                size_t &count) noexcept {
+  value = value * 10 + limb(*p - UC('0'));
+  p++;
+  counter++; // both counters advance by the one digit consumed
+  count++;
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+add_native(bigint &big, limb power, limb value) noexcept {
+  big.mul(power); // big = big * power + value, in place.
+  big.add(value); // NOTE(review): the bool results of mul/add are discarded.
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
+round_up_bigint(bigint &big, size_t &count) noexcept {
+  // need to round-up the digits, but need to avoid rounding ....9999 to
+  // ...10000, which could cause a false halfway point: append a digit 1 instead.
+  add_native(big, 10, 1);
+  count++; // account for the appended digit
+}
+
+// parse up to max_digits significant digits into `result`; `digits` = count.
+template <typename UC>
+inline FASTFLOAT_CONSTEXPR20 void
+parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
+               size_t max_digits, size_t &digits) noexcept {
+  // try to minimize the number of big integer and scalar multiplication.
+  // therefore, try to parse 8 digits at a time, and multiply by the largest
+  // scalar value (9 or 19 digits) for each step.
+  size_t counter = 0; // digits currently buffered in `value`
+  digits = 0;
+  limb value = 0;
+#ifdef FASTFLOAT_64BIT_LIMB
+  size_t step = 19;
+#else
+  size_t step = 9;
+#endif
+
+  // process all integer digits.
+  UC const *p = num.integer.ptr;
+  UC const *pend = p + num.integer.len();
+  skip_zeros(p, pend);
+  // process all digits, in increments of step per loop
+  while (p != pend) {
+    while ((tinyobj_ff::distance(p, pend) >= 8) && (step - counter >= 8) &&
+           (max_digits - digits >= 8)) {
+      parse_eight_digits(p, value, counter, digits);
+    }
+    while (counter < step && p != pend && digits < max_digits) {
+      parse_one_digit(p, value, counter, digits);
+    }
+    if (digits == max_digits) {
+      // add the temporary value, then check if we've truncated any digits
+      add_native(result, limb(powers_of_ten_uint64[counter]), value);
+      bool truncated = is_truncated(p, pend);
+      if (num.fraction.ptr != nullptr) {
+        truncated |= is_truncated(num.fraction);
+      }
+      if (truncated) {
+        round_up_bigint(result, digits);
+      }
+      return;
+    } else {
+      // flush the buffered digits into the big integer and start a new batch
+      add_native(result, limb(powers_of_ten_uint64[counter]), value);
+      counter = 0;
+      value = 0;
+    }
+  }
+
+  // add our fraction digits, if they're available.
+  if (num.fraction.ptr != nullptr) {
+    p = num.fraction.ptr;
+    pend = p + num.fraction.len();
+    if (digits == 0) { // no integer digits kept: fraction zeros are leading
+      skip_zeros(p, pend);
+    }
+    // process all digits, in increments of step per loop
+    while (p != pend) {
+      while ((tinyobj_ff::distance(p, pend) >= 8) && (step - counter >= 8) &&
+             (max_digits - digits >= 8)) {
+        parse_eight_digits(p, value, counter, digits);
+      }
+      while (counter < step && p != pend && digits < max_digits) {
+        parse_one_digit(p, value, counter, digits);
+      }
+      if (digits == max_digits) {
+        // add the temporary value, then check if we've truncated any digits
+        add_native(result, limb(powers_of_ten_uint64[counter]), value);
+        bool truncated = is_truncated(p, pend);
+        if (truncated) {
+          round_up_bigint(result, digits);
+        }
+        return;
+      } else {
+        // flush the buffered digits into the big integer and start a new batch
+        add_native(result, limb(powers_of_ten_uint64[counter]), value);
+        counter = 0;
+        value = 0;
+      }
+    }
+  }
+
+  // NOTE(review): both loops reset counter before exiting; looks unreachable.
+  if (counter != 0) {
+    add_native(result, limb(powers_of_ten_uint64[counter]), value);
+  }
+}
+
+// digit comparison for a non-negative decimal exponent: scale the big mantissa
+// by 10^exponent, take its top 64 bits and round to nearest, ties to even.
+template <typename T>
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
+  FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
+  bool truncated;
+  adjusted_mantissa answer;
+  answer.mantissa = bigmant.hi64(truncated);
+  int const bias = binary_format<T>::mantissa_explicit_bits() -
+                   binary_format<T>::minimum_exponent();
+  answer.power2 = bigmant.bit_length() - 64 + bias;
+  // bits lost below the top 64 turn an apparent tie into "above halfway".
+  auto const round_up = [truncated](bool is_odd, bool is_halfway,
+                                    bool is_above) -> bool {
+    return is_above || (is_halfway && (truncated || is_odd));
+  };
+  round<T>(answer, [round_up](adjusted_mantissa &a, int32_t shift) {
+    round_nearest_tie_even(a, shift, round_up);
+  });
+  return answer;
+}
+
+// the scaling here is quite simple: we have, for the real digits `m * 10^e`,
+// and for the theoretical digits `n * 2^f`. Since `e` is always negative,
+// to scale them identically, we do `n * 2^f * 5^-e`, so the real digits are
+// now `m * 2^e`. we then need to scale by `2^(f - e)`, and then the two
+// significant digits are of the same magnitude.
+template <typename T>
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
+    bigint &bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
+  bigint &real_digits = bigmant;
+  int32_t real_exp = exponent;
+
+  // get the value of `b`, rounded down, and get a bigint representation of b+h
+  adjusted_mantissa am_b = am;
+  // gcc7 bug: use a lambda to remove the noexcept qualifier bug with
+  // -Wnoexcept-type.
+  round<T>(am_b,
+           [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); });
+  T b;
+  to_float(false, am_b, b);
+  adjusted_mantissa theor = to_extended_halfway(b);
+  bigint theor_digits(theor.mantissa);
+  int32_t theor_exp = theor.power2;
+
+  // scale real digits and theor digits to be same power.
+  int32_t pow2_exp = theor_exp - real_exp;
+  uint32_t pow5_exp = uint32_t(-real_exp);
+  if (pow5_exp != 0) {
+    FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp));
+  }
+  if (pow2_exp > 0) {
+    FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp)));
+  } else if (pow2_exp < 0) {
+    FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
+  }
+
+  // compare digits, and use it to direct rounding
+  int ord = real_digits.compare(theor_digits);
+  adjusted_mantissa answer = am;
+  round<T>(answer, [ord](adjusted_mantissa &a, int32_t shift) {
+    round_nearest_tie_even(
+        a, shift, [ord](bool is_odd, bool _, bool __) -> bool {
+          (void)_;  // not needed, since we've done our comparison
+          (void)__; // not needed, since we've done our comparison
+          if (ord > 0) {
+            return true; // real digits above the halfway point: round up
+          } else if (ord < 0) {
+            return false; // below the halfway point: keep the rounded-down b
+          } else {
+            return is_odd; // exact tie: round to even
+          }
+        });
+  });
+
+  return answer;
+}
+
+// parse the significant digits as a big integer to unambiguously round
+// the significant digits. here, we are trying to determine how to round
+// an extended float representation close to `b+h`, halfway between `b`
+// (the float rounded-down) and `b+u`, the next positive float. this
+// algorithm is always correct, and uses one of two approaches. when
+// the exponent is positive relative to the significant digits (such as
+// 1234), we create a big-integer representation, get the high 64-bits,
+// determine if any lower bits are truncated, and use that to direct
+// rounding. in case of a negative exponent relative to the significant
+// digits (such as 1.2345), we create a theoretical representation of
+// `b+h` as a big-integer type, scaled to the same binary exponent as
+// the actual digits. we then compare the big integer representations
+// of both, and use that to direct rounding.
+template <typename T, typename UC>
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+digit_comp(parsed_number_string_t<UC> &num, adjusted_mantissa am) noexcept {
+  // remove the invalid exponent bias
+  am.power2 -= invalid_am_bias;
+
+  int32_t const sci_exp = scientific_exponent(num);
+  size_t const digit_limit = binary_format<T>::max_digits();
+  size_t parsed = 0;
+  bigint big;
+  parse_mantissa(big, num, digit_limit, parsed);
+  // can't underflow, since parsed is at most digit_limit.
+  int32_t const rel_exp = sci_exp + 1 - int32_t(parsed);
+  // the sign of the exponent relative to the parsed digits picks the strategy.
+  if (rel_exp < 0) {
+    return negative_digit_comp<T>(big, am, rel_exp);
+  }
+  return positive_digit_comp<T>(big, rel_exp);
+}
+
+} // namespace fast_float
+
+#endif
+
+#ifndef FASTFLOAT_PARSE_NUMBER_H
+#define FASTFLOAT_PARSE_NUMBER_H
+
+
+#include <cmath>
+#include <cstring>
+#include <limits>
+
+namespace fast_float {
+
+namespace detail {
+/**
+ * Special case +inf, -inf, nan, infinity, -infinity.
+ * The case comparisons could be made much faster given that we know that the
+ * strings are null-free and fixed.
+ **/
+template <typename T, typename UC>
+from_chars_result_t<UC>
+    FASTFLOAT_CONSTEXPR14 parse_infnan(UC const *first, UC const *last,
+                                       T &value, chars_format fmt) noexcept {
+  from_chars_result_t<UC> answer{};
+  answer.ptr = first;
+  answer.ec = tinyobj_ff::ff_errc(); // be optimistic
+  // assume first < last, so dereference without checks;
+  bool const minusSign = (*first == UC('-'));
+  // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
+  if ((*first == UC('-')) ||
+      (uint64_t(fmt & chars_format::allow_leading_plus) &&
+       (*first == UC('+')))) {
+    ++first;
+  }
+  if (last - first >= 3) {
+    if (fastfloat_strncasecmp(first, str_const_nan<UC>(), 3)) {
+      answer.ptr = (first += 3);
+      value = minusSign ? -std::numeric_limits<T>::quiet_NaN()
+                        : std::numeric_limits<T>::quiet_NaN();
+      // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7,
+      // C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan).
+      if (first != last && *first == UC('(')) {
+        for (UC const *ptr = first + 1; ptr != last; ++ptr) {
+          if (*ptr == UC(')')) {
+            answer.ptr = ptr + 1; // valid nan(n-char-seq-opt)
+            break;
+          } else if (!((UC('a') <= *ptr && *ptr <= UC('z')) ||
+                       (UC('A') <= *ptr && *ptr <= UC('Z')) ||
+                       (UC('0') <= *ptr && *ptr <= UC('9')) || *ptr == UC('_')))
+            break; // forbidden char, not nan(n-char-seq-opt)
+        }
+      }
+      return answer;
+    }
+    if (fastfloat_strncasecmp(first, str_const_inf<UC>(), 3)) {
+      if ((last - first >= 8) &&
+          fastfloat_strncasecmp(first + 3, str_const_inf<UC>() + 3, 5)) {
+        answer.ptr = first + 8; // all eight characters matched
+      } else {
+        answer.ptr = first + 3; // only the three-character prefix matched
+      }
+      value = minusSign ? -std::numeric_limits<T>::infinity()
+                        : std::numeric_limits<T>::infinity();
+      return answer;
+    }
+  }
+  answer.ec = tinyobj_ff::ff_errc::invalid_argument; // `value` is not written
+  return answer;
+}
+
+/**
+ * Returns true if the floating-point rounding mode is to 'nearest'.
+ * It is the default on most systems. This function is meant to be inexpensive.
+ * Credit : @mwalcott3
+ */
+fastfloat_really_inline bool rounds_to_nearest() noexcept {
+  // https://lemire.me/blog/2020/06/26/gcc-not-nearest/
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return false;
+#endif
+  // See
+  // A fast function to check your floating-point rounding mode
+  // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/
+  //
+  // This function is meant to be equivalent to :
+  // prior: #include <cfenv>
+  //  return fegetround() == FE_TONEAREST;
+  // However, it is expected to be much faster than the fegetround()
+  // function call.
+  //
+  // The volatile keyword prevents the compiler from computing the function
+  // at compile-time.
+  // There might be other ways to prevent compile-time optimizations (e.g.,
+  // asm). The value does not need to be std::numeric_limits<float>::min(), any
+  // small value so that 1 + x should round to 1 would do (after accounting for
+  // excess precision, as in 387 instructions).
+  static float volatile fmin = std::numeric_limits<float>::min();
+  float fmini = fmin; // we copy it so that it gets loaded at most once.
+//
+// Explanation:
+// Only when fegetround() == FE_TONEAREST do we have that
+// fmin + 1.0f == 1.0f - fmin.
+//
+// FE_UPWARD:
+//  fmin + 1.0f > 1
+//  1.0f - fmin == 1
+//
+// FE_DOWNWARD or  FE_TOWARDZERO:
+//  fmin + 1.0f == 1
+//  1.0f - fmin < 1
+//
+// Note: This may fail to be accurate if fast-math has been
+// enabled, as rounding conventions may not apply.
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(push)
+//  todo: is there a VS warning?
+//  see
+//  https://stackoverflow.com/questions/46079446/is-there-a-warning-for-floating-point-equality-checking-in-visual-studio-2013
+#elif defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wfloat-equal"
+#elif defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfloat-equal"
+#endif
+  return (fmini + 1.0f == 1.0f - fmini);
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(pop)
+#elif defined(__clang__)
+#pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+}
+
+} // namespace detail
+
+template <typename T> struct from_chars_caller { // generic case: plain forward
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, T &value,
+       parse_options_t<UC> options) noexcept {
+    return from_chars_advanced(first, last, value, options);
+  }
+};
+
+#ifdef __STDCPP_FLOAT32_T__
+template <> struct from_chars_caller<std::float32_t> {
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, std::float32_t &value,
+       parse_options_t<UC> options) noexcept {
+    // parse via `float`; copy out unless parsing failed with invalid_argument.
+    float val = 0.0f; // initialized: never copy an indeterminate value
+    auto ret = from_chars_advanced(first, last, val, options);
+    if (ret.ec != tinyobj_ff::ff_errc::invalid_argument) {
+      value = val;
+    }
+    return ret;
+  }
+};
+#endif
+
+#ifdef __STDCPP_FLOAT64_T__
+template <> struct from_chars_caller<std::float64_t> {
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, std::float64_t &value,
+       parse_options_t<UC> options) noexcept {
+    // parse via `double`; copy out unless parsing failed with invalid_argument.
+    double val = 0.0; // initialized: never copy an indeterminate value
+    auto ret = from_chars_advanced(first, last, val, options);
+    if (ret.ec != tinyobj_ff::ff_errc::invalid_argument) {
+      value = val;
+    }
+    return ret;
+  }
+};
+#endif
+
+template <typename T, typename UC, typename> // wraps `fmt` in parse_options_t
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value,
+           chars_format fmt /*= chars_format::general*/) noexcept {
+  return from_chars_caller<T>::call(first, last, value,
+                                    parse_options_t<UC>(fmt));
+}
+
+/**
+ * This function overload takes a parsed_number_string_t structure that is
+ * created and populated either by the from_chars_advanced overload taking a
+ * character range and parsing options, or by a user's own parsing function.
+ */
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
+
+  static_assert(is_supported_float_type<T>::value,
+                "only some floating-point types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  from_chars_result_t<UC> answer;
+
+  answer.ec = tinyobj_ff::ff_errc(); // be optimistic
+  answer.ptr = pns.lastmatch;
+  // The implementation of the Clinger's fast path is convoluted because
+  // we want round-to-nearest in all cases, irrespective of the rounding mode
+  // selected on the thread.
+  // We proceed optimistically, assuming that detail::rounds_to_nearest()
+  // returns true.
+  if (binary_format<T>::min_exponent_fast_path() <= pns.exponent &&
+      pns.exponent <= binary_format<T>::max_exponent_fast_path() &&
+      !pns.too_many_digits) {
+    // Unfortunately, the conventional Clinger's fast path is only possible
+    // when the system rounds to the nearest float.
+    //
+    // We expect the next branch to almost always be selected.
+    // We could check it first (before the previous branch), but
+    // there might be performance advantages at having the check
+    // be last.
+    if (!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) {
+      // We have that fegetround() == FE_TONEAREST.
+      // Next is Clinger's fast path.
+      if (pns.mantissa <= binary_format<T>::max_mantissa_fast_path()) {
+        value = T(pns.mantissa);
+        if (pns.exponent < 0) {
+          value = value / binary_format<T>::exact_power_of_ten(-pns.exponent);
+        } else {
+          value = value * binary_format<T>::exact_power_of_ten(pns.exponent);
+        }
+        if (pns.negative) {
+          value = -value;
+        }
+        return answer;
+      }
+    } else {
+      // We do not have that fegetround() == FE_TONEAREST.
+      // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's
+      // proposal
+      if (pns.exponent >= 0 &&
+          pns.mantissa <=
+              binary_format<T>::max_mantissa_fast_path(pns.exponent)) {
+#if defined(__clang__) || defined(FASTFLOAT_32BIT)
+        // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD
+        if (pns.mantissa == 0) {
+          value = pns.negative ? T(-0.) : T(0.);
+          return answer;
+        }
+#endif
+        value = T(pns.mantissa) *
+                binary_format<T>::exact_power_of_ten(pns.exponent);
+        if (pns.negative) {
+          value = -value;
+        }
+        return answer;
+      }
+    }
+  }
+  // fast paths did not apply: compute the result from exponent and mantissa.
+  adjusted_mantissa am =
+      compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
+  if (pns.too_many_digits && am.power2 >= 0) { // cross-check with mantissa + 1
+    if (am != compute_float<binary_format<T>>(pns.exponent, pns.mantissa + 1)) {
+      am = compute_error<binary_format<T>>(pns.exponent, pns.mantissa);
+    }
+  }
+  // If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa)
+  // and we have an invalid power (am.power2 < 0), then we need to go the long
+  // way around again. This is very uncommon.
+  if (am.power2 < 0) {
+    am = digit_comp<T>(pns, am);
+  }
+  to_float(pns.negative, am, value);
+  // Test for over/underflow.
+  if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) ||
+      am.power2 == binary_format<T>::infinite_power()) {
+    answer.ec = tinyobj_ff::ff_errc::result_out_of_range;
+  }
+  return answer;
+}
+
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_float_advanced(UC const *first, UC const *last, T &value,
+                          parse_options_t<UC> options) noexcept {
+  // Parses a floating-point value from [first, last) into `value` per `options`.
+  static_assert(is_supported_float_type<T>::value,
+                "only some floating-point types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+
+  from_chars_result_t<UC> answer;
+  if (uint64_t(fmt & chars_format::skip_white_space)) {  // optional leading-space skip
+    while ((first != last) && fast_float::is_space(*first)) {
+      first++;
+    }
+  }
+  if (first == last) {  // nothing left to parse
+    answer.ec = tinyobj_ff::ff_errc::invalid_argument;
+    answer.ptr = first;
+    return answer;
+  }
+  parsed_number_string_t<UC> pns =
+      uint64_t(fmt & detail::basic_json_fmt)
+          ? parse_number_string<true, UC>(first, last, options)
+          : parse_number_string<false, UC>(first, last, options);
+  if (!pns.valid) {
+    if (uint64_t(fmt & chars_format::no_infnan)) {  // inf/nan fallback disabled
+      answer.ec = tinyobj_ff::ff_errc::invalid_argument;
+      answer.ptr = first;
+      return answer;
+    } else {
+      return detail::parse_infnan(first, last, value, fmt);
+    }
+  }
+
+  // call overload that takes parsed_number_string_t directly.
+  return from_chars_advanced(pns, value);
+}
+
+template <typename T, typename UC, typename>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars(UC const *first, UC const *last, T &value, int base) noexcept {
+  // Integer entry point: stores `base` in a parse_options_t and forwards.
+  static_assert(is_supported_integer_type<T>::value,
+                "only integer types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  parse_options_t<UC> options;
+  options.base = base;
+  return from_chars_advanced(first, last, value, options);
+}
+
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_int_advanced(UC const *first, UC const *last, T &value,
+                        parse_options_t<UC> options) noexcept {
+  // Parses an integer from [first, last) into `value` using options.base.
+  static_assert(is_supported_integer_type<T>::value,
+                "only integer types are supported");
+  static_assert(is_supported_char_type<UC>::value,
+                "only char, wchar_t, char16_t and char32_t are supported");
+
+  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
+  int const base = options.base;
+
+  from_chars_result_t<UC> answer;
+  if (uint64_t(fmt & chars_format::skip_white_space)) {  // optional leading-space skip
+    while ((first != last) && fast_float::is_space(*first)) {
+      first++;
+    }
+  }
+  if (first == last || base < 2 || base > 36) {  // empty input or base outside 2..36
+    answer.ec = tinyobj_ff::ff_errc::invalid_argument;
+    answer.ptr = first;
+    return answer;
+  }
+
+  return parse_int_string(first, last, value, options);
+}
+
+template <size_t TypeIx> struct from_chars_advanced_caller {
+  static_assert(TypeIx > 0, "unsupported type");  // rejects TypeIx == 0
+};
+// TypeIx == 1: forwards to the floating-point parser.
+template <> struct from_chars_advanced_caller<1> {
+  template <typename T, typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, T &value,
+       parse_options_t<UC> options) noexcept {
+    return from_chars_float_advanced(first, last, value, options);
+  }
+};
+// TypeIx == 2: forwards to the integer parser.
+template <> struct from_chars_advanced_caller<2> {
+  template <typename T, typename UC>
+  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
+  call(UC const *first, UC const *last, T &value,
+       parse_options_t<UC> options) noexcept {
+    return from_chars_int_advanced(first, last, value, options);
+  }
+};
+
+template <typename T, typename UC>
+FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
+from_chars_advanced(UC const *first, UC const *last, T &value,
+                    parse_options_t<UC> options) noexcept {
+  return from_chars_advanced_caller<  // index: +1 if T is a supported float, +2 if a supported integer
+      size_t(is_supported_float_type<T>::value) +
+      2 * size_t(is_supported_integer_type<T>::value)>::call(first, last, value,
+                                                             options);
+}
+
+} // namespace fast_float
+
+#endif
+
+
+// --- End embedded fast_float ---
+
+// Clean up fast_float macros to avoid polluting the user's namespace.
+#undef FASTFLOAT_32BIT
+#undef FASTFLOAT_32BIT_LIMB
+#undef FASTFLOAT_64BIT
+#undef FASTFLOAT_64BIT_LIMB
+#undef FASTFLOAT_ASCII_NUMBER_H
+#undef FASTFLOAT_ASSERT
+#undef FASTFLOAT_BIGINT_H
+#undef FASTFLOAT_CONSTEXPR14
+#undef FASTFLOAT_CONSTEXPR20
+#undef FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+#undef FASTFLOAT_DEBUG_ASSERT
+#undef FASTFLOAT_DECIMAL_TO_BINARY_H
+#undef FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
+#undef FASTFLOAT_DIGIT_COMPARISON_H
+#undef FASTFLOAT_ENABLE_IF
+#undef FASTFLOAT_FAST_FLOAT_H
+#undef FASTFLOAT_FAST_TABLE_H
+#undef FASTFLOAT_FLOAT_COMMON_H
+#undef FASTFLOAT_HAS_BIT_CAST
+#undef FASTFLOAT_HAS_IS_CONSTANT_EVALUATED
+#undef FASTFLOAT_HAS_SIMD
+#undef FASTFLOAT_IF_CONSTEXPR17
+#undef FASTFLOAT_IS_BIG_ENDIAN
+#undef FASTFLOAT_IS_CONSTEXPR
+#undef FASTFLOAT_NEON
+#undef FASTFLOAT_PARSE_NUMBER_H
+#undef fastfloat_really_inline
+#undef FASTFLOAT_SIMD_DISABLE_WARNINGS
+#undef FASTFLOAT_SIMD_RESTORE_WARNINGS
+#undef FASTFLOAT_SSE2
+#undef FASTFLOAT_STRINGIZE
+#undef FASTFLOAT_STRINGIZE_IMPL
+#undef FASTFLOAT_TRY
+#undef FASTFLOAT_VERSION
+#undef FASTFLOAT_VERSION_MAJOR
+#undef FASTFLOAT_VERSION_MINOR
+#undef FASTFLOAT_VERSION_PATCH
+#undef FASTFLOAT_VERSION_STR
+#undef FASTFLOAT_VISUAL_STUDIO
+
+#endif  // TINYOBJLOADER_DISABLE_FAST_FLOAT
+
+namespace tinyobj {
+
+MaterialReader::~MaterialReader() {}  // out-of-line definition of the empty destructor
+
+// Byte-stream reader for bounds-checked text parsing: a `(buf, len, idx)`
+// triple replaces raw `const char*` token pointers, and every byte access is
+// guarded by an EOF check. Also tracks a 1-based line/column and an error list.
+class StreamReader {
+ public:
+// Maximum number of bytes StreamReader will buffer from std::istream.
+// Define this macro to a larger value if your application needs to parse
+// very large streamed OBJ/MTL content.
+#ifndef TINYOBJLOADER_STREAM_READER_MAX_BYTES
+#define TINYOBJLOADER_STREAM_READER_MAX_BYTES (size_t(256) * size_t(1024) * size_t(1024))
+#endif
+  // Wraps a caller-provided buffer of `length` bytes; the bytes are not copied.
+  StreamReader(const char *buf, size_t length)
+      : buf_(buf), length_(length), idx_(0), line_num_(1), col_num_(1) {}
+
+  // Non-copyable, non-movable: buf_ may point into owned_buf_.
+  StreamReader(const StreamReader &) /* = delete */;
+  StreamReader &operator=(const StreamReader &) /* = delete */;
+
+  // Build from std::istream by reading all content into an internal buffer.
+  explicit StreamReader(std::istream &is) : buf_(NULL), length_(0), idx_(0), line_num_(1), col_num_(1) {
+    const size_t max_stream_bytes = TINYOBJLOADER_STREAM_READER_MAX_BYTES;
+    std::streampos start_pos = is.tellg();
+    bool can_seek = (start_pos != std::streampos(-1));
+    if (can_seek) {
+      is.seekg(0, std::ios::end);  // probe the end position to size the buffer
+      std::streampos end_pos = is.tellg();
+      if (end_pos >= start_pos) {
+        std::streamoff remaining_off = static_cast<std::streamoff>(end_pos - start_pos);
+        is.seekg(start_pos);  // rewind to where the caller left the stream
+        unsigned long long remaining_ull = static_cast<unsigned long long>(remaining_off);
+        if (remaining_ull > static_cast<unsigned long long>((std::numeric_limits<size_t>::max)())) {
+          std::stringstream ss;
+          ss << "input stream too large for this platform (" << remaining_ull
+             << " bytes exceeds size_t max " << (std::numeric_limits<size_t>::max)() << ")\n";
+          push_error(ss.str());
+          buf_ = "";
+          length_ = 0;
+          return;
+        }
+        size_t remaining_size = static_cast<size_t>(remaining_ull);
+        if (remaining_size > max_stream_bytes) {
+          std::stringstream ss;
+          ss << "input stream too large (" << remaining_size
+             << " bytes exceeds limit " << max_stream_bytes << " bytes)\n";
+          push_error(ss.str());
+          buf_ = "";
+          length_ = 0;
+          return;
+        }
+        owned_buf_.resize(remaining_size);
+        if (remaining_size > 0) {
+          is.read(&owned_buf_[0], static_cast<std::streamsize>(remaining_size));
+        }
+        size_t actually_read = static_cast<size_t>(is.gcount());
+        owned_buf_.resize(actually_read);  // a short read shrinks the buffer to what arrived
+      }
+    }
+    if (!can_seek || owned_buf_.empty()) {
+      // Stream doesn't support seeking, or seek probing failed.
+      if (can_seek) is.seekg(start_pos);
+      is.clear();
+      std::vector<char> content;
+      char chunk[4096];
+      size_t total_read = 0;
+      while (is.good()) {
+        is.read(chunk, static_cast<std::streamsize>(sizeof(chunk)));
+        std::streamsize nread = is.gcount();
+        if (nread <= 0) break;
+        size_t n = static_cast<size_t>(nread);
+        if (n > (max_stream_bytes - total_read)) {  // this chunk would exceed the byte limit
+          std::stringstream ss;
+          ss << "input stream too large (exceeds limit " << max_stream_bytes
+             << " bytes)\n";
+          push_error(ss.str());
+          owned_buf_.clear();
+          buf_ = "";
+          length_ = 0;
+          return;
+        }
+        content.insert(content.end(), chunk, chunk + n);
+        total_read += n;
+      }
+      owned_buf_.swap(content);
+    }
+    buf_ = owned_buf_.empty() ? "" : &owned_buf_[0];
+    length_ = owned_buf_.size();
+  }
+  // Position and size queries; line and column numbers start at 1.
+  bool eof() const { return idx_ >= length_; }
+  size_t tell() const { return idx_; }
+  size_t size() const { return length_; }
+  size_t line_num() const { return line_num_; }
+  size_t col_num() const { return col_num_; }
+  // Returns the current byte without consuming it, or '\0' at EOF.
+  char peek() const {
+    if (idx_ >= length_) return '\0';
+    return buf_[idx_];
+  }
+  // Consumes and returns one byte ('\0' at EOF), updating line/column.
+  char get() {
+    if (idx_ >= length_) return '\0';
+    char c = buf_[idx_++];
+    if (c == '\n') { line_num_++; col_num_ = 1; } else { col_num_++; }
+    return c;
+  }
+  // Consumes up to n bytes (stopping at EOF), updating line/column per byte.
+  void advance(size_t n) {
+    for (size_t i = 0; i < n && idx_ < length_; i++) {
+      if (buf_[idx_] == '\n') { line_num_++; col_num_ = 1; } else { col_num_++; }
+      idx_++;
+    }
+  }
+  // Skips spaces and tabs.
+  void skip_space() {
+    while (idx_ < length_ && (buf_[idx_] == ' ' || buf_[idx_] == '\t')) {
+      col_num_++;
+      idx_++;
+    }
+  }
+  // Skips spaces, tabs and carriage returns.
+  void skip_space_and_cr() {
+    while (idx_ < length_ && (buf_[idx_] == ' ' || buf_[idx_] == '\t' || buf_[idx_] == '\r')) {
+      col_num_++;
+      idx_++;
+    }
+  }
+  // Consumes the rest of the line including its "\n", "\r\n" or lone "\r".
+  void skip_line() {
+    while (idx_ < length_) {
+      char c = buf_[idx_];
+      if (c == '\n') {
+        idx_++;
+        line_num_++;
+        col_num_ = 1;
+        return;
+      }
+      if (c == '\r') {
+        idx_++;
+        if (idx_ < length_ && buf_[idx_] == '\n') {
+          idx_++;
+        }
+        line_num_++;
+        col_num_ = 1;
+        return;
+      }
+      col_num_++;
+      idx_++;
+    }
+  }
+  // True at EOF or when the current byte is '\n', '\r' or '\0'.
+  bool at_line_end() const {
+    if (idx_ >= length_) return true;
+    char c = buf_[idx_];
+    return (c == '\n' || c == '\r' || c == '\0');
+  }
+  // Returns the bytes up to the next '\n' or '\r'; the terminator is not consumed.
+  std::string read_line() {
+    std::string result;
+    while (idx_ < length_) {
+      char c = buf_[idx_];
+      if (c == '\n' || c == '\r') break;
+      result += c;
+      col_num_++;
+      idx_++;
+    }
+    return result;
+  }
+
+  // Reads a whitespace-delimited token. Used by tests and as a general utility.
+  std::string read_token() {
+    skip_space();
+    std::string result;
+    while (idx_ < length_) {
+      char c = buf_[idx_];
+      if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\0') break;
+      result += c;
+      col_num_++;
+      idx_++;
+    }
+    return result;
+  }
+  // True when not at EOF and the next `len` bytes equal prefix[0..len).
+  bool match(const char *prefix, size_t len) const {
+    if (idx_ >= length_ || len > length_ - idx_) return false;
+    return (memcmp(buf_ + idx_, prefix, len) == 0);
+  }
+  // True when a byte exists `offset` positions ahead and it equals `c`.
+  bool char_at(size_t offset, char c) const {
+    if (idx_ >= length_ || offset >= length_ - idx_) return false;
+    return buf_[idx_ + offset] == c;
+  }
+  // Returns the byte `offset` positions ahead, or '\0' if that is past the end.
+  char peek_at(size_t offset) const {
+    if (idx_ >= length_ || offset >= length_ - idx_) return '\0';
+    return buf_[idx_ + offset];
+  }
+  // Pointer to the current byte (remaining() bytes are readable), or "" at EOF.
+  const char *current_ptr() const {
+    if (idx_ >= length_) return "";
+    return buf_ + idx_;
+  }
+  // Number of bytes not yet consumed.
+  size_t remaining() const {
+    return (idx_ < length_) ? (length_ - idx_) : 0;
+  }
+
+  // Returns the full text of the current line (for diagnostic display).
+  std::string current_line_text() const {
+    // Scan backward to find line start
+    size_t line_start = idx_;
+    while (line_start > 0 && buf_[line_start - 1] != '\n' && buf_[line_start - 1] != '\r') {
+      line_start--;
+    }
+    // Scan forward to find line end
+    size_t line_end = idx_;
+    while (line_end < length_ && buf_[line_end] != '\n' && buf_[line_end] != '\r') {
+      line_end++;
+    }
+    return std::string(buf_ + line_start, line_end - line_start);
+  }
+
+  // Clang-style formatted error with file:line:col and caret.
+  std::string format_error(const std::string &filename, const std::string &msg) const {
+    std::stringstream line_ss, col_ss;
+    line_ss << line_num_;
+    col_ss << col_num_;
+    std::string result;
+    result += filename + ":" + line_ss.str() + ":" + col_ss.str() + ": error: " + msg + "\n";
+    std::string line_text = current_line_text();
+    result += line_text + "\n";
+    // Build caret line preserving tab alignment
+    std::string caret;
+    size_t caret_pos = (col_num_ > 0) ? (col_num_ - 1) : 0;
+    for (size_t i = 0; i < caret_pos && i < line_text.size(); i++) {
+      caret += (line_text[i] == '\t') ? '\t' : ' ';
+    }
+    caret += "^";
+    result += caret + "\n";
+    return result;
+  }
+  // Same as the two-argument overload, with "<input>" as the file name.
+  std::string format_error(const std::string &msg) const {
+    return format_error("<input>", msg);
+  }
+
+  // Error stack
+  void push_error(const std::string &msg) {
+    errors_.push_back(msg);
+  }
+  // Pushes format_error(filename, msg) built for the current position.
+  void push_formatted_error(const std::string &filename, const std::string &msg) {
+    errors_.push_back(format_error(filename, msg));
+  }
+
+  bool has_errors() const { return !errors_.empty(); }
+  // Concatenates all pushed messages in push order, without separators.
+  std::string get_errors() const {
+    std::string result;
+    for (size_t i = 0; i < errors_.size(); i++) {
+      result += errors_[i];
+    }
+    return result;
+  }
+
+  const std::vector<std::string> &error_stack() const { return errors_; }
+
+  void clear_errors() { errors_.clear(); }
+
+ private:
+  const char *buf_;  // input bytes: caller data, owned_buf_ storage, or ""
+  size_t length_;  // number of readable bytes at buf_
+  size_t idx_;  // offset of the next byte to read
+  size_t line_num_;  // 1-based line of idx_
+  size_t col_num_;  // 1-based column of idx_
+  std::vector<char> owned_buf_;  // filled only by the std::istream constructor
+  std::vector<std::string> errors_;  // messages in push order
+};
+
+#ifdef TINYOBJLOADER_USE_MMAP
+// RAII wrapper for memory-mapped file I/O.
+// Opens a file and maps it into memory; the mapping is released on destruction.
+// For empty files, data is set to "" and is_mapped remains false so close()
+// will not attempt to unmap a string literal.
+struct MappedFile {
+  const char *data;  // file contents; NULL before open() and after close()
+  size_t size;  // length of the contents in bytes
+  bool is_mapped;  // true when data points to an actual mapped region
+#if defined(_WIN32)
+  HANDLE hFile;  // file handle, INVALID_HANDLE_VALUE when closed
+  HANDLE hMapping;  // file-mapping handle, NULL when closed
+#else
+  void *mapped_ptr;  // address returned by mmap(), kept for munmap()
+#endif
+  // Starts in the closed state: no data, no handles, nothing mapped.
+  MappedFile() : data(NULL), size(0), is_mapped(false)
+#if defined(_WIN32)
+    , hFile(INVALID_HANDLE_VALUE), hMapping(NULL)
+#else
+    , mapped_ptr(NULL)
+#endif
+  {}
+  // Opens `filepath` read-only and maps it; an empty file succeeds with data == "".
+  // Returns true on success. Does not release a previous mapping first.
+  bool open(const char *filepath) {
+#if defined(_WIN32)
+    std::wstring wfilepath = LongPathW(UTF8ToWchar(std::string(filepath)));
+    hFile = CreateFileW(wfilepath.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL,
+                        OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (hFile == INVALID_HANDLE_VALUE) return false;
+    LARGE_INTEGER fileSize;
+    if (!GetFileSizeEx(hFile, &fileSize)) { close(); return false; }
+    if (fileSize.QuadPart < 0) { close(); return false; }
+    unsigned long long fsize = static_cast<unsigned long long>(fileSize.QuadPart);
+    if (fsize > static_cast<unsigned long long>((std::numeric_limits<size_t>::max)())) {
+      close();
+      return false;
+    }
+    size = static_cast<size_t>(fsize);
+    if (size == 0) { data = ""; return true; }  // valid but empty; is_mapped stays false
+    hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (hMapping == NULL) { close(); return false; }
+    data = static_cast<const char *>(MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0));
+    if (!data) { close(); return false; }
+    is_mapped = true;
+    return true;
+#else
+    int fd = ::open(filepath, O_RDONLY);
+    if (fd == -1) return false;
+    struct stat sb;
+    if (fstat(fd, &sb) != 0) { ::close(fd); return false; }
+    if (sb.st_size < 0) { ::close(fd); return false; }
+    if (static_cast<unsigned long long>(sb.st_size) >
+        static_cast<unsigned long long>((std::numeric_limits<size_t>::max)())) {
+      ::close(fd);
+      return false;
+    }
+    size = static_cast<size_t>(sb.st_size);
+    if (size == 0) { ::close(fd); data = ""; return true; }  // valid but empty
+    mapped_ptr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+    ::close(fd);  // the descriptor is closed right after mmap(), on success and on failure
+    if (mapped_ptr == MAP_FAILED) { mapped_ptr = NULL; size = 0; return false; }
+    data = static_cast<const char *>(mapped_ptr);
+    is_mapped = true;
+    return true;
+#endif
+  }
+  // Unmaps (only if is_mapped) and resets every field to the closed state.
+  void close() {
+#if defined(_WIN32)
+    if (is_mapped && data) { UnmapViewOfFile(data); }
+    data = NULL;
+    is_mapped = false;
+    if (hMapping != NULL) { CloseHandle(hMapping); hMapping = NULL; }
+    if (hFile != INVALID_HANDLE_VALUE) { CloseHandle(hFile); hFile = INVALID_HANDLE_VALUE; }
+#else
+    if (is_mapped && mapped_ptr && mapped_ptr != MAP_FAILED) { munmap(mapped_ptr, size); }
+    mapped_ptr = NULL;
+    data = NULL;
+    is_mapped = false;
+#endif
+    size = 0;
+  }
+
+  ~MappedFile() { close(); }
+
+ private:
+  MappedFile(const MappedFile &);             // non-copyable
+  MappedFile &operator=(const MappedFile &);  // non-copyable
+};
+#endif  // TINYOBJLOADER_USE_MMAP
+
+
+// Index triple (position / texcoord / normal); default construction sets all to -1.
+struct vertex_index_t {
+  int v_idx, vt_idx, vn_idx;
+  vertex_index_t() { v_idx = vt_idx = vn_idx = -1; }
+  explicit vertex_index_t(int idx) { v_idx = vt_idx = vn_idx = idx; }
+  vertex_index_t(int vidx, int vtidx, int vnidx) { v_idx = vidx; vt_idx = vtidx; vn_idx = vnidx; }
+};
+
+// Internal data structure for face representation:
+// vertex indices + smoothing group.
+struct face_t {
+  unsigned int
+      smoothing_group_id;  // smoothing group id. 0 = smoothing group is off.
+  int pad_;  // always initialized to 0; presumably explicit padding (TODO confirm)
+  std::vector<vertex_index_t> vertex_indices;  // face vertex indices.
+
+  face_t() : smoothing_group_id(0), pad_(0) {}
+};
+
+// Internal data structure for line representation
+struct __line_t {
+  // l v1/vt1 v2/vt2 ...
+  // In the specification, a line primitive does not have a normal index, but
+  // TinyObjLoader allows it.
+  std::vector<vertex_index_t> vertex_indices;
+};
+
+// Internal data structure for points representation
+struct __points_t {
+  // p v1 v2 ...
+  // In the specification, a point primitive has neither a normal index nor a
+  // texture coord index, but TinyObjLoader allows them.
+  std::vector<vertex_index_t> vertex_indices;
+};
+
+// Three integer counters (ints, reals, strings), all zero after construction.
+// Presumably the value counts of a tag record; confirm against the tag parser.
+struct tag_sizes {
+  tag_sizes() { num_ints = num_reals = num_strings = 0; }
+  int num_ints, num_reals, num_strings;
+};
+
+struct obj_shape {  // flat real_t arrays named after the OBJ `v`/`vn`/`vt` statements
+  std::vector<real_t> v;  // presumably vertex positions (TODO confirm)
+  std::vector<real_t> vn;  // presumably vertex normals (TODO confirm)
+  std::vector<real_t> vt;  // presumably texture coordinates (TODO confirm)
+};
+
+// Holds the face, line and points primitives together so they can be
+// cleared or tested for emptiness as one unit.
+struct PrimGroup {
+  std::vector<face_t> faceGroup;
+  std::vector<__line_t> lineGroup;
+  std::vector<__points_t> pointsGroup;
+
+  // Drops every stored primitive of every kind.
+  void clear() { faceGroup.clear(); lineGroup.clear(); pointsGroup.clear(); }
+
+  // True when no face, line or points primitive is stored.
+  bool IsEmpty() const {
+    if (!faceGroup.empty()) return false;
+    if (!lineGroup.empty()) return false;
+    return pointsGroup.empty();
+  }
+
+  // TODO(syoyo): bspline, surface, ...
+};
+
+// Character classification helpers. See
+// http://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf
+#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t'))  // space or tab
+#define IS_DIGIT(x) \
+  (static_cast<unsigned int>((x) - '0') < static_cast<unsigned int>(10))  // '0'..'9' via one unsigned compare
+#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0'))  // CR, LF or NUL
+
+template <typename T>
+static inline std::string toString(const T &t) {
+  std::stringstream text;  // formats `t` through its operator<<
+  text << t;
+  return text.str();
+}
+
+// Returns `input` with a leading UTF-8 byte-order mark (EF BB BF) removed;
+// any other string is returned unchanged.
+static inline std::string removeUtf8Bom(const std::string& input) {
+    const unsigned char bom[3] = {0xEF, 0xBB, 0xBF};
+    if (input.size() < 3) return input;
+    for (size_t i = 0; i < 3; ++i) {
+        if (static_cast<unsigned char>(input[i]) != bom[i]) return input;
+    }
+    return input.substr(3);  // skip the three BOM bytes
+}
+
+// Returns a copy of `s` with any run of trailing spaces and tabs removed.
+static inline std::string trimTrailingWhitespace(const std::string &s) {
+  size_t keep = s.size();
+  while (keep > 0 && (s[keep - 1] == ' ' || s[keep - 1] == '\t')) --keep;
+  return s.substr(0, keep);
+}
+
+struct warning_context {  // where warnings go and how they are prefixed
+  std::string *warn;  // warning text is appended here; may be NULL (then nothing is written)
+  size_t line_number;  // printed after the file name in warning messages
+  std::string filename;  // printed first in warning messages
+};
+
+// Converts a size_t to int, saturating at INT_MAX instead of overflowing.
+static inline int size_to_int(size_t sz) {
+  return (sz <= static_cast<size_t>(INT_MAX)) ? static_cast<int>(sz) : INT_MAX;
+}
+
+// Converts an OBJ index into a 0-based index stored in *ret. Positive values
+// are absolute and 1-based; negative values are relative to the element
+// count `n`; zero is invalid per the spec.
+// Returns false when `ret` is NULL, when a relative index resolves below
+// zero, or when idx is 0 and `allow_zero` is false.
+static inline bool fixIndex(int idx, int n, int *ret, bool allow_zero,
+                            const warning_context &context) {
+  if (ret == NULL) return false;
+
+  if (idx > 0) {
+    // Absolute, 1-based index.
+    *ret = idx - 1;
+    return true;
+  }
+
+  if (idx < 0) {
+    // Relative index: counts back from the element count. The resolved
+    // value is stored even when it turns out to be invalid.
+    const int resolved = n + idx;
+    *ret = resolved;
+    return resolved >= 0;
+  }
+
+  // idx == 0: warn (if a sink was supplied), store -1 and let `allow_zero`
+  // decide the outcome.
+  if (context.warn != NULL) {
+    std::string &sink = *context.warn;
+    sink += context.filename + ":" + toString(context.line_number) +
+            ": warning: zero value index found (will have a value of -1 for "
+            "normal and tex indices)\n";
+  }
+
+  *ret = -1;
+  return allow_zero;
+}
+
+// Skips leading spaces/tabs, then returns the characters up to the next
+// space, tab, CR or NUL, leaving *token just past that run.
+static inline std::string parseString(const char **token) {
+  const char *begin = (*token) + strspn((*token), " \t");
+  const char *stop = begin + strcspn(begin, " \t\r");
+  (*token) = stop;
+  return std::string(begin, stop);
+}
+// Parses the next token as a base-10 int (clamped to [INT_MIN, INT_MAX]).
+static inline int parseInt(const char **token) {
+  (*token) += strspn((*token), " \t");
+  const long v = strtol((*token), NULL, 10);  // unlike atoi(), defined on overflow
+  (*token) += strcspn((*token), " \t\r");
+  return (v > INT_MAX) ? INT_MAX : ((v < INT_MIN) ? INT_MIN : static_cast<int>(v));
+}
+
+#ifndef TINYOBJLOADER_DISABLE_FAST_FLOAT
+
+// ---- fast_float-based float parser (bit-exact with strtod, ~3x faster) ----
+
+namespace detail_fp {
+
+// Keyword-matching helpers used to recognize nan/inf spellings before the
+// numeric parser runs.
+
+// Compares the NUL-terminated `prefix` against the bytes starting at `p`,
+// ignoring ASCII case and never reading at or beyond `end`.
+// Returns the position just past the match, or NULL when it does not match.
+static inline const char *match_iprefix(const char *p, const char *end,
+                                        const char *prefix) {
+  for (; *prefix != '\0'; ++p, ++prefix) {
+    if (p == end) return NULL;  // input ran out before the prefix did
+    // Fold 'A'..'Z' to lower case on both sides, then compare.
+    char have = *p;
+    char want = *prefix;
+    if (have >= 'A' && have <= 'Z') have += 32;
+    if (want >= 'A' && want <= 'Z') want += 32;
+    if (have != want) return NULL;
+  }
+  return p;
+}
+
+// Recognizes an optionally signed "nan", "inf" or "infinity" (any letter
+// case) at the start of [first, last). Characters after the keyword are not
+// inspected.
+//
+// On a match returns true, stores the replacement value in *result and the
+// position just past the keyword in *end_ptr:
+//   nan           -> 0.0 (the sign is ignored)
+//   +inf/infinity -> the largest finite double
+//   -inf/infinity -> the lowest finite double
+// On a mismatch returns false and writes nothing.
+static inline bool tryParseNanInf(const char *first, const char *last,
+                                  double *result, const char **end_ptr) {
+  if (first >= last) {
+    return false;
+  }
+
+  // Optional single sign character.
+  const bool negative = (*first == '-');
+  const char *p = (negative || *first == '+') ? first + 1 : first;
+  if (p >= last) return false;
+
+  // Try "nan" first.
+  if (const char *after_nan = match_iprefix(p, last, "nan")) {
+    *result = 0.0;  // nan -> 0.0 for OBJ
+    *end_ptr = after_nan;
+    return true;
+  }
+
+  // "inf" must match; "inity" may follow to complete "infinity", in which
+  // case the longer spelling determines the end position.
+  const char *after_inf = match_iprefix(p, last, "inf");
+  if (after_inf == NULL) return false;
+  const char *after_infinity = match_iprefix(after_inf, last, "inity");
+
+  // Infinities are replaced by the extreme finite doubles.
+  *result = negative ? std::numeric_limits<double>::lowest()
+                     : (std::numeric_limits<double>::max)();
+  *end_ptr = (after_infinity != NULL) ? after_infinity : after_inf;
+  return true;
+}
+
+}  // namespace detail_fp
+
+// Attempts to parse a floating-point number from the range [s, s_end).
+// Parsing is delegated to fast_float::from_chars, with two OBJ-specific
+// extras: a leading '+' is accepted, and nan/inf spellings are mapped to
+// replacement values by detail_fp::tryParseNanInf.
+//
+// s_end marks where reading must absolutely stop (for example the end of
+// the token), so the parser never runs past the buffer.
+//
+// Returns true and stores the value in *result on success. On failure (null
+// argument, empty range, parse error) returns false and *result is untouched.
+static bool tryParseDouble(const char *s, const char *s_end, double *result) {
+  if (!s || !s_end || !result || s >= s_end) {
+    return false;
+  }
+
+  // nan/inf candidates start with [nNiI], optionally preceded by one sign.
+  const char *probe = (*s == '+' || *s == '-') ? s + 1 : s;
+  if (probe < s_end) {
+    const char lead = *probe;
+    if (lead == 'n' || lead == 'N' || lead == 'i' || lead == 'I') {
+      const char *ignored_end = NULL;  // end position is not needed here
+      if (detail_fp::tryParseNanInf(s, s_end, result, &ignored_end)) {
+        return true;
+      }
+    }
+  }
+
+  // Everything else goes to fast_float; allow_leading_plus makes it accept
+  // a '+' sign natively.
+  double parsed;
+  auto status = fast_float::from_chars(s, s_end, parsed,
+      fast_float::chars_format::general |
+      fast_float::chars_format::allow_leading_plus);
+  if (status.ec == tinyobj_ff::ff_errc::ok) {
+    *result = parsed;
+    return true;
+  }
+
+  // invalid_argument or result_out_of_range: *result is deliberately left
+  // unchanged so the caller's preset default survives.
+  return false;
+}
+
+// Reads the next token as a real; returns `default_value` if it does not parse.
+static inline real_t parseReal(const char **token, double default_value = 0.0) {
+  const char *begin = (*token) + strspn((*token), " \t");
+  const char *stop = begin + strcspn(begin, " \t\r");
+  double parsed = default_value;
+  tryParseDouble(begin, stop, &parsed);  // leaves `parsed` alone on failure
+  (*token) = stop;
+  return static_cast<real_t>(parsed);
+}
+
+// Reads the next token as a real number into *out and reports whether it
+// parsed; *out is untouched on failure, and *token always moves past the token.
+static inline bool parseReal(const char **token, real_t *out) {
+  const char *begin = (*token) + strspn((*token), " \t");
+  const char *stop = begin + strcspn(begin, " \t\r");
+  (*token) = stop;
+
+  double parsed;
+  if (!tryParseDouble(begin, stop, &parsed)) return false;
+  (*out) = static_cast<real_t>(parsed);
+  return true;
+}
+
+#else  // TINYOBJLOADER_DISABLE_FAST_FLOAT
+
+// ---- Legacy hand-written float parser (fallback) ----
+
+// Tries to parse a floating point number located at s.
+//
+// s_end should be a location in the string where reading should absolutely
+// stop. For example at the end of the string, to prevent buffer overflows.
+//
+// Parses the following EBNF grammar:
+//   sign    = "+" | "-" ;
+//   END     = ? anything not in digit ?
+//   digit   = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+//   integer = [sign] , digit , {digit} ;
+//   decimal = integer , ["." , integer] ;
+//   float   = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ;
+//
+//  Valid strings are for example:
+//   -0  +3.1417e+2  -0.0E-3  1.0324  -1.41   11e2
+//
+// If the parsing is a success, result is set to the parsed value and true
+// is returned.
+//
+// The function is greedy and will parse until any of the following happens:
+//  - a non-conforming character is encountered.
+//  - s_end is reached.
+//
+// The following situations trigger a failure:
+//  - s >= s_end.
+//  - parse failure.
+//
+static bool tryParseDouble(const char *s, const char *s_end, double *result) {
+  if (s >= s_end) {
+    return false;
+  }
+
+  double mantissa = 0.0;
+  // This exponent is base 2 rather than 10.
+  // However the exponent we parse is supposed to be one of ten,
+  // thus we must take care to convert the exponent/and or the
+  // mantissa to a * 2^E, where a is the mantissa and E is the
+  // exponent.
+  // To get the final double we will use ldexp, it requires the
+  // exponent to be in base 2.
+  int exponent = 0;
+
+  // NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED
+  // TO JUMP OVER DEFINITIONS.
+  char sign = '+';
+  char exp_sign = '+';
+  char const *curr = s;
+
+  // How many characters were read in a loop.
+  int read = 0;
+  // Tells whether a loop terminated due to reaching s_end.
+  bool end_not_reached = false;
+  bool leading_decimal_dots = false;
+
+  /*
+          BEGIN PARSING.
+  */
+
+  // Find out what sign we've got.
+  if (*curr == '+' || *curr == '-') {
+    sign = *curr;
+    curr++;
+    if ((curr != s_end) && (*curr == '.')) {
+      // accept. Something like `.7e+2`, `-.5234`
+      leading_decimal_dots = true;
+    }
+  } else if (IS_DIGIT(*curr)) { /* Pass through. */
+  } else if (*curr == '.') {
+    // accept. Something like `.7e+2`, `-.5234`
+    leading_decimal_dots = true;
+  } else {
+    goto fail;
+  }
+
+  // Read the integer part.
+  end_not_reached = (curr != s_end);
+  if (!leading_decimal_dots) {
+    while (end_not_reached && IS_DIGIT(*curr)) {
+      mantissa *= 10;
+      mantissa += static_cast<int>(*curr - 0x30);
+      curr++;
+      read++;
+      end_not_reached = (curr != s_end);
+    }
+
+    // We must make sure we actually got something.
+    if (read == 0) goto fail;
+  }
+
+  // We allow numbers of form "#", "###" etc.
+  if (!end_not_reached) goto assemble;
+
+  // Read the decimal part.
+  if (*curr == '.') {
+    curr++;
+    read = 1;
+    end_not_reached = (curr != s_end);
+    while (end_not_reached && IS_DIGIT(*curr)) {
       static const double pow_lut[] = {
           1.0, 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001,
       };
@@ -911,6 +6155,13 @@ static bool tryParseDouble(const char *s, const char *s_end, double *result) {
     read = 0;
     end_not_reached = (curr != s_end);
     while (end_not_reached && IS_DIGIT(*curr)) {
+      // To sidestep MSVC's min/max macro definitions,
+      // use a hard-coded INT_MAX value.
+      if (exponent >
+          ((2147483647 - 9) / 10)) {  // (INT_MAX - 9) / 10, guards both multiply and add
+        // Integer overflow
+        goto fail;
+      }
       exponent *= 10;
       exponent += static_cast<int>(*curr - 0x30);
       curr++;
@@ -953,6 +6204,8 @@ static inline bool parseReal(const char **token, real_t *out) {
   return ret;
 }
 
+#endif  // TINYOBJLOADER_DISABLE_FAST_FLOAT
+
 static inline void parseReal2(real_t *x, real_t *y, const char **token,
                               const double default_x = 0.0,
                               const double default_y = 0.0) {
@@ -969,188 +6222,646 @@ static inline void parseReal3(real_t *x, real_t *y, real_t *z,
   (*z) = parseReal(token, default_z);
 }
 
-static inline void parseV(real_t *x, real_t *y, real_t *z, real_t *w,
-                          const char **token, const double default_x = 0.0,
-                          const double default_y = 0.0,
-                          const double default_z = 0.0,
-                          const double default_w = 1.0) {
-  (*x) = parseReal(token, default_x);
-  (*y) = parseReal(token, default_y);
-  (*z) = parseReal(token, default_z);
-  (*w) = parseReal(token, default_w);
+#if 0  // not used; this block is compiled out by the preprocessor
+static inline void parseV(real_t *x, real_t *y, real_t *z, real_t *w,
+                          const char **token, const double default_x = 0.0,
+                          const double default_y = 0.0,
+                          const double default_z = 0.0,
+                          const double default_w = 1.0) {
+  (*x) = parseReal(token, default_x);
+  (*y) = parseReal(token, default_y);
+  (*z) = parseReal(token, default_z);
+  (*w) = parseReal(token, default_w);
+}
+#endif  // 0
+
+// Extension: parse a vertex position followed by an optional weight or color.
+// Returns 3 for xyz, 4 for xyzw and 6 for xyzrgb.
+// `r` receives red (return value 6) or the 4th component w (return value 4).
+static inline int parseVertexWithColor(real_t *x, real_t *y, real_t *z,
+                                       real_t *r, real_t *g, real_t *b,
+                                       const char **token,
+                                       const double default_x = 0.0,
+                                       const double default_y = 0.0,
+                                       const double default_z = 0.0) {
+  // TODO: Check error
+  (*x) = parseReal(token, default_x);
+  (*y) = parseReal(token, default_y);
+  (*z) = parseReal(token, default_z);
+
+  // Optional tail: 4 components (x, y, z, w) or 6 components (x, y, z, r, g, b).
+  bool has_r = parseReal(token, r);
+
+  if (!has_r) {
+    (*r) = (*g) = (*b) = 1.0;
+    return 3;  // no 4th value: r, g and b are all set to 1.0
+  }
+
+  bool has_g = parseReal(token, g);
+
+  if (!has_g) {
+    (*g) = (*b) = 1.0;
+    return 4;  // exactly one extra value: *r keeps it (used as w by the caller)
+  }
+
+  bool has_b = parseReal(token, b);
+
+  if (!has_b) {
+    (*r) = (*g) = (*b) = 1.0;  // five values: the two extra ones are discarded
+    return 3;  // treated as xyz
+  }
+
+  return 6;
+}
+
+static inline bool parseOnOff(const char **token, bool default_value = true) {
+  (*token) += strspn((*token), " \t");  // skip leading spaces and tabs
+  const char *end = (*token) + strcspn((*token), " \t\r");  // end of this word
+
+  bool ret = default_value;  // result: "on" -> true, "off" -> false, else default
+  if ((0 == strncmp((*token), "on", 2))) {  // prefix compare: first 2 chars only
+    ret = true;
+  } else if ((0 == strncmp((*token), "off", 3))) {  // prefix compare: 3 chars
+    ret = false;
+  }
+
+  (*token) = end;  // the whole word is consumed whether or not it matched
+  return ret;
+}
+
+static inline texture_type_t parseTextureType(
+    const char **token, texture_type_t default_value = TEXTURE_TYPE_NONE) {
+  (*token) += strspn((*token), " \t");  // skip leading spaces and tabs
+  const char *end = (*token) + strcspn((*token), " \t\r");  // end of this word
+  texture_type_t ty = default_value;  // kept when no keyword below matches
+
+  if ((0 == strncmp((*token), "cube_top", strlen("cube_top")))) {  // prefix match
+    ty = TEXTURE_TYPE_CUBE_TOP;
+  } else if ((0 == strncmp((*token), "cube_bottom", strlen("cube_bottom")))) {
+    ty = TEXTURE_TYPE_CUBE_BOTTOM;
+  } else if ((0 == strncmp((*token), "cube_left", strlen("cube_left")))) {
+    ty = TEXTURE_TYPE_CUBE_LEFT;
+  } else if ((0 == strncmp((*token), "cube_right", strlen("cube_right")))) {
+    ty = TEXTURE_TYPE_CUBE_RIGHT;
+  } else if ((0 == strncmp((*token), "cube_front", strlen("cube_front")))) {
+    ty = TEXTURE_TYPE_CUBE_FRONT;
+  } else if ((0 == strncmp((*token), "cube_back", strlen("cube_back")))) {
+    ty = TEXTURE_TYPE_CUBE_BACK;
+  } else if ((0 == strncmp((*token), "sphere", strlen("sphere")))) {
+    ty = TEXTURE_TYPE_SPHERE;
+  }
+
+  (*token) = end;  // the whole word is consumed whether or not it matched
+  return ty;
+}
+
+static tag_sizes parseTagTriple(const char **token) {  // parses `ints[/reals[/strings]]`
+  tag_sizes ts;  // fields not reached below keep whatever tag_sizes' constructor set
+
+  (*token) += strspn((*token), " \t");  // skip leading spaces and tabs
+  ts.num_ints = atoi((*token));
+  (*token) += strcspn((*token), "/ \t\r");  // move to the next '/' or field end
+  if ((*token)[0] != '/') {
+    return ts;  // only the first count was given
+  }
+
+  (*token)++;  // Skip '/'
+
+  (*token) += strspn((*token), " \t");
+  ts.num_reals = atoi((*token));
+  (*token) += strcspn((*token), "/ \t\r");
+  if ((*token)[0] != '/') {
+    return ts;  // only the first two counts were given
+  }
+  (*token)++;  // Skip '/'
+
+  ts.num_strings = parseInt(token);
+
+  return ts;
+}
+
+// Parse one triple (i, i/j/k, i//k or i/j), passing each index through fixIndex(); *ret is written only on success.
+static bool parseTriple(const char **token, int vsize, int vnsize, int vtsize,
+                        vertex_index_t *ret, const warning_context &context) {
+  if (!ret) {
+    return false;  // nowhere to store the result
+  }
+
+  vertex_index_t vi(-1);
+
+  if (!fixIndex(atoi((*token)), vsize, &vi.v_idx, false, context)) {
+    return false;
+  }
+
+  (*token) += strcspn((*token), "/ \t\r");  // move to the next '/' or token end
+  if ((*token)[0] != '/') {
+    (*ret) = vi;  // form `i`: only v_idx was given
+    return true;
+  }
+  (*token)++;  // step over the first '/'
+
+  // i//k: a second '/' follows immediately, so only vn_idx is read
+  if ((*token)[0] == '/') {
+    (*token)++;
+    if (!fixIndex(atoi((*token)), vnsize, &vi.vn_idx, true, context)) {
+      return false;
+    }
+    (*token) += strcspn((*token), "/ \t\r");
+    (*ret) = vi;
+    return true;
+  }
+
+  // i/j/k or i/j: the next number goes to vt_idx
+  if (!fixIndex(atoi((*token)), vtsize, &vi.vt_idx, true, context)) {
+    return false;
+  }
+
+  (*token) += strcspn((*token), "/ \t\r");
+  if ((*token)[0] != '/') {
+    (*ret) = vi;  // form `i/j`
+    return true;
+  }
+
+  // i/j/k: the last number goes to vn_idx
+  (*token)++;  // skip '/'
+  if (!fixIndex(atoi((*token)), vnsize, &vi.vn_idx, true, context)) {
+    return false;
+  }
+  (*token) += strcspn((*token), "/ \t\r");
+
+  (*ret) = vi;
+
+  return true;
+}
+
+// Parse raw triples (i, i/j/k, i//k, i/j): atoi() results are stored as-is, without fixIndex().
+static vertex_index_t parseRawTriple(const char **token) {
+  vertex_index_t vi(static_cast<int>(0));  // 0 is an invalid index in OBJ
+
+  vi.v_idx = atoi((*token));
+  (*token) += strcspn((*token), "/ \t\r");  // move to the next '/' or token end
+  if ((*token)[0] != '/') {
+    return vi;  // form `i`
+  }
+  (*token)++;  // step over the first '/'
+
+  // i//k: a second '/' follows immediately, so only vn_idx is read
+  if ((*token)[0] == '/') {
+    (*token)++;
+    vi.vn_idx = atoi((*token));
+    (*token) += strcspn((*token), "/ \t\r");
+    return vi;
+  }
+
+  // i/j/k or i/j: the next number goes to vt_idx
+  vi.vt_idx = atoi((*token));
+  (*token) += strcspn((*token), "/ \t\r");
+  if ((*token)[0] != '/') {
+    return vi;  // form `i/j`
+  }
+
+  // i/j/k: the last number goes to vn_idx
+  (*token)++;  // skip '/'
+  vi.vn_idx = atoi((*token));
+  (*token) += strcspn((*token), "/ \t\r");
+  return vi;
+}
+
+// --- Stream-based parse functions ---
+
+static inline std::string sr_parseString(StreamReader &sr) {
+  sr.skip_space();
+  std::string s;  // collects characters up to the next delimiter
+  while (!sr.eof()) {
+    char c = sr.peek();  // space, tab, CR, LF and NUL end the word
+    if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\0') break;
+    s += c;
+    sr.advance(1);
+  }
+  return s;  // the delimiter itself is not consumed; may be empty
+}
+
+static inline int sr_parseInt(StreamReader &sr) {  // lenient: failures yield 0
+  sr.skip_space();
+  const char *start = sr.current_ptr();
+  size_t rem = sr.remaining();
+  size_t len = 0;
+  while (len < rem) {  // measure the token without consuming it
+    char c = start[len];
+    if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\0') break;
+    len++;
+  }
+  int i = 0;  // returned for empty, over-long, unparsable or out-of-range tokens
+  if (len > 0) {
+    char tmp[64];  // NUL-terminated scratch copy for strtol
+    size_t copy_len = len < 63 ? len : 63;
+    if (copy_len != len) {  // token longer than 63 chars: consume it, return 0
+      sr.advance(len);
+      return 0;
+    }
+    memcpy(tmp, start, copy_len);
+    tmp[copy_len] = '\0';
+    errno = 0;
+    char *endptr = NULL;
+    long val = strtol(tmp, &endptr, 10);
+    const bool has_error =
+        (errno == ERANGE || endptr == tmp ||
+         val > (std::numeric_limits<int>::max)() ||
+         val < (std::numeric_limits<int>::min)());
+    if (!has_error) {  // *endptr is not checked, so trailing characters are tolerated
+      i = static_cast<int>(val);
+    }
+  }
+  sr.advance(len);  // consume the token whether or not it parsed
+  return i;
+}
+
+static inline real_t sr_parseReal(StreamReader &sr, double default_value = 0.0) {
+  sr.skip_space();
+  const char *start = sr.current_ptr();
+  size_t rem = sr.remaining();
+  size_t len = 0;
+  while (len < rem) {  // measure the token without consuming it
+    char c = start[len];
+    if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\0') break;
+    len++;
+  }
+  double val = default_value;  // returned as-is when the token is empty
+  if (len > 0) {
+    tryParseDouble(start, start + len, &val);  // result ignored; TODO confirm val is untouched on failure
+  }
+  sr.advance(len);  // consume the token whether or not it parsed
+  return static_cast<real_t>(val);
+}
+
+static inline bool sr_parseReal(StreamReader &sr, real_t *out) {
+  sr.skip_space();
+  const char *start = sr.current_ptr();
+  size_t rem = sr.remaining();
+  size_t len = 0;
+  while (len < rem) {  // measure the token without consuming it
+    char c = start[len];
+    if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\0') break;
+    len++;
+  }
+  if (len == 0) return false;  // no token: *out untouched, only the spaces were skipped
+  double val;
+  bool ret = tryParseDouble(start, start + len, &val);
+  if (ret) {  // *out is written only on success
+    (*out) = static_cast<real_t>(val);
+  }
+  sr.advance(len);  // the token is consumed even when parsing failed
+  return ret;
+}
+
+static inline void sr_parseReal2(real_t *x, real_t *y, StreamReader &sr,
+                                 const double default_x = 0.0,
+                                 const double default_y = 0.0) {
+  (*x) = sr_parseReal(sr, default_x);  // two consecutive reals, each with its own default
+  (*y) = sr_parseReal(sr, default_y);
+}
+
+static inline void sr_parseReal3(real_t *x, real_t *y, real_t *z,
+                                 StreamReader &sr,
+                                 const double default_x = 0.0,
+                                 const double default_y = 0.0,
+                                 const double default_z = 0.0) {
+  (*x) = sr_parseReal(sr, default_x);  // three consecutive reals, each with its own default
+  (*y) = sr_parseReal(sr, default_y);
+  (*z) = sr_parseReal(sr, default_z);
+}
+
+static inline int sr_parseVertexWithColor(real_t *x, real_t *y, real_t *z,
+                                          real_t *r, real_t *g, real_t *b,
+                                          StreamReader &sr,
+                                          const double default_x = 0.0,
+                                          const double default_y = 0.0,
+                                          const double default_z = 0.0) {
+  (*x) = sr_parseReal(sr, default_x);  // returns 3 (xyz), 4 (xyzw) or 6 (xyzrgb)
+  (*y) = sr_parseReal(sr, default_y);
+  (*z) = sr_parseReal(sr, default_z);
+
+  bool has_r = sr_parseReal(sr, r);
+  if (!has_r) {
+    (*r) = (*g) = (*b) = 1.0;
+    return 3;  // no 4th value: r, g and b are all set to 1.0
+  }
+
+  bool has_g = sr_parseReal(sr, g);
+  if (!has_g) {
+    (*g) = (*b) = 1.0;
+    return 4;  // exactly one extra value: *r keeps it
+  }
+
+  bool has_b = sr_parseReal(sr, b);
+  if (!has_b) {
+    (*r) = (*g) = (*b) = 1.0;  // five values: the two extra ones are discarded
+    return 3;
+  }
+
+  return 6;
+}
+
+// --- Error-reporting overloads ---
+// These overloads push clang-style diagnostics into `err` when parsing fails
+// and return false so callers can early-return on unrecoverable parse errors.
+// The original signatures are preserved above for backward compatibility.
+
+static inline bool sr_parseInt(StreamReader &sr, int *out, std::string *err,
+                               const std::string &filename) {
+  sr.skip_space();  // every failure path below sets *out = 0 and returns false
+  const char *start = sr.current_ptr();
+  size_t rem = sr.remaining();
+  size_t len = 0;
+  while (len < rem) {  // measure the token without consuming it
+    char c = start[len];
+    if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\0') break;
+    len++;
+  }
+  if (len == 0) {  // no token at all; nothing further is consumed
+    if (err) {  // err may be null, in which case no message is recorded
+      (*err) += sr.format_error(filename, "expected integer value");
+    }
+    *out = 0;
+    return false;
+  }
+  char tmp[64];  // NUL-terminated scratch copy for strtol and for messages
+  size_t copy_len = len < 63 ? len : 63;
+  memcpy(tmp, start, copy_len);
+  tmp[copy_len] = '\0';
+  if (copy_len != len) {  // token does not fit in the 63-char scratch buffer
+    if (err) {
+      (*err) += sr.format_error(filename, "integer value too long");
+    }
+    *out = 0;
+    sr.advance(len);  // the offending token is still consumed
+    return false;
+  }
+  errno = 0;
+  char *endptr = NULL;
+  long val = strtol(tmp, &endptr, 10);
+  if (errno == ERANGE || val > (std::numeric_limits<int>::max)() ||
+      val < (std::numeric_limits<int>::min)()) {  // does not fit in an int
+    if (err) {
+      (*err) += sr.format_error(filename,
+          "integer value out of range, got '" + std::string(tmp) + "'");
+    }
+    *out = 0;
+    sr.advance(len);
+    return false;
+  }
+  if (endptr == tmp || (*endptr != '\0' && *endptr != ' ' && *endptr != '\t')) {
+    if (err) {  // no digits, or characters left over after the number
+      (*err) += sr.format_error(filename,
+          "expected integer, got '" + std::string(tmp) + "'");
+    }
+    *out = 0;
+    sr.advance(len);
+    return false;
+  }
+  *out = static_cast<int>(val);  // the whole token parsed as an in-range int
+  sr.advance(len);
+  return true;
+}
+
+static inline bool sr_parseReal(StreamReader &sr, real_t *out,
+                                 double default_value,
+                                 std::string *err,
+                                 const std::string &filename) {
+  sr.skip_space();  // returns false only when a token is present but unparsable
+  const char *start = sr.current_ptr();
+  size_t rem = sr.remaining();
+  size_t len = 0;
+  while (len < rem) {  // measure the token without consuming it
+    char c = start[len];
+    if (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\0') break;
+    len++;
+  }
+  if (len == 0) {
+    // No token to parse — not necessarily an error (e.g. optional component).
+    *out = static_cast<real_t>(default_value);
+    return true;
+  }
+  double val;
+  if (!tryParseDouble(start, start + len, &val)) {
+    if (err) {  // err may be null, in which case no message is recorded
+      char tmp[64];  // at most the first 63 chars of the token are quoted
+      size_t copy_len = len < 63 ? len : 63;
+      memcpy(tmp, start, copy_len);
+      tmp[copy_len] = '\0';
+      (*err) += sr.format_error(filename,
+          "expected number, got '" + std::string(tmp) + "'");
+    }
+    *out = static_cast<real_t>(default_value);  // the default is stored on failure too
+    sr.advance(len);  // the offending token is still consumed
+    return false;
+  }
+  *out = static_cast<real_t>(val);
+  sr.advance(len);
+  return true;
+}
+
+static inline bool sr_parseReal2(real_t *x, real_t *y, StreamReader &sr,
+                                  std::string *err,
+                                  const std::string &filename,
+                                  const double default_x = 0.0,
+                                  const double default_y = 0.0) {
+  if (!sr_parseReal(sr, x, default_x, err, filename)) return false;  // stops at the first failure
+  if (!sr_parseReal(sr, y, default_y, err, filename)) return false;  // *y is not written if x failed
+  return true;
 }
 
-// Extension: parse vertex with colors(6 items)
-static inline bool parseVertexWithColor(real_t *x, real_t *y, real_t *z,
-                                        real_t *r, real_t *g, real_t *b,
-                                        const char **token,
-                                        const double default_x = 0.0,
-                                        const double default_y = 0.0,
-                                        const double default_z = 0.0) {
-  (*x) = parseReal(token, default_x);
-  (*y) = parseReal(token, default_y);
-  (*z) = parseReal(token, default_z);
-
-  const bool found_color =
-      parseReal(token, r) && parseReal(token, g) && parseReal(token, b);
+static inline bool sr_parseReal3(real_t *x, real_t *y, real_t *z,
+                                  StreamReader &sr,
+                                  std::string *err,
+                                  const std::string &filename,
+                                  const double default_x = 0.0,
+                                  const double default_y = 0.0,
+                                  const double default_z = 0.0) {
+  if (!sr_parseReal(sr, x, default_x, err, filename)) return false;  // stops at the first failure
+  if (!sr_parseReal(sr, y, default_y, err, filename)) return false;  // later outputs stay unwritten
+  if (!sr_parseReal(sr, z, default_z, err, filename)) return false;
+  return true;
+}
 
-  if (!found_color) {
+// Returns the number of components parsed (3: xyz, 4: xyzw, 6: xyzrgb) on success, -1 on error.
+static inline int sr_parseVertexWithColor(real_t *x, real_t *y, real_t *z,
+                                          real_t *r, real_t *g, real_t *b,
+                                          StreamReader &sr,
+                                          std::string *err,
+                                          const std::string &filename,
+                                          const double default_x = 0.0,
+                                          const double default_y = 0.0,
+                                          const double default_z = 0.0) {
+  if (!sr_parseReal(sr, x, default_x, err, filename)) return -1;  // only x/y/z can fail
+  if (!sr_parseReal(sr, y, default_y, err, filename)) return -1;
+  if (!sr_parseReal(sr, z, default_z, err, filename)) return -1;
+
+  bool has_r = sr_parseReal(sr, r);  // optional tail uses the overload without err
+  if (!has_r) {
     (*r) = (*g) = (*b) = 1.0;
+    return 3;  // no 4th value
   }
 
-  return found_color;
-}
-
-static inline bool parseOnOff(const char **token, bool default_value = true) {
-  (*token) += strspn((*token), " \t");
-  const char *end = (*token) + strcspn((*token), " \t\r");
+  bool has_g = sr_parseReal(sr, g);
+  if (!has_g) {
+    (*g) = (*b) = 1.0;
+    return 4;  // exactly one extra value: *r keeps it
+  }
 
-  bool ret = default_value;
-  if ((0 == strncmp((*token), "on", 2))) {
-    ret = true;
-  } else if ((0 == strncmp((*token), "off", 3))) {
-    ret = false;
+  bool has_b = sr_parseReal(sr, b);
+  if (!has_b) {
+    (*r) = (*g) = (*b) = 1.0;  // five values: the two extra ones are discarded
+    return 3;
   }
 
-  (*token) = end;
-  return ret;
+  return 6;
 }
 
-static inline texture_type_t parseTextureType(
-    const char **token, texture_type_t default_value = TEXTURE_TYPE_NONE) {
-  (*token) += strspn((*token), " \t");
-  const char *end = (*token) + strcspn((*token), " \t\r");
-  texture_type_t ty = default_value;
+static inline int sr_parseIntNoSkip(StreamReader &sr);
 
-  if ((0 == strncmp((*token), "cube_top", strlen("cube_top")))) {
-    ty = TEXTURE_TYPE_CUBE_TOP;
-  } else if ((0 == strncmp((*token), "cube_bottom", strlen("cube_bottom")))) {
-    ty = TEXTURE_TYPE_CUBE_BOTTOM;
-  } else if ((0 == strncmp((*token), "cube_left", strlen("cube_left")))) {
-    ty = TEXTURE_TYPE_CUBE_LEFT;
-  } else if ((0 == strncmp((*token), "cube_right", strlen("cube_right")))) {
-    ty = TEXTURE_TYPE_CUBE_RIGHT;
-  } else if ((0 == strncmp((*token), "cube_front", strlen("cube_front")))) {
-    ty = TEXTURE_TYPE_CUBE_FRONT;
-  } else if ((0 == strncmp((*token), "cube_back", strlen("cube_back")))) {
-    ty = TEXTURE_TYPE_CUBE_BACK;
-  } else if ((0 == strncmp((*token), "sphere", strlen("sphere")))) {
-    ty = TEXTURE_TYPE_SPHERE;
+// Advance past the rest of a tag-triple field; stops (without consuming) at '/', an IS_SPACE character, line end or EOF.
+static inline void sr_skipTagField(StreamReader &sr) {
+  while (!sr.eof() && !sr.at_line_end() && !IS_SPACE(sr.peek()) &&
+         sr.peek() != '/') {
+    sr.advance(1);  // one character at a time
   }
-
-  (*token) = end;
-  return ty;
 }
 
-static tag_sizes parseTagTriple(const char **token) {
+static tag_sizes sr_parseTagTriple(StreamReader &sr) {  // parses `ints[/reals[/strings]]`
   tag_sizes ts;
 
-  (*token) += strspn((*token), " \t");
-  ts.num_ints = atoi((*token));
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
-    return ts;
+  sr.skip_space();
+  ts.num_ints = sr_parseIntNoSkip(sr);
+  sr_skipTagField(sr);  // drop anything left in the field after the digits
+  if (!sr.eof() && sr.peek() == '/') {  // a second count follows
+    sr.advance(1);
+    sr.skip_space();
+    ts.num_reals = sr_parseIntNoSkip(sr);
+    sr_skipTagField(sr);
+    if (!sr.eof() && sr.peek() == '/') {  // a third count follows
+      sr.advance(1);
+      ts.num_strings = sr_parseInt(sr);
+    }
   }
+  return ts;  // counts that were not reached are left as tag_sizes ts; initialised them
+}
 
-  (*token)++;  // Skip '/'
-
-  (*token) += strspn((*token), " \t");
-  ts.num_reals = atoi((*token));
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
-    return ts;
+static inline int sr_parseIntNoSkip(StreamReader &sr) {  // no skip_space(): starts at the current position
+  const char *start = sr.current_ptr();
+  size_t rem = sr.remaining();
+  size_t len = 0;
+  if (len < rem && (start[len] == '+' || start[len] == '-')) len++;  // optional sign
+  while (len < rem && start[len] >= '0' && start[len] <= '9') len++;  // digits only
+  int i = 0;  // returned when nothing usable was found
+  if (len > 0) {
+    char tmp[64];  // NUL-terminated scratch copy for strtol
+    size_t copy_len = len < 63 ? len : 63;
+    if (copy_len != len) {  // more than 63 sign/digit chars: consume them, return 0
+      sr.advance(len);
+      return 0;
+    }
+    memcpy(tmp, start, copy_len);
+    tmp[copy_len] = '\0';
+    errno = 0;
+    char *endptr = NULL;
+    long val = strtol(tmp, &endptr, 10);
+    if (errno == 0 && endptr != tmp && *endptr == '\0' &&
+        val <= (std::numeric_limits<int>::max)() &&
+        val >= (std::numeric_limits<int>::min)()) {  // a lone sign fails endptr != tmp
+      i = static_cast<int>(val);
+    }
   }
-  (*token)++;  // Skip '/'
-
-  ts.num_strings = parseInt(token);
-
-  return ts;
+  sr.advance(len);  // only the sign/digit run is consumed
+  return i;
 }
 
-// Parse triples with index offsets: i, i/j/k, i//k, i/j
-static bool parseTriple(const char **token, int vsize, int vnsize, int vtsize,
-                        vertex_index_t *ret) {
-  if (!ret) {
-    return false;
+static inline void sr_skipUntil(StreamReader &sr, const char *delims) {
+  while (!sr.eof()) {  // stops at EOF or at the first character found in delims
+    char c = sr.peek();
+    for (const char *d = delims; *d; d++) {  // delims is a NUL-terminated set
+      if (c == *d) return;  // the delimiter itself is not consumed
+    }
+    sr.advance(1);
   }
+}
+
+static bool sr_parseTriple(StreamReader &sr, int vsize, int vnsize, int vtsize,
+                           vertex_index_t *ret, const warning_context &context) {
+  if (!ret) return false;  // nowhere to store the result
 
   vertex_index_t vi(-1);
 
-  if (!fixIndex(atoi((*token)), vsize, &(vi.v_idx))) {
+  sr.skip_space();
+  if (!fixIndex(sr_parseIntNoSkip(sr), vsize, &vi.v_idx, false, context)) {
     return false;
   }
 
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
+  sr_skipUntil(sr, "/ \t\r\n");  // move to the next '/' or token end
+  if (sr.eof() || sr.peek() != '/') {  // form `i`: only v_idx was given
     (*ret) = vi;
     return true;
   }
-  (*token)++;
+  sr.advance(1);  // step over the first '/'
 
   // i//k
-  if ((*token)[0] == '/') {
-    (*token)++;
-    if (!fixIndex(atoi((*token)), vnsize, &(vi.vn_idx))) {
+  if (!sr.eof() && sr.peek() == '/') {  // second '/' follows: only vn_idx is read
+    sr.advance(1);
+    if (!fixIndex(sr_parseIntNoSkip(sr), vnsize, &vi.vn_idx, true, context)) {
       return false;
     }
-    (*token) += strcspn((*token), "/ \t\r");
+    sr_skipUntil(sr, "/ \t\r\n");
     (*ret) = vi;
     return true;
   }
 
   // i/j/k or i/j
-  if (!fixIndex(atoi((*token)), vtsize, &(vi.vt_idx))) {
+  if (!fixIndex(sr_parseIntNoSkip(sr), vtsize, &vi.vt_idx, true, context)) {
     return false;
   }
 
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
+  sr_skipUntil(sr, "/ \t\r\n");
+  if (sr.eof() || sr.peek() != '/') {  // form `i/j`
     (*ret) = vi;
     return true;
   }
 
   // i/j/k
-  (*token)++;  // skip '/'
-  if (!fixIndex(atoi((*token)), vnsize, &(vi.vn_idx))) {
+  sr.advance(1);  // skip '/'
+  if (!fixIndex(sr_parseIntNoSkip(sr), vnsize, &vi.vn_idx, true, context)) {
     return false;
   }
-  (*token) += strcspn((*token), "/ \t\r");
+  sr_skipUntil(sr, "/ \t\r\n");
 
   (*ret) = vi;
-
   return true;
 }
 
-// Parse raw triples: i, i/j/k, i//k, i/j
-static vertex_index_t parseRawTriple(const char **token) {
-  vertex_index_t vi(static_cast<int>(0));  // 0 is an invalid index in OBJ
+static vertex_index_t sr_parseRawTriple(StreamReader &sr) {
+  vertex_index_t vi(static_cast<int>(0));  // parsed values are stored as-is, without fixIndex()
 
-  vi.v_idx = atoi((*token));
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
-    return vi;
-  }
-  (*token)++;
+  sr.skip_space();
+  vi.v_idx = sr_parseIntNoSkip(sr);
+  sr_skipUntil(sr, "/ \t\r\n");  // move to the next '/' or token end
+  if (sr.eof() || sr.peek() != '/') return vi;  // form `i`
+  sr.advance(1);  // step over the first '/'
 
   // i//k
-  if ((*token)[0] == '/') {
-    (*token)++;
-    vi.vn_idx = atoi((*token));
-    (*token) += strcspn((*token), "/ \t\r");
+  if (!sr.eof() && sr.peek() == '/') {  // second '/' follows: only vn_idx is read
+    sr.advance(1);
+    vi.vn_idx = sr_parseIntNoSkip(sr);
+    sr_skipUntil(sr, "/ \t\r\n");
     return vi;
   }
 
   // i/j/k or i/j
-  vi.vt_idx = atoi((*token));
-  (*token) += strcspn((*token), "/ \t\r");
-  if ((*token)[0] != '/') {
-    return vi;
-  }
+  vi.vt_idx = sr_parseIntNoSkip(sr);
+  sr_skipUntil(sr, "/ \t\r\n");
+  if (sr.eof() || sr.peek() != '/') return vi;  // form `i/j`
 
-  // i/j/k
-  (*token)++;  // skip '/'
-  vi.vn_idx = atoi((*token));
-  (*token) += strcspn((*token), "/ \t\r");
+  sr.advance(1);  // form `i/j/k`: skip '/' and read vn_idx
+  vi.vn_idx = sr_parseIntNoSkip(sr);
+  sr_skipUntil(sr, "/ \t\r\n");
   return vi;
 }
 
@@ -1332,12 +7043,43 @@ static int pnpoly(int nvert, T *vertx, T *verty, T testx, T testy) {
   return c;
 }
 
+struct TinyObjPoint {  // plain 3-component value (x, y, z) of real_t
+  real_t x, y, z;
+  TinyObjPoint() : x(0), y(0), z(0) {}  // default: all components zero
+  TinyObjPoint(real_t x_, real_t y_, real_t z_) : x(x_), y(y_), z(z_) {}
+};
+
+inline TinyObjPoint cross(const TinyObjPoint &v1, const TinyObjPoint &v2) {
+  return TinyObjPoint(v1.y * v2.z - v1.z * v2.y, v1.z * v2.x - v1.x * v2.z,
+                      v1.x * v2.y - v1.y * v2.x);  // cross product v1 x v2
+}
+
+inline real_t dot(const TinyObjPoint &v1, const TinyObjPoint &v2) {
+  return (v1.x * v2.x + v1.y * v2.y + v1.z * v2.z);  // dot product of v1 and v2
+}
+
+inline real_t GetLength(const TinyObjPoint &e) {  // read-only: also accepts const objects and temporaries
+  return std::sqrt(e.x * e.x + e.y * e.y + e.z * e.z);  // Euclidean length of (x, y, z)
+}
+
+inline TinyObjPoint Normalize(TinyObjPoint e) {  // e is taken by value; the caller's object is not modified
+  real_t len = GetLength(e);
+  if (len <= real_t(0)) return TinyObjPoint(real_t(0), real_t(0), real_t(0));  // avoid dividing by zero
+  real_t inv_length = real_t(1) / len;
+  return TinyObjPoint(e.x * inv_length, e.y * inv_length, e.z * inv_length);  // e scaled by 1/len
+}
+
+inline TinyObjPoint WorldToLocal(const TinyObjPoint &a, const TinyObjPoint &u,
+                                 const TinyObjPoint &v, const TinyObjPoint &w) {
+  return TinyObjPoint(dot(a, u), dot(a, v), dot(a, w));  // components of a along u, v and w
+}
+
 // TODO(syoyo): refactor function.
 static bool exportGroupsToShape(shape_t *shape, const PrimGroup &prim_group,
                                 const std::vector<tag_t> &tags,
                                 const int material_id, const std::string &name,
-                                bool triangulate,
-                                const std::vector<real_t> &v) {
+                                bool triangulate, const std::vector<real_t> &v,
+                                std::string *warn) {
   if (prim_group.IsEmpty()) {
     return false;
   }
@@ -1354,30 +7096,41 @@ static bool exportGroupsToShape(shape_t *shape, const PrimGroup &prim_group,
 
       if (npolys < 3) {
         // Face must have 3+ vertices.
+        if (warn) {
+          (*warn) += "Degenerated face found\n.";
+        }
         continue;
       }
 
-      vertex_index_t i0 = face.vertex_indices[0];
-      vertex_index_t i1(-1);
-      vertex_index_t i2 = face.vertex_indices[1];
+      if (triangulate && npolys != 3) {
+        if (npolys == 4) {
+          vertex_index_t i0 = face.vertex_indices[0];
+          vertex_index_t i1 = face.vertex_indices[1];
+          vertex_index_t i2 = face.vertex_indices[2];
+          vertex_index_t i3 = face.vertex_indices[3];
+
+          if (i0.v_idx < 0 || i1.v_idx < 0 || i2.v_idx < 0 || i3.v_idx < 0) {
+            if (warn) {
+              (*warn) += "Face with invalid vertex index found.\n";
+            }
+            continue;
+          }
 
-      if (triangulate) {
-        // find the two axes to work in
-        size_t axes[2] = {1, 2};
-        for (size_t k = 0; k < npolys; ++k) {
-          i0 = face.vertex_indices[(k + 0) % npolys];
-          i1 = face.vertex_indices[(k + 1) % npolys];
-          i2 = face.vertex_indices[(k + 2) % npolys];
           size_t vi0 = size_t(i0.v_idx);
           size_t vi1 = size_t(i1.v_idx);
           size_t vi2 = size_t(i2.v_idx);
+          size_t vi3 = size_t(i3.v_idx);
 
           if (((3 * vi0 + 2) >= v.size()) || ((3 * vi1 + 2) >= v.size()) ||
-              ((3 * vi2 + 2) >= v.size())) {
+              ((3 * vi2 + 2) >= v.size()) || ((3 * vi3 + 2) >= v.size())) {
             // Invalid triangle.
             // FIXME(syoyo): Is it ok to simply skip this invalid triangle?
+            if (warn) {
+              (*warn) += "Face with invalid vertex index found.\n";
+            }
             continue;
           }
+
           real_t v0x = v[vi0 * 3 + 0];
           real_t v0y = v[vi0 * 3 + 1];
           real_t v0z = v[vi0 * 3 + 2];
@@ -1387,186 +7140,449 @@ static bool exportGroupsToShape(shape_t *shape, const PrimGroup &prim_group,
           real_t v2x = v[vi2 * 3 + 0];
           real_t v2y = v[vi2 * 3 + 1];
           real_t v2z = v[vi2 * 3 + 2];
-          real_t e0x = v1x - v0x;
-          real_t e0y = v1y - v0y;
-          real_t e0z = v1z - v0z;
-          real_t e1x = v2x - v1x;
-          real_t e1y = v2y - v1y;
-          real_t e1z = v2z - v1z;
-          real_t cx = std::fabs(e0y * e1z - e0z * e1y);
-          real_t cy = std::fabs(e0z * e1x - e0x * e1z);
-          real_t cz = std::fabs(e0x * e1y - e0y * e1x);
-          const real_t epsilon = std::numeric_limits<real_t>::epsilon();
-          if (cx > epsilon || cy > epsilon || cz > epsilon) {
-            // found a corner
-            if (cx > cy && cx > cz) {
-            } else {
-              axes[0] = 0;
-              if (cz > cx && cz > cy) axes[1] = 1;
-            }
-            break;
-          }
-        }
+          real_t v3x = v[vi3 * 3 + 0];
+          real_t v3y = v[vi3 * 3 + 1];
+          real_t v3z = v[vi3 * 3 + 2];
+
+          // There are two candidates to split the quad into two triangles.
+          //
+          // Split along the shorter of the two diagonals (0-2 or 1-3).
+          // TODO: Is it better to determine the edge to split by calculating
+          // the area of each triangle?
+          //
+          // +---+
+          // |\  |
+          // | \ |
+          // |  \|
+          // +---+
+          //
+          // +---+
+          // |  /|
+          // | / |
+          // |/  |
+          // +---+
+
+          real_t e02x = v2x - v0x;
+          real_t e02y = v2y - v0y;
+          real_t e02z = v2z - v0z;
+          real_t e13x = v3x - v1x;
+          real_t e13y = v3y - v1y;
+          real_t e13z = v3z - v1z;
+
+          real_t sqr02 = e02x * e02x + e02y * e02y + e02z * e02z;
+          real_t sqr13 = e13x * e13x + e13y * e13y + e13z * e13z;
+
+          index_t idx0, idx1, idx2, idx3;
+
+          idx0.vertex_index = i0.v_idx;
+          idx0.normal_index = i0.vn_idx;
+          idx0.texcoord_index = i0.vt_idx;
+          idx1.vertex_index = i1.v_idx;
+          idx1.normal_index = i1.vn_idx;
+          idx1.texcoord_index = i1.vt_idx;
+          idx2.vertex_index = i2.v_idx;
+          idx2.normal_index = i2.vn_idx;
+          idx2.texcoord_index = i2.vt_idx;
+          idx3.vertex_index = i3.v_idx;
+          idx3.normal_index = i3.vn_idx;
+          idx3.texcoord_index = i3.vt_idx;
+
+          if (sqr02 < sqr13) {
+            // [0, 1, 2], [0, 2, 3]
+            shape->mesh.indices.push_back(idx0);
+            shape->mesh.indices.push_back(idx1);
+            shape->mesh.indices.push_back(idx2);
 
-        real_t area = 0;
-        for (size_t k = 0; k < npolys; ++k) {
-          i0 = face.vertex_indices[(k + 0) % npolys];
-          i1 = face.vertex_indices[(k + 1) % npolys];
-          size_t vi0 = size_t(i0.v_idx);
-          size_t vi1 = size_t(i1.v_idx);
-          if (((vi0 * 3 + axes[0]) >= v.size()) ||
-              ((vi0 * 3 + axes[1]) >= v.size()) ||
-              ((vi1 * 3 + axes[0]) >= v.size()) ||
-              ((vi1 * 3 + axes[1]) >= v.size())) {
-            // Invalid index.
-            continue;
-          }
-          real_t v0x = v[vi0 * 3 + axes[0]];
-          real_t v0y = v[vi0 * 3 + axes[1]];
-          real_t v1x = v[vi1 * 3 + axes[0]];
-          real_t v1y = v[vi1 * 3 + axes[1]];
-          area += (v0x * v1y - v0y * v1x) * static_cast<real_t>(0.5);
-        }
+            shape->mesh.indices.push_back(idx0);
+            shape->mesh.indices.push_back(idx2);
+            shape->mesh.indices.push_back(idx3);
+          } else {
+            // [0, 1, 3], [1, 2, 3]
+            shape->mesh.indices.push_back(idx0);
+            shape->mesh.indices.push_back(idx1);
+            shape->mesh.indices.push_back(idx3);
 
-        face_t remainingFace = face;  // copy
-        size_t guess_vert = 0;
-        vertex_index_t ind[3];
-        real_t vx[3];
-        real_t vy[3];
-
-        // How many iterations can we do without decreasing the remaining
-        // vertices.
-        size_t remainingIterations = face.vertex_indices.size();
-        size_t previousRemainingVertices = remainingFace.vertex_indices.size();
-
-        while (remainingFace.vertex_indices.size() > 3 &&
-               remainingIterations > 0) {
-          npolys = remainingFace.vertex_indices.size();
-          if (guess_vert >= npolys) {
-            guess_vert -= npolys;
+            shape->mesh.indices.push_back(idx1);
+            shape->mesh.indices.push_back(idx2);
+            shape->mesh.indices.push_back(idx3);
           }
 
-          if (previousRemainingVertices != npolys) {
-            // The number of remaining vertices decreased. Reset counters.
-            previousRemainingVertices = npolys;
-            remainingIterations = npolys;
-          } else {
-            // We didn't consume a vertex on previous iteration, reduce the
-            // available iterations.
-            remainingIterations--;
-          }
+          // Two triangle faces
+          shape->mesh.num_face_vertices.push_back(3);
+          shape->mesh.num_face_vertices.push_back(3);
 
-          for (size_t k = 0; k < 3; k++) {
-            ind[k] = remainingFace.vertex_indices[(guess_vert + k) % npolys];
-            size_t vi = size_t(ind[k].v_idx);
-            if (((vi * 3 + axes[0]) >= v.size()) ||
-                ((vi * 3 + axes[1]) >= v.size())) {
-              // ???
-              vx[k] = static_cast<real_t>(0.0);
-              vy[k] = static_cast<real_t>(0.0);
-            } else {
-              vx[k] = v[vi * 3 + axes[0]];
-              vy[k] = v[vi * 3 + axes[1]];
+          shape->mesh.material_ids.push_back(material_id);
+          shape->mesh.material_ids.push_back(material_id);
+
+          shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id);
+          shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id);
+
+        } else {
+#ifdef TINYOBJLOADER_USE_MAPBOX_EARCUT
+          // Validate all vertex indices before accessing the vertex array.
+          {
+            bool valid_poly = true;
+            for (size_t k = 0; k < npolys; ++k) {
+              size_t vi = size_t(face.vertex_indices[k].v_idx);
+              if ((3 * vi + 2) >= v.size()) {
+                valid_poly = false;
+                break;
+              }
+            }
+            if (!valid_poly) {
+              if (warn) {
+                (*warn) += "Face with invalid vertex index found.\n";
+              }
+              continue;
             }
           }
-          real_t e0x = vx[1] - vx[0];
-          real_t e0y = vy[1] - vy[0];
-          real_t e1x = vx[2] - vx[1];
-          real_t e1y = vy[2] - vy[1];
-          real_t cross = e0x * e1y - e0y * e1x;
-          // if an internal angle
-          if (cross * area < static_cast<real_t>(0.0)) {
-            guess_vert += 1;
+
+          vertex_index_t i0 = face.vertex_indices[0];
+          vertex_index_t i0_2 = i0;
+
+          // TMW change: Find the normal axis of the polygon using Newell's
+          // method
+          TinyObjPoint n;
+          for (size_t k = 0; k < npolys; ++k) {
+            i0 = face.vertex_indices[k % npolys];
+            size_t vi0 = size_t(i0.v_idx);
+
+            size_t j = (k + 1) % npolys;
+            i0_2 = face.vertex_indices[j];
+            size_t vi0_2 = size_t(i0_2.v_idx);
+
+            real_t v0x = v[vi0 * 3 + 0];
+            real_t v0y = v[vi0 * 3 + 1];
+            real_t v0z = v[vi0 * 3 + 2];
+
+            real_t v0x_2 = v[vi0_2 * 3 + 0];
+            real_t v0y_2 = v[vi0_2 * 3 + 1];
+            real_t v0z_2 = v[vi0_2 * 3 + 2];
+
+            const TinyObjPoint point1(v0x, v0y, v0z);
+            const TinyObjPoint point2(v0x_2, v0y_2, v0z_2);
+
+            TinyObjPoint a(point1.x - point2.x, point1.y - point2.y,
+                           point1.z - point2.z);
+            TinyObjPoint b(point1.x + point2.x, point1.y + point2.y,
+                           point1.z + point2.z);
+
+            n.x += (a.y * b.z);
+            n.y += (a.z * b.x);
+            n.z += (a.x * b.y);
+          }
+          real_t length_n = GetLength(n);
+          // Check if zero length normal
+          if (length_n <= 0) {
             continue;
           }
+          // Negative is to flip the normal to the correct direction
+          real_t inv_length = -real_t(1.0) / length_n;
+          n.x *= inv_length;
+          n.y *= inv_length;
+          n.z *= inv_length;
+
+          TinyObjPoint axis_w, axis_v, axis_u;
+          axis_w = n;
+          TinyObjPoint a;
+          if (std::fabs(axis_w.x) > real_t(0.9999999)) {
+            a = TinyObjPoint(0, 1, 0);
+          } else {
+            a = TinyObjPoint(1, 0, 0);
+          }
+          axis_v = Normalize(cross(axis_w, a));
+          axis_u = cross(axis_w, axis_v);
+          using Point = std::array<real_t, 2>;
 
-          // check all other verts in case they are inside this triangle
-          bool overlap = false;
-          for (size_t otherVert = 3; otherVert < npolys; ++otherVert) {
-            size_t idx = (guess_vert + otherVert) % npolys;
+          // The first polyline defines the main polygon; any following
+          // polylines would define holes (not used in tinyobj).
+          std::vector<std::vector<Point> > polygon;
 
-            if (idx >= remainingFace.vertex_indices.size()) {
-              // ???
-              continue;
-            }
+          std::vector<Point> polyline;
+
+          // TMW change: Find best normal and project v0x and v0y to those
+          // coordinates, instead of picking a plane aligned with an axis (which
+          // can flip polygons).
+
+          // Fill polygon data(facevarying vertices).
+          for (size_t k = 0; k < npolys; k++) {
+            i0 = face.vertex_indices[k];
+            size_t vi0 = size_t(i0.v_idx);
 
-            size_t ovi = size_t(remainingFace.vertex_indices[idx].v_idx);
+            assert(((3 * vi0 + 2) < v.size()));
 
-            if (((ovi * 3 + axes[0]) >= v.size()) ||
-                ((ovi * 3 + axes[1]) >= v.size())) {
-              // ???
+            real_t v0x = v[vi0 * 3 + 0];
+            real_t v0y = v[vi0 * 3 + 1];
+            real_t v0z = v[vi0 * 3 + 2];
+
+            TinyObjPoint polypoint(v0x, v0y, v0z);
+            TinyObjPoint loc = WorldToLocal(polypoint, axis_u, axis_v, axis_w);
+
+            polyline.push_back({loc.x, loc.y});
+          }
+
+          polygon.push_back(polyline);
+          std::vector<uint32_t> indices = mapbox::earcut<uint32_t>(polygon);
+          // => result = 3 * faces, clockwise
+
+          assert(indices.size() % 3 == 0);
+
+          // Reconstruct vertex_index_t
+          for (size_t k = 0; k < indices.size() / 3; k++) {
+            {
+              index_t idx0, idx1, idx2;
+              idx0.vertex_index = face.vertex_indices[indices[3 * k + 0]].v_idx;
+              idx0.normal_index =
+                  face.vertex_indices[indices[3 * k + 0]].vn_idx;
+              idx0.texcoord_index =
+                  face.vertex_indices[indices[3 * k + 0]].vt_idx;
+              idx1.vertex_index = face.vertex_indices[indices[3 * k + 1]].v_idx;
+              idx1.normal_index =
+                  face.vertex_indices[indices[3 * k + 1]].vn_idx;
+              idx1.texcoord_index =
+                  face.vertex_indices[indices[3 * k + 1]].vt_idx;
+              idx2.vertex_index = face.vertex_indices[indices[3 * k + 2]].v_idx;
+              idx2.normal_index =
+                  face.vertex_indices[indices[3 * k + 2]].vn_idx;
+              idx2.texcoord_index =
+                  face.vertex_indices[indices[3 * k + 2]].vt_idx;
+
+              shape->mesh.indices.push_back(idx0);
+              shape->mesh.indices.push_back(idx1);
+              shape->mesh.indices.push_back(idx2);
+
+              shape->mesh.num_face_vertices.push_back(3);
+              shape->mesh.material_ids.push_back(material_id);
+              shape->mesh.smoothing_group_ids.push_back(
+                  face.smoothing_group_id);
+            }
+          }
+
+#else  // Built-in ear clipping triangulation
+          vertex_index_t i0 = face.vertex_indices[0];
+          vertex_index_t i1(-1);
+          vertex_index_t i2 = face.vertex_indices[1];
+
+          // find the two axes to work in
+          size_t axes[2] = {1, 2};
+          for (size_t k = 0; k < npolys; ++k) {
+            i0 = face.vertex_indices[(k + 0) % npolys];
+            i1 = face.vertex_indices[(k + 1) % npolys];
+            i2 = face.vertex_indices[(k + 2) % npolys];
+            size_t vi0 = size_t(i0.v_idx);
+            size_t vi1 = size_t(i1.v_idx);
+            size_t vi2 = size_t(i2.v_idx);
+
+            if (((3 * vi0 + 2) >= v.size()) || ((3 * vi1 + 2) >= v.size()) ||
+                ((3 * vi2 + 2) >= v.size())) {
+              // Invalid triangle.
+              // FIXME(syoyo): Is it ok to simply skip this invalid triangle?
               continue;
             }
-            real_t tx = v[ovi * 3 + axes[0]];
-            real_t ty = v[ovi * 3 + axes[1]];
-            if (pnpoly(3, vx, vy, tx, ty)) {
-              overlap = true;
+            real_t v0x = v[vi0 * 3 + 0];
+            real_t v0y = v[vi0 * 3 + 1];
+            real_t v0z = v[vi0 * 3 + 2];
+            real_t v1x = v[vi1 * 3 + 0];
+            real_t v1y = v[vi1 * 3 + 1];
+            real_t v1z = v[vi1 * 3 + 2];
+            real_t v2x = v[vi2 * 3 + 0];
+            real_t v2y = v[vi2 * 3 + 1];
+            real_t v2z = v[vi2 * 3 + 2];
+            real_t e0x = v1x - v0x;
+            real_t e0y = v1y - v0y;
+            real_t e0z = v1z - v0z;
+            real_t e1x = v2x - v1x;
+            real_t e1y = v2y - v1y;
+            real_t e1z = v2z - v1z;
+            real_t cx = std::fabs(e0y * e1z - e0z * e1y);
+            real_t cy = std::fabs(e0z * e1x - e0x * e1z);
+            real_t cz = std::fabs(e0x * e1y - e0y * e1x);
+            const real_t epsilon = std::numeric_limits<real_t>::epsilon();
+            // std::cout << "cx " << cx << ", cy " << cy << ", cz " << cz <<
+            // "\n";
+            if (cx > epsilon || cy > epsilon || cz > epsilon) {
+              // std::cout << "corner\n";
+              // found a corner
+              if (cx > cy && cx > cz) {
+                // std::cout << "pattern0\n";
+              } else {
+                // std::cout << "axes[0] = 0\n";
+                axes[0] = 0;
+                if (cz > cx && cz > cy) {
+                  // std::cout << "axes[1] = 1\n";
+                  axes[1] = 1;
+                }
+              }
               break;
             }
           }
 
-          if (overlap) {
-            guess_vert += 1;
-            continue;
-          }
+          face_t remainingFace = face;  // copy
+          size_t guess_vert = 0;
+          vertex_index_t ind[3];
+          real_t vx[3];
+          real_t vy[3];
+
+          // How many iterations can we do without decreasing the remaining
+          // vertices.
+          size_t remainingIterations = face.vertex_indices.size();
+          size_t previousRemainingVertices =
+              remainingFace.vertex_indices.size();
+
+          while (remainingFace.vertex_indices.size() > 3 &&
+                 remainingIterations > 0) {
+            // std::cout << "remainingIterations " << remainingIterations <<
+            // "\n";
+
+            npolys = remainingFace.vertex_indices.size();
+            if (guess_vert >= npolys) {
+              guess_vert -= npolys;
+            }
 
-          // this triangle is an ear
-          {
-            index_t idx0, idx1, idx2;
-            idx0.vertex_index = ind[0].v_idx;
-            idx0.normal_index = ind[0].vn_idx;
-            idx0.texcoord_index = ind[0].vt_idx;
-            idx1.vertex_index = ind[1].v_idx;
-            idx1.normal_index = ind[1].vn_idx;
-            idx1.texcoord_index = ind[1].vt_idx;
-            idx2.vertex_index = ind[2].v_idx;
-            idx2.normal_index = ind[2].vn_idx;
-            idx2.texcoord_index = ind[2].vt_idx;
+            if (previousRemainingVertices != npolys) {
+              // The number of remaining vertices decreased. Reset counters.
+              previousRemainingVertices = npolys;
+              remainingIterations = npolys;
+            } else {
+              // We didn't consume a vertex on previous iteration, reduce the
+              // available iterations.
+              remainingIterations--;
+            }
 
-            shape->mesh.indices.push_back(idx0);
-            shape->mesh.indices.push_back(idx1);
-            shape->mesh.indices.push_back(idx2);
+            for (size_t k = 0; k < 3; k++) {
+              ind[k] = remainingFace.vertex_indices[(guess_vert + k) % npolys];
+              size_t vi = size_t(ind[k].v_idx);
+              if (((vi * 3 + axes[0]) >= v.size()) ||
+                  ((vi * 3 + axes[1]) >= v.size())) {
+                // ???
+                vx[k] = static_cast<real_t>(0.0);
+                vy[k] = static_cast<real_t>(0.0);
+              } else {
+                vx[k] = v[vi * 3 + axes[0]];
+                vy[k] = v[vi * 3 + axes[1]];
+              }
+            }
 
-            shape->mesh.num_face_vertices.push_back(3);
-            shape->mesh.material_ids.push_back(material_id);
-            shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id);
-          }
+            //
+            // area is calculated per face
+            //
+            real_t e0x = vx[1] - vx[0];
+            real_t e0y = vy[1] - vy[0];
+            real_t e1x = vx[2] - vx[1];
+            real_t e1y = vy[2] - vy[1];
+            real_t cross = e0x * e1y - e0y * e1x;
+            // std::cout << "axes = " << axes[0] << ", " << axes[1] << "\n";
+            // std::cout << "e0x, e0y, e1x, e1y " << e0x << ", " << e0y << ", "
+            // << e1x << ", " << e1y << "\n";
+
+            real_t area =
+                (vx[0] * vy[1] - vy[0] * vx[1]) * static_cast<real_t>(0.5);
+            // std::cout << "cross " << cross << ", area " << area << "\n";
+            // if an internal angle
+            if (cross * area < static_cast<real_t>(0.0)) {
+              // std::cout << "internal \n";
+              guess_vert += 1;
+              // std::cout << "guess vert : " << guess_vert << "\n";
+              continue;
+            }
 
-          // remove v1 from the list
-          size_t removed_vert_index = (guess_vert + 1) % npolys;
-          while (removed_vert_index + 1 < npolys) {
-            remainingFace.vertex_indices[removed_vert_index] =
-                remainingFace.vertex_indices[removed_vert_index + 1];
-            removed_vert_index += 1;
-          }
-          remainingFace.vertex_indices.pop_back();
-        }
+            // check all other verts in case they are inside this triangle
+            bool overlap = false;
+            for (size_t otherVert = 3; otherVert < npolys; ++otherVert) {
+              size_t idx = (guess_vert + otherVert) % npolys;
+
+              if (idx >= remainingFace.vertex_indices.size()) {
+                // std::cout << "???0\n";
+                // ???
+                continue;
+              }
+
+              size_t ovi = size_t(remainingFace.vertex_indices[idx].v_idx);
+
+              if (((ovi * 3 + axes[0]) >= v.size()) ||
+                  ((ovi * 3 + axes[1]) >= v.size())) {
+                // std::cout << "???1\n";
+                // ???
+                continue;
+              }
+              real_t tx = v[ovi * 3 + axes[0]];
+              real_t ty = v[ovi * 3 + axes[1]];
+              if (pnpoly(3, vx, vy, tx, ty)) {
+                // std::cout << "overlap\n";
+                overlap = true;
+                break;
+              }
+            }
 
-        if (remainingFace.vertex_indices.size() == 3) {
-          i0 = remainingFace.vertex_indices[0];
-          i1 = remainingFace.vertex_indices[1];
-          i2 = remainingFace.vertex_indices[2];
-          {
-            index_t idx0, idx1, idx2;
-            idx0.vertex_index = i0.v_idx;
-            idx0.normal_index = i0.vn_idx;
-            idx0.texcoord_index = i0.vt_idx;
-            idx1.vertex_index = i1.v_idx;
-            idx1.normal_index = i1.vn_idx;
-            idx1.texcoord_index = i1.vt_idx;
-            idx2.vertex_index = i2.v_idx;
-            idx2.normal_index = i2.vn_idx;
-            idx2.texcoord_index = i2.vt_idx;
+            if (overlap) {
+              // std::cout << "overlap2\n";
+              guess_vert += 1;
+              continue;
+            }
 
-            shape->mesh.indices.push_back(idx0);
-            shape->mesh.indices.push_back(idx1);
-            shape->mesh.indices.push_back(idx2);
+            // this triangle is an ear
+            {
+              index_t idx0, idx1, idx2;
+              idx0.vertex_index = ind[0].v_idx;
+              idx0.normal_index = ind[0].vn_idx;
+              idx0.texcoord_index = ind[0].vt_idx;
+              idx1.vertex_index = ind[1].v_idx;
+              idx1.normal_index = ind[1].vn_idx;
+              idx1.texcoord_index = ind[1].vt_idx;
+              idx2.vertex_index = ind[2].v_idx;
+              idx2.normal_index = ind[2].vn_idx;
+              idx2.texcoord_index = ind[2].vt_idx;
+
+              shape->mesh.indices.push_back(idx0);
+              shape->mesh.indices.push_back(idx1);
+              shape->mesh.indices.push_back(idx2);
+
+              shape->mesh.num_face_vertices.push_back(3);
+              shape->mesh.material_ids.push_back(material_id);
+              shape->mesh.smoothing_group_ids.push_back(
+                  face.smoothing_group_id);
+            }
 
-            shape->mesh.num_face_vertices.push_back(3);
-            shape->mesh.material_ids.push_back(material_id);
-            shape->mesh.smoothing_group_ids.push_back(face.smoothing_group_id);
+            // remove v1 from the list
+            size_t removed_vert_index = (guess_vert + 1) % npolys;
+            while (removed_vert_index + 1 < npolys) {
+              remainingFace.vertex_indices[removed_vert_index] =
+                  remainingFace.vertex_indices[removed_vert_index + 1];
+              removed_vert_index += 1;
+            }
+            remainingFace.vertex_indices.pop_back();
           }
-        }
+
+          // std::cout << "remainingFace.vi.size = " <<
+          // remainingFace.vertex_indices.size() << "\n";
+          if (remainingFace.vertex_indices.size() == 3) {
+            i0 = remainingFace.vertex_indices[0];
+            i1 = remainingFace.vertex_indices[1];
+            i2 = remainingFace.vertex_indices[2];
+            {
+              index_t idx0, idx1, idx2;
+              idx0.vertex_index = i0.v_idx;
+              idx0.normal_index = i0.vn_idx;
+              idx0.texcoord_index = i0.vt_idx;
+              idx1.vertex_index = i1.v_idx;
+              idx1.normal_index = i1.vn_idx;
+              idx1.texcoord_index = i1.vt_idx;
+              idx2.vertex_index = i2.v_idx;
+              idx2.normal_index = i2.vn_idx;
+              idx2.texcoord_index = i2.vt_idx;
+
+              shape->mesh.indices.push_back(idx0);
+              shape->mesh.indices.push_back(idx1);
+              shape->mesh.indices.push_back(idx2);
+
+              shape->mesh.num_face_vertices.push_back(3);
+              shape->mesh.material_ids.push_back(material_id);
+              shape->mesh.smoothing_group_ids.push_back(
+                  face.smoothing_group_id);
+            }
+          }
+#endif
+        }  // npolys
       } else {
         for (size_t k = 0; k < npolys; k++) {
           index_t idx;
@@ -1577,7 +7593,7 @@ static bool exportGroupsToShape(shape_t *shape, const PrimGroup &prim_group,
         }
 
         shape->mesh.num_face_vertices.push_back(
-            static_cast<unsigned char>(npolys));
+            static_cast<unsigned int>(npolys));
         shape->mesh.material_ids.push_back(material_id);  // per face
         shape->mesh.smoothing_group_ids.push_back(
             face.smoothing_group_id);  // per face
@@ -1629,16 +7645,50 @@ static bool exportGroupsToShape(shape_t *shape, const PrimGroup &prim_group,
   return true;
 }
 
-// Split a string with specified delimiter character.
-// http://stackoverflow.com/questions/236129/split-a-string-in-c
-static void SplitString(const std::string &s, char delim,
+// Split `s` into tokens at each unescaped `delim`, appending them to `elems`.
+// https://rosettacode.org/wiki/Tokenize_a_string_with_escaping#C.2B.2B
+static void SplitString(const std::string &s, char delim, char escape,
                         std::vector<std::string> &elems) {
-  std::stringstream ss;
-  ss.str(s);
-  std::string item;
-  while (std::getline(ss, item, delim)) {
-    elems.push_back(item);
+  std::string token;
+  // Set after an escape is consumed; the next character is then taken as-is.
+  bool escaping = false;
+  for (size_t i = 0; i < s.size(); ++i) {
+    char ch = s[i];
+    if (escaping) {
+      escaping = false;
+    } else if (ch == escape) {
+      if ((i + 1) < s.size()) {
+        const char next = s[i + 1];
+        if ((next == delim) || (next == escape)) {  // only these are escapable
+          escaping = true;
+          continue;  // drop the escape character itself
+        }
+      }  // otherwise the escape is kept as an ordinary character
+    } else if (ch == delim) {
+      if (!token.empty()) {  // empty tokens before a delimiter are skipped
+        elems.push_back(token);
+      }
+      token.clear();
+      continue;
+    }
+    token += ch;
+  }
+  // The trailing token is always appended, even when it is empty.
+  elems.push_back(token);
+}
+
+static void RemoveEmptyTokens(std::vector<std::string> *tokens) {
+  if (!tokens) return;  // null pointer: nothing to filter
+  // Copy the non-empty strings, in order, then swap them into *tokens.
+  const std::vector<std::string> &src = *tokens;
+  std::vector<std::string> filtered;
+  filtered.reserve(src.size());
+  for (size_t i = 0; i < src.size(); i++) {
+    if (!src[i].empty()) {
+      filtered.push_back(src[i]);
+    }
   }
+  tokens->swap(filtered);
 }
 
 static std::string JoinPath(const std::string &dir,
@@ -1656,12 +7706,18 @@ static std::string JoinPath(const std::string &dir,
   }
 }
 
-void LoadMtl(std::map<std::string, int> *material_map,
-             std::vector<material_t> *materials, std::istream *inStream,
-             std::string *warning, std::string *err) {
-  (void)err;
+static bool LoadMtlInternal(std::map<std::string, int> *material_map,
+                            std::vector<material_t> *materials,
+                            StreamReader &sr,
+                            std::string *warning, std::string *err,
+                            const std::string &filename = "<stream>") {
+  if (sr.has_errors()) {
+    if (err) {
+      (*err) += sr.get_errors();
+    }
+    return false;
+  }
 
-  // Create a default material anyway.
   material_t material;
   InitMaterial(&material);
 
@@ -1669,45 +7725,29 @@ void LoadMtl(std::map<std::string, int> *material_map,
   bool has_d = false;
   bool has_tr = false;
 
-  std::stringstream warn_ss;
-
-  size_t line_no = 0;
-  std::string linebuf;
-  while (inStream->peek() != -1) {
-    safeGetline(*inStream, linebuf);
-    line_no++;
-
-    // Trim trailing whitespace.
-    if (linebuf.size() > 0) {
-      linebuf = linebuf.substr(0, linebuf.find_last_not_of(" \t") + 1);
-    }
-
-    // Trim newline '\r\n' or '\n'
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\n')
-        linebuf.erase(linebuf.size() - 1);
-    }
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\r')
-        linebuf.erase(linebuf.size() - 1);
-    }
+  // has_kd is used to set a default diffuse value when map_Kd is present
+  // and Kd is not.
+  bool has_kd = false;
 
-    // Skip if empty line.
-    if (linebuf.empty()) {
-      continue;
-    }
+  std::stringstream warn_ss;
 
-    // Skip leading space.
-    const char *token = linebuf.c_str();
-    token += strspn(token, " \t");
+  // Handle BOM
+  if (sr.remaining() >= 3 &&
+      static_cast<unsigned char>(sr.peek()) == 0xEF &&
+      static_cast<unsigned char>(sr.peek_at(1)) == 0xBB &&
+      static_cast<unsigned char>(sr.peek_at(2)) == 0xBF) {
+    sr.advance(3);
+  }
 
-    assert(token);
-    if (token[0] == '\0') continue;  // empty line
+  while (!sr.eof()) {
+    sr.skip_space();
+    if (sr.at_line_end()) { sr.skip_line(); continue; }
+    if (sr.peek() == '#') { sr.skip_line(); continue; }
 
-    if (token[0] == '#') continue;  // comment line
+    size_t line_num = sr.line_num();
 
     // new mtl
-    if ((0 == strncmp(token, "newmtl", 6)) && IS_SPACE((token[6]))) {
+    if (sr.match("newmtl", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
       // flush previous material.
       if (!material.name.empty()) {
         material_map->insert(std::pair<std::string, int>(
@@ -1715,324 +7755,400 @@ void LoadMtl(std::map<std::string, int> *material_map,
         materials->push_back(material);
       }
 
-      // initial temporary material
       InitMaterial(&material);
 
       has_d = false;
       has_tr = false;
+      has_kd = false;
 
-      // set new mtl name
-      token += 7;
+      sr.advance(7);
       {
-        std::stringstream sstr;
-        sstr << token;
-        material.name = sstr.str();
+        std::string namebuf = sr_parseString(sr);
+        if (namebuf.empty()) {
+          if (warning) {
+            (*warning) += "empty material name in `newmtl`\n";
+          }
+        }
+        material.name = namebuf;
       }
+      sr.skip_line();
       continue;
     }
 
     // ambient
-    if (token[0] == 'K' && token[1] == 'a' && IS_SPACE((token[2]))) {
-      token += 2;
+    if (sr.peek() == 'K' && sr.peek_at(1) == 'a' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(2);
       real_t r, g, b;
-      parseReal3(&r, &g, &b, &token);
+      if (!sr_parseReal3(&r, &g, &b, sr, err, filename)) return false;
       material.ambient[0] = r;
       material.ambient[1] = g;
       material.ambient[2] = b;
+      sr.skip_line();
       continue;
     }
 
     // diffuse
-    if (token[0] == 'K' && token[1] == 'd' && IS_SPACE((token[2]))) {
-      token += 2;
+    if (sr.peek() == 'K' && sr.peek_at(1) == 'd' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(2);
       real_t r, g, b;
-      parseReal3(&r, &g, &b, &token);
+      if (!sr_parseReal3(&r, &g, &b, sr, err, filename)) return false;
       material.diffuse[0] = r;
       material.diffuse[1] = g;
       material.diffuse[2] = b;
+      has_kd = true;
+      sr.skip_line();
       continue;
     }
 
     // specular
-    if (token[0] == 'K' && token[1] == 's' && IS_SPACE((token[2]))) {
-      token += 2;
+    if (sr.peek() == 'K' && sr.peek_at(1) == 's' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(2);
       real_t r, g, b;
-      parseReal3(&r, &g, &b, &token);
+      if (!sr_parseReal3(&r, &g, &b, sr, err, filename)) return false;
       material.specular[0] = r;
       material.specular[1] = g;
       material.specular[2] = b;
+      sr.skip_line();
       continue;
     }
 
     // transmittance
-    if ((token[0] == 'K' && token[1] == 't' && IS_SPACE((token[2]))) ||
-        (token[0] == 'T' && token[1] == 'f' && IS_SPACE((token[2])))) {
-      token += 2;
+    if ((sr.peek() == 'K' && sr.peek_at(1) == 't' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) ||
+        (sr.peek() == 'T' && sr.peek_at(1) == 'f' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t'))) {
+      sr.advance(2);
       real_t r, g, b;
-      parseReal3(&r, &g, &b, &token);
+      if (!sr_parseReal3(&r, &g, &b, sr, err, filename)) return false;
       material.transmittance[0] = r;
       material.transmittance[1] = g;
       material.transmittance[2] = b;
+      sr.skip_line();
       continue;
     }
 
     // ior(index of refraction)
-    if (token[0] == 'N' && token[1] == 'i' && IS_SPACE((token[2]))) {
-      token += 2;
-      material.ior = parseReal(&token);
+    if (sr.peek() == 'N' && sr.peek_at(1) == 'i' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(2);
+      if (!sr_parseReal(sr, &material.ior, 0.0, err, filename)) return false;
+      sr.skip_line();
       continue;
     }
 
     // emission
-    if (token[0] == 'K' && token[1] == 'e' && IS_SPACE(token[2])) {
-      token += 2;
+    if (sr.peek() == 'K' && sr.peek_at(1) == 'e' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(2);
       real_t r, g, b;
-      parseReal3(&r, &g, &b, &token);
+      if (!sr_parseReal3(&r, &g, &b, sr, err, filename)) return false;
       material.emission[0] = r;
       material.emission[1] = g;
       material.emission[2] = b;
+      sr.skip_line();
       continue;
     }
 
     // shininess
-    if (token[0] == 'N' && token[1] == 's' && IS_SPACE(token[2])) {
-      token += 2;
-      material.shininess = parseReal(&token);
+    if (sr.peek() == 'N' && sr.peek_at(1) == 's' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(2);
+      if (!sr_parseReal(sr, &material.shininess, 0.0, err, filename)) return false;
+      sr.skip_line();
       continue;
     }
 
     // illum model
-    if (0 == strncmp(token, "illum", 5) && IS_SPACE(token[5])) {
-      token += 6;
-      material.illum = parseInt(&token);
+    if (sr.match("illum", 5) && (sr.peek_at(5) == ' ' || sr.peek_at(5) == '\t')) {
+      sr.advance(6);
+      if (!sr_parseInt(sr, &material.illum, err, filename)) return false;
+      sr.skip_line();
       continue;
     }
 
     // dissolve
-    if ((token[0] == 'd' && IS_SPACE(token[1]))) {
-      token += 1;
-      material.dissolve = parseReal(&token);
+    if (sr.peek() == 'd' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
+      sr.advance(1);
+      if (!sr_parseReal(sr, &material.dissolve, 0.0, err, filename)) return false;
 
       if (has_tr) {
         warn_ss << "Both `d` and `Tr` parameters defined for \""
                 << material.name
-                << "\". Use the value of `d` for dissolve (line " << line_no
-                << " in .mtl.)" << std::endl;
+                << "\". Use the value of `d` for dissolve (line " << line_num
+                << " in .mtl.)\n";
       }
       has_d = true;
+      sr.skip_line();
       continue;
     }
-    if (token[0] == 'T' && token[1] == 'r' && IS_SPACE(token[2])) {
-      token += 2;
+    if (sr.peek() == 'T' && sr.peek_at(1) == 'r' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(2);
       if (has_d) {
-        // `d` wins. Ignore `Tr` value.
         warn_ss << "Both `d` and `Tr` parameters defined for \""
                 << material.name
-                << "\". Use the value of `d` for dissolve (line " << line_no
-                << " in .mtl.)" << std::endl;
+                << "\". Use the value of `d` for dissolve (line " << line_num
+                << " in .mtl.)\n";
       } else {
-        // We invert value of Tr(assume Tr is in range [0, 1])
-        // NOTE: Interpretation of Tr is application(exporter) dependent. For
-        // some application(e.g. 3ds max obj exporter), Tr = d(Issue 43)
-        material.dissolve = static_cast<real_t>(1.0) - parseReal(&token);
+        real_t tr_val;
+        if (!sr_parseReal(sr, &tr_val, 0.0, err, filename)) return false;
+        material.dissolve = static_cast<real_t>(1.0) - tr_val;
       }
       has_tr = true;
+      sr.skip_line();
       continue;
     }
 
     // PBR: roughness
-    if (token[0] == 'P' && token[1] == 'r' && IS_SPACE(token[2])) {
-      token += 2;
-      material.roughness = parseReal(&token);
+    if (sr.peek() == 'P' && sr.peek_at(1) == 'r' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(2);
+      if (!sr_parseReal(sr, &material.roughness, 0.0, err, filename)) return false;
+      sr.skip_line();
       continue;
     }
 
     // PBR: metallic
-    if (token[0] == 'P' && token[1] == 'm' && IS_SPACE(token[2])) {
-      token += 2;
-      material.metallic = parseReal(&token);
+    if (sr.peek() == 'P' && sr.peek_at(1) == 'm' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(2);
+      if (!sr_parseReal(sr, &material.metallic, 0.0, err, filename)) return false;
+      sr.skip_line();
       continue;
     }
 
     // PBR: sheen
-    if (token[0] == 'P' && token[1] == 's' && IS_SPACE(token[2])) {
-      token += 2;
-      material.sheen = parseReal(&token);
+    if (sr.peek() == 'P' && sr.peek_at(1) == 's' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(2);
+      if (!sr_parseReal(sr, &material.sheen, 0.0, err, filename)) return false;
+      sr.skip_line();
       continue;
     }
 
     // PBR: clearcoat thickness
-    if (token[0] == 'P' && token[1] == 'c' && IS_SPACE(token[2])) {
-      token += 2;
-      material.clearcoat_thickness = parseReal(&token);
+    if (sr.peek() == 'P' && sr.peek_at(1) == 'c' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(2);
+      if (!sr_parseReal(sr, &material.clearcoat_thickness, 0.0, err, filename)) return false;
+      sr.skip_line();
       continue;
     }
 
     // PBR: clearcoat roughness
-    if ((0 == strncmp(token, "Pcr", 3)) && IS_SPACE(token[3])) {
-      token += 4;
-      material.clearcoat_roughness = parseReal(&token);
+    if (sr.match("Pcr", 3) && (sr.peek_at(3) == ' ' || sr.peek_at(3) == '\t')) {
+      sr.advance(4);
+      if (!sr_parseReal(sr, &material.clearcoat_roughness, 0.0, err, filename)) return false;
+      sr.skip_line();
       continue;
     }
 
     // PBR: anisotropy
-    if ((0 == strncmp(token, "aniso", 5)) && IS_SPACE(token[5])) {
-      token += 6;
-      material.anisotropy = parseReal(&token);
+    if (sr.match("aniso", 5) && (sr.peek_at(5) == ' ' || sr.peek_at(5) == '\t')) {
+      sr.advance(6);
+      if (!sr_parseReal(sr, &material.anisotropy, 0.0, err, filename)) return false;
+      sr.skip_line();
       continue;
     }
 
     // PBR: anisotropy rotation
-    if ((0 == strncmp(token, "anisor", 6)) && IS_SPACE(token[6])) {
-      token += 7;
-      material.anisotropy_rotation = parseReal(&token);
+    if (sr.match("anisor", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
+      sr.advance(7);
+      if (!sr_parseReal(sr, &material.anisotropy_rotation, 0.0, err, filename)) return false;
+      sr.skip_line();
       continue;
     }
 
-    // ambient texture
-    if ((0 == strncmp(token, "map_Ka", 6)) && IS_SPACE(token[6])) {
-      token += 7;
+    // For texture directives, read rest of line and delegate to
+    // ParseTextureNameAndOption (which uses the old const char* parse functions).
+
+    // ambient or ambient occlusion texture
+    if (sr.match("map_Ka", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
+      sr.advance(7);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.ambient_texname),
-                                &(material.ambient_texopt), token);
+                                &(material.ambient_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // diffuse texture
-    if ((0 == strncmp(token, "map_Kd", 6)) && IS_SPACE(token[6])) {
-      token += 7;
+    if (sr.match("map_Kd", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
+      sr.advance(7);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.diffuse_texname),
-                                &(material.diffuse_texopt), token);
+                                &(material.diffuse_texopt), line_rest.c_str());
+      if (!has_kd) {
+        material.diffuse[0] = static_cast<real_t>(0.6);
+        material.diffuse[1] = static_cast<real_t>(0.6);
+        material.diffuse[2] = static_cast<real_t>(0.6);
+      }
+      sr.skip_line();
       continue;
     }
 
     // specular texture
-    if ((0 == strncmp(token, "map_Ks", 6)) && IS_SPACE(token[6])) {
-      token += 7;
+    if (sr.match("map_Ks", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
+      sr.advance(7);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.specular_texname),
-                                &(material.specular_texopt), token);
+                                &(material.specular_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // specular highlight texture
-    if ((0 == strncmp(token, "map_Ns", 6)) && IS_SPACE(token[6])) {
-      token += 7;
+    if (sr.match("map_Ns", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
+      sr.advance(7);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.specular_highlight_texname),
-                                &(material.specular_highlight_texopt), token);
-      continue;
-    }
-
-    // bump texture
-    if ((0 == strncmp(token, "map_bump", 8)) && IS_SPACE(token[8])) {
-      token += 9;
-      ParseTextureNameAndOption(&(material.bump_texname),
-                                &(material.bump_texopt), token);
+                                &(material.specular_highlight_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // bump texture
-    if ((0 == strncmp(token, "map_Bump", 8)) && IS_SPACE(token[8])) {
-      token += 9;
+    if ((sr.match("map_bump", 8) || sr.match("map_Bump", 8)) &&
+        (sr.peek_at(8) == ' ' || sr.peek_at(8) == '\t')) {
+      sr.advance(9);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.bump_texname),
-                                &(material.bump_texopt), token);
+                                &(material.bump_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
-    // bump texture
-    if ((0 == strncmp(token, "bump", 4)) && IS_SPACE(token[4])) {
-      token += 5;
+    // bump texture (short form)
+    if (sr.match("bump", 4) && (sr.peek_at(4) == ' ' || sr.peek_at(4) == '\t')) {
+      sr.advance(5);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.bump_texname),
-                                &(material.bump_texopt), token);
+                                &(material.bump_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // alpha texture
-    if ((0 == strncmp(token, "map_d", 5)) && IS_SPACE(token[5])) {
-      token += 6;
-      material.alpha_texname = token;
+    if (sr.match("map_d", 5) && (sr.peek_at(5) == ' ' || sr.peek_at(5) == '\t')) {
+      sr.advance(6);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.alpha_texname),
-                                &(material.alpha_texopt), token);
+                                &(material.alpha_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // displacement texture
-    if ((0 == strncmp(token, "disp", 4)) && IS_SPACE(token[4])) {
-      token += 5;
+    if ((sr.match("map_disp", 8) || sr.match("map_Disp", 8)) &&
+        (sr.peek_at(8) == ' ' || sr.peek_at(8) == '\t')) {
+      sr.advance(9);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
+      ParseTextureNameAndOption(&(material.displacement_texname),
+                                &(material.displacement_texopt), line_rest.c_str());
+      sr.skip_line();
+      continue;
+    }
+
+    // displacement texture (short form)
+    if (sr.match("disp", 4) && (sr.peek_at(4) == ' ' || sr.peek_at(4) == '\t')) {
+      sr.advance(5);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.displacement_texname),
-                                &(material.displacement_texopt), token);
+                                &(material.displacement_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // reflection map
-    if ((0 == strncmp(token, "refl", 4)) && IS_SPACE(token[4])) {
-      token += 5;
+    if (sr.match("refl", 4) && (sr.peek_at(4) == ' ' || sr.peek_at(4) == '\t')) {
+      sr.advance(5);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.reflection_texname),
-                                &(material.reflection_texopt), token);
+                                &(material.reflection_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // PBR: roughness texture
-    if ((0 == strncmp(token, "map_Pr", 6)) && IS_SPACE(token[6])) {
-      token += 7;
+    if (sr.match("map_Pr", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
+      sr.advance(7);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.roughness_texname),
-                                &(material.roughness_texopt), token);
+                                &(material.roughness_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // PBR: metallic texture
-    if ((0 == strncmp(token, "map_Pm", 6)) && IS_SPACE(token[6])) {
-      token += 7;
+    if (sr.match("map_Pm", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
+      sr.advance(7);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.metallic_texname),
-                                &(material.metallic_texopt), token);
+                                &(material.metallic_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // PBR: sheen texture
-    if ((0 == strncmp(token, "map_Ps", 6)) && IS_SPACE(token[6])) {
-      token += 7;
+    if (sr.match("map_Ps", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
+      sr.advance(7);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.sheen_texname),
-                                &(material.sheen_texopt), token);
+                                &(material.sheen_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // PBR: emissive texture
-    if ((0 == strncmp(token, "map_Ke", 6)) && IS_SPACE(token[6])) {
-      token += 7;
+    if (sr.match("map_Ke", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
+      sr.advance(7);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.emissive_texname),
-                                &(material.emissive_texopt), token);
+                                &(material.emissive_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // PBR: normal map texture
-    if ((0 == strncmp(token, "norm", 4)) && IS_SPACE(token[4])) {
-      token += 5;
+    if (sr.match("norm", 4) && (sr.peek_at(4) == ' ' || sr.peek_at(4) == '\t')) {
+      sr.advance(5);
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
       ParseTextureNameAndOption(&(material.normal_texname),
-                                &(material.normal_texopt), token);
+                                &(material.normal_texopt), line_rest.c_str());
+      sr.skip_line();
       continue;
     }
 
     // unknown parameter
-    const char *_space = strchr(token, ' ');
-    if (!_space) {
-      _space = strchr(token, '\t');
-    }
-    if (_space) {
-      std::ptrdiff_t len = _space - token;
-      std::string key(token, static_cast<size_t>(len));
-      std::string value = _space + 1;
-      material.unknown_parameter.insert(
-          std::pair<std::string, std::string>(key, value));
+    {
+      std::string line_rest = trimTrailingWhitespace(sr.read_line());
+      const char *_lp = line_rest.c_str();
+      const char *_space = strchr(_lp, ' ');
+      if (!_space) {
+        _space = strchr(_lp, '\t');
+      }
+      if (_space) {
+        std::ptrdiff_t len = _space - _lp;
+        std::string key(_lp, static_cast<size_t>(len));
+        std::string value = _space + 1;
+        material.unknown_parameter.insert(
+            std::pair<std::string, std::string>(key, value));
+      }
     }
+    sr.skip_line();
+  }
+  // flush last material (only if it was actually defined).
+  if (!material.name.empty()) {
+    material_map->insert(std::pair<std::string, int>(
+        material.name, static_cast<int>(materials->size())));
+    materials->push_back(material);
   }
-  // flush last material.
-  material_map->insert(std::pair<std::string, int>(
-      material.name, static_cast<int>(materials->size())));
-  materials->push_back(material);
 
   if (warning) {
-    (*warning) = warn_ss.str();
+    (*warning) += warn_ss.str();
   }
+
+  return true;
+}
+
+void LoadMtl(std::map<std::string, int> *material_map,  // stream-based MTL entry point; parsing itself is done by LoadMtlInternal
+             std::vector<material_t> *materials, std::istream *inStream,
+             std::string *warning, std::string *err) {
+  StreamReader sr(*inStream);  // inStream is dereferenced without a null check, so callers must pass a valid stream
+  LoadMtlInternal(material_map, materials, sr, warning, err);  // bool result is discarded (this wrapper returns void); no filename argument is passed here
 }
 
+
 bool MaterialFileReader::operator()(const std::string &matId,
                                     std::vector<material_t> *materials,
                                     std::map<std::string, int> *matMap,
@@ -2056,17 +8172,39 @@ bool MaterialFileReader::operator()(const std::string &matId,
     for (size_t i = 0; i < paths.size(); i++) {
       std::string filepath = JoinPath(paths[i], matId);
 
+#ifdef TINYOBJLOADER_USE_MMAP
+      {
+        MappedFile mf;
+        if (!mf.open(filepath.c_str())) continue;
+        if (mf.size > TINYOBJLOADER_STREAM_READER_MAX_BYTES) {
+          if (err) {
+            std::stringstream ss;
+            ss << "input stream too large (" << mf.size
+               << " bytes exceeds limit "
+               << TINYOBJLOADER_STREAM_READER_MAX_BYTES << " bytes)\n";
+            (*err) += ss.str();
+          }
+          return false;
+        }
+        StreamReader sr(mf.data, mf.size);
+        return LoadMtlInternal(matMap, materials, sr, warn, err, filepath);
+      }
+#else   // !TINYOBJLOADER_USE_MMAP
+#ifdef _WIN32
+      std::ifstream matIStream(LongPathW(UTF8ToWchar(filepath)).c_str());
+#else
       std::ifstream matIStream(filepath.c_str());
+#endif
       if (matIStream) {
-        LoadMtl(matMap, materials, &matIStream, warn, err);
-
-        return true;
+        StreamReader mtl_sr(matIStream);
+        return LoadMtlInternal(matMap, materials, mtl_sr, warn, err, filepath);
       }
+#endif  // TINYOBJLOADER_USE_MMAP
     }
 
     std::stringstream ss;
     ss << "Material file [ " << matId
-       << " ] not found in a path : " << m_mtlBaseDir << std::endl;
+       << " ] not found in a path : " << m_mtlBaseDir << "\n";
     if (warn) {
       (*warn) += ss.str();
     }
@@ -2074,16 +8212,40 @@ bool MaterialFileReader::operator()(const std::string &matId,
 
   } else {
     std::string filepath = matId;
+
+#ifdef TINYOBJLOADER_USE_MMAP
+    {
+      MappedFile mf;
+      if (mf.open(filepath.c_str())) {
+        if (mf.size > TINYOBJLOADER_STREAM_READER_MAX_BYTES) {
+          if (err) {
+            std::stringstream ss;
+            ss << "input stream too large (" << mf.size
+               << " bytes exceeds limit "
+               << TINYOBJLOADER_STREAM_READER_MAX_BYTES << " bytes)\n";
+            (*err) += ss.str();
+          }
+          return false;
+        }
+        StreamReader sr(mf.data, mf.size);
+        return LoadMtlInternal(matMap, materials, sr, warn, err, filepath);
+      }
+    }
+#else   // !TINYOBJLOADER_USE_MMAP
+#ifdef _WIN32
+    std::ifstream matIStream(LongPathW(UTF8ToWchar(filepath)).c_str());
+#else
     std::ifstream matIStream(filepath.c_str());
+#endif
     if (matIStream) {
-      LoadMtl(matMap, materials, &matIStream, warn, err);
-
-      return true;
+      StreamReader mtl_sr(matIStream);
+      return LoadMtlInternal(matMap, materials, mtl_sr, warn, err, filepath);
     }
+#endif  // TINYOBJLOADER_USE_MMAP
 
     std::stringstream ss;
     ss << "Material file [ " << filepath
-       << " ] not found in a path : " << m_mtlBaseDir << std::endl;
+       << " ] not found in a path : " << m_mtlBaseDir << "\n";
     if (warn) {
       (*warn) += ss.str();
     }
@@ -2096,80 +8258,51 @@ bool MaterialStreamReader::operator()(const std::string &matId,
                                       std::vector<material_t> *materials,
                                       std::map<std::string, int> *matMap,
                                       std::string *warn, std::string *err) {
-  (void)err;
   (void)matId;
   if (!m_inStream) {
     std::stringstream ss;
-    ss << "Material stream in error state. " << std::endl;
+    ss << "Material stream in error state. \n";
     if (warn) {
       (*warn) += ss.str();
     }
     return false;
   }
 
-  LoadMtl(matMap, materials, &m_inStream, warn, err);
-
-  return true;
+  StreamReader mtl_sr(m_inStream);
+  return LoadMtlInternal(matMap, materials, mtl_sr, warn, err, "<stream>");
 }
 
-bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
-             std::vector<material_t> *materials, std::string *warn,
-             std::string *err, const char *filename, const char *mtl_basedir,
-             bool trianglulate, bool default_vcols_fallback) {
-  attrib->vertices.clear();
-  attrib->normals.clear();
-  attrib->texcoords.clear();
-  attrib->colors.clear();
-  shapes->clear();
-
-  std::stringstream errss;
-
-  std::ifstream ifs(filename);
-  if (!ifs) {
-    errss << "Cannot open file [" << filename << "]" << std::endl;
+static bool LoadObjInternal(attrib_t *attrib, std::vector<shape_t> *shapes,
+                            std::vector<material_t> *materials,
+                            std::string *warn, std::string *err,
+                            StreamReader &sr,
+                            MaterialReader *readMatFn, bool triangulate,
+                            bool default_vcols_fallback,
+                            const std::string &filename = "<stream>") {
+  if (sr.has_errors()) {
     if (err) {
-      (*err) = errss.str();
+      (*err) += sr.get_errors();
     }
     return false;
   }
 
-  std::string baseDir = mtl_basedir ? mtl_basedir : "";
-  if (!baseDir.empty()) {
-#ifndef _WIN32
-    const char dirsep = '/';
-#else
-    const char dirsep = '\\';
-#endif
-    if (baseDir[baseDir.length() - 1] != dirsep) baseDir += dirsep;
-  }
-  MaterialFileReader matFileReader(baseDir);
-
-  return LoadObj(attrib, shapes, materials, warn, err, &ifs, &matFileReader,
-                 trianglulate, default_vcols_fallback);
-}
-
-bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
-             std::vector<material_t> *materials, std::string *warn,
-             std::string *err, std::istream *inStream,
-             MaterialReader *readMatFn /*= NULL*/, bool triangulate,
-             bool default_vcols_fallback) {
-  std::stringstream errss;
-
   std::vector<real_t> v;
+  std::vector<real_t> vertex_weights;
   std::vector<real_t> vn;
   std::vector<real_t> vt;
+  std::vector<real_t> vt_w;  // optional [w] component in `vt`
   std::vector<real_t> vc;
+  std::vector<skin_weight_t> vw;
   std::vector<tag_t> tags;
   PrimGroup prim_group;
   std::string name;
 
   // material
+  std::set<std::string> material_filenames;
   std::map<std::string, int> material_map;
   int material = -1;
 
-  // smoothing group id
-  unsigned int current_smoothing_id =
-      0;  // Initial value. 0 means no smoothing.
+  unsigned int current_smoothing_id = 0;
 
   int greatest_v_idx = -1;
   int greatest_vn_idx = -1;
@@ -2179,163 +8312,204 @@ bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
 
   bool found_all_colors = true;
 
-  size_t line_num = 0;
-  std::string linebuf;
-  while (inStream->peek() != -1) {
-    safeGetline(*inStream, linebuf);
-
-    line_num++;
-
-    // Trim newline '\r\n' or '\n'
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\n')
-        linebuf.erase(linebuf.size() - 1);
-    }
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\r')
-        linebuf.erase(linebuf.size() - 1);
-    }
-
-    // Skip if empty line.
-    if (linebuf.empty()) {
-      continue;
-    }
+  // Handle BOM
+  if (sr.remaining() >= 3 &&
+      static_cast<unsigned char>(sr.peek()) == 0xEF &&
+      static_cast<unsigned char>(sr.peek_at(1)) == 0xBB &&
+      static_cast<unsigned char>(sr.peek_at(2)) == 0xBF) {
+    sr.advance(3);
+  }
 
-    // Skip leading space.
-    const char *token = linebuf.c_str();
-    token += strspn(token, " \t");
+  warning_context context;
+  context.warn = warn;
+  context.filename = filename;
 
-    assert(token);
-    if (token[0] == '\0') continue;  // empty line
+  while (!sr.eof()) {
+    sr.skip_space();
+    if (sr.at_line_end()) { sr.skip_line(); continue; }
+    if (sr.peek() == '#') { sr.skip_line(); continue; }
 
-    if (token[0] == '#') continue;  // comment line
+    size_t line_num = sr.line_num();
 
     // vertex
-    if (token[0] == 'v' && IS_SPACE((token[1]))) {
-      token += 2;
+    if (sr.peek() == 'v' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
+      sr.advance(2);
       real_t x, y, z;
       real_t r, g, b;
 
-      found_all_colors &= parseVertexWithColor(&x, &y, &z, &r, &g, &b, &token);
+      int num_components = sr_parseVertexWithColor(&x, &y, &z, &r, &g, &b, sr, err, filename);
+      if (num_components < 0) return false;
+      found_all_colors &= (num_components == 6);
 
       v.push_back(x);
       v.push_back(y);
       v.push_back(z);
 
-      if (found_all_colors || default_vcols_fallback) {
+      vertex_weights.push_back(r);
+
+      if ((num_components == 6) || default_vcols_fallback) {
         vc.push_back(r);
         vc.push_back(g);
         vc.push_back(b);
       }
 
+      sr.skip_line();
       continue;
     }
 
     // normal
-    if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
-      token += 3;
+    if (sr.peek() == 'v' && sr.peek_at(1) == 'n' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(3);
       real_t x, y, z;
-      parseReal3(&x, &y, &z, &token);
+      if (!sr_parseReal3(&x, &y, &z, sr, err, filename)) return false;
       vn.push_back(x);
       vn.push_back(y);
       vn.push_back(z);
+      sr.skip_line();
       continue;
     }
 
     // texcoord
-    if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
-      token += 3;
+    if (sr.peek() == 'v' && sr.peek_at(1) == 't' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(3);
       real_t x, y;
-      parseReal2(&x, &y, &token);
+      if (!sr_parseReal2(&x, &y, sr, err, filename)) return false;
       vt.push_back(x);
       vt.push_back(y);
+
+      // Parse optional w component
+      real_t w = static_cast<real_t>(0.0);
+      sr_parseReal(sr, &w);
+      vt_w.push_back(w);
+
+      sr.skip_line();
+      continue;
+    }
+
+    // skin weight. tinyobj extension
+    if (sr.peek() == 'v' && sr.peek_at(1) == 'w' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(3);
+
+      int vid;
+      if (!sr_parseInt(sr, &vid, err, filename)) return false;
+
+      skin_weight_t sw;
+      sw.vertex_id = vid;
+
+      size_t vw_loop_max = sr.remaining() + 1;
+      size_t vw_loop_iter = 0;
+      while (!sr.at_line_end() && sr.peek() != '#' &&
+             vw_loop_iter < vw_loop_max) {
+        real_t j, w;
+        sr_parseReal2(&j, &w, sr, -1.0);
+
+        if (j < static_cast<real_t>(0)) {
+          if (err) {
+            (*err) += sr.format_error(filename,
+                "failed to parse `vw' line: joint_id is negative");
+          }
+          return false;
+        }
+
+        joint_and_weight_t jw;
+        jw.joint_id = int(j);
+        jw.weight = w;
+
+        sw.weightValues.push_back(jw);
+        sr.skip_space_and_cr();
+        vw_loop_iter++;
+      }
+
+      vw.push_back(sw);
+      sr.skip_line();
       continue;
     }
 
+    context.line_number = line_num;
+
     // line
-    if (token[0] == 'l' && IS_SPACE((token[1]))) {
-      token += 2;
+    if (sr.peek() == 'l' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
+      sr.advance(2);
 
       __line_t line;
 
-      while (!IS_NEW_LINE(token[0])) {
+      size_t l_loop_max = sr.remaining() + 1;
+      size_t l_loop_iter = 0;
+      while (!sr.at_line_end() && sr.peek() != '#' &&
+             l_loop_iter < l_loop_max) {
         vertex_index_t vi;
-        if (!parseTriple(&token, static_cast<int>(v.size() / 3),
-                         static_cast<int>(vn.size() / 3),
-                         static_cast<int>(vt.size() / 2), &vi)) {
+        if (!sr_parseTriple(sr, size_to_int(v.size() / 3),
+                         size_to_int(vn.size() / 3),
+                         size_to_int(vt.size() / 2), &vi, context)) {
           if (err) {
-            std::stringstream ss;
-            ss << "Failed parse `l' line(e.g. zero value for vertex index. "
-                  "line "
-               << line_num << ".)\n";
-            (*err) += ss.str();
+            (*err) += sr.format_error(filename,
+                "failed to parse `l' line (invalid vertex index)");
           }
           return false;
         }
 
         line.vertex_indices.push_back(vi);
-
-        size_t n = strspn(token, " \t\r");
-        token += n;
+        sr.skip_space_and_cr();
+        l_loop_iter++;
       }
 
       prim_group.lineGroup.push_back(line);
-
+      sr.skip_line();
       continue;
     }
 
     // points
-    if (token[0] == 'p' && IS_SPACE((token[1]))) {
-      token += 2;
+    if (sr.peek() == 'p' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
+      sr.advance(2);
 
       __points_t pts;
 
-      while (!IS_NEW_LINE(token[0])) {
+      size_t p_loop_max = sr.remaining() + 1;
+      size_t p_loop_iter = 0;
+      while (!sr.at_line_end() && sr.peek() != '#' &&
+             p_loop_iter < p_loop_max) {
         vertex_index_t vi;
-        if (!parseTriple(&token, static_cast<int>(v.size() / 3),
-                         static_cast<int>(vn.size() / 3),
-                         static_cast<int>(vt.size() / 2), &vi)) {
+        if (!sr_parseTriple(sr, size_to_int(v.size() / 3),
+                         size_to_int(vn.size() / 3),
+                         size_to_int(vt.size() / 2), &vi, context)) {
           if (err) {
-            std::stringstream ss;
-            ss << "Failed parse `p' line(e.g. zero value for vertex index. "
-                  "line "
-               << line_num << ".)\n";
-            (*err) += ss.str();
+            (*err) += sr.format_error(filename,
+                "failed to parse `p' line (invalid vertex index)");
           }
           return false;
         }
 
         pts.vertex_indices.push_back(vi);
-
-        size_t n = strspn(token, " \t\r");
-        token += n;
+        sr.skip_space_and_cr();
+        p_loop_iter++;
       }
 
       prim_group.pointsGroup.push_back(pts);
-
+      sr.skip_line();
       continue;
     }
 
     // face
-    if (token[0] == 'f' && IS_SPACE((token[1]))) {
-      token += 2;
-      token += strspn(token, " \t");
+    if (sr.peek() == 'f' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
+      sr.advance(2);
+      sr.skip_space();
 
       face_t face;
 
       face.smoothing_group_id = current_smoothing_id;
       face.vertex_indices.reserve(3);
 
-      while (!IS_NEW_LINE(token[0])) {
+      size_t f_loop_max = sr.remaining() + 1;
+      size_t f_loop_iter = 0;
+      while (!sr.at_line_end() && sr.peek() != '#' &&
+             f_loop_iter < f_loop_max) {
         vertex_index_t vi;
-        if (!parseTriple(&token, static_cast<int>(v.size() / 3),
-                         static_cast<int>(vn.size() / 3),
-                         static_cast<int>(vt.size() / 2), &vi)) {
+        if (!sr_parseTriple(sr, size_to_int(v.size() / 3),
+                         size_to_int(vn.size() / 3),
+                         size_to_int(vt.size() / 2), &vi, context)) {
           if (err) {
-            std::stringstream ss;
-            ss << "Failed parse `f' line(e.g. zero value for face index. line "
-               << line_num << ".)\n";
-            (*err) += ss.str();
+            (*err) += sr.format_error(filename,
+                "failed to parse `f' line (invalid vertex index)");
           }
           return false;
         }
@@ -2347,51 +8521,51 @@ bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
             greatest_vt_idx > vi.vt_idx ? greatest_vt_idx : vi.vt_idx;
 
         face.vertex_indices.push_back(vi);
-        size_t n = strspn(token, " \t\r");
-        token += n;
+        sr.skip_space_and_cr();
+        f_loop_iter++;
       }
 
-      // replace with emplace_back + std::move on C++11
       prim_group.faceGroup.push_back(face);
-
+      sr.skip_line();
       continue;
     }
 
     // use mtl
-    if ((0 == strncmp(token, "usemtl", 6))) {
-      token += 6;
-      std::string namebuf = parseString(&token);
+    if (sr.match("usemtl", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
+      sr.advance(6);
+      std::string namebuf = sr_parseString(sr);
 
       int newMaterialId = -1;
-      if (material_map.find(namebuf) != material_map.end()) {
-        newMaterialId = material_map[namebuf];
+      std::map<std::string, int>::const_iterator it =
+          material_map.find(namebuf);
+      if (it != material_map.end()) {
+        newMaterialId = it->second;
       } else {
-        // { error!! material not found }
         if (warn) {
           (*warn) += "material [ '" + namebuf + "' ] not found in .mtl\n";
         }
       }
 
       if (newMaterialId != material) {
-        // Create per-face material. Thus we don't add `shape` to `shapes` at
-        // this time.
-        // just clear `faceGroup` after `exportGroupsToShape()` call.
         exportGroupsToShape(&shape, prim_group, tags, material, name,
-                            triangulate, v);
+                            triangulate, v, warn);
         prim_group.faceGroup.clear();
         material = newMaterialId;
       }
 
+      sr.skip_line();
       continue;
     }
 
     // load mtl
-    if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
+    if (sr.match("mtllib", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
       if (readMatFn) {
-        token += 7;
+        sr.advance(7);
 
+        std::string line_rest = trimTrailingWhitespace(sr.read_line());
         std::vector<std::string> filenames;
-        SplitString(std::string(token), ' ', filenames);
+        SplitString(line_rest, ' ', '\\', filenames);
+        RemoveEmptyTokens(&filenames);
 
         if (filenames.empty()) {
           if (warn) {
@@ -2405,6 +8579,11 @@ bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
         } else {
           bool found = false;
           for (size_t s = 0; s < filenames.size(); s++) {
+            if (material_filenames.count(filenames[s]) > 0) {
+              found = true;
+              continue;
+            }
+
             std::string warn_mtl;
             std::string err_mtl;
             bool ok = (*readMatFn)(filenames[s].c_str(), materials,
@@ -2419,6 +8598,7 @@ bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
 
             if (ok) {
               found = true;
+              material_filenames.insert(filenames[s]);
               break;
             }
           }
@@ -2433,15 +8613,16 @@ bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
         }
       }
 
+      sr.skip_line();
       continue;
     }
 
     // group name
-    if (token[0] == 'g' && IS_SPACE((token[1]))) {
+    if (sr.peek() == 'g' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
       // flush previous face group.
       bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name,
-                                     triangulate, v);
-      (void)ret;  // return value not used.
+                                     triangulate, v, warn);
+      (void)ret;
 
       if (shape.mesh.indices.size() > 0) {
         shapes->push_back(shape);
@@ -2454,10 +8635,14 @@ bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
 
       std::vector<std::string> names;
 
-      while (!IS_NEW_LINE(token[0])) {
-        std::string str = parseString(&token);
+      size_t g_loop_max = sr.remaining() + 1;
+      size_t g_loop_iter = 0;
+      while (!sr.at_line_end() && sr.peek() != '#' &&
+             g_loop_iter < g_loop_max) {
+        std::string str = sr_parseString(sr);
         names.push_back(str);
-        token += strspn(token, " \t\r");  // skip tag
+        sr.skip_space_and_cr();
+        g_loop_iter++;
       }
 
       // names[0] must be 'g'
@@ -2474,10 +8659,6 @@ bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
         std::stringstream ss;
         ss << names[1];
 
-        // tinyobjloader does not support multiple groups for a primitive.
-        // Currently we concatinate multiple group names with a space to get
-        // single group name.
-
         for (size_t i = 2; i < names.size(); i++) {
           ss << " " << names[i];
         }
@@ -2485,15 +8666,16 @@ bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
         name = ss.str();
       }
 
+      sr.skip_line();
       continue;
     }
 
     // object name
-    if (token[0] == 'o' && IS_SPACE((token[1]))) {
+    if (sr.peek() == 'o' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
       // flush previous face group.
       bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name,
-                                     triangulate, v);
-      (void)ret;  // return value not used.
+                                     triangulate, v, warn);
+      (void)ret;
 
       if (shape.mesh.indices.size() > 0 || shape.lines.indices.size() > 0 ||
           shape.points.indices.size() > 0) {
@@ -2504,24 +8686,23 @@ bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
       prim_group.clear();
       shape = shape_t();
 
-      // @todo { multiple object name? }
-      token += 2;
-      std::stringstream ss;
-      ss << token;
-      name = ss.str();
+      sr.advance(2);
+      std::string rest = sr.read_line();
+      name = rest;
 
+      sr.skip_line();
       continue;
     }
 
-    if (token[0] == 't' && IS_SPACE(token[1])) {
-      const int max_tag_nums = 8192;  // FIXME(syoyo): Parameterize.
+    if (sr.peek() == 't' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
+      const int max_tag_nums = 8192;
       tag_t tag;
 
-      token += 2;
+      sr.advance(2);
 
-      tag.name = parseString(&token);
+      tag.name = sr_parseString(sr);
 
-      tag_sizes ts = parseTagTriple(&token);
+      tag_sizes ts = sr_parseTagTriple(sr);
 
       if (ts.num_ints < 0) {
         ts.num_ints = 0;
@@ -2547,58 +8728,57 @@ bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
       tag.intValues.resize(static_cast<size_t>(ts.num_ints));
 
       for (size_t i = 0; i < static_cast<size_t>(ts.num_ints); ++i) {
-        tag.intValues[i] = parseInt(&token);
+        tag.intValues[i] = sr_parseInt(sr);
       }
 
       tag.floatValues.resize(static_cast<size_t>(ts.num_reals));
       for (size_t i = 0; i < static_cast<size_t>(ts.num_reals); ++i) {
-        tag.floatValues[i] = parseReal(&token);
+        tag.floatValues[i] = sr_parseReal(sr);
       }
 
       tag.stringValues.resize(static_cast<size_t>(ts.num_strings));
       for (size_t i = 0; i < static_cast<size_t>(ts.num_strings); ++i) {
-        tag.stringValues[i] = parseString(&token);
+        tag.stringValues[i] = sr_parseString(sr);
       }
 
       tags.push_back(tag);
 
+      sr.skip_line();
       continue;
     }
 
-    if (token[0] == 's' && IS_SPACE(token[1])) {
+    if (sr.peek() == 's' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
       // smoothing group id
-      token += 2;
-
-      // skip space.
-      token += strspn(token, " \t");  // skip space
+      sr.advance(2);
+      sr.skip_space();
 
-      if (token[0] == '\0') {
+      if (sr.at_line_end()) {
+        sr.skip_line();
         continue;
       }
 
-      if (token[0] == '\r' || token[1] == '\n') {
+      if (sr.peek() == '\r') {
+        sr.skip_line();
         continue;
       }
 
-      if (strlen(token) >= 3 && token[0] == 'o' && token[1] == 'f' &&
-          token[2] == 'f') {
+      if (sr.remaining() >= 3 && sr.match("off", 3)) {
         current_smoothing_id = 0;
       } else {
-        // assume number
-        int smGroupId = parseInt(&token);
+        int smGroupId = sr_parseInt(sr);
         if (smGroupId < 0) {
-          // parse error. force set to 0.
-          // FIXME(syoyo): Report warning.
           current_smoothing_id = 0;
         } else {
           current_smoothing_id = static_cast<unsigned int>(smGroupId);
         }
       }
 
+      sr.skip_line();
       continue;
-    }  // smoothing group id
+    }
 
     // Ignore unknown command.
+    sr.skip_line();
   }
 
   // not all vertices have colors, no default colors desired? -> clear colors
@@ -2606,67 +8786,158 @@ bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
     vc.clear();
   }
 
-  if (greatest_v_idx >= static_cast<int>(v.size() / 3)) {
+  if (greatest_v_idx >= size_to_int(v.size() / 3)) {
     if (warn) {
       std::stringstream ss;
-      ss << "Vertex indices out of bounds (line " << line_num << ".)\n"
-         << std::endl;
+      ss << "Vertex indices out of bounds (line " << sr.line_num() << ".)\n\n";
       (*warn) += ss.str();
     }
   }
-  if (greatest_vn_idx >= static_cast<int>(vn.size() / 3)) {
+  if (greatest_vn_idx >= size_to_int(vn.size() / 3)) {
     if (warn) {
       std::stringstream ss;
-      ss << "Vertex normal indices out of bounds (line " << line_num << ".)\n"
-         << std::endl;
+      ss << "Vertex normal indices out of bounds (line " << sr.line_num()
+         << ".)\n\n";
       (*warn) += ss.str();
     }
   }
-  if (greatest_vt_idx >= static_cast<int>(vt.size() / 2)) {
+  if (greatest_vt_idx >= size_to_int(vt.size() / 2)) {
     if (warn) {
       std::stringstream ss;
-      ss << "Vertex texcoord indices out of bounds (line " << line_num << ".)\n"
-         << std::endl;
+      ss << "Vertex texcoord indices out of bounds (line " << sr.line_num()
+         << ".)\n\n";
       (*warn) += ss.str();
     }
   }
 
   bool ret = exportGroupsToShape(&shape, prim_group, tags, material, name,
-                                 triangulate, v);
-  // exportGroupsToShape return false when `usemtl` is called in the last
-  // line.
-  // we also add `shape` to `shapes` when `shape.mesh` has already some
-  // faces(indices)
-  if (ret || shape.mesh.indices
-                 .size()) {  // FIXME(syoyo): Support other prims(e.g. lines)
+                                 triangulate, v, warn);
+  if (ret || shape.mesh.indices.size()) {
     shapes->push_back(shape);
   }
-  prim_group.clear();  // for safety
-
-  if (err) {
-    (*err) += errss.str();
-  }
+  prim_group.clear();
 
   attrib->vertices.swap(v);
-  attrib->vertex_weights.swap(v);
+  attrib->vertex_weights.swap(vertex_weights);
   attrib->normals.swap(vn);
   attrib->texcoords.swap(vt);
-  attrib->texcoord_ws.swap(vt);
+  attrib->texcoord_ws.swap(vt_w);
   attrib->colors.swap(vc);
+  attrib->skin_weights.swap(vw);
 
   return true;
 }
 
-bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
-                         void *user_data /*= NULL*/,
-                         MaterialReader *readMatFn /*= NULL*/,
-                         std::string *warn, /* = NULL*/
-                         std::string *err /*= NULL*/) {
-  std::stringstream errss;
+bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
+             std::vector<material_t> *materials, std::string *warn,
+             std::string *err, const char *filename, const char *mtl_basedir,
+             bool triangulate, bool default_vcols_fallback) {
+  attrib->vertices.clear();
+  attrib->vertex_weights.clear();
+  attrib->normals.clear();
+  attrib->texcoords.clear();
+  attrib->texcoord_ws.clear();
+  attrib->colors.clear();
+  attrib->skin_weights.clear();
+  shapes->clear();
+
+  std::string baseDir = mtl_basedir ? mtl_basedir : "";
+  if (!baseDir.empty()) {
+#ifndef _WIN32
+    const char dirsep = '/';
+#else
+    const char dirsep = '\\';
+#endif
+    if (baseDir[baseDir.length() - 1] != dirsep) baseDir += dirsep;
+  }
+  MaterialFileReader matFileReader(baseDir);
+
+#ifdef TINYOBJLOADER_USE_MMAP
+  {
+    MappedFile mf;
+    if (!mf.open(filename)) {
+      if (err) {
+        std::stringstream ss;
+        ss << "Cannot open file [" << filename << "]\n";
+        (*err) = ss.str();
+      }
+      return false;
+    }
+    if (mf.size > TINYOBJLOADER_STREAM_READER_MAX_BYTES) {
+      if (err) {
+        std::stringstream ss;
+        ss << "input stream too large (" << mf.size
+           << " bytes exceeds limit "
+           << TINYOBJLOADER_STREAM_READER_MAX_BYTES << " bytes)\n";
+        (*err) += ss.str();
+      }
+      return false;
+    }
+    StreamReader sr(mf.data, mf.size);
+    return LoadObjInternal(attrib, shapes, materials, warn, err, sr,
+                           &matFileReader, triangulate, default_vcols_fallback,
+                           filename);
+  }
+#else   // !TINYOBJLOADER_USE_MMAP
+#ifdef _WIN32
+  std::ifstream ifs(LongPathW(UTF8ToWchar(filename)).c_str());
+#else
+  std::ifstream ifs(filename);
+#endif
+  if (!ifs) {
+    if (err) {
+      std::stringstream ss;
+      ss << "Cannot open file [" << filename << "]\n";
+      (*err) = ss.str();
+    }
+    return false;
+  }
+  {
+    StreamReader sr(ifs);
+    return LoadObjInternal(attrib, shapes, materials, warn, err, sr,
+                           &matFileReader, triangulate, default_vcols_fallback,
+                           filename);
+  }
+#endif  // TINYOBJLOADER_USE_MMAP
+}
+
+bool LoadObj(attrib_t *attrib, std::vector<shape_t> *shapes,
+             std::vector<material_t> *materials, std::string *warn,
+             std::string *err, std::istream *inStream,
+             MaterialReader *readMatFn /*= NULL*/, bool triangulate,
+             bool default_vcols_fallback) {
+  attrib->vertices.clear();
+  attrib->vertex_weights.clear();
+  attrib->normals.clear();
+  attrib->texcoords.clear();
+  attrib->texcoord_ws.clear();
+  attrib->colors.clear();
+  attrib->skin_weights.clear();
+  shapes->clear();
+
+  StreamReader sr(*inStream);
+  return LoadObjInternal(attrib, shapes, materials, warn, err, sr,
+                         readMatFn, triangulate, default_vcols_fallback);
+}
+
+
+static bool LoadObjWithCallbackInternal(StreamReader &sr,
+                                        const callback_t &callback,
+                                        void *user_data,
+                                        MaterialReader *readMatFn,
+                                        std::string *warn,
+                                        std::string *err) {
+  if (sr.has_errors()) {
+    if (err) {
+      (*err) += sr.get_errors();
+    }
+    return false;
+  }
 
   // material
+  std::set<std::string> material_filenames;
   std::map<std::string, int> material_map;
-  int material_id = -1;  // -1 = invalid
+  int material_id = -1;
 
   std::vector<index_t> indices;
   std::vector<material_t> materials;
@@ -2674,76 +8945,75 @@ bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
   names.reserve(2);
   std::vector<const char *> names_out;
 
-  std::string linebuf;
-  while (inStream.peek() != -1) {
-    safeGetline(inStream, linebuf);
-
-    // Trim newline '\r\n' or '\n'
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\n')
-        linebuf.erase(linebuf.size() - 1);
-    }
-    if (linebuf.size() > 0) {
-      if (linebuf[linebuf.size() - 1] == '\r')
-        linebuf.erase(linebuf.size() - 1);
-    }
-
-    // Skip if empty line.
-    if (linebuf.empty()) {
-      continue;
-    }
-
-    // Skip leading space.
-    const char *token = linebuf.c_str();
-    token += strspn(token, " \t");
-
-    assert(token);
-    if (token[0] == '\0') continue;  // empty line
+  // Handle BOM
+  if (sr.remaining() >= 3 &&
+      static_cast<unsigned char>(sr.peek()) == 0xEF &&
+      static_cast<unsigned char>(sr.peek_at(1)) == 0xBB &&
+      static_cast<unsigned char>(sr.peek_at(2)) == 0xBF) {
+    sr.advance(3);
+  }
 
-    if (token[0] == '#') continue;  // comment line
+  while (!sr.eof()) {
+    sr.skip_space();
+    if (sr.at_line_end()) { sr.skip_line(); continue; }
+    if (sr.peek() == '#') { sr.skip_line(); continue; }
 
     // vertex
-    if (token[0] == 'v' && IS_SPACE((token[1]))) {
-      token += 2;
-      // TODO(syoyo): Support parsing vertex color extension.
-      real_t x, y, z, w;  // w is optional. default = 1.0
-      parseV(&x, &y, &z, &w, &token);
+    if (sr.peek() == 'v' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
+      sr.advance(2);
+      real_t x, y, z;
+      real_t r, g, b;
+
+      int num_components = sr_parseVertexWithColor(&x, &y, &z, &r, &g, &b, sr, err, std::string());
+      if (num_components < 0) {
+        return false;
+      }
       if (callback.vertex_cb) {
-        callback.vertex_cb(user_data, x, y, z, w);
+        callback.vertex_cb(user_data, x, y, z, r);
       }
+      if (callback.vertex_color_cb) {
+        bool found_color = (num_components == 6);
+        callback.vertex_color_cb(user_data, x, y, z, r, g, b, found_color);
+      }
+      sr.skip_line();
       continue;
     }
 
     // normal
-    if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) {
-      token += 3;
+    if (sr.peek() == 'v' && sr.peek_at(1) == 'n' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(3);
       real_t x, y, z;
-      parseReal3(&x, &y, &z, &token);
+      sr_parseReal3(&x, &y, &z, sr);
       if (callback.normal_cb) {
         callback.normal_cb(user_data, x, y, z);
       }
+      sr.skip_line();
       continue;
     }
 
     // texcoord
-    if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) {
-      token += 3;
-      real_t x, y, z;  // y and z are optional. default = 0.0
-      parseReal3(&x, &y, &z, &token);
+    if (sr.peek() == 'v' && sr.peek_at(1) == 't' && (sr.peek_at(2) == ' ' || sr.peek_at(2) == '\t')) {
+      sr.advance(3);
+      real_t x, y, z;
+      sr_parseReal3(&x, &y, &z, sr);
       if (callback.texcoord_cb) {
         callback.texcoord_cb(user_data, x, y, z);
       }
+      sr.skip_line();
       continue;
     }
 
     // face
-    if (token[0] == 'f' && IS_SPACE((token[1]))) {
-      token += 2;
-      token += strspn(token, " \t");
+    if (sr.peek() == 'f' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
+      sr.advance(2);
+      sr.skip_space();
 
       indices.clear();
-      while (!IS_NEW_LINE(token[0])) {
-        vertex_index_t vi = parseRawTriple(&token);
+      size_t cf_loop_max = sr.remaining() + 1;
+      size_t cf_loop_iter = 0;
+      while (!sr.at_line_end() && sr.peek() != '#' &&
+             cf_loop_iter < cf_loop_max) {
+        vertex_index_t vi = sr_parseRawTriple(sr);
 
         index_t idx;
         idx.vertex_index = vi.v_idx;
@@ -2751,8 +9021,8 @@ bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
         idx.texcoord_index = vi.vt_idx;
 
         indices.push_back(idx);
-        size_t n = strspn(token, " \t\r");
-        token += n;
+        sr.skip_space_and_cr();
+        cf_loop_iter++;
       }
 
       if (callback.index_cb && indices.size() > 0) {
@@ -2760,21 +9030,21 @@ bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
                           static_cast<int>(indices.size()));
       }
 
+      sr.skip_line();
       continue;
     }
 
     // use mtl
-    if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) {
-      token += 7;
-      std::stringstream ss;
-      ss << token;
-      std::string namebuf = ss.str();
+    if (sr.match("usemtl", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
+      sr.advance(6);
+      std::string namebuf = sr_parseString(sr);
 
       int newMaterialId = -1;
-      if (material_map.find(namebuf) != material_map.end()) {
-        newMaterialId = material_map[namebuf];
+      std::map<std::string, int>::const_iterator it =
+          material_map.find(namebuf);
+      if (it != material_map.end()) {
+        newMaterialId = it->second;
       } else {
-        // { warn!! material not found }
         if (warn && (!callback.usemtl_cb)) {
           (*warn) += "material [ " + namebuf + " ] not found in .mtl\n";
         }
@@ -2788,16 +9058,19 @@ bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
         callback.usemtl_cb(user_data, namebuf.c_str(), material_id);
       }
 
+      sr.skip_line();
       continue;
     }
 
     // load mtl
-    if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) {
+    if (sr.match("mtllib", 6) && (sr.peek_at(6) == ' ' || sr.peek_at(6) == '\t')) {
       if (readMatFn) {
-        token += 7;
+        sr.advance(7);
 
+        std::string line_rest = trimTrailingWhitespace(sr.read_line());
         std::vector<std::string> filenames;
-        SplitString(std::string(token), ' ', filenames);
+        SplitString(line_rest, ' ', '\\', filenames);
+        RemoveEmptyTokens(&filenames);
 
         if (filenames.empty()) {
           if (warn) {
@@ -2808,13 +9081,18 @@ bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
         } else {
           bool found = false;
           for (size_t s = 0; s < filenames.size(); s++) {
+            if (material_filenames.count(filenames[s]) > 0) {
+              found = true;
+              continue;
+            }
+
             std::string warn_mtl;
             std::string err_mtl;
             bool ok = (*readMatFn)(filenames[s].c_str(), &materials,
                                    &material_map, &warn_mtl, &err_mtl);
 
             if (warn && (!warn_mtl.empty())) {
-              (*warn) += warn_mtl;  // This should be warn message.
+              (*warn) += warn_mtl;
             }
 
             if (err && (!err_mtl.empty())) {
@@ -2823,6 +9101,7 @@ bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
 
             if (ok) {
               found = true;
+              material_filenames.insert(filenames[s]);
               break;
             }
           }
@@ -2834,7 +9113,7 @@ bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
                   "material.\n";
             }
           } else {
-            if (callback.mtllib_cb) {
+            if (callback.mtllib_cb && !materials.empty()) {
               callback.mtllib_cb(user_data, &materials.at(0),
                                  static_cast<int>(materials.size()));
             }
@@ -2842,24 +9121,28 @@ bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
         }
       }
 
+      sr.skip_line();
       continue;
     }
 
     // group name
-    if (token[0] == 'g' && IS_SPACE((token[1]))) {
+    if (sr.peek() == 'g' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
       names.clear();
 
-      while (!IS_NEW_LINE(token[0])) {
-        std::string str = parseString(&token);
+      size_t cg_loop_max = sr.remaining() + 1;
+      size_t cg_loop_iter = 0;
+      while (!sr.at_line_end() && sr.peek() != '#' &&
+             cg_loop_iter < cg_loop_max) {
+        std::string str = sr_parseString(sr);
         names.push_back(str);
-        token += strspn(token, " \t\r");  // skip tag
+        sr.skip_space_and_cr();
+        cg_loop_iter++;
       }
 
       assert(names.size() > 0);
 
       if (callback.group_cb) {
         if (names.size() > 1) {
-          // create const char* array.
           names_out.resize(names.size() - 1);
           for (size_t j = 0; j < names_out.size(); j++) {
             names_out[j] = names[j + 1].c_str();
@@ -2872,57 +9155,46 @@ bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
         }
       }
 
+      sr.skip_line();
       continue;
     }
 
     // object name
-    if (token[0] == 'o' && IS_SPACE((token[1]))) {
-      // @todo { multiple object name? }
-      token += 2;
-
-      std::stringstream ss;
-      ss << token;
-      std::string object_name = ss.str();
+    if (sr.peek() == 'o' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
+      sr.advance(2);
+      std::string object_name = sr.read_line();
 
       if (callback.object_cb) {
         callback.object_cb(user_data, object_name.c_str());
       }
 
+      sr.skip_line();
       continue;
     }
 
 #if 0  // @todo
-    if (token[0] == 't' && IS_SPACE(token[1])) {
+    if (sr.peek() == 't' && (sr.peek_at(1) == ' ' || sr.peek_at(1) == '\t')) {
       tag_t tag;
 
-      token += 2;
-      std::stringstream ss;
-      ss << token;
-      tag.name = ss.str();
-
-      token += tag.name.size() + 1;
+      sr.advance(2);
+      tag.name = sr_parseString(sr);
 
-      tag_sizes ts = parseTagTriple(&token);
+      tag_sizes ts = sr_parseTagTriple(sr);
 
       tag.intValues.resize(static_cast<size_t>(ts.num_ints));
 
       for (size_t i = 0; i < static_cast<size_t>(ts.num_ints); ++i) {
-        tag.intValues[i] = atoi(token);
-        token += strcspn(token, "/ \t\r") + 1;
+        tag.intValues[i] = sr_parseInt(sr);
       }
 
       tag.floatValues.resize(static_cast<size_t>(ts.num_reals));
       for (size_t i = 0; i < static_cast<size_t>(ts.num_reals); ++i) {
-        tag.floatValues[i] = parseReal(&token);
-        token += strcspn(token, "/ \t\r") + 1;
+        tag.floatValues[i] = sr_parseReal(sr);
       }
 
       tag.stringValues.resize(static_cast<size_t>(ts.num_strings));
       for (size_t i = 0; i < static_cast<size_t>(ts.num_strings); ++i) {
-        std::stringstream ss;
-        ss << token;
-        tag.stringValues[i] = ss.str();
-        token += tag.stringValues[i].size() + 1;
+        tag.stringValues[i] = sr_parseString(sr);
       }
 
       tags.push_back(tag);
@@ -2930,15 +9202,22 @@ bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
 #endif
 
     // Ignore unknown command.
-  }
-
-  if (err) {
-    (*err) += errss.str();
+    sr.skip_line();
   }
 
   return true;
 }
 
+bool LoadObjWithCallback(std::istream &inStream, const callback_t &callback,
+                         void *user_data /*= NULL*/,
+                         MaterialReader *readMatFn /*= NULL*/,
+                         std::string *warn, /* = NULL*/
+                         std::string *err /*= NULL*/) {
+  StreamReader sr(inStream);
+  return LoadObjWithCallbackInternal(sr, callback, user_data, readMatFn,
+                                     warn, err);
+}
+
 bool ObjReader::ParseFromFile(const std::string &filename,
                               const ObjReaderConfig &config) {
   std::string mtl_search_path;
@@ -2948,8 +9227,9 @@ bool ObjReader::ParseFromFile(const std::string &filename,
     // split at last '/'(for unixish system) or '\\'(for windows) to get
     // the base directory of .obj file
     //
-    if (filename.find_last_of("/\\") != std::string::npos) {
-      mtl_search_path = filename.substr(0, filename.find_last_of("/\\"));
+    size_t pos = filename.find_last_of("/\\");
+    if (pos != std::string::npos) {
+      mtl_search_path = filename.substr(0, pos);
     }
   } else {
     mtl_search_path = config.mtl_search_path;
diff --git a/tools/travis_postbuild.sh b/tools/travis_postbuild.sh
deleted file mode 100755
index 00c5d498..00000000
--- a/tools/travis_postbuild.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-DATEVAL=`date +%Y-%m-%d`
-VERSIONVAL=master
-
-# Use tag as version
-if [ $TRAVIS_TAG ]; then
-  VERSIONVAL=$TRAVIS_TAG
-fi
-
-sed -e s%@DATE@%${DATEVAL}% .bintray.in > .bintray.tmp
-sed -e s%@VERSION@%${VERSIONVAL}% .bintray.tmp > .bintray.json