diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..c2e9b9c9 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,15 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + groups: + all-dependencies: + patterns: + - "*" diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index b696b926..221f5d42 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -11,10 +11,10 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.x' cache: "pip" diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 198cf7b5..6ed5a1c1 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -8,10 +8,10 @@ jobs: # by the push to the branch. if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository - runs-on: ubuntu-latest + runs-on: ubuntu-slim steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 - name: ruff check run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 530238c9..bc101aa7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,48 +9,79 @@ jobs: test: strategy: matrix: - os: ["ubuntu-latest", "windows-latest", "macos-latest"] - py: ["3.13-dev", "3.12", "3.11", "3.10", "3.9", "3.8"] + os: + - ubuntu-latest + - ubuntu-24.04-arm + - windows-latest + - windows-11-arm + - macos-15-intel + - macos-latest + py: ["3.15", "3.15t", "3.14", "3.14t", "3.13", "3.12", "3.11", "3.10"] + exclude: + - os: windows-11-arm + py: "3.10" runs-on: ${{ matrix.os }} name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ matrix.py }} allow-prereleases: true cache: "pip" + - name: Prepare + shell: bash + run: | + python -m pip install -r requirements.txt pytest + + - name: Install pytest-run-parallel under free-threading + if: contains(matrix.py, 't') + run: | + pip install pytest-run-parallel + - name: Build shell: bash run: | - pip install -r requirements.txt pytest make cython pip install . - name: Test (C extension) + if: ${{ ! contains(matrix.py, 't') }} shell: bash run: | pytest -v test - name: Test (pure Python fallback) + if: ${{ ! contains(matrix.py, 't') }} shell: bash run: | MSGPACK_PUREPYTHON=1 pytest -v test + - name: Test (C extension) in parallel under free-threading + if: contains(matrix.py, 't') + shell: bash + run: | + pytest -v --parallel-threads=auto --iterations=20 test + + - name: Test (pure Python fallback) in parallel under free-threading + if: contains(matrix.py, 't') + shell: bash + run: | + MSGPACK_PUREPYTHON=1 pytest -v --parallel-threads=auto --iterations=20 test + - name: build packages shell: bash run: | - pip install build - python -m build + python -m build -nv - name: upload packages - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: dist-${{ matrix.os }}-${{ matrix.py }} path: dist diff --git a/.github/workflows/test_debug.yml b/.github/workflows/test_debug.yml new file mode 100644 index 00000000..8aeec640 --- /dev/null +++ b/.github/workflows/test_debug.yml @@ -0,0 +1,62 @@ +name: Run tests with debug Python +on: + push: + branches: [main] + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + env: + PYTHON_VERSION: 3.14.6 + steps: + - name: Checkout + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + + - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: /opt/python-debug + key: python-debug-${{ runner.os }}-${{ env.PYTHON_VERSION }} + + - name: Install build dependencies + if: steps.cache.outputs.cache-hit != 'true' + run: | + sudo apt-get update + sudo apt-get install -y build-essential libssl-dev zlib1g-dev \ + libbz2-dev libreadline-dev libsqlite3-dev curl libncursesw5-dev \ + xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev + + - name: Build Python with Py_DEBUG + if: steps.cache.outputs.cache-hit != 'true' + run: | + wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz + tar -xzf Python-${PYTHON_VERSION}.tgz + cd Python-${PYTHON_VERSION} + + ./configure --with-pydebug --prefix=/opt/python-debug + make -j4 + sudo make install + + - name: Create venv from debug Python + run: | + PYDBG_BIN="/opt/python-debug/bin/python3" + "$PYDBG_BIN" -m venv .venv + echo "$PWD/.venv/bin" >> "$GITHUB_PATH" + echo "VIRTUAL_ENV=$PWD/.venv" >> "$GITHUB_ENV" + + - name: Prepare + run: | + python -m pip install -r requirements.txt pytest + + - name: Build + run: | + make cython + pip install . + + - name: Test (C extension) + run: | + pytest -v test + + - name: Test (pure Python fallback) + run: | + MSGPACK_PUREPYTHON=1 pytest -v test diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index 50157223..92fce016 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -1,27 +1,33 @@ -name: Build Wheels +name: Build sdist and Wheels on: push: branches: [main] - create: + release: + types: + - published workflow_dispatch: jobs: build_wheels: strategy: matrix: - os: ["ubuntu-latest", "windows-latest", "macos-latest"] + include: + - os: ubuntu-24.04 + - os: ubuntu-24.04-arm + - os: windows-latest + - os: windows-11-arm + - os: macos-15-intel + - os: macos-latest + - os: ubuntu-24.04 + cibw_archs: riscv64 + name_suffix: "-riscv64" + runs-on: ${{ matrix.os }} - name: Build wheels on ${{ matrix.os }} + name: Build wheels on ${{ matrix.os }}${{ matrix.name_suffix || '' }} steps: - - name: Set up QEMU - if: runner.os == 'Linux' - uses: docker/setup-qemu-action@v3 - with: - platforms: all - - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.x" cache: "pip" @@ -31,23 +37,68 @@ jobs: pip install -r requirements.txt make cython + - name: Set up QEMU for emulation + if: matrix.cibw_archs == 'riscv64' + uses: docker/setup-qemu-action@06116385d9baf250c9f4dcb4858b16962ea869c3 # v4.1.0 + with: + platforms: ${{ matrix.cibw_archs }} + - name: Build - uses: pypa/cibuildwheel@v2.20.0 + uses: pypa/cibuildwheel@294735312765b09d24a2fbec22660ce817587d55 # v4.1.0 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" - CIBW_ARCHS_LINUX: auto aarch64 - CIBW_ARCHS_MACOS: x86_64 universal2 arm64 - CIBW_SKIP: "pp* cp38-macosx_*" + CIBW_SKIP: "cp38-* cp39-* cp310-win_arm64" + CIBW_ARCHS: ${{ matrix.cibw_archs || 'auto' }} - name: Build sdist - if: runner.os == 'Linux' + if: runner.os == 'Linux' && runner.arch == 'X64' run: | pip install build python -m build -s -o wheelhouse - name: Upload Wheels to artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: - name: wheels-${{ matrix.os }} + name: wheels-${{ matrix.os }}${{ matrix.name_suffix || '' }} path: wheelhouse + + # combine all wheels into one artifact + combine_wheels: + needs: [build_wheels] + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + # unpacks all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - name: Upload Wheels to artifact + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: wheels-all + path: dist + + # https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml + upload_pypi: + needs: [build_wheels] + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + if: github.event_name == 'release' && github.event.action == 'published' + # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) + # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + steps: + - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + # unpacks all CIBW artifacts into dist/ + pattern: wheels-* + path: dist + merge-multiple: true + + - uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0 + #with: + # To test: repository-url: https://test.pypi.org/legacy/ diff --git a/.gitignore b/.gitignore index 341be631..31d7d6e1 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ msgpack/*.cpp /tags /docs/_build .cache +uv.lock diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..b8fbd8ce --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,580 @@ +# 1.2.1 + +Release Date: 2026-06-19 + +Fix a segfault when calling `Unpacker.unpack()` or `Unpacker.skip()` after an unpacking failure. +But note that reusing the same `Unpacker` instance after an unpacking failure is not supported. +Please create a new `Unpacker` instance instead. GHSA-6v7p-g79w-8964 + + +# 1.2.0 + +Release Date: 2026-06-11 + +- Support free threaded Python. #654, #686 +- Dropped support for Python 3.9. #656 +- Fix missing error checks in C code. #665, #666, #667, #672 +- Fix `strict_map_key` option didn't work for `object_pairs_hook`. #673 +- Increase DEFAULT_RECURSE_LIMIT of Unpacker to 1024. #676 +- Fix memory leak when Unpacker returns error for invalid input. #671 +- Fix `Packer.pack_ext_type()` ignored `autoreset` option. #663 +- Fix `Timestamp.from_datetime()` returning wrong value for pre-epoch datetimes. #662 +- Fix use-after-free in `unpackb()` and `Unpacker.unpack()` for non-contiguous input. #677 +- Fix possible memory leak when calling `Unpacker.__init__()` several times. #687 + + +# 1.1.2 + +Release Date: 2025-10-08 + +This release does not change source code. It updates only building +wheels: + +- Update Cython to v3.1.4 +- Update cibuildwheel to v3.2.0 +- Drop Python 3.8 +- Add Python 3.14 +- Add windows-arm + +# 1.1.1 + +Release Date: 2025-06-13 + +- No change from 1.1.1rc1. + +# 1.1.1rc1 + +Release Date: 2025-06-06 + +- Update Cython to 3.1.1 and cibuildwheel to 2.23.3. + +# 1.1.0 + +Release Date: 2024-09-10 + +- use `PyLong_*` instead of `PyInt_*` for compatibility with future + Cython. (#620) + +# 1.1.0rc2 + +Release Date: 2024-08-19 + +- Update Cython to 3.0.11 for better Python 3.13 support. +- Update cibuildwheel to 2.20.0 to build Python 3.13 wheels. + +# 1.1.0rc1 + +Release Date: 2024-05-07 + +- Update Cython to 3.0.10 to reduce C warnings and future support for + Python 3.13. +- Stop using C++ mode in Cython to reduce compile error on some + compilers. +- `Packer()` has `buf_size` option to specify initial size of internal + buffer to reduce reallocation. +- The default internal buffer size of `Packer()` is reduced from 1MiB to + 256KiB to optimize for common use cases. Use `buf_size` if you are + packing large data. +- `Timestamp.to_datetime()` and `Timestamp.from_datetime()` become more + accurate by avoiding floating point calculations. (#591) +- The Cython code for `Unpacker` has been slightly rewritten for + maintainability. +- The fallback implementation of `Packer()` and `Unpacker()` now uses + keyword-only arguments to improve compatibility with the Cython + implementation. + +# 1.0.8 + +Release Date: 2024-03-01 + +- Update Cython to 3.0.8. This fixes memory leak when iterating + `Unpacker` object on Python 3.12. +- Do not include C/Cython files in binary wheels. + +# 1.0.7 + +Release Date: 2023-09-28 + +- Fix build error of extension module on Windows. (#567) +- `setup.py` doesn't skip build error of extension module. (#568) + +# 1.0.6 + +Release Date: 2023-09-21 + +> [!NOTE] +> v1.0.6 Wheels for Windows don't contain extension module. Please +> upgrade to v1.0.7 or newer. + +- Add Python 3.12 wheels (#517) +- Remove Python 2.7, 3.6, and 3.7 support + +# 1.0.5 + +Release Date: 2023-03-08 + +- Use `__BYTE_ORDER__` instead of `__BYTE_ORDER` for portability. (#513, + \#514) +- Add Python 3.11 wheels (#517) +- fallback: Fix packing multidimensional memoryview (#527) + +# 1.0.4 + +Release Date: 2022-06-03 + +- Support Python 3.11 (beta). +- Don't define `__*_ENDIAN__` macro + on Unix. by @methane in + +- Use PyFloat_Pack8() on Python 3.11a7 by @vstinner in + +- Fix Unpacker max_buffer_length handling by @methane in + + +# 1.0.3 + +Release Date: 2021-11-24 JST + +- Fix Docstring (#459) +- Fix error formatting (#463) +- Improve error message about strict_map_key (#485) + +# 1.0.2 + +- Fix year 2038 problem regression in 1.0.1. (#451) + +# 1.0.1 + +- Add Python 3.9 and linux/arm64 wheels. (#439) +- Fixed Unpacker.tell() after read_bytes() (#426) +- Fixed unpacking datetime before epoch on Windows (#433) +- Fixed fallback Packer didn't check DateTime.tzinfo (#434) + +# 1.0.0 + +Release Date: 2020-02-17 + +- Remove Python 2 support from the `msgpack/_cmsgpack`. + `msgpack/fallback` still supports Python 2. +- Remove `encoding` option from the Packer and Unpacker. +- Unpacker: The default value of `max_buffer_size` is changed to 100MiB. +- Unpacker: `strict_map_key` is True by default now. +- Unpacker: String map keys are interned. +- Drop old buffer protocol support. +- Support Timestamp type. +- Support serializing and decerializing `datetime` object with tzinfo. +- Unpacker: `Fix Unpacker.read_bytes()` in fallback implementation. + (#352) + +# 0.6.2 + +Release Date: 2019-09-20 + +- Support Python 3.8. +- Update Cython to 0.29.13 for support Python 3.8. +- Some small optimizations. + +# 0.6.1 + +Release Date: 2019-01-25 + +This release is for mitigating pain caused by v0.6.0 reduced max input +limits for security reason. + +- `unpackb(data)` configures `max_*_len` options from `len(data)`, + instead of static default sizes. +- `Unpacker(max_buffer_len=N)` configures `max_*_len` options from `N`, + instead of static default sizes. +- `max_bin_len`, `max_str_len`, and `max_ext_len` are deprecated. Since + this is minor release, it's document only deprecation. + +# 0.6.0 + +Release Date: 2018-11-30 + +This release contains some backward incompatible changes for security +reason (DoS). + +## Important changes + +- unpacker: Default value of input limits are smaller than before to + avoid DoS attack. If you need to handle large data, you need to + specify limits manually. (#319) +- Unpacker doesn't wrap underlying `ValueError` (including + `UnicodeError`) into `UnpackValueError`. If you want to catch all + exception during unpack, you need to use `try ... except Exception` + with minimum try code block. (#323, \#233) +- `PackValueError` and `PackOverflowError` are also removed. You need to + catch normal `ValueError` and `OverflowError`. (#323, \#233) +- Unpacker has `strict_map_key` option now. When it is true, only bytes + and str (unicode in Python 2) are allowed for map keys. It is + recommended to avoid hashdos. Default value of this option is False + for backward compatibility reason. But it will be changed True in 1.0. + (#296, \#334) + +## Other changes + +- Extension modules are merged. There is `msgpack._cmsgpack` instead of + `msgpack._packer` and `msgpack._unpacker`. (#314, \#328) +- Add `Unpacker.getbuffer()` method. (#320) +- unpacker: `msgpack.StackError` is raised when input data contains too + nested data. (#331) +- unpacker: `msgpack.FormatError` is raised when input data is not valid + msgpack format. (#331) + +# 0.5.6 + +- Fix fallback.Unpacker.feed() dropped unused data from buffer (#287) +- Resurrect fallback.unpack() and `unpacker.unpack()`. They were removed + at 0.5.5 but it breaks backward compatibility. (#288, #290) + +# 0.5.5 + +- Fix memory leak in pure Python Unpacker.feed() (#283) +- Fix unpack() didn't support `raw` option + (#285) + +# 0.5.4 + +- Undeprecate `unicode_errors` option. (#278) + +# 0.5.3 + +- Fixed regression when passing `unicode_errors` to Packer but not + `encoding`. (#277) + +# 0.5.2 + +- Add `raw` option to Unpacker. It is preferred way than `encoding` + option. +- Packer.pack() reset buffer on exception (#274) + +# 0.5.1 + +- Remove FutureWarning about use_bin_type option (#271) + +# 0.5.0 + +There are some deprecations. Please read changes carefully. + +## Changes + +- Drop Python 2.6 and ~3.4 support. Python 2.7 and 3.5+ are supported. +- Deprecate useless custom exceptions. Use ValueError instead of + PackValueError, Exception instead of PackException and + UnpackException, etc... See msgpack/exceptions.py +- Add *strict_types* option to packer. It can be used to serialize + subclass of builtin types. For example, when packing object which type + is subclass of dict, `default()` is called. `default()` is called for + tuple too. +- Pure Python implementation supports packing memoryview object. +- Support packing bytearray. +- Add `Unpacker.tell()`. And `write_bytes` option is deprecated. + +## Bugs fixed + +- Fixed zero length raw can't be decoded when encoding is specified. + (#236) + +# 0.4.8 + +Release Date: 2016-07-29 + +## Bugs fixed + +- Calling ext_hook with wrong length. (Only on Windows, maybe. \#203) + +# 0.4.7 + +Release Date: 2016-01-25 + +## Bugs fixed + +- Memory leak when unpack is failed + +## Changes + +- Reduce compiler warnings while building extension module +- unpack() now accepts ext_hook argument like Unpacker and unpackb() +- Update Cython version to 0.23.4 +- default function is called when integer overflow + +# 0.4.6 + +Release Date: 2015-03-13 + +## Bugs fixed + +- fallback.Unpacker: Fix Data corruption when OutOfData. This bug only + affects "Streaming unpacking." + +# 0.4.5 + +Release Date: 2015-01-25 + +## Incompatible Changes + +## Changes + +## Bugs fixed + +- Fix test failure on pytest 2.3. (by @ktdreyer) +- Fix typos in ChangeLog. (Thanks to @dmick) +- Improve README.rst (by @msabramo) + +# 0.4.4 + +Release Date: 2015-01-09 + +## Incompatible Changes + +## Changes + +## Bugs fixed + +- Fix compile error. + +# 0.4.3 + +Release Date: 2015-01-07 + +## Incompatible Changes + +## Changes + +## Bugs fixed + +- Unpacker may unpack wrong uint32 value on 32bit or LLP64 environment. + (#101) +- Build failed on Windows Python 2.7. + +# 0.4.2 + +Release Date: 2014-03-26 + +## Incompatible Changes + +## Changes + +## Bugs fixed + +- Unpacker doesn't increment refcount of ExtType hook. +- Packer raises no exception for inputs doesn't fit to msgpack format. + +# 0.4.1 + +Release Date: 2014-02-17 + +## Incompatible Changes + +## Changes + +- fallback.Unpacker.feed() supports bytearray. + +## Bugs fixed + +- Unpacker doesn't increment refcount of hooks. Hooks may be GCed while + unpacking. +- Unpacker may read unfilled internal buffer. + +# 0.4.0 + +Release Date: 2013-10-21 + +## Incompatible Changes + +- Raises TypeError instead of ValueError when packer receives + unsupported type. + +## Changes + +- Support New msgpack spec. + +# 0.3.0 + +## Incompatible Changes + +- Default value of `use_list` is `True` for now. (It was `False` for + 0.2.x) You should pass it explicitly for compatibility to 0.2.x. +- `Unpacker.unpack()` and some unpack methods now raise `OutOfData` instead of + `StopIteration`. `StopIteration` is used for iterator protocol only. + +## Changes + +- Pure Python fallback module is added. (thanks to bwesterb) +- Add `.skip()` method to `Unpacker` (thanks to jnothman) +- Add capturing feature. You can pass the writable object to + `Unpacker.unpack()` as a second parameter. +- Add `Packer.pack_array_header` and `Packer.pack_map_header`. These + methods only pack header of each type. +- Add `autoreset` option to `Packer` (default: True). Packer doesn't + return packed bytes and clear internal buffer. +- Add `Packer.pack_map_pairs`. It packs sequence of pair to map type. + +# 0.2.4 + +Release Date: 2012-12-22 + +## Bugs fixed + +- Fix SEGV when object_hook or object_pairs_hook raise Exception. (#39) + +# 0.2.3 + +Release Date: 2012-12-11 + +## Changes + +- Warn when use_list is not specified. It's default value will be + changed in 0.3. + +## Bugs fixed + +- Can't pack subclass of dict. + +# 0.2.2 + +Release Date: 2012-09-21 + +## Changes + +- Add `use_single_float` option to `Packer`. When it is true, packs + float object in single precision format. + +## Bugs fixed + +- `unpack()` didn't restores gc state when it called with gc disabled. + `unpack()` doesn't control gc now instead of restoring gc state + collectly. User can control gc state when gc cause performance issue. +- `Unpacker`'s `read_size` option didn't used. + +# 0.2.1 + +Release Date: 2012-08-20 + +## Changes + +- Add `max_buffer_size` parameter to Unpacker. It limits internal buffer + size and allows unpack data from untrusted source safely. +- Unpacker's buffer reallocation algorithm is less greedy now. It cause + performance decrease in rare case but memory efficient and don't + allocate than `max_buffer_size`. + +## Bugs fixed + +- Fix msgpack didn't work on SPARC Solaris. It was because choosing + wrong byteorder on compilation time. Use `sys.byteorder` to get + correct byte order. Very thanks to Chris Casey for giving test + environment to me. + +# 0.2.0 + +Release Date: 2012-06-27 + +## Changes + +- Drop supporting Python 2.5 and unify tests for Py2 and Py3. +- Use new version of msgpack-c. It packs correctly on big endian + platforms. +- Remove deprecated packs and unpacks API. + +## Bugs fixed + +- \#8 Packing subclass of dict raises TypeError. (Thanks to Steeve + Morin.) + +# 0.1.13 + +Release Date: 2012-04-21 + +## New + +- Don't accept subtype of list and tuple as msgpack list. (Steeve Morin) + It allows customize how it serialized with `default` argument. + +## Bugs fixed + +- Fix wrong error message. (David Wolever) +- Fix memory leak while unpacking when `object_hook` or `list_hook` is + used. (Steeve Morin) + +## Other changes + +- setup.py works on Python 2.5 (Steffen Siering) +- Optimization for serializing dict. + +# 0.1.12 + +Release Date: 2011-12-27 + +## Bugs fixed + +- Re-enable packs/unpacks removed at 0.1.11. It will be removed when 0.2 + is released. + +# 0.1.11 + +Release Date: 2011-12-26 + +## Bugs fixed + +- Include test code for Python3 to sdist. (Johan Bergström) +- Fix compilation error on MSVC. (davidgaleano) + +# 0.1.10 + +Release Date: 2011-08-22 + +## New feature + +- Add `encoding` and `unicode_errors` option to packer and unpacker. + When this option is specified, (un)packs unicode object instead of + bytes. This enables using msgpack as a replacement of json. (tailhook) + +# 0.1.9 + +Release Date: 2011-01-29 + +## New feature + +- `use_list` option is added to unpack(b) like Unpacker. (Use keyword + argument because order of parameters are different) + +## Bugs fixed + +- Fix typo. +- Add MemoryError check. + +# 0.1.8 + +Release Date: 2011-01-10 + +## New feature + +- Support `loads` and `dumps` aliases for API compatibility with + simplejson and pickle. +- Add *object_hook* and *list_hook* option to unpacker. It allows you to + hook unpacking mapping type and array type. +- Add *default* option to packer. It allows you to pack unsupported + types. +- unpacker accepts (old) buffer types. + +## Bugs fixed + +- Fix segv around `Unpacker.feed` or `Unpacker(file)`. + +# 0.1.7 + +Release Date: 2010-11-02 + +## New feature + +- Add *object_hook* and *list_hook* option to unpacker. It allows you to + hook unpacking mapping type and array type. +- Add *default* option to packer. It allows you to pack unsupported + types. +- unpacker accepts (old) buffer types. + +## Bugs fixed + +- Compilation error on win32. diff --git a/ChangeLog.rst b/ChangeLog.rst deleted file mode 100644 index 863c6b2c..00000000 --- a/ChangeLog.rst +++ /dev/null @@ -1,598 +0,0 @@ -1.1.0 -===== - -Release Date: 2024-09-10 - -* use ``PyLong_*`` instead of ``PyInt_*`` for compatibility with - future Cython. (#620) - -1.1.0rc2 -======== - -Release Date: 2024-08-19 - -* Update Cython to 3.0.11 for better Python 3.13 support. -* Update cibuildwheel to 2.20.0 to build Python 3.13 wheels. - -1.1.0rc1 -======== - -Release Date: 2024-05-07 - -* Update Cython to 3.0.10 to reduce C warnings and future support for Python 3.13. -* Stop using C++ mode in Cython to reduce compile error on some compilers. -* ``Packer()`` has ``buf_size`` option to specify initial size of - internal buffer to reduce reallocation. -* The default internal buffer size of ``Packer()`` is reduced from - 1MiB to 256KiB to optimize for common use cases. Use ``buf_size`` - if you are packing large data. -* ``Timestamp.to_datetime()`` and ``Timestamp.from_datetime()`` become - more accurate by avoiding floating point calculations. (#591) -* The Cython code for ``Unpacker`` has been slightly rewritten for maintainability. -* The fallback implementation of ``Packer()`` and ``Unpacker()`` now uses keyword-only - arguments to improve compatibility with the Cython implementation. - -1.0.8 -===== - -Release Date: 2024-03-01 - -* Update Cython to 3.0.8. This fixes memory leak when iterating - ``Unpacker`` object on Python 3.12. -* Do not include C/Cython files in binary wheels. - - -1.0.7 -===== - -Release Date: 2023-09-28 - -* Fix build error of extension module on Windows. (#567) -* ``setup.py`` doesn't skip build error of extension module. (#568) - - -1.0.6 -===== - -Release Date: 2023-09-21 - -.. note:: - v1.0.6 Wheels for Windows don't contain extension module. - Please upgrade to v1.0.7 or newer. - -* Add Python 3.12 wheels (#517) -* Remove Python 2.7, 3.6, and 3.7 support - - -1.0.5 -===== - -Release Date: 2023-03-08 - -* Use ``__BYTE_ORDER__`` instead of ``__BYTE_ORDER`` for portability. (#513, #514) -* Add Python 3.11 wheels (#517) -* fallback: Fix packing multidimensional memoryview (#527) - -1.0.4 -===== - -Release Date: 2022-06-03 - -* Support Python 3.11 (beta). -* Don't define `__*_ENDIAN__` macro on Unix. by @methane in https://github.com/msgpack/msgpack-python/pull/495 -* Use PyFloat_Pack8() on Python 3.11a7 by @vstinner in https://github.com/msgpack/msgpack-python/pull/499 -* Fix Unpacker max_buffer_length handling by @methane in https://github.com/msgpack/msgpack-python/pull/506 - -1.0.3 -===== - -Release Date: 2021-11-24 JST - -* Fix Docstring (#459) -* Fix error formatting (#463) -* Improve error message about strict_map_key (#485) - -1.0.2 -===== - -* Fix year 2038 problem regression in 1.0.1. (#451) - -1.0.1 -===== - -* Add Python 3.9 and linux/arm64 wheels. (#439) -* Fixed Unpacker.tell() after read_bytes() (#426) -* Fixed unpacking datetime before epoch on Windows (#433) -* Fixed fallback Packer didn't check DateTime.tzinfo (#434) - -1.0.0 -===== - -Release Date: 2020-02-17 - -* Remove Python 2 support from the ``msgpack/_cmsgpack``. - ``msgpack/fallback`` still supports Python 2. -* Remove ``encoding`` option from the Packer and Unpacker. -* Unpacker: The default value of ``max_buffer_size`` is changed to 100MiB. -* Unpacker: ``strict_map_key`` is True by default now. -* Unpacker: String map keys are interned. -* Drop old buffer protocol support. -* Support Timestamp type. -* Support serializing and decerializing ``datetime`` object - with tzinfo. -* Unpacker: ``Fix Unpacker.read_bytes()`` in fallback implementation. (#352) - - -0.6.2 -===== - -Release Date: 2019-09-20 - -* Support Python 3.8. -* Update Cython to 0.29.13 for support Python 3.8. -* Some small optimizations. - - -0.6.1 -====== - -Release Date: 2019-01-25 - -This release is for mitigating pain caused by v0.6.0 reduced max input limits -for security reason. - -* ``unpackb(data)`` configures ``max_*_len`` options from ``len(data)``, - instead of static default sizes. - -* ``Unpacker(max_buffer_len=N)`` configures ``max_*_len`` options from ``N``, - instead of static default sizes. - -* ``max_bin_len``, ``max_str_len``, and ``max_ext_len`` are deprecated. - Since this is minor release, it's document only deprecation. - - -0.6.0 -====== - -Release Date: 2018-11-30 - -This release contains some backward incompatible changes for security reason (DoS). - -Important changes ------------------ - -* unpacker: Default value of input limits are smaller than before to avoid DoS attack. - If you need to handle large data, you need to specify limits manually. (#319) - -* Unpacker doesn't wrap underlying ``ValueError`` (including ``UnicodeError``) into - ``UnpackValueError``. If you want to catch all exception during unpack, you need - to use ``try ... except Exception`` with minimum try code block. (#323, #233) - -* ``PackValueError`` and ``PackOverflowError`` are also removed. You need to catch - normal ``ValueError`` and ``OverflowError``. (#323, #233) - -* Unpacker has ``strict_map_key`` option now. When it is true, only bytes and str - (unicode in Python 2) are allowed for map keys. It is recommended to avoid - hashdos. Default value of this option is False for backward compatibility reason. - But it will be changed True in 1.0. (#296, #334) - -Other changes -------------- - -* Extension modules are merged. There is ``msgpack._cmsgpack`` instead of - ``msgpack._packer`` and ``msgpack._unpacker``. (#314, #328) - -* Add ``Unpacker.getbuffer()`` method. (#320) - -* unpacker: ``msgpack.StackError`` is raised when input data contains too - nested data. (#331) - -* unpacker: ``msgpack.FormatError`` is raised when input data is not valid - msgpack format. (#331) - - -0.5.6 -====== - -* Fix fallback.Unpacker.feed() dropped unused data from buffer (#287) -* Resurrect fallback.unpack() and _unpacker.unpack(). - They were removed at 0.5.5 but it breaks backward compatibility. (#288, #290) - -0.5.5 -====== - -* Fix memory leak in pure Python Unpacker.feed() (#283) -* Fix unpack() didn't support `raw` option (#285) - -0.5.4 -====== - -* Undeprecate ``unicode_errors`` option. (#278) - -0.5.3 -====== - -* Fixed regression when passing ``unicode_errors`` to Packer but not ``encoding``. (#277) - -0.5.2 -====== - -* Add ``raw`` option to Unpacker. It is preferred way than ``encoding`` option. - -* Packer.pack() reset buffer on exception (#274) - - -0.5.1 -====== - -* Remove FutureWarning about use_bin_type option (#271) - -0.5.0 -====== - -There are some deprecations. Please read changes carefully. - -Changes -------- - -* Drop Python 2.6 and ~3.4 support. Python 2.7 and 3.5+ are supported. - -* Deprecate useless custom exceptions. Use ValueError instead of PackValueError, - Exception instead of PackException and UnpackException, etc... - See msgpack/exceptions.py - -* Add *strict_types* option to packer. It can be used to serialize subclass of - builtin types. For example, when packing object which type is subclass of dict, - ``default()`` is called. ``default()`` is called for tuple too. - -* Pure Python implementation supports packing memoryview object. - -* Support packing bytearray. - -* Add ``Unpacker.tell()``. And ``write_bytes`` option is deprecated. - - -Bugs fixed ----------- - -* Fixed zero length raw can't be decoded when encoding is specified. (#236) - - -0.4.8 -===== -:release date: 2016-07-29 - -Bugs fixed ----------- - -* Calling ext_hook with wrong length. (Only on Windows, maybe. #203) - - -0.4.7 -===== -:release date: 2016-01-25 - -Bugs fixed ----------- - -* Memory leak when unpack is failed - -Changes -------- - -* Reduce compiler warnings while building extension module -* unpack() now accepts ext_hook argument like Unpacker and unpackb() -* Update Cython version to 0.23.4 -* default function is called when integer overflow - - -0.4.6 -===== -:release date: 2015-03-13 - -Bugs fixed ----------- - -* fallback.Unpacker: Fix Data corruption when OutOfData. - This bug only affects "Streaming unpacking." - - -0.4.5 -===== -:release date: 2015-01-25 - -Incompatible Changes --------------------- - -Changes -------- - -Bugs fixed ----------- - -* Fix test failure on pytest 2.3. (by @ktdreyer) -* Fix typos in ChangeLog. (Thanks to @dmick) -* Improve README.rst (by @msabramo) - - -0.4.4 -===== -:release date: 2015-01-09 - -Incompatible Changes --------------------- - -Changes -------- - -Bugs fixed ----------- - -* Fix compile error. - -0.4.3 -===== -:release date: 2015-01-07 - -Incompatible Changes --------------------- - -Changes -------- - -Bugs fixed ----------- - -* Unpacker may unpack wrong uint32 value on 32bit or LLP64 environment. (#101) -* Build failed on Windows Python 2.7. - -0.4.2 -===== -:release date: 2014-03-26 - -Incompatible Changes --------------------- - -Changes -------- - -Bugs fixed ----------- - -* Unpacker doesn't increment refcount of ExtType hook. -* Packer raises no exception for inputs doesn't fit to msgpack format. - -0.4.1 -===== -:release date: 2014-02-17 - -Incompatible Changes --------------------- - -Changes -------- - -* fallback.Unpacker.feed() supports bytearray. - -Bugs fixed ----------- - -* Unpacker doesn't increment refcount of hooks. Hooks may be GCed while unpacking. -* Unpacker may read unfilled internal buffer. - -0.4.0 -===== -:release date: 2013-10-21 - -Incompatible Changes --------------------- - -* Raises TypeError instead of ValueError when packer receives unsupported type. - -Changes -------- - -* Support New msgpack spec. - - -0.3.0 -===== - -Incompatible Changes --------------------- - -* Default value of ``use_list`` is ``True`` for now. (It was ``False`` for 0.2.x) - You should pass it explicitly for compatibility to 0.2.x. -* `Unpacker.unpack()` and some unpack methods now raise `OutOfData` instead of - `StopIteration`. `StopIteration` is used for iterator protocol only. - -Changes -------- -* Pure Python fallback module is added. (thanks to bwesterb) -* Add ``.skip()`` method to ``Unpacker`` (thanks to jnothman) -* Add capturing feature. You can pass the writable object to - ``Unpacker.unpack()`` as a second parameter. -* Add ``Packer.pack_array_header`` and ``Packer.pack_map_header``. - These methods only pack header of each type. -* Add ``autoreset`` option to ``Packer`` (default: True). - Packer doesn't return packed bytes and clear internal buffer. -* Add ``Packer.pack_map_pairs``. It packs sequence of pair to map type. - - - -0.2.4 -===== -:release date: 2012-12-22 - -Bugs fixed ----------- - -* Fix SEGV when object_hook or object_pairs_hook raise Exception. (#39) - -0.2.3 -===== -:release date: 2012-12-11 - -Changes -------- -* Warn when use_list is not specified. It's default value will be changed in 0.3. - -Bugs fixed ----------- -* Can't pack subclass of dict. - -0.2.2 -===== -:release date: 2012-09-21 - -Changes -------- -* Add ``use_single_float`` option to ``Packer``. When it is true, packs float - object in single precision format. - -Bugs fixed ----------- -* ``unpack()`` didn't restores gc state when it called with gc disabled. - ``unpack()`` doesn't control gc now instead of restoring gc state collectly. - User can control gc state when gc cause performance issue. - -* ``Unpacker``'s ``read_size`` option didn't used. - -0.2.1 -===== -:release date: 2012-08-20 - -Changes -------- -* Add ``max_buffer_size`` parameter to Unpacker. It limits internal buffer size - and allows unpack data from untrusted source safely. - -* Unpacker's buffer reallocation algorithm is less greedy now. It cause performance - decrease in rare case but memory efficient and don't allocate than ``max_buffer_size``. - -Bugs fixed ----------- -* Fix msgpack didn't work on SPARC Solaris. It was because choosing wrong byteorder - on compilation time. Use ``sys.byteorder`` to get correct byte order. - Very thanks to Chris Casey for giving test environment to me. - - -0.2.0 -===== -:release date: 2012-06-27 - -Changes -------- -* Drop supporting Python 2.5 and unify tests for Py2 and Py3. -* Use new version of msgpack-c. It packs correctly on big endian platforms. -* Remove deprecated packs and unpacks API. - -Bugs fixed ----------- -* #8 Packing subclass of dict raises TypeError. (Thanks to Steeve Morin.) - - -0.1.13 -====== -:release date: 2012-04-21 - -New ---- -* Don't accept subtype of list and tuple as msgpack list. (Steeve Morin) - It allows customize how it serialized with ``default`` argument. - -Bugs fixed ----------- -* Fix wrong error message. (David Wolever) -* Fix memory leak while unpacking when ``object_hook`` or ``list_hook`` is used. - (Steeve Morin) - -Other changes -------------- -* setup.py works on Python 2.5 (Steffen Siering) -* Optimization for serializing dict. - - -0.1.12 -====== -:release date: 2011-12-27 - -Bugs fixed ----------- - -* Re-enable packs/unpacks removed at 0.1.11. It will be removed when 0.2 is released. - - -0.1.11 -====== -:release date: 2011-12-26 - -Bugs fixed ----------- - -* Include test code for Python3 to sdist. (Johan Bergström) -* Fix compilation error on MSVC. (davidgaleano) - - -0.1.10 -====== -:release date: 2011-08-22 - -New feature ------------ -* Add ``encoding`` and ``unicode_errors`` option to packer and unpacker. - When this option is specified, (un)packs unicode object instead of bytes. - This enables using msgpack as a replacement of json. (tailhook) - - -0.1.9 -===== -:release date: 2011-01-29 - -New feature ------------ -* ``use_list`` option is added to unpack(b) like Unpacker. - (Use keyword argument because order of parameters are different) - -Bugs fixed ----------- -* Fix typo. -* Add MemoryError check. - -0.1.8 -===== -:release date: 2011-01-10 - -New feature ------------ -* Support ``loads`` and ``dumps`` aliases for API compatibility with - simplejson and pickle. - -* Add *object_hook* and *list_hook* option to unpacker. It allows you to - hook unpacking mapping type and array type. - -* Add *default* option to packer. It allows you to pack unsupported types. - -* unpacker accepts (old) buffer types. - -Bugs fixed ----------- -* Fix segv around ``Unpacker.feed`` or ``Unpacker(file)``. - - -0.1.7 -===== -:release date: 2010-11-02 - -New feature ------------ -* Add *object_hook* and *list_hook* option to unpacker. It allows you to - hook unpacking mapping type and array type. - -* Add *default* option to packer. It allows you to pack unsupported types. - -* unpacker accepts (old) buffer types. - -Bugs fixed ----------- -* Compilation error on win32. diff --git a/MANIFEST.in b/MANIFEST.in index 57d84a4c..6317706e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include setup.py include COPYING include README.md -recursive-include msgpack *.h *.c *.pyx *.cpp +recursive-include msgpack *.h *.c *.pyx recursive-include test *.py diff --git a/README.md b/README.md index 61a03c60..e66d4549 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml/badge.svg)](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml) [![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest) -## What's this +## What is this? [MessagePack](https://msgpack.org/) is an efficient binary serialization format. It lets you exchange data among multiple languages like JSON. @@ -25,9 +25,9 @@ But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy. ### Windows -When you can't use a binary distribution, you need to install Visual Studio -or Windows SDK on Windows. -Without extension, using pure Python implementation on CPython runs slowly. +If you can't use a binary distribution, you need to install Visual Studio +or the Windows SDK on Windows. +Without the extension, the pure Python implementation on CPython runs slowly. ## How to use @@ -35,11 +35,11 @@ Without extension, using pure Python implementation on CPython runs slowly. ### One-shot pack & unpack Use `packb` for packing and `unpackb` for unpacking. -msgpack provides `dumps` and `loads` as an alias for compatibility with +msgpack provides `dumps` and `loads` as aliases for compatibility with `json` and `pickle`. -`pack` and `dump` packs to a file-like object. -`unpack` and `load` unpacks from a file-like object. +`pack` and `dump` pack to a file-like object. +`unpack` and `load` unpack from a file-like object. ```pycon >>> import msgpack @@ -72,8 +72,12 @@ for unpacked in unpacker: print(unpacked) ``` +> [!IMPORTANT] +> If `Unpacker.unpack()` stops with an exception other than `OutOfData`, that `Unpacker` cannot be reused. +> Create a new `Unpacker` when reading another stream. -### Packing/unpacking of custom data type + +### Packing/unpacking of custom data types It is also possible to pack/unpack custom data types. Here is an example for `datetime.datetime`. @@ -119,15 +123,15 @@ It is also possible to pack/unpack custom data types using the **ext** type. >>> import array >>> def default(obj): ... if isinstance(obj, array.array) and obj.typecode == 'd': -... return msgpack.ExtType(42, obj.tostring()) +... return msgpack.ExtType(42, obj.tobytes()) ... raise TypeError("Unknown type: %r" % (obj,)) ... >>> def ext_hook(code, data): ... if code == 42: ... a = array.array('d') -... a.fromstring(data) +... a.frombytes(data) ... return a -... return ExtType(code, data) +... return msgpack.ExtType(code, data) ... >>> data = array.array('d', [1.2, 3.4]) >>> packed = msgpack.packb(data, default=default) @@ -140,8 +144,8 @@ True ### Advanced unpacking control As an alternative to iteration, `Unpacker` objects provide `unpack`, -`skip`, `read_array_header` and `read_map_header` methods. The former two -read an entire message from the stream, respectively de-serialising and returning +`skip`, `read_array_header`, and `read_map_header` methods. The former two +read an entire message from the stream, respectively deserializing and returning the result, or ignoring it. The latter two methods return the number of elements in the upcoming container, so that each element in an array, or key-value pair in a map, can be unpacked or skipped individually. @@ -149,7 +153,7 @@ in a map, can be unpacked or skipped individually. ## Notes -### string and binary type in old msgpack spec +### String and binary types in the old MessagePack spec Early versions of msgpack didn't distinguish string and binary types. The type for representing both string and binary types was named **raw**. @@ -167,7 +171,7 @@ and `raw=True` options. ### ext type -To use the **ext** type, pass `msgpack.ExtType` object to packer. +To use the **ext** type, pass a `msgpack.ExtType` object to the packer. ```pycon >>> import msgpack @@ -181,26 +185,26 @@ You can use it with `default` and `ext_hook`. See below. ### Security -To unpacking data received from unreliable source, msgpack provides +When unpacking data received from an unreliable source, msgpack provides two security options. `max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size. -It is used to limit the preallocated list size too. +It is also used to limit preallocated list sizes. `strict_map_key` (default: `True`) limits the type of map keys to bytes and str. -While msgpack spec doesn't limit the types of the map keys, -there is a risk of the hashdos. +While the MessagePack spec doesn't limit map key types, +there is a risk of a hash DoS. If you need to support other types for map keys, use `strict_map_key=False`. ### Performance tips -CPython's GC starts when growing allocated object. -This means unpacking may cause useless GC. -You can use `gc.disable()` when unpacking large message. +CPython's GC starts when the number of allocated objects grows. +This means unpacking may trigger unnecessary GC. +You can use `gc.disable()` when unpacking a large message. -List is the default sequence type of Python. -But tuple is lighter than list. +A list is the default sequence type in Python. +However, a tuple is lighter than a list. You can use `use_list=False` while unpacking when performance is important. @@ -208,7 +212,7 @@ You can use `use_list=False` while unpacking when performance is important. ### msgpack 0.5 -Package name on PyPI was changed from `msgpack-python` to `msgpack` from 0.5. +The package name on PyPI was changed from `msgpack-python` to `msgpack` in 0.5. When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before `pip install -U msgpack`. @@ -218,25 +222,25 @@ When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` be * Python 2 support - * The extension module does not support Python 2 anymore. + * The extension module no longer supports Python 2. The pure Python implementation (`msgpack.fallback`) is used for Python 2. - + * msgpack 1.0.6 drops official support of Python 2.7, as pip and - GitHub Action (setup-python) no longer support Python 2.7. + GitHub Action "setup-python" no longer supports Python 2.7. * Packer * Packer uses `use_bin_type=True` by default. - Bytes are encoded in bin type in msgpack. - * The `encoding` option is removed. UTF-8 is used always. + Bytes are encoded in the bin type in MessagePack. + * The `encoding` option is removed. UTF-8 is always used. * Unpacker - * Unpacker uses `raw=False` by default. It assumes str types are valid UTF-8 string - and decode them to Python str (unicode) object. + * Unpacker uses `raw=False` by default. It assumes str values are valid UTF-8 strings + and decodes them to Python str (Unicode) objects. * `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str). - * Default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attack. + * The default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attacks. You need to pass `max_buffer_size=0` if you have large but safe data. - * Default value of `strict_map_key` is changed to True to avoid hashdos. - You need to pass `strict_map_key=False` if you have data which contain map keys - which type is not bytes or str. + * The default value of `strict_map_key` is changed to True to avoid hash DoS. + You need to pass `strict_map_key=False` if you have data that contain map keys + whose type is neither bytes nor str. diff --git a/docs/conf.py b/docs/conf.py index ab0ad3c8..28116cd6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,9 +12,9 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#import os -#import sys -#sys.path.insert(0, os.path.abspath('..')) +# import os +# import sys +# sys.path.insert(0, os.path.abspath('..')) # -- General configuration ----------------------------------------------------- diff --git a/msgpack/__init__.py b/msgpack/__init__.py index b6151054..eeb85afa 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -4,8 +4,8 @@ from .exceptions import * # noqa: F403 from .ext import ExtType, Timestamp -version = (1, 1, 0) -__version__ = "1.1.0" +version = (1, 2, 1) +__version__ = "1.2.1" if os.environ.get("MSGPACK_PUREPYTHON"): diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx index 1faaac3a..9680b31e 100644 --- a/msgpack/_cmsgpack.pyx +++ b/msgpack/_cmsgpack.pyx @@ -1,5 +1,6 @@ -# coding: utf-8 #cython: embedsignature=True, c_string_encoding=ascii, language_level=3 +#cython: freethreading_compatible = True +import cython from cpython.datetime cimport import_datetime, datetime_new import_datetime() diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 402b6946..277239d8 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -1,5 +1,3 @@ -# coding: utf-8 - from cpython cimport * from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact from cpython.datetime cimport ( @@ -40,7 +38,7 @@ cdef extern from "pack.h": int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1 -cdef int DEFAULT_RECURSE_LIMIT=511 +cdef int DEFAULT_RECURSE_LIMIT=1024 cdef long long ITEM_LIMIT = (2**32)-1 @@ -129,6 +127,7 @@ cdef class Packer: if self.exports > 0: raise BufferError("Existing exports of data: Packer cannot be changed") + @cython.critical_section def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, bint strict_types=False, bint datetime=False, unicode_errors=None, @@ -269,6 +268,7 @@ cdef class Packer: return ret return self._pack_inner(o, 0, nest_limit) + @cython.critical_section def pack(self, object obj): cdef int ret self._check_exports() @@ -284,13 +284,19 @@ cdef class Packer: self.pk.length = 0 return buf + @cython.critical_section def pack_ext_type(self, typecode, data): self._check_exports() if len(data) > ITEM_LIMIT: raise ValueError("ext data too large") msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) + if self.autoreset: + buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) + self.pk.length = 0 + return buf + @cython.critical_section def pack_array_header(self, long long size): self._check_exports() if size > ITEM_LIMIT: @@ -301,6 +307,7 @@ cdef class Packer: self.pk.length = 0 return buf + @cython.critical_section def pack_map_header(self, long long size): self._check_exports() if size > ITEM_LIMIT: @@ -311,6 +318,7 @@ cdef class Packer: self.pk.length = 0 return buf + @cython.critical_section def pack_map_pairs(self, object pairs): """ Pack *pairs* as msgpack map type. @@ -331,6 +339,7 @@ cdef class Packer: self.pk.length = 0 return buf + @cython.critical_section def reset(self): """Reset internal buffer. @@ -339,6 +348,7 @@ cdef class Packer: self._check_exports() self.pk.length = 0 + @cython.critical_section def bytes(self): """Return internal buffer contents as bytes object""" return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) @@ -350,9 +360,11 @@ cdef class Packer: """ return memoryview(self) + @cython.critical_section def __getbuffer__(self, Py_buffer *buffer, int flags): PyBuffer_FillInfo(buffer, self, self.pk.buf, self.pk.length, 1, flags) self.exports += 1 + @cython.critical_section def __releasebuffer__(self, Py_buffer *buffer): self.exports -= 1 diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 34ff3304..26fb377e 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,5 +1,3 @@ -# coding: utf-8 - from cpython cimport * cdef extern from "Python.h": ctypedef struct PyObject @@ -53,6 +51,7 @@ cdef extern from "unpack.h": execute_fn unpack_skip execute_fn read_array_header execute_fn read_map_header + void unpack_init(unpack_context* ctx) object unpack_data(unpack_context* ctx) void unpack_clear(unpack_context* ctx) @@ -131,10 +130,9 @@ cdef inline int get_data_from_buffer(object obj, PyBuffer_Release(view) # create a contiguous copy and get buffer contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, b'C') - PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) - # view must hold the only reference to contiguous, - # so memory is freed when view is released - Py_DECREF(contiguous) + if PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) == -1: + raise + buffer_len[0] = view.len buf[0] = view.buf return 1 @@ -200,6 +198,7 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, if off < buf_len: raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off)) return obj + unpack_clear(&ctx) if ret == 0: raise ValueError("Unpack failed: incomplete input") @@ -207,7 +206,10 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, raise FormatError elif ret == -3: raise StackError - raise ValueError("Unpack failed: error = %d" % (ret,)) + elif PyErr_Occurred(): + raise + else: + raise ValueError("Unpack failed: error = %d" % (ret,)) cdef class Unpacker: @@ -220,7 +222,7 @@ cdef class Unpacker: If specified, unpacker reads serialized data from it and `.feed()` is not usable. :param int read_size: - Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) + Used as `file_like.read(read_size)`. Must be equal to or smaller than *max_buffer_size*. :param bool use_list: If true, unpack msgpack array to Python list. @@ -317,13 +319,12 @@ cdef class Unpacker: cdef Py_ssize_t max_buffer_size cdef uint64_t stream_offset - def __cinit__(self): - self.buf = NULL - def __dealloc__(self): + unpack_clear(&self.ctx) PyMem_Free(self.buf) self.buf = NULL + @cython.critical_section def __init__(self, file_like=None, *, Py_ssize_t read_size=0, bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, @@ -336,6 +337,12 @@ cdef class Unpacker: Py_ssize_t max_ext_len=-1): cdef const char *cerr=NULL + unpack_clear(&self.ctx) + unpack_init(&self.ctx) + if self.buf != NULL: + PyMem_Free(self.buf) + self.buf = NULL + self.object_hook = object_hook self.object_pairs_hook = object_pairs_hook self.list_hook = list_hook @@ -384,6 +391,7 @@ cdef class Unpacker: max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) + @cython.critical_section def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" cdef Py_buffer pybuff @@ -469,7 +477,7 @@ cdef class Unpacker: obj = unpack_data(&self.ctx) unpack_init(&self.ctx) return obj - elif ret == 0: + if ret == 0: if self.file_like is not None: self.read_from_file() continue @@ -477,13 +485,18 @@ cdef class Unpacker: raise StopIteration("No more data to unpack.") else: raise OutOfData("No more data to unpack.") - elif ret == -2: + + unpack_clear(&self.ctx) + if ret == -2: raise FormatError elif ret == -3: raise StackError + elif PyErr_Occurred(): + raise else: raise ValueError("Unpack failed: error = %d" % (ret,)) + @cython.critical_section def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" cdef Py_ssize_t nread @@ -496,6 +509,7 @@ cdef class Unpacker: self.stream_offset += nread return ret + @cython.critical_section def unpack(self): """Unpack one object @@ -503,6 +517,7 @@ cdef class Unpacker: """ return self._unpack(unpack_construct) + @cython.critical_section def skip(self): """Read and ignore one object, returning None @@ -510,6 +525,7 @@ cdef class Unpacker: """ return self._unpack(unpack_skip) + @cython.critical_section def read_array_header(self): """assuming the next object is an array, return its size n, such that the next n unpack() calls will iterate over its contents. @@ -518,6 +534,7 @@ cdef class Unpacker: """ return self._unpack(read_array_header) + @cython.critical_section def read_map_header(self): """assuming the next object is a map, return its size n, such that the next n * 2 unpack() calls will iterate over its key-value pairs. @@ -526,6 +543,7 @@ cdef class Unpacker: """ return self._unpack(read_map_header) + @cython.critical_section def tell(self): """Returns the current position of the Unpacker in bytes, i.e., the number of bytes that were read from the input, also the starting @@ -536,6 +554,7 @@ cdef class Unpacker: def __iter__(self): return self + @cython.critical_section def __next__(self): return self._unpack(unpack_construct, 1) diff --git a/msgpack/ext.py b/msgpack/ext.py index 9694819a..92ea4530 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -167,4 +167,4 @@ def from_datetime(dt): :rtype: Timestamp """ - return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000) + return Timestamp(seconds=int(dt.timestamp() // 1), nanoseconds=dt.microsecond * 1000) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index b02e47cf..824f59d5 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -52,7 +52,7 @@ def newlist_hint(size): TYPE_BIN = 4 TYPE_EXT = 5 -DEFAULT_RECURSE_LIMIT = 511 +DEFAULT_RECURSE_LIMIT = 1024 def _check_type_strict(obj, t, type=type, tuple=tuple): @@ -137,7 +137,7 @@ class Unpacker: If specified, unpacker reads serialized data from it and `.feed()` is not usable. :param int read_size: - Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) + Used as `file_like.read(read_size)`. Must be equal to or smaller than *max_buffer_size*. :param bool use_list: If true, unpack msgpack array to Python list. @@ -328,7 +328,7 @@ def feed(self, next_bytes): self._buf_checkpoint = 0 # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython - self._buffer.extend(view) + self._buffer.extend(view if view.contiguous else view.tobytes()) view.release() def _consume(self): @@ -518,9 +518,15 @@ def _unpack(self, execute=EX_CONSTRUCT): self._unpack(EX_SKIP) return if self._object_pairs_hook is not None: - ret = self._object_pairs_hook( - (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n) - ) + + def _gen(): + for _ in range(n): + key = self._unpack(EX_CONSTRUCT) + if self._strict_map_key and type(key) not in (str, bytes): + raise ValueError("%s is not allowed for map key" % str(type(key))) + yield key, self._unpack(EX_CONSTRUCT) + + ret = self._object_pairs_hook(_gen()) else: ret = {} for _ in range(n): @@ -861,6 +867,10 @@ def pack_ext_type(self, typecode, data): self._buffer.write(b"\xc9" + struct.pack(">I", L)) self._buffer.write(struct.pack("B", typecode)) self._buffer.write(data) + if self._autoreset: + ret = self._buffer.getvalue() + self._buffer = BytesIO() + return ret def _pack_array_header(self, n): if n <= 0x0F: diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index b8959f02..e02ec324 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -551,6 +551,7 @@ static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l) */ static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uint32_t nanoseconds) { + int ret; if ((seconds >> 34) == 0) { /* seconds is unsigned and fits in 34 bits */ uint64_t data64 = ((uint64_t)nanoseconds << 34) | (uint64_t)seconds; @@ -558,26 +559,33 @@ static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uin /* no nanoseconds and seconds is 32bits or smaller. timestamp32. */ unsigned char buf[4]; uint32_t data32 = (uint32_t)data64; - msgpack_pack_ext(x, -1, 4); + ret = msgpack_pack_ext(x, -1, 4); + if (ret != 0) + return ret; + _msgpack_store32(buf, data32); - msgpack_pack_raw_body(x, buf, 4); + return msgpack_pack_raw_body(x, buf, 4); } else { /* timestamp64 */ unsigned char buf[8]; - msgpack_pack_ext(x, -1, 8); - _msgpack_store64(buf, data64); - msgpack_pack_raw_body(x, buf, 8); + ret = msgpack_pack_ext(x, -1, 8); + if (ret != 0) + return ret; + _msgpack_store64(buf, data64); + return msgpack_pack_raw_body(x, buf, 8); } } else { - /* seconds is signed or >34bits */ - unsigned char buf[12]; - _msgpack_store32(&buf[0], nanoseconds); - _msgpack_store64(&buf[4], seconds); - msgpack_pack_ext(x, -1, 12); - msgpack_pack_raw_body(x, buf, 12); + /* seconds is signed or >34bits */ + unsigned char buf[12]; + _msgpack_store32(&buf[0], nanoseconds); + _msgpack_store64(&buf[4], seconds); + ret = msgpack_pack_ext(x, -1, 12); + if (ret != 0) + return ret; + + return msgpack_pack_raw_body(x, buf, 12); } - return 0; } diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 58a2f4f5..55cfdab9 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -70,12 +70,7 @@ static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unp static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unpack_object* o) { - PyObject *p; - if (d > LONG_MAX) { - p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); - } else { - p = PyLong_FromLong((long)d); - } + PyObject *p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); if (!p) return -1; *o = p; @@ -103,12 +98,9 @@ static inline int unpack_callback_int8(unpack_user* u, int8_t d, msgpack_unpack_ static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpack_object* o) { - PyObject *p; - if (d > LONG_MAX || d < LONG_MIN) { - p = PyLong_FromLongLong((PY_LONG_LONG)d); - } else { - p = PyLong_FromLong((long)d); - } + PyObject *p = PyLong_FromLongLong((PY_LONG_LONG)d); + if (!p) + return -1; *o = p; return 0; } @@ -291,6 +283,7 @@ static int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timest ts->tv_sec = _msgpack_load64(int64_t, buf + 4); return 0; default: + PyErr_Format(PyExc_ValueError, "invalid timestamp data (length %u)", buflen); return -1; } } @@ -344,12 +337,18 @@ static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos else if (u->timestamp == 3) { // datetime // Calculate datetime using epoch + delta // due to limitations PyDateTime_FromTimestamp on Windows with negative timestamps + int64_t days = ts.tv_sec / (24*3600); + if (days < INT_MIN || days > INT_MAX) { + PyErr_Format(PyExc_OverflowError, + "days=%lld; too large to convert to C int", days); + return -1; + } PyObject *epoch = PyDateTimeAPI->DateTime_FromDateAndTime(1970, 1, 1, 0, 0, 0, 0, u->utc, PyDateTimeAPI->DateTimeType); if (epoch == NULL) { return -1; } - PyObject* d = PyDelta_FromDSU(ts.tv_sec/(24*3600), ts.tv_sec%(24*3600), ts.tv_nsec / 1000); + PyObject* d = PyDelta_FromDSU((int)days, ts.tv_sec%(24*3600), ts.tv_nsec / 1000); if (d == NULL) { Py_DECREF(epoch); return -1; diff --git a/msgpack/unpack_container_header.h b/msgpack/unpack_container_header.h index c14a3c2b..e48d43ba 100644 --- a/msgpack/unpack_container_header.h +++ b/msgpack/unpack_container_header.h @@ -45,7 +45,10 @@ static inline int unpack_container_header(unpack_context* ctx, const char* data, PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); return -1; } - unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); + + if (unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj) < 0) + return -1; + return 1; } diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index cce29e7a..ab5887ae 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -72,7 +72,14 @@ static inline PyObject* unpack_data(unpack_context* ctx) static inline void unpack_clear(unpack_context *ctx) { - Py_CLEAR(ctx->stack[0].obj); + for (unsigned int i = 0; i < ctx->top; i++) { + /* map_key holds a live reference only while waiting for the value */ + if (ctx->stack[i].ct == CT_MAP_VALUE) { + Py_CLEAR(ctx->stack[i].map_key); + } + Py_CLEAR(ctx->stack[i].obj); + } + unpack_init(ctx); } static inline int unpack_execute(bool construct, unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) @@ -192,7 +199,7 @@ static inline int unpack_execute(bool construct, unpack_context* ctx, const char case 0xd5: // fixext 2 case 0xd6: // fixext 4 case 0xd7: // fixext 8 - again_fixed_trail_if_zero(ACS_EXT_VALUE, + again_fixed_trail_if_zero(ACS_EXT_VALUE, (1 << (((unsigned int)*p) & 0x03))+1, _ext_zero); case 0xd8: // fixext 16 @@ -336,6 +343,7 @@ static inline int unpack_execute(bool construct, unpack_context* ctx, const char goto _header_again; case CT_MAP_VALUE: if(construct_cb(_map_item)(user, c->count, &c->obj, c->map_key, obj) < 0) { goto _failed; } + c->map_key = NULL; if(++c->count == c->size) { obj = c->obj; if (construct_cb(_map_end)(user, &obj) < 0) { goto _failed; } @@ -398,10 +406,18 @@ static inline int unpack_execute(bool construct, unpack_context* ctx, const char #undef start_container static int unpack_construct(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { - return unpack_execute(1, ctx, data, len, off); + int ret = unpack_execute(1, ctx, data, len, off); + if (ret == -1) { + unpack_clear(ctx); + } + return ret; } static int unpack_skip(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { - return unpack_execute(0, ctx, data, len, off); + int ret = unpack_execute(0, ctx, data, len, off); + if (ret == -1) { + unpack_clear(ctx); + } + return ret; } #define unpack_container_header read_array_header diff --git a/pyproject.toml b/pyproject.toml index e24f2b88..ebc6b50d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,39 +1,32 @@ [build-system] -requires = ["setuptools >= 69.5.1"] +requires = ["setuptools >= 78.1.1"] build-backend = "setuptools.build_meta" [project] name = "msgpack" dynamic = ["version"] -license = {text="Apache 2.0"} +license = "Apache-2.0" authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}] description = "MessagePack serializer" readme = "README.md" keywords = ["msgpack", "messagepack", "serializer", "serialization", "binary"] -requires-python = ">=3.8" +requires-python = ">=3.10" classifiers = [ "Development Status :: 5 - Production/Stable", "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", + "Topic :: File Formats", + "Intended Audience :: Developers", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", ] +dependencies = [] [project.urls] Homepage = "https://msgpack.org/" Documentation = "https://msgpack-python.readthedocs.io/" Repository = "https://github.com/msgpack/msgpack-python/" Tracker = "https://github.com/msgpack/msgpack-python/issues" -Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst" +Changelog = "https://github.com/msgpack/msgpack-python/blob/main/CHANGELOG.md" [tool.setuptools] # Do not install C/C++/Cython source files @@ -44,10 +37,16 @@ version = {attr = "msgpack.__version__"} [tool.ruff] line-length = 100 -target-version = "py38" +target-version = "py310" lint.select = [ "E", # pycodestyle "F", # Pyflakes "I", # isort #"UP", pyupgrade ] + +[dependency-groups] +dev = [ + "cython>=3.2.5", + "pytest>=9.0.3", +] diff --git a/requirements.txt b/requirements.txt index b677f068..7991e3f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,4 @@ -Cython~=3.0.11 +cython==3.2.5 +setuptools==78.1.1 +pytest +build diff --git a/test/test_buffer.py b/test/test_buffer.py index 2c5a14c5..ca097222 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -17,7 +17,7 @@ def test_unpack_bytearray(): obj = unpackb(buf, use_list=1) assert [b"foo", b"bar"] == obj expected_type = bytes - assert all(type(s) == expected_type for s in obj) + assert all(type(s) is expected_type for s in obj) def test_unpack_memoryview(): @@ -26,7 +26,7 @@ def test_unpack_memoryview(): obj = unpackb(view, use_list=1) assert [b"foo", b"bar"] == obj expected_type = bytes - assert all(type(s) == expected_type for s in obj) + assert all(type(s) is expected_type for s in obj) def test_packer_getbuffer(): diff --git a/test/test_except.py b/test/test_except.py index b77ac800..a3bf4675 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -1,10 +1,12 @@ #!/usr/bin/env python import datetime +import gc +import tracemalloc from pytest import raises -from msgpack import FormatError, OutOfData, StackError, Unpacker, packb, unpackb +from msgpack import ExtType, FormatError, OutOfData, StackError, Unpacker, packb, unpackb class DummyException(Exception): @@ -31,6 +33,50 @@ def hook(obj): object_pairs_hook=hook, ) + up = Unpacker(object_hook=hook) + + def up_unpack(x): + up.feed(x) + return up.unpack() + + raises(DummyException, up_unpack, packb({})) + raises(DummyException, up_unpack, packb({"fizz": "buzz"})) + raises(DummyException, up_unpack, packb({"fizz": "buzz"})) + raises(DummyException, up_unpack, packb({"fizz": {"buzz": "spam"}})) + raises( + DummyException, + up_unpack, + packb({"fizz": {"buzz": "spam"}}), + ) + + +def test_raise_from_list_hook(): + def hook(lst: list) -> list: + raise DummyException + + with raises(DummyException): + unpackb(packb([1, 2, 3]), list_hook=hook) + + with raises(DummyException): + unpacker = Unpacker(list_hook=hook) + unpacker.feed(packb([1, 2, 3])) + unpacker.unpack() + + +def test_raise_from_ext_hook(): + def hook(code: int, data: bytes) -> ExtType: + raise DummyException + + packed = packb(ExtType(42, b"hello")) + + with raises(DummyException): + unpackb(packed, ext_hook=hook) + + with raises(DummyException): + unpacker = Unpacker(ext_hook=hook) + unpacker.feed(packed) + unpacker.unpack() + def test_invalidvalue(): incomplete = b"\xd9\x97#DL_" # raw8 - length=0x97 @@ -52,6 +98,39 @@ def test_invalidvalue(): unpackb(b"\x91" * 3000) # nested fixarray(len=1) +def test_no_memory_leak_on_nested_invalid_tag() -> None: + """Regression test: unpacking nested arrays containing an invalid tag must not leak objects.""" + + kwargs: dict = { + "raw": False, + "strict_map_key": False, + "max_array_len": 1 << 20, + "max_map_len": 1 << 20, + } + n = 1000 + + for depth in range(1, 15): + data = bytes([0x91] * depth + [0xC1]) + + gc.collect() + tracemalloc.start() + s1 = tracemalloc.take_snapshot() + + for _ in range(n): + try: + unpackb(data, **kwargs) + except Exception: + pass + + gc.collect() + s2 = tracemalloc.take_snapshot() + tracemalloc.stop() + + leaked = sum(s.count_diff for s in s2.compare_to(s1, "lineno") if s.count_diff > 0) + per_call = leaked / n + assert per_call < 1.0, f"depth={depth}: {per_call:.2f} leaked objects/call (expected < 1)" + + def test_strict_map_key(): valid = {"unicode": 1, b"bytes": 2} packed = packb(valid, use_bin_type=True) @@ -61,3 +140,27 @@ def test_strict_map_key(): packed = packb(invalid, use_bin_type=True) with raises(ValueError): unpackb(packed, raw=False, strict_map_key=True) + + +def test_strict_map_key_with_object_pairs_hook(): + # strict_map_key should be enforced even when object_pairs_hook is set + invalid = {42: "value"} + packed = packb(invalid, use_bin_type=True) + with raises(ValueError): + unpackb(packed, raw=False, strict_map_key=True, object_pairs_hook=list) + + # valid keys (str/bytes) should still work with object_pairs_hook + valid = {"key": "value"} + packed = packb(valid, use_bin_type=True) + result = unpackb(packed, raw=False, strict_map_key=True, object_pairs_hook=list) + assert result == [("key", "value")] + + +def test_unpacker_should_not_crash_after_exception(): + up = Unpacker() # default: strict_map_key=True + up.feed(b"\x83\x73\xc4\x00") # fixmap(3): int key (rejected) + empty bin8 + try: + up.unpack() # ValueError: int is not allowed for map key ... + except Exception: + pass + up.skip() # SIGSEGV (resumes from a corrupt parser context) diff --git a/test/test_extension.py b/test/test_extension.py index aaf0fd92..61852f15 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -6,7 +6,7 @@ def test_pack_ext_type(): def p(s): - packer = msgpack.Packer() + packer = msgpack.Packer(autoreset=False) packer.pack_ext_type(0x42, s) return packer.bytes() @@ -20,6 +20,15 @@ def p(s): assert p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 # ext 32 +def test_pack_ext_type_autoreset(): + packer = msgpack.Packer() + + assert packer.pack_ext_type(0x42, b"A") == b"\xd4\x42A" + assert packer.bytes() == b"" + assert packer.pack_ext_type(0x42, b"ABC") == b"\xc7\x03\x42ABC" + assert packer.bytes() == b"" + + def test_unpack_ext_type(): def check(b, expected): assert msgpack.unpackb(b) == expected diff --git a/test/test_limits.py b/test/test_limits.py index 9b92b4d9..bb554d68 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -1,4 +1,6 @@ #!/usr/bin/env python +import sysconfig + import pytest from msgpack import ( @@ -153,6 +155,35 @@ def test_auto_max_array_len(): unpacker.unpack() +# Skip on free-threaded CPython builds because this test depends on recursion behavior. +IS_FREE_THREADED_BUILD = bool(sysconfig.get_config_var("Py_GIL_DISABLED")) + + +@pytest.mark.skipif(IS_FREE_THREADED_BUILD, reason="Skipped on free-threaded build") +def test_nest_limit_1024(): + import sys + + # Build a list nested 1024 levels deep + d = None + for _ in range(1024): + d = [d] + + # Temporarily raise Python's recursion limit so packing 1024 levels succeeds + old_limit = sys.getrecursionlimit() + sys.setrecursionlimit(max(old_limit, 30000)) + try: + packed = packb(d) + result = unpackb(packed) + finally: + sys.setrecursionlimit(old_limit) + + # Verify structure iteratively to avoid hitting C-level recursion limit in == + for _ in range(1024): + assert isinstance(result, list) and len(result) == 1 + result = result[0] + assert result is None + + def test_auto_max_map_len(): # len(packed) == 6 -> max_map_len == 3 packed = b"\xde\x00\x04zzz" diff --git a/test/test_memoryview.py b/test/test_memoryview.py index 0a2a6f53..3f6a39d4 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -97,3 +97,15 @@ def test_multidim_memoryview(): data = view.cast(view.format, (3, 2)) packed = packb(data) assert packed == b"\xc4\x06\x00\x00\x00\x00\x00\x00" + + +def test_unpack_noncontiguous_memoryview(): + # Use a multi-byte value so the padded stride-2 view is non-contiguous. + packed = packb(2**32) + padded = bytearray() + for byte in packed: + padded.append(byte) + padded.append(0) + noncont = memoryview(bytes(padded))[::2] + assert not noncont.c_contiguous + assert unpackb(noncont) == 2**32 diff --git a/test/test_timestamp.py b/test/test_timestamp.py index 831141a1..7c8e3e83 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -103,6 +103,13 @@ def test_timestamp_datetime(): assert Timestamp.from_datetime(ts).to_datetime() == ts + # Regression test: pre-epoch fractional seconds must floor toward -inf. + pre_epoch = datetime.datetime(1969, 12, 31, 23, 59, 59, 500000, tzinfo=utc) + ts_pre_epoch = Timestamp.from_datetime(pre_epoch) + assert ts_pre_epoch.seconds == -1 + assert ts_pre_epoch.nanoseconds == 500000000 + assert ts_pre_epoch.to_datetime() == pre_epoch + def test_unpack_datetime(): t = Timestamp(42, 14) @@ -169,3 +176,9 @@ def test_pack_datetime_without_tzinfo(): packed = msgpack.packb(dt, datetime=True) unpacked = msgpack.unpackb(packed, timestamp=3) assert unpacked == dt + + +def test_too_large_timestamp(): + # When timestamp64 is too large, conversion to datetime fails due to int64 -> int32 conversion. + # https://github.com/msgpack/msgpack-python/issues/696 + print(msgpack.unpackb(b"\xd7\xff" + b"\x00" * 8, timestamp=3)) diff --git a/test/test_unpack.py b/test/test_unpack.py index b17c3c53..705c16a6 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -1,4 +1,6 @@ +import gc import sys +import weakref from io import BytesIO from pytest import mark, raises @@ -87,3 +89,37 @@ def test_unpacker_tell_read_bytes(): assert obj == unp assert pos == unpacker.tell() assert unpacker.read_bytes(n) == raw + + +@mark.skipif( + Unpacker.__module__ == "msgpack.fallback", + reason="specific to C extension reinit leak", +) +def test_unpacker_reinit_clears_partial_state(): + refs = [] + + class Marker: + pass + + def hook(code, data): + obj = Marker() + refs.append(weakref.ref(obj)) + return obj + + unpacker = Unpacker(ext_hook=hook, strict_map_key=False) + # Keep parser state mid-map with a live key object from ext_hook. + # Encodes: [ {ExtType(1, b"a"): } ]. + unpacker.feed(b"\x91\x81\xd4\x01a") + with raises(OutOfData): + unpacker.unpack() + assert len(refs) == 1 + assert refs[0]() is not None + + unpacker.__init__() + gc.collect() + assert refs[0]() is None + with raises(OutOfData): + unpacker.unpack() + + unpacker.feed(packb({"a": 1})) + assert unpacker.unpack() == {"a": 1} diff --git a/test/uneeded_test_multithreading.py b/test/uneeded_test_multithreading.py new file mode 100644 index 00000000..6694fdc6 --- /dev/null +++ b/test/uneeded_test_multithreading.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +import threading +from concurrent.futures import ThreadPoolExecutor + +from msgpack import Packer + + +def run_threaded( + func, + num_threads=8, + pass_count=False, + pass_barrier=False, + outer_iterations=1, + prepare_args=None, +): + """Runs a function many times in parallel""" + for _ in range(outer_iterations): + with ThreadPoolExecutor(max_workers=num_threads) as tpe: + if prepare_args is None: + args = [] + else: + args = prepare_args() + if pass_barrier: + barrier = threading.Barrier(num_threads) + args.append(barrier) + if pass_count: + all_args = [(func, i, *args) for i in range(num_threads)] + else: + all_args = [(func, *args) for i in range(num_threads)] + try: + futures = [] + for arg in all_args: + futures.append(tpe.submit(*arg)) + finally: + if len(futures) < num_threads and pass_barrier: + barrier.abort() + for f in futures: + f.result() + + +def test_multithread_packing(): + output = [] + test_data = "abcd" * 10_000_000 + packer = Packer() + + def closure(b): + data = packer.pack(test_data) + output.append(data) + b.wait() + + run_threaded(closure, num_threads=10, pass_barrier=True, pass_count=False)