diff --git a/.flake8 b/.flake8 index 0dede3f1d..959e9a22c 100644 --- a/.flake8 +++ b/.flake8 @@ -11,6 +11,6 @@ extend-ignore = # E203 whitespace before ':' (to be compatible with black) per-file-ignores = scripts/create_pickle.py:F403,F405, - graphblas/tests/*.py:T201, + graphblas/tests/*.py:T201,B043, graphblas/core/ss/matrix.py:SIM113, graphblas/**/__init__.py:F401, diff --git a/.github/workflows/debug.yml b/.github/workflows/debug.yml index 6c2b202b1..45401c704 100644 --- a/.github/workflows/debug.yml +++ b/.github/workflows/debug.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - pyver: [3.10] + pyver: [3.11] testopts: - "--blocking" # - "--non-blocking --record --runslow" @@ -26,7 +26,7 @@ jobs: # - "conda-forge" steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 persist-credentials: false diff --git a/.github/workflows/imports.yml b/.github/workflows/imports.yml index e24d0d4db..4be4926c7 100644 --- a/.github/workflows/imports.yml +++ b/.github/workflows/imports.yml @@ -14,7 +14,7 @@ jobs: pyver: ${{ steps.pyver.outputs.selected }} steps: - name: RNG for os - uses: ddradar/choose-random-action@v3.0.0 + uses: ddradar/choose-random-action@v4.1.0 id: os with: contents: | @@ -26,14 +26,14 @@ jobs: 1 1 - name: RNG for Python version - uses: ddradar/choose-random-action@v3.0.0 + uses: ddradar/choose-random-action@v4.1.0 id: pyver with: contents: | - 3.10 3.11 3.12 3.13 + 3.14 weights: | 1 1 @@ -45,14 +45,14 @@ jobs: # runs-on: ${{ matrix.os }} # strategy: # matrix: - # python-version: ["3.10", "3.11", "3.12", "3.13"] + # python-version: ["3.11", "3.12", "3.13", "3.14"] # os: ["ubuntu-latest", "macos-latest", "windows-latest"] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 persist-credentials: false - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: python-version: ${{ needs.rngs.outputs.pyver }} # python-version: ${{ matrix.python-version 
}} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 655a576e5..8bceeeadd 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -16,11 +16,11 @@ jobs: name: pre-commit-hooks runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 persist-credentials: false - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: - python-version: "3.10" + python-version: "3.11" - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index 32926c5c8..ad9284f66 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -8,20 +8,19 @@ on: jobs: build_and_deploy: runs-on: ubuntu-latest - if: github.repository == 'python-graphblas/python-graphblas' defaults: run: shell: bash -l {0} steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 persist-credentials: false - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: - python-version: "3.10" + python-version: "3.11" - name: Install build dependencies run: | python -m pip install --upgrade pip @@ -36,7 +35,8 @@ jobs: - name: Check with twine run: python -m twine check --strict dist/* - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@v1.12.4 + uses: pypa/gh-action-pypi-publish@v1.13.0 + if: startsWith(github.ref, 'refs/tags/') && github.repository == 'python-graphblas/python-graphblas' with: user: __token__ password: ${{ secrets.PYPI_TOKEN }} diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml index bfc17834b..d31e8c534 100644 --- a/.github/workflows/test_and_build.yml +++ b/.github/workflows/test_and_build.yml @@ -50,7 +50,7 @@ jobs: backend: ${{ steps.backend.outputs.selected }} steps: - name: RNG for mapnumpy - uses: ddradar/choose-random-action@v3.0.0 + uses: ddradar/choose-random-action@v4.1.0 id: 
mapnumpy with: contents: | @@ -64,7 +64,7 @@ jobs: 1 1 - name: RNG for backend - uses: ddradar/choose-random-action@v3.0.0 + uses: ddradar/choose-random-action@v4.1.0 id: backend with: contents: | @@ -93,35 +93,50 @@ jobs: os: ["ubuntu-latest", "macos-latest", "windows-latest"] slowtask: ["pytest_normal", "pytest_bizarro", "notebooks"] # repeat: [1, 2, 3] # For stress testing + include: + # Additional architectures: run pytest_normal only to limit CI cost + - os: macos-15-intel # macOS x86_64 (Intel) + slowtask: pytest_normal + - os: ubuntu-24.04-arm # Linux aarch64 + slowtask: pytest_normal + # Uncomment these temporarily for stress testing + # - os: macos-15-intel # macOS x86_64 (Intel) + # slowtask: pytest_bizarro + # - os: ubuntu-24.04-arm # Linux aarch64 + # slowtask: pytest_bizarro + # - os: macos-15-intel # macOS x86_64 (Intel) + # slowtask: notebooks + # - os: ubuntu-24.04-arm # Linux aarch64 + # slowtask: notebooks env: # Wheels on OS X come with an OpenMP that conflicts with OpenMP from conda-forge. # Setting this is a workaround. KMP_DUPLICATE_LIB_OK: ${{ contains(matrix.os, 'macos') && 'TRUE' || 'FALSE' }} steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 persist-credentials: false - name: RNG for Python version - uses: ddradar/choose-random-action@v3.0.0 + uses: ddradar/choose-random-action@v4.1.0 id: pyver with: # We should support major Python versions for at least 36 months as per SPEC 0 # We may be able to support pypy if anybody asks for it # 3.9.16 0_73_pypy contents: | - 3.10 3.11 3.12 3.13 + 3.14 weights: | 1 1 1 1 - name: RNG for source of python-suitesparse-graphblas - uses: ddradar/choose-random-action@v3.0.0 + uses: ddradar/choose-random-action@v4.1.0 id: sourcetype with: # Weights must be natural numbers, so set weights to very large to skip one @@ -149,164 +164,25 @@ jobs: auto-activate-base: false - name: Update env run: | - # Install dependencies based on the needs of the job. 
- # Don't panic! This may look scary at a glance, but each line makes sense. - # - # First let's randomly get versions of dependencies to install. - # Consider removing old versions when they become problematic or very old (>=2 years). + # Pick random compatible dependency versions using the version picker script. + # See scripts/ci_pick_versions.py for version pools and constraint logic. + pyver=$(python -c "v='${{ steps.pyver.outputs.selected }}'.split('.');print(f'{v[0]}.{v[1]}')") + eval "$(python scripts/ci_pick_versions.py --python "$pyver" --source ${{ steps.sourcetype.outputs.selected }})" - # Randomly choosing versions of dependencies based on Python version works surprisingly well... - if [[ ${{ startsWith(steps.pyver.outputs.selected, '3.10') }} == true ]]; then - nxver=$(python -c 'import random ; print(random.choice(["=2.8", "=3.0", "=3.1", "=3.2", "=3.3", "=3.4", ""]))') - npver=$(python -c 'import random ; print(random.choice(["=1.24", "=1.25", "=1.26", "=2.0", "=2.1", "=2.2", ""]))') - spver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=1.11", "=1.12", "=1.13", "=1.14", "=1.15", ""]))') - pdver=$(python -c 'import random ; print(random.choice(["=1.5", "=2.0", "=2.1", "=2.2", ""]))') - akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0", "=2.1", "=2.2", "=2.3", "=2.4", "=2.5", "=2.6", "=2.7", ""]))') - fmmver=$(python -c 'import random ; print(random.choice(["=1.4", "=1.5", "=1.6", "=1.7", ""]))') - yamlver=$(python -c 'import random ; print(random.choice(["=5.4", "=6.0", ""]))') - sparsever=$(python -c 'import random ; print(random.choice(["=0.14", "=0.15", ""]))') - elif [[ ${{ startsWith(steps.pyver.outputs.selected, '3.11') }} == true ]]; then - nxver=$(python -c 'import random ; print(random.choice(["=2.8", "=3.0", "=3.1", "=3.2", "=3.3", "=3.4", ""]))') - npver=$(python -c 'import random ; print(random.choice(["=1.24", "=1.25", "=1.26", "=2.0", "=2.1", "=2.2", ""]))') - spver=$(python -c 'import random 
; print(random.choice(["=1.9", "=1.10", "=1.11", "=1.12", "=1.13", "=1.14", "=1.15", ""]))') - pdver=$(python -c 'import random ; print(random.choice(["=1.5", "=2.0", "=2.1", "=2.2", ""]))') - akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0", "=2.1", "=2.2", "=2.3", "=2.4", "=2.5", "=2.6", "=2.7", ""]))') - fmmver=$(python -c 'import random ; print(random.choice(["=1.4", "=1.5", "=1.6", "=1.7", ""]))') - yamlver=$(python -c 'import random ; print(random.choice(["=5.4", "=6.0", ""]))') - sparsever=$(python -c 'import random ; print(random.choice(["=0.14", "=0.15", ""]))') - elif [[ ${{ startsWith(steps.pyver.outputs.selected, '3.12') }} == true ]]; then - nxver=$(python -c 'import random ; print(random.choice(["=3.2", "=3.3", "=3.4", ""]))') - npver=$(python -c 'import random ; print(random.choice(["=1.26", "=2.0", "=2.1", "=2.2", ""]))') - spver=$(python -c 'import random ; print(random.choice(["=1.11", "=1.12", "=1.13", "=1.14", "=1.15", ""]))') - pdver=$(python -c 'import random ; print(random.choice(["=2.1", "=2.2", ""]))') - akver=$(python -c 'import random ; print(random.choice(["=2.4", "=2.5", "=2.6", "=2.7", ""]))') - fmmver=$(python -c 'import random ; print(random.choice(["=1.7", ""]))') - yamlver=$(python -c 'import random ; print(random.choice(["=6.0", ""]))') - sparsever=$(python -c 'import random ; print(random.choice(["=0.14", "=0.15", ""]))') - else # Python 3.13 - nxver=$(python -c 'import random ; print(random.choice(["=3.4", ""]))') - npver=$(python -c 'import random ; print(random.choice(["=2.1", "=2.2", ""]))') - spver=$(python -c 'import random ; print(random.choice(["=1.14", "=1.15", ""]))') - pdver=$(python -c 'import random ; print(random.choice(["=2.2", ""]))') - akver=$(python -c 'import random ; print(random.choice(["=2.7", ""]))') - fmmver=NA # Not yet supported - yamlver=$(python -c 'import random ; print(random.choice(["=6.0", ""]))') - sparsever=NA # Not yet supported - fi - - # But there may be edge cases of 
incompatibility we need to handle (more handled below) - if [[ ${{ steps.sourcetype.outputs.selected }} == "source" ]]; then - # TODO: there are currently issues with some numpy versions when - # installing python-suitesparse-grphblas from source. - npver="" - spver="" - pdver="" - fi - - # We can have a tight coupling with python-suitesparse-graphblas. - # That is, we don't need to support versions of it that are two years old. - # But, it's still useful for us to test with different versions! + # Set up psg conda package variable (conda-forge installs via conda, others via pip) psg="" - if [[ ${{ steps.sourcetype.outputs.selected}} == "upstream" ]] ; then - # Upstream needs to build with numpy 2 - psgver="" - if [[ ${{ startsWith(steps.pyver.outputs.selected, '3.13') }} == true ]]; then - npver=$(python -c 'import random ; print(random.choice(["=2.1", "=2.2", ""]))') - else - npver=$(python -c 'import random ; print(random.choice(["=2.0", "=2.1", "=2.2", ""]))') - fi - elif [[ ${{ startsWith(steps.pyver.outputs.selected, '3.13') }} == true ]] ; then - if [[ ${{ steps.sourcetype.outputs.selected}} == "conda-forge" ]] ; then - psgver=$(python -c 'import random ; print(random.choice(["=9.3.1.0", ""]))') - psg=python-suitesparse-graphblas${psgver} - else - psgver=$(python -c 'import random ; print(random.choice(["==9.3.1.0", ""]))') - fi - elif [[ ${{ startsWith(steps.pyver.outputs.selected, '3.12') }} == true ]] ; then - if [[ ${{ steps.sourcetype.outputs.selected}} == "conda-forge" ]] ; then - if [[ $npver == =1.* ]] ; then - psgver=$(python -c 'import random ; print(random.choice(["=8.2.0.1", "=8.2.1.0"]))') - else - psgver=$(python -c 'import random ; print(random.choice(["=9.3.1.0", ""]))') - fi - psg=python-suitesparse-graphblas${psgver} - else - if [[ $npver == =1.* ]] ; then - psgver=$(python -c 'import random ; print(random.choice(["==8.2.0.1", "==8.2.1.0"]))') - else - psgver=$(python -c 'import random ; print(random.choice(["==9.3.1.0", ""]))') - fi - fi - 
# python-suitsparse-graphblas support is the same for Python 3.10 and 3.11 - elif [[ ${{ steps.sourcetype.outputs.selected}} == "conda-forge" ]] ; then - if [[ $npver == =1.* ]] ; then - psgver=$(python -c 'import random ; print(random.choice(["=7.4.0", "=7.4.1", "=7.4.2", "=7.4.3.0", "=7.4.3.1", "=7.4.3.2", "=8.0.2.1", "=8.2.0.1", "=8.2.1.0"]))') - else - psgver=$(python -c 'import random ; print(random.choice(["=9.3.1.0", ""]))') - fi + if [[ ${{ steps.sourcetype.outputs.selected}} == "conda-forge" && -n "$psgver" ]] ; then psg=python-suitesparse-graphblas${psgver} - elif [[ ${{ steps.sourcetype.outputs.selected}} == "wheel" ]] ; then - if [[ $npver == =1.* ]] ; then - psgver=$(python -c 'import random ; print(random.choice(["==7.4.3.2", "==8.0.2.1", "==8.2.0.1", "==8.2.1.0"]))') - else - psgver=$(python -c 'import random ; print(random.choice(["==9.3.1.0", ""]))') - fi - elif [[ ${{ steps.sourcetype.outputs.selected}} == "source" ]] ; then - # These should be exact versions - if [[ $npver == =1.* ]] ; then - psgver=$(python -c 'import random ; print(random.choice(["==7.4.0.0", "==7.4.1.0", "==7.4.2.0", "==7.4.3.0", "==7.4.3.1", "==7.4.3.2", "==8.0.2.1", "==8.2.0.1", "==8.2.1.0"]))') - else - psgver=$(python -c 'import random ; print(random.choice(["==9.3.1.0", ""]))') - fi fi - # Numba is tightly coupled to numpy versions - if [[ ${npver} == "=1.26" ]] ; then - numbaver=$(python -c 'import random ; print(random.choice(["=0.58", "=0.59", "=0.60", "=0.61", ""]))') - if [[ ${spver} == "=1.9" ]] ; then - spver=$(python -c 'import random ; print(random.choice(["=1.10", "=1.11", ""]))') - fi - elif [[ ${npver} == "=1.25" ]] ; then - numbaver=$(python -c 'import random ; print(random.choice(["=0.58", "=0.59", "=0.60", "=0.61", ""]))') - elif [[ ${npver} == "=1.24" || ${{ startsWith(steps.pyver.outputs.selected, '3.11') }} == true ]] ; then - numbaver=$(python -c 'import random ; print(random.choice(["=0.57", "=0.58", "=0.59", "=0.60", "=0.61", ""]))') - else - 
numbaver="" - fi - # Only numba >=0.59 support Python 3.12 - if [[ ${{ startsWith(steps.pyver.outputs.selected, '3.12') }} == true ]] ; then - numbaver=$(python -c 'import random ; print(random.choice(["=0.59", "=0.60", "=0.61", ""]))') - fi - - # Handle NumPy 2 - if [[ $npver != =1.* ]] ; then - # Only pandas >=2.2.2 supports NumPy 2 - pdver=$(python -c 'import random ; print(random.choice(["=2.2", ""]))') - - # Only awkward >=2.6.3 supports NumPy 2 - if [[ ${{ startsWith(steps.pyver.outputs.selected, '3.13') }} == true ]] ; then - akver=$(python -c 'import random ; print(random.choice(["=2.7", ""]))') - else - akver=$(python -c 'import random ; print(random.choice(["=2.6", "=2.7", ""]))') - fi - - # Only scipy >=1.13 supports NumPy 2 - if [[ $spver == "=1.9" || $spver == "=1.10" || $spver == "=1.11" || $spver == "=1.12" ]] ; then - spver="=1.13" - fi - fi - - fmm=fast_matrix_market${fmmver} + # Decide whether to install numba and sparse. + # Random skip: pypy always, 20% on Windows, 40% on other OSes (unless notebooks). 
awkward=awkward${akver} - - # Don't install numba and sparse for some versions - if [[ ${{ contains(steps.pyver.outputs.selected, 'pypy') || - startsWith(steps.pyver.outputs.selected, '3.14') }} == true || + if [[ ${{ contains(steps.pyver.outputs.selected, 'pypy') }} == true || ( ${{ matrix.slowtask != 'notebooks'}} == true && ( ( ${{ matrix.os == 'windows-latest' }} == true && $(python -c 'import random ; print(random.random() < .2)') == True ) || ( ${{ matrix.os == 'windows-latest' }} == false && $(python -c 'import random ; print(random.random() < .4)') == True ))) ]] then - # Some packages aren't available for pypy or Python 3.13; randomly otherwise (if not running notebooks) echo "skipping numba" numba="" numbaver=NA @@ -315,36 +191,23 @@ jobs: if [[ ${{ contains(steps.pyver.outputs.selected, 'pypy') }} ]]; then awkward="" akver=NA - fmm="" - fmmver=NA - # Be more flexible until we determine what versions are supported by pypy npver="" spver="" pdver="" yamlver="" fi - elif [[ ${npver} == =2.* ]] ; then - # Don't install numba for unsupported versions of numpy + elif [[ ${numbaver} == "NA" ]] ; then + # Version picker already determined numba is incompatible numba="" - numbaver=NA sparse="" sparsever=NA else numba=numba${numbaver} - sparse=sparse${sparsever} - fi - - # sparse does not yet support Python 3.13 - if [[ ${{ startsWith(steps.pyver.outputs.selected, '3.13') }} == true ]] ; then - sparse="" - sparsever=NA - fi - # fast_matrix_market does not yet support Python 3.13 or osx-arm64 - if [[ ${{ startsWith(steps.pyver.outputs.selected, '3.13') }} == true || - ${{ matrix.os == 'macos-latest' }} == true ]] - then - fmm="" - fmmver=NA + if [[ $sparsever == "NA" ]] ; then + sparse="" + else + sparse=sparse${sparsever} + fi fi echo "versions: np${npver} sp${spver} pd${pdver} ak${akver} nx${nxver} numba${numbaver} yaml${yamlver} sparse${sparsever} psg${psgver}" @@ -353,11 +216,11 @@ jobs: $(command -v mamba || command -v conda) install -c nodefaults \ packaging 
pytest coverage pytest-randomly cffi donfig tomli c-compiler make \ pyyaml${yamlver} ${sparse} pandas${pdver} scipy${spver} numpy${npver} ${awkward} \ - networkx${nxver} ${numba} ${fmm} ${psg} \ + networkx${nxver} ${numba} ${psg} \ ${{ matrix.slowtask == 'pytest_bizarro' && 'black' || '' }} \ ${{ matrix.slowtask == 'notebooks' && 'matplotlib nbconvert jupyter "ipython>=7" drawsvg' || '' }} \ ${{ steps.sourcetype.outputs.selected == 'upstream' && 'cython' || '' }} \ - ${{ steps.sourcetype.outputs.selected != 'wheel' && '"graphblas>=7.4,<9.4"' || '' }} \ + ${{ steps.sourcetype.outputs.selected != 'wheel' && '"graphblas>=7.4"' || '' }} \ ${{ contains(steps.pyver.outputs.selected, 'pypy') && 'pypy' || '' }} \ ${{ matrix.os == 'windows-latest' && 'cmake' || 'm4' }} \ # ${{ matrix.os != 'windows-latest' && 'pytest-forked' || '' }} # to investigate crashes @@ -387,10 +250,8 @@ jobs: (cd .. pytest --pyargs suitesparse_graphblas -s -k test_print_jit_config || true pytest -v --pyargs suitesparse_graphblas || true) - - name: Print platform and sysconfig variables - run: | - python -c "import platform ; print(platform.uname())" - python -c "import pprint, sysconfig ; pprint.pprint(sysconfig.get_config_vars())" + - name: JIT diagnostics + run: python scripts/jit_diagnostics.py || true - name: Unit tests run: | A=${{ needs.rngs.outputs.mapnumpy == 'A' || '' }} ; B=${{ needs.rngs.outputs.mapnumpy == 'B' || '' }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 43e28b8fe..20b632c30 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,20 +16,23 @@ default_language_version: python: python3 repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-added-large-files - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-illegal-windows-names - id: check-merge-conflict - # - id: check-symlinks - id: check-ast + - id: check-json # No json files yet - id: check-toml - id: 
check-yaml - id: check-executables-have-shebangs - id: check-vcs-permalinks + - id: debug-statements - id: destroyed-symlinks - id: detect-private-key - - id: debug-statements + - id: forbid-submodules - id: end-of-file-fixer exclude_types: [svg] - id: mixed-line-ending @@ -37,114 +40,142 @@ repos: - id: name-tests-test args: ["--pytest-test-first"] - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.23 + rev: v0.25 hooks: - id: validate-pyproject name: Validate pyproject.toml # I don't yet trust ruff to do what autoflake does - repo: https://github.com/PyCQA/autoflake - rev: v2.3.1 + rev: v2.3.3 hooks: - id: autoflake args: [--in-place] # We can probably remove `isort` if we come to trust `ruff --fix`, # but we'll need to figure out the configuration to do this in `ruff` - repo: https://github.com/pycqa/isort - rev: 6.0.0 + rev: 8.0.1 hooks: - id: isort # Let's keep `pyupgrade` even though `ruff --fix` probably does most of it - repo: https://github.com/asottile/pyupgrade - rev: v3.19.1 + rev: v3.21.2 hooks: - id: pyupgrade - args: [--py310-plus] + args: [--py311-plus] - repo: https://github.com/MarcoGorelli/auto-walrus - rev: 0.3.4 + rev: 0.4.1 hooks: - id: auto-walrus args: [--line-length, "100"] - - repo: https://github.com/psf/black - rev: 25.1.0 + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 26.3.1 hooks: - id: black - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.6 + rev: v0.15.7 hooks: - - id: ruff + - id: ruff-check args: [--fix-only, --show-fixes] # Let's keep `flake8` even though `ruff` does much of the same. # `flake8-bugbear` and `flake8-simplify` have caught things missed by `ruff`. 
- repo: https://github.com/PyCQA/flake8 - rev: 7.1.2 + rev: 7.3.0 hooks: - id: flake8 args: ["--config=.flake8"] additional_dependencies: &flake8_dependencies # These versions need updated manually - - flake8==7.1.2 - - flake8-bugbear==24.12.12 - - flake8-simplify==0.21.0 + - flake8==7.3.0 + - flake8-bugbear==25.11.29 + - flake8-simplify==0.30.0 - repo: https://github.com/codespell-project/codespell - rev: v2.4.1 + rev: v2.4.2 hooks: - id: codespell - types_or: [python, rst, markdown] - additional_dependencies: [tomli] + types_or: [python, markdown, rst, toml, yaml] + additional_dependencies: + - tomli; python_version<'3.11' files: ^(graphblas|docs)/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.6 + rev: v0.15.7 hooks: - - id: ruff + - id: ruff-check + # - id: ruff-format # Prefer black, but may temporarily uncomment this to see - repo: https://github.com/sphinx-contrib/sphinx-lint - rev: v1.0.0 + rev: v1.0.2 hooks: - id: sphinx-lint args: [--enable, all, "--disable=line-too-long,leaked-markup"] # `pyroma` may help keep our package standards up to date if best practices change. # This is probably a "low value" check though and safe to remove if we want faster pre-commit. - repo: https://github.com/regebro/pyroma - rev: "4.2" + rev: "5.0.1" hooks: - id: pyroma args: [-n, "10", .] 
- repo: https://github.com/shellcheck-py/shellcheck-py - rev: "v0.10.0.1" + rev: "v0.11.0.1" hooks: - id: shellcheck - repo: https://github.com/rbubley/mirrors-prettier - rev: v3.5.1 + rev: v3.8.1 hooks: - id: prettier + args: [--prose-wrap=preserve] + - repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.10.0 + hooks: + - id: rst-backticks + - id: rst-directive-colons + - id: rst-inline-touching-normal + - id: python-check-blanket-noqa + - id: python-check-blanket-type-ignore + - id: python-no-eval + - id: python-no-log-warn + - id: text-unicode-replacement-char - repo: https://github.com/ComPWA/taplo-pre-commit rev: v0.9.3 hooks: - id: taplo-format - repo: https://github.com/rhysd/actionlint - rev: v1.7.7 + rev: v1.7.11 hooks: - id: actionlint - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.31.1 + rev: 0.37.0 hooks: - id: check-dependabot - id: check-github-workflows - id: check-readthedocs - repo: https://github.com/adrienverge/yamllint - rev: v1.35.1 + rev: v1.38.0 hooks: - id: yamllint - - repo: https://github.com/woodruffw/zizmor-pre-commit - rev: v1.3.1 - hooks: - - id: zizmor + # - repo: https://github.com/woodruffw/zizmor-pre-commit + # rev: v1.14.2 + # hooks: + # - id: zizmor - repo: meta hooks: - id: check-hooks-apply - id: check-useless-excludes - repo: local hooks: + - id: disallow-caps + name: Disallow improper capitalization + language: pygrep + entry: PyBind|Numpy\>|Cmake|CCache|Github|PyTest|\|PyLint + exclude: (.pre-commit-config.yaml|docs/pages/guides/style\.md)$ + - id: disallow-words + name: Disallow certain words + language: pygrep + entry: "[Ff]alsey" + exclude: .pre-commit-config.yaml$ + - id: disallow-bad-permalinks + name: Disallow _ in permalinks + language: pygrep + entry: "^permalink:.*_.*" # Add `--hook-stage manual` to pre-commit command to run (very slow) # It's probably better (and faster!) 
to simply run `pylint graphblas/some/file.py` - id: pylint @@ -156,7 +187,7 @@ repos: args: [graphblas/] pass_filenames: false - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: no-commit-to-branch # no commit directly to main # diff --git a/README.md b/README.md index 1080314c7..7b157c979 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ For algorithms, see - **SuiteSparse:GraphBLAS User Guide:** [https://github.com/DrTimothyAldenDavis/GraphBLAS/raw/stable/Doc/GraphBLAS_UserGuide.pdf](https://github.com/DrTimothyAldenDavis/GraphBLAS/raw/stable/Doc/GraphBLAS_UserGuide.pdf) - **Source:** [https://github.com/python-graphblas/python-graphblas](https://github.com/python-graphblas/python-graphblas) - **Bug reports:** [https://github.com/python-graphblas/python-graphblas/issues](https://github.com/python-graphblas/python-graphblas/issues) -- **Github discussions:** [https://github.com/python-graphblas/python-graphblas/discussions](https://github.com/python-graphblas/python-graphblas/discussions) +- **GitHub discussions:** [https://github.com/python-graphblas/python-graphblas/discussions](https://github.com/python-graphblas/python-graphblas/discussions) - **Weekly community call:** [python-graphblas#247](https://github.com/python-graphblas/python-graphblas/issues/247) or [https://scientific-python.org/calendars/](https://scientific-python.org/calendars/) - **Chat via Discord:** [https://discord.com/invite/vur45CbwMz](https://discord.com/invite/vur45CbwMz) in the [#graphblas channel](https://discord.com/channels/786703927705862175/1024732940233605190) @@ -58,8 +58,7 @@ The following are not required by python-graphblas, but may be needed for certai - `pandas` – required for nicer `__repr__`; - `matplotlib` – required for basic plotting of graphs; - `scipy` – used in `io` module to read/write `scipy.sparse` format; -- `networkx` – used in `io` module to interface with `networkx` graphs; -- `fast-matrix-market` - for faster 
read/write of Matrix Market files with `gb.io.mmread` and `gb.io.mmwrite`. +- `networkx` – used in `io` module to interface with `networkx` graphs. ## Description diff --git a/dev-requirements.txt b/dev-requirements.txt index a281672ec..c6b586369 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -6,7 +6,7 @@ pyyaml pandas # For I/O awkward -fast_matrix_market +# fast_matrix_market is deprecated (no longer maintained; last supported Python is 3.12) networkx scipy sparse diff --git a/docs/conf.py b/docs/conf.py index 283f6d047..da8d0dc67 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -19,7 +19,7 @@ # -- Project information ----------------------------------------------------- project = "python-graphblas" -copyright = "2020-2023, Anaconda, Inc. and contributors" +copyright = "2020-2026, Anaconda, Inc. and contributors" author = "Anaconda, Inc" # The full version, including alpha/beta/rc tags diff --git a/docs/env.yml b/docs/env.yml index 78a50afbe..caec812d6 100644 --- a/docs/env.yml +++ b/docs/env.yml @@ -3,7 +3,7 @@ channels: - conda-forge - nodefaults dependencies: - - python=3.10 + - python=3.11 - pip # python-graphblas dependencies - donfig diff --git a/docs/getting_started/index.rst b/docs/getting_started/index.rst index 3726131d2..0c02b6194 100644 --- a/docs/getting_started/index.rst +++ b/docs/getting_started/index.rst @@ -34,7 +34,6 @@ to work. 
- `matplotlib `__ -- required for basic plotting of graphs - `scipy `__ -- used in ``io`` module to read/write ``scipy.sparse`` format - `networkx `__ -- used in ``io`` module to interface with networkx graphs - - `fast-matrix-market `__ -- for faster read/write of Matrix Market files with ``gb.io.mmread`` and ``gb.io.mmwrite`` GraphBLAS Fundamentals ---------------------- diff --git a/docs/user_guide/io.rst b/docs/user_guide/io.rst index f27b40bd3..8ce80184e 100644 --- a/docs/user_guide/io.rst +++ b/docs/user_guide/io.rst @@ -146,5 +146,3 @@ and ``gb.io.mmwrite()`` to write a Matrix to a Matrix Market file. These names match the equivalent functions in `scipy.sparse `_. ``scipy`` is required to be installed to read Matrix Market files. -If ``fast_matrix_market`` is installed, it will be used by default for -`much better performance `_. diff --git a/graphblas/__init__.py b/graphblas/__init__.py index 63110eeeb..86759f570 100644 --- a/graphblas/__init__.py +++ b/graphblas/__init__.py @@ -49,6 +49,7 @@ def get_config(): "core", "dtypes", "exceptions", + "indexbinary", "indexunary", "io", "monoid", diff --git a/graphblas/core/base.py b/graphblas/core/base.py index 24a49ba1a..15f66bc2f 100644 --- a/graphblas/core/base.py +++ b/graphblas/core/base.py @@ -266,28 +266,40 @@ def __or__(self, other): from .infix import _ewise_infix_expr, _ewise_mult_expr_types if isinstance(other, _ewise_mult_expr_types): - raise TypeError("XXX") + raise TypeError( + "Cannot mix `|` (ewise_add) with `&` (ewise_mult) expressions due to Python" + " operator precedence. Use explicit method calls instead." + ) return _ewise_infix_expr(self, other, method="ewise_add", within="__or__") def __ror__(self, other): from .infix import _ewise_infix_expr, _ewise_mult_expr_types if isinstance(other, _ewise_mult_expr_types): - raise TypeError("XXX") + raise TypeError( + "Cannot mix `|` (ewise_add) with `&` (ewise_mult) expressions due to Python" + " operator precedence. Use explicit method calls instead." 
+ ) return _ewise_infix_expr(other, self, method="ewise_add", within="__ror__") def __and__(self, other): from .infix import _ewise_add_expr_types, _ewise_infix_expr if isinstance(other, _ewise_add_expr_types): - raise TypeError("XXX") + raise TypeError( + "Cannot mix `&` (ewise_mult) with `|` (ewise_add) expressions due to Python" + " operator precedence. Use explicit method calls instead." + ) return _ewise_infix_expr(self, other, method="ewise_mult", within="__and__") def __rand__(self, other): from .infix import _ewise_add_expr_types, _ewise_infix_expr if isinstance(other, _ewise_add_expr_types): - raise TypeError("XXX") + raise TypeError( + "Cannot mix `&` (ewise_mult) with `|` (ewise_add) expressions due to Python" + " operator precedence. Use explicit method calls instead." + ) return _ewise_infix_expr(other, self, method="ewise_mult", within="__rand__") def __matmul__(self, other): diff --git a/graphblas/core/expr.py b/graphblas/core/expr.py index efec2db5f..47ff18a0b 100644 --- a/graphblas/core/expr.py +++ b/graphblas/core/expr.py @@ -281,6 +281,8 @@ def __lshift__(self, expr): def __eq__(self, other): raise TypeError(f"__eq__ not defined for objects of type {type(self)}.") + __hash__ = None + def __bool__(self): raise TypeError(f"__bool__ not defined for objects of type {type(self)}.") @@ -473,6 +475,8 @@ def update(self, expr): def __eq__(self, other): raise TypeError(f"__eq__ not defined for objects of type {type(self)}.") + __hash__ = None + def __bool__(self): raise TypeError(f"__bool__ not defined for objects of type {type(self)}.") diff --git a/graphblas/core/formatting.py b/graphblas/core/formatting.py index 0b6252101..5fe9b6972 100644 --- a/graphblas/core/formatting.py +++ b/graphblas/core/formatting.py @@ -119,7 +119,7 @@ """ -def _update_matrix_dataframe(df, matrix, rows, row_offset, columns, column_offset, *, mask=None): +def _update_matrix_array(arr, matrix, rows, row_offset, columns, column_offset, *, mask=None): if rows is None and columns 
is None: if mask is None: submatrix = matrix @@ -167,13 +167,17 @@ def _update_matrix_dataframe(df, matrix, rows, row_offset, columns, column_offse np_type = submatrix.dtype.np_type if submatrix.dtype._is_udt and np_type.subdtype is not None: vals = vals.tolist() - df.values[rows, cols] = vals + if isinstance(vals, np.ndarray) and vals.dtype.names is not None: + # Structured array: convert numpy.void elements to tuples for consistent display + arr[rows, cols] = [tuple(v) for v in vals] + else: + arr[rows, cols] = vals if np.issubdtype(np_type, np.inexact): nulls = np.isnan(vals) - df.values[rows[nulls], cols[nulls]] = "nan" + arr[rows[nulls], cols[nulls]] = "nan" -def _update_vector_dataframe(df, vector, columns, column_offset, *, mask=None): +def _update_vector_array(arr, vector, columns, column_offset, *, mask=None): if columns is None: if mask is None: subvector = vector @@ -205,9 +209,13 @@ def _update_vector_dataframe(df, vector, columns, column_offset, *, mask=None): np_type = subvector.dtype.np_type if subvector.dtype._is_udt and np_type.subdtype is not None: vals = vals.tolist() - df.values[0, cols] = vals + if isinstance(vals, np.ndarray) and vals.dtype.names is not None: + # Structured array: convert numpy.void elements to tuples for consistent display + arr[0, cols] = [tuple(v) for v in vals] + else: + arr[0, cols] = vals if np.issubdtype(np_type, np.inexact): - df.values[0, cols[np.isnan(vals)]] = "nan" + arr[0, cols[np.isnan(vals)]] = "nan" def _get_max_columns(): @@ -244,11 +252,11 @@ def _get_matrix_dataframe(matrix, max_rows, min_rows, max_columns, *, mask=None) max_columns = _get_max_columns() rows, row_groups = _get_chunk(matrix._nrows, min_rows, max_rows) columns, column_groups = _get_chunk(matrix._ncols, max_columns, max_columns) - df = pd.DataFrame(columns=columns, index=rows) + arr = np.full((len(rows), len(columns)), np.nan, dtype=object) for row_group, row_offset in row_groups: for column_group, column_offset in column_groups: - 
_update_matrix_dataframe( - df, + _update_matrix_array( + arr, matrix, row_group, row_offset, @@ -256,6 +264,7 @@ def _get_matrix_dataframe(matrix, max_rows, min_rows, max_columns, *, mask=None) column_offset, mask=mask, ) + df = pd.DataFrame(arr, columns=columns, index=rows) if ( (mask is None or mask.structure) and df.shape != matrix.shape @@ -306,9 +315,10 @@ def _get_vector_dataframe(vector, max_rows, min_rows, max_columns, *, mask=None) if max_columns is None: # pragma: no branch max_columns = _get_max_columns() columns, column_groups = _get_chunk(vector._size, max_columns, max_columns) - df = pd.DataFrame(columns=columns, index=[""]) + arr = np.full((1, len(columns)), np.nan, dtype=object) for column_group, column_offset in column_groups: - _update_vector_dataframe(df, vector, column_group, column_offset, mask=mask) + _update_vector_array(arr, vector, column_group, column_offset, mask=mask) + df = pd.DataFrame(arr, columns=columns, index=[""]) if ( (mask is None or mask.structure) and df.size != vector._size diff --git a/graphblas/core/infix.py b/graphblas/core/infix.py index 24c109639..73b3b91b5 100644 --- a/graphblas/core/infix.py +++ b/graphblas/core/infix.py @@ -70,6 +70,8 @@ def dup(self, dtype=None, *, clear=False, is_cscalar=False, name=None, **opts): def is_grbscalar(self): return not self.is_cscalar + __hash__ = None + # Begin auto-generated code: Scalar __and__ = wrapdoc(Scalar.__and__)(property(automethods.__and__)) __array__ = wrapdoc(Scalar.__array__)(property(automethods.__array__)) @@ -134,10 +136,16 @@ class ScalarEwiseAddExpr(ScalarInfixExpr): # Don't allow e.g. `plus(x | y & z)` def __and__(self, other): - raise TypeError("XXX") + raise TypeError( + "Cannot mix `&` (ewise_mult) into an `|` (ewise_add) infix chain due to Python" + " operator precedence. Use explicit method calls instead." 
+ ) def __rand__(self, other): - raise TypeError("XXX") + raise TypeError( + "Cannot mix `&` (ewise_mult) into an `|` (ewise_add) infix chain due to Python" + " operator precedence. Use explicit method calls instead." + ) class ScalarEwiseMultExpr(ScalarInfixExpr): @@ -155,10 +163,16 @@ class ScalarEwiseMultExpr(ScalarInfixExpr): # Don't allow e.g. `plus(x | y & z)` def __or__(self, other): - raise TypeError("XXX") + raise TypeError( + "Cannot mix `|` (ewise_add) into an `&` (ewise_mult) infix chain due to Python" + " operator precedence. Use explicit method calls instead." + ) def __ror__(self, other): - raise TypeError("XXX") + raise TypeError( + "Cannot mix `|` (ewise_add) into an `&` (ewise_mult) infix chain due to Python" + " operator precedence. Use explicit method calls instead." + ) class ScalarMatMulExpr(ScalarInfixExpr): diff --git a/graphblas/core/mask.py b/graphblas/core/mask.py index 3bda2188a..4a8412a60 100644 --- a/graphblas/core/mask.py +++ b/graphblas/core/mask.py @@ -18,6 +18,8 @@ def __init__(self, mask): def __eq__(self, other): raise TypeError(f"__eq__ not defined for objects of type {type(self)}.") + __hash__ = None + def __bool__(self): raise TypeError(f"__bool__ not defined for objects of type {type(self)}.") diff --git a/graphblas/core/matrix.py b/graphblas/core/matrix.py index bf20cc953..698bf7b4f 100644 --- a/graphblas/core/matrix.py +++ b/graphblas/core/matrix.py @@ -636,7 +636,7 @@ def build(self, rows, columns, values, *, dup_op=None, clear=False, nrows=None, # TODO: accept `dtype` keyword to match the dtype of `values`? 
rows = ints_to_numpy_buffer(rows, np.uint64, name="row indices") columns = ints_to_numpy_buffer(columns, np.uint64, name="column indices") - values, dtype = values_to_numpy_buffer(values, self.dtype) + values, _dtype = values_to_numpy_buffer(values, self.dtype) n = values.shape[0] if rows.size != n or columns.size != n: raise ValueError( @@ -3979,6 +3979,7 @@ def to_dicts(self, order="rowwise"): _extract_element = Matrix._extract_element _prep_for_extract = Matrix._prep_for_extract __eq__ = Matrix.__eq__ + __hash__ = None __bool__ = Matrix.__bool__ __getitem__ = Matrix.__getitem__ __contains__ = Matrix.__contains__ diff --git a/graphblas/core/operator/__init__.py b/graphblas/core/operator/__init__.py index d59c835b3..f40de919b 100644 --- a/graphblas/core/operator/__init__.py +++ b/graphblas/core/operator/__init__.py @@ -1,5 +1,6 @@ from .base import UNKNOWN_OPCLASS, OpBase, OpPath, ParameterizedUdf, TypedOpBase, find_opclass from .binary import BinaryOp, ParameterizedBinaryOp +from .indexbinary import IndexBinaryOp, ParameterizedIndexBinaryOp from .indexunary import IndexUnaryOp, ParameterizedIndexUnaryOp from .monoid import Monoid, ParameterizedMonoid from .select import ParameterizedSelectOp, SelectOp diff --git a/graphblas/core/operator/indexbinary.py b/graphblas/core/operator/indexbinary.py new file mode 100644 index 000000000..cb07e0eaa --- /dev/null +++ b/graphblas/core/operator/indexbinary.py @@ -0,0 +1,507 @@ +import inspect +from types import FunctionType + +from ... import _STANDARD_OPERATOR_NAMES, indexbinary +from ...dtypes import BOOL, INT8, UINT64, lookup_dtype +from ...exceptions import UdfParseError, check_status_carg +from .. 
import _has_numba, ffi, lib +from ..dtypes import _sample_values +from .base import OpBase, ParameterizedUdf, TypedOpBase, _deserialize_parameterized + +_has_idxbinop = hasattr(lib, "GxB_IndexBinaryOp_new") + +if _has_numba: + import numba + +ffi_new = ffi.new + + +class _BoundIndexBinaryOp(TypedOpBase): + """A BinaryOp created by binding a theta value to an IndexBinaryOp.""" + + __slots__ = () + opclass = "BinaryOp" + + +class TypedBuiltinIndexBinaryOp(TypedOpBase): + __slots__ = () + opclass = "IndexBinaryOp" + + def __call__(self, theta=None): + """Bind a theta value to create a BinaryOp that can be used in operations. + + Parameters + ---------- + theta : scalar, optional + The theta parameter to bind. Default is 0 (False). + + Returns + ------- + TypedOpBase + A BinaryOp created from this IndexBinaryOp with the given theta. + + """ + from ..scalar import Scalar + + if theta is None: + theta = False + if not isinstance(theta, Scalar): + theta = Scalar.from_value(theta, is_cscalar=False, name="") # pragma: is_grbscalar + elif theta._is_cscalar: + # fmt: off + val = theta.value + theta = Scalar.from_value(val, is_cscalar=False, name="") # pragma: is_grbscalar + # fmt: on + new_binop = ffi_new("GrB_BinaryOp*") + check_status_carg( + lib.GxB_BinaryOp_new_IndexOp(new_binop, self.gb_obj, theta._carg), + "BinaryOp", + new_binop[0], + ) + rv = _BoundIndexBinaryOp.__new__(_BoundIndexBinaryOp) + rv.parent = self.parent + rv.name = self.name + rv.type = self.type + rv.return_type = self.return_type + rv.gb_obj = new_binop[0] + rv.gb_name = f"{self.name}_bound" + rv._type2 = self._type2 + return rv + + @property + def thunk_type(self): + return self.type if self._type2 is None else self._type2 + + +class TypedUserIndexBinaryOp(TypedOpBase): + __slots__ = () + opclass = "IndexBinaryOp" + + def __init__(self, parent, name, type_, return_type, gb_obj, dtype2=None): + super().__init__(parent, name, type_, return_type, gb_obj, f"{name}_{type_}", dtype2=dtype2) + + @property + 
def orig_func(self): + return self.parent.orig_func + + @property + def _numba_func(self): + return self.parent._numba_func + + thunk_type = TypedBuiltinIndexBinaryOp.thunk_type + + def __call__(self, theta=None): + return TypedBuiltinIndexBinaryOp.__call__(self, theta) + + __call__.__doc__ = TypedBuiltinIndexBinaryOp.__call__.__doc__ + + +class ParameterizedIndexBinaryOp(ParameterizedUdf): + __slots__ = "func", "__signature__", "_is_udt" + + def __init__(self, name, func, *, anonymous=False, is_udt=False): + self.func = func + self.__signature__ = inspect.signature(func) + self._is_udt = is_udt + if name is None: + name = getattr(func, "__name__", name) + super().__init__(name, anonymous) + + def _call(self, *args, **kwargs): + idxbinop = self.func(*args, **kwargs) + idxbinop._parameterized_info = (self, args, kwargs) + return IndexBinaryOp.register_anonymous(idxbinop, self.name, is_udt=self._is_udt) + + def __reduce__(self): + name = f"indexbinary.{self.name}" + if not self._anonymous and name in _STANDARD_OPERATOR_NAMES: + return name + return (self._deserialize, (self.name, self.func, self._anonymous)) + + @staticmethod + def _deserialize(name, func, anonymous): + if anonymous: + return IndexBinaryOp.register_anonymous(func, name, parameterized=True) + if (rv := IndexBinaryOp._find(name)) is not None: + return rv + return IndexBinaryOp.register_new(name, func, parameterized=True) + + +class IndexBinaryOp(OpBase): + """Takes two inputs with their indices, plus a thunk, and returns one output. + + The function has the signature ``f(x, ix, jx, y, iy, jy, theta) -> z``, + where ``ix, jx`` are the row/column indices of ``x`` and ``iy, jy`` are + the row/column indices of ``y``. + + An IndexBinaryOp can be converted to a BinaryOp by binding a theta value, + which makes it usable in any operation that accepts a BinaryOp (eWiseMult, + eWiseAdd, mxm, etc.). + + IndexBinaryOps are located in the ``graphblas.indexbinary`` namespace. 
+ + There are no built-in IndexBinaryOps; all are user-defined. + """ + + __slots__ = "orig_func", "_is_udt", "_numba_func" + _module = indexbinary + _modname = "indexbinary" + _custom_dtype = None + _typed_class = TypedBuiltinIndexBinaryOp + _typed_user_class = TypedUserIndexBinaryOp + # No built-in IndexBinaryOps; no parse config needed + _parse_config = { + "trim_from_front": 4, + "num_underscores": 1, + "re_exprs": [], + } + + @classmethod + def _build(cls, name, func, *, is_udt=False, anonymous=False): + if not _has_idxbinop: + raise RuntimeError( + "IndexBinaryOp requires SuiteSparse:GraphBLAS 9.4+ " + "(python-suitesparse-graphblas 9.3.1+)" + ) + if not isinstance(func, FunctionType): + raise TypeError(f"UDF argument must be a function, not {type(func)}") + if name is None: + name = getattr(func, "__name__", "") + success = False + indexbinary_udf = numba.njit(func) + new_type_obj = cls( + name, func, anonymous=anonymous, is_udt=is_udt, numba_func=indexbinary_udf + ) + return_types = {} + nt = numba.types + if not is_udt: + for type_ in _sample_values: + sig = ( + type_.numba_type, + UINT64.numba_type, + UINT64.numba_type, + type_.numba_type, + UINT64.numba_type, + UINT64.numba_type, + type_.numba_type, + ) + try: + indexbinary_udf.compile(sig) + except numba.TypingError: + continue + ret_type = lookup_dtype(indexbinary_udf.overloads[sig].signature.return_type) + if ret_type != type_ and ( + ("INT" in ret_type.name and "INT" in type_.name) + or ("FP" in ret_type.name and "FP" in type_.name) + or ("FC" in ret_type.name and "FC" in type_.name) + or ( + type_ == UINT64 + and ret_type.name == "FP64" + and return_types.get(lookup_dtype("INT64")) == lookup_dtype("INT64") + ) + ): + ret_type = type_ + elif type_ == BOOL and ret_type.name == "INT64" and return_types.get(INT8) == INT8: + ret_type = INT8 + + # Numba is unable to handle BOOL correctly right now + input_type = INT8 if type_ == BOOL else type_ + return_type = INT8 if ret_type == BOOL else ret_type + + # 
Build wrapper: z = f(x, ix, jx, y, iy, jy, theta) + # C signature: void(z*, x*, ix, jx, y*, iy, jy, theta*) + wrapper_sig = nt.void( + nt.CPointer(return_type.numba_type), + nt.CPointer(input_type.numba_type), + UINT64.numba_type, + UINT64.numba_type, + nt.CPointer(input_type.numba_type), + UINT64.numba_type, + UINT64.numba_type, + nt.CPointer(input_type.numba_type), + ) + + if type_ == BOOL: + if ret_type == BOOL: + + def indexbinary_wrapper( + z, x, ix, jx, y, iy, jy, theta + ): # pragma: no cover (numba) + z[0] = bool( + indexbinary_udf( + bool(x[0]), ix, jx, bool(y[0]), iy, jy, bool(theta[0]) + ) + ) + + else: + + def indexbinary_wrapper( + z, x, ix, jx, y, iy, jy, theta + ): # pragma: no cover (numba) + z[0] = indexbinary_udf( + bool(x[0]), ix, jx, bool(y[0]), iy, jy, bool(theta[0]) + ) + + elif ret_type == BOOL: + + def indexbinary_wrapper( + z, x, ix, jx, y, iy, jy, theta + ): # pragma: no cover (numba) + z[0] = bool(indexbinary_udf(x[0], ix, jx, y[0], iy, jy, theta[0])) + + else: + + def indexbinary_wrapper( + z, x, ix, jx, y, iy, jy, theta + ): # pragma: no cover (numba) + z[0] = indexbinary_udf(x[0], ix, jx, y[0], iy, jy, theta[0]) + + indexbinary_wrapper = numba.cfunc(wrapper_sig, nopython=True)(indexbinary_wrapper) + new_idxbinop = ffi_new("GxB_IndexBinaryOp*") + check_status_carg( + lib.GxB_IndexBinaryOp_new( + new_idxbinop, + indexbinary_wrapper.cffi, + ret_type.gb_obj, + type_.gb_obj, + type_.gb_obj, + type_.gb_obj, + ffi_new("char[]", name.encode()), + ffi.NULL, + ), + "IndexBinaryOp", + new_idxbinop[0], + ) + op = cls._typed_user_class(new_type_obj, name, type_, ret_type, new_idxbinop[0]) + new_type_obj._add(op) + success = True + return_types[type_] = ret_type + if success or is_udt: + return new_type_obj + raise UdfParseError("Unable to parse function using Numba") + + def _compile_udt(self, dtype, dtype2): + if not _has_idxbinop: + raise RuntimeError( + "IndexBinaryOp requires SuiteSparse:GraphBLAS 9.4+ " + "(python-suitesparse-graphblas 
9.3.1+)" + ) + if dtype2 is None: + dtype2 = dtype + dtypes = (dtype, dtype2) + if dtypes in self._udt_types: + return self._udt_ops[dtypes] + if self._numba_func is None: + raise KeyError(f"{self.name} does not work with {dtypes} types") + + numba_func = self._numba_func + sig = ( + dtype.numba_type, + UINT64.numba_type, + UINT64.numba_type, + dtype2.numba_type, + UINT64.numba_type, + UINT64.numba_type, + dtype2.numba_type, + ) + numba_func.compile(sig) + ret_type = lookup_dtype(numba_func.overloads[sig].signature.return_type) + indexbinary_wrapper, wrapper_sig = _get_udt_wrapper_indexbinary( + numba_func, ret_type, dtype, dtype2 + ) + + indexbinary_wrapper = numba.cfunc(wrapper_sig, nopython=True)(indexbinary_wrapper) + new_idxbinop = ffi_new("GxB_IndexBinaryOp*") + check_status_carg( + lib.GxB_IndexBinaryOp_new( + new_idxbinop, + indexbinary_wrapper.cffi, + ret_type._carg, + dtype._carg, + dtype2._carg, + dtype2._carg, + ffi_new("char[]", self.name.encode()), + ffi.NULL, + ), + "IndexBinaryOp", + new_idxbinop[0], + ) + op = TypedUserIndexBinaryOp( + self, + self.name, + dtype, + ret_type, + new_idxbinop[0], + dtype2=dtype2, + ) + self._udt_types[dtypes] = ret_type + self._udt_ops[dtypes] = op + return op + + @classmethod + def register_anonymous(cls, func, name=None, *, parameterized=False, is_udt=False): + """Register an IndexBinaryOp without registering it in the ``indexbinary`` namespace. + + Because it is not registered in the namespace, the name is optional. + + Parameters + ---------- + func : FunctionType + The function to compile. For all current backends, this must be able + to be compiled with ``numba.njit``. + ``func`` takes seven input parameters--``(x, ix, jx, y, iy, jy, theta)``-- + where ``x`` and ``y`` are element values, ``ix, jx`` and ``iy, jy`` + are their row/column indices (uint64), and ``theta`` is a scalar parameter. + name : str, optional + The name of the operator. This *does not* show up as ``gb.indexbinary.{name}``. 
+ parameterized : bool, default False + When True, create a parameterized user-defined operator, which means + additional parameters can be "baked into" the operator when used. + is_udt : bool, default False + Whether the operator is intended to operate on user-defined types. + + Returns + ------- + IndexBinaryOp or ParameterizedIndexBinaryOp + + """ + cls._check_supports_udf("register_anonymous") + if parameterized: + return ParameterizedIndexBinaryOp(name, func, anonymous=True, is_udt=is_udt) + return cls._build(name, func, anonymous=True, is_udt=is_udt) + + @classmethod + def register_new(cls, name, func, *, parameterized=False, is_udt=False, lazy=False): + """Register a new IndexBinaryOp and save it to ``graphblas.indexbinary`` namespace. + + Parameters + ---------- + name : str + The name of the operator. This will show up as ``gb.indexbinary.{name}``. + func : FunctionType + The function to compile. For all current backends, this must be able + to be compiled with ``numba.njit``. + ``func`` takes seven input parameters--``(x, ix, jx, y, iy, jy, theta)``-- + where ``x`` and ``y`` are element values, ``ix, jx`` and ``iy, jy`` + are their row/column indices (uint64), and ``theta`` is a scalar parameter. + parameterized : bool, default False + When True, create a parameterized user-defined operator. + is_udt : bool, default False + Whether the operator is intended to operate on user-defined types. + lazy : bool, default False + If True, delay compilation until the operator is used. 
+ + Examples + -------- + >>> gb.indexbinary.register_new("index_dist", lambda x, ix, jx, y, iy, jy, t: abs(ix - iy)) + + """ + cls._check_supports_udf("register_new") + module, funcname = cls._remove_nesting(name) + if lazy: + module._delayed[funcname] = ( + cls.register_new, + {"name": name, "func": func, "parameterized": parameterized}, + ) + elif parameterized: + idxbinop = ParameterizedIndexBinaryOp(name, func, is_udt=is_udt) + setattr(module, funcname, idxbinop) + else: + idxbinop = cls._build(name, func, is_udt=is_udt) + setattr(module, funcname, idxbinop) + + if not cls._initialized: # pragma: no cover (safety) + _STANDARD_OPERATOR_NAMES.add(f"{cls._modname}.{name}") + if not lazy: + return idxbinop + + @classmethod + def _initialize(cls): + if cls._initialized: + return + super()._initialize(include_in_ops=False) + # No built-in IndexBinaryOps to register + cls._initialized = True + + def __init__(self, name, func=None, *, anonymous=False, is_udt=False, numba_func=None): + super().__init__(name, anonymous=anonymous) + self.orig_func = func + self._numba_func = numba_func + self._is_udt = is_udt + if is_udt: + self._udt_types = {} # {(dtype, dtype2): DataType} + self._udt_ops = {} # {(dtype, dtype2): TypedUserIndexBinaryOp} + + def __reduce__(self): + if self._anonymous: + if hasattr(self.orig_func, "_parameterized_info"): + return (_deserialize_parameterized, self.orig_func._parameterized_info) + return (self.register_anonymous, (self.orig_func, self.name)) + if (name := f"indexbinary.{self.name}") in _STANDARD_OPERATOR_NAMES: + return name + return (self._deserialize, (self.name, self.orig_func)) + + def __call__(self, theta=None, dtype=None): + """Bind a theta value to create a BinaryOp. + + Parameters + ---------- + theta : scalar, optional + The theta parameter to bind. Default is 0 (False). + dtype : dtype, optional + The dtype to use. If not provided, it will be inferred from theta. 
+ + Returns + ------- + TypedOpBase + A BinaryOp created from this IndexBinaryOp with the given theta. + + """ + from ...dtypes import lookup_dtype as _lookup_dtype + from ..scalar import Scalar + + if theta is None: + theta = False + if not isinstance(theta, Scalar): + theta = Scalar.from_value(theta, is_cscalar=False, name="") # pragma: is_grbscalar + elif theta._is_cscalar: + # fmt: off + val = theta.value + theta = Scalar.from_value(val, is_cscalar=False, name="") # pragma: is_grbscalar + # fmt: on + if dtype is None: + dtype = theta.dtype + else: + dtype = _lookup_dtype(dtype) + typed_op = self[dtype] + return typed_op(theta) + + +def _get_udt_wrapper_indexbinary(numba_func, return_type, dtype, dtype2): + """Build a wrapper function for UDT IndexBinaryOp: z = f(x, ix, jx, y, iy, jy, theta).""" + nt = numba.types + ztype = INT8 if return_type == BOOL else return_type + xtype = INT8 if dtype == BOOL else dtype + ytype = INT8 if dtype2 == BOOL else dtype2 + + wrapper_sig = nt.void( + nt.CPointer(ztype.numba_type), + nt.CPointer(xtype.numba_type), + UINT64.numba_type, + UINT64.numba_type, + nt.CPointer(ytype.numba_type), + UINT64.numba_type, + UINT64.numba_type, + nt.CPointer(ytype.numba_type), + ) + + d = {"numba": numba, "numba_func": numba_func} + xderef = "bool(x_ptr[0])" if dtype == BOOL else "x_ptr[0]" + yderef = "bool(y_ptr[0])" if dtype2 == BOOL else "y_ptr[0]" + tderef = "bool(t_ptr[0])" if dtype2 == BOOL else "t_ptr[0]" + call = f"numba_func({xderef}, ix, jx, {yderef}, iy, jy, {tderef})" + if return_type == BOOL: + call = f"bool({call})" + text = f"def wrapper(z_ptr, x_ptr, ix, jx, y_ptr, iy, jy, t_ptr):\n z_ptr[0] = {call}\n" + exec(text, d) # noqa: S102 + return d["wrapper"], wrapper_sig diff --git a/graphblas/core/operator/utils.py b/graphblas/core/operator/utils.py index 1442a9b5e..edd06b878 100644 --- a/graphblas/core/operator/utils.py +++ b/graphblas/core/operator/utils.py @@ -1,6 +1,17 @@ from types import BuiltinFunctionType, FunctionType, 
ModuleType -from ... import backend, binary, config, indexunary, monoid, op, select, semiring, unary +from ... import ( + backend, + binary, + config, + indexbinary, + indexunary, + monoid, + op, + select, + semiring, + unary, +) from ...dtypes import UINT64, lookup_dtype, unify from ..expr import InfixExprBase from .base import ( @@ -14,6 +25,7 @@ find_opclass, ) from .binary import BinaryOp +from .indexbinary import IndexBinaryOp from .indexunary import IndexUnaryOp from .monoid import Monoid from .select import SelectOp @@ -261,6 +273,8 @@ def get_semiring(monoid, binaryop, name=None): unary.register_new = UnaryOp.register_new unary.register_anonymous = UnaryOp.register_anonymous +indexbinary.register_new = IndexBinaryOp.register_new +indexbinary.register_anonymous = IndexBinaryOp.register_anonymous indexunary.register_new = IndexUnaryOp.register_new indexunary.register_anonymous = IndexUnaryOp.register_anonymous select.register_new = SelectOp.register_new diff --git a/graphblas/core/recorder.py b/graphblas/core/recorder.py index ca776f697..90f35bb82 100644 --- a/graphblas/core/recorder.py +++ b/graphblas/core/recorder.py @@ -179,4 +179,4 @@ def __repr__(self): skip_record = Recorder(start=False) -skip_record.data = collections.deque([], 0) +skip_record.data = collections.deque(maxlen=0) diff --git a/graphblas/core/scalar.py b/graphblas/core/scalar.py index 25aef5743..c8148e8e2 100644 --- a/graphblas/core/scalar.py +++ b/graphblas/core/scalar.py @@ -136,6 +136,8 @@ def __eq__(self, other): """ return self.isequal(other) + __hash__ = None + def __ne__(self, other): return not self.isequal(other) @@ -1001,6 +1003,7 @@ def _repr_html_(self): is_cscalar = Scalar.is_cscalar is_grbscalar = Scalar.is_grbscalar + __hash__ = None # Begin auto-generated code: Scalar _get_value = automethods._get_value @@ -1075,6 +1078,7 @@ def dup(self, dtype=None, *, clear=False, is_cscalar=False, name=None, **opts): is_cscalar = Scalar.is_cscalar is_grbscalar = Scalar.is_grbscalar + 
__hash__ = None # Begin auto-generated code: Scalar _get_value = automethods._get_value diff --git a/graphblas/core/ss/__init__.py b/graphblas/core/ss/__init__.py index 10a6fed94..79859eb53 100644 --- a/graphblas/core/ss/__init__.py +++ b/graphblas/core/ss/__init__.py @@ -1,5 +1,5 @@ import suitesparse_graphblas as _ssgb -(version_major, version_minor, version_bug) = map(int, _ssgb.__version__.split(".")[:3]) +version_major, version_minor, version_bug = map(int, _ssgb.__version__.split(".")[:3]) _IS_SSGB7 = version_major == 7 diff --git a/graphblas/core/ss/context.py b/graphblas/core/ss/context.py index f93d1ec1c..67c7a6c20 100644 --- a/graphblas/core/ss/context.py +++ b/graphblas/core/ss/context.py @@ -20,9 +20,12 @@ class Context(BaseConfig): _context_keys = {"chunk", "gpu_id", "nthreads"} _options = { "chunk": (lib.GxB_CONTEXT_CHUNK, "double"), - "gpu_id": (lib.GxB_CONTEXT_GPU_ID, "int"), "nthreads": (lib.GxB_CONTEXT_NTHREADS, "int"), } + # GxB_CONTEXT_GPU_ID existed in SS:GraphBLAS <=10.1 as a scalar int. + # It was renamed to GxB_CONTEXT_GPU_IDS in 10.2 and changed to a list type. 
+ if hasattr(lib, "GxB_CONTEXT_GPU_ID"): + _options["gpu_id"] = (lib.GxB_CONTEXT_GPU_ID, "int") _defaults = { "nthreads": 0, "chunk": 0, @@ -37,13 +40,14 @@ def __init__(self, engage=True, *, stack=True, nthreads=None, chunk=None, gpu_id context = threadlocal.context self["nthreads"] = context["nthreads"] if nthreads is None else nthreads self["chunk"] = context["chunk"] if chunk is None else chunk - self["gpu_id"] = context["gpu_id"] if gpu_id is None else gpu_id + if "gpu_id" in self._options: + self["gpu_id"] = context["gpu_id"] if gpu_id is None else gpu_id else: if nthreads is not None: self["nthreads"] = nthreads if chunk is not None: self["chunk"] = chunk - if gpu_id is not None: + if gpu_id is not None and "gpu_id" in self._options: self["gpu_id"] = gpu_id self._prev_context = None if engage: @@ -66,7 +70,7 @@ def dup(self, engage=True, *, nthreads=None, chunk=None, gpu_id=None): nthreads = self["nthreads"] if chunk is None: chunk = self["chunk"] - if gpu_id is None: + if gpu_id is None and "gpu_id" in self._options: gpu_id = self["gpu_id"] return type(self)(engage, stack=False, nthreads=nthreads, chunk=chunk, gpu_id=gpu_id) diff --git a/graphblas/core/ss/descriptor.py b/graphblas/core/ss/descriptor.py index 781661b7b..20fc126cf 100644 --- a/graphblas/core/ss/descriptor.py +++ b/graphblas/core/ss/descriptor.py @@ -44,10 +44,13 @@ class _DescriptorConfig(BaseConfig): _options.update( { "chunk": (lib.GxB_CONTEXT_CHUNK, "double"), - "gpu_id": (lib.GxB_CONTEXT_GPU_ID, "int"), "nthreads": (lib.GxB_CONTEXT_NTHREADS, "int"), } ) + # GxB_CONTEXT_GPU_ID existed in SS:GraphBLAS <=10.1 as a scalar int. + # It was renamed to GxB_CONTEXT_GPU_IDS in 10.2 and changed to a list type. 
+ if hasattr(lib, "GxB_CONTEXT_GPU_ID"): + _options["gpu_id"] = (lib.GxB_CONTEXT_GPU_ID, "int") _enumerations = { # GrB "output_replace": { diff --git a/graphblas/core/ss/indexbinary.py b/graphblas/core/ss/indexbinary.py new file mode 100644 index 000000000..89a38b3fc --- /dev/null +++ b/graphblas/core/ss/indexbinary.py @@ -0,0 +1,134 @@ +from ... import backend +from ...dtypes import lookup_dtype +from ...exceptions import check_status_carg +from .. import NULL, ffi, lib +from ..operator.base import TypedOpBase +from ..operator.indexbinary import ( + IndexBinaryOp, + TypedBuiltinIndexBinaryOp, + TypedUserIndexBinaryOp, + _has_idxbinop, +) + +ffi_new = ffi.new + + +class TypedJitIndexBinaryOp(TypedOpBase): + __slots__ = "_jit_c_definition" + opclass = "IndexBinaryOp" + + def __init__(self, parent, name, type_, return_type, gb_obj, jit_c_definition, dtype2=None): + super().__init__(parent, name, type_, return_type, gb_obj, name, dtype2=dtype2) + self._jit_c_definition = jit_c_definition + + @property + def jit_c_definition(self): + return self._jit_c_definition + + thunk_type = TypedUserIndexBinaryOp.thunk_type + + def __call__(self, theta=None): + return TypedBuiltinIndexBinaryOp.__call__(self, theta) + + __call__.__doc__ = TypedBuiltinIndexBinaryOp.__call__.__doc__ + + +def register_new(name, jit_c_definition, x_type, y_type, theta_type, ret_type): + """Register a new IndexBinaryOp using the SuiteSparse:GraphBLAS JIT compiler. + + This creates an IndexBinaryOp by compiling the C string definition of the function. + It requires a shell call to a C compiler. The resulting operator will be as + fast as if it were built-in to SuiteSparse:GraphBLAS and does not have the + overhead of additional function calls as when using ``gb.indexbinary.register_new``. + + This is an advanced feature that requires a C compiler and proper configuration. + Configuration is handled by ``gb.ss.config``; see its docstring for details. 
+ By default, the JIT caches results in ``~/.SuiteSparse/``. For more information, + see the SuiteSparse:GraphBLAS user guide. + + Only one type signature may be registered at a time, but repeated calls using + the same name with different input types are allowed. + + Parameters + ---------- + name : str + The name of the operator. This will show up as ``gb.indexbinary.ss.{name}``. + The name may contain periods, ".", which will result in nested objects + such as ``gb.indexbinary.ss.x.y.z`` for name ``"x.y.z"``. + jit_c_definition : str + The C definition as a string of the user-defined function. For example: + ``"void my_idxbin (double *z, double *x, GrB_Index ix, GrB_Index jx, "`` + ``"double *y, GrB_Index iy, GrB_Index jy, double *theta) "`` + ``"{ (*z) = (*x) + (*y) + (*theta) ; }"`` + x_type : dtype + The dtype of the first operand (x). + y_type : dtype + The dtype of the second operand (y). + theta_type : dtype + The dtype of the theta parameter. + ret_type : dtype + The dtype of the result. 
+ + Returns + ------- + IndexBinaryOp + + See Also + -------- + gb.indexbinary.register_new + gb.indexbinary.register_anonymous + + """ + if backend != "suitesparse": # pragma: no cover (safety) + raise RuntimeError( + "`gb.indexbinary.ss.register_new` invalid when not using 'suitesparse' backend" + ) + if not _has_idxbinop: + import suitesparse_graphblas as ssgb + + raise RuntimeError( + "IndexBinaryOp requires SuiteSparse:GraphBLAS 9.4+; " + f"current version is {ssgb.__version__}" + ) + x_type = lookup_dtype(x_type) + y_type = lookup_dtype(y_type) + theta_type = lookup_dtype(theta_type) + ret_type = lookup_dtype(ret_type) + name = name if name.startswith("ss.") else f"ss.{name}" + module, funcname = IndexBinaryOp._remove_nesting(name, strict=False) + if hasattr(module, funcname): + rv = getattr(module, funcname) + if not isinstance(rv, IndexBinaryOp): + IndexBinaryOp._remove_nesting(name) + if ( + (x_type, theta_type) in rv.types + or rv._udt_types is not None + and (x_type, theta_type) in rv._udt_types + ): + raise TypeError( + f"IndexBinaryOp gb.indexbinary.{name} already defined for " + f"({x_type}, {theta_type}) input types" + ) + else: + rv = IndexBinaryOp(name, is_udt=True) + gb_obj = ffi_new("GxB_IndexBinaryOp*") + check_status_carg( + lib.GxB_IndexBinaryOp_new( + gb_obj, + NULL, + ret_type._carg, + x_type._carg, + y_type._carg, + theta_type._carg, + ffi_new("char[]", funcname.encode()), + ffi_new("char[]", jit_c_definition.encode()), + ), + "IndexBinaryOp", + gb_obj[0], + ) + op = TypedJitIndexBinaryOp( + rv, funcname, x_type, ret_type, gb_obj[0], jit_c_definition, dtype2=theta_type + ) + rv._add(op, is_jit=True) + setattr(module, funcname, rv) + return rv diff --git a/graphblas/core/ss/matrix.py b/graphblas/core/ss/matrix.py index 509c56113..e45c7014b 100644 --- a/graphblas/core/ss/matrix.py +++ b/graphblas/core/ss/matrix.py @@ -376,7 +376,7 @@ def concat(self, tiles, **opts): graphblas.ss.concat """ - tiles, m, n, is_matrix = _concat_mn(tiles, 
is_matrix=True) + tiles, m, n, _is_matrix = _concat_mn(tiles, is_matrix=True) self._concat(tiles, m, n, opts) def build_scalar(self, rows, columns, value): @@ -4088,13 +4088,22 @@ def serialize(self, compression="default", level=None, **opts): dtype_size = ffi_new("size_t*") status = lib.GrB_Type_get_SIZE(parent.dtype.gb_obj[0], dtype_size, lib.GrB_NAME) check_status_carg(status, "Type", parent.dtype.gb_obj[0]) - # Then get the name - dtype_char = ffi_new(f"char[{dtype_size[0]}]") - status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME) - check_status_carg(status, "Type", parent.dtype.gb_obj[0]) - # Then set the name - status = lib.GrB_Matrix_set_String(parent._carg, dtype_char, lib.GrB_NAME) - check_status_carg(status, "Matrix", parent._carg) + if dtype_size[0] >= lib.GxB_MAX_NAME_LEN: + # The dtype name is too long to safely store in the blob (GxB_Serialized_get_SIZE + # segfaults on names >= GxB_MAX_NAME_LEN). For named UDTs, use the short + # registered name instead; anonymous UDTs cannot round-trip without dtype=. 
+ if not parent.dtype._is_anonymous: + val_obj = ffi.new("char[]", parent.dtype.name.encode()) + status = lib.GrB_Matrix_set_String(parent._carg, val_obj, lib.GrB_NAME) + check_status_carg(status, "Matrix", parent._carg) + else: + # Then get the name + dtype_char = ffi_new(f"char[{dtype_size[0]}]") + status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME) + check_status_carg(status, "Type", parent.dtype.gb_obj[0]) + # Then set the name + status = lib.GrB_Matrix_set_String(parent._carg, dtype_char, lib.GrB_NAME) + check_status_carg(status, "Matrix", parent._carg) check_status( lib.GxB_Matrix_serialize( diff --git a/graphblas/core/ss/vector.py b/graphblas/core/ss/vector.py index fdde7eb92..20b9ce297 100644 --- a/graphblas/core/ss/vector.py +++ b/graphblas/core/ss/vector.py @@ -262,7 +262,7 @@ def concat(self, tiles, **opts): graphblas.ss.concat """ - tiles, m, n, is_matrix = _concat_mn(tiles, is_matrix=False) + tiles, m, _n, _is_matrix = _concat_mn(tiles, is_matrix=False) self._concat(tiles, m, opts) def build_scalar(self, indices, value): @@ -1659,13 +1659,22 @@ def serialize(self, compression="default", level=None, **opts): dtype_size = ffi_new("size_t*") status = lib.GrB_Type_get_SIZE(parent.dtype.gb_obj[0], dtype_size, lib.GrB_NAME) check_status_carg(status, "Type", parent.dtype.gb_obj[0]) - # Then get the name - dtype_char = ffi_new(f"char[{dtype_size[0]}]") - status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME) - check_status_carg(status, "Type", parent.dtype.gb_obj[0]) - # Then set the name - status = lib.GrB_Vector_set_String(parent._carg, dtype_char, lib.GrB_NAME) - check_status_carg(status, "Vector", parent._carg) + if dtype_size[0] >= lib.GxB_MAX_NAME_LEN: + # The dtype name is too long to safely store in the blob (GxB_Serialized_get_SIZE + # segfaults on names >= GxB_MAX_NAME_LEN). For named UDTs, use the short + # registered name instead; anonymous UDTs cannot round-trip without dtype=. 
+ if not parent.dtype._is_anonymous: + val_obj = ffi.new("char[]", parent.dtype.name.encode()) + status = lib.GrB_Vector_set_String(parent._carg, val_obj, lib.GrB_NAME) + check_status_carg(status, "Vector", parent._carg) + else: + # Then get the name + dtype_char = ffi_new(f"char[{dtype_size[0]}]") + status = lib.GrB_Type_get_String(parent.dtype.gb_obj[0], dtype_char, lib.GrB_NAME) + check_status_carg(status, "Type", parent.dtype.gb_obj[0]) + # Then set the name + status = lib.GrB_Vector_set_String(parent._carg, dtype_char, lib.GrB_NAME) + check_status_carg(status, "Vector", parent._carg) check_status( lib.GxB_Vector_serialize( diff --git a/graphblas/core/vector.py b/graphblas/core/vector.py index 8bac4198e..8c73ecc48 100644 --- a/graphblas/core/vector.py +++ b/graphblas/core/vector.py @@ -528,7 +528,7 @@ def build(self, indices, values, *, dup_op=None, clear=False, size=None): """ # TODO: accept `dtype` keyword to match the dtype of `values`? indices = ints_to_numpy_buffer(indices, np.uint64, name="indices") - values, dtype = values_to_numpy_buffer(values, self.dtype) + values, _dtype = values_to_numpy_buffer(values, self.dtype) n = values.shape[0] if indices.size != n: raise ValueError( diff --git a/graphblas/indexbinary/__init__.py b/graphblas/indexbinary/__init__.py new file mode 100644 index 000000000..34f33fd4c --- /dev/null +++ b/graphblas/indexbinary/__init__.py @@ -0,0 +1,33 @@ +# All items are dynamically added by classes in operator.py +# This module acts as a container of IndexBinaryOp instances +_delayed = {} + + +def __dir__(): + return globals().keys() | _delayed.keys() | {"ss"} + + +def __getattr__(key): + if key in _delayed: + func, kwargs = _delayed.pop(key) + rv = func(**kwargs) + globals()[key] = rv + return rv + if key == "ss": + from .. 
import backend + + if backend != "suitesparse": + raise AttributeError( + f'module {__name__!r} only has attribute "ss" when backend is "suitesparse"' + ) + from importlib import import_module + + ss = import_module(".ss", __name__) + globals()["ss"] = ss + return ss + raise AttributeError(f"module {__name__!r} has no attribute {key!r}") + + +from ..core import operator # noqa: E402 isort:skip + +del operator diff --git a/graphblas/indexbinary/ss.py b/graphblas/indexbinary/ss.py new file mode 100644 index 000000000..6ffd91c01 --- /dev/null +++ b/graphblas/indexbinary/ss.py @@ -0,0 +1,6 @@ +from ..core import operator +from ..core.ss.indexbinary import register_new # noqa: F401 + +_delayed = {} + +del operator diff --git a/graphblas/io/_awkward.py b/graphblas/io/_awkward.py index b30984251..fd4a9bab8 100644 --- a/graphblas/io/_awkward.py +++ b/graphblas/io/_awkward.py @@ -28,19 +28,10 @@ def to_awkward(A, format=None): awkward.Array """ - try: - # awkward version 1 - # MAINT: we can probably drop awkward v1 at the end of 2024 or 2025 - import awkward._v2 as ak - from awkward._v2.forms.listoffsetform import ListOffsetForm - from awkward._v2.forms.numpyform import NumpyForm - from awkward._v2.forms.recordform import RecordForm - except ImportError: - # awkward version 2 - import awkward as ak - from awkward.forms.listoffsetform import ListOffsetForm - from awkward.forms.numpyform import NumpyForm - from awkward.forms.recordform import RecordForm + import awkward as ak + from awkward.forms.listoffsetform import ListOffsetForm + from awkward.forms.numpyform import NumpyForm + from awkward.forms.recordform import RecordForm out_type = output_type(A) if format is None: diff --git a/graphblas/io/_matrixmarket.py b/graphblas/io/_matrixmarket.py index 8cf8738a3..eff8c9031 100644 --- a/graphblas/io/_matrixmarket.py +++ b/graphblas/io/_matrixmarket.py @@ -1,3 +1,5 @@ +import warnings + from .. 
import backend from ..core.matrix import Matrix from ._scipy import to_scipy_sparse @@ -40,9 +42,25 @@ def mmread(source, engine="auto", *, dup_op=None, name=None, **kwargs): except ImportError: # pragma: no cover (import) raise ImportError("scipy is required to read Matrix Market files") from None engine = engine.lower() + if engine in {"fmm", "fast_matrix_market"}: + warnings.warn( + "fast_matrix_market is no longer maintained and will be removed in a future version. " + 'Use engine="scipy" instead.', + DeprecationWarning, + stacklevel=2, + ) if engine in {"auto", "fmm", "fast_matrix_market"}: try: from fast_matrix_market import mmread # noqa: F811 + + if engine == "auto": + warnings.warn( + "fast_matrix_market is installed but is no longer maintained and will be " + "removed in a future version. Uninstall it or use engine='scipy' to " + "silence this warning.", + DeprecationWarning, + stacklevel=2, + ) except ImportError: # pragma: no cover (import) if engine != "auto": raise ImportError( @@ -104,9 +122,25 @@ def mmwrite( except ImportError: # pragma: no cover (import) raise ImportError("scipy is required to write Matrix Market files") from None engine = engine.lower() + if engine in {"fmm", "fast_matrix_market"}: + warnings.warn( + "fast_matrix_market is no longer maintained and will be removed in a future version. " + 'Use engine="scipy" instead.', + DeprecationWarning, + stacklevel=2, + ) if engine in {"auto", "fmm", "fast_matrix_market"}: try: from fast_matrix_market import __version__, mmwrite # noqa: F811 + + if engine == "auto": + warnings.warn( + "fast_matrix_market is installed but is no longer maintained and will be " + "removed in a future version. 
Uninstall it or use engine='scipy' to " + "silence this warning.", + DeprecationWarning, + stacklevel=2, + ) except ImportError: # pragma: no cover (import) if engine != "auto": raise ImportError( diff --git a/graphblas/monoid/numpy.py b/graphblas/monoid/numpy.py index b9ff2b502..aae8385df 100644 --- a/graphblas/monoid/numpy.py +++ b/graphblas/monoid/numpy.py @@ -95,7 +95,7 @@ ): # Incorrect behavior was introduced in numba 0.56.2 and numpy 1.23 # See: https://github.com/numba/numba/issues/8478 - # MAINT: we may be able to remove the behavior-based check above in 2025 + # The behavioral check above is kept as a safety net; it's correct for all numba/numpy combos _monoid_identities["fmax"].update( { "BOOL": False, diff --git a/graphblas/ss/_core.py b/graphblas/ss/_core.py index b42ea72b4..9c1d043bc 100644 --- a/graphblas/ss/_core.py +++ b/graphblas/ss/_core.py @@ -215,10 +215,12 @@ class GlobalConfig(BaseConfig): "jit_c_preface": (lib.GxB_JIT_C_PREFACE, "char*"), "jit_error_log": (lib.GxB_JIT_ERROR_LOG, "char*"), "jit_cache_path": (lib.GxB_JIT_CACHE_PATH, "char*"), - # CUDA GPU control - "gpu_id": (lib.GxB_GLOBAL_GPU_ID, "int"), } ) + # GxB_GLOBAL_GPU_ID existed in SS:GraphBLAS <=10.1 as a scalar int. + # It was renamed to GxB_GLOBAL_GPU_IDS in 10.2 and changed to a list type. 
+ if hasattr(lib, "GxB_GLOBAL_GPU_ID"): + _options["gpu_id"] = (lib.GxB_GLOBAL_GPU_ID, "int") # Values to restore defaults _defaults = { "hyper_switch": lib.GxB_HYPER_DEFAULT, diff --git a/graphblas/tests/test_core.py b/graphblas/tests/test_core.py index 3586eb4a8..a15da6f54 100644 --- a/graphblas/tests/test_core.py +++ b/graphblas/tests/test_core.py @@ -67,7 +67,7 @@ class bad: def test_version(): from packaging.version import parse - assert parse(gb.__version__) > parse("2022.11.0") + assert parse(gb.__version__) > parse("2024.2.0") @pytest.mark.skipif("not setuptools or not tomli or not gb.__file__") diff --git a/graphblas/tests/test_formatting.py b/graphblas/tests/test_formatting.py index faadc983b..720ab79e5 100644 --- a/graphblas/tests/test_formatting.py +++ b/graphblas/tests/test_formatting.py @@ -9,7 +9,6 @@ from graphblas import Matrix, Scalar, Vector # isort:skip (for dask-graphblas) - try: import pandas as pd except ImportError: # pragma: no cover (import) diff --git a/graphblas/tests/test_indexbinary.py b/graphblas/tests/test_indexbinary.py new file mode 100644 index 000000000..80aa8d917 --- /dev/null +++ b/graphblas/tests/test_indexbinary.py @@ -0,0 +1,288 @@ +import pickle + +import pytest + +import graphblas as gb +from graphblas import Matrix, Scalar, Vector, dtypes, indexbinary +from graphblas.core import _supports_udfs as supports_udfs +from graphblas.core.operator.indexbinary import _has_idxbinop +from graphblas.exceptions import UdfParseError + +pytestmark = [ + pytest.mark.skipif(not supports_udfs, reason="requires numba"), + pytest.mark.skipif(not _has_idxbinop, reason="requires SuiteSparse:GraphBLAS 9.4+"), +] + + +def test_register_anonymous(): + def add_with_theta(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + theta + + op = indexbinary.register_anonymous(add_with_theta) + assert op is not None + assert "add_with_theta" in op.name + assert int in op.types or dtypes.INT64 in op.types + + +def test_register_new(): + 
def my_idxbin(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x * y + theta + + result = indexbinary.register_new("my_idxbin", my_idxbin) + assert result is not None + assert hasattr(indexbinary, "my_idxbin") + + A = Matrix.from_coo([0, 1], [1, 0], [3, 7]) + B = Matrix.from_coo([0, 1], [1, 0], [5, 2]) + binop = indexbinary.my_idxbin(100) + C = A.ewise_mult(B, binop).new() + assert list(C.to_coo()[2]) == [115, 114] # 3*5+100, 7*2+100 + + delattr(indexbinary, "my_idxbin") + + +def test_register_new_lazy(): + def lazy_op(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + + result = indexbinary.register_new("lazy_op", lazy_op, lazy=True) + assert result is None + assert "lazy_op" in dir(indexbinary) + + op = indexbinary.lazy_op + assert op is not None + delattr(indexbinary, "lazy_op") + + +def test_typed_call(): + def add_theta(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + theta + + op = indexbinary.register_anonymous(add_theta) + typed = op[int] + binop = typed(10) + assert binop.opclass == "BinaryOp" + + A = Matrix.from_coo([0, 1], [1, 0], [3, 7]) + B = Matrix.from_coo([0, 1], [1, 0], [5, 2]) + C = A.ewise_mult(B, binop).new() + assert list(C.to_coo()[2]) == [18, 19] # 3+5+10, 7+2+10 + + +def test_untyped_call(): + def add_theta(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + theta + + op = indexbinary.register_anonymous(add_theta) + binop = op(10) + assert binop.opclass == "BinaryOp" + + A = Matrix.from_coo([0, 1], [1, 0], [3, 7]) + B = Matrix.from_coo([0, 1], [1, 0], [5, 2]) + C = A.ewise_mult(B, binop).new() + assert list(C.to_coo()[2]) == [18, 19] + + +def test_index_aware(): + """Test that indices are correctly passed to the function.""" + + def index_sum(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return ix + jx + iy + jy + theta + + op = indexbinary.register_anonymous(index_sum) + # For ewise_mult, both operands have the same indices: ix==iy and jx==jy 
+ A = Matrix.from_coo([0, 1, 2], [0, 1, 2], [100, 200, 300]) + B = Matrix.from_coo([0, 1, 2], [0, 1, 2], [1, 1, 1]) + binop = op[int](0) + C = A.ewise_mult(B, binop).new() + # (0,0): 0+0+0+0+0=0, (1,1): 1+1+1+1+0=4, (2,2): 2+2+2+2+0=8 + assert list(C.to_coo()[2]) == [0, 4, 8] + + +def test_floating_point(): + def fp_add(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + theta * 0.5 + + op = indexbinary.register_anonymous(fp_add) + A = Matrix.from_coo([0], [0], [1.5]) + B = Matrix.from_coo([0], [0], [2.5]) + binop = op(4.0) + C = A.ewise_mult(B, binop).new() + assert abs(C.to_coo()[2][0] - 6.0) < 1e-10 # 1.5 + 2.5 + 4.0*0.5 = 6.0 + + +def test_vector_ewise(): + def add_theta(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + theta + + op = indexbinary.register_anonymous(add_theta) + v1 = Vector.from_coo([0, 1, 2], [10, 20, 30]) + v2 = Vector.from_coo([0, 1, 2], [1, 2, 3]) + binop = op(0) + v3 = v1.ewise_mult(v2, binop).new() + assert list(v3.to_coo()[1]) == [11, 22, 33] + + +def test_ewise_add(): + def add_theta(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + theta + + op = indexbinary.register_anonymous(add_theta) + A = Matrix.from_coo([0, 1], [0, 1], [3, 7]) + B = Matrix.from_coo([0, 1], [0, 1], [5, 2]) + binop = op(10) + C = A.ewise_add(B, binop).new() + assert list(C.to_coo()[2]) == [18, 19] + + +def test_default_theta(): + """Test that theta=0 works correctly.""" + + def add_theta(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + theta + + op = indexbinary.register_anonymous(add_theta) + binop = op(0) # theta=0 as int + A = Matrix.from_coo([0], [0], [3]) + B = Matrix.from_coo([0], [0], [5]) + C = A.ewise_mult(B, binop).new() + assert C.to_coo()[2][0] == 8 # 3 + 5 + 0 + + +def test_bool_return(): + def is_close(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return abs(x - y) <= theta + + op = indexbinary.register_anonymous(is_close) + A = 
Matrix.from_coo([0, 1], [0, 1], [10, 20]) + B = Matrix.from_coo([0, 1], [0, 1], [11, 25]) + binop = op[int](2) + C = A.ewise_mult(B, binop).new() + assert list(C.to_coo()[2]) == [True, False] # |10-11|<=2, |20-25|>2 + + +def test_scalar_theta(): + """Test passing a graphblas Scalar as theta.""" + + def add_theta(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + theta + + op = indexbinary.register_anonymous(add_theta) + theta = Scalar.from_value(42) + binop = op[int](theta) + A = Matrix.from_coo([0], [0], [3]) + B = Matrix.from_coo([0], [0], [5]) + C = A.ewise_mult(B, binop).new() + assert C.to_coo()[2][0] == 50 # 3 + 5 + 42 + + +def test_parameterized(): + def make_op(scale): + def inner(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return (x + y) * scale + theta + + return inner + + op = indexbinary.register_anonymous(make_op, parameterized=True) + scaled_op = op(2) # scale=2 + binop = scaled_op(10) # theta=10 + A = Matrix.from_coo([0], [0], [3]) + B = Matrix.from_coo([0], [0], [5]) + C = A.ewise_mult(B, binop).new() + assert C.to_coo()[2][0] == 26 # (3+5)*2 + 10 = 26 + + +def test_pickle_registered(): + def add_theta(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + theta + + indexbinary.register_new("pickle_test_op", add_theta) + op = indexbinary.pickle_test_op + op2 = pickle.loads(pickle.dumps(op)) + assert op2.name == op.name + + typed = op[int] + typed2 = pickle.loads(pickle.dumps(typed)) + assert typed2.name == typed.name + + delattr(indexbinary, "pickle_test_op") + + +def test_bad_udf(): + with pytest.raises(UdfParseError, match="Unable to parse function using Numba"): + indexbinary.register_anonymous(lambda x, ix, jx, y, iy, jy, theta: result) # noqa: F821 + + +def test_bad_type(): + with pytest.raises(TypeError, match="UDF argument must be a function"): + indexbinary.register_anonymous(42) + + +def test_with_mask(): + def add_vals(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return 
x + y + theta + + op = indexbinary.register_anonymous(add_vals) + A = Matrix.from_coo([0, 1, 2], [0, 1, 2], [3, 7, 11]) + B = Matrix.from_coo([0, 1, 2], [0, 1, 2], [5, 2, 1]) + mask = Matrix.from_coo([0, 2], [0, 2], [True, True], nrows=3, ncols=3) + C = Matrix(int, nrows=3, ncols=3) + binop = op(10) + C(mask=mask.S) << A.ewise_mult(B, binop) + rows, _, vals = C.to_coo() + assert list(rows) == [0, 2] + assert list(vals) == [18, 22] # 3+5+10, 11+1+10 + + +def test_with_accumulator(): + def add_vals(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + theta + + op = indexbinary.register_anonymous(add_vals) + A = Matrix.from_coo([0], [0], [3]) + B = Matrix.from_coo([0], [0], [5]) + C = Matrix.from_coo([0], [0], [100]) + binop = op(10) + C(accum=gb.binary.plus) << A.ewise_mult(B, binop) + assert C.to_coo()[2][0] == 118 # 100 + (3+5+10) + + +def test_ewise_with_bound_binop(): + """Confirm bound IndexBinaryOp works in all ewise operations.""" + + def mul_plus(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x * y + theta + + op = indexbinary.register_anonymous(mul_plus) + binop = op[int](0) + A = Matrix.from_coo([0, 0], [0, 1], [2, 3]) + B = Matrix.from_coo([0, 0], [0, 1], [4, 5]) + C = A.ewise_mult(B, binop).new() + assert list(C.to_coo()[2]) == [8, 15] # 2*4+0, 3*5+0 + + +def test_find_opclass(): + from graphblas.core.operator import find_opclass + + def add_vals(x, ix, jx, y, iy, jy, theta): # pragma: no cover (numba) + return x + y + + op = indexbinary.register_anonymous(add_vals) + _, opclass = find_opclass(op) + assert opclass == "IndexBinaryOp" + + typed = op[int] + assert typed.opclass == "IndexBinaryOp" + + bound = typed(0) + assert bound.opclass == "BinaryOp" + + +def test_dir_and_module(): + assert "register_new" in dir(indexbinary) + assert "register_anonymous" in dir(indexbinary) + assert "ss" in dir(indexbinary) + # Actually access the ss module to verify it exists (not just in __dir__) + ss = indexbinary.ss + assert 
hasattr(ss, "_delayed") + assert hasattr(ss, "register_new") diff --git a/graphblas/tests/test_infix.py b/graphblas/tests/test_infix.py index 601f282a7..a1a59fd69 100644 --- a/graphblas/tests/test_infix.py +++ b/graphblas/tests/test_infix.py @@ -454,34 +454,34 @@ def test_multi_infix_vector(): result = op.plus_plus(D0 @ (D0 @ v1)).new() assert result.isequal(v1) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) | v3 - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2).__ror__(v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) | (v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) | (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1 | (v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1.__ror__(v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) | (v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1 & (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1.__rand__(v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) & (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) & (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) & v3 - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2).__rand__(v3) 
- with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) & (v2 & v3) # We differentiate between infix and methods @@ -564,34 +564,34 @@ def test_multi_infix_matrix(): assert op.plus_plus((v1.T @ D0) @ v1).new()[0, 0].new().value == 6 assert op.plus_plus(D0 @ D0 @ D0 @ D0 @ D0).new().isequal(D0) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) | v3 - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2).__ror__(v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) | (v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) | (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1 | (v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1.__ror__(v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) | (v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1 & (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1.__rand__(v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) & (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) & (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) & v3 - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2).__rand__(v3) - with 
pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) & (v2 & v3) # We differentiate between infix and methods @@ -716,34 +716,34 @@ def test_multi_infix_scalar(): result = binary.plus(v1 | (2 | v3), left_default=10, right_default=10).new() assert result.isequal(expected) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) | v3 - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2).__ror__(v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) | (v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) | (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1 | (v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1.__ror__(v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) | (v2 & v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1 & (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): v1.__rand__(v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) & (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 & v2) & (v2 | v3) - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) & v3 - with pytest.raises(TypeError, match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2).__rand__(v3) - with pytest.raises(TypeError, 
match="XXX"): # TODO + with pytest.raises(TypeError, match="Cannot mix"): (v1 | v2) & (v2 & v3) # We differentiate between infix and methods diff --git a/graphblas/tests/test_io.py b/graphblas/tests/test_io.py index 7e786f0da..9443bbcfa 100644 --- a/graphblas/tests/test_io.py +++ b/graphblas/tests/test_io.py @@ -272,7 +272,7 @@ def test_mmread_mmwrite(engine): example == "_empty_lines_example" and engine in {"fmm", "auto"} and fmm is not None - and fmm.__version__ in {"1.4.5"} + and fmm.__version__ == "1.4.5" ): # `fast_matrix_market` __version__ v1.4.5 does not handle this, but v1.5.0 does continue diff --git a/graphblas/tests/test_matrix.py b/graphblas/tests/test_matrix.py index 24f0e73d7..b5d00337d 100644 --- a/graphblas/tests/test_matrix.py +++ b/graphblas/tests/test_matrix.py @@ -2794,7 +2794,7 @@ def test_ss_concat(A, v): expected[:, A.ncols] = v assert B5.isequal(expected) - with pytest.raises(TypeError, match=""): + with pytest.raises(TypeError): gb.ss.concat([v, [v]]) with pytest.raises(TypeError): gb.ss.concat([[v], v]) @@ -3429,7 +3429,7 @@ def test_ss_lastk(A): @pytest.mark.slow def test_ss_compactify(A, do_iso): if do_iso: - r, c, v = A.to_coo() + r, c, _v = A.to_coo() A = Matrix.from_coo(r, c, 1) rows = [0, 0, 1, 1, 2, 3, 3, 4, 5, 6, 6, 6] new_cols = [0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 2] @@ -3848,11 +3848,11 @@ def test_to_coo_sort(): rng.shuffle(r) rng.shuffle(c) A = Matrix.from_coo(r, c, r, nrows=N, ncols=N) - rows, cols, values = A.to_coo(sort=False) + rows, cols, _values = A.to_coo(sort=False) A = Matrix.from_coo(r, c, r, nrows=N, ncols=N) - rows, cols, values = A.to_coo(sort=True) + rows, cols, _values = A.to_coo(sort=True) assert_array_equal(rows, expected_rows) - rows, cols, values = A.T.to_coo(sort=True) + rows, cols, _values = A.T.to_coo(sort=True) assert_array_equal(cols, expected_rows) diff --git a/graphblas/tests/test_numpyops.py b/graphblas/tests/test_numpyops.py index 999c6d5e0..534312c0b 100644 --- a/graphblas/tests/test_numpyops.py +++ 
b/graphblas/tests/test_numpyops.py @@ -40,7 +40,7 @@ def test_bool_doesnt_get_too_large(): z = a.ewise_mult(b, gb.monoid.numpy.add).new() else: z = a.ewise_mult(b, gb.monoid.numpy.add).new() - x, y = z.to_coo() + _x, y = z.to_coo() np.testing.assert_array_equal(y, (True, True, True, False)) def func(x): # pragma: no cover (numba) @@ -48,7 +48,7 @@ def func(x): # pragma: no cover (numba) op = gb.core.operator.UnaryOp.register_anonymous(func) z = a.apply(op).new() - x, y = z.to_coo() + _x, y = z.to_coo() np.testing.assert_array_equal(y, (True, False, True, False)) @@ -116,7 +116,7 @@ def test_npunary(): import numba if ( - unary_name in {"sign"} + unary_name == "sign" and np.__version__.startswith("2.") and parse(numba.__version__) < parse("0.61.0") ): @@ -201,7 +201,7 @@ def test_npbinary(): compare_op = isclose else: np_result = getattr(np, binary_name)(np_left, np_right) - if binary_name in {"arctan2"}: + if binary_name == "arctan2": compare_op = isclose else: compare_op = equal diff --git a/graphblas/tests/test_op.py b/graphblas/tests/test_op.py index 41fae80ae..8a072a56b 100644 --- a/graphblas/tests/test_op.py +++ b/graphblas/tests/test_op.py @@ -869,13 +869,13 @@ def test_monoid_attributes(): assert monoid.plus[int].binaryop is binary.plus[int] assert monoid.plus[int].identity == 0 assert monoid.plus.binaryop is binary.plus - assert monoid.plus.identities == {typ: 0 for typ in monoid.plus.types} + assert monoid.plus.identities == dict.fromkeys(monoid.plus.types, 0) if shouldhave(monoid.numpy, "add"): assert monoid.numpy.add[int].binaryop is binary.numpy.add[int] assert monoid.numpy.add[int].identity == 0 assert monoid.numpy.add.binaryop is binary.numpy.add - assert monoid.numpy.add.identities == {typ: 0 for typ in monoid.numpy.add.types} + assert monoid.numpy.add.identities == dict.fromkeys(monoid.numpy.add.types, 0) def plus(x, y): # pragma: no cover (numba) return x + y diff --git a/graphblas/tests/test_scalar.py b/graphblas/tests/test_scalar.py index 
e93511914..dbfb301a3 100644 --- a/graphblas/tests/test_scalar.py +++ b/graphblas/tests/test_scalar.py @@ -292,7 +292,7 @@ def test_neg(): ( dtype for attr, dtype in vars(dtypes).items() - if isinstance(dtype, dtypes.DataType) and attr not in {"_INDEX"} + if isinstance(dtype, dtypes.DataType) and attr != "_INDEX" ), key=lambda x: x.name, reverse=random.choice([False, True]), # used to segfault when False diff --git a/graphblas/tests/test_ss_utils.py b/graphblas/tests/test_ss_utils.py index 2df7ab939..40774186c 100644 --- a/graphblas/tests/test_ss_utils.py +++ b/graphblas/tests/test_ss_utils.py @@ -256,8 +256,8 @@ def test_context(): assert context4["nthreads"] == context["nthreads"] + 1 assert context == context.dup() assert context4 == context.dup(chunk=context["chunk"] + 1, nthreads=context["nthreads"] + 1) - assert context.dup(gpu_id=-1)["gpu_id"] == -1 - + if "gpu_id" in gb.ss.Context._options: + assert context.dup(gpu_id=-1)["gpu_id"] == -1 context.engage() assert gb.core.ss.context.threadlocal.context is context with gb.ss.Context(nthreads=1) as ctx: diff --git a/graphblas/tests/test_ssjit.py b/graphblas/tests/test_ssjit.py index 4cea0b563..ed6f429e7 100644 --- a/graphblas/tests/test_ssjit.py +++ b/graphblas/tests/test_ssjit.py @@ -1,7 +1,7 @@ import os import pathlib -import platform -import sys +import re +import subprocess import sysconfig import numpy as np @@ -9,8 +9,9 @@ from numpy.testing import assert_array_equal import graphblas as gb -from graphblas import backend, binary, dtypes, indexunary, select, unary +from graphblas import backend, binary, dtypes, indexbinary, indexunary, select, unary from graphblas.core import _supports_udfs as supports_udfs +from graphblas.core.operator.indexbinary import _has_idxbinop from graphblas.core.ss import _IS_SSGB7 from .conftest import autocompute, burble @@ -26,108 +27,122 @@ pytest.skip("not suitesparse backend", allow_module_level=True) +def _fix_jit_config(): + """Fix the GraphBLAS JIT configuration for the 
current conda environment. + + The graphblas C library bakes in build-time compiler paths from conda-build, + which don't exist in the user's environment. This function: + 1. Replaces the compiler path with the equivalent from $CONDA_PREFIX/bin/ + 2. Replaces -isysroot with the local macOS SDK (via xcrun), or strips it on Linux + 3. Strips -fdebug-prefix-map flags referencing build paths + + Returns + ------- + True: JIT configured and verified working + False: JIT configuration attempted but compilation failed (don't retry) + None: No conda environment; caller should try a different approach + + Only modifies jit_c_compiler_name, jit_c_compiler_flags, and jit_c_control. + Linker flags and libraries are left at their defaults (already have correct + $CONDA_PREFIX paths substituted by the graphblas C library). + """ + conda_prefix = os.environ.get("CONDA_PREFIX", "") + if not conda_prefix: + return None # No conda env; caller should try sysconfig instead + + # Check if the default compiler already works (e.g., /usr/bin/cc on macOS). + # Only replace it if the baked-in path doesn't exist. + jit_cc = gb.ss.config["jit_c_compiler_name"] + if pathlib.Path(jit_cc).exists(): + # Default compiler exists — don't replace it, just fix flags + pass + else: + # Replace build-time path with local conda equivalent. + cc_basename = pathlib.Path(jit_cc).name + bin_dir = pathlib.Path(conda_prefix) / "bin" + for candidate in [cc_basename, "cc", "clang", "gcc"]: + local_cc = bin_dir / candidate + if local_cc.exists(): + break + else: + return False + gb.ss.config["jit_c_compiler_name"] = str(local_cc) + + # Fix compiler flags: fix build-time-only paths that don't exist in the + # user's environment + flags = gb.ss.config["jit_c_compiler_flags"] + # -isysroot : macOS SDK path from conda-build (e.g., /opt/conda-sdks/MacOSX10.13.sdk). + # The conda cross-compiler needs an explicit sysroot. Replace with the local SDK if available. 
+ isysroot_match = re.search(r"-isysroot\s+(\S+)", flags) + if isysroot_match and not pathlib.Path(isysroot_match.group(1)).exists(): + try: + sdk_path = subprocess.check_output( + ["xcrun", "--show-sdk-path"], text=True, stderr=subprocess.DEVNULL + ).strip() + flags = re.sub(r"-isysroot\s+\S+", f"-isysroot {sdk_path}", flags) + except (subprocess.CalledProcessError, FileNotFoundError): + # No Xcode SDK available (linux, or macOS without Xcode CLT) + flags = re.sub(r"-isysroot\s+\S+", "", flags) + # -fdebug-prefix-map==: debug path remapping from conda-build + flags = re.sub(r"-fdebug-prefix-map=\S+", "", flags) + gb.ss.config["jit_c_compiler_flags"] = flags + + gb.ss.config["jit_c_control"] = "on" + + # Verify the JIT actually works by attempting a trivial compilation. + # If it fails (e.g., missing libraries, wrong flags), turn JIT off. + try: + from graphblas import dtypes as _dtypes + + _dtypes.ss.register_new("_jit_probe", "typedef struct { int _probe ; } _jit_probe ;") + except Exception: + gb.ss.config["jit_c_control"] = "off" + return False + else: + return True + + @pytest.fixture(scope="module", autouse=True) def _setup_jit(): - """Set up the SuiteSparse:GraphBLAS JIT.""" + """Set up the SuiteSparse:GraphBLAS JIT. + + Strategy: + 1. _fix_jit_config(): fix conda-baked compiler paths and probe. + - Returns True: JIT works, proceed. + - Returns False: probe failed, JIT is broken, turn off. + - Returns None: no conda env, try sysconfig instead. + 2. Sysconfig fallback: for non-conda installs (pure pip). + """ if _IS_SSGB7: # SuiteSparse JIT was added in SSGB 8 yield return - if not os.environ.get("GITHUB_ACTIONS"): - # Try to run the tests with defaults from sysconfig if not running in CI - prev = gb.ss.config["jit_c_control"] + prev = gb.ss.config["jit_c_control"] + + result = _fix_jit_config() + if result is True: + pass # Conda JIT configured and verified + elif result is False: + # Probe failed — JIT doesn't work with this psg build. 
+ # Don't try sysconfig; if the conda compiler can't compile + # GraphBLAS JIT kernels, Python's sysconfig compiler won't either. + gb.ss.config["jit_c_control"] = "off" + else: + # No conda env (result is None). Try sysconfig for non-conda installs. cc = sysconfig.get_config_var("CC") cflags = sysconfig.get_config_var("CFLAGS") include = sysconfig.get_path("include") libs = sysconfig.get_config_var("LIBS") - if not (cc is None or cflags is None or include is None or libs is None): + if cc and cflags and include: gb.ss.config["jit_c_control"] = "on" gb.ss.config["jit_c_compiler_name"] = cc gb.ss.config["jit_c_compiler_flags"] = f"{cflags} -I{include}" - gb.ss.config["jit_c_libraries"] = libs + if libs: + gb.ss.config["jit_c_libraries"] = libs else: - # Should we skip or try to run if sysconfig vars aren't set? - gb.ss.config["jit_c_control"] = "on" # "off" - try: - yield - finally: - gb.ss.config["jit_c_control"] = prev - return - - if ( - sys.platform == "darwin" - or sys.platform == "linux" - and "conda" not in gb.ss.config["jit_c_compiler_name"] - ): - # XXX TODO: tests for SuiteSparse JIT are not passing on linux when using wheels or on osx - # This should be understood and fixed! - gb.ss.config["jit_c_control"] = "off" - yield - return - - # Configuration values below were obtained from the output of the JIT config - # in CI, but with paths changed to use `{conda_prefix}` where appropriate. 
- conda_prefix = os.environ["CONDA_PREFIX"] - prev = gb.ss.config["jit_c_control"] - gb.ss.config["jit_c_control"] = "on" - if sys.platform == "linux": - gb.ss.config["jit_c_compiler_name"] = f"{conda_prefix}/bin/x86_64-conda-linux-gnu-cc" - gb.ss.config["jit_c_compiler_flags"] = ( - "-march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong " - f"-fno-plt -O2 -ffunction-sections -pipe -isystem {conda_prefix}/include -Wundef " - "-std=c11 -lm -Wno-pragmas -fexcess-precision=fast -fcx-limited-range " - "-fno-math-errno -fwrapv -O3 -DNDEBUG -fopenmp -fPIC" - ) - gb.ss.config["jit_c_linker_flags"] = ( - "-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now " - "-Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined " - f"-Wl,-rpath,{conda_prefix}/lib -Wl,-rpath-link,{conda_prefix}/lib " - f"-L{conda_prefix}/lib -shared" - ) - gb.ss.config["jit_c_libraries"] = ( - f"-lm -ldl {conda_prefix}/lib/libgomp.so " - f"{conda_prefix}/x86_64-conda-linux-gnu/sysroot/usr/lib/libpthread.so" - ) - gb.ss.config["jit_c_cmake_libs"] = ( - f"m;dl;{conda_prefix}/lib/libgomp.so;" - f"{conda_prefix}/x86_64-conda-linux-gnu/sysroot/usr/lib/libpthread.so" - ) - elif sys.platform == "darwin": - gb.ss.config["jit_c_compiler_name"] = f"{conda_prefix}/bin/clang" - gb.ss.config["jit_c_compiler_flags"] = ( - "-march=core2 -mtune=haswell -mssse3 -ftree-vectorize -fPIC -fPIE " - f"-fstack-protector-strong -O2 -pipe -isystem {conda_prefix}/include -DGBNCPUFEAT " - f"-Wno-pointer-sign -O3 -DNDEBUG -fopenmp=libomp -fPIC -arch {platform.machine()}" - ) - gb.ss.config["jit_c_linker_flags"] = ( - "-Wl,-pie -Wl,-headerpad_max_install_names -Wl,-dead_strip_dylibs " - f"-Wl,-rpath,{conda_prefix}/lib -L{conda_prefix}/lib -dynamiclib" - ) - gb.ss.config["jit_c_libraries"] = f"-lm -ldl {conda_prefix}/lib/libomp.dylib" - gb.ss.config["jit_c_cmake_libs"] = f"m;dl;{conda_prefix}/lib/libomp.dylib" - elif sys.platform == "win32": # pragma: no branch (sanity) - if "mingw" 
in gb.ss.config["jit_c_libraries"]: - # This probably means we're testing a `python-suitesparse-graphblas` wheel - # in a conda environment. This is not yet working. gb.ss.config["jit_c_control"] = "off" - yield - return - - gb.ss.config["jit_c_compiler_name"] = f"{conda_prefix}/bin/cc" - gb.ss.config["jit_c_compiler_flags"] = ( - '/DWIN32 /D_WINDOWS -DGBNCPUFEAT /O2 -wd"4244" -wd"4146" -wd"4018" ' - '-wd"4996" -wd"4047" -wd"4554" /O2 /Ob2 /DNDEBUG -openmp' - ) - gb.ss.config["jit_c_linker_flags"] = "/machine:x64" - gb.ss.config["jit_c_libraries"] = "" - gb.ss.config["jit_c_cmake_libs"] = "" - if not pathlib.Path(gb.ss.config["jit_c_compiler_name"]).exists(): - # Can't use the JIT if we don't have a compiler! - gb.ss.config["jit_c_control"] = "off" - yield - return try: yield finally: @@ -148,7 +163,7 @@ def test_jit_udt(): ) return if gb.ss.config["jit_c_control"] == "off": - return + pytest.skip("JIT not available (no C compiler configured)") with burble(): dtype = dtypes.ss.register_new( "myquaternion", "typedef struct { float x [4][4] ; int color ; } myquaternion ;" @@ -196,7 +211,7 @@ def test_jit_unary(v): unary.ss.register_new("square", cdef, "FP32", "FP32") return if gb.ss.config["jit_c_control"] == "off": - return + pytest.skip("JIT not available (no C compiler configured)") with burble(): square = unary.ss.register_new("square", cdef, "FP32", "FP32") assert not hasattr(unary, "square") @@ -234,7 +249,7 @@ def test_jit_binary(v): binary.ss.register_new("absdiff", cdef, "FP64", "FP64", "FP64") return if gb.ss.config["jit_c_control"] == "off": - return + pytest.skip("JIT not available (no C compiler configured)") with burble(): absdiff = binary.ss.register_new( "absdiff", @@ -302,7 +317,7 @@ def test_jit_indexunary(v): indexunary.ss.register_new("diffy", cdef, "FP64", "FP64", "FP64") return if gb.ss.config["jit_c_control"] == "off": - return + pytest.skip("JIT not available (no C compiler configured)") with burble(): diffy = 
indexunary.ss.register_new("diffy", cdef, "FP64", "FP64", "FP64") assert not hasattr(indexunary, "diffy") @@ -356,6 +371,69 @@ def test_jit_indexunary(v): assert ("FP32", "FP64") not in diffy +@pytest.mark.skipif(not _has_idxbinop, reason="requires SuiteSparse:GraphBLAS 9.4+") +def test_jit_indexbinary(v): + cdef = ( + "void add_theta (double *z, double *x, GrB_Index ix, GrB_Index jx, " + "double *y, GrB_Index iy, GrB_Index jy, double *theta) " + "{ (*z) = (*x) + (*y) + (*theta) ; }" + ) + if gb.ss.config["jit_c_control"] == "off": + pytest.skip("JIT not available (no C compiler configured)") + with burble(): + add_theta = indexbinary.ss.register_new("add_theta", cdef, "FP64", "FP64", "FP64", "FP64") + assert not hasattr(indexbinary, "add_theta") + assert indexbinary.ss.add_theta is add_theta + assert add_theta.name == "ss.add_theta" + assert add_theta.types == {(dtypes.FP64, dtypes.FP64): dtypes.FP64} + assert "FP64" in add_theta + assert add_theta["FP64"].return_type == dtypes.FP64 + assert add_theta["FP64"].jit_c_definition == cdef + # Bind theta and use as BinaryOp + v64 = v.dup("FP64") + binop = add_theta["FP64"](10.0) + assert binop.opclass == "BinaryOp" + res = v64.ewise_mult(v64, binop).new() + # v has values at [1, 3, 4, 6] with vals [1, 1, 2, 0] + # ewise_mult: x+y+theta = 2*val + 10 + expected = Vector.from_coo([1, 3, 4, 6], [12.0, 12.0, 14.0, 10.0], dtype="FP64") + assert expected.isequal(res) + # Test duplicate registration fails + assert "FP32" not in add_theta + with burble(): + add_theta_fp32 = indexbinary.ss.register_new( + "add_theta", + cdef.replace("double", "float"), + "FP32", + "FP32", + "FP32", + "FP32", + ) + assert add_theta_fp32 is add_theta + assert "FP32" in add_theta + with pytest.raises( + TypeError, + match="IndexBinaryOp gb.indexbinary.ss.add_theta already defined for .FP64, FP64. 
input", + ): + indexbinary.ss.register_new("add_theta", cdef, "FP64", "FP64", "FP64", "FP64") + # Test nested names + indexbinary.ss.register_new("nested.add_theta", cdef, "FP64", "FP64", "FP64", "FP64") + with pytest.raises(AttributeError, match="nested is already defined"): + indexbinary.ss.register_new("nested", cdef, "FP64", "FP64", "FP64", "FP64") + # Test mixed types (x=FP64, y=FP64, theta=FP32) + mixed_cdef = ( + "void add_theta (double *z, double *x, GrB_Index ix, GrB_Index jx, " + "double *y, GrB_Index iy, GrB_Index jy, float *theta) " + "{ (*z) = (*x) + (*y) + (double)(*theta) ; }" + ) + add_theta_mixed = indexbinary.ss.register_new( + "add_theta", mixed_cdef, "FP64", "FP64", "FP32", "FP64" + ) + assert add_theta_mixed is add_theta + assert ("FP64", "FP32") in add_theta + assert ("FP32", "FP64") not in add_theta + + def test_jit_select(v): cdef = ( # Why does this one insist on `const` for `x` argument? @@ -367,7 +445,7 @@ def test_jit_select(v): select.ss.register_new("woot", cdef, "INT32", "INT32") return if gb.ss.config["jit_c_control"] == "off": - return + pytest.skip("JIT not available (no C compiler configured)") with burble(): woot = select.ss.register_new("woot", cdef, "INT32", "INT32") assert not hasattr(select, "woot") diff --git a/graphblas/tests/test_vector.py b/graphblas/tests/test_vector.py index db80cdf71..65460fa75 100644 --- a/graphblas/tests/test_vector.py +++ b/graphblas/tests/test_vector.py @@ -27,7 +27,6 @@ from graphblas import Matrix, Scalar, Vector # isort:skip (for dask-graphblas) - suitesparse = backend == "suitesparse" if suitesparse: ss_version_major = gb.core.ss.version_major @@ -2221,11 +2220,8 @@ def test_udt(): if suitesparse: vv = Vector.ss.deserialize(v.ss.serialize(), dtype=long_udt) assert v.isequal(vv, check_dtype=True) - if ss_version_major < 9: - with pytest.raises(SyntaxError): - # The size of the UDT name is limited - Vector.ss.deserialize(v.ss.serialize()) - else: + with pytest.raises(SyntaxError): + # The dtype 
name is too long to embed in the blob; dtype= must be provided Vector.ss.deserialize(v.ss.serialize()) # May be able to look up non-anonymous dtypes by name if their names are too long named_long_dtype = np.dtype([("x", np.bool_), ("y" * 1000, np.float64)], align=False) diff --git a/graphblas/viz.py b/graphblas/viz.py index b6d5f6ba7..8e2a53228 100644 --- a/graphblas/viz.py +++ b/graphblas/viz.py @@ -81,7 +81,7 @@ def spy(M, *, centered=False, show=True, figure=None, axes=None, figsize=None, * datashade """ - mpl, plt, ss = _get_imports(["mpl", "plt", "ss"], "spy") + mpl, plt, _ss = _get_imports(["mpl", "plt", "ss"], "spy") A = to_scipy_sparse(M, "coo") if show: plt.ion() @@ -132,7 +132,7 @@ def datashade(M, agg="count", *, width=None, height=None, opts_kwargs=None, **kw spy """ - np, pd, bk, hv, hp, ds = _get_imports(["np", "pd", "bk", "hv", "hp", "ds"], "datashade") + np, pd, bk, hv, _hp, _ds = _get_imports(["np", "pd", "bk", "hv", "hp", "ds"], "datashade") if "df" not in kwargs: rows, cols, vals = M.to_coo() max_int = np.iinfo(np.int64).max diff --git a/pyproject.toml b/pyproject.toml index 1bad95118..ef2d11b64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,15 @@ [build-system] build-backend = "setuptools.build_meta" -requires = ["setuptools >=64", "setuptools-git-versioning"] +requires = ["setuptools >=77", "setuptools-git-versioning"] [project] name = "python-graphblas" dynamic = ["version"] description = "Python library for GraphBLAS: high-performance sparse linear algebra for scalable graph analytics" readme = "README.md" -requires-python = ">=3.10" -license = { file = "LICENSE" } +requires-python = ">=3.11" +license = "Apache-2.0" +license-files = ["LICENSE"] authors = [ { name = "Erik Welch", email = "erik.n.welch@gmail.com" }, { name = "Jim Kitchen" }, @@ -35,16 +36,15 @@ keywords = [ ] classifiers = [ "Development Status :: 5 - Production/Stable", - "License :: OSI Approved :: Apache Software License", "Operating System :: MacOS :: MacOS 
X", "Operating System :: POSIX :: Linux", "Operating System :: Microsoft :: Windows", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Programming Language :: Python :: 3 :: Only", "Intended Audience :: Developers", "Intended Audience :: Other Audience", @@ -55,14 +55,14 @@ classifiers = [ "Topic :: Software Development :: Libraries :: Python Modules", ] dependencies = [ - "numpy >=1.23", + "numpy >=1.24", "donfig >=0.6", "pyyaml >=5.4", - # These won't be installed by default after 2024.3.0 - # once pep-771 is supported: https://peps.python.org/pep-0771/ + # These should be optional once PEP 771 is accepted and supported: + # https://peps.python.org/pep-0771/ (still draft as of 2026-03) # Use e.g. "python-graphblas[suitesparse]" or "python-graphblas[default]" instead - "suitesparse-graphblas >=7.4.0.0, <10", - "numba >=0.55; python_version<'3.14'", # make optional where numba is not supported + "suitesparse-graphblas >=7.4.0.0", + "numba >=0.57; python_version<'3.15'", # make optional where numba is not supported ] [project.urls] @@ -72,9 +72,9 @@ repository = "https://github.com/python-graphblas/python-graphblas" changelog = "https://github.com/python-graphblas/python-graphblas/releases" [project.optional-dependencies] -suitesparse = ["suitesparse-graphblas >=7.4.0.0, <10"] +suitesparse = ["suitesparse-graphblas >=7.4.0.0"] networkx = ["networkx >=2.8"] -numba = ["numba >=0.55"] +numba = ["numba >=0.57"] pandas = ["pandas >=1.5"] scipy = ["scipy >=1.9"] suitesparse-udf = [ # udf requires numba @@ -83,10 +83,10 @@ suitesparse-udf = [ # udf requires numba repr = ["python-graphblas[pandas]"] io = [ "python-graphblas[networkx,scipy]", - "python-graphblas[numba]; python_version<'3.14'", + "python-graphblas[numba]; 
python_version<'3.15'", # make optional where numba is not supported "awkward >=2.0", - "sparse >=0.14; python_version<'3.13'", # make optional, b/c sparse needs numba - "fast-matrix-market >=1.4.5; python_version<'3.13'", # py3.13 not supported yet + "sparse >=0.14; python_version<'3.13'", # make optional, b/c sparse needs numba + # fast-matrix-market is no longer maintained; last supported Python is 3.12 ] viz = ["python-graphblas[networkx,scipy]", "matplotlib >=3.6"] datashade = [ # datashade requires numba @@ -102,11 +102,11 @@ test = [ ] default = [ "python-graphblas[suitesparse,pandas,scipy]", - "python-graphblas[numba]; python_version<'3.14'", # make optional where numba is not supported + "python-graphblas[numba]; python_version<'3.15'", # make optional where numba is not supported ] all = [ "python-graphblas[default,io,viz,test]", - "python-graphblas[datashade]; python_version<'3.14'", # make optional, b/c datashade needs numba + "python-graphblas[datashade]; python_version<'3.15'", # make optional, b/c datashade needs numba ] [tool.setuptools] @@ -122,6 +122,7 @@ packages = [ "graphblas.core.operator", "graphblas.core.ss", "graphblas.dtypes", + "graphblas.indexbinary", "graphblas.indexunary", "graphblas.io", "graphblas.monoid", @@ -140,7 +141,7 @@ dirty_template = "{tag}+{ccount}.g{sha}.dirty" [tool.black] line-length = 100 -target-version = ["py310", "py311", "py312", "py313"] +target-version = ["py311", "py312", "py313", "py314"] [tool.isort] sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"] @@ -154,7 +155,7 @@ line_length = 100 [tool.pytest.ini_options] minversion = "6.0" testpaths = "graphblas/tests" -xfail_strict = false # 2023-07-23: awkward and numpy 1.25 sometimes conflict +xfail_strict = false addopts = [ "--strict-config", # Force error if config is mispelled "--strict-markers", # Force error if marker is mispelled (must be defined in config) @@ -167,34 +168,19 @@ filterwarnings = [ # and: 
https://docs.pytest.org/en/7.2.x/how-to/capture-warnings.html#controlling-warnings "error", - # sparse 0.14.0 (2022-02-24) began raising this warning; it has been reported and fixed upstream. + # sparse 0.14.0 began raising this warning; fixed in newer versions but CI tests with 0.14/0.15 "ignore:coords should be an ndarray. This will raise a ValueError:DeprecationWarning:sparse._coo.core", - # setuptools v67.3.0 deprecated `pkg_resources.declare_namespace` on 13 Feb 2023. See: - # https://setuptools.pypa.io/en/latest/history.html#v67-3-0 - # MAINT: check if this is still necessary in 2025 - "ignore:Deprecated call to `pkg_resources.declare_namespace:DeprecationWarning:pkg_resources", - - # This deprecation warning was added in setuptools v67.5.0 (8 Mar 2023). See: - # https://setuptools.pypa.io/en/latest/history.html#v67-5-0 - "ignore:pkg_resources is deprecated as an API:DeprecationWarning:", - - # sre_parse deprecated in 3.11; this is triggered by awkward 0.10 - "ignore:module 'sre_parse' is deprecated:DeprecationWarning:", - "ignore:module 'sre_constants' is deprecated:DeprecationWarning:", - - # numpy 1.25.0 (2023-06-17) deprecated `np.find_common_type`; many other dependencies use it. - # See if we can remove this filter in 2025. 
- "ignore:np.find_common_type is deprecated:DeprecationWarning:", - # pypy gives this warning "ignore:can't resolve package from __spec__ or __package__:ImportWarning:", # Python 3.12 introduced this deprecation, which is triggered by pandas 2.1.1 + # MAINT: check if still needed once we drop pandas <2.2 support "ignore:datetime.datetime.utcfromtimestamp:DeprecationWarning:dateutil", - # Pandas 2.2 warns that pyarrow will become a required dependency in pandas 3.0 - "ignore:\\nPyarrow will become a required dependency of pandas:DeprecationWarning:", + # numpy 1.25+ deprecated np.find_common_type; triggered by older scipy/networkx/pandas + # MAINT: remove once we drop numpy <2 support (numpy 2.0 removed it entirely) + "ignore:np.find_common_type is deprecated:DeprecationWarning:", ] [tool.coverage.run] @@ -222,7 +208,7 @@ ignore-words-list = "coo,ba" [tool.ruff] # https://github.com/charliermarsh/ruff/ line-length = 100 -target-version = "py310" +target-version = "py311" [tool.ruff.format] exclude = ["*.ipynb"] # Consider enabling auto-formatting of notebooks @@ -313,6 +299,7 @@ ignore = [ "RUF012", # Mutable class attributes should be annotated with `typing.ClassVar` (Note: no annotations yet) "RUF021", # parenthesize-chained-operators (Note: results don't look good yet) "RUF023", # unsorted-dunder-slots (Note: maybe fine, but noisy changes) + "RUF043", # Pattern passed to `match=` contains metacharacters but is neither escaped nor raw "PERF401", # Use a list comprehension to create a transformed list (Note: poorly implemented atm) # Intentionally ignored @@ -328,6 +315,7 @@ ignore = [ "N818", # Exception name ... 
should be named with an Error suffix (Note: good advice) "PERF203", # `try`-`except` within a loop incurs performance overhead (Note: too strict) "PLC0205", # Class `__slots__` should be a non-string iterable (Note: string is fine) + "PLC0415", # `import` should be at the top-level of a file "PLR0124", # Name compared with itself, consider replacing `x == x` (Note: too strict) "PLR0911", # Too many return statements "PLR0912", # Too many branches @@ -340,6 +328,7 @@ ignore = [ "RET502", # Do not implicitly `return None` in function able to return non-`None` value "RET503", # Missing explicit `return` at the end of function able to return non-`None` value "RET504", # Unnecessary variable assignment before `return` statement + "RUF028", # This suppression comment is invalid because... (Note: `fmt` is for black, not ruff) "S110", # `try`-`except`-`pass` detected, consider logging the exception (Note: good advice, but we don't log) "S112", # `try`-`except`-`continue` detected, consider logging the exception (Note: good advice, but we don't log) "S603", # `subprocess` call: check for execution of untrusted input (Note: not important for us) @@ -348,7 +337,6 @@ ignore = [ "SIM105", # Use contextlib.suppress(...) instead of try-except-pass (Note: try-except-pass is much faster) "SIM108", # Use ternary operator ... instead of if-else-block (Note: if-else better for coverage and sometimes clearer) "TRY003", # Avoid specifying long messages outside the exception class (Note: why?) 
- "UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` (Note: using `|` is slower atm) # Ignored categories "C90", # mccabe (Too strict, but maybe we should make things less complex) @@ -374,6 +362,7 @@ ignore = [ "graphblas/core/operator/__init__.py" = ["A005"] "graphblas/io/__init__.py" = ["A005"] # shadows a standard-library module "graphblas/core/operator/base.py" = ["S102"] # exec is used for UDF +"graphblas/monoid/numpy.py" = ["PLW0108"] # lambda is needed for numba.njit "graphblas/core/ss/matrix.py" = [ "NPY002", # numba doesn't support rng generator yet "PLR1730", @@ -382,6 +371,9 @@ ignore = [ "NPY002", # numba doesn't support rng generator yet ] "graphblas/core/utils.py" = ["PLE0302"] # `__set__` is used as a property +"scripts/ci_pick_versions.py" = [ + "S311", +] # random.choice is intentional for CI version selection "graphblas/ss/_core.py" = ["N999"] # We want _core.py to be underscopre # Allow useless expressions, assert, pickle, RNG, print, no docstring, and yoda in tests "graphblas/tests/*py" = [ @@ -398,7 +390,7 @@ ignore = [ "graphblas/**/__init__.py" = [ "F401", # Allow unused imports (w/o defining `__all__`) ] -"scripts/*.py" = ["INP001"] # Not a package +"scripts/*.py" = ["INP001", "T201"] # Not a package; allow print "scripts/create_pickle.py" = ["F403", "F405"] # Allow `from foo import *` "docs/*.py" = ["INP001"] # Not a package @@ -411,7 +403,7 @@ builtins-allowed-modules = ["select"] fixture-parentheses = false mark-parentheses = false -[tool.lint.ruff.pydocstyle] +[tool.ruff.lint.pydocstyle] convention = "numpy" [tool.bandit] @@ -423,7 +415,7 @@ skips = [ [tool.pylint.messages_control] # To run a single check, do: pylint graphblas --disable E,W,R,C,I --enable assignment-from-no-return max-line-length = 100 -py-version = "3.10" +py-version = "3.11" enable = ["I"] disable = [ # Error diff --git a/scripts/check_versions.sh b/scripts/check_versions.sh index 5aa88e045..afcf3e0ac 100755 --- a/scripts/check_versions.sh +++ 
b/scripts/check_versions.sh @@ -3,15 +3,19 @@ # Use, adjust, copy/paste, etc. as necessary to answer your questions. # This may be helpful when updating dependency versions in CI. # Tip: add `--json` for more information. -conda search 'flake8-bugbear[channel=conda-forge]>=24.12.12' -conda search 'flake8-simplify[channel=conda-forge]>=0.21.0' -conda search 'numpy[channel=conda-forge]>=2.2.3' -conda search 'pandas[channel=conda-forge]>=2.2.3' -conda search 'scipy[channel=conda-forge]>=1.15.2' -conda search 'networkx[channel=conda-forge]>=3.4.2' -conda search 'awkward[channel=conda-forge]>=2.7.4' -conda search 'sparse[channel=conda-forge]>=0.15.5' -conda search 'fast_matrix_market[channel=conda-forge]>=1.7.6' -conda search 'numba[channel=conda-forge]>=0.61.0' -conda search 'pyyaml[channel=conda-forge]>=6.0.2' -# conda search 'python[channel=conda-forge]>=3.10 *pypy*' +# +# When updating versions throughout the repo (CI, pyproject.toml, pre-commit, etc.), +# also update these version numbers to match the latest versions we currently test. 
+conda search 'flake8-bugbear[channel=conda-forge]>=25.11.29' +conda search 'flake8-simplify[channel=conda-forge]>=0.30.0' +conda search 'numpy[channel=conda-forge]>=2.4' +conda search 'pandas[channel=conda-forge]>=3.0' +conda search 'scipy[channel=conda-forge]>=1.17' +conda search 'networkx[channel=conda-forge]>=3.6' +conda search 'awkward[channel=conda-forge]>=2.9' +conda search 'sparse[channel=conda-forge]>=0.15' +# fast_matrix_market is deprecated (no longer maintained; last supported Python is 3.12) +conda search 'numba[channel=conda-forge]>=0.64' +conda search 'pyyaml[channel=conda-forge]>=6.0' +conda search 'python-suitesparse-graphblas[channel=conda-forge]>=10.3.1' +# conda search 'python[channel=conda-forge]>=3.11 *pypy*' diff --git a/scripts/ci_pick_versions.py b/scripts/ci_pick_versions.py new file mode 100755 index 000000000..349791396 --- /dev/null +++ b/scripts/ci_pick_versions.py @@ -0,0 +1,532 @@ +#!/usr/bin/env python +"""Pick random dependency versions for CI testing. + +Randomly selects compatible dependency versions for python-graphblas CI. +Replaces the bash-based version selection in test_and_build.yml. + +Usage (in GitHub Actions workflow): + eval "$(python scripts/ci_pick_versions.py --python 3.12 --source conda-forge)" + +Output: bash-eval-safe key=value lines using the same variable names as the workflow: + npver='=2.2' # conda pin + spver='=1.15' + npver='' # empty = latest (no pin) + sparsever='NA' # NA = skip this package +""" + +import argparse +import random +import sys + +# --------------------------------------------------------------------------- +# Version pools: which versions we want to test per package. +# "" means "latest" (no pin). "NA" means "don't install". 
+# +# When updating versions here, also update scripts/check_versions.sh +# --------------------------------------------------------------------------- + +NUMPY_VERSIONS = { + "3.11": ["1.24", "1.25", "1.26", "2.0", "2.1", "2.2", "2.3", "2.4", ""], + "3.12": ["1.26", "2.0", "2.1", "2.2", "2.3", "2.4", ""], + "3.13": ["2.1", "2.2", "2.3", "2.4", ""], + "3.14": ["2.3", "2.4", ""], +} + +# Deps that depend on numpy version (1.x vs 2.x path). +# Per-Python sublists narrow to versions with available conda builds. +SCIPY_VERSIONS = { + "1.x": { + "3.11": ["1.9", "1.10", "1.11", "1.12", "1.13", "1.14", ""], + "3.12": ["1.11", "1.12", "1.13", "1.14", ""], + }, + "2.x": ["1.13", "1.14", "1.15", "1.16", "1.17", ""], +} + +PANDAS_VERSIONS = { + "1.x": { + "3.11": ["1.5", "2.0", "2.1", "2.2", "2.3", ""], + "3.12": ["2.1", "2.2", "2.3", ""], + }, + "2.x": ["2.2", "2.3", "3.0", ""], +} + +AWKWARD_VERSIONS = { + "1.x": { + "3.11": ["2.0", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8", "2.9", ""], + "3.12": ["2.4", "2.5", "2.6", "2.7", "2.8", "2.9", ""], + }, + "2.x": ["2.6", "2.7", "2.8", "2.9", ""], +} + +NUMBA_VERSIONS = { + "1.x": ["0.57", "0.58", "0.59", "0.60", "0.61", ""], + "2.x": ["0.62", "0.63", "0.64", ""], +} + +# Deps that only depend on Python version (not numpy) +NETWORKX_VERSIONS = { + "3.11": ["2.8", "3.0", "3.1", "3.2", "3.3", "3.4", "3.5", "3.6", ""], + "3.12": ["3.2", "3.3", "3.4", "3.5", "3.6", ""], + "3.13": ["3.4", "3.5", "3.6", ""], + "3.14": ["3.6", ""], +} + +PYYAML_VERSIONS = { + "3.11": ["5.4", "6.0", ""], + "3.12": ["6.0", ""], + "3.13": ["6.0", ""], + "3.14": ["6.0", ""], +} + +SPARSE_VERSIONS = { + "3.11": ["0.14", "0.15", ""], + "3.12": ["0.14", "0.15", ""], + "3.13": "NA", + "3.14": "NA", +} + +# PSG versions by numpy branch and source type. +# conda-forge uses "=" prefix, wheel/source use "==". 
+PSG_VERSIONS_NP1 = { + "conda-forge": { + "3.11": [ + "7.4.0", + "7.4.1", + "7.4.2", + "7.4.3.0", + "7.4.3.1", + "7.4.3.2", + "8.0.2.1", + "8.2.0.1", + "8.2.1.0", + ], + "3.12": ["8.2.0.1", "8.2.1.0"], + }, + "wheel": { + "3.11": ["7.4.3.2", "8.0.2.1", "8.2.0.1", "8.2.1.0"], + "3.12": ["8.2.0.1", "8.2.1.0"], + }, + "source": { + "3.11": [ + "7.4.0.0", + "7.4.1.0", + "7.4.2.0", + "7.4.3.0", + "7.4.3.1", + "7.4.3.2", + "8.0.2.1", + "8.2.0.1", + "8.2.1.0", + ], + "3.12": ["8.2.0.1", "8.2.1.0"], + }, +} + +PSG_VERSIONS_NP2 = { + "no_py314": ["9.3.1.0", "9.4.5.0", "10.0.1.1", "10.1.1.0", "10.3.1.0"], + "py314_only": ["10.0.1.1", "10.1.1.0", "10.3.1.0"], +} + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _ver(s): + """Parse version string to tuple for comparison. "" means latest (very large).""" + if s in ("", "NA"): + return (9999,) + return tuple(int(x) for x in s.split(".")) + + +# --------------------------------------------------------------------------- +# Constraints +# --------------------------------------------------------------------------- + + +def apply_constraints(v, pyver, scipy_pool, numba_pool): + """Mutate version dict to satisfy all known compatibility constraints. + + Each constraint comment documents the real-world requirement it encodes. + Order matters: numpy/scipy constraints first, then pandas (which may bump scipy/numba), + then Python-version constraints, then numba/numpy constraints. 
+ """ + # --- scipy / numpy constraints --- + + # scipy >=1.15 requires numpy >=1.26.4 + if v["numpy"] in ("1.24", "1.25") and _ver(v["scipy"]) >= (1, 15): + candidates = [s for s in scipy_pool if s and _ver(s) < (1, 15)] + v["scipy"] = random.choice(candidates) if candidates else "1.14" + + # scipy <1.13 doesn't support numpy 2.x (safety net) + np_is_1x = v["numpy"].startswith("1.") if v["numpy"] else False + if not np_is_1x and v["scipy"] not in ("", "NA") and _ver(v["scipy"]) < (1, 13): + v["scipy"] = random.choice([s for s in scipy_pool if _ver(s) >= (1, 13)]) + + # scipy <1.15.1 requires numpy <2.3; scipy <1.16 requires numpy <2.5 + if _ver(v["numpy"]) >= (2, 3): + if v["scipy"] in ("1.13", "1.14"): + v["scipy"] = random.choice(["1.16", "1.17", ""]) + elif v["scipy"] == "1.15": + v["scipy"] = random.choice(["1.15", "1.16", "1.17", ""]) + + # numpy 1.26 + scipy 1.9 conflict + if v["numpy"] == "1.26" and v["scipy"] == "1.9": + v["scipy"] = random.choice(["1.10", "1.11", ""]) + + # --- scipy / Python version availability --- + + # scipy <1.14 has no py3.13 builds; scipy <1.16 has no py3.14 builds + if pyver == "3.14" and v["scipy"] not in ("", "NA") and _ver(v["scipy"]) < (1, 16): + v["scipy"] = random.choice(["1.16", "1.17", ""]) + elif pyver == "3.13" and v["scipy"] == "1.13": + v["scipy"] = random.choice(["1.14", "1.15", "1.16", "1.17", ""]) + + # --- pandas constraints --- + + # pandas <2.3 has no py3.14 builds + if pyver == "3.14" and v["pandas"] == "2.2": + v["pandas"] = random.choice(["2.3", "3.0", ""]) + + # pandas 3.0 requires numba >=0.60 and scipy >=1.14.1 + if v["pandas"] == "3.0": + if v["numba"] not in ("", "NA") and _ver(v["numba"]) < (0, 60): + v["numba"] = "0.60" + if v["scipy"] not in ("", "NA") and _ver(v["scipy"]) < (1, 15): + v["scipy"] = random.choice(["1.15", "1.16", "1.17", ""]) + + # --- awkward / Python version availability --- + + # awkward <2.7 has no py3.13 builds; awkward <2.8 has no py3.14 builds + if pyver == "3.14" and 
v["awkward"] not in ("", "NA") and _ver(v["awkward"]) < (2, 8): + v["awkward"] = random.choice(["2.8", "2.9", ""]) + elif pyver == "3.13" and v["awkward"] == "2.6": + v["awkward"] = random.choice(["2.7", "2.8", "2.9", ""]) + + # --- numba constraints --- + + # numba minimum by Python version: 0.59 for 3.12, 0.61 for 3.13, 0.63 for 3.14 + numba_min = {"3.11": (0, 57), "3.12": (0, 59), "3.13": (0, 61), "3.14": (0, 63)} + if v["numba"] not in ("", "NA"): + min_ver = numba_min[pyver] + if _ver(v["numba"]) < min_ver: + pool = [n for n in numba_pool if _ver(n) >= min_ver] + v["numba"] = random.choice(pool) if pool else "" + + # numba <0.64 requires numpy <2.4 + if _ver(v["numpy"]) >= (2, 4) and v["numba"] in ("0.62", "0.63"): + v["numba"] = "0.64" + + # numba <0.62 doesn't support numpy 2.x + if not np_is_1x and v["numba"] not in ("", "NA") and _ver(v["numba"]) < (0, 62): + v["numba"] = "NA" + + # --- sparse --- + + # sparse doesn't support Python 3.13+ + if pyver in ("3.13", "3.14"): + v["sparse"] = "NA" + + +# --------------------------------------------------------------------------- +# Main logic +# --------------------------------------------------------------------------- + + +def pick_versions(pyver, source_type): + """Pick random compatible dependency versions. 
+ + Parameters + ---------- + pyver : str + Python version like "3.12" + source_type : str + One of "conda-forge", "wheel", "source", "upstream" + + Returns + ------- + dict + Package name -> version string ("" = latest, "NA" = skip) + """ + # Step 1: Pick numpy + numpy_pool = NUMPY_VERSIONS[pyver] + npver = random.choice(numpy_pool) + + # Upstream needs numpy 2 + if source_type == "upstream" and npver.startswith("1."): + npver = random.choice([v for v in numpy_pool if not v.startswith("1.")] or [""]) + + np_is_1x = npver.startswith("1.") if npver else False + + # Step 2: Pick numpy-dependent deps + if np_is_1x: + scipy_pool = SCIPY_VERSIONS["1.x"].get(pyver, SCIPY_VERSIONS["1.x"]["3.11"]) + pandas_pool = PANDAS_VERSIONS["1.x"].get(pyver, PANDAS_VERSIONS["1.x"]["3.11"]) + awkward_pool = AWKWARD_VERSIONS["1.x"].get(pyver, AWKWARD_VERSIONS["1.x"]["3.11"]) + numba_pool = NUMBA_VERSIONS["1.x"] + else: + scipy_pool = SCIPY_VERSIONS["2.x"] + pandas_pool = PANDAS_VERSIONS["2.x"] + awkward_pool = AWKWARD_VERSIONS["2.x"] + numba_pool = NUMBA_VERSIONS["2.x"] + + v = { + "numpy": npver, + "scipy": random.choice(scipy_pool), + "pandas": random.choice(pandas_pool), + "awkward": random.choice(awkward_pool), + "numba": random.choice(numba_pool), + "networkx": random.choice(NETWORKX_VERSIONS[pyver]), + "pyyaml": random.choice(PYYAML_VERSIONS[pyver]), + "sparse": _pick_scalar_or_list(SPARSE_VERSIONS[pyver]), + } + + # Source builds have issues with some numpy/scipy/pandas versions; + # blank them before constraints so numba/etc constraints see the right numpy. 
def _pick_scalar_or_list(pool):
    """Resolve a pool that is either a fixed string (e.g. "NA") or a list.

    A string pool is returned unchanged; any other pool is sampled with
    ``random.choice``.
    """
    if isinstance(pool, str):
        return pool
    return random.choice(pool)


def _pick_psg(npver, pyver, source_type):
    """Choose the python-suitesparse-graphblas pin for this run.

    Parameters
    ----------
    npver : str
        Selected numpy version ("" when numpy is unpinned).
    pyver : str
        Python version like "3.12"
    source_type : str
        One of "conda-forge", "wheel", "source", "upstream"

    Returns
    -------
    str
        "" (no pin) or a version carrying its install prefix: "=" for
        conda-forge, "==" for every other source type.
    """
    # Upstream builds never pin psg.
    if source_type == "upstream":
        return ""

    pin = "=" if source_type == "conda-forge" else "=="

    if npver and npver.startswith("1."):
        # numpy 1.x: the pool is keyed by source type, then by Python version.
        candidates = PSG_VERSIONS_NP1.get(source_type, {}).get(pyver, [])
        return f"{pin}{random.choice(candidates)}" if candidates else ""

    # numpy 2.x (or unpinned numpy): Python 3.14 has its own pool.
    pool_key = "py314_only" if pyver == "3.14" else "no_py314"
    options = [f"{pin}{ver}" for ver in PSG_VERSIONS_NP2[pool_key]]
    options.append("")  # "" is always a candidate next to the pinned versions
    return random.choice(options)


# ---------------------------------------------------------------------------
# Output formatting
# ---------------------------------------------------------------------------

# Internal package name -> short shell variable name consumed by the workflow
_VAR_NAMES = {
    "numpy": "npver",
    "scipy": "spver",
    "pandas": "pdver",
    "awkward": "akver",
    "networkx": "nxver",
    "pyyaml": "yamlver",
    "sparse": "sparsever",
    "numba": "numbaver",
    "psg": "psgver",
}

# Internal package name -> abbreviated label used in the one-line CI summary
_SUMMARY_NAMES = {
    "numpy": "np",
    "scipy": "sp",
    "pandas": "pd",
    "awkward": "ak",
    "networkx": "nx",
    "pyyaml": "yaml",
    "sparse": "sparse",
    "numba": "numba",
    "psg": "psg",
}


def format_output(v):
    """Render the version dict as ``name='value'`` lines for bash ``eval``.

    Pinned versions are emitted with a leading "=" (e.g. ``npver='=2.2'``),
    while "" (latest) and "NA" (skip) are passed through untouched.  The psg
    entry is emitted verbatim because its "="/"==" prefix is already part of
    the value.
    """

    def assignment(pkg):
        value = v[pkg]
        prefix = "" if value in ("NA", "") else "="
        return f"{_VAR_NAMES[pkg]}='{prefix}{value}'"

    ordered = ("numpy", "scipy", "pandas", "awkward", "networkx", "pyyaml", "sparse", "numba")
    rendered = [assignment(pkg) for pkg in ordered]
    # psg already has = or == prefix
    rendered.append(f"psgver='{v['psg']}'")
    return "\n".join(rendered)


def format_summary(v):
    """Build the single-line human-readable summary printed to the CI log."""
    ordered = (
        "numpy",
        "scipy",
        "pandas",
        "awkward",
        "networkx",
        "numba",
        "pyyaml",
        "sparse",
        "psg",
    )
    pieces = []
    for pkg in ordered:
        value = v[pkg]
        # "" means "take the latest"; every other value (including "NA") is shown as-is.
        shown = "latest" if value == "" else value
        pieces.append(f"{_SUMMARY_NAMES[pkg]}={shown}")
    return "versions: " + " ".join(pieces)


# ---------------------------------------------------------------------------
# Validation (for testing the script itself)
# ---------------------------------------------------------------------------


def validate(v, pyver):
    """Check one version combination against every known compatibility rule.

    Parameters
    ----------
    v : dict
        Package name -> version string ("" = latest, "NA" = skip)
    pyver : str
        Python version like "3.12"

    Returns
    -------
    list of str
        One message per violated rule; empty when the combination is valid.
    """
    problems = []
    report = problems.append

    def pinned(pkg):
        # True when the package carries an explicit version (neither latest nor skipped).
        return v[pkg] not in ("", "NA")

    def older(pkg, bound):
        # True when the package is pinned to a version strictly below ``bound``.
        return pinned(pkg) and _ver(v[pkg]) < bound

    np_pin = v["numpy"]
    numpy_is_1x = bool(np_pin) and np_pin.startswith("1.")

    # --- scipy vs numpy ----------------------------------------------------
    # scipy >=1.15 requires numpy >=1.26.4
    if np_pin in ("1.24", "1.25") and _ver(v["scipy"]) >= (1, 15):
        report(f"scipy {v['scipy']} requires numpy >=1.26.4, got {np_pin}")
    # scipy <1.13 requires numpy 1.x
    if not numpy_is_1x and older("scipy", (1, 13)):
        report(f"scipy {v['scipy']} doesn't support numpy 2.x")
    # scipy <1.15.1 requires numpy <2.3
    if _ver(np_pin) >= (2, 3) and v["scipy"] in ("1.13", "1.14"):
        report(f"scipy {v['scipy']} requires numpy <2.3, got {np_pin}")
    # numpy 1.26 + scipy 1.9
    if np_pin == "1.26" and v["scipy"] == "1.9":
        report("numpy 1.26 + scipy 1.9 conflict")

    # --- scipy vs Python ---------------------------------------------------
    if pyver == "3.14" and older("scipy", (1, 16)):
        report(f"scipy {v['scipy']} has no py3.14 build")
    if pyver == "3.13" and v["scipy"] == "1.13":
        report("scipy 1.13 has no py3.13 build")

    # --- pandas ------------------------------------------------------------
    if pyver == "3.14" and v["pandas"] == "2.2":
        report("pandas 2.2 has no py3.14 build")
    if v["pandas"] == "3.0":
        if older("numba", (0, 60)):
            report(f"pandas 3.0 requires numba >=0.60, got {v['numba']}")
        if older("scipy", (1, 15)):
            report(f"pandas 3.0 requires scipy >=1.14.1, got {v['scipy']}")

    # --- awkward vs Python -------------------------------------------------
    if pyver == "3.14" and older("awkward", (2, 8)):
        report(f"awkward {v['awkward']} has no py3.14 build")
    if pyver == "3.13" and v["awkward"] == "2.6":
        report("awkward 2.6 has no py3.13 build")

    # --- numba -------------------------------------------------------------
    # Oldest numba release usable on each Python version.
    numba_floor = {"3.11": (0, 57), "3.12": (0, 59), "3.13": (0, 61), "3.14": (0, 63)}
    # The floor is looked up only for a pinned numba, so it stays inline here.
    if pinned("numba") and _ver(v["numba"]) < numba_floor[pyver]:
        report(f"numba {v['numba']} doesn't support Python {pyver}")
    # numba <0.64 requires numpy <2.4
    if v["numba"] in ("0.62", "0.63") and _ver(np_pin) >= (2, 4):
        report(f"numba {v['numba']} requires numpy <2.4, got {np_pin}")
    # numba <0.62 requires numpy 1.x
    if not numpy_is_1x and older("numba", (0, 62)):
        report(f"numba {v['numba']} doesn't support numpy 2.x")

    # --- sparse vs Python --------------------------------------------------
    if pyver in ("3.13", "3.14") and v["sparse"] != "NA":
        report(f"sparse doesn't support Python {pyver}")

    return problems


def stress_test(n=10000):
    """Draw ``n`` random picks per Python/source pair and validate each one.

    Every failing combination and its errors are written to stderr, followed
    by a totals line.  Returns the number of failing combinations.
    """
    sources = ("conda-forge", "wheel", "source", "upstream")
    attempted = 0
    failed = 0
    for pyver in NUMPY_VERSIONS:
        for source in sources:
            for _ in range(n):
                attempted += 1
                picked = pick_versions(pyver, source)
                problems = validate(picked, pyver)
                if not problems:
                    continue
                failed += 1
                print(f"FAIL py{pyver} {source}: {picked}", file=sys.stderr)
                for problem in problems:
                    print(f" - {problem}", file=sys.stderr)
    print(f"Stress test: {attempted} combos, {failed} failures", file=sys.stderr)
    return failed


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------


def _build_parser():
    """Create the command-line parser used by ``main``."""
    parser = argparse.ArgumentParser(description="Pick random CI dependency versions")
    parser.add_argument("--python", help="Python version (e.g. 3.12)")
    parser.add_argument(
        "--source",
        choices=["conda-forge", "wheel", "source", "upstream"],
        help="Package source type",
    )
    parser.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility")
    parser.add_argument(
        "--validate", action="store_true", help="Run stress test to validate all constraints"
    )
    return parser


def main():
    """Command-line entry point.

    With ``--validate`` the stress test runs and the process exits with 1 if
    any combination failed, 0 otherwise.  Otherwise one combination is picked
    for ``--python``/``--source``; the summary goes to stderr and the
    ``name='value'`` lines go to stdout.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)

    if args.validate:
        sys.exit(int(bool(stress_test())))

    if not (args.python and args.source):
        parser.error("--python and --source are required (unless --validate)")

    pyver = args.python
    if pyver not in NUMPY_VERSIONS:
        print(f"Error: unsupported Python version {pyver}", file=sys.stderr)
        print(f"Supported: {', '.join(NUMPY_VERSIONS)}", file=sys.stderr)
        sys.exit(1)

    picked = pick_versions(pyver, args.source)

    # Summary on stderr so it shows up in the CI log without polluting stdout
    print(format_summary(picked), file=sys.stderr)

    # Shell assignments on stdout for the workflow to eval
    print(format_output(picked))


if __name__ == "__main__":
    main()
def main():
    """Print JIT diagnostics and return a process exit code.

    The report covers, in order: platform, relevant environment variables,
    sysconfig compiler settings, compilers found on PATH / in the conda
    prefix, the GraphBLAS JIT configuration, the outcome of
    ``_fix_jit_config``, and a trial JIT compilation.

    Returns
    -------
    int
        1 if graphblas could not be imported or initialized, otherwise 0
        (including when SuiteSparse:GraphBLAS 7.x is detected or when the
        trial JIT compilation fails -- the failure is only reported).
    """
    import re

    print("=" * 60)
    print("JIT Diagnostics")
    print("=" * 60)

    print("\n--- Platform ---")
    print(f"sys.platform: {sys.platform}")
    print(f"Python: {sys.version}")

    print("\n--- Environment ---")
    conda = os.environ.get("CONDA_PREFIX", "")
    print(f"CONDA_PREFIX: {conda or 'NOT SET'}")
    print(f"GITHUB_ACTIONS: {os.environ.get('GITHUB_ACTIONS', 'NOT SET')}")

    print("\n--- sysconfig ---")
    for key in ["CC", "CXX", "CFLAGS", "LDFLAGS", "LIBS"]:
        val = sysconfig.get_config_var(key)
        # Keep the report readable: long values are cut to 100 characters.
        if val and len(str(val)) > 100:
            val = str(val)[:100] + "..."
        print(f" {key}: {val}")
    print(f" include: {sysconfig.get_path('include')}")

    print("\n--- Compiler search ---")
    candidates = ["cc", "gcc", "clang"]
    if sys.platform == "linux":
        candidates.append("x86_64-conda-linux-gnu-cc")
    elif sys.platform == "darwin":
        candidates.extend(
            [
                "x86_64-apple-darwin13.4.0-clang",
                "arm64-apple-darwin20.0.0-clang",
            ]
        )
    for name in candidates:
        which = shutil.which(name)
        # Also look directly in the conda prefix, independent of PATH.
        conda_path = pathlib.Path(conda) / "bin" / name if conda else None
        conda_exists = conda_path.exists() if conda_path else False
        print(f" {name}: which={which}, conda={'yes' if conda_exists else 'no'}")

    # Import graphblas lazily so the sections above print even when it is broken.
    try:
        import graphblas as gb

        gb.init("suitesparse")
    except Exception as e:
        print(f"\nERROR: Could not initialize graphblas: {e}")
        return 1

    from graphblas.core.ss import _IS_SSGB7

    if _IS_SSGB7:
        print("\nSuiteSparse:GraphBLAS 7.x — JIT not available")
        return 0

    print("\n--- GraphBLAS JIT defaults (from compiled C library) ---")
    for key in [
        "jit_c_control",
        "jit_c_compiler_name",
        "jit_c_compiler_flags",
        "jit_c_linker_flags",
        "jit_c_libraries",
        "jit_c_cmake_libs",
        "jit_cache_path",
    ]:
        val = gb.ss.config[key]
        if isinstance(val, str) and len(val) > 120:
            val = val[:120] + "..."
        print(f" {key}: {val}")

    # Check if default compiler exists
    jit_cc = gb.ss.config["jit_c_compiler_name"]
    print("\n--- Compiler path analysis ---")
    print(f" Default compiler: {jit_cc}")
    print(f" Exists? {pathlib.Path(jit_cc).exists()}")
    cc_basename = pathlib.Path(jit_cc).name
    if conda:
        local_cc = pathlib.Path(conda) / "bin" / cc_basename
        print(f" Conda equivalent: {local_cc}")
        print(f" Exists? {local_cc.exists()}")
        fallback_cc = pathlib.Path(conda) / "bin" / "cc"
        print(f" Fallback (cc): {fallback_cc}")
        print(f" Exists? {fallback_cc.exists()}")

    # Check for problematic flags: paths baked in at build time may not exist here.
    flags = gb.ss.config["jit_c_compiler_flags"]
    if isysroot := re.search(r"-isysroot\s+(\S+)", flags):
        path = isysroot.group(1)
        print(f"\n -isysroot: {path}")
        print(f" Exists? {pathlib.Path(path).exists()}")
    for d in re.findall(r"-fdebug-prefix-map=(\S+)", flags):
        print(f" -fdebug-prefix-map: {d}")

    # Try the fix
    print("\n--- Attempting _fix_jit_config ---")
    from graphblas.tests.test_ssjit import _fix_jit_config

    result = _fix_jit_config()
    # True=JIT working, False=probe failed, None=no conda env
    desc = "working" if result is True else "probe failed" if result is False else "no conda"
    print(f" Result: {result} ({desc})")
    if result is True:
        print(f" Compiler: {gb.ss.config['jit_c_compiler_name']}")
        flags_after = gb.ss.config["jit_c_compiler_flags"]
        if len(flags_after) > 120:
            flags_after = flags_after[:120] + "..."
        print(f" Flags: {flags_after}")

    # Try JIT compilation
    print("\n--- JIT compilation test ---")
    if gb.ss.config["jit_c_control"] == "off":
        print(" SKIPPED (JIT is off)")
    else:
        try:
            from graphblas import dtypes

            prev_burble = gb.ss.config["burble"]
            # NOTE(review): burble presumably makes GraphBLAS print its own
            # diagnostics during the registration -- confirm.
            gb.ss.config["burble"] = True
            try:
                dtype = dtypes.ss.register_new(
                    "jit_diag_test",
                    "typedef struct { int val ; } jit_diag_test ;",
                )
            finally:
                # Restore the previous setting even when registration raises,
                # so a failed probe does not leave burble enabled.
                gb.ss.config["burble"] = prev_burble
            print(f" SUCCESS: registered type '{dtype.name}'")
        except Exception as e:
            print(f" FAILED: {type(e).__name__}: {e}")
            # Best effort: the error log is extra context, never a reason to crash.
            try:
                err_log = gb.ss.config["jit_error_log"]
                if err_log:
                    print(f" JIT error log: {err_log[:500]}")
            except Exception:
                pass

    # Print final JIT state
    print("\n--- Final JIT state ---")
    print(f" jit_c_control: {gb.ss.config['jit_c_control']}")
    print(f" jit_c_compiler_name: {gb.ss.config['jit_c_compiler_name']}")

    print(f"\n{'=' * 60}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())