diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 2fe73ca77..16d5f11bc 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,11 +1,20 @@ version: 2 +enable-beta-ecosystems: true updates: -- package-ecosystem: "github-actions" +- package-ecosystem: github-actions directory: "/" schedule: - interval: "weekly" + interval: weekly -- package-ecosystem: "gitsubmodule" +- package-ecosystem: gitsubmodule directory: "/" schedule: - interval: "weekly" + interval: weekly + +- package-ecosystem: pre-commit + directory: "/" + schedule: + interval: monthly + groups: + pre-commit: + patterns: ["*"] diff --git a/.github/workflows/alpine-test.yml b/.github/workflows/alpine-test.yml index 2c1eed391..5c999e487 100644 --- a/.github/workflows/alpine-test.yml +++ b/.github/workflows/alpine-test.yml @@ -2,8 +2,11 @@ name: test-alpine on: [push, pull_request, workflow_dispatch] +permissions: + contents: read + jobs: - build: + test: runs-on: ubuntu-latest container: @@ -16,14 +19,14 @@ jobs: steps: - name: Prepare Alpine Linux run: | - apk add sudo git git-daemon python3 py3-pip + apk add sudo git git-daemon python3 py3-pip py3-virtualenv echo 'Defaults env_keep += "CI GITHUB_* RUNNER_*"' >/etc/sudoers.d/ci_env addgroup -g 127 docker - adduser -D -u 1001 runner + adduser -D -u 1001 runner # TODO: Check if this still works on GHA as intended. adduser runner docker shell: sh -exo pipefail {0} # Run this as root, not the "runner" user. - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 @@ -44,23 +47,44 @@ jobs: # and cause subsequent tests to fail cat test/fixtures/.gitconfig >> ~/.gitconfig - - name: Set up virtualenv + - name: Set up virtual environment run: | python -m venv .venv - . .venv/bin/activate - printf '%s=%s\n' 'PATH' "$PATH" 'VIRTUAL_ENV' "$VIRTUAL_ENV" >>"$GITHUB_ENV" - name: Update PyPA packages run: | - # Get the latest pip, wheel, and prior to Python 3.12, setuptools. - python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + . .venv/bin/activate + python -m pip install -U pip 'setuptools; python_version<"3.12"' wheel - name: Install project and test dependencies run: | - pip install ".[test]" + . .venv/bin/activate + pip install '.[test]' + + - name: Show POSIX file ownership + run: | + ls -ld -- \ + "$(pwd)" \ + "$(pwd)/.git" \ + "$(pwd)/git/ext/gitdb" \ + "$(pwd)/git/ext/gitdb/.git" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap/.git" \ + "${HOME:?HOME is not set}/.gitconfig" \ + 2>&1 || true + + - name: Show safe.directory entries + # `actions/checkout`'s safe.directory add is only durable for the + # checkout itself (it writes under a throwaway HOME override and + # then discards it), so by the time this step runs the runner + # user's `~/.gitconfig` has no entries -- and the Alpine container + # chowns the workspace to runner:docker to match the test user, so + # git accepts the ownership without one. Expected: `(none)`. + run: git config --global --get-all safe.directory || echo "(none)" - name: Show version and platform information run: | + . .venv/bin/activate uname -a command -v git python git version @@ -69,4 +93,5 @@ jobs: - name: Test with pytest run: | + . .venv/bin/activate pytest --color=yes -p no:sugar --instafail -vv diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index ae5241898..e243416a8 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -19,38 +19,48 @@ on: jobs: analyze: - name: Analyze + name: Analyze (${{ matrix.language }}) # Runner size impacts CodeQL analysis time. To learn more, please see: # - https://gh.io/recommended-hardware-resources-for-running-codeql # - https://gh.io/supported-runners-and-hardware-resources - # - https://gh.io/using-larger-runners - # Consider using larger runners for possible analysis time improvements. + # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} - timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} permissions: - actions: read - contents: read security-events: write strategy: fail-fast: false matrix: - language: [ 'python' ] - # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] - # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both - # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both - # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support - + include: + - language: actions + build-mode: none + - language: python + build-mode: none + # CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' + # Use `c-cpp` to analyze code written in C, C++ or both + # Use 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, + # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. + # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how + # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 + + # Add any setup steps before running the `github/codeql-action/init` action. + # This includes steps like installing compilers or runtimes (`actions/setup-node` + # or others). This is typically only required for manual builds. + # - name: Setup runtime (example) + # uses: actions/setup-example@v1 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v3 + uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} - setup-python-dependencies: false + build-mode: ${{ matrix.build-mode }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. @@ -58,23 +68,23 @@ jobs: # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs # queries: security-extended,security-and-quality - - # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v3 - + # If the analyze step fails for one of the languages you are analyzing with + # "We were unable to automatically build your code", modify the matrix above + # to set the build mode to "manual" for that language. Then modify this step + # to build your code. # â„šī¸ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - - # If the Autobuild fails above, remove it and uncomment the following three lines. - # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. - - # - run: | - # echo "Run, Build Application using script" - # ./location_of_script_within_repo/buildscript.sh + - if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + echo ' make bootstrap' + echo ' make release' + exit 1 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 + uses: github/codeql-action/analyze@v4 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/cygwin-test.yml b/.github/workflows/cygwin-test.yml index bde4ea659..17ba4bc82 100644 --- a/.github/workflows/cygwin-test.yml +++ b/.github/workflows/cygwin-test.yml @@ -2,35 +2,47 @@ name: test-cygwin on: [push, pull_request, workflow_dispatch] -jobs: - build: - runs-on: windows-latest +permissions: + contents: read +jobs: + test: strategy: + matrix: + selection: [fast, perf] + include: + - selection: fast + additional-pytest-args: --ignore=test/performance + - selection: perf + additional-pytest-args: test/performance + fail-fast: false + runs-on: windows-latest + env: CHERE_INVOKING: "1" CYGWIN_NOWINPATH: "1" defaults: run: - shell: C:\tools\cygwin\bin\bash.exe --login --norc -eo pipefail -o igncr "{0}" + shell: C:\cygwin\bin\bash.exe --login --norc -eo pipefail -o igncr "{0}" steps: - name: Force LF line endings run: | git config --global core.autocrlf false # Affects the non-Cygwin git. - shell: bash # Use Git Bash instead of Cygwin Bash for this step. + shell: pwsh # Do this outside Cygwin, to affect actions/checkout. - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 - - name: Set up Cygwin - uses: egor-tensin/setup-cygwin@v4 + - name: Install Cygwin + uses: cygwin/cygwin-install-action@v6 with: - packages: python39=3.9.16-1 python39-pip python39-virtualenv git + packages: git python39 python-pip-wheel python-setuptools-wheel python-wheel-wheel + add-to-path: false # No need to change $PATH outside the Cygwin environment. - name: Arrange for verbose output run: | @@ -41,6 +53,8 @@ jobs: run: | git config --global --add safe.directory "$(pwd)" git config --global --add safe.directory "$(pwd)/.git" + git config --global --add safe.directory "$(pwd)/git/ext/gitdb" + git config --global --add safe.directory "$(pwd)/git/ext/gitdb/gitdb/ext/smmap" git config --global core.autocrlf false - name: Prepare this repo for tests @@ -55,19 +69,70 @@ jobs: # and cause subsequent tests to fail cat test/fixtures/.gitconfig >> ~/.gitconfig - - name: Ensure the "pip" command is available + - name: Set up virtual environment run: | - # This is used unless, and before, an updated pip is installed. - ln -s pip3 /usr/bin/pip + python3.9 -m venv .venv + echo 'BASH_ENV=.venv/bin/activate' >>"$GITHUB_ENV" - name: Update PyPA packages run: | - # Get the latest pip, wheel, and prior to Python 3.12, setuptools. - python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + python -m pip install -U pip 'setuptools; python_version<"3.12"' wheel - name: Install project and test dependencies run: | - pip install ".[test]" + pip install '.[test]' + + - name: Show POSIX file ownership + # Cygwin's `ls -ld` reports the NTFS Owner SID via Cygwin's SID-to-uid + # mapping (well-known SIDs by their RID, machine-local accounts by + # 0x30000+RID). That mapping is what Cygwin git's + # `is_path_owned_by_current_user` reduces to, so this is the view that + # determines whether `safe.directory` is consulted. + run: | + ls -ld -- \ + "$(pwd)" \ + "$(pwd)/.git" \ + "$(pwd)/git/ext/gitdb" \ + "$(pwd)/git/ext/gitdb/.git" \ + "$(pwd)/.git/modules/gitdb" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap/.git" \ + "$(pwd)/.git/modules/gitdb/modules/smmap" \ + "${HOME:?HOME is not set}/.gitconfig" \ + 2>&1 || true + + - name: Show NTFS file ownership + # Authoritative NTFS Owner via Get-Acl, with no Cygwin SID-to-uid layer + # in between -- useful for confirming what the Cygwin view reports as + # "Administrators" is the BUILTIN\Administrators SID (S-1-5-32-544). + shell: pwsh + run: | + $paths = @( + "$pwd", + "$pwd\.git", + "$pwd\git\ext\gitdb", + "$pwd\git\ext\gitdb\.git", + "$pwd\.git\modules\gitdb", + "$pwd\git\ext\gitdb\gitdb\ext\smmap", + "$pwd\git\ext\gitdb\gitdb\ext\smmap\.git", + "$pwd\.git\modules\gitdb\modules\smmap", + "$env:USERPROFILE\.gitconfig" + ) + foreach ($p in $paths) { + if (Test-Path -LiteralPath $p) { + try { + $owner = (Get-Acl -LiteralPath $p).Owner + } catch { + $owner = "ERROR: $($_.Exception.Message)" + } + "{0,-44} {1}" -f $owner, $p + } else { + "(missing: $p)" + } + } + + - name: Show safe.directory entries + run: git config --global --get-all safe.directory - name: Show version and platform information run: | @@ -77,6 +142,6 @@ jobs: python --version python -c 'import os, sys; print(f"sys.platform={sys.platform!r}, os.name={os.name!r}")' - - name: Test with pytest + - name: Test with pytest (${{ matrix.additional-pytest-args }}) run: | - pytest --color=yes -p no:sugar --instafail -vv + pytest --color=yes -p no:sugar --instafail -vv ${{ matrix.additional-pytest-args }} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a0e81a993..e32e946c8 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,14 +2,17 @@ name: Lint on: [push, pull_request, workflow_dispatch] +permissions: + contents: read + jobs: lint: - runs-on: ubuntu-latest + runs-on: ubuntu-slim steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: python-version: "3.x" diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 031b0e6b2..6746b92c6 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -9,43 +9,62 @@ permissions: contents: read jobs: - build: + test: strategy: - fail-fast: false matrix: - os: ["ubuntu-latest", "macos-13", "macos-14", "windows-latest"] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + os-type: [ubuntu, macos, windows] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t"] exclude: - - os: "macos-14" - python-version: "3.7" - - os: "macos-14" - python-version: "3.8" - - os: "macos-14" - python-version: "3.9" + - os-type: macos + python-version: "3.7" # Not available for the ARM-based macOS runners. + - os-type: macos + python-version: "3.13t" + - os-type: macos + python-version: "3.14t" + - os-type: windows + python-version: "3.13" # FIXME: Fix and enable Python 3.13 and 3.14 on Windows (#1955). + - os-type: windows + python-version: "3.13t" + - os-type: windows + python-version: "3.14" + - os-type: windows + python-version: "3.14t" include: + - os-ver: latest + - os-type: ubuntu + python-version: "3.7" + os-ver: "22.04" + - build-docs: true # We ensure documentation builds, except on very old interpreters. + - python-version: "3.7" + build-docs: false + - python-version: "3.8" + build-docs: false - experimental: false - runs-on: ${{ matrix.os }} + fail-fast: false + + runs-on: ${{ matrix.os-type }}-${{ matrix.os-ver }} defaults: run: shell: bash --noprofile --norc -exo pipefail {0} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} allow-prereleases: ${{ matrix.experimental }} - name: Set up WSL (Windows) - if: startsWith(matrix.os, 'windows') - uses: Vampire/setup-wsl@v3.1.1 + if: matrix.os-type == 'windows' + uses: Vampire/setup-wsl@v6.0.0 with: + wsl-version: 1 distribution: Debian - name: Prepare this repo for tests @@ -62,12 +81,67 @@ jobs: - name: Update PyPA packages run: | - # Get the latest pip, wheel, and prior to Python 3.12, setuptools. - python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + python -m pip install -U pip 'setuptools; python_version<"3.12"' wheel - name: Install project and test dependencies run: | - pip install ".[test]" + pip install '.[test]' + + - name: Show POSIX file ownership + # Linux and macOS only. On Windows, Git Bash's `ls -ld` reports a + # uniform uid+gid for every path regardless of NTFS Owner (MSYS2's + # SID-to-uid mapping doesn't have Cygwin's fidelity), so it would + # not be informative here. The NTFS Owner check below covers Windows. + if: matrix.os-type != 'windows' + run: | + ls -ld -- \ + "$(pwd)" \ + "$(pwd)/.git" \ + "$(pwd)/git/ext/gitdb" \ + "$(pwd)/git/ext/gitdb/.git" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap/.git" \ + "${HOME:?HOME is not set}/.gitconfig" \ + 2>&1 || true + + - name: Show NTFS file ownership + # Windows only. Reads NTFS Owner directly via Get-Acl, which is the + # authoritative view for Windows-side ownership questions; the POSIX + # view via Git Bash's MSYS2 layer is not a reliable proxy here. + if: matrix.os-type == 'windows' + shell: pwsh + run: | + $paths = @( + "$pwd", + "$pwd\.git", + "$pwd\git\ext\gitdb", + "$pwd\git\ext\gitdb\.git", + "$pwd\git\ext\gitdb\gitdb\ext\smmap", + "$pwd\git\ext\gitdb\gitdb\ext\smmap\.git", + "$env:USERPROFILE\.gitconfig" + ) + foreach ($p in $paths) { + if (Test-Path -LiteralPath $p) { + try { + $owner = (Get-Acl -LiteralPath $p).Owner + } catch { + $owner = "ERROR: $($_.Exception.Message)" + } + "{0,-44} {1}" -f $owner, $p + } else { + "(missing: $p)" + } + } + + - name: Show safe.directory entries + # `actions/checkout`'s safe.directory add is only durable for the + # checkout itself (it writes under a throwaway HOME override and + # then discards it), so by the time this step runs the runner + # user's `~/.gitconfig` has no entries -- and git accepts the + # workspace's ownership anyway: Git for Windows via its + # Admins-group exemption on the windows matrix; on Linux/macOS + # the workspace is owned by the test user. Expected: `(none)`. + run: git config --global --get-all safe.directory || echo "(none)" - name: Show version and platform information run: | @@ -79,7 +153,7 @@ jobs: # For debugging hook tests on native Windows systems that may have WSL. - name: Show bash.exe candidates (Windows) - if: startsWith(matrix.os, 'windows') + if: matrix.os-type == 'windows' run: | set +e bash.exe -c 'printenv WSL_DISTRO_NAME; uname -a' @@ -87,14 +161,13 @@ jobs: continue-on-error: true - name: Check types with mypy + if: matrix.python-version != '3.7' && matrix.python-version != '3.8' run: | - mypy --python-version=${{ matrix.python-version }} + mypy --python-version="${PYTHON_VERSION%t}" # Version only, with no "t" for free-threaded. env: MYPY_FORCE_COLOR: "1" TERM: "xterm-256color" # For color: https://github.com/python/mypy/issues/13817 - # With new versions of mypy new issues might arise. This is a problem if there is - # nobody able to fix them, so we have to ignore errors until that changes. - continue-on-error: true + PYTHON_VERSION: ${{ matrix.python-version }} - name: Test with pytest run: | @@ -102,6 +175,7 @@ jobs: continue-on-error: false - name: Documentation + if: matrix.build-docs run: | - pip install ".[doc]" + pip install '.[doc]' make -C doc html diff --git a/.gitignore b/.gitignore index d85569405..eab294a65 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ __pycache__/ # Transient editor files *.swp *~ +\#*# +.#*# # Editor configuration nbproject diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 686d5fa1e..9cc97962d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,28 +1,38 @@ repos: +- repo: https://github.com/codespell-project/codespell + rev: v2.4.2 + hooks: + - id: codespell + additional_dependencies: [tomli] + exclude: ^test/fixtures/ + - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.3 + rev: v0.15.15 hooks: - - id: ruff + - id: ruff-check args: ["--fix"] exclude: ^git/ext/ - id: ruff-format exclude: ^git/ext/ - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.9.0.6 + rev: v0.11.0.1 hooks: - id: shellcheck args: [--color] exclude: ^test/fixtures/polyglot$|^git/ext/ - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v6.0.0 hooks: + - id: end-of-file-fixer + exclude: ^test/fixtures/|COPYING|LICENSE + - id: check-symlinks - id: check-toml - id: check-yaml - id: check-merge-conflict - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.16 + rev: v0.25 hooks: - - id: validate-pyproject + - id: validate-pyproject diff --git a/AUTHORS b/AUTHORS index 9311b3962..15333e1e5 100644 --- a/AUTHORS +++ b/AUTHORS @@ -54,5 +54,8 @@ Contributors are: -Wenhan Zhu -Eliah Kagan -Ethan Lin +-Jonas Scharpf +-Gordon Marx +-Enji Cooper Portions derived from other open source works and are clearly marked. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8536d7f73..60e34a651 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,6 +9,37 @@ The following is a short step-by-step rundown of what one typically would do to - Feel free to add yourself to AUTHORS file. - Create a pull request. +## Quality expectations + +Contributions must be made with care and meet the quality bar of the surrounding code. +That means a change should not leave GitPython worse than it was before: it should be +readable, maintainable, tested where practical, documented and consistent with the +existing style and behavior. + +A contribution that works only narrowly but lowers the quality of the +codebase may be declined. The maintainers may not always be able to provide +detailed feedback. + +## AI-assisted contributions + +If AI edits files for you, disclose it in the pull request description and commit +metadata. Prefer making the agent identity part of the commit, for example by using +an AI author such as `$agent $version ` or a co-author via +a `Co-authored-by: ` trailer. + +Agents operating through a person's GitHub account must identify themselves. For +example, comments posted by an agent should say so directly with phrases like +`AI agent on behalf of : ...`. + +Fully AI-generated comments on pull requests or issues must also be disclosed. +Undisclosed AI-generated comments may lead to the pull request or issue being closed. + +AI-assisted proofreading or wording polish does not need disclosure, but it is still +courteous to mention it when the AI materially influenced the final text. + +Automated or "full-auto" AI contributions without a human responsible for reviewing +and standing behind the work may be closed. + ## Fuzzing Test Specific Documentation For details related to contributing to the fuzzing test suite and OSS-Fuzz integration, please diff --git a/README.md b/README.md index d365a6584..412d38205 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ by setting the `GIT_PYTHON_GIT_EXECUTABLE=` environment variable. - Git (1.7.x or newer) - Python >= 3.7 -The list of dependencies are listed in `./requirements.txt` and `./test-requirements.txt`. +The list of dependencies are listed in [`./requirements.txt`](https://github.com/gitpython-developers/GitPython/blob/main/requirements.txt) and [`./test-requirements.txt`](https://github.com/gitpython-developers/GitPython/blob/main/test-requirements.txt). The installer takes care of installing them for you. ### INSTALL @@ -101,7 +101,7 @@ In the less common case that you do not want to install test dependencies, `pip #### With editable *dependencies* (not preferred, and rarely needed) -In rare cases, you may want to work on GitPython and one or both of its [gitdb](https://github.com/gitpython-developers/gitdb) and [smmap](https://github.com/gitpython-developers/smmap) dependencies at the same time, with changes in your local working copy of gitdb or smmap immediatley reflected in the behavior of your local working copy of GitPython. This can be done by making editable installations of those dependencies in the same virtual environment where you install GitPython. +In rare cases, you may want to work on GitPython and one or both of its [gitdb](https://github.com/gitpython-developers/gitdb) and [smmap](https://github.com/gitpython-developers/smmap) dependencies at the same time, with changes in your local working copy of gitdb or smmap immediately reflected in the behavior of your local working copy of GitPython. This can be done by making editable installations of those dependencies in the same virtual environment where you install GitPython. If you want to do that *and* you want the versions in GitPython's git submodules to be used, then pass `-e git/ext/gitdb` and/or `-e git/ext/gitdb/gitdb/ext/smmap` to `pip install`. This can be done in any order, and in separate `pip install` commands or the same one, so long as `-e` appears before *each* path. For example, you can install GitPython, gitdb, and smmap editably in the currently active virtual environment this way: @@ -180,7 +180,7 @@ Style and formatting checks, and running tests on all the different supported Py #### Configuration files -Specific tools are all configured in the `./pyproject.toml` file: +Specific tools are all configured in the [`./pyproject.toml`](https://github.com/gitpython-developers/GitPython/blob/main/pyproject.toml) file: - `pytest` (test runner) - `coverage.py` (code coverage) @@ -189,9 +189,9 @@ Specific tools are all configured in the `./pyproject.toml` file: Orchestration tools: -- Configuration for `pre-commit` is in the `./.pre-commit-config.yaml` file. -- Configuration for `tox` is in `./tox.ini`. -- Configuration for GitHub Actions (CI) is in files inside `./.github/workflows/`. +- Configuration for `pre-commit` is in the [`./.pre-commit-config.yaml`](https://github.com/gitpython-developers/GitPython/blob/main/.pre-commit-config.yaml) file. +- Configuration for `tox` is in [`./tox.ini`](https://github.com/gitpython-developers/GitPython/blob/main/tox.ini). +- Configuration for GitHub Actions (CI) is in files inside [`./.github/workflows/`](https://github.com/gitpython-developers/GitPython/tree/main/.github/workflows). ### Contributions @@ -212,8 +212,8 @@ Please have a look at the [contributions file][contributing]. ### How to make a new release -1. Update/verify the **version** in the `VERSION` file. -2. Update/verify that the `doc/source/changes.rst` changelog file was updated. It should include a link to the forthcoming release page: `https://github.com/gitpython-developers/GitPython/releases/tag/` +1. Update/verify the **version** in the [`VERSION`](https://github.com/gitpython-developers/GitPython/blob/main/VERSION) file. +2. Update/verify that the [`doc/source/changes.rst`](https://github.com/gitpython-developers/GitPython/blob/main/doc/source/changes.rst) changelog file was updated. It should include a link to the forthcoming release page: `https://github.com/gitpython-developers/GitPython/releases/tag/` 3. Commit everything. 4. Run `git tag -s ` to tag the version in Git. 5. _Optionally_ create and activate a [virtual environment](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#creating-a-virtual-environment). (Then the next step can install `build` and `twine`.) @@ -240,7 +240,7 @@ Please have a look at the [contributions file][contributing]. [3-Clause BSD License](https://opensource.org/license/bsd-3-clause/), also known as the New BSD License. See the [LICENSE file][license]. -One file exclusively used for fuzz testing is subject to [a separate license, detailed here](./fuzzing/README.md#license). +One file exclusively used for fuzz testing is subject to [a separate license, detailed here](https://github.com/gitpython-developers/GitPython/blob/main/fuzzing/README.md#license). This file is not included in the wheel or sdist packages published by the maintainers of GitPython. [contributing]: https://github.com/gitpython-developers/GitPython/blob/main/CONTRIBUTING.md diff --git a/SECURITY.md b/SECURITY.md index d39425b70..0aea34845 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -11,4 +11,6 @@ Only the latest version of GitPython can receive security updates. If a vulnerab ## Reporting a Vulnerability -Please report private portions of a vulnerability to . Doing so helps to receive updates and collaborate on the matter, without disclosing it publicliy right away. +Please report private portions of a vulnerability to . Doing so helps to receive updates and collaborate on the matter, without disclosing it publicly right away. + +Vulnerabilities in GitPython's dependencies [gitdb](https://github.com/gitpython-developers/gitdb/blob/master/SECURITY.md) or [smmap](https://github.com/gitpython-developers/smmap/blob/master/SECURITY.md), which primarily exist to support GitPython, can be reported here as well, at that same link. The affected package (`GitPython`, `gitdb`, or `smmap`) can be included in the report, if known. diff --git a/VERSION b/VERSION index d1bf6638d..0bc461141 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.43 +3.1.50 diff --git a/doc/Makefile b/doc/Makefile index ddeadbd7e..7e0d325fe 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -3,7 +3,7 @@ # You can set these variables from the command line. BUILDDIR = build -SPHINXOPTS = -W +SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = diff --git a/doc/requirements.txt b/doc/requirements.txt index 7769af5ae..24472ba39 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,8 +1,3 @@ -sphinx == 4.3.2 +sphinx >= 7.4.7, < 8 sphinx_rtd_theme -sphinxcontrib-applehelp >= 1.0.2, <= 1.0.4 -sphinxcontrib-devhelp == 1.0.2 -sphinxcontrib-htmlhelp >= 2.0.0, <= 2.0.1 -sphinxcontrib-qthelp == 1.0.3 -sphinxcontrib-serializinghtml == 1.1.5 sphinx-autodoc-typehints diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 0bc757134..b5152b3c5 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -2,6 +2,61 @@ Changelog ========= +3.1.50 +====== + +Save setting of configuration values, this time sections as well, as follow-up to 3.1.49. + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.50 + +3.1.49 +====== + +Save setting of configuration values, +which could be used to inject other more configuration. + +Also more conforming `rev-parse` implementation. + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.49 + +3.1.48 +====== + +Safe reference creation in the face of untrusted input. + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.48 + + +3.1.47 +====== + +Address various security issues related to bypassing injection-protection +of unsafe Git flags. + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.47 + +3.1.46 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.46 + +3.1.45 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.45 + +3.1.44 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.44 + 3.1.43 ====== @@ -20,7 +75,7 @@ https://github.com/gitpython-developers/GitPython/releases/tag/3.1.42 3.1.41 ====== -This release is relevant for security as it fixes a possible arbitary +This release is relevant for security as it fixes a possible arbitrary code execution on Windows. See this PR for details: https://github.com/gitpython-developers/GitPython/pull/1792 @@ -99,7 +154,7 @@ https://github.com/gitpython-developers/gitpython/milestone/61?closed=1 but a necessary fix for https://github.com/gitpython-developers/GitPython/issues/1515. Please take a look at the PR for more information and how to bypass these protections in case they cause breakage: https://github.com/gitpython-developers/GitPython/pull/1521. - + See the following for all changes. https://github.com/gitpython-developers/gitpython/milestone/60?closed=1 @@ -164,38 +219,38 @@ https://github.com/gitpython-developers/gitpython/milestone/53?closed=1 * General: - Remove python 3.6 support - + - Remove distutils ahead of deprecation in standard library. - + - Update sphinx to 4.1.12 and use autodoc-typehints. - + - Include README as long_description on PyPI - + - Test against earliest and latest minor version available on Github Actions (e.g. 3.9.0 and 3.9.7) - + * Typing: - Add types to ALL functions. - + - Ensure py.typed is collected. - + - Increase mypy strictness with disallow_untyped_defs, warn_redundant_casts, warn_unreachable. - + - Use typing.NamedTuple and typing.OrderedDict now 3.6 dropped. - + - Make Protocol classes ABCs at runtime due to new behaviour/bug in 3.9.7 & 3.10.0-rc1 - + - Remove use of typing.TypeGuard until later release, to allow dependent libs time to update. - + - Tracking issue: https://github.com/gitpython-developers/GitPython/issues/1095 * Runtime improvements: - Add clone_multi_options support to submodule.add() - + - Delay calling get_user_id() unless essential, to support sand-boxed environments. - + - Add timeout to handle_process_output(), in case thread.join() hangs. See the following for details: diff --git a/doc/source/index.rst b/doc/source/index.rst index 72db8ee5a..ca5229ac3 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -21,4 +21,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/doc/source/intro.rst b/doc/source/intro.rst index 4f22a0942..d053bd117 100644 --- a/doc/source/intro.rst +++ b/doc/source/intro.rst @@ -122,4 +122,3 @@ License Information =================== GitPython is licensed under the New BSD License. See the LICENSE file for more information. - diff --git a/doc/source/roadmap.rst b/doc/source/roadmap.rst index a573df33a..34c953626 100644 --- a/doc/source/roadmap.rst +++ b/doc/source/roadmap.rst @@ -6,4 +6,3 @@ The full list of milestones including associated tasks can be found on GitHub: https://github.com/gitpython-developers/GitPython/issues Select the respective milestone to filter the list of issues accordingly. - diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst index fd3b14c57..d095d3be3 100644 --- a/doc/source/tutorial.rst +++ b/doc/source/tutorial.rst @@ -513,6 +513,12 @@ The GitDB is a pure-python implementation of the git object database. It is the repo = Repo("path/to/repo", odbt=GitDB) +.. warning:: + ``GitDB`` may fail or become extremely slow when traversing trees in + repositories with very large commits (thousands of changed files in a + single commit). If you encounter ``RecursionError`` or excessive + slowness during tree traversal, switch to ``GitCmdObjectDB`` instead. + GitCmdObjectDB ============== diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py index ca47690ea..afa653d0d 100644 --- a/fuzzing/fuzz-targets/fuzz_submodule.py +++ b/fuzzing/fuzz-targets/fuzz_submodule.py @@ -3,24 +3,21 @@ import os import tempfile from configparser import ParsingError -from utils import is_expected_exception_message, get_max_filename_length - -if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover - path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) - os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary - +from utils import ( + setup_git_environment, + handle_exception, + get_max_filename_length, +) + +# Setup the Git environment +setup_git_environment() from git import Repo, GitCommandError, InvalidGitRepositoryError -if not sys.warnoptions: # pragma: no cover - # The warnings filter below can be overridden by passing the -W option - # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable. - import warnings - import logging - # Fuzzing data causes some modules to generate a large number of warnings - # which are not usually interesting and make the test output hard to read, so we ignore them. - warnings.simplefilter("ignore") - logging.getLogger().setLevel(logging.ERROR) +def sanitize_input(input_str, max_length=255): + """Sanitize and truncate inputs to avoid invalid Git operations.""" + sanitized = "".join(ch for ch in input_str if ch.isalnum() or ch in ("-", "_", ".")) + return sanitized[:max_length] def TestOneInput(data): @@ -33,39 +30,64 @@ def TestOneInput(data): try: with tempfile.TemporaryDirectory() as submodule_temp_dir: sub_repo = Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool()) - sub_repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))) + commit_message = sanitize_input(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))) + sub_repo.index.commit(commit_message) + + submodule_name = sanitize_input( + fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, get_max_filename_length(repo.working_tree_dir)) + ) + ) + + submodule_path = os.path.relpath( + os.path.join(repo.working_tree_dir, submodule_name), + start=repo.working_tree_dir, + ) - submodule_name = f"submodule_{fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))}" - submodule_path = os.path.join(repo.working_tree_dir, submodule_name) - submodule_url = sub_repo.git_dir + # Ensure submodule_path is valid + if not submodule_name or submodule_name.startswith("/") or ".." in submodule_name: + return -1 # Reject invalid input so they are not added to the corpus - submodule = repo.create_submodule(submodule_name, submodule_path, url=submodule_url) - repo.index.commit(f"Added submodule {submodule_name}") + submodule = repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir) + repo.index.commit("Added submodule") with submodule.config_writer() as writer: key_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes())) value_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes())) writer.set_value( - fdp.ConsumeUnicodeNoSurrogates(key_length), fdp.ConsumeUnicodeNoSurrogates(value_length) + sanitize_input(fdp.ConsumeUnicodeNoSurrogates(key_length)), + sanitize_input(fdp.ConsumeUnicodeNoSurrogates(value_length)), ) writer.release() - submodule.update(init=fdp.ConsumeBool(), dry_run=fdp.ConsumeBool(), force=fdp.ConsumeBool()) + submodule.update( + init=fdp.ConsumeBool(), + dry_run=fdp.ConsumeBool(), + force=fdp.ConsumeBool(), + ) + submodule_repo = submodule.module() - new_file_name = fdp.ConsumeUnicodeNoSurrogates( - fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(submodule_repo.working_tree_dir))) + new_file_name = sanitize_input( + fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, get_max_filename_length(submodule_repo.working_tree_dir)) + ) ) new_file_path = os.path.join(submodule_repo.working_tree_dir, new_file_name) with open(new_file_path, "wb") as new_file: new_file.write(fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, 512))) + submodule_repo.index.add([new_file_path]) submodule_repo.index.commit("Added new file to submodule") repo.submodule_update(recursive=fdp.ConsumeBool()) - submodule_repo.head.reset(commit="HEAD~1", working_tree=fdp.ConsumeBool(), head=fdp.ConsumeBool()) - # Use fdp.PickValueInList to ensure at least one of 'module' or 'configuration' is True + submodule_repo.head.reset( + commit="HEAD~1", + working_tree=fdp.ConsumeBool(), + head=fdp.ConsumeBool(), + ) + module_option_value, configuration_option_value = fdp.PickValueInList( [(True, False), (False, True), (True, True)] ) @@ -86,20 +108,11 @@ def TestOneInput(data): IsADirectoryError, NotADirectoryError, BrokenPipeError, + PermissionError, ): return -1 - except ValueError as e: - expected_messages = [ - "SHA is empty", - "Reference at", - "embedded null byte", - "This submodule instance does not exist anymore", - "cmd stdin was empty", - ] - if is_expected_exception_message(e, expected_messages): - return -1 - else: - raise e + except Exception as e: + return handle_exception(e) def main(): diff --git a/fuzzing/fuzz-targets/utils.py b/fuzzing/fuzz-targets/utils.py index f522d2959..97e6eab98 100644 --- a/fuzzing/fuzz-targets/utils.py +++ b/fuzzing/fuzz-targets/utils.py @@ -1,6 +1,9 @@ import atheris # pragma: no cover import os # pragma: no cover -from typing import List # pragma: no cover +import re # pragma: no cover +import traceback # pragma: no cover +import sys # pragma: no cover +from typing import Set, Tuple, List # pragma: no cover @atheris.instrument_func @@ -35,3 +38,85 @@ def get_max_filename_length(path: str) -> int: # pragma: no cover int: The maximum filename length. """ return os.pathconf(path, "PC_NAME_MAX") + + +@atheris.instrument_func +def read_lines_from_file(file_path: str) -> list: + """Read lines from a file and return them as a list.""" + try: + with open(file_path, "r") as f: + return [line.strip() for line in f if line.strip()] + except FileNotFoundError: + print(f"File not found: {file_path}") + return [] + except IOError as e: + print(f"Error reading file {file_path}: {e}") + return [] + + +@atheris.instrument_func +def load_exception_list(file_path: str = "explicit-exceptions-list.txt") -> Set[Tuple[str, str]]: + """Load and parse the exception list from a default or specified file.""" + try: + bundle_dir = os.path.dirname(os.path.abspath(__file__)) + full_path = os.path.join(bundle_dir, file_path) + lines = read_lines_from_file(full_path) + exception_list: Set[Tuple[str, str]] = set() + for line in lines: + match = re.match(r"(.+):(\d+):", line) + if match: + file_path: str = match.group(1).strip() + line_number: str = str(match.group(2).strip()) + exception_list.add((file_path, line_number)) + return exception_list + except Exception as e: + print(f"Error loading exception list: {e}") + return set() + + +@atheris.instrument_func +def match_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) -> bool: + """Match exception traceback with the entries in the exception list.""" + for filename, lineno, _, _ in traceback.extract_tb(exc_traceback): + for file_pattern, line_pattern in exception_list: + # Ensure filename and line_number are strings for regex matching + if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)): + return True + return False + + +@atheris.instrument_func +def check_exception_against_list(exc_traceback, exception_file: str = "explicit-exceptions-list.txt") -> bool: + """Check if the exception traceback matches any entry in the exception list.""" + exception_list = load_exception_list(exception_file) + return match_exception_with_traceback(exception_list, exc_traceback) + + +@atheris.instrument_func +def handle_exception(e: Exception) -> int: + """Encapsulate exception handling logic for reusability.""" + exc_traceback = e.__traceback__ + if check_exception_against_list(exc_traceback): + return -1 + else: + raise e + + +@atheris.instrument_func +def setup_git_environment() -> None: + """Set up the environment variables for Git.""" + bundle_dir = os.path.dirname(os.path.abspath(__file__)) + if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover + bundled_git_binary_path = os.path.join(bundle_dir, "git") + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path + + if not sys.warnoptions: # pragma: no cover + # The warnings filter below can be overridden by passing the -W option + # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable. + import warnings + import logging + + # Fuzzing data causes some modules to generate a large number of warnings + # which are not usually interesting and make the test output hard to read, so we ignore them. + warnings.simplefilter("ignore") + logging.getLogger().setLevel(logging.ERROR) diff --git a/fuzzing/oss-fuzz-scripts/build.sh b/fuzzing/oss-fuzz-scripts/build.sh index e0b3a50ab..c156e872d 100644 --- a/fuzzing/oss-fuzz-scripts/build.sh +++ b/fuzzing/oss-fuzz-scripts/build.sh @@ -15,5 +15,5 @@ find "$SRC" -maxdepth 1 \ # Build fuzzers in $OUT. find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do - compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." + compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." --add-data="$SRC/explicit-exceptions-list.txt:." done diff --git a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh index bbdcf5357..924a3cbf3 100755 --- a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh +++ b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh @@ -91,6 +91,17 @@ create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora" prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing" +pushd "$SRC/gitpython/" +# Search for 'raise' and 'assert' statements in Python files within GitPython's source code and submodules, saving the +# matched file path, line number, and line content to a file named 'explicit-exceptions-list.txt'. +# This file can then be used by fuzz harnesses to check exception tracebacks and filter out explicitly raised or otherwise +# anticipated exceptions to reduce false positive test failures. + +git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- '*.py' -- ':!setup.py' -- ':!test/**' -- ':!fuzzing/**' > "$SRC/explicit-exceptions-list.txt" + +popd + + # The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below. python3 -m pip install --upgrade pip # Upgrade to the latest versions known to work at the time the below changes were introduced: diff --git a/git/cmd.py b/git/cmd.py index 90fc39cd6..92ca09c2a 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -60,6 +60,11 @@ overload, ) +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + from git.types import Literal, PathLike, TBD if TYPE_CHECKING: @@ -207,7 +212,7 @@ def pump_stream( ) if stderr_handler: error_str: Union[str, bytes] = ( - "error: process killed because it timed out." f" kill_after_timeout={kill_after_timeout} seconds" + f"error: process killed because it timed out. kill_after_timeout={kill_after_timeout} seconds" ) if not decode_streams and isinstance(p_stderr, BinaryIO): # Assume stderr_handler needs binary input. @@ -268,12 +273,12 @@ def _safer_popen_windows( if shell: # The original may be immutable, or the caller may reuse it. Mutate a copy. env = {} if env is None else dict(env) - env["NoDefaultCurrentDirectoryInExePath"] = "1" # The "1" can be an value. + env["NoDefaultCurrentDirectoryInExePath"] = "1" # The "1" can be any value. # When not using a shell, the current process does the search in a # CreateProcessW API call, so the variable must be set in our environment. With # a shell, that's unnecessary if https://github.com/python/cpython/issues/101283 - # is patched. In Python versions where it is unpatched, and in the rare case the + # is patched. In Python versions where it is unpatched, in the rare case the # ComSpec environment variable is unset, the search for the shell itself is # unsafe. Setting NoDefaultCurrentDirectoryInExePath in all cases, as done here, # is simpler and protects against that. (As above, the "1" can be any value.) @@ -308,6 +313,234 @@ def dict_to_slots_and__excluded_are_none(self: object, d: Mapping[str, Any], exc ## -- End Utilities -- @} + +class _AutoInterrupt: + """Process wrapper that terminates the wrapped process on finalization. + + This kills/interrupts the stored process instance once this instance goes out of + scope. It is used to prevent processes piling up in case iterators stop reading. + + All attributes are wired through to the contained process object. + + The wait method is overridden to perform automatic status code checking and possibly + raise. + """ + + __slots__ = ("proc", "args", "status") + + # If this is non-zero it will override any status code during _terminate, used + # to prevent race conditions in testing. + _status_code_if_terminate: int = 0 + + def __init__(self, proc: Union[None, subprocess.Popen], args: Any) -> None: + self.proc = proc + self.args = args + self.status: Union[int, None] = None + + def _terminate(self) -> None: + """Terminate the underlying process.""" + if self.proc is None: + return + + proc = self.proc + self.proc = None + if proc.stdin: + proc.stdin.close() + if proc.stdout: + proc.stdout.close() + if proc.stderr: + proc.stderr.close() + # Did the process finish already so we have a return code? + try: + if proc.poll() is not None: + self.status = self._status_code_if_terminate or proc.poll() + return + except OSError as ex: + _logger.info("Ignored error after process had died: %r", ex) + + # It can be that nothing really exists anymore... + if os is None or getattr(os, "kill", None) is None: + return + + # Try to kill it. + try: + proc.terminate() + status = proc.wait() # Ensure the process goes away. + + self.status = self._status_code_if_terminate or status + except (OSError, AttributeError) as ex: + # On interpreter shutdown (notably on Windows), parts of the stdlib used by + # subprocess can already be torn down (e.g. `subprocess._winapi` becomes None), + # which can cause AttributeError during terminate(). In that case, we prefer + # to silently ignore to avoid noisy "Exception ignored in: __del__" messages. + _logger.info("Ignored error while terminating process: %r", ex) + # END exception handling + + def __del__(self) -> None: + self._terminate() + + def __getattr__(self, attr: str) -> Any: + return getattr(self.proc, attr) + + # TODO: Bad choice to mimic `proc.wait()` but with different args. + def wait(self, stderr: Union[None, str, bytes] = b"") -> int: + """Wait for the process and return its status code. + + :param stderr: + Previously read value of stderr, in case stderr is already closed. + + :warn: + May deadlock if output or error pipes are used and not handled separately. + + :raise git.exc.GitCommandError: + If the return status is not 0. + """ + if stderr is None: + stderr_b = b"" + stderr_b = force_bytes(data=stderr, encoding="utf-8") + status: Union[int, None] + if self.proc is not None: + status = self.proc.wait() + p_stderr = self.proc.stderr + else: # Assume the underlying proc was killed earlier or never existed. + status = self.status + p_stderr = None + + def read_all_from_possibly_closed_stream(stream: Union[IO[bytes], None]) -> bytes: + if stream: + try: + return stderr_b + force_bytes(stream.read()) + except (OSError, ValueError): + return stderr_b or b"" + else: + return stderr_b or b"" + + # END status handling + + if status != 0: + errstr = read_all_from_possibly_closed_stream(p_stderr) + _logger.debug("AutoInterrupt wait stderr: %r" % (errstr,)) + raise GitCommandError(remove_password_if_present(self.args), status, errstr) + return status + + +_AutoInterrupt.__name__ = "AutoInterrupt" +_AutoInterrupt.__qualname__ = "Git.AutoInterrupt" + + +class _CatFileContentStream: + """Object representing a sized read-only stream returning the contents of + an object. + + This behaves like a stream, but counts the data read and simulates an empty stream + once our sized content region is empty. + + If not all data are read to the end of the object's lifetime, we read the rest to + ensure the underlying stream continues to work. + """ + + __slots__ = ("_stream", "_nbr", "_size") + + def __init__(self, size: int, stream: IO[bytes]) -> None: + self._stream = stream + self._size = size + self._nbr = 0 # Number of bytes read. + + # Special case: If the object is empty, has null bytes, get the final + # newline right away. + if size == 0: + stream.read(1) + # END handle empty streams + + def read(self, size: int = -1) -> bytes: + bytes_left = self._size - self._nbr + if bytes_left == 0: + return b"" + if size > -1: + # Ensure we don't try to read past our limit. + size = min(bytes_left, size) + else: + # They try to read all, make sure it's not more than what remains. + size = bytes_left + # END check early depletion + data = self._stream.read(size) + self._nbr += len(data) + + # Check for depletion, read our final byte to make the stream usable by + # others. + if self._size - self._nbr == 0: + self._stream.read(1) # final newline + # END finish reading + return data + + def readline(self, size: int = -1) -> bytes: + if self._nbr == self._size: + return b"" + + # Clamp size to lowest allowed value. + bytes_left = self._size - self._nbr + if size > -1: + size = min(bytes_left, size) + else: + size = bytes_left + # END handle size + + data = self._stream.readline(size) + self._nbr += len(data) + + # Handle final byte. + if self._size - self._nbr == 0: + self._stream.read(1) + # END finish reading + + return data + + def readlines(self, size: int = -1) -> List[bytes]: + if self._nbr == self._size: + return [] + + # Leave all additional logic to our readline method, we just check the size. + out = [] + nbr = 0 + while True: + line = self.readline() + if not line: + break + out.append(line) + if size > -1: + nbr += len(line) + if nbr > size: + break + # END handle size constraint + # END readline loop + return out + + # skipcq: PYL-E0301 + def __iter__(self) -> "Git.CatFileContentStream": + return self + + def __next__(self) -> bytes: + line = self.readline() + if not line: + raise StopIteration + + return line + + next = __next__ + + def __del__(self) -> None: + bytes_left = self._size - self._nbr + if bytes_left: + # Read and discard - seeking is impossible within a stream. + # This includes any terminating newline. + self._stream.read(bytes_left + 1) + # END handle incomplete read + + +_CatFileContentStream.__name__ = "CatFileContentStream" +_CatFileContentStream.__qualname__ = "Git.CatFileContentStream" + + _USE_SHELL_DEFAULT_MESSAGE = ( "Git.USE_SHELL is deprecated, because only its default value of False is safe. " "It will be removed in a future release." @@ -321,7 +554,7 @@ def dict_to_slots_and__excluded_are_none(self: object, d: Mapping[str, Any], exc ) -def _warn_use_shell(extra_danger: bool) -> None: +def _warn_use_shell(*, extra_danger: bool) -> None: warnings.warn( _USE_SHELL_DANGER_MESSAGE if extra_danger else _USE_SHELL_DEFAULT_MESSAGE, DeprecationWarning, @@ -337,12 +570,12 @@ class _GitMeta(type): def __getattribute(cls, name: str) -> Any: if name == "USE_SHELL": - _warn_use_shell(False) + _warn_use_shell(extra_danger=False) return super().__getattribute__(name) def __setattr(cls, name: str, value: Any) -> Any: if name == "USE_SHELL": - _warn_use_shell(value) + _warn_use_shell(extra_danger=value) super().__setattr__(name, value) if not TYPE_CHECKING: @@ -711,6 +944,21 @@ def check_unsafe_protocols(cls, url: str) -> None: f"The `{protocol}::` protocol looks suspicious, use `allow_unsafe_protocols=True` to allow it." ) + @classmethod + def _canonicalize_option_name(cls, option: str) -> str: + """Return the option name used for unsafe-option checks. + + Examples: + ``"--upload-pack=/tmp/helper"`` -> ``"upload-pack"`` + ``"upload_pack"`` -> ``"upload-pack"`` + ``"--config core.filemode=false"`` -> ``"config"`` + """ + option_name = option.lstrip("-").split("=", 1)[0] + option_tokens = option_name.split(None, 1) + if not option_tokens: + return "" + return dashify(option_tokens[0]) + @classmethod def check_unsafe_options(cls, options: List[str], unsafe_options: List[str]) -> None: """Check for unsafe options. @@ -718,231 +966,16 @@ def check_unsafe_options(cls, options: List[str], unsafe_options: List[str]) -> Some options that are passed to ``git `` can be used to execute arbitrary commands. These are blocked by default. """ - # Options can be of the form `foo`, `--foo bar`, or `--foo=bar`, so we need to - # check if they start with "--foo" or if they are equal to "foo". - bare_unsafe_options = [option.lstrip("-") for option in unsafe_options] + # Options can be of the form `foo`, `--foo`, `--foo bar`, or `--foo=bar`. + canonical_unsafe_options = {cls._canonicalize_option_name(option): option for option in unsafe_options} for option in options: - for unsafe_option, bare_option in zip(unsafe_options, bare_unsafe_options): - if option.startswith(unsafe_option) or option == bare_option: - raise UnsafeOptionError( - f"{unsafe_option} is not allowed, use `allow_unsafe_options=True` to allow it." - ) - - class AutoInterrupt: - """Process wrapper that terminates the wrapped process on finalization. - - This kills/interrupts the stored process instance once this instance goes out of - scope. It is used to prevent processes piling up in case iterators stop reading. - - All attributes are wired through to the contained process object. - - The wait method is overridden to perform automatic status code checking and - possibly raise. - """ - - __slots__ = ("proc", "args", "status") - - # If this is non-zero it will override any status code during _terminate, used - # to prevent race conditions in testing. - _status_code_if_terminate: int = 0 - - def __init__(self, proc: Union[None, subprocess.Popen], args: Any) -> None: - self.proc = proc - self.args = args - self.status: Union[int, None] = None - - def _terminate(self) -> None: - """Terminate the underlying process.""" - if self.proc is None: - return - - proc = self.proc - self.proc = None - if proc.stdin: - proc.stdin.close() - if proc.stdout: - proc.stdout.close() - if proc.stderr: - proc.stderr.close() - # Did the process finish already so we have a return code? - try: - if proc.poll() is not None: - self.status = self._status_code_if_terminate or proc.poll() - return - except OSError as ex: - _logger.info("Ignored error after process had died: %r", ex) - - # It can be that nothing really exists anymore... - if os is None or getattr(os, "kill", None) is None: - return - - # Try to kill it. - try: - proc.terminate() - status = proc.wait() # Ensure the process goes away. - - self.status = self._status_code_if_terminate or status - except OSError as ex: - _logger.info("Ignored error after process had died: %r", ex) - # END exception handling - - def __del__(self) -> None: - self._terminate() - - def __getattr__(self, attr: str) -> Any: - return getattr(self.proc, attr) - - # TODO: Bad choice to mimic `proc.wait()` but with different args. - def wait(self, stderr: Union[None, str, bytes] = b"") -> int: - """Wait for the process and return its status code. - - :param stderr: - Previously read value of stderr, in case stderr is already closed. - - :warn: - May deadlock if output or error pipes are used and not handled - separately. - - :raise git.exc.GitCommandError: - If the return status is not 0. - """ - if stderr is None: - stderr_b = b"" - stderr_b = force_bytes(data=stderr, encoding="utf-8") - status: Union[int, None] - if self.proc is not None: - status = self.proc.wait() - p_stderr = self.proc.stderr - else: # Assume the underlying proc was killed earlier or never existed. - status = self.status - p_stderr = None - - def read_all_from_possibly_closed_stream(stream: Union[IO[bytes], None]) -> bytes: - if stream: - try: - return stderr_b + force_bytes(stream.read()) - except (OSError, ValueError): - return stderr_b or b"" - else: - return stderr_b or b"" - - # END status handling + unsafe_option = canonical_unsafe_options.get(cls._canonicalize_option_name(option)) + if unsafe_option is not None: + raise UnsafeOptionError(f"{unsafe_option} is not allowed, use `allow_unsafe_options=True` to allow it.") - if status != 0: - errstr = read_all_from_possibly_closed_stream(p_stderr) - _logger.debug("AutoInterrupt wait stderr: %r" % (errstr,)) - raise GitCommandError(remove_password_if_present(self.args), status, errstr) - return status + AutoInterrupt: TypeAlias = _AutoInterrupt - # END auto interrupt - - class CatFileContentStream: - """Object representing a sized read-only stream returning the contents of - an object. - - This behaves like a stream, but counts the data read and simulates an empty - stream once our sized content region is empty. - - If not all data are read to the end of the object's lifetime, we read the - rest to ensure the underlying stream continues to work. - """ - - __slots__ = ("_stream", "_nbr", "_size") - - def __init__(self, size: int, stream: IO[bytes]) -> None: - self._stream = stream - self._size = size - self._nbr = 0 # Number of bytes read. - - # Special case: If the object is empty, has null bytes, get the final - # newline right away. - if size == 0: - stream.read(1) - # END handle empty streams - - def read(self, size: int = -1) -> bytes: - bytes_left = self._size - self._nbr - if bytes_left == 0: - return b"" - if size > -1: - # Ensure we don't try to read past our limit. - size = min(bytes_left, size) - else: - # They try to read all, make sure it's not more than what remains. - size = bytes_left - # END check early depletion - data = self._stream.read(size) - self._nbr += len(data) - - # Check for depletion, read our final byte to make the stream usable by - # others. - if self._size - self._nbr == 0: - self._stream.read(1) # final newline - # END finish reading - return data - - def readline(self, size: int = -1) -> bytes: - if self._nbr == self._size: - return b"" - - # Clamp size to lowest allowed value. - bytes_left = self._size - self._nbr - if size > -1: - size = min(bytes_left, size) - else: - size = bytes_left - # END handle size - - data = self._stream.readline(size) - self._nbr += len(data) - - # Handle final byte. - if self._size - self._nbr == 0: - self._stream.read(1) - # END finish reading - - return data - - def readlines(self, size: int = -1) -> List[bytes]: - if self._nbr == self._size: - return [] - - # Leave all additional logic to our readline method, we just check the size. - out = [] - nbr = 0 - while True: - line = self.readline() - if not line: - break - out.append(line) - if size > -1: - nbr += len(line) - if nbr > size: - break - # END handle size constraint - # END readline loop - return out - - # skipcq: PYL-E0301 - def __iter__(self) -> "Git.CatFileContentStream": - return self - - def __next__(self) -> bytes: - line = self.readline() - if not line: - raise StopIteration - - return line - - next = __next__ - - def __del__(self) -> None: - bytes_left = self._size - self._nbr - if bytes_left: - # Read and discard - seeking is impossible within a stream. - # This includes any terminating newline. - self._stream.read(bytes_left + 1) - # END handle incomplete read + CatFileContentStream: TypeAlias = _CatFileContentStream def __init__(self, working_dir: Union[None, PathLike] = None) -> None: """Initialize this instance with: @@ -971,7 +1004,7 @@ def __init__(self, working_dir: Union[None, PathLike] = None) -> None: def __getattribute__(self, name: str) -> Any: if name == "USE_SHELL": - _warn_use_shell(False) + _warn_use_shell(extra_danger=False) return super().__getattribute__(name) def __getattr__(self, name: str) -> Any: @@ -1098,9 +1131,28 @@ def execute( information (stdout). :param command: - The command argument list to execute. - It should be a sequence of program arguments, or a string. The - program to execute is the first item in the args sequence or string. + The command to execute. A sequence of program arguments is recommended. + A string is also accepted, but its meaning is strongly platform-dependent. + + By default, a shell is not used. On Unix-like systems, a string is the whole + program name (so ``"git log -n 1"`` raises :class:`GitCommandNotFound`). On + Windows, the program parses the arguments itself, so multi-word strings can + work but are not portable. + + Avoid ``shell=True`` (and :attr:`Git.USE_SHELL`): this runs the command in + a shell, which is generally unsafe. The shell interprets metacharacters + such as ``;``, ``|``, ``&``, ``$(...)``, ``$VAR``, ``%VAR%``, and ``^`` + (depending on the platform) as syntax. Any untrusted text in the command + can then execute arbitrary OS commands. See :attr:`Git.USE_SHELL`. + + Producing a sequence automatically by :func:`shlex.split` and passing it + as the command is far safer than ``shell=True``. But :func:`shlex.split` + parses POSIX shell syntax on all systems, and the result is still unsafe + for anything but *fixed, fully trusted* strings. Do not use it on strings + built by interpolating values: whitespace or quoting in an untrusted value + can still inject arguments. For input derived in any way from untrusted + data, build the argument sequence yourself, while ensuring each argument + is fully sanitized. :param istream: Standard input filehandle passed to :class:`subprocess.Popen`. @@ -1168,6 +1220,11 @@ def execute( needed (nor useful) to work around any known operating system specific issues. + On Unix-like systems, when migrating away from passing string commands with + ``shell=True``, :func:`shlex.split` may serve as a transitional step in rare + cases, with extreme care. (Drop ``shell=True`` and pass the resulting + sequence as the command.) See the `command` parameter above on the risks. + :param env: A dictionary of environment variables to be passed to :class:`subprocess.Popen`. @@ -1269,6 +1326,7 @@ def execute( stdout=stdout_sink, shell=shell, universal_newlines=universal_newlines, + encoding=defenc if universal_newlines else None, **subprocess_kwargs, ) except cmd_not_found_exception as err: @@ -1318,7 +1376,7 @@ def communicate() -> Tuple[AnyStr, AnyStr]: out, err = proc.communicate() watchdog.cancel() if kill_check.is_set(): - err = 'Timeout: the command "%s" did not complete in %d ' "secs." % ( + err = 'Timeout: the command "%s" did not complete in %d secs.' % ( " ".join(redacted_command), timeout, ) @@ -1342,25 +1400,29 @@ def communicate() -> Tuple[AnyStr, AnyStr]: if output_stream is None: stdout_value, stderr_value = communicate() # Strip trailing "\n". - if stdout_value.endswith(newline) and strip_newline_in_stdout: # type: ignore[arg-type] + if stdout_value is not None and stdout_value.endswith(newline) and strip_newline_in_stdout: # type: ignore[arg-type] stdout_value = stdout_value[:-1] - if stderr_value.endswith(newline): # type: ignore[arg-type] + if stderr_value is not None and stderr_value.endswith(newline): # type: ignore[arg-type] stderr_value = stderr_value[:-1] status = proc.returncode else: max_chunk_size = max_chunk_size if max_chunk_size and max_chunk_size > 0 else io.DEFAULT_BUFFER_SIZE - stream_copy(proc.stdout, output_stream, max_chunk_size) - stdout_value = proc.stdout.read() - stderr_value = proc.stderr.read() + if proc.stdout is not None: + stream_copy(proc.stdout, output_stream, max_chunk_size) + stdout_value = proc.stdout.read() + if proc.stderr is not None: + stderr_value = proc.stderr.read() # Strip trailing "\n". - if stderr_value.endswith(newline): # type: ignore[arg-type] + if stderr_value is not None and stderr_value.endswith(newline): # type: ignore[arg-type] stderr_value = stderr_value[:-1] status = proc.wait() # END stdout handling finally: - proc.stdout.close() - proc.stderr.close() + if proc.stdout is not None: + proc.stdout.close() + if proc.stderr is not None: + proc.stderr.close() if self.GIT_PYTHON_TRACE == "full": cmdstr = " ".join(redacted_command) @@ -1550,7 +1612,7 @@ def _call_process( turns into:: - git rev-list max-count 10 --header master + git rev-list --max-count=10 --header master :return: Same as :meth:`execute`. If no args are given, used :meth:`execute`'s diff --git a/git/config.py b/git/config.py index de3508360..82747eadd 100644 --- a/git/config.py +++ b/git/config.py @@ -66,12 +66,15 @@ CONFIG_LEVELS: ConfigLevels_Tup = ("system", "user", "global", "repository") """The configuration level of a configuration file.""" -CONDITIONAL_INCLUDE_REGEXP = re.compile(r"(?<=includeIf )\"(gitdir|gitdir/i|onbranch):(.+)\"") +CONDITIONAL_INCLUDE_REGEXP = re.compile(r"(?<=includeIf )\"(gitdir|gitdir/i|onbranch|hasconfig:remote\.\*\.url):(.+)\"") """Section pattern to detect conditional includes. See: https://git-scm.com/docs/git-config#_conditional_includes """ +UNSAFE_CONFIG_CHARS_RE = re.compile(r"[\r\n\x00]") +"""Characters that cannot be safely written in config names or values.""" + class MetaParserBuilder(abc.ABCMeta): # noqa: B024 """Utility class wrapping base-class methods into decorators that assure read-only @@ -87,15 +90,15 @@ def __new__(cls, name: str, bases: Tuple, clsdict: Dict[str, Any]) -> "MetaParse mutating_methods = clsdict[kmm] for base in bases: methods = (t for t in inspect.getmembers(base, inspect.isroutine) if not t[0].startswith("_")) - for name, method in methods: - if name in clsdict: + for method_name, method in methods: + if method_name in clsdict: continue method_with_values = needs_values(method) - if name in mutating_methods: + if method_name in mutating_methods: method_with_values = set_dirty_and_flush_changes(method_with_values) # END mutating methods handling - clsdict[name] = method_with_values + clsdict[method_name] = method_with_values # END for each name/method pair # END for each base # END if mutating methods configuration is set @@ -496,19 +499,26 @@ def string_decode(v: str) -> str: if mo: # We might just have handled the last line, which could contain a quotation we want to remove. optname, vi, optval = mo.group("option", "vi", "value") + optname = self.optionxform(optname.rstrip()) + if vi in ("=", ":") and ";" in optval and not optval.strip().startswith('"'): pos = optval.find(";") if pos != -1 and optval[pos - 1].isspace(): optval = optval[:pos] optval = optval.strip() - if optval == '""': - optval = "" - # END handle empty string - optname = self.optionxform(optname.rstrip()) - if len(optval) > 1 and optval[0] == '"' and optval[-1] != '"': + + if len(optval) < 2 or optval[0] != '"': + # Does not open quoting. + pass + elif optval[-1] != '"': + # Opens quoting and does not close: appears to start multi-line quoting. is_multi_line = True optval = string_decode(optval[1:]) - # END handle multi-line + elif optval.find("\\", 1, -1) == -1 and optval.find('"', 1, -1) == -1: + # Opens and closes quoting. Single line, and all we need is quote removal. + optval = optval[1:-1] + # TODO: Handle other quoted content, especially well-formed backslash escapes. + # Preserves multiple values for duplicate optnames. cursect.add(optname, optval) else: @@ -542,11 +552,21 @@ def _included_paths(self) -> List[Tuple[str, str]]: :return: The list of paths, where each path is a tuple of (option, value). """ + + def _all_items(section: str) -> List[Tuple[str, str]]: + """Return all (key, value) pairs for a section, including duplicate keys.""" + return [ + (key, value) + for key, values in self._sections[section].items_all() + if key != "__name__" + for value in values + ] + paths = [] for section in self.sections(): if section == "include": - paths += self.items(section) + paths += _all_items(section) match = CONDITIONAL_INCLUDE_REGEXP.search(section) if match is None or self._repo is None: @@ -567,12 +587,12 @@ def _included_paths(self) -> List[Tuple[str, str]]: if keyword.endswith("/i"): value = re.sub( r"[a-zA-Z]", - lambda m: "[{}{}]".format(m.group().lower(), m.group().upper()), + lambda m: f"[{m.group().lower()!r}{m.group().upper()!r}]", value, ) if self._repo.git_dir: - if fnmatch.fnmatchcase(str(self._repo.git_dir), value): - paths += self.items(section) + if fnmatch.fnmatchcase(os.fspath(self._repo.git_dir), value): + paths += _all_items(section) elif keyword == "onbranch": try: @@ -582,8 +602,12 @@ def _included_paths(self) -> List[Tuple[str, str]]: continue if fnmatch.fnmatchcase(branch_name, value): - paths += self.items(section) - + paths += _all_items(section) + elif keyword == "hasconfig:remote.*.url": + for remote in self._repo.remotes: + if fnmatch.fnmatchcase(remote.url, value): + paths += _all_items(section) + break return paths def read(self) -> None: # type: ignore[override] @@ -622,8 +646,6 @@ def read(self) -> None: # type: ignore[override] file_path = cast(IO[bytes], file_path) self._read(file_path, file_path.name) else: - # Assume a path if it is not a file-object. - file_path = cast(PathLike, file_path) try: with open(file_path, "rb") as fp: file_ok = True @@ -757,8 +779,9 @@ def _assure_writable(self, method_name: str) -> None: if self.read_only: raise IOError("Cannot execute non-constant method %s.%s" % (self, method_name)) - def add_section(self, section: str) -> None: + def add_section(self, section: "cp._SectionName") -> None: """Assures added options will stay in order.""" + self._assure_config_name_safe(section, "section") return super().add_section(section) @property @@ -863,6 +886,30 @@ def _value_to_string(self, value: Union[str, bytes, int, float, bool]) -> str: return str(value) return force_text(value) + def _value_to_string_safe(self, value: Union[str, bytes, int, float, bool]) -> str: + value_str = self._value_to_string(value) + if UNSAFE_CONFIG_CHARS_RE.search(value_str): + raise ValueError("Git config values must not contain CR, LF, or NUL") + return value_str + + def _assure_config_name_safe(self, name: "cp._SectionName", label: str) -> None: + if isinstance(name, str) and UNSAFE_CONFIG_CHARS_RE.search(name): + raise ValueError("Git config %s names must not contain CR, LF, or NUL" % label) + + @needs_values + @set_dirty_and_flush_changes + def set( + self, + section: str, + option: str, + value: Union[str, bytes, int, float, bool, None] = None, + ) -> None: + self._assure_config_name_safe(section, "section") + self._assure_config_name_safe(option, "option") + if value is not None: + value = self._value_to_string_safe(value) + return super().set(section, option, value) + @needs_values @set_dirty_and_flush_changes def set_value(self, section: str, option: str, value: Union[str, bytes, int, float, bool]) -> "GitConfigParser": @@ -883,9 +930,12 @@ def set_value(self, section: str, option: str, value: Union[str, bytes, int, flo :return: This instance """ + self._assure_config_name_safe(section, "section") + self._assure_config_name_safe(option, "option") + value_str = self._value_to_string_safe(value) if not self.has_section(section): self.add_section(section) - self.set(section, option, self._value_to_string(value)) + super().set(section, option, value_str) return self @needs_values @@ -910,9 +960,12 @@ def add_value(self, section: str, option: str, value: Union[str, bytes, int, flo :return: This instance """ + self._assure_config_name_safe(section, "section") + self._assure_config_name_safe(option, "option") + value_str = self._value_to_string_safe(value) if not self.has_section(section): self.add_section(section) - self._sections[section].add(option, self._value_to_string(value)) + self._sections[section].add(option, value_str) return self def rename_section(self, section: str, new_name: str) -> "GitConfigParser": @@ -929,6 +982,7 @@ def rename_section(self, section: str, new_name: str) -> "GitConfigParser": """ if not self.has_section(section): raise ValueError("Source section '%s' doesn't exist" % section) + self._assure_config_name_safe(new_name, "section") if self.has_section(new_name): raise ValueError("Destination section '%s' already exists" % new_name) diff --git a/git/diff.py b/git/diff.py index 9c6ae59e0..5af53e556 100644 --- a/git/diff.py +++ b/git/diff.py @@ -3,7 +3,7 @@ # This module is part of GitPython and is released under the # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -__all__ = ["DiffConstants", "NULL_TREE", "INDEX", "Diffable", "DiffIndex", "Diff"] +__all__ = ["DiffConstants", "NULL_TREE", "NULL_TREE_SHA", "INDEX", "Diffable", "DiffIndex", "Diff"] import enum import re @@ -23,13 +23,14 @@ List, Match, Optional, + Sequence, Tuple, TYPE_CHECKING, TypeVar, Union, cast, ) -from git.types import Literal, PathLike +from git.types import PathLike, Literal if TYPE_CHECKING: from subprocess import Popen @@ -83,6 +84,9 @@ class DiffConstants(enum.Enum): :const:`git.NULL_TREE` and :const:`Diffable.NULL_TREE`. """ +NULL_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" +"""SHA of Git's canonical empty tree object.""" + INDEX: Literal[DiffConstants.INDEX] = DiffConstants.INDEX """Stand-in indicating you want to diff against the index. @@ -289,7 +293,7 @@ class DiffIndex(List[T_Diff]): The class improves the diff handling convenience. """ - change_type = ("A", "C", "D", "R", "M", "T") + change_type: Sequence[Literal["A", "C", "D", "R", "M", "T"]] = ("A", "C", "D", "R", "M", "T") # noqa: F821 """Change type invariant identifying possible ways a blob can have changed: * ``A`` = Added @@ -598,7 +602,14 @@ def _index_from_patch_format(cls, repo: "Repo", proc: Union["Popen", "Git.AutoIn # FIXME: Here SLURPING raw, need to re-phrase header-regexes linewise. text_list: List[bytes] = [] - handle_process_output(proc, text_list.append, None, finalize_process, decode_streams=False) + stderr_list: List[bytes] = [] + + def finalize_process_with_stderr(proc: Union["Popen", "Git.AutoInterrupt"]) -> None: + finalize_process(proc, stderr=b"".join(stderr_list)) + + handle_process_output( + proc, text_list.append, stderr_list.append, finalize_process_with_stderr, decode_streams=False + ) # For now, we have to bake the stream. text = b"".join(text_list) @@ -764,11 +775,16 @@ def _index_from_raw_format(cls, repo: "Repo", proc: "Popen") -> "DiffIndex[Diff] # :100644 100644 687099101... 37c5e30c8... M .gitignore index: "DiffIndex" = DiffIndex() + stderr_list: List[bytes] = [] + + def finalize_process_with_stderr(proc: Union["Popen", "Git.AutoInterrupt"]) -> None: + finalize_process(proc, stderr=b"".join(stderr_list)) + handle_process_output( proc, lambda byt: cls._handle_diff_line(byt, repo, index), - None, - finalize_process, + stderr_list.append, + finalize_process_with_stderr, decode_streams=False, ) diff --git a/git/ext/gitdb b/git/ext/gitdb index 3d3e9572d..0a019a2e2 160000 --- a/git/ext/gitdb +++ b/git/ext/gitdb @@ -1 +1 @@ -Subproject commit 3d3e9572dc452fea53d328c101b3d1440bbefe40 +Subproject commit 0a019a2e2bd73158cf8b637ad78b5d4b8f15e42e diff --git a/git/index/base.py b/git/index/base.py index 28b60a880..f03b452dc 100644 --- a/git/index/base.py +++ b/git/index/base.py @@ -28,6 +28,7 @@ from git.objects import Blob, Commit, Object, Submodule, Tree from git.objects.util import Serializable from git.util import ( + Actor, LazyMixin, LockedFD, join_path_native, @@ -76,7 +77,6 @@ from git.refs.reference import Reference from git.repo import Repo - from git.util import Actor Treeish = Union[Tree, Commit, str, bytes] @@ -407,7 +407,7 @@ def raise_exc(e: Exception) -> NoReturn: r = str(self.repo.working_tree_dir) rs = r + os.sep for path in paths: - abs_path = str(path) + abs_path = os.fspath(path) if not osp.isabs(abs_path): abs_path = osp.join(r, path) # END make absolute path @@ -508,7 +508,7 @@ def iter_blobs( :param predicate: Function(t) returning ``True`` if tuple(stage, Blob) should be yielded by - the iterator. A default filter, the `~git.index.typ.BlobFilter`, allows you + the iterator. A default filter, the :class:`~git.index.typ.BlobFilter`, allows you to yield blobs only if they match a given list of paths. """ for entry in self.entries.values(): @@ -530,7 +530,10 @@ def unmerged_blobs(self) -> Dict[PathLike, List[Tuple[StageType, Blob]]]: stage. That is, a file removed on the 'other' branch whose entries are at stage 3 will not have a stage 3 entry. """ - is_unmerged_blob = lambda t: t[0] != 0 + + def is_unmerged_blob(t: Tuple[StageType, Blob]) -> bool: + return t[0] != 0 + path_map: Dict[PathLike, List[Tuple[StageType, Blob]]] = {} for stage, blob in self.iter_blobs(is_unmerged_blob): path_map.setdefault(blob.path, []).append((stage, blob)) @@ -653,9 +656,12 @@ def _to_relative_path(self, path: PathLike) -> PathLike: return path if self.repo.bare: raise InvalidGitRepositoryError("require non-bare repository") - if not str(path).startswith(str(self.repo.working_tree_dir)): + if not osp.normpath(path).startswith(str(self.repo.working_tree_dir)): raise ValueError("Absolute path %r is not in git repository at %r" % (path, self.repo.working_tree_dir)) - return os.path.relpath(path, self.repo.working_tree_dir) + result = os.path.relpath(path, self.repo.working_tree_dir) + if os.fspath(path).endswith(os.sep) and not result.endswith(os.sep): + result += os.sep + return result def _preprocess_add_items( self, items: Union[PathLike, Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]]] @@ -687,12 +693,17 @@ def _store_path(self, filepath: PathLike, fprogress: Callable) -> BaseIndexEntry This must be ensured in the calling code. """ st = os.lstat(filepath) # Handles non-symlinks as well. + if S_ISLNK(st.st_mode): # In PY3, readlink is a string, but we need bytes. # In PY2, it was just OS encoded bytes, we assumed UTF-8. - open_stream: Callable[[], BinaryIO] = lambda: BytesIO(force_bytes(os.readlink(filepath), encoding=defenc)) + def open_stream() -> BinaryIO: + return BytesIO(force_bytes(os.readlink(filepath), encoding=defenc)) else: - open_stream = lambda: open(filepath, "rb") + + def open_stream() -> BinaryIO: + return open(filepath, "rb") + with open_stream() as stream: fprogress(filepath, False, filepath) istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream)) @@ -767,7 +778,7 @@ def add( - path string Strings denote a relative or absolute path into the repository pointing - to an existing file, e.g., ``CHANGES``, `lib/myfile.ext``, + to an existing file, e.g., ``CHANGES``, ``lib/myfile.ext``, ``/home/gitrepo/lib/myfile.ext``. Absolute paths must start with working tree directory of this index's @@ -786,7 +797,7 @@ def add( They are added at stage 0. - - :class:~`git.objects.blob.Blob` or + - :class:`~git.objects.blob.Blob` or :class:`~git.objects.submodule.base.Submodule` object Blobs are added as they are assuming a valid mode is set. @@ -812,7 +823,7 @@ def add( - :class:`~git.index.typ.BaseIndexEntry` or type - Handling equals the one of :class:~`git.objects.blob.Blob` objects, but + Handling equals the one of :class:`~git.objects.blob.Blob` objects, but the stage may be explicitly set. Please note that Index Entries require binary sha's. @@ -995,7 +1006,7 @@ def remove( The path string may include globs, such as ``*.c``. - - :class:~`git.objects.blob.Blob` object + - :class:`~git.objects.blob.Blob` object Only the path portion is used in this case. @@ -1025,7 +1036,7 @@ def remove( args.append("--") # Preprocess paths. - paths = self._items_to_rela_paths(items) + paths = list(map(os.fspath, self._items_to_rela_paths(items))) # type: ignore[arg-type] removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines() # Process output to gain proper paths. @@ -1117,11 +1128,12 @@ def commit( message: str, parent_commits: Union[List[Commit], None] = None, head: bool = True, - author: Union[None, "Actor"] = None, - committer: Union[None, "Actor"] = None, + author: Union[None, Actor] = None, + committer: Union[None, Actor] = None, author_date: Union[datetime.datetime, str, None] = None, commit_date: Union[datetime.datetime, str, None] = None, skip_hooks: bool = False, + trailers: Union[None, "Dict[str, str]", "List[Tuple[str, str]]"] = None, ) -> Commit: """Commit the current default index file, creating a :class:`~git.objects.commit.Commit` object. @@ -1158,6 +1170,7 @@ def commit( committer=committer, author_date=author_date, commit_date=commit_date, + trailers=trailers, ) if not skip_hooks: run_commit_hook("post-commit", self) @@ -1333,8 +1346,11 @@ def handle_stderr(proc: "Popen[bytes]", iter_checked_out_files: Iterable[PathLik kwargs["as_process"] = True kwargs["istream"] = subprocess.PIPE proc = self.repo.git.checkout_index(args, **kwargs) + # FIXME: Reading from GIL! - make_exc = lambda: GitCommandError(("git-checkout-index",) + tuple(args), 128, proc.stderr.read()) + def make_exc() -> GitCommandError: + return GitCommandError(("git-checkout-index", *args), 128, proc.stderr.read()) + checked_out_files: List[PathLike] = [] for path in paths: @@ -1345,11 +1361,11 @@ def handle_stderr(proc: "Popen[bytes]", iter_checked_out_files: Iterable[PathLik try: self.entries[(co_path, 0)] except KeyError: - folder = str(co_path) + folder = co_path if not folder.endswith("/"): folder += "/" for entry in self.entries.values(): - if str(entry.path).startswith(folder): + if os.fspath(entry.path).startswith(folder): p = entry.path self._write_path_to_stdin(proc, p, p, make_exc, fprogress, read_from_stdout=False) checked_out_files.append(p) @@ -1443,7 +1459,7 @@ def reset( key = entry_key(path, 0) self.entries[key] = nie[key] except KeyError: - # If key is not in theirs, it musn't be in ours. + # If key is not in theirs, it mustn't be in ours. try: del self.entries[key] except KeyError: @@ -1464,12 +1480,11 @@ def reset( return self - # FIXME: This is documented to accept the same parameters as Diffable.diff, but this - # does not handle NULL_TREE for `other`. (The suppressed mypy error is about this.) def diff( self, - other: Union[ # type: ignore[override] + other: Union[ Literal[git_diff.DiffConstants.INDEX], + Literal[git_diff.DiffConstants.NULL_TREE], "Tree", "Commit", str, @@ -1496,6 +1511,44 @@ def diff( if other is self.INDEX: return git_diff.DiffIndex() + if other == git_diff.NULL_TREE or other == git_diff.NULL_TREE_SHA: + args: List[Union[PathLike, str]] = [ + "--cached", + git_diff.NULL_TREE_SHA, + "--abbrev=40", + "--full-index", + ] + + if not any(x in kwargs for x in ("find_renames", "no_renames", "M")): + args.append("-M") + + if create_patch: + args.append("-p") + args.append("--no-ext-diff") + else: + args.append("--raw") + args.append("-z") + + args.append("--no-color") + + if paths is not None and not isinstance(paths, (tuple, list)): + paths = [paths] + + if paths: + args.append("--") + args.extend(paths) + + kwargs["as_process"] = True + proc = self.repo.git.diff(*args, **kwargs) + + diff_method = ( + git_diff.Diff._index_from_patch_format if create_patch else git_diff.Diff._index_from_raw_format + ) + index = diff_method(self.repo, proc) + + proc.wait() + return index + # Index against anything but None is a reverse diff with the respective item. # Handle existing -R flags properly. # Transform strings to the object so that we can call diff on it. diff --git a/git/index/fun.py b/git/index/fun.py index 59cce6ae6..629c19b1e 100644 --- a/git/index/fun.py +++ b/git/index/fun.py @@ -36,7 +36,7 @@ ) from git.util import IndexFileSHA1Writer, finalize_process -from .typ import BaseIndexEntry, IndexEntry, CE_NAMEMASK, CE_STAGESHIFT +from .typ import CE_EXTENDED, BaseIndexEntry, IndexEntry, CE_NAMEMASK, CE_STAGESHIFT from .util import pack, unpack # typing ----------------------------------------------------------------------------- @@ -87,7 +87,7 @@ def run_commit_hook(name: str, index: "IndexFile", *args: str) -> None: return env = os.environ.copy() - env["GIT_INDEX_FILE"] = safe_decode(str(index.path)) + env["GIT_INDEX_FILE"] = safe_decode(os.fspath(index.path)) env["GIT_EDITOR"] = ":" cmd = [hp] try: @@ -158,7 +158,7 @@ def write_cache( write = stream_sha.write # Header - version = 2 + version = 3 if any(entry.extended_flags for entry in entries) else 2 write(b"DIRC") write(pack(">LL", version, len(entries))) @@ -172,6 +172,8 @@ def write_cache( plen = len(path) & CE_NAMEMASK # Path length assert plen == len(path), "Path %s too long to fit into index" % entry.path flags = plen | (entry.flags & CE_NAMEMASK_INV) # Clear possible previous values. + if entry.extended_flags: + flags |= CE_EXTENDED write( pack( ">LLLLLL20sH", @@ -185,6 +187,8 @@ def write_cache( flags, ) ) + if entry.extended_flags: + write(pack(">H", entry.extended_flags)) write(path) real_size = (tell() - beginoffset + 8) & ~7 write(b"\0" * ((beginoffset + real_size) - tell())) @@ -206,8 +210,7 @@ def read_header(stream: IO[bytes]) -> Tuple[int, int]: unpacked = cast(Tuple[int, int], unpack(">LL", stream.read(4 * 2))) version, num_entries = unpacked - # TODO: Handle version 3: extended data, see read-cache.c. - assert version in (1, 2) + assert version in (1, 2, 3), "Unsupported git index version %i, only 1, 2, and 3 are supported" % version return version, num_entries @@ -260,12 +263,15 @@ def read_cache( ctime = unpack(">8s", read(8))[0] mtime = unpack(">8s", read(8))[0] (dev, ino, mode, uid, gid, size, sha, flags) = unpack(">LLLLLL20sH", read(20 + 4 * 6 + 2)) + extended_flags = 0 + if flags & CE_EXTENDED: + extended_flags = unpack(">H", read(2))[0] path_size = flags & CE_NAMEMASK path = read(path_size).decode(defenc) real_size = (tell() - beginoffset + 8) & ~7 read((beginoffset + real_size) - tell()) - entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size)) + entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size, extended_flags)) # entry_key would be the method to use, but we save the effort. entries[(path, entry.stage)] = entry count += 1 diff --git a/git/index/typ.py b/git/index/typ.py index 974252528..927633a9f 100644 --- a/git/index/typ.py +++ b/git/index/typ.py @@ -32,6 +32,9 @@ CE_VALID = 0x8000 CE_STAGESHIFT = 12 +CE_EXT_SKIP_WORKTREE = 0x4000 +CE_EXT_INTENT_TO_ADD = 0x2000 + # } END invariants @@ -87,6 +90,8 @@ class BaseIndexEntryHelper(NamedTuple): uid: int = 0 gid: int = 0 size: int = 0 + # version 3 extended flags, only when (flags & CE_EXTENDED) is set + extended_flags: int = 0 class BaseIndexEntry(BaseIndexEntryHelper): @@ -102,7 +107,7 @@ def __new__( cls, inp_tuple: Union[ Tuple[int, bytes, int, PathLike], - Tuple[int, bytes, int, PathLike, bytes, bytes, int, int, int, int, int], + Tuple[int, bytes, int, PathLike, bytes, bytes, int, int, int, int, int, int], ], ) -> "BaseIndexEntry": """Override ``__new__`` to allow construction from a tuple for backwards @@ -134,6 +139,14 @@ def stage(self) -> int: """ return (self.flags & CE_STAGEMASK) >> CE_STAGESHIFT + @property + def skip_worktree(self) -> bool: + return (self.extended_flags & CE_EXT_SKIP_WORKTREE) > 0 + + @property + def intent_to_add(self) -> bool: + return (self.extended_flags & CE_EXT_INTENT_TO_ADD) > 0 + @classmethod def from_blob(cls, blob: Blob, stage: int = 0) -> "BaseIndexEntry": """:return: Fully equipped BaseIndexEntry at the given stage""" @@ -179,7 +192,7 @@ def from_base(cls, base: "BaseIndexEntry") -> "IndexEntry": Instance of type :class:`BaseIndexEntry`. """ time = pack(">LL", 0, 0) - return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0)) + return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0)) # type: ignore[arg-type] @classmethod def from_blob(cls, blob: Blob, stage: int = 0) -> "IndexEntry": @@ -198,5 +211,5 @@ def from_blob(cls, blob: Blob, stage: int = 0) -> "IndexEntry": 0, 0, blob.size, - ) + ) # type: ignore[arg-type] ) diff --git a/git/index/util.py b/git/index/util.py index e59cb609f..982a5afb7 100644 --- a/git/index/util.py +++ b/git/index/util.py @@ -15,7 +15,7 @@ # typing ---------------------------------------------------------------------- -from typing import Any, Callable, TYPE_CHECKING, Optional, Type +from typing import Any, Callable, TYPE_CHECKING, Optional, Type, cast from git.types import Literal, PathLike, _T @@ -106,7 +106,7 @@ def git_working_dir(func: Callable[..., _T]) -> Callable[..., _T]: @wraps(func) def set_git_working_dir(self: "IndexFile", *args: Any, **kwargs: Any) -> _T: cur_wd = os.getcwd() - os.chdir(str(self.repo.working_tree_dir)) + os.chdir(cast(PathLike, self.repo.working_tree_dir)) try: return func(self, *args, **kwargs) finally: diff --git a/git/objects/base.py b/git/objects/base.py index eeaebc09b..faf600c6b 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -122,7 +122,7 @@ def new(cls, repo: "Repo", id: Union[str, "Reference"]) -> AnyGitObject: :return: New :class:`Object` instance of a type appropriate to the object type behind `id`. The id of the newly created object will be a binsha even though the - input id may have been a `~git.refs.reference.Reference` or rev-spec. + input id may have been a :class:`~git.refs.reference.Reference` or rev-spec. :param id: :class:`~git.refs.reference.Reference`, rev-spec, or hexsha. @@ -218,7 +218,7 @@ class IndexObject(Object): """Base for all objects that can be part of the index file. The classes representing git object types that can be part of the index file are - :class:`~git.objects.tree.Tree and :class:`~git.objects.blob.Blob`. In addition, + :class:`~git.objects.tree.Tree` and :class:`~git.objects.blob.Blob`. In addition, :class:`~git.objects.submodule.base.Submodule`, which is not really a git object type but can be part of an index file, is also a subclass. """ diff --git a/git/objects/blob.py b/git/objects/blob.py index 58de59642..f7d49c9cc 100644 --- a/git/objects/blob.py +++ b/git/objects/blob.py @@ -6,6 +6,7 @@ __all__ = ["Blob"] from mimetypes import guess_type +import os import sys if sys.version_info >= (3, 8): @@ -44,5 +45,5 @@ def mime_type(self) -> str: """ guesses = None if self.path: - guesses = guess_type(str(self.path)) + guesses = guess_type(os.fspath(self.path)) return guesses and guesses[0] or self.DEFAULT_MIME_TYPE diff --git a/git/objects/commit.py b/git/objects/commit.py index d957c9051..da7677ee0 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -289,7 +289,7 @@ def name_rev(self) -> str: """ :return: String describing the commits hex sha based on the closest - `~git.refs.reference.Reference`. + :class:`~git.refs.reference.Reference`. :note: Mostly useful for UI purposes. @@ -349,7 +349,7 @@ def iter_items( return cls._iter_from_process_or_stream(repo, proc) def iter_parents(self, paths: Union[PathLike, Sequence[PathLike]] = "", **kwargs: Any) -> Iterator["Commit"]: - R"""Iterate _all_ parents of this commit. + R"""Iterate *all* parents of this commit. :param paths: Optional path or list of paths limiting the :class:`Commit`\s to those that @@ -377,15 +377,25 @@ def stats(self) -> Stats: :return: :class:`Stats` """ - if not self.parents: - text = self.repo.git.diff_tree(self.hexsha, "--", numstat=True, no_renames=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: + + def process_lines(lines: List[str]) -> str: + text = "" + for file_info, line in zip(lines, lines[len(lines) // 2 :]): + change_type = file_info.split("\t")[0][-1] (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 + text += "%s\t%s\t%s\t%s\n" % (change_type, insertions, deletions, filename) + return text + + if not self.parents: + lines = self.repo.git.diff_tree( + self.hexsha, "--", numstat=True, no_renames=True, root=True, raw=True + ).splitlines()[1:] + text = process_lines(lines) else: - text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True) + lines = self.repo.git.diff( + self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True, raw=True + ).splitlines() + text = process_lines(lines) return Stats._list_from_string(self.repo, text) @property @@ -440,14 +450,7 @@ def trailers_list(self) -> List[Tuple[str, str]]: :return: List containing key-value tuples of whitespace stripped trailer information. """ - cmd = ["git", "interpret-trailers", "--parse"] - proc: Git.AutoInterrupt = self.repo.git.execute( # type: ignore[call-overload] - cmd, - as_process=True, - istream=PIPE, - ) - trailer: str = proc.communicate(str(self.message).encode())[0].decode("utf8") - trailer = trailer.strip() + trailer = self._interpret_trailers(self.repo, self.message, ["--parse"], encoding=self.encoding).strip() if not trailer: return [] @@ -459,6 +462,27 @@ def trailers_list(self) -> List[Tuple[str, str]]: return trailer_list + @classmethod + def _interpret_trailers( + cls, + repo: "Repo", + message: Union[str, bytes], + trailer_args: Sequence[str], + encoding: str = default_encoding, + ) -> str: + message_bytes = message if isinstance(message, bytes) else message.encode(encoding, errors="strict") + cmd = [repo.git.GIT_PYTHON_GIT_EXECUTABLE, "interpret-trailers", *trailer_args] + proc: Git.AutoInterrupt = repo.git.execute( # type: ignore[call-overload] + cmd, + as_process=True, + istream=PIPE, + ) + try: + stdout_bytes, _ = proc.communicate(message_bytes) + return stdout_bytes.decode(encoding, errors="strict") + finally: + finalize_process(proc) + @property def trailers_dict(self) -> Dict[str, List[str]]: """Get the trailers of the message as a dictionary. @@ -560,6 +584,7 @@ def create_from_tree( committer: Union[None, Actor] = None, author_date: Union[None, str, datetime.datetime] = None, commit_date: Union[None, str, datetime.datetime] = None, + trailers: Union[None, Dict[str, str], List[Tuple[str, str]]] = None, ) -> "Commit": """Commit the given tree, creating a :class:`Commit` object. @@ -599,6 +624,14 @@ def create_from_tree( :param commit_date: The timestamp for the committer field. + :param trailers: + Optional trailer key-value pairs to append to the commit message. + Can be a dictionary mapping trailer keys to values, or a list of + ``(key, value)`` tuples (useful when the same key appears multiple + times, e.g. multiple ``Signed-off-by`` trailers). Trailers are + appended using ``git interpret-trailers``. + See :manpage:`git-interpret-trailers(1)`. + :return: :class:`Commit` object representing the new commit. @@ -668,6 +701,21 @@ def create_from_tree( tree = repo.tree(tree) # END tree conversion + # APPLY TRAILERS + if trailers: + trailer_args: List[str] = [] + if isinstance(trailers, dict): + for key, val in trailers.items(): + trailer_args.append("--trailer") + trailer_args.append(f"{key}: {val}") + else: + for key, val in trailers: + trailer_args.append("--trailer") + trailer_args.append(f"{key}: {val}") + + message = cls._interpret_trailers(repo, str(message), trailer_args) + # END apply trailers + # CREATE NEW COMMIT new_commit = cls( repo, @@ -890,7 +938,7 @@ def co_authors(self) -> List[Actor]: if self.message: results = re.findall( r"^Co-authored-by: (.*) <(.*?)>$", - self.message, + str(self.message), re.MULTILINE, ) for author in results: diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index fa60bcdaf..d183672db 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -11,6 +11,7 @@ import stat import sys import uuid +import urllib import git from git.cmd import Git @@ -65,7 +66,7 @@ if TYPE_CHECKING: from git.index import IndexFile from git.objects.commit import Commit - from git.refs import Head + from git.refs import Head, RemoteReference from git.repo import Repo # ----------------------------------------------------------------------------- @@ -351,7 +352,12 @@ def _clone_repo( module_abspath_dir = osp.dirname(module_abspath) if not osp.isdir(module_abspath_dir): os.makedirs(module_abspath_dir) - module_checkout_path = osp.join(str(repo.working_tree_dir), path) + module_checkout_path = osp.join(repo.working_tree_dir, path) # type: ignore[arg-type] + + if url.startswith("../"): + remote_name = cast("RemoteReference", repo.active_branch.tracking_branch()).remote_name + repo_remote_url = repo.remote(remote_name).url + url = os.path.join(repo_remote_url, url) clone = git.Repo.clone_from( url, @@ -535,7 +541,7 @@ def add( if sm.exists(): # Reretrieve submodule from tree. try: - sm = repo.head.commit.tree[str(path)] + sm = repo.head.commit.tree[os.fspath(path)] sm._name = name return sm except KeyError: @@ -794,9 +800,13 @@ def update( + "Cloning url '%s' to '%s' in submodule %r" % (self.url, checkout_module_abspath, self.name), ) if not dry_run: + if self.url.startswith("."): + url = urllib.parse.urljoin(self.repo.remotes.origin.url + "/", self.url) + else: + url = self.url mrepo = self._clone_repo( self.repo, - self.url, + url, self.path, self.name, n=True, diff --git a/git/objects/tag.py b/git/objects/tag.py index a3bb0b882..88671d316 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -14,7 +14,7 @@ import sys from git.compat import defenc -from git.util import hex_to_bin +from git.util import Actor, hex_to_bin from . import base from .util import get_object_type_by_name, parse_actor_and_date @@ -30,7 +30,6 @@ if TYPE_CHECKING: from git.repo import Repo - from git.util import Actor from .blob import Blob from .commit import Commit @@ -64,7 +63,7 @@ def __init__( binsha: bytes, object: Union[None, base.Object] = None, tag: Union[None, str] = None, - tagger: Union[None, "Actor"] = None, + tagger: Union[None, Actor] = None, tagged_date: Union[int, None] = None, tagger_tz_offset: Union[int, None] = None, message: Union[str, None] = None, diff --git a/git/objects/tree.py b/git/objects/tree.py index 09184a781..a3d611c80 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -5,6 +5,7 @@ __all__ = ["TreeModifier", "Tree"] +import os import sys import git.diff as git_diff @@ -50,7 +51,9 @@ # -------------------------------------------------------- -cmp: Callable[[str, str], int] = lambda a, b: (a > b) - (a < b) + +def cmp(a: str, b: str) -> int: + return (a > b) - (a < b) class TreeModifier: @@ -228,7 +231,7 @@ def _iter_convert_to_object(self, iterable: Iterable[TreeCacheTup]) -> Iterator[ raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) from e # END for each item - def join(self, file: str) -> IndexObjUnion: + def join(self, file: PathLike) -> IndexObjUnion: """Find the named object in this tree's contents. :return: @@ -239,6 +242,7 @@ def join(self, file: str) -> IndexObjUnion: If the given file or tree does not exist in this tree. """ msg = "Blob or Tree named %r not found" + file = os.fspath(file) if "/" in file: tree = self item = self @@ -267,7 +271,7 @@ def join(self, file: str) -> IndexObjUnion: raise KeyError(msg % file) # END handle long paths - def __truediv__(self, file: str) -> IndexObjUnion: + def __truediv__(self, file: PathLike) -> IndexObjUnion: """The ``/`` operator is another syntax for joining. See :meth:`join` for details. diff --git a/git/objects/util.py b/git/objects/util.py index 5c56e6134..a68d701f5 100644 --- a/git/objects/util.py +++ b/git/objects/util.py @@ -568,11 +568,11 @@ def addToStack( yield rval # Only continue to next level if this is appropriate! - nd = d + 1 - if depth > -1 and nd > depth: + next_d = d + 1 + if depth > -1 and next_d > depth: continue - addToStack(stack, item, branch_first, nd) + addToStack(stack, item, branch_first, next_d) # END for each item on work stack diff --git a/git/refs/head.py b/git/refs/head.py index 683634451..3c43993e7 100644 --- a/git/refs/head.py +++ b/git/refs/head.py @@ -22,7 +22,6 @@ from git.types import Commit_ish, PathLike if TYPE_CHECKING: - from git.objects import Commit from git.refs import RemoteReference from git.repo import Repo @@ -44,9 +43,6 @@ class HEAD(SymbolicReference): __slots__ = () - # TODO: This can be removed once SymbolicReference.commit has static type hints. - commit: "Commit" - def __init__(self, repo: "Repo", path: PathLike = _HEAD_NAME) -> None: if path != self._HEAD_NAME: raise ValueError("HEAD instance must point to %r, got %r" % (self._HEAD_NAME, path)) @@ -149,7 +145,7 @@ class Head(Reference): k_config_remote_ref = "merge" # Branch to merge from remote. @classmethod - def delete(cls, repo: "Repo", *heads: "Union[Head, str]", force: bool = False, **kwargs: Any) -> None: + def delete(cls, repo: "Repo", *heads: "Union[Head, str]", force: bool = False, **kwargs: Any) -> None: # type: ignore[override] """Delete the given heads. :param force: diff --git a/git/refs/log.py b/git/refs/log.py index 17e3a94b3..fbbe66b22 100644 --- a/git/refs/log.py +++ b/git/refs/log.py @@ -4,7 +4,6 @@ __all__ = ["RefLog", "RefLogEntry"] from mmap import mmap -import os.path as osp import re import time as _time @@ -126,7 +125,7 @@ def from_line(cls, line: bytes) -> "RefLogEntry": elif len(fields) == 2: info, msg = fields else: - raise ValueError("Line must have up to two TAB-separated fields." " Got %s" % repr(line_str)) + raise ValueError("Line must have up to two TAB-separated fields. Got %s" % repr(line_str)) # END handle first split oldhexsha = info[:40] @@ -145,7 +144,7 @@ def from_line(cls, line: bytes) -> "RefLogEntry": actor = Actor._from_string(info[82 : email_end + 1]) time, tz_offset = parse_date(info[email_end + 2 :]) # skipcq: PYL-W0621 - return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg)) + return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg)) # type: ignore [arg-type] class RefLog(List[RefLogEntry], Serializable): @@ -212,8 +211,11 @@ def path(cls, ref: "SymbolicReference") -> str: :param ref: :class:`~git.refs.symbolic.SymbolicReference` instance + + :raise ValueError: + If `ref.path` is invalid or escapes the repository's reflog directory. """ - return osp.join(ref.repo.git_dir, "logs", to_native_path(ref.path)) + return to_native_path(ref._get_validated_reflog_path(ref.repo, ref.path)) @classmethod def iter_entries(cls, stream: Union[str, "BytesIO", mmap]) -> Iterator[RefLogEntry]: diff --git a/git/refs/reference.py b/git/refs/reference.py index e5d473779..0c4327225 100644 --- a/git/refs/reference.py +++ b/git/refs/reference.py @@ -3,6 +3,7 @@ __all__ = ["Reference"] +import os from git.util import IterableObj, LazyMixin from .symbolic import SymbolicReference, T_References @@ -65,7 +66,7 @@ def __init__(self, repo: "Repo", path: PathLike, check_path: bool = True) -> Non If ``False``, you can provide any path. Otherwise the path must start with the default path prefix of this type. """ - if check_path and not str(path).startswith(self._common_path_default + "/"): + if check_path and not os.fspath(path).startswith(self._common_path_default + "/"): raise ValueError(f"Cannot instantiate {self.__class__.__name__!r} from path {path}") self.path: str # SymbolicReference converts to string at the moment. super().__init__(repo, path) diff --git a/git/refs/remote.py b/git/refs/remote.py index b4f4f7b36..e16ae70f8 100644 --- a/git/refs/remote.py +++ b/git/refs/remote.py @@ -58,17 +58,20 @@ def delete(cls, repo: "Repo", *refs: "RemoteReference", **kwargs: Any) -> None: `kwargs` are given for comparability with the base class method as we should not narrow the signature. """ + for ref in refs: + cls._check_ref_name_valid(ref.path) + repo.git.branch("-d", "-r", *refs) # The official deletion method will ignore remote symbolic refs - these are # generally ignored in the refs/ folder. We don't though and delete remainders # manually. for ref in refs: try: - os.remove(os.path.join(repo.common_dir, ref.path)) + os.remove(cls._get_validated_path(repo.common_dir, ref.path)) except OSError: pass try: - os.remove(os.path.join(repo.git_dir, ref.path)) + os.remove(cls._get_validated_path(repo.git_dir, ref.path)) except OSError: pass # END for each ref diff --git a/git/refs/symbolic.py b/git/refs/symbolic.py index 510850b2e..020de5e13 100644 --- a/git/refs/symbolic.py +++ b/git/refs/symbolic.py @@ -4,6 +4,7 @@ __all__ = ["SymbolicReference"] import os +from pathlib import Path from gitdb.exc import BadName, BadObject @@ -39,8 +40,8 @@ if TYPE_CHECKING: from git.config import GitConfigParser from git.objects.commit import Actor - from git.refs import Head, TagReference, RemoteReference, Reference from git.refs.log import RefLogEntry + from git.refs.reference import Reference from git.repo import Repo @@ -76,10 +77,10 @@ class SymbolicReference: def __init__(self, repo: "Repo", path: PathLike, check_path: bool = False) -> None: self.repo = repo - self.path = path + self.path: PathLike = path def __str__(self) -> str: - return str(self.path) + return os.fspath(self.path) def __repr__(self) -> str: return '' % (self.__class__.__name__, self.path) @@ -103,12 +104,38 @@ def name(self) -> str: In case of symbolic references, the shortest assumable name is the path itself. """ - return str(self.path) + return os.fspath(self.path) @property def abspath(self) -> PathLike: return join_path_native(_git_dir(self.repo, self.path), self.path) + @staticmethod + def _get_validated_path(base: PathLike, path: PathLike) -> str: + path = os.fspath(path) + base_path = os.path.realpath(os.fspath(base)) + abs_path = os.path.realpath(os.path.join(base_path, path)) + try: + common_path = os.path.commonpath([base_path, abs_path]) + except ValueError as e: + raise ValueError("Reference path %r escapes the repository" % path) from e + if os.path.normcase(common_path) != os.path.normcase(base_path): + raise ValueError("Reference path %r escapes the repository" % path) + return abs_path + + @classmethod + def _get_validated_ref_path(cls, repo: "Repo", path: PathLike) -> str: + """Return the absolute filesystem path for a ref after validating it.""" + cls._check_ref_name_valid(path) + ref_path = os.fspath(path) + return cls._get_validated_path(_git_dir(repo, ref_path), ref_path) + + @classmethod + def _get_validated_reflog_path(cls, repo: "Repo", path: PathLike) -> str: + """Return the absolute filesystem path for a reflog after validating it.""" + cls._check_ref_name_valid(path) + return cls._get_validated_path(os.path.join(repo.git_dir, "logs"), path) + @classmethod def _get_packed_refs_path(cls, repo: "Repo") -> str: return os.path.join(repo.common_dir, "packed-refs") @@ -178,7 +205,7 @@ def _check_ref_name_valid(ref_path: PathLike) -> None: """ previous: Union[str, None] = None one_before_previous: Union[str, None] = None - for c in str(ref_path): + for c in os.fspath(ref_path): if c in " ~^:?*[\\": raise ValueError( f"Invalid reference '{ref_path}': references cannot contain spaces, tildes (~), carets (^)," @@ -212,7 +239,7 @@ def _check_ref_name_valid(ref_path: PathLike) -> None: raise ValueError(f"Invalid reference '{ref_path}': references cannot end with a forward slash (/)") elif previous == "@" and one_before_previous is None: raise ValueError(f"Invalid reference '{ref_path}': references cannot be '@'") - elif any(component.endswith(".lock") for component in str(ref_path).split("/")): + elif any(component.endswith(".lock") for component in Path(ref_path).parts): raise ValueError( f"Invalid reference '{ref_path}': references cannot have slash-separated components that end with" " '.lock'" @@ -235,7 +262,7 @@ def _get_ref_info_helper( tokens: Union[None, List[str], Tuple[str, str]] = None repodir = _git_dir(repo, ref_path) try: - with open(os.path.join(repodir, str(ref_path)), "rt", encoding="UTF-8") as fp: + with open(os.path.join(repodir, ref_path), "rt", encoding="UTF-8") as fp: # type: ignore[arg-type] value = fp.read().rstrip() # Don't only split on spaces, but on whitespace, which allows to parse lines like: # 60b64ef992065e2600bfef6187a97f92398a9144 branch 'master' of git-server:/path/to/repo @@ -387,19 +414,25 @@ def set_object( # set the commit on our reference return self._get_reference().set_object(object, logmsg) - commit = property( - _get_commit, - set_commit, # type: ignore[arg-type] - doc="Query or set commits directly", - ) + @property + def commit(self) -> "Commit": + """Query or set commits directly""" + return self._get_commit() + + @commit.setter + def commit(self, commit: Union[Commit, "SymbolicReference", str]) -> "SymbolicReference": + return self.set_commit(commit) + + @property + def object(self) -> AnyGitObject: + """Return the object our ref currently refers to""" + return self._get_object() - object = property( - _get_object, - set_object, # type: ignore[arg-type] - doc="Return the object our ref currently refers to", - ) + @object.setter + def object(self, object: Union[AnyGitObject, "SymbolicReference", str]) -> "SymbolicReference": + return self.set_object(object) - def _get_reference(self) -> "SymbolicReference": + def _get_reference(self) -> "Reference": """ :return: :class:`~git.refs.reference.Reference` object we point to @@ -411,7 +444,7 @@ def _get_reference(self) -> "SymbolicReference": sha, target_ref_path = self._get_ref_info(self.repo, self.path) if target_ref_path is None: raise TypeError("%s is a detached symbolic reference as it points to %r" % (self, sha)) - return self.from_path(self.repo, target_ref_path) + return cast("Reference", self.from_path(self.repo, target_ref_path)) def set_reference( self, @@ -478,7 +511,7 @@ def set_reference( # END handle non-existing # END retrieve old hexsha - fpath = self.abspath + fpath = self._get_validated_ref_path(self.repo, self.path) assure_directory_exists(fpath, is_file=True) lfd = LockedFD(fpath) @@ -496,12 +529,14 @@ def set_reference( return self # Aliased reference - reference: Union["Head", "TagReference", "RemoteReference", "Reference"] - reference = property( # type: ignore[assignment] - _get_reference, - set_reference, # type: ignore[arg-type] - doc="Returns the Reference we point to", - ) + @property + def reference(self) -> "Reference": + return self._get_reference() + + @reference.setter + def reference(self, ref: Union[AnyGitObject, "SymbolicReference", str]) -> "SymbolicReference": + return self.set_reference(ref) + ref = reference def is_valid(self) -> bool: @@ -606,7 +641,7 @@ def to_full_path(cls, path: Union[PathLike, "SymbolicReference"]) -> PathLike: full_ref_path = path if not cls._common_path_default: return full_ref_path - if not str(path).startswith(cls._common_path_default + "/"): + if not os.fspath(path).startswith(cls._common_path_default + "/"): full_ref_path = "%s/%s" % (cls._common_path_default, path) return full_ref_path @@ -623,7 +658,7 @@ def delete(cls, repo: "Repo", path: PathLike) -> None: Alternatively the symbolic reference to be deleted. """ full_ref_path = cls.to_full_path(path) - abs_path = os.path.join(repo.common_dir, full_ref_path) + abs_path = cls._get_validated_ref_path(repo, full_ref_path) if os.path.exists(abs_path): os.remove(abs_path) else: @@ -686,9 +721,8 @@ def _create( symbolic reference. Otherwise it will be resolved to the corresponding object and a detached symbolic reference will be created instead. """ - git_dir = _git_dir(repo, path) full_ref_path = cls.to_full_path(path) - abs_ref_path = os.path.join(git_dir, full_ref_path) + abs_ref_path = cls._get_validated_ref_path(repo, full_ref_path) # Figure out target data. target = reference @@ -698,7 +732,7 @@ def _create( if not force and os.path.isfile(abs_ref_path): target_data = str(target) if isinstance(target, SymbolicReference): - target_data = str(target.path) + target_data = os.fspath(target.path) if not resolve: target_data = "ref: " + target_data with open(abs_ref_path, "rb") as fd: @@ -780,8 +814,8 @@ def rename(self, new_path: PathLike, force: bool = False) -> "SymbolicReference" if self.path == new_path: return self - new_abs_path = os.path.join(_git_dir(self.repo, new_path), new_path) - cur_abs_path = os.path.join(_git_dir(self.repo, self.path), self.path) + new_abs_path = self._get_validated_ref_path(self.repo, new_path) + cur_abs_path = self._get_validated_ref_path(self.repo, self.path) if os.path.isfile(new_abs_path): if not force: # If they point to the same file, it's not an error. @@ -834,7 +868,7 @@ def _iter_items( # Read packed refs. for _sha, rela_path in cls._iter_packed_refs(repo): - if rela_path.startswith(str(common_path)): + if rela_path.startswith(os.fspath(common_path)): rela_paths.add(rela_path) # END relative path matches common path # END packed refs reading @@ -908,8 +942,7 @@ def from_path(cls: Type[T_References], repo: "Repo", path: PathLike) -> T_Refere SymbolicReference, ): try: - instance: T_References - instance = ref_type(repo, path) + instance = cast(T_References, ref_type(repo, path)) if instance.__class__ is SymbolicReference and instance.is_detached: raise ValueError("SymbolicRef was detached, we drop it") else: @@ -923,4 +956,4 @@ def from_path(cls: Type[T_References], repo: "Repo", path: PathLike) -> T_Refere def is_remote(self) -> bool: """:return: True if this symbolic reference points to a remote branch""" - return str(self.path).startswith(self._remote_common_path_default + "/") + return os.fspath(self.path).startswith(self._remote_common_path_default + "/") diff --git a/git/refs/tag.py b/git/refs/tag.py index 1e38663ae..4525b09cb 100644 --- a/git/refs/tag.py +++ b/git/refs/tag.py @@ -45,8 +45,8 @@ class TagReference(Reference): _common_default = "tags" _common_path_default = Reference._common_path_default + "/" + _common_default - @property - def commit(self) -> "Commit": # type: ignore[override] # LazyMixin has unrelated commit method + @property # type: ignore[misc] + def commit(self) -> "Commit": # LazyMixin has unrelated commit method """:return: Commit object the tag ref points to :raise ValueError: @@ -80,8 +80,8 @@ def tag(self) -> Union["TagObject", None]: return None # Make object read-only. It should be reasonably hard to adjust an existing tag. - @property - def object(self) -> AnyGitObject: # type: ignore[override] + @property # type: ignore[misc] + def object(self) -> AnyGitObject: return Reference._get_object(self) @classmethod diff --git a/git/remote.py b/git/remote.py index 15e360064..18d4829af 100644 --- a/git/remote.py +++ b/git/remote.py @@ -250,7 +250,7 @@ def _from_line(cls, remote: "Remote", line: str) -> "PushInfo": flags |= cls.NEW_TAG elif "[new branch]" in summary: flags |= cls.NEW_HEAD - # uptodate encoded in control character + # `uptodate` encoded in control character else: # Fast-forward or forced update - was encoded in control character, # but we parse the old and new commit. @@ -517,6 +517,9 @@ def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> NoReturn: # -> raise NotImplementedError +Progress = Union[RemoteProgress, "UpdateProgress", Callable[..., RemoteProgress], None] + + class Remote(LazyMixin, IterableObj): """Provides easy read and write access to a git remote. @@ -872,7 +875,7 @@ def update(self, **kwargs: Any) -> "Remote": def _get_fetch_info_from_stderr( self, proc: "Git.AutoInterrupt", - progress: Union[Callable[..., Any], RemoteProgress, None], + progress: Progress, kill_after_timeout: Union[None, float] = None, ) -> IterableList["FetchInfo"]: progress = to_progress_instance(progress) @@ -894,7 +897,7 @@ def _get_fetch_info_from_stderr( None, progress_handler, finalizer=None, - decode_streams=True, + decode_streams=False, kill_after_timeout=kill_after_timeout, ) @@ -1000,7 +1003,7 @@ def _assert_refspec(self) -> None: def fetch( self, refspec: Union[str, List[str], None] = None, - progress: Union[RemoteProgress, None, "UpdateProgress"] = None, + progress: Progress = None, verbose: bool = True, kill_after_timeout: Union[None, float] = None, allow_unsafe_protocols: bool = False, @@ -1071,7 +1074,7 @@ def fetch( Git.check_unsafe_options(options=list(kwargs.keys()), unsafe_options=self.unsafe_git_fetch_options) proc = self.repo.git.fetch( - "--", self, *args, as_process=True, with_stdout=False, universal_newlines=False, v=verbose, **kwargs + "--", self, *args, as_process=True, with_stdout=False, universal_newlines=True, v=verbose, **kwargs ) res = self._get_fetch_info_from_stderr(proc, progress, kill_after_timeout=kill_after_timeout) if hasattr(self.repo.odb, "update_cache"): @@ -1081,7 +1084,7 @@ def fetch( def pull( self, refspec: Union[str, List[str], None] = None, - progress: Union[RemoteProgress, "UpdateProgress", None] = None, + progress: Progress = None, kill_after_timeout: Union[None, float] = None, allow_unsafe_protocols: bool = False, allow_unsafe_options: bool = False, @@ -1125,7 +1128,7 @@ def pull( Git.check_unsafe_options(options=list(kwargs.keys()), unsafe_options=self.unsafe_git_pull_options) proc = self.repo.git.pull( - "--", self, refspec, with_stdout=False, as_process=True, universal_newlines=False, v=True, **kwargs + "--", self, refspec, with_stdout=False, as_process=True, universal_newlines=True, v=True, **kwargs ) res = self._get_fetch_info_from_stderr(proc, progress, kill_after_timeout=kill_after_timeout) if hasattr(self.repo.odb, "update_cache"): @@ -1135,7 +1138,7 @@ def pull( def push( self, refspec: Union[str, List[str], None] = None, - progress: Union[RemoteProgress, "UpdateProgress", Callable[..., RemoteProgress], None] = None, + progress: Progress = None, kill_after_timeout: Union[None, float] = None, allow_unsafe_protocols: bool = False, allow_unsafe_options: bool = False, diff --git a/git/repo/base.py b/git/repo/base.py index 346248ddb..2d3cf24f0 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -126,6 +126,7 @@ class Repo: working_dir: PathLike """The working directory of the git command.""" + # stored as string for easier processing, but annotated as path for clearer intention _working_tree_dir: Optional[PathLike] = None git_dir: PathLike @@ -179,7 +180,7 @@ def __init__( R"""Create a new :class:`Repo` instance. :param path: - The path to either the root git directory or the bare git repo:: + The path to either the worktree directory or the .git directory itself:: repo = Repo("/Users/mtrier/Development/git-python") repo = Repo("/Users/mtrier/Development/git-python.git") @@ -215,15 +216,13 @@ def __init__( epath = path or os.getenv("GIT_DIR") if not epath: epath = os.getcwd() + epath = os.fspath(epath) if Git.is_cygwin(): # Given how the tests are written, this seems more likely to catch Cygwin # git used from Windows than Windows git used from Cygwin. Therefore # changing to Cygwin-style paths is the relevant operation. - epath = cygpath(str(epath)) + epath = cygpath(epath) - epath = epath or path or os.getcwd() - if not isinstance(epath, str): - epath = str(epath) if expand_vars and re.search(self.re_envvars, epath): warnings.warn( "The use of environment variables in paths is deprecated" @@ -243,6 +242,28 @@ def __init__( # It's important to normalize the paths, as submodules will otherwise # initialize their repo instances with paths that depend on path-portions # that will not exist after being removed. It's just cleaner. + if ( + osp.isfile(osp.join(curpath, "gitdir")) + and osp.isfile(osp.join(curpath, "commondir")) + and osp.isfile(osp.join(curpath, "HEAD")) + ): + git_dir = curpath + + if "GIT_WORK_TREE" in os.environ: + self._working_tree_dir = os.getenv("GIT_WORK_TREE") + else: + # Linked worktree administrative directories store the path to the + # worktree's .git file in their gitdir file (without "gitdir: " prefix). + with open(osp.join(git_dir, "gitdir")) as fp: + worktree_gitfile = fp.read().strip() + + if not osp.isabs(worktree_gitfile): + worktree_gitfile = osp.normpath(osp.join(git_dir, worktree_gitfile)) + + self._working_tree_dir = osp.dirname(worktree_gitfile) + + break + if is_git_dir(curpath): git_dir = curpath # from man git-config : core.worktree @@ -274,7 +295,8 @@ def __init__( sm_gitpath = find_worktree_git_dir(dotgit) if sm_gitpath is not None: - git_dir = expand_path(sm_gitpath, expand_vars) + # worktrees can use relative paths as of Git 2.48, so we join to curpath + git_dir = osp.normpath(osp.join(curpath, sm_gitpath)) self._working_tree_dir = curpath break @@ -354,21 +376,19 @@ def __ne__(self, rhs: object) -> bool: def __hash__(self) -> int: return hash(self.git_dir) - # Description property - def _get_description(self) -> str: + @property + def description(self) -> str: + """The project's description""" filename = osp.join(self.git_dir, "description") with open(filename, "rb") as fp: return fp.read().rstrip().decode(defenc) - def _set_description(self, descr: str) -> None: + @description.setter + def description(self, descr: str) -> None: filename = osp.join(self.git_dir, "description") with open(filename, "wb") as fp: fp.write((descr + "\n").encode(defenc)) - description = property(_get_description, _set_description, doc="the project's description") - del _get_description - del _set_description - @property def working_tree_dir(self) -> Optional[PathLike]: """ @@ -514,7 +534,7 @@ def create_submodule(self, *args: Any, **kwargs: Any) -> Submodule: def iter_submodules(self, *args: Any, **kwargs: Any) -> Iterator[Submodule]: """An iterator yielding Submodule instances. - See the `~git.objects.util.Traversable` interface for a description of `args` + See the :class:`~git.objects.util.Traversable` interface for a description of `args` and `kwargs`. :return: @@ -522,7 +542,7 @@ def iter_submodules(self, *args: Any, **kwargs: Any) -> Iterator[Submodule]: """ return RootModule(self).traverse(*args, **kwargs) - def submodule_update(self, *args: Any, **kwargs: Any) -> Iterator[Submodule]: + def submodule_update(self, *args: Any, **kwargs: Any) -> RootModule: """Update the submodules, keeping the repository consistent as it will take the previous state into consideration. @@ -686,11 +706,7 @@ def _config_reader( git_dir: Optional[PathLike] = None, ) -> GitConfigParser: if config_level is None: - files = [ - self._get_config_path(cast(Lit_config_levels, f), git_dir) - for f in self.config_level - if cast(Lit_config_levels, f) - ] + files = [self._get_config_path(f, git_dir) for f in self.config_level if f] else: files = [self._get_config_path(config_level, git_dir)] return GitConfigParser(files, read_only=True, repo=self) @@ -885,13 +901,14 @@ def _set_daemon_export(self, value: object) -> None: elif not value and fileexists: os.unlink(filename) - daemon_export = property( - _get_daemon_export, - _set_daemon_export, - doc="If True, git-daemon may export this repository", - ) - del _get_daemon_export - del _set_daemon_export + @property + def daemon_export(self) -> bool: + """If True, git-daemon may export this repository""" + return self._get_daemon_export() + + @daemon_export.setter + def daemon_export(self, value: object) -> None: + self._set_daemon_export(value) def _get_alternates(self) -> List[str]: """The list of alternates for this repo from which objects can be retrieved. @@ -929,11 +946,14 @@ def _set_alternates(self, alts: List[str]) -> None: with open(alternates_path, "wb") as f: f.write("\n".join(alts).encode(defenc)) - alternates = property( - _get_alternates, - _set_alternates, - doc="Retrieve a list of alternates paths or set a list paths to be used as alternates", - ) + @property + def alternates(self) -> List[str]: + """Retrieve a list of alternates paths or set a list paths to be used as alternates""" + return self._get_alternates() + + @alternates.setter + def alternates(self, alts: List[str]) -> None: + self._set_alternates(alts) def is_dirty( self, @@ -959,7 +979,7 @@ def is_dirty( if not submodules: default_args.append("--ignore-submodules") if path: - default_args.extend(["--", str(path)]) + default_args.extend(["--", os.fspath(path)]) if index: # diff index against HEAD. if osp.isfile(self.index.path) and len(self.git.diff("--cached", *default_args)): @@ -1045,11 +1065,19 @@ def active_branch(self) -> Head: :raise TypeError: If HEAD is detached. + :raise ValueError: + If HEAD points to the ``.invalid`` ref Git uses to mark refs as + incompatible with older clients. + :return: :class:`~git.refs.head.Head` to the active branch """ - # reveal_type(self.head.reference) # => Reference - return self.head.reference + active_branch = self.head.reference + if active_branch.name == ".invalid": + raise ValueError( + "HEAD points to 'refs/heads/.invalid', which Git uses to mark refs as incompatible with older clients" + ) + return active_branch def blame_incremental(self, rev: str | HEAD | None, file: str, **kwargs: Any) -> Iterator["BlameEntry"]: """Iterator for blame information for the given file at the given revision. @@ -1359,9 +1387,9 @@ def _clone( ) -> "Repo": odbt = kwargs.pop("odbt", odb_default_type) - # When pathlib.Path or other class-based path is passed - if not isinstance(path, str): - path = str(path) + # url may be a path and this has no effect if it is a string + url = os.fspath(url) + path = os.fspath(path) ## A bug win cygwin's Git, when `--bare` or `--separate-git-dir` # it prepends the cwd or(?) the `url` into the `path, so:: @@ -1378,16 +1406,16 @@ def _clone( multi = shlex.split(" ".join(multi_options)) if not allow_unsafe_protocols: - Git.check_unsafe_protocols(str(url)) + Git.check_unsafe_protocols(url) if not allow_unsafe_options: Git.check_unsafe_options(options=list(kwargs.keys()), unsafe_options=cls.unsafe_git_clone_options) - if not allow_unsafe_options and multi_options: - Git.check_unsafe_options(options=multi_options, unsafe_options=cls.unsafe_git_clone_options) + if not allow_unsafe_options and multi: + Git.check_unsafe_options(options=multi, unsafe_options=cls.unsafe_git_clone_options) proc = git.clone( multi, "--", - Git.polish_url(str(url)), + Git.polish_url(url), clone_path, with_extended_output=True, as_process=True, @@ -1482,7 +1510,7 @@ def clone( self.common_dir, path, type(self.odb), - progress, + progress, # type: ignore[arg-type] multi_options, allow_unsafe_protocols=allow_unsafe_protocols, allow_unsafe_options=allow_unsafe_options, @@ -1543,7 +1571,7 @@ def clone_from( url, to_path, GitCmdObjectDB, - progress, + progress, # type: ignore[arg-type] multi_options, allow_unsafe_protocols=allow_unsafe_protocols, allow_unsafe_options=allow_unsafe_options, diff --git a/git/repo/fun.py b/git/repo/fun.py index 182cf82ed..66e7eba69 100644 --- a/git/repo/fun.py +++ b/git/repo/fun.py @@ -20,6 +20,7 @@ import os import os.path as osp from pathlib import Path +import re import stat from string import digits @@ -28,19 +29,21 @@ from git.cmd import Git from git.exc import WorkTreeRepositoryUnsupported from git.objects import Object +from git.objects.util import parse_date from git.refs import SymbolicReference from git.util import cygpath, bin_to_hex, hex_to_bin # Typing ---------------------------------------------------------------------- -from typing import Optional, TYPE_CHECKING, Union, cast, overload +from typing import Iterator, Optional, TYPE_CHECKING, Tuple, Union, cast, overload from git.types import AnyGitObject, Literal, PathLike if TYPE_CHECKING: from git.db import GitCmdObjectDB - from git.objects import Commit, TagObject + from git.objects import Commit from git.refs.reference import Reference + from git.refs.log import RefLog, RefLogEntry from git.refs.tag import Tag from .base import Repo @@ -139,6 +142,23 @@ def short_to_long(odb: "GitCmdObjectDB", hexsha: str) -> Optional[bytes]: # END exception handling +def _describe_to_long(repo: "Repo", name: str) -> Optional[bytes]: + """Resolve git-describe style names to the abbreviated object they contain.""" + match = re.match(r"^.+-\d+-g([0-9A-Fa-f]{4,40})(?:-dirty)?$", name) + if match is None: + match = re.match(r"^.+-g([0-9A-Fa-f]{4,40})(?:-dirty)?$", name) + if match is None: + match = re.match(r"^([0-9A-Fa-f]{4,40})-dirty$", name) + if match is None: + return None + # END handle match + + hexsha = match.group(1) + if len(hexsha) == 40: + return hexsha.encode("ascii") + return short_to_long(repo.odb, hexsha) + + @overload def name_to_object(repo: "Repo", name: str, return_ref: Literal[False] = ...) -> AnyGitObject: ... @@ -192,6 +212,10 @@ def name_to_object(repo: "Repo", name: str, return_ref: bool = False) -> Union[A # END for each base # END handle hexsha + if hexsha is None: + hexsha = _describe_to_long(repo, name) + # END handle describe output + # Didn't find any ref, this is an error. if return_ref: raise BadObject("Couldn't find reference named %r" % name) @@ -227,6 +251,361 @@ def to_commit(obj: Object) -> "Commit": return obj +def _object_from_hexsha(repo: "Repo", hexsha: str) -> AnyGitObject: + return Object.new_from_sha(repo, hex_to_bin(hexsha)) + + +def _current_reflog_ref(repo: "Repo") -> SymbolicReference: + try: + return repo.head.ref + except TypeError: + return repo.head + # END handle detached head + + +def _common_reflog_path(repo: "Repo", ref: SymbolicReference) -> Optional[str]: + if repo.common_dir == repo.git_dir: + return None + # END handle normal repository + return SymbolicReference._get_validated_path(osp.join(repo.common_dir, "logs"), ref.path) + + +def _ref_log(repo: "Repo", ref: SymbolicReference) -> "RefLog": + try: + return ref.log() + except FileNotFoundError: + common_path = _common_reflog_path(repo, ref) + if common_path and osp.isfile(common_path): + from git.refs.log import RefLog + + return RefLog.from_file(common_path) + # END handle linked-worktree branch logs + try: + if ref.path == repo.head.ref.path: + return repo.head.log() + # END handle linked-worktree current branch logs + except TypeError: + pass + # END handle detached head + raise + # END handle missing branch log + + +def _ref_log_entry(repo: "Repo", ref: SymbolicReference, index: int) -> "RefLogEntry": + try: + return ref.log_entry(index) + except FileNotFoundError: + common_path = _common_reflog_path(repo, ref) + if common_path and osp.isfile(common_path): + from git.refs.log import RefLog + + return RefLog.entry_at(common_path, index) + # END handle linked-worktree branch logs + try: + if ref.path == repo.head.ref.path: + return repo.head.log_entry(index) + # END handle linked-worktree current branch logs + except TypeError: + pass + # END handle detached head + raise + # END handle missing branch log + + +def _find_reflog_entry_by_date(repo: "Repo", ref: SymbolicReference, spec: str) -> str: + try: + timestamp, _offset = parse_date(spec) + except ValueError as e: + raise NotImplementedError("Support for additional @{...} modes not implemented") from e + # END handle unsupported dates + log = _ref_log(repo, ref) + if not log: + raise IndexError("Invalid revlog date: %s" % spec) + # END handle empty log + + for entry in reversed(log): + if entry.time[0] <= timestamp: + return entry.newhexsha + # END found candidate + # END for each entry + return log[0].newhexsha + + +def _previous_checked_out_branch(repo: "Repo", nth: int) -> AnyGitObject: + if nth <= 0: + raise ValueError("Invalid previous checkout selector: -%i" % nth) + # END handle invalid input + + seen = 0 + for entry in reversed(_ref_log(repo, repo.head)): + message = entry.message or "" + prefix = "checkout: moving from " + if not message.startswith(prefix): + continue + # END skip non-checkouts + + previous_branch = message[len(prefix) :].split(" to ", 1)[0] + seen += 1 + if seen == nth: + return name_to_object(repo, previous_branch) + # END found selector + # END for each entry + raise IndexError("Invalid previous checkout selector: -%i" % nth) + + +def _tracking_branch_object(repo: "Repo", ref: Optional[SymbolicReference]) -> AnyGitObject: + from git.refs.head import Head + + if ref is None: + try: + head = repo.active_branch + except TypeError as e: + raise BadName("@{upstream}") from e + elif isinstance(ref, Head): + head = ref + elif os.fspath(ref.path).startswith("refs/heads/"): + head = Head(repo, ref.path) + else: + raise BadName("%s@{upstream}" % ref.name) + # END handle head + + tracking_branch = head.tracking_branch() + if tracking_branch is None: + raise BadName("%s@{upstream}" % head.name) + # END handle missing upstream + return tracking_branch.commit + + +def _apply_reflog(repo: "Repo", ref: Optional[SymbolicReference], content: str) -> AnyGitObject: + if content.startswith("+"): + content = content[1:] + # END handle explicit positive sign + + if content.startswith("-"): + if ref is not None: + raise ValueError("Previous checkout selectors do not take an explicit ref") + if content == "-0": + raise ValueError("Negative zero is invalid in reflog selector") + # END handle invalid negative zero + try: + return _previous_checked_out_branch(repo, int(content[1:])) + except ValueError as e: + raise ValueError("Invalid previous checkout selector: %s" % content) from e + # END handle previous checkout branch + + content_lower = content.lower() + if content_lower in ("u", "upstream", "push"): + return _tracking_branch_object(repo, ref) + # END handle sibling branches + + ref = ref or _current_reflog_ref(repo) + try: + entry_no = int(content) + except ValueError: + hexsha = _find_reflog_entry_by_date(repo, ref, content) + else: + if entry_no >= 100000000: + hexsha = _find_reflog_entry_by_date(repo, ref, "%s +0000" % entry_no) + elif entry_no == 0: + return ref.commit + else: + try: + entry = _ref_log_entry(repo, ref, -(entry_no + 1)) + except IndexError as e: + raise IndexError("Invalid revlog index: %i" % entry_no) from e + # END handle index out of bound + hexsha = entry.newhexsha + # END handle offset or date-like timestamp + # END handle content + return _object_from_hexsha(repo, hexsha) + + +def _find_closing_brace(rev: str, start: int) -> int: + depth = 1 + escaped = False + for idx in range(start + 1, len(rev)): + char = rev[idx] + if escaped: + escaped = False + elif char == "\\": + escaped = True + elif char == "{": + depth += 1 + elif char == "}": + depth -= 1 + if depth == 0: + return idx + # END found end + # END handle char + # END for each char + raise ValueError("Missing closing brace to define type in %s" % rev) + + +def _parse_search(pattern: str) -> Tuple[str, bool]: + if not pattern: + raise ValueError("Revision search requires a pattern") + # END handle empty pattern + + if pattern.startswith("!-"): + return pattern[2:], True + if pattern.startswith("!!"): + return pattern[1:], False + if pattern.startswith("!"): + raise ValueError("Need one character after /!, typically -") + return pattern, False + + +def _unescape_braced_regex(pattern: str) -> str: + out = [] + idx = 0 + while idx < len(pattern): + char = pattern[idx] + if char == "\\" and idx + 1 < len(pattern): + next_char = pattern[idx + 1] + if next_char in "{}\\": + out.append(next_char) + else: + out.append(char) + out.append(next_char) + # END handle escaped char + idx += 2 + continue + # END handle backslash + out.append(char) + idx += 1 + # END for each char + return "".join(out) + + +def _find_commit_by_message( + repo: "Repo", rev: Optional[AnyGitObject], pattern: str, braced: bool = False +) -> AnyGitObject: + pattern, negated = _parse_search(_unescape_braced_regex(pattern) if braced else pattern) + try: + regex = re.compile(pattern) + except re.error as e: + raise ValueError("Invalid commit message regex %r" % pattern) from e + # END handle invalid regex + if rev is None: + commits = _all_ref_commits(repo) + else: + commits = _reachable_commits([to_commit(cast(Object, rev))]) + # END handle starting point + + for commit in commits: + message = commit.message + if isinstance(message, bytes): + message = message.decode(commit.encoding, "replace") + # END handle bytes message + matches = regex.search(message or "") is not None + if matches != negated: + return commit + # END found commit + # END for each commit + raise BadName("No commit found matching message pattern %r" % pattern) + + +def _all_ref_commits(repo: "Repo") -> Iterator["Commit"]: + starts = [] + for ref in repo.references: + try: + starts.append(to_commit(cast(Object, ref.object))) + except (BadName, ValueError): + pass + # END skip refs that do not point to commits + # END for each ref + try: + starts.append(repo.head.commit) + except ValueError: + pass + # END handle unborn head + return _reachable_commits(starts) + + +def _reachable_commits(starts: list["Commit"]) -> Iterator["Commit"]: + seen = set() + pending = starts[:] + while pending: + pending.sort(key=lambda commit: commit.committed_date, reverse=True) + commit = pending.pop(0) + if commit.binsha in seen: + continue + # END skip seen commit + seen.add(commit.binsha) + yield commit + pending.extend(commit.parents) + # END while commits remain + + +def _index_lookup(repo: "Repo", spec: str) -> AnyGitObject: + if not spec: + raise ValueError("':' must be followed by a path") + # END handle empty lookup + + stage = 0 + path = spec + if len(spec) >= 2 and spec[1] == ":" and spec[0] in "0123": + stage = int(spec[0]) + path = spec[2:] + # END handle stage + + try: + return repo.index.entries[(path, stage)].to_blob(repo) + except KeyError as e: + raise BadName("Path %r did not exist in the index at stage %i" % (path, stage)) from e + + +def _tree_lookup(obj: AnyGitObject, path: str) -> AnyGitObject: + if obj.type != "tree": + obj = to_commit(cast(Object, obj)).tree + # END get tree + if not path: + return obj + return obj[path] + + +def _peel(obj: AnyGitObject, output_type: str, repo: "Repo", rev: str) -> AnyGitObject: + if output_type.startswith("/"): + return _find_commit_by_message(repo, obj, output_type[1:], braced=True) + if output_type == "": + return deref_tag(obj) if obj.type == "tag" else obj + if output_type == "object": + return obj + if output_type == "commit": + return to_commit(cast(Object, obj)) + if output_type == "tree": + return to_commit(cast(Object, obj)).tree if obj.type != "tree" else obj + if output_type == "blob": + obj = deref_tag(obj) if obj.type == "tag" else obj + if obj.type == output_type: + return obj + # END handle matching type + raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type)) + if output_type == "tag": + if obj.type == output_type: + return obj + # END handle matching type + raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type)) + # END handle known types + raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) + + +def _first_rev_token(rev: str) -> Optional[int]: + for idx, char in enumerate(rev): + if char in "^~:": + return idx + if char == "@": + next_char = rev[idx + 1] if idx + 1 < len(rev) else None + if idx == 0 and next_char in (None, "^", "~", ":", "{"): + return idx + if next_char == "{": + return idx + # END handle reflog selector + # END handle at symbol + # END for each char + return None + + def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: """Parse a revision string. Like :manpage:`git-rev-parse(1)`. @@ -253,131 +632,82 @@ def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: :raise IndexError: If an invalid reflog index is specified. """ - # Are we in colon search mode? if rev.startswith(":/"): - # Colon search mode - raise NotImplementedError("commit by message search (regex)") - # END handle search + return _find_commit_by_message(repo, None, rev[2:]) + if rev.startswith(":"): + return _index_lookup(repo, rev[1:]) + # END handle top-level colon modes obj: Optional[AnyGitObject] = None ref = None - output_type = "commit" - start = 0 - parsed_to = 0 lr = len(rev) - while start < lr: - if rev[start] not in "^~:@": - start += 1 - continue - # END handle start + first_token = _first_rev_token(rev) + if first_token is None: + return name_to_object(repo, rev) + # END handle plain name + + if first_token == 0: + if rev[0] != "@": + raise ValueError("Revision specifier must start with an object name: %s" % rev) + # END handle invalid leading token + ref = _current_reflog_ref(repo) + obj = ref.commit + start = 0 if rev.startswith("@{") else 1 + else: + if rev[first_token] == "@": + ref = cast("Reference", name_to_object(repo, rev[:first_token], return_ref=True)) + obj = ref.commit + else: + obj = name_to_object(repo, rev[:first_token]) + # END handle anchor + start = first_token + # END initialize anchor + while start < lr: token = rev[start] - if obj is None: - # token is a rev name. - if start == 0: - ref = repo.head.ref - else: - if token == "@": - ref = cast("Reference", name_to_object(repo, rev[:start], return_ref=True)) - else: - obj = name_to_object(repo, rev[:start]) - # END handle token - # END handle refname - else: - if ref is not None: - obj = cast("Commit", ref.commit) - # END handle ref - # END initialize obj on first token - - start += 1 + if token == "@": + if start + 1 >= lr or rev[start + 1] != "{": + raise ValueError("Invalid @ token in revision specifier: %s" % rev) + # END handle invalid @ + end = _find_closing_brace(rev, start + 1) + obj = _apply_reflog(repo, ref if first_token != 0 and start == first_token else None, rev[start + 2 : end]) + ref = None + start = end + 1 + continue + # END handle reflog - # Try to parse {type}. - if start < lr and rev[start] == "{": - end = rev.find("}", start) - if end == -1: - raise ValueError("Missing closing brace to define type in %s" % rev) - output_type = rev[start + 1 : end] # Exclude brace. - - # Handle type. - if output_type == "commit": - pass # Default. - elif output_type == "tree": - try: - obj = cast(AnyGitObject, obj) - obj = to_commit(obj).tree - except (AttributeError, ValueError): - pass # Error raised later. - # END exception handling - elif output_type in ("", "blob"): - obj = cast("TagObject", obj) - if obj and obj.type == "tag": - obj = deref_tag(obj) - else: - # Cannot do anything for non-tags. - pass - # END handle tag - elif token == "@": - # try single int - assert ref is not None, "Require Reference to access reflog" - revlog_index = None - try: - # Transform reversed index into the format of our revlog. - revlog_index = -(int(output_type) + 1) - except ValueError as e: - # TODO: Try to parse the other date options, using parse_date maybe. - raise NotImplementedError("Support for additional @{...} modes not implemented") from e - # END handle revlog index - - try: - entry = ref.log_entry(revlog_index) - except IndexError as e: - raise IndexError("Invalid revlog index: %i" % revlog_index) from e - # END handle index out of bound - - obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha)) - - # Make it pass the following checks. - output_type = "" - else: - raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) - # END handle output type + if token == ":": + return _tree_lookup(obj, rev[start + 1 :]) + # END handle path - # Empty output types don't require any specific type, its just about - # dereferencing tags. - if output_type and obj and obj.type != output_type: - raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type)) - # END verify output type + start += 1 - start = end + 1 # Skip brace. - parsed_to = start + if token == "^" and start < lr and rev[start] == "{": + end = _find_closing_brace(rev, start) + obj = _peel(obj, rev[start + 1 : end], repo, rev) + ref = None + start = end + 1 continue # END parse type - # Try to parse a number. num = 0 - if token != ":": - found_digit = False - while start < lr: - if rev[start] in digits: - num = num * 10 + int(rev[start]) - start += 1 - found_digit = True - else: - break - # END handle number - # END number parse loop - - # No explicit number given, 1 is the default. It could be 0 though. - if not found_digit: - num = 1 - # END set default num - # END number parsing only if non-blob mode - - parsed_to = start - # Handle hierarchy walk. + found_digit = False + while start < lr: + if rev[start] in digits: + num = num * 10 + int(rev[start]) + start += 1 + found_digit = True + else: + break + # END handle number + # END number parse loop + + if not found_digit: + num = 1 + # END set default num + try: - obj = cast(AnyGitObject, obj) if token == "~": obj = to_commit(obj) for _ in range(num): @@ -385,35 +715,22 @@ def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: # END for each history item to walk elif token == "^": obj = to_commit(obj) - # Must be n'th parent. - if num: + if num == 0: + pass + else: obj = obj.parents[num - 1] - elif token == ":": - if obj.type != "tree": - obj = obj.tree - # END get tree type - obj = obj[rev[start:]] - parsed_to = lr + # END handle parent else: raise ValueError("Invalid token: %r" % token) # END end handle tag except (IndexError, AttributeError) as e: raise BadName( - f"Invalid revision spec '{rev}' - not enough " f"parent commits to reach '{token}{int(num)}'" + f"Invalid revision spec '{rev}' - not enough parent commits to reach '{token}{int(num)}'" ) from e # END exception handling # END parse loop - # Still no obj? It's probably a simple name. - if obj is None: - obj = name_to_object(repo, rev) - parsed_to = lr - # END handle simple name - if obj is None: raise ValueError("Revision specifier could not be parsed: %s" % rev) - if parsed_to != lr: - raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to])) - return obj diff --git a/git/types.py b/git/types.py index 584450146..100fff43f 100644 --- a/git/types.py +++ b/git/types.py @@ -13,7 +13,6 @@ Sequence as Sequence, Tuple, TYPE_CHECKING, - Type, TypeVar, Union, ) @@ -130,7 +129,8 @@ https://git-scm.com/docs/gitglossary#def_object_type """ -Lit_commit_ish: Type[Literal["commit", "tag"]] +if TYPE_CHECKING: + Lit_commit_ish = Literal["commit", "tag"] """Deprecated. Type of literal strings identifying typically-commitish git object types. Prior to a bugfix, this type had been defined more broadly. Any usage is in practice @@ -248,6 +248,7 @@ class Files_TD(TypedDict): insertions: int deletions: int lines: int + change_type: str class Total_TD(TypedDict): diff --git a/git/util.py b/git/util.py index 11f963e02..712fabe85 100644 --- a/git/util.py +++ b/git/util.py @@ -36,7 +36,7 @@ import logging import os import os.path as osp -import pathlib +from pathlib import Path import platform import re import shutil @@ -272,9 +272,9 @@ def stream_copy(source: BinaryIO, destination: BinaryIO, chunk_size: int = 512 * def join_path(a: PathLike, *p: PathLike) -> PathLike: R"""Join path tokens together similar to osp.join, but always use ``/`` instead of possibly ``\`` on Windows.""" - path = str(a) + path = os.fspath(a) for b in p: - b = str(b) + b = os.fspath(b) if not b: continue if b.startswith("/"): @@ -289,19 +289,19 @@ def join_path(a: PathLike, *p: PathLike) -> PathLike: if sys.platform == "win32": - def to_native_path_windows(path: PathLike) -> PathLike: - path = str(path) + def to_native_path_windows(path: PathLike) -> str: + path = os.fspath(path) return path.replace("/", "\\") def to_native_path_linux(path: PathLike) -> str: - path = str(path) + path = os.fspath(path) return path.replace("\\", "/") to_native_path = to_native_path_windows else: # No need for any work on Linux. def to_native_path_linux(path: PathLike) -> str: - return str(path) + return os.fspath(path) to_native_path = to_native_path_linux @@ -372,7 +372,7 @@ def is_exec(fpath: str) -> bool: progs = [] if not path: path = os.environ["PATH"] - for folder in str(path).split(os.pathsep): + for folder in os.fspath(path).split(os.pathsep): folder = folder.strip('"') if folder: exe_path = osp.join(folder, program) @@ -397,7 +397,7 @@ def _cygexpath(drive: Optional[str], path: str) -> str: p = cygpath(p) elif drive: p = "/proc/cygdrive/%s/%s" % (drive.lower(), p) - p_str = str(p) # ensure it is a str and not AnyPath + p_str = os.fspath(p) # ensure it is a str and not AnyPath return p_str.replace("\\", "/") @@ -418,7 +418,7 @@ def _cygexpath(drive: Optional[str], path: str) -> str: def cygpath(path: str) -> str: """Use :meth:`git.cmd.Git.polish_url` instead, that works on any environment.""" - path = str(path) # Ensure is str and not AnyPath. + path = os.fspath(path) # Ensure is str and not AnyPath. # Fix to use Paths when 3.5 dropped. Or to be just str if only for URLs? if not path.startswith(("/cygdrive", "//", "/proc/cygdrive")): for regex, parser, recurse in _cygpath_parsers: @@ -438,7 +438,7 @@ def cygpath(path: str) -> str: def decygpath(path: PathLike) -> str: - path = str(path) + path = os.fspath(path) m = _decygpath_regex.match(path) if m: drive, rest_path = m.groups() @@ -464,6 +464,12 @@ def _is_cygwin_git(git_executable: str) -> bool: # Just a name given, not a real path. uname_cmd = osp.join(git_dir, "uname") + + if not (Path(uname_cmd).is_file() and os.access(uname_cmd, os.X_OK)): + _logger.debug(f"Failed checking if running in CYGWIN: {uname_cmd} is not an executable") + _is_cygwin_cache[git_executable] = is_cygwin + return is_cygwin + process = subprocess.Popen([uname_cmd], stdout=subprocess.PIPE, universal_newlines=True) uname_out, _ = process.communicate() # retcode = process.poll() @@ -484,7 +490,9 @@ def is_cygwin_git(git_executable: PathLike) -> bool: ... def is_cygwin_git(git_executable: Union[None, PathLike]) -> bool: - if sys.platform == "win32": # TODO: See if we can use `sys.platform != "cygwin"`. + # TODO: when py3.7 support is dropped, use the new interpolation f"{variable=}" + _logger.debug(f"sys.platform={sys.platform!r}, git_executable={git_executable!r}") + if sys.platform != "cygwin": return False elif git_executable is None: return False @@ -515,7 +523,7 @@ def expand_path(p: PathLike, expand_vars: bool = ...) -> str: def expand_path(p: Union[None, PathLike], expand_vars: bool = True) -> Optional[PathLike]: - if isinstance(p, pathlib.Path): + if isinstance(p, Path): return p.resolve() try: p = osp.expanduser(p) # type: ignore[arg-type] @@ -910,6 +918,7 @@ class Stats: deletions = number of deleted lines as int insertions = number of inserted lines as int lines = total number of lines changed as int, or deletions + insertions + change_type = type of change as str, A|C|D|M|R|T|U|X|B ``full-stat-dict`` @@ -938,7 +947,7 @@ def _list_from_string(cls, repo: "Repo", text: str) -> "Stats": "files": {}, } for line in text.splitlines(): - (raw_insertions, raw_deletions, filename) = line.split("\t") + (change_type, raw_insertions, raw_deletions, filename) = line.split("\t") insertions = raw_insertions != "-" and int(raw_insertions) or 0 deletions = raw_deletions != "-" and int(raw_deletions) or 0 hsh["total"]["insertions"] += insertions @@ -949,6 +958,7 @@ def _list_from_string(cls, repo: "Repo", text: str) -> "Stats": "insertions": insertions, "deletions": deletions, "lines": insertions + deletions, + "change_type": change_type, } hsh["files"][filename.strip()] = files_dict return Stats(hsh["total"], hsh["files"]) @@ -1133,7 +1143,7 @@ def _obtain_lock(self) -> None: # END endless loop -class IterableList(List[T_IterableObj]): +class IterableList(List[T_IterableObj]): # type: ignore[type-var] """List of iterable objects allowing to query an object by id or by named index:: heads = repo.heads @@ -1198,24 +1208,20 @@ def __getattr__(self, attr: str) -> T_IterableObj: return list.__getattribute__(self, attr) def __getitem__(self, index: Union[SupportsIndex, int, slice, str]) -> T_IterableObj: # type: ignore[override] - assert isinstance(index, (int, str, slice)), "Index of IterableList should be an int or str" - if isinstance(index, int): return list.__getitem__(self, index) elif isinstance(index, slice): raise ValueError("Index should be an int or str") else: try: - return getattr(self, index) + return getattr(self, cast(str, index)) except AttributeError as e: - raise IndexError("No item found with id %r" % (self._prefix + index)) from e + raise IndexError(f"No item found with id {self._prefix}{index}") from e # END handle getattr def __delitem__(self, index: Union[SupportsIndex, int, slice, str]) -> None: - assert isinstance(index, (int, str)), "Index of IterableList should be an int or str" - delindex = cast(int, index) - if not isinstance(index, int): + if isinstance(index, str): delindex = -1 name = self._prefix + index for i, item in enumerate(self): diff --git a/init-tests-after-clone.sh b/init-tests-after-clone.sh index bfada01b0..a88f983fc 100755 --- a/init-tests-after-clone.sh +++ b/init-tests-after-clone.sh @@ -40,6 +40,11 @@ fi git tag __testing_point__ # The tests need a branch called master. +# +# If master is locally absent but more than one remote has it, checkout fails +# by default even if all remotes agree, and we fall back to creating it at +# HEAD. The reflog we populate below then traces HEAD's history rather than +# a remote master's, but master is reset to __testing_point__ either way. git checkout master -- || git checkout -b master # The tests need a reflog history on the master branch. diff --git a/pyproject.toml b/pyproject.toml index ee54edb78..149f2dc92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,6 @@ testpaths = "test" # Space separated list of paths from root e.g test tests doc # filterwarnings ignore::WarningType # ignores those warnings [tool.mypy] -python_version = "3.8" files = ["git/", "test/deprecation/"] disallow_untyped_defs = true no_implicit_optional = true @@ -60,16 +59,14 @@ lint.select = [ # "UP", # See: https://docs.astral.sh/ruff/rules/#pyupgrade-up ] lint.extend-select = [ - # "A", # See: https://pypi.org/project/flake8-builtins - "B", # See: https://pypi.org/project/flake8-bugbear - "C4", # See: https://pypi.org/project/flake8-comprehensions - "TCH004", # See: https://docs.astral.sh/ruff/rules/runtime-import-in-type-checking-block/ + # "A", # See: https://pypi.org/project/flake8-builtins + "B", # See: https://pypi.org/project/flake8-bugbear + "C4", # See: https://pypi.org/project/flake8-comprehensions + "TC004", # See: https://docs.astral.sh/ruff/rules/runtime-import-in-type-checking-block/ ] lint.ignore = [ - "E203", # Whitespace before ':' - "E731", # Do not assign a `lambda` expression, use a `def` + # If it becomes necessary to ignore any rules, list them here. ] -lint.ignore-init-module-imports = true lint.unfixable = [ "F401", # Module imported but unused ] @@ -78,3 +75,12 @@ lint.unfixable = [ "test/**" = [ "B018", # useless-expression ] +"fuzzing/fuzz-targets/**" = [ + "E402", # environment setup must happen before the `git` module is imported, thus cannot happen at top of file +] + + +[tool.codespell] +ignore-words-list="afile,assertIn,doesnt,gud,uptodate" +#count = true +quiet-level = 3 diff --git a/requirements-dev.txt b/requirements-dev.txt index f626644af..066b192b8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,8 +1,8 @@ --r requirements.txt --r test-requirements.txt - -# For additional local testing/linting - to be added elsewhere eventually. -ruff -shellcheck -pytest-icdiff -# pytest-profiling +-r requirements.txt +-r test-requirements.txt + +# For additional local testing/linting - to be added elsewhere eventually. +ruff >=0.8 +shellcheck +pytest-icdiff +# pytest-profiling diff --git a/requirements.txt b/requirements.txt index 7159416a9..61d8403b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ gitdb>=4.0.1,<5 -typing-extensions>=3.7.4.3;python_version<"3.8" +typing-extensions>=3.10.0.2;python_version<"3.10" diff --git a/setup.py b/setup.py index f28fedb85..a7b1eab00 100755 --- a/setup.py +++ b/setup.py @@ -95,7 +95,6 @@ def _stamp_version(filename: str) -> None: # "Development Status :: 7 - Inactive", "Environment :: Console", "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Operating System :: POSIX", "Operating System :: Microsoft :: Windows", diff --git a/test-requirements.txt b/test-requirements.txt index 75e9e81fa..e6e01c683 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,7 +1,7 @@ coverage[toml] ddt >= 1.1.1, != 1.4.3 mock ; python_version < "3.8" -mypy +mypy==1.18.2 ; python_version >= "3.9" # pin mypy version to avoid new errors pre-commit pytest >= 7.3.1 pytest-cov diff --git a/test/deprecation/test_types.py b/test/deprecation/test_types.py index f97375a85..d3c6af645 100644 --- a/test/deprecation/test_types.py +++ b/test/deprecation/test_types.py @@ -36,7 +36,7 @@ def test_can_access_lit_commit_ish_but_it_is_not_usable() -> None: assert 'Literal["commit", "tag"]' in message, "Has new definition." assert "GitObjectTypeString" in message, "Has new type name for old definition." - _: Lit_commit_ish = "commit" # type: ignore[valid-type] + _: Lit_commit_ish = "commit" # It should be as documented (even though deliberately unusable in static checks). assert Lit_commit_ish == Literal["commit", "tag"] diff --git a/test/fixtures/diff_numstat b/test/fixtures/diff_numstat index 44c6ca2d5..b76e467eb 100644 --- a/test/fixtures/diff_numstat +++ b/test/fixtures/diff_numstat @@ -1,2 +1,3 @@ -29 18 a.txt -0 5 b.txt +M 29 18 a.txt +M 0 5 b.txt +A 7 0 c.txt \ No newline at end of file diff --git a/test/fixtures/git_config_with_empty_quotes b/test/fixtures/git_config_with_empty_quotes new file mode 100644 index 000000000..f11fe4248 --- /dev/null +++ b/test/fixtures/git_config_with_empty_quotes @@ -0,0 +1,2 @@ +[core] + filemode = "" diff --git a/test/fixtures/git_config_with_extra_whitespace b/test/fixtures/git_config_with_extra_whitespace new file mode 100644 index 000000000..0f727cb5d --- /dev/null +++ b/test/fixtures/git_config_with_extra_whitespace @@ -0,0 +1,2 @@ +[init] + defaultBranch = trunk diff --git a/test/fixtures/git_config_with_quotes b/test/fixtures/git_config_with_quotes new file mode 100644 index 000000000..40e6710d9 --- /dev/null +++ b/test/fixtures/git_config_with_quotes @@ -0,0 +1,3 @@ +[user] + name = "Cody Veal" + email = "cveal05@gmail.com" diff --git a/test/fixtures/git_config_with_quotes_escapes b/test/fixtures/git_config_with_quotes_escapes new file mode 100644 index 000000000..33332c221 --- /dev/null +++ b/test/fixtures/git_config_with_quotes_escapes @@ -0,0 +1,9 @@ +[custom] + hasnewline = "first\nsecond" + hasbackslash = "foo\\bar" + hasquote = "ab\"cd" + hastrailingbackslash = "word\\" + hasunrecognized = "p\qrs" + hasunescapedquotes = "ab"cd"e" + ordinary = "hello world" + unquoted = good evening diff --git a/test/fixtures/git_config_with_quotes_whitespace_inside b/test/fixtures/git_config_with_quotes_whitespace_inside new file mode 100644 index 000000000..c6014cc61 --- /dev/null +++ b/test/fixtures/git_config_with_quotes_whitespace_inside @@ -0,0 +1,2 @@ +[core] + commentString = "# " diff --git a/test/fixtures/git_config_with_quotes_whitespace_outside b/test/fixtures/git_config_with_quotes_whitespace_outside new file mode 100644 index 000000000..4b1615a51 --- /dev/null +++ b/test/fixtures/git_config_with_quotes_whitespace_outside @@ -0,0 +1,2 @@ +[init] + defaultBranch = "trunk" diff --git a/test/fixtures/index_extended_flags b/test/fixtures/index_extended_flags new file mode 100644 index 000000000..f03713b68 Binary files /dev/null and b/test/fixtures/index_extended_flags differ diff --git a/test/lib/helper.py b/test/lib/helper.py index 5d91447ea..1c110e103 100644 --- a/test/lib/helper.py +++ b/test/lib/helper.py @@ -10,6 +10,7 @@ "with_rw_directory", "with_rw_repo", "with_rw_and_rw_remote_repo", + "PathLikeMock", "TestBase", "VirtualEnvironment", "TestCase", @@ -17,9 +18,11 @@ "skipIf", "GIT_REPO", "GIT_DAEMON_PORT", + "xfail_if_raises", ] import contextlib +from dataclasses import dataclass from functools import wraps import gc import io @@ -33,8 +36,10 @@ import time import unittest import venv +from typing import Union, Type, Tuple import gitdb +import pytest from git.util import rmtree, cwd @@ -49,6 +54,15 @@ _logger = logging.getLogger(__name__) + +@dataclass +class PathLikeMock: + path: str + + def __fspath__(self) -> str: + return self.path + + # { Routines @@ -76,7 +90,7 @@ def __init__(self, input_string): self.stdout = io.BytesIO(input_string) self.stderr = io.BytesIO() - def wait(self): + def wait(self, stderr=None): return 0 poll = wait @@ -149,7 +163,7 @@ def repo_creator(self): os.chdir(rw_repo.working_dir) try: return func(self, rw_repo) - except: # noqa: E722 B001 + except: # noqa: E722 _logger.info("Keeping repo after failure: %s", repo_dir) repo_dir = None raise @@ -309,7 +323,7 @@ def remote_repo_creator(self): with cwd(rw_repo.working_dir): try: return func(self, rw_repo, rw_daemon_repo) - except: # noqa: E722 B001 + except: # noqa: E722 _logger.info( "Keeping repos after failure: \n rw_repo_dir: %s \n rw_daemon_repo_dir: %s", rw_repo_dir, @@ -415,9 +429,15 @@ def __init__(self, env_dir, *, with_pip): if with_pip: # The upgrade_deps parameter to venv.create is 3.9+ only, so do it this way. - command = [self.python, "-m", "pip", "install", "--upgrade", "pip"] - if sys.version_info < (3, 12): - command.append("setuptools") + command = [ + self.python, + "-m", + "pip", + "install", + "--upgrade", + "pip", + 'setuptools; python_version<"3.12"', + ] subprocess.check_output(command) @property @@ -448,3 +468,27 @@ def _executable(self, basename): if osp.isfile(path) or osp.islink(path): return path raise RuntimeError(f"no regular file or symlink {path!r}") + + +@contextlib.contextmanager +def xfail_if_raises( + condition: bool, + *, + raises: Union[Type[BaseException], Tuple[Type[BaseException], ...]], + reason: str = "", + strict: bool = False, +): + """Approximates the behavior of @pytest.mark.xfail(..., raises=...) as a context + manager that can be used within a test, such as when the condition is complex or has + side effects + + One difference is it will not report XPASS if the test passes, but setting `strict` + simulates it by raising an exception""" + try: + yield + except raises: + if condition: + pytest.xfail(reason) + raise + if strict and condition: + pytest.fail("[XPASS(strict)] " + reason) diff --git a/test/test_autointerrupt.py b/test/test_autointerrupt.py new file mode 100644 index 000000000..645ec402c --- /dev/null +++ b/test/test_autointerrupt.py @@ -0,0 +1,33 @@ +from git.cmd import Git + + +class _DummyProc: + """Minimal stand-in for subprocess.Popen used to exercise AutoInterrupt. + + We deliberately raise AttributeError from terminate() to simulate interpreter + shutdown on Windows where subprocess internals (e.g. subprocess._winapi) may + already be torn down. + """ + + stdin = None + stdout = None + stderr = None + + def poll(self): + return None + + def terminate(self): + raise AttributeError("TerminateProcess") + + def wait(self): # pragma: no cover - should not be reached in this test + raise AssertionError("wait() should not be called if terminate() fails") + + +def test_autointerrupt_terminate_ignores_attributeerror(): + ai = Git.AutoInterrupt(_DummyProc(), args=["git", "rev-list"]) + + # Should not raise, even if terminate() triggers AttributeError. + ai._terminate() + + # Ensure the reference is cleared to avoid repeated attempts. + assert ai.proc is None diff --git a/test/test_clone.py b/test/test_clone.py index 126ef0063..653d50aa3 100644 --- a/test/test_clone.py +++ b/test/test_clone.py @@ -1,12 +1,23 @@ # This module is part of GitPython and is released under the # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +import os +import os.path as osp +import pathlib +import sys +import tempfile +from unittest import skip + +from git import GitCommandError, Repo +from git.exc import UnsafeOptionError, UnsafeProtocolError + +from test.lib import TestBase, with_rw_directory, with_rw_repo, PathLikeMock + from pathlib import Path import re import git - -from test.lib import TestBase, with_rw_directory +import pytest class TestClone(TestBase): @@ -29,3 +40,307 @@ def test_checkout_in_non_empty_dir(self, rw_dir): ) else: self.fail("GitCommandError not raised") + + @with_rw_directory + def test_clone_from_pathlib(self, rw_dir): + original_repo = Repo.init(osp.join(rw_dir, "repo")) + + Repo.clone_from(pathlib.Path(original_repo.git_dir), pathlib.Path(rw_dir) / "clone_pathlib") + + @with_rw_directory + def test_clone_from_pathlike(self, rw_dir): + original_repo = Repo.init(osp.join(rw_dir, "repo")) + Repo.clone_from(PathLikeMock(original_repo.git_dir), PathLikeMock(os.path.join(rw_dir, "clone_pathlike"))) + + @with_rw_directory + def test_clone_from_pathlib_withConfig(self, rw_dir): + original_repo = Repo.init(osp.join(rw_dir, "repo")) + + cloned = Repo.clone_from( + original_repo.git_dir, + pathlib.Path(rw_dir) / "clone_pathlib_withConfig", + multi_options=[ + "--recurse-submodules=repo", + "--config core.filemode=false", + "--config submodule.repo.update=checkout", + "--config filter.lfs.clean='git-lfs clean -- %f'", + ], + allow_unsafe_options=True, + ) + + self.assertEqual(cloned.config_reader().get_value("submodule", "active"), "repo") + self.assertEqual(cloned.config_reader().get_value("core", "filemode"), False) + self.assertEqual(cloned.config_reader().get_value('submodule "repo"', "update"), "checkout") + self.assertEqual( + cloned.config_reader().get_value('filter "lfs"', "clean"), + "git-lfs clean -- %f", + ) + + def test_clone_from_with_path_contains_unicode(self): + with tempfile.TemporaryDirectory() as tmpdir: + unicode_dir_name = "\u0394" + path_with_unicode = os.path.join(tmpdir, unicode_dir_name) + os.makedirs(path_with_unicode) + + try: + Repo.clone_from( + url=self._small_repo_url(), + to_path=path_with_unicode, + ) + except UnicodeEncodeError: + self.fail("Raised UnicodeEncodeError") + + @with_rw_directory + @skip( + """The referenced repository was removed, and one needs to set up a new + password controlled repo under the org's control.""" + ) + def test_leaking_password_in_clone_logs(self, rw_dir): + password = "fakepassword1234" + try: + Repo.clone_from( + url="https://fakeuser:{}@fakerepo.example.com/testrepo".format(password), + to_path=rw_dir, + ) + except GitCommandError as err: + assert password not in str(err), "The error message '%s' should not contain the password" % err + # Working example from a blank private project. + Repo.clone_from( + url="https://gitlab+deploy-token-392045:mLWhVus7bjLsy8xj8q2V@gitlab.com/mercierm/test_git_python", + to_path=rw_dir, + ) + + @with_rw_repo("HEAD") + def test_clone_unsafe_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + unsafe_options = [ + f"--upload-pack='touch {tmp_file}'", + f"-u 'touch {tmp_file}'", + "--config=protocol.ext.allow=always", + "-c protocol.ext.allow=always", + ] + for unsafe_option in unsafe_options: + with self.assertRaises(UnsafeOptionError): + rw_repo.clone(tmp_dir, multi_options=[unsafe_option]) + assert not tmp_file.exists() + + unsafe_options = [ + {"upload-pack": f"touch {tmp_file}"}, + {"upload_pack": f"touch {tmp_file}"}, + {"u": f"touch {tmp_file}"}, + {"config": "protocol.ext.allow=always"}, + {"c": "protocol.ext.allow=always"}, + ] + for unsafe_option in unsafe_options: + with self.assertRaises(UnsafeOptionError): + rw_repo.clone(tmp_dir, **unsafe_option) + assert not tmp_file.exists() + + @with_rw_repo("HEAD") + def test_clone_unsafe_options_are_checked_after_splitting_multi_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + payload = "--single-branch --config protocol.ext.allow=always" + + with self.assertRaises(UnsafeOptionError): + rw_repo.clone(tmp_dir, multi_options=[payload]) + + @pytest.mark.xfail( + sys.platform == "win32", + reason=( + "File not created. A separate Windows command may be needed. This and the " + "currently passing test test_clone_unsafe_options must be adjusted in the " + "same way. Until then, test_clone_unsafe_options is unreliable on Windows." + ), + raises=AssertionError, + ) + @with_rw_repo("HEAD") + def test_clone_unsafe_options_allowed(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + unsafe_options = [ + f"--upload-pack='touch {tmp_file}'", + f"-u 'touch {tmp_file}'", + ] + for i, unsafe_option in enumerate(unsafe_options): + destination = tmp_dir / str(i) + assert not tmp_file.exists() + # The options will be allowed, but the command will fail. + with self.assertRaises(GitCommandError): + rw_repo.clone(destination, multi_options=[unsafe_option], allow_unsafe_options=True) + assert tmp_file.exists() + tmp_file.unlink() + + unsafe_options = [ + "--config=protocol.ext.allow=always", + "-c protocol.ext.allow=always", + ] + for i, unsafe_option in enumerate(unsafe_options): + destination = tmp_dir / str(i) + assert not destination.exists() + rw_repo.clone(destination, multi_options=[unsafe_option], allow_unsafe_options=True) + assert destination.exists() + + @with_rw_repo("HEAD") + def test_clone_safe_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + options = [ + "--depth=1", + "--single-branch", + "-q", + ] + for option in options: + destination = tmp_dir / option + assert not destination.exists() + rw_repo.clone(destination, multi_options=[option]) + assert destination.exists() + + @with_rw_repo("HEAD") + def test_clone_from_unsafe_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + unsafe_options = [ + f"--upload-pack='touch {tmp_file}'", + f"-u 'touch {tmp_file}'", + "--config=protocol.ext.allow=always", + "-c protocol.ext.allow=always", + ] + for unsafe_option in unsafe_options: + with self.assertRaises(UnsafeOptionError): + Repo.clone_from(rw_repo.working_dir, tmp_dir, multi_options=[unsafe_option]) + assert not tmp_file.exists() + + unsafe_options = [ + {"upload-pack": f"touch {tmp_file}"}, + {"upload_pack": f"touch {tmp_file}"}, + {"u": f"touch {tmp_file}"}, + {"config": "protocol.ext.allow=always"}, + {"c": "protocol.ext.allow=always"}, + ] + for unsafe_option in unsafe_options: + with self.assertRaises(UnsafeOptionError): + Repo.clone_from(rw_repo.working_dir, tmp_dir, **unsafe_option) + assert not tmp_file.exists() + + @with_rw_repo("HEAD") + def test_clone_from_unsafe_options_are_checked_after_splitting_multi_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + payload = "--single-branch --config protocol.ext.allow=always" + + with self.assertRaises(UnsafeOptionError): + Repo.clone_from(rw_repo.working_dir, tmp_dir, multi_options=[payload]) + + @pytest.mark.xfail( + sys.platform == "win32", + reason=( + "File not created. A separate Windows command may be needed. This and the " + "currently passing test test_clone_from_unsafe_options must be adjusted in the " + "same way. Until then, test_clone_from_unsafe_options is unreliable on Windows." + ), + raises=AssertionError, + ) + @with_rw_repo("HEAD") + def test_clone_from_unsafe_options_allowed(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + unsafe_options = [ + f"--upload-pack='touch {tmp_file}'", + f"-u 'touch {tmp_file}'", + ] + for i, unsafe_option in enumerate(unsafe_options): + destination = tmp_dir / str(i) + assert not tmp_file.exists() + # The options will be allowed, but the command will fail. + with self.assertRaises(GitCommandError): + Repo.clone_from( + rw_repo.working_dir, destination, multi_options=[unsafe_option], allow_unsafe_options=True + ) + assert tmp_file.exists() + tmp_file.unlink() + + unsafe_options = [ + "--config=protocol.ext.allow=always", + "-c protocol.ext.allow=always", + ] + for i, unsafe_option in enumerate(unsafe_options): + destination = tmp_dir / str(i) + assert not destination.exists() + Repo.clone_from( + rw_repo.working_dir, destination, multi_options=[unsafe_option], allow_unsafe_options=True + ) + assert destination.exists() + + @with_rw_repo("HEAD") + def test_clone_from_safe_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + options = [ + "--depth=1", + "--single-branch", + "-q", + ] + for option in options: + destination = tmp_dir / option + assert not destination.exists() + Repo.clone_from(rw_repo.common_dir, destination, multi_options=[option]) + assert destination.exists() + + def test_clone_from_unsafe_protocol(self): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + urls = [ + f"ext::sh -c touch% {tmp_file}", + "fd::17/foo", + ] + for url in urls: + with self.assertRaises(UnsafeProtocolError): + Repo.clone_from(url, tmp_dir / "repo") + assert not tmp_file.exists() + + def test_clone_from_unsafe_protocol_allowed(self): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + urls = [ + f"ext::sh -c touch% {tmp_file}", + "fd::/foo", + ] + for url in urls: + # The URL will be allowed into the command, but the command will + # fail since we don't have that protocol enabled in the Git config file. + with self.assertRaises(GitCommandError): + Repo.clone_from(url, tmp_dir / "repo", allow_unsafe_protocols=True) + assert not tmp_file.exists() + + def test_clone_from_unsafe_protocol_allowed_and_enabled(self): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + urls = [ + f"ext::sh -c touch% {tmp_file}", + ] + allow_ext = [ + "--config=protocol.ext.allow=always", + ] + for url in urls: + # The URL will be allowed into the command, and the protocol is enabled, + # but the command will fail since it can't read from the remote repo. + assert not tmp_file.exists() + with self.assertRaises(GitCommandError): + Repo.clone_from( + url, + tmp_dir / "repo", + multi_options=allow_ext, + allow_unsafe_protocols=True, + allow_unsafe_options=True, + ) + assert tmp_file.exists() + tmp_file.unlink() diff --git a/test/test_commit.py b/test/test_commit.py index 5832258de..b56ad3a18 100644 --- a/test/test_commit.py +++ b/test/test_commit.py @@ -135,9 +135,12 @@ def test_stats(self): commit = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") stats = commit.stats - def check_entries(d): + def check_entries(d, has_change_type=False): assert isinstance(d, dict) - for key in ("insertions", "deletions", "lines"): + keys = ("insertions", "deletions", "lines") + if has_change_type: + keys += ("change_type",) + for key in keys: assert key in d # END assertion helper @@ -148,7 +151,7 @@ def check_entries(d): assert "files" in stats.total for _filepath, d in stats.files.items(): - check_entries(d) + check_entries(d, True) # END for each stated file # Check that data is parsed properly. @@ -563,3 +566,142 @@ def test_commit_co_authors(self): Actor("test_user_2", "another_user-email@github.com"), Actor("test_user_3", "test_user_3@github.com"), ] + + @with_rw_directory + def test_create_from_tree_with_trailers_dict(self, rw_dir): + """Test that create_from_tree supports adding trailers via a dict.""" + rw_repo = Repo.init(osp.join(rw_dir, "test_trailers_dict")) + path = osp.join(str(rw_repo.working_tree_dir), "hello.txt") + touch(path) + rw_repo.index.add([path]) + tree = rw_repo.index.write_tree() + + trailers = {"Issue": "123", "Signed-off-by": "Test User "} + commit = Commit.create_from_tree( + rw_repo, + tree, + "Test commit with trailers", + head=True, + trailers=trailers, + ) + + assert "Issue: 123" in commit.message + assert "Signed-off-by: Test User " in commit.message + assert commit.trailers_dict == { + "Issue": ["123"], + "Signed-off-by": ["Test User "], + } + + @with_rw_directory + def test_create_from_tree_with_trailers_list(self, rw_dir): + """Test that create_from_tree supports adding trailers via a list of tuples.""" + rw_repo = Repo.init(osp.join(rw_dir, "test_trailers_list")) + path = osp.join(str(rw_repo.working_tree_dir), "hello.txt") + touch(path) + rw_repo.index.add([path]) + tree = rw_repo.index.write_tree() + + trailers = [ + ("Signed-off-by", "Alice "), + ("Signed-off-by", "Bob "), + ("Issue", "456"), + ] + commit = Commit.create_from_tree( + rw_repo, + tree, + "Test commit with multiple trailers", + head=True, + trailers=trailers, + ) + + assert "Signed-off-by: Alice " in commit.message + assert "Signed-off-by: Bob " in commit.message + assert "Issue: 456" in commit.message + assert commit.trailers_dict == { + "Signed-off-by": ["Alice ", "Bob "], + "Issue": ["456"], + } + + @with_rw_directory + def test_create_from_tree_with_non_utf8_trailers(self, rw_dir): + """Test that trailer creation and parsing respect the configured commit encoding.""" + rw_repo = Repo.init(osp.join(rw_dir, "test_trailers_non_utf8")) + with rw_repo.config_writer() as writer: + writer.set_value("i18n", "commitencoding", "ISO-8859-1") + + path = osp.join(str(rw_repo.working_tree_dir), "hello.txt") + touch(path) + rw_repo.index.add([path]) + tree = rw_repo.index.write_tree() + + commit = Commit.create_from_tree( + rw_repo, + tree, + "RÊsumÊ", + head=True, + trailers={"Reviewed-by": "AndrÊ "}, + ) + + assert commit.encoding == "ISO-8859-1" + assert "RÊsumÊ" in commit.message + assert "Reviewed-by: AndrÊ " in commit.message + assert commit.trailers_list == [("Reviewed-by", "AndrÊ ")] + + @with_rw_directory + def test_trailers_list_with_non_utf8_message_bytes(self, rw_dir): + """Test that trailer parsing handles non-UTF-8 commit message bytes.""" + rw_repo = Repo.init(osp.join(rw_dir, "test_trailers_non_utf8_bytes")) + with rw_repo.config_writer() as writer: + writer.set_value("i18n", "commitencoding", "ISO-8859-1") + + path = osp.join(str(rw_repo.working_tree_dir), "hello.txt") + touch(path) + rw_repo.index.add([path]) + tree = rw_repo.index.write_tree() + + commit = Commit.create_from_tree( + rw_repo, + tree, + "RÊsumÊ", + head=True, + trailers={"Reviewed-by": "AndrÊ "}, + ) + + bytes_commit = Commit( + rw_repo, + commit.binsha, + message=commit.message.encode(commit.encoding), + encoding=commit.encoding, + ) + + assert bytes_commit.trailers_list == [("Reviewed-by", "AndrÊ ")] + + def test_interpret_trailers_encodes_before_launching_process(self): + """Test that encoding failures happen before spawning interpret-trailers.""" + repo = Mock() + repo.git = Mock() + repo.git.GIT_PYTHON_GIT_EXECUTABLE = "git" + + with self.assertRaises(UnicodeEncodeError): + Commit._interpret_trailers(repo, "Euro: â‚Ŧ", ["--parse"], encoding="ISO-8859-1") + + repo.git.execute.assert_not_called() + + @with_rw_directory + def test_index_commit_with_trailers(self, rw_dir): + """Test that IndexFile.commit() supports adding trailers.""" + rw_repo = Repo.init(osp.join(rw_dir, "test_index_trailers")) + path = osp.join(str(rw_repo.working_tree_dir), "hello.txt") + touch(path) + rw_repo.index.add([path]) + + trailers = {"Reviewed-by": "Reviewer "} + commit = rw_repo.index.commit( + "Test index commit with trailers", + trailers=trailers, + ) + + assert "Reviewed-by: Reviewer " in commit.message + assert commit.trailers_dict == { + "Reviewed-by": ["Reviewer "], + } diff --git a/test/test_config.py b/test/test_config.py index 92997422d..3ddaf0a4b 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -150,6 +150,70 @@ def test_config_value_with_trailing_new_line(self): git_config = GitConfigParser(config_file) git_config.read() # This should not throw an exception + @with_rw_directory + def test_set_value_rejects_config_injection(self, rw_dir): + config_path = osp.join(rw_dir, "config") + payload = "foo\n[core]\nhooksPath=/tmp/hooks" + + with GitConfigParser(config_path, read_only=False) as git_config: + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set_value("user", "name", payload) + + with GitConfigParser(config_path, read_only=True) as git_config: + self.assertFalse(git_config.has_section("user")) + self.assertFalse(git_config.has_section("core")) + + @with_rw_directory + def test_set_value_rejects_unsafe_section_and_option_names(self, rw_dir): + config_path = osp.join(rw_dir, "config") + bad_keys = ("user]\n[core", "user]\r[core", "user]\x00[core") + + with GitConfigParser(config_path, read_only=False) as git_config: + git_config.add_section("user") + for bad_key in bad_keys: + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.add_section(bad_key) + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set(bad_key, "hooksPath", "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set("user", bad_key, "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set_value(bad_key, "hooksPath", "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set_value("user", bad_key, "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.add_value(bad_key, "hooksPath", "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.add_value("user", bad_key, "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.rename_section("user", bad_key) + + git_config.set_value("user", "name", "safe") + + with GitConfigParser(config_path, read_only=True) as git_config: + self.assertEqual(git_config.get_value("user", "name"), "safe") + self.assertFalse(git_config.has_section("core")) + + @with_rw_directory + def test_set_and_add_value_reject_unsafe_value_characters(self, rw_dir): + config_path = osp.join(rw_dir, "config") + bad_values = ("foo\rbar", "foo\nbar", "foo\x00bar", b"foo\nbar") + + with GitConfigParser(config_path, read_only=False) as git_config: + git_config.add_section("user") + for bad_value in bad_values: + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set("user", "name", bad_value) + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set_value("user", "name", bad_value) + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.add_value("user", "name", bad_value) + + git_config.set_value("user", "name", "safe") + + with GitConfigParser(config_path, read_only=True) as git_config: + self.assertEqual(git_config.get_value("user", "name"), "safe") + def test_base(self): path_repo = fixture_path("git_config") path_global = fixture_path("git_config_global") @@ -246,6 +310,43 @@ def check_test_value(cr, value): with GitConfigParser(fpa, read_only=True) as cr: check_test_value(cr, tv) + @with_rw_directory + def test_multiple_include_paths_with_same_key(self, rw_dir): + """Test that multiple 'path' entries under [include] are all respected. + + Regression test for https://github.com/gitpython-developers/GitPython/issues/2099. + Git config allows multiple ``path`` values under ``[include]``, e.g.:: + + [include] + path = file1 + path = file2 + + Previously only one of these was included because _OMD.items() returns + only the last value for each key. + """ + # Create two config files to be included. + fp_inc1 = osp.join(rw_dir, "inc1.cfg") + fp_inc2 = osp.join(rw_dir, "inc2.cfg") + fp_main = osp.join(rw_dir, "main.cfg") + + with GitConfigParser(fp_inc1, read_only=False) as cw: + cw.set_value("user", "name", "from-inc1") + + with GitConfigParser(fp_inc2, read_only=False) as cw: + cw.set_value("core", "bar", "from-inc2") + + # Write a config with two path entries under a single [include] section. + # We write it manually because set_value would overwrite the key. + with open(fp_main, "w") as f: + f.write("[include]\n") + f.write(f"\tpath = {fp_inc1}\n") + f.write(f"\tpath = {fp_inc2}\n") + + with GitConfigParser(fp_main, read_only=True) as cr: + # Both included files should be loaded. + assert cr.get_value("user", "name") == "from-inc1" + assert cr.get_value("core", "bar") == "from-inc2" + @pytest.mark.xfail( sys.platform == "win32", reason='Second config._has_includes() assertion fails (for "config is included if path is matching git_dir")', @@ -373,6 +474,41 @@ def test_conditional_includes_from_branch_name_error(self, rw_dir): assert not config._has_includes() assert config._included_paths() == [] + @with_rw_directory + def test_conditional_includes_remote_url(self, rw_dir): + # Initiate mocked repository. + repo = mock.Mock() + repo.remotes = [mock.Mock(url="https://github.com/foo/repo")] + + # Initiate config files. + path1 = osp.join(rw_dir, "config1") + path2 = osp.join(rw_dir, "config2") + template = '[includeIf "hasconfig:remote.*.url:{}"]\n path={}\n' + + # Ensure that config with hasconfig and full url is correct. + with open(path1, "w") as stream: + stream.write(template.format("https://github.com/foo/repo", path2)) + + with GitConfigParser(path1, repo=repo) as config: + assert config._has_includes() + assert config._included_paths() == [("path", path2)] + + # Ensure that config with hasconfig and incorrect url is incorrect. + with open(path1, "w") as stream: + stream.write(template.format("incorrect", path2)) + + with GitConfigParser(path1, repo=repo) as config: + assert not config._has_includes() + assert config._included_paths() == [] + + # Ensure that config with hasconfig and url using glob pattern is correct. + with open(path1, "w") as stream: + stream.write(template.format("**/**github.com*/**", path2)) + + with GitConfigParser(path1, repo=repo) as config: + assert config._has_includes() + assert config._included_paths() == [("path", path2)] + def test_rename(self): file_obj = self._to_memcache(fixture_path("git_config")) with GitConfigParser(file_obj, read_only=False, merge_includes=False) as cw: @@ -391,13 +527,17 @@ def test_complex_aliases(self): with GitConfigParser(file_obj, read_only=False) as w_config: self.assertEqual( w_config.get("alias", "rbi"), - '"!g() { git rebase -i origin/${1:-master} ; } ; g"', + "!g() { git rebase -i origin/${1:-master} ; } ; g", ) self.assertEqual( file_obj.getvalue(), self._to_memcache(fixture_path(".gitconfig")).getvalue(), ) + def test_config_with_extra_whitespace(self): + cr = GitConfigParser(fixture_path("git_config_with_extra_whitespace"), read_only=True) + self.assertEqual(cr.get("init", "defaultBranch"), "trunk") + def test_empty_config_value(self): cr = GitConfigParser(fixture_path("git_config_with_empty_value"), read_only=True) @@ -406,6 +546,44 @@ def test_empty_config_value(self): with self.assertRaises(cp.NoOptionError): cr.get_value("color", "ui") + def test_config_with_quotes(self): + cr = GitConfigParser(fixture_path("git_config_with_quotes"), read_only=True) + + self.assertEqual(cr.get("user", "name"), "Cody Veal") + self.assertEqual(cr.get("user", "email"), "cveal05@gmail.com") + + def test_config_with_empty_quotes(self): + cr = GitConfigParser(fixture_path("git_config_with_empty_quotes"), read_only=True) + self.assertEqual(cr.get("core", "filemode"), "", "quotes can form a literal empty string as value") + + def test_config_with_quotes_with_literal_whitespace(self): + cr = GitConfigParser(fixture_path("git_config_with_quotes_whitespace_inside"), read_only=True) + self.assertEqual(cr.get("core", "commentString"), "# ") + + def test_config_with_quotes_with_whitespace_outside_value(self): + cr = GitConfigParser(fixture_path("git_config_with_quotes_whitespace_outside"), read_only=True) + self.assertEqual(cr.get("init", "defaultBranch"), "trunk") + + def test_config_with_quotes_containing_escapes(self): + """For now just suppress quote removal. But it would be good to interpret most of these.""" + cr = GitConfigParser(fixture_path("git_config_with_quotes_escapes"), read_only=True) + + # These can eventually be supported by substituting the represented character. + self.assertEqual(cr.get("custom", "hasnewline"), R'"first\nsecond"') + self.assertEqual(cr.get("custom", "hasbackslash"), R'"foo\\bar"') + self.assertEqual(cr.get("custom", "hasquote"), R'"ab\"cd"') + self.assertEqual(cr.get("custom", "hastrailingbackslash"), R'"word\\"') + self.assertEqual(cr.get("custom", "hasunrecognized"), R'"p\qrs"') + + # It is less obvious whether and what to eventually do with this. + self.assertEqual(cr.get("custom", "hasunescapedquotes"), '"ab"cd"e"') + + # Cases where quote removal is clearly safe should happen even after those. + self.assertEqual(cr.get("custom", "ordinary"), "hello world") + + # Cases without quotes should still parse correctly even after those, too. + self.assertEqual(cr.get("custom", "unquoted"), "good evening") + def test_get_values_works_without_requiring_any_other_calls_first(self): file_obj = self._to_memcache(fixture_path("git_config_multiple")) cr = GitConfigParser(file_obj, read_only=True) diff --git a/test/test_docs.py b/test/test_docs.py index cc0bbf26a..c3426a807 100644 --- a/test/test_docs.py +++ b/test/test_docs.py @@ -6,9 +6,6 @@ import gc import os import os.path -import sys - -import pytest from test.lib import TestBase from test.lib.helper import with_rw_directory @@ -478,11 +475,6 @@ def test_references_and_objects(self, rw_dir): repo.git.clear_cache() - @pytest.mark.xfail( - sys.platform == "cygwin", - reason="Cygwin GitPython can't find SHA for submodule", - raises=ValueError, - ) def test_submodules(self): # [1-test_submodules] repo = self.rorepo diff --git a/test/test_exc.py b/test/test_exc.py index c1eae7240..2e979f5a1 100644 --- a/test/test_exc.py +++ b/test/test_exc.py @@ -52,7 +52,7 @@ _streams_n_substrings = ( None, - "steram", + "stream", "ÎŋÎŧÎŋ΁ΆÎŋ stream", ) diff --git a/test/test_fixture_health.py b/test/test_fixture_health.py new file mode 100644 index 000000000..b18d5e8f9 --- /dev/null +++ b/test/test_fixture_health.py @@ -0,0 +1,131 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Verify that fixture directories are usable by git. + +If a fixture directory is missing, isn't an initialized git repository, +or is rejected by git for "dubious ownership", dependent tests +elsewhere in the suite fail in opaque ways. The checks here name the +preconditions directly so a misconfigured environment is recognizable +from the test output rather than from a cascade of unrelated-seeming +failures. + +These tests do not exercise GitPython's production code. They verify +the conditions under which production code is exercised are valid. +""" + +import subprocess +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent + +# Directories git must trust for the test suite to operate normally. The +# current set is the GitPython working tree plus the working trees of its +# gitdb submodule and the smmap submodule nested inside gitdb. New entries +# should be added here whenever the test suite gains a dependency on git +# accepting another directory. +FIXTURE_DIRS = [ + pytest.param(REPO_ROOT, id="repo_root"), + pytest.param(REPO_ROOT / "git" / "ext" / "gitdb", id="gitdb"), + pytest.param( + REPO_ROOT / "git" / "ext" / "gitdb" / "gitdb" / "ext" / "smmap", + id="smmap", + ), +] + +# Submodule working trees that must be present and initialized for the +# test suite to operate normally: gitdb at `git/ext/gitdb`, and smmap +# nested inside gitdb at `git/ext/gitdb/gitdb/ext/smmap`. The paths +# below are anchored at REPO_ROOT (the GitPython source tree), not at +# any rorepo redirection target. +SUBMODULE_DIRS = [ + pytest.param(REPO_ROOT / "git" / "ext" / "gitdb", id="gitdb"), + pytest.param( + REPO_ROOT / "git" / "ext" / "gitdb" / "gitdb" / "ext" / "smmap", + id="smmap", + ), +] + + +@pytest.mark.parametrize("fixture_dir", FIXTURE_DIRS) +def test_fixture_dir_is_trusted_by_git(fixture_dir: Path) -> None: + """git accepts ``fixture_dir`` as its own repository owned by a trusted user. + + Run ``git -C rev-parse --show-toplevel`` and assert it + succeeds and reports ``fixture_dir`` itself as the toplevel. Failure + typically means the directory's on-disk ownership doesn't match the + running user and the CI workflow's ``safe.directory`` list is missing + an entry that would override the check. + """ + if not fixture_dir.exists(): + pytest.skip(f"{fixture_dir} not present (run `git submodule update --init --recursive` from the repo root)") + if not (fixture_dir / ".git").exists(): + pytest.skip( + f"{fixture_dir} has no .git marker " + "(submodule not initialized; run " + "`git submodule update --init --recursive` from the repo root)" + ) + try: + result = subprocess.run( + ["git", "-C", str(fixture_dir), "rev-parse", "--show-toplevel"], + capture_output=True, + text=True, + check=False, + ) + except FileNotFoundError: + pytest.skip("git is not installed or not on PATH") + assert result.returncode == 0, ( + f"git refuses to operate in {fixture_dir}.\n" + f"stderr: {result.stderr.strip()}\n" + "The directory's owner doesn't match the running user and no " + "`safe.directory` entry overrides the check. On CI, the " + "workflow's `safe.directory` list typically needs an entry for " + "this path. Locally, this is unexpected and usually indicates " + "an ownership problem worth investigating." + ) + reported = Path(result.stdout.strip()) + assert reported.samefile(fixture_dir), ( + f"git reports the toplevel as {reported}, " + f"not as {fixture_dir} itself. " + "This usually means the directory is not an initialized git " + "repository (its `.git` marker may be stale or pointing elsewhere)." + ) + + +@pytest.mark.parametrize("submodule_dir", SUBMODULE_DIRS) +def test_required_submodule_is_initialized(submodule_dir: Path) -> None: + """The submodule's working tree is present and initialized. + + Failure means the source tree is a git clone but the submodule's + working tree hasn't been populated. Skipped when the source tree + itself isn't a git clone (e.g. an extracted release tarball), since + ``git submodule update`` cannot operate there; setups that handle + submodules in a separately-prepared tree (via + ``GIT_PYTHON_TEST_GIT_REPO_BASE``) are exempted from this check. + """ + if not (REPO_ROOT / ".git").exists(): + pytest.skip( + "Source tree is not a git clone (no .git in REPO_ROOT); submodules " + "cannot be initialized via `git submodule update` here. Setups " + "that prepare submodules in a separately-pointed tree (via " + "GIT_PYTHON_TEST_GIT_REPO_BASE) are exempted from this check." + ) + # The assertion messages below recommend `git submodule update --init + # --recursive` rather than `init-tests-after-clone.sh`, even though the + # latter is the documented entry point for first-time test setup. Two + # reasons: the script performs `git reset --hard` operations that can + # destroy local work, and #1713 showed the script itself can carry + # submodule-init regressions, in which case recommending it would lead + # developers in a circle. The direct git command is a safe minimal fix + # for this test's specific failure mode and bypasses any such regression. + assert submodule_dir.is_dir(), ( + f"Submodule working tree missing: {submodule_dir}.\n" + "Run `git submodule update --init --recursive` from the repo root." + ) + assert (submodule_dir / ".git").exists(), ( + f"Submodule directory exists but has no .git marker: {submodule_dir}.\n" + "The submodule hasn't been initialized. " + "Run `git submodule update --init --recursive` from the repo root." + ) diff --git a/test/test_fun.py b/test/test_fun.py index b8593b400..a456b8aab 100644 --- a/test/test_fun.py +++ b/test/test_fun.py @@ -243,6 +243,7 @@ def test_tree_traversal(self): B_old = self.rorepo.tree("1f66cfbbce58b4b552b041707a12d437cc5f400a") # old base tree # Two very different trees. + entries = traverse_trees_recursive(odb, [B_old.binsha, H.binsha], "") self._assert_tree_entries(entries, 2) @@ -251,7 +252,10 @@ def test_tree_traversal(self): self._assert_tree_entries(oentries, 2) # Single tree. - is_no_tree = lambda i, d: i.type != "tree" + + def is_no_tree(i, _d): + return i.type != "tree" + entries = traverse_trees_recursive(odb, [B.binsha], "") assert len(entries) == len(list(B.traverse(predicate=is_no_tree))) self._assert_tree_entries(entries, 1) diff --git a/test/test_git.py b/test/test_git.py index 94e68ecf0..24b60af9d 100644 --- a/test/test_git.py +++ b/test/test_git.py @@ -6,6 +6,7 @@ import contextlib import gc import inspect +import io import logging import os import os.path as osp @@ -26,6 +27,7 @@ import ddt from git import Git, GitCommandError, GitCommandNotFound, Repo, cmd, refresh +from git.exc import UnsafeOptionError from git.util import cwd, finalize_process from test.lib import TestBase, fixture_path, with_rw_directory @@ -153,6 +155,21 @@ def test_it_transforms_kwargs_into_git_command_arguments(self): res = self.git.transform_kwargs(**{"s": True, "t": True}) self.assertEqual({"-s", "-t"}, set(res)) + def test_check_unsafe_options_normalizes_kwargs(self): + cases = [ + (["upload_pack"], ["--upload-pack"]), + (["receive_pack"], ["--receive-pack"]), + (["exec"], ["--exec"]), + (["u"], ["-u"]), + (["c"], ["-c"]), + (["--upload-pack=/tmp/helper"], ["--upload-pack"]), + (["--config core.filemode=false"], ["--config"]), + ] + + for options, unsafe_options in cases: + with self.assertRaises(UnsafeOptionError): + Git.check_unsafe_options(options=options, unsafe_options=unsafe_options) + _shell_cases = ( # value_in_call, value_from_class, expected_popen_arg (None, False, False), @@ -201,6 +218,25 @@ def test_it_logs_istream_summary_for_stdin(self, case): def test_it_executes_git_and_returns_result(self): self.assertRegex(self.git.execute(["git", "version"]), r"^git version [\d\.]{2}.*$") + def test_it_output_stream_with_stdout_is_false(self): + temp_stream = io.BytesIO() + self.git.execute( + ["git", "version"], + output_stream=temp_stream, + with_stdout=False, + ) + self.assertEqual(temp_stream.tell(), 0) + + def test_it_executes_git_without_stdout_redirect(self): + returncode, stdout, stderr = self.git.execute( + ["git", "version"], + with_extended_output=True, + with_stdout=False, + ) + self.assertEqual(returncode, 0) + self.assertIsNone(stdout) + self.assertIsNotNone(stderr) + @ddt.data( # chdir_to_repo, shell, command, use_shell_impostor (False, False, ["git", "version"], False), @@ -747,7 +783,7 @@ def test_environment(self, rw_dir): path = osp.join(rw_dir, "failing-script.sh") with open(path, "wt") as stream: - stream.write("#!/usr/bin/env sh\n" "echo FOO\n") + stream.write("#!/usr/bin/env sh\necho FOO\n") os.chmod(path, 0o777) rw_repo = Repo.init(osp.join(rw_dir, "repo")) @@ -762,17 +798,18 @@ def test_environment(self, rw_dir): def test_handle_process_output(self): from git.cmd import handle_process_output, safer_popen - line_count = 5002 - count = [None, 0, 0] + expected_line_count = 5002 + actual_lines = [None, [], []] - def counter_stdout(line): - count[1] += 1 + def stdout_handler(line): + actual_lines[1].append(line) - def counter_stderr(line): - count[2] += 1 + def stderr_handler(line): + actual_lines[2].append(line) cmdline = [ sys.executable, + "-S", # Keep any `CoverageWarning` messages out of the subprocess stderr. fixture_path("cat_file.py"), str(fixture_path("issue-301_stderr")), ] @@ -784,10 +821,10 @@ def counter_stderr(line): shell=False, ) - handle_process_output(proc, counter_stdout, counter_stderr, finalize_process) + handle_process_output(proc, stdout_handler, stderr_handler, finalize_process) - self.assertEqual(count[1], line_count) - self.assertEqual(count[2], line_count) + self.assertEqual(len(actual_lines[1]), expected_line_count, repr(actual_lines[1])) + self.assertEqual(len(actual_lines[2]), expected_line_count, repr(actual_lines[2])) def test_execute_kwargs_set_agrees_with_method(self): parameter_names = inspect.signature(cmd.Git.execute).parameters.keys() diff --git a/test/test_index.py b/test/test_index.py index b92258c92..3be750dbb 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -16,13 +16,15 @@ import subprocess import sys import tempfile +from unittest import mock from gitdb.base import IStream import ddt import pytest -from git import BlobFilter, Diff, Git, IndexFile, Object, Repo, Tree +from git import BlobFilter, Diff, Git, IndexFile, NULL_TREE, Object, Repo, Tree +from git.diff import NULL_TREE_SHA from git.exc import ( CheckoutError, GitCommandError, @@ -36,14 +38,8 @@ from git.objects import Blob from git.util import Actor, cwd, hex_to_bin, rmtree -from test.lib import ( - TestBase, - VirtualEnvironment, - fixture, - fixture_path, - with_rw_directory, - with_rw_repo, -) +from test.lib import TestBase, VirtualEnvironment, fixture, fixture_path, with_rw_directory, with_rw_repo, PathLikeMock +from test.lib.helper import xfail_if_raises HOOKS_SHEBANG = "#!/usr/bin/env sh\n" @@ -178,6 +174,19 @@ def _decode(stdout): _win_bash_status = WinBashStatus.check() +def _windows_supports_symlinks(): + if sys.platform != "win32": + return False + + with tempfile.TemporaryDirectory(prefix="gitpython-symlink-check-") as temp_dir: + link_path = osp.join(temp_dir, "link") + try: + os.symlink("missing-target", link_path) + except (NotImplementedError, OSError): + return False + return S_ISLNK(os.lstat(link_path)[ST_MODE]) + + def _make_hook(git_dir, name, content, make_exec=True): """A helper to create a hook""" hp = hook_path(name, git_dir) @@ -329,7 +338,10 @@ def test_index_file_from_tree(self, rw_repo): assert len([e for e in three_way_index.entries.values() if e.stage != 0]) # ITERATE BLOBS - merge_required = lambda t: t[0] != 0 + + def merge_required(t): + return t[0] != 0 + merge_blobs = list(three_way_index.iter_blobs(merge_required)) assert merge_blobs assert merge_blobs[0][0] in (1, 2, 3) @@ -544,6 +556,39 @@ def test_index_file_diffing(self, rw_repo): rval = index.checkout("lib") assert len(list(rval)) > 1 + @with_rw_directory + def test_index_file_diff_null_tree_with_initial_index(self, rw_dir): + repo = Repo.init(rw_dir) + filename = ".gitkeep" + file_path = osp.join(repo.working_tree_dir, filename) + with open(file_path, "w") as fp: + fp.write("# Initial file\n") + + index = repo.index + index.add([filename]) + index.write() + + index = IndexFile(repo) + self.assertEqual(len(index.diff(None)), 0) + + diff = index.diff(NULL_TREE) + self.assertEqual(len(diff), 1) + self.assertEqual(diff[0].change_type, "A") + assert diff[0].new_file + self.assertEqual(diff[0].b_path, filename) + + self.assertEqual(len(index.diff(NULL_TREE, paths=filename)), 1) + self.assertEqual(len(index.diff(NULL_TREE_SHA, paths=filename)), 1) + self.assertEqual(len(index.diff(NULL_TREE, paths="missing")), 0) + + patch = index.diff(NULL_TREE, create_patch=True) + self.assertEqual(len(patch), 1) + self.assertIn(b"+# Initial file", patch[0].diff) + + with self.assertRaises(GitCommandError) as exc_info: + index.diff(NULL_TREE, bogus_option=True) + self.assertIn("usage: git diff", exc_info.exception.stderr) + def _count_existing(self, repo, files): """Return count of files that actually exist in the repository directory.""" existing = 0 @@ -555,365 +600,369 @@ def _count_existing(self, repo, files): # END num existing helper - @pytest.mark.xfail( - sys.platform == "win32" and Git().config("core.symlinks") == "true", - reason="Assumes symlinks are not created on Windows and opens a symlink to a nonexistent target.", - raises=FileNotFoundError, - ) @with_rw_repo("0.1.6") def test_index_mutation(self, rw_repo): - index = rw_repo.index - num_entries = len(index.entries) - cur_head = rw_repo.head - - uname = "Thomas MÃŧller" - umail = "sd@company.com" - with rw_repo.config_writer() as writer: - writer.set_value("user", "name", uname) - writer.set_value("user", "email", umail) - self.assertEqual(writer.get_value("user", "name"), uname) - - # Remove all of the files, provide a wild mix of paths, BaseIndexEntries, - # IndexEntries. - def mixed_iterator(): - count = 0 - for entry in index.entries.values(): - type_id = count % 5 - if type_id == 0: # path (str) - yield entry.path - elif type_id == 1: # path (PathLike) - yield Path(entry.path) - elif type_id == 2: # blob - yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) - elif type_id == 3: # BaseIndexEntry - yield BaseIndexEntry(entry[:4]) - elif type_id == 4: # IndexEntry - yield entry - else: - raise AssertionError("Invalid Type") - count += 1 - # END for each entry - - # END mixed iterator - deleted_files = index.remove(mixed_iterator(), working_tree=False) - assert deleted_files - self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) - self.assertEqual(len(index.entries), 0) - - # Reset the index to undo our changes. - index.reset() - self.assertEqual(len(index.entries), num_entries) - - # Remove with working copy. - deleted_files = index.remove(mixed_iterator(), working_tree=True) - assert deleted_files - self.assertEqual(self._count_existing(rw_repo, deleted_files), 0) - - # Reset everything. - index.reset(working_tree=True) - self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) - - # Invalid type. - self.assertRaises(TypeError, index.remove, [1]) - - # Absolute path. - deleted_files = index.remove([osp.join(rw_repo.working_tree_dir, "lib")], r=True) - assert len(deleted_files) > 1 - self.assertRaises(ValueError, index.remove, ["/doesnt/exists"]) - - # TEST COMMITTING - # Commit changed index. - cur_commit = cur_head.commit - commit_message = "commit default head by Frèderic Çauflâ‚Ŧ" - - new_commit = index.commit(commit_message, head=False) - assert cur_commit != new_commit - self.assertEqual(new_commit.author.name, uname) - self.assertEqual(new_commit.author.email, umail) - self.assertEqual(new_commit.committer.name, uname) - self.assertEqual(new_commit.committer.email, umail) - self.assertEqual(new_commit.message, commit_message) - self.assertEqual(new_commit.parents[0], cur_commit) - self.assertEqual(len(new_commit.parents), 1) - self.assertEqual(cur_head.commit, cur_commit) - - # Commit with other actor. - cur_commit = cur_head.commit - - my_author = Actor("Frèderic Çauflâ‚Ŧ", "author@example.com") - my_committer = Actor("Committing Frèderic Çauflâ‚Ŧ", "committer@example.com") - commit_actor = index.commit(commit_message, author=my_author, committer=my_committer) - assert cur_commit != commit_actor - self.assertEqual(commit_actor.author.name, "Frèderic Çauflâ‚Ŧ") - self.assertEqual(commit_actor.author.email, "author@example.com") - self.assertEqual(commit_actor.committer.name, "Committing Frèderic Çauflâ‚Ŧ") - self.assertEqual(commit_actor.committer.email, "committer@example.com") - self.assertEqual(commit_actor.message, commit_message) - self.assertEqual(commit_actor.parents[0], cur_commit) - self.assertEqual(len(new_commit.parents), 1) - self.assertEqual(cur_head.commit, commit_actor) - self.assertEqual(cur_head.log()[-1].actor, my_committer) - - # Commit with author_date and commit_date. - cur_commit = cur_head.commit - commit_message = "commit with dates by Avinash Sajjanshetty" - - new_commit = index.commit( - commit_message, - author_date="2006-04-07T22:13:13", - commit_date="2005-04-07T22:13:13", - ) - assert cur_commit != new_commit - print(new_commit.authored_date, new_commit.committed_date) - self.assertEqual(new_commit.message, commit_message) - self.assertEqual(new_commit.authored_date, 1144447993) - self.assertEqual(new_commit.committed_date, 1112911993) - - # Same index, no parents. - commit_message = "index without parents" - commit_no_parents = index.commit(commit_message, parent_commits=[], head=True) - self.assertEqual(commit_no_parents.message, commit_message) - self.assertEqual(len(commit_no_parents.parents), 0) - self.assertEqual(cur_head.commit, commit_no_parents) - - # same index, multiple parents. - commit_message = "Index with multiple parents\n commit with another line" - commit_multi_parent = index.commit(commit_message, parent_commits=(commit_no_parents, new_commit)) - self.assertEqual(commit_multi_parent.message, commit_message) - self.assertEqual(len(commit_multi_parent.parents), 2) - self.assertEqual(commit_multi_parent.parents[0], commit_no_parents) - self.assertEqual(commit_multi_parent.parents[1], new_commit) - self.assertEqual(cur_head.commit, commit_multi_parent) - - # Re-add all files in lib. - # Get the lib folder back on disk, but get an index without it. - index.reset(new_commit.parents[0], working_tree=True).reset(new_commit, working_tree=False) - lib_file_path = osp.join("lib", "git", "__init__.py") - assert (lib_file_path, 0) not in index.entries - assert osp.isfile(osp.join(rw_repo.working_tree_dir, lib_file_path)) - - # Directory. - entries = index.add(["lib"], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - assert len(entries) > 1 - - # Glob. - entries = index.reset(new_commit).add([osp.join("lib", "git", "*.py")], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - self.assertEqual(len(entries), 14) - - # Same file. - entries = index.reset(new_commit).add( - [osp.join(rw_repo.working_tree_dir, "lib", "git", "head.py")] * 2, - fprogress=self._fprogress_add, - ) - self._assert_entries(entries) - self.assertEqual(entries[0].mode & 0o644, 0o644) - # Would fail, test is too primitive to handle this case. - # self._assert_fprogress(entries) - self._reset_progress() - self.assertEqual(len(entries), 2) - - # Missing path. - self.assertRaises(OSError, index.reset(new_commit).add, ["doesnt/exist/must/raise"]) - - # Blob from older revision overrides current index revision. - old_blob = new_commit.parents[0].tree.blobs[0] - entries = index.reset(new_commit).add([old_blob], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - self.assertEqual(index.entries[(old_blob.path, 0)].hexsha, old_blob.hexsha) - self.assertEqual(len(entries), 1) - - # Mode 0 not allowed. - null_hex_sha = Diff.NULL_HEX_SHA - null_bin_sha = b"\0" * 20 - self.assertRaises( - ValueError, - index.reset(new_commit).add, - [BaseIndexEntry((0, null_bin_sha, 0, "doesntmatter"))], - ) - - # Add new file. - new_file_relapath = "my_new_file" - self._make_file(new_file_relapath, "hello world", rw_repo) - entries = index.reset(new_commit).add( - [BaseIndexEntry((0o10644, null_bin_sha, 0, new_file_relapath))], - fprogress=self._fprogress_add, - ) - self._assert_entries(entries) - self._assert_fprogress(entries) - self.assertEqual(len(entries), 1) - self.assertNotEqual(entries[0].hexsha, null_hex_sha) - - # Add symlink. - if sys.platform != "win32": - for target in ("/etc/nonexisting", "/etc/passwd", "/etc"): - basename = "my_real_symlink" - - link_file = osp.join(rw_repo.working_tree_dir, basename) - os.symlink(target, link_file) - entries = index.reset(new_commit).add([link_file], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - self.assertEqual(len(entries), 1) - self.assertTrue(S_ISLNK(entries[0].mode)) - self.assertTrue(S_ISLNK(index.entries[index.entry_key("my_real_symlink", 0)].mode)) - - # We expect only the target to be written. - self.assertEqual( - index.repo.odb.stream(entries[0].binsha).read().decode("ascii"), - target, - ) - - os.remove(link_file) - # END for each target - # END real symlink test - - # Add fake symlink and assure it checks out as a symlink. - fake_symlink_relapath = "my_fake_symlink" - link_target = "/etc/that" - fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo) - fake_entry = BaseIndexEntry((0o120000, null_bin_sha, 0, fake_symlink_relapath)) - entries = index.reset(new_commit).add([fake_entry], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - assert entries[0].hexsha != null_hex_sha - self.assertEqual(len(entries), 1) - self.assertTrue(S_ISLNK(entries[0].mode)) - - # Check that this also works with an alternate method. - full_index_entry = IndexEntry.from_base(BaseIndexEntry((0o120000, entries[0].binsha, 0, entries[0].path))) - entry_key = index.entry_key(full_index_entry) - index.reset(new_commit) - - assert entry_key not in index.entries - index.entries[entry_key] = full_index_entry - index.write() - index.update() # Force reread of entries. - new_entry = index.entries[entry_key] - assert S_ISLNK(new_entry.mode) + with xfail_if_raises( + sys.platform == "win32" and (Git().config("core.symlinks") == "true" or _windows_supports_symlinks()), + raises=(FileNotFoundError, GitCommandError), + reason="Assumes symlinks are not created on Windows and opens a symlink to a nonexistent target.", + ): + index = rw_repo.index + num_entries = len(index.entries) + cur_head = rw_repo.head + + uname = "Thomas MÃŧller" + umail = "sd@company.com" + with rw_repo.config_writer() as writer: + writer.set_value("user", "name", uname) + writer.set_value("user", "email", umail) + self.assertEqual(writer.get_value("user", "name"), uname) + + # Remove all of the files, provide a wild mix of paths, BaseIndexEntries, + # IndexEntries. + def mixed_iterator(): + count = 0 + for entry in index.entries.values(): + type_id = count % 5 + if type_id == 0: # path (str) + yield entry.path + elif type_id == 1: # path (PathLike) + yield Path(entry.path) + elif type_id == 2: # path mock (PathLike) + yield PathLikeMock(entry.path) + elif type_id == 3: # path mock in a blob + yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) + elif type_id == 4: # blob + yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) + elif type_id == 5: # BaseIndexEntry + yield BaseIndexEntry(entry[:4]) + elif type_id == 6: # IndexEntry + yield entry + else: + raise AssertionError("Invalid Type") + count += 1 + # END for each entry + + # END mixed iterator + deleted_files = index.remove(mixed_iterator(), working_tree=False) + assert deleted_files + self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) + self.assertEqual(len(index.entries), 0) + + # Reset the index to undo our changes. + index.reset() + self.assertEqual(len(index.entries), num_entries) + + # Remove with working copy. + deleted_files = index.remove(mixed_iterator(), working_tree=True) + assert deleted_files + self.assertEqual(self._count_existing(rw_repo, deleted_files), 0) + + # Reset everything. + index.reset(working_tree=True) + self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) + + # Invalid type. + self.assertRaises(TypeError, index.remove, [1]) + + # Absolute path. + deleted_files = index.remove([osp.join(rw_repo.working_tree_dir, "lib")], r=True) + assert len(deleted_files) > 1 + self.assertRaises(ValueError, index.remove, ["/doesnt/exists"]) + + # TEST COMMITTING + # Commit changed index. + cur_commit = cur_head.commit + commit_message = "commit default head by Frèderic Çauflâ‚Ŧ" + + new_commit = index.commit(commit_message, head=False) + assert cur_commit != new_commit + self.assertEqual(new_commit.author.name, uname) + self.assertEqual(new_commit.author.email, umail) + self.assertEqual(new_commit.committer.name, uname) + self.assertEqual(new_commit.committer.email, umail) + self.assertEqual(new_commit.message, commit_message) + self.assertEqual(new_commit.parents[0], cur_commit) + self.assertEqual(len(new_commit.parents), 1) + self.assertEqual(cur_head.commit, cur_commit) + + # Commit with other actor. + cur_commit = cur_head.commit + + my_author = Actor("Frèderic Çauflâ‚Ŧ", "author@example.com") + my_committer = Actor("Committing Frèderic Çauflâ‚Ŧ", "committer@example.com") + commit_actor = index.commit(commit_message, author=my_author, committer=my_committer) + assert cur_commit != commit_actor + self.assertEqual(commit_actor.author.name, "Frèderic Çauflâ‚Ŧ") + self.assertEqual(commit_actor.author.email, "author@example.com") + self.assertEqual(commit_actor.committer.name, "Committing Frèderic Çauflâ‚Ŧ") + self.assertEqual(commit_actor.committer.email, "committer@example.com") + self.assertEqual(commit_actor.message, commit_message) + self.assertEqual(commit_actor.parents[0], cur_commit) + self.assertEqual(len(new_commit.parents), 1) + self.assertEqual(cur_head.commit, commit_actor) + self.assertEqual(cur_head.log()[-1].actor, my_committer) + + # Commit with author_date and commit_date. + cur_commit = cur_head.commit + commit_message = "commit with dates by Avinash Sajjanshetty" + + new_commit = index.commit( + commit_message, + author_date="2006-04-07T22:13:13", + commit_date="2005-04-07T22:13:13", + ) + assert cur_commit != new_commit + print(new_commit.authored_date, new_commit.committed_date) + self.assertEqual(new_commit.message, commit_message) + self.assertEqual(new_commit.authored_date, 1144447993) + self.assertEqual(new_commit.committed_date, 1112911993) + + # Same index, no parents. + commit_message = "index without parents" + commit_no_parents = index.commit(commit_message, parent_commits=[], head=True) + self.assertEqual(commit_no_parents.message, commit_message) + self.assertEqual(len(commit_no_parents.parents), 0) + self.assertEqual(cur_head.commit, commit_no_parents) + + # same index, multiple parents. + commit_message = "Index with multiple parents\n commit with another line" + commit_multi_parent = index.commit(commit_message, parent_commits=(commit_no_parents, new_commit)) + self.assertEqual(commit_multi_parent.message, commit_message) + self.assertEqual(len(commit_multi_parent.parents), 2) + self.assertEqual(commit_multi_parent.parents[0], commit_no_parents) + self.assertEqual(commit_multi_parent.parents[1], new_commit) + self.assertEqual(cur_head.commit, commit_multi_parent) + + # Re-add all files in lib. + # Get the lib folder back on disk, but get an index without it. + index.reset(new_commit.parents[0], working_tree=True).reset(new_commit, working_tree=False) + lib_file_path = osp.join("lib", "git", "__init__.py") + assert (lib_file_path, 0) not in index.entries + assert osp.isfile(osp.join(rw_repo.working_tree_dir, lib_file_path)) + + # Directory. + entries = index.add(["lib"], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + assert len(entries) > 1 + + # Glob. + entries = index.reset(new_commit).add([osp.join("lib", "git", "*.py")], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + self.assertEqual(len(entries), 14) + + # Same file. + entries = index.reset(new_commit).add( + [osp.join(rw_repo.working_tree_dir, "lib", "git", "head.py")] * 2, + fprogress=self._fprogress_add, + ) + self._assert_entries(entries) + self.assertEqual(entries[0].mode & 0o644, 0o644) + # Would fail, test is too primitive to handle this case. + # self._assert_fprogress(entries) + self._reset_progress() + self.assertEqual(len(entries), 2) + + # Missing path. + self.assertRaises(OSError, index.reset(new_commit).add, ["doesnt/exist/must/raise"]) + + # Blob from older revision overrides current index revision. + old_blob = new_commit.parents[0].tree.blobs[0] + entries = index.reset(new_commit).add([old_blob], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + self.assertEqual(index.entries[(old_blob.path, 0)].hexsha, old_blob.hexsha) + self.assertEqual(len(entries), 1) + + # Mode 0 not allowed. + null_hex_sha = Diff.NULL_HEX_SHA + null_bin_sha = b"\0" * 20 + self.assertRaises( + ValueError, + index.reset(new_commit).add, + [BaseIndexEntry((0, null_bin_sha, 0, "doesntmatter"))], + ) - # A tree created from this should contain the symlink. - tree = index.write_tree() - assert fake_symlink_relapath in tree - index.write() # Flush our changes for the checkout. - - # Check out the fake link, should be a link then. - assert not S_ISLNK(os.stat(fake_symlink_path)[ST_MODE]) - os.remove(fake_symlink_path) - index.checkout(fake_symlink_path) - - # On Windows, we currently assume we will never get symlinks. - if sys.platform == "win32": - # Symlinks should contain the link as text (which is what a - # symlink actually is). - with open(fake_symlink_path, "rt") as fd: - self.assertEqual(fd.read(), link_target) - else: - self.assertTrue(S_ISLNK(os.lstat(fake_symlink_path)[ST_MODE])) - - # TEST RENAMING - def assert_mv_rval(rval): - for source, dest in rval: - assert not osp.exists(source) and osp.exists(dest) - # END for each renamed item - - # END move assertion utility - - self.assertRaises(ValueError, index.move, ["just_one_path"]) - # Try to move a file onto an existing file. - files = ["AUTHORS", "LICENSE"] - self.assertRaises(GitCommandError, index.move, files) - - # Again, with force. - assert_mv_rval(index.move(files, f=True)) - - # Move files into a directory - dry run. - paths = ["LICENSE", "VERSION", "doc"] - rval = index.move(paths, dry_run=True) - self.assertEqual(len(rval), 2) - assert osp.exists(paths[0]) - - # Again, no dry run. - rval = index.move(paths) - assert_mv_rval(rval) - - # Move dir into dir. - rval = index.move(["doc", "test"]) - assert_mv_rval(rval) - - # TEST PATH REWRITING - ###################### - count = [0] - - def rewriter(entry): - rval = str(count[0]) - count[0] += 1 - return rval - - # END rewriter - - def make_paths(): - """Help out the test by yielding two existing paths and one new path.""" - yield "CHANGES" - yield "ez_setup.py" - yield index.entries[index.entry_key("README", 0)] - yield index.entries[index.entry_key(".gitignore", 0)] - - for fid in range(3): - fname = "newfile%i" % fid - with open(fname, "wb") as fd: - fd.write(b"abcd") - yield Blob(rw_repo, Blob.NULL_BIN_SHA, 0o100644, fname) - # END for each new file - - # END path producer - paths = list(make_paths()) - self._assert_entries(index.add(paths, path_rewriter=rewriter)) - - for filenum in range(len(paths)): - assert index.entry_key(str(filenum), 0) in index.entries - - # TEST RESET ON PATHS - ###################### - arela = "aa" - brela = "bb" - afile = self._make_file(arela, "adata", rw_repo) - bfile = self._make_file(brela, "bdata", rw_repo) - akey = index.entry_key(arela, 0) - bkey = index.entry_key(brela, 0) - keys = (akey, bkey) - absfiles = (afile, bfile) - files = (arela, brela) - - for fkey in keys: - assert fkey not in index.entries - - index.add(files, write=True) - nc = index.commit("2 files committed", head=False) - - for fkey in keys: - assert fkey in index.entries - - # Just the index. - index.reset(paths=(arela, afile)) - assert akey not in index.entries - assert bkey in index.entries - - # Now with working tree - files on disk as well as entries must be recreated. - rw_repo.head.commit = nc - for absfile in absfiles: - os.remove(absfile) - - index.reset(working_tree=True, paths=files) - - for fkey in keys: - assert fkey in index.entries - for absfile in absfiles: - assert osp.isfile(absfile) + # Add new file. + new_file_relapath = "my_new_file" + self._make_file(new_file_relapath, "hello world", rw_repo) + entries = index.reset(new_commit).add( + [BaseIndexEntry((0o10644, null_bin_sha, 0, new_file_relapath))], + fprogress=self._fprogress_add, + ) + self._assert_entries(entries) + self._assert_fprogress(entries) + self.assertEqual(len(entries), 1) + self.assertNotEqual(entries[0].hexsha, null_hex_sha) + + # Add symlink. + if sys.platform != "win32": + for target in ("/etc/nonexisting", "/etc/passwd", "/etc"): + basename = "my_real_symlink" + + link_file = osp.join(rw_repo.working_tree_dir, basename) + os.symlink(target, link_file) + entries = index.reset(new_commit).add([link_file], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + self.assertEqual(len(entries), 1) + self.assertTrue(S_ISLNK(entries[0].mode)) + self.assertTrue(S_ISLNK(index.entries[index.entry_key("my_real_symlink", 0)].mode)) + + # We expect only the target to be written. + self.assertEqual( + index.repo.odb.stream(entries[0].binsha).read().decode("ascii"), + target, + ) + + os.remove(link_file) + # END for each target + # END real symlink test + + # Add fake symlink and assure it checks out as a symlink. + fake_symlink_relapath = "my_fake_symlink" + link_target = "/etc/that" + fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo) + fake_entry = BaseIndexEntry((0o120000, null_bin_sha, 0, fake_symlink_relapath)) + entries = index.reset(new_commit).add([fake_entry], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + assert entries[0].hexsha != null_hex_sha + self.assertEqual(len(entries), 1) + self.assertTrue(S_ISLNK(entries[0].mode)) + + # Check that this also works with an alternate method. + full_index_entry = IndexEntry.from_base(BaseIndexEntry((0o120000, entries[0].binsha, 0, entries[0].path))) + entry_key = index.entry_key(full_index_entry) + index.reset(new_commit) + + assert entry_key not in index.entries + index.entries[entry_key] = full_index_entry + index.write() + index.update() # Force reread of entries. + new_entry = index.entries[entry_key] + assert S_ISLNK(new_entry.mode) + + # A tree created from this should contain the symlink. + tree = index.write_tree() + assert fake_symlink_relapath in tree + index.write() # Flush our changes for the checkout. + + # Check out the fake link, should be a link then. + assert not S_ISLNK(os.stat(fake_symlink_path)[ST_MODE]) + os.remove(fake_symlink_path) + index.checkout(fake_symlink_path) + + # On Windows, we currently assume we will never get symlinks. + if sys.platform == "win32": + # Symlinks should contain the link as text (which is what a + # symlink actually is). + with open(fake_symlink_path, "rt") as fd: + self.assertEqual(fd.read(), link_target) + else: + self.assertTrue(S_ISLNK(os.lstat(fake_symlink_path)[ST_MODE])) + + # TEST RENAMING + def assert_mv_rval(rval): + for source, dest in rval: + assert not osp.exists(source) and osp.exists(dest) + # END for each renamed item + + # END move assertion utility + + self.assertRaises(ValueError, index.move, ["just_one_path"]) + # Try to move a file onto an existing file. + files = ["AUTHORS", "LICENSE"] + self.assertRaises(GitCommandError, index.move, files) + + # Again, with force. + assert_mv_rval(index.move(files, f=True)) + + # Move files into a directory - dry run. + paths = ["LICENSE", "VERSION", "doc"] + rval = index.move(paths, dry_run=True) + self.assertEqual(len(rval), 2) + assert osp.exists(paths[0]) + + # Again, no dry run. + rval = index.move(paths) + assert_mv_rval(rval) + + # Move dir into dir. + rval = index.move(["doc", "test"]) + assert_mv_rval(rval) + + # TEST PATH REWRITING + ###################### + count = [0] + + def rewriter(entry): + rval = str(count[0]) + count[0] += 1 + return rval + + # END rewriter + + def make_paths(): + """Help out the test by yielding two existing paths and one new path.""" + yield "CHANGES" + yield "ez_setup.py" + yield index.entries[index.entry_key("README", 0)] + yield index.entries[index.entry_key(".gitignore", 0)] + + for fid in range(3): + fname = "newfile%i" % fid + with open(fname, "wb") as fd: + fd.write(b"abcd") + yield Blob(rw_repo, Blob.NULL_BIN_SHA, 0o100644, fname) + # END for each new file + + # END path producer + paths = list(make_paths()) + self._assert_entries(index.add(paths, path_rewriter=rewriter)) + + for filenum in range(len(paths)): + assert index.entry_key(str(filenum), 0) in index.entries + + # TEST RESET ON PATHS + ###################### + arela = "aa" + brela = "bb" + afile = self._make_file(arela, "adata", rw_repo) + bfile = self._make_file(brela, "bdata", rw_repo) + akey = index.entry_key(arela, 0) + bkey = index.entry_key(brela, 0) + keys = (akey, bkey) + absfiles = (afile, bfile) + files = (arela, brela) + + for fkey in keys: + assert fkey not in index.entries + + index.add(files, write=True) + nc = index.commit("2 files committed", head=False) + + for fkey in keys: + assert fkey in index.entries + + # Just the index. + index.reset(paths=(arela, afile)) + assert akey not in index.entries + assert bkey in index.entries + + # Now with working tree - files on disk as well as entries must be recreated. + rw_repo.head.commit = nc + for absfile in absfiles: + os.remove(absfile) + + index.reset(working_tree=True, paths=files) + + for fkey in keys: + assert fkey in index.entries + for absfile in absfiles: + assert osp.isfile(absfile) @with_rw_repo("HEAD") def test_compare_write_tree(self, rw_repo): @@ -1015,10 +1064,31 @@ class Mocked: rel = index._to_relative_path(path) self.assertEqual(rel, os.path.relpath(path, root)) + def test__to_relative_path_absolute_trailing_slash(self): + repo_root = os.path.join(osp.abspath(os.sep), "directory1", "repo_root") + + class Mocked: + bare = False + git_dir = repo_root + working_tree_dir = repo_root + + repo = Mocked() + path = os.path.join(repo_root, f"directory2{os.sep}") + index = IndexFile(repo) + + expected_path = f"directory2{os.sep}" + actual_path = index._to_relative_path(path) + self.assertEqual(expected_path, actual_path) + + with mock.patch("git.index.base.os.path") as ospath_mock: + ospath_mock.relpath.return_value = f"directory2{os.sep}" + actual_path = index._to_relative_path(path) + self.assertEqual(expected_path, actual_path) + @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Absent, reason="Can't run a hook on Windows without bash.exe.", - rasies=HookExecutionError, + raises=HookExecutionError, ) @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.WslNoDistro, @@ -1077,7 +1147,7 @@ def test_hook_uses_shell_not_from_cwd(self, rw_dir, case): @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Absent, reason="Can't run a hook on Windows without bash.exe.", - rasies=HookExecutionError, + raises=HookExecutionError, ) @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.WslNoDistro, @@ -1120,7 +1190,7 @@ def test_pre_commit_hook_fail(self, rw_repo): @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Absent, reason="Can't run a hook on Windows without bash.exe.", - rasies=HookExecutionError, + raises=HookExecutionError, ) @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Wsl, @@ -1173,7 +1243,7 @@ def test_commit_msg_hook_fail(self, rw_repo): raise AssertionError("Should have caught a HookExecutionError") @with_rw_repo("HEAD") - def test_index_add_pathlike(self, rw_repo): + def test_index_add_pathlib(self, rw_repo): git_dir = Path(rw_repo.git_dir) file = git_dir / "file.txt" @@ -1181,6 +1251,69 @@ def test_index_add_pathlike(self, rw_repo): rw_repo.index.add(file) + @with_rw_repo("HEAD") + def test_index_add_pathlike(self, rw_repo): + git_dir = Path(rw_repo.git_dir) + + file = git_dir / "file.txt" + file.touch() + + rw_repo.index.add(PathLikeMock(str(file))) + + @with_rw_repo("HEAD") + def test_index_add_non_normalized_path(self, rw_repo): + git_dir = Path(rw_repo.git_dir) + + file = git_dir / "file.txt" + file.touch() + non_normalized_path = file.as_posix() + if os.name != "nt": + non_normalized_path = "/" + non_normalized_path[1:].replace("/", "//") + + rw_repo.index.add(non_normalized_path) + + def test_index_file_v3(self): + index = IndexFile(self.rorepo, fixture_path("index_extended_flags")) + assert index.entries + assert index.version == 3 + assert len(index.entries) == 4 + assert index.entries[("init.t", 0)].skip_worktree + + # Write the data - it must match the original. + with tempfile.NamedTemporaryFile() as tmpfile: + index.write(tmpfile.name) + assert Path(tmpfile.name).read_bytes() == Path(fixture_path("index_extended_flags")).read_bytes() + + @with_rw_directory + def test_index_file_v3_with_git_command(self, tmp_dir): + tmp_dir = Path(tmp_dir) + with cwd(tmp_dir): + git = Git(tmp_dir) + git.init() + + file = tmp_dir / "file.txt" + file.write_text("hello") + git.add("--intent-to-add", "file.txt") # intent-to-add sets extended flag + + repo = Repo(tmp_dir) + index = repo.index + + assert len(index.entries) == 1 + assert index.version == 3 + entry = list(index.entries.values())[0] + assert entry.path == "file.txt" + assert entry.intent_to_add + + file2 = tmp_dir / "file2.txt" + file2.write_text("world") + index.add(["file2.txt"]) + index.write() + + status_str = git.status(porcelain=True) + status_lines = status_str.splitlines() + assert " A file.txt" in status_lines + assert "A file2.txt" in status_lines + class TestIndexUtils: @pytest.mark.parametrize("file_path_type", [str, Path]) diff --git a/test/test_installation.py b/test/test_installation.py index ae6472e98..7c82bd403 100644 --- a/test/test_installation.py +++ b/test/test_installation.py @@ -2,6 +2,7 @@ # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ import ast +import functools import os import subprocess @@ -11,50 +12,22 @@ class TestInstallation(TestBase): @with_rw_directory def test_installation(self, rw_dir): - venv = self._set_up_venv(rw_dir) + venv, run = self._set_up_venv(rw_dir) - result = subprocess.run( - [venv.pip, "install", "."], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - self.assertEqual( - 0, - result.returncode, - msg=result.stderr or result.stdout or "Can't install project", - ) + result = run([venv.pip, "install", "."]) + self._check_result(result, "Can't install project") - result = subprocess.run( - [venv.python, "-c", "import git"], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - self.assertEqual( - 0, - result.returncode, - msg=result.stderr or result.stdout or "Self-test failed", - ) + result = run([venv.python, "-c", "import git"]) + self._check_result(result, "Self-test failed") - result = subprocess.run( - [venv.python, "-c", "import gitdb; import smmap"], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - self.assertEqual( - 0, - result.returncode, - msg=result.stderr or result.stdout or "Dependencies not installed", - ) + result = run([venv.python, "-c", "import gitdb; import smmap"]) + self._check_result(result, "Dependencies not installed") # Even IF gitdb or any other dependency is supplied during development by # inserting its location into PYTHONPATH or otherwise patched into sys.path, # make sure it is not wrongly inserted as the *first* entry. - result = subprocess.run( - [venv.python, "-c", "import sys; import git; print(sys.path)"], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - syspath = result.stdout.decode("utf-8").splitlines()[0] + result = run([venv.python, "-c", "import sys; import git; print(sys.path)"]) + syspath = result.stdout.splitlines()[0] syspath = ast.literal_eval(syspath) self.assertEqual( "", @@ -64,10 +37,37 @@ def test_installation(self, rw_dir): @staticmethod def _set_up_venv(rw_dir): + # Initialize the virtual environment. venv = VirtualEnvironment(rw_dir, with_pip=True) + + # Make its src directory a symlink to our own top-level source tree. os.symlink( os.path.dirname(os.path.dirname(__file__)), venv.sources, target_is_directory=True, ) - return venv + + # Create a convenience function to run commands in it. + run = functools.partial( + subprocess.run, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + cwd=venv.sources, + env={**os.environ, "PYTHONWARNINGS": "error"}, + ) + + return venv, run + + def _check_result(self, result, failure_summary): + self.assertEqual( + 0, + result.returncode, + msg=self._prepare_failure_message(result, failure_summary), + ) + + @staticmethod + def _prepare_failure_message(result, failure_summary): + stdout = result.stdout.rstrip() + stderr = result.stderr.rstrip() + return f"{failure_summary}\n\nstdout:\n{stdout}\n\nstderr:\n{stderr}" diff --git a/test/test_quick_doc.py b/test/test_quick_doc.py index 4ef75f4aa..98658e02f 100644 --- a/test/test_quick_doc.py +++ b/test/test_quick_doc.py @@ -173,7 +173,7 @@ def test_cloned_repo_object(self, local_dir): # [15-test_cloned_repo_object] def print_files_from_git(root, level=0): for entry in root: - print(f'{"-" * 4 * level}| {entry.path}, {entry.type}') + print(f"{'-' * 4 * level}| {entry.path}, {entry.type}") if entry.type == "tree": print_files_from_git(entry, level + 1) diff --git a/test/test_refs.py b/test/test_refs.py index 08096e69e..d77b34eba 100644 --- a/test/test_refs.py +++ b/test/test_refs.py @@ -3,6 +3,7 @@ # This module is part of GitPython and is released under the # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +import contextlib from itertools import chain import os.path as osp from pathlib import Path @@ -18,6 +19,7 @@ RefLog, Reference, RemoteReference, + Repo, SymbolicReference, TagReference, ) @@ -25,10 +27,22 @@ import git.refs as refs from git.util import Actor -from test.lib import TestBase, with_rw_repo +from test.lib import TestBase, with_rw_repo, PathLikeMock class TestRefs(TestBase): + @contextlib.contextmanager + def _repo_with_initial_commit(self, base_dir): + repo_dir = base_dir / "repo" + repo = Repo.init(repo_dir) + (repo_dir / "file.txt").write_text("initial\n", encoding="utf-8") + repo.index.add(["file.txt"]) + repo.index.commit("initial") + try: + yield repo + finally: + repo.git.clear_cache() + def test_from_path(self): # Should be able to create any reference directly. for ref_type in (Reference, Head, TagReference, RemoteReference): @@ -43,6 +57,25 @@ def test_from_path(self): self.assertRaises(ValueError, TagReference, self.rorepo, "refs/invalid/tag") # Works without path check. TagReference(self.rorepo, "refs/invalid/tag", check_path=False) + # Check remoteness + assert Reference(self.rorepo, "refs/remotes/origin").is_remote() + + def test_from_pathlike(self): + # Should be able to create any reference directly. + for ref_type in (Reference, Head, TagReference, RemoteReference): + for name in ("rela_name", "path/rela_name"): + full_path = ref_type.to_full_path(PathLikeMock(name)) + instance = ref_type.from_path(self.rorepo, PathLikeMock(full_path)) + assert isinstance(instance, ref_type) + # END for each name + # END for each type + + # Invalid path. + self.assertRaises(ValueError, TagReference, self.rorepo, "refs/invalid/tag") + # Works without path check. + TagReference(self.rorepo, PathLikeMock("refs/invalid/tag"), check_path=False) + # Check remoteness + assert Reference(self.rorepo, PathLikeMock("refs/remotes/origin")).is_remote() def test_tag_base(self): tag_object_refs = [] @@ -629,6 +662,115 @@ def test_refs_outside_repo(self): ref_file_name = Path(ref_file.name).name self.assertRaises(BadName, self.rorepo.commit, f"../../{ref_file_name}") + def test_reference_create_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_write.txt" + + self.assertRaises(ValueError, Reference.create, repo, "../../../outside_write.txt", "HEAD") + assert not outside_path.exists() + + def test_symbolic_reference_create_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_write.txt" + + self.assertRaises(ValueError, SymbolicReference.create, repo, "../../outside_write.txt", "HEAD") + assert not outside_path.exists() + + def test_symbolic_reference_set_reference_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_write.txt" + + self.assertRaises(ValueError, SymbolicReference(repo, "../../outside_write.txt").set_reference, "HEAD") + assert not outside_path.exists() + + def test_symbolic_reference_rename_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_move.txt" + ref = SymbolicReference.create(repo, "SAFE_RENAME_SOURCE", "HEAD") + + self.assertRaises(ValueError, ref.rename, "../../outside_move.txt") + assert not outside_path.exists() + assert Path(ref.abspath).is_file() + + def test_symbolic_reference_delete_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_delete.txt" + outside_path.write_text("do not delete\n", encoding="utf-8") + + self.assertRaises(ValueError, SymbolicReference.delete, repo, "../../outside_delete.txt") + assert outside_path.read_text(encoding="utf-8") == "do not delete\n" + + def test_symbolic_reference_log_append_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_reflog.txt" + + ref = SymbolicReference(repo, "../../../outside_reflog.txt") + self.assertRaises( + ValueError, ref.log_append, Commit.NULL_BIN_SHA, "do not write", repo.head.commit.binsha + ) + assert not outside_path.exists() + + def test_symbolic_reference_set_reference_rejects_symlink_escape(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_dir = base_dir / "outside_refs" + outside_dir.mkdir() + outside_path = outside_dir / "escaped" + + refs_heads_dir = Path(repo.common_dir) / "refs" / "heads" + refs_heads_dir.mkdir(parents=True, exist_ok=True) + symlink_path = refs_heads_dir / "link_out" + try: + symlink_path.symlink_to(outside_dir, target_is_directory=True) + except (OSError, NotImplementedError) as ex: + self.skipTest("symlinks unavailable on this platform: %s" % ex) + if osp.realpath(symlink_path / "escaped") == osp.abspath(symlink_path / "escaped"): + self.skipTest("realpath does not resolve directory symlinks on this platform") + + ref = SymbolicReference(repo, "refs/heads/link_out/escaped") + self.assertRaises(ValueError, ref.set_reference, "HEAD") + assert not outside_path.exists() + + def test_remote_reference_delete_cleanup_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + git_dir = base_dir / "repo" / ".git" + git_dir.mkdir(parents=True) + outside_path = base_dir / "outside_remote_delete.txt" + outside_path.write_text("do not delete\n", encoding="utf-8") + + class GitStub: + branch_called = False + + def branch(self, *args): + self.branch_called = True + + class RepoStub: + pass + + repo = RepoStub() + repo.git = GitStub() + repo.common_dir = str(git_dir) + repo.git_dir = str(git_dir) + ref = RemoteReference(repo, "../../outside_remote_delete.txt", check_path=False) + + self.assertRaises(ValueError, RemoteReference.delete, repo, ref) + assert not repo.git.branch_called + assert outside_path.read_text(encoding="utf-8") == "do not delete\n" + def test_validity_ref_names(self): """Ensure ref names are checked for validity. diff --git a/test/test_remote.py b/test/test_remote.py index 5ddb41bc0..2230c8df4 100644 --- a/test/test_remote.py +++ b/test/test_remote.py @@ -44,7 +44,7 @@ class TestRemoteProgress(RemoteProgress): __slots__ = ("_seen_lines", "_stages_per_op", "_num_progress_messages") - def __init__(self): + def __init__(self) -> None: super().__init__() self._seen_lines = [] self._stages_per_op = {} @@ -103,6 +103,9 @@ def assert_received_message(self): assert self._num_progress_messages +TestRemoteProgress.__test__ = False # type: ignore + + class TestRemote(TestBase): def tearDown(self): gc.collect() @@ -684,7 +687,12 @@ def test_multiple_urls(self, rw_repo): def test_fetch_error(self): rem = self.rorepo.remote("origin") - with self.assertRaisesRegex(GitCommandError, "[Cc]ouldn't find remote ref __BAD_REF__"): + msg = ( + r"[Cc]ouldn't find remote ref __BAD_REF__|" + r"could not read Username|" + r"expected flush after ref listing" + ) + with self.assertRaisesRegex(GitCommandError, msg): rem.fetch("__BAD_REF__") @with_rw_repo("0.1.6", bare=False) @@ -824,7 +832,7 @@ def test_fetch_unsafe_options(self, rw_repo): remote = rw_repo.remote("origin") tmp_dir = Path(tdir) tmp_file = tmp_dir / "pwn" - unsafe_options = [{"upload-pack": f"touch {tmp_file}"}] + unsafe_options = [{"upload-pack": f"touch {tmp_file}"}, {"upload_pack": f"touch {tmp_file}"}] for unsafe_option in unsafe_options: with self.assertRaises(UnsafeOptionError): remote.fetch(**unsafe_option) @@ -892,7 +900,7 @@ def test_pull_unsafe_options(self, rw_repo): remote = rw_repo.remote("origin") tmp_dir = Path(tdir) tmp_file = tmp_dir / "pwn" - unsafe_options = [{"upload-pack": f"touch {tmp_file}"}] + unsafe_options = [{"upload-pack": f"touch {tmp_file}"}, {"upload_pack": f"touch {tmp_file}"}] for unsafe_option in unsafe_options: with self.assertRaises(UnsafeOptionError): remote.pull(**unsafe_option) @@ -961,10 +969,9 @@ def test_push_unsafe_options(self, rw_repo): tmp_dir = Path(tdir) tmp_file = tmp_dir / "pwn" unsafe_options = [ - { - "receive-pack": f"touch {tmp_file}", - "exec": f"touch {tmp_file}", - } + {"receive-pack": f"touch {tmp_file}"}, + {"receive_pack": f"touch {tmp_file}"}, + {"exec": f"touch {tmp_file}"}, ] for unsafe_option in unsafe_options: assert not tmp_file.exists() @@ -988,10 +995,9 @@ def test_push_unsafe_options_allowed(self, rw_repo): tmp_dir = Path(tdir) tmp_file = tmp_dir / "pwn" unsafe_options = [ - { - "receive-pack": f"touch {tmp_file}", - "exec": f"touch {tmp_file}", - } + {"receive-pack": f"touch {tmp_file}"}, + {"receive_pack": f"touch {tmp_file}"}, + {"exec": f"touch {tmp_file}"}, ] for unsafe_option in unsafe_options: # The options will be allowed, but the command will fail. diff --git a/test/test_repo.py b/test/test_repo.py index e38da5bb6..d2dd1ea5d 100644 --- a/test/test_repo.py +++ b/test/test_repo.py @@ -14,7 +14,8 @@ import pickle import sys import tempfile -from unittest import mock, skip +from unittest import mock +from pathlib import Path import pytest @@ -36,11 +37,11 @@ Submodule, Tree, ) -from git.exc import BadObject, UnsafeOptionError, UnsafeProtocolError +from git.exc import BadObject from git.repo.fun import touch from git.util import bin_to_hex, cwd, cygpath, join_path_native, rmfile, rmtree -from test.lib import TestBase, fixture, with_rw_directory, with_rw_repo +from test.lib import TestBase, fixture, with_rw_directory, with_rw_repo, PathLikeMock def iter_flatten(lol): @@ -105,6 +106,11 @@ def test_repo_creation_pathlib(self, rw_repo): r_from_gitdir = Repo(pathlib.Path(rw_repo.git_dir)) self.assertEqual(r_from_gitdir.git_dir, rw_repo.git_dir) + @with_rw_repo("0.3.2.1") + def test_repo_creation_pathlike(self, rw_repo): + r_from_gitdir = Repo(PathLikeMock(rw_repo.git_dir)) + self.assertEqual(r_from_gitdir.git_dir, rw_repo.git_dir) + def test_description(self): txt = "Test repository" self.rorepo.description = txt @@ -140,6 +146,23 @@ def test_commit_from_revision(self): self.assertEqual(commit.type, "commit") self.assertEqual(self.rorepo.commit(commit), commit) + @with_rw_directory + def test_commit_from_tag_starting_with_at(self, rw_dir): + repo = Repo.init(rw_dir) + with repo.config_writer() as writer: + writer.set_value("user", "name", "GitPython Tests") + writer.set_value("user", "email", "gitpython@example.com") + + tracked_file = Path(rw_dir) / "hello.txt" + tracked_file.write_text("hello") + repo.index.add([str(tracked_file)]) + commit = repo.index.commit("init") + repo.create_tag("@foo") + + self.assertEqual(repo.tags["@foo"].commit, commit) + self.assertEqual(repo.commit("@"), commit) + self.assertEqual(repo.commit("@foo"), commit) + def test_commits(self): mc = 10 commits = list(self.rorepo.iter_commits("0.1.6", max_count=mc)) @@ -214,285 +237,6 @@ def test_date_format(self, rw_dir): # @-timestamp is the format used by git commit hooks. repo.index.commit("Commit messages", commit_date="@1400000000 +0000") - @with_rw_directory - def test_clone_from_pathlib(self, rw_dir): - original_repo = Repo.init(osp.join(rw_dir, "repo")) - - Repo.clone_from(original_repo.git_dir, pathlib.Path(rw_dir) / "clone_pathlib") - - @with_rw_directory - def test_clone_from_pathlib_withConfig(self, rw_dir): - original_repo = Repo.init(osp.join(rw_dir, "repo")) - - cloned = Repo.clone_from( - original_repo.git_dir, - pathlib.Path(rw_dir) / "clone_pathlib_withConfig", - multi_options=[ - "--recurse-submodules=repo", - "--config core.filemode=false", - "--config submodule.repo.update=checkout", - "--config filter.lfs.clean='git-lfs clean -- %f'", - ], - allow_unsafe_options=True, - ) - - self.assertEqual(cloned.config_reader().get_value("submodule", "active"), "repo") - self.assertEqual(cloned.config_reader().get_value("core", "filemode"), False) - self.assertEqual(cloned.config_reader().get_value('submodule "repo"', "update"), "checkout") - self.assertEqual( - cloned.config_reader().get_value('filter "lfs"', "clean"), - "git-lfs clean -- %f", - ) - - def test_clone_from_with_path_contains_unicode(self): - with tempfile.TemporaryDirectory() as tmpdir: - unicode_dir_name = "\u0394" - path_with_unicode = os.path.join(tmpdir, unicode_dir_name) - os.makedirs(path_with_unicode) - - try: - Repo.clone_from( - url=self._small_repo_url(), - to_path=path_with_unicode, - ) - except UnicodeEncodeError: - self.fail("Raised UnicodeEncodeError") - - @with_rw_directory - @skip( - """The referenced repository was removed, and one needs to set up a new - password controlled repo under the org's control.""" - ) - def test_leaking_password_in_clone_logs(self, rw_dir): - password = "fakepassword1234" - try: - Repo.clone_from( - url="https://fakeuser:{}@fakerepo.example.com/testrepo".format(password), - to_path=rw_dir, - ) - except GitCommandError as err: - assert password not in str(err), "The error message '%s' should not contain the password" % err - # Working example from a blank private project. - Repo.clone_from( - url="https://gitlab+deploy-token-392045:mLWhVus7bjLsy8xj8q2V@gitlab.com/mercierm/test_git_python", - to_path=rw_dir, - ) - - @with_rw_repo("HEAD") - def test_clone_unsafe_options(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - unsafe_options = [ - f"--upload-pack='touch {tmp_file}'", - f"-u 'touch {tmp_file}'", - "--config=protocol.ext.allow=always", - "-c protocol.ext.allow=always", - ] - for unsafe_option in unsafe_options: - with self.assertRaises(UnsafeOptionError): - rw_repo.clone(tmp_dir, multi_options=[unsafe_option]) - assert not tmp_file.exists() - - unsafe_options = [ - {"upload-pack": f"touch {tmp_file}"}, - {"u": f"touch {tmp_file}"}, - {"config": "protocol.ext.allow=always"}, - {"c": "protocol.ext.allow=always"}, - ] - for unsafe_option in unsafe_options: - with self.assertRaises(UnsafeOptionError): - rw_repo.clone(tmp_dir, **unsafe_option) - assert not tmp_file.exists() - - @pytest.mark.xfail( - sys.platform == "win32", - reason=( - "File not created. A separate Windows command may be needed. This and the " - "currently passing test test_clone_unsafe_options must be adjusted in the " - "same way. Until then, test_clone_unsafe_options is unreliable on Windows." - ), - raises=AssertionError, - ) - @with_rw_repo("HEAD") - def test_clone_unsafe_options_allowed(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - unsafe_options = [ - f"--upload-pack='touch {tmp_file}'", - f"-u 'touch {tmp_file}'", - ] - for i, unsafe_option in enumerate(unsafe_options): - destination = tmp_dir / str(i) - assert not tmp_file.exists() - # The options will be allowed, but the command will fail. - with self.assertRaises(GitCommandError): - rw_repo.clone(destination, multi_options=[unsafe_option], allow_unsafe_options=True) - assert tmp_file.exists() - tmp_file.unlink() - - unsafe_options = [ - "--config=protocol.ext.allow=always", - "-c protocol.ext.allow=always", - ] - for i, unsafe_option in enumerate(unsafe_options): - destination = tmp_dir / str(i) - assert not destination.exists() - rw_repo.clone(destination, multi_options=[unsafe_option], allow_unsafe_options=True) - assert destination.exists() - - @with_rw_repo("HEAD") - def test_clone_safe_options(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - options = [ - "--depth=1", - "--single-branch", - "-q", - ] - for option in options: - destination = tmp_dir / option - assert not destination.exists() - rw_repo.clone(destination, multi_options=[option]) - assert destination.exists() - - @with_rw_repo("HEAD") - def test_clone_from_unsafe_options(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - unsafe_options = [ - f"--upload-pack='touch {tmp_file}'", - f"-u 'touch {tmp_file}'", - "--config=protocol.ext.allow=always", - "-c protocol.ext.allow=always", - ] - for unsafe_option in unsafe_options: - with self.assertRaises(UnsafeOptionError): - Repo.clone_from(rw_repo.working_dir, tmp_dir, multi_options=[unsafe_option]) - assert not tmp_file.exists() - - unsafe_options = [ - {"upload-pack": f"touch {tmp_file}"}, - {"u": f"touch {tmp_file}"}, - {"config": "protocol.ext.allow=always"}, - {"c": "protocol.ext.allow=always"}, - ] - for unsafe_option in unsafe_options: - with self.assertRaises(UnsafeOptionError): - Repo.clone_from(rw_repo.working_dir, tmp_dir, **unsafe_option) - assert not tmp_file.exists() - - @pytest.mark.xfail( - sys.platform == "win32", - reason=( - "File not created. A separate Windows command may be needed. This and the " - "currently passing test test_clone_from_unsafe_options must be adjusted in the " - "same way. Until then, test_clone_from_unsafe_options is unreliable on Windows." - ), - raises=AssertionError, - ) - @with_rw_repo("HEAD") - def test_clone_from_unsafe_options_allowed(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - unsafe_options = [ - f"--upload-pack='touch {tmp_file}'", - f"-u 'touch {tmp_file}'", - ] - for i, unsafe_option in enumerate(unsafe_options): - destination = tmp_dir / str(i) - assert not tmp_file.exists() - # The options will be allowed, but the command will fail. - with self.assertRaises(GitCommandError): - Repo.clone_from( - rw_repo.working_dir, destination, multi_options=[unsafe_option], allow_unsafe_options=True - ) - assert tmp_file.exists() - tmp_file.unlink() - - unsafe_options = [ - "--config=protocol.ext.allow=always", - "-c protocol.ext.allow=always", - ] - for i, unsafe_option in enumerate(unsafe_options): - destination = tmp_dir / str(i) - assert not destination.exists() - Repo.clone_from( - rw_repo.working_dir, destination, multi_options=[unsafe_option], allow_unsafe_options=True - ) - assert destination.exists() - - @with_rw_repo("HEAD") - def test_clone_from_safe_options(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - options = [ - "--depth=1", - "--single-branch", - "-q", - ] - for option in options: - destination = tmp_dir / option - assert not destination.exists() - Repo.clone_from(rw_repo.common_dir, destination, multi_options=[option]) - assert destination.exists() - - def test_clone_from_unsafe_protocol(self): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - urls = [ - f"ext::sh -c touch% {tmp_file}", - "fd::17/foo", - ] - for url in urls: - with self.assertRaises(UnsafeProtocolError): - Repo.clone_from(url, tmp_dir / "repo") - assert not tmp_file.exists() - - def test_clone_from_unsafe_protocol_allowed(self): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - urls = [ - f"ext::sh -c touch% {tmp_file}", - "fd::/foo", - ] - for url in urls: - # The URL will be allowed into the command, but the command will - # fail since we don't have that protocol enabled in the Git config file. - with self.assertRaises(GitCommandError): - Repo.clone_from(url, tmp_dir / "repo", allow_unsafe_protocols=True) - assert not tmp_file.exists() - - def test_clone_from_unsafe_protocol_allowed_and_enabled(self): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - urls = [ - f"ext::sh -c touch% {tmp_file}", - ] - allow_ext = [ - "--config=protocol.ext.allow=always", - ] - for url in urls: - # The URL will be allowed into the command, and the protocol is enabled, - # but the command will fail since it can't read from the remote repo. - assert not tmp_file.exists() - with self.assertRaises(GitCommandError): - Repo.clone_from( - url, - tmp_dir / "repo", - multi_options=allow_ext, - allow_unsafe_protocols=True, - allow_unsafe_options=True, - ) - assert tmp_file.exists() - tmp_file.unlink() - @with_rw_repo("HEAD") def test_max_chunk_size(self, repo): class TestOutputStream(TestBase): @@ -643,6 +387,15 @@ def test_is_dirty_with_path(self, rwrepo): assert rwrepo.is_dirty(path="doc") is False assert rwrepo.is_dirty(untracked_files=True, path="doc") is True + @with_rw_repo("HEAD") + def test_is_dirty_with_pathlib_and_pathlike(self, rwrepo): + with open(osp.join(rwrepo.working_dir, "git", "util.py"), "at") as f: + f.write("junk") + assert rwrepo.is_dirty(path=Path("git")) is True + assert rwrepo.is_dirty(path=PathLikeMock("git")) is True + assert rwrepo.is_dirty(path=Path("doc")) is False + assert rwrepo.is_dirty(path=PathLikeMock("doc")) is False + def test_head(self): self.assertEqual(self.rorepo.head.reference.object, self.rorepo.active_branch.object) @@ -1064,9 +817,9 @@ def test_rev_parse(self): # TODO: Dereference tag into a blob 0.1.7^{blob} - quite a special one. # Needs a tag which points to a blob. - # ref^0 returns commit being pointed to, same with ref~0, and ^{} + # ref^0 returns commit being pointed to, same with ref~0, ^{}, and ^{commit} tag = rev_parse("0.1.4") - for token in ("~0", "^0", "^{}"): + for token in ("~0", "^0", "^{}", "^{commit}"): self.assertEqual(tag.object, rev_parse("0.1.4%s" % token)) # END handle multiple tokens @@ -1112,18 +865,18 @@ def test_rev_parse(self): # Currently, nothing more is supported. self.assertRaises(NotImplementedError, rev_parse, "@{1 week ago}") - # The last position. - assert rev_parse("@{1}") != head.commit + # The previous position, if this checkout has enough reflog history. + try: + previous = rev_parse("@{1}") + except IndexError: + pass + else: + self.assertNotEqual(previous, head.commit) def test_repo_odbtype(self): target_type = GitCmdObjectDB self.assertIsInstance(self.rorepo.odb, target_type) - @pytest.mark.xfail( - sys.platform == "cygwin", - reason="Cygwin GitPython can't find submodule SHA", - raises=ValueError, - ) def test_submodules(self): self.assertEqual(len(self.rorepo.submodules), 1) # non-recursive self.assertGreaterEqual(len(list(self.rorepo.iter_submodules())), 2) @@ -1226,6 +979,46 @@ def test_empty_repo(self, rw_dir): assert "BAD MESSAGE" not in contents, "log is corrupt" + @with_rw_directory + def test_active_branch_raises_value_error_when_head_ref_is_invalid(self, rw_dir): + repo = Repo.init(rw_dir) + with open(osp.join(rw_dir, ".git", "HEAD"), "w") as f: + f.write("ref: refs/heads/.invalid\n") + + self.assertRaisesRegex( + ValueError, + r"refs/heads/\.invalid.*older clients", + lambda: repo.active_branch, + ) + + @with_rw_directory + def test_empty_repo_reftable_active_branch(self, rw_dir): + git = Git(rw_dir) + try: + git.init(ref_format="reftable") + except GitCommandError as err: + if err.status == 129: + pytest.skip("git init --ref-format is not supported by this git version") + raise + + repo = Repo(rw_dir) + self.assertEqual(repo.head.reference.name, ".invalid") + self.assertRaisesRegex( + ValueError, + r"refs/heads/\.invalid.*older clients", + lambda: repo.active_branch, + ) + + @with_rw_directory + def test_active_branch_raises_type_error_when_head_is_detached(self, rw_dir): + repo = Repo.init(rw_dir) + with open(osp.join(rw_dir, "a.txt"), "w") as f: + f.write("a") + repo.index.add(["a.txt"]) + repo.index.commit("initial commit") + repo.git.checkout(repo.head.commit.hexsha) + self.assertRaisesRegex(TypeError, "detached symbolic reference", lambda: repo.active_branch) + def test_merge_base(self): repo = self.rorepo c1 = "f6aa8d1" @@ -1296,7 +1089,7 @@ def test_is_valid_object(self): self.assertFalse(repo.is_valid_object(tag_sha, "commit")) @with_rw_directory - def test_git_work_tree_dotgit(self, rw_dir): + def test_git_work_tree_dotgit(self, rw_dir, use_relative_paths=False): """Check that we find .git as a worktree file and find the worktree based on it.""" git = Git(rw_dir) @@ -1308,7 +1101,11 @@ def test_git_work_tree_dotgit(self, rw_dir): worktree_path = join_path_native(rw_dir, "worktree_repo") if Git.is_cygwin(): worktree_path = cygpath(worktree_path) - rw_master.git.worktree("add", worktree_path, branch.name) + wt_add_kwargs = {"insert_kwargs_after": "add"} + # relative worktree paths introduced in git 2.48.0 + if use_relative_paths and git.version_info[:3] >= (2, 48, 0): + wt_add_kwargs["relative_paths"] = True + rw_master.git.worktree("add", worktree_path, branch.name, **wt_add_kwargs) # This ensures that we can read the repo's gitdir correctly. repo = Repo(worktree_path) @@ -1326,6 +1123,15 @@ def test_git_work_tree_dotgit(self, rw_dir): self.assertIsInstance(repo.heads["aaaaaaaa"], Head) + def test_git_work_tree_dotgit_relative(self): + """Check that we find .git as a worktree file containing a relative path + and find the worktree based on it.""" + if Git().version_info[:3] < (2, 48, 0): + pytest.skip("relative worktree feature unsupported, needs git 2.48.0 or later") + # this class inherits from TestCase so we can't use pytest.mark.parametrize on + # test_git_work_tree_dotgit; delegate instead + self.test_git_work_tree_dotgit(use_relative_paths=True) + @with_rw_directory def test_git_work_tree_env(self, rw_dir): """Check that we yield to GIT_WORK_TREE.""" @@ -1350,6 +1156,42 @@ def test_git_work_tree_env(self, rw_dir): self.assertEqual(r.working_tree_dir, repo_dir) self.assertEqual(r.working_dir, repo_dir) + @with_rw_directory + def test_git_work_tree_env_in_linked_worktree(self, rw_dir): + """Check that Repo() autodiscovers a linked worktree when GIT_DIR is set.""" + git = Git(rw_dir) + if git.version_info[:3] < (2, 5, 1): + raise RuntimeError("worktree feature unsupported (test needs git 2.5.1 or later)") + + rw_master = self.rorepo.clone(join_path_native(rw_dir, "master_repo")) + branch = rw_master.create_head("bbbbbbbb") + worktree_path = join_path_native(rw_dir, "worktree_repo") + if Git.is_cygwin(): + worktree_path = cygpath(worktree_path) + + rw_master.git.worktree("add", worktree_path, branch.name) + + git_dir = Git(worktree_path).rev_parse("--git-dir") + + patched_env = dict(os.environ) + patched_env["GIT_DIR"] = git_dir + patched_env.pop("GIT_WORK_TREE", None) + patched_env.pop("GIT_COMMON_DIR", None) + + with mock.patch.dict(os.environ, patched_env, clear=True): + old_cwd = os.getcwd() + try: + os.chdir(worktree_path) + + explicit = Repo(os.getcwd()) + autodiscovered = Repo() + + self.assertTrue(osp.samefile(explicit.working_tree_dir, worktree_path)) + self.assertTrue(osp.samefile(autodiscovered.working_tree_dir, worktree_path)) + self.assertTrue(osp.samefile(autodiscovered.working_tree_dir, explicit.working_tree_dir)) + finally: + os.chdir(old_cwd) + @with_rw_directory def test_rebasing(self, rw_dir): r = Repo.init(rw_dir) diff --git a/test/test_rev_parse.py b/test/test_rev_parse.py new file mode 100644 index 000000000..b00347668 --- /dev/null +++ b/test/test_rev_parse.py @@ -0,0 +1,159 @@ +# Copyright (C) 2026 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +from pathlib import Path + +import pytest + +from git import Repo +from git.refs import RemoteReference +from git.refs import SymbolicReference +from gitdb.exc import BadName + + +def _write(repo, path, content): + full_path = Path(repo.working_tree_dir) / path + full_path.parent.mkdir(parents=True, exist_ok=True) + full_path.write_text(content) + repo.index.add([str(full_path)]) + + +@pytest.fixture +def rev_parse_repo(tmp_path): + repo = Repo.init(tmp_path) + with repo.config_writer() as writer: + writer.set_value("user", "name", "GitPython Tests") + writer.set_value("user", "email", "gitpython@example.com") + + _write(repo, "README.md", "root\n") + _write(repo, "CHANGES", "root changes\n") + _write(repo, "dir/file.txt", "root file\n") + root = repo.index.commit("root commit") + repo.create_tag("ann", ref=root, message="annotated tag") + + _write(repo, "README.md", "release\n") + release = repo.index.commit("release candidate") + repo.create_tag("v1.0", ref=release) + main = repo.active_branch + + _write(repo, "side.txt", "side\n") + side_commit = repo.index.commit("side branch", parent_commits=[root], head=False, skip_hooks=True) + repo.create_head("side", side_commit) + + merge = repo.index.commit("merge side", parent_commits=[release, side_commit], skip_hooks=True) + repo.head.log_append(side_commit.binsha, "checkout: moving from side to main", merge.binsha) + + repo.create_head("aaaaaaaa", merge) + repo.create_tag("@foo", ref=merge) + + return { + "repo": repo, + "root": root, + "release": release, + "side": side_commit, + "merge": merge, + "main": main, + } + + +def test_rev_parse_names_hex_and_describe_forms(rev_parse_repo): + repo = rev_parse_repo["repo"] + release = rev_parse_repo["release"] + merge = rev_parse_repo["merge"] + + assert repo.rev_parse("@") == merge + assert repo.rev_parse("@foo") == merge + assert repo.rev_parse("aaaaaaaa") == merge + assert repo.rev_parse(merge.hexsha[:7]) == merge + describe_name = "anything-9-g%s" % merge.hexsha[:7] + assert repo.rev_parse("v1.0-1-g%s" % merge.hexsha[:7]) == merge + assert repo.rev_parse(describe_name) == merge + assert repo.rev_parse("%s-dirty" % merge.hexsha[:7]) == merge + + repo.create_tag(describe_name, ref=release) + assert repo.rev_parse(describe_name) == release + + +def test_rev_parse_navigation_and_peeling(rev_parse_repo): + repo = rev_parse_repo["repo"] + root = rev_parse_repo["root"] + release = rev_parse_repo["release"] + side = rev_parse_repo["side"] + merge = rev_parse_repo["merge"] + tag = repo.rev_parse("ann") + + assert repo.rev_parse("HEAD^0") == merge + assert repo.rev_parse("HEAD~0") == merge + assert repo.rev_parse("HEAD^1") == release + assert repo.rev_parse("HEAD^2") == side + assert repo.rev_parse("HEAD~") == release + assert repo.rev_parse("HEAD^^") == root + + assert tag.type == "tag" + assert repo.rev_parse("ann^{object}") == tag + assert repo.rev_parse("ann^{tag}") == tag + assert repo.rev_parse("ann^{}") == root + assert repo.rev_parse("ann^{commit}") == root + assert repo.rev_parse("HEAD^{tree}") == merge.tree + with pytest.raises(ValueError): + repo.rev_parse("HEAD^{/}") + + +def test_rev_parse_tree_and_index_paths(rev_parse_repo): + repo = rev_parse_repo["repo"] + merge = rev_parse_repo["merge"] + + assert repo.rev_parse("HEAD:") == merge.tree + assert repo.rev_parse("HEAD:README.md") == merge.tree["README.md"] + assert repo.rev_parse("HEAD^{tree}:README.md") == merge.tree["README.md"] + assert repo.rev_parse(":README.md").binsha == merge.tree["README.md"].binsha + assert repo.rev_parse(":0:README.md").binsha == merge.tree["README.md"].binsha + + +def test_rev_parse_reflog_selectors(rev_parse_repo): + repo = rev_parse_repo["repo"] + merge = rev_parse_repo["merge"] + side = rev_parse_repo["side"] + main = rev_parse_repo["main"] + release = rev_parse_repo["release"] + + assert repo.rev_parse("@{0}") == merge + assert repo.rev_parse("@{+0}") == merge + assert repo.rev_parse("@{1}") == release + assert repo.rev_parse("%s@{0}" % main.name) == merge + assert repo.rev_parse("@{-1}") == side + + SymbolicReference.create(repo, "refs/remotes/origin/%s" % main.name, merge) + main.set_tracking_branch(RemoteReference(repo, "refs/remotes/origin/%s" % main.name)) + assert repo.rev_parse("%s@{upstream}" % main.name) == merge + + +def test_rev_parse_commit_message_search(rev_parse_repo): + repo = rev_parse_repo["repo"] + release = rev_parse_repo["release"] + merge = rev_parse_repo["merge"] + + assert repo.rev_parse(":/release") == release + assert repo.rev_parse("HEAD^{/release}") == release + assert repo.rev_parse("HEAD^{/!-release}") == merge + + +def test_rev_parse_rejects_invalid_object_specs(rev_parse_repo): + repo = rev_parse_repo["repo"] + + with pytest.raises(ValueError): + repo.rev_parse(":") + with pytest.raises(ValueError): + repo.rev_parse(":/") + with pytest.raises(ValueError): + repo.rev_parse(":/[") + with pytest.raises(ValueError): + repo.rev_parse("HEAD^{/[}") + with pytest.raises(ValueError): + repo.rev_parse("@{-0}") + with pytest.raises(ValueError): + repo.rev_parse("HEAD^{invalid}") + with pytest.raises(BadName): + repo.rev_parse(":missing") diff --git a/test/test_stats.py b/test/test_stats.py index eec73c802..91d2cf6ae 100644 --- a/test/test_stats.py +++ b/test/test_stats.py @@ -14,13 +14,19 @@ def test_list_from_string(self): output = fixture("diff_numstat").decode(defenc) stats = Stats._list_from_string(self.rorepo, output) - self.assertEqual(2, stats.total["files"]) - self.assertEqual(52, stats.total["lines"]) - self.assertEqual(29, stats.total["insertions"]) + self.assertEqual(3, stats.total["files"]) + self.assertEqual(59, stats.total["lines"]) + self.assertEqual(36, stats.total["insertions"]) self.assertEqual(23, stats.total["deletions"]) self.assertEqual(29, stats.files["a.txt"]["insertions"]) self.assertEqual(18, stats.files["a.txt"]["deletions"]) + self.assertEqual("M", stats.files["a.txt"]["change_type"]) self.assertEqual(0, stats.files["b.txt"]["insertions"]) self.assertEqual(5, stats.files["b.txt"]["deletions"]) + self.assertEqual("M", stats.files["b.txt"]["change_type"]) + + self.assertEqual(7, stats.files["c.txt"]["insertions"]) + self.assertEqual(0, stats.files["c.txt"]["deletions"]) + self.assertEqual("A", stats.files["c.txt"]["change_type"]) diff --git a/test/test_submodule.py b/test/test_submodule.py index d88f9dab0..778d22e3f 100644 --- a/test/test_submodule.py +++ b/test/test_submodule.py @@ -28,7 +28,7 @@ from git.repo.fun import find_submodule_git_dir, touch from git.util import HIDE_WINDOWS_KNOWN_ERRORS, join_path_native, to_native_path_linux -from test.lib import TestBase, with_rw_directory, with_rw_repo +from test.lib import TestBase, with_rw_directory, with_rw_repo, PathLikeMock @contextlib.contextmanager @@ -58,6 +58,7 @@ def update(self, op, cur_count, max_count, message=""): print(op, cur_count, max_count, message) +TestRootProgress.__test__ = False prog = TestRootProgress() @@ -174,6 +175,10 @@ def _do_base_tests(self, rwrepo): sma = Submodule.add(rwrepo, sm.name, sm.path) assert sma.path == sm.path + # Adding existing as pathlike + sma = Submodule.add(rwrepo, sm.name, PathLikeMock(sm.path)) + assert sma.path == sm.path + # No url and no module at path fails. self.assertRaises(ValueError, Submodule.add, rwrepo, "newsubm", "pathtorepo", url=None) @@ -475,11 +480,6 @@ def test_base_rw(self, rwrepo): def test_base_bare(self, rwrepo): self._do_base_tests(rwrepo) - @pytest.mark.xfail( - sys.platform == "cygwin", - reason="Cygwin GitPython can't find submodule SHA", - raises=ValueError, - ) @pytest.mark.xfail( HIDE_WINDOWS_KNOWN_ERRORS, reason=( @@ -508,9 +508,9 @@ def test_root_module(self, rwrepo): with rm.config_writer(): pass - # Deep traversal gitdb / async. + # Deep traversal yields gitdb and its nested smmap. rsmsp = [sm.path for sm in rm.traverse()] - assert len(rsmsp) >= 2 # gitdb and async [and smmap], async being a child of gitdb. + assert rsmsp == ["git/ext/gitdb", "gitdb/ext/smmap"] # Cannot set the parent commit as root module's path didn't exist. self.assertRaises(ValueError, rm.set_parent_commit, "HEAD") @@ -753,6 +753,22 @@ def test_add_empty_repo(self, rwdir): ) # END for each checkout mode + @with_rw_directory + @_patch_git_config("protocol.file.allow", "always") + def test_update_submodule_with_relative_path(self, rwdir): + repo_path = osp.join(rwdir, "parent") + repo = git.Repo.init(repo_path) + module_repo_path = osp.join(rwdir, "module") + module_repo = git.Repo.init(module_repo_path) + module_repo.git.commit(m="test", allow_empty=True) + repo.git.submodule("add", "../module", "module") + repo.index.commit("add submodule") + + cloned_repo_path = osp.join(rwdir, "cloned_repo") + cloned_repo = git.Repo.clone_from(repo_path, cloned_repo_path) + + cloned_repo.submodule_update(init=True, recursive=True) + @with_rw_directory @_patch_git_config("protocol.file.allow", "always") def test_list_only_valid_submodules(self, rwdir): @@ -916,7 +932,7 @@ def assert_exists(sm, value=True): csm.repo.index.commit("Have to commit submodule change for algorithm to pick it up") assert csm.url == "bar" - self.assertRaises( + self.assertRaises( # noqa: B017 Exception, rsm.update, recursive=True, @@ -990,6 +1006,7 @@ def test_rename(self, rwdir): # garbage collector detailed in https://github.com/python/cpython/issues/97922.) if sys.platform == "win32" and sys.version_info >= (3, 12): gc.collect() + gc.collect() # Some finalizer scenarios need two collections, at least in theory. new_path = "renamed/myname" assert sm.move(new_path).name == new_path @@ -1310,6 +1327,17 @@ def test_submodule_update_unsafe_options(self, rw_repo): submodule.update(clone_multi_options=[unsafe_option]) assert not tmp_file.exists() + @with_rw_repo("HEAD") + def test_submodule_update_unsafe_options_are_checked_after_splitting_multi_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = Path(tdir) + payload = "--single-branch --config protocol.ext.allow=always" + submodule = Submodule(rw_repo, b"\0" * 20, name="new", path="new", url=str(tmp_dir)) + + with self.assertRaises(UnsafeOptionError): + submodule.update(clone_multi_options=[payload]) + assert not submodule.module_exists() + @with_rw_repo("HEAD") def test_submodule_update_unsafe_options_allowed(self, rw_repo): with tempfile.TemporaryDirectory() as tdir: @@ -1334,3 +1362,23 @@ def test_submodule_update_unsafe_options_allowed(self, rw_repo): for unsafe_option in unsafe_options: with self.assertRaises(GitCommandError): submodule.update(clone_multi_options=[unsafe_option], allow_unsafe_options=True) + + @with_rw_directory + @_patch_git_config("protocol.file.allow", "always") + def test_submodule_update_relative_url(self, rwdir): + parent_path = osp.join(rwdir, "parent") + parent_repo = git.Repo.init(parent_path) + submodule_path = osp.join(rwdir, "module") + submodule_repo = git.Repo.init(submodule_path) + submodule_repo.git.commit(m="initial commit", allow_empty=True) + + parent_repo.git.submodule("add", "../module", "module") + parent_repo.index.commit("add submodule with relative URL") + + cloned_path = osp.join(rwdir, "cloned_repo") + cloned_repo = git.Repo.clone_from(parent_path, cloned_path) + + cloned_repo.submodule_update(init=True, recursive=True) + + has_module = any(sm.name == "module" for sm in cloned_repo.submodules) + assert has_module, "Relative submodule was not updated properly" diff --git a/test/test_tree.py b/test/test_tree.py index 73158113d..629fd4d32 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -8,10 +8,14 @@ from pathlib import Path import subprocess +import pytest + from git.objects import Blob, Tree +from git.repo import Repo from git.util import cwd from test.lib import TestBase, with_rw_directory +from .lib.helper import PathLikeMock, with_rw_repo class TestTree(TestBase): @@ -126,12 +130,18 @@ def test_traverse(self): assert len(list(root)) == len(list(root.traverse(depth=1))) # Only choose trees. - trees_only = lambda i, d: i.type == "tree" + + def trees_only(i, _d): + return i.type == "tree" + trees = list(root.traverse(predicate=trees_only)) assert len(trees) == len([i for i in root.traverse() if trees_only(i, 0)]) # Test prune. - lib_folder = lambda t, d: t.path == "lib" + + def lib_folder(t, _d): + return t.path == "lib" + pruned_trees = list(root.traverse(predicate=trees_only, prune=lib_folder)) assert len(pruned_trees) < len(trees) @@ -155,3 +165,57 @@ def test_traverse(self): assert root[item.path] == item == root / item.path # END for each item assert found_slash + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_string_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + blob = repo.tree() / ".gitignore" + assert isinstance(blob, Blob) + assert blob.hexsha == "787b3d442a113b78e343deb585ab5531eb7187fa" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_pathlike_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + blob = repo.tree() / PathLikeMock(".gitignore") + assert isinstance(blob, Blob) + assert blob.hexsha == "787b3d442a113b78e343deb585ab5531eb7187fa" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_invalid_string_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + with pytest.raises(KeyError): + repo.tree() / "doesnotexist" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_invalid_pathlike_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + with pytest.raises(KeyError): + repo.tree() / PathLikeMock("doesnotexist") + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_nested_string_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + blob = repo.tree() / "git/__init__.py" + assert isinstance(blob, Blob) + assert blob.hexsha == "d87dcbdbb65d2782e14eea27e7f833a209c052f3" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_nested_pathlike_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + blob = repo.tree() / PathLikeMock("git/__init__.py") + assert isinstance(blob, Blob) + assert blob.hexsha == "d87dcbdbb65d2782e14eea27e7f833a209c052f3" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_folder_string_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + tree = repo.tree() / "git" + assert isinstance(tree, Tree) + assert tree.hexsha == "ec8ae429156d65afde4bbb3455570193b56f0977" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_folder_pathlike_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + tree = repo.tree() / PathLikeMock("git") + assert isinstance(tree, Tree) + assert tree.hexsha == "ec8ae429156d65afde4bbb3455570193b56f0977" diff --git a/test/test_util.py b/test/test_util.py index dad2f3dcd..e7453769a 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -34,6 +34,7 @@ LockFile, cygpath, decygpath, + is_cygwin_git, get_user_id, remove_password_if_present, rmtree, @@ -112,7 +113,7 @@ def test_deletes_dir_with_readonly_files(self, tmp_path): sys.platform == "cygwin", reason="Cygwin can't set the permissions that make the test meaningful.", ) - def test_avoids_changing_permissions_outside_tree(self, tmp_path): + def test_avoids_changing_permissions_outside_tree(self, tmp_path, request): # Automatically works on Windows, but on Unix requires either special handling # or refraining from attempting to fix PermissionError by making chmod calls. @@ -124,9 +125,32 @@ def test_avoids_changing_permissions_outside_tree(self, tmp_path): dir2 = tmp_path / "dir2" dir2.mkdir() - (dir2 / "symlink").symlink_to(dir1 / "file") + symlink = dir2 / "symlink" + symlink.symlink_to(dir1 / "file") dir2.chmod(stat.S_IRUSR | stat.S_IXUSR) + def preen_dir2(): + """Don't leave unwritable directories behind. + + pytest has difficulties cleaning up after the fact on some platforms, + e.g., macOS, and whines incessantly until the issue is resolved--regardless + of the pytest session. + """ + rwx = stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR + if not dir2.exists(): + return + if symlink.exists(): + try: + # Try lchmod first, if the platform supports it. + symlink.lchmod(rwx) + except NotImplementedError: + # The platform (probably win32) doesn't support lchmod; fall back to chmod. + symlink.chmod(rwx) + dir2.chmod(rwx) + rmtree(dir2) + + request.addfinalizer(preen_dir2) + try: rmtree(dir2) except PermissionError: @@ -349,6 +373,24 @@ def test_decygpath(self, wpath, cpath): assert wcpath == wpath.replace("/", "\\"), cpath +class TestIsCygwinGit: + """Tests for :func:`is_cygwin_git`""" + + def test_on_path_executable(self): + # Currently we assume tests run on Cygwin use Cygwin git. See #533 and #1455 for background. + if sys.platform == "cygwin": + assert is_cygwin_git("git") + else: + assert not is_cygwin_git("git") + + def test_none_executable(self): + assert not is_cygwin_git(None) + + def test_with_missing_uname(self): + """Test for handling when `uname` isn't in the same directory as `git`""" + assert not is_cygwin_git("/bogus_path/git") + + class _Member: """A member of an IterableList."""