diff --git a/.github/workflows/gem-push.yml b/.github/workflows/gem-push.yml new file mode 100644 index 0000000..d7c4d72 --- /dev/null +++ b/.github/workflows/gem-push.yml @@ -0,0 +1,37 @@ +name: Ruby Gem + +on: + push: + tags: + - '*' + workflow_dispatch: + +jobs: + build: + name: Build + Publish + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - uses: actions/checkout@v6 + - name: Set up Ruby 3.4 + uses: ruby/setup-ruby@v1 + with: + ruby-version: '3.4' + bundler-cache: true # runs 'bundle install' and caches installed gems automatically + + - name: Generate man page + run: bundle exec rake docs + + - name: Publish to RubyGems + run: | + mkdir -p $HOME/.gem + touch $HOME/.gem/credentials + chmod 0600 $HOME/.gem/credentials + printf -- "---\n:rubygems_api_key: ${GEM_HOST_API_KEY}\n" > $HOME/.gem/credentials + gem build *.gemspec + gem push *.gem + env: + GEM_HOST_API_KEY: "${{secrets.RUBYGEMS_AUTH_TOKEN}}" diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml new file mode 100644 index 0000000..f0cba33 --- /dev/null +++ b/.github/workflows/ruby.yml @@ -0,0 +1,50 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. 
+# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake +# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby + +name: Ruby + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + ruby-version: ['3.2', '3.3', '3.4', '4.0.1'] + steps: + - uses: actions/checkout@v6 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby-version }} + bundler-cache: true # runs 'bundle install' and caches installed gems automatically + - name: Gem Build + run: gem build pathspec.gemspec + - name: Gem Install + run: gem install ./pathspec*.gem + + test: + runs-on: ubuntu-latest + strategy: + matrix: + ruby-version: ['3.2', '3.3', '3.4', '4.0.1'] + raketasks: ['rubocop', 'spec', 'spec_integration', 'docs'] + steps: + - uses: actions/checkout@v6 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby-version }} + bundler-cache: true # runs 'bundle install' and caches installed gems automatically + - name: Rake Tasks + env: + TASK: ${{matrix.raketasks}} + run: bundle exec rake $TASK diff --git a/.mise.toml b/.mise.toml new file mode 100644 index 0000000..1b8df9a --- /dev/null +++ b/.mise.toml @@ -0,0 +1,39 @@ +[tools] +ruby = "3.4.1" +"gem:bundler" = "4.0.4" + +[tasks.install] +run = "bundle install" +description = "Install gem dependencies" + +[tasks.test] +run = "bundle exec rake" +description = "Run rubocop, unit tests, integration tests, and docs" + +[tasks."test:unit"] +run = "bundle exec rake spec" +description = "Run unit tests only" + +[tasks."test:integration"] +run = "bundle exec rake spec_integration" +description = "Run integration tests only" + +[tasks."test:all"] +run = "bundle exec rake spec_all" +description = "Run all tests (unit + integration)" + +[tasks."test:matrix"] +run = "bundle exec rake test_matrix" +description = "Run 
tests across all Ruby versions in Docker" + +[tasks.build] +run = "gem build pathspec.gemspec" +description = "Build the gem" + +[tasks.benchmark] +run = "bundle exec rake benchmark" +description = "Run performance benchmarks (not included in CI)" + +[env] +# Ensure bundler uses the project's vendor/bundle directory +_.file = ".env" diff --git a/.rubocop.yml b/.rubocop.yml index 28dc231..47260ef 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,7 +1,16 @@ inherit_from: .rubocop_todo.yml +AllCops: + TargetRubyVersion: 3.2 + NewCops: enable + Style/NumericPredicate: Enabled: false Layout/ClosingHeredocIndentation: Enabled: false + +# This rule is silly +# https://github.com/rubocop/rubocop/issues/10080 +Lint/AmbiguousOperatorPrecedence: + Enabled: false diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 86655e2..40eed2d 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1,27 +1,33 @@ # This configuration was generated by # `rubocop --auto-gen-config` -# on 2018-01-11 16:42:16 -0800 using RuboCop version 0.52.1. +# on 2024-04-08 19:36:04 UTC using RuboCop version 1.63.0. # The point is for the user to remove these configuration records # one by one as the offenses are removed from the code base. # Note that changes in the inspected code, or installation of new # versions of RuboCop, may require this file to be generated again. +# Offense count: 7 +# Configuration parameters: EnforcedStyle, AllowedGems, Include. +# SupportedStyles: Gemfile, gems.rb, gemspec +# Include: **/*.gemspec, **/Gemfile, **/gems.rb +Gemspec/DevelopmentDependencies: + Exclude: + - 'pathspec.gemspec' + # Offense count: 2 -# Cop supports --auto-correct. +# This cop supports safe autocorrection (--autocorrect). Layout/BlockEndNewline: Exclude: - 'spec/unit/pathspec_spec.rb' # Offense count: 6 -# Cop supports --auto-correct. -# Configuration parameters: EnforcedStyle. 
-# SupportedStyles: auto_detection, squiggly, active_support, powerpack, unindent -Layout/IndentHeredoc: +# This cop supports safe autocorrection (--autocorrect). +Layout/HeredocIndentation: Exclude: - 'spec/unit/pathspec_spec.rb' # Offense count: 8 -# Cop supports --auto-correct. +# This cop supports safe autocorrection (--autocorrect). # Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces, SpaceBeforeBlockParameters. # SupportedStyles: space, no_space # SupportedStylesForEmptyBraces: space, no_space @@ -30,39 +36,37 @@ Layout/SpaceInsideBlockBraces: - 'lib/pathspec.rb' - 'spec/unit/pathspec_spec.rb' -# Offense count: 2 -Lint/ImplicitStringConcatenation: - Exclude: - - 'lib/pathspec/gitignorespec.rb' - # Offense count: 1 -# Cop supports --auto-correct. -# Configuration parameters: AllowUnusedKeywordArguments, IgnoreEmptyMethods. +# This cop supports safe autocorrection (--autocorrect). +# Configuration parameters: AutoCorrect, AllowUnusedKeywordArguments, IgnoreEmptyMethods, IgnoreNotImplementedMethods. Lint/UnusedMethodArgument: Exclude: - 'lib/pathspec/spec.rb' # Offense count: 3 +# This cop supports unsafe autocorrection (--autocorrect-all). +# Configuration parameters: AutoCorrect. Lint/UselessAssignment: Exclude: - 'spec/unit/pathspec_spec.rb' -# Offense count: 2 +# Offense count: 1 +# This cop supports safe autocorrection (--autocorrect). +# Configuration parameters: AutoCorrect, CheckForMethodsWithNoSideEffects. Lint/Void: Exclude: - 'lib/pathspec.rb' - - 'lib/pathspec/gitignorespec.rb' # Offense count: 3 +# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes. Metrics/AbcSize: - Enabled: false - Max: 62 + Max: 63 -# Offense count: 7 -# Configuration parameters: CountComments, ExcludedMethods. +# Offense count: 8 +# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns. 
+# AllowedMethods: refine Metrics/BlockLength: - Enabled: false - Max: 270 + Max: 300 # Offense count: 2 # Configuration parameters: CountBlocks. @@ -70,62 +74,40 @@ Metrics/BlockNesting: Max: 4 # Offense count: 1 -# Configuration parameters: CountComments. +# Configuration parameters: CountComments, CountAsOne. Metrics/ClassLength: - Max: 146 + Max: 123 -# Offense count: 2 +# Offense count: 1 +# Configuration parameters: AllowedMethods, AllowedPatterns. Metrics/CyclomaticComplexity: - Max: 26 + Max: 18 # Offense count: 3 -# Configuration parameters: CountComments. +# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns. Metrics/MethodLength: - Max: 77 + Max: 69 # Offense count: 2 +# Configuration parameters: AllowedMethods, AllowedPatterns. Metrics/PerceivedComplexity: - Max: 32 + Max: 31 # Offense count: 25 -# Cop supports --auto-correct. -# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, IgnoredMethods. -# SupportedStyles: line_count_based, semantic, braces_for_chaining +# This cop supports safe autocorrection (--autocorrect). +# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods. +# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces # ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object # FunctionalMethods: let, let!, subject, watch -# IgnoredMethods: lambda, proc, it +# AllowedMethods: lambda, proc, it Style/BlockDelimiters: Exclude: - 'spec/unit/pathspec_spec.rb' -# Offense count: 11 -# Cop supports --auto-correct. -# Configuration parameters: EnforcedStyle. 
-# SupportedStyles: when_needed, always, never -Style/FrozenStringLiteralComment: - Exclude: - - 'Gemfile' - - 'Rakefile' - - 'lib/pathspec.rb' - - 'lib/pathspec/gitignorespec.rb' - - 'lib/pathspec/regexspec.rb' - - 'lib/pathspec/spec.rb' - - 'pathspec.gemspec' - - 'spec/spec_helper.rb' - - 'spec/unit/pathspec/gitignorespec_spec.rb' - - 'spec/unit/pathspec/spec_spec.rb' - - 'spec/unit/pathspec_spec.rb' - # Offense count: 1 -# Cop supports --auto-correct. -# Configuration parameters: IgnoredMethods. -# IgnoredMethods: respond_to, define_method +# This cop supports unsafe autocorrection (--autocorrect-all). +# Configuration parameters: AllowMethodsWithArguments, AllowedMethods, AllowedPatterns, AllowComments. +# AllowedMethods: define_method Style/SymbolProc: Exclude: - 'lib/pathspec.rb' - -# Offense count: 7 -# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns. -# URISchemes: http, https -Metrics/LineLength: - Max: 108 diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 0000000..041df9a --- /dev/null +++ b/.tool-versions @@ -0,0 +1 @@ +ruby 3.4.1 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 73f17bb..0000000 --- a/.travis.yml +++ /dev/null @@ -1,6 +0,0 @@ -language: ruby -script: 'bundle exec rake spec' -rvm: - - 2.4.6 - - 2.5.6 - - 2.6.4 diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..baa53c0 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,121 @@ +# AGENTS.md + +PathSpec Ruby - .gitignore-style pattern matching in Ruby + +## Project overview + +Ruby gem implementing .gitignore-style pattern matching with both library API and CLI tool. Supports Ruby 3.2-4.0.1 with comprehensive test coverage and multi-Ruby CI. + +## Setup commands + +```bash +# Install mise (Ruby version manager) +brew install mise # macOS +# Other platforms: https://mise.jdx.dev/getting-started.html + +# Activate mise +eval "$(mise activate zsh)" # or bash, fish, etc. 
+ +# Install Ruby and bundler versions +mise install + +# Install dependencies +mise run install +# or: bundle install +``` + +## Testing + +```bash +# Run all tests (lint, unit, integration, docs) +mise run test +# or: bundle exec rake + +# Unit tests only +mise run test:unit +# or: bundle exec rake spec + +# Integration tests (CLI) only +mise run test:integration +# or: bundle exec rake spec_integration + +# All specs (unit + integration) +mise run test:all +# or: bundle exec rake spec_all + +# Cross-Ruby matrix testing via Docker +mise run test:matrix +# or: bundle exec rake test_matrix +``` + +## Code style + +- Use RuboCop 1.63.5 for linting +- Method length limit: 69 lines +- Use single quotes for strings without interpolation +- Use `%w[]` for word arrays +- Auto-fix with: `bundle exec rubocop --autocorrect` +- For large data arrays: add `# rubocop:disable Metrics/MethodLength` + +## Build and release + +```bash +# Build gem +mise run build +# or: gem build pathspec.gemspec + +# Generate documentation +bundle exec rake docs + +# Development install +rake install +``` + +## Project structure + +``` +pathspec-ruby/ +├── lib/pathspec/ # Core library +│ ├── pathspec.rb # Main PathSpec class +│ └── patterns/ # Pattern implementations +├── bin/pathspec-rb # CLI executable +├── spec/ +│ ├── unit/ # Library tests +│ └── integration/ # CLI tests +├── benchmarks/ # Performance tests +├── docs/ # Documentation source +├── Rakefile # Build tasks +├── .tool-versions # Ruby/bundler versions +└── pathspec.gemspec # Gem spec +``` + +## Development workflow + +1. Make changes to `lib/` code +2. Add/update tests in `spec/unit/` for library changes +3. Add/update tests in `spec/integration/` for CLI changes +4. Run `mise run test` - must pass before committing +5. Fix any RuboCop offenses +6. Test cross-Ruby with `mise run test:matrix` +7. 
Commit with descriptive messages + +## CLI tool usage + +```bash +bundle exec pathspec-rb -f .gitignore match "file.swp" +bundle exec pathspec-rb -f .gitignore specs_match "file.swp" +bundle exec pathspec-rb -f .gitignore tree ./src +``` + +## Common issues + +**Bundler conflicts**: Always use `mise run install` to ensure correct versions from `.tool-versions` + +**RuboCop failures**: Auto-fix with `bundle exec rubocop --autocorrect`. Method length is a common issue - extract large data arrays to separate methods with rubocop:disable comments + +**CI failures**: Check GitHub Actions. RuboCop and integration test failures are the most common causes + +## Dependencies + +- **Runtime**: None (pure Ruby) +- **Development**: rspec (~> 3.10), rubocop (~> 1.63.0), fakefs (~> 2.5), kramdown (~> 2.3), benchmark-ips (~> 2.0) \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 927f6dd..5b48d7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,93 @@ # pathspec-ruby CHANGELOG +## 3.0.1 (Patch Release) + +### Note + +This is a version bump only release to correct an oversight in the 3.0.0 release where the gemspec version was not updated before publishing. The gem published as 3.0.0 on RubyGems had version 2.1.0 in the gemspec. + +For the actual feature changes and improvements, see the [3.0.0 release](https://github.com/highb/pathspec-ruby/releases/tag/3.0.0). + +## 3.0.0 (Major Release) + +### Breaking Changes + +- (Maint) Remove Ruby 3.1 support (EOL March 2025). 
The Gem now requires >= 3.2.0 +- Updated minimum required Ruby version in gemspec from >= 3.1.0 to >= 3.2.0 + +### Features + +- Added `match?` predicate method as alias for `match` to follow Ruby conventions +- Added comprehensive CLI integration test suite (23 tests covering all commands, flags, and error handling) +- Added mise (formerly rtx) tooling support for managing Ruby and bundler versions +- Added `test_matrix` rake task to run tests across all Ruby versions (3.2, 3.3, 3.4, 4.0.1) using Docker +- Separated unit tests (`rake spec`) and integration tests (`rake spec_integration`) +- Added `spec_all` rake task to run complete test suite with unified coverage reporting + +### Maintenance + +- Added Ruby 3.4 to testing matrix (Stable, Tested) +- Added Ruby 4.0.1 to testing matrix (Preview, Tested) +- Updated CI workflows to use Ruby 3.4 for gem publishing +- Updated CI to run integration tests across all Ruby versions +- Updated bundler requirement from `~> 2.2` to `>= 2.5` for Ruby 3.2-4.0 compatibility +- Added `irb` as development dependency (required for Ruby 4.0+) +- Updated RuboCop TargetRubyVersion to 3.2 to match gemspec requirement +- Updated README with comprehensive development setup documentation + - mise installation and usage + - Development tasks and workflows + - Testing across Ruby version matrix +- Updated README with comprehensive "Deprecated Rubies" section documenting historical deprecations +- Updated "Supported Rubies" section in README to reflect current testing matrix (3.2, 3.3, 3.4, 4.0.1) +- Improved test coverage from 99.48% to 99.65% (573/575 lines) + +## 2.1.0 + +## refactor/perf + +- Add missing frozen_string_literal comments to reduce object allocations + +## build + +- Updated Rubocop to 1.18.3 +- Fixed/re-enabled Rubocop +- Updated fakefs to 1.3 +- Cleaned up unnecessary spec files from the Gem + +Thanks for the above contributions @ericproulx! #50 + +## 2.0.0 + +- (Maint) Remove deprecated/security release versions of Ruby. 
The Gem will now only support and be tested against >= 3.1.0 e.g. 3.1, 3.2, and 3.3. + +## 1.1.3 (Patch/Bug Fix Release) + +- Fixed Man page generation bug in GH Actions + +## 1.1.1 (Patch/Bug Fix Release) + +- (Maint) Updated Supported Ruby Versions (>= 2.6.9 is the earliest supported now) +- (Maint) Linting corrections +- Setup a CI system with GH Actions to do better validation of the gem before release. + +## 1.1.0 (Minor Release) + +:alert: This release was mis-tagged. Use 1.1.1 instead. :alert: + +- (Maint) Updated Supported Ruby Versions +- (Maint) Linting corrections + +## 1.0.0 (Major Release) + +- Adds a required ruby version of 2.6 (reason for major version bump) +- Adds man/html docs + +## 0.2.1 (Patch/Bug Fix Release) + +- Fixes incorrectly pushed gem on Rubygems.org + ## 0.2.0 (Minor Release) + - (Feature) A CLI tool, pathspec-rb, is now provided with the gem. - (API Change) New namespace for gem: `PathSpec`: Everything is now namespaced under `PathSpec`, to prevent naming collisions with other libraries. Thanks @tenderlove! - (License) License version updated to Apache 2. Thanks @kytrinyx! @@ -9,20 +96,24 @@ - (Maint) Added Rubocop and made some corrections ## 0.1.2 (Patch/Bug Fix Release) + - Fix for regexp matching Thanks @incase! #16 - File handling cleanup Thanks @martinandert! #13 - `from_filename` actually works now! Thanks @martinandert! #12 ## 0.1.0 (Minor Release) + - Port new edgecase handling from [python-path-specification](https://github.com/cpburnz/python-path-specification/pull/8). Many thanks to @jdpace! :) - Removed EOL Ruby support - Added current Ruby stable to Travis testing ## 0.0.2 (Patch/Bug Fix Release) + - Fixed issues with Ruby 1.8.7/2.1.1 - Added more testing scripts - Fixed Windows path related issues - Cleanup unnecessary things in gem ## 0.0.1 + - Initial version. 
diff --git a/Gemfile b/Gemfile index fa75df1..7f4f5e9 100644 --- a/Gemfile +++ b/Gemfile @@ -1,3 +1,5 @@ +# frozen_string_literal: true + source 'https://rubygems.org' gemspec diff --git a/README.md b/README.md index 36b3b06..5d4e094 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,25 @@ # pathspec-ruby -[](https://badge.fury.io/rb/pathspec) [](https://travis-ci.org/highb/pathspec-ruby) [](https://codeclimate.com/github/highb/pathspec-ruby/maintainability) +[](https://badge.fury.io/rb/pathspec) [](https://github.com/highb/pathspec-ruby/actions/workflows/ruby.yml) [](https://codeclimate.com/github/highb/pathspec-ruby/maintainability) + +[man Page as HTML](http://highb.github.io/pathspec-ruby/) [Supported Rubies](https://www.ruby-lang.org/en/downloads/): -- 2.4.6 (Maintenance) -- 2.5.6 (Stable) -- 2.6.4 (Stable) +- 3.2 (Stable, Tested) +- 3.3 (Stable, Tested) +- 3.4 (Stable, Tested) +- 4.0.1 (Preview, Tested) + +## Deprecated Rubies + +The following Ruby versions are no longer supported. If you need to use an older Ruby version, please use an earlier version of this gem: + +- Ruby 3.1: Deprecated in version 3.0.0 +- Ruby 3.0: Deprecated in version 2.0.0 (January 2024) +- Ruby 2.x (2.6-2.7): Deprecated in version 2.0.0 (January 2024) +- Ruby 2.5 and earlier: Deprecated in version 1.0.0 (January 2021) +- Ruby 1.8 and 1.9: Deprecated in version 0.2.0 (circa 2017) Match Path Specifications, such as .gitignore, in Ruby! @@ -23,19 +36,19 @@ gem install pathspec ## CLI Usage ```bash -➜ test-pathspec cat .gitignore +➜ cat .gitignore *.swp /coverage/ -➜ test-pathspec be pathspec-rb specs_match "coverage/foo" +➜ bundle exec pathspec-rb specs_match "coverage/foo" /coverage/ -➜ test-pathspec be pathspec-rb specs_match "file.swp" +➜ bundle exec pathspec-rb specs_match "file.swp" *.swp -➜ test-pathspec be pathspec-rb match "file.swp" -➜ test-pathspec echo $? +➜ bundle exec pathspec-rb match "file.swp" +➜ echo $? 
0 -➜ test-pathspec ls +➜ ls Gemfile Gemfile.lock coverage file.swp source.rb -➜ test-pathspec be pathspec-rb tree . +➜ bundle exec pathspec-rb tree . ./coverage ./coverage/index.html ./file.swp @@ -49,7 +62,7 @@ require 'pathspec' # Create a .gitignore-style Pathspec by giving it newline separated gitignore # lines, an array of gitignore lines, or any other enumable object that will # give strings matching the .gitignore-style (File, etc.) -gitignore = Pathspec.from_filename('spec/files/gitignore_readme') +gitignore = PathSpec.from_filename('spec/files/gitignore_readme') # Our .gitignore in this example contains: # !**/important.txt @@ -75,6 +88,36 @@ gitignore.match_paths ['/abc/123', '/abc/important.txt', '/abc/'] # There is no CLI equivalent to this. ``` +## Example Usage in Gemspec + +``` +lib = File.expand_path("lib", __dir__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require "gemspec_pathspec_test/version" +require 'pathspec' + +Gem::Specification.new do |spec| + spec.name = "gemspec_pathspec_test" + spec.version = GemspecPathspecTest::VERSION + spec.authors = ["Brandon High"] + spec.email = ["highb@users.noreply.github.com"] + + spec.summary = "whatever" + + spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'" + + ps = PathSpec.from_filename('.gitignore') + spec.files = Dir['lib/*.rb'].reject { |f| ps.match(f) } + spec.bindir = "exe" + spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } + spec.require_paths = ["lib"] + + spec.add_development_dependency "bundler", "~> 2.0" + spec.add_development_dependency "rake", "~> 10.0" + spec.add_development_dependency "rspec", "~> 3.0" +end +``` + ## Building/Installing from Source ```shell @@ -82,6 +125,76 @@ git clone git@github.com:highb/pathspec-ruby.git cd pathspec-ruby && bash ./build_from_source.sh ``` +## Development Setup + +This project uses [mise](https://mise.jdx.dev/) for managing Ruby and bundler versions. 
+ +### Install mise + +```shell +# macOS +brew install mise + +# Other platforms: https://mise.jdx.dev/getting-started.html +``` + +### Activate mise + +Add to your shell profile (`~/.zshrc`, `~/.bashrc`, etc.): + +```shell +eval "$(mise activate zsh)" # or bash, fish, etc. +``` + +### Install Dependencies + +```shell +# Install Ruby and bundler versions defined in .tool-versions +mise install + +# Install gem dependencies +mise run install +# or: bundle install +``` + +### Development Tasks + +```shell +# Run all tests (rubocop, unit tests, integration tests, docs) +mise run test +# or: bundle exec rake + +# Run only unit tests +mise run test:unit +# or: bundle exec rake spec + +# Run only integration tests +mise run test:integration +# or: bundle exec rake spec_integration + +# Run all specs (unit + integration) +mise run test:all +# or: bundle exec rake spec_all + +# Run tests across all Ruby versions (3.2, 3.3, 3.4, 4.0.1) using Docker +mise run test:matrix +# or: bundle exec rake test_matrix + +# Build the gem +mise run build +# or: gem build pathspec.gemspec +``` + +The `test:matrix` task runs the full test suite across all supported Ruby versions in Docker containers, matching the CI environment. Integration tests cover the CLI executable (`bin/pathspec-rb`). + ## Contributing Pull requests, bug reports, and feature requests welcome! :smile: I've tried to write exhaustive tests but who knows what cases I've missed. + +## Releasing + +This is mainly a reminder to myself but the release process is: +1. Make sure CI is passing +2. Update the CHANGELOG with relevant changes to Gem consumers +3. Update version in gemspec with correct SemVer bump for scope of changes +4. Tag/release using GitHub UI and the Build & Push workflow should do the rest. 
diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..13bbab9 --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,551 @@ +# PathSpec Ruby Roadmap + +This document outlines potential features and enhancements for the pathspec-ruby library. Items are organized by category and represent ideas for future development, not commitments or timelines. + +## Additional Pattern Format Support + +### Docker Ignore Patterns + +Add support for `.dockerignore` files, which use a similar but slightly different syntax from `.gitignore`. + +**Key differences:** +- Different handling of `**` at the start of patterns +- Exception patterns work differently than gitignore +- Docker-specific pattern matching semantics + +**Implementation approach:** +- Add `PathSpec.from_dockerignore()` factory method +- Implement `DockerIgnoreSpec` class extending base `Spec` +- Document differences between Docker and Git pattern formats + +**Use case:** Enable Docker users to validate and test their `.dockerignore` files programmatically. + +--- + +### Mercurial Ignore Patterns + +Add support for `.hgignore` files used by Mercurial VCS. + +**Key features:** +- Support `syntax: regexp` directive for regex patterns +- Support `syntax: glob` directive for glob patterns +- Handle Mercurial-specific pattern semantics + +**Implementation approach:** +- Add `PathSpec.from_hgignore()` factory method +- Parse and respect syntax directives within files +- Implement `HgIgnoreSpec` class with dual-mode support + +**Use case:** Useful for polyglot VCS users and teams migrating between version control systems. + +--- + +### NPM/Yarn Ignore Patterns + +Add support for `.npmignore` and `.yarnignore` files with their subtle differences from gitignore. + +**Key differences:** +- Different default exclusions (node_modules, etc.) 
+- Different handling of empty directories +- Package-specific matching behaviors + +**Implementation approach:** +- Add `PathSpec.from_npmignore()` factory method +- Document default exclusions +- Handle package manager-specific semantics + +**Use case:** Enable JavaScript/Node.js developers to validate package exclusions. + +--- + +### Rsync Exclude Patterns + +Add support for rsync's exclude pattern format (`.rsyncignore`). + +**Key features:** +- Rsync has its own pattern syntax similar to gitignore but with differences +- Support for include/exclude pattern lists +- Support for merge-file directives + +**Implementation approach:** +- Add `PathSpec.from_rsyncignore()` factory method +- Implement `RsyncIgnoreSpec` class +- Document rsync-specific pattern behaviors + +**Use case:** Useful for deployment scripts and backup automation. + +--- + +## Performance Enhancements + +### Alternative Regex Backends + +Implement support for high-performance regex engines as alternatives to Ruby's built-in regex. + +**Potential backends:** +- **re2** - Google's RE2 engine via the `re2` gem (linear time matching, no backtracking) +- **Onigmo** - Ruby's built-in regex engine (a fork of Oniguruma), but could be used more explicitly +- **Hyperscan** - Intel's high-performance regex engine (if Ruby bindings become available) + +**Implementation approach:** +- Add backend selection API: `PathSpec.new(patterns, :git, backend: :re2)` +- Benchmark each backend with the existing benchmark suite +- Document trade-offs (feature support vs. 
performance) +- Follow the pattern used by Python's pathspec library + +**Benefits:** +- Significant performance improvements for pattern-heavy workloads +- Better performance scaling with 100+ patterns +- Reduced CPU usage in high-throughput scenarios + +**Considerations:** +- Some backends may not support all Ruby regex features +- Additional gem dependencies required +- Cross-platform compatibility testing needed + +--- + +### Pattern Compilation Caching + +Add intelligent caching for compiled pattern objects. + +**Features:** +- Cache compiled PathSpec objects by pattern set hash +- LRU eviction for memory management +- Thread-safe cache implementation + +**Use case:** Applications that repeatedly create PathSpec objects with the same patterns. + +--- + +## Experimental: Native Rust Backend + +Explore adding an optional Rust-powered native extension using the `ignore` crate as an alternative backend to the pure Ruby implementation. + +**Primary goal:** Learn Ruby/Rust FFI integration and gain hands-on experience with cross-language tooling. + +**Secondary goal:** Understand performance characteristics and share findings with the community. 
+ +### Implementation Approach + +**Phase 1: Separate gem exploration** +- Create standalone `pathspec-native` gem with Rust implementation +- Use the Rust `ignore` crate (from ripgrep) as the pattern matching engine +- Implement Ruby bindings using `magnus` or `rb-sys` +- Maintain 100% API compatibility with pathspec-ruby +- Benchmark extensively against pure Ruby implementation + +**Phase 2: Optional integration (if Phase 1 succeeds)** +- Integrate as optional backend in main gem +- Maintain pure Ruby as default with automatic fallback +- Allow users to opt-in to native backend: `PathSpec.new(patterns, :git, backend: :native)` +- Ensure zero impact on users who don't want native dependencies + +### Technical Considerations + +**FFI tooling options:** +- **magnus** - Ruby bindings for Rust (modern, actively maintained) +- **rb-sys** - Low-level Ruby bindings (more control, more complexity) +- Compare both approaches and document trade-offs + +**The `ignore` crate benefits:** +- Battle-tested in ripgrep (widely used, well-optimized) +- Supports .gitignore patterns natively +- High-performance parallel file tree walking +- Maintained by experienced Rust developer (BurntSushi) + +**Cross-platform compilation:** +- Ensure builds work on Linux, macOS, Windows +- Set up CI/CD for automated native builds +- Consider pre-compiled gems for common platforms +- Document local build requirements + +### Goals + +1. **Learning outcomes:** + - Understand Ruby C extension API vs. Rust FFI approaches + - Learn `magnus`/`rb-sys` tooling and best practices + - Experience cross-platform native gem distribution + +2. **Performance insights:** + - Benchmark native vs. Ruby across different workload sizes + - Identify when native implementation provides benefits + - Measure memory usage differences + - Test with real-world .gitignore files (1, 10, 100, 1000+ patterns) + +3. 
**Community contribution:** + - Write detailed blog post about findings + - Share performance data and methodology + - Document FFI integration lessons learned + - Provide reusable example for other gem authors + +4. **Maintain pure Ruby as first-class:** + - Pure Ruby version remains the default + - No degradation of pure Ruby implementation + - Native backend is purely additive + +### Success Criteria + +- ✅ Native extension compiles successfully on Linux, macOS, and Windows +- ✅ 100% API compatibility - drop-in replacement for pure Ruby backend +- ✅ Comprehensive benchmark suite comparing both implementations +- ✅ Documented performance characteristics with clear guidance +- ✅ CI/CD pipeline builds and tests native extensions +- ✅ Clear installation instructions for native dependencies +- ✅ Published blog post with findings and recommendations + +### Non-Goals + +**This is explicitly NOT:** +- A rewrite of the gem - pure Ruby implementation stays and remains primary +- A required dependency - native extension is optional only +- A response to performance complaints - this is exploratory learning +- A commitment to long-term maintenance of native code +- An attempt to deprecate the Ruby implementation + +**What we're NOT optimizing for:** +- Absolute maximum performance - learning is the priority +- Production-critical performance - pure Ruby is already fast enough +- Replacing other tools - this is about understanding trade-offs + +### Open Questions + +Questions to answer through exploration: + +1. **Performance questions:** + - At what pattern count does native backend become beneficial? + - How does performance scale with directory tree size? + - What's the FFI call overhead for small workloads? + - Is parallel tree walking worth the complexity? + +2. **Developer experience questions:** + - How painful is cross-platform native gem distribution? + - What's the learning curve for magnus vs. rb-sys? + - How do we handle build failures gracefully? 
+ - What's the maintenance burden of native code? + +3. **User experience questions:** + - Is opt-in vs. opt-out the right choice? + - How do we communicate when native backend is beneficial? + - What happens when native extension fails to load? + - Should we pre-compile for common platforms? + +### Potential Outcomes + +**Best case:** +- Learn valuable FFI skills +- Discover significant performance benefits for certain workloads +- Share useful findings with community +- Provide optional native backend for power users + +**Realistic case:** +- Learn valuable FFI skills +- Find that pure Ruby is "fast enough" for most use cases +- Discover native backend helps only for extreme workloads (1000+ patterns) +- Document when native extensions are/aren't worth it + +**Acceptable case:** +- Learn valuable FFI skills +- Conclude that FFI overhead negates performance benefits +- Decide not to integrate into main gem +- Share lessons learned in blog post + +All outcomes are valuable because the primary goal is learning. + +### Resources & References + +- **Rust `ignore` crate**: https://docs.rs/ignore/ +- **magnus (Ruby ↔ Rust)**: https://github.com/matsadler/magnus +- **rb-sys**: https://github.com/oxidize-rb/rb-sys +- **ripgrep** (uses `ignore` crate): https://github.com/BurntSushi/ripgrep +- **Rust native extensions guide**: https://github.com/oxidize-rb/oxidize-rb + +--- + +## Pattern Validation & Quality + +### Pattern Linting and Validation + +Add tools to check patterns for common mistakes and suggest improvements. + +**Features:** + +1. **Syntax error detection:** + - Invalid glob patterns (e.g., `*.txt/` - wildcard with trailing slash) + - Malformed bracket expressions + - Escaped characters in wrong contexts + +2. **Semantic warnings:** + - Patterns that can never match (e.g., `/foo` when matching relative paths) + - Redundant patterns (e.g., `*.log` followed by `error.log`) + - Overly broad patterns that might match unintended files + +3. 
**Performance suggestions:** + - Recommend more efficient pattern ordering + - Suggest combining similar patterns + - Identify expensive regex patterns + +4. **Style recommendations:** + - Inconsistent path separator usage + - Unnecessary escaping + - Patterns that could be simplified + +**Implementation approach:** +- Add `PathSpec#validate` method returning array of issues +- Implement `PathSpec::Linter` class with configurable rules +- Provide severity levels (error, warning, info) +- Include suggested fixes where applicable + +**API example:** +```ruby +pathspec = PathSpec.from_filename('.gitignore') +issues = pathspec.validate + +issues.each do |issue| + puts "#{issue.severity}: #{issue.message}" + puts " Pattern: #{issue.pattern}" + puts " Suggestion: #{issue.suggestion}" if issue.suggestion +end +``` + +**Use case:** Help developers write better ignore files and catch mistakes before they cause issues. + +--- + +### Pattern Testing Framework + +Add utilities to test pattern matching behavior. + +**Features:** +- Assert that specific paths match/don't match +- Generate test paths from patterns +- Coverage analysis (which patterns are actually being used) + +**Use case:** CI/CD pipelines that verify ignore patterns work as expected. + +--- + +## Advanced Filtering Features + +### Case-Insensitive Matching + +Add optional flag for case-insensitive pattern matching. + +**Implementation approach:** +- Add `case_sensitive` option to PathSpec constructor +- Default to true (current behavior) for backwards compatibility +- Convert patterns to case-insensitive regexes when disabled + +**API example:** +```ruby +# Match *.TXT, *.txt, *.Txt, etc. +pathspec = PathSpec.new(['*.txt'], :git, case_sensitive: false) +``` + +**Use case:** Cross-platform projects where case sensitivity varies (Windows vs. Linux/macOS). + +--- + +### Attribute-Based Filtering + +Extend pattern matching beyond filenames to include file attributes. + +**Features:** + +1. 
**File size patterns:** + - `size:>10MB` - Files larger than 10MB + - `size:<1KB` - Files smaller than 1KB + - `size:100KB..10MB` - Files in size range + +2. **Modification time patterns:** + - `modified:>7d` - Modified in last 7 days + - `modified:<2023-01-01` - Modified before date + - `modified:today` - Modified today + +3. **File permission patterns:** + - `mode:executable` - Executable files + - `mode:0644` - Specific permission mode + - `mode:user-writable` - Files writable by owner + +4. **File type patterns:** + - `type:symlink` - Symbolic links + - `type:directory` - Directories + - `type:file` - Regular files + +**Implementation approach:** +- Add `PathSpec#match_tree_with_attrs` method +- Create `AttributeSpec` class for attribute-based filtering +- Support combining path patterns with attribute filters + +**API example:** +```ruby +pathspec = PathSpec.new(['*.log', 'size:>100MB'], :git) +large_logs = pathspec.match_tree_with_attrs('logs/') +``` + +**Use case:** Cleanup scripts, archival tools, security audits. + +--- + +### Glob Expansion + +Add ability to expand glob patterns into matching file lists without needing a root directory. + +**Features:** +- Generate all possible matches for a pattern +- Useful for testing and documentation +- Support limiting depth and count + +**Use case:** Pattern documentation, test generation, interactive explorers. + +--- + +## API Enhancements + +### Streaming API + +Add support for processing large file lists without loading everything into memory. + +**Implementation approach:** +- Add `PathSpec#match_stream` that yields matches +- Support lazy evaluation with Enumerator +- Optimize for large directory trees + +**API example:** +```ruby +pathspec = PathSpec.from_filename('.gitignore') +pathspec.match_stream('huge_directory/') do |matched_path| + process(matched_path) +end +``` + +**Use case:** Large repositories, file system indexing, backup tools.
+ +--- + +### Pattern Introspection + +Add methods to analyze and understand pattern behavior. + +**Features:** +- List all patterns that would match a given path +- Explain why a path matched (which pattern, why) +- Extract pattern metadata (complexity, type, etc.) + +**API example:** +```ruby +pathspec = PathSpec.from_filename('.gitignore') +explanation = pathspec.explain('coverage/index.html') +# => "Matched by pattern 'coverage/' at line 15 (directory match)" +``` + +**Use case:** Debugging ignore rules, understanding complex ignore files. + +--- + +### Pattern Composition + +Add utilities to combine multiple PathSpec objects with set operations. + +**Features:** +- Union: Match if any PathSpec matches +- Intersection: Match only if all PathSpecs match +- Difference: Match first PathSpec but not second + +**API example:** +```ruby +gitignore = PathSpec.from_filename('.gitignore') +dockerignore = PathSpec.from_filename('.dockerignore') + +# Files ignored by git OR docker +either = gitignore.union(dockerignore) + +# Files ignored by git but NOT by docker +git_only = gitignore.difference(dockerignore) +``` + +**Use case:** Complex filtering logic, comparing ignore files. + +--- + +## Testing & Quality + +### Compatibility Test Suite + +Add comprehensive tests against reference implementations. + +**Features:** +- Test against git's actual ignore behavior +- Test against docker's ignore behavior +- Cross-reference with Python pathspec library +- Generate compatibility reports + +**Use case:** Ensure accuracy and find edge cases. + +--- + +### Property-Based Testing + +Add property-based tests using the `rspec-propcheck` gem. + +**Features:** +- Generate random patterns and paths +- Verify invariants hold for all inputs +- Find edge cases automatically + +**Use case:** Improve robustness and find bugs. + +--- + +## Documentation & Tooling + +### Interactive Pattern Tester + +Create a CLI tool or web interface for testing patterns interactively. 
+ +**Features:** +- Live pattern editing with instant feedback +- Visual highlighting of matches +- Pattern explanation and suggestions +- Save/load pattern sets + +**Use case:** Learning, debugging, pattern development. + +--- + +### Pattern Migration Tools + +Add utilities to convert between different pattern formats. + +**Features:** +- Convert gitignore to dockerignore +- Convert rsync excludes to gitignore +- Highlight patterns that don't translate cleanly + +**Use case:** Migrating between tools, maintaining consistency. + +--- + +## Contributing + +This roadmap is a living document. If you'd like to: +- Propose new features +- Discuss implementation approaches +- Contribute implementations + +Please open an issue or pull request on GitHub to start the conversation. + +--- + +## Prioritization Considerations + +When evaluating which features to implement, consider: + +1. **User impact** - How many users would benefit? +2. **Maintenance burden** - How much ongoing maintenance is required? +3. **Compatibility** - Does it maintain backwards compatibility? +4. **Dependencies** - Does it require new dependencies? +5. **Complexity** - How complex is the implementation? +6. **Standards compliance** - Does it follow established standards? + +Features that provide high user value with manageable complexity and maintenance burden should be prioritized. 
diff --git a/Rakefile b/Rakefile index a9f557c..6e77210 100644 --- a/Rakefile +++ b/Rakefile @@ -1,24 +1,99 @@ +# frozen_string_literal: true + begin require 'rspec/core/rake_task' - RSpec::Core::RakeTask.new(:spec) + + RSpec::Core::RakeTask.new(:spec) do |t| + t.pattern = 'spec/unit/**/*_spec.rb' + end + + RSpec::Core::RakeTask.new(:spec_integration) do |t| + t.pattern = 'spec/integration/**/*_spec.rb' + end + + RSpec::Core::RakeTask.new(:spec_all) do |t| + t.pattern = 'spec/**/*_spec.rb' + end rescue LoadError puts 'rspec rake task failed to load' end require 'rubocop/rake_task' +require 'kramdown' +require 'fileutils' RuboCop::RakeTask.new(:rubocop) do |t| t.options = ['--display-cop-names'] end -task default: %i[rubocop spec man_pages] +task default: %i[rubocop spec spec_integration docs] + +desc 'Generate man page for executable script' +task :docs do + kramdown = Kramdown::Document.new(File.read('docs/pathspec-rb.md')) + + File.write('docs/index.html', kramdown.to_html) + + FileUtils.mkdir_p 'docs/man' + File.write('docs/man/pathspec-rb.man.1', kramdown.to_man) +end + +desc 'Run tests across all Ruby versions using Docker' +task :test_matrix do + ruby_versions = ['3.2', '3.3', '3.4', '4.0.1'] + failed_versions = [] + + ruby_versions.each do |version| + puts "\n#{'=' * 80}" + puts "Testing with Ruby #{version}" + puts '=' * 80 + + cmd = [ + 'docker', 'run', '--rm', + '-v', "#{Dir.pwd}:/app", + '-w', '/app', + "ruby:#{version}", + 'bash', '-c', + 'bundle install && bundle exec rake rubocop spec spec_integration docs' + ].shelljoin + + success = system(cmd) + failed_versions << version unless success + end + + puts "\n#{'=' * 80}" + if failed_versions.any? + puts "FAILED on Ruby versions: #{failed_versions.join(', ')}" + puts '=' * 80 + exit 1 + else + puts 'All Ruby versions passed!' + puts '=' * 80 + end +end + +desc 'Run performance benchmarks (requires benchmark-ips gem)' +task :benchmark do + puts 'Running performance benchmarks...' 
+ puts 'This may take several minutes to complete.' + puts + + benchmark_script = File.join(__dir__, 'benchmarks', 'pattern_scaling.rb') + + unless File.exist?(benchmark_script) + puts 'Error: Benchmark script not found at benchmarks/pattern_scaling.rb' + exit 1 + end -desc "Generate man page for executable script" -task :man_pages do - rst2man = %x{which rst2man}.chomp - unless File.executable?(rst2man) - abort("rst2man could not be found and is needed to build man pages") + # Check if benchmark-ips is available + begin + require 'benchmark/ips' + rescue LoadError + puts 'Error: benchmark-ips gem is not installed' + puts 'Please run: bundle install' + exit 1 end - %x{rst2man docs/man/pathspec-rb.man.1.rst > docs/man/pathspec-rb.man.1} + # Run the benchmark script + system('ruby', benchmark_script) || exit(1) end diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..c35afb3 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,158 @@ +# PathSpec Performance Benchmarks + +This document describes the performance benchmarking methodology and results for the pathspec-ruby library. + +## Methodology + +The benchmarks measure pattern matching performance as the number of patterns increases. This is based on the approach used by [python-pathspec](https://github.com/cpburnz/python-pathspec/blob/master/benchmarks_backends.md). + +### Test Configuration + +- **Pattern counts tested**: 1, 5, 15, 25, 50, 100, 150 patterns +- **Test dataset**: 1000 representative file paths with varying directory depths and file extensions +- **Pattern types**: Mix of glob patterns, directory matches, negations, and complex wildcards +- **Warmup time**: 2 seconds per test +- **Benchmark time**: 5 seconds per test +- **Measurement**: Iterations per second (i/s) using the `benchmark-ips` gem + +### Operations Benchmarked + +1. **Single path matching**: Testing `match()` method on 10 individual paths +2. 
**Batch path matching**: Testing `match_paths()` method on 100 paths at once +3. **Pattern initialization**: Testing `PathSpec.new()` construction time + +### Important Notes + +- File system I/O is not tested; all paths are pre-generated in memory +- Patterns are representative of real-world `.gitignore` files +- Tests focus on GitIgnore-style patterns (the most common use case) +- Results show how performance scales with pattern complexity + +## Running Benchmarks + +To run the benchmarks on your system: + +### Using mise (recommended) + +```bash +# Run benchmarks directly +mise run benchmark +``` + +### Using rake directly + +```bash +# Install dependencies (includes benchmark-ips gem) +bundle install + +# Run benchmarks (this takes several minutes) +bundle exec rake benchmark +``` + +The benchmark task is **not** included in CI pipelines and should be run manually when needed. + +## Results + +### Hardware: Apple M4 Pro + +**Specifications:** +- Processor: Apple M4 Pro +- Cores: 12 (8 performance + 4 efficiency) +- Memory: 24 GB RAM +- OS: macOS + +**Ruby Version:** 3.4.1 (2024-12-25 revision 48d4efcb85) +PRISM [arm64-darwin25] + +#### Single Path Matching Performance + +Testing 10 individual path matches per iteration. + +| Patterns | Iterations/sec | Relative to baseline | Time per iteration | +|----------|----------------|----------------------|--------------------| +| 1 | 442,619 | 1.0x | 2.26 μs | +| 5 | 109,999 | 0.25x (4x slower) | 9.09 μs | +| 15 | 50,291 | 0.11x (8.8x slower) | 19.88 μs | +| 25 | 31,099 | 0.07x (14x slower) | 32.16 μs | +| 50 | 16,539 | 0.04x (27x slower) | 60.46 μs | +| 100 | 8,361 | 0.02x (53x slower) | 119.61 μs | +| 150 | 5,659 | 0.01x (78x slower) | 176.71 μs | + +#### Batch Path Matching Performance (100 paths) + +Testing `match_paths()` with 100 paths per iteration. 
+ +| Patterns | Iterations/sec | Relative to baseline | Time per iteration | +|----------|----------------|----------------------|--------------------| +| 1 | 1,458 | 1.0x | 686 μs | +| 5 | 1,307 | 0.90x (1.1x slower) | 765 μs | +| 15 | 1,137 | 0.78x (1.3x slower) | 879 μs | +| 25 | 1,004 | 0.69x (1.5x slower) | 996 μs | +| 50 | 775 | 0.53x (1.9x slower) | 1.29 ms | +| 100 | 518 | 0.36x (2.8x slower) | 1.93 ms | +| 150 | 392 | 0.27x (3.7x slower) | 2.55 ms | + +#### Pattern Initialization Performance + +Testing `PathSpec.new()` construction time per iteration. + +| Patterns | Iterations/sec | Relative to baseline | Time per iteration | +|----------|----------------|----------------------|--------------------| +| 1 | 285,824 | 1.0x | 3.50 μs | +| 5 | 59,726 | 0.21x (4.8x slower) | 16.74 μs | +| 15 | 18,280 | 0.06x (16x slower) | 54.71 μs | +| 25 | 10,683 | 0.04x (27x slower) | 93.60 μs | +| 50 | 5,003 | 0.02x (57x slower) | 199.88 μs | +| 100 | 2,443 | 0.01x (117x slower) | 409.33 μs | +| 150 | 1,461 | 0.01x (196x slower) | 684.60 μs | + +### Analysis + +#### Performance Scaling Characteristics + +1. **Roughly linear degradation for per-call work**: Single path matching and initialization slow down roughly in proportion to pattern count (initialization slightly super-linearly, showing the steepest decline), while batch matching degrades far more slowly. + +2. **Operation-specific impacts**: + - **Initialization** is most affected: ~196x slower at 150 patterns (0.68ms vs 3.5μs) + - **Single path matching**: ~78x slower at 150 patterns (177μs vs 2.3μs) + - **Batch matching**: ~3.7x slower at 150 patterns (2.55ms vs 686μs) - most resilient + +3. **Practical performance thresholds**: + - **1-25 patterns**: Excellent performance for all operations (< 100μs for initialization) + - **25-50 patterns**: Still very fast, suitable for most applications + - **50-100 patterns**: Noticeable but acceptable performance (~400μs initialization) + - **100+ patterns**: May be noticeable in tight loops or high-throughput scenarios + +4.
**Batch matching efficiency**: The `match_paths()` method shows the best scaling because it amortizes the pattern matching cost across multiple paths. Even with 150 patterns, it can process ~39,200 paths per second (392 iterations × 100 paths). + +5. **Initialization vs matching trade-off**: + - Single pattern initialization is very fast (3.5μs), making it viable to create PathSpec objects on-demand + - With 100+ patterns, initialization cost becomes significant (~400μs), suggesting benefits from caching PathSpec objects + - However, matching operations remain efficient enough for most use cases + +6. **Real-world implications**: + - Typical `.gitignore` files have 20-50 meaningful patterns: performance is excellent + - Large enterprise `.gitignore` files with 100+ patterns: still sub-millisecond for most operations + - For high-frequency matching (e.g., watching file systems), cache PathSpec objects rather than recreating + +## Future Enhancements + +Potential areas for future benchmarking: + +1. **Pattern Complexity**: Compare simple glob patterns vs complex regex patterns +2. **Negation Overhead**: Test performance impact of `!` negation patterns +3. **Directory vs File Patterns**: Compare performance of patterns with trailing `/` +4. **Memory Profiling**: Track memory usage as pattern count increases +5. **Real-world Files**: Test against actual large `.gitignore` files from popular projects +6. **Tree Traversal**: Benchmark `match_tree()` on directory structures of varying sizes +7. **Pattern Types**: Compare GitIgnore vs Regex pattern performance +8. **Ruby Versions**: Compare performance across Ruby 3.2, 3.3, 3.4, and 4.0 + +## Contributing + +When contributing benchmark results: + +1. Always specify your hardware details (processor, cores, memory) +2. Include Ruby version used for testing +3. Run benchmarks multiple times to verify consistency +4. Note any significant background processes that might affect results +5. 
Update this README with your findings diff --git a/benchmarks/pattern_scaling.rb b/benchmarks/pattern_scaling.rb new file mode 100755 index 0000000..2af3388 --- /dev/null +++ b/benchmarks/pattern_scaling.rb @@ -0,0 +1,261 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require 'bundler/setup' +require 'benchmark/ips' +require 'pathspec' + +# Benchmark configuration +PATTERN_COUNTS = [1, 5, 15, 25, 50, 100, 150].freeze +WARMUP_TIME = 2 +BENCHMARK_TIME = 5 + +# Generate a representative set of file paths for testing +def generate_test_paths(count = 1000) + paths = [] + + # Mix of different path types + extensions = %w[.rb .txt .log .tmp .swp .md .yml .json .xml .css .js .html] + directories = %w[src lib test spec config docs bin tmp coverage vendor] + + count.times do |i| + depth = rand(1..4) + parts = depth.times.map { directories.sample } + filename = "file#{i}#{extensions.sample}" + paths << File.join(*parts, filename) + end + + paths +end + +# Generate gitignore patterns of varying complexity +def generate_patterns(count) + base_patterns = base_gitignore_patterns + + # Return the first 'count' patterns, cycling if needed + if count <= base_patterns.length + base_patterns.take(count) + else + patterns = base_patterns.dup + remaining = count - base_patterns.length + remaining.times do |i| + patterns << "generated_pattern_#{i}/**/*" + end + patterns + end +end + +# rubocop:disable Metrics/MethodLength +def base_gitignore_patterns + [ + '*.log', + '*.tmp', + '*.swp', + 'coverage/', + 'tmp/', + 'vendor/bundle/', + '.DS_Store', + '*.gem', + 'node_modules/', + 'dist/', + 'build/', + '*.pyc', + '__pycache__/', + '.env', + '.env.local', + 'secrets.yml', + '*.sqlite3', + 'log/*.log', + 'tmp/**/*', + 'public/assets/', + '.bundle/', + 'vendor/cache/', + 'doc/', + '.yardoc/', + 'coverage/**/*', + '*.rbc', + '*.sassc', + '.sass-cache/', + 'Gemfile.lock', + '.ruby-version', + '.ruby-gemset', + '.rvmrc', + '/config/database.yml', + '/config/secrets.yml', + 
'/config/credentials.yml.enc', + 'npm-debug.log', + 'yarn-error.log', + '.idea/', + '*.iml', + '.vscode/', + '*.code-workspace', + '.project', + '.classpath', + '.settings/', + 'target/', + '*.class', + '*.jar', + '*.war', + 'bin/', + 'obj/', + '*.exe', + '*.dll', + '*.so', + '*.dylib', + '**/*.backup', + '**/*.bak', + '**/*.old', + '**/._*', + '**/.~lock.*', + 'test/fixtures/files/', + 'spec/fixtures/files/', + '!important.txt', + '!config/database.yml.example', + 'cache/**/*.cache', + '*.min.js', + '*.min.css', + 'dist/**/*.map', + 'vendor/**/*.js', + 'public/uploads/', + 'storage/', + '**/*.zip', + '**/*.tar.gz', + '**/*.rar', + '.git/', + '.gitignore', + '.gitmodules', + '.gitattributes', + 'Thumbs.db', + 'Desktop.ini', + '*.lnk', + '*.stackdump', + '*.pid', + '*.seed', + '*.log.*', + 'pids/', + 'logs/', + 'results/', + '.npm/', + '.eslintcache', + '.stylelintcache', + 'reports/', + '*.tsbuildinfo', + '.tox/', + '.pytest_cache/', + '.coverage', + 'htmlcov/', + '*.prof', + '*.lprof', + '*.sage.py', + 'celerybeat-schedule', + '*.mo', + '*.pot', + 'local_settings.py', + 'db.sqlite3', + 'db.sqlite3-journal', + 'media/', + 'staticfiles/', + '.webassets-cache/', + 'instance/', + '.scrapy/', + '.ipynb_checkpoints/', + '__pypackages__/', + '*.manifest', + '*.spec', + 'pip-log.txt', + 'pip-delete-this-directory.txt', + '.env.*.local', + '.cache/', + '.parcel-cache/', + '.next/', + 'out/', + '.nuxt/', + '.vuepress/dist/', + '.serverless/', + '.fusebox/', + '.dynamodb/', + '.tern-port', + '.vscode-test', + '.yarn/cache/', + '.yarn/unplugged/', + '.yarn/build-state.yml', + '.yarn/install-state.gz', + '.pnp.*' + ] +end +# rubocop:enable Metrics/MethodLength + +puts 'PathSpec Performance Benchmark' +puts '=' * 80 +puts 'Testing pattern matching performance with varying pattern counts' +puts 'Hardware: Apple M4 Pro (12 cores: 8 performance + 4 efficiency), 24 GB RAM' +puts "Ruby Version: #{RUBY_VERSION}" +puts 'Test Configuration:' +puts " - Pattern counts: 
#{PATTERN_COUNTS.join(', ')}" +puts ' - Test paths: 1000 representative file paths' +puts " - Warmup time: #{WARMUP_TIME}s" +puts " - Benchmark time: #{BENCHMARK_TIME}s per test" +puts '=' * 80 +puts + +# Pre-generate test data +test_paths = generate_test_paths(1000) +puts "Generated #{test_paths.length} test paths for benchmarking\n\n" + +results = {} + +PATTERN_COUNTS.each do |pattern_count| + patterns = generate_patterns(pattern_count) + pathspec = PathSpec.new(patterns, :git) + + puts "Benchmarking with #{pattern_count} patterns..." + puts '-' * 80 + + results[pattern_count] = {} + + # Benchmark 1: Single path matching + Benchmark.ips do |x| + x.config(time: BENCHMARK_TIME, warmup: WARMUP_TIME) + + x.report('match (single path)') do + test_paths.first(10).each do |path| + pathspec.match(path) + end + end + end + + # Store the result (we'll capture this manually from output) + puts + + # Benchmark 2: Batch path matching + # Note: match_paths expects paths relative to root, so we pass empty root + Benchmark.ips do |x| + x.config(time: BENCHMARK_TIME, warmup: WARMUP_TIME) + + x.report('match_paths (100 paths)') do + pathspec.match_paths(test_paths.first(100), '') + end + end + + puts + + # Benchmark 3: Pattern initialization + Benchmark.ips do |x| + x.config(time: BENCHMARK_TIME, warmup: WARMUP_TIME) + + x.report('initialization') do + PathSpec.new(patterns, :git) + end + end + + puts "\n" +end + +puts '=' * 80 +puts 'Benchmark complete!' +puts '=' * 80 +puts "\nTo analyze results:" +puts '1. Review the iterations/second (i/s) for each pattern count' +puts '2. Compare how performance scales as pattern count increases' +puts '3. 
Identify which operations are most affected by pattern count' +puts "\nNote: Higher i/s (iterations per second) indicates better performance" diff --git a/bin/pathspec-rb b/bin/pathspec-rb index 8c8969e..26096ce 100755 --- a/bin/pathspec-rb +++ b/bin/pathspec-rb @@ -59,7 +59,7 @@ when 'specs_match' end when 'tree' tree_matches = spec.match_tree(path) - if !tree_matches.empty? + if tree_matches.any? puts "Files in #{path} that match #{options[:spec_filename]}" if options[:verbose] puts tree_matches else diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..be9c694 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,82 @@ +
pathspec - Test pathspecs against a specific path
+ +pathspec-rb [OPTIONS] [SUBCOMMAND] [PATH] NAME PATH
pathspec-rb is a tool that accompanies the pathspec-ruby library to help
+you test what match results the library would find using path specs. You can
+either find all specs matching a path, find all files matching specs, or
+verify that a path would match any spec.
https://github.com/highb/pathspec-ruby
+ +| Name | +Description | +
|---|---|
| specs_match | +Find all specs matching path | +
| tree | +Find all files under path matching the spec | +
| match | +Check if the path matches any spec | +
-f <FILENAME>, --file <FILENAME>pathspec-rb defaults to loading .gitignore.-t [git|regex], --type [git|regex]-f option). Defaults to git.-v, --verboseFind all files ignored by git under your source directory:
+ + $ pathspec-rb tree src/
+
+
+List all spec rules that would match for the specified path:
+ + $ pathspec-rb specs_match build/
+
+
+Check that a path matches at least one of the specs in a new version of a +gitignore file:
+ + $ pathspec-rb match -f .gitignore.new spec/fixtures/
+
+
+Brandon High highb@users.noreply.github.com
+ +Gabriel Filion
diff --git a/docs/man/pathspec-rb.man.1.rst b/docs/man/pathspec-rb.man.1.rst deleted file mode 100644 index d18bbfc..0000000 --- a/docs/man/pathspec-rb.man.1.rst +++ /dev/null @@ -1,62 +0,0 @@ -=========== -pathspec-rb -=========== - --------------------------------------- -Test pathspecs against a specific path --------------------------------------- - -:Author: Gabriel Filion -:Date: 2019 -:Manual section: 1 - -Synopsis -======== - -| pathspec-rb [options] [subcommand] [path]